Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

CGI Script for Reading Newest Nodes

by voyager (Friar)
on Jun 14, 2001 at 01:08 UTC ( [id://88207]=sourcecode: print w/replies, xml ) Need Help??
Category: PerlMonks Related Scripts
Author/Contact Info voyager
Description: This script fetches the XML version of Newest Nodes and creates a page similar to the Newest Nodes page. With this script you can control which sections show up and in what order.
#! /perl/bin/perl -w

use strict;
use CGI();
use CGI::Carp qw(fatalsToBrowser warningsToBrowser);
use LWP::Simple;
use XML::Parser;

use vars qw(
    $q
    $TITLE
    $URL
    $NODETYPE_TITLES
    $NODETYPES_TO_SHOW_FIRST
    $NODETYPES_TO_IGNORE
    $SHOW_AUTHOR
    $xml_source_link
    $nodes_by_type $current_node $current_node_char
    $authors
    $info
);

main();

sub main {
    init();
    get_and_parse_xml();
    print
          $q->header
        , $q->start_html({-title => $TITLE})
        , $q->h1($TITLE)
        , $q->i($info->{cdata})
        , (' ' x 4) . $xml_source_link
    ;
    nodes_to_html();
    ignored_nodes_html();
    print $q->end_html;
}

sub init {
    $| = 1;
    $q = CGI->new;
    $URL = "http://www.perlmonks.org/index.pl?node=Newest%20Nodes%20XM
+L%20Generator";
    $xml_source_link = $q->a({-href => "view-source:$URL"}, 'View the 
+XML Source');
    $TITLE = "Perl Monks - Newest Nodes via XML";
    $NODETYPE_TITLES = {
          perlquestion => "Questions"
        , perlmeditation => "Meditations"
        , user => "Users"
        , note => "Notes"
        , sourcecode => "Code"
        , CUFP => "Cool Uses For Perl"
        , poem => "Poetry"
        , perltutorial => "Tutorials"
        , modulereview => "Module Reviews"
        , obfuscated => "Obfuscation"
        , "categorized question" => "Categorized Questions"
        , "categorized answer" => "Categorized Answers"
        , "monkdiscuss" => "Discussion"
    };
    $NODETYPES_TO_SHOW_FIRST = [
          "perlquestion"
        , "perlmeditation"
        , "monkdiscuss"
        , "CUFP"
        , "sourcecode"
        , "modulereview"
        , "poem"
        , "categorized question"
        , "categorized answer"
    ];
    $NODETYPES_TO_IGNORE = [
          "user"
        , "note"
        , "obfuscated"
    ];
    $SHOW_AUTHOR = 1;
    $nodes_by_type = {};
    $authors = {};
}

####
# subs for parsing xml, handling tags, etc.
####

sub get_and_parse_xml {
    my $xml = get($URL) || die " couldn't get xml";
    my $p = XML::Parser->new (
        Handlers =>{
        Start => \&start_tag,
        End   => \&end_tag,
        Char  => \&char_data
    },) || die "Couldn't create new Parser";
    eval { $p->parse($xml) };
    die "couldn't parse: $@" if $@;
}
sub start_tag {
    my ($p, $el, %attrs) = @_;
    $current_node = {};
    %$current_node = %attrs;
    $current_node_char = '';
}
sub end_tag {
    my ($p, $el) = @_;
    $current_node->{cdata} = $current_node_char;
    INFO_tag() if $el eq 'INFO';
    NODE_tag() if $el eq 'NODE';
    AUTHOR_tag() if $el eq 'AUTHOR';
}
sub char_data {
    my ($p, $string) = @_;
    $current_node_char .= $string;
}
sub INFO_tag {
    $info = $current_node;
}
sub NODE_tag {
    my $nodetype = $current_node->{nodetype};
    push(@{$nodes_by_type->{$nodetype}}, $current_node);    
}
sub AUTHOR_tag {
    my $author_id = $current_node->{node_id};
    my $author_name = $current_node->{cdata};
    $authors->{$author_id} = $author_name;
}

###
# subs for taking parsed nodes and generating html
####

sub nodes_to_html {
    my $html = '';
    foreach my $nodetype (@$NODETYPES_TO_SHOW_FIRST) {
        $html .= html_by_type($nodetype);
        delete $nodes_by_type->{$nodetype};
    }
    foreach my $nodetype (@$NODETYPES_TO_IGNORE) {
        delete $nodes_by_type->{$nodetype};
    }
    foreach my $nodetype (keys %$nodes_by_type) {
        $html .= html_by_type($nodetype);
    }
}

sub html_by_type {
    my $nodetype = shift;
    print $q->h2( node_title($nodetype) );
    my $nodes = $nodes_by_type->{$nodetype} || [];
    unless (@$nodes) {
        print $q->ul($q->li('No nodes') );
        return;
    }
    my $url_base = 'http://www.perlmonks.org/?node_id=';
    print '<ul>';
    foreach my $node (@$nodes) {
        my $link = $q->a({-href => "$url_base$node->{node_id}"}, $node
+->{cdata});
        my $node_text = $q->li($link);
        my $author = $authors->{$node->{author_user}};
        $node_text .= ('&nbsp;' x 2) . $q->small(" by $author") if $SH
+OW_AUTHOR;
        print $node_text;
    }
    print '</ul>';
}

sub ignored_nodes_html {
    my $html = '';
    foreach my $ignore_type (@$NODETYPES_TO_IGNORE) {
        $html .= $q->li( node_title($ignore_type) );
    }
    return unless $html;
    print $q->h2('Ignored Node Types')
        , $q->ul($html)
    ;
}

sub node_title { $NODETYPE_TITLES->{$_[0]} || $_[0] }
Replies are listed 'Best First'.
Re: CGI Script for Reading Newest Nodes
by epoptai (Curate) on Jun 14, 2001 at 07:57 UTC
    Looking good voyager. Here's a small patch that will convert any UTF-8 extended ascii from XML::Parser to latin1 so odd nodetitles or usernames look right.

    Example nodetitle from today's new nodes:

    Before: Reactionary Coding—One-Shot Programs
    After : Reactionary Coding—One-Shot Programs

    Add this conversion subroutine:

    sub UTF8_latin1 { # UTF-8 to latin1 regex from XML::TiePYX (thanks to mirod) my($text) = @_; $text =~ s{([\xc0-\xc3])(.)}{ my $hi = ord($1); my $lo = ord($2); chr((($hi & 0x03) <<6) | ($lo & 0x3F)) }ge; return $text; }
    Add two lines to the foreach loop in sub html_by_type:
    foreach my $node (@$nodes) { $node->{cdata} = UTF8_latin1($node->{cdata}); # ADDED my $link = $q->a({-href => "$url_base$node->{node_id}"}, $node +->{cdata}); my $node_text = $q->li($link); my $author = $authors->{$node->{author_user}}; $author = UTF8_latin1($author); # ADDED $node_text .= ('&nbsp;' x 2) . $q->small(" by $author") if $SH +OW_AUTHOR; print $node_text; }
    Enjoy.

    --
    Check out my Perlmonks Related Scripts like framechat, reputer, and xNN.

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://88207]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others musing on the Monastery: (5)
As of 2024-03-29 12:33 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found