This snippet fetches the code, poetry, obfu, cufp, and snippets sections from PerlMonks and saves them in an organized directory structure.

Thanks

#!/usr/bin/perl
# MonkMirror.pl -- mirror the code-bearing sections of PerlMonks to disk.
#
# For every section listed in %profile the script walks the section's listing
# pages, extracts (node_id, title, author) for each post and stores three
# files per node under ./perlmonks/<last digit of node_id>/<node_id>/ :
#
#   code -- the node fetched with displaytype=displaycode
#   meta -- the entity-decoded title, a newline, then the author
#   desc -- the description field captured from the node's XML view
#
# A node whose directory already exists counts as a duplicate.  As soon as
# more than half of the records on a listing page are duplicates, the section
# is considered up to date and the script moves on to the next section.
#
# NOTE(review): this listing was recovered from a copy in which all line
# breaks had been collapsed and PerlMonks' "+" wrap markers had leaked into
# the text.  The wrap markers have been removed; the line layout of the
# multi-line extraction template is a best-effort reconstruction and should
# be checked against the live page markup before relying on it.
use strict;
use warnings;
use IO::All;
use LWP::Simple;
use Data::Dumper;
use HTML::Entities;
use Template::Extract;

my $base  = 'http://perlmonks.org/';
my $root  = './perlmonks';
my $extor = Template::Extract->new;

# Captures the href of the "Next entries" link on a listing page.
my $default_next_template =
    qr{<td align=right width=100><a href="(.+?)">Next entries--></a>&nbsp;</td></tr></table>};

# Captures the body of the "doctext" field in a node's XML view.
my $default_desc_template =
    qr{<data>.+?<field name="doctext">(.+?)</field>.+?</data>}s;

# Template::Extract template applied to listing pages; yields a list of
# records, each with node_id, title and author.
my $default_template = <<'END_DEFAULT_TEMPLATE';
[% FOREACH record %][% ... %]
<tr class = "post_head">[% ... %]
<td>[% ... %]
<a HREF="?node_id=[% node_id %]">[% title %]</a><br />[% ... %]
on[% ... %]
by <a HREF="[% ... %]">[% author %]</a>[% ... %]
</td>[% ... %]
</tr>
[% END %]
END_DEFAULT_TEMPLATE

# Per-section settings.  "url" is the first listing page; the optional
# template / next_template / desc_template keys override the defaults above.
my %profile = (
    code => {
        url           => $base . "?node=Code Catacombs",
        desc_template => qr{<field name="codedescription">(.+?)</field>}s,
    },
    poetry => {
        url => $base . '?node=Perl Poetry',
    },
    cooluses => {
        url => $base . '?node=Cool Uses for Perl',
    },
    obfu => {
        url => $base . '?node=Obfuscated Code',
    },
    snippets => {
        url           => $base . '?node=Snippets Section',
        next_template => qr{&nbsp;&nbsp;<A HREF="(.+?)">Next 20--&gt;</A>},
        desc_template => qr{<field name="snippetdesc">(.+?)</field>}s,
        template      => <<'END_SNIPPETS_TEMPLATE',
<UL>[% FOREACH record %]<li><a HREF="?node_id=[% node_id %]">[% title %]</a>[% ... %]by[% ... %]<a HREF="[% ... %]">[% author %]</a>[% END %]<BR><BR>
END_SNIPPETS_TEMPLATE
    },
);

# clean_data(\$text)
# Normalises the referenced string in place: CRLF becomes LF, leading
# newlines are removed, trailing newlines are squeezed to a single one, and
# HTML entities are decoded.  An undefined value is left untouched so that a
# failed download or a failed match does not raise warnings.
# Returns the (possibly undefined) cleaned string.
sub clean_data {
    my $data = shift;
    if (defined $$data) {
        $$data =~ s/\r\n/\n/sg;
        $$data =~ s/^\n+//s;
        $$data =~ s/\n+$/\n/s;
        $$data = decode_entities($$data);
    }
    return $$data;
}

# ensure_dir($dir)
# Creates $dir unless it already exists; dies if creation fails, because
# nothing useful can be written afterwards.
sub ensure_dir {
    my ($dir) = @_;
    return if -d $dir;
    mkdir $dir or die "Cannot create directory $dir: $!\n";
    return;
}

# next_page_url($doc, $next_re)
# Returns the absolute URL of the following listing page, or undef when $doc
# contains no "next" link.  The capture is taken from the match's own return
# list rather than from $1, so a failed match can never yield a stale value.
sub next_page_url {
    my ($doc, $next_re) = @_;
    my ($href) = $doc =~ $next_re;
    return unless $href;
    return $base . $href;
}

# mirror_node($record, $desc_re)
# Downloads one node described by $record (hash with node_id, title, author)
# and writes its code, meta and desc files.
# Returns true when the node was already on disk (a duplicate), false
# otherwise -- including when the node was skipped.
sub mirror_node {
    my ($r, $desc_re) = @_;
    my $id = $r->{node_id};

    # node_id is scraped from HTML and ends up in file paths and URLs, so
    # anything that is not purely numeric is rejected.
    unless (defined $id && $id =~ /\A\d+\z/) {
        warn "Skipping record with unusable node_id\n";
        return 0;
    }

    # Nodes are bucketed by the last digit of their id.
    my $dir = "$root/" . substr($id, -1) . "/$id";
    return 1 if -e $dir;

    print '-- ', join(q/ /, @{$r}{qw(title author node_id)}), "\n";

    # CODE
    my $code = get($base . '?node_id=' . $id . ';displaytype=displaycode');
    clean_data(\$code);
    return 0 unless defined $code && length $code;

    ensure_dir($dir);
    io("$dir/code")->print($code);

    # META
    io("$dir/meta")->print(decode_entities($r->{title}) . "\n" . $r->{author});

    # DESCRIPTIONS
    my $xml = get($base . '?displaytype=xml;node_id=' . $id);
    my ($desc) = defined($xml) ? ($xml =~ $desc_re) : ();
    clean_data(\$desc);
    # A missing description still produces an (empty) desc file.
    io("$dir/desc")->print(defined $desc ? $desc : '');

    return 0;
}

ensure_dir($root);
ensure_dir("$root/$_") for 0 .. 9;

for my $p (keys %profile) {
    my $section  = $profile{$p};
    my $template = defined $section->{template}
                 ? $section->{template}
                 : $default_template;
    my $next_re  = $section->{next_template} || $default_next_template;
    my $desc_re  = $section->{desc_template} || $default_desc_template;
    my $url      = $section->{url};

    # Stop when there is no further listing page; looping unconditionally
    # would spin forever on an undefined URL once the last page is reached.
    PAGE:
    while (defined $url) {
        print "<< $p >>\n";
        print $url, "\n";

        my $doc = get($url);
        unless (defined $doc) {
            warn "Could not fetch $url; giving up on $p\n";
            last PAGE;
        }

        my $rec     = $extor->extract($template => $doc);
        my @records = ($rec && $rec->{record}) ? @{ $rec->{record} } : ();

        # Work out the next page before processing this one.
        $url = next_page_url($doc, $next_re);

        my $dup_count = 0;
        for my $r (@records) {
            $dup_count++ if mirror_node($r, $desc_re);
        }

        if ($dup_count > @records / 2) {
            print "No further updates from $p\n";
            last PAGE;
        }
    }
}

__END__

In reply to MonkMirror.pl by xern

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.