#!/usr/bin/perl -w
use strict;
use HTML::TagParser;
use URI::Fetch;
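# Reads a list of Everything2 writeup-listing URLs, one per line, such as
# http://everything2.com/user/ameriwire/writeups
# and extracts the URL of each individual writeup, i.e. the link found
# inside each element of class "type" (shown on the page as e.g. "(thing)").
# Pagination is not followed: if a user's writeups span several pages,
# add the URL of every page to the input list by hand.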
# Usage: <script> URL_LIST_FILE OUTPUT_FILE
#   URL_LIST_FILE - text file holding one listing-page URL per line
#   OUTPUT_FILE   - file to which every extracted writeup URL is appended
#
# For each listing page, every element with CSS class "type" is located and
# the href of its first child element is appended (prefixed with the site
# base URL) to OUTPUT_FILE, one URL per line. Each URL is also echoed to
# STDOUT together with a "Wrote to ..." confirmation.
#
# Dies if an argument is missing, if either file cannot be opened, or if a
# write to / close of the output file fails. Errors raised by
# HTML::TagParser while fetching or parsing a page are not caught here.
my $infile  = $ARGV[0];
my $outfile = $ARGV[1];
die "Usage: $0 URL_LIST_FILE OUTPUT_FILE\n"
    unless defined $infile && defined $outfile;

my $base_url = "http://everything2.com";    # prefix for the extracted hrefs
my $class    = "type";                      # CSS class to search for (.type)

# Opened lazily on the first extracted link, so that (as before) no output
# file is created when nothing is found.
my $outfh;

open (my $infh, '<', $infile) or die "Could not open file '$infile' $!";
while (my $line = <$infh>) {
    chomp ($line);

    # Drop surrounding whitespace (including a stray "\r" from CRLF files)
    # and skip blank lines rather than handing them to the parser.
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    next if $line eq '';

    # Given a URL, HTML::TagParser fetches the page and parses it.
    my $html = HTML::TagParser->new($line);

    # Every element carrying class "type" on this page.
    my @elem = $html->getElementsByClassName($class);

    foreach my $elem (@elem) {
        # The link is expected to be the first child of the .type element;
        # skip elements that have no child or whose child has no href
        # instead of dying on an undefined value.
        my $child = $elem->firstChild();
        next unless defined $child;
        my $ahref = $child->getAttribute("href");
        next unless defined $ahref;

        my $wup = $base_url . $ahref . "\n";    # full writeup URL
        print $wup;

        if (!defined $outfh) {
            open ($outfh, '>>', $outfile)
                or die "Could not open file '$outfile' $!";
            # Unbuffer the handle so each URL reaches the file immediately,
            # as it did when the file was reopened and closed per link.
            my $previous = select($outfh);
            $| = 1;
            select($previous);
        }
        print {$outfh} $wup
            or die "Could not write to file '$outfile' $!";
        print "Wrote to " . $outfile . "\n";
    }
}
close $infh;
if (defined $outfh) {
    close $outfh or die "Could not close file '$outfile' $!";
}