#!/usr/bin/perl
use strict;
use warnings;

use HTML::TagParser;
use URI::Fetch;

# Extract individual writeup URLs from everything2.com user listing pages.
#
# Usage: script.pl <url-list-file> <output-file>
#
#   <url-list-file>  Text file with one listing-page URL per line, e.g.
#                    http://everything2.com/user/ameriwire/writeups
#                    (Additional pages of a user's writeups have to be
#                    added to the list by hand.)
#   <output-file>    File that the extracted writeup URLs are appended to,
#                    one per line.
#
# For every listing page, each element with class "type" is looked up, the
# href attribute of its first child is read, and the site base URL is
# prepended to it. Each resulting URL is echoed to STDOUT and appended to
# the output file.
#
# Dies if an argument is missing, if either file cannot be opened, or if a
# write to the output file fails. Errors raised by HTML::TagParser while
# fetching or parsing a page are not caught and also end the script.

my $base_url   = "http://everything2.com";
my $class_name = "type";    # CSS class of the elements wrapping each link

my $infile  = $ARGV[0];     # URL list file
my $outfile = $ARGV[1];     # output file (appended to)

die "Usage: $0 <url-list-file> <output-file>\n"
    unless defined $infile && defined $outfile;

open(my $infh, '<', $infile)
    or die "Could not open file '$infile' $!";

# Open the output once instead of once per extracted link; append mode keeps
# results from earlier runs.
open(my $outfh, '>>', $outfile)
    or die "Could not open file '$outfile' $!";

while (my $line = <$infh>) {
    chomp($line);
    $line =~ s/\A\s+|\s+\z//g;    # tolerate CRLF endings and stray spaces
    next if $line eq '';          # nothing to fetch for a blank line

    # Given a URL, HTML::TagParser fetches the page and parses it.
    my $html  = HTML::TagParser->new($line);
    my @elems = $html->getElementsByClassName($class_name);

    foreach my $elem (@elems) {
        # The link is expected to be the first child of the ".type" element
        # (presumably an <a> tag -- verify against the live page markup).
        my $child = $elem->firstChild();
        if (!defined $child) {
            warn "Skipping .$class_name element without a child in '$line'\n";
            next;
        }

        my $href = $child->getAttribute("href");
        if (!defined $href) {
            warn "Skipping .$class_name child without an href in '$line'\n";
            next;
        }

        my $wup = $base_url . $href . "\n";    # full "writeup" URL

        print $wup;
        print {$outfh} $wup
            or die "Could not write to file '$outfile' $!";
        print "Wrote to " . $outfile . "\n";
    }
}

close($infh);

# Buffered write errors only surface at close, so check it.
close($outfh)
    or die "Could not close file '$outfile' $!";