#!/usr/bin/perl -w use strict; use HTML::TagParser; use URI::Fetch; # Take list of URLs like # http://everything2.com/user/ameriwire/writeups # and extract specific writeup URLs: "(thing)" # (Have to manually add multiple pages of WUs) my $infile = $ARGV[0]; # Give URL list file in first arg my $outfile = $ARGV[1]; # Give output file in second arg my $outfh; open (my $infh, '<', $infile) or die "Could not open file '$infile' $!"; while (my $line = <$infh>) { chomp ($line); my $class = "type"; # .type my $html = HTML::TagParser->new($line); #Fetch+parse HTML file my @elem = $html->getElementsByClassName($class); #Grab each instance of .type into array # (person) foreach (@elem) { # iterate through array my $child = $_->firstChild(); # = under my $ahref = $child->getAttribute("href"); # return value of attrib href my $wup = "http://everything2.com" . $ahref . "\n"; # "writeup" print "http://everything2.com" . $ahref . "\n"; open ($outfh, '>>', $outfile) or die "Could not open file '$outfile' $!"; print $outfh $wup; # Text to file close $outfh; print "Wrote to " . $outfile . "\n"; } } #### [v@vonunov ~/perl]$ cat infile.txt http://everything2.com/user/ameriwire/writeups [v@vonunov ~/perl]$ ./get-wus.pl infile.txt outfile.txt http://everything2.com/user/ameriwire/writeups/diverticulosis Wrote to outfile.txt http://everything2.com/user/ameriwire/writeups/W.+Mark+Felt Wrote to outfile.txt http://everything2.com/user/ameriwire/writeups/moral+law Wrote to outfile.txt http://everything2.com/user/ameriwire/writeups/altruism Wrote to outfile.txt [etc.] [v@vonunov ~/perl]$ head outfile.txt http://everything2.com/user/ameriwire/writeups/diverticulosis http://everything2.com/user/ameriwire/writeups/W.+Mark+Felt http://everything2.com/user/ameriwire/writeups/moral+law http://everything2.com/user/ameriwire/writeups/altruism #### #!/usr/bin/perl -w use strict; use HTML::TagParser; use URI::Fetch; # Take list of URLs like # http://everything2.com/user/ameriwire/writeups # and extract specific writeup URLs: "(thing)" # (Have to manually add multiple pages of WUs) my $infile = $ARGV[0]; # Give URL list file in first arg my $outfile = $ARGV[1]; # Give output file in second arg my $outfh; open (my $infh, '<', $infile) or die "Could not open file '$infile' $!"; while (my $line = <$infh>) { chomp ($line); my $class = "type"; # .type my $id = "mainbody"; # #mainbody my $html = HTML::TagParser->new($line); #Fetch+parse HTML file my $body = $html->getElementById($id); #^If we don't do this we get sidebar WUs too my @elem = $body->getElementsByClassName($class); #Grab each instance of .type into array # (person) foreach (@elem) { # iterate through array my $child = $_->firstChild(); # = under my $ahref = $child->getAttribute("href"); # return value of attrib href my $wup = "http://everything2.com" . $ahref . "\n"; # "writeup" print "http://everything2.com" . $ahref . "\n"; open ($outfh, '>>', $outfile) or die "Could not open file '$outfile' $!"; print $outfh $wup; # Text to file close $outfh; print "Wrote to " . $outfile . "\n"; } } #### [v@vonunov ~/perl]$ ./get-wus-bad.pl infile.txt outfile.txt Can't locate object method "getElementsByClassName" via package "HTML::TagParser::Element" at ./get-wus-bad.pl line 31, <$infh. #### [v@vonunov ~/perl]$ ./get-wus-bad.pl infile.txt outfile.txt Can't locate object method "getElementsByClassName" via package "HTML::TagParser::Element" at ./get-wus-bad.pl line 32, <$infh> line 1 (#1) (F) You called a method correctly, and it correctly indicated a package functioning as a class, but that package doesn't define that particular method, nor does any of its base classes. See perlobj. Uncaught exception from user code: Can't locate object method "getElementsByClassName" via package "HTML::TagParser::Element" at ./get-wus-bad.pl line 32.