#!/usr/bin/perl -w
use strict;
use HTML::TagParser;
use URI::Fetch;
# Take list of URLs like
# http://everything2.com/user/ameriwire/writeups
# and extract specific writeup URLs: "(thing)"
# (Have to manually add multiple pages of WUs)
my $infile = $ARGV[0];
# Give URL list file in first arg
my $outfile = $ARGV[1];
# Give output file in second arg
my $outfh;
open (my $infh, '<', $infile) or die "Could not open file '$infile' $!";
while (my $line = <$infh>) {
chomp ($line);
my $class = "type"; # .type
my $html = HTML::TagParser->new($line); #Fetch+parse HTML file
my @elem = $html->getElementsByClassName($class); #Grab each instance of .type into array
# (person)
foreach (@elem) { # iterate through array
my $child = $_->firstChild(); # = under
my $ahref = $child->getAttribute("href"); # return value of attrib href
my $wup = "http://everything2.com" . $ahref . "\n"; # "writeup"
print "http://everything2.com" . $ahref . "\n";
open ($outfh, '>>', $outfile) or die "Could not open file '$outfile' $!";
print $outfh $wup; # Text to file
close $outfh;
print "Wrote to " . $outfile . "\n";
}
}
####
[v@vonunov ~/perl]$ cat infile.txt
http://everything2.com/user/ameriwire/writeups
[v@vonunov ~/perl]$ ./get-wus.pl infile.txt outfile.txt
http://everything2.com/user/ameriwire/writeups/diverticulosis
Wrote to outfile.txt
http://everything2.com/user/ameriwire/writeups/W.+Mark+Felt
Wrote to outfile.txt
http://everything2.com/user/ameriwire/writeups/moral+law
Wrote to outfile.txt
http://everything2.com/user/ameriwire/writeups/altruism
Wrote to outfile.txt
[etc.]
[v@vonunov ~/perl]$ head outfile.txt
http://everything2.com/user/ameriwire/writeups/diverticulosis
http://everything2.com/user/ameriwire/writeups/W.+Mark+Felt
http://everything2.com/user/ameriwire/writeups/moral+law
http://everything2.com/user/ameriwire/writeups/altruism
####
#!/usr/bin/perl -w
use strict;
use HTML::TagParser;
use URI::Fetch;
# Take list of URLs like
# http://everything2.com/user/ameriwire/writeups
# and extract specific writeup URLs: "(thing)"
# (Have to manually add multiple pages of WUs)
my $infile = $ARGV[0];
# Give URL list file in first arg
my $outfile = $ARGV[1];
# Give output file in second arg
my $outfh;
open (my $infh, '<', $infile) or die "Could not open file '$infile' $!";
while (my $line = <$infh>) {
chomp ($line);
my $class = "type"; # .type
my $id = "mainbody"; # #mainbody
my $html = HTML::TagParser->new($line); #Fetch+parse HTML file
my $body = $html->getElementById($id);
#^If we don't do this we get sidebar WUs too
my @elem = $body->getElementsByClassName($class); #Grab each instance of .type into array
# (person)
foreach (@elem) { # iterate through array
my $child = $_->firstChild(); # = under
my $ahref = $child->getAttribute("href"); # return value of attrib href
my $wup = "http://everything2.com" . $ahref . "\n"; # "writeup"
print "http://everything2.com" . $ahref . "\n";
open ($outfh, '>>', $outfile) or die "Could not open file '$outfile' $!";
print $outfh $wup; # Text to file
close $outfh;
print "Wrote to " . $outfile . "\n";
}
}
####
[v@vonunov ~/perl]$ ./get-wus-bad.pl infile.txt outfile.txt
Can't locate object method "getElementsByClassName" via package "HTML::TagParser::Element" at ./get-wus-bad.pl line 31, <$infh.
####
[v@vonunov ~/perl]$ ./get-wus-bad.pl infile.txt outfile.txt
Can't locate object method "getElementsByClassName" via package
"HTML::TagParser::Element" at ./get-wus-bad.pl line 32, <$infh> line 1 (#1)
(F) You called a method correctly, and it correctly indicated a package
functioning as a class, but that package doesn't define that particular
method, nor does any of its base classes. See perlobj.
Uncaught exception from user code:
Can't locate object method "getElementsByClassName" via package "HTML::TagParser::Element" at ./get-wus-bad.pl line 32.