#!/usr/bin/perl
use HTML::LinkExtor;
use Getopt::Std ;
# Command-line switches, both taking a value: -b is stored in $opt_b (later
# handed to HTML::LinkExtor->new) and -t in $opt_t (compared against each
# link's tag name inside extract).
getopts('b:t:');

# With no file arguments, fall back to the single name '-', which extract
# opens as standard input.
push @ARGV, '-' if !@ARGV;

# Handle every named input in the order given.
foreach my $path (@ARGV) {
    extract($path);
}
# extract($file)
#
# Parses the HTML in $file with HTML::LinkExtor and prints every link URL it
# reports, one per line, skipping URLs that were already printed.  %seen is a
# package variable, so the de-duplication spans all calls, not just one file.
#
#   $file - path of the file to read; the single name '-' means STDIN.
#
# Reads two package variables:
#   $opt_b - passed to HTML::LinkExtor->new as its second argument.
#   $opt_t - when true, only links whose tag name equals it
#            (case-insensitively) are printed.
#
# A file that cannot be opened produces a warning and is skipped.  A read
# error produces a warning; whatever was read before it is still processed.
sub extract {
    my $file = shift;

    # Declare the package globals this sub relies on so it keeps compiling
    # if strictures are ever enabled; this only aliases the existing globals.
    our ($opt_b, $opt_t, %seen);

    # Three-argument open takes the name literally (two-argument open strips
    # surrounding whitespace and interprets special characters), so the
    # conventional '-' for standard input has to be handled explicitly.
    my $reading_stdin = $file eq '-';
    my $fh;
    if ($reading_stdin) {
        $fh = \*STDIN;
    }
    elsif (!open($fh, '<', $file)) {
        warn "Couldn't open file $file: $!; skipping\n";
        return;
    }

    my $p = HTML::LinkExtor->new(undef, $opt_b);

    # Feed the document to the parser in 8 KiB chunks.
    my $buf;
    while (1) {
        my $got = read $fh, $buf, 8192;
        if (!defined $got) {
            # read returns undef on error, as opposed to 0 at end of file.
            warn "Error reading $file: $!\n";
            last;
        }
        last if $got == 0;
        $p->parse($buf);
    }

    # Tell the parser the document is complete so it can flush anything it
    # is still holding back.
    $p->eof;

    # Leave STDIN open for the rest of the program; close real files.
    close $fh unless $reading_stdin;

    # Each link is an array ref: [ $tag, $attr_name => $url, ... ].
    for my $link ($p->links) {
        my ($tag, @attr_pairs) = @$link;
        next if $opt_t && lc($opt_t) ne lc($tag);

        # Walk the attribute/URL pairs; only the URL is of interest.
        while (my ($attr, $url) = splice @attr_pairs, 0, 2) {
            print $url, "\n" unless $seen{$url}++;
        }
    }

    return;
}