#!/usr/bin/perl
use HTML::LinkExtor;
use Getopt::Std ;
# Command-line options, stored by Getopt::Std in package variables:
#   -b URL  sets $opt_b
#   -t TAG  sets $opt_t
# Both are consumed by extract().
our ($opt_b, $opt_t);

# getopts returns false when it meets an option it does not know; stop with
# a usage line instead of carrying on with settings the user did not intend.
getopts('b:t:')
    or die "Usage: $0 [-b base_url] [-t tag] [file ...]\n";

# With no file arguments, process '-' (standard input).
@ARGV = ('-') unless @ARGV;

for my $file (@ARGV) {
    extract($file);
}
# extract($file)
#
# Parse one HTML document with HTML::LinkExtor and print every link URL found
# in it, one per line, skipping URLs that were already printed.
#
# Arguments:
#   $file - path of the HTML file to read; '-' means standard input.
#
# Package globals read:
#   $opt_b - passed to HTML::LinkExtor->new as its second argument (base URL).
#   $opt_t - when true, only links whose tag equals it (case-insensitively)
#            are printed.
# Package globals written:
#   %seen  - count of each URL encountered; it lives outside the sub, so a
#            URL is printed once per run rather than once per file.
#
# Returns nothing.  A file that cannot be opened is reported with warn() and
# skipped.
sub extract {
    my $file = shift;

    my $fh;
    my $is_stdin = $file eq '-';
    if ($is_stdin) {
        # The old two-argument open treated '-' as STDIN implicitly; the
        # three-argument form does not, so keep the convention by hand.
        $fh = \*STDIN;
    }
    elsif (!open $fh, '<', $file) {
        warn "Couldn't open file $file: $!; skipping\n";
        return;
    }

    my $p = HTML::LinkExtor->new(undef, $opt_b);
    while (read $fh, my $buf, 8192) {
        $p->parse($buf);
    }
    $p->eof;    # tell the parser the document is complete
    close $fh unless $is_stdin;

    for my $ln ($p->links) {
        # Each entry is [ tag, attr_name => url, attr_name => url, ... ].
        my ($tag, @pairs) = @$ln;
        next if $opt_t && lc($opt_t) ne lc($tag);

        # Walk the pairs in order, keeping only the URL half of each.
        while (@pairs) {
            my (undef, $url) = splice @pairs, 0, 2;
            print $url, "\n" unless $seen{$url}++;
        }
    }
    return;
}
####
# Fetch every URL listed in the file "reddit" and print the lines of each
# response that mention "title" (case-insensitive).
#
# URLs are whitespace-separated, as before.  tr puts one per line so that
# `read -r` can take each verbatim: no glob expansion of '?' or '*' in a URL
# and no second round of word splitting.
tr -s ' \t' '\n' < reddit |
while IFS= read -r url || [ -n "$url" ]; do
    # tr can leave an empty first line when the file starts with blanks.
    [ -n "$url" ] || continue
    # Detach stdin so the fetch can never swallow the rest of the URL list.
    GET "$url" < /dev/null | grep -i title
done
##
##
#!/usr/bin/perl
use HTML::TreeBuilder;
# Tag names to report are taken straight from the command line.
my @wanted_tags = @ARGV;

# Build the document tree from the HTML arriving on standard input.
my $document = HTML::TreeBuilder->new;
$document->parse_file(\*STDIN);

# One output line per matching element: its text content with embedded
# newlines flattened to spaces.
foreach my $node ($document->find(@wanted_tags)) {
    (my $text = $node->as_text) =~ tr/\n/ /;
    print "$text\n";
}
##
##
# Fetch every URL listed in the file "reddit" and pipe each response through
# `htmlx title`.
#
# URLs are whitespace-separated, as before.  tr puts one per line so that
# `read -r` can take each verbatim: no glob expansion of '?' or '*' in a URL
# and no second round of word splitting.
tr -s ' \t' '\n' < reddit |
while IFS= read -r url || [ -n "$url" ]; do
    # tr can leave an empty first line when the file starts with blanks.
    [ -n "$url" ] || continue
    # Detach stdin so the fetch can never swallow the rest of the URL list.
    GET "$url" < /dev/null | htmlx title
done