#!/usr/bin/perl
#
# Print every distinct link URL found in the given HTML files, one per line.
#
# Usage: <script> [-b base_url] [-t tag] [file ...]
#   -b base_url   handed to HTML::LinkExtor as the base URL for the links
#   -t tag        only report links found in this tag (case-insensitive)
# With no file arguments, or for a file named "-", standard input is read.

use strict;
use warnings;

use HTML::LinkExtor;
use Getopt::Std;

our ($opt_b, $opt_t);    # filled in by getopts()
getopts('b:t:');

my %seen;                # URLs already printed; shared across all input files

@ARGV = ('-') unless @ARGV;
for my $file (@ARGV) {
    extract($file);
}

# extract($file)
# Parse one HTML file and print each link URL that has not been printed
# before. A file that cannot be opened is reported on STDERR and skipped.
sub extract {
    my ($file) = @_;

    my $fh;
    if ($file eq '-') {
        # Two-argument open treated "-" as standard input; three-argument
        # open does not, so that convention is preserved explicitly here.
        $fh = \*STDIN;
    }
    elsif (!open $fh, '<', $file) {
        warn "Couldn't open file $file: $!; skipping\n";
        return;
    }

    my $p = HTML::LinkExtor->new(undef, $opt_b);
    while (read $fh, my $buf, 8192) {
        $p->parse($buf);
    }
    $p->eof;    # signal end of document so the parser flushes its buffer
    close $fh unless $file eq '-';

    for my $link ($p->links) {
        # Each link is an array ref: [ $tag, $attr_name => $url, ... ].
        my ($tag, @attrs) = @$link;
        next if $opt_t && lc($opt_t) ne lc($tag);

        # Walk the attribute/URL pairs; only the URL is of interest.
        while (my (undef, $url) = splice @attrs, 0, 2) {
            print $url, "\n" unless $seen{$url}++;
        }
    }
    return;
}

##

##

  # Fetch every URL listed in the file `reddit` (assumed: one URL per line)
  # and print the lines of each response that contain "title" in any case.
  # The list is read on fd 3 so the commands inside the loop keep their own
  # stdin, and "$i" is quoted so URL characters such as ? * [ are never
  # glob-expanded or word-split.
  while read -r i <&3 || [ -n "$i" ]; do
    [ -n "$i" ] || continue   # skip blank lines
    GET "$i" | grep -i title
  done 3< reddit

##

##

#!/usr/bin/perl
#
# Read an HTML document from standard input and print the text content of
# every element whose tag name is one of the command-line arguments,
# one element per output line.
#
# Usage: <script> tag [tag ...] < file.html

use strict;
use warnings;

use HTML::TreeBuilder;

my @tags = @ARGV;

# Without any tag names nothing could ever match, so say so up front
# instead of consuming the input and printing nothing.
die "Usage: $0 tag [tag ...] < file.html\n" unless @tags;

my $tree = HTML::TreeBuilder->new;    # empty tree
$tree->parse_file(\*STDIN);

# All requested tag names are passed to find() in a single call.
my @elements = $tree->find(@tags);

for my $element (@elements) {
    my $text = $element->as_text;
    $text =~ tr/\n/ /;    # keep each element on a single output line
    print "$text\n";
}

##

##

  # Fetch every URL listed in the file `reddit` (assumed: one URL per line)
  # and pipe each response through `htmlx title`.
  # The list is read on fd 3 so the commands inside the loop keep their own
  # stdin, and "$i" is quoted so URL characters such as ? * [ are never
  # glob-expanded or word-split.
  while read -r i <&3 || [ -n "$i" ]; do
    [ -n "$i" ] || continue   # skip blank lines
    GET "$i" | htmlx title
  done 3< reddit