#!/usr/bin/perl
#
# Link extractor.
#
# Prints, one per line, the absolute URL of every <a href="..."> (or, with
# -i, every <img src="...">) in an HTML document whose attribute value
# matches an optional regex.
#
# Usage: link_extractor [-i] <url-or-file> [regex]
#   -i             look at <img src> instead of <a href>
#   <url-or-file>  an existing local file is parsed directly; anything else
#                  is fetched with LWP::Simple::get
#   [regex]        Perl regex the attribute value must match (default: '.',
#                  i.e. any non-empty value)
use strict;
use warnings;

use HTML::TreeBuilder;
use LWP::Simple;
use URI;
use Getopt::Std;

my $usage = "Usage: $0 [-i] <url-or-file> [regex]\n";

my %opts;
# getopts returns false (after warning) when it meets an unknown switch.
getopts('i', \%opts) or die $usage;

# Which element and which attribute of it carries the link target.
my ($tag, $href) = exists $opts{i} ? ('img', 'src') : ('a', 'href');

if (@ARGV < 1 or @ARGV > 2) {
    die "Invalid number of arguments\n$usage";
}

my ($url, $regex) = @ARGV;

# Fall back to the default only when no pattern (or an empty one) was given;
# a plain truth test would also throw away a legitimate pattern of '0'.
$regex = '.' unless defined $regex && length $regex;

# Compile the user-supplied pattern once, up front, so that a malformed
# regex is reported clearly instead of failing inside the tree search.
my $pattern = eval { qr{$regex} };
die "Invalid regex '$regex': $@" unless defined $pattern;

# Base against which the extracted attribute values are resolved.
my $uri = URI->new($url);

my $tree;
if (-f $url) {
    $tree = HTML::TreeBuilder->new_from_file($url);
}
else {
    my $content = get($uri);
    die "Could not fetch '$url'\n" unless defined $content;
    $tree = HTML::TreeBuilder->new_from_content($content);
}
die "Could not parse '$url'\n" unless defined $tree;

for my $link ($tree->look_down(_tag => $tag, $href => $pattern)) {
    my $link_url = URI->new_abs($link->attr($href), $uri);
    print $link_url->as_string, "\n";
}
In reply to Link extractor by marcelo.magallon
| For: | Use: |
| --- | --- |
| `&` | `&amp;` |
| `<` | `&lt;` |
| `>` | `&gt;` |
| `[` | `&#91;` |
| `]` | `&#93;` |