use strict;
use warnings;
use feature qw( say );
use LWP::UserAgent qw( );
use URI qw( );
use URI::file qw( );
use XML::LibXML qw( );
use XML::LibXML::XPathContext qw( );
# Crawl an XML document and list every resource it pulls in through
# XInclude (<xi:include href="...">), directly or transitively.
#
# Usage: <script> FILE_OR_URL
#
# The argument may be a path or a URL, relative or absolute; it is resolved
# against the current working directory. Every distinct include target is
# printed once, as an absolute URL, in sorted order. Resources that cannot be
# fetched or parsed are reported on STDERR and skipped.

# Without an argument the root would silently resolve to the working
# directory itself, so fail early with a usage message instead.
die("usage: $0 FILE_OR_URL\n")
    if !@ARGV || $ARGV[0] eq '';

my $parser = XML::LibXML->new();
my $xpc    = XML::LibXML::XPathContext->new();
$xpc->registerNs('xi', 'http://www.w3.org/2001/XInclude');
my $ua = LWP::UserAgent->new();

my $root_url = URI->new_abs($ARGV[0], URI::file->cwd());

my @todo   = $root_url;             # URLs still to be fetched and scanned
my %found;                          # every include target seen (the output)
my %queued = ( $root_url => 1 );    # URLs already scheduled for fetching

while (@todo) {
    my $url = pop(@todo);

    my $response = $ua->get($url);
    if (!$response->is_success()) {
        warn("Can't get $url: " . $response->status_line() . "\n");
        next;
    }

    # charset => 'none' keeps the raw bytes so the XML parser can apply the
    # encoding declared in the document itself.
    my $xml = $response->decoded_content( charset => 'none' );

    # parse_string() dies on malformed input; treat that like a failed
    # fetch (warn and move on) rather than aborting the whole crawl.
    my $doc = eval { $parser->parse_string($xml) };
    if (!defined($doc)) {
        my $error = "$@";
        $error =~ s/\s+\z//;
        $error = 'unknown error' if $error eq '';
        warn("Can't parse $url: $error\n");
        next;
    }

    for my $include ($xpc->findnodes('//xi:include[@href]', $doc)) {
        # Relative hrefs are resolved against the including document.
        my $child_url = URI->new_abs($include->getAttribute('href'), $url);
        $found{$child_url} = 1;

        # parse="text" targets are included verbatim, not as XML, so they
        # are reported but never fetched and scanned for nested includes.
        my $parse_mode = $include->getAttribute('parse') // 'xml';
        next if $parse_mode eq 'text';

        push @todo, $child_url
            if !$queued{$child_url}++;
    }
}

say for sort keys %found;
# Update: Fixed the constructor. Made the URL absolute, as required.