# Lists every document referenced, directly or transitively, through
# XInclude (xi:include/@href) starting from the file or URL given as the
# first command-line argument.  The unique absolute URLs of the included
# documents are printed, sorted, one per line.
#
# Documents that cannot be fetched, decoded or parsed are reported on STDERR
# and skipped, so a single bad document does not abort the whole crawl.

use strict;
use warnings;
use feature qw( say );

use LWP::UserAgent qw( );
use URI qw( );
use URI::file qw( );
use XML::LibXML qw( );
use XML::LibXML::XPathContext qw( );

if (!@ARGV || !defined($ARGV[0]) || !length($ARGV[0])) {
    die("usage: $0 <file-or-url>\n");
}

my $parser = XML::LibXML->new();

my $xpc = XML::LibXML::XPathContext->new();
$xpc->registerNs('xi', 'http://www.w3.org/2001/XInclude');

my $ua = LWP::UserAgent->new();

# A relative argument is resolved against the current directory, so plain
# file paths work as well as absolute URLs.
my $root_url = URI->new_abs($ARGV[0], URI::file->cwd());

my @todo = $root_url;

# %queued: every URL that has ever been put on @todo (including the root),
#          so that each document is fetched at most once even when includes
#          form a cycle back to the root.
# %found:  every URL that appeared as an include target; this is the output.
my %queued = ( $root_url => 1 );
my %found;

while (@todo) {
    my $url = pop(@todo);

    my $response = $ua->get($url);
    if (!$response->is_success()) {
        warn("Can't get $url: " . $response->status_line() . "\n");
        next;
    }

    # charset => 'none' undoes any Content-Encoding but leaves the bytes
    # undecoded, so the XML parser can apply the document's own encoding
    # declaration.
    my $xml = $response->decoded_content( charset => 'none' );
    if (!defined($xml)) {
        warn("Can't decode content of $url\n");
        next;
    }

    # parse_string throws on malformed input; trap it so the remaining
    # documents are still processed.
    my $doc = eval { $parser->parse_string($xml) };
    if (!defined($doc)) {
        my $error = length("$@") ? "$@" : 'unknown error';
        $error =~ s/\s+\z//;
        warn("Can't parse $url: $error\n");
        next;
    }

    for my $href ($xpc->findnodes('//xi:include/@href', $doc)) {
        # Relative hrefs are resolved against the URL of the including
        # document.
        my $child_url = URI->new_abs($href->getValue(), $url);
        $found{$child_url} = 1;
        push @todo, $child_url if !$queued{$child_url}++;
    }
}

say for sort keys %found;