Re: XML::LibXML question: How to list XInclude files, which are supposed to be included?
by toolic (Bishop) on Jul 06, 2011 at 17:11 UTC
|
I've never used it or even heard of it, but a quick search of CPAN turns up XML::LibXML::Parser:
# Processing XInclude
# Quoted synopsis lines: both calls are invoked on the parser and handed a
# $doc. They look like two spellings of the same operation -- TODO confirm
# against the XML::LibXML::Parser documentation before relying on either.
$parser->process_xincludes( $doc );
$parser->processXIncludes( $doc );
See the tests for an example of its usage. | [reply] [d/l] |
|
|
| [reply] |
|
|
use warnings;
use strict;
use XML::Twig;

# Sample document containing one XInclude reference. The heredoc is
# single-quoted because nothing in the XML should be interpolated.
my $myXML = <<'EOF';
<?xml version="1.0"?>
<reports xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="xinclude.xml" parse='xml' />
</reports>
EOF

# Print the href of every xi:include element as the parser reaches it.
# The handler is matched on the literal qualified name 'xi:include'.
# One href per line, so several includes do not run together on output.
my $t = XML::Twig->new(
    twig_handlers => {
        'xi:include' => sub {
            my ($twig, $include) = @_;
            print $include->att('href'), "\n";
        },
    },
);
$t->parse($myXML);
__END__
xinclude.xml
| [reply] [d/l] |
|
|
|
|
|
|
Re: XML::LibXML question: How to list XInclude files, which are supposed to be included?
by ikegami (Patriarch) on Jul 06, 2011 at 18:33 UTC
|
use strict;
use warnings;
use feature qw( say );
use XML::LibXML qw( );
use XML::LibXML::XPathContext qw( );

# Path of the XML file to inspect. The original snippet used $qfn without
# declaring it, which does not compile under "use strict".
my $qfn = shift(@ARGV)
    // die("usage: $0 file.xml\n");

my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($qfn);

# XPath needs a prefix bound to the XInclude namespace; the prefix used in
# the query is independent of whatever prefix the document itself uses.
my $xpc = XML::LibXML::XPathContext->new($doc);
$xpc->registerNs('xi', 'http://www.w3.org/2001/XInclude');

# Print the value of every href attribute found on an xi:include element.
# Only the includes written directly in this file are listed; nothing is
# expanded or followed.
for my $href_attr ($xpc->findnodes('//xi:include/@href')) {
    say $href_attr->getValue();
}
Also works,
# Alternative form: select the xi:include elements themselves and read the
# href attribute from each one. Relies on $xpc from the preceding snippet.
say $_->getAttribute('href')
    for $xpc->findnodes('//xi:include');
Update: Fixed constructor. | [reply] [d/l] [select] |
|
|
This won't work if the main XML file includes other XML pieces that might themselves include further XML pieces. If you parse without expanding XIncludes, you do get these "xi:include/@href" paths, but you won't see second-level (and deeper) includes.
| [reply] |
|
|
| [reply] |
|
|
use strict;
use warnings;
use feature qw( say );
use LWP::UserAgent qw( );
use URI qw( );
use URI::file qw( );
use XML::LibXML qw( );
use XML::LibXML::XPathContext qw( );

# Lists every resource referenced through xi:include, directly or through
# nested includes, starting from the file or URL given on the command line.
# Nothing is expanded: each document is fetched, parsed as-is, and scanned
# for href attributes.
@ARGV == 1
    or die("usage: $0 file-or-url\n");

my $parser = XML::LibXML->new();
my $xpc = XML::LibXML::XPathContext->new();
$xpc->registerNs('xi', 'http://www.w3.org/2001/XInclude');
my $ua = LWP::UserAgent->new();

# A relative argument is resolved against the current directory as a
# file: URL, so local paths and remote URLs go through the same code.
my $root_url = URI->new_abs($ARGV[0], URI::file->cwd());

my @todo = $root_url;
my %found;                          # Every include target seen (the report).
my %queued = ( $root_url => 1 );    # Everything ever put on @todo, so a
                                    # cycle back to the root is not refetched.

while (@todo) {
    my $url = pop(@todo);

    my $response = $ua->get($url);
    if (!$response->is_success()) {
        warn("Can't get $url: " . $response->status_line() . "\n");
        next;
    }

    # Undo transfer encodings only; the XML parser determines the
    # character encoding itself.
    my $xml = $response->decoded_content( charset => 'none' );

    # parse_string dies on malformed input. Treat that like a failed fetch
    # so one bad resource does not abort the whole listing.
    my $doc = eval { $parser->parse_string($xml) };
    if (!defined($doc)) {
        warn("Can't parse $url: $@");
        next;
    }

    for my $include ($xpc->findnodes('//xi:include[@href]', $doc)) {
        # href is relative to the document that contains the include.
        my $child_url = URI->new_abs($include->getAttribute('href'), $url);
        $found{$child_url} = 1;

        # parse="text" targets are included verbatim and are not XML, so
        # they are reported but not scanned for further includes.
        next if ($include->getAttribute('parse') // 'xml') eq 'text';

        push @todo, $child_url
            if !$queued{$child_url}++;
    }
}

say for sort keys %found;
Update: Fixed constructor. Made url absolute as required.
| [reply] [d/l] |
|
|
|
|
|
|
|
Re: XML::LibXML question: How to list XInclude files, which are supposed to be included?
by AlexFromNJ (Novice) on Jul 07, 2011 at 00:55 UTC
|
I've written a small recursive sub that looks into the XML file, finds all the XInclude'd filenames, and then calls itself on each of those files (it doesn't expand the XIncludes).
If there is any interest, I'll post it here...
Thanks
Alex
| [reply] |
|
|
So did I. How does it differ from mine?
| [reply] |
|
|
use strict;
use warnings;
use File::Basename qw(dirname);
use File::Spec;
use XML::LibXML;

# Namespace that identifies XInclude elements, whatever prefix a document
# binds to it and wherever in the document the binding is declared.
my $XINCLUDE_NS = 'http://www.w3.org/2001/XInclude';

my @list = ();      # Files in the order they were first visited (the output).
my %seen;           # Same files as a set, for constant-time duplicate checks.
my $parser = XML::LibXML->new();

# Process($file)
#
# Records $file in @list, then recurses into every file it references via
# xi:include href attributes. XIncludes are not expanded; each file is only
# parsed and scanned.
#
# Returns 1 if $file and everything reachable from it was found and
# readable, 0 otherwise. A file that was already visited returns 1 without
# being parsed again, which also stops include cycles.
sub Process
{
    my ($f) = @_;

    if (!(-f $f and -r $f))
    {
        print STDERR "The file '$f' isn't found or not readable\n";
        return 0;
    }

    return 1 if $seen{$f}++;
    push @list, $f;

    my $root = $parser->parse_file($f)->documentElement();

    # Relative hrefs are relative to the including file, not to the
    # directory the script happens to be run from.
    my $base_dir = dirname($f);

    my $ok = 1;
    foreach my $xincludeNode ($root->getElementsByTagNameNS($XINCLUDE_NS, 'include'))
    {
        my $href = $xincludeNode->getAttribute('href');

        # An include without href does not name another file.
        next if !defined($href);

        my $xincludeFilename = File::Spec->rel2abs($href, $base_dir);

        # Always recurse, even after an earlier failure, so every include
        # is visited and every missing file is reported.
        $ok = 0 if !Process($xincludeFilename);
    }

    return $ok;
}

@ARGV
    or die("usage: $0 file.xml\n");

Process(File::Spec->rel2abs($ARGV[0]));
my $list = join("\n", @list);
print $list, "\n";
| [reply] [d/l] |