#### #### use warnings; use strict; use utf8; use XML::LibXML; use URI; use HTTP::Tiny; my $http = HTTP::Tiny->new; my %cache; XML::LibXML::externalEntityLoader(sub { my ($url, $id) = @_; die "Can't handle ID '$id'" if length $id; my $uri = URI->new($url); my $file; if (!$uri->scheme) { $file = $url } elsif ($uri->scheme eq 'file') { $file = $uri->path } if (defined $file) { warn "'$uri' => Loading '$file' from disk\n"; #Debug open my $fh, '<', $file or die "$file: $!"; my $data = do { local $/; <$fh> }; close $fh; return $data; } # else die "Can't handle URL scheme: ".$uri->scheme unless $uri->scheme=~/\Ahttps?\z/i; if (!defined $cache{$uri}) { warn "'$uri' => Fetching...\n"; #Debug my $resp = $http->get($uri); die "$uri: $resp->{status} $resp->{reason}\n" unless $resp->{success}; $cache{$uri} = $resp->{content}; } else { warn "'$uri' => Cached\n"; } #Debug return $cache{$uri}; }); print "Loading schema...\n"; my $xsd = XML::LibXML::Schema->new( location => 'schema.xsd' ); my @xmls = (<<'END_XML_ONE',<<'END_XML_TWO',<<'END_XML_THREE');

x

END_XML_ONE

x

END_XML_TWO

x

END_XML_THREE my $i = 1; for my $xml (@xmls) { print "Validating XML #$i...\n"; my $doc = XML::LibXML->load_xml( string => $xml ); if ( eval { $xsd->validate($doc); 1 } ) { print "=> Valid!\n" } else { print "=> Invalid! $@" } } continue { $i++ } ##
## Loading schema... 'schema.xsd' => Loading 'schema.xsd' from disk 'included.xsd' => Loading 'included.xsd' from disk 'http://www.w3.org/2002/08/xhtml/xhtml1-transitional.xsd' => Fetching... 'http://www.w3.org/2001/xml.xsd' => Fetching... Validating XML #1... => Valid! Validating XML #2... => Invalid! unknown-137e570:0: Schemas validity error : Element '{http://www.example.com}world': The attribute 'foo' is required but missing. Validating XML #3... => Invalid! unknown-137e570:0: Schemas validity error : Element '{http://www.w3.org/1999/xhtml}foo': This element is not expected. Expected is one of ( {http://www.w3.org/1999/xhtml}a, {http://www.w3.org/1999/xhtml}br, {http://www.w3.org/1999/xhtml}span, {http://www.w3.org/1999/xhtml}bdo, {http://www.w3.org/1999/xhtml}object, {http://www.w3.org/1999/xhtml}applet, {http://www.w3.org/1999/xhtml}img, {http://www.w3.org/1999/xhtml}map, {http://www.w3.org/1999/xhtml}iframe, {http://www.w3.org/1999/xhtml}tt ). #### use warnings; use strict; use XML::LibXML; use HTTP::Tiny; use URI; my $parser = XML::LibXML->new; my $cb = XML::LibXML::InputCallback->new; my $http = HTTP::Tiny->new; my %cache; $cb->register_callbacks([ sub { 1 }, # match (URI), returns Bool sub { # open (URI), returns Handle my $uri = URI->new($_[0]); my $file; #warn "Handling <<$uri>>\n"; #Debug if (!$uri->scheme) { $file = $_[0] } elsif ($uri->scheme eq 'file') { $file = $uri->path } elsif ($uri->scheme=~/\Ahttps?\z/i) { if (!defined $cache{$uri}) { my $resp = $http->get($uri); die "$uri: $resp->{status} $resp->{reason}\n" unless $resp->{success}; $cache{$uri} = $resp->{content}; } $file = \$cache{$uri}; } else { die "unsupported URL scheme: ".$uri->scheme } open my $fh, '<', $file or die "$file: $!"; return $fh; }, sub { # read (Handle,Length), returns Data my ($fh,$len) = @_; read($fh, my $buf, $len); return $buf; }, sub { close shift } # close (Handle) ]); $parser->input_callbacks($cb); my $doc = $parser->load_xml( IO => \*DATA ); print "Is valid: ", $doc->is_valid ? "yes" : "no", "\n"; __DATA__ ]> 1 XXXX &icon.url; PubMed 1234567890 &base.url; /1/ #### my $CACHE_DIR = '/tmp/xmlcache'; use File::Path qw/make_path/; make_path($CACHE_DIR, {verbose=>1}); use URI; use HTTP::Tiny; use Text::CleanFragment qw/clean_fragment/; use File::Spec::Functions qw/catfile/; my $http = HTTP::Tiny->new; XML::LibXML::externalEntityLoader(sub { my ($url, $id) = @_; die "Can't handle ID '$id'" if length $id; my $uri = URI->new($url); my $file; if (!$uri->scheme) { $file = $url } elsif ($uri->scheme eq 'file') { $file = $uri->path } elsif ($uri->scheme=~/\Ahttps?\z/i) { # Note there is a (tiny) chance of filename collisions here! $file = catfile($CACHE_DIR, clean_fragment("$uri")); if (!-e $file) { warn "'$uri' => Mirroring to '$file'...\n"; #Debug my $resp = $http->mirror($uri, "$file"); die "$uri: $resp->{status} $resp->{reason}\n" unless $resp->{success}; } } else { die "Can't handle URL scheme: ".$uri->scheme } warn "'$uri' => Loading '$file' from disk\n"; #Debug open my $fh, '<', $file or die "$file: $!"; my $data = do { local $/; <$fh> }; close $fh; return $data; });