##
####
use warnings;
use strict;
use utf8;
use XML::LibXML;
use URI;
use HTTP::Tiny;
my $http = HTTP::Tiny->new;
my %cache;
XML::LibXML::externalEntityLoader(sub {
my ($url, $id) = @_;
die "Can't handle ID '$id'" if length $id;
my $uri = URI->new($url);
my $file;
if (!$uri->scheme) { $file = $url }
elsif ($uri->scheme eq 'file') { $file = $uri->path }
if (defined $file) {
warn "'$uri' => Loading '$file' from disk\n"; #Debug
open my $fh, '<', $file or die "$file: $!";
my $data = do { local $/; <$fh> };
close $fh;
return $data;
} # else
die "Can't handle URL scheme: ".$uri->scheme
unless $uri->scheme=~/\Ahttps?\z/i;
if (!defined $cache{$uri}) {
warn "'$uri' => Fetching...\n"; #Debug
my $resp = $http->get($uri);
die "$uri: $resp->{status} $resp->{reason}\n"
unless $resp->{success};
$cache{$uri} = $resp->{content};
}
else { warn "'$uri' => Cached\n"; } #Debug
return $cache{$uri};
});
print "Loading schema...\n";
my $xsd = XML::LibXML::Schema->new( location => 'schema.xsd' );
my @xmls = (<<'END_XML_ONE',<<'END_XML_TWO',<<'END_XML_THREE');
x
END_XML_ONE
x
END_XML_TWO
x
END_XML_THREE
my $i = 1;
for my $xml (@xmls) {
print "Validating XML #$i...\n";
my $doc = XML::LibXML->load_xml( string => $xml );
if ( eval { $xsd->validate($doc); 1 } )
{ print "=> Valid!\n" }
else { print "=> Invalid! $@" }
} continue { $i++ }
####
Loading schema...
'schema.xsd' => Loading 'schema.xsd' from disk
'included.xsd' => Loading 'included.xsd' from disk
'http://www.w3.org/2002/08/xhtml/xhtml1-transitional.xsd' => Fetching...
'http://www.w3.org/2001/xml.xsd' => Fetching...
Validating XML #1...
=> Valid!
Validating XML #2...
=> Invalid! unknown-137e570:0: Schemas validity error : Element '{http://www.example.com}world': The attribute 'foo' is required but missing.
Validating XML #3...
=> Invalid! unknown-137e570:0: Schemas validity error : Element '{http://www.w3.org/1999/xhtml}foo': This element is not expected. Expected is one of ( {http://www.w3.org/1999/xhtml}a, {http://www.w3.org/1999/xhtml}br, {http://www.w3.org/1999/xhtml}span, {http://www.w3.org/1999/xhtml}bdo, {http://www.w3.org/1999/xhtml}object, {http://www.w3.org/1999/xhtml}applet, {http://www.w3.org/1999/xhtml}img, {http://www.w3.org/1999/xhtml}map, {http://www.w3.org/1999/xhtml}iframe, {http://www.w3.org/1999/xhtml}tt ).
####
use warnings;
use strict;
use XML::LibXML;
use HTTP::Tiny;
use URI;
my $parser = XML::LibXML->new;
my $cb = XML::LibXML::InputCallback->new;
my $http = HTTP::Tiny->new;
my %cache;
$cb->register_callbacks([
sub { 1 }, # match (URI), returns Bool
sub { # open (URI), returns Handle
my $uri = URI->new($_[0]);
my $file;
#warn "Handling <<$uri>>\n"; #Debug
if (!$uri->scheme) { $file = $_[0] }
elsif ($uri->scheme eq 'file') { $file = $uri->path }
elsif ($uri->scheme=~/\Ahttps?\z/i) {
if (!defined $cache{$uri}) {
my $resp = $http->get($uri);
die "$uri: $resp->{status} $resp->{reason}\n"
unless $resp->{success};
$cache{$uri} = $resp->{content};
}
$file = \$cache{$uri};
}
else { die "unsupported URL scheme: ".$uri->scheme }
open my $fh, '<', $file or die "$file: $!";
return $fh;
},
sub { # read (Handle,Length), returns Data
my ($fh,$len) = @_;
read($fh, my $buf, $len);
return $buf;
},
sub { close shift } # close (Handle)
]);
$parser->input_callbacks($cb);
my $doc = $parser->load_xml( IO => \*DATA );
print "Is valid: ", $doc->is_valid ? "yes" : "no", "\n";
__DATA__
]>
1
XXXX
&icon.url;
PubMed
1234567890
&base.url;
/1/
####
my $CACHE_DIR = '/tmp/xmlcache';
use File::Path qw/make_path/;
make_path($CACHE_DIR, {verbose=>1});
use URI;
use HTTP::Tiny;
use Text::CleanFragment qw/clean_fragment/;
use File::Spec::Functions qw/catfile/;
my $http = HTTP::Tiny->new;
XML::LibXML::externalEntityLoader(sub {
my ($url, $id) = @_;
die "Can't handle ID '$id'" if length $id;
my $uri = URI->new($url);
my $file;
if (!$uri->scheme) { $file = $url }
elsif ($uri->scheme eq 'file') { $file = $uri->path }
elsif ($uri->scheme=~/\Ahttps?\z/i) {
# Note there is a (tiny) chance of filename collisions here!
$file = catfile($CACHE_DIR, clean_fragment("$uri"));
if (!-e $file) {
warn "'$uri' => Mirroring to '$file'...\n"; #Debug
my $resp = $http->mirror($uri, "$file");
die "$uri: $resp->{status} $resp->{reason}\n"
unless $resp->{success};
}
}
else { die "Can't handle URL scheme: ".$uri->scheme }
warn "'$uri' => Loading '$file' from disk\n"; #Debug
open my $fh, '<', $file or die "$file: $!";
my $data = do { local $/; <$fh> };
close $fh;
return $data;
});