in reply to Re^3: Parsing HTML/XML with Regular Expressions
in thread Parsing HTML/XML with Regular Expressions
Could it just be a version issue? The code you posted works fine for me:
# Demonstration script: parses a small XHTML document whose "class"
# attributes are written as an internal DTD entity reference (&atad;) and
# runs two XPath queries against it. The sample output recorded after
# __END__ shows the attribute values arriving as the expanded text "data"
# with the library versions printed on its first line.
use warnings;
use strict;
use XML::LibXML;

# Single-quoted heredoc: the XML is taken literally, with no Perl
# interpolation. The XML declaration has to be the very first thing in the
# document, so the heredoc body starts at column 0.
my $XML = <<'_END_XML_';
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html [
    <!ENTITY atad "data">
] >
<html xmlns="http://www.w3.org/1999/xhtml">
    <div class="&atad;" id="Hello" />
    <div class="&atad;" id="World" />
</html>
_END_XML_

# Report the Perl module version and the libxml2 version constants, since
# the question under discussion is whether behaviour differs by version.
print $XML::LibXML::VERSION, " ",
    XML::LibXML::LIBXML_DOTTED_VERSION, " ",
    XML::LibXML::LIBXML_VERSION, " ",
    XML::LibXML::LIBXML_RUNTIME_VERSION, "\n";

my $dom = XML::LibXML->load_xml(string=>$XML);

# The document's elements live in the XHTML namespace, so the XPath
# expressions need a prefix bound to that namespace URI.
my $xpc = XML::LibXML::XPathContext->new;
$xpc->registerNs(xh => 'http://www.w3.org/1999/xhtml');

# Query 1: select only the <div> elements whose class equals "data".
for my $div ($xpc->findnodes('//xh:div[@class="data"]', $dom)) {
    print "1:", $div->{id}, "\n"
}

# Query 2: select every <div> and print its id and class attributes
# (read through the element's hash-style attribute access).
for my $div ($xpc->findnodes('//xh:div', $dom)) {
    print "2:", $div->{id}, " ", $div->{class}, "\n"
}

__END__
2.0129 2.9.1 20901 20901
1:Hello
1:World
2:Hello data
2:World data
Replies are listed 'Best First'.

Re^5: Parsing HTML/XML with Regular Expressions
by choroba (Cardinal) on Oct 18, 2017 at 22:13 UTC | |
by haukex (Archbishop) on Oct 18, 2017 at 22:54 UTC | |
by choroba (Cardinal) on Oct 19, 2017 at 10:02 UTC |