use XML::LibXML;
my $parser = XML::LibXML->new(recover => 2);
my $xmltree = $parser->parse_html_string($html);
my @nodes = $xmltree->getElementsByTagName('h1');
####
use XML::LibXML::Reader;
my $reader = XML::LibXML::Reader->new(string => $html, recover => 2);
while ($reader->read) {
processNode($reader);
}
sub processNode {
my $reader = shift;
printf "%d %d %s %s\n", ($reader->depth,
$reader->nodeType,
$reader->name,
$reader->value);
}
##
##
7 8 #comment The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags
Entity: line 21: parser error : Opening and ending tag mismatch: link line 20 and head
^