# Snippet 1: DOM approach.
# Parse the markup held in $html with the HTML parser and collect every <h1>
# element. $html is not defined in this excerpt; it must be set by the
# surrounding code before these statements run.
use XML::LibXML;

# recover => 2 asks the parser to recover from malformed markup silently.
my $parser  = XML::LibXML->new(recover => 2);
my $xmltree = $parser->parse_html_string($html);
my @nodes   = $xmltree->getElementsByTagName('h1');

####

# Snippet 2: pull-reader approach.
# Walk every node of the document and print one line per node.
#
# The reader is deliberately NOT built with (string => $html): that form parses
# the input as XML, so ordinary HTML with unclosed <meta>/<link> elements aborts
# with a tag-mismatch error (see the captured output at the end of this file).
# Instead the markup is parsed by the HTML parser first and the reader walks the
# resulting document.
use XML::LibXML;
use XML::LibXML::Reader;

# Kept in a file-scoped variable so the document outlives the reader loop.
my $html_doc = XML::LibXML->new(recover => 2)->parse_html_string($html);
my $reader   = XML::LibXML::Reader->new(DOM => $html_doc);

while ($reader->read) {
    processNode($reader);
}

# processNode($reader)
#
# Print "<depth> <nodeType> <name> <value>\n" for the node the reader is
# currently positioned on.
#
#   $reader - an XML::LibXML::Reader positioned on a node.
#
# Returns whatever printf returns; callers in this file ignore it.
sub processNode {
    my ($reader) = @_;

    # value() is undefined for nodes that carry no text value; print an empty
    # string in that case instead of passing undef to printf.
    my $value = $reader->value;
    $value = '' unless defined $value;

    printf "%d %d %s %s\n",
        $reader->depth, $reader->nodeType, $reader->name, $value;
}

####

# Output captured from the original version of snippet 2, which constructed the
# reader with (string => $html, recover => 2):
#
#   7 8 #comment The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags
#   Entity: line 21: parser error : Opening and ending tag mismatch: link line 20 and head
#   ^