use strict;
use warnings;

use HTML::TreeBuilder        qw( );
use HTML::TreeBuilder::XPath qw( );

# Slurp the entire document from the DATA section in a single read.
my $html = do { local $/; <DATA> };

# findnodes() comes from HTML::TreeBuilder::XPath, so construct the tree
# through that class (it is a subclass of HTML::TreeBuilder).
my $tree = HTML::TreeBuilder::XPath->new_from_content($html);

# Select every LI whose first child element is a SPAN carrying a class
# attribute: walk down to the attribute, then climb back up twice
# (attribute -> SPAN -> LI).
my @results = $tree->findnodes(
    '//li/*[1][name()="span"]/@class/parent::*/parent::*'
);

for my $li (@results) {
    # $li holds the HTML::Element object for the LI.
    ...    # placeholder: per-LI processing goes here
}

# Tear the tree down explicitly once it is no longer needed.
$tree->delete();
It's a little bit tricky to serialize the LI without the LI start tag, the SPAN element and the LI end tag.
use HTML::Entities qw( encode_entities );

# Serialize one item of an element's content list as HTML.
#
#   $node - either an element object (a reference) or a plain text string.
#
# Returns the element's markup with its trailing newline removed, or, for a
# plain string, the entity-encoded text.
sub node_as_html {
    my ($node) = @_;

    # Non-references are text: they only need entity-encoding.
    return encode_entities($node) if !ref($node);

    # The empty hash is the "optional end tags" argument; passing it empty
    # should make as_HTML write every end tag -- confirm against the
    # HTML::Element documentation.
    my $html = $node->as_HTML(undef, undef, {});
    chomp($html);
    return $html;
}

# ... (code elided in the original snippet; $li below is presumably the LI
# element supplied by the surrounding loop)

# Print the LI's content without the LI tags and without its leading SPAN.
# The SPAN is the first child *element*, which is not necessarily the first
# content item (text may precede it), so skip the first element rather than
# blindly dropping item 0.
my $span_skipped = 0;
for my $child ($li->content_list()) {
    if (!$span_skipped && ref($child)) {
        $span_skipped = 1;    # Skip SPAN
        next;
    }
    print(node_as_html($child));
}
print("\n");

# ... (code elided in the original snippet)
In reply to Re: simple regex help
by ikegami
in thread simple regex help
by nmerriweather
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |