in reply to easy HTML::TokeParser help request
#!/usr/bin/perl
use strict;
use warnings;

use HTML::TokeParser;

# Print the text of every <a> link that appears inside a <div class="full">
# element, ignoring links that live in any other div.  The sample document
# is read from the __DATA__ section at the end of this file.

# Slurp the whole sample document in one read.
my $doc = do { local $/; <DATA> };
my $p   = HTML::TokeParser->new( \$doc );

OUTER_DIV:
while ( my $outer = $p->get_tag("div") ) {

    # A div without a class attribute yields undef here; guard the
    # comparison so it does not trigger an "uninitialized value" warning.
    my $class = $outer->[1]{class};
    next OUTER_DIV unless defined $class && $class eq "full";

    # Depth relative to the "full" div: 0 means directly inside it.
    my $nested_div = 0;

    INNER_TAG:
    while ( my $inner = $p->get_tag ) {
        my $tag = $inner->[0];

        # keep count of nested divs
        $nested_div++ if $tag eq "div";
        $nested_div-- if $tag eq "/div";

        # "full" div has closed
        last INNER_TAG if $nested_div == -1;

        # Text between the <a> start tag and the next tag is the link text.
        print $p->get_text, "\n" if $tag eq "a";
    }
}

__DATA__
<!-- some other divs and so here -->
<div class="full">
  <div class="content">
    <ul class="topics">
      <-- I want extract these links div class "full" only -->
      <li><a href="foobar">foobar</a></li>
      <li><a href="foobr2">fobar2</a></li>
      <li><a href="fobar3">foobr3</a></li>
    </ul>
  </div>
</div>
<div class="otherclass">
  <div class="content">
    <ul class="topics">
      <-- I DO NOT WANT these links -->
      <li><a href="fbaor">fbaor</a></li>
      <li><a href="fabar2">fabar2</a></li>
      <li><a href="fbar3">fbar3</a></li>
    </ul>
  </div>
</div>
<!-- some other divs and so here -->
Replies are listed 'Best First'.
Re^2: easy HTML::TokeParser help request
by 2ge (Scribe) on Aug 04, 2006 at 12:12 UTC |