chariscomp has asked for the wisdom of the Perl Monks concerning the following question:
<div class="content"> <span editable="true" id="nvumaincontent">stuff</span> <span editable="true" optional="true" id="nvutest5"><span style="backg +round: red;">more stuff</span><p>test</p></span><span editable="true" id="nvu56">even more stuff <!-- this is the beginning of a comment --> </span> <div editable="true" optional="true" repeatable="true" movable="true" id="nvutest43">incredible boat loads of stuff <!-- this is another comment --> </div> <div editable="true" id="anotherblock4">an unbelievable quantity of stuff! <!-- yet another comment --> <div id="newtest">Yo, dude!</div> </div> <!-- end main content --> </div>
use File::Find; use strict; use HTML::TokeParser::Simple; #my $new_folder = 'new_html/'; my @html_docs = "test5.html"; our $spancontents=""; my @files; my $ByteCount=0; my $filelist=""; my $isflagon=0; my $idflag; my %spancontents; my $templatelocation; my $currentdoc; foreach my $doc ( @html_docs ) { $currentdoc=$doc; my $p = HTML::TokeParser::Simple->new( file => $doc ); while ( my $token = $p->get_token ) { if ($token->is_start_tag('span') or $token->is_start_tag('div' +)) { if ($token->get_attr('editable')=~/true/) { $isflagon=1; $idflag=$token->get_attr('id'); } } if ( ($token->is_start_tag('span') and $isflagon) .. $token->i +s_end_tag('span') and $isflagon){ my $text=$token->as_is; $spancontents.=$text.","; #next; } if ( ($token->is_start_tag('div') and $isflagon) .. $token->is_end +_tag('div')){ my $text=$token->as_is; $spancontents.=$text.","; #next; #not sure if needed, seems to mess things up } if (($token->is_end_tag('span') or $token->is_end_tag('div')) +and $isflagon) { $isflagon=0; #$spancontents.=$token->as_is.","; #not sure if needed, seems +to mess things up $spancontents{"$idflag"}.=$spancontents; $spancontents=""; } if ($token->is_start_tag('html')) { my $attrs=$token->get_attr('templateref'); $templatelocation=$attrs; } } } print "\n\n\n"; foreach my $value (keys %spancontents) { print "value is $value\n"; print "\nMy $value = $spancontents{$value} \n\n------------------ +-------\n"; }
value is anotherblock4 My anotherblock4 = <div editable="true" id="anotherblock4">,an, unbeli +evable qua ntity of stuff! ,<!-- yet another comment -->, ,<div id="newtest">,Yo, dude!,</div>, ------------------------- value is nvutest43 My nvutest43 = <div editable="true" optional="true" repeatable="true" +movable="t rue" id="nvutest43">,incredible boat loads of stuff ,<!-- this is another comment -->, ,</div>, ------------------------- value is nvutest5 My nvutest5 = <span editable="true" optional="true" id="nvutest5">,<sp +an style=" background: red;">,more stuff,</span>, ------------------------- value is nvumaincontent My nvumaincontent = <span editable="true" id="nvumaincontent">,stuff,< +/span>, ------------------------- value is nvu56 My nvu56 = <span editable="true" id="nvu56">,even more stuff ,<!-- this is the beginning of a comment -->, ,</span>, -------------------------
Edit by castaway - Added readmore tags
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Question regarding use of HTML::Tokeparser::Simple
by tphyahoo (Vicar) on May 17, 2005 at 08:32 UTC |