THRAK has asked for the wisdom of the Perl Monks concerning the following question:
#!/usr/bin/perl -w
#
# Tokenize the same file in three different ways and report which tokens
# are classified as processing instructions (PI) and which as comments:
#
#   1. HTML::TokeParser::Simple, fed one line at a time (string => ...)
#   2. HTML::TokeParser::Simple, fed the open filehandle (handle => ...)
#   3. HTML::TokeParser, fed one line at a time, dumping every raw token
#
use strict;
use warnings;
use Data::Dumper;
use HTML::TokeParser;            # called directly in the third pass
use HTML::TokeParser::Simple;

$|++;    # turn on autoflush for the currently selected output handle

#--- Variables
my $file = 'test2.php';
my $line_number;

##############################
# Pass 1: a fresh parser per line, so every token can be tagged with the
# number of the line it came from.
print "#--- TokeParser::Simple String\n";
$line_number = 0;
open(my $line_fh, '<', $file) or die "Unable to open $file $!\n";
while (my $line = <$line_fh>) {
    chomp $line;
    $line_number++;
    my $p = HTML::TokeParser::Simple->new(string => $line);
    while (my $token = $p->get_token) {
        if ($token->is_pi) {
            print $line_number . 'P: ' . $token->get_token0 . "\n";
        }
        if ($token->is_comment) {
            print $line_number . 'C: ' . $token->as_is . "\n";
        }
    }
}
close($line_fh);

##############################
# Pass 2: a single parser reading from the filehandle.
print "\n#--- TokeParser::Filehandle\n";
open(my $fh, '<', $file) or die "Unable to open $file $!\n";
# NOTE: $line_number is not reset here. It still holds the count left by
# pass 1 and is bumped exactly once, outside the token loop, so every token
# in this pass is printed with the same number rather than a per-token line.
$line_number++;
my $p = HTML::TokeParser::Simple->new(handle => $fh);
while (my $token = $p->get_token) {
    if ($token->is_pi) {
        print $line_number . 'P: ' . $token->get_token0() . "\n";
    }
    if ($token->is_comment) {
        print $line_number . 'Pc: ' . $token->as_is . "\n";
    }
}
close($fh);

##############################
# Pass 3: plain HTML::TokeParser, one parser per line, dumping each raw
# token so its type tag can be inspected.
print "\n#--- TokeParser\n";
$line_number = 0;
open(my $dump_fh, '<', $file) or die "Unable to open $file $!\n";
while (my $line = <$dump_fh>) {
    chomp $line;
    $line_number++;
    print "LINE: $line_number ********\n";
    my $p = HTML::TokeParser->new(\$line);
    while (my $token = $p->get_token) {
        print Dumper($token) . "\n";
    }
}
close($dump_fh);

__END__
And the output I get (the contents of the input file test2.php are shown first, followed by the script output): <? /**** $Id$ ****/ ?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <HTML> <BODY> <P>Some text<BR /> </P> <p><? echo U($date);?></p> <? echo 'This is from PHP.'; ?> <h1><? echo $h1; ?></h1> <?php ?> </body> </html>
#--- TokeParser::Simple String 1C: <? /**** $Id$ ****/ 9P: echo U($date);? 10C: <? 13P: echo $h1; ? 14C: <?php #--- TokeParser::Filehandle 18P: /**** $Id$ ****/ ? 18P: echo U($date);? 18P: echo 'This is from PHP.'; ? 18P: echo $h1; ? 18P: php ? #--- TokeParser LINE: 1 ******** $VAR1 = [ 'C', '<? /**** $Id$ ****/' ]; --- SNIP! --- LINE: 10 ******** $VAR1 = [ 'C', '<?' ]; --- SNIP! --- LINE: 14 ******** $VAR1 = [ 'C', '<?php' ]; --- SNIP! --- This shows that HTML::TokeParser sees those three lines as comments, not as PIs.
2005-02-24 Janitored by Arunbear - added readmore tags, as per Monastery guidelines
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: File parsing with HTML::TokeParser::Simple
by Ovid (Cardinal) on Feb 24, 2005 at 20:08 UTC | |
by THRAK (Monk) on Feb 24, 2005 at 20:19 UTC |