#!/usr/bin/perl -w
use strict;
use HTML::TokeParser;
# Demonstration of HTML::TokeParser: tokenize a small document and report
# every token, listing the attributes of each start tag in source order.

# Sample document: plain text around a single start tag. The second
# attribute value contains a '>' inside its quotes. The tag is reconstructed
# from the recorded run after __END__, which reports four double quotes and
# a "tag" element with var1=".." and var2="..>..".
my $string = qq{
clear text <tag var1=".." var2="..>.."> clear text
};

# tr/// with an empty replacement list only counts the matching characters;
# $string itself is left unmodified.
printf "\" appears %s times in this string\n", ($string =~ tr/"//);

# Passing a reference to a scalar makes the parser treat the scalar's
# content as the document to parse.
my $p = HTML::TokeParser->new(\$string)
    or die "Cannot create HTML::TokeParser object: $!\n";

while (my $token = $p->get_token) {
    # Every token is an array ref whose first element is the token type.
    my ($type, @fields) = @{$token};

    if ($type eq 'S') {
        # Start tag: ["S", $tag, $attr, $attrseq, $text], where $attr is a
        # hash ref of attribute values and $attrseq is an array ref holding
        # the attribute names in their original order.
        my ($tag, $attr, $attrseq) = @fields;

        print "this start tag is ($tag)\n";
        print "its attribues are (in original sequence):\n";
        printf "(%s)=(%s)\n", $_, $attr->{$_} for @{$attrseq};
        print "\n\n-- a start tag no more --\n\n";
    }
    else {
        # Other token types carry a varying number of fields (for example
        # ["T", $text, $is_data] or ["E", $tag, $text]). Hand printf exactly
        # the two values its format consumes, so no "Redundant argument in
        # printf" warning is raised for the longer tokens.
        printf "something else (%s)\n\t\t(%s)\n", $type, $fields[0];
    }
}
__END__
F:\dev\HTML_Tokeparser_Tutorial>perl liar.pl
" appears 4 times in this string
something else (T)
(
clear text )
this start tag is (tag)
its attribues are (in original sequence):
(var1)=(..)
(var2)=(..>..)
-- a start tag no more --
something else (T)
( clear text)
something else (T)
(
)
F:\dev\HTML_Tokeparser_Tutorial>