#!/usr/bin/perl -w
#
# Small HTML::TokeParser demonstration: tokenise a string that contains a
# start tag whose quoted attribute value holds a literal '>' and print every
# token, listing the attributes of each start tag in their original order.
# The run recorded after __END__ shows the '>' staying inside the value of
# var2 instead of terminating the tag.
use strict;
use warnings;
use HTML::TokeParser;

# Sample input.
# NOTE(review): the <tag ...> markup is restored from the run recorded after
# __END__ (tag name, both attribute values and the count of four double
# quotes); the exact whitespace of the original literal could not be
# confirmed.
my $string = qq, clear text <tag var1=".." var2="..>.."> clear text ,;

# tr/// with an empty replacement list only counts the matching characters;
# the string itself is left untouched.
my $quote_count = ( $string =~ tr/"// );
printf "\" appears %s times in this string\n", $quote_count;

# Passing a scalar reference makes the parser read from the string itself
# rather than treating the argument as a file name.
my $p = HTML::TokeParser->new( \$string )
    or die "Cannot create HTML::TokeParser object: $!";

while ( my $token = $p->get_token ) {

    # Start-tag tokens have the shape
    #   ["S", $tag, \%attr, \@attrseq, $origtext]
    # Every start tag is reported here, not only links.
    if ( $token->[0] eq 'S' ) {
        my ( undef, $tag, $attr, $attrseq ) = @{$token};

        print "this start tag is ($tag)\n";
        print "its attribues are (in original sequence):\n";

        # @$attrseq holds the attribute names in source order; %$attr maps
        # each name to its value.
        for my $name ( @{$attrseq} ) {
            printf "(%s)=(%s)\n", $name, $attr->{$name};
        }

        print "\n\n-- a start tag no more --\n\n";
    }
    else {
        # Other token kinds carry a varying number of elements (a text token
        # is ["T", $text, $is_data]).  Only the first two are printed, so
        # pass exactly those: handing printf the whole array triggers a
        # "Redundant argument in printf" warning on Perl 5.22 and later.
        printf "something else (%s)\n\t\t(%s)\n", @{$token}[ 0, 1 ];
    }
}

__END__
F:\dev\HTML_Tokeparser_Tutorial>perl liar.pl
" appears 4 times in this string
something else (T)
		( clear text )
this start tag is (tag)
its attribues are (in original sequence):
(var1)=(..)
(var2)=(..>..)


-- a start tag no more --

something else (T)
		( clear text)
something else (T)
		( )

F:\dev\HTML_Tokeparser_Tutorial>