##
use HTML::TokeParser::Simple;
use HTML::Tagset;
# Walk the token stream of $html and echo every token except tags whose
# name is listed in %HTML::Tagset::isKnown.  Non-tag tokens, and tags that
# HTML::Tagset does not list, are printed via return_text.
my $p = HTML::TokeParser::Simple->new( \$html );
while ( my $token = $p->get_token ) {
    # Only a tag that is also a known HTML element gets suppressed.
    unless ( $token->is_tag
        && exists $HTML::Tagset::isKnown{ $token->return_tag } )
    {
        print $token->return_text;
    }
}
####
# Two ways of asking whether the token is the closing form tag: with and
# without the leading slash in the tag name.
# NOTE(review): presumably both spellings are meant to be accepted as
# equivalent -- confirm against the module's is_end_tag documentation.
$token->is_end_tag( '/form' );
$token->is_end_tag( 'form' );
####
while ( my $token = $p->get_token ) {
    # Tokens for which is_valid_tag is true are skipped; every other token
    # is echoed via return_text.
    # NOTE(review): the original comment's example tags were lost; presumably
    # a recognised HTML tag is skipped while an unrecognised one is printed
    # -- confirm against the is_valid_tag implementation.
    print $token->return_text unless $token->is_valid_tag;
}
##
##
# Illustrates a can_link predicate on a token.
# NOTE(review): presumably true when the tag is of a kind that may carry a
# link attribute (cf. %HTML::Tagset::linkElements) -- confirm.
if ( $token->can_link ) {
# Placeholder: a true can_link does not by itself mean a link is present,
# so check here whether the tag is really linking to something.
}
####
# Illustrates per-category predicates on a token: head, table and body
# elements are shown.
# NOTE(review): presumably these would be backed by the element category
# tables in HTML::Tagset -- confirm.
$token->is_head_element;
$token->is_table_element;
$token->is_body_element;
# etc. -- further predicates following the same is_*_element naming pattern.
####
# Illustrates a renaming of the accessors: the "return_" prefix is dropped,
# so return_attr becomes attr and return_text becomes text.
$token->return_attr;
# becomes
$token->attr;
$token->return_text;
# becomes
$token->text;