# Example fragments for HTML::TokeParser::Simple.  Each fragment is separated
# by a line of "####" and is meant to be read on its own; variables such as
# $somefile, $html and (outside the loops) $token are supplied by the reader.

# Fragment 1: print only the text tokens of a document.
use HTML::TokeParser::Simple;

my $p = HTML::TokeParser::Simple->new( $somefile );

while ( my $token = $p->get_token ) {
    next unless $token->is_text;
    print $token->return_text;
}

####

# Fragment 2: drop every tag whose name is a key of %HTML::Tagset::isKnown
# and print everything else.
use HTML::TokeParser::Simple;
use HTML::Tagset;

my $p = HTML::TokeParser::Simple->new( \$html );

while ( my $token = $p->get_token ) {
    next if $token->is_tag
        and exists $HTML::Tagset::isKnown{ $token->return_tag };
    print $token->return_text;
}

####

# Fragment 3: the same end-tag test written with and without the leading slash.
$token->is_end_tag( '/form' );
$token->is_end_tag( 'form' );

####

# Fragment 4: the same filter as fragment 2, expressed with is_valid_tag.
while ( my $token = $p->get_token ) {
    # the following would skip a tag that is valid HTML,
    # but not a tag that is not
    next if $token->is_valid_tag;
    print $token->return_text;
}

####

# Fragment 5
if ( $token->can_link ) {
    # check to see if it's really linking to something
}

####

# Fragment 6
$token->is_head_element;
$token->is_table_element;
$token->is_body_element;
# etc.

####

# Fragment 7: shorter accessor names.
$token->return_attr;    # becomes $token->attr;
$token->return_text;    # becomes $token->text;