# Sketch 1: tokens as objects -- each token answers is_*/return_* itself.
my $parser = HTML::TokeParser::Easy->new( $some_html );
while ( my $token = $parser->get_token ) {
    # This prints all text in an HTML doc (i.e., it strips the HTML)
    next if ! $token->is_text;
    print $token->return_text;
}
####
# Supporting override for sketch 1: fetch the next token from the parent
# class and bless it into the parser's own class so that methods can be
# called on the token.  Returns undef when the parent yields no token.
sub get_token {
    my $self  = shift;
    my $class = ref $self;
    my $token = $self->SUPER::get_token;
    return undef if ! defined $token;
    bless $token, $class;
}
# create appropriate methods...
####
# Sketch 2: tokens stay plain array refs and are handed to parser methods.
while ( my $token = $parser->get_token ) {
    # This prints all text in an HTML doc (i.e., it strips the HTML)
    next if ! $parser->is_text( $token );
    print $parser->return_text( $token );
}
####
##################
package HTML::TokeParser::Easy;
##################
# Subclass of HTML::TokeParser that adds readable accessors for the token
# array refs used in sketch 2 above:
#
#   $parser->is_<type>( $token )      1 if the token is of that type, else 0
#                                     (<type> is start_tag, end_tag, text,
#                                     comment or declaration; case-insensitive)
#   $parser->return_<attr>( $token )  the named field of the token (tag, attr,
#                                     attrseq or text, depending on its type)
#
# Both families are generated on first use by AUTOLOAD and then installed in
# the symbol table, so later calls bypass AUTOLOAD entirely.
use strict;
use warnings;
use HTML::TokeParser;

our ( @ISA, $VERSION, $AUTOLOAD );
$VERSION = '1.0';
@ISA     = qw/ HTML::TokeParser /;

# Type letter stored in element 0 of every token array.
use constant START_TAG   => 'S';
use constant END_TAG     => 'E';
use constant TEXT        => 'T';
use constant COMMENT     => 'C';
use constant DECLARATION => 'D';

# For each token type letter: a printable name (_name) and the array index
# at which each named attribute lives.
my %token_spec = (
    S => { _name => 'START_TAG',   tag  => 1, attr => 2, attrseq => 3, text => 4 },
    E => { _name => 'END_TAG',     tag  => 1, text => 2 },
    T => { _name => 'TEXT',        text => 1 },
    C => { _name => 'COMMENT',     text => 1 },
    D => { _name => 'DECLARATION', text => 1 },
);

# Upper-cased is_* suffix => type letter.  An explicit table replaces calling
# a sub by symbolic name, which would happily invoke *any* sub in the package
# (is_autoload, for example, recursed into AUTOLOAD forever).
my %tag_for = (
    START_TAG   => START_TAG,
    END_TAG     => END_TAG,
    TEXT        => TEXT,
    COMMENT     => COMMENT,
    DECLARATION => DECLARATION,
);

# AUTOLOAD - build is_* and return_* methods on demand.
#
# Dies with "No such method: <fully qualified name>" for any other method
# name and for an is_* suffix that is not a known token type.
sub AUTOLOAD {
    my $method = $AUTOLOAD;

    # Object destruction lands here when no class in the hierarchy defines
    # DESTROY; it must be a silent no-op rather than "No such method".
    # (Handled here instead of via an empty DESTROY sub so that a parent
    # DESTROY, should one exist, is never shadowed.)
    return if $method =~ /::DESTROY\z/;

    my $code;
    if ( $method =~ /.*::is_(\w+)/ ) {
        # was it an is_... method?
        my $tag = $tag_for{ uc $1 };
        die "No such method: $method" if ! defined $tag;
        $code = sub { return $_[ 1 ]->[ 0 ] eq $tag ? 1 : 0 };
    }
    elsif ( $method =~ /.*::return_(\w+)/ ) {
        # was it a return_... method?
        my $token_attr = $1;
        $code = sub {
            my $type = $_[ 1 ]->[ 0 ];

            # Test the outer key first so that an unrecognised token type
            # does not autovivify a new entry in %token_spec.
            my $spec = exists $token_spec{ $type } ? $token_spec{ $type } : {};
            if ( exists $spec->{ $token_attr } ) {
                return $_[ 1 ]->[ $spec->{ $token_attr } ];
            }

            my $type_name = exists $spec->{ _name } ? $spec->{ _name } : $type;
            warn "No such attribute: '$token_attr' for $type_name";

            # Explicit empty return: falling off the end would hand the
            # caller warn's true value (1) as if it were the attribute.
            return;
        };
    }
    else {
        # Yo!  You can't do that!
        die "No such method: $method";
    }

    # Install the generated method so the next call skips AUTOLOAD.
    {
        no strict 'refs';
        *{ $method } = $code;
    }

    # Re-dispatch with the caller's original @_, leaving no AUTOLOAD frame.
    goto &$code;
}