# Usage sketch, token-object style: walk every token the parser yields and
# print only the text ones, which strips the markup from the document.
my $parser = HTML::TokeParser::Easy->new( $some_html );
while ( my $token = $parser->get_token ) {
    print $token->return_text if $token->is_text;
}
####
# Fetch the next token from the parent class's get_token and rebless it into
# the invocant's own class (ref $self) before handing it back.
#
# Returns the reblessed token, or undef when the parent returns undef.
sub get_token {
    my ($self) = @_;
    my $token = $self->SUPER::get_token;

    # A single explicit undef is returned (even in list context) when the
    # parent has nothing to give; otherwise bless returns the token itself.
    return defined $token
        ? bless( $token, ref $self )
        : undef;
}
# create appropriate methods...
####
# Usage sketch, parser-method style: the token is passed to the parser's
# is_text / return_text methods. Prints every text token, which strips the
# markup from the document.
while ( my $token = $parser->get_token ) {
    if ( $parser->is_text( $token ) ) {
        print $parser->return_text( $token );
    }
}
####
##################
package HTML::TokeParser::Easy;
##################
use strict;
use HTML::TokeParser;
use vars qw/ @ISA $VERSION $AUTOLOAD /;
$VERSION = '1.0';
@ISA = qw/ HTML::TokeParser /;

# Token type codes. A token is an array reference whose element 0 is one of
# these codes; the is_<type> methods compare against them.
use constant START_TAG           => 'S';
use constant END_TAG             => 'E';
use constant TEXT                => 'T';
use constant COMMENT             => 'C';
use constant DECLARATION         => 'D';
use constant PROCESS_INSTRUCTION => 'PI';

# Layout of each token type, keyed by type code. Every entry maps an
# attribute name to the index of that attribute inside the token array;
# the return_<attr> methods look indexes up here. '_name' is metadata (the
# human-readable type name used in warnings), not an attribute.
#
# Indexes follow the token formats documented for
# HTML::TokeParser::get_token:
#   ["S",  $tag, $attr, $attrseq, $text]
#   ["E",  $tag, $text]
#   ["T",  $text, $is_data]
#   ["C",  $text]
#   ["D",  $text]
#   ["PI", $token0, $text]
my %token_spec = (
    S => {
        _name   => 'START_TAG',
        tag     => 1,
        attr    => 2,
        attrseq => 3,
        text    => 4
    },
    E => {
        _name => 'END_TAG',
        tag   => 1,
        text  => 2
    },
    T => {
        _name   => 'TEXT',
        text    => 1,
        is_data => 2
    },
    C => {
        _name => 'COMMENT',
        text  => 1
    },
    D => {
        _name => 'DECLARATION',
        text  => 1
    },
    PI => {
        _name  => 'PROCESS_INSTRUCTION',
        token0 => 1,
        text   => 2
    } );
# AUTOLOAD -- build token predicate and accessor methods on first use.
#
# Two families of method names are recognised. Both are called as
# $parser->method( $token ), where $token is a token array reference whose
# element 0 is the token type code:
#
#   is_<type>      Returns 1 if the token's type code equals the package
#                  constant named uc(<type>) (is_start_tag -> START_TAG),
#                  0 otherwise (including when $token is not an array ref).
#   return_<attr>  Returns the element of $token that %token_spec assigns to
#                  <attr> for the token's type. If the attribute does not
#                  apply to that token, warns and returns nothing.
#
# The generated sub is installed under the requested name, so later calls go
# straight to it. Any other method name dies with "No such method: <name>".
sub AUTOLOAD {
    # When no DESTROY method exists anywhere in the class hierarchy, Perl
    # sends object destruction here; it must not be reported as an unknown
    # method.
    return if $AUTOLOAD =~ /::DESTROY\z/;

    my $generated;

    if ( $AUTOLOAD =~ /::is_(\w+)\z/ ) {
        # was it an is_... method?
        my $constant_name = uc $1;
        my $constant      = __PACKAGE__->can( $constant_name );

        # Only accept a genuine constant (a sub with an empty prototype).
        # This rejects is_<anything> whose upper-cased name is missing or is
        # an ordinary sub -- notably is_autoload, which would otherwise
        # re-enter this routine without end.
        die "No such method: $AUTOLOAD"
            unless $constant
                && defined prototype( $constant )
                && prototype( $constant ) eq '';

        my $tag = $constant->();
        $generated = sub {
            my $token = $_[ 1 ];
            # UNIVERSAL::isa also accepts array-based tokens that have been
            # blessed into a class.
            return 0 unless UNIVERSAL::isa( $token, 'ARRAY' );
            my $type = $token->[ 0 ];
            return defined $type && $type eq $tag ? 1 : 0;
        };
    } elsif ( $AUTOLOAD =~ /::return_(\w+)\z/ ) {
        # was it a return_... method?
        my $token_attr = $1;
        $generated = sub {
            my $token = $_[ 1 ];
            my $type  = UNIVERSAL::isa( $token, 'ARRAY' ) ? $token->[ 0 ] : undef;

            # Check the type with exists first so an unrecognised type code
            # does not autovivify a new, empty entry in %token_spec.
            my $spec = defined $type && exists $token_spec{ $type }
                     ? $token_spec{ $type }
                     : undef;

            # Keys beginning with '_' (such as _name) are table metadata,
            # not token attributes.
            if ( $spec && $token_attr !~ /\A_/ && exists $spec->{ $token_attr } ) {
                return $token->[ $spec->{ $token_attr } ];
            }

            my $type_name = $spec ? $spec->{ _name } : 'unknown token type';
            warn "No such attribute: '$token_attr' for $type_name";
            # Explicit empty return: otherwise the sub would hand back
            # warn's own return value.
            return;
        };
    } else {
        # Yo! You can't do that!
        die "No such method: $AUTOLOAD";
    }

    {
        # Symbolic references are needed only to install the new method.
        no strict 'refs';
        *{ $AUTOLOAD } = $generated;
    }

    # Hand the current call, with its original @_, to the new method.
    goto &$generated;
}