As nearly everyone has pointed out, you almost certainly want to do this with a module such as HTML::Parser. The following is an example of how you would achieve this for your snippet of HTML:
#!/usr/bin/perl
# Extract the text of every <td> element that has no "class" attribute,
# using HTML::Parser's event-handler (version 3) API, and print one item
# per line.
use strict;
use warnings;

use HTML::Parser;

# Sample input: cells with class=sp are spacers; the others carry the data.
my $text =
    '<td class=sp></td><td>Hello</td><td class=sp></td><td>Bye</td><td class=sp>';

my $parser = HTML::Parser->new(
    api_version      => 3,
    start_h          => [ \&start, 'self,tag,attr' ],
    start_document_h => [ \&init,  'self' ],
);

$parser->parse($text);
$parser->eof;    # signal end of input so any buffered trailing text is flushed

for my $item ( @{ $parser->{_items} } ) {
    print $item, "\n";
}

# start_document handler: set up the per-parse state kept on the parser
# object. _items collects the finished cell texts; _text accumulates the
# text of the cell currently being read.
sub init {
    my ($self) = @_;

    $self->{_items} = [];
    $self->{_text}  = '';

    return;
}

# start handler: when a <td> without a class attribute opens, switch on the
# text and end handlers so that the cell's content is captured.
sub start {
    my ( $self, $tag, $attribs ) = @_;

    return unless $tag eq 'td' && !exists $attribs->{class};

    $self->handler( text => \&get_text, 'self,dtext' );
    $self->handler( end  => \&end,      'self,tag' );

    return;
}

# text handler: append the (entity-decoded) text chunk to the current cell.
sub get_text {
    my ( $self, $text ) = @_;

    $self->{_text} .= $text;

    return;
}

# end handler: on </td>, stop capturing, store the accumulated text and
# reset the accumulator for the next cell.
sub end {
    my ( $self, $tag ) = @_;

    # With the "tag" argspec, end events report the name prefixed with "/".
    return unless $tag eq '/td';

    # An empty handler disables the event until the next wanted <td> opens.
    $self->handler( text => '' );
    $self->handler( end  => '' );

    push @{ $self->{_items} }, $self->{_text};
    $self->{_text} = '';

    return;
}
/J\
In reply to Re: problem with parsing
by gellyfish
in thread problem with parsing
by Anonymous Monk
| For: | Use: | ||
| & | &amp;amp; | ||
| < | &amp;lt; | ||
| > | &amp;gt; | ||
| [ | &amp;#91; | ||
| ] | &amp;#93; |