#!/usr/bin/perl
#
# Split the document stored after __END__ into blocks, using <p> start and
# end tags as block boundaries, then dump the resulting list of blocks.
#
# The original source text of every token is kept, so concatenating all
# blocks reproduces the input (minus any trailing empty blocks).

use strict;
use warnings;

use Data::Dumper;
use HTML::TokeParser;

# Show strings double-quoted in the dump so that newlines and other
# whitespace escapes are visible.
$Data::Dumper::Useqq = 1;

# Index of the original source text inside each token array returned by
# get_token, keyed by the token type found in element 0.
my %text_index_for = (
    S  => 4,    # start tag
    E  => 2,    # end tag
    T  => 1,    # text
    C  => 1,    # comment
    D  => 1,    # declaration
    PI => 2,    # processing instruction
);

my $p = HTML::TokeParser->new( *DATA )
    or die "Can't create HTML::TokeParser for DATA: $!";

# Always keep at least one (possibly empty) block to append to.
my @html_blocks = ( '' );

while ( my $token = $p->get_token ) {
    my ( $type, $name_or_text ) = @{ $token };
    my $type_tag = "$type $name_or_text";

    die "Can't happen" unless exists $text_index_for{$type};
    my $text = $token->[ $text_index_for{$type} ];

    # A <p> start tag opens a new block, unless the current block is still
    # empty.  Test length(), not truth, so a block holding just "0" counts
    # as non-empty.
    push @html_blocks, ''
        if length( $html_blocks[-1] ) and $type_tag eq 'S p';

    $html_blocks[-1] .= $text;

    # A </p> end tag closes the current block; what follows starts a new one.
    push @html_blocks, ''
        if length( $html_blocks[-1] ) and $type_tag eq 'E p';
}

# Drop trailing empty blocks.  The @html_blocks guard is required: once the
# array is empty, $html_blocks[-1] is undef, which compares equal to '' and
# would make this loop spin forever (e.g. when DATA is empty).
pop @html_blocks while @html_blocks and $html_blocks[-1] eq '';

print Dumper \@html_blocks;

__END__
HTML::TokeParser


NAME

HTML::TokeParser - Alternative HTML::Parser interface


SYNOPSIS

 use HTML::TokeParser;
 --snip--


DESCRIPTION

The HTML::TokeParser is an --snip--