# Accumulator for the plain text pulled out of the HTML document.
my $text = "";

# Build a parser whose only handler appends every run of text to $text.
# The 'dtext' argspec asks HTML::Parser to hand over the text with HTML
# entities already decoded.
my $p = HTML::Parser->new(
    text_h => [ sub { $text .= shift }, 'dtext' ],
);

# Feed the document (held in $full_text, which is set elsewhere) to the parser.
$p->parse($full_text);

# Signal end-of-document. HTML::Parser may hold back trailing text until it
# knows no more input is coming; without this call the tail of the document
# can be missing from $text.
$p->eof;

####

# Now hack off the first lump of words.
# The pattern treats any run of spaces, tabs, carriage returns, newlines or
# form feeds as a single separator. "\n" is included so that words separated
# only by a line break are not glued together.
# With a positive LIMIT of $n (set elsewhere), split returns at most $n
# fields, and the last field carries the unsplit remainder of the text.
@list_of_words = split /[ \t\r\n\f]+/, $text, $n;