use HTML::Parser;
use Text::Wrap;

# html2text($html)
#
# Convert an HTML string to wrapped plain text.
#
# Every start and end tag contributes a single space to the output so that
# words separated only by markup do not run together.  Text found inside
# <script> and <style> elements is discarded.  The collected text is then
# re-flowed with Text::Wrap::fill (which honours $Text::Wrap::columns) and
# leading whitespace is removed.
#
# Parameters:
#   $html - string containing the HTML markup to convert.
#
# Returns:
#   The plain-text rendering as a string; the empty string when $html is
#   undefined or empty.
sub html2text {
    my $html = shift;
    return '' unless defined $html && length $html;

    my %inside;        # tag name => current nesting depth
    my $text = '';

    # Shared start/end handler: $delta is the literal '+1' or '-1' supplied
    # by the argspec below, so $inside{$tagname} tracks open elements.
    my $tag = sub {
        my ($tagname, $delta) = @_;
        $inside{$tagname} += $delta;
        $text .= " ";
    };

    # Text handler: keep decoded text unless we are inside script/style.
    my $txt = sub {
        my ($dtext) = @_;
        $text .= $dtext unless $inside{script} or $inside{style};
    };

    my $parser = HTML::Parser->new(
        api_version => 3,
        handlers    => [
            start => [$tag, "tagname, '+1'"],
            end   => [$tag, "tagname, '-1'"],
            text  => [$txt, "dtext"],
        ],
        marked_sections => 1,
    );

    $parser->parse($html);
    # Signal end of document.  Without this the parser keeps trailing text
    # (a final word or whitespace that might continue in a later chunk)
    # buffered, and it would never reach the text handler.
    $parser->eof;

    # Disabled: strip everything except tab, newline and printable ASCII.
    #$text =~ tr/\11\12\40-\176//cd; # remove wide non ascii chars

    $text = Text::Wrap::fill('', '', $text);
    $text =~ s/\A\s+//;
    return $text;
}