#!/usr/bin/perl
# Tidy and sanitise an HTML file.
#
# Usage: <script> FILE
#
# Reads FILE, normalises its markup by parsing it with HTML::TreeBuilder and
# re-serialising it as XML, then passes the result through HTML::Scrubber so
# that only a small whitelist of tags and attributes survives. The scrubbed
# markup is written to STDOUT.
use strict;
use warnings;

use Carp;
use HTML::TreeBuilder;
use HTML::Scrubber;
use Perl6::Slurp;

# Fail early with a usage message instead of handing undef to slurp().
my $input_file = $ARGV[0];
croak "Usage: $0 <html-file>"
    if !defined $input_file || $input_file eq q{};

my $tidy = HTML::TreeBuilder->new();

# Whitelist of tags that survive scrubbing, plus the attributes permitted on
# <img> and <a>.
my $scrubber = HTML::Scrubber->new(
    allow => [qw[ p em strong a img ]],
    rules => [
        img => {
            src    => 1,
            alt    => 1,
            title  => 1,
            width  => 1,
            height => 1,
        },
        a => {
            href  => 1,
            title => 1,
        },
    ],
);

# Have the scrubber's underlying parser recognise XML-style empty element
# tags (e.g. <img ... />), since the tree is serialised with as_XML below.
# NOTE(review): this reaches into the private `_p` slot of HTML::Scrubber;
# confirm it still holds the parser object when upgrading the module.
$scrubber->{_p}->empty_element_tags(1);

my $html = slurp $input_file;

# Parser options must be set before any content is parsed.
$tidy->no_expand_entities(1);    # keep entities as written in the input
$tidy->p_strict(1);              # stricter handling of <p> element nesting

$tidy->parse_content($html);
my $xml = $tidy->as_XML;

# Release the parse tree explicitly once it has been serialised.
$tidy->delete;

print $scrubber->scrub($xml);