use HTML::TreeBuilder;
use strict;
use warnings;

# Extract the trimmed text of the first <h1> element in test.html and write
# it, followed by a newline, to test-treebuilder-output.txt encoded as
# Windows-1252 (cp1252).
my $infile  = "test.html";
my $outfile = "test-treebuilder-output.txt";

# Decode the input as UTF-8 so the parser is fed characters rather than bytes.
# ':encoding(UTF-8)' validates the input, unlike the lax ':utf8' layer.
open(my $fh, "<:encoding(UTF-8)", $infile)
    or die "Cannot open '$infile' for reading: $!";

# The output encoding has to be spelled ':encoding(cp1252)'. A bare ':cp1252'
# is not a PerlIO layer: binmode() rejects it and the handle is left without
# any encoding layer at all.
open(my $out, ">:encoding(cp1252)", $outfile)
    or die "Cannot open '$outfile' for writing: $!";

my $tree = HTML::TreeBuilder->new();
$tree->parse_file($fh);

# look_down() returns undef when nothing matches, so fail with a clear
# message instead of calling a method on an undefined value.
my $h1Element = $tree->look_down("_tag", "h1")
    or die "No <h1> element found in '$infile'\n";

my $h1TrimmedText = $h1Element->as_trimmed_text();
print {$out} "$h1TrimmedText\n";

# Release the parse tree now that the text has been extracted.
$tree->delete();

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close '$outfile': $!";
close($fh)  or die "Cannot close '$infile': $!";
####
test
Décembre
####
use strict;
use warnings;

# Copy test.html to test-print-output.txt line by line, decoding the input
# as UTF-8 and encoding the output as Windows-1252 (cp1252).
my $infile  = "test.html";
my $outfile = "test-print-output.txt";

# ':encoding(UTF-8)' validates the input, unlike the lax ':utf8' layer.
open(my $fh, "<:encoding(UTF-8)", $infile)
    or die "Cannot open '$infile' for reading: $!";

# The output encoding has to be spelled ':encoding(cp1252)'. A bare ':cp1252'
# is not a PerlIO layer: binmode() rejects it and the handle is left without
# any encoding layer at all.
open(my $out, ">:encoding(cp1252)", $outfile)
    or die "Cannot open '$outfile' for writing: $!";

# Stream the file instead of slurping every line into an array first.
while (my $line = <$fh>) {
    print {$out} $line;
}

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close '$outfile': $!";
close($fh)  or die "Cannot close '$infile': $!";