in reply to HTML::TreeBuilder, UTF-8 input, windows-1252 output
Cleaned up:
# Extract the trimmed text of the first <h1> element found in an HTML file
# and write it, followed by a newline, to an output file.
#
# Usage: perl script.pl INPUT_HTML OUTPUT_FILE
#
# INPUT_HTML is read through a UTF-8 decoding layer; OUTPUT_FILE is written
# through a cp1252 (Windows-1252) encoding layer.

use strict;
use warnings;
use feature qw( say );

use HTML::TreeBuilder qw( );
use Object::Destroyer qw( );

# Fail early with a usage message rather than with an "uninitialized value"
# warning from open(). Extra arguments are ignored, as before.
@ARGV >= 2
    or die "usage: $0 INPUT_HTML OUTPUT_FILE\n";
my ($in_path, $out_path) = @ARGV;

open(my $fh_in, "<:encoding(UTF-8)", $in_path)
    or die "Can't open \"$in_path\" for reading: $!\n";
open(my $fh_out, ">:encoding(cp1252)", $out_path)
    or die "Can't open \"$out_path\" for writing: $!\n";

# Object::Destroyer calls $tree->delete() when the wrapper goes out of scope,
# so the tree is released on every exit path, including die().
my $tree = Object::Destroyer->new(HTML::TreeBuilder->new(), 'delete');
$tree->parse_file($fh_in);

# look_down() in scalar context yields the first matching element, or undef
# when nothing matches; guard against the latter before calling a method.
my $h1_element = $tree->look_down("_tag", "h1");
defined($h1_element)
    or die "No <h1> element found in \"$in_path\"\n";

my $h1_trimmed_text = $h1_element->as_trimmed_text();
say {$fh_out} $h1_trimmed_text;

# Buffered write errors only surface at close(), so check it explicitly.
close($fh_out)
    or die "Can't finish writing \"$out_path\": $!\n";
close($fh_in);
|
|---|