- or download this
iconv -f utf8 -t utf8 <dodgy_file> || iconv -f iso-8859-1 -t utf8 <dodgy_file> > <utf8_file> - or download this
# Print the string value of //p. -Mutf8 makes the literal in the -e code a
# character string; -CO encodes STDOUT as UTF-8 on the way out.
perl -Mutf8 -CO -MHTML::TreeBuilder::XPath -e'
my $t = HTML::TreeBuilder::XPath->new;
$t->parse( "<html><body><p>para ©</p></body></html>");
$t->eof;    # required after parse(): finalizes the tree before it is queried
my $p = $t->findvalue( "//p");
print $p, "\n";'
- or download this
# Print the text of the first <p> element. -Mutf8 makes the literal in the -e
# code a character string; -CO encodes STDOUT as UTF-8, so the wide character
# (the em dash) prints without a "Wide character" warning.
perl -Mutf8 -CO -MHTML::TreeBuilder::XPath -e'
my $t = HTML::TreeBuilder::XPath->new;
$t->parse( "<html><body><p>para © —</p></body></html>");
$t->eof;    # required after parse(): finalizes the tree before it is queried
my @p = $t->findnodes( "//p");
print $p[0]->as_text(), "\n";'
- or download this
# Print the text of the first <p> element with non-ASCII characters turned
# back into HTML entities. -Mutf8 is essential here: encode_entities must see
# characters, not raw UTF-8 bytes, or every byte becomes its own bogus entity.
perl -Mutf8 -CO -MHTML::TreeBuilder::XPath -MHTML::Entities -e'
my $t = HTML::TreeBuilder::XPath->new;
$t->parse( "<html><body><p>para © —</p></body></html>");
$t->eof;    # required after parse(): finalizes the tree before it is queried
# A literal "..." here is the yada-yada statement and dies at run time, so the
# node lookup is spelled out.
my @p = $t->findnodes( "//p");
my $out = $p[0]->as_text;
encode_entities( $out);    # void context: replaces entities in place
print $out, "\n";'