use Encode;

# Convert an HTML document from Windows-1252 (cp1252) to UTF-8.
#
# ... read the raw bytes of the html file into $html, and then:

# Encode::from_to() rewrites the octets held in $html in place. It returns
# the length of the converted data in octets on success and undef on error.
# The call is fully qualified because from_to() is not part of Encode's
# default export list, so a plain "use Encode;" does not import it.
# "UTF-8" selects the strict, standards-conforming encoding; "utf8" is
# Perl's more permissive variant.
Encode::from_to( $html, "cp1252", "UTF-8" );

# now $html contains UTF-8 encoded bytes instead of cp1252 data