use Encode;

# ... (elided: code that fills $orig_version with the raw input bytes)

# Interpret the raw bytes in $orig_version as ISO-8859-1 (Latin-1).
# decode() returns a Perl character string; despite the variable name, the
# result still has to go through encode() or an :encoding(UTF-8) I/O layer
# before it leaves the program as UTF-8 bytes.
my $utf8_version = decode("iso8859-1", $orig_version);

# ... (elided)

####

# ... (elided)

# assume that offending text is in $_
{
    # "use bytes" is lexically scoped; the bare block keeps byte semantics
    # from leaking into whatever code follows this snippet.
    # NOTE(review): if later code relied on the pragma still being active,
    # it needs its own "use bytes" -- confirm against the elided code.
    use bytes;

    # tr/// takes a plain character list, not a regex character class, so
    # no square brackets are written here (they would be literal "[" and
    # "]", both of which already fall inside \x01-\x7f).
    # With /c and /d every byte outside \x01-\x7f is deleted: any byte with
    # the hi-bit set, and also NUL (\x00), because the range starts at \x01.
    tr/\x01-\x7f//cd;
}

# ... (elided)