# first, let's emulate what is showing up in the html data:
perl -e 'binmode STDOUT,":utf8"; print "\x92"' | od -txC
0000000 c2 92
0000002

# now let's see how perl handles that as input:
perl -e 'binmode STDOUT,":utf8"; print "\x92"' | perl -le 'binmode STDIN,":utf8"; $_=<>; print; binmode STDOUT,":utf8"; print' | od -txC
0000000 92 0a c2 92 0a
0000005

# perl's internal representation for "unicode" U+0080-U+00FF
# is really single bytes, and output to a non-utf8 file handle
# will be single bytes; but the utf8 flag is set, and output
# to a utf8 file handle will create "wide characters".

# Now, to do what really needs to be done in your case:
perl -e 'binmode STDOUT,":utf8"; print "\x92"' | perl -le 'use Encode; binmode STDIN,":utf8"; binmode STDOUT,":utf8"; $_=<>; print; $_=encode("iso-8859-1",$_); $_=decode("cp1252",$_); print' | od -txC
0000000 c2 92 0a e2 80 99 0a
0000007

# the three byte sequence "e2 80 99" is utf8 for U+2019,
# "right single quotation mark":
perl -e 'binmode STDOUT,":utf8"; print "\x{2019}"' | od -txC
0000000 e2 80 99
0000003

####
tr/\x91-\x94\x96-\x98/''""--~/;