# Matches a run of characters in the U+0080..U+00FF range whose code points,
# read as octets, form exactly one well-formed UTF-8 sequence. Such a run is
# what a multi-byte character looks like after it was UTF-8-encoded twice and
# decoded only once.
#
# Each alternative must sit on its own line: under the x modifier a '#' comment
# runs to the end of the physical line, so a single-line pattern would lose
# every alternative after the first comment. Only well-formed lead and
# continuation ranges are listed so the second decode never has to substitute
# U+FFFD for an overlong form, a surrogate, or a value above U+10FFFF.
my $utf8_decodable_regex = qr/
      [\xC2-\xDF][\x80-\xBF]               # 2 bytes: U+0080..U+07FF
    | \xE0[\xA0-\xBF][\x80-\xBF]           # 3 bytes: U+0800..U+0FFF, no overlongs
    | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}    # 3 bytes: general case
    | \xED[\x80-\x9F][\x80-\xBF]           # 3 bytes: U+D000..U+D7FF, no surrogates
    | \xF0[\x90-\xBF][\x80-\xBF]{2}        # 4 bytes: U+10000..U+3FFFF, no overlongs
    | [\xF1-\xF3][\x80-\xBF]{3}            # 4 bytes: general case
    | \xF4[\x80-\x8F][\x80-\xBF]{2}        # 4 bytes: up to U+10FFFF
/x;

# Pass 1: turn the UTF-8 octets held in $testStr into a character string.
$testStr = decode('utf-8', $testStr);

# Pass 2: wherever the decoded text still looks like UTF-8 octets, decode that
# run once more. Text that was encoded only once contains no such run and is
# left untouched. "$1" is stringified so decode() works on a private copy
# rather than on the capture variable itself.
$testStr =~ s/($utf8_decodable_regex)/decode('utf-8', "$1")/ge;

# Hand the result back as UTF-8 octets, the same form the input arrived in.
$testStr = encode('utf-8', $testStr);