use Encode qw( decode decode_utf8 FB_CROAK );

# Convert a string of hex digit pairs (the UTF-8 encoding of a character,
# without the "0x" prefix) into a "U+<HEX>" code point label.
#
# Returns the label when the bytes are the strict UTF-8 encoding of exactly
# one character; returns undef when the bytes are malformed, truncated, or
# encode more than one character.
sub _utf8_hex_to_codepoint_label {
    my ($hex) = @_;

    # "H*" packs each pair of hex digits into one byte, high nybble first.
    my $octets = pack("H*", $hex);

    # FB_CROAK makes decode() die on invalid input instead of silently
    # substituting U+FFFD; eval turns that failure into undef.
    my $chars = eval { decode("UTF-8", $octets, FB_CROAK) };

    # ord() only inspects the first character, so insist on exactly one to
    # avoid silently discarding the remainder.
    return unless defined $chars && length($chars) == 1;

    return sprintf("U+%X", ord($chars));
}

# Rewrite $_ in place when it consists entirely of "0x" followed by one to
# four hex byte pairs: the bytes are interpreted as UTF-8 and replaced by the
# "U+<HEX>" label of the encoded character. Input that does not decode to
# exactly one character is left as it was.
s{ ^ ( 0x ( (?: [0-9a-fA-F]{2} ){1,4} ) ) \z }{
    my ($literal, $hex) = ($1, $2);
    my $label = _utf8_hex_to_codepoint_label($hex);
    defined $label ? $label : $literal;
}xe;