"\x{E4}" =~ /\w/ #### # | the UNIX pipe transports bytes, not # | codepoints. So Perl sees the byte E4 $ echo -e "\xE4"|perl -wE 'say <> ~~ /\w/' # ^^^^^^^ a text operation # sees the codepoint U+00E4