#!perl
# Checks that substr() on a string produced by Encode::decode() addresses
# characters rather than the underlying UTF-8 bytes.
#
# NOTE: every statement must stay on its own line below the shebang. If the
# code shares a line with "#!perl", perl treats all of it as a comment and
# the script silently runs zero tests.
use strict;
use warnings;

use Encode 'encode', 'decode';
use charnames ':full';
use Test::More;
use Data::Dumper;

# Have Dumper emit escaped, double-quoted strings so that non-ASCII
# characters are visible in the failure diagnostics.
$Data::Dumper::Useqq = 1;

# Raw input: the O-with-diaeresis is spelled as its two UTF-8 bytes (C3 96).
my $octets = "This is the raw input string, also containing an umlaut, in UTF-8 bytes: mot\xc3\x96rhead ... and some more text";

# The same text as a character string: the umlaut is one character here.
my $expected = "This is the raw input string, also containing an umlaut, in UTF-8 bytes: mot\N{LATIN CAPITAL LETTER O WITH DIAERESIS}rhead ... and some more text";

# The snippet that the substr() call below is expected to return.
my $expected_part = "mot\N{LATIN CAPITAL LETTER O WITH DIAERESIS}rhead";

my $string = decode('UTF-8', $octets);
is $string, $expected, "The decoded strings are identical (sanity check)";

# Offset and length are counted in characters. The snippet is 9 characters
# long but occupies 10 bytes in $octets, so a length of 9 only yields the
# whole word when substr() works on characters. The values are kept as
# literals on purpose: the test pins the exact numbers.
my $part_offset = 73;    # characters preceding "mot" in $expected
my $part_length = 9;     # characters in $expected_part

my $part = substr( $string, $part_offset, $part_length );
is $part, $expected_part, "We snip the correct part"
    or diag Dumper [ $part, $expected_part ];

done_testing();