use strict;
use warnings;
use utf8;                       # the source contains UTF-8 (Cyrillic) literals
use feature ':5.10';
use Encode qw/:fallback_all/;
use List::Util qw(min);
use PerlIO::encoding;

# Set at compile time, presumably so that the value is already in place when
# the "use open" pragma below applies its :encoding layers.
# NOTE(review): FB_WARN does not include STOP_AT_PARTIAL, which the
# PerlIO::encoding documentation lists as part of the default fallback. That
# may be what makes a character split across the layer's buffer fail in the
# second test case below -- confirm before relying on this explanation.
BEGIN {
    $PerlIO::encoding::fallback = FB_WARN;
}

use open IO => ':encoding(UTF-8)', ':std';

# Inclusive window of octet offsets shown by examine(); it spans offset 1024.
use constant {
    DUMP_FIRST => 1010,
    DUMP_LAST  => 1027,
};

# Show the current PerlIO layers of STDOUT on STDERR.  With "details => 1"
# the list is consumed here as (name, arguments, flags) triplets.
my @layers   = PerlIO::get_layers(\*STDOUT, details => 1);
my $layer_no = 0;
while (my ($name, $args, $flags) = splice @layers, 0, 3) {
    printf STDERR "stdout layer %d: (%s,%s,0x%X)\n",
        $layer_no++, $name, $args // '', $flags;
}
print STDERR "\n";

# examine($text)
#
# Shows [a] that Encode itself can encode $text to UTF-8, and [b] the encoded
# octets around the suspect area (offsets DUMP_FIRST .. DUMP_LAST).
# Everything is written to STDERR.  Dies if the encoding fails (FB_CROAK).
sub examine {
    # Work on a copy: Encode::encode() with a CHECK argument may modify the
    # string it is given.
    my ($text) = @_;

    my $encoded = Encode::encode('UTF-8', $text, FB_CROAK);
    warn "# utf-8 encoded octet dump (total=", length($encoded), "):\n";

    my @octets = unpack 'C*', $encoded;

    # Clamp the window to the octets that really exist, so that positions
    # past the end of the encoded string are not reported as 0x00.
    my $last = min(DUMP_LAST, $#octets);
    for my $offset (DUMP_FIRST .. $last) {
        printf STDERR " enc[%d]=0x%02X", $offset, $octets[$offset];
    }
    print STDERR "\n";

    return;
}

# report_case($text)
#
# Runs one test case: prints whether Perl's internal UTF-8 flag is set on
# $text (labelled "encoded"), its length in characters, the octet dump from
# examine(), and finally the string itself through the STDOUT layers.
sub report_case {
    my ($text) = @_;

    print "encoded: " . (utf8::is_utf8($text) ? 'yes' : 'no') . "\n";
    my $len = length $text;
    print "length: $len\n";

    examine($text);
    print "$text\n\n";

    return;
}

# Create the sample test string: 38 repetitions of a 14-character chunk
# (532 characters, 1026 octets once encoded as UTF-8).
my $test = '';
while ( length($test) <= 520 ) {
    # typically 2-byte codes
    $test .= ' предоставлена';
}

# This one works.
report_case($test);

# Add a 1-byte UTF-8 code point (x) in front, and it fails
# in the presence of "use PerlIO::encoding" or "use open IO => ..".
$test = 'x' . $test;
report_case($test);