use strict;
use warnings;

use Test::More;
use Unicode::Escape;

# Verifies that utf8_escape() produces exactly the same escaped string as the
# reference implementation, Unicode::Escape::escape(), for a table of inputs.
# NOTE(review): utf8_escape() is not defined in this part of the file; it is
# assumed to be provided elsewhere -- confirm.

# One hashref per case: 'text' holds the raw bytes to escape, 'name' is the
# label used in the test description.
my @test_texts = (
    {
        name => 'utf8 test from Unicode::Escape-0.0.2',

        # Five three-byte UTF-8 sequences:
        # U+3042, U+3044, U+3046, U+3048, U+304A.
        text => "\x{e3}\x{81}\x{82}"
              . "\x{e3}\x{81}\x{84}"
              . "\x{e3}\x{81}\x{86}"
              . "\x{e3}\x{81}\x{88}"
              . "\x{e3}\x{81}\x{8a}",
    },
    {
        name => 'empty string',
        text => q{},
    },
    {
        name => 'zero (false-looking)',
        text => '0',
    },
    {
        name => 'some two-byte utf8',

        # Three two-byte UTF-8 sequences: U+00A2, U+00A3, U+00A4.
        text => "\x{c2}\x{a2}" . "\x{c2}\x{a3}" . "\x{c2}\x{a4}",
    },

    # Disabled case. Unicode::Escape escapes these bytes as '\udbea\udfcd',
    # which is the UTF-16 surrogate pair for U+10ABCD -- the code point that
    # the four bytes F4 8A AF 8D encode.
    # NOTE(review): presumably left out because utf8_escape() does not emit
    # the surrogate-pair form -- confirm before re-enabling.
    # {
    #     text => "\x{f4}\x{8a}\x{af}\x{8d}",
    #     name => 'four-byte utf8',
    # },
    {
        name => 'mixed character length utf8',

        # ASCII mixed with one two-byte (U+00A5) and one three-byte (U+304A)
        # sequence.
        text => "one: X, two: \x{c2}\x{a5}, three: \x{e3}\x{81}\x{8a}",
    },
);

# Exactly one assertion is made per table entry.
plan tests => scalar @test_texts;

for my $case (@test_texts) {

    # Both fields are mandatory; abort the whole run on a malformed entry.
    my ( $text, $name ) = @{$case}{qw( text name )};
    die 'bad test data' if !defined($text) || !defined($name);

    my $expected = Unicode::Escape::escape($text);
    my $got      = utf8_escape($text);

    # Deliberately ok() with 'eq' rather than is(): on failure is() would dump
    # both escaped strings, which is long and unreadable.
    ok( $expected eq $got, "correct escaping for '$name'" );
}