# is_binary($content)
#
# Report whether a string contains any character outside the 7-bit ASCII
# range.
#
# Parameters:
#   $content - the string to inspect; may be undef.
#
# Returns:
#   1 if at least one character has an ordinal value above 127,
#   0 otherwise (including when $content is undef or empty).
#
# Note: despite the name, the only criterion is "some character is above
# 127", so text that contains non-ASCII characters is reported as 1 as well.
sub is_binary {
    my $content = shift;

    return 0 if !defined $content;

    # One character-class match stops at the first character above 0x7F.
    # This avoids expanding the whole string into a list of ordinals and
    # avoids using the package variable $a (reserved for sort) as a loop
    # variable.
    return $content =~ /[^\x00-\x7F]/ ? 1 : 0;
}

####
# Everything from the '####' marker onward was already inert comment text
# on the original single line (a '#' comment runs to the end of the line).
# It is kept here, reflowed one statement per line and still commented out.
# NOTE(review): it looks like a separate test script. If it is revived as
# its own file, be aware that it defines its own is_binary() based on
# utf8::is_utf8() and shells out to `cat` for its sample data.
#
# #!/usr/bin/perl -w
#
# use strict;
# use warnings;
# use utf8;
# use lib "www/siikir/cms/src";
# use Siikir::Util;
#
# binmode(STDOUT, "utf8");
#
# # Valid UTF-8 strings
# my @valid = @{ Siikir::Util::utf8_decode([
#     "hello world",
#     "Hello!\nWorld!",
#     "My favorite pokemon is ブラッキー",
#     "No, エーフィ is better than ブラッキー!",
#     "ミュウツー ミュウツー",
# ])};
#
# # Create some invalid strings.
# my @invalid = (
#     scalar(`cat /usr/bin/vim`),
#     scalar(`cat /usr/share/pixmaps/xchat.png`),
#     scalar(map { chr(hex($_)) } qw/0xFF 0x4C 0x3D 0x10 0x27 0x78 0xED/),
# );
# chomp(@invalid);
#
# print "Testing valid strings...\n";
# foreach my $v (@valid) {
#     my $pass = is_binary($v);
#     print "Str: $v (pass: $pass)\n";
# }
#
# print "Testing invalid strings...\n";
# foreach my $i (@invalid) {
#     my $pass = is_binary($i);
#     print "Pass: $pass\n";
# }
#
# sub is_binary {
#     my $data = shift;
#
#     # # Valid UTF-8? Fail: gives a pass to everything.
#     # use Test::utf8;
#     # if (is_valid_string($data)) {
#     #     return "true";
#     # }
#     # return "false";
#
#     # Sane UTF-8? Fail: gives a pass to a PNG image
#     # use Test::utf8;
#     # if (is_sane_utf8($data)) {
#     #     return "true";
#     # }
#     # return "false";
#
#     # Valid UTF-8?
#     if (utf8::is_utf8($data)) {
#         return "true";
#     }
#     return "false";
# }