.\File\AnyEncoding.pm .\t\AnyEncoding.t #### #!/usr/bin/perl =pod SYNOPSIS use File::AnyEncoding; # unicode file writer - reader my $fun1 = new File::AnyEncoding('utf-16le'); my $text1 = "Hello world"; my $filepath1 = "AnyEncoding-test1.txt"; $fun1->write_file($filepath1, $text1); # writes file with specified encoding my $fun2 = new File::AnyEncoding(); my $text2 = $fun2->read_file($filepath1); # remembers the encoding found in $filepath1 $text2 =~s/world/unicode/; # modify file contents my $filepath2 = "AnyEncoding-test2.txt"; $fun2->write_file($filepath2, $text2); # writes file with encoding found in $filepath1 AUTHOR Rudif c/o Perlmonks =cut package File::AnyEncoding; use strict; use File::BOM qw( :all ); our %supported_encoding = map { $_ => 1 } ( 'NONE', 'UTF-8', 'UTF-16LE' ); sub new { my $class = shift; my $enc = shift // 'utf8'; my $self = {}; bless $self, $class; $self->set_encoding($enc); return $self; } sub set_encoding { my $self = shift; my $enc = shift; unless ( defined $enc && defined $supported_encoding{ $enc } ) { $enc = 'NONE'; #warn "defaulting to $enc"; } $self->{encoding} = $enc; } sub get_encoding { my $self = shift; $self->{encoding}; } sub write_file { my $self = shift; my $filepath = shift; my $text = join '', @_; my $enc = $self->{encoding}; my $FH; if ( $enc eq 'NONE' ) { open $FH, ">", $filepath; #open $FH, ">:raw:encoding(UTF-8):crlf:utf8", $filepath; } else { open $FH, ">:raw:encoding($enc):crlf:utf8", $filepath; print $FH "\x{FEFF}"; } print $FH $text; close $FH; } sub read_file { my $self = shift; my $filepath = shift; open my $FH, '<:bytes', "$filepath"; my ( $enc, $spillage ) = get_encoding_from_filehandle($FH); $enc = $self->set_encoding($enc); if ( $enc eq 'NONE' ) { #binmode $FH, ":encoding(UTF-8)"; close $FH; open $FH, '<', "$filepath"; } else { binmode $FH, ":encoding($enc)"; } my @lines = <$FH>; close $FH; wantarray ? @lines : join '', @lines; } 1; #### #!/usr/bin/perl use strict; $|++; use lib('..'); use Data::HexDump; use File::Path; my $data = '.\data'; rmtree $data; # remove old data if any mkdir $data; use Test::More tests => 28; use File::AnyEncoding; # under test use_ok('File::AnyEncoding'); # test utf-16le encoding { # create object and write test file with specified encoding my $encoding = 'UTF-16LE'; my $fan1 = File::AnyEncoding->new($encoding); isa_ok( $fan1, 'File::AnyEncoding', '$fan1' ); is( $fan1->get_encoding(), 'UTF-16LE', "specified encoding" ); my $text1 = "Hello world \x{263A}\n\n"; my $file1 = "$data\\$encoding.txt"; $fan1->write_file( $file1, $text1 ); my $expected1 = <set_encoding('NONE'); is( $fan1->get_encoding(), 'NONE', "default encoding" ); # read test file and remember encoding my $text2 = $fan1->read_file($file1); is( $fan1->get_encoding(), 'UTF-16LE', "detected encoding" ); my $expected2 = 'Hello world \x{263a}\x{d}\x{a}\x{d}\x{a}'; is( my_reasciify($text2), $expected2, "read_file $file1" ); my @lines2 = $fan1->read_file($file1); is( scalar @lines2, 2, "read_file $file1" ); my $join2 = join( '', @lines2 ); is( my_reasciify($join2), $expected2, "read_file $file1" ); # modify text and write second file using the remebered encoding $text2 =~ s/world/WORLD/; $text2 =~ s/\x{263A}/\x{20AC}/; ( my $file2 = $file1 ) =~ s/.txt/-2.txt/; $fan1->write_file( $file2, $text2 ); # writes file with remembered encoding my $expected3 = <new($encoding); isa_ok( $fan1, 'File::AnyEncoding', '$fan1' ); is( $fan1->get_encoding(), 'UTF-8', "specified encoding" ); my $text1 = "Hello world \x{263A}\n\n"; my $file1 = "$data\\$encoding.txt"; $fan1->write_file( $file1, $text1 ); my $expected1 = <set_encoding('NONE'); is( $fan1->get_encoding(), 'NONE', "default encoding" ); # read test file and remember encoding my $text2 = $fan1->read_file($file1); is( $fan1->get_encoding(), 'UTF-8', "detected encoding" ); my $expected2 = 'Hello world \x{263a}\x{a}\x{a}'; is( my_reasciify($text2), $expected2, "read_file $file1" ); my @lines2 = $fan1->read_file($file1); is( scalar @lines2, 2, "read_file $file1" ); my $join2 = join( '', @lines2 ); is( my_reasciify($join2), $expected2, "read_file $file1" ); # modify text and write second file using the remebered encoding $text2 =~ s/world/WORLD/; $text2 =~ s/\x{263A}/\x{20AC}/; ( my $file2 = $file1 ) =~ s/.txt/-2.txt/; $fan1->write_file( $file2, $text2 ); # writes file with remembered encoding my $expected3 = <new($encoding); isa_ok( $fan1, 'File::AnyEncoding', '$fan1' ); is( $fan1->get_encoding(), 'NONE', "specified encoding" ); my $text1 = "Hello world \x{263A}\n\n"; my $file1 = "$data\\$encoding.txt"; $fan1->write_file( $file1, $text1 ); my $expected1 = <set_encoding('UTF-8'); is( $fan1->get_encoding(), 'UTF-8', "default encoding" ); # read test file and remember encoding my $text2 = $fan1->read_file($file1); is( $fan1->get_encoding(), 'NONE', "detected encoding" ); my $expected2 = 'Hello world \x{e2}\x{98}\x{ba}\x{a}\x{a}'; is( my_reasciify($text2), $expected2, "read_file $file1" ); my @lines2 = $fan1->read_file($file1); is( scalar @lines2, 2, "read_file $file1" ); my $join2 = join( '', @lines2 ); is( my_reasciify($join2), $expected2, "read_file $file1" ); # modify text and write second file using the remebered encoding $text2 =~ s/world/WORLD/; $text2 =~ s/\x{263A}/\x{20AC}/; ( my $file2 = $file1 ) =~ s/.txt/-2.txt/; $fan1->write_file( $file2, $text2 ); # writes file with remembered encoding my $expected3 = <file($file); my $str = ''; while ( local $_ = $f->dump ) { $str .= $_; } $str =~ s/.*00000000/00000000/s; return $str; } # returns sprintf of characters in $string, # replacing those not printable as ascii by their hex code point numeric value # similar to sub in File::BOM sub my_reasciify { my $string = shift; $string = join "", map { my $ord = ord($_); # ($ord > 127 || ($ord < 32 && $ord != 10)) ( $ord > 127 || $ord < 32 ) ? sprintf '\x{%x}', $ord : $_ } split //, $string; } __END__