The code with the suggested modifications does not seem to work as I expected. Any other suggestions or advice?
Update: fixed the code so that it works correctly with Unicode keys.
#!/usr/bin/perl_parallel -w
# For Emacs: -*- mode:cperl; mode:folding; coding:utf-8; -*-
use strict;
use warnings;
use utf8;
use DB_File;
use MLDBM qw (DB_File Storable); # )
use Encode qw(encode decode);
use Fcntl;
# Script body: rebuild a BTREE-backed MLDBM database from the records in
# 'input.utf8' (one "key:value" pair per line), then print the stored keys
# as a comma-separated list.
my $dbfile = 'database.utf';
my $infile = 'input.utf8';
my %data   = ();

# Remove the old file to be sure the database holds only new data.  A failed
# removal is fatal: otherwise tie() would silently reopen the stale database.
if ( -f $dbfile ) {
    unlink $dbfile or die "Cannot remove '$dbfile': $!";
}

tie( %data, 'MLDBM', $dbfile, O_CREAT | O_RDWR, 0666, $DB_BTREE )
    or die "Cannot tie '$dbfile': $!";

# The DBM layer stores octets, so keys are encoded to UTF-8 octets when
# stored and decoded back to character strings when fetched.
# NOTE: the tied object is deliberately not kept in a lexical variable;
# an extra reference would make the untie() below warn about inner references.
(tied %data)->filter_store_key( sub { $_ = encode( 'utf8', $_ ); } );
(tied %data)->filter_fetch_key( sub { $_ = decode( 'utf8', $_ ); } );

# Use a lexical handle (the bareword DATA is Perl's special __DATA__ handle)
# and a validating UTF-8 input layer.
open my $in, '<:encoding(UTF-8)', $infile
    or die "Cannot open '$infile': $!";
while ( my $line = <$in> ) {
    chomp $line;

    # A blank line yields no key at all; skip it instead of storing a
    # junk record under an undefined key.
    next if $line eq '';

    # Split on the first colon only, so values may contain colons themselves.
    my ( $key, $value ) = split /:/, $line, 2;
    $data{$key} = $value;
}
close $in;

# Keys now come back as Unicode character strings rather than raw octets,
# so STDOUT needs an encoding layer to print them without
# "Wide character" warnings or Latin-1 output.
binmode STDOUT, ':encoding(UTF-8)';
print join( ', ', keys %data ), "\n";

untie %data;
exit 0;