It seems the difference has to do with using c_get.
I added a little to your test script:
#!/usr/bin/perl -w
# Test script: traces what the BerkeleyDB key filters receive and return
# when a non-ASCII key is stored through the tied hash, listed with keys(),
# and looked up with a cursor c_get().
use strict;
use warnings;
use utf8;    # the key literal below is written in UTF-8
use Encode;
use BerkeleyDB;
use Data::Dumper;

# Encode::_utf8_off($string);

# Have Dumper quote strings with escapes so non-ASCII data is visible
# in the trace output.
$Data::Dumper::Useqq = 1;

my $db_file = "xx.db";

# Remove any database left over from a previous run.
unlink $db_file;

# DB_CREATE asks BerkeleyDB to create the file if it does not exist.
# Stop right away if the open fails; otherwise the failure would only show
# up later as a method call on an undefined $db.
tie my %h, "BerkeleyDB::Btree",
    -Filename => $db_file,
    -Flags    => DB_CREATE
    or die "Cannot open $db_file: $! $BerkeleyDB::Error\n";
my $db = tied %h;

# Fetch-side key filter: dump $_ as received, decode it from UTF-8,
# then dump the result.
$db->filter_fetch_key(
    sub {
        warn ">>fetch: " . Dumper($_);
        $_ = decode( "utf8", $_ );
        warn "<<fetch: " . Dumper($_);
    }
);

# Store-side key filter: dump $_ as received, encode it to UTF-8,
# then dump the result.
$db->filter_store_key(
    sub {
        warn ">>store: " . Dumper($_);
        $_ = encode( "utf8", $_ );
        warn "<<store: " . Dumper($_);
    }
);

# Two U+00E4 characters; each one is two bytes once encoded as UTF-8.
my $key = "ää";

print "setting \$h{\$key}\n";
$h{$key} = 1;

print "reading keys\n";
my @keys = keys %h;    # result unused; the call is made to trigger the filters

print "reading with c_get\n";
my $cursor = $db->db_cursor();
my $value;

# $key is passed in as the search key; the trace shows both key filters
# being run for this single call.
my $status = $cursor->c_get( $key, $value, DB_SET_RANGE );
This produces:
>>store: $VAR1 = "\x{e4}\x{e4}";
<<store: $VAR1 = "\303\244\303\244";
reading keys
>>fetch: $VAR1 = "\303\244\303\244";
<<fetch: $VAR1 = "\x{e4}\x{e4}";
reading with c_get
>>store: $VAR1 = "\x{e4}\x{e4}";
<<store: $VAR1 = "\303\244\303\244";
>>fetch: $VAR1 = "\x{e4}\x{e4}";
<<fetch: $VAR1 = "\x{fffd}\x{fffd}";
In reply to Re: BerkeleyDB + UTF8
by ig
in thread BerkeleyDB + UTF8
by tfoertsch
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |