in reply to BerkeleyDB + UTF8
It seems that c_get saves the retreived key into the passed SV without modifying its UTF8 flag then calls the fetch filter passing this SV. Thus, the UTF8 flag of $_ on entry to the fetch filter is whatever it was on $key in the call to c_get.
You can see the effect in this modification of your test script. Note the difference when the UTF8 flag is turned off on $key before calling c_get in the last test.
#!/usr/bin/perl -w use strict; use warnings; use Encode; use utf8; use BerkeleyDB; use Encode; use Data::Dumper; use Devel::Peek; # Encode::_utf8_off($string); unlink "xx.db"; tie my %h, "BerkeleyDB::Btree", -Filename=>"xx.db", -Flags=>DB_CREAT +E; my $db=tied %h; $Data::Dumper::Useqq=1; $db->filter_fetch_key( sub { warn ">>fetch: ".Dumper($_); Dump($_); $_=decode("utf8", $_); warn "<<fetch: ".Dumper($_); Dump($_); }); $db->filter_store_key( sub { warn ">>store: ".Dumper($_); Dump($_); $_=encode("utf8", $_); warn "<<store: ".Dumper($_); Dump($_); }); print "\n\nsetting \$h{\$key}\n"; my $key = "لل"; Dump($key); $h{$key}=1; print "\n\nreading keys with keys \%h\n"; my @l=keys %h; print "\n\nreading with c_get\n"; my $cursor = $db->db_cursor(); my $value; $key = "لل"; print "key before c_get call\n"; Dump($key); print "calling c_get\n"; my $status = $cursor->c_get($key, $value, DB_SET_RANGE); print "\n\ntry again, turning UTF8 flag off key\n"; $key = "لل"; Encode::_utf8_off($key); print "key before c_get call\n"; Dump($key); print "calling c_get\n"; $status = $cursor->c_get($key, $value, DB_SET_RANGE);
Which produces
setting $h{$key} SV = PV(0x9b08c74) at 0x9d35d48 REFCNT = 1 FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8) PV = 0x9d337a8 "\303\244\303\244"\0 [UTF8 "\x{e4}\x{e4}"] CUR = 4 LEN = 8 >>store: $VAR1 = "\x{e4}\x{e4}"; SV = PVMG(0x9b823a0) at 0x9d35f28 REFCNT = 1 FLAGS = (SMG,POK,pPOK,UTF8) IV = 0 NV = 0 PV = 0x9b58b38 "\303\244\303\244"\0 [UTF8 "\x{e4}\x{e4}"] CUR = 4 LEN = 8 MAGIC = 0x9d41510 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d95400 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = 2 <<store: $VAR1 = "\303\244\303\244"; SV = PVMG(0x9b823a0) at 0x9d35f28 REFCNT = 1 FLAGS = (SMG,POK,pPOK) IV = 0 NV = 0 PV = 0x9d4c998 "\303\244\303\244"\0 CUR = 4 LEN = 8 MAGIC = 0x9d41510 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d95400 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = -1 reading keys with keys %h >>fetch: $VAR1 = "\303\244\303\244"; SV = PVMG(0x9b823a0) at 0x9b25d40 REFCNT = 1 FLAGS = (SMG,POK,pPOK) IV = 0 NV = 0 PV = 0x9d48548 "\303\244\303\244"\0 CUR = 4 LEN = 8 MAGIC = 0x9d907c0 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 <<fetch: $VAR1 = "\x{e4}\x{e4}"; SV = PVMG(0x9b823a0) at 0x9b25d40 REFCNT = 1 FLAGS = (SMG,POK,pPOK,UTF8) IV = 0 NV = 0 PV = 0x9d48ea8 "\303\244\303\244"\0 [UTF8 "\x{e4}\x{e4}"] CUR = 4 LEN = 8 MAGIC = 0x9d95700 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = 2 MAGIC = 0x9d907c0 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 reading with c_get key before c_get call SV = PVMG(0x9b82340) at 0x9d35d48 REFCNT = 1 FLAGS = (PADBUSY,PADMY,SMG,POK,pPOK,UTF8) IV = 0 NV = 0 PV = 0x9d337a8 "\303\244\303\244"\0 [UTF8 "\x{e4}\x{e4}"] CUR = 4 LEN = 8 MAGIC = 0x9d24fd0 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = 2 calling c_get >>store: $VAR1 = "\x{e4}\x{e4}"; SV = PVMG(0x9d69908) at 0x9b25944 REFCNT = 1 FLAGS = (SMG,POK,pPOK,UTF8) IV = 0 NV = 0 PV = 0x9d48890 "\303\244\303\244"\0 [UTF8 "\x{e4}\x{e4}"] CUR = 4 LEN = 8 MAGIC = 0x9d69748 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d69708 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = 2 <<store: $VAR1 = "\303\244\303\244"; SV = PVMG(0x9d69908) at 0x9b25944 REFCNT = 1 FLAGS = (SMG,POK,pPOK) IV = 0 NV = 0 PV = 0x9d4b610 "\303\244\303\244"\0 CUR = 4 LEN = 8 MAGIC = 0x9d69748 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d69708 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = -1 >>fetch: $VAR1 = "\x{e4}\x{e4}"; SV = PVMG(0x9b82340) at 0x9d35d48 REFCNT = 1 FLAGS = (PADBUSY,PADMY,SMG,POK,pPOK,UTF8) IV = 0 NV = 0 PV = 0x9d337a8 "\303\244\303\244"\0 [UTF8 "\x{e4}\x{e4}"] CUR = 4 LEN = 8 MAGIC = 0x9d67040 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d24fd0 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = 2 <<fetch: $VAR1 = "\x{fffd}\x{fffd}"; SV = PVMG(0x9b82340) at 0x9d35d48 REFCNT = 1 FLAGS = (PADBUSY,PADMY,SMG,POK,pPOK,UTF8) IV = 0 NV = 0 PV = 0x9d95ce8 "\357\277\275\357\277\275"\0 [UTF8 "\x{fffd}\x{fffd}" +] CUR = 6 LEN = 8 MAGIC = 0x9d67040 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d24fd0 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = 2 try again, turning UTF8 flag off key key before c_get call SV = PVMG(0x9b82340) at 0x9d35d48 REFCNT = 1 FLAGS = (PADBUSY,PADMY,SMG,POK,pPOK) IV = 0 NV = 0 PV = 0x9d95ce8 "\303\244\303\244"\0 CUR = 4 LEN = 8 MAGIC = 0x9d67040 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d24fd0 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = -1 calling c_get >>store: $VAR1 = "\303\244\303\244"; SV = PVMG(0x9d69908) at 0x9b25944 REFCNT = 1 FLAGS = (SMG,POK,pPOK) IV = 0 NV = 0 PV = 0x9d4b610 "\303\244\303\244"\0 CUR = 4 LEN = 8 MAGIC = 0x9d69708 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 <<store: $VAR1 = "\303\203\302\244\303\203\302\244"; SV = PVMG(0x9d69908) at 0x9b25944 REFCNT = 1 FLAGS = (SMG,POK,pPOK) IV = 0 NV = 0 PV = 0x9b1fc40 "\303\203\302\244\303\203\302\244"\0 CUR = 8 LEN = 12 MAGIC = 0x9d69708 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 >>fetch: $VAR1 = "\303\244\303\244"; SV = PVMG(0x9b82340) at 0x9d35d48 REFCNT = 1 FLAGS = (PADBUSY,PADMY,SMG,POK,pPOK) IV = 0 NV = 0 PV = 0x9d95ce8 "\303\244\303\244"\0 CUR = 4 LEN = 8 MAGIC = 0x9d67040 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d24fd0 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = -1 <<fetch: $VAR1 = "\x{e4}\x{e4}"; SV = PVMG(0x9b82340) at 0x9d35d48 REFCNT = 1 FLAGS = (PADBUSY,PADMY,SMG,POK,pPOK,UTF8) IV = 0 NV = 0 PV = 0x9d66fc8 "\303\244\303\244"\0 [UTF8 "\x{e4}\x{e4}"] CUR = 4 LEN = 8 MAGIC = 0x9d67040 MG_VIRTUAL = &PL_vtbl_mglob MG_TYPE = PERL_MAGIC_regex_global(g) MG_LEN = -1 MAGIC = 0x9d24fd0 MG_VIRTUAL = &PL_vtbl_utf8 MG_TYPE = PERL_MAGIC_utf8(w) MG_LEN = 2
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: BerkeleyDB + UTF8
by tfoertsch (Beadle) on Mar 06, 2009 at 12:40 UTC |