#include #include #include #include #include #include #include #include void main(){ char* instring = "Hauptstraße 3"; // Works with uint8_t too size_t length; uint8_t *result; size_t input_length; input_length=strlen(instring); //u8_strlen works too result = u8_toupper (instring, input_length, NULL, UNINORM_NFC, NULL, &length); #### SV* uppercase_utf8_2(SV* sv) PREINIT: size_t len; char* s; char* upperstring; //uint8_t also SV* aresult; size_t upperlength; CODE: s = SvPVbyte(sv, len); //strings from my editor (locale?) are already utf8 encoded upperstring = u8_toupper (s, len, NULL, NULL,NULL, &upperlength); upperlength = u8_strlen(upperstring); aresult = newSVpv(upperstring,upperlength); free(upperstring); RETVAL = aresult; OUTPUT: RETVAL #### ==614897== Invalid read of size 1 ==614897== at 0x484F234: strlen (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==614897== by 0x485B74A: XS_Simple__Ngram_uppercase_utf8_2 (Ngram.xs:79) ==614897== by 0x2441A9: ??? (in /usr/bin/perl) ==614897== by 0x2396DD: Perl_runops_standard (in /usr/bin/perl) ==614897== by 0x1781DA: perl_run (in /usr/bin/perl) ==614897== by 0x14D639: main (in /usr/bin/perl) ==614897== Address 0x5bb9282 is 0 bytes after a block of size 18 alloc'd ==614897== at 0x484DB80: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==614897== by 0x55AEF01: libunistring_u8_casemap (in /usr/lib/x86_64-linux-gnu/libunistring.so.5.0.0) ==614897== by 0x55AF478: u8_toupper (in /usr/lib/x86_64-linux-gnu/libunistring.so.5.0.0) ==614897== by 0x485B73F: XS_Simple__Ngram_uppercase_utf8_2 (Ngram.xs:78) ==614897== by 0x2441A9: ??? (in /usr/bin/perl) ==614897== by 0x2396DD: Perl_runops_standard (in /usr/bin/perl) ==614897== by 0x1781DA: perl_run (in /usr/bin/perl) ==614897== by 0x14D639: main (in /usr/bin/perl) ==614897== ==614897== ==614897== HEAP SUMMARY: ==614897== in use at exit: 2,544,437 bytes in 9,481 blocks ==614897== total heap usage: 33,789 allocs, 24,308 frees, 6,454,177 bytes allocated ==614897== ==614897== 2 bytes in 1 blocks are possibly lost in loss record 1 of 1,342 ==614897== at 0x4846828: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==614897== by 0x24EADB: Perl_sv_magicext (in /usr/bin/perl) ==614897== by 0x24ED0A: Perl_sv_magic (in /usr/bin/perl) ==614897== by 0x183F8C: Perl_gv_fetchpvn_flags (in /usr/bin/perl) ==614897== by 0x174ED2: perl_parse (in /usr/bin/perl) ==614897== by 0x14D55B: main (in /usr/bin/perl)