Beefy Boxes and Bandwidth Generously Provided by pair Networks
Do you know where your variables are?
 
PerlMonks  

Re^3: [5.30] What counts as a Turkic locale?

by RMGir (Prior)
on May 15, 2019 at 12:27 UTC ( [id://11100014]=note: print w/replies, xml ) Need Help??


in reply to Re^2: [5.30] What counts as a Turkic locale?
in thread [5.30] What counts as a Turkic locale?

I don't think it uses an explicit list of locale names. It looks like it's detected via toupper/tolower misbehaviour.

Clone the git repo (git://perl5.git.perl.org/perl.git) then look at the diff for the commit in question:

~/git/perl$ git diff 30d8090de81085bd3dff00c83a7ab6d3ff8dfc8d^!
diff --git a/locale.c b/locale.c
index 383b2137c0..07e5525c10 100644
--- a/locale.c
+++ b/locale.c
@@ -1507,6 +1507,7 @@ S_new_ctype(pTHX_ const char *newctype)
 
     /* Don't check for problems if we are suppressing the warnings */
     bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST);
+    bool maybe_utf8_turkic = FALSE;
 
     PERL_ARGS_ASSERT_NEW_CTYPE;
 
@@ -1523,6 +1524,14 @@ S_new_ctype(pTHX_ const char *newctype)
      * handle this specially because of the three problematic code points */
     if (PL_in_utf8_CTYPE_locale) {
         Copy(PL_fold_latin1, PL_fold_locale, 256, U8);
+
+        /* UTF-8 locales can have special handling for 'I' and 'i' if they are
+         * Turkic.  Make sure these two are the only anomalies.  (We don't use
+         * towupper and towlower because they aren't in C89.) */
+        if (toupper('i') == 'i' && tolower('I') == 'I') {
+            check_for_problems = TRUE;
+            maybe_utf8_turkic = TRUE;
+        }
     }
 
     /* We don't populate the other lists if a UTF-8 locale, but do check that
@@ -1668,7 +1677,18 @@ S_new_ctype(pTHX_ const char *newctype)
             }
         }
 
+        if (bad_count == 2 && maybe_utf8_turkic) {
+            bad_count = 0;
+            *bad_chars_list = '\0';
+            PL_fold_locale['I'] = 'I';
+            PL_fold_locale['i'] = 'i';
+            PL_in_utf8_turkic_locale = TRUE;
+            DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s is turkic\n",
+                                                 __FILE__, __LINE__, newctype));

Mike

Replies are listed 'Best First'.
Re^4: [5.30] What counts as a Turkic locale?
by daxim (Curate) on May 15, 2019 at 15:28 UTC
    That was helpful. So it's dynamic, therefore reasonably future-proof. Nice job, Karl!

    The answer I wanted on my system is:

    › rpm -q --qf %{VERSION} glibc-locale
    2.29
    › for l in $(locale -a) ; do \
        LC_CTYPE=$l perl5.30.0 -Mlocale \
        -E'say $ENV{LC_CTYPE} if "i" ne lc "I"' ; \
      done
    az_AZ
    crh_UA
    ku_TR
    ku_TR.utf8
    tr_CY
    tr_CY.utf8
    tr_TR
    tr_TR.utf8
    tt_RU@iqtelif
    To save the interested reader a trip to the list of ISO 639 codes, that's Azeri, Crimean Tatar, Kurdish, Turkish, Tatar.

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://11100014]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others avoiding work at the Monastery: (2)
As of 2024-04-25 05:45 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found