in reply to Re^7: Any good ways to handle NARROW NO-BREAK SPACE characters in regex in newer versions of Perl?
in thread Any good ways to handle NARROW NO-BREAK SPACE characters in regex in newer versions of Perl?

Definitely a bug (I think):

#! /usr/bin/env perl
# Repro: a glob()-returned filename containing U+202F (NARROW NO-BREAK SPACE) fails a \s match that a look-alike literal passes.
use v5.36;
use utf8;    # string literals in this file are read as UTF-8 characters

# get all the files in the current directory
my @files = glob("*");
my ($file) = grep { /Screenshot-2024-02-23-at-1.05.14\s/ } @files;    # first matching name, or undef if none match
# NOTE(review): uses the first glob result; presumably the screenshot is the first (or only) entry in the directory.
my $ss = $files[0];
my $hex = unpack("H*", $ss);
say $hex;
# In the reported run the grep above matched nothing, so $file is undef and this say warns.
say $file; # ERROR!
# The same name as a literal; the character between "14" and "AM" is U+202F, not an ASCII space.
my $blah = "Screenshot-2024-02-23-at-1.05.14 AM.png";
my $hex2 = unpack("H*", $blah);    # reported run warned "Character in 'H' format wrapped"; U+202F was dumped as 2f
say $hex2;

say $hex eq $hex2 ? "hexes equal" : "hexes not equal";
# NOTE(review): the glob name dumps as three bytes (e2 80 af) where the literal holds one character, which would explain the differing \s results; confirm by decoding the name before matching.
say $blah =~ /Screenshot-2024-02-23-at-1.05.14\s/;  # WORKS!

OUTPUT:

53637265656e73686f742d323032342d30322d32332d61742d312e30352e3134e280af414d2e706e67
Use of uninitialized value $file in say at ./test.pl line 14.
Character in 'H' format wrapped in unpack at ./test.pl line 17.
53637265656e73686f742d323032342d30322d32332d61742d312e30352e31342f414d2e706e67
hexes not equal
1

$PM = "Perl Monk's";
$MC = "Most Clueless Friar Abbot Bishop Pontiff Deacon Curate Priest Vicar Parson";
$nysus = $PM . ' ' . $MC;
Click here if you love Perl Monks

  • Comment on Re^8: Any good ways to handle NARROW NO-BREAK SPACE characters in regex in newer versions of Perl?
  • Select or Download Code

Replies are listed 'Best First'.
Re^9: Any good ways to handle NARROW NO-BREAK SPACE characters in regex in newer versions of Perl?
by nysus (Parson) on Aug 13, 2024 at 17:21 UTC

    And here's a hex dump of the perl script just to confirm variable $blah has the utf8 character in it:

    > $ hexd test.pl                                        [±main ●▴]
              00 01 02 03 04 05 06 07 - 08 09 0A 0B 0C 0D 0E 0F  0123456789ABCDEF
    00000000  23 21 20 2F 75 73 72 2F - 62 69 6E 2F 65 6E 76 20  #! /usr/bin/env 
    00000010  70 65 72 6C 0A 0A 75 73 - 65 20 76 35 2E 33 36 3B  perl..use v5.36;
    00000020  0A 75 73 65 20 75 74 66 - 38 3B 0A 0A 23 20 67 65  .use utf8;..# ge
    00000030  74 20 61 6C 6C 20 74 68 - 65 20 66 69 6C 65 73 20  t all the files 
    00000040  69 6E 20 74 68 65 20 63 - 75 72 72 65 6E 74 20 64  in the current d
    00000050  69 72 65 63 74 6F 72 79 - 0A 6D 79 20 40 66 69 6C  irectory.my @fil
    00000060  65 73 20 3D 20 67 6C 6F - 62 28 22 2A 22 29 3B 0A  es = glob("*");.
    00000070  6D 79 20 28 24 66 69 6C - 65 29 20 3D 20 67 72 65  my ($file) = gre
    00000080  70 20 7B 20 2F 53 63 72 - 65 65 6E 73 68 6F 74 2D  p { /Screenshot-
    00000090  32 30 32 34 2D 30 32 2D - 32 33 2D 61 74 2D 31 2E  2024-02-23-at-1.
    000000A0  30 35 2E 31 34 5C 73 2F - 20 7D 20 40 66 69 6C 65  05.14\s/ } @file
    000000B0  73 3B 0A 0A 6D 79 20 24 - 73 73 20 3D 20 24 66 69  s;..my $ss = $fi
    000000C0  6C 65 73 5B 30 5D 3B 0A - 6D 79 20 24 68 65 78 20  les[0];.my $hex 
    000000D0  3D 20 75 6E 70 61 63 6B - 28 22 48 2A 22 2C 20 24  = unpack("H*", $
    000000E0  73 73 29 3B 0A 73 61 79 - 20 24 68 65 78 3B 0A 0A  ss);.say $hex;..
    000000F0  73 61 79 20 24 66 69 6C - 65 3B 20 23 20 45 52 52  say $file; # ERR
    00000100  4F 52 21 0A 0A 6D 79 20 - 24 62 6C 61 68 20 3D 20  OR!..my $blah = 
    00000110  22 53 63 72 65 65 6E 73 - 68 6F 74 2D 32 30 32 34  "Screenshot-2024
    00000120  2D 30 32 2D 32 33 2D 61 - 74 2D 31 2E 30 35 2E 31  -02-23-at-1.05.1
    00000130  34 E2 80 AF 41 4D 2E 70 - 6E 67 22 3B 0A 6D 79 20  4...AM.png";.my 
    00000140  24 68 65 78 32 20 3D 20 - 75 6E 70 61 63 6B 28 22  $hex2 = unpack("
    00000150  48 2A 22 2C 20 24 62 6C - 61 68 29 3B 0A 73 61 79  H*", $blah);.say
    00000160  20 24 68 65 78 32 3B 0A - 0A 73 61 79 20 24 68 65   $hex2;..say $he
    00000170  78 20 65 71 20 24 68 65 - 78 32 20 3F 20 22 68 65  x eq $hex2 ? "he
    00000180  78 65 73 20 65 71 75 61 - 6C 22 20 3A 20 22 68 65  xes equal" : "he
    00000190  78 65 73 20 6E 6F 74 20 - 65 71 75 61 6C 22 3B 0A  xes not equal";.
    000001A0  0A 73 61 79 20 24 62 6C - 61 68 20 3D 7E 20 2F 53  .say $blah =~ /S
    000001B0  63 72 65 65 6E 73 68 6F - 74 2D 32 30 32 34 2D 30  creenshot-2024-0
    000001C0  32 2D 32 33 2D 61 74 2D - 31 2E 30 35 2E 31 34 5C  2-23-at-1.05.14\
    000001D0  73 2F 3B 20 20 23 20 57 - 4F 52 4B 53 21 0A 0A 0A  s/;  # WORKS!...

    $PM = "Perl Monk's";
    $MC = "Most Clueless Friar Abbot Bishop Pontiff Deacon Curate Priest Vicar Parson";
    $nysus = $PM . ' ' . $MC;
    Click here if you love Perl Monks