in reply to Re: Extracting /regex/mg in a while loop
in thread Extracting /regex/mg in a while loop

Thanks (++) for putting in the effort to properly encapsulate this in benchmark code.

I updated the code somewhat to allow it to run on my slightly older (5.34) Perl.
I also added an additional method (kco3) as my preferred way to write your version of the code.

#!/usr/bin/env perl #use v5.38; use feature qw|say|; use autodie; use constant { SEP1 => "\n" . '=' x 40, SEP2 => '-' x 40, }; use Benchmark 'cmpthese'; use File::Spec (); my ($null_fh, $base_str); BEGIN { open $null_fh, '>', File::Spec::->devnull(); $base_str = <<~'EOT'; AdataA BdataB CdataC EOT } my @bench_names = qw{net1 net2 hau1 ner1 ner2 kco1 kco2 kco3}; my (%bench_for, %benches_to_cmp, $mult_str); say 'Test', SEP1; for my $bench (@bench_names) { no strict 'refs'; ($bench_for{$bench} = \&{$bench})->(\$base_str); use strict 'refs'; say SEP2; $benches_to_cmp{$bench} = sub { $bench_for{$bench}->(\$mult_str, $null_fh); }; } for my $mult (1, 10, 100, 1_000, 10_000) { $mult_str = $base_str x $mult; say 'Bench (Length: ', length($mult_str), ')', SEP1; cmpthese 0 => \%benches_to_cmp; say SEP2; } sub net1 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; while ($str =~ /(^A|^B)(.+)$/mg){ $out_fh->say("$1 net1 $2"); } return; } sub net2 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; open my $fh, '<', \$str; while (<$fh>) { my ($name, $data) = m/(^A|^B)(.+)$/ or next; $out_fh->say("$name net2 $data"); } return; } sub hau1 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; while ($str =~ m{^ (?<name>A|B) (?<data>.+) $}xmg) { $out_fh->say("$+{name} hau1 $+{data}"); } return; } sub ner1 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; $out_fh->say("$1 ner1 $2") while $str =~ /^([AB])(.+)$/mg; return; } sub ner2 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; $str =~ /^([AB])(.++)$(?{ $out_fh->say("$1 ner2 $2") })./m; return; } sub kco1 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; my $re = qr{(?m:^([AB])(.+)$)}; $out_fh->say("$1 kco1 $2") while $str =~ /$re/g; return; } sub kco3 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; my $re = qr{(?m:^(A|B)(.+)$)}; while ($str =~ /$re/go){ $out_fh->say("$1 kco2 $2") ; } return; } sub kco2 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; my $re = qr{(?m:^([AB])(.+)$)}; $out_fh->say("$1 kco2 $2") while $str =~ /$re/go;
My results are different than yours:
$ perl pm-11154897.pl Test ======================================== ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- Bench (Length: 21) ======================================== Rate net2 kco1 kco2 kco3 net1 ner1 hau1 ner2 net2 260654/s -- -90% -92% -92% -97% -97% -97% -97% kco1 2536874/s 873% -- -20% -21% -68% -69% -70% -71% kco2 3178301/s 1119% 25% -- -1% -60% -61% -62% -64% kco3 3207697/s 1131% 26% 1% -- -59% -61% -62% -63% net1 7889843/s 2927% 211% 148% 146% -- -4% -5% -10% ner1 8226198/s 3056% 224% 159% 156% 4% -- -1% -6% hau1 8333730/s 3097% 229% 162% 160% 6% 1% -- -5% ner2 8767731/s 3264% 246% 176% 173% 11% 7% 5% -- ---------------------------------------- Bench (Length: 210) ======================================== Rate net2 kco1 kco3 kco2 net1 hau1 ner1 ner2 net2 260526/s -- -90% -92% -92% -97% -97% -97% -97% kco1 2492787/s 857% -- -20% -20% -68% -69% -69% -71% kco3 3114015/s 1095% 25% -- -0% -60% -61% -61% -64% kco2 3123870/s 1099% 25% 0% -- -60% -61% -61% -64% net1 7758408/s 2878% 211% 149% 148% -- -3% -3% -10% hau1 7965224/s 2957% 220% 156% 155% 3% -- -0% -7% ner1 7965224/s 2957% 220% 156% 155% 3% 0% -- -7% ner2 8601600/s 3202% 245% 176% 175% 11% 8% 8% -- ---------------------------------------- Bench (Length: 2100) ======================================== Rate net2 kco1 kco2 kco3 hau1 net1 ner1 ner2 net2 257948/s -- -90% -92% -92% -97% -97% -97% -97% kco1 2501079/s 870% -- -19% -20% -67% -68% -68% -71% kco2 3097753/s 1101% 24% -- -1% -59% -60% -60% -64% kco3 3133787/s 1115% 25% 1% -- -58% -60% -60% -64% hau1 7510752/s 2812% 200% 142% 140% -- -3% -4% -13% net1 7771227/s 2913% 211% 151% 148% 3% -- -0% -10% ner1 7794563/s 2922% 212% 152% 149% 4% 0% -- -10% ner2 8628990/s 3245% 245% 179% 175% 15% 11% 11% -- ---------------------------------------- Bench (Length: 21000) ======================================== Rate net2 kco1 kco2 kco3 net1 ner1 hau1 ner2 net2 231838/s -- -91% -93% -93% -97% -97% -97% -97% kco1 2478677/s 969% -- -20% -22% -67% -68% -69% -70% kco2 3094492/s 1235% 25% -- -3% -59% -61% -62% -63% kco3 3178304/s 1271% 28% 3% -- -58% -59% -61% -62% net1 7593413/s 3175% 206% 145% 139% -- -3% -6% -9% ner1 7840377/s 3282% 216% 153% 147% 3% -- -3% -6% hau1 8068001/s 3380% 225% 161% 154% 6% 3% -- -4% ner2 8382339/s 3516% 238% 171% 164% 10% 7% 4% -- ---------------------------------------- Bench (Length: 210000) ======================================== Rate net2 kco1 kco2 kco3 hau1 ner1 net1 ner2 net2 250558/s -- -85% -89% -92% -96% -97% -97% -97% kco1 1640135/s 555% -- -28% -46% -75% -78% -78% -81% kco2 2269326/s 806% 38% -- -25% -66% -70% -70% -73% kco3 3032215/s 1110% 85% 34% -- -54% -59% -60% -65% hau1 6657718/s 2557% 306% 193% 120% -- -11% -13% -22% ner1 7484171/s 2887% 356% 230% 147% 12% -- -2% -13% net1 7612303/s 2938% 364% 235% 151% 14% 2% -- -11% ner2 8558296/s 3316% 422% 277% 182% 29% 14% 12% -- ----------------------------------------

                "These opinions are my own, though for a small fee they be yours too."

Replies are listed 'Best First'.
Re^3: Extracting /regex/mg in a while loop
by kcott (Archbishop) on Oct 11, 2023 at 09:36 UTC
    "My results are different than yours:"

    Well, that's hardly surprising. Surely the fact that none of the tests output anything raised a red flag.

    I put the code you posted in op_bench1.pl and ran it:

    $ ./op_bench1.pl Missing right curly or square bracket at ./op_bench1.pl line 121, at e +nd of line syntax error at ./op_bench1.pl line 121, at EOF Execution of ./op_bench1.pl aborted due to compilation errors.

    I copied op_bench1.pl to op_bench1_mod1.pl; added

    return; }

    to the end of the file and ran it:

    $ ./op_bench1_mod1.pl Test ======================================== ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ---------------------------------------- Bench (Length: 21) ======================================== Rate net2 kco1 kco3 kco2 net1 hau1 ner1 ner2 net2 98212/s -- -92% -94% -94% -98% -98% -98% -98% kco1 1241107/s 1164% -- -28% -29% -78% -78% -79% -80% kco3 1727274/s 1659% 39% -- -1% -69% -70% -70% -72% kco2 1751536/s 1683% 41% 1% -- -69% -69% -70% -71% net1 5641441/s 5644% 355% 227% 222% -- -0% -3% -7% hau1 5668650/s 5672% 357% 228% 224% 0% -- -3% -7% ner1 5845326/s 5852% 371% 238% 234% 4% 3% -- -4% ner2 6072829/s 6083% 389% 252% 247% 8% 7% 4% -- ---------------------------------------- Bench (Length: 210) ========================================

    I hit Ctrl-C at that point.

    I copied op_bench1_mod1.pl to op_bench1_mod2.pl. I added

    use strict; use warnings;

    after

    #use v5.38;

    and ran it:

    $ ./op_bench1_mod2.pl Illegal character in prototype for main::net1 : $rstr, $out_fh = *STDO +UT at ./op_bench1_mod2.pl line 48. Illegal character after '_' in prototype for main::net1 : $rstr, $out_ +fh = *STDOUT at ./op_bench1_mod2.pl line 48. Illegal character in prototype for main::net2 : $rstr, $out_fh = *STDO +UT at ./op_bench1_mod2.pl line 58. Illegal character after '_' in prototype for main::net2 : $rstr, $out_ +fh = *STDOUT at ./op_bench1_mod2.pl line 58. Illegal character in prototype for main::hau1 : $rstr, $out_fh = *STDO +UT at ./op_bench1_mod2.pl line 71. Illegal character after '_' in prototype for main::hau1 : $rstr, $out_ +fh = *STDOUT at ./op_bench1_mod2.pl line 71. Illegal character in prototype for main::ner1 : $rstr, $out_fh = *STDO +UT at ./op_bench1_mod2.pl line 81. Illegal character after '_' in prototype for main::ner1 : $rstr, $out_ +fh = *STDOUT at ./op_bench1_mod2.pl line 81. Illegal character in prototype for main::ner2 : $rstr, $out_fh = *STDO +UT at ./op_bench1_mod2.pl line 89. Illegal character after '_' in prototype for main::ner2 : $rstr, $out_ +fh = *STDOUT at ./op_bench1_mod2.pl line 89. Global symbol "$rstr" requires explicit package name (did you forget t +o declare "my $rstr"?) at ./op_bench1_mod2.pl line 49. Global symbol "$out_fh" requires explicit package name (did you forget + to declare "my $out_fh"?) at ./op_bench1_mod2.pl line 52. Global symbol "$rstr" requires explicit package name (did you forget t +o declare "my $rstr"?) at ./op_bench1_mod2.pl line 59. Global symbol "$out_fh" requires explicit package name (did you forget + to declare "my $out_fh"?) at ./op_bench1_mod2.pl line 65. Global symbol "$rstr" requires explicit package name (did you forget t +o declare "my $rstr"?) at ./op_bench1_mod2.pl line 72. Global symbol "$out_fh" requires explicit package name (did you forget + to declare "my $out_fh"?) at ./op_bench1_mod2.pl line 75. Global symbol "$rstr" requires explicit package name (did you forget t +o declare "my $rstr"?) at ./op_bench1_mod2.pl line 82. Global symbol "$out_fh" requires explicit package name (did you forget + to declare "my $out_fh"?) at ./op_bench1_mod2.pl line 84. Global symbol "$rstr" requires explicit package name (did you forget t +o declare "my $rstr"?) at ./op_bench1_mod2.pl line 90. Global symbol "$out_fh" requires explicit package name (did you forget + to declare "my $out_fh"?) at ./op_bench1_mod2.pl line 92. ./op_bench1_mod2.pl has too many errors.

    I copied op_bench1_mod2.pl to op_bench1_mod3.pl. Then changed

    #use v5.38; use strict; use warnings; use feature qw|say|;

    to

    use v5.34; use warnings; use experimental 'signatures';

    The names of the subroutines identified the author (first three characters of username in lower-case) and the order of suggested solutions (a single digit). I changed

    my @bench_names = qw{net1 net2 hau1 ner1 ner2 kco1 kco2 kco3};

    to

    my @bench_names = qw{net1 net2 hau1 ner1 ner2 kco1 kco2 net3};

    You'd placed kco3() between kco1() and kco2(): I initially had difficulty locating it as I expected it to be added to the end. You also had it erroneously reporting that it was kco2:

    sub kco3 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; my $re = qr{(?m:^(A|B)(.+)$)}; while ($str =~ /$re/go){ $out_fh->say("$1 kco2 $2") ; } return; }

    I moved it to the end and fixed those problems. The last code in the script is now:

    sub net3 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; my $re = qr{(?m:^(A|B)(.+)$)}; while ($str =~ /$re/go){ $out_fh->say("$1 net3 $2"); } return; }

    I then ran this script:

    $ ./op_bench1_mod3.pl Test ======================================== A net1 dataA B net1 dataB ---------------------------------------- A net2 dataA B net2 dataB ---------------------------------------- A hau1 dataA B hau1 dataB ---------------------------------------- A ner1 dataA B ner1 dataB ---------------------------------------- A ner2 dataA B ner2 dataB ---------------------------------------- A kco1 dataA B kco1 dataB ---------------------------------------- A kco2 dataA B kco2 dataB ---------------------------------------- A net3 dataA B net3 dataB ---------------------------------------- Bench (Length: 21) ======================================== Rate net2 hau1 kco1 net3 kco2 net1 ner2 ner1 net2 61894/s -- -64% -70% -75% -75% -75% -75% -78% hau1 170557/s 176% -- -17% -30% -31% -31% -32% -39% kco1 205902/s 233% 21% -- -15% -16% -17% -18% -27% net3 243209/s 293% 43% 18% -- -1% -2% -3% -14% kco2 246357/s 298% 44% 20% 1% -- -0% -2% -12% net1 247587/s 300% 45% 20% 2% 0% -- -1% -12% ner2 250431/s 305% 47% 22% 3% 2% 1% -- -11% ner1 281208/s 354% 65% 37% 16% 14% 14% 12% -- ---------------------------------------- Bench (Length: 210) ======================================== Rate hau1 net2 kco1 net1 ner2 net3 ner1 kco2 hau1 18776/s -- -1% -29% -33% -35% -39% -39% -39% net2 18871/s 1% -- -28% -33% -35% -38% -39% -39% kco1 26290/s 40% 39% -- -6% -9% -14% -15% -15% net1 28066/s 49% 49% 7% -- -3% -8% -9% -9% ner2 28885/s 54% 53% 10% 3% -- -6% -7% -7% net3 30649/s 63% 62% 17% 9% 6% -- -1% -1% ner1 30936/s 65% 64% 18% 10% 7% 1% -- -0% kco2 30946/s 65% 64% 18% 10% 7% 1% 0% -- ---------------------------------------- Bench (Length: 2100) ======================================== Rate hau1 net2 kco1 net1 ner2 ner1 net3 kco2 hau1 1900/s -- -21% -30% -33% -36% -39% -40% -41% net2 2397/s 26% -- -11% -16% -19% -23% -24% -25% kco1 2703/s 42% 13% -- -5% -9% -13% -14% -15% net1 2837/s 49% 18% 5% -- -5% -9% -10% -11% ner2 2973/s 56% 24% 10% 5% -- -5% -5% -7% ner1 3124/s 64% 30% 16% 10% 5% -- -1% -2% net3 3143/s 65% 31% 16% 11% 6% 1% -- -2% kco2 3199/s 68% 33% 18% 13% 8% 2% 2% -- ---------------------------------------- Bench (Length: 21000) ======================================== Rate hau1 net2 kco1 net1 ner2 net3 ner1 kco2 hau1 190/s -- -23% -29% -33% -37% -39% -40% -40% net2 247/s 30% -- -8% -13% -18% -21% -21% -22% kco1 269/s 42% 9% -- -5% -11% -14% -14% -15% net1 285/s 50% 15% 6% -- -6% -9% -9% -10% ner2 302/s 59% 23% 12% 6% -- -4% -4% -5% net3 313/s 65% 27% 16% 10% 4% -- -0% -1% ner1 314/s 65% 27% 17% 10% 4% 0% -- -1% kco2 317/s 67% 29% 18% 12% 5% 1% 1% -- ---------------------------------------- Bench (Length: 210000) ======================================== Rate hau1 net2 kco1 net1 ner2 net3 ner1 kco2 hau1 19.1/s -- -24% -30% -33% -37% -39% -39% -40% net2 25.1/s 31% -- -8% -12% -17% -20% -20% -22% kco1 27.2/s 42% 8% -- -4% -10% -13% -13% -15% net1 28.5/s 49% 13% 5% -- -5% -9% -9% -11% ner2 30.1/s 58% 20% 11% 6% -- -4% -4% -6% net3 31.4/s 64% 25% 15% 10% 4% -- -0% -2% ner1 31.4/s 64% 25% 15% 10% 4% 0% -- -2% kco2 32.0/s 68% 28% 18% 12% 6% 2% 2% -- ----------------------------------------

    Now the results more closely resemble mine.

    Here's op_bench1_mod3.pl in full:

    #!/usr/bin/env perl use v5.34; use warnings; use experimental 'signatures'; use autodie; use constant { SEP1 => "\n" . '=' x 40, SEP2 => '-' x 40, }; use Benchmark 'cmpthese'; use File::Spec (); my ($null_fh, $base_str); BEGIN { open $null_fh, '>', File::Spec::->devnull(); $base_str = <<~'EOT'; AdataA BdataB CdataC EOT } my @bench_names = qw{net1 net2 hau1 ner1 ner2 kco1 kco2 net3}; my (%bench_for, %benches_to_cmp, $mult_str); say 'Test', SEP1; for my $bench (@bench_names) { no strict 'refs'; ($bench_for{$bench} = \&{$bench})->(\$base_str); use strict 'refs'; say SEP2; $benches_to_cmp{$bench} = sub { $bench_for{$bench}->(\$mult_str, $null_fh); }; } for my $mult (1, 10, 100, 1_000, 10_000) { $mult_str = $base_str x $mult; say 'Bench (Length: ', length($mult_str), ')', SEP1; cmpthese 0 => \%benches_to_cmp; say SEP2; } sub net1 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; while ($str =~ /(^A|^B)(.+)$/mg) { $out_fh->say("$1 net1 $2"); } return; } sub net2 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; open my $fh, '<', \$str; while (<$fh>) { my ($name, $data) = m/(^A|^B)(.+)$/ or next; $out_fh->say("$name net2 $data"); } return; } sub hau1 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; while ($str =~ m{^ (?<name>A|B) (?<data>.+) $}xmg) { $out_fh->say("$+{name} hau1 $+{data}"); } return; } sub ner1 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; $out_fh->say("$1 ner1 $2") while $str =~ /^([AB])(.+)$/mg; return; } sub ner2 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; $str =~ /^([AB])(.++)$(?{ $out_fh->say("$1 ner2 $2") })./m; return; } sub kco1 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; my $re = qr{(?m:^([AB])(.+)$)}; $out_fh->say("$1 kco1 $2") while $str =~ /$re/g; return; } sub kco2 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; my $re = qr{(?m:^([AB])(.+)$)}; $out_fh->say("$1 kco2 $2") while $str =~ /$re/go; return; } sub net3 ($rstr, $out_fh = *STDOUT) { my $str = $$rstr; my $re = qr{(?m:^(A|B)(.+)$)}; while ($str =~ /$re/go){ $out_fh->say("$1 net3 $2"); } return; }

    — Ken