pdl> p $im [ [0 0 1 1 0 0] [2 0 0 1 0 0] [2 2 0 0 0 3] [0 0 0 0 3 0] ] pdl> p $im-> flat-> qsort # Won't be used. Just a demo. [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2 3 3] pdl> p $idx = $im-> flat-> qsorti [15 16 14 10 11 21 23 20 18 19 4 5 0 1 7 8 3 2 9 13 6 12 22 17] pdl> p +( $runs ) = $im-> flat-> qsort -> rle [16 3 3 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] pdl> ( $x, $y ) = $im-> one2nd( $idx ) pdl> p $x [3 4 2 4 5 3 5 2 0 1 4 5 0 1 1 2 3 2 3 1 0 0 4 5] pdl> p $y [2 2 2 1 1 3 3 3 3 3 0 0 0 0 1 1 0 0 1 2 1 2 3 2] pdl> p $idx # NB. Destroyed! [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] pdl> #### use strict; use warnings; use feature 'say'; use PDL; use PDL::NiceSlice; use PDL::IO::Image; use PDL::Image2D; use PDL::ImageND; use Encode 'decode'; use Benchmark 'cmpthese'; use Data::Dump; my $fn = 'test.png'; my $img = PDL::IO::Image-> new_from_file( $fn ); my $pdl = $img-> pixels_to_pdl-> short; my $sgm = cc8compt( $pdl == 0 )-> short; # "Short" in line above is required for PDL that ships # with Strawberry 5.26. Otherwise $sgm gets long. Wasn't # so in 5.24. Not nice. # $sgm ('segmented image') will be a test subject. cmpthese( 5, { pdl_naive => \&get_bboxes_pdl_naive, regex => \&get_bboxes_regex, pdl => \&get_bboxes_pdl, }); sub get_bboxes_pdl_naive { return [ map { my $obj = whichND( $sgm == $_ ); [ [ $obj-> slice( 0 )-> minmax ], [ $obj-> slice( 1 )-> minmax ], ] } 1 .. $sgm-> max ] } sub get_bboxes_regex { my ( $w, $h ) = $sgm-> dims; my $str = ${ $sgm-> get_dataref }; my @b = map { [ [ $w, 0 ], [ $h, 0 ] ] } 0 .. $sgm-> max; $w *= 2; for my $y ( 0 .. $h - 1 ) { my $s = decode 'UTF16LE', substr( $str, $y * $w, $w ); while( $s =~ m/[^\0]+/g ) { my $c = ord( $& ); $b[ $c ][0][0] = $-[0] if $-[0] < $b[ $c ][0][0]; $b[ $c ][0][1] = $+[0] - 1 if $+[0] - 1 > $b[ $c ][0][1]; $b[ $c ][1][0] = $y if $y < $b[ $c ][1][0]; $b[ $c ][1][1] = $y if $y > $b[ $c ][1][1]; } } shift @b; return \@b } sub get_bboxes_pdl { my $idx = $sgm-> flat-> qsorti; my ( $runs ) = $sgm-> flat-> index( $idx )-> rle; $runs = $runs-> where( $runs != 0 )-> cumusumover; my ( $x, $y ) = $sgm-> one2nd( $idx ); return [ map { my $start = $runs( $_ - 1 ); my $stop = $runs( $_ ) - 1; [[ $x( $start:$stop )-> minmax ], [ $y( $start:$stop )-> minmax ]] } 1 .. $runs-> nelem - 1 ] } #### s/iter pdl_naive regex pdl pdl_naive 5.53 -- -73% -81% regex 1.51 266% -- -29% pdl 1.07 417% 41% -- #### use strict; use warnings; use PDL; use Benchmark 'cmpthese'; my $x = (( random 10_000_000 ) * 2**16 )-> ushort; my $bads = $x-> setbadif( $x > 2**13 ); # 88% is dull 'background' my $zeroes = $bads-> setbadtoval( 0 ); cmpthese( 5, { zeroes => sub { $zeroes-> qsort }, bads => sub { $bads-> qsort }, }); #### Rate zeroes bads zeroes 2.81/s -- -67% bads 8.42/s 200% -- #### sub get_bboxes_pdl_bad { my $flat = $sgm-> flat-> setvaltobad( 0 ); my $idx = $flat-> qsorti; $idx = $idx-> where( $flat-> index( $idx )-> isgood ); my ( $runs ) = $flat-> index( $idx )-> rle; $runs = $runs-> where( $runs != 0 )-> cumusumover - 1; my ( $x, $y ) = $sgm-> one2nd( $idx ); my $stop = -1; return [ map { my $start = $stop + 1; $stop = $_; [[ $x( $start:$stop )-> minmax ], [ $y( $start:$stop )-> minmax ]] } $runs-> list ] } #### s/iter regex pdl pdl_bad regex 1.51 -- -30% -87% pdl 1.06 42% -- -81% pdl_bad 0.203 641% 421% -- #### my $idx = $flat-> qsorti-> slice([ 0, $flat-> ngood - 1 ]);