Re^5: unpacking 6-bit values

That really helps. You can pre-bind the variables and optimize for this situation. It roughly doubled the speed for me. Feel free to modify it to your heart's content ...

Sorry for the long posts. Here I ripped out the two slowest ones to make room for the real runner :)

use strict;
use warnings;

use Inline "C";
use Data::Peek;
use Benchmark qw( cmpthese );

# Test data: 128 random byte strings of 24 bytes each. Every 3 bytes hold
# four 6-bit values, so the whole set decodes to 128 * 32 = 4096 values.
# The trailing "# 8192;" is the author's own end-of-line remark; the forum
# line wrap had split it into a stray "+8192;" statement on its own line.
my @src = map { pack "C*" => map { int rand 256 } 0 .. 23 } 0 .. 127; # 8192;

print STDERR "First line of the compressed data ...\n";
DHexDump $src[0];

# Dump every 3-byte group of the first string as hex, then as a bit string
# grouped per 8 bits (the bytes) and per 6 bits (the values to extract).
for (unpack "(a3)*", $src[0]) {
    DHexDump $_;
    my $bits = unpack "B*", $_;
    # substr may ask for more than the 24 available bits; it just returns
    # what is there.
    (my $b8 = substr $bits, 0, 48) =~ s/(.{8})/$1 /g;
    (my $b6 = substr $bits, 0, 48) =~ s/(.{6})/$1 /g;
    print STDERR "      $b8\n      $b6\n";
    }

# Unpack 6-bit values by letting pack "u" (uuencode) do the bit slicing.
# Every uuencoded line starts with a length character, which is skipped;
# each remaining character maps to one value through (ord - 32) & 63.
# Returns the list of values.
sub uu
{
    my $encoded = pack "u", shift;
    my @sextets;
    for my $line (split m/ *\n/, $encoded) {
        # "x" skips the leading length character of the line
        my @ords = unpack "xC*", $line;
        push @sextets, map { ($_ - 32) & 63 } @ords;
        }
    return @sextets;
    } # uu

{   # Lookup table shared with mlut (): for every 6-bit value the table maps
    # that value shifted into each of the four positions of a 24-bit word
    # back to the plain value, so a masked word can be used as index.
    my @lut;
    for my $sextet (0 .. 63) {
        $lut[$sextet << $_] = $sextet for 18, 12, 6, 0;
        }

    # Unpack 6-bit values using masks plus the lookup table instead of
    # shifts. Takes a byte string, returns four values per 3-byte group.
    sub mlut
    {
        my @sextets;
        for my $triple (unpack "(a3)*", shift) {
            # Leading NUL widens the group to 32 bits for "N"; the two
            # trailing NULs pad a short final group.
            my $word = unpack "N", "\00$triple\x00\x00";
            push @sextets,
                $lut[$word & 0xFC0000],
                $lut[$word & 0x03F000],
                $lut[$word & 0x000FC0],
                     $word & 0x00003F;
            }
        return @sextets;
        } # mlut
    }

{   # Constants bound once outside the sub: the 6-bit mask and the shift
    # that brings each of the four values of a 24-bit word to the low bits.
    my $sextet_mask = 0b111111;
    my @shifts      = (18, 12, 6, 0);

    # Unpack 6-bit values with plain shift-and-mask arithmetic.
    # Takes a byte string, returns four values per 3-byte group.
    sub asu
    {
        my @sextets;
        for my $triple (unpack "(a3)*", shift) {
            # Leading NUL widens the group to 32 bits for "N"; the two
            # trailing NULs pad a short final group.
            my $word = unpack "N", "\x00$triple\x00\x00";
            push @sextets, map { ($word >> $_) & $sextet_mask } @shifts;
            }
        return @sextets;
        } # asu
    }

my @dst;

# Print the first 32 and the last 23 decoded values behind a heading.
my $show = sub {
    my ($head, $vals) = @_;
    print STDERR $head, "@{$vals}[0..31] ...\n        @{$vals}[4073..4095])\n";
    };

# Decode all of @src with every implementation in turn, so the printed
# lists can be compared by eye before the timing run.
@dst = map { uu ($_) } @src;
print STDERR "$#dst E\n";
$show->("uu:    (", \@dst);

@dst = map { asu ($_) } @src;
$show->("asu:   (", \@dst);
@dst = map { mlut ($_) } @src;
$show->("mlut:  (", \@dst);

@dst = map { uic ($_) } @src;
$show->("uic:   (", \@dst);
@dst = map { uicm ($_) } @src;
$show->("uicm:  (", \@dst);

# uicb () returns nothing: it stores into the scalars that the references
# returned by uicb_init () point at, so the values are copied out here.
my @b = uicb_init (32);
@dst = ();
for my $packed (@src) {
    uicb ($packed);
    push @dst, map { $$_ } @b;
    }
$show->("uicb:  (", \@dst);

# Time each implementation for at least 2 CPU seconds. Note that the uicb
# entry only fills the pre-bound scalars; copying them out is not timed.
cmpthese (-2, {
    uu   => sub { uu   ($_) for @src },
    asu  => sub { asu  ($_) for @src },
    mlut => sub { mlut ($_) for @src },
    uic  => sub { uic  ($_) for @src },
    uicm => sub { uicm ($_) for @src },
    uicb => sub { uicb ($_) for @src },
    });

__END__
__C__
/*
 * uic - Unpack Inline C.
 *
 * Takes a byte string and returns, on the Perl stack, four 6-bit values
 * (0 .. 63) for every group of 3 bytes. A final group of only 1 or 2 bytes
 * is padded with zero bytes, which matches what the pure-perl asu () and
 * mlut () do, instead of reading beyond the end of the string.
 *
 * NOTE(review): the pushed SVs are not mortalized, so the returned values
 * presumably leak. This looks intentional, to measure the cost of
 * sv_2mortal () against uicm (); use uicm () outside of the benchmark.
 */
void uic (SV *src)
{
    STRLEN i = 0;   /* unsigned, like the length it is compared against */
    STRLEN len;
    unsigned char *s = (unsigned char *)SvPV (src, len);

    inline_stack_vars;
    inline_stack_reset;

    while (i < len) {
        /* Fetch up to 3 bytes; missing tail bytes count as 0 */
        unsigned int b0 = s[i++];
        unsigned int b1 = i < len ? s[i++] : 0;
        unsigned int b2 = i < len ? s[i++] : 0;

        inline_stack_push (newSViv (b0 >> 2));
        inline_stack_push (newSViv (((b0 & 0x03) << 4) | (b1 >> 4)));
        inline_stack_push (newSViv (((b1 & 0x0f) << 2) | (b2 >> 6)));
        inline_stack_push (newSViv (b2 & 0x3f));
        }

    inline_stack_done;
    } /* uic */

/*
 * uicm - Unpack Inline C, Mortal.
 *
 * Takes a byte string and returns, on the Perl stack, four mortal 6-bit
 * values (0 .. 63) for every group of 3 bytes. A final group of only 1 or
 * 2 bytes is padded with zero bytes, which matches what the pure-perl
 * asu () and mlut () do, instead of reading beyond the end of the string.
 */
void uicm (SV *src)
{
    STRLEN i = 0;   /* unsigned, like the length it is compared against */
    STRLEN len;
    unsigned char *s = (unsigned char *)SvPV (src, len);

    inline_stack_vars;
    inline_stack_reset;

    while (i < len) {
        /* Fetch up to 3 bytes; missing tail bytes count as 0 */
        unsigned int b0 = s[i++];
        unsigned int b1 = i < len ? s[i++] : 0;
        unsigned int b2 = i < len ? s[i++] : 0;

        inline_stack_push (sv_2mortal (newSViv (b0 >> 2)));
        inline_stack_push (sv_2mortal (newSViv (((b0 & 0x03) << 4) | (b1 >> 4))));
        inline_stack_push (sv_2mortal (newSViv (((b1 & 0x0f) << 2) | (b2 >> 6))));
        inline_stack_push (sv_2mortal (newSViv (b2 & 0x3f)));
        }

    inline_stack_done;
    } /* uicm */

/* Pre-bound output slots shared by uicb_init () and uicb (), and the
 * number of slots currently allocated. */
SV **uicb_sv  = NULL;
int  uicb_cnt = 0;

/*
 * uicb_init - allocate n output scalars for uicb ().
 *
 * Returns, on the Perl stack, n references to the scalars that uicb ()
 * will store its results in; slot i initially holds the value i.
 * Calling it again releases the previous set of slots first.
 * Croaks when the slot array cannot be allocated.
 */
void uicb_init (int n)
{
    int i;
    inline_stack_vars;

    if (n < 0)
        n = 0;

    /* Release the slots of an earlier call instead of leaking them.
     * References handed out earlier keep their own scalars alive. */
    if (uicb_sv) {
        for (i = 0; i < uicb_cnt; i++)
            SvREFCNT_dec (uicb_sv[i]);
        free (uicb_sv);
        uicb_sv  = NULL;
        uicb_cnt = 0;
        }

    if (n > 0) {
        uicb_sv = (SV **)calloc (n, sizeof (SV *));
        if (!uicb_sv)
            croak ("uicb_init: cannot allocate %d slots", n);
        }

    inline_stack_reset;
    for (i = 0; i < n; i++) {
        uicb_sv[i] = newSViv (i);
        uicb_cnt   = i + 1;
        /* The reference is mortal: the caller copies it, and the copy
         * keeps the slot referenced. */
        inline_stack_push (sv_2mortal (newRV_inc (uicb_sv[i])));
        }
    inline_stack_done;
    } /* uicb_init */

/*
 * uicb - Unpack Inline C Bind.
 *
 * Takes a byte string and stores four 6-bit values (0 .. 63) for every
 * group of 3 bytes into the scalars set up by uicb_init (), starting at
 * slot 0. Nothing is returned. A final group of only 1 or 2 bytes is
 * padded with zero bytes instead of reading beyond the end of the string.
 * Croaks, before storing anything, when the string needs more slots than
 * uicb_init () allocated (which includes uicb_init () never being called).
 */
void uicb (SV *src)
{
    STRLEN i = 0;   /* unsigned, like the length it is compared against */
    STRLEN len;
    int idx = 0;
    unsigned char *s = (unsigned char *)SvPV (src, len);
    STRLEN need = ((len + 2) / 3) * 4;  /* 4 values per started group */

    if (need > (STRLEN)uicb_cnt)
        croak ("uicb: %" UVuf " values do not fit in the %d slots set up by uicb_init ()",
            (UV)need, uicb_cnt);

    while (i < len) {
        /* Fetch up to 3 bytes; missing tail bytes count as 0 */
        unsigned int b0 = s[i++];
        unsigned int b1 = i < len ? s[i++] : 0;
        unsigned int b2 = i < len ? s[i++] : 0;

        sv_setiv (uicb_sv[idx++], b0 >> 2);
        sv_setiv (uicb_sv[idx++], ((b0 & 0x03) << 4) | (b1 >> 4));
        sv_setiv (uicb_sv[idx++], ((b1 & 0x0f) << 2) | (b2 >> 6));
        sv_setiv (uicb_sv[idx++], b2 & 0x3f);
        }
    } /* uicb */
[download]

Leads to

First line of the compressed data ...
0000  18 d5 ba 4c ff fc 69 d3  51 7f f3 84 b8 c5 99 27  ...L..i.Q.....
+.'
0010  e8 16 a8 e0 7f c0 4b cb                           ......K.
0000  18 d5 ba                                          ...
      00011000 11010101 10111010
      000110 001101 010110 111010
0000  4c ff fc                                          L..
      01001100 11111111 11111100
      010011 001111 111111 111100
0000  69 d3 51                                          i.Q
      01101001 11010011 01010001
      011010 011101 001101 010001
0000  7f f3 84                                          ...
      01111111 11110011 10000100
      011111 111111 001110 000100
0000  b8 c5 99                                          ...
      10111000 11000101 10011001
      101110 001100 010110 011001
0000  27 e8 16                                          '..
      00100111 11101000 00010110
      001001 111110 100000 010110
0000  a8 e0 7f                                          ...
      10101000 11100000 01111111
      101010 001110 000001 111111
0000  c0 4b cb                                          .K.
      11000000 01001011 11001011
      110000 000100 101111 001011
4095 E
uu:    (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62
+ 32 22 42 14 1 63 48 4 47 11 ...
        55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33
+ 18)
asu:   (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62
+ 32 22 42 14 1 63 48 4 47 11 ...
        55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33
+ 18)
mlut:  (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62
+ 32 22 42 14 1 63 48 4 47 11 ...
        55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33
+ 18)
[download]

uic:   (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62
+ 32 22 42 14 1 63 48 4 47 11 ...
        55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33
+ 18)
uicm:  (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62
+ 32 22 42 14 1 63 48 4 47 11 ...
        55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33
+ 18)
uicb:  (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62
+ 32 22 42 14 1 63 48 4 47 11 ...
        55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33
+ 18)
        Rate  mlut    uu   asu  uicm   uic  uicb
mlut   917/s    --   -3%   -5%  -84%  -87%  -94%
uu     949/s    3%    --   -2%  -83%  -86%  -94%
asu    964/s    5%    2%    --  -83%  -86%  -94%
uicm  5747/s  527%  506%  496%    --  -18%  -62%
uic   6998/s  663%  638%  626%   22%    --  -54%
uicb 15244/s 1562% 1507% 1482%  165%  118%    --
[download]

Enjoy, Have FUN! H.Merijn

Comment on Re^5: unpacking 6-bit values Select or Download Code

Replies are listed 'Best First'.
Re^6: unpacking 6-bit values by BrowserUk (Patriarch) on Dec 12, 2010 at 14:59 UTC
That's a neat optimisation, but it gets its gains by pushing some of the required processing out of the benchmark. What I mean by that is that the sets of 32 numbers are manipulated in pairs (of sets). So, using uicb() I would have to expand one set to the buffer; copy them somewhere else; then expand the second set; before I could then do the manipulations. Ie, the copying would still need to be done, but it is no longer being measured. Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error. "Science is about questioning the status quo. Questioning authority". In the absence of evidence, opinion is indistinguishable from prejudice.	[reply]
Re^7: unpacking 6-bit values by Tux (Canon) on Dec 12, 2010 at 20:46 UTC
My goal here was just to check the influence of pre-bound variables, a la `bind_columns ()` in DBI and Text::CSV_XS. It wouldn't be too hard to rewrite the init routine so that you can pass it any form of references and make `uicb ()` (Unpack Inline C Bind) store the results in exactly the variables you want, and thus bypass all the unwanted copying and the mortalizing cost. The fact that you are dealing with a very limited range makes this very feasible. Enjoy, Have FUN! H.Merijn	[reply] [d/l] [select]