in reply to Re^4: unpacking 6-bit values
in thread unpacking 6-bit values

That really helps. You can pre-bind the variables and optimize for this situation; doing so doubled the speed for me. Feel free to modify it to your heart's content ...

Sorry for the long posts. Here I ripped out the two slowest ones to make room for the real runner :)

use strict;
use warnings;

use Inline "C";
use Data::Peek;
use Benchmark qw( cmpthese );

# Benchmark of several ways to split packed bytes into 6-bit values
# (every 3 input bytes yield 4 values in the range 0 .. 63).
#
#   uu    - pure Perl, via uuencoding
#   mlut  - pure Perl, mask + lookup table
#   asu   - pure Perl, mask + shift
#   uic   - Inline C, returns non-mortal SVs (baseline only, see below)
#   uicm  - Inline C, returns mortal SVs
#   uicb  - Inline C, stores into pre-bound variables from uicb_init ()

# 128 random strings of 24 bytes; each string unpacks into 32 values.
my @src = map { pack "C*" => map { int rand 256 } 0 .. 23 } 0 .. 127;    # 8192;

# Show the first string as hex and, per 3-byte group, as 8-bit and 6-bit fields.
print STDERR "First line of the compressed data ...\n";
DHexDump $src[0];
for (unpack "(a3)*", $src[0]) {
    DHexDump $_;
    my $bits = unpack "B*", $_;
    (my $b8 = substr $bits, 0, 48) =~ s/(.{8})/$1 /g;
    (my $b6 = substr $bits, 0, 48) =~ s/(.{6})/$1 /g;
    print STDERR " $b8\n $b6\n";
}

# uuencode the argument and turn every encoded character back into its
# 6-bit value. "x" skips the first character of each encoded line.
sub uu {
    map { map { ($_ - 32) & 63 } unpack "xC*" => $_ }
        split m/ *\n/ => pack "u" => shift;
} # uu

{
    # Maps a masked (still shifted) field of the 24-bit group to its value.
    my @lut;
    for (0 .. 0b111111) {
        $lut[$_ << 18] = $_;
        $lut[$_ << 12] = $_;
        $lut[$_ <<  6] = $_;
        $lut[$_      ] = $_;
    }

    # Split the argument into 3-byte groups, read each as one integer and
    # look the three upper fields up in @lut; the lowest field needs no shift.
    sub mlut {
        map {
            my $v = unpack "N", "\x00$_\x00\x00";
            ($lut[$v & 0b111111_000000_000000_000000],
             $lut[$v & 0b000000_111111_000000_000000],
             $lut[$v & 0b000000_000000_111111_000000],
                  $v & 0b000000_000000_000000_111111);
        } unpack "(a3)*" => shift;
    } # mlut
}

{
    my $m0 = 0b111111_000000_000000_000000;
    my $m1 = 0b000000_111111_000000_000000;
    my $m2 = 0b000000_000000_111111_000000;
    my $m3 = 0b000000_000000_000000_111111;

    # Same as mlut, but the fields are shifted down instead of looked up.
    sub asu {
        map {
            # $v, not $b: "my $b" would mask the global used by sort.
            my $v = unpack "N", "\x00$_\x00\x00";
            (($v & $m0) >> 18,
             ($v & $m1) >> 12,
             ($v & $m2) >>  6,
             ($v & $m3));
        } unpack "(a3)*" => shift;
    } # asu
}

# Run every variant once and print the head and tail of its result so the
# outputs can be compared by eye.
my @dst;
@dst = map { uu ($_) } @src;
print STDERR "$#dst E\n";
print STDERR "uu: (@dst[0..31] ...\n @dst[4073..4095])\n";
@dst = map { asu ($_) } @src;
print STDERR "asu: (@dst[0..31] ...\n @dst[4073..4095])\n";
@dst = map { mlut ($_) } @src;
print STDERR "mlut: (@dst[0..31] ...\n @dst[4073..4095])\n";
@dst = map { uic ($_) } @src;
print STDERR "uic: (@dst[0..31] ...\n @dst[4073..4095])\n";
@dst = map { uicm ($_) } @src;
print STDERR "uicm: (@dst[0..31] ...\n @dst[4073..4095])\n";

# uicb returns nothing: it overwrites the 32 scalars that @b refers to,
# so they have to be copied out after every call.
my @b = uicb_init (32);
@dst = map { uicb ($_); map { $$_ } @b } @src;
print STDERR "uicb: (@dst[0..31] ...\n @dst[4073..4095])\n";

cmpthese (-2, {
    uu   => sub { uu   ($_) for @src; },
    asu  => sub { asu  ($_) for @src; },
    mlut => sub { mlut ($_) for @src; },
    uic  => sub { uic  ($_) for @src; },
    uicm => sub { uicm ($_) for @src; },
    uicb => sub { uicb ($_) for @src; },
    });

__END__
__C__

/* Byte i of the len-byte buffer s, or 0 when i is past its end. This
 * zero-pads a short final group, like the "\x00$_\x00\x00" trick in the
 * Perl versions, instead of reading beyond the string. */
#define BYTE_AT(s, len, i) ((i) < (len) ? (s)[i] : 0)

/* Return the 6-bit values of src as a list of new SVs.
 * NOTE: the SVs are pushed without sv_2mortal (), so nothing ever frees
 * them. This variant exists only as the baseline that shows what
 * mortalizing costs in uicm (); do not use it outside the benchmark. */
void uic (SV *src) {
    STRLEN         i = 0;
    STRLEN         l;
    unsigned char *s = (unsigned char *)SvPV (src, l);
    inline_stack_vars;

    inline_stack_reset;
    while (i < l) {
        int b0 = s[i];
        int b1 = BYTE_AT (s, l, i + 1);
        int b2 = BYTE_AT (s, l, i + 2);

        inline_stack_push (newSViv (b0 >> 2));
        inline_stack_push (newSViv (((b0 & 0x03) << 4) | (b1 >> 4)));
        inline_stack_push (newSViv (((b1 & 0x0f) << 2) | (b2 >> 6)));
        inline_stack_push (newSViv (b2 & 0x3f));
        i += 3;
        }
    inline_stack_done;
    } /* uic */

/* Return the 6-bit values of src as a list of mortal SVs. */
void uicm (SV *src) {
    STRLEN         i = 0;
    STRLEN         l;
    unsigned char *s = (unsigned char *)SvPV (src, l);
    inline_stack_vars;

    inline_stack_reset;
    while (i < l) {
        int b0 = s[i];
        int b1 = BYTE_AT (s, l, i + 1);
        int b2 = BYTE_AT (s, l, i + 2);

        inline_stack_push (sv_2mortal (newSViv (b0 >> 2)));
        inline_stack_push (sv_2mortal (newSViv (((b0 & 0x03) << 4) | (b1 >> 4))));
        inline_stack_push (sv_2mortal (newSViv (((b1 & 0x0f) << 2) | (b2 >> 6))));
        inline_stack_push (sv_2mortal (newSViv (b2 & 0x3f)));
        i += 3;
        }
    inline_stack_done;
    } /* uicm */

/* Scalars that uicb () writes into, and how many of them there are. */
SV **uicb_sv = NULL;
int  uicb_n  = 0;

/* Create n scalars for uicb () to fill and return a reference to each.
 * The C array keeps its own reference count on every scalar. Calling
 * this again releases the scalars of the previous call first. */
void uicb_init (int n) {
    int i;
    inline_stack_vars;

    inline_stack_reset;

    if (uicb_sv) {
        for (i = 0; i < uicb_n; i++)
            SvREFCNT_dec (uicb_sv[i]);
        free (uicb_sv);
        uicb_sv = NULL;
        uicb_n  = 0;
        }

    if (n > 0) {
        uicb_sv = (SV **)calloc (n, sizeof (SV *));
        if (!uicb_sv)
            croak ("uicb_init: out of memory");
        uicb_n = n;
        }

    for (i = 0; i < uicb_n; i++) {
        uicb_sv[i] = newSViv (i);
        /* The reference is mortal: the caller copies it off the stack. */
        inline_stack_push (sv_2mortal (newRV_inc (uicb_sv[i])));
        }
    inline_stack_done;
    } /* uicb_init */

/* Store the 6-bit values of src into the scalars created by uicb_init ().
 * Croaks instead of writing past the bound scalars when src holds more
 * values than were bound (or when uicb_init () was never called). */
void uicb (SV *src) {
    STRLEN         i   = 0;
    int            idx = 0;
    STRLEN         l;
    unsigned char *s    = (unsigned char *)SvPV (src, l);
    STRLEN         need = ((l + 2) / 3) * 4;

    if (need > (STRLEN)uicb_n)
        croak ("uicb: %lu values do not fit in %d bound variables",
            (unsigned long)need, uicb_n);

    while (i < l) {
        int b0 = s[i];
        int b1 = BYTE_AT (s, l, i + 1);
        int b2 = BYTE_AT (s, l, i + 2);

        sv_setiv (uicb_sv[idx++], b0 >> 2);
        sv_setiv (uicb_sv[idx++], ((b0 & 0x03) << 4) | (b1 >> 4));
        sv_setiv (uicb_sv[idx++], ((b1 & 0x0f) << 2) | (b2 >> 6));
        sv_setiv (uicb_sv[idx++], b2 & 0x3f);
        i += 3;
        }
    } /* uicb */

Leads to

First line of the compressed data ... 0000 18 d5 ba 4c ff fc 69 d3 51 7f f3 84 b8 c5 99 27 ...L..i.Q..... +.' 0010 e8 16 a8 e0 7f c0 4b cb ......K. 0000 18 d5 ba ... 00011000 11010101 10111010 000110 001101 010110 111010 0000 4c ff fc L.. 01001100 11111111 11111100 010011 001111 111111 111100 0000 69 d3 51 i.Q 01101001 11010011 01010001 011010 011101 001101 010001 0000 7f f3 84 ... 01111111 11110011 10000100 011111 111111 001110 000100 0000 b8 c5 99 ... 10111000 11000101 10011001 101110 001100 010110 011001 0000 27 e8 16 '.. 00100111 11101000 00010110 001001 111110 100000 010110 0000 a8 e0 7f ... 10101000 11100000 01111111 101010 001110 000001 111111 0000 c0 4b cb .K. 11000000 01001011 11001011 110000 000100 101111 001011 4095 E uu: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) asu: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) mlut: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18)
uic: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) uicm: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) uicb: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) Rate mlut uu asu uicm uic uicb mlut 917/s -- -3% -5% -84% -87% -94% uu 949/s 3% -- -2% -83% -86% -94% asu 964/s 5% 2% -- -83% -86% -94% uicm 5747/s 527% 506% 496% -- -18% -62% uic 6998/s 663% 638% 626% 22% -- -54% uicb 15244/s 1562% 1507% 1482% 165% 118% --

Enjoy, Have FUN! H.Merijn

Replies are listed 'Best First'.
Re^6: unpacking 6-bit values
by BrowserUk (Patriarch) on Dec 12, 2010 at 14:59 UTC

    That's a neat optimisation, but it gets its gains by pushing some of the required processing out of the benchmark.

    What I mean by that is that the sets of 32 numbers are manipulated in pairs (of sets).

    So, using uicb() I would have to expand one set to the buffer; copy them somewhere else; then expand the second set; before I could then do the manipulations. Ie, the copying would still need to be done, but it is no longer being measured.


    Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
    "Science is about questioning the status quo. Questioning authority".
    In the absence of evidence, opinion is indistinguishable from prejudice.

      My goal here was just to check the influence of pre-bound variables, a la bind_columns () in DBI and Text::CSV_XS. It wouldn't be too hard to rewrite the init routine so that it accepts any form of references and makes uicb () (Unpack Inline C Bind) store the results in exactly the variables you want, thus bypassing all the unwanted copying and the mortalizing cost.

      The fact that you are dealing with a very limited range makes this very feasible.


      Enjoy, Have FUN! H.Merijn