in reply to Re^4: unpacking 6-bit values
in thread unpacking 6-bit values
That really helps. You can do some sort of pre-binding of the variables and optimize for this situation. It got me a speed doubling. Feel free to modify to your heart's content ...
Sorry for the long posts. Here I ripped out the two slowest ones to make room for the real runner :)
# Benchmark of several ways to split a byte string into 6-bit values:
# three pure-Perl variants (uu, mlut, asu) and three Inline::C variants
# (uic, uicm, uicb). Every variant is first run over the same data and a
# sample of its output is printed to STDERR so the results can be compared
# by eye; then all of them are timed with Benchmark::cmpthese.
use strict;
use warnings;

use Inline "C";
use Data::Peek;
use Benchmark qw( cmpthese );

# 128 records of 24 random bytes each. 24 bytes hold 32 six-bit values, so
# the complete data set unpacks to 4096 values (indices 0 .. 4095).
# NOTE(review): the original line carried a trailing "8192" remark,
# presumably an alternative record count - confirm before relying on it.
my @src = map { pack "C*" => map { int rand 256 } 0 .. 23 } 0 .. 127;

# Show the first record as a hex dump and, per 3-byte group, the same bits
# grouped by 8 and grouped by 6.
print STDERR "First line of the compressed data ...\n";
DHexDump $src[0];
for (unpack "(a3)*", $src[0]) {
    DHexDump $_;
    my $bits = unpack "B*", $_;
    (my $b8 = substr $bits, 0, 48) =~ s/(.{8})/$1 /g;
    (my $b6 = substr $bits, 0, 48) =~ s/(.{6})/$1 /g;
    print STDERR " $b8\n $b6\n";
}

# uu ($string) - returns the list of 6-bit values of $string.
# Lets pack "u" (uuencode) do the bit shuffling: every encoded character
# carries 6 bits as (value + 32), with 0 possibly written as "`", hence the
# "- 32" and "& 63". The "x" skips the length character of each line.
sub uu {
    map { map { ($_ - 32) & 63 } unpack "xC*" => $_ }
        split m/ *\n/ => pack "u" => shift;
} # uu

{
    # Lookup table indexed by the *unshifted* masked value, so no shift is
    # needed at run time. The highest index is 63 << 18, which makes this
    # array very large and very sparse.
    my @lut;
    for (0 .. 0b111111) {
        $lut[$_ << 18] = $_;
        $lut[$_ << 12] = $_;
        $lut[$_ <<  6] = $_;
        $lut[$_      ] = $_;
    }

    # mlut ($string) - returns the list of 6-bit values of $string.
    # Each 3-byte group is turned into a 24-bit number; the trailing NUL
    # bytes zero-pad a short final group so unpack "N" always has 4 bytes.
    sub mlut {
        map {
            my $v = unpack "N", "\x00$_\x00\x00";
            ($lut[$v & 0b111111_000000_000000_000000],
             $lut[$v & 0b000000_111111_000000_000000],
             $lut[$v & 0b000000_000000_111111_000000],
                  $v & 0b000000_000000_000000_111111);
        } unpack "(a3)*" => shift;
    } # mlut
}

{
    my $m0 = 0b111111_000000_000000_000000;
    my $m1 = 0b000000_111111_000000_000000;
    my $m2 = 0b000000_000000_111111_000000;
    my $m3 = 0b000000_000000_000000_111111;

    # asu ($string) - returns the list of 6-bit values of $string.
    # Same 24-bit number per 3-byte group as mlut, but using and + shift
    # instead of a lookup table.
    sub asu {
        map {
            # Not named $b: that would shadow the sort variable.
            my $word = unpack "N", "\x00$_\x00\x00";
            (($word & $m0) >> 18,
             ($word & $m1) >> 12,
             ($word & $m2) >>  6,
             ($word & $m3));
        } unpack "(a3)*" => shift;
    } # asu
}

# report ($label, @values) - print the first 32 and the last 23 of the
# 4096 unpacked values, so the variants can be compared visually.
sub report {
    my ($label, @dst) = @_;
    print STDERR "$label: (@dst[0..31] ...\n @dst[4073..4095])\n";
} # report

my @dst;

@dst = map { uu   ($_) } @src;
print STDERR "$#dst E\n";
report ("uu",   @dst);

@dst = map { asu  ($_) } @src;
report ("asu",  @dst);

@dst = map { mlut ($_) } @src;
report ("mlut", @dst);

@dst = map { uic  ($_) } @src;
report ("uic",  @dst);

@dst = map { uicm ($_) } @src;
report ("uicm", @dst);

# uicb writes into 32 pre-allocated scalars; @b holds references to them.
my @b = uicb_init (32);
@dst = map { uicb ($_); map { $$_ } @b } @src;
report ("uicb", @dst);

cmpthese (-2, {
    uu   => sub { uu   ($_) for @src; },
    asu  => sub { asu  ($_) for @src; },
    mlut => sub { mlut ($_) for @src; },
    uic  => sub { uic  ($_) for @src; },
    uicm => sub { uicm ($_) for @src; },
    uicb => sub { uicb ($_) for @src; },
    });

__END__
__C__

/* uic (src) - return the 6-bit values of the string src as a list.
 *
 * A final group of fewer than 3 bytes is zero-padded, like the Perl
 * variants do, instead of reading past the end of the string.
 *
 * NOTE(review): the values are pushed WITHOUT sv_2mortal (), so as far as
 * the usual XS rules go nothing ever frees them and every call leaks. That
 * is kept deliberately: this function is the "no mortal" baseline that the
 * benchmark compares uicm against. Use uicm in real code.
 */
void uic (SV *src) {
    STRLEN         i, l;
    unsigned char  tail[3];
    unsigned char *s = (unsigned char *)SvPV (src, l);
    inline_stack_vars;

    inline_stack_reset;
    for (i = 0; i < l; i += 3) {
        const unsigned char *p = s + i;
        int n;

        if (l - i < 3) {                /* short final group: zero-pad */
            tail[0] = p[0];
            tail[1] = (l - i > 1) ? p[1] : 0;
            tail[2] = 0;
            p = tail;
            }

        n = (p[0] >> 2) & 0x3f;
        inline_stack_push (newSViv (n));
        n = ((p[0] & 0x03) << 4) | ((p[1] >> 4) & 0x0f);
        inline_stack_push (newSViv (n));
        n = ((p[1] & 0x0f) << 2) | ((p[2] >> 6) & 0x03);
        inline_stack_push (newSViv (n));
        n = p[2] & 0x3f;
        inline_stack_push (newSViv (n));
        }
    inline_stack_done;
    } /* uic */

/* uicm (src) - return the 6-bit values of the string src as a list.
 *
 * Identical to uic, except that every returned value is made mortal so it
 * is released once the caller is done with it.
 */
void uicm (SV *src) {
    STRLEN         i, l;
    unsigned char  tail[3];
    unsigned char *s = (unsigned char *)SvPV (src, l);
    inline_stack_vars;

    inline_stack_reset;
    for (i = 0; i < l; i += 3) {
        const unsigned char *p = s + i;
        int n;

        if (l - i < 3) {                /* short final group: zero-pad */
            tail[0] = p[0];
            tail[1] = (l - i > 1) ? p[1] : 0;
            tail[2] = 0;
            p = tail;
            }

        n = (p[0] >> 2) & 0x3f;
        inline_stack_push (sv_2mortal (newSViv (n)));
        n = ((p[0] & 0x03) << 4) | ((p[1] >> 4) & 0x0f);
        inline_stack_push (sv_2mortal (newSViv (n)));
        n = ((p[1] & 0x0f) << 2) | ((p[2] >> 6) & 0x03);
        inline_stack_push (sv_2mortal (newSViv (n)));
        n = p[2] & 0x3f;
        inline_stack_push (sv_2mortal (newSViv (n)));
        }
    inline_stack_done;
    } /* uicm */

/* Pre-bound result scalars shared by uicb_init and uicb. */
SV **uicb_sv;
static int uicb_n = 0;

/* uicb_init (n) - allocate n result scalars for uicb and return a list of
 * n references to them. A negative n is treated as 0. Calling it again
 * releases the scalars of the previous call first.
 */
void uicb_init (int n) {
    int i;
    inline_stack_vars;

    inline_stack_reset;

    if (uicb_sv) {                      /* drop buffers of an earlier call */
        for (i = 0; i < uicb_n; i++)
            SvREFCNT_dec (uicb_sv[i]);
        Safefree (uicb_sv);
        uicb_sv = NULL;
        uicb_n  = 0;
        }

    if (n < 0)
        n = 0;
    Newxz (uicb_sv, n > 0 ? n : 1, SV *);       /* croaks when out of memory */
    uicb_n = n;

    for (i = 0; i < n; i++) {
        uicb_sv[i] = newSViv (i);
        /* The reference is mortal; the caller's list assignment copies it. */
        inline_stack_push (sv_2mortal (newRV_inc (uicb_sv[i])));
        }
    inline_stack_done;
    } /* uicb_init */

/* uicb (src) - store the 6-bit values of the string src into the scalars
 * created by uicb_init, in order. Returns nothing.
 *
 * Croaks when src needs more scalars than were initialised, instead of
 * writing past the end of the buffer array. Scalars beyond the last value
 * written keep whatever they held before. A short final group is
 * zero-padded, like in the other variants.
 */
void uicb (SV *src) {
    STRLEN         i, l, need;
    int            idx = 0;
    unsigned char  tail[3];
    unsigned char *s = (unsigned char *)SvPV (src, l);

    need = ((l + 2) / 3) * 4;           /* 4 values per (partial) group */
    if (need > (STRLEN)uicb_n)
        croak ("uicb: string needs %lu buffers, only %d initialised",
            (unsigned long)need, uicb_n);

    for (i = 0; i < l; i += 3) {
        const unsigned char *p = s + i;
        int n;

        if (l - i < 3) {                /* short final group: zero-pad */
            tail[0] = p[0];
            tail[1] = (l - i > 1) ? p[1] : 0;
            tail[2] = 0;
            p = tail;
            }

        n = (p[0] >> 2) & 0x3f;
        sv_setiv (uicb_sv[idx++], n);
        n = ((p[0] & 0x03) << 4) | ((p[1] >> 4) & 0x0f);
        sv_setiv (uicb_sv[idx++], n);
        n = ((p[1] & 0x0f) << 2) | ((p[2] >> 6) & 0x03);
        sv_setiv (uicb_sv[idx++], n);
        n = p[2] & 0x3f;
        sv_setiv (uicb_sv[idx++], n);
        }
    } /* uicb */
Leads to
First line of the compressed data ... 0000 18 d5 ba 4c ff fc 69 d3 51 7f f3 84 b8 c5 99 27 ...L..i.Q..... +.' 0010 e8 16 a8 e0 7f c0 4b cb ......K. 0000 18 d5 ba ... 00011000 11010101 10111010 000110 001101 010110 111010 0000 4c ff fc L.. 01001100 11111111 11111100 010011 001111 111111 111100 0000 69 d3 51 i.Q 01101001 11010011 01010001 011010 011101 001101 010001 0000 7f f3 84 ... 01111111 11110011 10000100 011111 111111 001110 000100 0000 b8 c5 99 ... 10111000 11000101 10011001 101110 001100 010110 011001 0000 27 e8 16 '.. 00100111 11101000 00010110 001001 111110 100000 010110 0000 a8 e0 7f ... 10101000 11100000 01111111 101010 001110 000001 111111 0000 c0 4b cb .K. 11000000 01001011 11001011 110000 000100 101111 001011 4095 E uu: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) asu: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) mlut: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18)
uic: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) uicm: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) uicb: (6 13 22 58 19 15 63 60 26 29 13 17 31 63 14 4 46 12 22 25 9 62 + 32 22 42 14 1 63 48 4 47 11 ... 55 26 57 32 6 47 51 40 26 6 50 37 62 36 60 37 53 8 54 41 32 33 + 18) Rate mlut uu asu uicm uic uicb mlut 917/s -- -3% -5% -84% -87% -94% uu 949/s 3% -- -2% -83% -86% -94% asu 964/s 5% 2% -- -83% -86% -94% uicm 5747/s 527% 506% 496% -- -18% -62% uic 6998/s 663% 638% 626% 22% -- -54% uicb 15244/s 1562% 1507% 1482% 165% 118% --
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^6: unpacking 6-bit values
by BrowserUk (Patriarch) on Dec 12, 2010 at 14:59 UTC | |
by Tux (Canon) on Dec 12, 2010 at 20:46 UTC |