I rewrote the parser using substr, as suggested in the thread you linked. It doesn't seem to have made any noticeable difference to the speed.

Have I fundamentally misunderstood anything? My implementation is below.

Thanks

# main #
#
# Decodes a stream of length-prefixed binary messages and prints each one as
# a line of "tag=value," pairs.  Every message is a 2-byte big-endian length
# followed by that many bytes of fields.  Every field starts with a 16-bit
# big-endian header whose top 3 bits select the decoder and whose low 13 bits
# are the tag number.
#
# %go and $log are expected to be set up earlier in the script.

my $t0 = Benchmark->new();

# Read from the file named by $go{file} when given, otherwise from a
# duplicate of STDIN.  Both opens are checked so a bad path fails loudly
# instead of silently producing no output.
my $FEED;
if ($go{file}) {
    open $FEED, '<:perlio', $go{file}
        or die "Cannot open '$go{file}': $!";
}
else {
    open $FEED, '<&', \*STDIN
        or die "Cannot dup STDIN: $!";
}
binmode $FEED;

# Dispatch table: field type (0..7) => decoder.  Every decoder is called as
# $decoder->($message, $offset) and must return ($value, $new_offset).
my %vtmf_type = (
    0 => \&int_type,
    1 => \&byte_type,
    2 => \&var_length,
    3 => \&long_type,

    # Type 4 carries no payload.  It must still hand the offset back:
    # returning a lone 0 would leave the caller's $offset undefined and
    # restart parsing of the message from byte 0.
    4 => sub { my ($message, $offset) = @_; return (0, $offset) },

    5 => \&subtype_type,
    6 => \&var_length,
    7 => \&price_type,

    # Fallback for a type with no decoder.  Returning the message length
    # ends the field loop for this message, whatever its size.
    __DEFAULT__ => sub {
        my ($message) = @_;
        $log->warn("skipping message");
        return ('', length $message);
    },
);

my $message_length_bin;
MESSAGE:
while (1) {
    my $got = read $FEED, $message_length_bin, 2;
    die "Read error on feed: $!" unless defined $got;
    last MESSAGE if $got == 0;    # clean end of input
    if ($got < 2) {
        $log->warn("truncated message length at end of input");
        last MESSAGE;
    }

    my $message_length = unpack 'n', $message_length_bin;

    my $message = '';
    my $got_body = read $FEED, $message, $message_length;
    die "Read error on feed: $!" unless defined $got_body;
    if ($got_body != $message_length) {
        $log->warn("truncated message at end of input");
        last MESSAGE;
    }

    # Build the whole output line in memory and print it once per message.
    my $line   = '';
    my $offset = 0;
    while ($offset < $message_length) {
        if ($offset + 2 > $message_length) {
            $log->warn("truncated field header, skipping rest of message");
            last;
        }

        # One integer unpack plus shift/mask replaces the bit-string
        # round trip (unpack 'B16' followed by oct('0b...')).
        my $tag_field = unpack 'n', substr($message, $offset, 2);
        $offset += 2;
        my $type = $tag_field >> 13;       # top 3 bits
        my $tag  = $tag_field & 0x1FFF;    # low 13 bits

        my $decoder = $vtmf_type{$type} || $vtmf_type{__DEFAULT__};
        my $value;
        ($value, $offset) = $decoder->($message, $offset);

        $line .= $tag . '=' . $value . ',';
    }
    print $line, "\n";
}

my $t1 = Benchmark->new();
if ($go{time}) {
    my $td = timediff($t1, $t0);
    $log->info("The code took: ", timestr($td));
}

exit 0;

# subroutines #
#
# Every decoder takes ($message, $offset) and returns ($value, $new_offset).

# Returns the unsigned 64-bit big-endian integer stored in the 8 bytes of
# $message starting at $offset.  It is assembled from two 32-bit halves so it
# also works on a perl without 64-bit pack support.
sub _unpack_u64 {
    my ($message, $offset) = @_;
    my ($hi, $lo) = unpack 'N2', substr($message, $offset, 8);
    return $hi * 2**32 + $lo;
}

# Decodes a 4-byte big-endian unsigned integer.
sub int_type {
    my ($message, $offset) = @_;
    my $value = unpack 'N', substr($message, $offset, 4);
    return ($value, $offset + 4);
}

# Decodes an 8-byte big-endian unsigned integer.
sub long_type {
    my ($message, $offset) = @_;
    return (_unpack_u64($message, $offset), $offset + 8);
}

# Decodes an 8-byte big-endian unsigned integer scaled down by 10**7.
# The halves are combined as $hi * 2**32 + $lo; writing "$hi << 8 + $lo"
# would parse as "$hi << (8 + $lo)" because + binds tighter than <<.
sub price_type {
    my ($message, $offset) = @_;
    my $num = _unpack_u64($message, $offset) / 10**7;
    return ($num, $offset + 8);
}

# Decodes a single byte as a character; the 'A' template turns a space or
# NUL byte into the empty string.
sub byte_type {
    my ($message, $offset) = @_;
    my $val = unpack 'A', substr($message, $offset, 1);
    return ($val, $offset + 1);
}

# Decodes a field whose first byte names a subtype: 'B', 'S' or 'P'.
# Only 'P' yields a value; 'B' and 'S' are stepped over and yield ''.
#
# NOTE(review): the subtype byte is consumed before the payload is read.
# This is inferred from a 'P' field being 13 bytes long in total
# (1 subtype + 2 skipped + 8 mantissa + 2 exponent) -- confirm against the
# feed specification.
sub subtype_type {
    my ($message, $offset) = @_;

    my $type = substr($message, $offset, 1);
    $offset += 1;

    my $value = '';
    if ($type eq 'B') {
        # 2-byte big-endian length followed by that many payload bytes.
        my $length = unpack('n', substr($message, $offset, 2)) || 0;
        $offset += 2 + $length;
    }
    elsif ($type eq 'S') {
        # NOTE(review): only the 2-byte length is stepped over here; if a
        # payload of that length follows, it still needs to be skipped.
        $offset += 2;
    }
    elsif ($type eq 'P') {
        $offset += 2;    # skip 2 bytes
        my $num = _unpack_u64($message, $offset);
        $offset += 8;

        # NOTE(review): the exponent is read as unsigned; use
        # unpack('s>', ...) instead if the feed sends negative exponents.
        my $exponent = unpack 'n', substr($message, $offset, 2);
        $offset += 2;

        $value = $num * 10**$exponent;
    }

    # Hand back the advanced offset, which is what the field loop assigns
    # to its cursor.
    return ($value, $offset);
}

# Decodes a string prefixed by a 1-byte length.  The 'A' template strips
# trailing spaces and NULs from the result.
sub var_length {
    my ($message, $offset) = @_;

    my $length = unpack 'C', substr($message, $offset, 1);
    $offset += 1;

    my $value = unpack "A$length", substr($message, $offset, $length);
    $offset += $length;

    return ($value, $offset);
}

__END__

In reply to Re^2: Reading binary file performance by oneill
in thread Reading binary file performance by oneill

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.