I rewrote the parser using substr as suggested in the thread you provided. It doesn't seem to have made much of a noticeable difference at all in terms of the speed.
Have I fundamentally misunderstood anything? My implementation is below.
Thanks
# main # my $t0 = Benchmark->new(); my $FEED; ($go{file}) ? open $FEED, "<:perlio", $go{file} : open $FEED, "<&", *STDIN; binmode $FEED; my %vtmf_type = ( 0 => \&int_type, 1 => \&byte_type, 2 => \&var_length, 3 => \&long_type, 4 => sub { return 0 }, 5 => \&subtype_type, 6 => \&var_length, 7 => \&price_type, __DEFAULT__ => sub { $log->warn("skipping message"); return ('', 2 +55) } ); my $message_length_bin; while (read $FEED, $message_length_bin, 2) { my $message_length = unpack 'n', $message_length_bin; my $offset = 0; read $FEED, my($message), $message_length; while ($offset < $message_length) { my $tag_bin = substr $message, $offset, 2; $offset += 2; my $tag_field = unpack 'B16', $tag_bin; my $type = oct('0b' . substr $tag_field, 0, 3); my $tag = substr $tag_field, 3, 16; my $value; ($value, $offset) = ($vtmf_type{$type} || $vtmf_type{__DEFAULT +__})->($message, $offset); print oct('0b' . $tag) . '=' . $value . ','; } print "\n"; } my $t1 = Benchmark->new(); if ($go{time}) { my $td = timediff($t1, $t0); $log->info("The code took: ", timestr($td)); } exit 0; # subroutines # sub int_type { my ($message, $offset) = @_; my $num_bin = substr $message, $offset, 4; $offset += 4; my $value = hex unpack "H*", $num_bin; return ($value, $offset); } sub long_type { my ($message, $offset) = @_; my $num_bin = substr $message, $offset, 8; $offset += 8; my ($hi, $lo) = map { hex $_ } (unpack 'A8A8', unpack 'H16', $num +_bin); my $num = $hi * 2**32 + $lo; return ($num, $offset); } sub price_type { my ($message, $offset) = @_; my $num_bin = substr $message, $offset, 8; $offset += 8; my ($hi, $lo) = map { hex $_ } (unpack 'A8A8', unpack 'H16', $num +_bin); my $num = $hi << 8 + $lo; $num /= 10**7; return ($num, $offset); } sub byte_type { my ($message, $offset) = @_; my $num_bin = substr $message, $offset, 1; $offset += 1; my $val = unpack 'A', $num_bin; return ($val, $offset); } sub subtype_type { my ($message, $offset) = @_; my $type = substr $message, $offset, 1; my ($value, $length) = ('', 0); if ($type eq 'B') { my $length_bin = substr $message, $offset, 2; $offset += 2; my $length = hex unpack 'H4', $length_bin; my $value_bin = substr $message, $offset, $length; $offset += $length; } elsif ($type eq 'S') { my $length_bin = substr $message, $offset, 2; $offset += 2; } elsif ($type eq 'P') { #skip 2 bytes $offset += 2; my $num_bin = substr $message, $offset, 8; $offset += 8; my ($hi, $lo) = map { hex $_ } (unpack 'A8A8', unpack 'H16', $ +num_bin); my $num = $hi << 8 + $lo; my $exponent_bin = substr $message, $offset, 2; $offset +=2; my $exponent = hex unpack 'H4', $exponent_bin; $value = $num * 10**$exponent; $length = 13; 1; } else { } return ($value, $length); } sub var_length { my ($message, $offset) = @_; my $length_bin = substr $message, $offset, 1; $offset += 1; my $length = hex unpack 'H2', $length_bin; my $value_bin = substr $message, $offset, $length; $offset += $len +gth; my $value = unpack "A${length}", $value_bin; return ($value, $offset); } __END__
In reply to Re^2: Reading binary file performance
by oneill
in thread Reading binary file performance
by oneill
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |