Stevie-O has asked for the wisdom of the Perl Monks concerning the following question:
I needed (or at least wanted) a module to let me access members of a structure encoded via a pack() string by name, so I wrote this module. You give it a list of names and their corresponding pack formats and it creates a tied hash that allows you to access stuff in the middle of the structure by name.
Comments, and suggestions for a CPAN-friendly name, would be most appreciated :)
package SeeStruct; use strict; use warnings; use Scalar::Util qw(blessed); # why the heck perl doesn't have an 'o +bj()' to match 'ref()', I don't know use Storable; # for supporting embedded structs. use Carp qw(carp croak); use Regexp::Common 'balanced'; use overload fallback => 1, q[""] => 'tostr', q[%{}] => 'tohash'; our $VERSION = '0.01'; use constant { _STRUCTURE_SIZE => 0, _MEMBER_HASH => 1, _TIED_OBJECT = +> 2, _FIRST_MEMBER => 3 }; =pod =head1 NAME SeeStruct (working title; perhaps "Struct::Packed"?) - Treat a data st +ructure creatable by pack() as a hash. Suggestions so far: Class::PackedStruct Data::PackedStruct =head1 VERSION 0.01 pre-alpha ;) =head1 SYNOPSIS # consider the following: # struct data { # long x; # long y; # char name[8]; // ASCIIZ # short data[5]; # } # this corresponds to a pack/unpack string of 'llZ8S5'. use SeeStruct; $data = new SeeStruct ( x => 'l', y => 'l', name => 'Z8', data => +'S5' ); # Several ways to find the size of the structure print "The structure is ", $data->size(), " bytes long."; print "A C coder might say it is ", sizeof $data, " bytes long."; # and of course, size $data and $data->sizeof() work too. $hypothetical = pack('llZ8S5', 10, 20, 'hi', 100, 200, 300, 400, 5 +00); # load a packed string into $data $data->load($hypothetical); $data->{name} = 'hello'; print "The first element of data is $data->{data}[0]"; # prints 10 +0 $data->{data}[1] = 201; # stringify to obtain the packed form. $hypothetical = "$data"; $data2 = new SeeStruct($data); # clone $data; the new structure is + zeroed. print "The members of \$data are ", join(' ', keys %$data); # prints 'x y name data'. } =head1 DESCRIPTION Have you ever had to deal with a large structure in a binary form, usi +ng pack() and unpack() to access it? I do it all the time, and I wind up +doing something like this: @hash{qw(this that theother a b c d e f g)} = unpack('...', $strin +g); It gets worse when there are embedded arrays I want to treat that way: (@hash{qw(x y name)}, @{$hash{data}}[0..4]) = unpack('llZ8S5', $string); (*shudder*) And when I want to change a few members of the structure, +it's even MORE of a PITA: $string = pack('llZ8S5', @hash{qw(x y name)}, @{$hash{data}); Yechh. So I wrote this module. Given a set of (member, type) pairs, yo +u can easily manipulate binary structure members if you know (or can find ou +t) the pack() format for them: $foo = new SeeStruct(x => 'l', y => 'l', name => 'Z8'); $foo->load($string); $foo->{y} = 5; $string = "$foo"; You can even have arrays: $foo = new SeeStruct(x => 'l', y => 'l', name => 'Z8', data => 'S5 +'); $foo->load($string); $foo->{data}[3] = 10; $string = "$foo"; Or embedded structures: $foo => new SeeStruct(hostname => 'Z64', service => 'Z10', addr_in => new SeeStruct(sin_family => 's', sin_port => 'n', sin_addr => 'N', sin_zero => 'a8' ) ); $foo->{hostname} = 'ftp.example.com'; $foo->{service} = 'ftp'; $foo->{addr_in}{sin_family} = 2; # AF_INET; $foo->{addr_in}{sin_port} = 21; $foo->{addr_in}{sin_addr} = inet_aton('127.0.0.1'); Or arrays of embedded structures: $foo => new SeeStruct(hostname => 'Z64', service => 'Z10', addresses => [ new SeeStruct(sin_family => ' +s', sin_port => 'n', sin_addr => 'N', sin_zero => 'a8' ), 5 ] ); # now there are 5 addresses in $foo $SeeStruct::DUMPMODE = 1; # causes SS to behave in a fashion frien +dly to # Data::Dumper and perl5db's "x" command +. print Data::Dumper::Dump \%$foo; $VAR1 = { 'hostname' => undef, 'service' => '', 'addresses' => [ { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' }, { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' }, { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' }, { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' }, { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' } ] }; Note that, unlike the various other modules on CPAN that provide C-str +uct-like access, mine takes actual pack() formats, so one can specify 'n' and ' +N' for explicit network-byte-order packing ;) The general format is this: name => '$typechar' # $typechar in bBhHcCsSiIlLnNvVjJfFdDpPaAZqQ # for a single member of that type. name => $obj # $obj being a SeeStruct object # for a member that is a single embedded stru +ct name => '$schar$len' # $schar in aAZ # $len being a number # for a single member of that type and size. name => '$nchar$n' # $nchar in bBhHcCsSiIlLnNvVjJfFdDpPqQ # $n being a number # for an array of $n elements of type $nchar # this is a shortcut for the following syntax +: name => [ $whatever, # $whatever corresponding to '$typechar', $n ] # '$obj', or '$schar$len', # and $n being a number # for an array of $n elements of type $whatev +er =head1 EXAMPLES $foo = new SeeStruct( this => 'l', that => 's', theother => 'C', ); $bar => new SeeStruct( four_shorts => 's4', also_four_shorts => ['s',4] name => 'Z8', # single element, ASCIIZ string up to +8 long names => ['Z8', 4], # four such names sub_struct => new SeeStruct(lng => 'l', shrt => 's', chr = +> 'C'), struct_array => [ new SeeStruct( lng => 'l', shrt => 's', chr =. 'C'), 5 ] ); # these will croak $invalid = new SeeStruct( severalshorts => ['s4', 2], # how many shorts, 4 or 2? ); $invalid = new SeeStruct( a => 'l', b => 'l', c => 'l', a => 'l' # already have a member named 'a'! ); # A struct with reserved fields: $bar => new SeeStruct( data1 => 'L', '' => 'S', # 16-bit reserved field data2 => 'S', '' => 'L', # 32-bit reserved field ); # the ''-named fields will be given unique generated names. There is also a completely different alternate syntax that I'm going t +o support (but not quite yet). It goes like this: # corresponds to (x => 'l', y => 'l', name => 'Z8', data => 'S5') $struct = new SeeStruct( [qw(x y name data)], 'llZ8S5' ); # (a => 'l', b => 'l', c => ['Z8', 5]) $struct = new SeeStruct( [qw(a b c)], 'll(Z8)5' ); # The real advantage comes from things like this: # (a => 'l', b => 'l', c => new SeeStruct(b=>'C',b2=>'C',w=>'s',d= +>'l'), # d => 'a8') $struct = new SeeStruct( [qw(a b c), [qw(b b2 w d)], qw(d)], 'll(CCsl)a8' ); # (a => 'l', b => [ new SeeStruct(k => 'A8', v => 'A8'), 10 ], c = +> 's') $struct = new SeeStruct( [qw(a b), [qw(k v)], qw(c)], 'l(A8A8)10s' ); Supporting this will be a little complicated because I need to look ah +ead to determine that '(A8)5' is a simple array of type 'A8' whereas '(A8S)5' + needs to be an embedded structure. =cut our %typesize; # I use eval here because q and Q throw a fatal error on perls not bui +lt with 64-bit integer support. eval { $typesize{$_} = length(pack($_, 0)) } for split //, 'bBhHcCsSiI +lLnNvVjJfFdDpPaAZqQ'; our $WARNINGS = 1; our $DUMPMODE = 0; sub size { my $this = shift; return $this->[_STRUCTURE_SIZE]; } # alias size to sizeof so IO syntax "sizeof $obj" works *sizeof = \&size; sub _dupe { Storable::dclone(shift) } sub new { my $class = shift; my @list; my $rcount; my @members; my %members; my $offs = 0; # DWIM #1: new SeeStruct($obj) clones $obj return $_[0]->_dupe->zero if (ref($_[0]) && @_==1); # DWIM #2: alternate (packstring-based) syntax if (ref($_[0]) && @_==2) { @_ = _pack_to_seestruct(reverse @_); } while (my ($name, $type) = splice(@_, 0, 2)) { $name ||= "_reserved_${offs}_" . $rcount++; croak "A member named $name was already specified!" if exists +$members{$name}; my $size; my $dim; # several possibilities: # 1. $type is a single character of [bBhHcCsSiIlLnNvVqQjJfF +dDpPaAZ], in which case this is a single struct element. # Entry is accessed with $obj->{name} # 2. $type is a string consisting of a letter [aAZ] followe +d by a number, in which case this is also a single struct element. # Entry is accessed with $obj->{name} # 3. $type is a reference to another SeeStruct object, in whi +ch case a struct of that type is embedded within. # Note that the passed object is NOT used di +rectly, but is instead cloned (this allows an array to be used). # 4. $type is a string consisting of a letter [bBhHcCsSiIlL +nNvVqQjJfFdDpP] followed by a number, in which case this is an array +of the # specified type. # Entry is accessed with $obj->{name}[index] +. # 5. $type is an reference to an array with two elements, w +ith the first element matching #1, #2, or #3 (NOT #4!), in which case + this is an array # of that type, and the second element being + the size of the array (this is an alternate form of #4, and the only + way to have an array of # a type matching #2 or #3. reparse: if (!ref $type) { if ($type =~ /^[bBhHcCsSiIlLnNvVqQjJfFdDpPaAZ]$/) { # this is for q and Q, which are only defined on perls + built with 64-bit integer support croak "This version of perl does not support type $typ +e" unless exists $typesize{$type}; $size = $typesize{$type}; } elsif ($type =~ /^[aAZ](\d+)$/) { $size = $1; } elsif ($type =~ /^([bBhHcCsSiIlLnNvVqQjJfFdDpP])(\d+)$/) + { croak "Array dimension specified twice for member '$n +ame'" if $dim; $type = $1; $size = $typesize{$type}; $dim = $2; # this is an array element. } else { croak "Can't grok type specifier '$type' for member '$ +name'"; } } elsif (blessed($type)) { croak "Hmm, '$name' has an object for a type, but it's not + another ".__PACKAGE__." class!" unless $type->isa(__PACKAGE__); $type = $type->_dupe(); $size = sizeof $type; } elsif (ref $type eq 'ARRAY') { # if $dim is already defined, they did something crazy lik +e [ [ 'C', 4 ], 5 ]. croak "WTF? You gave me a nested array for the type of '$n +ame'?!?" if $dim; croak "Hmm, '$name' has an array type with ".@$type." elem +ents, but I was expecting 2 elements." unless (@$type == 2); croak "Hmm, '$name' seems to have a nonsensical array size + $type->[1]" unless ($type->[1] >= 1); carp "Struct member '$name' is an array of only 1 element? +" if $WARNINGS and $type->[1] == 1; ($type, $dim) = @$type; print "type is ",ref($type)||$type, ", dim is $dim; repars +ing"; goto reparse; # re-grok $type. } else { croak "I have no idea what to do with the type you specifi +ed for member '$name'"; } #print "$name: type is ",ref($type)||$type, ", dim is ", ($dim +||0); my $initlist = ($dim && ref($type)) ? # for an array of SeeStruct obj +ects, we have to clone another $type object for each entry. [ map{$type->_dupe()} 1..$dim ] : (ref $type) ? # for a single SeeStruct objec +t, we just specify our $type object again. $type : ($dim) ? # for normal types things are +simpler. [ undef() x $dim ] : undef; push @members, $members{$name} = { name => $name, # member name offset => $offs, # offset from start of data tsize => $size * ($dim||1), # total size of al +l elements size => $size, # size of each element type => $type, # pack string, or a reference +to another SeeStruct object dim => $dim, # elements in array (0/undef i +f not array) value => $initlist, # initial value (undef for all + but $types) }; $offs += $size * ($dim || 1); # $dim of 0/undef is the same +size as $dim of 1 } unshift @members, $offs, # size of the structure \%members, # hash of members for quick by +-name access undef; # reference to tied hash; this + will be created by tohash() if/when needed bless(\@members, $class)->zero(); } sub tohash { my $this = shift; $this->[_TIED_OBJECT] ||= $this->_gentiedhash(); # this ret +urns $this->[_TIED_OBJECT], creating it first if necessary } # _gentiedhash # Return a reference to a tied hash to handle dereferencing. # Override it if you want to :) sub _gentiedhash { my ($backref) = @_; my $hash; tie %$hash, __PACKAGE__.'::TiedHash', $backref; $hash } # _gentiedarray # Return a reference to a tied array to handle members that are arrays +. sub _gentiedarray { my ($backref, $member, $size) = @_; my $array; tie @$array, __PACKAGE__.'::TiedArray', $backref, $member, $size; $array } sub _fetch { my ($this, $key) = @_; croak "No such member '$key' in object" unless exists $this->[_MEM +BER_HASH]{$key}; my $member = $this->[_MEMBER_HASH]{$key}; $member->{dim} ? ($member->{_tiedarray} ||= $this->_gentiedarray($key, $membe +r->{dim})) : ($DUMPMODE && ref $member->{type}) ? \%{$member->{value}} : $member->{value}; } sub _store { my ($this, $key, $value) = @_; croak "No such member '$key' in object" unless exists $this->[_MEM +BER_HASH]{$key}; my $member = $this->[_MEMBER_HASH]{$key}; croak "Can't overwrite an array member wholesale" if $member->{dim +}; # TODO: it might make more sense to call the ->load() method right + here automatically... croak "Can't store directly to an inner SeeStruct object (use ->lo +ad())" if ref $member->{type}; $member->{value} = $value; $value; } sub _fetch_index { my ($this, $key, $index) = @_; # FIXME: implement this correctly (not sure if it's correct) my $member = $this->[_MEMBER_HASH]{$key}; ($DUMPMODE && ref $member->{type}) ? \%{$member->{value}[$index]} +: $member->{value}[$index]; } sub _store_index { my ($this, $key, $index, $value) = @_; # FIXME: implement this, correctly (not sure if it's correct) my $member = $this->[_MEMBER_HASH]{$key}; $member->{value}[$index] = $value; } sub _pack_string { my $this = shift; # a beautifully illegible piece of code :) # this just builds a pack string for the complete structure. join ' ', map { my $subtype = ref($_->{type}) # object? ? $_->{type}->_pack_string() # yes +, recurse into it : $_->{type}; # no, + don't recurse into it. $_->{dim} ? "($subtype)$_->{dim}" : $subtype } @{$this}[_FIRST_MEMBER..$#$this]; } sub _unpack_string { my $this = shift; # not quite the same as _pack_string; embedded SeeStructs are extr +acted as blobs and load()ed afterward. join ' ', map { my $subtype = ref($_->{type}) # object? ? 'a'.$_->{type}->size() # yes +, just return blob of data : $_->{type}; # no, + don't recurse into it. $_->{dim} ? "($subtype)$_->{dim}" : $subtype } @{$this}[_FIRST_MEMBER..$#$this]; } sub _pack_list { my $this = shift; # corresponding to _pack_string, returns a list of values for pack +ing purposes. map { # this is kinda convoluted because of the embedded SeeStru +ct thing. # if $type is a reference and $dim is nonzero, we need a n +ested map. ($_->{dim} && ref($_->{type})) ? map { $_->_pack_list } @{$_->{value}} : (ref $_->{type}) ? $_->{value}->_pack_list : ($_->{dim}) ? @{$_->{value}} : $_->{value} } @{$this}[_FIRST_MEMBER..$#$this]; } sub tostr { my $this = shift; pack( $this->_pack_string, $this->_pack_list ); } sub zero { my $this = shift; $this->load("\x00" x $this->[_STRUCTURE_SIZE]); } sub load { my ($this, $value) = @_; #print "load: structure requires ", $this->[_STRUCTURE_SIZE], " by +tes, we were given ", length($value); croak "SeeStruct::load(): structure requires ", $this->[_STRUCTURE +_SIZE], " bytes, but only ", length($value), " bytes given!" unless length($value) >= $this->[_STRUCTURE_SIZE]; # FIXME: implement this fully (i.e. handle embedded structs). #print "load: unpack string is '", $this->_unpack_string, "'"; my @data = unpack($this->_unpack_string, $value); for my $m (@{$this}[_FIRST_MEMBER..$#$this]) { if ($m->{dim} && ref $m->{type}) { # *shudder* an array of SeeStruct objects. for (0..$m->{dim}-1) { $m->{value}[$_]->load(shift @data); } } elsif (ref $m->{type}) { # A single SeeStruct object. $m->{value}->load(shift @data); } elsif ($m->{dim}) { # An array of simple types @{$m->{value}} = splice(@data, 0, $m->{dim}); } else { # A simple type $m->{value} = shift @data; } } $this; } { package SeeStruct::TiedHash; use Scalar::Util qw(weaken); use constant { _PACKED_STRING => 0, _MEMBER_HASH => 1, _TIED_OBJ +ECT => 2, _FIRST_MEMBER => 3 }; sub TIEHASH { my ($class, $backref) = @_; weaken $backref; # weaken the backreference so the circular + references don't keep us alive forever. bless [$backref, 0] } sub FETCH { my ($this, $key) = @_; my $sub = $this->[0]->can('_fetch'); @_ = ($this->[0], $key); goto $sub; # eliminate us from the call stack #$this->[0]->_fetch($key); } sub STORE { my ($this, $key, $value) = @_; my $sub = $this->[0]->can('_store'); @_ = ($this->[0], $key, $value); goto $sub; # eliminate us from the call stack #$this->[0]->_store($key, $value); } sub EXISTS { my ($this, $key) = @_; exists $this->[0][1]{$key}; } # return structure members in order. sub FIRSTKEY { my $this = shift; $this->[0][ ($this->[1]=0) + _FIRST_MEMBER ]{name} } sub NEXTKEY { my $this = shift; if ($this->[1] <= $#{$this->[0]}) { $this->[0][ (++$this->[1]) + _FIRST_MEMBER ]{name} } else { $this->[1] = 0; # reset to the first key return (); } } } { package SeeStruct::TiedArray; use Carp qw(croak); use Scalar::Util qw(weaken); use constant { _PACKED_STRING => 0, _MEMBER_HASH => 1, _TIED_OBJ +ECT => 2, _FIRST_MEMBER => 3, _BACKREF => 0, _MEMBERNAME => 1, _ +MEMBERDIM => 2, }; sub TIEARRAY { my ($class, $backref, $member, $size) = @_; weaken $backref; # weaken the backreference so the circular + references don't keep us alive forever. bless [$backref, $member, $size] } sub FETCHSIZE { my $this = shift; $this->[_MEMBERDIM] } sub FETCH { my ($this, $index) = @_; croak "Index $index out of range for member (0..$this->[2])" u +nless 0 <= $index && $index < $this->[_MEMBERDIM]; my $sub = $this->[0]->can('_fetch_index'); @_ = ($this->[0], $this->[_MEMBERNAME], $index); goto $sub; # eliminate us from the call stack } sub STORE { my ($this, $index, $value) = @_; croak "Index $index out of range for member (0..$this->[2])" u +nless 0 <= $index && $index < $this->[_MEMBERDIM]; my $sub = $this->[0]->can('_store_index'); @_ = ($this->[0], $this->[_MEMBERNAME], $index, $value); goto $sub; # eliminate us from the call stack } sub EXISTS { my ($this, $index) = @_; 0 <= $index && $index < $this->[_MEMBERDIM]; } } # _is_single_element(whatever) # test if the given string corresponds to a single element # (this helps pack_to_seestruct decide that '(A8)5' is an array of 5 + elements # while '(A8S)5' is an array of 5 structures made up of 'A8S'. sub _is_single_element { my $string = shift; $string =~ /^[bBhHcCsSiIlLnNvVjJfFdDpPaAZqQ]$/ || $string =~ /^[aAZ]\d+$/; } # _stick_here($original_string, $current_string) # sticks a 'HERE' in a appropriate place sub _stick_here { my ($org, $cur) = @_; substr($org, 0, -length $cur) . '<<HERE ' . $cur; } # _pack_to_seestruct('string', namelist) # returns a list of arguments for passing to seestruct sub _pack_to_seestruct { my $string = $_[0]; my @namelist = @{$_[1]}; my @args; while (length $string) { $string =~ s/^\s+//; last unless length $string; my $type = substr($string, 0, 1); # grab first character # comments if ($type eq '#') { $string =~ s/^.*$//m; next; } # aAZ may be followed by a number and it's still a single elem +ent if ($type =~ /[aAZ]/) { my $len; substr($string,0,1,''); $len = ($string =~ s/^(\d+)//) ? # extract a number from $str +ing $1 : # if we got a number, use that number ''; # otherwise it's a length of 1 croak "not enough names! at ", _stick_here($_[0], $string) + unless @namelist; push @args, shift @namelist, "$type$len"; next; } # the others may also be followed by a number, but it's an arr +ay depth. if ($type =~ /[bBhHcCsSiIlLnNvVjJfFdDpPqQ]/) { my $dim; substr($string,0,1,''); $dim = ($string =~ s/^(\d+)//) ? # extract a number from $str +ing $1 : # if we got a number, use that number ''; # otherwise it's a length of 1 croak "not enough names! at ", _stick_here($_[0], $string) + unless @namelist; push @args, shift @namelist, "$type$dim"; next; } $string =~ s/^.//, next if $type =~ /\s/; # ignore whitespac +e. if ($type eq '(') { # extract the balanced string. $string =~ s/^($RE{balanced})(\d*)// or croak "Unmatched open parenthesis, man! at ", _stick_h +ere($_[0], $string); my ($group, $dim) = ($1,$2); s/^.//,s/.$// for $group; # remove the parentheses thems +elves. croak "not enough names! at ", _stick_here($_[0], $string) + unless @namelist; my $name = shift @namelist; if (_is_single_element($group)) { # eh, it's just a simple array, man. push @args, $name, $dim ? [ $group, $dim ] : $group; next; } # hmm.. it's not such a simple group :( # make another SeeStruct for embedding purposes croak "not enough names! at ", _stick_here($_[0], $string) + unless @namelist; my $estruct = new SeeStruct ( _pack_to_seestruct ( $group +, shift @namelist ) ); push @args, $name, $dim ? [ $estruct, $dim ] : $estruct; next; } if ($type eq ')') { croak "Unmatched closing parenthesis, man! at ", _stick_he +re($_[0], $string); } croak "Unknown pack character '$type' encountered at ", _stick +_here($_[0], $string) } @args; } 1;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Suggestions for a new module
by simonm (Vicar) on Apr 15, 2004 at 02:46 UTC | |
|
Re: Suggestions for a new module
by halley (Prior) on Apr 15, 2004 at 16:44 UTC | |
by Stevie-O (Friar) on Apr 16, 2004 at 05:17 UTC | |
|
Re: Suggestions for a new module
by ysth (Canon) on Apr 15, 2004 at 00:05 UTC |