package SeeStruct; use strict; use warnings; use Scalar::Util qw(blessed); # why the heck perl doesn't have an 'obj()' to match 'ref()', I don't know use Storable; # for supporting embedded structs. use Carp qw(carp croak); use Regexp::Common 'balanced'; use overload fallback => 1, q[""] => 'tostr', q[%{}] => 'tohash'; our $VERSION = '0.01'; use constant { _STRUCTURE_SIZE => 0, _MEMBER_HASH => 1, _TIED_OBJECT => 2, _FIRST_MEMBER => 3 }; =pod =head1 NAME SeeStruct (working title; perhaps "Struct::Packed"?) - Treat a data structure creatable by pack() as a hash. Suggestions so far: Class::PackedStruct Data::PackedStruct =head1 VERSION 0.01 pre-alpha ;) =head1 SYNOPSIS # consider the following: # struct data { # long x; # long y; # char name[8]; // ASCIIZ # short data[5]; # } # this corresponds to a pack/unpack string of 'llZ8S5'. use SeeStruct; $data = new SeeStruct ( x => 'l', y => 'l', name => 'Z8', data => 'S5' ); # Several ways to find the size of the structure print "The structure is ", $data->size(), " bytes long."; print "A C coder might say it is ", sizeof $data, " bytes long."; # and of course, size $data and $data->sizeof() work too. $hypothetical = pack('llZ8S5', 10, 20, 'hi', 100, 200, 300, 400, 500); # load a packed string into $data $data->load($hypothetical); $data->{name} = 'hello'; print "The first element of data is $data->{data}[0]"; # prints 100 $data->{data}[1] = 201; # stringify to obtain the packed form. $hypothetical = "$data"; $data2 = new SeeStruct($data); # clone $data; the new structure is zeroed. print "The members of \$data are ", join(' ', keys %$data); # prints 'x y name data'. } =head1 DESCRIPTION Have you ever had to deal with a large structure in a binary form, using pack() and unpack() to access it? I do it all the time, and I wind up doing something like this: @hash{qw(this that theother a b c d e f g)} = unpack('...', $string); It gets worse when there are embedded arrays I want to treat that way: (@hash{qw(x y name)}, @{$hash{data}}[0..4]) = unpack('llZ8S5', $string); (*shudder*) And when I want to change a few members of the structure, it's even MORE of a PITA: $string = pack('llZ8S5', @hash{qw(x y name)}, @{$hash{data}); Yechh. So I wrote this module. Given a set of (member, type) pairs, you can easily manipulate binary structure members if you know (or can find out) the pack() format for them: $foo = new SeeStruct(x => 'l', y => 'l', name => 'Z8'); $foo->load($string); $foo->{y} = 5; $string = "$foo"; You can even have arrays: $foo = new SeeStruct(x => 'l', y => 'l', name => 'Z8', data => 'S5'); $foo->load($string); $foo->{data}[3] = 10; $string = "$foo"; Or embedded structures: $foo => new SeeStruct(hostname => 'Z64', service => 'Z10', addr_in => new SeeStruct(sin_family => 's', sin_port => 'n', sin_addr => 'N', sin_zero => 'a8' ) ); $foo->{hostname} = 'ftp.example.com'; $foo->{service} = 'ftp'; $foo->{addr_in}{sin_family} = 2; # AF_INET; $foo->{addr_in}{sin_port} = 21; $foo->{addr_in}{sin_addr} = inet_aton('127.0.0.1'); Or arrays of embedded structures: $foo => new SeeStruct(hostname => 'Z64', service => 'Z10', addresses => [ new SeeStruct(sin_family => 's', sin_port => 'n', sin_addr => 'N', sin_zero => 'a8' ), 5 ] ); # now there are 5 addresses in $foo $SeeStruct::DUMPMODE = 1; # causes SS to behave in a fashion friendly to # Data::Dumper and perl5db's "x" command. print Data::Dumper::Dump \%$foo; $VAR1 = { 'hostname' => undef, 'service' => '', 'addresses' => [ { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' }, { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' }, { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' }, { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' }, { 'sin_family' => undef, 'sin_port' => 0, 'sin_addr' => 0, 'sin_zero' => ' ' } ] }; Note that, unlike the various other modules on CPAN that provide C-struct-like access, mine takes actual pack() formats, so one can specify 'n' and 'N' for explicit network-byte-order packing ;) The general format is this: name => '$typechar' # $typechar in bBhHcCsSiIlLnNvVjJfFdDpPaAZqQ # for a single member of that type. name => $obj # $obj being a SeeStruct object # for a member that is a single embedded struct name => '$schar$len' # $schar in aAZ # $len being a number # for a single member of that type and size. name => '$nchar$n' # $nchar in bBhHcCsSiIlLnNvVjJfFdDpPqQ # $n being a number # for an array of $n elements of type $nchar # this is a shortcut for the following syntax: name => [ $whatever, # $whatever corresponding to '$typechar', $n ] # '$obj', or '$schar$len', # and $n being a number # for an array of $n elements of type $whatever =head1 EXAMPLES $foo = new SeeStruct( this => 'l', that => 's', theother => 'C', ); $bar => new SeeStruct( four_shorts => 's4', also_four_shorts => ['s',4] name => 'Z8', # single element, ASCIIZ string up to 8 long names => ['Z8', 4], # four such names sub_struct => new SeeStruct(lng => 'l', shrt => 's', chr => 'C'), struct_array => [ new SeeStruct( lng => 'l', shrt => 's', chr =. 'C'), 5 ] ); # these will croak $invalid = new SeeStruct( severalshorts => ['s4', 2], # how many shorts, 4 or 2? ); $invalid = new SeeStruct( a => 'l', b => 'l', c => 'l', a => 'l' # already have a member named 'a'! ); # A struct with reserved fields: $bar => new SeeStruct( data1 => 'L', '' => 'S', # 16-bit reserved field data2 => 'S', '' => 'L', # 32-bit reserved field ); # the ''-named fields will be given unique generated names. There is also a completely different alternate syntax that I'm going to support (but not quite yet). It goes like this: # corresponds to (x => 'l', y => 'l', name => 'Z8', data => 'S5') $struct = new SeeStruct( [qw(x y name data)], 'llZ8S5' ); # (a => 'l', b => 'l', c => ['Z8', 5]) $struct = new SeeStruct( [qw(a b c)], 'll(Z8)5' ); # The real advantage comes from things like this: # (a => 'l', b => 'l', c => new SeeStruct(b=>'C',b2=>'C',w=>'s',d=>'l'), # d => 'a8') $struct = new SeeStruct( [qw(a b c), [qw(b b2 w d)], qw(d)], 'll(CCsl)a8' ); # (a => 'l', b => [ new SeeStruct(k => 'A8', v => 'A8'), 10 ], c => 's') $struct = new SeeStruct( [qw(a b), [qw(k v)], qw(c)], 'l(A8A8)10s' ); Supporting this will be a little complicated because I need to look ahead to determine that '(A8)5' is a simple array of type 'A8' whereas '(A8S)5' needs to be an embedded structure. =cut our %typesize; # I use eval here because q and Q throw a fatal error on perls not built with 64-bit integer support. eval { $typesize{$_} = length(pack($_, 0)) } for split //, 'bBhHcCsSiIlLnNvVjJfFdDpPaAZqQ'; our $WARNINGS = 1; our $DUMPMODE = 0; sub size { my $this = shift; return $this->[_STRUCTURE_SIZE]; } # alias size to sizeof so IO syntax "sizeof $obj" works *sizeof = \&size; sub _dupe { Storable::dclone(shift) } sub new { my $class = shift; my @list; my $rcount; my @members; my %members; my $offs = 0; # DWIM #1: new SeeStruct($obj) clones $obj return $_[0]->_dupe->zero if (ref($_[0]) && @_==1); # DWIM #2: alternate (packstring-based) syntax if (ref($_[0]) && @_==2) { @_ = _pack_to_seestruct(reverse @_); } while (my ($name, $type) = splice(@_, 0, 2)) { $name ||= "_reserved_${offs}_" . $rcount++; croak "A member named $name was already specified!" if exists $members{$name}; my $size; my $dim; # several possibilities: # 1. $type is a single character of [bBhHcCsSiIlLnNvVqQjJfFdDpPaAZ], in which case this is a single struct element. # Entry is accessed with $obj->{name} # 2. $type is a string consisting of a letter [aAZ] followed by a number, in which case this is also a single struct element. # Entry is accessed with $obj->{name} # 3. $type is a reference to another SeeStruct object, in which case a struct of that type is embedded within. # Note that the passed object is NOT used directly, but is instead cloned (this allows an array to be used). # 4. $type is a string consisting of a letter [bBhHcCsSiIlLnNvVqQjJfFdDpP] followed by a number, in which case this is an array of the # specified type. # Entry is accessed with $obj->{name}[index]. # 5. $type is an reference to an array with two elements, with the first element matching #1, #2, or #3 (NOT #4!), in which case this is an array # of that type, and the second element being the size of the array (this is an alternate form of #4, and the only way to have an array of # a type matching #2 or #3. reparse: if (!ref $type) { if ($type =~ /^[bBhHcCsSiIlLnNvVqQjJfFdDpPaAZ]$/) { # this is for q and Q, which are only defined on perls built with 64-bit integer support croak "This version of perl does not support type $type" unless exists $typesize{$type}; $size = $typesize{$type}; } elsif ($type =~ /^[aAZ](\d+)$/) { $size = $1; } elsif ($type =~ /^([bBhHcCsSiIlLnNvVqQjJfFdDpP])(\d+)$/) { croak "Array dimension specified twice for member '$name'" if $dim; $type = $1; $size = $typesize{$type}; $dim = $2; # this is an array element. } else { croak "Can't grok type specifier '$type' for member '$name'"; } } elsif (blessed($type)) { croak "Hmm, '$name' has an object for a type, but it's not another ".__PACKAGE__." class!" unless $type->isa(__PACKAGE__); $type = $type->_dupe(); $size = sizeof $type; } elsif (ref $type eq 'ARRAY') { # if $dim is already defined, they did something crazy like [ [ 'C', 4 ], 5 ]. croak "WTF? You gave me a nested array for the type of '$name'?!?" if $dim; croak "Hmm, '$name' has an array type with ".@$type." elements, but I was expecting 2 elements." unless (@$type == 2); croak "Hmm, '$name' seems to have a nonsensical array size $type->[1]" unless ($type->[1] >= 1); carp "Struct member '$name' is an array of only 1 element?" if $WARNINGS and $type->[1] == 1; ($type, $dim) = @$type; print "type is ",ref($type)||$type, ", dim is $dim; reparsing"; goto reparse; # re-grok $type. } else { croak "I have no idea what to do with the type you specified for member '$name'"; } #print "$name: type is ",ref($type)||$type, ", dim is ", ($dim||0); my $initlist = ($dim && ref($type)) ? # for an array of SeeStruct objects, we have to clone another $type object for each entry. [ map{$type->_dupe()} 1..$dim ] : (ref $type) ? # for a single SeeStruct object, we just specify our $type object again. $type : ($dim) ? # for normal types things are simpler. [ undef() x $dim ] : undef; push @members, $members{$name} = { name => $name, # member name offset => $offs, # offset from start of data tsize => $size * ($dim||1), # total size of all elements size => $size, # size of each element type => $type, # pack string, or a reference to another SeeStruct object dim => $dim, # elements in array (0/undef if not array) value => $initlist, # initial value (undef for all but $types) }; $offs += $size * ($dim || 1); # $dim of 0/undef is the same size as $dim of 1 } unshift @members, $offs, # size of the structure \%members, # hash of members for quick by-name access undef; # reference to tied hash; this will be created by tohash() if/when needed bless(\@members, $class)->zero(); } sub tohash { my $this = shift; $this->[_TIED_OBJECT] ||= $this->_gentiedhash(); # this returns $this->[_TIED_OBJECT], creating it first if necessary } # _gentiedhash # Return a reference to a tied hash to handle dereferencing. # Override it if you want to :) sub _gentiedhash { my ($backref) = @_; my $hash; tie %$hash, __PACKAGE__.'::TiedHash', $backref; $hash } # _gentiedarray # Return a reference to a tied array to handle members that are arrays. sub _gentiedarray { my ($backref, $member, $size) = @_; my $array; tie @$array, __PACKAGE__.'::TiedArray', $backref, $member, $size; $array } sub _fetch { my ($this, $key) = @_; croak "No such member '$key' in object" unless exists $this->[_MEMBER_HASH]{$key}; my $member = $this->[_MEMBER_HASH]{$key}; $member->{dim} ? ($member->{_tiedarray} ||= $this->_gentiedarray($key, $member->{dim})) : ($DUMPMODE && ref $member->{type}) ? \%{$member->{value}} : $member->{value}; } sub _store { my ($this, $key, $value) = @_; croak "No such member '$key' in object" unless exists $this->[_MEMBER_HASH]{$key}; my $member = $this->[_MEMBER_HASH]{$key}; croak "Can't overwrite an array member wholesale" if $member->{dim}; # TODO: it might make more sense to call the ->load() method right here automatically... croak "Can't store directly to an inner SeeStruct object (use ->load())" if ref $member->{type}; $member->{value} = $value; $value; } sub _fetch_index { my ($this, $key, $index) = @_; # FIXME: implement this correctly (not sure if it's correct) my $member = $this->[_MEMBER_HASH]{$key}; ($DUMPMODE && ref $member->{type}) ? \%{$member->{value}[$index]} : $member->{value}[$index]; } sub _store_index { my ($this, $key, $index, $value) = @_; # FIXME: implement this, correctly (not sure if it's correct) my $member = $this->[_MEMBER_HASH]{$key}; $member->{value}[$index] = $value; } sub _pack_string { my $this = shift; # a beautifully illegible piece of code :) # this just builds a pack string for the complete structure. join ' ', map { my $subtype = ref($_->{type}) # object? ? $_->{type}->_pack_string() # yes, recurse into it : $_->{type}; # no, don't recurse into it. $_->{dim} ? "($subtype)$_->{dim}" : $subtype } @{$this}[_FIRST_MEMBER..$#$this]; } sub _unpack_string { my $this = shift; # not quite the same as _pack_string; embedded SeeStructs are extracted as blobs and load()ed afterward. join ' ', map { my $subtype = ref($_->{type}) # object? ? 'a'.$_->{type}->size() # yes, just return blob of data : $_->{type}; # no, don't recurse into it. $_->{dim} ? "($subtype)$_->{dim}" : $subtype } @{$this}[_FIRST_MEMBER..$#$this]; } sub _pack_list { my $this = shift; # corresponding to _pack_string, returns a list of values for packing purposes. map { # this is kinda convoluted because of the embedded SeeStruct thing. # if $type is a reference and $dim is nonzero, we need a nested map. ($_->{dim} && ref($_->{type})) ? map { $_->_pack_list } @{$_->{value}} : (ref $_->{type}) ? $_->{value}->_pack_list : ($_->{dim}) ? @{$_->{value}} : $_->{value} } @{$this}[_FIRST_MEMBER..$#$this]; } sub tostr { my $this = shift; pack( $this->_pack_string, $this->_pack_list ); } sub zero { my $this = shift; $this->load("\x00" x $this->[_STRUCTURE_SIZE]); } sub load { my ($this, $value) = @_; #print "load: structure requires ", $this->[_STRUCTURE_SIZE], " bytes, we were given ", length($value); croak "SeeStruct::load(): structure requires ", $this->[_STRUCTURE_SIZE], " bytes, but only ", length($value), " bytes given!" unless length($value) >= $this->[_STRUCTURE_SIZE]; # FIXME: implement this fully (i.e. handle embedded structs). #print "load: unpack string is '", $this->_unpack_string, "'"; my @data = unpack($this->_unpack_string, $value); for my $m (@{$this}[_FIRST_MEMBER..$#$this]) { if ($m->{dim} && ref $m->{type}) { # *shudder* an array of SeeStruct objects. for (0..$m->{dim}-1) { $m->{value}[$_]->load(shift @data); } } elsif (ref $m->{type}) { # A single SeeStruct object. $m->{value}->load(shift @data); } elsif ($m->{dim}) { # An array of simple types @{$m->{value}} = splice(@data, 0, $m->{dim}); } else { # A simple type $m->{value} = shift @data; } } $this; } { package SeeStruct::TiedHash; use Scalar::Util qw(weaken); use constant { _PACKED_STRING => 0, _MEMBER_HASH => 1, _TIED_OBJECT => 2, _FIRST_MEMBER => 3 }; sub TIEHASH { my ($class, $backref) = @_; weaken $backref; # weaken the backreference so the circular references don't keep us alive forever. bless [$backref, 0] } sub FETCH { my ($this, $key) = @_; my $sub = $this->[0]->can('_fetch'); @_ = ($this->[0], $key); goto $sub; # eliminate us from the call stack #$this->[0]->_fetch($key); } sub STORE { my ($this, $key, $value) = @_; my $sub = $this->[0]->can('_store'); @_ = ($this->[0], $key, $value); goto $sub; # eliminate us from the call stack #$this->[0]->_store($key, $value); } sub EXISTS { my ($this, $key) = @_; exists $this->[0][1]{$key}; } # return structure members in order. sub FIRSTKEY { my $this = shift; $this->[0][ ($this->[1]=0) + _FIRST_MEMBER ]{name} } sub NEXTKEY { my $this = shift; if ($this->[1] <= $#{$this->[0]}) { $this->[0][ (++$this->[1]) + _FIRST_MEMBER ]{name} } else { $this->[1] = 0; # reset to the first key return (); } } } { package SeeStruct::TiedArray; use Carp qw(croak); use Scalar::Util qw(weaken); use constant { _PACKED_STRING => 0, _MEMBER_HASH => 1, _TIED_OBJECT => 2, _FIRST_MEMBER => 3, _BACKREF => 0, _MEMBERNAME => 1, _MEMBERDIM => 2, }; sub TIEARRAY { my ($class, $backref, $member, $size) = @_; weaken $backref; # weaken the backreference so the circular references don't keep us alive forever. bless [$backref, $member, $size] } sub FETCHSIZE { my $this = shift; $this->[_MEMBERDIM] } sub FETCH { my ($this, $index) = @_; croak "Index $index out of range for member (0..$this->[2])" unless 0 <= $index && $index < $this->[_MEMBERDIM]; my $sub = $this->[0]->can('_fetch_index'); @_ = ($this->[0], $this->[_MEMBERNAME], $index); goto $sub; # eliminate us from the call stack } sub STORE { my ($this, $index, $value) = @_; croak "Index $index out of range for member (0..$this->[2])" unless 0 <= $index && $index < $this->[_MEMBERDIM]; my $sub = $this->[0]->can('_store_index'); @_ = ($this->[0], $this->[_MEMBERNAME], $index, $value); goto $sub; # eliminate us from the call stack } sub EXISTS { my ($this, $index) = @_; 0 <= $index && $index < $this->[_MEMBERDIM]; } } # _is_single_element(whatever) # test if the given string corresponds to a single element # (this helps pack_to_seestruct decide that '(A8)5' is an array of 5 elements # while '(A8S)5' is an array of 5 structures made up of 'A8S'. sub _is_single_element { my $string = shift; $string =~ /^[bBhHcCsSiIlLnNvVjJfFdDpPaAZqQ]$/ || $string =~ /^[aAZ]\d+$/; } # _stick_here($original_string, $current_string) # sticks a 'HERE' in a appropriate place sub _stick_here { my ($org, $cur) = @_; substr($org, 0, -length $cur) . '<