use strict;
use warnings;

use Data::Dumper;
use XML::Rules;

# Demo: pull the binary property values of each compound out of a
# PubChem-style XML document and key them by compound ID (CID).
#
# The resulting structure is a hash of  CID => [ binary value, ... ].

my $parser = XML::Rules->new(

    # Subtrees we never look at are dropped as soon as their start tag is
    # seen, so no data is collected for them.
    start_rules => [
        'PC-InfoData_urn,PC-Compound_atoms' => 'skip',
    ],

    rules => [
        # Any tag without a specific rule is kept as  tagname => { ... }
        # in its parent's hash, which is what lets the handlers below walk
        # nested hash paths.
        _default => 'as is',

        # Leaf tags: keep only their text content.
        'PC-CompoundType_id_cid,PC-InfoData_value_binary' => 'content',

        # One property record. Contribute its binary value (if any) to the
        # 'InfoData' array of the enclosing tag; the '@' prefix asks
        # XML::Rules to push onto an array instead of overwriting.
        'PC-InfoData' => sub {
            my ( $tag, $attrs ) = @_;

            my $value  = $attrs->{'PC-InfoData_value'}         or return;
            my $binary = $value->{'PC-InfoData_value_binary'}  or return;

            return '@InfoData' => $binary;
        },

        # One compound. Emit  CID => [ binary values ]  to the parent, or
        # nothing at all when either piece is missing.
        'PC-Compound' => sub {
            my ( $tag, $attrs ) = @_;

            my $id = $attrs->{'PC-Compound_id'}{'PC-CompoundType'}
                {'PC-CompoundType_id'}{'PC-CompoundType_id_cid'}
                or return;    # no ID found

            my $data = $attrs->{'PC-Compound_props'}{'InfoData'}
                or return;    # no data

            return $id => $data;
        },

        # The document root just hands its collected CID => data pairs on
        # to the caller.
        'PC-Compounds' => 'pass',
    ],

    # Whitespace-stripping bitmask; see the XML::Rules documentation for
    # the meaning of the individual bits.
    stripspaces => 7,
);

my $data = $parser->parse( \*DATA );

# Sorted keys keep the dump stable when several compounds are present.
local $Data::Dumper::Sortkeys = 1;
print Dumper($data);

# NOTE(review): the XML markup of the sample below was lost in the copy this
# script was recovered from; only the two text values (the CID "1" and the
# hex string) survived. The element nesting is reconstructed from the hash
# paths the rules above read -- confirm against a real PubChem record.
__DATA__
<PC-Compounds>
  <PC-Compound>
    <PC-Compound_id>
      <PC-CompoundType>
        <PC-CompoundType_id>
          <PC-CompoundType_id_cid>1</PC-CompoundType_id_cid>
        </PC-CompoundType_id>
      </PC-CompoundType>
    </PC-Compound_id>
    <PC-Compound_props>
      <PC-InfoData>
        <PC-InfoData_value>
          <PC-InfoData_value_binary>00000371E0723800000000000000000000000000000000000000000000000000000000000000001E00000000000814E180060208030004000800009008000000000000000000010800000200140080000700000520001000002400000000000000000000000000000000000000000000000000000</PC-InfoData_value_binary>
        </PC-InfoData_value>
      </PC-InfoData>
    </PC-Compound_props>
  </PC-Compound>
</PC-Compounds>