use strict;
use warnings;
use Data::Dumper;

# Parse the SGML-like bibliography embedded after __DATA__ and dump it as a
# hash of records keyed by author name.
#
# Each record is delimited by <ref> ... </ref>.  Field tags (<author>,
# <year>, <source>, <id>, <title>, <key>) have no closing tag; a field's
# value runs from its tag up to the next '<'.

# Sort hash keys so the dump is stable from run to run.
$Data::Dumper::Sortkeys = 1;

my %records;    # book info keyed by author; a later record with the same
                # author replaces the earlier one

# Slurp the whole data section so parsing does not depend on how the input
# happens to be split into lines.
my $content = do { local $/; <DATA> };
$content = '' unless defined $content;

while ($content =~ m{(<ref>.*?</ref>)}gs) {
    my $raw = $1;    # copy before process_record runs its own matches
    my $rec = process_record($raw);

    # An undefined or empty author cannot serve as a hash key.
    unless (defined $rec->{author} && length $rec->{author}) {
        warn "skipping <ref> record without an author\n";
        next;
    }
    $records{ $rec->{author} } = $rec;
}

print Dumper(\%records);

# process_record($text)
#
# Extract the fields of one <ref> record from $text.
#
# Returns a hash reference with the keys:
#   author, year, source, id, title - first occurrence of each tag, with
#                                     surrounding whitespace removed; undef
#                                     when the tag is absent
#   keywords                        - array reference holding the trimmed
#                                     value of every <key> tag, in order
sub process_record {
    my $rec = shift;
    $rec = '' unless defined $rec;

    my %col;
    for my $field (qw(author year source id title)) {
        # No look-ahead for '<' is required: [^<]* already stops at the next
        # tag, and this way a field at the very end of the text still matches.
        my ($value) = $rec =~ m{<\Q$field\E>\s*([^<]*)};
        $value =~ s/\s+\z// if defined $value;
        $col{$field} = $value;
    }

    # '<key>' cannot match inside '<keywords>' because of the closing '>'.
    my @keywords = $rec =~ m{<key>\s*([^<]*)}g;
    s/\s+\z// for @keywords;
    $col{keywords} = \@keywords;

    return \%col;
}

__DATA__
<ref>
<provnc>
<aulist>
<author> Bin Laden
</aulist>
<year>1990
<source> Cambridge University Press, Cambridge UK, 1st edition
<id>1
<keywords>
<key>terrorism
<key>whatever
</keywords>
</provnc>
<title> Terrorism
</ref>
<ref>
<provnc>
<aulist>
<author> Sydney
</aulist>
<year>1990
<source> Cambridge University Press, Cambridge UK, 1st edition
<id>1
<keywords>
<key>nothing
<key>whatever
</keywords>
</provnc>
<title> Terrorism
</ref>