use strict;
use warnings;
use Data::Dumper;

# Group the lines of a procedure file by record and dump the result.
#
# Input is read from the __DATA__ section at the bottom of this file.
# Each record starts with a "type 10" line whose first seven characters
# are digits; those seven digits are used as the record key.  The lines
# that follow start with a two-digit type code (20, 30, 40, 50 in the
# sample), two more digits and a dot; they are collected, in input order,
# under the most recent record key.
#
# Sample output for the embedded data (hash key order is not guaranteed
# and may differ between runs):
#
#   $VAR1 = {
#     '1000001' => {
#       '50' => [
#         '5001.11.1997Professional attendance being an attendance at',
#         '5001.11.1997other than consulting rooms, by a general',
#         '5001.11.1997practitioner on not more than 1 patient'
#       ],
#       '10' => '1000001 01.11.199600.00.00001 A1 1 SN Y',
#       '20' => [
#         '2001.11.200400098.0500073.5500083.35'
#       ]
#     },
#     '1000002' => {
#       '50' => [
#         '5001.11.1997Professional attendance being an attendance at',
#         '5001.11.1997other than consulting rooms, by a general',
#         '5001.11.1997practitioner on not more than 1 patient'
#       ],
#       '10' => '1000002 01.11.199600.00.00001 A1 1 SN Y',
#       '20' => [
#         '2001.11.200400098.0500073.5500083.35'
#       ]
#     },
#     etc...

# parse_records($fh)
#
# Reads lines from the filehandle $fh and returns a hash reference of the
# form:
#
#   { $recordkey => { 10 => $header_line, $type => [ @detail_lines ], ... } }
#
# The type-10 entry is a plain string (the whole header line); every other
# type maps to an array reference of whole lines.
#
# Detail lines that appear before any header line, and non-blank lines that
# match neither pattern, are skipped with a warning on STDERR.  Blank lines
# are skipped silently.
sub parse_records {
    my ($fh) = @_;

    my %data;
    my $recordkey;

    while ( my $line = <$fh> ) {
        chomp $line;

        if ( $line =~ m/^(\d{7})/ ) {
            # First line of a procedure record (type 10): start a new record.
            $recordkey = $1;
            $data{$recordkey}{10} = $line;
        }
        elsif ( $line =~ m/^(\d{2})\d{2}\./ ) {
            # Detail line (types 20, 30, 40, 50).
            # Copy the capture right away so later code cannot clobber it.
            my $type = $1;

            # Without a preceding header there is no record to attach to;
            # using the undefined key would file the line under ''.
            if ( !defined $recordkey ) {
                warn "input line $.: detail line before any record header, skipped: $line\n";
                next;
            }

            push @{ $data{$recordkey}{$type} }, $line;
        }
        elsif ( $line =~ m/\S/ ) {
            # Unrecognized line: report it instead of dropping it silently.
            warn "input line $.: unrecognized line, skipped: $line\n";
        }
    }

    return \%data;
}

print Dumper( parse_records( \*DATA ) );

__DATA__
1000001 01.11.199600.00.00001 A1 1 SN Y
2001.11.200400098.0500073.5500083.35
5001.11.1997Professional attendance being an attendance at
5001.11.1997other than consulting rooms, by a general
5001.11.1997practitioner on not more than 1 patient
1000002 01.11.199600.00.00001 A1 1 SN Y
2001.11.200400098.0500073.5500083.35
5001.11.1997Professional attendance being an attendance at
5001.11.1997other than consulting rooms, by a general
5001.11.1997practitioner on not more than 1 patient
1000003 01.11.199600.00.00001 A1 1 SN Y
2001.11.200400098.0500073.5500083.35
5001.11.1997Professional attendance being an attendance at
5001.11.1997other than consulting rooms, by a general
5001.11.1997practitioner on not more than 1 patient