# Parse the ASCII tables in the __DATA__ section into a hash of
#   table name => [ { column title => cell value, ... }, ... ]
# and print that structure with Data::Dumper.
#
# A table starts at a heading line (see @HEADINGS). Every following line that
# contains a "|" is split into cells; rows whose first cell is all digits are
# data rows, every other "|" row contributes to the column titles.

use strict;
use warnings;

use Data::Dumper;

# Heading text (matched as a substring of the line) paired with the key under
# which that table's rows are stored. Checked in order; the first match wins.
my @HEADINGS = (
    [ 'place and year data: 67', 'place'    ],
    [ 'work and language :65',   'work'     ],
    [ 'Position log',            'position' ],
);

# Split one "|"-delimited table line into a list of cleaned-up cells.
#
# The line is trimmed, one leading "|" is dropped, and each cell then has its
# underscores removed (this is what blanks out the "_ _ _" ruler rows) and its
# surrounding whitespace trimmed. Note that underscores are removed from every
# cell, data cells included. Trailing empty cells are dropped by split.
sub split_row {
    my ($line) = @_;

    $line =~ s/^\s+//;    # take off leading spaces
    $line =~ s/\s+$//;    # take off trailing spaces
    $line =~ s/^\|//;     # take off leading bar

    my @cells = split /\|/, $line;
    for my $cell (@cells) {
        $cell =~ s/_//g;      # remove any underscores
        $cell =~ s/^\s+//;
        $cell =~ s/\s+$//;
    }

    return @cells;
}

# Read table text from the filehandle $fh and return a reference to a hash
# mapping each table name to an array of row hashes (title => value).
#
# Rows seen before any heading are stored under the empty-string key.
sub parse_tables {
    my ($fh) = @_;

    my %tables;
    my @titles;
    my $state = '';

    LINE: while ( my $line = <$fh> ) {
        chomp $line;

        # A heading switches tables and discards the previous column titles.
        for my $heading (@HEADINGS) {
            my ( $text, $name ) = @{$heading};
            next if index( $line, $text ) == -1;
            $state  = $name;
            @titles = ();
            next LINE;
        }

        # Anything without a bar (blank lines, "----" rulers) is ignored.
        next LINE if index( $line, '|' ) == -1;

        my @cells = split_row($line);

        # A lone "|" yields no cells at all, hence the defined() guard.
        my $is_data = defined $cells[0] && $cells[0] =~ /^\d+$/;

        if ($is_data) {
            my %row;
            for my $ix ( 0 .. $#cells ) {
                # A row may be wider than the header; give the surplus cells a
                # positional name instead of using undef as a hash key.
                my $title = defined $titles[$ix] ? $titles[$ix] : "col$ix";
                $row{$title} = $cells[$ix];
            }
            push @{ $tables{$state} }, \%row;
        }
        else {
            # Header text may span several rows: join the pieces for each
            # column with a single space, skipping empty cells.
            for my $ix ( 0 .. $#cells ) {
                next if $cells[$ix] eq '';
                $titles[$ix] =
                    defined $titles[$ix]
                    ? "$titles[$ix] $cells[$ix]"
                    : $cells[$ix];
            }
        }
    }

    return \%tables;
}

# Sort hash keys so the dump is identical from run to run.
local $Data::Dumper::Sortkeys = 1;
print Dumper( parse_tables( \*DATA ) );

__DATA__
place and year data: 67
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|no.| name | age | place | year |
|_ _|_ _ _ _|_ _ _ | _ _ _ | _ _ |
|1 | sue |33 | NY | 2015 |
|2 | mark |28 | cal | 2106 |

work and language :65
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|no.| name | languages | proficiency | time taken|
|_ _| _ _ _| _ _ _ _ _ |_ _ _ _ _ _ _| _ _ _ _ _ |
|1 | eliz | English | good | 24 hrs |
|2 | susan| Spanish | good | 13 hrs |
|3 | danny| Italian | decent | 21 hrs |

Position log
| | |Pos |value | |bulk|lot| prev| newest|
|# |Locker|(dfg) |(no) |nul|val |Id | val |val |
-----------------------------------------------------------
| 0| 1| 302832| -11.88| 1| 0|Pri| 16| 0|
| 1| 9| 302836| 11.88| 9| 0|Pri| 10| 0|
| 2| 1| 302832| -11.88| 5| 3|Pri| 14| 4|
| 3| 3| 302833| 11.88| 1| 0|sec| 12| 0|
| 4| 6| 302837| -11.88| 1| 0|Pri| 16| 3|