in reply to Re^2: Parsing .txt into arrays
in thread Parsing .txt into arrays
So let's focus on hippo's step 1 first, since it is the key to the next steps.
In some ways this is easy, and in other ways it will be hard.
If I were to do this based on what you have shown us, I would start with a base you have already identified: one table begins with "work and language :65", another table begins with "place and year data: 67", and yet another with "Position log".
The code below takes that concept and uses what's sometimes called a state machine to separate the lines into table parts. I then kept going to parse all the data into a hash of arrays of hashes. I realize it's not quite the output style you wanted, but it shows a lot of the techniques, and you could modify it to get what you want.
# Split a text report made of several ASCII tables into a hash of arrays of
# hashes:  $tables{<table key>}[<row index>]{<column title>} = <cell text>.
#
# A small state machine drives the parse: a marker line selects the table
# being read, header rows accumulate the column titles for that table, and
# every row whose first cell is all digits is stored as a data row.
# Running the script prints the Data::Dumper dump of the parsed tables.
use strict;
use warnings;
use Data::Dumper;

print Dumper( parse_tables( \*DATA ) );

# parse_tables($fh)
#   $fh     - readable filehandle positioned at the start of the report.
#   returns - hash ref: table key => array ref of row hash refs
#             (column title => cell text).
sub parse_tables {
    my ($fh) = @_;

    # Text that opens each table => key the table is stored under.
    # Checked in this order; the first marker found in a line wins.
    my @markers = (
        [ 'place and year data: 67' => 'place' ],
        [ 'work and language :65'   => 'work' ],
        [ 'Position log'            => 'position' ],
    );

    my $state = '';    # key of the table currently being read ('' = none yet)
    my @titles;        # column titles collected so far for that table
    my %tables;

    LINE:
    while ( my $line = <$fh> ) {
        chomp $line;

        # A marker line switches tables and forgets the previous titles.
        for my $marker (@markers) {
            my ( $text, $key ) = @{$marker};
            next if index( $line, $text ) == -1;
            $state  = $key;
            @titles = ();
            next LINE;
        }

        # Anything without a bar (rulers, blank lines, ...) is not a table row.
        next LINE if index( $line, '|' ) == -1;

        $line =~ s/^\s*//;    # take off leading spaces
        $line =~ s/\s*$//;    # take off trailing spaces
        $line =~ s/^\|//;     # take off leading bar

        # No LIMIT argument: split drops the empty field after the final bar.
        my @cells = split /\|/, $line;
        for my $cell (@cells) {
            $cell =~ s/_//g;      # remove any underscores
            $cell =~ s/^\s*//;    # take off leading spaces
            $cell =~ s/\s*$//;    # take off trailing spaces
        }

        # A line consisting only of bars yields no cells at all; without this
        # guard the digit test below would run on an undefined value.
        next LINE unless @cells;

        # First cell is not all digits: this is (part of) a header row.
        # Headers may span several lines, so pieces in the same column are
        # joined with a single space.
        if ( $cells[0] !~ m/^\d+$/ ) {
            for my $ix ( 0 .. $#cells ) {
                next if $cells[$ix] eq '';
                $titles[$ix] =
                    defined $titles[$ix]
                    ? "$titles[$ix] $cells[$ix]"
                    : $cells[$ix];
            }
            next LINE;
        }

        # First cell is all digits: this is a data row.
        my %row;
        for my $ix ( 0 .. $#cells ) {

            # A cell with no matching title gets a positional name instead of
            # an undefined hash key (which warns and makes cells collide).
            my $title = defined $titles[$ix] ? $titles[$ix] : "col$ix";
            $row{$title} = $cells[$ix];
        }
        push @{ $tables{$state} }, \%row;
    }

    return \%tables;
}

__DATA__
place and year data: 67
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|no.| name | age | place | year |
|_ _|_ _ _ _|_ _ _ | _ _ _ | _ _ |
|1 | sue |33 | NY | 2015 |
|2 | mark |28 | cal | 2106 |
work and language :65
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|no.| name | languages | proficiency | time taken|
|_ _| _ _ _| _ _ _ _ _ |_ _ _ _ _ _ _| _ _ _ _ _ |
|1 | eliz | English | good | 24 hrs |
|2 | susan| Spanish | good | 13 hrs |
|3 | danny| Italian | decent | 21 hrs |
Position log
| | |Pos |value | |bulk|lot| prev| newest|
|# |Locker|(dfg) |(no) |nul|val |Id | val |val |
-----------------------------------------------------------
| 0| 1| 302832| -11.88| 1| 0|Pri| 16| 0|
| 1| 9| 302836| 11.88| 9| 0|Pri| 10| 0|
| 2| 1| 302832| -11.88| 5| 3|Pri| 14| 4|
| 3| 3| 302833| 11.88| 1| 0|sec| 12| 0|
| 4| 6| 302837| -11.88| 1| 0|Pri| 16| 3|
$VAR1 = { 'work' => [ { 'languages' => 'English', 'no.' => '1', 'name' => 'eliz', 'time taken' => '24 hrs', 'proficiency' => 'good' }, { 'no.' => '2', 'languages' => 'Spanish', 'name' => 'susan', 'proficiency' => 'good', 'time taken' => '13 hrs' }, { 'name' => 'danny', 'time taken' => '21 hrs', 'proficiency' => 'decent', 'languages' => 'Italian', 'no.' => '3' } ], 'place' => [ { 'year' => '2015', 'place' => 'NY', 'name' => 'sue', 'no.' => '1', 'age' => '33' }, { 'year' => '2106', 'name' => 'mark', 'place' => 'cal', 'no.' => '2', 'age' => '28' } ], 'position' => [ { 'newest val' => '0', 'prev val' => '16', 'bulk val' => '0', 'value (no)' => '-11.88', 'Locker' => '1', '#' => '0', 'nul' => '1', 'Pos (dfg)' => '302832', 'lot Id' => 'Pri' }, { 'newest val' => '0', 'bulk val' => '0', 'prev val' => '10', 'Locker' => '9', 'value (no)' => '11.88', 'nul' => '9', '#' => '1', 'lot Id' => 'Pri', 'Pos (dfg)' => '302836' }, { 'lot Id' => 'Pri', 'Pos (dfg)' => '302832', 'newest val' => '4', 'bulk val' => '3', 'prev val' => '14', 'Locker' => '1', 'value (no)' => '-11.88', 'nul' => '5', '#' => '2' }, { 'nul' => '1', '#' => '3', 'Locker' => '3', 'value (no)' => '11.88', 'bulk val' => '0', 'prev val' => '12', 'newest val' => '0', 'lot Id' => 'sec', 'Pos (dfg)' => '302833' }, { '#' => '4', 'nul' => '1', 'value (no)' => '-11.88', 'Locker' => '6', 'bulk val' => '0', 'prev val' => '16', 'newest val' => '3', 'Pos (dfg)' => '302837', 'lot Id' => 'Pri' } ] };
|
|---|