I would recommend just letting the code parse out each table that it encounters. In the finish_current_table() subroutine, decide whether or not you want to keep the current table. I just hard-coded a regex, /2017.*?Fp379/, but of course this could be more flexible. Note that to "keep" the table, I added it to a @results data structure, which I "dumped" right before the program ends. I would presume that in the "real code", instead of adding to the @results structure, some export() function would be called to put the table into a DB or to write a discrete file in some sort of CSV format. I did not generate strictly conformant CSV (multi-word strings should be quoted).
From the size of the input file you are describing, it sounds to me like putting these tables into a SQL DB is the right way to go. The Perl DBI is fantastic.
Code:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

# Parses a text stream (here: the __DATA__ section) containing several
# ASCII-art tables and keeps the ones whose name matches $KEEP_TABLE_RE.
#
# The parser is a three-state machine driven one input line at a time:
#   GET_TABLE_NAME -> GET_COL_NAMES -> GET_DATA -> GET_TABLE_NAME ...

# Pattern deciding which tables to "keep".  The table name may span
# several lines, so /s lets ".*?" cross the embedded newlines.
my $KEEP_TABLE_RE = qr/2017.*?Fp379/s;

my @results = ();    # this is Array of Array,
                     # [$table_name, [@data]]
                     # row[0] of @data contains the column names
################
my $state;                 # current parser state (see above)
my $name;                  # accumulated (possibly multi-line) table name
my $line_num_start_rec;    # input line number where the record started
my $line_num_end_rec;      # may not need this
my @data;                  # data rows of the current table (array refs)
my @col_names;             # column names, merged across header lines

# Reset all per-table parser state and wait for the next table name.
sub start_new_table_entry
{
    $state              = 'GET_TABLE_NAME';
    $name               = "";
    @data               = ();
    @col_names          = ();
    $line_num_start_rec = 0;
    $line_num_end_rec   = 0;
    return;
}

# Close the table being parsed.  Does nothing if no table was started
# (state is still GET_TABLE_NAME).  Otherwise appends the record line
# numbers to the name and, if the name matches $KEEP_TABLE_RE, pushes
# [$name, [ [column names], data rows... ]] onto @results.
sub finish_current_table
{
    return unless ($state ne 'GET_TABLE_NAME');        ## Ver2
    $name .= "Record_Start: $line_num_start_rec\n";    ## Ver2
    $name .= "Record_End: $.\n";                       ## Ver2

    # this is where data is "saved"
    # probably calls something to put data into a DB?
    if ($name =~ $KEEP_TABLE_RE)    # decide which tables to "keep"
    {
        unshift @data, [@col_names];
        push @results, [$name, [@data]];
    }
    $state = 'GET_TABLE_NAME';
    return;
}

start_new_table_entry();

REDO_LINE: while (my $line = <DATA>)
{
    $line =~ s/^\s*//;    # delete leading spaces
    $line =~ s/\s*$//;    # delete trailing spaces
                          # (this includes line endings)

    if ($state eq 'GET_TABLE_NAME')    #### TABLE NAME ###
    {
        if ($line =~ /^\|/)    # premature start of column name state! Whoa!
        {
            # special case of malformed table without
            # a starting banner of --- or _ _ _
            # we are already in the column name state!
            $state = 'GET_COL_NAMES';
            $line_num_start_rec = $. if $name eq "";  ## Ver2 Table has no name
            redo REDO_LINE;
        }
        elsif ($line !~ /^[-_]/)    # keep going - normal case
        {
            $line_num_start_rec = $. if $name eq "";   ### Ver 2 rec line numbers
            $name .= "$line\n" if $line =~ /\S/;       ### Ver 2 multi-line name
        }
        else
        {
            $state = 'GET_COL_NAMES';
        }
    }
    elsif ($state eq 'GET_COL_NAMES')    #### COLUMN NAMES ###
    {
        if ($line !~ /(^\|[-_])|(^[-])/)    # keep going
        {
            $line =~ s/^\|\s*//;
            my @col_name_raw = split /\|/, $line;
            my $col = 0;
            foreach my $this_col (@col_name_raw)
            {
                $this_col =~ s/\s*$//;
                $this_col =~ s/^\s*//;
                $col_names[$col] //= "";

                # Header text may be stacked over several lines; join the
                # fragments with a single space.  Empty fragments are
                # skipped so they cannot leave a trailing space behind.
                if ($this_col ne "")
                {
                    $col_names[$col] .= " " if $col_names[$col] ne "";
                    $col_names[$col] .= $this_col;
                }
                $col++;
            }
        }
        else
        {
            $state = "GET_DATA";
        }
    }
    elsif ($state eq 'GET_DATA')    #### DATA ROWS ###
    {
        if ($line =~ /^\|/)    # keep going
        {
            $line =~ s/^\|\s*//;
            my @this_data = split /\|/, $line;
            for my $field (@this_data)    # trim each cell
            {
                $field =~ s/\s*$//;
                $field =~ s/^\s*//;
            }
            push @data, [@this_data];
        }
        else
        {
            finish_current_table();
            start_new_table_entry();

            # The line that ended the table may already be the first line
            # of the next table's name (no blank separator line).  Re-run
            # it through the GET_TABLE_NAME state instead of dropping it.
            # Blank lines and -/_ border lines are still just consumed.
            redo REDO_LINE if $line =~ /^[^-_]/;
        }
    }
}
finish_current_table();    # in case of malformed end of table

# dump results in "pseudo" CSV format
# also consider looking at:
# print Dumper \@results;

foreach my $tableref (@results)
{
    my ($name, $dataref) = @$tableref;
    print "TABLE: $name";    ### Ver 2 changed for multi-line name
    my $row0 = shift @$dataref;
    print "COLUMNS: ", join(",", @$row0), "\n";
    foreach my $row (@$dataref)
    {
        print join(",", @$row), "\n";
    }
    print "\n";
}

=begin comment

PRINTED OUTPUT (as originally posted).  NOTE: the Record_Start and
Record_End line numbers depend on the exact blank lines of the input the
author ran against; that layout is not preserved in this copy, so the
numbers printed for the sample data below may differ.

TABLE: 2017 Position log :Fp379
place: cal
time: 23:01:45
Record_Start: 31
Record_End: 44
COLUMNS: #,Locker,Pos (dfg),value (no),nul,bulk val,lot Id,prev val,newest val
0,1,302832,-11.88,1,0,Pri,16,0
1,9,302836,11.88,9,0,Pri,10,0
2,1,302832,-11.88,5,3,Pri,14,4
3,3,302833,11.88,1,0,sec,12,0
4,6,302837,-11.88,1,0,Pri,16,3

=end comment

=cut

__DATA__
place and year data: 67
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|no.| name | age | place | year |
|_ _|_ _ _ _|_ _ _ | _ _ _ | _ _ |
|1 | sue |33 | NY | 2015 |
|2 | mark |28 | cal | 2106 |

work and language :65
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|no.| name | languages | proficiency | time taken|
|_ _| _ _ _| _ _ _ _ _ |_ _ _ _ _ _ _| _ _ _ _ _ |
|1 | eliz | English | good | 24 hrs |
|2 | susan| Spanish | good | 13 hrs |
|3 | danny| Italian | decent | 21 hrs |

Position log
| | |Pos |value | |bulk|lot| prev| newest|
|# |Locker|(dfg) |(no) |nul|val |Id | val |val |
-----------------------------------------------------------
| 0| 1| 302832| -11.88| 1| 0|Pri| 16| 0|
| 1| 9| 302836| 11.88| 9| 0|Pri| 10| 0|
| 2| 1| 302832| -11.88| 5| 3|Pri| 14| 4|
| 3| 3| 302833| 11.88| 1| 0|sec| 12| 0|
| 4| 6| 302837| -11.88| 1| 0|Pri| 16| 3|

2017 Position log :Fp379
place: cal
time: 23:01:45
| | |Pos |value | |bulk|lot| prev| newest|
|# |Locker|(dfg) |(no) |nul|val |Id | val |val |
-----------------------------------------------------------
| 0| 1| 302832| -11.88| 1| 0|Pri| 16| 0|
| 1| 9| 302836| 11.88| 9| 0|Pri| 10| 0|
| 2| 1| 302832| -11.88| 5| 3|Pri| 14| 4|
| 3| 3| 302833| 11.88| 1| 0|sec| 12| 0|
| 4| 6| 302837| -11.88| 1| 0|Pri| 16| 3|
In reply to Re^3: Parsing .txt into arrays
by Marshall
in thread Parsing .txt into arrays
by Fshah
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |