in reply to Re: Re: To parse or not to parse
in thread To parse or not to parse
Why not just compile everything in your header garbage list into a single regular expression, instead of building one regex per entry?

    foreach $RegEx (@HeaderGarbage){ $CHRegex = qr/($RegEx\n)/; push @HeaderRegExes, $CHRegex; } ... foreach $ErrChar (@HeaderRegExes){ ...
# Strip known header garbage from the start of each input line using ONE
# precompiled alternation instead of looping over one regex per entry.
@HeaderGarbage = (
    'LD 20', 'PT0000 ',
    # ... (further entries elided in the original post) ...
    'CUST ', 'DATE ', 'PAGE ', 'DES ',
);

# The entries are interpolated as regex source, exactly as before.
my $reg = join '|', @HeaderGarbage;

# Group the alternation: in /^a|b|c/ the ^ anchors only "a", so "b" and "c"
# would match (and be deleted) anywhere in the line.  /^(?:a|b|c)/ anchors
# every alternative to the start of the line.
my $HeaderRegex = qr/^(?:$reg)/;

# ... (code elided in the original post) ...

while (my $line = <FILE>) {
    $line =~ s/$HeaderRegex//;    # replace matching garbage
    # ... (rest of the loop body elided in the original post) ...
}
use strict;
use warnings;
use Data::Dumper;

# Command mnemonics that may start a new "KEY value" line in a record.
my @valid_keys = qw/
    TN TYPE CDEN CUST AOM FDN TGAR LDN NCOS SGRP RNPG SCI SSU LNRS XLST
    SCPW SFLT CAC_MFC CLS CPND_LANG RCO EFD HUNT EHT LHK AST IAPG AACS
    ITNA DGRP MLWU_LANG DNDR KEY DATE
/;

# Alternation of all valid mnemonics, interpolated into the line matcher.
my $valid_keys = join '|', map { quotemeta } @valid_keys;

my $RECORDS = ParseInputRecord();
print Dumper(\$RECORDS);

# ParseInputRecord([$fh])
#
# Reads a record line by line and returns a hash reference mapping each
# command mnemonic to its (post-processed) value.
#
#   $fh - optional filehandle to read from; defaults to the DATA handle.
#
# Line classification:
#   * starts with a valid mnemonic  -> finishes the pending command and
#                                      starts a new one
#   * starts with whitespace        -> continuation of the pending command,
#                                      appended to its value after "\n"
#   * anything else                 -> finishes the pending command
#   * blank / whitespace-only       -> ignored
#
# Continuation lines that arrive while no command is pending are ignored
# rather than being stored under a whitespace key.
sub ParseInputRecord {
    my ($fh) = @_;
    $fh = \*DATA unless defined $fh;

    my %r;
    my ($key, $value);

    # Store the pending command (if any) and empty the buffer.
    my $finish_command = sub {
        return unless defined $key;
        $r{$key} = ParseInputValue($key, $value);
        undef $key;
        return;
    };

    while (my $line = <$fh>) {
        next if $line !~ /\S/;    # nothing to parse on a blank line

        # (?!\S) requires the mnemonic to be a whole word, so that e.g.
        # "TNX ..." is not mistaken for the command "TN".
        if ($line =~ /^($valid_keys)(?!\S)\s*(.*)$/) {
            $finish_command->();
            ($key, $value) = ($1, $2);    # start next command
        }
        elsif ($line =~ /^\s+(.*)$/) {
            # Any leading whitespace (spaces or tabs) marks a continuation.
            $value .= "\n$1" if defined $key;
        }
        else {
            $finish_command->();          # unrecognised line ends the command
        }
    }

    # Finish off the last command in the buffer, if there is one.
    $finish_command->();

    return \%r;
}

# ParseInputValue($key, $value)
#
# Applies the per-command clean-up to a raw (possibly multi-line) value and
# returns the result.
#
#   CLS - all lines are merged into one space-separated line.
#   KEY - lines that do not start with a digit are merged into the previous
#         line, and whitespace at the end of each line is removed.
#   any other key - the value is returned unchanged.
sub ParseInputValue {
    my ($key, $value) = @_;
    return $value unless defined $value;

    if ($key eq 'CLS') {
        $value =~ s/\n/ /g;
        # To transform the value into an array of commands instead:
        # $value = [ split /\s+/, $value ];
    }
    elsif ($key eq 'KEY') {
        $value =~ s/\n(\D)/ $1/g;      # merge command lines
        $value =~ s/\s+(\n|$)/$1/g;    # eliminate end spaces
    }

    return $value;
}

# NOTE(review): the sample record below was recovered from text whose line
# breaks had been lost; tokens and their order are preserved, but the exact
# original line breaks and column alignment are an assumption.
__DATA__
DES M3310
TN 004 0 00 04
TYPE 2616
CDEN 8D
CUST 0
AOM 0
FDN 7000
TGAR 1
LDN NO
NCOS 7
SGRP 0
RNPG 5
SCI 0
SSU 0010
LNRS 16
XLST
SCPW 2257
SFLT NO
CAC_MFC 0
CLS TLD FBD WTA LPR PUA MTD FNA HTA ADD HFA
    MWA LMPN RMMD SMWD AAD IMD XHD IRD NID OLD VCE DRG1
    POD DSX VMD CMSD CCSA-CSI SWD LNA CNDA
    CFTA SFA MRD DDV CNID CDCA MSID DAPA BFED RCBD
    ICDD CDMD MCTD CLBD AUTU
    GPUA DPUA DNDD CFXA ARHA CLTD ASCD
    ABDA CFHA FICD NAID BUZZ AGRD MOAD
    UDI RCC HBTA AHA IPND DDGA NAMA MIND PRSD NRWD NRCD NROD
    DRDD EXR0
    USRA ULAD RTDD RBDD RBHD PGND OCBD FLXD FTTC MCBN
CPND_LANG ENG
RCO 0
EFD 7000
HUNT 7000
EHT 7000
LHK 0
AST
IAPG 0
AACS NO
ITNA NO
DGRP
MLWU_LANG 0
DNDR 0
KEY 00 MCR 2257 0 MARP
       CPND
         CPND_LANG ROMAN
         NAME Darren Powell
         XPLN 27
         DISPLAY_FMT FIRST,LAST
    01 MCR 2257 0
       CPND
         CPND_LANG ROMAN
         NAME Darren Powell
         XPLN 27
         DISPLAY_FMT FIRST,LAST
    02 TRN
    03 CFW 4 7000
    04 AO6
    05 SCC 0257
    06 RNP
    07
    08
    09
    10
    11
    12
    13
    14
    15
DATE 14 JUL 2003
|
|---|