#! perl -slw use strict; my @lines = ; my $mask = chr(32) x length $lines[ 0 ]; $mask |= $_ for @lines; my $templ = ''; $templ .= 'a' . length( $1 ) . 'x' . length( $2 ) . ' ' while $mask =~ m[([^ ]+)( +|$)]g; print $templ; ## Count the false fields and determin the left/right alignments: my( @left, @right, @blanks, $nFields ); for ( @lines ) { my @fields = unpack $templ, $_; $nFields = $#fields; for my $field ( 0 .. $#fields ) { $right[ $field ] = 1 if substr( $fields[ $field ], 0, 1 ) eq ' '; $left[ $field ] = 1 if substr( $fields[ $field ], -1 ) eq ' '; $blanks[ $field ]++ unless $fields[ $field ] =~ m[\S]; } } my $reField = qr[a\d+x\d+\s]; ## Simplifies template adjustment regex. ## adjusted template fields backwards ## to ensure that we don't screw up the indexing. for my $field ( reverse 0 .. $nFields ) { ## Skip unless a false field next unless defined $blanks[ $field ] and $blanks[ $field ] > ( @lines / 2 ); my $keep; ## Number of template fields to keep ## If the preceding field is left aligned ## and the following is not right aligned if( $field and $left[ $field -1 ] and $field < $nFields and not $right[ $field + 1 ] ) { warn "Amalgamating field $field with the prevous field\n"; $keep = $field - 1; } ## If the preceding field is right aligned ## and the following is not left aligned elsif( $field < $nFields and $right[ $field + 1 ] and $field and not $left[ $field - 1 ] ) { warn "Amalgamating field $field with the next field\n"; $keep = $field; } ## If preceding is left and following is right aligned ## bellyache and do nothing. else { warn "Field $field is probably a false field, but it is not possible\n" . "to determine which adjacent field to amalgamate it with?"; next; ## Update. } ## Amalgamate the appropriate template fields $templ =~ s[ ( ${reField}{$keep} ) a (\d+) x (\d+) \s a (\d+) x (\d+) \s ][ $1 . 'a' . ($2 + $3 + $4) . 'x' . $5 . ' ' ]xe or warn 'No match'; ## Belache if the regex fails. } print $templ; { local $\; ## Split the records and output delimited by '|' print join '|', unpack $templ, $_ for @lines; } __DATA__ The First One Here Is Longer. Collie SN 262287630 77312 93871 MVP abc de gh A Second (PART) here First In 20 MT 169287655 506666 61066 RTD abc fgh 3rd Person "Something" X&Y No SH 564287705 45423 52443 RTE abc gh The Fourth Person 20 MLP 4000 360505504 3530 72201 VRE abc gh The Fifth Name OR Something Twin 200 SH 469505179 3530 72201 VRE abc fgh The Sixth Person OR Item MLP 260505174 3,530 72,201 VRE abc fgh 70 The Seventh Record MLP 764205122 3530 72201 VRE abc gh The Eighth Person MLP MLP 160545154 3530 7220 VRE abc gh #### c:\test>628055 a29x1 a11x1 a2x1 a9x2 a6x2 a6x2 a3x2 a3x1 a2x1 a4x0 Amalgamating field 8 with the next field Amalgamating field 2 with the prevous field a29x1 a14x1 a9x2 a6x2 a6x2 a3x2 a3x1 a7x0 The First One Here Is Longer.|Collie SN |262287630|77312 | 93871|MVP|abc|de gh A Second (PART) here |First In 20 MT|169287655|506666| 61066|RTD|abc| fgh 3rd Person "Something" |X&Y No SH |564287705|45423 | 52443|RTE|abc| gh The Fourth Person 20 |MLP 4000 |360505504|3530 | 72201|VRE|abc| gh The Fifth Name OR Something |Twin 200 SH |469505179|3530 | 72201|VRE|abc| fgh The Sixth Person OR Item |MLP |260505174|3,530 |72,201|VRE|abc| fgh 70 The Seventh Record |MLP |764205122|3530 | 72201|VRE|abc| gh The Eighth Person MLP |MLP |160545154|3530 | 7220|VRE|abc| gh