#! perl -slw
use strict;
use warnings;
use Tie::File;

## Merge the records of a "new" tab-separated file into an "old" one.
##
## Each record is a line of tab-separated, double-quoted fields. Fields
## 2, 3 and 6 (zero-based) together form the key; field 0 is the part
## number and the third field from the end holds the notes.
##
##  * A new record whose key already exists in the old file causes its
##    part number to be appended (after a ':') to the old record's notes.
##  * Any other new record is appended to the old file, prefixed with 'A'.
##
## NOTE: the -l switch on the #! line supplies the newline after every
## print, including the prints that write the generated test data.

## Initialise the filenames here
my( $old, $new ) = ( 'old', 'new' );

## Comment this out, once you specify your real files above!!
genTestData( $old, $new );

## Use the files as arrays. See Tie::File.
tie my @old, 'Tie::File', $old or die "Cannot tie '$old': $!";
tie my @new, 'Tie::File', $new or die "Cannot tie '$new': $!";

## Build a lookup table into the old array (file), mapping the
## catenation of fields 2, 3 & 6 to the index of the record holding it.
## If the old file repeats a key, the last occurrence wins.
my %old;
$old{ keyOf( $old[ $_ ] ) } = $_ for 0 .. $#old;

## Remove duplicates from the new file, if any, keeping the first record
## seen for each key. The file is only rewritten if something was dropped.
my %seen;
my @unique = grep { !$seen{ keyOf( $_ ) }++ } @new;
@new = @unique if @unique < @new;

## Now process the new file line by line
for my $lineno ( 0 .. $#new ) {

    ## Fetch the record once rather than on every use.
    my $record = $new[ $lineno ];

    ## Extract the partno and strip its quotes for later.
    my( $partno ) = split /\t/, $record;
    $partno = '' unless defined $partno;
    $partno =~ s["([^\x22]*)"][$1];

    ## Catenate the 3 key fields and do a lookup in the old data table.
    my $key = keyOf( $record );

    if( exists $old{ $key } ) {
        my $idx  = $old{ $key };
        my $line = $old[ $idx ];

        ## Locate the notes field (the quoted field followed by exactly
        ## two more tab-separated fields) and append the partno inside
        ## its quotes.
        if( $line =~ s[ " ([^\x22]*) " (?= (?: \t [^\t]*? ){2} $ ) ]["$1:$partno"]x ) {
            ## Write the edited record back into the old file.
            $old[ $idx ] = $line;
            print "Updating old line ", $line;
        }
        else {
            ## Do not claim an update that did not happen.
            warn "No notes field found in old line ", $idx + 1,
                "; left unchanged\n";
        }
    }
    ## Else append the new record to the old file
    ## prefixed with an 'A'
    else {
        push @old, 'A' . $record;
        print "Adding new line '", $record, "' to old file";
    }
}

## Flush any pending changes to disk and close both files explicitly
## instead of leaving it to global destruction at exit.
untie @old;
untie @new;

exit( 0 );

## Return the lookup key for one record: fields 2, 3 and 6 joined by $;.
## The split limit of -1 keeps trailing empty fields, so an empty last
## field yields '' rather than being dropped.
sub keyOf {
    my( $record ) = @_;
    return join $;, ( split /\t/, $record, -1 )[ 2, 3, 6 ];
}

## Everything from here is for generating test data.

## Write 100 generated records to the file named by $old and 20 to the
## file named by $new. The generator is seeded with a fixed value so the
## data is repeatable on a given perl. Dies if a file cannot be written.
sub genTestData {
    my( $old, $new ) = @_;

    srand( 1 );

    for my $spec ( [ $old, 100 ], [ $new, 20 ] ) {
        my( $file, $count ) = @$spec;

        open my $fh, '>', $file or die "Cannot create '$file': $!";
        print {$fh} genLine() for 1 .. $count;

        ## Buffered write errors only surface at close.
        close $fh or die "Cannot close '$file': $!";
    }

    return;
}

## Build one random record: seven double-quoted fields joined by tabs.
## Field 0 is a five-digit number; fields 2 and 3 are 'l' or 'r'
## followed by a digit 0-8; the last field is empty.
sub genLine {
    return join "\t", map { '"' . $_ . '"' }
        10000 + int rand 90000,
        'dummy',
        ( 'l', 'r' )[ rand() < 0.5 ] . int rand( 9 ),
        ( 'l', 'r' )[ rand() < 0.5 ] . int rand( 9 ),
        'notes',
        'dummy',
        '';
}

__END__
P:\test>295919.pl8
Adding new line '"13471" "dummy" "r1" "r1" "notes" "dummy" ""' to old file
Adding new line '"65827" "dummy" "l8" "r7" "notes" "dummy" ""' to old file
Updating old line "31648" "dummy" "l8" "r8" "notes:68098" "dummy" ""
Adding new line '"69773" "dummy" "r5" "l3" "notes" "dummy" ""' to old file
Adding new line '"94869" "dummy" "l5" "l2" "notes" "dummy" ""' to old file
Adding new line '"45724" "dummy" "r0" "l1" "notes" "dummy" ""' to old file
Updating old line "97885" "dummy" "r5" "r1" "notes:16325" "dummy" ""
Updating old line "95152" "dummy" "l4" "l6" "notes:24029" "dummy" ""
Adding new line '"49715" "dummy" "l3" "l5" "notes" "dummy" ""' to old file
Adding new line '"27962" "dummy" "l7" "r8" "notes" "dummy" ""' to old file
Adding new line '"26677" "dummy" "l5" "r5" "notes" "dummy" ""' to old file
Adding new line '"73764" "dummy" "r2" "r3" "notes" "dummy" ""' to old file
Updating old line "90568" "dummy" "l6" "l3" "notes:90576" "dummy" ""
Adding new line '"45765" "dummy" "l2" "r5" "notes" "dummy" ""' to old file
Updating old line "75975" "dummy" "l6" "l6" "notes:41819" "dummy" ""
Adding new line '"22538" "dummy" "r2" "l8" "notes" "dummy" ""' to old file
Adding new line '"43104" "dummy" "l0" "l1" "notes" "dummy" ""' to old file
Adding new line '"56614" "dummy" "l3" "l0" "notes" "dummy" ""' to old file
Adding new line '"17160" "dummy" "r0" "r2" "notes" "dummy" ""' to old file
Adding new line '"72753" "dummy" "r3" "r6" "notes" "dummy" ""' to old file