# Find the records of file1 whose key also occurs in file2 and write them,
# unchanged, to "duplicates".
#
# The key is the concatenation of three fixed-width columns of a record
# (0-based offset/length): 6/6, 29/10 and 54/12.  Each input is turned into
# "key|..." lines sorted on the key so that join(1) can pair them up.
# NOTE: this relies on the key columns never containing a "|" character.

# file1: skip records starting with "2"; prefix every other record with
# its key.  chomp first so that a short record cannot drag its newline
# into the key and split the output line in two.
perl -ne '
next if /^2/;
chomp;
$k = substr($_, 6, 6) . substr($_, 29, 10) . substr($_, 54, 12);
print "$k|$_\n";
' file1 | sort -t "|" -k 1,1 >file1.sorted

# This code assumes the fields are in the same place in file2
# as they are in file1, but if not, you will have to change the offsets.
# Only the key is written, since nothing else from file2 is used.
# -u drops repeated keys: join emits one output line per matching pair, so
# a key occurring N times in file2 would otherwise copy the matching file1
# record N times into "duplicates".
perl -ne '
chomp;
$k = substr($_, 6, 6) . substr($_, 29, 10) . substr($_, 54, 12);
print "$k\n";
' file2 | sort -t "|" -k 1,1 -u >file2.sorted

# Keep the file1 lines whose key is present in file2, then strip the key
# again.  "-f 2-" (not "-f 2") so a record that itself contains "|" is
# returned whole instead of being cut off at its first "|".
join -t '|' file1.sorted file2.sorted | cut -d '|' -f 2- > duplicates
####
3 110582 SFCA 4158675309 041414041421
3 060784 NYNY 2125552368 190159204657
3 121906 RANC 9195551234 123401123620
####
3 110582 SFCA 4158675309 041414041421
####
3 110582 SFCA 4158675309 041414041421
####
# Incremental variant: pick out the records of "newdata" whose key has not
# been handled by an earlier run, and remember their keys afterwards.
#
# "alreadyprocessed" holds one key per line, in sorted order.  The key is
# built from the fixed-width columns (0-based offset/length) 6/6, 29/10 and
# 54/12 of a record; it is assumed never to contain a "|" character.

# join fails if one of its inputs is missing, so make sure the key list
# exists on the very first run (touch leaves existing content alone).
touch alreadyprocessed

# Skip records starting with "2"; prefix every other record with its key.
# chomp first so that a short record cannot drag its newline into the key.
perl -ne '
next if /^2/;
chomp;
$k = substr($_, 6, 6) . substr($_, 29, 10) . substr($_, 54, 12);
print "$k|$_\n";
' newdata | sort -t "|" -k 1,1 >newdata.sorted

# -v 1: keep only the lines of newdata.sorted that have no matching key in
# alreadyprocessed.
join -t '|' -v 1 newdata.sorted alreadyprocessed >needsprocessing

# Strip the key again.  "-f 2-" (not "-f 2") so a record that itself
# contains "|" is passed on whole instead of being cut off.
cut -d '|' -f 2- needsprocessing >processinput
# Then do the processing
# ...
# ...
# If everything runs okay
# Merge the new keys into the sorted key list (-m: both inputs are already
# sorted, -u: store each key only once).  The steps are chained with && so
# that a failed merge can never replace the existing key list with a
# partial "mergeout".
cut -d '|' -f 1 needsprocessing |
sort -m -u - alreadyprocessed >mergeout &&
mv alreadyprocessed alreadyprocessed.bak &&
mv mergeout alreadyprocessed