Here is a quick hack, just something to get started with, based on the algorithm wfsp posted: process each file only once.
Note that I have no experience with large text files and the code is not optimized (but I hope to learn something from this too).
use strict;
use warnings;
use Tie::File;
use Fcntl qw(O_RDONLY);
my $file1 = 'file1.txt';
my $file2 = 'file2.txt';
my $file3 = 'file3.txt';
my %result;
#-- Process file 1
tie my @file1_arr, 'Tie::File', $file1, mode => O_RDONLY
    or die "Can't tie $file1: $!";
foreach my $record (@file1_arr) {
    my ($id) = $record =~ m/HWI-(.*)#/;   # extract the ID
    next unless defined $id;              # skip records without one
    my @rez = split /\t/, $record, 4;
    # Save fields 2 and 3 for later, keyed by ID (assumes IDs are unique)
    $result{$id} = [ @rez[1, 2] ];
}
untie @file1_arr;   # finished with file 1
#-- Process file 2 and write output to file 3
tie my @file2_arr, 'Tie::File', $file2, mode => O_RDONLY
    or die "Can't tie $file2: $!";
#-- The result file
tie my @content, 'Tie::File', $file3
    or die "Can't tie $file3: $!";
foreach my $record (@file2_arr) {
    my ($id) = $record =~ m/HWI-(.*)#/;
    next unless defined $id and exists $result{$id};   # no match from file 1
    my ($data2, $data3) = @{ $result{$id} };
    # Output: the ID, the two fields saved from file 1's record, field 2
    # of this record, and the absolute difference of the numeric fields
    my @rez = split /\t/, $record, 4;
    push @content, "$id $data2 $rez[1] " . abs($data3 - $rez[2]);
}
untie @file2_arr; # finished with file 2
untie @content; # all finished
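One thing to keep in mind: Tie::File maintains an offset table for every line, so it can get slow and memory-hungry on really big files. If that turns out to be a problem, the same one-pass-per-file algorithm works with plain filehandles. Here is an untested sketch of that variant, with the same assumptions about the record layout and the HWI-...# IDs:

use strict;
use warnings;
my ($file1, $file2, $file3) = ('file1.txt', 'file2.txt', 'file3.txt');
my %result;
#-- Pass 1: remember fields 2 and 3 of each file1 record, keyed by ID
open my $in1, '<', $file1 or die "Can't open $file1: $!";
while (my $record = <$in1>) {
    chomp $record;
    my ($id) = $record =~ m/HWI-(.*)#/;
    next unless defined $id;
    my @rez = split /\t/, $record, 4;
    $result{$id} = [ @rez[1, 2] ];
}
close $in1;
#-- Pass 2: stream file2 and print the merged records straight to file3
open my $in2, '<', $file2 or die "Can't open $file2: $!";
open my $out, '>', $file3 or die "Can't open $file3: $!";
while (my $record = <$in2>) {
    chomp $record;
    my ($id) = $record =~ m/HWI-(.*)#/;
    next unless defined $id and exists $result{$id};
    my ($data2, $data3) = @{ $result{$id} };
    my @rez = split /\t/, $record, 4;
    print {$out} "$id $data2 $rez[1] ", abs($data3 - $rez[2]), "\n";
}
close $in2;
close $out or die "Can't close $file3: $!";

This avoids the tied-array bookkeeping entirely and only ever holds %result in memory, so it should scale with the size of file1 rather than with both files.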