in reply to generating merged data from matching id in 2 files
Here is a quick hack, just something to start with, based on the algorithm posted by wfsp of processing each file only once.
Note that I don't have experience with large text files and the code is not optimized (but I hope to learn something from this too).
# Merge two tab-separated files on the ID embedded in each record.
#
# Every record is expected to contain an ID of the form "HWI-<id>#".
# File 1 is read once, and fields 2 and 3 of each record are remembered per
# ID. File 2 is then read once; for each record whose ID was seen in file 1,
# one line is appended to file 3:
#
#     <id> <file1 field 2> <file2 field 2> <abs(file1 field 3 - file2 field 3)>
#
# Records without an ID, with fewer than three fields, or (in file 2) with an
# ID unknown to file 1 are reported on STDERR and skipped.
use strict;
use warnings;
use Tie::File;
use Fcntl qw(O_RDONLY);

my $file1 = 'file1.txt';
my $file2 = 'file2.txt';
my $file3 = 'file3.txt';

my %result;    # ID => [ field 2, field 3 ] as read from file 1

# Return the text between "HWI-" and the first following "#", or undef when
# the record carries no such ID. A negated character class is used instead
# of a greedy ".*" so that a later "#" in the record cannot extend the ID.
sub extract_id {
    my ($record) = @_;
    my ($id) = $record =~ m/HWI-([^#]*)#/;
    return $id;
}

#-- Process file 1
tie my @file1_arr, 'Tie::File', $file1, mode => O_RDONLY
    or die "Cannot open '$file1' for reading: $!";

my $line_no = 0;
for my $record (@file1_arr) {
    $line_no++;

    my $id = extract_id($record);
    if ( !defined $id ) {
        warn "$file1 line $line_no: no ID found, record skipped\n";
        next;
    }

    # Only fields 2 and 3 are needed; the limit keeps the remainder unsplit.
    my @fields = split /\t/, $record, 4;
    if ( @fields < 3 ) {
        warn "$file1 line $line_no: fewer than 3 fields, record skipped\n";
        next;
    }

    # If an ID occurs more than once, the last record wins.
    $result{$id} = [ @fields[ 1, 2 ] ];
}
untie @file1_arr;    # finished with file 1

#-- Process file 2 and write output to file 3
tie my @file2_arr, 'Tie::File', $file2, mode => O_RDONLY
    or die "Cannot open '$file2' for reading: $!";

#-- The result file (created if missing; new records are appended)
tie my @content, 'Tie::File', $file3
    or die "Cannot open '$file3' for writing: $!";

$line_no = 0;
for my $record (@file2_arr) {
    $line_no++;

    my $id = extract_id($record);
    if ( !defined $id ) {
        warn "$file2 line $line_no: no ID found, record skipped\n";
        next;
    }

    my $from_file1 = $result{$id};
    if ( !defined $from_file1 ) {
        warn "$file2 line $line_no: ID '$id' not present in $file1, "
            . "record skipped\n";
        next;
    }

    my @fields = split /\t/, $record, 4;
    if ( @fields < 3 ) {
        warn "$file2 line $line_no: fewer than 3 fields, record skipped\n";
        next;
    }

    my ( $data2, $data3 ) = @{$from_file1};

    # Output
    my $record_new = "$id $data2 $fields[1] " . abs( $data3 - $fields[2] );
    push @content, $record_new;
}
untie @file2_arr;    # finished with file 2
untie @content;      # all finished
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: generating merged data from matching id in 2 files
by almut (Canon) on Nov 19, 2009 at 18:06 UTC |