Hello
I wrote a perl code to match two files. But when the input file is very large, it runs very very long time.
How I can shorter the running time by change some code?
Thanks in advance for any great help!
Best,
Yue
open(IN1,"tmp12") || die "Cannot open this file";
@lines1 = <IN1>;
open(IN2,"donor_82_01.csv") || die "Cannot open this file";
@lines2 = <IN2>;
open(OUT,">tmp12_01") || die "Cannot open this file";
for $item1(@lines1){
chomp $item1;
#print OUT $item1,"\t";
@tmp1=split(/\t+/, $item1);
for $item2(@lines2){
chomp $item2;
@tmp2=split(/\,+/, $item2);
if ($tmp1[1] eq $tmp2[0]){
print OUT $tmp1[0],",",$item2;
last;
}
$i++
}
print OUT "\n";
}
close(IN1);
close(IN2);
close(OUT);
The file of tmp12 is:
A1BG ENSG00000121410
A1BG-AS1 ENSG00000268895
A1CF ENSG00000148584
A2M ENSG00000175899
A2M-AS1 ENSG00000245105
A2ML1 ENSG00000166535
A2ML1-AS1 ENSG00000256661
A2ML1-AS2 ENSG00000256904
A3GALT2 ENSG00000184389
A4GALT ENSG00000128274
A4GNT ENSG00000118017
AAAS ENSG00000094914
AACS ENSG00000081760
AADAC ENSG00000114771
AADACL2 ENSG00000197953
AADACL2-AS1 ENSG00000242908
AADACL3 ENSG00000188984
AADACL4 ENSG00000204518
AADAT ENSG00000109576
AAGAB ENSG00000103591
AAK1 ENSG00000115977
AAMDC ENSG00000087884
AAMP ENSG00000127837
AANAT ENSG00000129673
AAR2 ENSG00000131043
AARD ENSG00000205002
AARS1 ENSG00000090861
AARS2 ENSG00000124608
AARSD1 ENSG00000266967
AASDH ENSG00000157426
AASDHPPT ENSG00000149313
AASS ENSG00000008311
AATBC ENSG00000215458
AATF ENSG00000275700
AATK ENSG00000181409
ABALON ENSG00000281376
ABAT ENSG00000183044
ABCA1 ENSG00000165029
ABCA10 ENSG00000154263
ABCA12 ENSG00000144452
ABCA13 ENSG00000179869
ABCA2 ENSG00000107331
ABCA3 ENSG00000167972
ABCA4 ENSG00000198691
ABCA5 ENSG00000154265
ABCA6 ENSG00000154262
ABCA7 ENSG00000064687
ABCA8 ENSG00000141338
ABCA9 ENSG00000154258
The file of donor_82_01.csv is:
,AAACCTGAGCGTTTAC-1,AAACCTGAGTCGCCGT-1,AAACCTGGTAGGACAC-1,AAACCTGGTGCC
+TTGG-1,AAACCTGGTTCAGCGC-1
ENSG00000148584,0,0,0,0,0
ENSG00000237613,0,0,0,0,0
ENSG00000186092,0,0,0,0,0
ENSG00000118017,0,0,0,0,0
ENSG00000239945,0,0,0,0,0
ENSG00000205002,0,0,0,0,0
ENSG00000090861,0,0,0,0,0
ENSG00000279928,0,0,0,0,0
ENSG00000181409,0,1,0,1,0
ENSG00000228463,0,0,0,0,0
ENSG00000236743,0,0,0,0,0
ENSG00000165029,0,0,0,0,0
ENSG00000144452,0,0,0,0,0
ENSG00000278566,0,0,0,0,0
ENSG00000179869,0,0,0,0,0
ENSG00000235146,0,0,0,0,0
ENSG00000154262,0,0,0,0,0
ENSG00000141338,0,0,0,0,0
ENSG00000154258,0,0,0,0,0