#!/usr/bin/perl -w use warnings; use strict; # BWA alignment output (.sam) my %bwa = (); my $file1 = shift; open (FILE1, "$file1") || die "Failed to open $file1 for reading : $!"; # Open second file while () { # Reading second hash if ($_ =~ /^[^@]/s) { chomp; my @line = split /\s+/, $_; my $ID; if ($line[2] =~ /[^*]/) { $ID = $line[0]; $bwa{$ID}[0] = $line[0]; # seq ID $bwa{$ID}[1] = $line[2]; # Ref ID $bwa{$ID}[2] = $line[5]; # CIGAR ID for insertion #$bwa{$ID}[3] = @line[9]; # Processed seq (already C->T) $bwa{$ID}[3] = $line[12]; # Edit distance (edited area by # of base) : NM $bwa{$ID}[4] = $line[15]; # No. of mismatches in the alignment : XM $bwa{$ID}[5] = $line[16]; # No. of gap opens for insertion : XO $bwa{$ID}[6] = $line[17]; # No. of gap extensions for deletion :XG $bwa{$ID}[7] = $line[18]; # Mismatching positions / bases : MD } } } close FILE1 || die "Failed to close $file1 : $!"; # ORIGINAL Reference Amplicon File (.fa) my %Amp = (); my $file2 = shift; open (FILE2, "$file2")|| die "Failed to open $file2 for reading : $!"; # Open first file local $/= ">"; my $first=; while () { # Reading first hash chomp; my ($ID, $Seq) = split("\n"); $Amp{$ID}[0] = $ID; $Amp{$ID}[1] = $Seq; } close FILE2 || die "Failed to close $file2 : $!"; # ORIGINAL Input FASTQ Sequencing File (.fq) my %Input = (); my $file3 = shift; open (FILE3, "$file3")|| die "Failed to open $file3 for reading : $!"; # Open first file local $/= "@"; $first=; while () { # Reading first hash chomp; my ($ID, $Seq,undef,undef) = split("\n"); $Input{$ID}[0] = $ID; $Input{$ID}[1] = $Seq; } close FILE3 || die "Failed to close $file3 : $!"; foreach my $ID (keys %bwa) { print "1"; if (exists $Input{$ID}[0] ){ print "2"; if ($bwa{$ID}[0] eq $Input{$ID}[0]){ print "3"; if ($bwa{$ID}[1] eq $Amp{$ID}[0]){ print "4"; if ($bwa{$ID}[3] eq "NM:i:0" && $bwa{$ID}[4] eq "XM:i:0" && $bwa{$ID}[5] eq "XO:i:0" && $bwa{$ID}[6] eq "XG:1:0") { print "$Amp{$ID}[1]\n$Input{$ID}[1]"; } else {print "4out";} } else {print "3out";} } else {print "2out";} } else {print "1out";} } exit; #### @SQ SN:TMEM200B LN:293 @SQ SN:B3GAT2-2_P001 LN:204 Seq1Perfect 0 B3GAT2-2_P001 1 37 204M * 0 0 GGTTGGTTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== XT:A:U NM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:204 Seq2MM 0 B3GAT2-2_P001 1 37 204M * 0 0 GGTTAATTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:4G0G198 Seq3In 0 B3GAT2-2_P001 1 37 12M1I192M * 0 0 GGTTGGTTTTTAGTTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT &a==aa===a==a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:204 Seq4Del 0 B3GAT2-2_P001 1 37 55M6D143M * 0 0 GGTTGGTTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aa XT:A:U NM:i:6 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:6 MD:Z:55^GAAGAA143 Seq5Partial 0 B3GAT2-2_P001 1 37 204M * 0 0 GGTTGGTTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTTGTTGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:45C2C155 Seq6TruncB 0 B3GAT2-2_P001 1 37 189M * 0 0 GGTTGGTTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGT &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa= XT:A:U NM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:189 Seq7TruncF 0 B3GAT2-2_P001 16 37 189M * 0 0 TTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT ===aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== XT:A:U NM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:189 Seq8Incomplete 4 * 0 0 * * 0 0 GGTTGGTTCTTATTCCTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== #### >TMEM200B CTCCTCTGCCTGGCTGGTCTTGATCCGAGCGGTCTTCCCGGTGTCTAGCTCAAGTCGCTCCTGCTGCAGCTTCGCTGCGGGCGGAGGAGGTCTGGAAGGAGGGGGCGGGCAGGGAGAGGCTGGAGCCGGTGACGCCCCCTCCTCCCGCGCTGCGGTATGTAAAGCACAGTAGGGGGGAGGTGGGGCCCGGCGAGCGACCCCTGCGGACCTGGGAGGCCCGAGCGCCCCCGCCCCATTTGCTACGGTGCAGCCACGTGCGGGGGTGGGGTCGAGCCCGGGAGGTACTTACCCTGGAGA >B3GAT2-2_P001 GGCTGGCCTTTACCTCCTGGAAGAGCTCCAGACTATAGGTGTTGTCGTCGTCAGCGAAGAAGAGCACGCCGGGCTGCGCGCGCTGGTGCTGGTGCCTCTGGCGCAGCCAGGCGAGGCCCGCGTTGCGCTGCTCAGTGGCGCGCGGCAGCCCGGGCCGCTTGTAGCGCCGCGGCGTGGGCACGTGCAGGTGAGTGCTGGGCAGCC #### @Seq1Perfect GGTTGGTTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT +Seq1Perfect &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== @Seq2MM GGTTAATTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT +Seq2MM &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== @Seq3In GGTTGGTTTTTAGTTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT +Seq3In &a==aa===a==a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== @Seq4Del GGTTGGTTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT +Seq4Del &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aa @Seq5Partial GGTTGGTTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTTGTTGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT +Seq5Partial &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== @Seq6TruncB GGTTGGTTTTTATTTTTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGT +Seq6TruncB &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa= @Seq7TruncF TTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT +Seq7TruncF ===aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa== @Seq8Incomplete GGTTGGTTCTTATTCCTTGGAAGAGTTTTAGATTATAGGTGTTGTCGTCGTTAGCGAAGAAGAGTACGTCGGGTTGCGCGCGTTGGTGTTGGTGTTTTTGGCGTAGTTAGGCGAGGTTCGCGTTGCGTTGTTTAGTGGCGCGCGGTAGTTCGGGTCGTTTGTAGCGTCGCGGCGTGGGTACGTGTAGGTGAGTGTTGGGTAGTT +Seq8Incomplete &a==aa=====a======aaaaaaa====aaa==a=aaa=a==a=$a=$a==aa$aaaaaaaaa=a$a=$aaa==a$a$a$a==aa=a==aa=a=====aa$a=aa==aaa$aaaa==$a$a==a$a==a===aa=aa$a$a$aa=aa==$aaa=$a===a=aa$a=$a$aa$a=aaa=a$a=a=aaa=aaa=a==aaa=aa==