use warnings; use strict; use Data::Dumper; ## CAUTION: # of sequences in BWA file and # of sequences in input sequencing data should be equal!!## # BWA alignment output (.sam) my %bwa = (); my @splitstore=(); my @splitstoreMatch=(); my @splitstoreDel=(); my @splitstoreIns=(); my @splitcheck=(); my %delxstore=(); my %delystore=(); my %insxstore=(); my %insystore=(); my $ssl=(0); my $file1 = shift; open (FILE1, "$file1") || die "Failed to open $file1 for reading : $!"; # Open second file while () { # Reading second hash if ($_ =~ /^[^@]/s) { chomp; my @line = split /\s+/, $_; my $ID; if ($line[2] =~ /[^*]/) { $ID = $line[0]; push @{$bwa{$ID}}, @line[0,2,5,12,15,16,17,18,3]; #$bwa{$ID}[0] = $line[0]; # seq ID #$bwa{$ID}[1] = $line[2]; # Ref ID #$bwa{$ID}[2] = $line[5]; # CIGAR ID for insertion #$bwa{$ID}[3] = @line[9]; # Processed seq (already C->T) #$bwa{$ID}[3] = $line[12]; # Edit distance (edited area by # of base) : NM #$bwa{$ID}[4] = $line[15]; # No. of mismatches in the alignment : XM #$bwa{$ID}[5] = $line[16]; # No. of gap opens for insertion : XO #$bwa{$ID}[6] = $line[17]; # No. of gap extensions for deletion :XG #$bwa{$ID}[7] = $line[18]; # Mismatching positions / bases : MD #$bwa{$ID}[8] = $line[3]; # Starting position #$bwa{$ID}[10] = Total Number of Bases #$bwa{$ID}[11] = Deletions (0 if no) #$bwa{$ID}[12] = Insertions (0 if no) #$insstore{$ID}[X] = X,Y of bases to match, then length of deletion #$delstore{$ID}[X] = X,Y of bases until insertion, then length of insertion # Modifying CIGAR ID's to keep track of matches, deletions, insertions @splitstore = split /[A-Z]/, $bwa{$ID}[2]; #just the values $ssl = scalar @splitstore; @splitstoreDel = split /[^A-Z]+[M,I](?![D])/, $bwa{$ID}[2]; chop(@splitstoreDel); #just the deletions @splitstoreIns = split /[^A-Z]+[M,D](?![I])/, $bwa{$ID}[2]; chop(@splitstoreIns); #just the insertions @splitstoreMatch = split /[^A-Z]+[D,I](?![M])/, $bwa{$ID}[2]; chop(@splitstoreMatch); #just the matches $splitcheck[0] = $bwa{$ID}[2]; #Internal storage of insertion and deletions foreach my $i (@splitstoreDel){ unless($i eq ""){ $bwa{$ID}[11] +=1;} #define number of insertions } foreach my $i (@splitstoreIns){ unless($i eq ""){ $bwa{$ID}[12] +=1;} #define number of insertions } #Storing Total Bases foreach my $i (@splitstoreIns){ #for each insertion unless($i eq ""){ $bwa{$ID}[10] +=$i;} } foreach my $i (@splitstoreMatch){ #for each match unless($i eq ""){ $bwa{$ID}[10] +=$i;} } #Define positions of Deletions and insertions my $tempM = 0; for(my $i = 0;$i<$ssl;$i++){ if ($splitcheck[0] =~ /^[^A-Z]+[M]/) { #if first section match @splitcheck = split /^[^A-Z]+[M]/, $splitcheck[0]; #eat first section shift(@splitcheck); #get rid of "" $tempM = $splitstore[$i] + $tempM; #shift number of matched bases } elsif ($splitcheck[0] =~ /^[^A-Z]+[D]/){ #if first section deletion @splitcheck = split /^[^A-Z]+[D]/, $splitcheck[0]; #eat first section shift(@splitcheck); #get rid of "" push @{$delxstore{$ID}}, $tempM; ##Note: need to store: ID, X position (number of bases before deletion), Y position (number of bases deleted) push @{$delystore{$ID}}, $splitstore[$i]; $tempM = $splitstore[$i] + $tempM; #shift number of matched bases } elsif ($splitcheck[0] =~ /^[^A-Z]+[I]/){ #if first section insertion @splitcheck = split /^[^A-Z]+[I]/, $splitcheck[0]; #eat first section shift(@splitcheck); #get rid of "" push @{$insxstore{$ID}}, $tempM; push @{$insystore{$ID}}, $splitstore[$i]; ##Note: need to store: ID, X position (number of bases before insertion), Y position (number of bases ins) $tempM = $splitstore[$i] + $tempM; #shift number of matched bases } } ## Bug Checking #print "\n\t@splitstore\t\t"; #print "@splitstoreDel\t\t"; #print "@splitstoreIns\t\t"; #print "$bwa{$ID}[10]\t\t\t"; #if (exists $delxstore{$ID}[1]){print "$delxstore{$ID}[1]\t";} #if (exists $delystore{$ID}[1]){print "$delystore{$ID}[1]\n";} #if (exists $insxstore{$ID}[1]){print "$insxstore{$ID}[1]\t";} #if (exists $insystore{$ID}[1]){print "$insystore{$ID}[1]\n";} #print "$insstore{$ID}\n"; } } } close FILE1 || die "Failed to close $file1 : $!";