in reply to Re: .qual File Writing Complication
in thread .qual File Writing Complication
A .qual file is a numerical representation of the confidence of a nucleotide sequencer. Used by the modern, automated sequencers, they assign a 2 digit value to each nucleotide ACTGU (which are what all DNA is composed of) representing the accuracy of the sequencer guessing at that particular nucleotide (in my case, 40 being = 100% confidence). For example: if I were a DNA sequencer and I KNOW that the first three nucleotides are ACT but I'm only partially confident that the last nucleotide is a G. The .qual file would look something like this: 40 40 40 34. I hope this makes sense! Let me know if you need/would like further explanation!
#!/usr/bin/perl -w use strict; use warnings; use Bio::Seq; use Bio::SeqIO; use Bio::Seq::Quality; my $seq_count = 0; my $used_seqs; my $match; my $front; my $end; my $front_tag = 0; my $end_tag = 0; my $nfront = 0; my $nend = 0; my $no_match = 0; my $seq_file = "fastaTestData.fna"; my $qual_file = "qualTestData.qual"; my $seqIn_obj = Bio::SeqIO->new(-file => "$seq_file", -format => 'fast +a') or die "Can't access $seq_file\n"; #creates the fasta output file print "creating fasta output file\n"; my $fasta = Bio::SeqIO->new(-file => '>testFasta.fasta', -format => 'f +asta'); # creates new qual output file print "creating quality output file\n"; my $quality = Bio::SeqIO->new(-file => '>testQual.qual', -format => 'q +ual'); #reads in the tag file as a fasta file print "Reading in tag file\n"; my $tag_obj = Bio::SeqIO->new(-file => "mids.fa", -format => 'fasta') +or die "Can't access mids.fa\n"; #adds the tag id and sequence into a hash print "creating tag hash\n"; my %tag_hash = (); while(my $tag = $tag_obj->next_seq){ # instantiates the hash map, using the tag id as the key and the t +ag sequence as the value. $tag_hash{$tag->id} = $tag->seq; } # reads in the quality files associated with the fasta files. print "reading in qual file\n"; my $qual_obj = Bio::SeqIO->new(-file => "$qual_file", -format => "qual +") or die "Can't access $qual_file"; print "creating qual hash\n"; my %qual_hash = (); while(my $qual_seq = $qual_obj->_next_qual){ # next line is pushing a reference to an array onto an array of sc +alar array references $qual_hash{$qual_seq->id()} = $qual_seq->qual(); } # reads in the next sequence (next_seq) and stores it in $seqIn_obj th +en prints it out. print "Applying regexs\n"; while (my $seq = $seqIn_obj->next_seq){ print "getting new sequence\n"; $seq_count += 1; $match = 0; $front = 0; $end = 0; my $temp_seq = $seq->seq; while((my $id, my $tag_seq) = each(%tag_hash)){ print "cycling through tag hash\n"; # reverses the tag_seq my $rtag_seq = reverse $tag_seq; for(my $i = 0; $i <= 1; $i += 1){ if($temp_seq =~ s/^$tag_seq//i){ print "removing tag\n"; $match += 1; $front += 1; $front_tag = 1; } if($temp_seq =~ s/^$rtag_seq//i){ print "removing tag\n"; $match += 1; $front += 1; $front_tag = 1; } if($temp_seq =~ s/$tag_seq$//i){ print "removing tag\n"; $match += 1; $end += 1; $end_tag = 1; } if($temp_seq =~ s/$rtag_seq$//i){ print "removing tag\n"; $match += 1; $end += 1; $end_tag = 1; } } } if($front > 1){ $nfront += 1; } if($end > 1){ $nend += 1; } if($match == 0){ $no_match += 1; } if($front <= 1 && $end <= 1 && $match != 0){ print "removing 6 nts and amending qual sequence\n"; $used_seqs += 1; my @qual = @{$qual_hash{$seq->id()}}; print ("@qual\n"); my $qual_string = join(" ", @qual); print "$qual_string\n"; $qual_string =~ s///; print "$qual_string\n"; if($front_tag == 1){ $qual_string =~ s/^\w{32}//; $temp_seq =~ s/^\w{6}//; $front_tag = 0; } elsif($end_tag == 1){ $qual_string =~ s/\w{32}$//; $temp_seq =~ s/\w{6}$//; $end_tag = 0; } # creates the new sequence and passes the metadata to the sequ +ence object print "creating fasta seq and writing to file\n"; my $new_seq = Bio::Seq->new(-seq => $temp_seq, -display_id => +$seq->id, -length => $seq->length, -desc => $seq->desc, -alphabet => $seq->alphabet); # writes the new sequence object to the specified file $fasta->write_seq($new_seq); print "creating qual seq and writing to file\n"; my $new_qual = Bio::Seq::Quality->new(-seq => $qual_string, -i +d => $seq->id(), -desc => $seq->desc); $quality->write_seq($new_qual); } } print("nfront = $nfront\nnend = $nend\nno_match = $no_match\nseq_count + = $seq_count\nused_seqs = $used_seqs\n");
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^3: .qual File Writing Complication
by ww (Archbishop) on Jul 08, 2010 at 21:09 UTC | |
|
Re^3: .qual File Writing Complication
by graff (Chancellor) on Jul 09, 2010 at 01:20 UTC | |
|
Re^3: .qual File Writing Complication
by furry_marmot (Pilgrim) on Jul 09, 2010 at 04:55 UTC | |
|
Re^3: .qual File Writing Complication
by cjfields (Novice) on Jul 12, 2010 at 20:08 UTC |