#!/usr/bin/perl
use strict;
use warnings;

use File::Basename qw(fileparse);
use File::Spec;

# Quality trimming of a FASTQ file.
#
# Usage: perl <script> [fastq_file [window_length [min_quality]]]
#
# Every 4-line record is trimmed in two passes (see trim()):
#   1. bases whose quality score is below the cut-off are removed from both
#      ends of the read;
#   2. the remainder is cut into consecutive windows of the window length and
#      only windows whose mean score reaches the cut-off are kept.
#
# The trimmed records are written to "trimmed_<name>" next to the input file
# and echoed, with per-window diagnostics, to STDOUT.
#
# The record after __DATA__ is a sample only; the script does not read it.

# Trimming parameters. They are file-scoped because trim() reads them;
# main() may override the defaults from the command line.
my $kmer    = 3;     # window length, in bases
my $cut_off = 21;    # minimum acceptable quality score

# Run only when executed as a script, so that the file can also be loaded
# (require/do) to call trim() without touching any files.
main() unless caller;

# Parse the command line, then read, trim and write every record.
# Dies on invalid parameters and on any failure to open or close a file.
sub main {
    my ($file, $window, $min_quality) = @ARGV;
    $file = 'lib-pool-fosmid.fastq' unless defined $file;

    if (defined $window) {
        # A zero-length window would never consume any bases in trim().
        die "Window length must be a positive integer, got '$window'\n"
            unless $window =~ /\A[1-9][0-9]*\z/;
        $kmer = $window;
    }
    if (defined $min_quality) {
        die "Quality cut-off must be a non-negative integer, got '$min_quality'\n"
            unless $min_quality =~ /\A[0-9]+\z/;
        $cut_off = $min_quality;
    }

    print "Window length: $kmer\n";
    print "Minimum quality score cut-off: $cut_off\n";

    # Prefix the file name, not the whole path, so inputs given with a
    # directory component still produce a valid output path.
    my ($name, $dir) = fileparse($file);
    my $out_file = File::Spec->catfile($dir, 'trimmed_' . $name);

    open my $in,  '<', $file     or die "Could not open $file : $!";
    open my $out, '>', $out_file or die "Could not open $out_file : $!";

    RECORD:
    while (defined(my $header = <$in>)) {
        next RECORD if $header =~ /\A\s*\z/;    # tolerate blank lines

        # input 4 line records
        my @rec = ($header, scalar <$in>, scalar <$in>, scalar <$in>);
        if (grep { !defined } @rec) {
            warn "Ignoring truncated record at end of $file\n";
            last RECORD;
        }
        chomp @rec;

        # process
        @rec = trim(@rec);

        # output
        print "$_\n" for @rec;
        print {$out} "$_\n" for @rec;
    }

    close $in;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "Could not close $out_file : $!";

    print "\nOutput written to '$out_file'\n";
    return;
}

# trim(@record) -> @record
#
# Takes one FASTQ record as a 4-element list (header, sequence, separator,
# quality string) and returns the same list with elements 1 and 3 replaced by
# the trimmed sequence and quality string. Elements 0 and 2 are returned
# unchanged. Scores are computed as the ASCII code of each quality character
# minus 33. Uses the file-scoped $kmer and $cut_off, and prints diagnostics
# to STDOUT.
#
# Assumes the sequence and quality strings have equal length.
sub trim {
    my @rec = @_;

    my @dna   = split //, $rec[1];
    my @qual  = split //, $rec[3];
    my @score = map { ord($_) - 33 } @qual;

    # lower quality from start
    my $start = 0;
    for my $value (@score) {
        last if $value >= $cut_off;
        ++$start;
    }

    # lower quality from end
    my $end = $#qual;
    for my $value (reverse @score) {
        last if $value >= $cut_off;
        --$end;
    }
    print "Start=$start End=$end\n";

    # trim; when no base reaches the cut-off, $start > $end and the range,
    # hence every slice, is empty
    @dna   = @dna[$start .. $end];
    @qual  = @qual[$start .. $end];
    @score = @score[$start .. $end];
    print 'Trimmed ', join('', @dna), "\n";

    # check each window
    print "\nWindows\n";
    my @final_dna;
    my @final_qual;
    while (@dna) {
        # splice always removes at least one element here, so the last
        # (possibly shorter) window is never empty and the mean is defined
        my @dna1   = splice @dna,   0, $kmer;
        my @qual1  = splice @qual,  0, $kmer;
        my @score1 = splice @score, 0, $kmer;

        my $sum = 0;
        $sum += $_ for @score1;
        my $avg = $sum / @score1;
        print "@dna1 | @qual1 | @score1 | Sum=$sum Avg=$avg\n";

        # The cut-off is a minimum, so a window whose mean equals it is
        # kept, matching the >= test used for end trimming above.
        if ($avg >= $cut_off) {
            push @final_dna,  @dna1;
            push @final_qual, @qual1;
        }
    }

    $rec[1] = join '', @final_dna;
    $rec[3] = join '', @final_qual;
    return @rec;
}

1;

__DATA__
@SEQ_ID
GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
+
!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65