#!/usr/bin/env perl
# Created by C. Pells, M. R. Snyder, and N. T. Marshall 2017
# Script trims and merges high throughput sequencing reads from fastq files
# for a specific primer set.
#
# Overview of what this script does:
#   1. Lists the current working directory and treats every sub-directory
#      whose name is at most MAX_SAMPLE_NAME_LENGTH characters as a sample.
#   2. For each sample, loads every "*_R2_001.fastq" (reverse) file into a
#      hash keyed by the trimmed read name.
#   3. For each "*_R1_001.fastq" (forward) file, writes every read whose
#      trimmed name also occurs in the reverse hash to "<sample>/Merged.fasta".
use strict;
use warnings;
use Cwd;

# Number of leading header characters used as the read name when pairing
# forward and reverse reads.
use constant NAME_LENGTH => 45;

# All sample directory names are 6 characters or fewer.
use constant MAX_SAMPLE_NAME_LENGTH => 6;

# Returns the read name used for pairing: the first NAME_LENGTH characters
# of a fastq header line.
sub read_name {
    my ($header) = @_;
    return substr($header, 0, NAME_LENGTH);
}

# Reads the next four-line fastq record from an open filehandle.
# Returns the four chomped lines (header, sequence, separator, quality), or
# an empty list at end of file.  A trailing record with fewer than four
# lines is dropped.
sub next_fastq_record {
    my ($fh) = @_;
    my @record;
    while (@record < 4) {
        my $line = <$fh>;
        return unless defined $line;
        chomp $line;
        push @record, $line;
    }
    return @record;
}

my $StartTime = localtime;    # start timestamp; not used further in this script
my $MasterDir = getcwd;       # obtains the current directory

opendir(my $master_dh, $MasterDir) or die "cannot open $MasterDir: $!";
my @objects = readdir($master_dh);
closedir($master_dh);

foreach my $object (@objects) {
    print $object, "\n";
}

# Collect the sample directories.  "." and ".." are skipped by name because
# readdir does not guarantee that they are returned first, and plain files
# with short names are skipped so they are not mistaken for samples.
my @Dirs = ();
foreach my $object (@objects) {
    next if $object eq '.' || $object eq '..';
    next if length($object) > MAX_SAMPLE_NAME_LENGTH;
    my $CurrDir = $MasterDir . "/" . $object;    # appends directory name to full path
    next unless -d $CurrDir;
    push(@Dirs, $CurrDir);
}

foreach my $dir (@Dirs) {
    print $dir, "\n";    # checks that all directories were read in
}

foreach my $dir (@Dirs) {
    opendir(my $dh, $dir) or die "cannot open $dir: $!";
    my @AbsFiles = map { $dir . "/" . $_ } readdir($dh);    # full path of every entry
    closedir($dh);

    # Reverse reads of this sample, keyed by trimmed read name.  Declared at
    # sample scope so that the forward-file loop below looks up the SAME hash
    # that the reverse-file loop filled; declaring it inside the reverse loop
    # left the forward loop with an empty hash and an empty output file.
    my %RSeqHash;

    foreach my $reverse_file (@AbsFiles) {
        next unless $reverse_file =~ /_R2_001\.fastq$/m;    # finds reverse fastq file

        # NOTE(review): the message mentions reversing and complementing, but
        # the sequence is stored exactly as read; only the name is trimmed.
        print "Reading, reversing, complimenting, and trimming reverse fastq file $reverse_file\n";
        open(my $in, '<', $reverse_file)
            or die "Can't open file $reverse_file: $!\n";

        my $seen = 0;    # records read so far, used only for progress output
        while (my @record = next_fastq_record($in)) {
            my $rsn = read_name($record[0]);    # trims reverse seq name
            $seen++ % 10000 == 0 and print "$rsn\n";
            $RSeqHash{$rsn} = $record[1];
        }
        close($in);
    }

    foreach my $forward_file (@AbsFiles) {
        next unless $forward_file =~ /_R1_001\.fastq$/m;    # finds forward fastq file

        print "Reading forward fastq file $forward_file\n";
        open(my $in, '<', $forward_file)
            or die "Can't open file $forward_file: $!\n";

        # NOTE(review): every forward file of a sample writes to the same
        # Merged.fasta, so with several R1 files only the last one survives.
        my $OutMergeName = $dir . "/" . "Merged.fasta";
        open(my $out, '>', $OutMergeName)
            or die "Can't open file $OutMergeName for writing: $!\n";

        my $cc = 0;    # unmatched reads so far, used to throttle the message
        while (my @record = next_fastq_record($in)) {
            my $fsn = read_name($record[0]);    # trims forward seq name

            if (exists $RSeqHash{$fsn}) {
                # forward seq name is present in reverse seq hash
                print "$fsn was found in Reverse Seq Hash\n";
                # NOTE(review): the name is written as it appears in the fastq
                # header (leading '@'), not with a FASTA '>' prefix.
                print {$out} "$fsn\n$record[1]\n";
            }
            else {
                $cc++ % 10000 == 0 and print "$fsn not found in Reverse Seq Hash\n";
            }
        }

        close($in);
        # Buffered write errors only surface when the handle is closed.
        close($out) or die "Can't close $OutMergeName: $!\n";
    }
}