#!/usr/bin/perl -w

use strict;
use warnings;
# Normalise unitas tRF counts to reads per million (RPM).
#
# For every directory in the current working directory whose name starts
# with "UNITAS_", the script
#   1. sums the read counts found in each *.mapped_sequences file,
#   2. reads unitas.tRF-table.txt from the same directory, and
#   3. writes one row per tRNA (tab separated, one column per tRF type,
#      each value scaled to reads per million) to the file "merge".
# Progress (folder, file, total reads, tRNA names) is printed to STDOUT.

# Name of the tRF table expected inside each UNITAS_ folder.
my $TRF_TABLE = 'unitas.tRF-table.txt';

# Output file, created in the current working directory.
# NOTE(review): this path is the same for every processed input file, so
# each iteration overwrites the previous result -- confirm this is intended.
my $MERGE_FILE = 'merge';

# Number of leading lines of the tRF table that are skipped before data rows.
my $TRF_HEADER_LINES = 4;

# tRF types in output-column order.
my @TRF_TYPES = (
    '5p-tR-halves', '5p-tRFs', '3p-tR-halves', '3p-CCA-tRFs',
    '3p-tRFs',      'tRF-1',   'tRNA-leader',  'misc-tRFs',
);

# Each type is read from every second table column, starting at index 1
# (1, 3, 5, ... 15); index 0 holds the tRNA name.
my %COLUMN_OF;
@COLUMN_OF{@TRF_TYPES} = map { 2 * $_ + 1 } 0 .. $#TRF_TYPES;

for my $folder (glob('*')) {
    next if $folder !~ /^UNITAS_/;    # only entries named UNITAS_*
    next if !-d $folder;              # ... that are really directories

    opendir(my $dir, $folder)
        or die "Cannot open directory '$folder': $!\n";
    print "\n$folder";

    while (defined(my $file = readdir $dir)) {
        next if $file !~ /\.mapped_sequences$/;
        print "\n$file";

        my $reads = count_reads("$folder/$file");
        print "\n$reads";

        # Without reads there is nothing to normalise against (and the
        # division below would be a division by zero).
        if ($reads == 0) {
            warn "\nNo reads counted in '$folder/$file'; skipping\n";
            next;
        }

        my $rpm_of = read_trf_table("$folder/$TRF_TABLE", $reads);
        next if !defined $rpm_of;     # table could not be opened

        write_merge($MERGE_FILE, $rpm_of);
    }

    # Close the directory only after all of its entries have been visited.
    closedir $dir;
}

# Sum the read counts stored in a mapped_sequences file.
#
# Every line has its ">" characters removed; lines that then still contain
# a letter are treated as sequence lines and ignored, and the leading
# integer of each remaining line is added to the total.
#
# Parameters: $path - file to read.
# Returns:    the total as a number (0 if no count line was found).
# Dies:       if the file cannot be opened.
sub count_reads {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Cannot open '$path': $!\n";

    my $total = 0;
    while (my $record = <$fh>) {
        $record =~ s/>//g;
        next if $record =~ /[A-Za-z]/;          # sequence line
        next if $record !~ /^\s*([0-9]+)/;      # no count on this line
        $total += $1;
    }
    close $fh;

    return $total;
}

# Read a tRF table and accumulate reads-per-million values per tRNA.
#
# Rows are grouped by tRNA name: the first "tRNA-<x>-<3 chars>" match in
# the first column if there is one, otherwise the first column with a
# trailing "-ENS..." part removed. Rows sharing a name are summed.
#
# Parameters: $path  - tRF table to read.
#             $reads - total read count used for normalisation (non-zero).
# Returns:    hash reference { tRNA name => { tRF type => RPM } }, or
#             nothing (after a warning) if the table cannot be opened.
sub read_trf_table {
    my ($path, $reads) = @_;

    my $fh;
    if (!open($fh, '<', $path)) {
        warn "\nCannot open '$path': $!\n";
        return;
    }

    # Skip the header lines.
    for (1 .. $TRF_HEADER_LINES) {
        my $header = <$fh>;
        last if !defined $header;
    }

    my %rpm_of;
    while (my $row = <$fh>) {
        $row =~ s/\r?\n\z//;
        next if $row !~ /\S/;                   # blank line

        my @fields = split /\t/, $row;

        my $name = $fields[0];
        my $tRNA_name;
        if ($name =~ /(tRNA-[^-]+-...)/) {
            $tRNA_name = $1;
        }
        else {
            ($tRNA_name = $name) =~ s/-ENS.+$//;
        }
        print "\n$tRNA_name";

        for my $type (@TRF_TYPES) {
            my $count = $fields[ $COLUMN_OF{$type} ];
            # Short rows / empty cells count as zero.
            $count = 0 if !defined $count || $count eq '';
            $rpm_of{$tRNA_name}{$type} += $count / $reads * 1000000;
        }
    }
    close $fh;

    return \%rpm_of;
}

# Write the normalised table: one line per tRNA, sorted by name, with the
# name followed by one tab-separated value per entry of @TRF_TYPES.
#
# Parameters: $path   - output file (truncated if it exists).
#             $rpm_of - hash reference as returned by read_trf_table().
# Dies:       if the file cannot be opened or closed.
sub write_merge {
    my ($path, $rpm_of) = @_;

    open(my $out, '>', $path) or die "Cannot write '$path': $!\n";

    for my $tRNA_name (sort keys %{$rpm_of}) {
        my @values = map { $rpm_of->{$tRNA_name}{$_} } @TRF_TYPES;
        print {$out} join("\t", $tRNA_name, @values), "\n";
    }

    # Buffered write errors only surface at close time.
    close $out or die "Cannot close '$path': $!\n";

    return;
}