#!/usr/bin/perl
use strict;
use warnings;

# Builds sense-strand (SS) and antisense-strand (AS) records from a
# whitespace-separated table that holds one target sequence per line.
#
# Usage: Program_name filename column_number
#   filename       input table, one record per line; blank lines are ignored
#   column_number  index into the fields of each line (0 is the first field;
#                  a negative value counts from the last field)
#
# Files written (NAME is the part of filename before its first '.'):
#   NAME_detailed_info.txt     one tab-separated line per accepted target:
#                              id, target, target length, target GC%,
#                              short mRNA, "SS", short mRNA length,
#                              short mRNA GC%, antisense strand, "AS",
#                              antisense length
#   temp_NAME_concatenate.dat  two lines per accepted target:
#                              "id SS short-mRNA" and "id AS antisense"
#   NAME_log.file              one line per problem found in a rejected record
#
# A target is accepted only if it consists solely of A/T/G/C (after
# upper-casing) and is exactly $TARGET_LEN bases long.

my $TARGET_LEN = 19;

# Validate the command line before touching any argument.
if (@ARGV != 2 || $ARGV[1] !~ /\A-?\d+\z/) {
    print STDERR "Usage: Program_name filename column_number\n";
    exit 1;
}
my ($file, $splitcolumn) = @ARGV;

# Open the input first so that a bad filename does not leave empty output
# files behind.
open(my $in, '<', $file) or die "Could not open the file, $!";

# Output names are derived from the text before the first '.' of the
# filename exactly as given on the command line.
my ($name) = split /\./, $file;
$name = '' unless defined $name;

my $detail_file = $name . "_detailed_info.txt";
my $concat_file = "temp_" . $name . "_concatenate.dat";
my $log_file    = $name . "_log.file";

open(my $detail, '>', $detail_file)
    or die "Could not open $detail_file for writing, $!";
open(my $concat, '>', $concat_file)
    or die "Could not open $concat_file for writing, $!";
open(my $log, '>', $log_file)
    or die "Could not open $log_file for writing, $!";

my $count     = 1;    # numeric suffix of the next accepted target's id
my $record_no = 0;    # position of the current non-blank record in the input

LINE:
while (my $line = <$in>) {
    chomp $line;
    next LINE if $line =~ /^\s*$/;
    $record_no++;

    # Split on every single whitespace character, so consecutive separators
    # produce empty fields and column positions are preserved.
    my @fields = split /\s/, $line;
    my $raw    = $fields[$splitcolumn];
    if (!defined $raw) {
        print {$log} "Error: Target sequence # $record_no: line has no column $splitcolumn.\n";
        next LINE;
    }

    my $seq    = uc $raw;
    my $lenseq = length $seq;

    # Report every problem with this record, then skip the whole record.
    my $rejected = 0;
    if ($seq =~ /[^ATGC]/) {
        print {$log} "Error: Target sequence # $record_no: [" . $seq . "] has bases other than ATGC.\n";
        $rejected = 1;
    }
    if ($lenseq != $TARGET_LEN) {
        print {$log} "Error: Target sequence # $record_no: [" . $seq . "] is of " . $lenseq . "-bp.\n";
        $rejected = 1;
    }
    next LINE if $rejected;

    # GC content of the full target, as a percentage with two decimals.
    my $gc_target = sprintf "%.2f", (($seq =~ tr/GC//) / $lenseq) * 100;

    # Sense strand: the target with T replaced by U, minus its first three
    # bases and its last base.
    (my $mrna = $seq) =~ tr/T/U/;
    my $shortmrna    = substr $mrna, 3, -1;
    my $lenshortmrna = length $shortmrna;
    my $gc_short     = sprintf "%.2f",
        (($shortmrna =~ tr/GC//) / $lenshortmrna) * 100;

    # Antisense strand: map each base A->U, T->A, G->C, C->G, reverse the
    # result, and prefix it with "AA".
    (my $antisense = $seq) =~ tr/ATGC/UACG/;
    my $reverse    = "AA" . scalar reverse($antisense);
    my $lenreverse = length $reverse;

    my $id = $name . $count;
    print {$detail} "$id\t$seq\t$lenseq\t$gc_target\t$shortmrna\tSS\t$lenshortmrna\t$gc_short\t$reverse\tAS\t$lenreverse\n";
    print {$concat} "$id\tSS\t$shortmrna\n";
    print {$concat} "$id\tAS\t$reverse\n";
    $count++;
}

close($in) or die "Could not close $file, $!";

# Buffered write errors only surface at close, so check every output handle.
close($detail) or die "Could not close $detail_file, $!";
close($concat) or die "Could not close $concat_file, $!";
close($log)    or die "Could not close $log_file, $!";