#!/usr/bin/perl use strict; use warnings; my ($rna,$rnalen,$posi, $pos, $base, $dibase,$percent); my (%bases, %occur, %count); my($max_position, $sum, $tot) = 0; my @dinucleotide = qw(A C G T);#change the number of bases my $file = $ARGV[0] or die "Please provide the input file\n"; my $column = $ARGV[1]; open (X, $file) || die "Check the input $file\n"; while () { # chomp $_; if ($_=~/^#/) { next; } if ($_=~/^\s+/) { next; } if ($_=~/^CLUSTAL/) { next; } if ($_=~/\*/) { next; } my @temp = split (/\s+/, $_); $rna = $temp[$column]; $rna = uc $rna; $rnalen = length($rna); my @nucleotides = split (//,$rna); $tot++; foreach my $key(@dinucleotide) { $bases{$key}=1; $sum++; } $max_position = $rnalen if($rnalen > $max_position); for (my $position=0;$position<=$rnalen-1;$position++) { $dibase = substr($rna, $position, 1); #change the splitting number foreach my $dinucleotide(@dinucleotide) { if ($dibase eq $dinucleotide){ $posi = $position+1; $occur{$posi,$dibase}++; # print "$posi\t$dibase\n"; } } } } for($pos=1;$pos<=$max_position;$pos++) { print "$pos\t"; foreach $base(sort keys %bases) { print "$base\t"; if(exists $occur{$pos,$base}) { my $percent = sprintf ("%0.1f", (($occur{$pos,$base})/$tot)*100); # print "$occur{$pos,$base}\t"; print "$percent\t"; } else { print "0\t"; } } print "\n"; } close (X);