#!/usr/bin/perl
# Report, for every column of a CLUSTAL multiple sequence alignment, the
# percentage of sequences that carry each nucleotide in that column.
#
# Usage:  perl THIS_SCRIPT ALIGNMENT_FILE
#
# Output: one line per alignment column, consisting of the 1-based column
# number followed by tab-separated "NUCLEOTIDE<TAB>PERCENT<TAB>" pairs, one
# pair for every nucleotide letter seen anywhere in the file (sorted).
# Gaps ('-') are never tallied as a letter, but sequences carrying a gap
# still count towards the denominator.
use strict;
use warnings;
use Syntax::Construct qw{ // };

# Read an alignment from the open filehandle $in and tally nucleotides.
#
# A "sequence line" is any line ending in whitespace followed by a run of
# the characters [-actg].  Consecutive sequence lines form one block; any
# other line (empty, whitespace-only, conservation markers, ...) ends the
# block.  Lines starting with "CLUSTAL" are skipped entirely.
#
# Returns a three-element list:
#   \%occurrences - $occurrences{$letter}[$column] = number of sequences
#                   with $letter in 1-based $column (undef where none)
#   $endpos       - number of the last alignment column seen (0 if none)
#   $count        - largest number of sequence lines found in any block
sub tally_alignment {
    my ($in) = @_;

    my %occurrences;
    my $endpos     = 0;
    my $startpos   = 1;    # column of the first character of the current block
    my $in_block   = 0;
    my $block_rows = 0;
    my $count      = 0;

    LINE:
    while (my $line = <$in>) {
        next LINE if $line =~ /^CLUSTAL/;

        if ($line =~ /\s+ ([-actg]+) \s*$/x) {
            my $residues = $1;

            if (!$in_block) {
                # First sequence line of a new block: it continues right
                # after the last column of the previous block.
                $in_block   = 1;
                $startpos   = $endpos + 1;
                $block_rows = 0;
            }

            # Keep the maximum so that trailing blank lines or a short
            # final block cannot reset the denominator to zero.
            ++$block_rows;
            $count = $block_rows if $block_rows > $count;

            my $last_column = $startpos + length($residues) - 1;
            $endpos = $last_column if $last_column > $endpos;

            my @nucleotides = split //, $residues;
            for my $pos (0 .. $#nucleotides) {
                next if $nucleotides[$pos] eq '-';
                ++$occurrences{ $nucleotides[$pos] }[ $startpos + $pos ];
            }
        }
        else {
            $in_block = 0;
        }
    }

    return (\%occurrences, $endpos, $count);
}

# Turn the tallies produced by tally_alignment() into the report lines
# (each one newline-terminated).  Returns an empty list when $endpos is 0.
sub report_lines {
    my ($occurrences, $endpos, $count) = @_;

    my @letters = sort keys %{$occurrences};
    my @lines;
    for my $pos (1 .. $endpos) {
        my $line = "$pos\t";
        for my $nucleotide (@letters) {
            $line .= sprintf "%s\t%0.1f\t", uc $nucleotide,
                100 * ($occurrences->{$nucleotide}[$pos] // 0) / $count;
        }
        push @lines, "$line\n";
    }
    return @lines;
}

# Entry point: tally the alignment file named by the first argument and
# print the report to STDOUT.  Dies when the argument is missing or the
# file cannot be opened.
sub main {
    my ($file) = @_;
    die "Usage: $0 ALIGNMENT_FILE\n" unless defined $file;

    open my $in, '<', $file
        or die "Cannot open '$file': $!\n";
    my ($occurrences, $endpos, $count) = tally_alignment($in);
    close $in
        or die "Cannot close '$file': $!\n";

    print report_lines($occurrences, $endpos, $count);
    return;
}

# Run only when executed as a script, not when loaded from other code.
main(@ARGV) unless caller;

1;

# Everything below __END__ is ignored by perl.  It is the sample alignment
# text that followed the script in this file, kept verbatim.
__END__
#### CLUSTAL O(1.2.1) multiple sequence alignment gnl|hbvcds|AB014370_PreC_P-A ------------------------------------------------------------ gnl|hbvcds|AB064314_PreC_P-A ------------------------------------------------------------ gnl|hbvcds|AB014384_C_P-C ------------------------------------------------------------ gnl|hbvcds|AB014385_C_P-C ------------------------------------------------------------ gnl|hbvcds|AB048701_PreS1_P-D atggggcagaatctttccaccagcaatcctctgggattctttcccgaccatcagttggat gnl|hbvcds|AB078031_PreS1_P-D atggggcagaatctttccaccagcaaccctctgggattctttcccgaccaccagttggat gnl|hbvcds|AB030513_S_P-A -----------------------------------------------------------a gnl|hbvcds|AB064314_S_P-A -----------------------------------------------------------c gnl|hbvcds|AB194947_PreS2_P-E -----------------------------------------------------------g gnl|hbvcds|AB194948_PreS2_P-E -----------------------------------------------------------g gnl|hbvcds|AB014370_PreC_P-A tagagtctcctgagcattgctcacctcaccatactgcactcaggcaagccattctctgct gnl|hbvcds|AB064314_PreC_P-A tagagtctcctgagcattgctcacctcaccatacggcactcaggcaagccattctctgct gnl|hbvcds|AB014384_C_P-C tagagtctccggaacattgttcacctcaccatacagcactcaggcaagctattctgtgtt gnl|hbvcds|AB014385_C_P-C 
tagagtctccggaacattgttcacctcaccatacagcactcaggcaagctattctgtgtt gnl|hbvcds|AB048701_PreS1_P-D gggtttttcttgttgacaagaatcctcacaataccgcagagtctagactcgtggtggact gnl|hbvcds|AB078031_PreS1_P-D gggtttttcttgttgacaagaatcctcacaataccgcagagtctagactcgtggtggact gnl|hbvcds|AB030513_S_P-A gggtttttcttgttgacaagaatcctcacaataccgcagagtctagactcgtggtggact gnl|hbvcds|AB064314_S_P-A gggtttttcttgttgacaagaatcctcacaataccgcagagtctagactcgtggtggact gnl|hbvcds|AB194947_PreS2_P-E gggtttttcttgttgacaaaaatcctcacaataccgcagagtctagactcgtggtggact gnl|hbvcds|AB194948_PreS2_P-E gggtttttcttgttgacaaaaatcctcacaataccgcagagtctagactcgtggtggact * * ** * * ****** **** *** * * * *