Like this?: #! perl -slw
use strict;
use Data::Dump qw[ pp ];
my @freq;
my @data = qw[ AABBC BAABC AABBD AACBB ];
for my $s ( @data ) {
++$freq[ $_ ]{ substr $s, $_, 1 } for 0 .. length( $s ) -1;
}
pp \@freq;
for my $pos ( @freq ) {
( $pos->{ $_ } //= 0 ) /= 4 for 'A' .. 'D';
}
pp \@freq;
__DATA__
C:\test>1162755
[
{ A => 3, B => 1 },
{ A => 4 },
{ A => 1, B => 2, C => 1 },
{ B => 4 },
{ B => 1, C => 2, D => 1 },
]
[
{ A => 0.75, B => 0.25, C => 0, D => 0 },
{ A => 1, B => 0, C => 0, D => 0 },
{ A => 0.25, B => 0.5, C => 0.25, D => 0 },
{ A => 0, B => 1, C => 0, D => 0 },
{ A => 0, B => 0.25, C => 0.5, D => 0.25 },
]
Perhaps this is closer to what you are after (Updated: corrected output ordering):
#! perl -slw
use strict;
use Data::Dump qw[ pp ];
my @freq;
my @data = qw[ AABBC BAABC AABBD AACBB ];
for my $s ( @data ) {
++$freq[ $_ ]{ substr $s, $_, 1 } for 0 .. length( $s ) -1;
}
##pp \@freq;
for my $pos ( @freq ) {
( $pos->{ $_ } //= 0 ) /= 4 for 'A' .. 'D';
}
##pp \@freq;
print join "\t", '', 'A'..'D';
for my $pos ( 0 .. $#freq ) {
printf "%2d\t%s\n", $pos+1, join "\t", map sprintf("%.2f", $_ ),
+ @{ $freq[ $pos ] }{ 'A' .. 'D' };
}
__DATA__
C:\test>1162755
A B C D
1 0.75 0.25 0.00 0.00
2 1.00 0.00 0.00 0.00
3 0.25 0.50 0.25 0.00
4 0.00 1.00 0.00 0.00
5 0.00 0.25 0.50 0.25
Another variations that doesn't hard code the keys: #! perl -slw
use strict;
use Data::Dump qw[ pp ];
my( @freq, %c, $c );
#my @data = qw[ AABBC BAABC AABBD AACBB ];
my @data = qw[ AABBC BAABC AABBD AECBBF ];
for my $s ( @data ) {
++$freq[ $_ ]{ $c = substr $s, $_, 1 }, undef $c{ $c } for 0 .. le
+ngth( $s ) -1;
}
##pp \@freq;
my @oK = sort keys %c;
for my $pos ( @freq ) {
( $pos->{ $_ } //= 0 ) /= 4 for @oK;
}
##pp \@freq;
print join "\t", '', @oK;
for my $pos ( 0 .. $#freq ) {
printf "%2d\t%s\n", $pos+1, join "\t", map sprintf("%.2f", $_ ),
+ @{ $freq[ $pos ] }{ @oK };
}
__DATA__
C:\test>1162755
A B C D E F
1 0.75 0.25 0.00 0.00 0.00 0.00
2 0.75 0.00 0.00 0.00 0.25 0.00
3 0.25 0.50 0.25 0.00 0.00 0.00
4 0.00 1.00 0.00 0.00 0.00 0.00
5 0.00 0.25 0.50 0.25 0.00 0.00
6 0.00 0.00 0.00 0.00 0.00 0.25
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
In the absence of evidence, opinion is indistinguishable from prejudice.
| [reply] [d/l] [select] |
Awesome, thanks! Lots of things for me to learn here.
| [reply] |
Lots of things for me to learn here.
Feel free to ask questions :)
It's much quicker and easier to answer your specific queries, than to waste my time, and bore you, with my explanations of all the things I think you might not understand, only to miss the ones that you don't.
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
In the absence of evidence, opinion is indistinguishable from prejudice.
| [reply] |
Just for interest, here's a Perl 6 solution that works for arbitrary sets of input characters and arbitrary input lengths:
#! /usr/bin/env perl6
use v6;
my @data = < AABBC BAABC AABBD AACBB >;
# Use mixhashes (self-totalling, and they default to zero)
my @freq = MixHash.new xx max @data».chars;
# Count everything
for @data».comb -> @chars {
for @chars.kv -> $pos, $char {
@freq[$pos]{$char}++;
}
}
# Column labels
my @labels = @data.join.comb.unique.sort;
say join "\t", '', @labels;
# Table rows
for @freq.kv -> $pos, %score {
say join "\t",
($pos+1).fmt("%2d"),
%score{@labels}.map( * / %score.total )».fmt("%.2f")
}
...and the output:
A B C D
1 0.75 0.25 0.00 0.00
2 1.00 0.00 0.00 0.00
3 0.25 0.50 0.25 0.00
4 0.00 1.00 0.00 0.00
5 0.00 0.25 0.50 0.25
Or with:
my @data = < AABBC BAABC AABBD AECBBF >;
...you get:
A B C D E F
1 0.75 0.25 0.00 0.00 0.00 0.00
2 0.75 0.00 0.00 0.00 0.25 0.00
3 0.25 0.50 0.25 0.00 0.00 0.00
4 0.00 1.00 0.00 0.00 0.00 0.00
5 0.00 0.25 0.50 0.25 0.00 0.00
6 0.00 0.00 0.00 0.00 0.00 1.00
| [reply] [d/l] [select] |
#!/usr/bin/perl
# http://perlmonks.org/?node_id=1162755
use strict;
use warnings;
my %score;
chomp(my @array = <DATA>);
for my $i (1..@array)
{
$score{$1}[$-[0]] += 1/@array while $array[$i - 1] =~ /(.)/g;
}
printf " " . "%5s " x @array . "\n", sort keys %score;
for my $pos ( 1..length $array[0] )
{
printf "%1d" . "%7.2f" x @array . "\n",
$pos, map { $score{$_}[$pos - 1] // 0 } sort keys %score;
}
__DATA__
AABBC
BAABC
AABBD
AACBB
| [reply] [d/l] |
Thanks! Interesting approach. I don't quite understand the use of the $-[0] special variable here - would you mind explaining?
| [reply] |
| [reply] |
use strict;
use warnings;
my @strings = qw{
AABABC
BAABEC
AABFBD
AACBDB
CBBDEF
};
my $div = scalar @strings;
my @stringAoA =
map { [ split m{} ] }
@strings;
my %letters;
$letters{ $_ } ++ for map { @{ $_ } } @stringAoA;
my %scores;
for my $posn ( 1 .. length $strings[ 0 ] )
{
for my $row ( 0 .. $#stringAoA )
{
$scores{ $posn }->{ $stringAoA[ $row ]->[ $posn - 1 ] } ++;
}
}
printf
qq{%8s@{ [ q{%8s} x scalar keys %letters ] }\n},
q{},
sort keys %letters;
for my $posn (sort { $a <=> $b } keys %scores )
{
printf
qq{ %8d@{ [ q{%8.2f} x scalar keys %letters ] }\n},
$posn,
map {
defined $scores{ $posn }->{ $_ }
? $scores{ $posn }->{ $_ } / $div
: 0
} sort keys %letters
}
The output.
A B C D E F
1 0.60 0.20 0.20 0.00 0.00 0.00
2 0.80 0.20 0.00 0.00 0.00 0.00
3 0.20 0.60 0.20 0.00 0.00 0.00
4 0.20 0.40 0.00 0.20 0.00 0.20
5 0.00 0.40 0.00 0.20 0.40 0.00
6 0.00 0.20 0.40 0.20 0.00 0.20
I hope this is useful.
| [reply] [d/l] [select] |
Just for my own amusement a oneliner solution:
# warning windows doublequotes
perl -lnE "$ar[0]++; $ar[pos]{$1}++ while /(.)/g;
END{ foreach $row (1..$#ar){print join qq(\t),$row,map{$_,sprintf
+('%.2f',$ar[$row]{$_}/$ar[0] )} sort keys %{$ar[$row]}} }" freq.txt
1 A 0.75 B 0.25
2 A 1.00
3 A 0.25 B 0.50 C 0.25
4 B 1.00
5 B 0.25 C 0.50 D 0.25
The datastructure created is an array where the first element, $ar[0] is a scalar used to hold how many lines we processed. This is because you does not need to track char at position 0, pos starting from 1. Other elements are anonymous hashes where keys are your chars and values are occurrences found (at the position given by the current index of the @ar array we are processing).
See the datastructure with the help of Data::Dump:
perl -MData::Dump -lnE "$ar[0]++; $ar[pos]{$1}++ while /(.)/g;
END{ dd @ar }" freq.txt
(
4, # el 0 is the lines count
{ A => 3, B => 1 }, # el 1 contains occurences found at p
+osition 1
{ A => 4 }, # el 2 .. so on
{ A => 1, B => 2, C => 1 },
{ B => 4 },
{ B => 1, C => 2, D => 1 },
)
Deparsing the first oneliner you can see the whole picture, commented:
perl -MO=Deparse -lnE "$ar[0]++; $ar[pos]{$1}++ while /(.)/g;
END{ foreach $row (1..$#ar){print join qq(\t),$row,map{$_,sprintf
+('%.2f',$ar[$row]{$_}/$ar[0] )} sort keys %{$ar[$row]}}}" freq.txt
BEGIN { $/ = "\n"; $\ = "\n"; } # implicit initialization
BEGIN {
$^H{'feature_unicode'} = q(1);
$^H{'feature_say'} = q(1);
$^H{'feature_state'} = q(1);
$^H{'feature_switch'} = q(1);
}
# our program:
LINE: while (defined($_ = <ARGV>)) { # reading all files because of pe
+rl -n
chomp $_; # automatic handling of end of li
+ne given by perl -l
++$ar[0]; # el 0 keeps track of line proces
+sed
++$ar[pos $_]{$1} while /(.)/g; # /(.)/g return all char setting
+$1 to
# the char and making pos returni
+ng it's position
# so with ++ we augment occurence
+s of char given by $1
# found at position given by pos
sub END {
foreach $row (1 .. $#ar) { # now we process rows of the arra
+y starting
# from 1, because position coinci
+de with array index
print join("\t", # joining all following with a ta
+b
$row, # the row is equal to the positio
+n in the string
map({ # then foreach key of the hash (t
+he el. $row of @a)
$_, # the sorted key
sprintf('%.2f', $ar[$row]{$_} / $a
+r[0]); # it's value divided
+ # by linecount, formatted
} sort(keys %{$ar[$row];})));
}
}
;
}
-e syntax OK
L*
There are no rules, there are no thumbs..
Reinvent the wheel, then learn The Wheel; may be one day you reinvent one of THE WHEELS.
| [reply] [d/l] [select] |