Thanks to everyone who helped me get this done. I was finally able to do the shared word count on my own! Thanks again for all your time and help! And apl, "trivial" is a very relative term!!! I spent 20 hours getting the counting piece done. :)
use strict;
use warnings;

# Build a per-story word-frequency table from a text file in which each story
# is introduced by a line of the form "<Title>", then write two reports:
#   test1.csv - tab-separated table with one row per word and one column per
#               story, plus a "total" column and an "all" row of column sums;
#   test2.csv - "story1,story2,count" lines giving, for every ordered pair of
#               stories, how many distinct words occur in both of them.

my $filename = "tryit.txt";
open(my $in,        '<', $filename)   or die "Cannot open $filename: $!";
open(my $table_out, '>', "test1.csv") or die "Cannot open test1.csv: $!";
open(my $pairs_out, '>', "test2.csv") or die "Cannot open test2.csv: $!";

# $freqs{Word}{Story} is the number of times Word occurs in Story.
# Two reserved lower-case keys hold the sums: the row key "all" and the column
# key "total". Real words and story titles are passed through ucfirst before
# being used as keys.
my %freqs;
my %seen_title;    # story titles already encountered, for duplicate detection
my $story;         # current story name

while (my $line = <$in>) {
    if ($line =~ /^<(.*)>\s*$/) {
        $story = ucfirst $1;
        # Titles are tracked separately from %freqs: %freqs is keyed by word,
        # so testing it would confuse a title with an identically spelled word.
        die "Duplicate story title: $story" if $seen_title{$story}++;
        next;
    }
    next unless defined $story;    # wait until we have a story title

    # Delete punctuation before splitting into words. This is exactly the set
    # the original character class matched.
    # NOTE(review): the original class contained "(--)", which Perl reads as
    # the range "(" .. "-", so "*", "+" and every single "-" are deleted too
    # (e.g. "well-known" and "a--b" each become one word). Kept as is to
    # preserve the existing tokenization; confirm that this is wanted.
    $line =~ tr/.,:;?"!()[]{}*+_\-//d;

    for my $match ($line =~ /\w+/g) {
        my $word = ucfirst $match;
        # Current story counts
        $freqs{$word}{$story}++;
        $freqs{all}{$story}++;
        # Total counts
        $freqs{$word}{total}++;
        $freqs{all}{total}++;
    }
}
close $in;

# Make sure the totals row exists even when the input contained no words, so
# the header and the "all" row are still written.
$freqs{all} ||= {};
my @columns = sort keys %{ $freqs{all} };

# Print title line
print {$table_out} "\t", join("\t", @columns), "\n";

# Print table: one row per word, with 0 for stories that lack the word.
for my $word (sort keys %freqs) {
    my @counts = map { $freqs{$word}{$_} || 0 } @columns;
    print {$table_out} "$word\t", join("\t", @counts), "\n";
}

# Shared word counts. The "total" column and the "all" row are bookkeeping
# entries, not a story and a word, so both are filtered out by name. Relying
# on "total" sorting last would break for some titles, and counting the "all"
# row would add 1 to every pair because its count is positive for every story.
my @stories = grep { $_ ne 'total' } @columns;
my @words   = grep { $_ ne 'all' } keys %freqs;

for my $first (@stories) {
    for my $second (@stories) {
        # grep in scalar context yields the number of matching words.
        my $shared = grep { $freqs{$_}{$first} && $freqs{$_}{$second} } @words;
        # print, not printf: a "%" in a story title must not be treated as a
        # format directive. The space before the newline matches the format
        # this report has always had.
        print {$pairs_out} "$first,$second,$shared \n";
    }
}

# Buffered write errors only surface at close, so check both output handles.
close $table_out or die "Cannot close test1.csv: $!";
close $pairs_out or die "Cannot close test2.csv: $!";
|