# Most frequently occurring word      no. of times occurred
# Second most frequent word           no. of times occurred
# ....                                ....
# ....                                ....
####
# Count how often each token in @words occurs and print one "word => count"
# line per distinct token, most frequent first (ties broken alphabetically
# so the output is deterministic).
# NOTE(review): @words is not populated anywhere in this snippet; it is
# assumed to be filled by surrounding code -- confirm against the caller.
our @words;
my %count;
$count{$_}++ for @words;

for my $word ( sort { $count{$b} <=> $count{$a} || $a cmp $b } keys %count ) {
    print "$word => $count{$word}\n";
}
####
# the 150
# it  85
# we  60
# are 40
####
# lexicon_generate()
#
# Builds a lexicon of the distinct tokens found in 'tcorpus.txt':
#   1. reads the file and splits every line on whitespace;
#   2. deletes the punctuation characters * ( ) " ' ? , . - ! ; from each token;
#   3. drops tokens that became empty and removes duplicates, keeping the
#      order of first appearance (comparison is case-sensitive);
#   4. writes the result to 'tcorpus_unique.txt', each token preceded by a
#      newline (so the file starts with an empty line, as before).
#
# Takes no arguments. Returns 1 on success.
# Dies if either file cannot be opened or if the output file cannot be closed.
sub lexicon_generate {
    # Strictures are enabled for this sub only, so any surrounding code that
    # relies on package globals keeps compiling.
    use strict;
    use warnings;

    my $corpus_file  = 'tcorpus.txt';
    my $lexicon_file = 'tcorpus_unique.txt';

    # Open file.
    open my $corpus_fh, '<', $corpus_file
        or die "Cannot open $corpus_file: $!";

    my @words;
    while ( my $line = <$corpus_fh> ) {
        # split ' ' discards leading/trailing whitespace, including the
        # line terminator, so no separate chomp is needed.
        push @words, split ' ', $line;
    }
    close $corpus_fh;

    # Remove punctuation marks. The hyphen is last in the list so tr///
    # treats it as a literal character rather than a range operator.
    tr/*()"'?,.!;-//d for @words;

    # Remove duplicate strings. A token made up solely of punctuation is
    # empty at this point and is not a word, so it is skipped as well.
    my %seen;
    my @uniq = grep { length && !$seen{$_}++ } @words;

    open my $lexicon_fh, '>', $lexicon_file
        or die "Cannot open $lexicon_file: $!";
    print {$lexicon_fh} "\n$_" for @uniq;

    # Buffered write errors only surface when the handle is closed.
    close $lexicon_fh
        or die "Cannot close $lexicon_file: $!";

    return 1;
}

lexicon_generate();