#!/usr/bin/perl
# Word counting program.
#
# Reads text from the files named on the command line (or from STDIN when
# none are given), splits it into whitespace-separated tokens, normalises
# each token and prints one "COUNT WORD" line per distinct word, sorted by
# ascending count and then alphabetically.
#
# Normalisation of a token:
#   1. ASCII upper-case letters are lower-cased.
#   2. The punctuation characters listed in normalize_word() are deleted.
#   3. Tokens that end up empty, or that are in the excluded-word list,
#      are dropped and not counted.
use strict;
use warnings;
use autodie;

# Words that are never counted, stored as a lookup set so that testing a
# token is a single hash access instead of one regex per excluded word.
my %is_excluded = map { $_ => 1 } qw(
    a about although also an and another are as at be been before between
    but by can do during for from has how however in into is it many may
    more most etc
);

# Turn one raw token into the word to be counted.
# Returns the normalised word, or nothing (empty list / undef) when the
# token must not be counted.
sub normalize_word {
    my ($token) = @_;

    my $word = $token;
    $word =~ tr/A-Z/a-z/;    # lower-case ASCII letters only

    # Delete the excluded characters: ' : @ ~ , . ( ) ? * % / [ ] = " -
    $word =~ tr{':@~,.()?*%/[]="\-}{}d;

    # A token made only of punctuation, or an excluded word, is skipped
    # entirely rather than being tallied under the empty string.
    return if $word eq q{};
    return if $is_excluded{$word};

    return $word;
}

# Add the words found on one line of input to the tally.
#   $count_of - hash reference mapping word => number of occurrences
#   $line     - one line of text (a trailing newline is harmless)
sub tally_line {
    my ($count_of, $line) = @_;

    # split ' ' splits on runs of whitespace and ignores leading whitespace.
    for my $token (split ' ', $line) {
        my $word = normalize_word($token);
        next if !defined $word;
        $count_of->{$word}++;
    }

    return;
}

# Build the report: a list of "COUNT WORD\n" strings ordered by ascending
# count, with ties broken alphabetically.
sub report_lines {
    my ($count_of) = @_;

    my @words = sort {
        $count_of->{$a} <=> $count_of->{$b}
            or $a cmp $b
    } keys %{$count_of};

    return map { "$count_of->{$_} $_\n" } @words;
}

# Entry point: count every word read through <> and print the report.
sub main {
    my %count_of;

    while (my $line = <>) {
        tally_line(\%count_of, $line);
    }

    print report_lines(\%count_of);

    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# exercised on their own.
main() unless caller;

1;

__END__

####
# Everything below the __END__ marker is ignored by perl.  It is a separate,
# stand-alone Bash script doing a similar job; save it as count_mem.sh to use it.

#!/bin/bash
# input a file name like this:
#
#   count_mem.sh filename.txt
#
# Prints each distinct word of the file with its number of occurrences,
# most frequent first, paged through less.

if [ $# -eq 0 ]; then
    echo "example usage: $(basename "$0") file.txt" >&2
    exit 1
elif [ $# -ge 2 ]; then
    echo "too many arguments" >&2
    exit 2
fi

# 1. put every space-separated word on its own line
# 2. delete punctuation characters
# 3. lower-case everything (classes quoted so the shell cannot glob them)
# 4. blank out the excluded words
# 5. drop the lines that became empty
# 6. count identical lines and sort by descending count
sed 's/ /\n/g' "$1" |
    tr -d '[\.[]{}(),\!\\'\'''\"'\`\~\@\#\$\%\^\&\*\+\=\|\;\:\<\>\?]' |
    tr '[:upper:]' '[:lower:]' |
    sed -e 's/\blong\b//gi' \
        -e 's/\blist\b//gi' \
        -e 's/\bof\b//gi' \
        -e 's/\bexcluded\b//gi' \
        -e 's/\bwords\b//gi' \
        -e 's/\bhere\b//gi' |
    sed '/^$/d' |
    sort |
    uniq -c |
    sort -nr |
    less