#!/usr/bin/perl
# word counting program
use strict;
use warnings;
use autodie;

# Words that are too common to be worth counting.  Tokens are compared
# against this list after they have been lowercased and stripped of
# punctuation, so every entry must be lowercase.
my @excluded = qw( a about although also an and another are as at be been
    before between but by can do during for from has how however in into is
    it many may more most etc );

# Hash form of @excluded: one lookup per token instead of one regex
# substitution per excluded word per token.
my %is_excluded = map { $_ => 1 } @excluded;

# Punctuation characters that are deleted from every token.
my @excluded_chars = (
    q{'}, ':', '@', '-', '~', ',', '.', '(', ')',
    '?',  '*', '%', '/', '[', ']', '=', '"',
);

# Single precompiled character class matching any excluded character.
# quotemeta keeps characters such as '-' and ']' literal inside the class.
my $excluded_chars_re = do {
    my $set = join q{}, map { quotemeta } @excluded_chars;
    qr/[$set]/;
};

my %count;  # word => number of times it was seen

# normalize_word($token)
#
# Turns one whitespace-delimited token into the word that gets counted:
# lowercases it and deletes every character listed in @excluded_chars.
# Returns the empty string when nothing is left or when the resulting word
# is in @excluded, so the caller can skip it.  Only whole words are
# excluded; a token that merely contains an excluded word is kept intact.
sub normalize_word {
    my ($token) = @_;

    my $word = lc $token;
    $word =~ s/$excluded_chars_re//g;

    return q{} if $is_excluded{$word};
    return $word;
}

# Read every line from the files named on the command line (or STDIN).
while ( my $line = <> ) {
    for my $token ( split ' ', $line ) {
        my $word = normalize_word($token);

        # Excluded words and pure-punctuation tokens normalise to the empty
        # string; counting them would add a bogus '' entry to the report.
        next if $word eq q{};

        $count{$word}++;
    }
}

# Report: least frequent first, ties broken alphabetically.
for my $word ( sort { $count{$a} <=> $count{$b} or $a cmp $b } keys %count ) {
    print "$count{$word} $word\n";
}

##</code><code>##

#!/bin/bash
# input a file name like this:
#
# count_mem.sh filename.txt
#

# Require exactly one argument: the text file whose words are counted.
if [ $# -eq 0 ]; then
    echo "example usage: $(basename -- "$0") file.txt" >&2
    exit 1
elif [ $# -ge 2 ]; then
    echo "too many arguments" >&2
    exit 2
fi

# Words left out of the tally.  Entries must be lowercase because they are
# matched after the input has been lowercased.
excluded_words=(long list of excluded words here)

# Join the list into a single alternation (long|list|...) for grep -E.
excluded_pattern=$(IFS='|'; printf '%s' "${excluded_words[*]}")

# Punctuation deleted from the text.  The value is three adjacent quoted
# pieces so that it can contain both ' and "; the doubled backslash is tr's
# escape for one literal backslash.
punctuation='[]{}().,!\\'"'"'"`~@#$%^&*+=|;:<>?'

# Pipeline: one word per line -> strip punctuation -> lowercase ->
# drop excluded words (whole-line match only, so "long-term" is kept as is
# rather than being cut down to "-term") -> drop empty lines ->
# count and list the most frequent words first.
# The tr character classes are quoted so the shell cannot glob-expand them
# against single-character file names in the current directory.
sed 's/ /\n/g' "$1" |
    tr -d "$punctuation" |
    tr '[:upper:]' '[:lower:]' |
    grep -vxE "$excluded_pattern" |
    sed '/^$/d' |
    sort | uniq -c | sort -nr | less