/* -*- mode: c -*- * $Id: wordfreq.gcc,v 1.5 2001/09/18 17:25:18 doug Exp $ * http://www.bagley.org/~doug/shootout/ * Changed by Adrian Merrill 2001/08/22 */ #include #include #include #include #include #include "../../Include/simple_hash.h" #define QUICKIE_STRCMP(a, b) (*(a) != *(b) ? *(a) - *(b) : strcmp((a), (b))) typedef int (*comparator)(const void *, const void *); int cmp_hash(struct ht_node **a, struct ht_node **b) { int val = (*b)->val - (*a)->val; return((val == 0) ? QUICKIE_STRCMP((*b)->key, (*a)->key) : val); } int main() { int readbufsize = 4096; int wordbufsize=16; char *readbuf = (char *)malloc(readbufsize + 1); char *wordbuf = (char *)malloc(wordbufsize + 1); int i = 0; struct ht_ht *ht = ht_create(2048); struct ht_node **sort_array, **sort_tmp, *node; /*new code*/ int nread =0; int wordlen=0; readbuf[0]=0; while (readbuf[i] > 0||(nread = fread(readbuf, sizeof(char), readbufsize, stdin),readbuf[nread] = '\0',i=0,nread > 0) ) { if (isalpha(readbuf[i])){ wordbuf[wordlen++] = tolower(readbuf[i]); if (wordlen == wordbufsize) { wordbufsize *= 2; wordbuf = realloc(wordbuf, wordbufsize + 1); } } else{ if (wordlen > 0) { wordbuf[wordlen] = '\0'; ++(ht_find_new(ht, wordbuf)->val); wordlen = 0; } } i++; } free(readbuf); free(wordbuf); sort_array = sort_tmp = malloc(sizeof(struct ht_node *) * ht_count(ht)); for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ; qsort(sort_array, ht_count(ht), sizeof(struct ht_node *), (comparator)cmp_hash); for (i=0; i

##

// -*- mode: c++ -*-
// $Id: wordfreq.g++,v 1.5 2001/07/21 23:51:05 doug Exp $
// http://www.bagley.org/~doug/shootout/
// By Tamás Benkő

#include 
#include 
#include 
#include 
#include 
#include 

using namespace std;

// Number of bytes requested from the input stream on every refill.
int const bufsize = 4096;
// Initial capacity of the word buffer; it doubles whenever a word fills it.
int const wsize = 64;

// Splits a stdio stream into words.
//
// A word is a maximal run of bytes for which isalpha() is true; every byte is
// lower-cased with tolower() before it is stored.  All other bytes (including
// NUL bytes and, in the "C" locale, bytes >= 0x80) act as separators.
//
// The reader borrows the FILE* (it never closes it) and owns a single
// heap-allocated word buffer that is reused by every call, so the pointer
// handed out by operator() is only valid until the next call or until the
// reader is destroyed.
class word_reader
{
    int ws;                 // capacity of `word`, not counting the NUL slot
    char buf[bufsize+1];    // current input chunk (+1 for a terminating NUL)
    char *bptr;             // next unread byte inside buf
    char *bend;             // one past the last valid byte inside buf
    char *word;             // owned buffer holding the word being assembled
    FILE *input;            // borrowed input stream

    bool fill();

    // The class owns `word`, so copying would lead to a double delete.
    // Declared but never defined: copying is rejected at compile/link time.
    word_reader(word_reader const &);
    word_reader &operator=(word_reader const &);

public:
    // Starts with an empty chunk so the first operator() call triggers a read.
    word_reader(FILE *i)
        : ws(wsize), bptr(buf), bend(buf), word(new char[ws+1]), input(i)
    {
        *bptr = *word = '\0';
    }

    // Releases the word buffer (the original never freed it).
    ~word_reader() { delete[] word; }

    int operator()(char const **);
};

// Reads the next chunk from the stream into buf and resets the cursor.
// Returns false when nothing more could be read (end of file or error).
inline bool word_reader::fill()
{
    size_t const nread = fread(buf, sizeof(char), bufsize, input);
    buf[nread] = '\0';
    bptr = buf;
    bend = buf + nread;
    return nread > 0;
}

// Extracts the next word from the stream.
//
// On return *w points at the NUL-terminated, lower-cased word stored in the
// reader's internal buffer, and the return value is its length.  A return
// value of 0 (with *w pointing at an empty string) means the input is
// exhausted.
int word_reader::operator()(char const **w)
{
    int len = 0;
    // The end of the chunk is tracked with an explicit pointer rather than a
    // NUL sentinel, so a NUL byte in the input is just another separator and
    // does not cause the remainder of the chunk to be thrown away.
    while (bptr != bend || fill()) {
        // <ctype.h> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        unsigned char const c = static_cast<unsigned char>(*bptr++);
        if (isalpha(c)) {
            word[len] = static_cast<char>(tolower(c));
            if (++len == ws) {
                // Buffer full: double the capacity, keeping room for the NUL.
                int const grown = ws * 2;
                char *nword = new char[grown+1];
                memcpy(nword, word, len);
                delete[] word;
                word = nword;
                ws = grown;
            }
        }
        else if (len > 0) break;    // a separator after letters ends the word
    }
    word[len] = '\0';
    *w = word;
    return len;
}

// `counter` is the table main() uses to map each word to its number of
// occurrences; `hpair` is the element type that main() sorts and prints
// (`first` is the word, `second` is the count).
// NOTE(review): neither `hash_map` nor `pair` carries template arguments
// here; they look like they were lost when this listing was extracted
// (presumably <char const *, int>, judging by how main() uses them) --
// confirm against the original source.
typedef hash_map counter;
typedef pair hpair;

// Comparison helpers for the word/count types, declared inside namespace std.
// NOTE(review): the C++ standard only permits certain specializations to be
// added to namespace std; adding a new operator< overload there is formally
// undefined behaviour -- confirm whether this is still wanted.
namespace std
{
    // Ordering for hpair: the pair with the larger count (`second`) comes
    // first; when the counts are equal, the pair whose word (`first`)
    // compares greater under strcmp comes first.
    inline bool operator<(hpair const &lhs, hpair const &rhs)
    {
	return lhs.second != rhs.second ? lhs.second > rhs.second
	    : strcmp(lhs.first, rhs.first) > 0;
    }

    // Equality functor that compares two C strings by content (strcmp)
    // instead of by pointer value.
    // NOTE(review): the specialization's argument list after `equal_to`
    // appears to be missing (presumably <char const *>) -- confirm.
    template<> struct equal_to
    {
	bool operator()(char const *s1, char const *s2) const
	    {return strcmp(s1, s2) == 0;}
    };
}

// Counts how often each word occurs on stdin and prints one line per word:
// the count right-aligned in a 7-character field, a tab, then the word.
// Always returns 0.
int main()
{
    int len;
    const char *w;
    counter hist;
    word_reader wr(stdin);

    // wr(&w) returns the length of the next word and 0 once input is exhausted.
    while ((len = wr(&w)) > 0) {
	counter::iterator i = hist.find(w);
	// First sighting: the key is a fresh heap copy of the word, because w
	// points into the reader's buffer, which the next call overwrites.
	// These copies are never deleted in this function.
	if (i == hist.end()) hist[strcpy(new char[len+1], w)] = 1;
	else ++i->second;
    }

    // Copy the table's entries into a vector so they can be sorted.
    // NOTE(review): `vector` has no element type here; presumably
    // vector<hpair> before the listing was extracted -- confirm.
    vector v(hist.begin(), hist.end());
    // No comparator is passed; presumably this picks up the operator< defined
    // for hpair (count descending, then word descending) -- confirm.
    sort(v.begin(), v.end());
    for (size_t i = 0; i < v.size(); ++i)
	printf("%7d\t%s\n", v[i].second, v[i].first);

    return 0;
}

##

##

// $Id: wordfreq.java,v 1.3 2000/12/17 21:40:53 doug Exp $
// http://www.bagley.org/~doug/shootout/
// Collection class code is from my friend Phil Chu, Thanks Phil!

import java.io.*;
import java.util.*;
import java.text.*;

/**
 * Mutable occurrence tally. A freshly created counter already stands at one,
 * i.e. creating it accounts for the first sighting.
 */
class Counter {
    int count;

    Counter() {
        this.count = 1;
    }
}

public class wordfreq {

    // Program entry point: the command-line arguments are not used; all the
    // work is delegated to wf().
    public static void main(String[] args) {
	wf();
    }

    public static String padleft(String s,int n,char c) {
        int len = s.length();
        if( len>=n ) return s;
        char[] buf = new char[n];
        for( int i=0;i##

#!/usr/local/bin/python
# $Id: wordfreq.python,v 1.9 2001/05/11 17:44:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
#
# adapted from Bill Lear's original python word frequency counter
#
# Joel Rosdahl suggested using translate table to speed up
# word splitting.  That change alone sped this program up by
# at least a factor of 3.
#
# with further speedups from Mark Baker

import sys

def main():
    """Print a word-frequency table for the text on standard input.

    A word is a maximal run of ASCII letters; upper-case letters are folded
    to lower case and every other character in the 0-255 range is treated as
    a separator.  One line is printed per distinct word -- the count
    right-aligned in a 7-character field, a tab, then the word -- ordered by
    count descending and, for equal counts, by word descending.

    Written so that it runs unchanged under both Python 2 and Python 3 (the
    original relied on the Python 2 print statement and on map()/zip()
    returning lists).
    """
    count = {}
    # A real list is needed because it is sliced below; map() only returns a
    # list under Python 2.
    i_r = [chr(code) for code in range(256)]

    # 256-entry translation table: letters map to their lower-case form,
    # everything else maps to a space.
    # Under Python 3, str.translate leaves characters whose ordinal falls
    # outside the table (>= 256) unchanged.
    trans = [' '] * 256
    o_a, o_z = ord('a'), (ord('z')+1)
    trans[ord('A'):(ord('Z')+1)] = i_r[o_a:o_z]
    trans[o_a:o_z] = i_r[o_a:o_z]
    trans = ''.join(trans)

    rl = sys.stdin.readlines

    # Read in batches of roughly 4095 bytes' worth of lines rather than
    # slurping the whole input at once.
    lines = rl(4095)
    while lines:
        for line in lines:
            for word in line.translate(trans).split():
                try:
                    count[word] += 1
                except KeyError:
                    count[word] = 1
        lines = rl(4095)

    # (count, word) tuples are unique, so a single descending sort yields the
    # same order as the original sort() followed by reverse().
    ranked = sorted(zip(count.values(), count.keys()), reverse=True)

    print('\n'.join(["%7s\t%s" % (tally, word) for (tally, word) in ranked]))

# Run the word count immediately; there is no __name__ guard, so importing
# this file runs it as well.
main()

####

-- $Id: wordfreq.ghc,v 1.2 2001/02/27 04:04:35 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- from Julian Assange

-- compile with:
-- ghc -O -package data wordfreq.hs -o wordfreq

module Main where
import List(sortBy)
import Char(toLower,isLower)
import FiniteMap(fmToList,emptyFM,addToFM_C)

-- Word-frequency filter over stdin, read right to left: lower-case the
-- input, split it into words, count the words in a finite map, sort the
-- (word, count) pairs, format each pair, and join the results into lines.
main = interact $ unlines . pretty . sort . fmToList . makemap . cwords . lower
 where
       -- One output string per (word, count) pair: the count right-aligned
       -- in 7 columns, a tab, then the word.
       pretty l  = [pad 7 (show n) ++ "\t" ++ w | (w,n) <- l]
                   where
                         -- Left-pad s with spaces up to width n.
                         pad n s = replicate (n - length s) ' ' ++ s

       -- The arguments to compare are swapped, so the order is count
       -- descending and, for equal counts, word descending.
       sort      = sortBy (\(w0,n0) (w1,n1) -> case compare n1 n0 of
                                               EQ -> compare w1 w0
					       x  -> x)

       -- Fold the word list into a map, inserting 1 for each occurrence;
       -- addToFM_C (+) presumably adds that 1 to any count already stored
       -- under the same word -- confirm against the FiniteMap docs.
       makemap   = addFM emptyFM
	           where addFM fm [] = fm
		         addFM fm (x:xs) = addFM (addToFM_C (+) fm x 1) xs

       -- Split the text into maximal runs of characters satisfying isLower,
       -- discarding everything in between.
       cwords s  = case dropWhile (not . isLower) s of
		        "" -> []
                        s' -> w : (cwords s'')
                              where (w, s'') = span isLower s' 

       -- Lower-case every character of the input.
       lower     = map toLower

####

perl -nle"y/a-zA-Z/ /cs; ++$h{$_} for split }{ print qq[$_:$h{$_}] for sort keys %h" theFile
break:1
brief:1
bring:3
brought:2
buffalo:16
burden:1
but:20
by:16
call:2
called:6
came:2
campaign:1
can:36
cannot:2
capable:1
capitals:1
career:2
cart:1
case:2
...