Re: Get me excited about perl

Show them Perl's greatest asset -- concise solutions to everyday problems.

If you have time, pick some task that will resonate with as many of them as possible and get one or more of them to solve the chosen task in their favoured languages before the day.

By way of example (because the examples already exist): frequency count the words in a text file.

C (211 lines):

/* -*- mode: c -*-
 * $Id: wordfreq.gcc,v 1.5 2001/09/18 17:25:18 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 * Changed by Adrian Merrill 2001/08/22
 */

#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "../../Include/simple_hash.h"

/*
 * Three-way string comparison with a fast path: when the first characters
 * differ, their difference is the result and strcmp() is never called.
 * NOTE: both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
#define QUICKIE_STRCMP(a, b)  (*(a) != *(b) ? *(a) - *(b) : strcmp((a), (b)))

typedef int (*comparator)(const void *, const void *);

/*
 * Sort comparator for an array of hash-node pointers.
 * Orders by descending val; nodes with equal val are ordered by descending
 * key (via QUICKIE_STRCMP with the arguments swapped).
 */
int cmp_hash(struct ht_node **a, struct ht_node **b) {
    int diff = (*b)->val - (*a)->val;

    if (diff != 0) {
        return diff;
    }
    return QUICKIE_STRCMP((*b)->key, (*a)->key);
}

/*
 * Word-frequency counter.
 *
 * Reads stdin in fixed-size chunks, splits it into runs of alphabetic
 * characters (lower-cased), bumps the val of each word's hash-table node,
 * then prints one "count<TAB>word" line per distinct word in the order
 * defined by cmp_hash.
 *
 * Returns 0 on success, 1 if a memory allocation fails.
 */
int main() {
    int readbufsize = 4096;
    int wordbufsize = 16;
    char *readbuf = (char *)malloc(readbufsize);
    char *wordbuf = (char *)malloc(wordbufsize + 1);
    struct ht_ht *ht = ht_create(2048);
    struct ht_node **sort_array, *node;
    size_t nread, pos;
    int wordlen = 0;
    int total, filled = 0, i;

    if (readbuf == NULL || wordbuf == NULL) {
        fprintf(stderr, "wordfreq: out of memory\n");
        return(1);
    }

    /* Walk exactly the bytes fread() delivered.  Relying on a NUL sentinel
       (or on the byte being > 0) would drop the rest of a chunk as soon as
       a NUL or a high-bit byte showed up in the input. */
    while ((nread = fread(readbuf, sizeof(char), readbufsize, stdin)) > 0) {
        for (pos = 0; pos < nread; pos++) {
            /* <ctype.h> functions need an unsigned char value */
            unsigned char c = (unsigned char)readbuf[pos];

            if (isalpha(c)) {
                wordbuf[wordlen++] = (char)tolower(c);
                if (wordlen == wordbufsize) {
                    char *grown;

                    wordbufsize *= 2;
                    grown = (char *)realloc(wordbuf, wordbufsize + 1);
                    if (grown == NULL) {
                        fprintf(stderr, "wordfreq: out of memory\n");
                        return(1);
                    }
                    wordbuf = grown;
                }
            } else if (wordlen > 0) {
                wordbuf[wordlen] = '\0';
                ++(ht_find_new(ht, wordbuf)->val);
                wordlen = 0;
            }
        }
    }

    /* A word that runs right up to EOF has no trailing separator; count it. */
    if (wordlen > 0) {
        wordbuf[wordlen] = '\0';
        ++(ht_find_new(ht, wordbuf)->val);
    }

    free(readbuf);
    free(wordbuf);

    total = ht_count(ht);
    sort_array = malloc(sizeof(struct ht_node *) * (total > 0 ? total : 1));
    if (sort_array == NULL) {
        fprintf(stderr, "wordfreq: out of memory\n");
        return(1);
    }

    /* Collect the nodes; the end-of-iteration null is NOT stored, so the
       array never receives more than `total` entries. */
    for (node = ht_first(ht); node != 0 && filled < total; node = ht_next(ht)) {
        sort_array[filled++] = node;
    }

    qsort(sort_array, filled, sizeof(struct ht_node *),
          (comparator)cmp_hash);

    for (i = 0; i < filled; i++) {
        printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i]));
    }

    free(sort_array);
    ht_destroy(ht);
    return(0);
}
[download]

C++ (79 lines):

// -*- mode: c++ -*-
// $Id: wordfreq.g++,v 1.5 2001/07/21 23:51:05 doug Exp $
// http://www.bagley.org/~doug/shootout/
// By Tamás Benkő

#include <cstdio>
#include <cctype>
#include <cstring>
#include <ext/hash_map>
#include <vector>
#include <algorithm>

using namespace std;

int const bufsize = 4096;
int const wsize = 64;

// Pulls lower-cased alphabetic words out of a FILE stream, one per call.
// Owns a growable word buffer allocated with new[]; the destructor releases it.
class word_reader
{
    int ws;                              // capacity of `word`, not counting the NUL
    char buf[bufsize+1], *bptr, *word;   // read buffer, cursor into it, current word
    FILE *input;

    bool fill();

    // Copying would leave two objects owning the same `word` buffer, so the
    // copy operations are declared private and intentionally left undefined.
    word_reader(word_reader const &);
    word_reader &operator=(word_reader const &);

public:
    // Starts with an empty read buffer and an empty word of capacity wsize.
    word_reader(FILE *i): ws(wsize), bptr(buf), word(new char[ws+1]), input(i)
    {*bptr = *word = '\0';}

    ~word_reader() {delete[] word;}

    // Stores a pointer to the next word in *w and returns its length
    // (0 when the input is exhausted).
    int operator()(char const **);
};

// Loads the next chunk of input into buf, NUL-terminates it and rewinds the
// cursor to the start. Returns false when nothing more could be read.
inline bool word_reader::fill()
{
    int const got = fread(buf, sizeof(char), bufsize, input);
    bptr = buf;
    buf[got] = '\0';
    return got > 0;
}

// Extracts the next run of alphabetic characters, lower-cased, into the
// internal word buffer.
//   w       receives a pointer to the NUL-terminated word; the pointer is
//           only valid until the next call, which may reallocate the buffer.
//   returns the word length, or 0 once the input holds no further word.
int word_reader::operator()(char const **w)
{
    int len = 0;
    while (*bptr || fill()) {
        // <cctype> classification is only defined for unsigned char values
        // (or EOF), so widen through unsigned char before calling it.
        unsigned char const c = static_cast<unsigned char>(*bptr++);
        if (isalpha(c)) {
            word[len] = static_cast<char>(tolower(c));
            if (++len == ws) {
                // Buffer full: double the capacity and carry the prefix over.
                char *nword = new char[(ws *= 2)+1];
                memcpy(nword, word, len);
                delete[] word;
                word = nword;
            }
        }
        else if (len > 0) break;   // first separator after a word ends it
    }
    *w = word;
    word[len] = '\0';
    return len;
}

typedef hash_map<char const *, int> counter;
typedef pair<char const *, int> hpair;

namespace std
{
    // Ordering for histogram entries: larger counts come first; entries with
    // the same count are ordered by descending strcmp() of their words.
    inline bool operator<(hpair const &lhs, hpair const &rhs)
    {
        if (lhs.second == rhs.second)
            return strcmp(lhs.first, rhs.first) > 0;
        return lhs.second > rhs.second;
    }

    // Makes C-string keys compare by content rather than by address.
    template<> struct equal_to<char const *>
    {
        bool operator()(char const *s1, char const *s2) const
        {
            return !strcmp(s1, s2);
        }
    };
}

int main()
{
    int len;
    const char *w;
    counter hist;
    word_reader wr(stdin);

    while ((len = wr(&w)) > 0) {
    counter::iterator i = hist.find(w);
    if (i == hist.end()) hist[strcpy(new char[len+1], w)] = 1;
    else ++i->second;
    }

    vector<hpair> v(hist.begin(), hist.end());
    sort(v.begin(), v.end());
    for (size_t i = 0; i < v.size(); ++i)
    printf("%7d\t%s\n", v[i].second, v[i].first);

    return 0;
}
[download]

Java (68 lines):

// $Id: wordfreq.java,v 1.3 2000/12/17 21:40:53 doug Exp $
// http://www.bagley.org/~doug/shootout/
// Collection class code is from my friend Phil Chu, Thanks Phil!

import java.io.*;
import java.util.*;
import java.text.*;

// Mutable integer tally used as a map value.
// Starts at 1: a Counter is created when a word is seen for the first time.
class Counter {
    int count = 1; 
}

public class wordfreq {

    public static void main(String[] args) {
    wf();
    }

    public static String padleft(String s,int n,char c) {
        int len = s.length();
        if( len>=n ) return s;
        char[] buf = new char[n];
        for( int i=0;i<n-len;i++ ) buf[i]=c;
        s.getChars(0,len,buf,n-len);
        return new String(buf);
    }
  
    public static void wf() {
        HashMap map = new HashMap();
        try {
            Reader r = new BufferedReader(new InputStreamReader(System
+.in));
            StreamTokenizer st = new StreamTokenizer(r);
            st.lowerCaseMode(true);
            st.whitespaceChars( 0, 64 );
            st.wordChars(65, 90);
            st.whitespaceChars( 91, 96 );
            st.wordChars(97, 122);
            st.whitespaceChars( 123, 255 );
            int tt = st.nextToken();
            while (tt != StreamTokenizer.TT_EOF) {
                if (tt == StreamTokenizer.TT_WORD) {
                    if (map.containsKey(st.sval)) {
                        ((Counter)map.get(st.sval)).count++;
                    } else {
                        map.put(st.sval, new Counter());
                    }
                }
                tt = st.nextToken();
            }
        } catch (IOException e) {
            System.err.println(e);
            return;
        }

        Collection entries = map.entrySet();
    // flatten the entries set into a vector for sorting
    Vector rev_wf = new Vector(entries); 

        // Sort the vector according to its value
        Collections.sort(rev_wf, new Comparator() {
        public int compare(Object o1, Object o2) {
            // First sort by frequency
            int c = ((Counter)((Map.Entry)o2).getValue()).count - ((Co
+unter)((Map.Entry)o1).getValue()).count;
            if (c == 0) { // Second sort by lexicographical order
            c = ((String)((Map.Entry)o2).getKey()).compareTo((String)(
+(Map.Entry)o1).getKey());
            }
            return c;
        }
        }
             );

        Iterator it = rev_wf.iterator();
    Map.Entry ent;
    String word;
    int count;
        while(it.hasNext()) {
            ent = (Map.Entry)it.next();
        word = ((String)ent.getKey());
        count = ((Counter)ent.getValue()).count;
        System.out.println(padleft(Integer.toString(count),7,' ') + "\
+t" + word);
        }
    }
}
[download]

Python (24 lines):

#!/usr/local/bin/python
# $Id: wordfreq.python,v 1.9 2001/05/11 17:44:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
#
# adapted from Bill Lear's original python word frequency counter
#
# Joel Rosdahl suggested using translate table to speed up
# word splitting.  That change alone sped this program up by
# at least a factor of 3.
#
# with further speedups from Mark Baker

import sys

def main():
    """Count the words on stdin and print them by descending frequency.

    A word is a maximal run of ASCII letters, folded to lower case; every
    other character in the 0-255 range is treated as a separator.  Output is
    one "%7s<TAB>word" line per distinct word, sorted by descending count and,
    for equal counts, by descending word.

    Written to run unchanged on Python 2 and Python 3.
    """
    count = {}
    i_r = [chr(c) for c in range(256)]

    # 256-entry translation table: letters map to their lower-case form,
    # everything else maps to a space so split() can do the tokenizing.
    trans = [' '] * 256
    o_a, o_z = ord('a'), (ord('z')+1)
    trans[ord('A'):(ord('Z')+1)] = i_r[o_a:o_z]
    trans[o_a:o_z] = i_r[o_a:o_z]
    trans = ''.join(trans)

    rl = sys.stdin.readlines

    # readlines(hint) returns whole lines totalling roughly `hint` bytes, so
    # the input is processed in batches instead of being loaded at once.
    lines = rl(4095)
    while lines:
        for line in lines:
            for word in line.translate(trans).split():
                count[word] = count.get(word, 0) + 1
        lines = rl(4095)

    # (count, word) tuples are unique, so a reverse sort gives the same order
    # as sorting ascending and then reversing.
    l = sorted(zip(count.values(), count.keys()), reverse=True)

    sys.stdout.write('\n'.join(["%7s\t%s" % (n, word) for (n, word) in l]) + '\n')

main()
[download]

Haskell (20 lines):

-- $Id: wordfreq.ghc,v 1.2 2001/02/27 04:04:35 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- from Julian Assange

-- compile with:
-- ghc -O -package data wordfreq.hs -o wordfreq

module Main where
import List(sortBy)
import Char(toLower,isLower)
import FiniteMap(fmToList,emptyFM,addToFM_C)

-- Reads all of stdin, lower-cases it, splits it into runs of lower-case
-- letters, counts each word in a finite map, and prints one
-- "count<TAB>word" line per word, highest count first.
main = interact $ unlines . pretty . sort . fmToList . makemap . cwords . lower
 where
       -- Render each (word, count) pair with the count right-aligned to 7 columns.
       pretty l  = [pad 7 (show n) ++ "\t" ++ w | (w,n) <- l]
                   where
                         pad n s = replicate (n - length s) ' ' ++ s

       -- Descending by count; equal counts fall back to descending word order.
       sort      = sortBy (\(w0,n0) (w1,n1) -> case compare n1 n0 of
                                               EQ -> compare w1 w0
                                               x  -> x)

       -- Fold the word list into the map, adding 1 for every occurrence.
       makemap   = addFM emptyFM
               where addFM fm [] = fm
                     addFM fm (x:xs) = addFM (addToFM_C (+) fm x 1) xs

       -- Split the input into maximal runs of lower-case letters.
       cwords s  = case dropWhile (not . isLower) s of
                        "" -> []
                        s' -> w : (cwords s'')
                              where (w, s'') = span isLower s'

       lower     = map toLower
[download]

And then write & run your perl solution in real time:

perl -nle"y/a-zA-Z/ /cs; ++$h{$_} for split }{ print qq[$_:$h{$_}] for
+ sort keys %h" theFile
break:1
brief:1
bring:3
brought:2
buffalo:16
burden:1
but:20
by:16
call:2
called:6
came:2
campaign:1
can:36
cannot:2
capable:1
capitals:1
career:2
cart:1
case:2
...
[download]

With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'

Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.

"Science is about questioning the status quo. Questioning authority".

In the absence of evidence, opinion is indistinguishable from prejudice.

RIP Neil Armstrong

Comment on Re: Get me excited about perl Select or Download Code

Replies are listed 'Best First'.
Re^2: Get me excited about perl by tobyink (Canon) on Sep 19, 2012 at 20:21 UTC
`php -R 'foreach (str_word_count(strtolower($argn), 1) as $w) $h[$w]++;' -E 'ksort($h); foreach ($h as $w=>$c) print "$w:$c\n";' <theFile` Update: it's sorted now. `perl -E'sub Monkey::do{say$_,for@_,do{($monkey=[caller(0)]->[3])=~s{::}{ }and$monkey}}"Monkey say"->Monkey::do'`	[reply]
Re^3: Get me excited about perl by BrowserUk (Patriarch) on Sep 19, 2012 at 20:54 UTC
Is the output sorted? With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday' Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error. "Science is about questioning the status quo. Questioning authority". In the absence of evidence, opinion is indistinguishable from prejudice. RIP Neil Armstrong	[reply]
Re^3: Get me excited about perl by flexvault (Monsignor) on Sep 25, 2012 at 13:27 UTC
tobyink, Okay, what's the joke? You brought 'php' to a thread on 'Get me excited about perl'! I can always use a good laugh...Ed "Well done is better than well said." - Benjamin Franklin	[reply]
Re^4: Get me excited about perl by tobyink (Canon) on Sep 25, 2012 at 15:04 UTC
You brought 'php' to a thread on 'Get me excited about perl'! Only as a reply to a post which included C, C++, Java, Python and Haskell source code. My point is that Perl is not unique in its ability to provide concise solutions to text processing problems. The PHP solution is a little longer, sure, but is arguably more readable than the Perl one (to somebody who knows both languages) due to not needing to rely on idioms like the Eskimo kiss operator. `perl -E'sub Monkey::do{say$_,for@_,do{($monkey=[caller(0)]->[3])=~s{::}{ }and$monkey}}"Monkey say"->Monkey::do'`	[reply]
Re^5: Get me excited about perl by BrowserUk (Patriarch) on Oct 29, 2012 at 20:38 UTC
Re^5: Get me excited about perl by flexvault (Monsignor) on Sep 25, 2012 at 16:31 UTC