in reply to Get me excited about perl
Show them Perl's greatest asset -- concise solutions to everyday problems.
If you have time, pick some task that will resonate with as many of them as possible and get one or more of them to solve the chosen task in their favoured languages before the day.
By way of example (because the examples already exist): frequency count the words in a text file.
/* -*- mode: c -*- * $Id: wordfreq.gcc,v 1.5 2001/09/18 17:25:18 doug Exp $ * http://www.bagley.org/~doug/shootout/ * Changed by Adrian Merrill 2001/08/22 */ #include <stdio.h> #include <ctype.h> #include <malloc.h> #include <stdlib.h> #include <string.h> #include "../../Include/simple_hash.h" #define QUICKIE_STRCMP(a, b) (*(a) != *(b) ? *(a) - *(b) : strcmp((a) +, (b))) typedef int (*comparator)(const void *, const void *); int cmp_hash(struct ht_node **a, struct ht_node **b) { int val = (*b)->val - (*a)->val; return((val == 0) ? QUICKIE_STRCMP((*b)->key, (*a)->key) : val); } int main() { int readbufsize = 4096; int wordbufsize=16; char *readbuf = (char *)malloc(readbufsize + 1); char *wordbuf = (char *)malloc(wordbufsize + 1); int i = 0; struct ht_ht *ht = ht_create(2048); struct ht_node **sort_array, **sort_tmp, *node; /*new code*/ int nread =0; int wordlen=0; readbuf[0]=0; while (readbuf[i] > 0||(nread = fread(readbuf, sizeof(char), readb +ufsize, stdin),readbuf[nread] = '\0',i=0,nread > 0) ) { if (isalpha(readbuf[i])){ wordbuf[wordlen++] = tolower(readbuf[i]); if (wordlen == wordbufsize) { wordbufsize *= 2; wordbuf = realloc(wordbuf, wordbufsize + 1); } } else{ if (wordlen > 0) { wordbuf[wordlen] = '\0'; ++(ht_find_new(ht, wordbuf)->val); wordlen = 0; } } i++; } free(readbuf); free(wordbuf); sort_array = sort_tmp = malloc(sizeof(struct ht_node *) * ht_count(ht)); for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht +)) ; qsort(sort_array, ht_count(ht), sizeof(struct ht_node *), (comparator)cmp_hash); for (i=0; i<ht_count(ht); i++) printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); + ht_destroy(ht); return(0); }
// -*- mode: c++ -*-
// $Id: wordfreq.g++,v 1.5 2001/07/21 23:51:05 doug Exp $
// http://www.bagley.org/~doug/shootout/
// By Tamás Benkő
//
// Reads text from stdin, counts every case-folded run of alphabetic
// characters, and prints "count<TAB>word" lines ordered by descending count
// and, for equal counts, by descending word.

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

namespace {

constexpr std::size_t kBufSize = 4096;

using entry = std::pair<std::string, int>;

// Splits a C stream into lower-cased alphabetic words, reading the stream in
// fixed-size chunks.
class word_reader {
public:
    explicit word_reader(std::FILE *in) : input_(in) {}

    // Stores the next word in `word`; returns false once the input is exhausted.
    bool next(std::string &word);

private:
    // Refills the buffer; returns false when nothing more could be read.
    bool fill();

    std::FILE *input_;
    char buf_[kBufSize];
    std::size_t pos_ = 0;
    std::size_t end_ = 0;
};

bool word_reader::fill() {
    end_ = std::fread(buf_, sizeof(char), kBufSize, input_);
    pos_ = 0;
    return end_ > 0;
}

bool word_reader::next(std::string &word) {
    word.clear();
    // The buffer is tracked by length, not by a NUL sentinel, so NUL bytes in
    // the input are ordinary separators and cannot truncate a chunk.
    while (pos_ < end_ || fill()) {
        // <cctype> functions require a value representable as unsigned char.
        const unsigned char c = static_cast<unsigned char>(buf_[pos_++]);
        if (std::isalpha(c)) {
            word.push_back(static_cast<char>(std::tolower(c)));
        } else if (!word.empty()) {
            break;
        }
    }
    return !word.empty();
}

// Sort order of the report: higher count first, ties by descending word.
bool ranks_before(const entry &lhs, const entry &rhs) {
    if (lhs.second != rhs.second) {
        return lhs.second > rhs.second;
    }
    return lhs.first > rhs.first;
}

}  // namespace

int main() {
    std::unordered_map<std::string, int> hist;
    word_reader reader(stdin);

    std::string word;
    while (reader.next(word)) {
        ++hist[word];  // operator[] value-initialises a missing count to 0
    }

    std::vector<entry> ranked(hist.begin(), hist.end());
    std::sort(ranked.begin(), ranked.end(), ranks_before);

    for (const entry &e : ranked) {
        std::printf("%7d\t%s\n", e.second, e.first.c_str());
    }
    return 0;
}
// $Id: wordfreq.java,v 1.3 2000/12/17 21:40:53 doug Exp $
// http://www.bagley.org/~doug/shootout/
// Collection class code is from my friend Phil Chu, Thanks Phil!

import java.io.*;
import java.util.*;
import java.text.*;

/** Mutable occurrence counter; a new instance already records one sighting. */
class Counter {
    int count = 1;
}

/**
 * Reads text from standard input, counts every lower-cased ASCII word, and
 * prints "count&lt;TAB&gt;word" lines ordered by descending count and, for
 * equal counts, by descending word.
 */
public class wordfreq {

    public static void main(String[] args) {
        wf();
    }

    /**
     * Left-pads {@code s} with {@code c} up to a width of {@code n}.
     *
     * @param s text to pad
     * @param n minimum width of the result
     * @param c padding character
     * @return {@code s} itself when it is already at least {@code n} long,
     *         otherwise a new right-aligned string of length {@code n}
     */
    public static String padleft(String s, int n, char c) {
        int len = s.length();
        if (len >= n) {
            return s;
        }
        char[] buf = new char[n];
        Arrays.fill(buf, 0, n - len, c);
        s.getChars(0, len, buf, n - len);
        return new String(buf);
    }

    /** Counts the words on standard input and prints the frequency table. */
    public static void wf() {
        Map<String, Counter> map = new HashMap<String, Counter>();
        try {
            Reader r = new BufferedReader(new InputStreamReader(System.in));
            StreamTokenizer st = new StreamTokenizer(r);
            st.lowerCaseMode(true);
            // Only A-Z (65-90) and a-z (97-122) are word characters; every
            // other code in 0-255 is a separator.
            st.whitespaceChars(0, 64);
            st.wordChars(65, 90);
            st.whitespaceChars(91, 96);
            st.wordChars(97, 122);
            st.whitespaceChars(123, 255);
            int tt = st.nextToken();
            while (tt != StreamTokenizer.TT_EOF) {
                if (tt == StreamTokenizer.TT_WORD) {
                    Counter seen = map.get(st.sval);
                    if (seen != null) {
                        seen.count++;
                    } else {
                        map.put(st.sval, new Counter());
                    }
                }
                tt = st.nextToken();
            }
        } catch (IOException e) {
            System.err.println(e);
            return;
        }

        // Flatten the entry set into a list so it can be sorted.
        List<Map.Entry<String, Counter>> rev_wf =
            new ArrayList<Map.Entry<String, Counter>>(map.entrySet());

        Collections.sort(rev_wf, new Comparator<Map.Entry<String, Counter>>() {
            public int compare(Map.Entry<String, Counter> o1,
                               Map.Entry<String, Counter> o2) {
                // First by frequency, highest first.
                int c = o2.getValue().count - o1.getValue().count;
                if (c == 0) {
                    // Then by word, in reverse lexicographical order.
                    c = o2.getKey().compareTo(o1.getKey());
                }
                return c;
            }
        });

        for (Map.Entry<String, Counter> ent : rev_wf) {
            String word = ent.getKey();
            int count = ent.getValue().count;
            System.out.println(padleft(Integer.toString(count), 7, ' ') + "\t" + word);
        }
    }
}
#!/usr/local/bin/python
# $Id: wordfreq.python,v 1.9 2001/05/11 17:44:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
#
# adapted from Bill Lear's original python word frequency counter
#
# Joel Rosdahl suggested using translate table to speed up
# word splitting.  That change alone sped this program up by
# at least a factor of 3.
#
# with further speedups from Mark Baker

import sys


def main():
    """Print a word-frequency table for the text on standard input.

    Words are runs of ASCII letters, folded to lower case.  Output lines have
    the form "%7s\\t%s" (count, word), ordered by descending count and, for
    equal counts, by descending word.  Runs on Python 2.6+ and Python 3.
    """
    # 256-entry byte table: letters map to their lower-case form, every other
    # byte maps to a space so that split() can do the tokenising.
    table = bytearray(b' ' * 256)
    for upper in range(ord('A'), ord('Z') + 1):
        lower = upper + (ord('a') - ord('A'))
        table[upper] = lower
        table[lower] = lower
    table = bytes(table)

    # Python 3 exposes the raw byte stream as sys.stdin.buffer; on Python 2
    # sys.stdin already yields byte strings.
    stream = getattr(sys.stdin, 'buffer', sys.stdin)
    readlines = stream.readlines

    counts = {}
    lines = readlines(4095)
    while lines:
        for line in lines:
            for word in line.translate(table).split():
                counts[word] = counts.get(word, 0) + 1
        lines = readlines(4095)

    # Sorting (count, word) pairs in reverse gives the highest count first
    # and, within a count, reverse alphabetical order.
    ranked = sorted(((n, w) for w, n in counts.items()), reverse=True)
    print('\n'.join("%7s\t%s" % (n, w.decode('ascii')) for n, w in ranked))


main()
-- $Id: wordfreq.ghc,v 1.2 2001/02/27 04:04:35 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- from Julian Assange

-- compile with:
--   ghc -O -package data wordfreq.hs -o wordfreq

module Main where

import List(sortBy)
import Char(toLower,isLower)
import FiniteMap(fmToList,emptyFM,addToFM_C)

-- Lower-cases stdin, splits it into runs of lower-case letters, counts each
-- word in a finite map, and prints "count<TAB>word" lines ordered by
-- descending count and, for equal counts, by descending word.
main = interact $ unlines . pretty . sort . fmToList . makemap . cwords . lower
 where
       -- Right-align the count in a 7-column field, then a tab and the word.
       pretty l  = [pad 7 (show n) ++ "\t" ++ w | (w,n) <- l]
                   where pad n s = replicate (n - length s) ' ' ++ s

       -- Arguments are swapped in both comparisons to get descending order.
       sort      = sortBy (\(w0,n0) (w1,n1) -> case compare n1 n0 of
                                                 EQ -> compare w1 w0
                                                 x  -> x)

       -- Fold the word list into the map, adding 1 per occurrence.
       makemap   = addFM emptyFM
                   where addFM fm [] = fm
                         addFM fm (x:xs) = addFM (addToFM_C (+) fm x 1) xs

       -- Skip non-letters, then take the longest run of letters as a word.
       cwords s  = case dropWhile (not . isLower) s of
                     "" -> []
                     s' -> w : (cwords s'')
                           where (w, s'') = span isLower s'

       lower     = map toLower
And then write & run your perl solution in real time:
perl -nle"y/a-zA-Z/ /cs; ++$h{$_} for split }{ print qq[$_:$h{$_}] for sort keys %h" theFile
break:1
brief:1
bring:3
brought:2
buffalo:16
burden:1
but:20
by:16
call:2
called:6
came:2
campaign:1
can:36
cannot:2
capable:1
capitals:1
career:2
cart:1
case:2
...
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Get me excited about perl
by tobyink (Canon) on Sep 19, 2012 at 20:21 UTC | |
by BrowserUk (Patriarch) on Sep 19, 2012 at 20:54 UTC | |
by flexvault (Monsignor) on Sep 25, 2012 at 13:27 UTC | |
by tobyink (Canon) on Sep 25, 2012 at 15:04 UTC | |
by BrowserUk (Patriarch) on Oct 29, 2012 at 20:38 UTC | |
by flexvault (Monsignor) on Sep 25, 2012 at 16:31 UTC |