#!/usr/bin/perl
#
# Keyword summariser.
#
# For every id from $FIRST_ID to $LAST_ID this script reads
# "$DATA_DIR/<id>.txt", keeps the first line (stored as $url), counts how
# often each word occurs in the remaining lines (case-insensitively, ignoring
# the stop words in %BANNED), and writes the first line followed by the
# $TOP_N most frequent words, one per line, to "$SORTED_DIR/<id>.txt".
#
# The script dies on the first input file that cannot be opened and on any
# output file that cannot be written.

use strict;
use warnings;

my $DATA_DIR   = '/var/www/data';
my $SORTED_DIR = '/var/www/sorted';
my $FIRST_ID   = 1;
my $LAST_ID    = 19_999;
my $TOP_N      = 50;

# Words that are never counted: a handful of common English words, every
# single letter, and the numbers 0 through 10.  Built once and used as a
# set, so membership is a single hash lookup.
my %BANNED = map { $_ => 1 } (
    qw(at be for and to of in the as it are is am on an you me),
    'a' .. 'z',
    0 .. 10,
);

# tally_words(\%count, $line)
#
# Lower-cases $line, splits it on runs of non-word characters and increments
# $count{$word} once for every word that is neither empty nor in %BANNED.
# Returns nothing; the hash is updated in place.
sub tally_words {
    my ($count_ref, $line) = @_;

    for my $word (split /\W+/, lc $line) {
        # split yields a leading empty field when the line starts with a
        # separator; it must not be counted as a word.
        next if $word eq '';
        next if exists $BANNED{$word};
        $count_ref->{$word}++;
    }
    return;
}

# top_words(\%count, $limit)
#
# Returns at most $limit words from %count, most frequent first.  Words with
# equal counts are ordered alphabetically so the result is reproducible.
sub top_words {
    my ($count_ref, $limit) = @_;

    my @ranked = sort {
        $count_ref->{$b} <=> $count_ref->{$a}
            or $a cmp $b
    } keys %{$count_ref};

    splice @ranked, $limit if @ranked > $limit;
    return @ranked;
}

# summarize_file($id)
#
# Processes one input file and writes its summary file.  Dies with the
# offending path in the message if either file cannot be opened or if the
# output cannot be flushed on close.
sub summarize_file {
    my ($id) = @_;

    my $in_path = "$DATA_DIR/$id.txt";
    open my $in, '<', $in_path
        or die "Cannot open $in_path: $!";

    # The first line is carried through to the output verbatim.
    my $url = <$in>;
    $url = '' unless defined $url;    # empty input file

    my %count;
    while (my $line = <$in>) {
        chomp $line;
        tally_words(\%count, $line);
    }
    close $in;

    my @top = top_words(\%count, $TOP_N);

    print "File: $id\n";

    my $out_path = "$SORTED_DIR/$id.txt";
    open my $out, '>', $out_path
        or die "Cannot open $out_path for writing: $!";

    # NOTE(review): $url still carries its own line ending, so the output
    # normally has an empty line between the first line and the word list.
    # That layout is kept on purpose in case existing readers of the sorted
    # files depend on it -- confirm before changing.
    print {$out} "$url\n";
    print {$out} join("\n", @top);

    # Buffered write errors only surface at close, so check it.
    close $out
        or die "Cannot close $out_path: $!";
    return;
}

# main()
#
# Runs summarize_file for every id in the configured range.
sub main {
    for my $id ($FIRST_ID .. $LAST_ID) {
        summarize_file($id);
    }
    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. with require) without touching the filesystem.
main() unless caller;

1;