This is a general code critique.
# Always use strict especially when you are new. Its a boon to debuggi +ng and it helps you from screwing up in some interesting ways (but mo +stly it does some typo-checking) use strict; use warnings; open (TEXTFILE, 'text.txt') or die ("Cannot open file : $!"); # $/ is normally "\n" which is what readline() uses to decide when to +stop reading. You want to assign undef here so that readline() will p +ull in the entire file and not just the first line of the file. $/ = undef; # strict will force you to declare your variables. Here I use my() to +make $big_string be a lexical. my $big_string = <TEXTFILE>; $big_string = lc ($big_string); # I re-wrote this as a tr/// expression so it would be so ugly as a re +gular expression. If you really wanted to use s/// then s/[()]//g wou +ld have been preferred. $big_string =~ tr/()//d; # I declared @words with my() and moved the plus sign out of the chara +cter class. You meant to split on repeated white-space but you actual +ly just indicated that plus signs should also be split on. Moving the + + quantifier onto the character class indicates that any repeated ma +tching of the class should be smushed together. my @words = split (/[,.\s]+/, $big_string); # I replaced your for/push loop with a grep which only passes words th +at are three characters or longer. You should also note that in your +original expression that anything that matches {4,} is also going to +match {3,}. If you really meant to do that then just the single {3,} +would have been good enough. Also, instead of saying [a-zA-Z] you cou +ld have written /[a-z]{3,}/i which is case insensitive. my @gt_three_char_words = grep /[a-z]{3,}/i, @words; # Rename %hash to %word_counter and declare it. my %word_counter; foreach (@gt_three_char_words) { $word_counter{$_}++; } # Sort the list of words so you can then extract whatever portion you +like. @gt_three_char_words = sort { # Sort the word list by occurrances ( $word_counter{$a} <=> $word_counter{$b} ) || # Then alphabetically ( lc( $a ) cmp lc( $b ) } @gt_three_char_words # Print the print join( '', map( "$_: $word_count{$_}\n", @gt_three_char_words[ 0 .. 9 ] ) ) . "\n";
Now here is how I would have written the entire thing.
use strict; use warnings; use Getopt::Long; use File::Slurp; my $filename = 'text.txt'; my $min_word_length = 3; my $start_word = 0; my $words_to_display = 10; GetOptions( "file=s" => \$filename, "min=i" => \$min_word_length, "start=i" => \$start_word, "words=i" => \$words_to_display ); my $original_text = lc read_file( $filename ); $original_text =~ tr/()//d; my @words = grep /[a-z]{$min_word_length,}/i, split /[,.\s]+/, $original_text; my %word_counter; $word_counter{ $_ } ++ for @words; @words = sort { $word_counter{ $a } <=> $word_counter{ $b } || $a cmp $b } @ words; my @words_to_print = @words[ $start_word .. ( $start_word + $words_to_display - 1 ) ]; for my $word ( @words_to_print ) { print "$word: $word_counter{ $word }\n"; }
In reply to Re: Tutelage, part two
by diotalevi
in thread Tutelage, part two
by ctp
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |