use strict;
use warnings;

use Fcntl qw(:seek);    # SEEK_SET, used to rewind the temp files
use File::Temp qw/ tempfile tempdir /;

# Sort and de-duplicate the word list in all.txt, writing the result to
# all1.txt.
#
# Basic approach (this is going to look a lot like the File::Temp perldoc
# page): we open one temp file per initial letter, spill every word into the
# temp file for its first letter, then seek back to the start of each temp
# file when it's time to read from it. Only one letter's words are held in
# memory while sorting. We let File::Temp handle deleting the temp files.

my $dir = tempdir(CLEANUP => 1);

# Bucket key => read/write handle on that bucket's temp file.
my %filehandles;

# Bucket for words that do not start with an ASCII letter (including empty
# lines). "___" sorts before "a", so these words come first in the output.
my $defaultKey = "___";

for my $key ('a' .. 'z', $defaultKey) {
    # In scalar context tempfile() returns only the filehandle.
    $filehandles{$key} = tempfile(DIR => $dir);
}

# ---- Pass 1: distribute the words into the per-letter temp files ----------
print "Shifting all words into letter data files - ";
open(my $in, '<', "all.txt") || die "cannot open all.txt for input: $!";
while (my $word = <$in>) {
    chomp($word);
    $word =~ s/^\s+//;    # leading whitespace must not decide the bucket

    my $letter = lc(substr($word, 0, 1));
    if ($letter !~ /[a-z]/) {
        $letter = $defaultKey;
    }

    my $fh = $filehandles{$letter} || die "No file handle for $letter";
    print {$fh} "$word\n";
}
close($in);
print "done\n";

# ---- Pass 2: sort and de-duplicate each bucket, append to the output ------
print "Organizing letter file alphabetically - \n";
open(my $out, '>', "all1.txt") || die "cannot do $!";
foreach my $letter (sort keys %filehandles) {
    print "\tseeking $letter - ";
    my $fh = $filehandles{$letter} || die "No file handle for $letter";

    # Seek back to the start of this temp file to read from it.
    seek($fh, 0, SEEK_SET) || die "cannot rewind temp file for $letter: $!";
    my @words = <$fh>;
    chomp(@words);

    print "\t\tsorting - ";
    # Case-insensitive order, with a case-sensitive tie-break so that
    # byte-identical words always end up adjacent; the duplicate filter
    # below only compares neighbours.
    @words = sort { lc($a) cmp lc($b) || $a cmp $b } @words;
    print "done\n";

    print "\t\tremoving duplicates - ";
    my $prev;    # undef until the first word has been seen
    @words = grep {
        my $is_new = !defined($prev) || $_ ne $prev;
        $prev = $_;
        $is_new;
    } @words;
    print "done\n";

    # Write all the words in one call, should be faster than one print each.
    print {$out} join("\n", @words), "\n" if @words;
    print "\tdone\n";
}
# Buffered write errors only surface at close, so check it.
close($out) || die "cannot close all1.txt: $!";
print "done\n";