use strict;
use warnings;

use Fcntl qw(SEEK_SET);
use File::Temp qw/ tempfile tempdir /;

# this is going to look a lot like
# the File::Temp perldoc page

# Scratch directory that holds the per-letter temp files.  CLEANUP=>1 asks
# File::Temp to remove the directory when the program exits.
my $dir = tempdir(CLEANUP=>1);

# Strategy: keep one open temp file per initial letter ('a'..'z') plus one
# catch-all file for everything else.  Words are appended to the matching
# file first; later each handle is rewound to the start and read back.
# File::Temp takes care of deleting the files.
my $defaultKey = "___";

# tempfile() is forced into scalar context so that it returns only the
# filehandle (in list context it would also return the file name and
# corrupt the key/value pairs built here).
my %filehandles = map { $_ => scalar tempfile(DIR=>$dir) } ('a'..'z', $defaultKey);

# Phase 1: copy every line of all.txt into the temp file that belongs to
# the line's first letter.
print "Shifting all words into letter data files - ";

# Three-argument open with a lexical handle: the open mode can never be
# influenced by the file name, and Perl's special DATA handle (the one tied
# to a __DATA__ section) is left untouched.
open(my $in, '<', "all.txt") or die "cannot open all.txt for input: $!";
while (my $word = <$in>) {
    chomp($word);
    $word =~ s/^\s+//;    # leading whitespace must not decide the bucket

    # Bucket by the lower-cased first character.  Anything that is not
    # a-z goes to the catch-all bucket; an empty line has no first
    # character and therefore lands there as well.
    my $letter = lc(substr($word, 0, 1));
    $letter = $defaultKey if $letter !~ /[a-z]/;

    my $fh = $filehandles{$letter} || die "No file handle for $letter";

    # A failed write (e.g. disk full) would otherwise silently lose words.
    print {$fh} "$word\n"
        or die "cannot write to temp file for $letter: $!";
}
close($in);
print "done\n";

# Return the given words ordered case-insensitively.  Words that differ only
# in case are tie-broken with an exact string comparison, which guarantees
# that identical spellings end up next to each other in the result.
sub _sort_words_ignoring_case {
    my @words = @_;
    my @sorted = sort { lc($a) cmp lc($b) or $a cmp $b } @words;
    return @sorted;
}

# Return the given list with every run of identical neighbours collapsed to
# a single element.  Only adjacent duplicates are removed, so the input is
# expected to be sorted such that equal words are adjacent.  An empty list
# yields an empty list.
sub _drop_adjacent_duplicates {
    my @words = @_;
    my @unique;
    for my $word (@words) {
        push @unique, $word if !@unique || $unique[-1] ne $word;
    }
    return @unique;
}

# Phase 2: read each letter bucket back, sort it, drop duplicate words and
# append the result to all1.txt.  Buckets are processed in sorted key order.
print "Organizing letter file alphabetically - \n";
open(my $out, '>', "all1.txt") or die "cannot open all1.txt for output: $!";
foreach my $letter (sort keys %filehandles) {
    print "\tseeking $letter - ";
    my $fh = $filehandles{$letter} || die "No file handle for $letter";

    # Rewind the temp file so that everything written to it earlier can be
    # read back from the beginning.
    seek($fh, 0, SEEK_SET) or die "cannot rewind temp file for $letter: $!";
    chomp(my @words = <$fh>);

    print "\t\tsorting - ";
    @words = _sort_words_ignoring_case(@words);
    print "done\n";

    print "\t\tremoving duplicates - ";
    @words = _drop_adjacent_duplicates(@words);
    print "done\n";

    # Write the whole bucket in one call; an empty bucket writes nothing.
    if (@words) {
        print {$out} join("\n", @words), "\n"
            or die "cannot write to all1.txt: $!";
    }
    print "\tdone\n";
}

# Buffered write errors only surface when the handle is closed, so the
# result of close has to be checked for an output file.
close($out) or die "cannot close all1.txt: $!";
print "done\n";