use strict;
use warnings;
use File::Copy qw(move);

# External merge sort for line-oriented text files that are too large to
# sort in memory.
#
# Arguments (all four required, in this order):
#   in   - path of the file to sort
#   out  - path the sorted result is moved to
#   temp - existing directory used for intermediate chunk files
#   max  - approximate number of megabytes of input to hold in memory
#          before a sorted chunk is flushed to the temp directory
#
# Phase 1 reads the input line by line, buffering lines until the buffer
# reaches the size limit, then sorts the buffer and writes it out as
# "$temp/<k>.txt".  Phase 2 repeatedly merges the two oldest chunk files
# into a new one until a single file remains, which is moved to "out".
# Lines are compared as plain strings (default sort / "lt").

die "Arguments = in, out, temp dir, sort max in MB.\n" if $#ARGV != 3;
my ($in, $out, $temp, $max) = @ARGV;

die "$in does not exist.\n" if !-e $in;
# Three-argument open so that characters in the file name can never be
# interpreted as an open mode or a pipe.
open(my $fh_in, '<', $in) or die "Can't open $in for read.\n";
die "$temp does not exist, or is not a directory." if !-d $temp;

$temp =~ s|/$||;        # drop one trailing slash so "$temp/$n.txt" reads cleanly
$max *= 1024 * 1024;    # megabytes -> bytes

# @b1   - lines buffered for the current chunk
# $size - total length in bytes of the lines in @b1
# $n    - number of the most recently created temp file
# @t    - queue of temp file numbers still waiting to be merged
my (@b1, $size, $n, @t, $t1, $t2);
$size = 0;
$n    = 0;

### Phase 1: split the input into sorted chunks.
while (my $line = <$fh_in>) {
    # A final line without a newline would be glued to its successor once
    # the chunk is sorted and written back-to-back, so terminate it.
    $line .= "\n" if $line !~ /\n\z/;

    push @b1, $line;
    $size += length $line;

    if ($size >= $max) {
        ### Over limit, write chunk
        writeTemp();
        @b1   = ();
        $size = 0;
    }
}
close($fh_in);

### Write whatever's left in buffer.  When the input was empty no chunk
### exists yet, so write one (empty) chunk to give the final move a source.
writeTemp() if @b1 || $n == 0;

### Phase 2: merge chunk files pairwise until only one is left.
@t = (1 .. $n);
while (@t > 1) {
    $t1 = shift @t;
    $t2 = shift @t;
    $n++;

    mergeFiles("$temp/$t1.txt", "$temp/$t2.txt", "$temp/$n.txt");
    print "$temp/$t1.txt + $temp/$t2.txt => $temp/$n.txt\n";

    unlink "$temp/$t1.txt" or warn "Unable to remove $temp/$t1.txt: $!\n";
    unlink "$temp/$t2.txt" or warn "Unable to remove $temp/$t2.txt: $!\n";

    push @t, $n;
}

# The highest-numbered temp file is the fully merged result.  File::Copy's
# move() avoids passing the paths through a shell.
move("$temp/$n.txt", $out)
    or die "Unable to move $temp/$n.txt to $out: $!\n";
print "$temp/$n.txt => $out\n";

# writeTemp()
#
# Sorts the buffered lines in @b1 and writes them to the next numbered
# temp file ("$temp/<n+1>.txt"), incrementing $n.  Operates on the
# file-level variables @b1, $size, $n, $temp and $in; the caller is
# responsible for clearing @b1 and $size afterwards.  Dies if the temp
# file cannot be opened or fully written.
sub writeTemp {
    $n++;
    open(my $fh_out, '>', "$temp/$n.txt")
        or die "Unable to open $temp/$n.txt for write.\n";

    @b1 = sort @b1;

    # Printing the list directly writes the lines back-to-back without
    # first building a second, joined copy of the whole chunk in memory.
    print {$fh_out} @b1
        or die "Unable to write to $temp/$n.txt: $!\n";

    # Buffered write errors only surface at close, so check it.
    close($fh_out)
        or die "Unable to finish writing $temp/$n.txt: $!\n";

    print "$in => $temp/$n.txt ($size)\n";
    return;
}

# mergeFiles($f1, $f2, $fo)
#
# Merges two files whose lines are already sorted into one sorted file.
#   $f1, $f2 - paths of the sorted input files
#   $fo      - path of the output file (created or truncated)
# Dies if any of the files cannot be opened or the output cannot be
# fully written.
sub mergeFiles {
    my ($f1, $f2, $fo) = @_;

    open(my $fh1,    '<', $f1) or die "Unable to open $f1 for read.";
    open(my $fh2,    '<', $f2) or die "Unable to open $f2 for read.";
    open(my $fh_out, '>', $fo) or die "Unable to open $fo for write.";

    my $line1 = <$fh1>;
    my $line2 = <$fh2>;

    # Test definedness, not truth: a line consisting of just "0" is false
    # but is still data that has to be merged.
    while (defined $line1 && defined $line2) {
        if ($line1 lt $line2) {
            print {$fh_out} $line1;
            $line1 = <$fh1>;
        }
        else {
            print {$fh_out} $line2;
            $line2 = <$fh2>;
        }
    }

    # At most one input still has lines.  Copy them across one line at a
    # time so memory use stays bounded however long the remainder is.
    my ($rest, $fh_rest) = defined $line1 ? ($line1, $fh1) : ($line2, $fh2);
    while (defined $rest) {
        print {$fh_out} $rest;
        $rest = <$fh_rest>;
    }

    close($fh1);
    close($fh2);
    close($fh_out) or die "Unable to finish writing $fo: $!\n";
    return;
}