#!/usr/bin/env perl
#
# Measure how much memory a Perl hash needs compared with the flat file it
# was loaded from.
#
# The script writes 'bigfile' (about 350 MB of "key*value" lines built from
# random alphanumeric strings), loads every line into a hash, and prints the
# file size, the number of keys, Devel::Size's total_size() of the hash, the
# percentage of that total that is not accounted for by the file's bytes,
# and finally the `ps` entry of the running process.
#
# Transcript of a run of the earlier revision of this script (which listed
# the process with `ps auxww|grep 964355.pl`):
#
#   bannor:~/work/perl/monks$ perl 964355.pl
#   File size: 367001600
#   keys: 6924700
#   size: 1129106184
#   Overhead: 67.50%
#   abaugher 11340 96.6 33.9 1402520 1376916 pts/3 S+ 17:25 4:16 perl 964355.pl
#   bannor:~/work/perl/monks$ cat 964355.pl

use Modern::Perl;
use Devel::Size qw(total_size);

my $DATA_FILE   = 'bigfile';
my $TARGET_SIZE = 350 * 1024 * 1024;                     # 350 MB
my @ALPHABET    = ( 'A' .. 'Z', 'a' .. 'z', 0 .. 9 );    # built once, not per character

# Return a random alphanumeric string of 10..40 characters (inclusive).
sub random_word {
    my $length = 10 + int rand 31;
    return join '', map { $ALPHABET[ rand @ALPHABET ] } 1 .. $length;
}

# Create the data file: one '*' per line, separating a random key from a
# random value.
open my $out, '>', $DATA_FILE or die "Cannot write $DATA_FILE: $!";

# Count the bytes ourselves: calling -s on every iteration costs a stat()
# per line and only sees what has already been flushed to disk.
my $bytes_written = 0;
while ( $bytes_written < $TARGET_SIZE ) {
    my $line = random_word() . '*' . random_word() . "\n";
    print {$out} $line or die "Cannot write to $DATA_FILE: $!";
    $bytes_written += length $line;
}

# Close before measuring or re-reading the file; otherwise buffered output
# is still pending, the size is under-reported and the last line read back
# may be truncated.
close $out or die "Cannot close $DATA_FILE: $!";

my $filesize = -s $DATA_FILE;
say 'File size: ', $filesize;

# Now process the file into a hash and analyze the hash.
my %value_of;
open my $in, '<', $DATA_FILE or die "Cannot read $DATA_FILE: $!";
while ( my $line = <$in> ) {
    chomp $line;
    my ( $unus, $duo ) = split /\*/, $line, 2;
    die sprintf "Malformed line %d in %s: %s\n", $., $DATA_FILE, $line
        unless defined $duo;

    # exists() rather than truthiness, so a false or undefined value can
    # never hide a duplicate key.
    die "Duplicate key!" if exists $value_of{$unus};
    $value_of{$unus} = $duo;
}
close $in;

say 'keys: ', scalar keys %value_of;
my $totalsize = total_size( \%value_of );
say 'size: ', $totalsize;

# Overhead is expressed as a percentage of the hash's total size.
printf "Overhead: %.2f%%\n", ( $totalsize - $filesize ) * 100 / $totalsize;

# Show this process's own ps entry. Querying by PID keeps working when the
# script is renamed and cannot match an unrelated grep process.
system( 'ps', 'u', '-p', $$ ) == 0
    or warn "ps failed (status $?)\n";