bannor:~/work/perl/monks$ perl 964355.pl
File size: 367001600
keys: 6924700
size: 1129106184
Overhead: 67.50%
abaugher 11340 96.6 33.9 1402520 1376916 pts/3 S+   17:25   4:16 perl 964355.pl

bannor:~/work/perl/monks$ cat 964355.pl
#!/usr/bin/env perl
use Modern::Perl;
use Devel::Size qw(total_size);

# Create a file of at least 350MB with a single '*' in each line,
# dividing keys and values of random lengths of 10..40 chars.
my $target_size = 350 * 1024 * 1024;
my @alphabet    = ('A'..'Z', 'a'..'z', 0..9);

# Build one random alphanumeric token, 10 to 40 characters long.
my $random_token = sub {
    my $length = 10 + int(rand 31);    # int(rand 31) is 0..30
    return join '', map { $alphabet[rand @alphabet] } 1 .. $length;
};

open my $out, '>', 'bigfile' or die "Cannot open bigfile for writing: $!";

# Count the bytes ourselves instead of calling -s on every iteration:
# a stat per line is slow, and the on-disk size lags behind buffered writes.
my $written = 0;
while ($written < $target_size) {
    my $line = $random_token->() . '*' . $random_token->() . "\n";
    print {$out} $line or die "Cannot write to bigfile: $!";
    $written += length $line;
}

# Close before measuring or re-reading the file; otherwise the final buffer
# is still unflushed and the last line on disk may be truncated.
close $out or die "Cannot close bigfile: $!";

my $filesize = -s 'bigfile';
say 'File size: ', $filesize;

# Now process the file into a hash: the text before the first '*' on each
# line becomes the key, the text after it becomes the value.
my %h;
open my $in, '<', 'bigfile' or die "Cannot open bigfile for reading: $!";
while (my $line = <$in>) {
    chomp $line;
    my ($unus, $duo) = split /\*/, $line, 2;
    # Test key presence, not value truthiness: a previously stored value of
    # undef, '' or '0' must still count as a duplicate.
    die "Duplicate key!" if exists $h{$unus};  # no duplicates
    $h{$unus} = $duo;
}
close $in;

# Report the number of entries and the memory footprint of the hash as
# measured by Devel::Size.
say 'keys: ', scalar keys %h;
my $totalsize = total_size(\%h);
say 'size: ', $totalsize;

# Overhead: the share of the hash's total size that exceeds the raw file size.
printf "Overhead: %.2f%%\n", ($totalsize - $filesize) * 100 / $totalsize;

# Show the OS view of this process's memory usage. Query by our own PID
# rather than grepping `ps` output for a hard-coded script name, which can
# also match the grep/sh helper processes and breaks if the file is renamed.
my $pid = $$;
print `ps -o user,pid,pcpu,pmem,vsz,rss,args -p $pid`;