#!/usr/bin/env perl

# Compares how well gzip compresses long strings of decimal digits built
# from two random number sources: Perl's built-in rand() and
# Math::Random::MT. For each source it builds $NUM_REPEATS strings, each the
# concatenation of $DATA_LENGTH random integers in [0, 1000000), gzips them
# and reports the mean original/compressed lengths and two percentage
# measures of the size change.

use strict;
use warnings;

use Gzip::Faster;
use Math::Random::MT;

# Number of random integers concatenated into each string to compress.
my $DATA_LENGTH = 100000;
# Number of strings generated and compressed per random source.
my $NUM_REPEATS = 100;

my $MTgen = Math::Random::MT->new();

# Random integer in [0, 1000000) from Perl's built-in generator.
sub perls_rand { return int(rand(1000000)) }

# Random integer in [0, 1000000) from the Math::Random::MT generator.
sub mt_rand { return int($MTgen->rand(1000000)) }

# Each test: label, generator sub ref, number of repeats, numbers per string.
my @tests = (
    [ "Gzip compression test : Perl's rand()",
        \&perls_rand, $NUM_REPEATS, $DATA_LENGTH ],
    [ "Gzip compression test : Math::Random::MT's rand()",
        \&mt_rand, $NUM_REPEATS, $DATA_LENGTH ],
);

# Results are kept in test order so the summary is printed deterministically.
my @results;
my ($best_mean_diff, $best_label);

for my $test (@tests) {
    my $result = do_a_test(@$test);
    push @results, $result;

    # Index 0 is the report, index 4 is the mean percentage difference.
    print $result->[0] . "\n\n";

    # NOTE(review): the run with the LARGEST mean percentage difference
    # (i.e. the most compressible output) is reported as "best"; confirm
    # that this is the intended ranking.
    if (!defined $best_mean_diff or $best_mean_diff < $result->[4]) {
        $best_mean_diff = $result->[4];
        $best_label     = $test->[0];
    }
}

print "\n\n$0 : done, here is a summary:\n";
for my $result (@results) {
    print "$0 : " . $result->[1]
        . " with mean %diff from expected to be "
        . sprintf("%.3f", $result->[4]) . " %\n";
}
print "\n\n$0 : best results for " . $best_label
    . " with mean %diff from expected to be "
    . sprintf("%.3f", $best_mean_diff) . " %\n";
print "$0 : end.\n";
exit(0);

# Runs one compression test.
#
# Parameters:
#   $test_label - label (string) used in progress output and in the report
#   $rand_sub   - sub ref called with no arguments; each return value is
#                 appended to the string that will be compressed
#   $nrepeats   - number of strings to generate and compress (must be > 0)
#   $slength    - number of $rand_sub calls concatenated per string
#
# Prints a progress entry for every repeat and returns an arrayref:
#   [ $report, $test_label,
#     mean length of the uncompressed string,
#     mean length of the gzipped string,
#     mean percentage_difference(uncompressed, gzipped),
#     mean percentage_change(uncompressed, gzipped) ]
#
# Dies if $nrepeats is not positive, since the means would be undefined.
sub do_a_test {
    my ($test_label, $rand_sub, $nrepeats, $slength) = @_;

    die "do_a_test : number of repeats must be positive\n"
        unless $nrepeats > 0;

    # Running sums of: raw length, gzipped length, % difference, % change.
    my @compression_totals = (0, 0, 0, 0);

    print "$test_label : doing $nrepeats repeats and using random strings of length $slength :\n";

    for my $repeat (1 .. $nrepeats) {
        my $random_str = join '', map { $rand_sub->() } 1 .. $slength;
        my $gzipped_str = Gzip::Faster::gzip($random_str);

        my $raw_length     = length($random_str);
        my $gzipped_length = length($gzipped_str);
        my $perc_diff   = percentage_difference($raw_length, $gzipped_length);
        my $perc_change = percentage_change($raw_length, $gzipped_length);

        $compression_totals[0] += $raw_length;
        $compression_totals[1] += $gzipped_length;
        $compression_totals[2] += $perc_diff;
        $compression_totals[3] += $perc_change;

        print "repeat $repeat / $nrepeats:\n"
            . " length of random string: " . $raw_length . "\n"
            . " length of compressed string: " . $gzipped_length . "\n"
            . " compression ratio as percentage difference: " . $perc_diff . "\n"
            . " compression ratio as percentage change: " . $perc_change . "\n";
    }

    # Turn the running sums into means.
    $_ /= $nrepeats for @compression_totals;

    my $report
        = "$test_label\n after $nrepeats repeats and using random strings of length $slength :\n"
        . "mean length of random string: " . $compression_totals[0] . "\n"
        . "mean length of compressed string: " . $compression_totals[1] . "\n"
        . "mean compression ratio as percentage difference: " . $compression_totals[2] . "\n"
        . "mean compression ratio as percentage change: " . $compression_totals[3];

    return [ $report, $test_label, @compression_totals ];
}

# Symmetric percentage difference between two numbers: the absolute
# difference divided by the mean of the two values, times 100, i.e.
# 200 * |x - y| / (x + y). Dies with a division-by-zero error when
# x + y is zero.
sub percentage_difference {
    my ($first, $second) = @_;
    return 200 * abs($first - $second) / ($first + $second);
}

# Returns 100 - 100 * first / second, which equals
# 100 * (second - first) / second: the change from the first to the second
# value expressed relative to the SECOND value. The result is negative when
# the second value is the smaller one. Dies with a division-by-zero error
# when the second value is zero.
# NOTE(review): a conventional "percentage change from first to second"
# divides by the first value instead; confirm which one is intended.
sub percentage_change {
    my ($first, $second) = @_;
    return 100 - 100 * $first / $second;
}