-- To shuffle an array a of n elements (indices 0..n-1):
for i from 0 to n-2 do
     j <- random integer such that i <= j < n
     exchange a[i] and a[j]

####

$n = scalar(@aliased);
$a = $_ + rand @aliased - $_            # original code
   = i + rand($n-i)                     # which becomes....
   = i + choiceof(0, 1, ..., $n-i-1)    # perl's rand(10), truncated to an integer, yields 0-9 inclusive
   = random integer such that i <= j <= i+$n-i-1
   = random integer such that i <= j <= $n-1
   = random integer such that i <= j < $n

####

# 'rand' binds looser than '-', so the index below is $_ + rand(@aliased - $_);
# the fractional result is truncated when used as an array subscript.
for (0 .. $#aliased) {
    $a = $_ + rand @aliased - $_;
    $b = $aliased[ $_ ], $aliased[ $_ ] = $aliased[ $a ], $aliased[ $a ] = $b
}

####

for i from 0 to n-1 do
     j <- random integer such that i <= j < n
     exchange a[i] and a[j]

####

#!/usr/bin/env perl

# Runs the Statistics::Test::RandomWalk test on numbers drawn from Perl's
# built-in rand() and from Math::Random::MT, prints one report per generator,
# then a summary naming the generator with the smallest mean percentage
# difference between the observed and expected values.

use strict;
use warnings;

use Statistics::Test::RandomWalk;
use Math::Random::MT;

my $NUM_TRIALS = 100;    # how many random numbers each test draws
my $NUM_BINS   = 10;     # passed to the tester's test() method

my $MTgen = Math::Random::MT->new();

sub perls_rand { return rand() }
sub mt_rand    { return $MTgen->rand }

my %results      = ();                   # test label => result arrayref
my $best_results = [ undef, undef ];     # [ smallest mean % diff so far, its label ]

my @tests = (
    [ "RandomWalk test : Perl's rand()",             \&perls_rand, $NUM_TRIALS, $NUM_BINS ],
    [ "RandomWalk test : Math::Random::MT's rand()", \&mt_rand,    $NUM_TRIALS, $NUM_BINS ],
);

for my $test (@tests) {
    my $aresult = do_a_test(@$test);
    $results{ $aresult->[3] } = $aresult;    # key is the label
    print $aresult->[0] . "\n\n";            # the report

    # The smaller the mean percentage difference, the better.
    if ( !defined( $best_results->[0] ) or $best_results->[0] > $aresult->[1] ) {
        $best_results->[0] = $aresult->[1];  # mean perc diff
        $best_results->[1] = $test->[0];     # the label
    }
}

print "\n\n$0 : done, here is a summary:\n";

# Sorted by label so that the summary order is the same on every run.
for my $label ( sort keys %results ) {
    my $result = $results{$label};
    print "$0 : " . $result->[3] . " with mean %diff from expected to be " . sprintf( "%.3f", $result->[1] ) . " %\n";
}
print "\n\n$0 : best results for " . $best_results->[1] . " with mean %diff from expected to be " . sprintf( "%.3f", $best_results->[0] ) . " %\n";
print "$0 : end.\n";
exit(0);

# Does a trial given
#   a test label (string)
#   a sub ref to a rand()
#   the number of iterations
#   the number of bins
# see http://search.cpan.org/dist/Statistics-Test-RandomWalk/lib/Statistics/Test/RandomWalk.pm
# Returns an arrayref:
#   [ report, mean percentage difference, mean percentage change,
#     test label, number of iterations, number of bins ]
sub do_a_test {
    my ( $test_label, $rand_sub, $niters, $nbins ) = @_;

    my $tester_MT = Statistics::Test::RandomWalk->new();
    defined($tester_MT) or die "tester_MT new() failed.";

    # Feed the tester $niters numbers drawn from the generator under test.
    $tester_MT->set_data( [ map { $rand_sub->() } 1 .. $niters ] );
    my ( $quant, $got, $expected ) = $tester_MT->test($nbins);

    my ( $subreport, $mean_diff, $mean_change ) =
        @{ my_report_make( $quant, $got, $expected ) };

    my $report =
          "$test_label\n after $niters iterations and using $nbins bins:\n"
        . $subreport
        . "======================= end ========================";

    return [ $report, $mean_diff, $mean_change, $test_label, $niters, $nbins ];
}

# Builds a table comparing the three arrayrefs returned by the tester.
# Returns an arrayref of
#   a string report which can be printed
#   the mean percentage difference ( 200 * abs(A-B)/(A+B) % )
#   the mean percentage change     ( 100 - 100*A/B % )
# Both means are reported as 0 when there are no rows.
sub my_report_make {
    my ( $quant, $got, $expected ) = @_;

    my $N          = scalar(@$quant);
    my $sum_diff   = 0;
    my $sum_change = 0;

    my $ret = "Quantile | Got | Expected | % diff | % change\n==============================================================\n";
    for my $i ( 0 .. $N - 1 ) {
        my $percent_diff   = percentage_difference( $got->[$i], $expected->[$i] );
        my $percent_change = percentage_change( $got->[$i], $expected->[$i] );
        $ret .= sprintf(
            "%-11.3f%12.3f %14.3f%10.3f%12.3f\n",
            $quant->[$i], $got->[$i], $expected->[$i],
            $percent_diff, $percent_change
        );
        $sum_diff   += $percent_diff;
        $sum_change += $percent_change;
    }

    # Guard against an empty result set instead of dividing by zero.
    my $mean_diff   = $N ? $sum_diff / $N   : 0;
    my $mean_change = $N ? $sum_change / $N : 0;

    $ret .= sprintf(
        "\ntotal percentage difference between got and expected: %.7f %%\n>>>> mean percentage difference: %.7f %% <<<<\n",
        $sum_diff, $mean_diff
    );
    $ret .= sprintf(
        "\ntotal percentage change between got and expected: %.7f %%\n>>>> mean percentage change: %.7f %% <<<<\n",
        $sum_change, $mean_change
    );

    return [ $ret, $mean_diff, $mean_change ];
}

# Symmetric percentage difference: 200 * |x - y| / (x + y).
# Equal inputs (including 0 and 0) give 0 rather than a division by zero.
sub percentage_difference {
    my ( $x, $y ) = @_;
    return 0 if $x == $y;
    return 200 * abs( $x - $y ) / ( $x + $y );
}

# Percentage change: 100 - 100 * x / y.
# Equal inputs (including 0 and 0) give 0 rather than a division by zero.
sub percentage_change {
    my ( $x, $y ) = @_;
    return 0 if $x == $y;
    return 100 - 100 * $x / $y;
}

####

#!/usr/bin/env perl

# Builds long strings out of random integers drawn from Perl's built-in
# rand() and from Math::Random::MT, gzips them with Gzip::Faster and reports
# how the compressed length compares with the uncompressed length.

use strict;
use warnings;

use Gzip::Faster;
use Math::Random::MT;

my $DATA_LENGTH = 100000;    # random integers appended per string
my $NUM_REPEATS = 100;       # strings compressed per generator

my $MTgen = Math::Random::MT->new();

sub perls_rand { return int( rand(1000000) ) }
sub mt_rand    { return int( $MTgen->rand(1000000) ) }

my %results      = ();                   # test label => result arrayref
my $best_results = [ undef, undef ];     # [ selected mean % diff, its label ]

my @tests = (
    [ "Gzip compression test : Perl's rand()",             \&perls_rand, $NUM_REPEATS, $DATA_LENGTH ],
    [ "Gzip compression test : Math::Random::MT's rand()", \&mt_rand,    $NUM_REPEATS, $DATA_LENGTH ],
);

for my $test (@tests) {
    my $aresult = do_a_test(@$test);
    $results{ $aresult->[1] } = $aresult;    # key is the label
    print $aresult->[0] . "\n\n";            # the report

    # NOTE(review): this keeps the LARGEST mean % difference between raw and
    # compressed length, i.e. the most compressible output, as "best". The
    # other two scripts keep the smallest value - confirm this is intended.
    if ( !defined( $best_results->[0] ) or $best_results->[0] < $aresult->[4] ) {
        $best_results->[0] = $aresult->[4];  # mean perc diff
        $best_results->[1] = $test->[0];     # the label
    }
}

print "\n\n$0 : done, here is a summary:\n";

# NOTE(review): the value printed below is the mean % difference between the
# raw and the gzipped length; the "from expected" wording looks carried over
# from the random-walk script - confirm before rewording.
# Sorted by label so that the summary order is the same on every run.
for my $label ( sort keys %results ) {
    my $result = $results{$label};
    print "$0 : " . $result->[1] . " with mean %diff from expected to be " . sprintf( "%.3f", $result->[4] ) . " %\n";
}
print "\n\n$0 : best results for " . $best_results->[1] . " with mean %diff from expected to be " . sprintf( "%.3f", $best_results->[0] ) . " %\n";
print "$0 : end.\n";
exit(0);

# Does a test given
#   a test label (string)
#   a sub ref to a rand()
#   the number of repeats
#   the number of rand() results to concatenate into each string
# Returns an arrayref:
#   [ report, test label, mean raw length, mean gzipped length,
#     mean percentage difference, mean percentage change ]
sub do_a_test {
    my ( $test_label, $rand_sub, $nrepeats, $slength ) = @_;

    # Running totals: [0] raw length, [1] gzipped length,
    #                 [2] percentage difference, [3] percentage change.
    my @compression_totals = ( 0, 0, 0, 0 );

    print "$test_label : doing $nrepeats repeats and using random strings of length $slength :\n";
    for my $repeat ( 1 .. $nrepeats ) {
        my $random_str_to_compress = "";
        $random_str_to_compress .= $rand_sub->() for 1 .. $slength;

        my $gzipped_str                   = Gzip::Faster::gzip($random_str_to_compress);
        my $length_gzipped_str            = length($gzipped_str);
        my $length_random_str_to_compress = length($random_str_to_compress);

        my $percent_diff   = percentage_difference( $length_random_str_to_compress, $length_gzipped_str );
        my $percent_change = percentage_change( $length_random_str_to_compress, $length_gzipped_str );

        $compression_totals[0] += $length_random_str_to_compress;
        $compression_totals[1] += $length_gzipped_str;
        $compression_totals[2] += $percent_diff;
        $compression_totals[3] += $percent_change;

        # Each label is paired with the value it actually describes.
        print "repeat $repeat / $nrepeats:\n"
            . " length of random string: " . $length_random_str_to_compress . "\n"
            . " length of compressed string: " . $length_gzipped_str . "\n"
            . " compression ratio as percentage difference: " . $percent_diff . "\n"
            . " compression ratio as percentage change: " . $percent_change . "\n";
    }

    # Turn totals into means; with zero repeats the totals stay at 0.
    if ( $nrepeats > 0 ) {
        $_ /= $nrepeats for @compression_totals;
    }

    my $report =
          "$test_label\n after $nrepeats repeats and using random strings of length $slength :\n"
        . "mean length of random string: " . $compression_totals[0] . "\n"
        . "mean length of compressed string: " . $compression_totals[1] . "\n"
        . "mean compression ratio as percentage difference: " . $compression_totals[2] . "\n"
        . "mean compression ratio as percentage change: " . $compression_totals[3];

    return [ $report, $test_label, @compression_totals ];
}

# Symmetric percentage difference: 200 * |x - y| / (x + y).
# Equal inputs (including 0 and 0) give 0 rather than a division by zero.
sub percentage_difference {
    my ( $x, $y ) = @_;
    return 0 if $x == $y;
    return 200 * abs( $x - $y ) / ( $x + $y );
}

# Percentage change: 100 - 100 * x / y.
# Equal inputs (including 0 and 0) give 0 rather than a division by zero.
sub percentage_change {
    my ( $x, $y ) = @_;
    return 0 if $x == $y;
    return 100 - 100 * $x / $y;
}

####

#!/usr/bin/env perl

# Estimates the transition probabilities between random integers that are
# $LAG draws apart, for Perl's built-in rand() and for Math::Random::MT, and
# reports the mean percentage difference from the uniform expectation.

use strict;
use warnings;

use IO::Handle;    # provides flush() on the selected output handle
use Math::Random::MT;
use Crypt::Random;
use Data::Dumper;

my $MTgen = Math::Random::MT->new();

# number of repeats
my $NUM_REPEATS = 2000000;

# range of rand ints to produce is 0..$MAX_RAND INCL!
my $MAX_RAND = 30;

# for LAG=1 it calculates the prob between two consecutive random numbers
# for LAG=2, it checks when there is one-in-between the two rands. e.g. A x B
# e.g. LAG=1 rand[0] -> rand[1]
#      LAG=3 rand[0] -> rand[3] (rand [1] and [2] are skipped)
my $LAG = 1;

sub perls_rand { return int( rand( $MAX_RAND + 1 ) ) }
sub mt_rand    { return int( $MTgen->rand( $MAX_RAND + 1 ) ) }

# do not use these two because it will block after a few rand requests
# NOTE(review): Crypt::Random appears to document Size as a bit size, in
# which case Size=>1 could not cover 0..$MAX_RAND - confirm before enabling.
sub dev_rand   { return Crypt::Random::makerandom( Size => 1, Uniform => 1 ) }
sub dev_u_rand { return Crypt::Random::makerandom( Size => 1, Uniform => 1, Strength => 0 ) }

my %results      = ();                   # test label => result arrayref
my $best_results = [ undef, undef ];     # [ smallest mean deviation so far, its label ]

my @tests = (
    [ "transition matrix test : Perl's rand()",             \&perls_rand, $NUM_REPEATS, $MAX_RAND, $LAG ],
    [ "transition matrix test : Math::Random::MT's rand()", \&mt_rand,    $NUM_REPEATS, $MAX_RAND, $LAG ],

    # do not use this because it will block after a few rand requests:
    # ["transition matrix test : /dev/random's rand()", \&dev_rand, $NUM_REPEATS, $MAX_RAND, $LAG],
    # ["transition matrix test : /dev/urandom's rand()", \&dev_u_rand, $NUM_REPEATS, $MAX_RAND, $LAG],
);

for my $test (@tests) {
    my $aresult = do_a_test(@$test);
    $results{ $aresult->[1] } = $aresult;    # key is the label
    print $aresult->[0];                     # the report
    print "====================================\n\n\n";

    # The smaller the mean deviation, the better.
    if ( !defined( $best_results->[0] ) or $best_results->[0] > $aresult->[2] ) {
        $best_results->[0] = $aresult->[2];  # mean deviation from expected count
        $best_results->[1] = $test->[0];     # the label
    }
}

print "\n\n$0 : done, here is a summary:\n";

# Sorted by label so that the summary order is the same on every run.
for my $label ( sort keys %results ) {
    my $result = $results{$label};
    print "$0 : " . $result->[1] . " with mean %diff from expected count to be " . sprintf( "%.3f", $result->[2] ) . " %\n";
}
print "\n\n$0 : best results for " . $best_results->[1] . " with mean %diff from expected count to be " . sprintf( "%.3f", $best_results->[0] ) . " %\n";
print "$0 : end.\n";
exit(0);

# Does a test given
#   a test label (string)
#   a sub ref to a rand() returning integers in 0..$maxrand
#   the number of (first, second) pairs to draw
#   the largest integer the rand() can return
#   the lag between the two members of a pair (1 = consecutive draws)
# Returns an arrayref:
#   [ report, test label, mean percentage difference between the observed
#     and the expected transition probabilities ]
sub do_a_test {
    my ( $test_label, $rand_sub, $nrepeats, $maxrand, $lag ) = @_;

    # $mat[$i][$j] counts how often $j was drawn $lag draws after $i.
    my @mat = ( map { [ (0) x ( $maxrand + 1 ) ] } ( 0 .. $maxrand ) );

    my $expected               = $nrepeats / ( $maxrand + 1 )**2;
    my $expected_probabilities = $expected / $nrepeats;
    my @allrands               = ( 0 .. $maxrand );

    # Number of draws to discard between the two members of a pair.
    my $lag1 = $lag > 1 ? $lag - 1 : 0;

    print "do_a_test() : $test_label :";
    for my $repeat ( 1 .. $nrepeats ) {
        my $i = $rand_sub->();
        $rand_sub->() for 1 .. $lag1;
        my $j = $rand_sub->();
        $mat[$i][$j]++;

        # Progress indicator every 100000 pairs.
        if ( $repeat % 100000 == 0 ) {
            print " " . $repeat;
            select()->flush();
        }
    }
    print "\n";    # end the progress line so the report starts on its own line

    my $deviations = 0;
    for my $i (@allrands) {
        for my $j (@allrands) {
            # Guard against zero repeats instead of dividing by zero.
            $mat[$i][$j] /= $nrepeats if $nrepeats;    # convert to probabilities
            $deviations += percentage_difference( $expected_probabilities, $mat[$i][$j] );
        }
    }
    $deviations /= ( $maxrand + 1 )**2;

    my $report = "$test_label\n after $nrepeats repeats with a lag of $lag and using random integers from 0 to $maxrand:\n";

    # Header cells use the same width as the "%10.7f " data cells, after a
    # blank the width of the "%-5d" row label, so the columns line up.
    $report .= ( " " x 5 ) . join( "", map { sprintf( "%10d ", $_ ) } @allrands ) . "\n";
    for my $i (@allrands) {
        $report .= sprintf( "%-5d", $i );
        for my $j (@allrands) {
            $report .= sprintf( "%10.7f ", $mat[$i][$j] );
        }
        $report .= "\n";
    }
    $report .= "\nmean %diff from expected count ($expected) / expected transition probability ($expected_probabilities) is " . sprintf( "%.5f", $deviations ) . " % (instead of 0).\n";

    return [ $report, $test_label, $deviations ];
}

# Symmetric percentage difference: 200 * |x - y| / (x + y).
# Equal inputs (including 0 and 0) give 0 rather than a division by zero.
sub percentage_difference {
    my ( $x, $y ) = @_;
    return 0 if $x == $y;
    return 200 * abs( $x - $y ) / ( $x + $y );
}

# Percentage change: 100 - 100 * x / y.
# Equal inputs (including 0 and 0) give 0 rather than a division by zero.
sub percentage_change {
    my ( $x, $y ) = @_;
    return 0 if $x == $y;
    return 100 - 100 * $x / $y;
}

####

./random_walk_test.pl : done 10000000 trials with 10 bins, here is a summary:
./random_walk_test.pl : RandomWalk test : Math::Random::MT's rand() with mean %diff from expected to be 0.022 %
./random_walk_test.pl : RandomWalk test : Perl's rand() with mean %diff from expected to be 0.019 %
./random_walk_test.pl : best results for RandomWalk test : Perl's rand() with mean %diff from expected to be 0.019 %
./random_walk_test.pl : end.

./compression_test.pl : done 100 repeats each time compressing a string of random integers of length 1000000, here is a summary:
./compression_test.pl : Gzip compression test : Math::Random::MT's rand() with mean %diff from expected to be 72.231 %
./compression_test.pl : Gzip compression test : Perl's rand() with mean %diff from expected to be 72.232 %
./compression_test.pl : best results for Gzip compression test : Perl's rand() with mean %diff from expected to be 72.232 %
./compression_test.pl : end.

./transition_matrix_test.pl : done 2000000 repeats, with random integers between 0-30 inclusive and a lag of 1, here is a summary:
./transition_matrix_test.pl : transition matrix test : Perl's rand() with mean %diff from expected count to be 1.796 %
./transition_matrix_test.pl : transition matrix test : Math::Random::MT's rand() with mean %diff from expected count to be 1.797 %
./transition_matrix_test.pl : best results for transition matrix test : Perl's rand() with mean %diff from expected count to be 1.796 %
./transition_matrix_test.pl : end.