#!/usr/bin/perl
use strict;
use warnings;

use Benchmark qw/cmpthese/;
use PDL::LiteF;

my @number_of_arrays = qw(5 15 30);
my @size_of_arrays   = qw(5 15 30);
my $iterations       = 50000;
my $max_integer      = 100;

benchmark_it( \@number_of_arrays, \@size_of_arrays, $max_integer );

#----------------

# Run cmpthese() once for every (row-count, column-count) combination,
# comparing the plain-array, map-based, and PDL implementations of
# "average each column of a 2D matrix of random integers".
#
# Args: arrayref of row counts, arrayref of column counts, max random int.
sub benchmark_it {
    my $number_of_arrays   = shift;
    my $size_of_arrays     = shift;
    my $max_random_integer = shift;

    for my $number ( @{$number_of_arrays} ) {

        # BUG FIX: the original read @{size_of_arrays} (missing '$' sigil),
        # a compile-time error under 'use strict' — the script could not run.
        for my $size ( @{$size_of_arrays} ) {
            my $data    = build_random_array( $number, $size, $max_random_integer );
            my $pdldata = pdl $data;

            print "Results when number of arrays is $number"
                . " and size of each array is $size:\n";
            cmpthese(
                $iterations,
                {
                    'Array-based' => sub { using_array($data) },
                    'PDL-based'   => sub { using_pdl($pdldata) },
                    'Map-based'   => sub { using_map($data) },
                }
            );
            print "\n";
        }
    }
    return;
}

# Column averages via nested index loops over an array-of-arrayrefs.
# Returns the list of per-column averages.
sub using_array {
    my $data = shift;

    my @sums;
    my $last_row_index    = scalar @{$data} - 1;
    my $last_column_index = scalar @{ $data->[0] } - 1;

    # BUG FIX: the original iterated 0 .. $last_row_index - 1, silently
    # skipping the final row while still dividing by the full row count,
    # so every average was wrong.
    for my $i ( 0 .. $last_row_index ) {
        for my $j ( 0 .. $last_column_index ) {
            $sums[$j] += $data->[$i][$j];
        }
    }

    $sums[$_] /= ( $last_row_index + 1 ) for 0 .. $last_column_index;
    return @sums;
}

# Column averages accumulated while walking the rows.
# Returns an arrayref of per-column averages (original returned only the
# raw sums via an arrayref; callers here are benchmark closures that
# discard the value, so the contract is unchanged in practice).
sub using_map {
    my $data = shift;

    my $range_max = scalar @{ $data->[0] } - 1;
    my @sums;

    # BUG FIX (idiom): the original used map in void context purely for
    # its side effects; a plain for loop expresses the intent directly.
    for my $row ( @{$data} ) {
        $sums[$_] += $row->[$_] for 0 .. $range_max;
    }
    return \@sums;
}

# Column averages with PDL: sum down each column, divide by the row count.
# Returns a 1-D piddle of per-column averages.
sub using_pdl {
    my $pdldata = shift;

    # BUG FIX: the original did "$pdldata /= $pdldata->getdim(1)",
    # mutating the caller's piddle in place.  Because cmpthese() invokes
    # this sub tens of thousands of times on the SAME piddle, the data
    # shrank toward zero and the benchmark measured work on garbage.
    # Dim 1 of a pdl built from an array-of-arrayrefs is the row count.
    return $pdldata->transpose->sumover / $pdldata->getdim(1);
}

# Build an array-of-arrayrefs of random integers in [0, $max_integer].
# Args (all optional, with defaults): row count (10), row length (10),
# maximum integer (100).  Returns the arrayref.
sub build_random_array {
    my $number_of_arrays = shift || 10;
    my $size_of_arrays   = shift || 10;
    my $max_integer      = shift || 100;

    my $data;
    foreach my $i ( 1 .. $number_of_arrays ) {
        my @random_array;
        push @random_array, int rand( $max_integer + 1 )
            for ( 1 .. $size_of_arrays );
        push @{$data}, \@random_array;
    }
    return $data;
}

__END__

=head1 Synopsis

Compare PDL to more conventional methods of finding the average of the
column vectors in a 2D matrix.

=head1 Results

My results on December 27, 2008

 Results when number of arrays is 5 and size of each array is 5:
                Rate   PDL-based Array-based   Map-based
 PDL-based   42017/s          --        -50%        -57%
 Array-based 84746/s        102%          --        -14%
 Map-based   98039/s        133%         16%          --

 Results when number of arrays is 30 and size of each array is 30:
                Rate Array-based   Map-based   PDL-based
 Array-based  3987/s          --        -17%        -89%
 Map-based    4808/s         21%          --        -86%
 PDL-based   35461/s        789%        638%          --

=head1 Notes

Note that when the matrix is small, 5x5, PDL is slower, but as the size
of the matrix grows, PDL becomes smoking hot thanks to its speed.

It's nice to see the recent development activity with PDL.

=cut