use strict;
use warnings;
use feature 'say';
use Config;
use Benchmark qw/ timeit :hireswallclock /;
use lib '.';
use Max_score_subs ':all';
say "$^V / $Config{ archname } / $Config{ gccversion }";
my $str;
my %tests = (
# C:
c => sub { mscore_c( $str )}, # - dumb
c_better => sub { mscore_c_better( $str )}, # - tweaked
c_omp => sub { mscore_c_omp( $str )}, # - "c", parallel
#
# Fortran:
c2f => sub { mscore_c2f( $str )}, # - "c_better" (in F)
f => sub { mscore_f( $str )}, # - "PDL" (in F)
f_omp => sub { mscore_f_omp( $str )}, # - "f", parallel
);
my $iters = 2e5;
my $fmt = '%8.0f';
for my $L ( 1e4, 1e5, 1e6, 1e7, 1e8 ) {
say "\nString length: " . $L =~ s/(\d)(?=(\d{3})+$)/$1,/gr;
$str = '1' x $L;
substr $str, rand $L, 1 , '0' for 1 .. $L;
my $any;
my %ret;
for my $key ( keys %tests ) {
my $result = $tests{ $key }-> ();
die "<<< $key!!! >>>" unless $result == ( $any //= $result );
my $t = timeit( $iters, $tests{ $key });
$ret{ $key } = $t-> iters / $t-> real
}
print " Rate/s %\n";
$fmt = '%8.1f' if $L > 1e6;
for my $key ( sort { $ret{ $a } <=> $ret{ $b }} keys %ret ) {
printf " %-9s $fmt %5.0f\n",
$key, $ret{ $key }, 100 * $ret{ $key } / $ret{ c }
}
$iters /= 10
}
__END__
v5.42.0 / MSWin32-x64-multi-thread / 13.2.0
String length: 10,000
Rate/s %
f_omp 23804 28
c_omp 25362 29
c 86100 100
c_better 91470 106
c2f 105239 122
f 120134 140
String length: 100,000
Rate/s %
c 8724 100
c_better 9324 107
c2f 10686 122
f 12139 139
c_omp 15369 176
f_omp 15964 183
String length: 1,000,000
Rate/s %
c 875 100
c_better 931 106
c2f 1068 122
f 1213 139
c_omp 2306 264
f_omp 2560 293
String length: 10,000,000
Rate/s %
c 86.8 100
c_better 92.2 106
c2f 104.5 120
f 115.6 133
c_omp 349.6 403
f_omp 426.9 492
String length: 100,000,000
Rate/s %
c 8.6 100
c_better 9.2 106
c2f 10.4 120
f 11.4 131
c_omp 34.7 401
f_omp 42.4 491
####
int mscore_c_omp( SV* str ) {
STRLEN len;
char* buf = SvPVbyte( str, len );
len --;
int nparts = omp_get_num_procs();
if ( nparts > 4 ) nparts = 4;
int psize = len / nparts;
int acc_a[ nparts ];
int cum_a[ nparts ];
int max_a[ nparts ];
#pragma omp parallel for schedule( static, 1 ) \
num_threads( nparts )
for ( int j = 0; j < nparts; j ++ ) {
int lo = j * psize;
int hi = ( j == nparts - 1 ) ? len : lo + psize;
int acc = 0;
int cum = 0;
int max = -1;
for ( int i = lo; i < hi; i ++ ) {
int one = buf[ i ] - '0';
acc += one;
cum += one ? -1 : 1;
if ( cum > max ) max = cum;
}
acc_a[ j ] = acc;
cum_a[ j ] = cum;
max_a[ j ] = max;
}
int acc = acc_a[ 0 ];
int max = max_a[ 0 ];
for ( int j = 1; j < nparts; j ++ ) {
acc += acc_a[ j ];
cum_a[ j ] += cum_a[ j - 1 ];
max_a[ j ] += cum_a[ j - 1 ];
if ( max_a[ j ] > max ) max = max_a[ j ];
}
return acc + max + ( buf[ len ] == '1' );
}
####
subroutine f_omp( n, s, ret )
integer, intent( in ) :: n
integer * 1, intent( in ) :: s( n )
integer, intent( out ) :: ret
integer :: j, nparts, psize
integer, allocatable :: acc_a ( : ), cum_a( : ), min_a( : )
nparts = omp_get_num_procs()
if ( nparts > 4 ) nparts = 4
psize = n / nparts;
allocate( acc_a( nparts ))
allocate( cum_a( nparts ))
allocate( min_a( nparts ))
!$omp parallel do schedule( static, 1 ) num_threads( nparts )
do j = 1, nparts ; block
integer * 1, allocatable :: a( : )
integer, allocatable :: cum( : )
integer :: lo, hi, d, nchunks
integer :: pre, L, i, k
integer :: acc_, cum_, min_
lo = ( j - 1 ) * psize
if ( j == nparts ) then
hi = n - 1
else
hi = j * psize
end if
d = hi - lo
nchunks = d / CHUNK
if ( mod( d, CHUNK ) .ne. 0 ) nchunks = nchunks + 1
acc_ = 0;
cum_ = 0;
min_ = 2;
allocate( a( CHUNK ))
allocate( cum( CHUNK ))
do k = 1, nchunks
pre = lo + ( k - 1 ) * CHUNK
L = min( CHUNK, hi - pre )
a( 1 : L ) = 2 * s( pre + 1 : pre + L ) - A97
cum( 1 ) = cum_ + a( 1 )
do i = 2, L
cum( i ) = cum( i - 1 ) + a( i )
end do
acc_ = acc_ + count( s( pre + 1 : pre + L ) &
== iachar( '1', 1 ))
cum_ = cum( L )
min_ = min( min_, minval( cum( 1 : L )))
end do
acc_a( j ) = acc_
cum_a( j ) = cum_
min_a( j ) = min_
end block ; end do
!$omp end parallel do
do j = 2, nparts
cum_a( j ) = cum_a( j ) + cum_a( j - 1 )
min_a( j ) = min_a( j ) + cum_a( j - 1 )
end do
ret = sum( acc_a ) + ( s( n ) - iachar( '0', 1 )) &
- minval( min_a )
end subroutine f_omp