Update: It is -fopenmp that isn't mentioned in any files under the _Inline/build directory, not -lgomp.
Inline::C is ignoring -fopenmp, which causes warnings behind the scenes stating that the omp pragma is unknown. This can be seen by specifying BUILD_NOISY => 1 and CLEAN_AFTER_BUILD => 0. Afterwards, I looked inside the _Inline/build directory and was unable to find -fopenmp mentioned anywhere in the log files. The -lgomp was there, though.
# Show the full compiler output while Inline builds the C code.
use Inline 'C' => Config => BUILD_NOISY => 1;
# Append optimisation and OpenMP flags to the default compiler flags.
use Inline 'C' => Config => CCFLAGSEX => '-O3 -fopenmp';
# Link against the GNU OpenMP runtime.
use Inline 'C' => Config => LIBS => '-lgomp';

# Keep the _Inline/build directory after compiling so its logs can be inspected.
# The heredoc terminator must sit alone on its own line.
use Inline 'C' => <<'END_C', CLEAN_AFTER_BUILD => 0;
// C code
END_C
Parallel execution is possible nonetheless, and I saw many cores computing simultaneously. The serial code takes 12.4 seconds, while the parallel version completes in 3 seconds on a CentOS 7 VM configured with 4 real cores.
#!/usr/bin/env perl

# Render an ASCII Mandelbrot set in parallel: the pixel work is done in C
# (via Inline::C) and the rows are split across MCE workers. Each worker
# renders a contiguous range of rows and gathers the resulting text; the
# gather iterator writes the chunks to STDOUT in chunk order.

use strict;
use warnings;

use Inline 'C' => Config => CCFLAGSEX => '-O3';
use Inline 'C' => <<'END_C';
#include <stdio.h>
#include <stdlib.h>

/*
 * Iterate z <- z^2 + c from z = 0 for at most `it` steps, where
 * c = cr + ci*i. Returns 1 as soon as |z|^2 exceeds 4, otherwise 0.
 */
int escapes( double cr, double ci, int it ) {
    double zr = 0;
    double zi = 0;
    double zrtmp;
    int i;

    for (i = 0; i < it; i++) {
        // z <- z^2 + c
        zrtmp = zr*zr - zi*zi + cr;
        zi = 2*zr*zi + ci;
        zr = zrtmp;
        if (zr*zr + zi*zi > 4) {
            return 1;
        }
    }
    return 0;
}

/*
 * Render rows yc_beg..yc_end (inclusive) of an xstep-by-ystep grid that
 * spans [xmin, xmax) horizontally and [ymin, ymax) vertically.
 *
 * Returns a string holding one line per row: xstep characters followed
 * by a newline, with a space for points that escape within `iters`
 * iterations and an X for points that do not.
 *
 * Croaks if xstep is negative.
 */
SV* mandel( int yc_beg, int yc_end, double xmin, double xmax, int xstep, double ymin, double ymax, int ystep, int iters ) {
    SV   *buf;
    char *row;
    int   yc;
    int   xc;

    if (xstep < 0) {
        croak("xstep must be non-negative");
    }

    /* Only one row is buffered at a time, so the memory needed does not
       depend on ystep and no index is derived from the absolute row
       number. Newx croaks by itself when the allocation fails. */
    Newx(row, xstep + 1, char);
    buf = newSVpvn("", 0);

    for (yc = yc_beg; yc <= yc_end; yc++) {
        double y = yc*(ymax-ymin)/ystep + ymin;

        for (xc = 0; xc < xstep; xc++) {
            double x = xc*(xmax-xmin)/xstep + xmin;
            row[xc] = escapes(x, y, iters) ? ' ' : 'X';
        }

        /* Terminate the row with its newline and append it in one call. */
        row[xstep] = '\n';
        sv_catpvn(buf, row, (STRLEN)(xstep + 1));
    }

    Safefree(row);

    /* Return the SV unmortalised: per perlxs, the XS typemap mortalises
       an SV* return value, so mortalising here too would release it twice. */
    return buf;
}
END_C

use MCE::Flow;
use MCE::Candy;

# Grid size and iteration limit for the rendered image.
my $width  = 256;
my $height = 256;
my $iters  = 100000;

MCE::Flow::init(
    bounds_only => 1,    # workers receive only the (begin, end) of each chunk
    max_workers => $ENV{'MCE_NUM_THREADS'} || 'auto',
    gather      => MCE::Candy::out_iter_fh(\*STDOUT),
);

# Split the row sequence 0 .. $height - 1 into chunks, one call per chunk.
mce_flow_s sub {
    my ($mce, $sequence_ref, $chunk_id) = @_;
    my ($yc_beg, $yc_end) = @$sequence_ref;

    my $buf = mandel(
        $yc_beg, $yc_end,
        -2.0, 1.0, $width,
        -1.0, 1.0, $height,
        $iters,
    );

    # Tag the text with its chunk id so the gather iterator can order it.
    MCE->gather($chunk_id, $buf);
}, 0, $height - 1;
Ordered output is required and is handled by MCE::Candy::out_iter_fh. The output files are identical between the C + OpenMP version and the Inline::C + MCE version.
Sincerely, Mario
In reply to Re: Why openMP is not taken into account by XS and inline::C
by marioroy
in thread Why openMP is not taken into account by XS and inline::C
by jmricher70
| For: | Use: |
|------|------|
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |