in reply to Compile and possibly run cuda code on the GPU via Perl's Inline::C

The following code demonstrates how to pass parameters to a function (do_saxpy()) living in Inline::C and how to get the results back, with both the inputs and the results passed as arrayrefs (see also: Inline::C::Cookbook).

Edit: caveat: return NULL does not translate back to undef in Perl space. Is there an AV equivalent of &PL_sv_undef?

#!/usr/bin/perl

# by bliako @ PerlMonks.org
# date: 01-Jul-2021
# see https://perlmonks.org/?node_id=11134582
#
# Example of utilising GPGPU via Inline::C: two Perl arrays are passed
# (as array references) to a C function which runs a CUDA kernel on them
# and hands the result back to Perl as an array reference.
#
# Requires the helper scripts nvcc-compile.pl and nvcc-link.pl to sit in
# the same directory as this script; they are used as compiler and linker.

use strict;
use warnings;

use FindBin;

use Inline C => Config =>
    cc => $FindBin::Bin.'/nvcc-compile.pl',
    ld => $FindBin::Bin.'/nvcc-link.pl',
;

use Inline C => <<'EOC';
// from https://developer.nvidia.com/blog/easy-introduction-cuda-c-and-c/
// NOTE: don't use main(void), use main()!!!
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* Evaluate a CUDA runtime call; on failure report it and jump to the
 * `cleanup` label of the enclosing function (which must define one). */
#define CUDA_TRY(call)                                                    \
    do {                                                                  \
        cudaError_t cuda_try_err_ = (call);                               \
        if (cuda_try_err_ != cudaSuccess) {                               \
            fprintf(stderr, "do_saxpy() : CUDA error at line %d: %s\n",   \
                    __LINE__, cudaGetErrorString(cuda_try_err_));         \
            goto cleanup;                                                 \
        }                                                                 \
    } while (0)

SV *do_saxpy(int N, SV *_x, SV *_y);
int array_numelts(SV *array);

/* Kernel: y[i] = a*x[i] + y[i] for every i < n.
 * Expects a 1D launch with at least n threads in total; surplus threads
 * are masked out by the bounds check. */
__global__ void saxpy(int n, double a, const double *x, double *y)
{
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    if (i < n)
        y[i] = a*x[i] + y[i];
}

/* Returns the number of elements of the Perl array referenced by `array`
 * (0 for an empty array), or -1 if `array` is not an array reference. */
int array_numelts(SV *array)
{
    if (!SvROK(array) || SvTYPE(SvRV(array)) != SVt_PVAV)
        return -1;
    /* av_len() yields the highest index (-1 when empty), not the count */
    return (int)av_len((AV *)SvRV(array)) + 1;
}

/* Computes 2*x[i] + y[i] on the GPU for the first N elements of the Perl
 * arrays referenced by _x and _y.
 *
 * Returns a new array reference holding the N results, or undef on any
 * failure (bad arguments, arrays shorter than N, out of memory, CUDA
 * error). The return type is SV* rather than AV* because a NULL AV*
 * is not translated to undef on the Perl side. */
SV *do_saxpy(int N, SV *_x, SV *_y)
{
    /* everything is declared and initialised up-front so that the
     * `goto cleanup` jumps below never cross an initialisation */
    const double a = 2.0;
    double *x = NULL, *y = NULL, *d_x = NULL, *d_y = NULL;
    AV *deref_x = NULL, *deref_y = NULL, *ret = NULL;
    SV **elem = NULL;
    SV *result = &PL_sv_undef;
    double maxError = 0.0;
    size_t nbytes = 0;
    int nX, nY, i;

    if (N <= 0) {
        fprintf(stderr, "error, N must be positive.\n");
        return &PL_sv_undef;
    }
    nX = array_numelts(_x);
    nY = array_numelts(_y);
    if (nX < 0 || nY < 0) {
        fprintf(stderr, "do_saxpy() : error, x and y must be array references.\n");
        return &PL_sv_undef;
    }
    if (nX < N || nY < N) {
        fprintf(stderr, "do_saxpy() : error, x and y must hold at least N=%d elements (got %d and %d).\n", N, nX, nY);
        return &PL_sv_undef;
    }

    nbytes = (size_t)N * sizeof(double);
    x = (double *)malloc(nbytes);
    y = (double *)malloc(nbytes);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "do_saxpy() : error, failed to allocate host memory.\n");
        goto cleanup;
    }

    /* unpack the Perl arrays into plain C arrays */
    deref_x = (AV *)SvRV(_x);
    deref_y = (AV *)SvRV(_y);
    for (i = 0; i < N; i++) {
        /* av_fetch() returns NULL for a non-existent slot; treat as 0 */
        elem = av_fetch(deref_x, i, 0);
        x[i] = (elem != NULL) ? SvNV(*elem) : 0.0;
        elem = av_fetch(deref_y, i, 0);
        y[i] = (elem != NULL) ? SvNV(*elem) : 0.0;
        printf("do_saxpy() : got in x[%d]=%lf and y[%d]=%lf\n", i, x[i], i, y[i]);
    }

    CUDA_TRY(cudaMalloc((void **)&d_x, nbytes));
    CUDA_TRY(cudaMalloc((void **)&d_y, nbytes));
    CUDA_TRY(cudaMemcpy(d_x, x, nbytes, cudaMemcpyHostToDevice));
    CUDA_TRY(cudaMemcpy(d_y, y, nbytes, cudaMemcpyHostToDevice));

    /* The device now owns copies of the inputs, so reuse host x to keep
     * the CPU reference result for the error check further down. */
    for (i = 0; i < N; i++)
        x[i] = a*x[i] + y[i];

    // Perform SAXPY on N elements
    saxpy<<<(N+255)/256, 256>>>(N, a, d_x, d_y);
    CUDA_TRY(cudaGetLastError()); /* catches bad launch configurations */

    // this copies data from GPU (d_y) onto CPU memory, we use y because
    // it's just sitting there and no longer needed
    // (the blocking cudaMemcpy also waits for the kernel to finish)
    CUDA_TRY(cudaMemcpy(y, d_y, nbytes, cudaMemcpyDeviceToHost));

    /* pack the results into a fresh Perl array */
    ret = newAV();
    av_extend(ret, N - 1);
    for (i = 0; i < N; i++) {
        av_push(ret, newSVnv(y[i]));
        maxError = fmax(maxError, fabs(y[i] - x[i]));
    }
    printf("do_saxpy() : Max error: %f\n", maxError);

    /* the reference takes over the only refcount of the array;
     * Inline::C mortalises the returned SV for us */
    result = newRV_noinc((SV *)ret);

cleanup:
    /* cudaFree(NULL) and free(NULL) are both no-ops */
    cudaFree(d_x);
    cudaFree(d_y);
    free(x);
    free(y);
    return result;
}
EOC

my $N = 100; #1<<20;
my @x = map { rand() } 1..$N;
my @y = map { rand() } 1..$N;

# do_saxpy() returns an arrayref of results, or undef on failure
my $result = do_saxpy($N, \@x, \@y);
if( ! defined $result ){
    print STDERR "$0 : error, call to do_saxpy() has failed.\n";
    exit(1);
}

print "$0 : back to perl-code ...\n";
print "$0 : (perl-code) : got back result :\n".join("\n", @$result)."\n";

bw, bliako

Replies are listed 'Best First'.
Re^2: Compile and run cuda code on the GPU via Perl's Inline::C - passing parameters
by etj (Priest) on Aug 07, 2024 at 16:46 UTC
    My answer to the implied question of what to return on error is don't return NULL, instead return the empty list you just created. That is, change return NULL to return ret in both places.

    But a more Perl-idiomatic way might be to do a croak, since your API doesn't allow a return value indicating error. A way to do that might be to return an array-ref on success, and undef on failure. That would also avoid excess copying of data to and from the stack.