XS to make n-dimensional data into Perl array-refs etc

While working on the "Intensity Landscape" code, I happened to use unpdl on a (3,72000) ndarray. It took about 45 seconds to run. That's because it uses the dog method, which is fabulously slow for ndarrays with a large top dimension (a known problem: see https://github.com/PDLPorters/pdl/issues/421). I decided I would write an XS version, based on the already-existing listref_c code:

static inline SV *pdl2avref(pdl *x, char flatten) {
  int stop = 0, badflag = (x->state & PDL_BADVAL) > 0;
  volatile PDL_Anyval pdl_val = { PDL_INVALID, {0} }; /* same reason a
+s below */
  volatile PDL_Anyval pdl_badval = { PDL_INVALID, {0} };
  if (badflag) {
    if (!(x->has_badvalue && x->badvalue.type != x->datatype)) {
      if (x->has_badvalue)
        pdl_badval = x->badvalue;
      else {
#define X(datatype, ctype, ppsym, ...) \
         pdl_badval.type = datatype; pdl_badval.value.ppsym = PDL.bval
+s.ppsym;
        PDL_GENERICSWITCH(PDL_TYPELIST_ALL, x->datatype, X, )
#undef X
      }
    }
    if (pdl_badval.type < 0) barf("Error getting badvalue, type=%d", p
+dl_badval.type);
  }
  pdl_barf_if_error(pdl_make_physvaffine( x ));
  if (!x->nvals) return newRV_noinc((SV *)newAV());
  void *data = PDL_REPRP(x);
  PDL_Indx ind, inds[!x->ndims ? 1 : x->ndims];
  AV *avs[(flatten || !x->ndims) ? 1 : x->ndims];
  if (flatten || !x->ndims) {
    inds[0] = 0;
    avs[0] = newAV();
    av_extend(avs[0], flatten ? x->nvals : 1);
    if (flatten) for (ind=1; ind < x->ndims; ind++) inds[ind] = 0;
  } else
    for (ind=x->ndims-1; ind >= 0; ind--) {
      inds[ind] = 0;
      avs[ind] = newAV();
      av_extend(avs[ind], x->dims[ind]);
      if (ind < x->ndims-1) av_store(avs[ind+1], 0, newRV_noinc((SV *)
+avs[ind]));
    }
  PDL_Indx *incs = PDL_REPRINCS(x), offs = PDL_REPROFFS(x), lind = 0;
  while (!stop) {
    pdl_val.type = PDL_INVALID;
    PDL_Indx ioff = pdl_get_offset(inds, x->dims, incs, offs, x->ndims
+);
    if (ioff >= 0)
      ANYVAL_FROM_CTYPE_OFFSET(pdl_val, x->datatype, data, ioff);
    if (pdl_val.type < 0) croak("Position out of range");
    SV *sv;
    if (badflag) {
      /* volatile because gcc optimiser otherwise won't recalc for com
+plex double when long-double code added */
      volatile int isbad = ANYVAL_ISBAD(pdl_val, pdl_badval);
      if (isbad == -1) croak("ANYVAL_ISBAD error on types %d, %d", pdl
+_val.type, pdl_badval.type);
      if (isbad)
        sv = newSVpvn( "BAD", 3 );
      else {
        sv = newSV(0);
        ANYVAL_TO_SV(sv, pdl_val);
      }
    } else {
      sv = newSV(0);
      ANYVAL_TO_SV(sv, pdl_val);
    }
    av_store( avs[0], flatten ? lind++ : inds[0], sv );
    stop = 1;
    char didwrap[x->ndims];
    for (ind = 0; ind < x->ndims; ind++) didwrap[ind] = 0;
    for (ind = 0; ind < x->ndims; ind++) {
      if (++(inds[ind]) < x->dims[ind]) {
        stop = 0; break;
      }
      inds[ind] = 0;
      didwrap[ind] = 1;
    }
    if (stop) break;
    if (flatten) continue;
    for (ind=x->ndims-2; ind >= 0; ind--) { /* never redo outer so -2 
+*/
      if (!didwrap[ind]) continue;
      avs[ind] = newAV();
      av_extend(avs[ind], x->dims[ind]);
      av_store(avs[ind+1], inds[ind+1], newRV_noinc((SV *)avs[ind]));
    }
  }
  return newRV_noinc((SV *)avs[(flatten || !x->ndims) ? 0 : x->ndims-1
+]);
}

# ...
MODULE = PDL::Core     PACKAGE = PDL

SV *
unpdl(x)
  pdl *x
CODE:
  RETVAL = pdl2avref(x, 0);
OUTPUT:
  RETVAL
[download]

The bit I thought was quite neat is the logic to keep making new AVs when it wraps dimensions (the n-dimension walking code was already there, but this was new). While this is quite PDL-specific, the concepts should be applicable for any n-dimensional mapping.

Comment on XS to make n-dimensional data into Perl array-refs etc Select or Download Code