skateamac has asked for the wisdom of the Perl Monks concerning the following question:

Hello Monks,

Thank you in advance for your help.
We have an Inline C sub called percent2 that returns the percentiles desired, given a ref to an array of doubles.

($min,$max) = utils::stats::percent2(\@data,0,100);

We are using a temp AV* so that we can sort the data without reordering @data.
The following code was causing a memory leak when called 1000s of times from a 12+ hr-long script:
use Inline C => <<'END_OF_PERCENT2_C_CODE'; #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK) #define SvNSIV(sv) (SvNOK(sv) ? SvNVX(sv) : (SvSIOK(sv) ? SvIVX(sv) : +sv_2nv(sv))) static I32 S_sv_ncmp(pTHX_ SV *a, SV *b) { const NV nv1 = SvNSIV(a); const NV nv2 = SvNSIV(b); return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0; } void percent2(SV* sv, ...) { I32 i; I32 arrayLen; I32 remove_count; AV* data; AV* data_tmp; float value; int index; SV** pvalue; double retval; AV* ret; ret = newAV(); data_tmp = newAV(); Inline_Stack_Vars; data = SvRV(Inline_Stack_Item(0)); if( SvTYPE(data) != SVt_PVAV ) { return; } /* determine the length of the array */ arrayLen = av_len(data) + 1; if (arrayLen > 0) { /* use new tmp array to prevent reordering caller array*/ for (i = 0; i < arrayLen; i++) { pvalue = av_fetch(data,i,0); av_push(data_tmp,*pvalue); } /* sort array in ascending order */ sortsv(AvARRAY(data_tmp),arrayLen, S_sv_ncmp); /* loop through data array and delete undef entries */ remove_count = 0; for (i = 0; i < arrayLen; i++) { /* Fetch the scalar located at i from the array.*/ pvalue = av_fetch(data_tmp,i,0); if (!SvOK(*pvalue)) { remove_count++; } else { break; } } for (i = 0; i < remove_count; i++) { av_shift(data_tmp); } arrayLen -= remove_count; } if (arrayLen > 0) { /* loop through percent args and find given value in array */ for (i = 1; i < Inline_Stack_Items; i++) { value = SvNV(Inline_Stack_Item(i)); if (value <= 100 && value >= 0){ if (value == 100){ value -= 1e-13; } index = (int)((arrayLen-1) * value/100); /* fetch scalar located at calculated index*/ pvalue = av_fetch(data_tmp,index,0); av_push(ret,*pvalue); } } } /* push into return stack */ Inline_Stack_Reset; arrayLen = av_len(ret) + 1; if (arrayLen > 0) { for (i = 0; i < arrayLen; i++) { /* fetch the scalar located at i from the array.*/ pvalue = av_fetch(ret,i,0); /* dereference the scalar into a numeric value. */ retval = SvNV(*pvalue); Inline_Stack_Push(newSVnv(retval)); } } else { Inline_Stack_Push(newSVnv(0)); } Inline_Stack_Done; } END_OF_PERCENT2_C_CODE

I fixed the memory leak by creating and pushing new SVs into data_tmp and ret. And then using av_undef.
This memory leak fix is slightly slower because of the newSVnv lines.

Updated code snippets:
/* use new tmp array to prevent reordering caller array*/ for (i = 0; i < arrayLen; i++) { pvalue = av_fetch(data,i,0); /* get double, create new SV, and push it into data_tmp array */ retval = SvNV(*pvalue); av_push(data_tmp,newSVnv(retval)); } ... /* fetch scalar located at calculated index*/ pvalue = av_fetch(data_tmp,index,0); /* get double, create new SV, and push it into return array */ retval = SvNV(*pvalue); av_push(ret,newSVnv(retval)); ... Inline_Stack_Done; /* free memory */ av_undef(data_tmp); av_undef(ret);
My questions are:

1) In the original code with the mem leak, when I tried only adding "av_undef(data_tmp)" to the very end, it erased @data
and I got the error "Attempt to free unreferenced scalar".
Why? Can you explain this?
I was expecting the reference count for each value in @array to decrease from 2 to 1...not go to 0

2) Is there a more efficient way to do this without altering the order of @data?

Thanks!

Replies are listed 'Best First'.
Re: Inline C memory leak
by BrowserUk (Patriarch) on Oct 31, 2014 at 00:27 UTC

    The main problem is that you aren't (in your original version; didn't look at your update) freeing the temporary arrays. It is not enough to just av_undef() them, you also have to dec the AV reference counts.

    Also, when you copy the original array, you are copying the SVs into the temp array, but not duping the SVs themselves, thus when you av_undef() the temp arrays, you are discarding the SVs that still have pointers in the original array. This may do more duplication than is strictly necessary; but as is it runs clean:

    #! perl -slw use strict; use Inline C => <<'END_OF_PERCENT2_C_CODE'; #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK) #define SvNSIV(sv) (SvNOK(sv) ? SvNVX(sv) : (SvSIOK(sv) ? SvIVX(sv) : +sv_2nv(sv))) static I32 S_sv_ncmp(pTHX_ SV *a, SV *b) { const NV nv1 = SvNSIV(a); const NV nv2 = SvNSIV(b); return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0; } void percent2(SV* sv, ...) { Inline_Stack_Vars; I32 i; I32 arrayLen; I32 remove_count; AV* data; AV* data_tmp; double value; int index; SV** pvalue; double retval; AV* ret; ret = newAV(); data_tmp = newAV(); data = (AV*)SvRV(Inline_Stack_Item(0)); if( SvTYPE(data) != SVt_PVAV ) { return; } /* determine the length of the array */ arrayLen = av_len(data) + 1; if (arrayLen > 0) { /* use new tmp array to prevent reordering caller array*/ for (i = 0; i < arrayLen; i++) { pvalue = av_fetch(data,i,0); av_push( data_tmp, newSVsv( *pvalue ) ); // Copy the SVs + so that you don't destroy the original array when freeing the temp } /* sort array in ascending order */ sortsv(AvARRAY(data_tmp),arrayLen, S_sv_ncmp); /* loop through data array and delete undef entries */ remove_count = 0; for (i = 0; i < arrayLen; i++) { /* Fetch the scalar located at i from the array.*/ pvalue = av_fetch(data_tmp,i,0); if (!SvOK(*pvalue)) { remove_count++; } else { break; } } for (i = 0; i < remove_count; i++) { av_shift(data_tmp); } arrayLen -= remove_count; } if (arrayLen > 0) { /* loop through percent args and find given value in array */ for (i = 1; i < Inline_Stack_Items; i++) { value = SvNV(Inline_Stack_Item(i)); if (value <= 100 && value >= 0){ if (value == 100){ value -= 1e-13; } index = (int)((arrayLen-1) * value/100); /* fetch scalar located at calculated index*/ pvalue = av_fetch(data_tmp,index,0); av_push( ret, newSVsv( *pvalue ) ); /// ditto } } } /* push into return stack */ Inline_Stack_Reset; arrayLen = av_len(ret) + 1; if (arrayLen > 0) { for (i = 0; i < arrayLen; i++) { /* fetch the scalar located at i from the array.*/ pvalue = av_fetch(ret,i,0); /* dereference the scalar into a numeric value. */ retval = SvNV(*pvalue); Inline_Stack_Push( sv_2mortal( newSVnv( retval ) ) ); // M +ortalise the return values } } else { Inline_Stack_Push( sv_2mortal( newSVnv(0) ) ); // Mortalise +the return values } Inline_Stack_Done; av_undef( ret ); // free array memory SvREFCNT_dec( ret ); // free the AV itself av_undef( data_tmp ); // ditto SvREFCNT_dec( data_tmp ); } END_OF_PERCENT2_C_CODE my @data = map int( rand 100 ), 1 .. 100;; print join '|', percent2( \@data, 1, 50 ) for 1 .. 1e9;;

    With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
    Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
    "Science is about questioning the status quo. Questioning authority".
    In the absence of evidence, opinion is indistinguishable from prejudice.
Re: Inline C memory leak
by davido (Cardinal) on Oct 30, 2014 at 22:02 UTC

    I apologize for not having enough time at the moment to sift through the code to spot the error. If an answer hasn't shown up in a few hours I might be able to later (update: A few hours later, still not finding time / motivation for sifting through guts/xs. Try #xs on irc.perl.org).

    However, I do feel that looking in the right place is important. To that end, it's worth mentioning that it is virtually impossible for this to be an Inline::C issue. Inline::C and Inline really are just automation layers. It's a matter of how your code is dealing with perl(guts|xs|call|api), not of how Inline is automating the XS build process.


    Dave