creamygoodness has asked for the wisdom of the Perl Monks concerning the following question:

Greets,

I'm having some trouble manipulating refcounts at the C level to prevent an object's DESTROY method from being called. What I'd like to do is increment an object's reference count from C, do a lot of processing which may involve several trips across the Perl/C boundary, then decrement the object's reference count from C, triggering destruction.

Here's a script which illustrates the problem using our old favorite, the CD metaphor:

#!/usr/bin/perl use strict; use warnings; use Inline C => <<'END_C'; typedef struct Artist { char *name; SV *perlobj; } Artist; typedef struct CD { Artist *artist; /* ... */ } CD; /* Constructor for Artist. */ SV* new_artist(char *name) { SV *perlobj, *perlref; Artist *artist; /* allocate and initialize Artist struct */ New(0, artist, 1, Artist); artist->name = savepv(name); /* create perl object and ref; store pointer to object in struct * +/ perlref = newSV(0); artist->perlobj = sv_setref_pv(perlref, "Artist", (void*)artist); return perlref; } /* Constructor for CD. */ SV* new_cd(SV *artist_sv) { CD *cd; Artist *artist = INT2PTR( Artist*, SvIV(SvRV(artist_sv)) ); SV *cd_ref; /* allocate and initialize CD struct */ New(0, cd, 1, CD); cd->artist = artist; /* WHY DOESN'T THIS SAVE THE ARTIST FROM DESTRUCTION? */ SvREFCNT_inc(artist->perlobj); cd_ref = newSV(0); sv_setref_pv(cd_ref, "CD", (void*)cd); return cd_ref; } /* getter */ SV* get_name_from_artist(SV *artist_sv) { Artist *artist = INT2PTR( Artist*, SvIV(SvRV(artist_sv)) ); return newSVpv(artist->name, 0); } /* getter */ SV* get_artist_name_from_cd(SV *cd_sv) { CD *cd = INT2PTR( CD*, SvIV(SvRV(cd_sv)) ); return newSVpv(cd->artist->name, 0); } /* Destructor for Artist objects */ void destroy_artist(SV *artist_sv) { Artist *artist = INT2PTR( Artist*, SvIV(SvRV(artist_sv)) ); Safefree(artist->name); Safefree(artist); } /* Destructor for CD objects */ void destroy_cd(SV *cd_sv) { CD *cd = INT2PTR( CD*, SvIV(SvRV(cd_sv)) ); SvREFCNT_dec(cd->artist->perlobj); Safefree(cd); } END_C package Artist; use strict; use warnings; use Devel::Peek qw( Dump ); sub DESTROY { my $artist = shift; my $artist_name = main::get_name_from_artist($artist); print STDERR "Artist::DESTROY called.\n" . "$artist_name commits suicide -- but why?\n" . "Somebody still loves you! You have a refcount of 2:\n"; Dump($artist); main::destroy_artist($artist); } package CD; use strict; use warnings; sub DESTROY { my $cd = shift; print STDERR "CD::DESTROY called.\n"; main::destroy_cd($cd); } package main; use strict; use warnings; # Create an Artist object. my $artist = new_artist("Johnny Houseburner"); # Save a copy of the Artist object in a CD object. my $johnny_houseburner_cd = new_cd($artist); # Displace the old Artist object in Perl-space. No destruction should + occur, # because the CD object holds a reference. But that doesn't work -- D +ESTROY # gets called. $artist = new_artist("FunThree"); my $funthree_cd = new_cd($artist); # access invalid memory. my $whats_his_name = get_artist_name_from_cd($johnny_houseburner_cd); print STDERR "What was that guy's name again? $whats_his_name?\n";

And here's its output:

$ perl cd_artist.plx Artist::DESTROY called. Johnny Houseburner commits suicide -- but why? Somebody still loves you! You have a refcount of 2: SV = RV(0x182186c) at 0x1856bf8 REFCNT = 1 FLAGS = (PADBUSY,PADMY,ROK) RV = 0x1801380 SV = PVMG(0x1822080) at 0x1801380 REFCNT = 2 FLAGS = (OBJECT,IOK,pIOK) IV = 3880080 NV = 0 PV = 0 STASH = 0x180b56c "Artist" What was that guy's name again? yAGS? CD::DESTROY called. CD::DESTROY called. Bus error $

As soon as the last Perl-space reference to the Artist object goes away, Artist::DESTROY gets called, despite the fact that the reference count of the object is 2. If things were working properly, Artist::DESTROY wouldn't get called until after CD::DESTROY gets called.

We can forestall the destruction by making a copy in Perl:

my $copy = $artist; $artist = new_artist("FunThree");
... and in fact, a directly analogous maneuver of creating a new perl scalar reference from within C succeeds as well...
typedef struct CD { Artist *artist; + SV *artist_ref; /* ... */ } CD; @@ -43,7 +44,7 @@ cd->artist = artist; /* WHY DOESN'T THIS SAVE THE ARTIST FROM DESTRUCTION? */ - SvREFCNT_inc(artist->perlobj); + cd->artist_ref = newSVsv(artist_sv); cd_ref = newSV(0); sv_setref_pv(cd_ref, "CD", (void*)cd); @@ -77,7 +78,7 @@ void destroy_cd(SV *cd_sv) { CD *cd = INT2PTR( CD*, SvIV(SvRV(cd_sv)) ); - SvREFCNT_dec(cd->artist->perlobj); + SvREFCNT_dec(cd->artist_ref); Safefree(cd); }

What I don't understand, though, is how creating a new reference via newSVsv differs materially from simply incrementing the reference count of the target object. And while it's possible to add a foo_ref member for each foo member, that's messy and verbose.

/* Yecch. */ typedef struct BloatedObject { Foo *foo; SV *foo_ref; Bar *bar; SV *bar_ref; } BloatedObject; void destroy_bloated_object(BloatedObject *obj) { SvREFCNT_dec(obj->foo_ref); SvREFCNT_dec(obj->bar_ref); Safefree(obj); } /* Better. */ typedef struct LeanObject { Foo *foo; Bar *bar; } LeanObject; void destroy_lean_object(LeanObject *obj) { SvREFCNT_dec(obj->foo->perlobj); SvREFCNT_dec(obj->bar->perlobj); Safefree(obj); }

Suggestions?

--
Marvin Humphrey
Rectangular Research ― http://www.rectangular.com

Replies are listed 'Best First'.
Re: XS: Manipulating refcounts
by mugwumpjism (Hermit) on Sep 19, 2006 at 06:00 UTC

    You're already doing all the right things - looking at the objects with Devel::Peek, storing a blessed IV, etc. Nice work. This is similar to a module I maintain, Set::Object.

    I think all that's happening is that at global destruction time, structures aren't cleaned up in the normal way. After all, you still want destructors for objects in circular references to be called. Instead Perl just calls destructors for all objects, and it wouldn't surprise me if it did that without dropping refcnt values first.

    Perhaps try changing your test program to trigger the objects falling out of scope before the end of the program is reached.

    I also remember having to use the sv2mortal range of macros, perhaps have a look at those too if the above doesn't help.

    $h=$ENV{HOME};my@q=split/\n\n/,`cat $h/.quotes`;$s="$h/." ."signature";$t=`cat $s`;print$t,"\n",$q[rand($#q)],"\n";

      Thanks for the kind words, and for picking up the gauntlet, Sam. (I presume you're Sam Vilain, since that's who's released the last several versions of Set::Object.)

      I think all that's happening is that at global destruction time, structures aren't cleaned up in the normal way. After all, you still want destructors for objects in circular references to be called. Instead Perl just calls destructors for all objects, and it wouldn't surprise me if it did that without dropping refcnt values first. /

      I'd be cheesed if we were getting all the way through to global destruction! The problem is that Artist::DESTROY is getting called earlier -- specifically, when the $artist scalar is assigned a new value, obliterating the last perl-space reference.

      (Your cautionary advice about order of object destruction at globo-destruct-time is taken under advisement nonetheless. If you really need cleanup to proceed according to a strict itinerary, explicit methods are the way to go.)

      I also remember having to use the sv2mortal range of macros, perhaps have a look at those too if the above doesn't help.

      FWIW, using Inline C means using sv_2mortal() lots. :) Anytime you return an SV* from an Inline C function, it creates an XS wrapper that calls sv_2mortal() on your return value. So it better have a reference count of at least one or Perl will attempt a double free! For illustration, here's the wrapper generated by Inline::C for get_name_from_artist...

      XS(XS_main_get_name_from_artist) { dXSARGS; if (items != 1) Perl_croak(aTHX_ "Usage: main::get_name_from_artist(artist_sv) +"); PERL_UNUSED_VAR(cv); /* -W */ { SV * artist_sv = ST(0); SV * RETVAL; RETVAL = get_name_from_artist(artist_sv); ST(0) = RETVAL; sv_2mortal(ST(0)); } XSRETURN(1); }
      If I call sv_2mortal() on a return val from a function which is currently returning an SV with a refcount of 1, Perl tries the double free -- bad!

      I think my problem arises from conceptual confusion as to which SV* Perl considers the "object"... Despite the fact that in the Devel::Peek Dump, the inner SV* is labeled an OBJECT and has the package attached, it seems that Perl considers that outer SV* the object, and so that's whose refcount has to be incremented. The question is how to do that without creating a circular ref.

      --
      Marvin Humphrey
      Rectangular Research ― http://www.rectangular.com
        Anytime you return an SV* from an Inline C function, it creates an XS wrapper that calls sv_2mortal() on your return value.

        I don't think this has anything to do with Inline::C. I think it's something that xsubpp does in creating the C file from the XS file. If I'm not mistaken, you'll get exactly the same thing in a normal XS environment (ie even of you're not using Inline::C).

        Cheers,
        Rob
Re: XS: Manipulating refcounts
by demerphq (Chancellor) on Sep 19, 2006 at 09:38 UTC

    I dont have time to look into this deeper, but i thought i might point you at sv_dump(), the (simplified) internals version of Devel::Peek::Dump() that can be called directly from your XS code. This may make it easier to see what is going on. I know that I have used it to work out refcount issues in XS/internals code before.

    ---
    $world=~s/war/peace/g

      Thanks, demerphq. I've been using sv_dump, which is handy, though it is frustrating that it only shows one level. For instance, it would only show the reference, and not the object (caveat: I'm not sure I haven't got those mixed up -- see my reply to mugwumpjism):

      SV = RV(0x182186c) at 0x1856bf8 REFCNT = 1 FLAGS = (PADBUSY,PADMY,ROK) RV = 0x1801380 SV = PVMG(0x1822080) at 0x1801380 /* instead of... */ SV = RV(0x182186c) at 0x1856bf8 REFCNT = 1 FLAGS = (PADBUSY,PADMY,ROK) RV = 0x1801380 SV = PVMG(0x1822080) at 0x1801380 REFCNT = 2 FLAGS = (OBJECT,IOK,pIOK) IV = 3880080 NV = 0 PV = 0 STASH = 0x180b56c "Artist"

      To find out how to make multiple levels appear, I peeked inside ext/Devel/Peek/Peek.xs and saw that it was implemented using the private function do_sv_dump. Obviously, do_sv_dump doesn't belong in production code, but this is just debugging and it's even handier than sv_dump if you're motivated enough to memorize its 7-argument interface.

      --
      Marvin Humphrey
      Rectangular Research ― http://www.rectangular.com
Re: XS: Manipulating refcounts
by Animator (Hermit) on Sep 20, 2006 at 06:34 UTC

    First of all note that I do not have much experience with C and/or XS.

    So what I'm saying might make sense or not.

    What you are doing is a bit dangerous... (Or atleast I think so).

    You are creating an Artist and then creating a referenceto that artist (which s not a perl structure). So what you are doing is saving the pointer of *artist in $$artist.

    This can easily go wrong... For example: $$artist = 0; or $$artitst--;. If you then call your C code then you will - most likely - get a segmentation fault because you cannot access that memory space...

    I think a better way to do this would be to store the actual Artist object in an array or so in the C code and return an SV/RV with the index in the array.

    So far for the artists, for the CDs I think it would be easier to store the SV (after incrementing it's reference count ofcourse), and not the Artist...

    This would also eliminate the need to store the RV/SV in the Artist object...

    At the moment I haven't played enough with the code to give you a full (and working) example of what I mean. (so consider this as some initial feedback - some of which might be incorrect)... I will try to do this later today

      I played a bit with it...

      This is the only thing I can come up with:

      • sv_setref_pv() upgrades perlref to an RV and returns it,
      • you then store this SV in artist->perlobj,
      • you return the SV. Because of some XS magic (I think) a new SV is created with the contents of the perlref SV,
      • then later in your code you increase the reference counter of the original SV. (perlref - but this is not the same SV as $artist, and even if it were then it would still be wrong since you should be incrementing the reference counter of $$artist).

      What you should be doing instead (I think) is store the thing it refences to in perlobj and increase the counter of that.
      That is: use:

      sv_setref_pv(perlref, "Artist", (void*)artist); artist->perlobj = SvRV(perlref);
      instead of:
      artist->perlobj = sv_setref_pv(perlref, "Artist", (void*)artist);

      The attached code does that and is (IMHO) a bit easier to understand...

      The output of the code:

      Creating artist: Johnny Houseburner Refcount: $artist: 1 Refcount: $$artist: 1 Creating CD (artist = Johnny Houseburner) Refcount: $artist: 1 Refcount: $$artist: 2 Refcount: $cd: 1 Refcount: $$cd: 1 undef $artist Artist name via cd: Johnny Houseburner undef $cd CD::DESTROY called. Artist::DESTROY called (Johnny Houseburner) End block End code

      (Small note: this code is still easy to segfault: all you need to do is change $$artist.)

        Beautifully done, Animator. Your rewrite establishes that what I'd hoped to do is possible. It's clearer IMHO as well. :)

        My crucial error was misunderstanding the return value of sv_setref_pv. I'd thought it returned the object rather than the reference. Once artist->perlobj is assigned the correct value, everything falls into place, as you show. Now I can go implement this algo. Brilliant!

        you return the SV. Because of some XS magic (I think) a new SV is created with the contents of the perlref SV,

        Here's how things progress behind the scenes as control leaves new_artist and we enter XS_main_new_artist, the glue function generated by Inline::C/xsubpp:

        XS(XS_main_new_artist) { dXSARGS; if (items != 1) Perl_croak(aTHX_ "Usage: main::new_artist(name)"); PERL_UNUSED_VAR(cv); /* -W */ { char * name = (char *)SvPV_nolen(ST(0)); SV * RETVAL; RETVAL = new_artist(name); ST(0) = RETVAL; sv_2mortal(ST(0)); } XSRETURN(1); }
        1. The return value from new_artist is copied to RETVAL, which is copied in turn to ST(0), the top of the perl stack.
        2. ST(0) is "mortalized" by calling sv_2mortal( ST(0) ). That's essentially a delayed decrement of its refcount. It won't be freed right away as would be the case if you'd called SvREFCNT_dec -- instead, the cleanup will occur a bit later, after we've had the chance to do something with it.
        3. XSRETURN(1) is invoked, indicating that a single value is being returned via the stack, and control leaves XS land, heading back into world of stock Perl OPs.
        4. The Perl assignment operator = pops the top item off the stack and triggers a call to SvSetMagicSV, and the value of the popped SV is copied into the lexical variable $artist. Now the SV that we created back in new_artist can be freed. (Note that since the Perl interpreter can often tell when an SV is about to be freed, it will probably "steal" the contents and avoid doing unneeded memory allocation and copying).

        Now, consider that in my original program, the mortalized SV on the stack is the same SV that's housed in the Artist struct. I don't fully understand how reference counts affect things in such cases, but that was definitely not what I intended. It does not surprise me that undesirable consequences arose.

        Thanks for your help,

        --
        Marvin Humphrey
        Rectangular Research ― http://www.rectangular.com

      Whatever your level with experience with C/XS I think you've tracked down the critical bug. :) More on that in a bit.

      You're absolutely right that this data structure should use SVs and not C structs. The very idea of using C structs when an ordinary hash would do is ridiculous! But then it wouldn't illustrate the problem I'm trying to solve.

      This technique is being developed for use with Apache Lucy, which will be a large C library with both Perl and Ruby bindings. The "object" member of the struct will actually be a void*, and it could be a Perl object, or a Ruby object, or eventually, something else. (Actually, I don't know exactly whether or how it will work with Ruby -- hopefully it will. :) ) All the Perl-specific C routines need to stay in the XS bindings and cannot be used in the C core. Dereferencing an actual SV and getting at its contents is not an option. However, we need some way of working with native garbage collection, and this is it.

      With regards to whether or not storing a pointer is dangerous... it's common practice, even in core modules. You can make DB_File segfault by decrementing the pointer in the tied object! So don't do that. :)

      #!/usr/bin/perl use strict; use warnings; use DB_File; my %hash; my $tied = tie %hash, 'DB_File', "garbagefile"; $hash{foo} = "bar"; print STDERR $tied->FETCH('foo') . "\n"; $$tied--; print STDERR $tied->FETCH('foo') . "\n"; __END__ Outputs: slothbear:~/perltest marvin$ perl ptrobj_segfault.plx bar Bus error slothbear:~/perltest marvin$
      --
      Marvin Humphrey
      Rectangular Research ― http://www.rectangular.com

        Ofcourse there are (CORE) modules doing it... But it's not that because they do it you should do it too... And in my opinion the tied hash from DB_File and your scalar are two completly differnt things...

        The way to make DB_File segfault is to get the tied object and then change it.

        The way to make your code segfault is to either change the thing it references to or pass the wrong thing to one of your other routines.

        For example:

        my $artist2; my $cd = new_cd($artist2);
        This code will segfault. Sure in this example it is (or atleast should be) obvious why it segfaults... But if you are passing $artist to several subroutines and maybe forgetting it/passing the wrong thing once then it is not that obvious...

        To prevent $$artist from being changed you could use: SvREADONLY_on(SvRV(perlref));.

        But this still leaves the other issue... As in, passing the wrong variable/undef to new_cd(); (for example)...

        I guess it comes down to:

        • Who will be using it?
        • How familiar are they with segfaults?
        • Do they expect to see a segfault in perl?
        • Will they think about your code as causing the segfault? (I guess that comes down to: how well is it documented)
        • Do you really care about the memory that an extra list would use?

        Update: removed accidental link.