in reply to Re^4: Does "preallocating hash improve performance"? Or "using a hash slice"?
in thread Does "preallocating hash improve performance"? Or "using a hash slice"?

I hope perl is not so inefficient that it copies all the content of a list when it needs to process it,

Hoping doesn't change anything; I described what the code (the perl sources) actually does; it is your choice whether to believe me or not.

You could always check for yourself:

PP(pp_aassign) { dVAR; dSP; SV **lastlelem = PL_stack_sp; SV **lastrelem = PL_stack_base + POPMARK; SV **firstrelem = PL_stack_base + POPMARK + 1; SV **firstlelem = lastrelem + 1; SV **relem; SV **lelem; SV *sv; AV *ary; I32 gimme; HV *hash; I32 i; int magic; U32 lval = 0; PL_delaymagic = DM_DELAY; /* catch simultaneous items */ gimme = GIMME_V; if (gimme == G_ARRAY) lval = PL_op->op_flags & OPf_MOD || LVRET; /* If there's a common identifier on both sides we have to take * special care that assigning the identifier on the left doesn't * clobber a value on the right that's used later in the list. * Don't bother if LHS is just an empty hash or array. */ if ( (PL_op->op_private & OPpASSIGN_COMMON) && ( firstlelem != lastlelem || ! ((sv = *firstlelem)) || SvMAGICAL(sv) || ! (SvTYPE(sv) == SVt_PVAV || SvTYPE(sv) == SVt_PVHV) || (SvTYPE(sv) == SVt_PVAV && AvFILL((AV*)sv) != -1) || (SvTYPE(sv) == SVt_PVHV && HvUSEDKEYS((HV*)sv) != 0) ) ) { EXTEND_MORTAL(lastrelem - firstrelem + 1); for (relem = firstrelem; relem <= lastrelem; relem++) { if ((sv = *relem)) { TAINT_NOT; /* Each item is independent */ /* Dear TODO test in t/op/sort.t, I love you. (It's relying on a panic, not a "semi-panic" from newSVsv() and then an assertion failure below.) */ if (SvIS_FREED(sv)) { Perl_croak(aTHX_ "panic: attempt to copy freed scalar %p", (void*)sv); } /* Not newSVsv(), as it does not allow copy-on-write, resulting in wasteful copies. We need a second copy of a temp here, hence the SV_NOSTEAL. */ *relem = sv_mortalcopy_flags(sv,SV_GMAGIC|SV_DO_COW_SVSETSV |SV_NOSTEAL); } } } relem = firstrelem; lelem = firstlelem; ary = NULL; hash = NULL; while (lelem <= lastlelem) { TAINT_NOT; /* Each item stands on its own, taintwise. 
*/ sv = *lelem++; switch (SvTYPE(sv)) { case SVt_PVAV: ary = MUTABLE_AV(sv); magic = SvMAGICAL(ary) != 0; ENTER; SAVEFREESV(SvREFCNT_inc_simple_NN(sv)); av_clear(ary); av_extend(ary, lastrelem - relem); i = 0; while (relem <= lastrelem) { /* gobble up all the rest */ SV **didstore; assert(*relem); SvGETMAGIC(*relem); /* before newSV, in case it dies */ sv = newSV(0); sv_setsv_nomg(sv, *relem); *(relem++) = sv; didstore = av_store(ary,i++,sv); if (magic) { if (!didstore) sv_2mortal(sv); if (SvSMAGICAL(sv)) mg_set(sv); } TAINT_NOT; } if (PL_delaymagic & DM_ARRAY_ISA) SvSETMAGIC(MUTABLE_SV(ary)); LEAVE; break; case SVt_PVHV: { /* normal hash */ SV *tmpstr; int odd; int duplicates = 0; SV** topelem = relem; SV **firsthashrelem = relem; hash = MUTABLE_HV(sv); magic = SvMAGICAL(hash) != 0; odd = ((lastrelem - firsthashrelem)&1)? 0 : 1; if ( odd ) { do_oddball(lastrelem, firsthashrelem); /* we have firstlelem to reuse, it's not needed an +ymore */ *(lastrelem+1) = &PL_sv_undef; } ENTER; SAVEFREESV(SvREFCNT_inc_simple_NN(sv)); hv_clear(hash); while (relem < lastrelem+odd) { /* gobble up all the rest * +/ HE *didstore; assert(*relem); /* Copy the key if aassign is called in lvalue context, to avoid having the next op modify our rhs. Copy it also if it is gmagical, lest it make the hv_store_ent call below croak, leaking the value. */ sv = lval || SvGMAGICAL(*relem) ? 
sv_mortalcopy(*relem) : *relem; relem++; assert(*relem); SvGETMAGIC(*relem); tmpstr = newSV(0); sv_setsv_nomg(tmpstr,*relem++); /* value */ if (gimme == G_ARRAY) { if (hv_exists_ent(hash, sv, 0)) /* key overwrites an existing entry */ duplicates += 2; else { /* copy element back: possibly to an earlier * stack location if we encountered dups earlier, * possibly to a later stack location if odd */ *topelem++ = sv; *topelem++ = tmpstr; } } didstore = hv_store_ent(hash,sv,tmpstr,0); if (magic) { if (!didstore) sv_2mortal(tmpstr); SvSETMAGIC(tmpstr); } TAINT_NOT; } LEAVE; if (duplicates && gimme == G_ARRAY) { /* at this point we have removed the duplicate key +/value * pairs from the stack, but the remaining values +may be * wrong; i.e. with (a 1 a 2 b 3) on the stack we' +ve removed * the (a 2), but the stack now probably contains * (a <freed> b 3), because { hv_save(a,1); hv_sav +e(a,2) } * obliterates the earlier key. So refresh all val +ues. */ lastrelem -= duplicates; relem = firsthashrelem; while (relem < lastrelem+odd) { HE *he; he = hv_fetch_ent(hash, *relem++, 0, 0); *relem++ = (he ? HeVAL(he) : &PL_sv_undef); } } if (odd && gimme == G_ARRAY) lastrelem++; } break; default: if (SvIMMORTAL(sv)) { if (relem <= lastrelem) relem++; break; } if (relem <= lastrelem) { if ( SvTEMP(sv) && !SvSMAGICAL(sv) && SvREFCNT(sv) == 1 && (!isGV_with_GP(sv) || SvFAKE(sv)) && ckWARN(WARN_MISC) ) Perl_warner(aTHX_ packWARN(WARN_MISC), "Useless assignment to a temporary" ); sv_setsv(sv, *relem); *(relem++) = sv; } else sv_setsv(sv, &PL_sv_undef); SvSETMAGIC(sv); break; } } if (PL_delaymagic & ~DM_DELAY) { /* Will be used to set PL_tainting below */ UV tmp_uid = PerlProc_getuid(); UV tmp_euid = PerlProc_geteuid(); UV tmp_gid = PerlProc_getgid(); UV tmp_egid = PerlProc_getegid(); if (PL_delaymagic & DM_UID) { #ifdef HAS_SETRESUID (void)setresuid((PL_delaymagic & DM_RUID) ? PL_delaymagic_uid + : (Uid_t)-1, (PL_delaymagic & DM_EUID) ? 
PL_delaymagic_euid : (Uid_ +t)-1, (Uid_t)-1); #else # ifdef HAS_SETREUID (void)setreuid((PL_delaymagic & DM_RUID) ? PL_delaymagic_uid +: (Uid_t)-1, (PL_delaymagic & DM_EUID) ? PL_delaymagic_euid : (Uid_t +)-1); # else # ifdef HAS_SETRUID if ((PL_delaymagic & DM_UID) == DM_RUID) { (void)setruid(PL_delaymagic_uid); PL_delaymagic &= ~DM_RUID; } # endif /* HAS_SETRUID */ # ifdef HAS_SETEUID if ((PL_delaymagic & DM_UID) == DM_EUID) { (void)seteuid(PL_delaymagic_euid); PL_delaymagic &= ~DM_EUID; } # endif /* HAS_SETEUID */ if (PL_delaymagic & DM_UID) { if (PL_delaymagic_uid != PL_delaymagic_euid) DIE(aTHX_ "No setreuid available"); (void)PerlProc_setuid(PL_delaymagic_uid); } # endif /* HAS_SETREUID */ #endif /* HAS_SETRESUID */ tmp_uid = PerlProc_getuid(); tmp_euid = PerlProc_geteuid(); } if (PL_delaymagic & DM_GID) { #ifdef HAS_SETRESGID (void)setresgid((PL_delaymagic & DM_RGID) ? PL_delaymagic_gid + : (Gid_t)-1, (PL_delaymagic & DM_EGID) ? PL_delaymagic_egid : (Gid_ +t)-1, (Gid_t)-1); #else # ifdef HAS_SETREGID (void)setregid((PL_delaymagic & DM_RGID) ? PL_delaymagic_gid +: (Gid_t)-1, (PL_delaymagic & DM_EGID) ? 
PL_delaymagic_egid : (Gid_t +)-1); # else # ifdef HAS_SETRGID if ((PL_delaymagic & DM_GID) == DM_RGID) { (void)setrgid(PL_delaymagic_gid); PL_delaymagic &= ~DM_RGID; } # endif /* HAS_SETRGID */ # ifdef HAS_SETEGID if ((PL_delaymagic & DM_GID) == DM_EGID) { (void)setegid(PL_delaymagic_egid); PL_delaymagic &= ~DM_EGID; } # endif /* HAS_SETEGID */ if (PL_delaymagic & DM_GID) { if (PL_delaymagic_gid != PL_delaymagic_egid) DIE(aTHX_ "No setregid available"); (void)PerlProc_setgid(PL_delaymagic_gid); } # endif /* HAS_SETREGID */ #endif /* HAS_SETRESGID */ tmp_gid = PerlProc_getgid(); tmp_egid = PerlProc_getegid(); } TAINTING_set( TAINTING_get | (tmp_uid && (tmp_euid != tmp_uid || t +mp_egid != tmp_gid)) ); #ifdef NO_TAINT_SUPPORT PERL_UNUSED_VAR(tmp_uid); PERL_UNUSED_VAR(tmp_euid); PERL_UNUSED_VAR(tmp_gid); PERL_UNUSED_VAR(tmp_egid); #endif } PL_delaymagic = 0; if (gimme == G_VOID) SP = firstrelem - 1; else if (gimme == G_SCALAR) { dTARGET; SP = firstrelem; SETi(lastrelem - firstrelem + 1); } else { if (ary || hash) /* note that in this case *firstlelem may have been overwritte +n by sv_undef in the odd hash case */ SP = lastrelem; else { SP = firstrelem + (lastlelem - firstlelem); lelem = firstlelem + (relem - firstrelem); while (relem <= SP) *relem++ = (lelem <= lastlelem) ? *lelem++ : &PL_sv_un +def; } } RETURN;
Well that's too bad,

Oh, sorry. I wasn't aware you had joined the Answers Approval Subcommittee?

if you need to gather your data ...

Are you sure that's what I said?


With the rise and rise of 'Social' network sites: 'Computers are making people easier to use every day'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority". The enemy of (IT) success is complexity.
In the absence of evidence, opinion is indistinguishable from prejudice.
  • Comment on Re^5: Does "preallocating hash improve performance"? Or "using a hash slice"?
  • Download Code

Replies are listed 'Best First'.
Re^6: Does "preallocating hash improve performance"? Or "using a hash slice"?
by Eily (Monsignor) on Feb 21, 2017 at 17:02 UTC
    Hoping doesn't change anything;

    Fair enough.

    I wasn't aware you had joined the Answers Approval Subcommittee?

    I'm planning on leaving, they still didn't deliver the cookies. That (the "too bad" part) was clumsy of me, sorry. I'm not sure how offensive (or line-crossing) my answer is because I may not get all the implications of my formulation. I do understand that I don't get to judge your decision not to post something.

    Are you sure that's what I said?

    Even less so now that you ask that. I was trying to make sense of your answer in the context of this thread; clearly I got the wrong message.