in reply to Re^4: Does "preallocating hash improve performance"? Or "using a hash slice"?
in thread Does "preallocating hash improve performance"? Or "using a hash slice"?
I hope perl is not so inefficient that it copies all the content of a list when it needs to process it,
Hoping doesn't change anything; I described what the code (the perl sources) actually does; it is your choice whether to believe me or not.
You could always check for yourself:
PP(pp_aassign) { dVAR; dSP; SV **lastlelem = PL_stack_sp; SV **lastrelem = PL_stack_base + POPMARK; SV **firstrelem = PL_stack_base + POPMARK + 1; SV **firstlelem = lastrelem + 1; SV **relem; SV **lelem; SV *sv; AV *ary; I32 gimme; HV *hash; I32 i; int magic; U32 lval = 0; PL_delaymagic = DM_DELAY; /* catch simultaneous items */ gimme = GIMME_V; if (gimme == G_ARRAY) lval = PL_op->op_flags & OPf_MOD || LVRET; /* If there's a common identifier on both sides we have to take * special care that assigning the identifier on the left doesn't * clobber a value on the right that's used later in the list. * Don't bother if LHS is just an empty hash or array. */ if ( (PL_op->op_private & OPpASSIGN_COMMON) && ( firstlelem != lastlelem || ! ((sv = *firstlelem)) || SvMAGICAL(sv) || ! (SvTYPE(sv) == SVt_PVAV || SvTYPE(sv) == SVt_PVHV) || (SvTYPE(sv) == SVt_PVAV && AvFILL((AV*)sv) != -1) || (SvTYPE(sv) == SVt_PVHV && HvUSEDKEYS((HV*)sv) != 0) ) ) { EXTEND_MORTAL(lastrelem - firstrelem + 1); for (relem = firstrelem; relem <= lastrelem; relem++) { if ((sv = *relem)) { TAINT_NOT; /* Each item is independent */ /* Dear TODO test in t/op/sort.t, I love you. (It's relying on a panic, not a "semi-panic" from newSVsv() and then an assertion failure below.) */ if (SvIS_FREED(sv)) { Perl_croak(aTHX_ "panic: attempt to copy freed scalar %p", (void*)sv); } /* Not newSVsv(), as it does not allow copy-on-write, resulting in wasteful copies. We need a second copy of a temp here, hence the SV_NOSTEAL. */ *relem = sv_mortalcopy_flags(sv,SV_GMAGIC|SV_DO_COW_SVSETSV |SV_NOSTEAL); } } } relem = firstrelem; lelem = firstlelem; ary = NULL; hash = NULL; while (lelem <= lastlelem) { TAINT_NOT; /* Each item stands on its own, taintwise. 
*/ sv = *lelem++; switch (SvTYPE(sv)) { case SVt_PVAV: ary = MUTABLE_AV(sv); magic = SvMAGICAL(ary) != 0; ENTER; SAVEFREESV(SvREFCNT_inc_simple_NN(sv)); av_clear(ary); av_extend(ary, lastrelem - relem); i = 0; while (relem <= lastrelem) { /* gobble up all the rest */ SV **didstore; assert(*relem); SvGETMAGIC(*relem); /* before newSV, in case it dies */ sv = newSV(0); sv_setsv_nomg(sv, *relem); *(relem++) = sv; didstore = av_store(ary,i++,sv); if (magic) { if (!didstore) sv_2mortal(sv); if (SvSMAGICAL(sv)) mg_set(sv); } TAINT_NOT; } if (PL_delaymagic & DM_ARRAY_ISA) SvSETMAGIC(MUTABLE_SV(ary)); LEAVE; break; case SVt_PVHV: { /* normal hash */ SV *tmpstr; int odd; int duplicates = 0; SV** topelem = relem; SV **firsthashrelem = relem; hash = MUTABLE_HV(sv); magic = SvMAGICAL(hash) != 0; odd = ((lastrelem - firsthashrelem)&1)? 0 : 1; if ( odd ) { do_oddball(lastrelem, firsthashrelem); /* we have firstlelem to reuse, it's not needed an +ymore */ *(lastrelem+1) = &PL_sv_undef; } ENTER; SAVEFREESV(SvREFCNT_inc_simple_NN(sv)); hv_clear(hash); while (relem < lastrelem+odd) { /* gobble up all the rest * +/ HE *didstore; assert(*relem); /* Copy the key if aassign is called in lvalue context, to avoid having the next op modify our rhs. Copy it also if it is gmagical, lest it make the hv_store_ent call below croak, leaking the value. */ sv = lval || SvGMAGICAL(*relem) ? 
sv_mortalcopy(*relem) : *relem; relem++; assert(*relem); SvGETMAGIC(*relem); tmpstr = newSV(0); sv_setsv_nomg(tmpstr,*relem++); /* value */ if (gimme == G_ARRAY) { if (hv_exists_ent(hash, sv, 0)) /* key overwrites an existing entry */ duplicates += 2; else { /* copy element back: possibly to an earlier * stack location if we encountered dups earlier, * possibly to a later stack location if odd */ *topelem++ = sv; *topelem++ = tmpstr; } } didstore = hv_store_ent(hash,sv,tmpstr,0); if (magic) { if (!didstore) sv_2mortal(tmpstr); SvSETMAGIC(tmpstr); } TAINT_NOT; } LEAVE; if (duplicates && gimme == G_ARRAY) { /* at this point we have removed the duplicate key +/value * pairs from the stack, but the remaining values +may be * wrong; i.e. with (a 1 a 2 b 3) on the stack we' +ve removed * the (a 2), but the stack now probably contains * (a <freed> b 3), because { hv_save(a,1); hv_sav +e(a,2) } * obliterates the earlier key. So refresh all val +ues. */ lastrelem -= duplicates; relem = firsthashrelem; while (relem < lastrelem+odd) { HE *he; he = hv_fetch_ent(hash, *relem++, 0, 0); *relem++ = (he ? HeVAL(he) : &PL_sv_undef); } } if (odd && gimme == G_ARRAY) lastrelem++; } break; default: if (SvIMMORTAL(sv)) { if (relem <= lastrelem) relem++; break; } if (relem <= lastrelem) { if ( SvTEMP(sv) && !SvSMAGICAL(sv) && SvREFCNT(sv) == 1 && (!isGV_with_GP(sv) || SvFAKE(sv)) && ckWARN(WARN_MISC) ) Perl_warner(aTHX_ packWARN(WARN_MISC), "Useless assignment to a temporary" ); sv_setsv(sv, *relem); *(relem++) = sv; } else sv_setsv(sv, &PL_sv_undef); SvSETMAGIC(sv); break; } } if (PL_delaymagic & ~DM_DELAY) { /* Will be used to set PL_tainting below */ UV tmp_uid = PerlProc_getuid(); UV tmp_euid = PerlProc_geteuid(); UV tmp_gid = PerlProc_getgid(); UV tmp_egid = PerlProc_getegid(); if (PL_delaymagic & DM_UID) { #ifdef HAS_SETRESUID (void)setresuid((PL_delaymagic & DM_RUID) ? PL_delaymagic_uid + : (Uid_t)-1, (PL_delaymagic & DM_EUID) ? 
PL_delaymagic_euid : (Uid_ +t)-1, (Uid_t)-1); #else # ifdef HAS_SETREUID (void)setreuid((PL_delaymagic & DM_RUID) ? PL_delaymagic_uid +: (Uid_t)-1, (PL_delaymagic & DM_EUID) ? PL_delaymagic_euid : (Uid_t +)-1); # else # ifdef HAS_SETRUID if ((PL_delaymagic & DM_UID) == DM_RUID) { (void)setruid(PL_delaymagic_uid); PL_delaymagic &= ~DM_RUID; } # endif /* HAS_SETRUID */ # ifdef HAS_SETEUID if ((PL_delaymagic & DM_UID) == DM_EUID) { (void)seteuid(PL_delaymagic_euid); PL_delaymagic &= ~DM_EUID; } # endif /* HAS_SETEUID */ if (PL_delaymagic & DM_UID) { if (PL_delaymagic_uid != PL_delaymagic_euid) DIE(aTHX_ "No setreuid available"); (void)PerlProc_setuid(PL_delaymagic_uid); } # endif /* HAS_SETREUID */ #endif /* HAS_SETRESUID */ tmp_uid = PerlProc_getuid(); tmp_euid = PerlProc_geteuid(); } if (PL_delaymagic & DM_GID) { #ifdef HAS_SETRESGID (void)setresgid((PL_delaymagic & DM_RGID) ? PL_delaymagic_gid + : (Gid_t)-1, (PL_delaymagic & DM_EGID) ? PL_delaymagic_egid : (Gid_ +t)-1, (Gid_t)-1); #else # ifdef HAS_SETREGID (void)setregid((PL_delaymagic & DM_RGID) ? PL_delaymagic_gid +: (Gid_t)-1, (PL_delaymagic & DM_EGID) ? 
PL_delaymagic_egid : (Gid_t +)-1); # else # ifdef HAS_SETRGID if ((PL_delaymagic & DM_GID) == DM_RGID) { (void)setrgid(PL_delaymagic_gid); PL_delaymagic &= ~DM_RGID; } # endif /* HAS_SETRGID */ # ifdef HAS_SETEGID if ((PL_delaymagic & DM_GID) == DM_EGID) { (void)setegid(PL_delaymagic_egid); PL_delaymagic &= ~DM_EGID; } # endif /* HAS_SETEGID */ if (PL_delaymagic & DM_GID) { if (PL_delaymagic_gid != PL_delaymagic_egid) DIE(aTHX_ "No setregid available"); (void)PerlProc_setgid(PL_delaymagic_gid); } # endif /* HAS_SETREGID */ #endif /* HAS_SETRESGID */ tmp_gid = PerlProc_getgid(); tmp_egid = PerlProc_getegid(); } TAINTING_set( TAINTING_get | (tmp_uid && (tmp_euid != tmp_uid || t +mp_egid != tmp_gid)) ); #ifdef NO_TAINT_SUPPORT PERL_UNUSED_VAR(tmp_uid); PERL_UNUSED_VAR(tmp_euid); PERL_UNUSED_VAR(tmp_gid); PERL_UNUSED_VAR(tmp_egid); #endif } PL_delaymagic = 0; if (gimme == G_VOID) SP = firstrelem - 1; else if (gimme == G_SCALAR) { dTARGET; SP = firstrelem; SETi(lastrelem - firstrelem + 1); } else { if (ary || hash) /* note that in this case *firstlelem may have been overwritte +n by sv_undef in the odd hash case */ SP = lastrelem; else { SP = firstrelem + (lastlelem - firstlelem); lelem = firstlelem + (relem - firstrelem); while (relem <= SP) *relem++ = (lelem <= lastlelem) ? *lelem++ : &PL_sv_un +def; } } RETURN;
Well that's too bad,
Oh, sorry. I wasn't aware you had joined the Answers Approval Subcommittee?
if you need to gather your data ...
Are you sure that's what I said?
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^6: Does "preallocating hash improve performance"? Or "using a hash slice"?
by Eily (Monsignor) on Feb 21, 2017 at 17:02 UTC |