So I converted a small string/grammar parser, from pure perl, to XS. And benchmarked it. I was surprised, the old pure perl optree implementation, is only 30% of the speed of XS C code (3/11=%30). A string parser written in C with
memcmp() vs PurePerl's
eq, 3x slower. Not bad.
More interesting is, I decided as a crazy C/XS guts hack, to have a Perl XSUB, calling another Perl XSUB, C function to C function. And it was FOUR TIMES FASTER. 4x!!!!
Just by getting rid of the PP for() loop and the internal Perl_call_sv() and Perl_pp_entersub() overhead, and totally removing the the Perl 5 engine/API/interpretor, between 2 Perl 5 XSUBs (C functions), it made things FOUR TIMES FASTER.
So
&$xs('__stdcall') for(0..1000);
vs
for(i=0;i<1000;i++) {/*removd*/XS_Local__C_calltype_to_num_xs(aTHX_ cv);/*removd*/}
these 2 for() loops, one in Perl 5, the other in C99, had a 4x faster difference in speed.
C compiler was -O2 MSVC 2022 x64 on a Intel Core I5-2520M 2.5ghz.
Rate pp xs
pp 3159521/s -- -73%
xs 11612872/s 268% --
Rate pp xs xs2
pp 333/s -- -72% -93%
xs 1192/s 258% -- -74%
xs2 4516/s 1255% 279% --
BEGIN {
sub APICONTROL_CC_STD () { 0 }
sub APICONTROL_CC_C () { 1 }
}
sub calltype_to_num {
my $type = shift;
if (!$type || $type eq "__stdcall" || $type eq "WINAPI" || $type e
+q "NTAPI"
|| $type eq "CALLBACK" ) {
return APICONTROL_CC_STD;
}
elsif ($type eq "_cdecl" || $type eq "__cdecl" || $type eq "WINAPI
+V") {
return APICONTROL_CC_C;
}
else {
warn "unknown calling convention: '$type'";
return APICONTROL_CC_STD;
}
}
I32
calltype_to_num_xs(type)
SV* type
PREINIT:
const char * p;
I32 l;
CODE:
SvGETMAGIC(type);
if(!SvPOK(type)) {
if(!SvOK(type) || (SvIOK(type) && !SvIVX(type)) || !sv_true(ty
+pe)) {
RETVAL = APICONTROL_CC_STD;
} else {
unk:
warn("unknown calling convention: '" SVf "'", type);
RETVAL = APICONTROL_CC_STD;
}
} else {
p = SvPVX(type);
l = (U32)SvCUR(type);
switch(l) {
case STRLENs(""):
if(memEQs(p,l,"")){RETVAL = APICONTROL_CC_STD;break;}
else goto unk;
case STRLENs("CDECL"):
if(memEQs(p,l,"CDECL")){RETVAL = APICONTROL_CC_C;break
+;}
else if(memEQs(p,l,"NTAPI")){RETVAL = APICONTROL_CC_ST
+D;break;}
else if(memEQs(p,l,"cdecl")){RETVAL = APICONTROL_CC_C;
+break;}
else goto unk;
case STRLENs("PASCAL"):
if(memEQs(p,l,"PASCAL")){RETVAL = APICONTROL_CC_STD;br
+eak;}
else if(memEQs(p,l,"WINAPI")){RETVAL = APICONTROL_CC_S
+TD;break;}
else if(memEQs(p,l,"WMIAPI")){RETVAL = APICONTROL_CC_S
+TD;break;}
else if(memEQs(p,l,"pascal")){RETVAL = APICONTROL_CC_S
+TD;break;}
else if(memEQs(p,l,"_cdecl")){RETVAL = APICONTROL_CC_C
+;break;}
else goto unk;
case STRLENs("WINAPIV"):
if(memEQs(p,l,"WINAPIV")){RETVAL = APICONTROL_CC_C;bre
+ak;}
else if(memEQs(p,l,"__cdecl")){RETVAL = APICONTROL_CC_
+C;break;}
else goto unk;
case STRLENs("APIENTRY"):
if(memEQs(p,l,"APIENTRY")){RETVAL = APICONTROL_CC_STD;
+break;}
else if(memEQs(p,l,"CALLBACK")){RETVAL = APICONTROL_CC
+_STD;break;}
else if(memEQs(p,l,"IMAGEAPI")){RETVAL = APICONTROL_CC
+_STD;break;}
else goto unk;
case STRLENs("__CRTDECL"):
if(memEQs(p,l,"__CRTDECL")){RETVAL = APICONTROL_CC_C;b
+reak;}
else if(memEQs(p,l,"__stdcall")){RETVAL = APICONTROL_C
+C_STD;break;}
else goto unk;
case STRLENs("__fastcall"):
if(memEQs(p,l,"__fastcall")){goto unk;RETVAL = APICONT
+ROL_CC_FC;break;}
else if(memEQs(p,l,"__thiscall")){goto unk;RETVAL = AP
+ICONTROL_CC_TC;break;}
else if(memEQs(p,l,"APIPRIVATE")){RETVAL = APICONTROL_
+CC_STD;break;}
else goto unk;
case STRLENs("__vectorcall"):
if(memEQs(p,l,"__vectorcall")){goto unk;RETVAL = APICO
+NTROL_CC_VC;break;}
else goto unk;
case STRLENs("STDAPICALLTYPE"):
if(memEQs(p,l,"STDAPICALLTYPE")){RETVAL = APICONTROL_C
+C_STD;break;}
else goto unk;
case STRLENs("STDAPIVCALLTYPE"):
if(memEQs(p,l,"STDAPIVCALLTYPE")){RETVAL = APICONTROL_
+CC_C;break;}
else goto unk;
case STRLENs("STDMETHODCALLTYPE"):
if(memEQs(p,l,"STDMETHODCALLTYPE")){RETVAL = APICONTRO
+L_CC_STD;break;}
else goto unk;
case STRLENs("STDMETHODVCALLTYPE"):
if(memEQs(p,l,"STDMETHODVCALLTYPE")){RETVAL = APICONTR
+OL_CC_C;break;}
else goto unk;
default: goto unk;
}
}
OUTPUT:
RETVAL
void
calltype_to_num_xs2(intype)
INPUT:
SV* intype
PREINIT:
SV* sv = sv_2mortal(newSVpvs("__stdcall"));
int i;
PPCODE:
SP = &(ST(-1));
for(i=0;i<1000;i++) {
PUSHMARK(SP);
PUSHs(sv);
PUTBACK;
XS_Local__C_calltype_to_num_xs(aTHX_ cv);
SPAGAIN;
SP = &(ST(-1));
}
PUTBACK;
use Local::C;
use Benchmark qw(cmpthese :hireswallclock);
{ my ($pp, $xs, $xs2, $cctype) = (\&Local::C::calltype_to_num, \&Loc
+al::C::calltype_to_num_xs,
\&Local::C::calltype_to_num_xs2);
cmpthese( -1, {
pp => sub{&$pp('__stdcall');},
xs => sub{&$xs('__stdcall');}
});
cmpthese( -1, {
pp => sub{&$pp('__stdcall') for(0..10000);},
xs => sub{&$xs('__stdcall') for(0..10000);},
xs2 => sub{&$xs2('__stdcall') for(0..10);}
});
exit;
}