I backed away from this when I saw you weren't using an MS compiler, as I've no experience of gcc/mingw, but it seems to me that this will remain a mystery until you start inspecting the generated code. With MS CL, adding /link /FAs to the compiler options causes it to output a .asm file.
When I run the following:
#! perl -slw
# Micro-benchmark: time $N calls from Perl into a trivial Inline::C function.
# The -s switch on the #! line lets $N be overridden from the command line
# (e.g. -N=1e7); -l appends a newline to each print.
use strict; use Config; print $Config{ ccflags };
# Build with perl's own compiler flags plus the listing switch appended.
# (This statement was split mid-identifier by the forum's line wrapping;
# it is rejoined here so the script parses.)
use Inline C => Config => BUILD_NOISY => 1, CCFLAGS => $Config{ ccflags } . "/link /FAs";
# CLEAN_AFTER_BUILD => 0 is set so the generated files can be inspected afterwards.
use Inline C => <<'END_C', NAME => '_junk', CLEAN_AFTER_BUILD =>0;
int i = 0;
void test( SV *sv ) {
++i;
return;
}
int check( SV *sv ) {
return i;
}
END_C
use Time::HiRes qw[ time ];
our $N //= 1e6;
my $start = time;
my $i = 0;
# test() returns nothing; the assignment is part of the per-call work being timed.
$i = test( 1 ) for 1 .. $N;
printf "Took %fseconds\n", time() - $start;
# check() reports the C-side counter, confirming how many times test() really ran.
print check( 1 )
The assembly code produced for test() is pretty much exactly what you'd expect:
PUBLIC test
; Function compile flags: /Ogtpy
; Compiler listing for the C function test(): the whole body is a single
; increment of the global i followed by a return. The "; NN :" comments are
; the C source lines the compiler attributed the instructions to.
_TEXT SEGMENT
; sv$ is defined for the sv parameter but no instruction below references it.
sv$ = 8
test PROC
; 10 : ++i;
inc DWORD PTR i
; 11 : return;
; 12 : }
ret 0
test ENDP
_TEXT ENDS
But then you have to look at the Perl-callable wrapper function to see all the overhead that Perl-callability adds:
_TEXT SEGMENT
; Compiler listing for XS_main_test, the Perl-callable wrapper around test().
; The "; NNN :" comments are the C source lines the compiler attributed each
; instruction group to. test() itself is inlined as the single
; "inc DWORD PTR i"; every other instruction is wrapper bookkeeping.
; Each access to an interpreter variable appears as a pair of calls:
; Perl_get_context, then an accessor (Perl_Istack_sp_ptr,
; Perl_Imarkstack_ptr_ptr or Perl_Istack_base_ptr) whose returned pointer
; is then dereferenced.
my_perl$ = 48
cv$ = 56
XS_main_test PROC
; 174 : {
; Prologue: save rbx, rsi and rdi, reserve 32 bytes of stack, and keep the
; incoming rdx in rdi (it is handed to Perl_croak_xs_usage as cv below).
mov QWORD PTR [rsp+8], rbx
mov QWORD PTR [rsp+16], rsi
push rdi
sub rsp, 32 ; 00000020H
mov rdi, rdx
; 175 : dVAR; dXSARGS;
; rbx = current stack pointer value.
call Perl_get_context
mov rcx, rax
call Perl_Istack_sp_ptr
mov rbx, QWORD PTR [rax]
; Step the mark-stack pointer back by 4 bytes and load the 32-bit mark
; it pointed at into rsi.
call Perl_get_context
mov rcx, rax
call Perl_Imarkstack_ptr_ptr
mov rcx, QWORD PTR [rax]
add rcx, -4
movsxd rsi, DWORD PTR [rcx+4]
mov QWORD PTR [rax], rcx
; rbx = (sp - (stack_base + mark*8)) / 8; this is the value tested as
; "items" below. esi is incremented to mark+1 and reused at line 35.
call Perl_get_context
mov rcx, rax
call Perl_Istack_base_ptr
mov rax, QWORD PTR [rax]
lea rdx, QWORD PTR [rax+rsi*8]
inc esi
sub rbx, rdx
sar rbx, 3
; 176 : if (items != 1)
cmp ebx, 1
je SHORT $LN8@XS_main_te
; 177 : croak_xs_usage(cv, "sv");
; Error path only. No jump follows the call in the listing; presumably
; Perl_croak_xs_usage does not return -- confirm.
call Perl_get_context
lea r8, OFFSET FLAT:??_C@_02CPGMCOJE@sv?$AA@
mov rdx, rdi
mov rcx, rax
call Perl_croak_xs_usage
$LN8@XS_main_te:
; 178 : PERL_UNUSED_VAR(ax); /* -Wall */
; 179 : SP -= items;
; 180 : {
; 181 : SV * sv = ST(0)
; 182 : ;
; The pointer returned by this pair of calls is never read: rax is
; overwritten by the next call.
call Perl_get_context
mov rcx, rax
call Perl_Istack_base_ptr
; File c:\test\_inline\build\_junk\_junk.xs
; 30 : temp = PL_markstack_ptr++;
call Perl_get_context
mov rcx, rax
call Perl_Imarkstack_ptr_ptr
; 31 : test(sv);
; The inc is the entire inlined body of test(). The three instructions after
; it finish line 30: rbx = temp (old mark-stack pointer), then the pointer is
; stored back advanced by 4 bytes.
inc DWORD PTR i
mov rbx, QWORD PTR [rax]
lea rcx, QWORD PTR [rbx+4]
mov QWORD PTR [rax], rcx
; 32 : if (PL_markstack_ptr != temp) {
; Re-fetch the mark-stack pointer and compare it with temp; skip to the
; epilogue when they are equal.
call Perl_get_context
mov rcx, rax
call Perl_Imarkstack_ptr_ptr
cmp QWORD PTR [rax], rbx
je SHORT $LN4@XS_main_te
; 33 : /* truly void, because dXSARGS not invoked */
; 34 : PL_markstack_ptr = temp;
call Perl_get_context
mov rcx, rax
call Perl_Imarkstack_ptr_ptr
mov QWORD PTR [rax], rbx
; 35 : XSRETURN_EMPTY; /* return empty stack */
; New stack pointer = stack_base + esi*8 - 8, using the esi value
; incremented during dXSARGS above.
call Perl_get_context
mov rcx, rax
call Perl_Istack_base_ptr
movsxd rcx, esi
mov rax, QWORD PTR [rax]
lea rbx, QWORD PTR [rax+rcx*8-8]
call Perl_get_context
mov rcx, rax
call Perl_Istack_sp_ptr
mov QWORD PTR [rax], rbx
$LN4@XS_main_te:
; File c:\test\_inline\build\_junk\_junk.c
; 200 : }
; Epilogue: reload rbx and rsi from the slots written in the prologue
; (now at rsp+48 and rsp+56 after the push and the 32-byte reservation),
; release the stack space and restore rdi.
mov rbx, QWORD PTR [rsp+48]
mov rsi, QWORD PTR [rsp+56]
add rsp, 32 ; 00000020H
pop rdi
ret 0
XS_main_test ENDP
_TEXT ENDS
And the real eye-opener comes when you start looking at the code behind those "call Perl_xxx" instructions littered all over the place. (Why is it necessary to call Perl_get_context() 9 times for EVERY CALL to such a simple function?)
If you assume that your original empty C stub is actually causing code to be generated and run -- and I don't; I think your call to the empty function is being optimised away -- then it would be instructive to see the difference in the code that is being called.
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
In the absence of evidence, opinion is indistinguishable from prejudice.
Suck that fhit
|