in reply to Pure Perl module(245 lines) that calls external libraries - no XS file.

I've taken the liberty of adding use strict; to your code and running it through perltidy:

# calltests.pl use AsmUtil_IA32; use strict; { $a = "Just Another C&ASM Hacker\n"; $b = "Perl "; # ---- CopyMemory - the 30 lb. sledge hammer version of Perl's s +ubstr() # or unpack("p??",...) ---- remember peek & poke ?, its back as Co +pyMemory: my $CopyMemory = DeclareXSub( "_CopyMemory", "C:\\WINDOWS\\system32\\kernel32.dll!RtlMoveMemory", "i,i,i", "", "s" ); my $QPF = DeclareXSub( "_QPF", "C:\\WINDOWS\\system32\\kernel32.dll!QueryPerformanceFrequency +", "i", "i", "s" ); my $QPC = DeclareXSub( "_QPC", "C:\\WINDOWS\\system32\\kernel32.dll!QueryPerformanceCounter", "i", "i", "s" ); my $qsort = DeclareXSub( "_qsort", "C:\\WINDOWS\\system32\\msvcrt.dll!qsort" +, "i,i,i,i", "", "c" ); my $ClibAtan = DeclareXSub( "_atan", "C:\\WINDOWS\\system32\\msvcrt.dll!atan", "d", "d", "c" ); my $atoi64 = DeclareXSub( "_atoi64", "C:\\WINDOWS\\system32\\msvcrt.dll!_atoi +64", "i", "q", "c" ); my $i64toa = DeclareXSub( "_i64toa", "C:\\WINDOWS\\system32\\msvcrt.dll!_i64t +oa", "q,i,i", "i", "c" ); print ">>> Win32 API __stdcall tests <<<\n"; print "Before CopyMemory: ", $a; $CopyMemory->{Call}( SVPtr($a) + 13, SVPtr($b), 5 ); print "After CopyMemory: ", $a; my $Quad = "\x00" x 8; my $qstr = "\x00" x 80; $QPF->{Call}( SVPtr($Quad) ); my $QPFreturn = unpack( "i", $QPF->{RetEAX} ); if ($QPFreturn) { $i64toa->{Call}( $Quad, SVPtr($qstr), 10 ); print "_i64toa tests:\nticks/second: ", $qstr, "\n"; for ( my $i = 0 ; $i < 50000 ; ++$i ) { $QPC->{Call}( SVPtr($Quad) ); $qstr = "\x00" x 80; $i64toa->{Call}( $Quad, SVPtr($qstr), 10 ); print "ticks: ", $qstr, "\r"; } print "\n"; } print ">>> Microsoft Visual C Run Time msvcrt.dll(and msvcr??.dll) __cdecl t +ests <<<\n"; $qstr = "72623859790382856"; # 0n72623859790382856 == 0x0102030 +405060708 $atoi64->{Call}( SVPtr($qstr) ); printf "_atoi64 call: Quad(longlong) return test (EDX:EAX)=> %08X% +08X\n", unpack( "L", $atoi64->{RetEDX} ), unpack( "L", $atoi64->{RetEAX} + ); printf( "Perl emulated \(un\)pack\(Q,...\) test: %s\n", SVQuad( CQuad("72623859790382856") ) ); $ClibAtan->{Call}(1.00000); printf( "FPU doubles test: 4*atan(1) = Pi = %18.16f\n", 4 * unpack( "d", $ClibAtan->{Ret64bit} ) ); print "---C library qsort calls back to Perl test:\n"; my $qcompare = DeclareCallback( __PACKAGE__ . "::qcompare", "p2,p2", "", "c" ) ; #p2==(short *) my $iArray = pack( "s13", 399, 99, 3, 1, 234, 546, 789, 34, 124, 894, 521, 67 +, 754 ); print "Before sort:", join( ",", unpack( "s13", $iArray ) ), "\n"; $qsort->{Call}( SVPtr($iArray), 13, 2, $qcompare->{Ptr} ); print "After sort:", join( ",", unpack( "s13", $iArray ) ), "\n"; sub qcompare() { # ----- reconstruct @_ without XS ----- my $Cstack = substr( unpack( "P16", $qcompare->{stackPtr} ), 8 ) ; # copy stack in binary form $_[0] = substr( $Cstack, 0, 4 ); # $_[0] == void* $_[1] = substr( $Cstack, 4, 4 ); my $e1 = unpack( "s", unpack( "P2", $_[0] ) ) ; # $e1 = (Perl scalar) *(short *) $_[0] my $e2 = unpack( "s", unpack( "P2", $_[1] ) ); cbreturn( { cbref => $qcompare, ret32 => $e1 - $e2, } ) ; # return result back to C qsort routine } my $arg1 = "Assembly"; my $arg2 = "Callback"; my $arg3 = "To"; my $arg4 = "Perl"; my $ptrptrargs = pack( "PPPPI", $arg1, $arg2, $arg3, $arg4, 0 ); my $cbname = __PACKAGE__ . "::" . "asm2perl"; my $cb_asm2perl = "\x90" . "\x68" . pack( "I", $call_argv_ref ) . # push [Perl_call_argv()] PUSH POINTERS TO PERL XS FUN +CTIONS "\x68" . pack( "I", $get_context_ref ) . # push [Perl_get_co +ntext()] "\x68" . pack( "I", $Tstack_sp_ptr_ref ) . # push [Perl_Tstack +_sp_ptr()] "\x55" . # push ebp "\x89\xE5" . # mov ebp,esp use ebp to access XS # ----------------- dSP; MACRO starts ------------------- "\xff\x55\x08" . # call dword ptr [ebp+8] => call Perl_get_co +ntext() "\x50" . # push eax "\xff\x55\x04" . # call dword ptr [ebp+4] => call Perl_Tstack +_sp_ptr() "\x59" . # pop ecx "\x8B\x00" . # mov eax,dword ptr [eax] "\x89\x45\xec" . # mov dword ptr [sp],eax => local copy of +SP # -------------- perl_call_argv("callbackname",G_DISCARD,char **a +rgs) ----- "\x68" . pack( "P", $ptrptrargs ) . # push char **args "\x68\x02\x00\x00\x00" . # push G_DISCARD "\x68" . pack( "p", $cbname ) . # push ptr to name of perl su +broutine "\xff\x55\x08" . # call Perl_get_context() "\x50" . # push eax "\xff\x55\x0c" . # call perl_call_argv: call dword ptr [ebp ++0x0c] "\x83\xc4\x10" . # add esp,10 CDECL call we maintain stack "\x89\xec" . # mov esp,ebp "\x5D" . # pop ebp "\x83\xc4\x0c" . # add esp,0c "\xc3"; # ret print ">>> internal XSUB\'s(ASM routine) call/callback test <<<\n +"; print "---Perl calls assembly calls back to Perl test:\n"; my $cbtest = DeclareXSub( __PACKAGE__ . "::cbtest", SVPtr($cb_asm2perl), "" ) +; $cbtest->{Call}(); #cbtest(); sub asm2perl { my $lastcaller = ( caller(1) )[3]; print "called from ", $lastcaller . "(\@_ = ", join( " ", @_ ) +, ")\n"; } print "Back to Perl\n"; }

And the module:

package AsmUtil_IA32; use DynaLoader; use Exporter; use Config; use strict; our @ISA = qw(Exporter); our @EXPORT = qw(DeclareXSub DeclareCallback SVPtr CInt CQuad cbreturn SVQuad getparameters $call_argv_ref $get_context_ref $Tstack_sp_ptr +_ref); our @EXPORT_OK = qw(G_DISCARD G_SCALAR G_NOARGS); our @EXPORT_NOT_OK = qw(); our $VERSION = "0.01"; #-------- cop.h: use constant G_SCALAR => 0; use constant G_DISCARD => 2; use constant TRUE => 1; use constant FALSE => 0; #--------------- get perl shared object and some XS routines--- my $perldll = $^X; my $VER = int($]) . int( 1000 * ( $] - int($]) ) ); $perldll =~ s/\./$VER\./; $perldll =~ s/\.exe/\.$Config{so}/; our $perlXS = DynaLoader::dl_load_file("$perldll"); our $call_argv_ref = DynaLoader::dl_find_symbol( $perlXS, "Perl_call_argv" ); # embed. +h our $get_context_ref = DynaLoader::dl_find_symbol( $perlXS, "Perl_get_context" ); our $Tstack_sp_ptr_ref = DynaLoader::dl_find_symbol( $perlXS, "Perl_Tstack_sp_ptr" ); ######################## Subs ############### sub DeclareXSub { my %FARPROC; $FARPROC{namespace} = $_[0]; $FARPROC{lib} = DynaLoader::dl_load_file( ( split( "!", $_[1] ) )[ +0] ) if $_[1] =~ m/\!/; $FARPROC{procptr} = defined( $FARPROC{lib} ) ? DynaLoader::dl_find_symbol( $FARPROC{lib}, ( split( "!", $_[1] + ) )[1] ) : $_[1]; return undef if !defined( $FARPROC{procptr} ); $FARPROC{args} = $_[2]; $FARPROC{rtn} = $_[3]; if ( $^O =~ /win32/i ) { $FARPROC{conv} = defined( $_[4] ) ? $_[4] : "s"; # default calling convention: Win32 __stdcall } else { $FARPROC{conv} = defined( $_[4] ) ? $_[4] : "c"; # default calling convention: UNIX __cdecl } my $stackIN; my @stridx; my @bytype; my $bytspushed; my $asmcode = "\x90"; # machine code starts , this can also be \xcc -user b +reakpoint my @Args = split( ",", $FARPROC{args} ); @Args = reverse @Args; # pushing order last args first foreach my $arg (@Args) { # 4 byte push, and another 4 byte push for doubles,quads $stackIN .= "\x68" . pack( "I", 0 ); $stackIN .= "\x68" . pack( "I", 0 ) if ( $arg =~ m/d|q/i ); push( @stridx, length($stackIN) - 4 + 1 ) if ($arg !~ m/d|q/i) +; push( @stridx, length($stackIN) - 9 + 1 ) if ($arg =~ m/d|q/i) +; push( @bytype, "byval" ) if ($arg =~ m/v|l|i|c|d|q/i); # 32 bit pointers push( @bytype, "byref" ) if ($arg =~ m/p|r/i); # 4 byte aligned, and another 4 for doubles or quads $bytspushed += 4; $bytspushed += 4 if ( $arg =~ m/d|q/i ); } $FARPROC{sindex} = \@stridx; $FARPROC{types} = \@bytype; $FARPROC{stklen} = $bytspushed; $FARPROC{edi} = "null"; # 4 bytes long !!! ,how +convenient $FARPROC{esi} = "null"; $FARPROC{RetEAX} = "null"; # usual return register $FARPROC{RetEDX} = "null"; $FARPROC{Ret64bit} = "nullnull"; # save double or quad re +turns $FARPROC{stackOUT} = "\x00" x $bytspushed; $asmcode .= "$stackIN"; $asmcode .= "\xb8" . CInt( $FARPROC{procptr} ); # mov eax, $pro +cptr $asmcode .= "\xFF\xd0"; # call eax => CALL THE PROCEDURE or in C: (* EAX)(a +rgs, ...); # --- save return values info into Perl Strings, including the sta +ck: # - some calls return values back to the stack, overwriting the or +iginal # args $asmcode .= "\xdd\x1d" . CPtr( $FARPROC{Ret64bit} ) if $FARPROC{rtn} =~ m/d/i; # fstp qword [$FARPROC{Ret64bit}] $asmcode .= "\xa3" . CPtr( $FARPROC{RetEAX} ); # mov [$FARPROC{Ret +EAX}], eax $asmcode .= "\x89\x15" . CPtr( $FARPROC{RetEDX} ); # mov [$FARPROC{Ret +EDX}], edx $asmcode .= "\x89\x35" . CPtr( $FARPROC{esi} ); # mov [$FARPROC{ +esi}], esi $asmcode .= "\x89\x3d" . CPtr( $FARPROC{edi} ); # mov [$FARPROC{ +edi}], edi $asmcode .= "\x8d\xb4\x24" if $FARPROC{conv} =~ m/s/i; # $asmcode .= CInt( -$bytspushed ) if $FARPROC{conv} =~ m/s/i; # lea esi,[esp-$bytspushed] $asmcode .= "\x89\xe6" if $FARPROC{conv} =~ m/c/i; # mov esi, +esp $asmcode .= "\xbf" . CPtr( $FARPROC{stackOUT} ); # mov edi, [$FARPROC{sta +ckOUT}] $asmcode .= "\xb9" . CInt($bytspushed); # mov ecx,$bytspushed - number of bytes +to copy $asmcode .= "\xfc"; # cld $asmcode .= "\xf3\xa4"; # rep movsb [edi],[esi] => copy the stac +k $asmcode .= "\x8b\x3d" . CPtr( $FARPROC{edi} ); # mov edi,[$FAR +PROC{edi}] $asmcode .= "\x8b\x35" . CPtr( $FARPROC{esi} ); # mov esi,[$FAR +PROC{esi}] $asmcode .= "\x81\xc4" . CInt($bytspushed) if $FARPROC{conv} =~ m/c/i; # add esp,$bytspushed : __cdecl $asmcode .= "\xc3"; # ret __stdcall or __cdecl $FARPROC{ASM} = $asmcode; $FARPROC{coderef} = DynaLoader::dl_install_xsub( $FARPROC{namespace}, SVPtr( $FARPRO +C{ASM} ), __FILE__ ); $FARPROC{Call} = sub { my @templates = reverse split( ",", $FARPROC{args} ); my @args = reverse @_; # parameters get pushed last firs +t; # --- edit the machine language pushes with @args --- for ( my $index = 0 ; $index < scalar( @{ $FARPROC{sindex} } ) ; ++$index ) { my @a = split( ":", $args[$index] ) if $args[$index] =~ m/ +\:/; if ( $templates[$index] eq "ss" ) { $args[$index] = $a[0] << 16 + $a[1]; } if ( $templates[$index] eq "cccc" ) { $args[$index] = $a[0] << 24 + $a[1] << 16 + $a[2] << 8 + + $a[3]; } if ( $templates[$index] eq "ccc" ) { $args[$index] = $a[0] << 16 + $a[1] << 8 + $a[2]; } if ( $templates[$index] eq "cc" ) { $args[$index] = $a[0] << 8 + $a[1]; } if ( $templates[$index] eq "scc" ) { $args[$index] = $a[0] << 16 + $a[1] << 8 + $a[2]; } if ( $templates[$index] eq "ccs" ) { $args[$index] = $a[0] << 24 + $a[1] << 16 + $a[2]; } if ( $templates[$index] eq "sc" ) { $args[$index] = $a[0] << 16 + $a[1]; } if ( $templates[$index] eq "cs" ) { $args[$index] = $a[0] << 16 + $a[1]; } if ( $templates[$index] =~ m/d|q/i ) { $args[$index] = pack( "d", $args[$index] ) if $templates[$index] =~ m/d/i; my $Quad = $args[$index] if $templates[$index] =~ m/q/ +i; substr( $FARPROC{ASM}, $FARPROC{sindex}->[$index] + 5, 4, substr( $args[$index], 0, 4 ) ) if $templates[$index] =~ m/d/i; substr( $FARPROC{ASM}, $FARPROC{sindex}->[$index], 4, substr( $args[$index], 4, 4 ) ) if $templates[$index] =~ m/d/i; substr( $FARPROC{ASM}, $FARPROC{sindex}->[$index] + 5, 4, substr( $Quad, 0, 4 ) ) if $templates[$index] =~ m/q/i; substr( $FARPROC{ASM}, $FARPROC{sindex}->[$index], 4, substr( $Quad, 4, 4 ) ) if $templates[$index] =~ m/q/i; } else { substr( $FARPROC{ASM}, $FARPROC{sindex}->[$index], 4, CInt( $args[$index] ) ) if $FARPROC{types}->[$index] eq "byval"; } substr( $FARPROC{ASM}, $FARPROC{sindex}->[$index], 4, CPtr( $args[$index] ) ) if $FARPROC{types}->[$index] eq "byref"; } my $ret = &{ $FARPROC{coderef} }; # Invoke it return $ret; # usually EAX==return value - not reliabe as $FARPR +OC{RetEAX} }; return \%FARPROC ; # make an object out of a hash( has 1 XSUB, 1 sub, 2 arrays, s +everal scalars) } sub DeclareCallback { my %CALLBACK; $CALLBACK{cbname} = $_[0]; $CALLBACK{args} = $_[1]; $CALLBACK{cbrtn} = defined( $_[2] ) ? $_[2] : "I"; $CALLBACK{conv} = defined( $_[3] ) ? $_[3] : "c"; $CALLBACK{ptrptrargs} = "\x00" x 4; # char **args, NULL FOR NOW $CALLBACK{stackPtr} = "\x00" x 4; # ebp $CALLBACK{CallerRtn1} = "\x00" x 8; # eax register usually, possibly for a double $CALLBACK{CallerRtn2} = "\x00" x 4; # edx register usually , for returning 8 byte values edx:eax + - doubles $CALLBACK{ASM} = "\x90" . # nop or debug break "\x55" . # push ebp "\x89\xE5" . # mov ebp,esp # -------- local variables - Perl function pointers, stack info "\x68" . CInt($call_argv_ref) . # push *Perl_call_argv() "\x68" . CInt($get_context_ref) . # push *Perl_get_context( +) "\x68" . CInt($Tstack_sp_ptr_ref) . # push *Perl_Tstack_sp_pt +r() "\x68\x00\x00\x00\x00" . # empty local variable "\x68\x00\x00\x00\x00" . # empty local variable # ------- get ebp to access C stack on the Perl side and save r +eturn # registers---------------- "\x89\x2d" . CPtr( $CALLBACK{stackPtr} ) . # mov ds:[$CALLBACK{stackPtr}],ebp - stack access "\xA3" . CPtr( $CALLBACK{CallerRtn1} ) . # mov ds:[$CALLBACK{CallerRtn1}],eax - save eax primary retur +n register "\x89\x15" . CPtr( $CALLBACK{CallerRtn2} ) . # mov ds:[$CALLBACK{CallerRtn2}],edx - save edx secondary retu +rn register # ----------------- dSP; MACRO starts ------------------- "\xff\x55\xf8" . # call dword ptr [ebp-0x08] => call Perl_get_c +ontext() "\x50" . # push eax "\xff\x55\xf4" . # call dword ptr [ebp-0x0c] => call Perl_Tstac +k_sp_ptr() "\x59" . # pop ecx "\x8B\x00" . # mov eax,dword ptr [eax] "\x89\x45\xec" . # mov dword ptr [sp],eax => local copy of SP # -------------- perl_call_argv(char *callbackname,G_DISCARD,cha +r **args) # ----- "\x68" . CPtr( $CALLBACK{ptrptrargs} ) . # push char **args "\x68\x02\x00\x00\x00" . # push G_DISCARD "\x68" . CPtr( $CALLBACK{cbname} ) . # push ptr to name of perl +subroutine "\xff\x55\xf8" . # call Perl_get_context() "\x50" . # push eax "\xff\x55\xfc" . # call perl_call_argv => call dword ptr [ebp +-0x04] "\x83\xc4\x10" . # add esp,0x10 CDECL call we maintain stack "\x83\xc4\x0c" . # add esp,14 # deallocate local variables "\x89\xec" . # mov esp,ebp "\x5D" . # pop ebp "\xA1" . CPtr( $CALLBACK{CallerRtn1} ) . # mov eax,[$CALLBACK{CallerRtn1}] - return eax to caller "\x8b\x15" . CPtr( $CALLBACK{CallerRtn2} ) . # mov edx,[$CALLBACK{CallerRtn2}] - return edx to cal +ler "\xc3"; # ret $CALLBACK{Ptr} = SVPtr( $CALLBACK{ASM} ); return \%CALLBACK; } sub getparameters { my $argtmpl; if ( !defined( $_[1] ) ) { return []; } my @args = split( ",", $_[1] ); foreach my $arg (@args) { $argtmpl .= $arg; } my $template = "P" . ( 4 * scalar(@args) + 8 ); my $Cstack = substr( unpack( $template, $_[0] ), 8 ); # copy stack in bina +ry form return unpack( $argtmpl, $Cstack ); } sub cbreturn { my %rets = %{ $_[0] }; substr( $rets{cbref}->{CallerRtn1}, 0, 4, pack( "i", $rets{ret32} +) ) if defined( $rets{ret32} ); substr( $rets{cbref}->{CallerRtn1}, 0, 4, pack( "x4i", $rets{ret32 +} ) ) if defined( $rets{ret64} ); # little endian substr( $rets{cbref}->{CallerRtn2}, 0, 4, pack( "ix4", $rets{ret32 +} ) ) if defined( $rets{ret64} ); } sub SVPtr { return unpack( "I", pack( "p", $_[0] ) ); } sub CPtr { return pack( "p", $_[0] ); } sub CInt { return pack( "i", $_[0] ); } sub CShort { return pack( "s", $_[0] ); } sub CByte { return pack( "c", $_[0] ); } sub CDbl { return pack( "d", $_[0] ); } sub CQuad { # emulates pack("Q",...) - assumes decimal string input # --- convert an arbitrary length decimal string to a hex string - +-- my @digits = split( //, $_[0] ); my $lohexstr = substr( sprintf( "%08X", substr( $_[0], -8 ) ), -2 ) ; # gets the first 8 bits my $totquotient = ""; # bit shift to the right 8 bits by dividing by 256, # using arbitrary precision grade school long division for ( my $j = 0 ; $j < 4 ; ++$j ) { # shift 8 bits, 4 times for +lower long my $remainder = ""; $totquotient = ""; my $quotient = ""; my $dividend = ""; my $remainder = ""; for ( my $i = 0 ; $i <= $#digits ; ++$i ) { $dividend = $remainder . $digits[$i]; $quotient = int( $dividend / 256 ); $remainder = $dividend % 256; $totquotient .= sprintf( "%01d", $quotient ); } $totquotient =~ s/^0*//; last if $j == 3; $lohexstr = substr( sprintf( "%08X", substr( $totquotient, -8 ) ), 6, 2 +) . $lohexstr; # unshift 8 more bits @digits = split( //, $totquotient ); } my $hihexstr = sprintf( "%08X", $totquotient ); my $lo = pack( "H*", $lohexstr ); my $hi = pack( "H*", $hihexstr ); ( $hi, $lo ) = ( $lo, $hi ); # little endian return $hi . $lo; } sub SVQuad { # emulates unpack("Q",...) - assumes binary input my ( $hi, $lo ) = unpack( "NN", $_[0] ); ( $hi, $lo ) = ( $lo, $hi ); # little endian return sprintf( "0x%08X%08X", $hi, $lo ) ; # - 64 bit base 10 expressions mean anything ? } 1;

It's cool stuff, but I'm always happier when I see that there are tests (if possible) for a module. Also, this appears to be suitable for a Windows only environment, so a check of $^O at run-time would probably be a great idea.

Alex / talexb / Toronto

"Groklaw is the open-source mentality applied to legal research" ~ Linus Torvalds