in reply to OT: Google PR, PHP -> PERL ?

This has nothing to do with Google's page rank algorithm; it just allows you to access the Google page rank info as described here. This is actually just a PHP implementation of Bob Jenkins' 32-bit integer hashing algorithm. I wrote a Perl XS module implementing this algorithm a couple of years ago and it has been on CPAN since 2003 - Digest::JHash. You will need to change the line c=0 in JHash.xs to c=0xe6359a60 before you compile it to get the Google init constant right. Then you can just:

use Digest::JHash qw(jhash);

# The string being hashed is the target URL prefixed with 'info:'.
my $query = 'info:http://www.google.com/';

# The printed checksum is the literal digit '6' followed by the hash value.
print 'Checksum: 6' . jhash($query);

__DATA__
Checksum: 64222138902

to get the correct checksums. I might point out that this is against the Google TOS (well, if you get the checksum wrong, that is where they refer you).

Google's pagerank algorithm is a complex beast. You can read about the original implementation in the original paper from Brin and Page, "The Anatomy of a Large-Scale Hypertextual Web Search Engine".

Here is the XS code as a standalone Inline C widget:

use Inline C;

# The checksum is the literal digit '6' followed by the decimal hash value.
# For this URL it should print 64222138902 (the checksum quoted for it).
print '6' . jhash('info:http://www.google.com/');

__END__
__C__
#include <stdint.h>

/*
 * The algorithm is defined on 32-bit words. Using `unsigned long` here
 * silently breaks it on platforms where long is 64 bits (the subtractions
 * stop wrapping at 2^32 and the right shifts pull in high bits), so use a
 * type that is exactly 32 bits wide everywhere.
 */
typedef uint32_t UB4;

static const UB4 INIT = 0xe6359a60; /* Google Magic Value */

/* Reversibly mix three 32-bit values; modifies all three arguments. */
#define MIX(a,b,c) \
    do { \
        a -= b; a -= c; a ^= (c>>13); \
        b -= c; b -= a; b ^= (a<<8);  \
        c -= a; c -= b; c ^= (b>>13); \
        a -= b; a -= c; a ^= (c>>12); \
        b -= c; b -= a; b ^= (a<<16); \
        c -= a; c -= b; c ^= (b>>5);  \
        a -= b; a -= c; a ^= (c>>3);  \
        b -= c; b -= a; b ^= (a<<10); \
        c -= a; c -= b; c ^= (b>>15); \
    } while (0)

/*
 * jhash - hash the bytes of a Perl scalar with Bob Jenkins' 32-bit hash,
 * seeded with INIT.
 *
 * str: Perl scalar whose string value is hashed.
 *
 * Returns the 32-bit hash value (widened to unsigned long), or 0 after
 * printing a message to stdout when the string is empty.
 */
unsigned long jhash( SV* str ) {
    STRLEN rawlen;
    const unsigned char *p;   /* unsigned: bytes >= 0x80 must not sign-extend */
    UB4 a, b, c, len, length;

    /* extract the string data and string length from the perl scalar */
    p = (const unsigned char *)SvPV(str, rawlen);
    length = len = (UB4)rawlen;

    if ( length == 0 ) {
        printf( "Recieved a null or undef string!\n" );
        return 0;
    }

    a = b = 0x9e3779b9;   /* golden ratio suggested by Jenkins 0x9E3779B9 */
    c = INIT;

    /* consume the input twelve bytes at a time, little-endian per word */
    while (len >= 12) {
        a += ((UB4)p[0]+((UB4)p[1]<<8)+((UB4)p[2]<<16)+((UB4)p[3]<<24));
        b += ((UB4)p[4]+((UB4)p[5]<<8)+((UB4)p[6]<<16)+((UB4)p[7]<<24));
        c += ((UB4)p[8]+((UB4)p[9]<<8)+((UB4)p[10]<<16)+((UB4)p[11]<<24));
        MIX(a, b, c);
        p += 12;
        len -= 12;
    }

    /* the low byte of c is reserved for the total length */
    c += length;

    /* fold in the remaining 0..11 bytes; every case falls through */
    switch(len) {
    case 11: c+=((UB4)p[10]<<24); /* fallthrough */
    case 10: c+=((UB4)p[9]<<16);  /* fallthrough */
    case 9:  c+=((UB4)p[8]<<8);   /* fallthrough */
    case 8:  b+=((UB4)p[7]<<24);  /* fallthrough */
    case 7:  b+=((UB4)p[6]<<16);  /* fallthrough */
    case 6:  b+=((UB4)p[5]<<8);   /* fallthrough */
    case 5:  b+=((UB4)p[4]);      /* fallthrough */
    case 4:  a+=((UB4)p[3]<<24);  /* fallthrough */
    case 3:  a+=((UB4)p[2]<<16);  /* fallthrough */
    case 2:  a+=((UB4)p[1]<<8);   /* fallthrough */
    case 1:  a+=((UB4)p[0]);      /* fallthrough */
    default: break;               /* len == 0: nothing left to add */
    }

    MIX(a, b, c);

    return (unsigned long)c;
}

cheers

tachyon

Replies are listed 'Best First'.
Re^2: OT: Google PR, PHP -> PERL ?
by 2ge (Scribe) on Aug 28, 2004 at 23:56 UTC
    Hello Tachyon!

    Thank you very much for helping, of course everything works when I install my new Linux. Working under a Windows system is really terrible; I can't even install JHash properly. Good job, thanks again!