And lastly, since it's speed we're after, a C implementation can't hurt. This benchmarks orders of magnitude faster than anything seen yet. (Update: fixed memory leak)
use Inline C => <<'__EOC__';

/*
 * fast_c(original, chopped)
 *
 * Computes the multiset difference of the bytes in two NUL-terminated
 * strings by tallying how often each byte value occurs.
 *
 * Returns a new SV holding every byte of `original` that is left over
 * after removing one occurrence for each byte of `chopped`.  The
 * leftover bytes are emitted in ascending byte-value order, not in
 * their original positions.  Returns undef (&PL_sv_undef) when
 * `chopped` contains some byte more often than `original` does.
 * Croaks if the scratch buffer cannot be allocated.
 */
SV *fast_c (char *original, char *chopped) {
    int counts[256] = {0};      /* occurrences of each possible byte value */
    const unsigned char *p;
    size_t remaining = 0;       /* bytes of original not yet cancelled out */
    size_t out_len = 0;
    char *out;
    int byte;
    SV *retsv;

    /*
     * Walk the strings as unsigned char: plain char may be signed, and a
     * byte >= 0x80 would otherwise become a negative index into counts[].
     */
    for (p = (const unsigned char *)original; *p != '\0'; p++) {
        counts[*p]++;
        remaining++;
    }

    for (p = (const unsigned char *)chopped; *p != '\0'; p++) {
        if (--counts[*p] < 0) {
            /* chopped uses a byte that original cannot supply */
            return &PL_sv_undef;
        }
        remaining--;
    }

    /* +1 keeps the request non-zero, so NULL can only mean failure */
    out = malloc(remaining + 1);
    if (out == NULL) {
        croak("fast_c: out of memory");
    }

    for (byte = 0; byte <= 255; byte++) {
        while (counts[byte]-- > 0) {
            out[out_len++] = (char)byte;
        }
    }

    /* newSVpvn copies out_len bytes, so the scratch buffer can be freed */
    retsv = newSVpvn(out, out_len);
    free(out);
    return retsv;
}
__EOC__
And then here's a C implementation of demerphq's "scanning" method, which doesn't rely on counting up letters. This one's even faster:
use Inline C => <<'__EOC__';

/*
 * scan_c(from, to)
 *
 * Computes the bytes of `from` that are left over after removing the
 * bytes of `to`, using a single forward pass over both strings instead
 * of per-byte counters.
 *
 * The pass only ever moves forward and decides by comparing the current
 * byte of each string, so it presumably expects both strings to be
 * sorted in ascending byte order already -- NOTE(review): confirm
 * against the caller.
 *
 * Returns a new SV with the leftover bytes, or undef (&PL_sv_undef)
 * when either input is empty or when `to` cannot be fully matched
 * against `from`.  Croaks if the scratch buffer cannot be allocated.
 */
SV *scan_c (char *from, char *to) {
    /*
     * Read both strings as unsigned char so that every comparison below
     * is made on the same 0..255 scale; mixing plain (possibly signed)
     * char with unsigned char makes bytes >= 0x80 compare unequal to
     * themselves.
     */
    const unsigned char *src = (const unsigned char *)from;
    const unsigned char *sub = (const unsigned char *)to;
    size_t from_len = strlen(from);
    size_t to_len = strlen(to);
    size_t f = 0;               /* read position in from */
    size_t t = 0;               /* read position in to */
    size_t out_len = 0;
    unsigned char fc, tc;
    int matched = 0;            /* set once every byte of to is consumed */
    char *out;
    SV *retsv;

    if (from_len == 0 || to_len == 0) {
        return &PL_sv_undef;
    }

    /*
     * Each byte written comes from a distinct position of from (at most
     * from_len of them, plus one possible NUL on the failure path), so
     * from_len + 1 bytes always suffice.
     */
    out = malloc(from_len + 1);
    if (out == NULL) {
        croak("scan_c: out of memory");
    }

    for (;;) {
        fc = src[f];
        tc = sub[t];

        if (fc == tc) {
            /* this byte of from is cancelled by the current byte of to */
            f++;
            t++;
            if (sub[t] != '\0' && sub[t] != tc) {
                /* to moves on to a new byte value: surplus copies of fc survive */
                while (src[f] == fc) {
                    out[out_len++] = (char)fc;
                    f++;
                }
            }
            if (t == to_len) {
                matched = 1;
                break;
            }
        } else if (fc < tc) {
            /* byte only present in from: keep it (fc == 0 means from ran out) */
            out[out_len++] = (char)fc;
            f++;
            if (f >= from_len) {
                break;          /* from exhausted while to still has bytes */
            }
        } else {
            break;              /* to holds a byte that from cannot supply */
        }
    }

    if (matched) {
        /* everything still unread in from is left over */
        memcpy(out + out_len, from + f, from_len - f);
        out_len += from_len - f;
        retsv = newSVpvn(out, out_len);
    } else {
        retsv = &PL_sv_undef;
    }

    free(out);
    return retsv;
}
__EOC__

In reply to Re: Difference Of Two Strings (in C) by Fastolfe
in thread Difference Of Two Strings by YuckFoo

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.