Crayman has asked for the wisdom of the Perl Monks concerning the following question:
--snip-- Benchmark: timing 300 iterations of c_strncmp, c_strncmp_M, c_strstr, +c_strstr_M, m_array, m_array_M, m_for, m_for_M, m_while, m_while_M, tr... c_strncmp: 7 wallclock secs ( 7.44 usr + 0.00 sys = 7.44 CPU) c_strncmp_M: 8 wallclock secs ( 7.31 usr + 0.00 sys = 7.31 CPU) c_strstr: 1 wallclock secs ( 1.36 usr + 0.00 sys = 1.36 CPU) c_strstr_M: 1 wallclock secs ( 1.18 usr + 0.00 sys = 1.18 CPU) m_array: 14 wallclock secs (12.25 usr + 0.00 sys = 12.25 CPU) m_array_M: 5 wallclock secs ( 4.91 usr + 0.00 sys = 4.91 CPU) m_for: 12 wallclock secs (11.60 usr + 0.00 sys = 11.60 CPU) m_for_M: 5 wallclock secs ( 4.91 usr + 0.00 sys = 4.91 CPU) m_while: 8 wallclock secs ( 8.41 usr + 0.00 sys = 8.41 CPU) m_while_M: 4 wallclock secs ( 3.94 usr + 0.00 sys = 3.94 CPU) tr: 2 wallclock secs ( 1.31 usr + 0.00 sys = 1.31 CPU) === check counts === _m_while_M 2000 _c_strncmp 6000 _m_for 6000 _m_array 6000 _c_strstr 6000 _m_while 6000 _c_strncmp_M 2000 _tr 6000 _m_for_M 2000 _m_array_M 2000 _c_strstr_M 2000 -- snip -- here is the benchmark code - notice the use of Inline.pm to get some more speeeed by using C. can anyone think of a faster way to count one substring in another??? -- snip --
#!/usr/bin/perl -w use Benchmark('timethese'); use Inline C; my $tststr="abcdefghijklmnopqrstuvwxyz01234567890\n"; my $dta=($tststr x 2000).( ($tststr."\n") x 1900).("\n" x 200); my %cnt=(); my $_tr = sub { my $cnt=$dta=~tr/\n//; $cnt{_tr}=$cnt; }; my $_m_while = sub { my $cnt=0; my $rcdsep="\n"; while ($dta=~m/$rcdsep/g) {$cnt++}; $cnt{_m_while}=$cnt; }; my $_m_for = sub { my $cnt=0; my $rcdsep="\n"; $cnt++ for($dta=~m/$rcdsep/g); $cnt{_m_for}=$cnt; }; my $_m_array = sub { my $cnt=0; my $rcdsep="\n"; $cnt=scalar(@{[$dta=~m/$rcdsep/g]}); $cnt{_m_array}=$cnt; }; my $_m_while_M = sub { my $cnt=0; my $rcdsep="\n\n"; while ($dta=~m/$rcdsep/g) {$cnt++}; $cnt{_m_while_M}=$cnt; }; my $_m_for_M = sub { my $cnt=0; my $rcdsep="\n\n"; $cnt++ for($dta=~m/$rcdsep/g); $cnt{_m_for_M}=$cnt; }; my $_m_array_M = sub { my $cnt=0; my $rcdsep="\n\n"; $cnt=scalar(@{[$dta=~m/$rcdsep/g]}); $cnt{_m_array_M}=$cnt; }; my $_c_strncmp = sub { my $rcdsep="\n"; my $cnt=c_strncmp($dta,$rcdsep); $cnt{_c_strncmp}=$cnt; }; my $_c_strncmp_M = sub { my $rcdsep="\n\n"; my $cnt=c_strncmp($dta,$rcdsep); $cnt{_c_strncmp_M}=$cnt; }; my $_c_strstr = sub { my $rcdsep="\n"; my $cnt=c_strstr($dta,$rcdsep); $cnt{_c_strstr}=$cnt; }; my $_c_strstr_M = sub { my $rcdsep="\n\n"; my $cnt=c_strstr($dta,$rcdsep); $cnt{_c_strstr_M}=$cnt; }; $_tr->(); # seed it?? timethese(300, { tr => $_tr, m_while => $_m_while, m_for => $_m_for, + m_array => $_m_array, m_while_M => $_m_while_M, m_for_M => $_m_for_M, + m_array_M => $_m_array_M, c_strncmp => $_c_strncmp, c_strncmp_M => $_c_strncmp +_M, c_strstr => $_c_strstr, c_strstr_M => $_c_strstr_ +M } ); print "\n=== check counts ===\n"; printf("%18s %5d\n",$_,$cnt{$_}) for (keys %cnt); __END__ __C__ #include <string.h> int c_strncmp(char * buf, char * sep){ int cnt = 0; int seplen = strlen(sep); for(; *buf ; buf++) if(!strncmp(buf,sep,seplen)){ cnt++; buf+=seplen-1; } return cnt; } int c_strstr(char * buf, char * sep){ int cnt = 0; int seplen = strlen(sep); for(; *buf; buf+=seplen, cnt++) buf=strstr(buf,sep); return cnt; } -- snip --
thanks in advance for the input!! ___cliff rayman___cliff_AT_rayman.com___
Edit 2001-03-16 by tye to add <READMORE>
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: quickly counting substrings
by jeroenes (Priest) on Mar 02, 2001 at 15:18 UTC | |
by danger (Priest) on Mar 02, 2001 at 15:44 UTC | |
|
Re: quickly counting substrings
by petral (Curate) on Mar 03, 2001 at 01:25 UTC | |
by Crayman (Acolyte) on Mar 09, 2001 at 12:15 UTC |