#!env perl # # ex_pattern_matching.pl # # Check speed of capturing, case sensitivity, etc. # use strict; use warnings; use Benchmark 'cmpthese'; sub gen_data { my ($len, @alphabet) = @_; my $rv = ""; $rv .= $alphabet[@alphabet * rand] while $len > length $rv; return $rv; } sub x_cap_i { my ($str, $cnt) = @_; ++$cnt while $str =~ /(AA)/gi; return $cnt; } sub x_cap { my ($str, $cnt) = @_; ++$cnt while $str =~ /(AA)/g; return $cnt; } sub x_i { my ($str, $cnt) = @_; ++$cnt while $str =~ /AA/gi; return $cnt; } sub x { my ($str, $cnt) = @_; ++$cnt while $str =~ /AA/g; return $cnt; } sub ovr_cap_i { my ($str, $cnt) = @_; ++$cnt while $str =~ /(?=(AA))/gi; return $cnt; } sub ovr_cap { my ($str, $cnt) = @_; ++$cnt while $str =~ /(?=(AA))/g; return $cnt; } sub ovr_i { my ($str, $cnt) = @_; ++$cnt while $str =~ /(?=AA)/gi; return $cnt; } sub ovr { my ($str, $cnt) = @_; ++$cnt while $str =~ /(?=AA)/g; return $cnt; } sub idx { my ($str, $cnt) = @_; my $pos = 0; $cnt++ while $pos = 1 + index $str, 'AA', $pos; return $cnt; } sub poz { my ($str, $cnt) = @_; $cnt++, pos($str)=pos($str)-1 while $str =~ /AA/g; return $cnt; } print "<", gen_data(25, A=>C=>T=>'G'), ">\n"; my $long = gen_data(150_000, A=>C=>T=>'G'); print "x_cap_i: ", x_cap_i($long), " ", "x_cap: ", x_cap($long), " ", "x_i: ", x_i($long), " ", "x: ", x($long), "\n"; print "poz: ", poz($long), " ", "idx: ", idx($long), " ", "ovr_cap_i: ", ovr_cap_i($long), " ", "ovr_cap: ", ovr_cap($long), " ", "ovr_i: ", ovr_i($long), " ", "ovr: ", ovr($long), "\n"; $long = gen_data(1_500_000, A=>C=>T=>'G'); cmpthese(100, { "x_cap_i" => sub { return x_cap_i($long, 0) }, "x_cap" => sub { return x_cap($long, 0) }, "x_i" => sub { return x_i($long, 0) }, "x" => sub { return x($long, 0) }, "idx" => sub { return idx($long, 0) }, "ovr_cap_i" => sub { return ovr_cap_i($long, 0) }, "ovr_cap" => sub { return ovr_cap($long, 0) }, "ovr_i" => sub { return ovr_i($long, 0) }, "ovr" => sub { return ovr($long, 0) }, "poz" => sub { return poz($long, 0) }, });