The following pragma will "fix" \d. However, re::engine::Plugin does not currently support s/// or split //, just matching. (And it doesn't support named captures either.) Still, it may be helpful for some.
use 5.010;
use strict;
use utf8::all;
BEGIN {
    # re::engine::SaneDigits - lexically scoped regex engine plugin that makes
    # \d mean [0-9] and \D mean [^0-9].
    #
    # The pattern text is rewritten and then compiled by the engine that is in
    # effect inside this package; matching is delegated to that compiled regex.
    #
    # Known limitation: the rewrite is textual, so \d or \D written *inside* a
    # bracketed character class (e.g. [\d.]) becomes a nested "[0-9]" and will
    # not behave as intended.
    package re::engine::SaneDigits;
    no thanks;
    use constant TAINT => ${^TAINT};
    use re::engine::Plugin ();
    use Carp;

    # Replacement text for each redefined escape, keyed by the character that
    # follows the backslash.
    my %ascii_class_for = (
        d => '[0-9]',
        D => '[^0-9]',
    );

    # Names of the match variables requested through non-positive capture
    # numbers; none of them is supported by this engine.
    my %match_var_for = (
        0  => q($&),
        -1 => q($'),
        -2 => q($`),
    );

    # Enable the engine for the caller's lexical scope by registering our
    # compile and execute callbacks with re::engine::Plugin.
    sub import
    {
        re::engine::Plugin->import(
            comp => \&comp,
            exec => \&exec,
        );
    }

    *unimport = \&re::engine::Plugin::unimport;

    # Compile callback.
    #   $rx - the re::engine::Plugin regex object being compiled.
    # Stores the rewritten, compiled pattern in the stash under "real" and
    # installs the capture-variable FETCH handler.
    # Croaks if the rewritten pattern does not compile.
    sub comp
    {
        my ($rx) = @_;

        # Walk the pattern one backslash escape at a time so that an escaped
        # backslash followed by a literal "d" (\\d) is left alone; only a
        # genuine \d / \D escape is replaced.
        my $real = $rx->pattern;
        $real =~ s{\\(.)}{ exists $ascii_class_for{$1} ? $ascii_class_for{$1} : "\\$1" }gse;

        # Carry the original modifiers over as an embedded (?flags) group.
        # Compiling via interpolation rather than string eval means the pattern
        # text is never re-parsed as Perl source, and "/" needs no escaping.
        my %mods   = $rx->mod;
        my $flags  = join q(), keys %mods;
        my $prefix = length $flags ? "(?$flags)" : q();
        my $source = $prefix . $real;

        my $compiled = eval { qr/$source/ };
        croak sprintf("%s could not compile pattern: %s", __PACKAGE__, $@)
            unless defined $compiled;

        $rx->stash({ real => $compiled });
        $rx->num_captures(
            FETCH => sub {
                my ($rx, $paren) = @_;
                croak sprintf(
                    "%s variable not supported with %s",
                    $match_var_for{$paren},
                    __PACKAGE__,
                ) if $paren < 1;

                my $rv = $rx->stash->{last}[$paren];
                return $rv unless TAINT and defined $rv;

                # Untaint by capturing; /s so a value containing newlines is
                # returned whole instead of being cut at the first newline.
                $rv =~ /\A(.*)\z/s;
                return $1;
            },
        );
    }

    # Execute callback.
    #   $rx  - the regex object whose stash holds the compiled pattern.
    #   $str - the string to match against.
    # Returns 1 on a successful match and 0 otherwise. On success the stash
    # entry "last" holds the matched substrings, indexed by capture number
    # (index 0 is the whole match; unmatched groups are undef).
    sub exec
    {
        my ($rx, $str) = @_;
        my $stash = $rx->stash;

        # Scalar-context match: success is the match result itself, not a
        # side value such as pos().
        return 0 unless $str =~ $stash->{real};

        # Rebuild the captures from the offset arrays so that patterns without
        # capture groups do not report a spurious "1" as $1.
        my @captures = map {
            defined $-[$_] ? substr($str, $-[$_], $+[$_] - $-[$_]) : undef
        } 0 .. $#+;

        $stash->{last} = \@captures;
        return 1;
    }
};
# Demo: the second "bar" is followed by FULLWIDTH DIGIT FIVE (U+FF15), written
# as an escape so the character survives copy/paste and re-encoding.
my $str = "foo23 bar\x{FF15} bar42";

# Default engine: \d accepts any Unicode decimal digit, so the first "bar"
# followed by a digit is the fullwidth one.
say $str =~ m/bar(\d+)/i ? "GOT $1" : "NO MATCH";

use re::engine::SaneDigits;

# With the pragma in effect \d is [0-9] only, so the match is expected to skip
# the fullwidth digit and capture "42".
say $str =~ m/bar(\d+)/i ? "GOT $1" : "NO MATCH";
Update: Meh... come to think of it, a re::engine is overkill. Constant overloading does the trick much more easily...
use 5.010;
use strict;
use utf8::all;
BEGIN {
    # re::SaneDigits - lexically scoped pragma that rewrites \d to [0-9] and
    # \D to [^0-9] in regex literals, using overload::constant's "qr" hook.
    #
    # Known limitation: the rewrite is textual, so \d or \D written *inside* a
    # bracketed character class (e.g. [\d.]) becomes a nested "[0-9]" and will
    # not behave as intended.
    package re::SaneDigits;
    no thanks;
    use overload ();

    # Replacement text for each redefined escape, keyed by the character that
    # follows the backslash.
    my %ascii_class_for = (
        d => '[0-9]',
        D => '[^0-9]',
    );

    my %_const_handlers = (qr => \&_qr);

    # remove_constant takes key/value pairs but only looks at the keys, so
    # build the list from the handler *names* only (not names and coderefs).
    my %_remove_handlers = map { ($_ => undef) } keys %_const_handlers;

    # "use re::SaneDigits" installs the qr handler for the caller's scope.
    sub import { overload::constant %_const_handlers }

    # "no re::SaneDigits" removes it again.
    sub unimport { overload::remove_constant %_remove_handlers }

    # Constant-overload handler for regex pieces.
    #   First argument: the source text of a constant piece of a regex.
    # Returns that text with \d and \D replaced by explicit ASCII classes.
    sub _qr
    {
        # Work on a copy instead of modifying the caller's aliased argument.
        my ($piece) = @_;

        # Consume one backslash escape at a time so an escaped backslash
        # followed by a literal "d" (\\d) is not mistaken for \d.
        $piece =~ s{\\(.)}{ exists $ascii_class_for{$1} ? $ascii_class_for{$1} : "\\$1" }gse;

        return $piece;
    }
};
# Demo: the second "bar" is followed by FULLWIDTH DIGIT FIVE (U+FF15), written
# as an escape so the character survives copy/paste and re-encoding.
my $str = "foo23 bar\x{FF15} bar42";

# Without the pragma: \d accepts any Unicode decimal digit, so the first "bar"
# followed by a digit is the fullwidth one.
say $str =~ m/bar(\d+)/i ? "GOT $1" : "NO MATCH";

use re::SaneDigits;

# With the pragma in effect \d is [0-9] only, so the match is expected to skip
# the fullwidth digit and capture "42".
say $str =~ m/bar(\d+)/i ? "GOT $1" : "NO MATCH";
Another CPAN candidate I think.
Update II: Looks like PerlMonks might be breaking my UTF8 again. The "5" character which appears in $str should not be a normal ASCII 5, but a fullwidth 5 (U+FF15), which is a character used to include an Arabic numeral 5 within CJK text.
# Signature one-liner: defines Monkey::do and calls it as a method on the string "Monkey say"; it says each argument, then its own sub name (from caller(0)) with "::" replaced by a space, i.e. "Monkey say" followed by "Monkey do".
perl -E'sub Monkey::do{say$_,for@_,do{($monkey=[caller(0)]->[3])=~s{::}{ }and$monkey}}"Monkey say"->Monkey::do'
|