in reply to Re^4: Regex Modification
in thread Regex Modification
All rite I am telling the specification for a match:
Full knowledge of a problem if often the first step toward a solution!
Here's an incomplete solution: incomplete because I feel I should be able to match with strings like
qw(x123-456789123-456x x123456-789123456x x123456789123456x)
and I can't. In addition, the regex I came up with is quite complicated, probably excessively so.
Be that as it may, everything else seems to work as intended. The critical portions are the $diff, $d_min and $ndn regexes in the m1() function. I haven't had time to work on this as I would like, but may do so shortly; it's an interesting problem. Sorry for the delay in getting back to you on this. HTH.
use warnings FATAL => 'all' ; use strict; use Test::More # tests => ?? + 1 # Test::NoWarnings add 1 test 'no_plan' ; use Test::NoWarnings; my @matches = ( '123 456 789', qw(123.456.789 123-456-789 123-456789 123456-789 123-456789123456 123456789123-456 123-4-5-6-7-8-9-1-2-3-456 123-4-5-6-789 1234-56789 123456789-123456 123456789 123456789123456), qw(x123-456-789x x123456-789x x123456789x), qw(x123-456789123-456x x123456-789123456x x123456789123456x), ); my @no_matches = ( # none shall pass! '', qw(a abcdefgh abcdefghi 12345678 1234567891234567 123-45-678 123-4567891234-567 -123456789 123456789- 1-23456789 12345678-9 1-2345678-9 12-3456789 1234567-89 12-34567-89 -1234567891234567 1234567891234567- 1-234567891234567 123456789123456-7 12345678-91234567 12-34567891234567 12345678912345-67 ), ); MATCHER: for my $matcher (qw(m1)) { note "matching with $matcher()"; *match = do { no strict 'refs'; *$matcher; }; note "ALL the following should match"; for my $n (@matches) { my ($m, $d) = match($n); ok $m, qq{match: '$n' (diff $d)}; } note "NONE of the following should match"; for my $n (@no_matches) { my ($m, $d) = match($n); ok ! $m, qq{NO match: '$n' (diff $d)}; } } # end for MATCHER sub m1 { my ($string, ) = @_; my $diff = qr{ [-. ] }xms; # differentiator chars my $d_min = qr{ \d{3,} }xms; # minimum group of digits local our ($digits, $diffs); use re 'eval'; my $ndn = qr{ (?<! \d) (?<! $diff) $d_min ($diff?) (?{ $diffs = length $1; }) (?: \d* \g{-1} (?= \d) (?{ $diffs && ++$diffs }) )* ($d_min) (?! \d) (?(?{ $diffs }) (?! \g{-2}) | (?! $diff)) (?(?{ $digits = $+[2] - $diffs; $diffs > 10 || $digits > 15 || $digits < 9 }) (*FAIL)) }xms; my $match = $string =~ $ndn; my $diff_seq = defined($1) ? qq{'$1'} : 'undef'; return $match, $diff_seq; } # end sub m1()
Output:
c:\@Work\Perl\monks\Anonymous Monk\1027898>perl extract_9-15_digits_2. +pl # matching with m1() # ALL the following should match ok 1 - match: '123 456 789' (diff ' ') ok 2 - match: '123.456.789' (diff '.') ok 3 - match: '123-456-789' (diff '-') ok 4 - match: '123-456789' (diff '-') ok 5 - match: '123456-789' (diff '-') ok 6 - match: '123-456789123456' (diff '-') ok 7 - match: '123456789123-456' (diff '-') ok 8 - match: '123-4-5-6-7-8-9-1-2-3-456' (diff '-') ok 9 - match: '123-4-5-6-789' (diff '-') ok 10 - match: '1234-56789' (diff '-') ok 11 - match: '123456789-123456' (diff '-') ok 12 - match: '123456789' (diff '') ok 13 - match: '123456789123456' (diff '') ok 14 - match: 'x123-456-789x' (diff '-') ok 15 - match: 'x123456-789x' (diff '-') ok 16 - match: 'x123456789x' (diff '') not ok 17 - match: 'x123-456789123-456x' (diff undef) # Failed test 'match: 'x123-456789123-456x' (diff undef)' # at extract_9-15_digits_2.pl line 79. not ok 18 - match: 'x123456-789123456x' (diff undef) # Failed test 'match: 'x123456-789123456x' (diff undef)' # at extract_9-15_digits_2.pl line 79. not ok 19 - match: 'x123456789123456x' (diff undef) # Failed test 'match: 'x123456789123456x' (diff undef)' # at extract_9-15_digits_2.pl line 79. # NONE of the following should match ok 20 - NO match: '' (diff undef) ok 21 - NO match: 'a' (diff undef) ok 22 - NO match: 'abcdefgh' (diff undef) ok 23 - NO match: 'abcdefghi' (diff undef) ok 24 - NO match: '12345678' (diff undef) ok 25 - NO match: '1234567891234567' (diff undef) ok 26 - NO match: '123-45-678' (diff undef) ok 27 - NO match: '123-4567891234-567' (diff undef) ok 28 - NO match: '-123456789' (diff undef) ok 29 - NO match: '123456789-' (diff undef) ok 30 - NO match: '1-23456789' (diff undef) ok 31 - NO match: '12345678-9' (diff undef) ok 32 - NO match: '1-2345678-9' (diff undef) ok 33 - NO match: '12-3456789' (diff undef) ok 34 - NO match: '1234567-89' (diff undef) ok 35 - NO match: '12-34567-89' (diff undef) ok 36 - NO match: '-1234567891234567' (diff undef) ok 37 - NO match: '1234567891234567-' (diff undef) ok 38 - NO match: '1-234567891234567' (diff undef) ok 39 - NO match: '123456789123456-7' (diff undef) ok 40 - NO match: '12345678-91234567' (diff undef) ok 41 - NO match: '12-34567891234567' (diff undef) ok 42 - NO match: '12345678912345-67' (diff undef) ok 43 - no warnings 1..43 # Looks like you failed 3 tests of 43.
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^6: Regex Modification
by Anonymous Monk on Apr 12, 2013 at 12:35 UTC | |
by AnomalousMonk (Archbishop) on Apr 13, 2013 at 20:07 UTC | |
by AnomalousMonk (Archbishop) on Apr 14, 2013 at 07:13 UTC | |
by AnomalousMonk (Archbishop) on Apr 16, 2013 at 10:02 UTC |