#!/usr/bin/perl -w
#
# Benchmark several ways of pulling a leading word followed by two or four
# numeric columns out of whitespace-separated records.  Each candidate is a
# sub that parses every record in @data; cmpthese() prints the comparison.
#
#use re 'debug';
use strict;
use warnings;
use Benchmark qw(cmpthese);

my $i = 0;    # NOTE(review): not referenced anywhere in this script.

# Sample records: a name followed by either four or two decimal numbers.
my @data = (
    "Abc 21223.7 21225.33 22270.3 22280.1",
    "Def 21600.23 24567.43",
);

# One anchored regex with explicit captures; the second pair of numbers is
# optional.
sub yourRE {
    my @fields;
    foreach (@data) {
        @fields = ($_ =~ m/^(\w+)\s+([\d\.]+)\s+([\d\.]+)(?:\s+([\d\.]+)\s+([\d\.]+))?/);
    }
}

# Plain whitespace split of $_ (no validation at all).
sub hisSplit {
    my @fields;
    foreach (@data) {
        @fields = split;
    }
}

# Global match collecting every token in one pass.
# The numeric alternative must come first: \w also matches digits, so with
# \w+ first a value such as "21223.7" would be returned as "21223" and ".7".
sub regex_g {
    my @fields;
    foreach (@data) {
        @fields = ($_ =~ m/([0-9.]+|\w+)/g);
    }
}

# Two passes: first word as the name, then every run of digits/dots.
sub regex_g2 {
    my $name;
    my @digits;
    foreach (@data) {
        # List assignment so $name receives the captured word; a scalar
        # assignment would only store the match's success flag.
        ($name) = ($_ =~ m/(\w+)/);
        @digits = ($_ =~ m/([0-9.]+)/g);
    }
}

# Capturing pattern for one decimal number.
# Modified from: perldoc -q "scalar is a number"
my $num_rex = qr/(-?(?:\d+(?:\.\d*)?|\.\d+))/;

# Fully anchored, strict match built from $num_rex; captures all fields.
sub demsRE {
    my @fields;
    foreach (@data) {
        @fields = ($_ =~ /^\s* (\w+) \s+ $num_rex \s+ $num_rex (?: \s+ $num_rex \s+ $num_rex )? \s*$/x);
    }
}

# Same as demsRE, but compiled once via /o.
# /o helps a lot
sub demsRE_o {
    my @fields;
    foreach (@data) {
        @fields = ($_ =~ /^\s* (\w+) \s+ $num_rex \s+ $num_rex (?: \s+ $num_rex \s+ $num_rex )? \s*$/xo);
    }
}

# Non-capturing variant of the number pattern (unsigned).
# Modified from: perldoc -q "scalar is a number"
my $nc_num_rex = qr/(?:\d+(?:\.\d*)?|\.\d+)/;

# Let's see if it's the captures in dems's approach that slow things down:
# validate the record with the non-capturing pattern, then split it.
# Turns out it isn't.
sub demsChkSplt {
    my @fields;
    foreach (@data) {
        if (/^\s* (\w+) \s+ $nc_num_rex \s+ $nc_num_rex (?: \s+ $nc_num_rex \s+ $nc_num_rex )? \s*$/x) {
            @fields = split;
        }
    }
}

# Same as demsChkSplt, but compiled once via /o.
# /o helps a lot
sub demsChkSplt_o {
    my @fields;
    foreach (@data) {
        if (/^\s* (\w+) \s+ $nc_num_rex \s+ $nc_num_rex (?: \s+ $nc_num_rex \s+ $nc_num_rex )? \s*$/ox) {
            @fields = split;
        }
    }
}

# A negative count asks Benchmark to run each candidate for at least that
# many CPU seconds (here 3) before comparing rates.
cmpthese(
    -3,
    {
        yourRE        => \&yourRE,
        hisSplit      => \&hisSplit,
        regex_g       => \&regex_g,
        regex_g2      => \&regex_g2,
        demsRE        => \&demsRE,
        demsChkSplt   => \&demsChkSplt,
        demsRE_o      => \&demsRE_o,
        demsChkSplt_o => \&demsChkSplt_o,
    }
);