use strict;
use warnings;

# Demo driver: tokenize a sample sentence and print the unique tokens
# joined by "|" (no trailing newline, as before).
my $sample = 'This,is, an, example. Keep $2.50, 1,500, and 192.168.1.1.';
my @words  = tokenize($sample);
print join "|", @words;

######################################
# tokenize($msg)
#
# Splits $msg into tokens and returns them in order of first appearance,
# dropping empty tokens and case-insensitive duplicates (the first
# spelling seen wins).
#
# A comma or period is a separator unless it sits between two digits, so
# "1,500", "$2.50" and "192.168.1.1" survive as single tokens.  Any run
# of characters other than letters, digits, ', $, !, -, . and , is also
# a separator.
#
# An undefined $msg is treated as the empty string.
######################################
sub tokenize {
    my $msg = shift;
    $msg = '' unless defined $msg;

    # Comma/period preceded by a non-digit.
    my $after_non_digit = qr/(?<=\D)[,.]/;

    # Comma/period followed by a non-digit or the end of the string.
    my $before_non_digit = qr/[,.](?=\D|\z)/;

    # Run of non-token characters.  The hyphen is placed last so it is a
    # literal '-' rather than the start of a "!-." range, which would
    # wrongly admit characters such as ", #, %, &, (, ), * and +.
    my $non_token = qr/[^A-Za-z0-9'\$!.,-]+/;

    my %seen;
    my @tokens = grep { length($_) and !$seen{ lc $_ }++ }
        split /$after_non_digit|$before_non_digit|$non_token/, $msg;

    return @tokens;
}

###############################
# token_2($msg)
#
# Alternative tokenizer using a single combined separator pattern and a
# hash for de-duplication.  Returns the unique tokens (case-sensitive)
# in unspecified order; empty tokens are discarded.
#
# An undefined $msg is treated as the empty string.
###############################
sub token_2 {
    my $msg = shift;
    $msg = '' unless defined $msg;

    my $separator = qr/(?<=\D)[,.]|[,.](?=\D|\z)|[^\w'\$!,.-]+/;

    # Lexical hash so tokens do not leak from one call into the next.
    my %words;
    @words{ split /$separator/, $msg } = ();

    # Adjacent separators (e.g. ", ") make split yield empty fields.
    delete $words{''};

    return keys %words;
}

################################
# take_3($msg)
#
# Alternative tokenizer that matches tokens directly instead of
# splitting on separators.  A token is a run of word characters, ', $,
# ! or -, and may contain '.' or ',' only where both neighbours are
# digits.  Returns the unique tokens (case-sensitive) in unspecified
# order.
#
# An undefined $msg is treated as the empty string.
################################
sub take_3 {
    my $msg = shift;
    $msg = '' unless defined $msg;

    my @found = $msg =~ m/( (?: [\w'\$!-] | (?<=\d)[.,](?=\d) )+ )/gx;

    my %words;
    @words{@found} = ();

    return keys %words;
}