use strict;
use warnings;

# Demo driver: tokenize a sample sentence and print the tokens joined by "|".
# A lexical is used instead of assigning to the global $_.
my $sample = 'This,is, an, example. Keep $2.50, 1,500, and 192.168.1.1.';
my @words  = tokenize($sample);
print join '|', @words;

######################################
# tokenize($msg)
#
# Splits $msg into tokens and returns them as a list, in order of first
# appearance, with case-insensitive duplicates removed (the first spelling
# seen is the one kept).
#
# A token is a run of letters, digits, and the characters ' $ ! - . ,
# A comma or period only stays inside a token when it sits between two
# digits (as in "1,500" or "192.168.1.1"); otherwise it acts as a
# separator. An undefined $msg is treated as the empty string.
######################################
sub tokenize {
    my ($msg) = @_;
    my $text = defined $msg ? $msg : '';

    # The hyphen is placed last in the bracketed class so it is a literal
    # character. Written as "!-." it would form a range that also admits
    # quotes, parentheses, and several other punctuation characters.
    my $separator = qr{
          (?<=\D)[,.]              # comma or period preceded by a non-digit
        | [,.](?!\d)               # comma or period not followed by a digit
        | [^A-Za-z0-9'\$!.,-]+     # run of characters never allowed in a token
    }x;

    my %seen;
    my @tokens;
    for my $field ( split $separator, $text ) {
        next unless length $field;       # adjacent separators yield empty fields
        next if $seen{ lc $field }++;    # keep only the first occurrence
        push @tokens, $field;
    }

    return @tokens;
}    ##tokenize