use Modern::Perl;
use utf8;

# Compare three ways of breaking a sentence into words, using the sample
# sentences stored after __DATA__:
#   1. match ASCII alphanumerics followed by exactly one whitespace character
#   2. match ASCII alphanumerics followed by optional whitespace
#   3. split on every run of characters that are neither letters nor numbers
#      (Unicode-aware, so accented letters stay inside their word)
# Each result is printed with '*' between the pieces so that any whitespace
# captured by the regexes is visible.

# The strings read from DATA are decoded characters (the __DATA__ token is in
# the scope of "use utf8"), so encode them again on the way out.
binmode STDOUT, ':encoding(UTF-8)';

while ( my $line = <DATA> ) {

    # Requires one trailing whitespace character, so a word directly followed
    # by punctuation is not matched.
    my @words = $line =~ m/[a-zA-Z0-9]+\s/g;
    say 'Regex : ', join '*', @words;

    # Trailing whitespace is optional here, so every ASCII alphanumeric run
    # matches; non-ASCII letters still break a word apart.
    @words = $line =~ m/[a-zA-Z0-9]+\s*/g;
    say 'Regex *: ', join '*', @words;

    # Split on non letters + numbers.
    @words = split /[^\pL\pN]+/, $line;
    say 'split: ', join '*', @words;
}

__DATA__
This is a simple sentence.
This one has punctuation, indeed it has!
And multiple spaces all over the place !
And nön-ascii chàraçtérs, wôw!
What about l33t sp33ch 4 u?