# Delimiter pattern used by tokenize_msg_w_oneregex().
#
# A delimiter is a run of one or more of the following:
#   * any character that is NOT a word character, apostrophe, dollar sign,
#     exclamation mark, comma, period or hyphen;
#   * a period or comma that directly follows a non-digit;
#   * a period or comma that is directly followed by a non-digit or by the
#     end of the string.
# As a result periods and commas survive inside tokens only when they sit
# between digits ("3.50", "1,000") or lead the string right before a digit.
#
# The pattern must not contain capturing groups: split() would return the
# captured delimiters as additional fields.
use constant RE_WORDS => qr{
    (?:
        [^\w'\$!,.-]            # anything that cannot be part of a token
      | (?<= \D ) [.,]          # dot or comma preceded by a non-digit
      | [.,] (?= \D | \z )      # dot or comma followed by a non-digit or end of input
    )+
}x;

# tokenize_msg_w_oneregex($msg)
#
# Splits $msg on RE_WORDS and returns the distinct tokens found.
#
# Returns: in list context, the unique tokens in no particular order (they
# come from a hash); in scalar context, the number of unique tokens.
# An undefined or empty message yields no tokens.
sub tokenize_msg_w_oneregex {
    my $msg = shift;

    # Hash keys give de-duplication for free; the values are never used.
    my %seen;
    @seen{ split RE_WORDS, $msg } = () if defined $msg;

    # split() emits a leading empty field when the message starts with a
    # delimiter; the empty string is not a token, so drop it.
    delete $seen{''};

    return keys %seen;
}