# Tokenize $text: repeatedly strip one token from the front of the string and
# print it, until the string has been fully consumed.
#
# Token alternatives, tried left to right at the start of the remaining text:
#   \w+  a run of word characters (letters, digits, underscore)
#   \d+  a run of digits (shadowed by \w+ above, since \w already matches
#        digits; kept so the alternation order matches the original)
#   \s+  a run of whitespace
#   .    any other single character (punctuation etc.)
# Every alternative consumes at least one character, so each successful
# substitution shortens $text and the loop is guaranteed to terminate.
$regex = '\w+|\d+|\s+|.';
$text  = 'The world is foo 2!';

# The parentheses serve two purposes: they make ^ anchor the whole alternation
# (without them ^ binds only to the first alternative), and they capture the
# removed token into $1 for printing.
while ($text =~ s/^($regex)//) {
    print "token: $1\n";
}