# Repeatedly classify the leading run of characters in $text, strip it, and
# print one character-class label per stripped run until $text is empty.
#
# Alternatives are tried left to right, so (\d+) has to come before (\w+):
# \w also matches digits, and with (\w+) listed first the digit branch could
# never be selected.
$regex = '(\d+)|(\w+)|(\s+)|([^\w\d\s]+)';
$text  = 'The world is foo 2!';

# (?:...) binds ^ to the whole alternation. A bare ^$regex would anchor only
# the first alternative, because | has the lowest precedence in a pattern.
while ( $text =~ s/^(?:$regex)// ) {
    # Only the capture group of the branch that matched is defined.
    print '\d+',        "\n" if defined $1;
    print '\w+',        "\n" if defined $2;
    print '\s+',        "\n" if defined $3;
    print '[^\w\d\s]+', "\n" if defined $4;
}