# a simple tokenizer
# split() with a capturing separator returns the separators as well, and
# yields an empty field wherever two separators are adjacent
$ perl -e 'print join("|", split(/(\W)/, "Hello, World! ?")), "\n";'
Hello|,|| |World|!|| ||?

# get rid of the empty string tokens
# (filter on length, not truth: grep {$_} would also discard a "0" token)
$ perl -e 'print join("|", grep { length } split(/(\W)/, "Hello, World! ?")), "\n";'
Hello|,| |World|!| |?

# or use plain regex
# (a list-context m//g without capture groups returns every whole match)
$ perl -e 'print join("|", "Hello, World! ?" =~ /\W|\w+/g), "\n";'
Hello|,| |World|!| |?