# Quick test against $_: matches when some byte in \xC0-\xFF is immediately
# followed by one or more bytes in \x80-\xBF.  Evaluated in void context here;
# the statement terminator keeps the counter below parseable as Perl.
/[\xC0-\xFF][\x80-\xBF]+/;
####
# Count, in $_, how many well-formed UTF-8 multi-byte sequences occur and how
# many high-bit bytes (\x80-\xFF) are NOT part of such a sequence, then print
# both totals.
# NOTE(review): SOURCE arrived collapsed onto a single line; the line breaks
# (including those inside the heredoc) are reconstructed -- confirm against
# the intended output format.
my ($utf8, $bare) = _count_high_byte_runs($_);

print <<"END";
utf-8: $utf8
bare: $bare
END

# _count_high_byte_runs($octets)
#   $octets - string to scan (undef is treated as containing nothing).
# Returns the two-element list ($utf8, $bare):
#   $utf8 - number of well-formed UTF-8 multi-byte sequences found
#   $bare - number of remaining bytes in \x80-\xFF (stray continuation bytes,
#           truncated/overlong/surrogate/out-of-range sequences, or plain
#           8-bit characters)
sub _count_high_byte_runs {
    my ($octets) = @_;
    my ($utf8, $bare) = (0, 0);

    # Request byte semantics for the matches below, as the original did.
    use bytes;

    # Exactly one well-formed UTF-8 multi-byte sequence.  A looser
    # "lead byte + any continuation bytes" test would also accept overlong
    # forms (\xC0\x80), surrogates (\xED\xA0\x80), lead bytes above \xF4,
    # truncated sequences (Latin-1 "\xE9\xA0") and surplus continuation
    # bytes, misreporting 8-bit text as UTF-8.
    my $well_formed = qr{
          [\xC2-\xDF]         [\x80-\xBF]
        | \xE0                [\xA0-\xBF] [\x80-\xBF]
        | [\xE1-\xEC\xEE\xEF] [\x80-\xBF]{2}
        | \xED                [\x80-\x9F] [\x80-\xBF]
        | \xF0                [\x90-\xBF] [\x80-\xBF]{2}
        | [\xF1-\xF3]         [\x80-\xBF]{3}
        | \xF4                [\x80-\x8F] [\x80-\xBF]{2}
    }x;

    # The lookahead skips straight to the next high-bit byte.  At that
    # position either a whole well-formed sequence is consumed ($1 stays
    # undefined) or the single offending byte is captured in $1.
    while ($octets =~ /(?=[\x80-\xFF])(?:$well_formed|(.))/g) {
        if (defined $1) {
            $bare++;
        }
        else {
            $utf8++;
        }
    }

    return ($utf8, $bare);
}