# Count the distinct non-ASCII characters found in the input.
#
# Usage: perl SCRIPT [FILE ...]      (reads STDIN when no FILE is given)
#
# Prints one "<char> found <n> times" line per distinct character, then a
# summary line with the number of distinct characters.
#
# NOTE(review): input is decoded as UTF-8 so that a multi-byte character is
# tallied once rather than once per byte. This assumes the input really is
# UTF-8; confirm against the data this is run on.

use 5.008;    # the open pragma's :std needs 5.8 (was a commented-out "require 5.6")
use strict;
use warnings;
use utf8;

# "use utf8" only covers literals in this source file. This pragma is what
# makes <> deliver decoded characters and lets STDOUT accept them without
# "Wide character" warnings.
use open qw(:std :encoding(UTF-8));

my %count_of;    # non-ASCII character => number of occurrences

while ( my $line = <> ) {
    tally_non_ascii( $line, \%count_of );
}

# Sorted so the report is reproducible; hash order changes between runs.
for my $char ( sort keys %count_of ) {
    print "$char found $count_of{$char} times\n";
}

print "found " . scalar( keys %count_of ) . " distinct non-ascii chars\n";

# tally_non_ascii($text, \%count_of)
#
# Adds one to $count_of->{$char} for every character of $text outside the
# ASCII range U+0000..U+007F. Returns nothing; the hash is updated in place.
sub tally_non_ascii {
    my ( $text, $count_of ) = @_;

    # The range starts at \x00 so that NUL is treated as ASCII. A capture
    # group is used instead of $&, which slows every regex on Perls < 5.20.
    while ( $text =~ /([^\x00-\x7F])/g ) {
        ++$count_of->{$1};
    }

    return;
}

# ----------------------------------------------------------------------
# Draft, not active code: a per-record variant meant to remember which
# record each non-ASCII character came from. Kept verbatim for reference:
#
#   while(<>){
#       while (my @matches = /[^\x{1}-\x{7f}]/g){
#           $conid = /patten-to-find-this-column/;
#           $hash_of_lists{$conid} =[@matches];
#           # linking this with inner hash of found characters is fuzzy but near...
#           ++$chars{$&};
#       }
#   }
#
# NOTE(review): problems to resolve before enabling it:
#   * $conid and %hash_of_lists are never declared, so it will not compile
#     under "use strict".
#   * In list context m//g returns every match at once, so the inner
#     "while" condition stays true forever on a line containing a match.
#     An "if" is what is needed there.
#   * "$conid = /.../" stores the match's truth value, not the matched
#     text. Use a capture in list context: my ($conid) = /(...)/;
#   * After the $conid match succeeds, $& refers to that match rather than
#     to a non-ASCII character.
#   * The input has already been consumed by the loop above, so this must
#     replace that loop rather than follow it.