#! perl -w
# Build a JavaScript regex character class (using \uXXXX escapes) that is
# equivalent to a Perl character class written with literal UTF-8 characters.
#
# Approach:
#   1. Test every code point below 64K against the Perl pattern and record
#      the matches in a bit vector.
#   2. Convert the bit vector to an inversion list (AKA "toggle list"): the
#      sorted positions where membership flips between "out" and "in".
#   3. Render each [start, end) pair of the inversion list as a single
#      escape or an escaped range and print the resulting JS regex literal.
use strict;
use warnings;
use utf8;    # the character class below contains literal UTF-8 characters

# Return a bit vector (see perldoc -f vec) with bit N set for every code
# point N in 1 .. $limit-1 whose character matches $pattern.
sub _matching_bitset {
    my ($pattern, $limit) = @_;

    my $bits = '';
    for my $ord (1 .. $limit - 1) {
        # Not every "character" composed this way is valid Unicode, so the
        # match is deliberately best-effort: problems are silenced and the
        # code point is simply treated as a non-match.
        eval {
            no warnings;
            vec($bits, $ord, 1) = 1 if chr($ord) =~ $pattern;
        };
    }
    return $bits;
}

# Convert a bit vector into an inversion list: even-indexed entries are the
# first bit of a run of set bits, odd-indexed entries are the first bit
# after that run.
sub _inversion_list {
    my ($bits) = @_;

    my @boundaries;
    my $inside = 0;    # 1 while we are inside a run of set bits

    # Go one byte past the end of the string: vec() reads 0 there, which
    # closes a run that extends to the very last stored bit.
    for my $byte_index (0 .. length $bits) {
        # Fast path: skip bytes in which no bit differs from the current state.
        next if vec($bits, $byte_index, 8) == ($inside ? 255 : 0);

        for my $bit_index (8 * $byte_index .. 8 * $byte_index + 7) {
            next if vec($bits, $bit_index, 1) == $inside;
            push @boundaries, $bit_index;
            $inside = $inside ? 0 : 1;
        }
    }
    return @boundaries;
}

# Render an inversion list as the body of a JavaScript character class.
# A run of one code point becomes "\uXXXX"; a longer run becomes
# "\uXXXX-\uYYYY" with an inclusive upper bound.
sub _js_char_class {
    my @boundaries = @_;

    my $class = '';
    while (@boundaries) {
        my ($first, $end) = splice @boundaries, 0, 2;
        $class .= sprintf '\\u%04x', $first;
        next if $end == $first + 1;    # single character, no range needed
        $class .= sprintf '-\\u%04x', $end - 1;
    }
    return $class;
}

my $bits       = _matching_bitset(qr/[גדהוāăąבא]/, 64 * 1024);
my @boundaries = _inversion_list($bits);

print '/[', _js_char_class(@boundaries), "]/\n";