#!/usr/bin/perl
#
# Demonstrates how perl's internal utf8 flag affects regex matching.
#
# The same Russian text and search word are held in three forms:
#   raw    - ISO-8859-5 encoded bytes            (utf8 flag OFF)
#   "_d"   - decoded to perl character strings   (utf8 flag ON)
#   "_e"   - re-encoded to UTF-8 bytes           (utf8 flag OFF)
# A match only succeeds when text and pattern are in the same form.

use strict;
use warnings;
use utf8;      # the Cyrillic literals below are UTF-8 in this source file
use Encode;

# Return 1 if $text matches $regex, '' otherwise.
# scalar() matters: in list context a failed match returns an empty list,
# which would silently drop a value when building the results hash below.
sub is_match {
    my ( $text, $regex ) = @_;
    return scalar( $text =~ /$regex/ );
}

# Build the "raw" ISO-8859-5 byte strings explicitly, so the demo does not
# depend on the encoding this source file happens to be saved in.
my $regex_raw = encode( 'iso-8859-5', 'много' );
my $text_raw  = encode( 'iso-8859-5', 'там очень много в городе, вот этих' );

# the "_d" scalars have the utf8 flag ON
# perl will treat their values with character semantics
my $regex_utf8_d = decode( 'iso-8859-5', $regex_raw );
my $text_utf8_d  = decode( 'iso-8859-5', $text_raw );

# the "_e" scalars have the utf8 flag OFF
# this use of encode is unnecessary and counter-productive;
# it causes perl to treat the values with byte semantics
my $regex_utf8_e = encode( 'utf8', $regex_utf8_d );
my $text_utf8_e  = encode( 'utf8', $text_utf8_d );

# Labels read "<text form>-<regex form>"; kept in an array to fix print order.
my @labels = qw/raw-raw dec-dec enc-enc dec-enc enc-dec/;

my %match_for = (
    'raw-raw' => is_match( $text_raw,    $regex_raw ),
    'dec-dec' => is_match( $text_utf8_d, $regex_utf8_d ),
    'enc-enc' => is_match( $text_utf8_e, $regex_utf8_e ),
    'dec-enc' => is_match( $text_utf8_d, $regex_utf8_e ),
    'enc-dec' => is_match( $text_utf8_e, $regex_utf8_d ),
);

for my $label (@labels) {
    print "$label : $match_for{$label}\n";
}

# Expected output (the mixed-form cases fail to match and print nothing
# after the colon):
#
#   raw-raw : 1
#   dec-dec : 1
#   enc-enc : 1
#   dec-enc :
#   enc-dec :