#!/usr/bin/perl
#
# Transliterates the visible text of an HTML file from Serbian Latin script
# to Serbian Cyrillic script. Markup (tags, comments, the contents of
# <script> and <style> elements) and character entities such as &nbsp; are
# copied to the output unchanged; only the text between them is converted.
#
# Usage: perl <this script> INPUT.htm [OUTPUT.htm]
#
# OUTPUT defaults to "index_cyrilic.htm".
#
# NOTE(review): the hard-coded input file name of the original script was
# lost when the source was garbled, so the input path is taken from the
# command line. Restore a default here if the original name is known.

use strict;
use warnings;
use utf8;
binmode(STDOUT, ":utf8");
use open ':encoding(utf8)';    # default encoding for opened files is UTF-8

# Latin -> Cyrillic table. Keys are one- or two-letter Latin units.
my %CYRILLIC_FOR = (
    # Digraphs: two Latin letters that become a single Cyrillic letter.
    'nj' => 'њ', 'Nj' => 'Њ', 'NJ' => 'Њ',
    'lj' => 'љ', 'Lj' => 'Љ', 'LJ' => 'Љ',
    'dž' => 'џ', 'Dž' => 'Џ', 'DŽ' => 'Џ',

    # Single letters whose Cyrillic form is visibly different.
    'b' => 'б', 'B' => 'Б', 'c' => 'ц', 'C' => 'Ц',
    'č' => 'ч', 'Č' => 'Ч', 'ć' => 'ћ', 'Ć' => 'Ћ',
    'd' => 'д', 'D' => 'Д', 'đ' => 'ђ', 'Đ' => 'Ђ',
    'f' => 'ф', 'F' => 'Ф', 'g' => 'г', 'G' => 'Г',
    'h' => 'х', 'H' => 'Х', 'i' => 'и', 'I' => 'И',
    'l' => 'л', 'L' => 'Л', 'm' => 'м', 'n' => 'н',
    'N' => 'Н', 'p' => 'п', 'P' => 'П', 'r' => 'р',
    'R' => 'Р', 's' => 'с', 'S' => 'С', 'š' => 'ш',
    'Š' => 'Ш', 't' => 'т', 'u' => 'у', 'U' => 'У',
    'v' => 'в', 'V' => 'В', 'z' => 'з', 'Z' => 'З',
    'ž' => 'ж', 'Ž' => 'Ж',

    # Look-alike letters. The Cyrillic glyph resembles the Latin one but is
    # a different code point; without these the output would mix scripts.
    # Written as escapes so the two scripts cannot be confused in an editor.
    'a' => "\x{0430}", 'A' => "\x{0410}",
    'e' => "\x{0435}", 'E' => "\x{0415}",
    'j' => "\x{0458}", 'J' => "\x{0408}",
    'k' => "\x{043A}", 'K' => "\x{041A}",
    'o' => "\x{043E}", 'O' => "\x{041E}",
    'M' => "\x{041C}", 'T' => "\x{0422}",
);

# Matches any key of %CYRILLIC_FOR. Longer keys come first so that a digraph
# wins over its first letter. The keys are plain letters, so no escaping of
# regex metacharacters is needed.
my $LATIN_UNIT_RE = do {
    my @units = sort { length($b) <=> length($a) || $a cmp $b }
                keys %CYRILLIC_FOR;
    my $alternation = join '|', @units;
    qr/($alternation)/;
};

# Matches the parts of an HTML document that must NOT be transliterated.
my $PROTECTED_RE = qr{
      <!--.*?-->                              # HTML comment
    | <script\b[^>]*>.*?</script\s*>          # script element with its code
    | <style\b[^>]*>.*?</style\s*>            # style element with its rules
    | <[^>]*>                                 # any other tag
    | &(?:\#[0-9]+|\#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);   # entity
}xsi;

# konverter($text)
#
# Returns $text with every Serbian Latin letter replaced by its Cyrillic
# counterpart. Characters without an entry in the table (digits,
# punctuation, whitespace, q/w/x/y, ...) are returned unchanged. An
# undefined argument yields the empty string.
#
# The digraphs nj, lj and dž are always merged into one Cyrillic letter.
# Words in which the two letters belong to different parts of the word
# (e.g. "injekcija", "nadživeti") would need an exception list, which this
# script does not have.
sub konverter {
    my ($text) = @_;
    return '' unless defined $text;

    $text =~ s/$LATIN_UNIT_RE/$CYRILLIC_FOR{$1}/g;
    return $text;
}

# convert_html($html)
#
# Returns $html with the text between protected spans (see $PROTECTED_RE)
# transliterated by konverter() and the protected spans copied verbatim.
sub convert_html {
    my ($html) = @_;
    return '' unless defined $html;

    # split with a single capturing group returns alternating pieces:
    # even indexes are plain text, odd indexes are the protected matches.
    # The -1 limit keeps trailing empty pieces so the alternation holds.
    my @pieces = split /($PROTECTED_RE)/, $html, -1;

    my $converted = '';
    for my $index (0 .. $#pieces) {
        $converted .= $index % 2
            ? $pieces[$index]
            : konverter($pieces[$index]);
    }
    return $converted;
}

# main($in_path, $out_path)
#
# Reads the whole input file, converts it, writes the result to the output
# file and echoes it on STDOUT. Dies with a message on any I/O failure.
sub main {
    my ($in_path, $out_path) = @_;

    die "Usage: $0 INPUT.htm [OUTPUT.htm]\n" unless defined $in_path;
    $out_path = "index_cyrilic.htm" unless defined $out_path;

    # Read the input file into one string.
    open my $in, '<:encoding(UTF-8)', $in_path
        or die "Cannot open '$in_path' for reading: $!\n";
    my $infile = do { local $/; <$in> };
    close $in
        or die "Cannot close '$in_path': $!\n";

    my $outcode = convert_html($infile);

    # Writing to file. Buffered write errors only surface at close, so
    # close is checked as well.
    open my $out, '>:encoding(UTF-8)', $out_path
        or die "Cannot open '$out_path' for writing: $!\n";
    print {$out} $outcode
        or die "Cannot write to '$out_path': $!\n";
    close $out
        or die "Cannot close '$out_path': $!\n";

    print "\n";
    print "code on the output:\n";
    print "\n";
    print "$outcode\n";

    return;
}

# Run only when executed as a script, so the subs can be loaded and tested
# without touching any files.
main(@ARGV) unless caller;

1;