in reply to Orthography Translation using Regex
#!/usr/local/bin/perl
# encodingConverter.pl
# Duane L. Blanchard
# http://students.washington.edu/blanch/downloads/
# blanch@iname.com
#
# Purpose:
#   Asks for an input file on STDIN, replaces every input byte found in
#   %name with the Unicode Cyrillic character it stands for, copies every
#   other character through unchanged, and writes the result as UTF-8 to
#   outFile.txt in the current directory.
#
# NOTE(review): the byte values below (0xE0-0xE3, 0xC0-0xC3) look like
# Windows-1251 Cyrillic text being viewed as Latin-1 -- confirm against the
# real input files before extending the table.

use strict;
use warnings;
#use utf8;   # deliberately off: the table keys are written as byte escapes
use charnames ':full';

# Name of the file the converted text is written to.
my $OUT_PATH = 'outFile.txt';

#hash tables for each encoding must be at the top
#Hash table Keys: input bytes (shown as Latin-1 in the trailing comments),
#Values: Unicode characters selected by name.
# The keys are spelled as \x escapes so that they stay single bytes no
# matter which encoding this source file is saved in.
my %name = (
    # Lowercase
    "\xE0" => "\N{CYRILLIC SMALL LETTER A}",      # à
    "\xE1" => "\N{CYRILLIC SMALL LETTER BE}",     # á
    "\xE2" => "\N{CYRILLIC SMALL LETTER VE}",     # â
    "\xE3" => "\N{CYRILLIC SMALL LETTER GHE}",    # ã
    # Uppercase
    # 0xC0 (À), not ASCII "A": a plain "A" key would rewrite every English
    # capital A, which the pass-through branch is meant to preserve.
    "\xC0" => "\N{CYRILLIC CAPITAL LETTER A}",    # À
    "\xC1" => "\N{CYRILLIC CAPITAL LETTER BE}",   # Á
    "\xC2" => "\N{CYRILLIC CAPITAL LETTER VE}",   # Â
    "\xC3" => "\N{CYRILLIC CAPITAL LETTER GHE}",  # Ã
);

# Open the input file, re-prompting until a readable path is given.
print "What file would you like to convert? \n";
my $inFile = read_file_name();    #query user for input file

my $in;
# Three-argument open: the typed text is always treated as a plain path,
# never as a mode prefix or a pipe command.
until (open $in, '<', $inFile) {
    print "\n$inFile could not be found.",
          " Please provide the absolute path. \n";
    $inFile = read_file_name();
}

# Open the output only after the input is known to be readable.  A failure
# here cannot be repaired by retrying, so report it and stop.  The encoding
# layer turns the wide Cyrillic characters into well-formed UTF-8.
open my $out, '>:encoding(UTF-8)', $OUT_PATH
    or die "Cannot open $OUT_PATH for writing: $!\n";

while (my $line = <$in>) {
    print {$out} convert_line($line);
}

close $in;
# Buffered write errors only surface at close, so check it.
close $out or die "Cannot finish writing $OUT_PATH: $!\n";

print "\nYour converted text is in:\n", ">> outFile.txt.\n\n";

# Reads one line from STDIN and returns it without its trailing newline.
# Dies when STDIN is exhausted so the re-prompt loop cannot spin forever.
sub read_file_name {
    my $answer = <STDIN>;
    die "No input file name was provided.\n" unless defined $answer;
    chomp $answer;
    return $answer;
}

# Returns a copy of $line in which every character that is a key of %name
# has been replaced by its mapped value; all other characters are kept.
sub convert_line {
    my ($line) = @_;
    return join '',
        map { exists $name{$_} ? $name{$_} : $_ }    # else: preserves English
        split //, $line;
}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Re: Orthography Translation using Regex
by graff (Chancellor) on Mar 01, 2004 at 09:15 UTC | |
|
Re: Re: Orthography Translation using Regex
by Anonymous Monk on Mar 01, 2004 at 06:25 UTC |