I found the Roman Numeral codes in Unicode rather interesting, so I played with this. It handles all the fancy letters, multiple forms, and numbers greater than 10000.
And here is a test file to call it:=head1 DESCRIPTION See http://www2.inetdirect.net/~charta/Roman_numerals.html for primary reference material on Roman numerals. =over 4 choice of characters input may be case insensitive, *all* insensitive, mixed forms, etc. ( +suppliments?) may use precomposed Unicode number forms for IV etc. choice of IV or IIII forms of 4's on output non-strict input takes IC, IIX, etc. use overbars or apostrophic forms for 10000 and greater powers. choice of I-with-2 or M-with-1 forms. =back =cut use charnames ':full'; our %ivxlcdm= (i=>1, v=>2, x=>3, l=>4, c=>5, d=>6, m=>7, '('=>8, mid=> +9, ')'=>10); my $overbar= "\N{COMBINING OVERLINE}"; #or should it use macron? package Roman2::parse; use Carp; use strict; use utf8; use warnings; use fields qw (suppliments apposmatch_RE translation); use vars qw (@parse_all %translation); @parse_all= (\%ivxlcdm, # all known/allowed forms for parsing. [ 'i', 'I', "\N{ROMAN NUMERAL ONE}", "\N{SMALL ROMAN NUMERAL ONE}" +], [ 'v', 'V', "\N{ROMAN NUMERAL FIVE}", "\N{SMALL ROMAN NUMERAL FIVE} +" ], [ 'x', 'X', "\N{ROMAN NUMERAL TEN}", "\N{SMALL ROMAN NUMERAL TEN}" +], [ 'l', 'L', "\N{ROMAN NUMERAL FIFTY}", "\N{SMALL ROMAN NUMERAL FIFT +Y}" ], [ 'c', 'C', "\N{ROMAN NUMERAL ONE HUNDRED}", "\N{SMALL ROMAN NUMERA +L ONE HUNDRED}" ], [ 'd', 'D', "\N{ROMAN NUMERAL FIVE HUNDRED}", "\N{SMALL ROMAN NUMER +AL FIVE HUNDRED}" ], [ 'm', 'M', "\N{ROMAN NUMERAL ONE THOUSAND}", "\N{SMALL ROMAN NUMER +AL ONE THOUSAND}", "\N{GREEK CAPITAL LETTER PHI}", "\N{GREEK SMALL LETTER PHI}", +"\N{LATIN SMALL LETTER PHI}"], [ '(', 'C', 'c' ], [ 'i', 'I', "\N{ROMAN NUMERAL ONE}", "\N{SMALL ROMAN NUMERAL ONE}", + "\N{GREEK CAPITAL LETTER PHI}", "\N{GREEK SMALL LETTER PHI}", "\N{L +ATIN SMALL LETTER PHI}"], [ ')', "\N{LATIN CAPITAL LETTER OPEN O}", "\N{LATIN SMALL LETTER OP +EN O}", "\N{ROMAN NUMERAL REVERSED ONE HUNDRED}"] ); sub populate_translation (@$) { my ($list, $val)= @_; foreach (@$list) { $translation{$_} = $val; } } { my $parse_all= \@parse_all; #pseudo-hash symtax only works on refe +rences. populate_translation ($parse_all->{i}, 1); populate_translation ($parse_all->{v}, 5); populate_translation ($parse_all->{x}, 10); populate_translation ($parse_all->{l}, 50); populate_translation ($parse_all->{c}, 100); populate_translation ($parse_all->{d}, 500); populate_translation ($parse_all->{m}, 1000); } sub new { my $class= shift; my $self; { no strict 'refs'; $self = bless [\%{"$class\::FIELDS"}], $class; } $self->{suppliments}= \@parse_all; #various forms accepted for inpu +t $self->{translation}= \%translation; #the values of the various item +s. return $self; } sub get_apposmatch_RE() { my $self= shift; # cache the regexp, reuse until suppliments set changes. unless (exists $self->{apposmatch_RE}) { # the form is (open+)(i|m)(close+). my @list= @{$self->{suppliments}{'('}}; my $open= join ('|', map { "\Q$_\E" }(@list)); @list= @{$self->{suppliments}{mid}}; my $mi= join ('|', map { "\Q$_\E" }(@list)); @list= @{$self->{suppliments}{')'}}; my $close= join ('|', map { "\Q$_\E" }(@list)); my $total= "((?:$open)+)($mi)((?:$close)+)"; $self->{apposmatch_RE}= qr{$total}; } return $self->{apposmatch_RE}; } sub _process_appos { my ($self, $open, $mid, $close)= @_; print "found open:$open, mid:$mid, close:$close\n"; my $count= length($open); croak "\"$open$mid$close\" is an unballanced appostrophic form." if +$count != length($close); my $midval= $self->{translation}{$mid}; if ($midval == 1) { # I with n parens becomes M with n-1 overbars. --$count; } elsif ($midval == 1000) { # Phi with n parens becomes M with n overbars. } else { die "Invalid mid-symbol $mid. Stopped "; # die, not croak, because this is an internal error. The translat +ion table doesn't match the suppliments table. } return 'M' . $overbar x $count; } sub appos2over { # I with n parens becomes M with n-1 overbars. # Phi with n parens becomes M with n overbars. my $self= shift; # modify value parameter in-place, so caller sees change. # >> ........... # (open)(i)(close) replaced with function to check $1 and $3 for equa +l length. my $re= $self->get_apposmatch_RE(); $_[0] =~ s/$re/$self->_process_appos($1,$2,$3)/eg; } sub remove_composite { # =COMMENT remove due to bug in Perl ? my $self= shift; $_[0] =~ s/\x{2161}|\x{2171}/II/g; $_[0] =~ s/\x{2162}|\x{2172}/III/g; $_[0] =~ s/\x{2163}|\x{2173}/IV/g; $_[0] =~ s/\x{2165}|\x{2175}/VI/g; $_[0] =~ s/\x{2166}|\x{2176}/VII/g; $_[0] =~ s/\x{2167}|\x{2177}/VIII/g; $_[0] =~ s/\x{2168}|\x{2178}/IX/g; $_[0] =~ s/\x{216a}|\x{217a}/XI/g; $_[0] =~ s/\x{216b}|\x{217b}/XII/g; # =cut } sub _make_list { my ($self, $value)= @_; # This function converts the string of characters into a list of numb +ers, one number for each character. # Each number is the value of the corresponding roman letter. ## while ($value =~ / # make sure that an overbar only follows legal characters. my @result; while ($value =~ /(.)/g) { if ($1 eq $overbar) { $result[-1] *= 10 } else { my $val= $self->{translation}{$1}; croak "Unknown Roman Numeral character \"$1\"" unless $val; push @result, $val; } } return @result; } sub fromRoman { my ($self, $value)= @_; # replace appos. forms with overbars. (look for i and phi) $self->appos2over ($value); # replace composite forms. Need to do this early so inversions work +as visually appear. $self->remove_composite ($value); # create a list of values my @values= $self->_make_list ($value); print join (',', @values), "\n"; # finally, process that list instead of the string. # search for inversions -- smaller preceeding larger. Make those ne +gative. # add the numbers. } ####################################################### package Roman2; use utf8; use warnings; use strict; use fields qw (primary use_precomposed); use vars qw (%known_sets); use Carp; %known_sets= ( ASCII=> [\%ivxlcdm, qw [ I V X L C D M ( I ) ] ], ascii=> [\%ivxlcdm, qw [ i v x l c d m ( I ) ] ], Phi=> [\%ivxlcdm, qw [ I V X L C D ], "\N{GREEK CAPITAL LETTER PHI} +", '(', "-\N{GREEK CAPITAL LETTER PHI}", ')' ], ROMAN=> [\%ivxlcdm, "\N{ROMAN NUMERAL ONE}", "\N{ROMAN NUMERAL FIVE +}", "\N{ROMAN NUMERAL TEN}", "\N{ROMAN NUMERAL FIFTY}", "\N{ROMAN NU +MERAL ONE HUNDRED}", "\N{ROMAN NUMERAL FIVE HUNDRED}", "\N{ROMAN NUME +RAL ONE THOUSAND}", '(', "-\N{GREEK CAPITAL LETTER PHI}", ')' ], ); sub new { my $class= shift; my $self; { no strict 'refs'; $self = bless [\%{"$class\::FIELDS"}], $class; } #start with the default arrangement. $self->{use_precomposed}= undef; #if true, uses U+2161 etc. # parameters, if any, override that. my $primary_param= shift || 'ASCII'; $self->set($primary_param); return $self; } =head2 set Sets the letters used for specific number positions. Usage: r->set ('ASCII'); #single string sets a known scheme. r->set (M=>'(I)', X=>'*'); #pairs specify specific values =cut sub set { my $self= shift; my $argcount= scalar @_; if ($argcount == 1) { # it names a known scheme eval { my @set= @{ $known_sets{$_[0]} }; $self->{primary}= \@set; } or croak "Unknown set '$_[0]'"; } elsif (($argcount&1) != 0) { # odd number is error croak "Invalid number of arguments: must be 1 or even."; } else { # pairs while (@_) { my $key= lc shift; my $value= shift; eval { $self->{primary}{$key}= $value; } or croak "Invalid argument '$key'"; } } } my @digitmap1= ('' , qw (i ii iii iv v vi vii viii ix)); my @digitmap2= ('' , qw (i ii iii iiii iiiii iiiiii iiiiiii iiiiiiii i +x)); sub over2apos ($$$$) { # M with n overbars becomes I with n+1 parens. # no way to specify Phi with n parens instead. Post-process with a s +imple s/// if you need that. my ($input, $L, $R, $I)= @_; # my $letter= substr ($input, 0, 1); # first letter -- change prefix +to infix my $count= length($input); --$count if $I =~ s/^-//; #one fewer set of parens if marked with a + dash. return ($L x $count) . $I . ($R x $count); } sub toRoman { my ($self, $value)= @_; die "value must be a non-negative integer" if $value < 0 || int($value) != $value; my $result= ''; #start with an empty string my @digitcodes= @{$self->{primary}}; shift @digitcodes; #get rid of pseudohash index. for (1..3) { # units, tens, hundreds all the same logic my ($one, $five)= splice (@digitcodes, 0, 2); #shift off two item +s my $ten= $digitcodes[0]; #look at the third too, but don't remove + it. my $digit= $value % 10; #least sig. digit $value /= 10; #removed. my $s= $digitmap1[$digit]; #don't use tr because a code may contain multiple letters. $s =~ s/i/$one/g; $s =~ s/v/$five/g; $s =~ s/x/$ten/g; $result= $s . $result; } if ($value) { # handle higher-order digits: 1000's and up my $one= shift @digitcodes; my $ten= "$one$overbar"; while ($value) { my $digit= $value % 10; #least sig. digit $value /= 10; #removed. my $s= $digitmap2[$digit]; $s =~ s/i/$one/g; $s =~ s/x/$ten/g; $result= $s . $result; # promote everything for the next pass $one= $ten; $ten .= $overbar; } } # use apostrophic forms, if chars supplied. my ($Lapostrophus, $mid, $Rapostrophus)= splice (@digitcodes, 0, 3); if (defined $Lapostrophus && defined $Rapostrophus) { $result =~ s/$self->{primary}{m}$overbar+/over2apos($&,$Lapostroph +us,$Rapostrophus,$mid)/eg; } return $result; } ########################################################## 1; # file loaded OK
use Roman2; use warnings; use strict; sub test { my ($r, $value)= @_; my $result= $r->toRoman ($value); print "$value produces $result\n"; } sub testset1 ($) { my $r1= shift; foreach (1..12) { test ($r1, $_); } test ($r1, 123); test ($r1, 2000); test ($r1, 1999); test ($r1, 4567); test ($r1, 9876); test ($r1, 1000001); test ($r1, 90001); } sub test1 { print "run with ASCII\n"; my $r1= new Roman2:: ; testset1 ($r1); print "change X to *\n"; $r1->set (X=>'*'); testset1 ($r1); print "change to lower-case\n"; $r1->set ('ascii'); testset1 ($r1); print "change to Phi usage\n"; $r1->set ("Phi"); testset1 ($r1); print "change to Unicode ROMAN digits\n"; $r1->set ("ROMAN"); testset1 ($r1); } sub test2 { my $parser= new Roman2::parse:: ; $parser->fromRoman ('((I))(I)Mxvi'); $parser->fromRoman ('xvii'); } #main program test1; test2;
In reply to Roman Numerals by John M. Dlugosz
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |