I've finally got around to extending to my long-running Perl vs C++ Performance series by timing some Roman to Decimal Rosetta PGA-TRAM code on Ubuntu.
Generating the Test Data
You'll need to install the Roman module from CPAN (or simply copy Roman.pm locally) to generate the test data by running:
with:# gen-roman.pl use strict; use warnings; use Roman; for my $n (1..1000) { for my $i (1..3999) { my $r = int(rand(2)) ? uc(roman($i)) : lc(roman($i)); print "$r\n"; } }
perl gen-roman.pl >t1.txt
which will generate a test file t1.txt containing 3,999,000 Roman Numerals.
Running the Benchmark
With that done, you can run rtoa-pgatram.pl below (derived from Rosetta PGA-TRAM) with:
which produced on my laptop:$ perl rtoa-pgatram.pl t1.txt >pgatram.tmp
rtoa pgatram start read_input_files : 1 secs roman_to_arabic : 7 secs output : 0 secs total : 8 secs
rtoa-pgatram.pl
# rtoa-pgatram.pl # Example run: perl rtoa-pgatram.pl t1.txt >pgatram.tmp # # Convert a "modern" Roman Numeral to its arabic (decimal) equivalent. # The alpabetic input string may be assumed to always contain a valid +Roman Numeral in the range 1-3999. # Roman numerals may be upper or lower case. # Error handling is not required. # For example: # input "XLII" should produce the arabic (decimal) value 42 # input "mi" should produce the arabic (decimal) value 1001 use 5.010; # Needed for state use strict; use warnings; use List::Util qw(reduce); sub read_input_files { my $files = shift; # in: reference to a list of files containin +g Roman Numerals (one per line) my @list_ret; # out: reference to a list of the Roman Numer +als in the files for my $fname ( @{$files} ) { open( my $fh, '<', $fname ) or die "error: open '$fname': $!"; while (<$fh>) { chomp; push @list_ret, uc($_); } close($fh) or die "error: close '$fname': $!"; } return \@list_ret; } # Function roman_to_arabic # Input: reference to a list of valid Roman Numerals in the range 1.. +3999 # Output: reference to a list of their arabic (decimal) values sub roman_to_arabic { my $list_in = shift; # in: reference to a list of valid Roman Nu +merals my @list_ret; # out: a list of their integer values state %rtoa = ( M=>1000, D=>500, C=>100, L=>50, X=>10, V=>5, I=>1 ) +; for (@{$list_in}) { push @list_ret, reduce { $a+$b-$a%$b*2 } map { $rtoa{$_} } split +//, uc($_); } return \@list_ret; } @ARGV or die "usage: $0 file...\n"; my @rtoa_files = @ARGV; warn "rtoa pgatram start\n"; my $tstart1 = time; my $aref1 = read_input_files( \@rtoa_files ); my $tend1 = time; my $taken1 = $tend1 - $tstart1; warn "read_input_files : $taken1 secs\n"; my $tstart2 = time; my $aref2 = roman_to_arabic($aref1); my $tend2 = time; my $taken2 = $tend2 - $tstart2; warn "roman_to_arabic : $taken2 secs\n"; my $tstart3 = time; for my $n ( @{$aref2} ) { print "$n\n" } my $tend3 = time; my $taken3 = $tend3 - $tstart3; my $taken = $taken1 + $taken2 + $taken3; warn "output : $taken3 secs\n"; warn "total : $taken secs\n";
I was relieved that this ran a little faster than rtoa-roman.pl, which is just a copy of rtoa-pgatram.pl above that uses Roman's arabic function instead of rtoa-pgatram.pl's pgatram algorithm; that is with:
above replaced with:push @list_ret, reduce { $a+$b-$a%$b*2 } map { $rtoa{$_} } split//, + uc($_);
use Roman; ... push @list_ret, arabic($_);
$ perl rtoa-roman.pl t1.txt >roman.tmp rtoa roman start read_input_files : 1 secs roman_to_arabic : 11 secs output : 1 secs total : 13 secs $ diff roman.tmp pgatram.tmp
Please feel free to reply with alternative Perl roman_to_arabic subroutines, especially if they are faster. Roman to Arabic subroutines in other languages are also welcome.
C++ Version
// rtoa-pgatram.cpp // Compile with: // g++ -o rtoa-pgatram -std=c++20 -Wall -O3 rtoa-pgatram.cpp // or: // clang++ -o rtoa-pgatram -std=c++20 -Wall -O3 rtoa-pgatram.cpp // or: // g++ -o rtoa-pgatram -std=c++20 -Wall -O3 -I "$HOME/local-fast_io/ +fast_io/include" rtoa-pgatram.cpp // to use the locally installed fast_io header-only library #include <cctype> #include <cstring> #include <iostream> #include <string> #include <vector> #include <numeric> #include <chrono> #include <iomanip> // See [id://11149504] for more info on the fast_io C++ library // #include <fast_io.h> // ------------------------------------------------------------------- +--------- typedef std::chrono::high_resolution_clock high_resolution_clock; typedef std::chrono::high_resolution_clock::time_point time_point; typedef std::chrono::milliseconds milliseconds; double elaspe_time( time_point cend, time_point cstart) { return double ( std::chrono::duration_cast<milliseconds>(cend - cstart).count() ) * 1e-3; } // ------------------------------------------------------------------- +--------- using vec_str_type = std::vector<std::string>; using vec_int_type = std::vector<int>; #define MAX_LINE_LEN_L 63 // Read an input file of Roman Numerals and append them to a list // Return the number of Roman Numerals appended static int read_input_file( const char* fname, // in: file name containing a list of +Roman Numerals vec_str_type& vec_ret) // out: a vector of Roman Numeral strin +gs { FILE* fh; char line[MAX_LINE_LEN_L + 1]; int cnt = 0; fh = ::fopen(fname, "r"); if ( fh == NULL ) { std::cerr << "Error opening '" << fname << "' : " << strerror(er +rno) << "\n"; return 0; } while ( ::fgets( line, MAX_LINE_LEN_L, fh ) != NULL ) { size_t len = ::strlen(line); if (len > 0 && line[len-1] == '\n') line[--len] = '\0'; // remov +e newline vec_ret.emplace_back(line); ++cnt; } ::fclose(fh); return cnt; } // --------------------------------------------------------------- // Though there are less than 256 initializers in this ascii table, // the others are guaranteed by ANSI C to be initialized to zero. static const int romtab[256] = { 0,0,0,0,0,0, 0, 0, 0, 0, // 00- 09 0,0,0,0,0,0, 0, 0, 0, 0, // 10- 19 0,0,0,0,0,0, 0, 0, 0, 0, // 20- 29 0,0,0,0,0,0, 0, 0, 0, 0, // 30- 39 0,0,0,0,0,0, 0, 0, 0, 0, // 40- 49 0,0,0,0,0,0, 0, 0, 0, 0, // 50- 59 0,0,0,0,0,0, 0, 100, 500, 0, // 60- 69 0,0,0,1,0,0, 50,1000, 0, 0, // 70- 79 0,0,0,0,0,0, 5, 0, 10, 0, // 80- 89 0,0,0,0,0,0, 0, 0, 0, 100, // 90- 99 500,0,0,0,0,1, 0, 0, 50,1000, // 100-109 0,0,0,0,0,0, 0, 0, 5, 0, // 110-119 10,0,0,0,0,0, 0, 0, 0, 0 // 120-129 }; // Return the arabic number for a roman letter c. // Return zero if the roman letter c is invalid. inline int urtoa(int c) { return romtab[c]; } inline int accfn(int t, char c) { return t + urtoa(c) - t % urtoa(c) * 2; } inline int roman_to_dec(const std::string& s) { return std::accumulate( s.begin(), s.end(), 0, accfn ); } int main(int argc, char* argv[]) { if (argc < 2) { std::cerr << "usage: rtoa-pgatram file...\n"; return 1; } // Get the list of input files from the command line int nfiles = argc - 1; char** fname = &argv[1]; std::cerr << std::setprecision(3) << std::setiosflags(std::ios::fix +ed); time_point cstart1, cend1, cstart2, cend2, cstart3, cend3; // Read the input files into roman_list vec_str_type roman_list; cstart1 = high_resolution_clock::now(); int cnt = 0; for (int i = 0; i < nfiles; ++i) { cnt += read_input_file( fname[i], roman_list ); } cend1 = high_resolution_clock::now(); double ctaken1 = elaspe_time(cend1, cstart1); std::cerr << "read_input_files : " << cnt << " items\n"; std::cerr << "read file time : " << std::setw(8) << ctaken1 << " + secs\n"; // Convert roman to decimal vec_int_type arabic_list; cstart2 = high_resolution_clock::now(); for ( auto const& r : roman_list ) { arabic_list.emplace_back( roman_to_dec(r) ); } cend2 = high_resolution_clock::now(); double ctaken2 = elaspe_time(cend2, cstart2); std::cerr << "roman_to_dec time : " << std::setw(8) << ctaken2 << " + secs\n"; // Output to stdout cstart3 = high_resolution_clock::now(); for ( auto const& i : arabic_list ) { std::cout << i << '\n'; // fast_io::io::println(i); } cend3 = high_resolution_clock::now(); double ctaken3 = elaspe_time(cend3, cstart3); std::cerr << "output time : " << std::setw(8) << ctaken3 << " + secs\n"; return 0; }
$ g++ -o rtoa-pgatram -std=c++20 -Wall -O3 rtoa-pgatram.cpp $ ./rtoa-pgatram t1.txt >cpp.tmp read_input_file : 3999000 items read file time : 0.264 secs roman_to_dec time : 0.095 secs output time : 0.247 secs $ diff roman.tmp cpp.tmp
Update: when built with the fast_io C++ library, the output time is reduced:
output time : 0.098 secs
For details on building C++ with the header-only fast_io library, search for fast_io in this node.
References
See Also
Updated rtoa-pgatram.cpp to allow a list of files in argc/argv[] rather than just one (thanks marioroy). Also made minor improvements to rtoa-pgatram.cpp (including how to build it with the fast_io library). Removed rtoa-roman.pl since it is just rtoa-pgatram.pl with its roman_to_arabic function replaced by Roman's arabic function.
|
---|