in reply to Re^3: Risque Romantic Rosetta Roman Race - Tried Array Lookup
in thread Risque Romantic Rosetta Roman Race
> It now takes the entire CPU (64 logical threads) for Perl MCE 1.0 to run faster. :)
Now that's a challenge! Can I can push the dial further? ... or will the ingenious tybalt89's unorthodox assistance from the side allow you to move the needle back towards 32? :)
Since I know how much you enjoyed my (anonymonk-provoked) MAX_STR_LEN_L hack in the long Long List is Long series, I've tried a similar stunt here in a desperate attempt to improve data locality and cache performance. I also added a (cheating) vector reserve and the total time at the end (thanks for pointing out this oversight).
Anyways, here are the timings of my latest version, rtoa-pgatram-fixed.cpp, using the fast_io library:
$ ./rtoa-pgatram-fixed t1.txt >f.tmp read_input_files : 3999000 items read file time : 0.164 secs roman_to_dec time : 0.088 secs output time : 0.100 secs total time : 0.353 secs $ diff f.tmp roman.tmp $ ./rtoa-pgatram-fixed t1.txt >f.tmp read_input_files : 3999000 items read file time : 0.167 secs roman_to_dec time : 0.086 secs output time : 0.098 secs total time : 0.353 secs $ diff f.tmp roman.tmp $ ./rtoa-pgatram-fixed t1.txt >f.tmp read_input_files : 3999000 items read file time : 0.173 secs roman_to_dec time : 0.086 secs output time : 0.093 secs total time : 0.353 secs $ diff f.tmp roman.tmp
Update: with marioroy rtoa-pgatram-fixed2 below (without fast_io):
$ ./rtoa-pgatram-fixed2 t1.txt >f.tmp read_input_files : 3999000 items read file time : 0.189 secs roman_to_dec time : 0.371 secs total time : 0.560 secs $ diff f.tmp roman.tmp
... with fast_io:
$ ./rtoa-pgatram-fixed2 t1.txt >f.tmp read_input_files : 3999000 items read file time : 0.178 secs roman_to_dec time : 0.143 secs total time : 0.322 secs $ diff f.tmp roman.tmp
rtoa-pgatram-fixed.cpp
// rtoa-pgatram-fixed.cpp. Crude fixed length string version. // Compile with: // g++ -o rtoa-pgatram-fixed -std=c++20 -Wall -O3 rtoa-pgatram-fixed +.cpp // or: // clang++ -o rtoa-pgatram-fixed -std=c++20 -Wall -O3 rtoa-pgatram-f +ixed.cpp // or: // g++ -o rtoa-pgatram-fixed -std=c++20 -Wall -O3 -I "$HOME/local-fa +st_io/fast_io/include" rtoa-pgatram-fixed.cpp // to use the locally installed fast_io header-only library #include <cctype> #include <cstring> #include <iostream> #include <string> #include <vector> #include <numeric> #include <chrono> #include <iomanip> // See [id://11149504] for more info on the fast_io C++ library // #include <fast_io.h> // ------------------------------------------------------------------- +--------- typedef std::chrono::high_resolution_clock high_resolution_clock; typedef std::chrono::high_resolution_clock::time_point time_point; typedef std::chrono::milliseconds milliseconds; double elaspe_time( time_point cend, time_point cstart) { return double ( std::chrono::duration_cast<milliseconds>(cend - cstart).count() ) * 1e-3; } // ------------------------------------------------------------------- +--------- // Longest roman numeral is MMMDCCCLXXXVIII (3888) of length 15 // XXX: I'm off by one somewhere because 3888 fails with // MAX_STR_LEN_L of 16 but works with 17 #define MAX_STR_LEN_L 17 // The basic idea is to keep this struct small and without pointers to // improve data locality/cache performance when traversing the vector struct str_type { char slen; char str[MAX_STR_LEN_L]; }; using vec_str_type = std::vector<str_type>; using vec_int_type = std::vector<int>; // Read an input file of Roman Numerals and append them to a list // Return the number of Roman Numerals appended static int read_input_file( const char* fname, // in: file name containing a list of +Roman Numerals vec_str_type& vec_ret) // out: a vector of Roman Numeral strin +gs { FILE* fh; str_type line; int cnt = 0; fh = ::fopen(fname, "r"); if ( fh == NULL ) { std::cerr << "Error opening '" << fname << "' : " << strerror(er +rno) << "\n"; return 0; } while ( ::fgets( line.str, MAX_STR_LEN_L, fh ) != NULL ) { line.slen = ::strlen(line.str) - 1; // -1 to strip trailing n +ewline vec_ret.emplace_back(line); ++cnt; } ::fclose(fh); return cnt; } // --------------------------------------------------------------- // Though there are less than 256 initializers in this ascii table, // the others are guaranteed by ANSI C to be initialized to zero. static const int romtab[256] = { 0,0,0,0,0,0, 0, 0, 0, 0, // 00- 09 0,0,0,0,0,0, 0, 0, 0, 0, // 10- 19 0,0,0,0,0,0, 0, 0, 0, 0, // 20- 29 0,0,0,0,0,0, 0, 0, 0, 0, // 30- 39 0,0,0,0,0,0, 0, 0, 0, 0, // 40- 49 0,0,0,0,0,0, 0, 0, 0, 0, // 50- 59 0,0,0,0,0,0, 0, 100, 500, 0, // 60- 69 0,0,0,1,0,0, 50,1000, 0, 0, // 70- 79 0,0,0,0,0,0, 5, 0, 10, 0, // 80- 89 0,0,0,0,0,0, 0, 0, 0, 100, // 90- 99 500,0,0,0,0,1, 0, 0, 50,1000, // 100-109 0,0,0,0,0,0, 0, 0, 5, 0, // 110-119 10,0,0,0,0,0, 0, 0, 0, 0 // 120-129 }; // Return the arabic number for a roman letter c. // Return zero if the roman letter c is invalid. inline int urtoa(int c) { return romtab[c]; } inline int accfn(int t, char c) { return t + urtoa(c) - t % urtoa(c) * 2; } inline int roman_to_dec(const str_type& st) { return std::accumulate( st.str, st.str + st.slen, 0, accfn ); } int main(int argc, char* argv[]) { if (argc < 2) { std::cerr << "usage: rtoa-pgatram-fixed file...\n"; return 1; } // Get the list of input files from the command line int nfiles = argc - 1; char** fname = &argv[1]; std::cerr << std::setprecision(3) << std::setiosflags(std::ios::fix +ed); time_point cstart1, cend1, cstart2, cend2, cstart3, cend3; // Read the input files into roman_list vec_str_type roman_list; roman_list.reserve(3999000); cstart1 = high_resolution_clock::now(); int cnt = 0; for (int i = 0; i < nfiles; ++i) { cnt += read_input_file( fname[i], roman_list ); } cend1 = high_resolution_clock::now(); double ctaken1 = elaspe_time(cend1, cstart1); std::cerr << "read_input_files : " << cnt << " items\n"; std::cerr << "read file time : " << std::setw(8) << ctaken1 << " + secs\n"; // Convert roman to decimal vec_int_type arabic_list; arabic_list.reserve(3999000); cstart2 = high_resolution_clock::now(); for ( auto const& r : roman_list ) { arabic_list.emplace_back( roman_to_dec(r) ); } cend2 = high_resolution_clock::now(); double ctaken2 = elaspe_time(cend2, cstart2); std::cerr << "roman_to_dec time : " << std::setw(8) << ctaken2 << " + secs\n"; // Output to stdout cstart3 = high_resolution_clock::now(); for ( auto const& i : arabic_list ) { std::cout << i << '\n'; // fast_io::io::println(i); } cend3 = high_resolution_clock::now(); double ctaken3 = elaspe_time(cend3, cstart3); std::cerr << "output time : " << std::setw(8) << ctaken3 << " + secs\n"; double ctaken = elaspe_time(cend3, cstart1); std::cerr << "total time : " << std::setw(8) << ctaken << +" secs\n"; return 0; }
|
---|
Replies are listed 'Best First'. | |
---|---|
Re^5: Risque Romantic Rosetta Roman Race - MCE Results on AMD Box
by marioroy (Prior) on May 13, 2023 at 10:32 UTC | |
by eyepopslikeamosquito (Archbishop) on May 14, 2023 at 04:17 UTC | |
by eyepopslikeamosquito (Archbishop) on May 15, 2023 at 03:33 UTC | |
by marioroy (Prior) on May 15, 2023 at 12:23 UTC | |
by eyepopslikeamosquito (Archbishop) on May 16, 2023 at 00:36 UTC | |
by marioroy (Prior) on May 16, 2023 at 02:05 UTC | |
|