// rtoa-pgatram-openmp.cpp. Crude first attempt at an OpenMP version.
// Compile with:
//   g++ -o rtoa-pgatram-openmp -std=c++20 -fopenmp -Wall -O3 rtoa-pgatram-openmp.cpp
// or:
//   clang++ -o rtoa-pgatram-openmp -std=c++20 -fopenmp -Wall -O3 rtoa-pgatram-openmp.cpp
// or:
//   g++ -o rtoa-pgatram-openmp -std=c++20 -fopenmp -Wall -O3 -I "$HOME/local-fast_io/fast_io/include" rtoa-pgatram-openmp.cpp
// to use the locally installed fast_io header-only library

#include <cerrno>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#ifdef _OPENMP
#include <omp.h>
#endif

#include <iomanip>
#include <iostream>
#include <numeric>
#include <thread>
#include <vector>

// See [id://11149504] for more info on the fast_io C++ library.
// fast_io is used for output when its header can be found; otherwise the
// program falls back to std::cout so it still builds with only the
// standard library.
#if defined(__has_include)
#  if __has_include(<fast_io.h>)
#    include <fast_io.h>
#    define RTOA_HAVE_FAST_IO 1
#  endif
#endif

// ----------------------------------------------------------------------------

typedef std::chrono::high_resolution_clock high_resolution_clock;
typedef std::chrono::high_resolution_clock::time_point time_point;
typedef std::chrono::milliseconds milliseconds;

// Return the time between cstart and cend in seconds (millisecond resolution).
// NOTE: the name is a misspelling of "elapse_time"; it is kept as-is so that
// existing callers keep working.
double elaspe_time(
   time_point cend,
   time_point cstart)
{
   return double(
      std::chrono::duration_cast<milliseconds>(cend - cstart).count()
   ) * 1e-3;
}

// ----------------------------------------------------------------------------

// Longest roman numeral is MMMDCCCLXXXVIII (3888) of length 15.
// The fgets() buffer must hold those 15 letters plus the trailing newline
// plus the terminating NUL, i.e. 15 + 1 + 1 = 17 bytes. With a 16-byte buffer
// fgets() stops before the newline, which is why 3888 used to fail with 16.
#define MAX_STR_LEN_L 17

// The basic idea is to keep this struct small and without pointers to
// improve data locality/cache performance when traversing the vector
struct str_type
{
   char slen;                 // number of meaningful characters in str
   char str[MAX_STR_LEN_L];   // numeral text; only the first slen bytes are used
};

using vec_str_type = std::vector<str_type>;
using vec_int_type = std::vector<int>;

// Read an input file of Roman Numerals and append them to a list.
// One record is appended per input line (an empty line yields a record of
// length zero). A trailing "\n" or "\r\n" is stripped; a final line without
// a newline is kept intact. A line too long for the buffer is truncated to
// the buffer size and the remainder of that line is skipped.
// If the file cannot be opened, an error is written to std::cerr and
// vec_ret is left unchanged.
static void read_input_file(
   const char*   fname,     // in:  file name containing a list of Roman Numerals
   vec_str_type& vec_ret)   // out: a vector of Roman Numeral strings
{
   FILE* fh = ::fopen(fname, "r");
   if ( fh == nullptr ) {
      std::cerr << "Error opening '" << fname << "' : " << std::strerror(errno) << "\n";
      return;
   }

   str_type line;
   while ( ::fgets( line.str, MAX_STR_LEN_L, fh ) != nullptr ) {
      std::size_t len = std::strlen(line.str);
      const bool saw_newline = ( len > 0 && line.str[len - 1] == '\n' );

      // Strip the line terminator only if it is really there, so that a
      // last line lacking a newline does not lose its final letter.
      if ( saw_newline ) --len;
      if ( len > 0 && line.str[len - 1] == '\r' ) --len;

      line.slen = static_cast<char>(len);
      vec_ret.emplace_back(line);

      // The buffer filled up (or EOF was hit) before a newline was seen:
      // discard the rest of this line so it is not mistaken for a new record.
      if ( !saw_newline ) {
         int ch;
         while ( (ch = ::fgetc(fh)) != EOF && ch != '\n' ) { }
      }
   }
   ::fclose(fh);
}

// ---------------------------------------------------------------
// Though there are less than 256 initializers in this ascii table,
// the remaining elements are guaranteed to be initialized to zero.
static const int romtab[256] = {
   0,0,0,0,0,0,   0,   0,   0,   0,    //  00- 09
   0,0,0,0,0,0,   0,   0,   0,   0,    //  10- 19
   0,0,0,0,0,0,   0,   0,   0,   0,    //  20- 29
   0,0,0,0,0,0,   0,   0,   0,   0,    //  30- 39
   0,0,0,0,0,0,   0,   0,   0,   0,    //  40- 49
   0,0,0,0,0,0,   0,   0,   0,   0,    //  50- 59
   0,0,0,0,0,0,   0, 100, 500,   0,    //  60- 69
   0,0,0,1,0,0,  50,1000,   0,   0,    //  70- 79
   0,0,0,0,0,0,   5,   0,  10,   0,    //  80- 89
   0,0,0,0,0,0,   0,   0,   0, 100,    //  90- 99
   500,0,0,0,0,1, 0,   0,  50,1000,    // 100-109
   0,0,0,0,0,0,   0,   0,   5,   0,    // 110-119
   10,0,0,0,0,0,  0,   0,   0,   0     // 120-129
};

// Return the arabic number for a roman letter c.
// Return zero if the roman letter c is invalid, including any value
// outside 0..255 (e.g. a negative plain char), which must not be used
// as a table index.
inline int urtoa(int c)
{
   return ( c >= 0 && c < 256 ) ? romtab[c] : 0;
}

// Fold step for roman_to_dec: given the running total t and the next
// letter c, return the new total. Adding the letter's value and then
// subtracting twice (t % value) undoes a smaller prefix that turned out
// to be subtractive, e.g. "IV": 1, then 1 + 5 - (1 % 5) * 2 = 4.
// An invalid letter leaves the total unchanged; without this guard the
// modulo below would divide by zero.
inline int accfn(int t, char c)
{
   const int v = urtoa(c);
   if ( v == 0 ) return t;
   return t + v - t % v * 2;
}

// Convert the roman numeral held in st (its first st.slen characters)
// to its decimal value.
inline int roman_to_dec(const str_type& st)
{
   return std::accumulate( st.str, st.str + st.slen, 0, accfn );
}

// Usage: rtoa-pgatram-openmp file...
// Reads every file named on the command line, converts each line from a
// roman numeral to decimal and prints one number per line on stdout.
// Progress and timing information goes to stderr.
// When built with OpenMP, the NUM_THREADS environment variable selects the
// number of threads used to read the files (default: hardware concurrency).
int main(int argc, char* argv[])
{
   if (argc < 2) {
      std::cerr << "usage: rtoa-pgatram-openmp file...\n";
      return 1;
   }

#ifdef _OPENMP
   std::cerr << "use OpenMP\n";
#else
   std::cerr << "don't use OpenMP\n";
#endif

   // Get the list of input files from the command line
   const int nfiles = argc - 1;
   char**    fname  = &argv[1];

   std::cerr << std::setprecision(3) << std::setiosflags(std::ios::fixed);

#ifdef _OPENMP
   // Determine the number of threads.
   const char* env_nthrs = std::getenv("NUM_THREADS");
   int nthrs = ( env_nthrs && std::strlen(env_nthrs) )
      ? std::atoi(env_nthrs)
      : static_cast<int>( std::thread::hardware_concurrency() );
   // atoi() yields 0 for non-numeric text and hardware_concurrency() may
   // report 0; OpenMP requires a positive thread count.
   if ( nthrs < 1 ) nthrs = 1;
   omp_set_dynamic(false);
   omp_set_num_threads(nthrs);
#else
   int nthrs = 1;
#endif

   // Read the input files into roman_list
   vec_str_type roman_list;
   roman_list.reserve(3999000 * 4);   // capacity hint only; the vector still grows on demand

   const time_point cstart1 = high_resolution_clock::now();

   // Run parallel, depending on the number of threads
   if ( nthrs == 1 || nfiles == 1 ) {
      for (int i = 0; i < nfiles; ++i)
         read_input_file( fname[i], roman_list );
   }
#ifdef _OPENMP
   else {
      #pragma omp parallel for ordered schedule(static, 1)
      for (int i = 0; i < nfiles; ++i) {
         vec_str_type locvec;
         read_input_file( fname[i], locvec );
         // "ordered" rather than "critical": the files are read concurrently
         // but appended in command-line order, so the output is the same as
         // in the single-threaded case whatever order the threads finish in.
         #pragma omp ordered
         {
            // Append local vector
            roman_list.insert( roman_list.end(), locvec.begin(), locvec.end() );
         }
      }
   }
#endif

   const time_point cend1 = high_resolution_clock::now();
   const double ctaken1 = elaspe_time(cend1, cstart1);
   std::cerr << "read_input_files : " << roman_list.size() << " items\n";
   std::cerr << "read file time : " << std::setw(8) << ctaken1 << " secs\n";

   // Convert roman to decimal
   const time_point cstart2 = high_resolution_clock::now();

   for ( auto const& r : roman_list ) {
#ifdef RTOA_HAVE_FAST_IO
      fast_io::io::println( roman_to_dec(r) );
#else
      std::cout << roman_to_dec(r) << '\n';
#endif
   }

   const time_point cend2 = high_resolution_clock::now();
   const double ctaken2 = elaspe_time(cend2, cstart2);
   std::cerr << "roman_to_dec time : " << std::setw(8) << ctaken2 << " secs\n";

   const double ctaken = elaspe_time(cend2, cstart1);
   std::cerr << "total time : " << std::setw(8) << ctaken << " secs\n";

   return 0;
}

/*
Sample runs (from the original post):

$ time NUM_THREADS=1 ./rtoa-pgatram-openmp t1.txt t1.txt t1.txt t1.txt >s.tmp
use OpenMP
read_input_files : 15996000 items
read file time : 0.700 secs
roman_to_dec time : 0.556 secs
total time : 1.256 secs

real 0m1.278s
user 0m0.928s
sys 0m0.350s

$ time NUM_THREADS=4 ./rtoa-pgatram-openmp t1.txt t1.txt t1.txt t1.txt >s.tmp
use OpenMP
read_input_files : 15996000 items
read file time : 0.405 secs
roman_to_dec time : 0.568 secs
total time : 0.974 secs

real 0m0.995s
user 0m1.439s
sys 0m0.539s

$ cmp f.tmp s.tmp
*/