$ NUM_THREADS=1 ./rtoa-pgatram-allinone2b t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.498 secs
737201628 75552000
$ NUM_THREADS=4 ./rtoa-pgatram-allinone2b t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.176 secs
737201628 75552000
$ NUM_THREADS=8 ./rtoa-pgatram-allinone2b t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.124 secs
737201628 75552000
$ NUM_THREADS=16 ./rtoa-pgatram-allinone2b t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.096 secs
737201628 75552000
####
$ NUM_THREADS=1 ./rtoa-pgatram-allinone2c t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.488 secs
737201628 75552000
$ NUM_THREADS=4 ./rtoa-pgatram-allinone2c t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.143 secs
737201628 75552000
$ NUM_THREADS=8 ./rtoa-pgatram-allinone2c t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.091 secs
737201628 75552000
$ NUM_THREADS=16 ./rtoa-pgatram-allinone2c t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.065 secs
737201628 75552000
####
$ NUM_THREADS=1 ./rtoa-pgatram-allinone2d t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.489 secs
737201628 75552000
$ NUM_THREADS=4 ./rtoa-pgatram-allinone2d t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.144 secs
737201628 75552000
$ NUM_THREADS=8 ./rtoa-pgatram-allinone2d t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.075 secs
737201628 75552000
$ NUM_THREADS=16 ./rtoa-pgatram-allinone2d t1.txt t1.txt t1.txt t1.txt | cksum
do_it_all time : 0.048 secs
737201628 75552000
####
// rtoa-pgatram-allinone2d.cpp. Crude allinone version.
// based on rtoa-pgatram-allinone2.cpp https://perlmonks.org/?node_id=11152186
//
// Obtain the fast_io library (required dependency):
// git clone --depth=1 https://github.com/cppfastio/fast_io
//
// Compile with g++ or clang++:
// clang++ -o rtoa-pgatram-allinone2d -std=c++20 -fopenmp -Wall -O3 -I fast_io/include rtoa-pgatram-allinone2d.cpp
//
// OpenMP Little Book:
// https://nanxiao.gitbooks.io/openmp-little-book/content/
#include <cerrno>
#include <cstddef>
#include <cstdlib>
#include <cstring>

#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <numeric>
#include <semaphore>
#include <string>
#include <string_view>
#include <thread>
#include <vector>

#ifdef _OPENMP
#include <omp.h>
#endif

// See [id://11149504] for more info on the fast_io C++ library
#include <fast_io.h>
#include <fast_io_legacy.h>
// ---------------------------------------------------------------
// Clock, time-point and duration aliases used for wall-clock timing.
using high_resolution_clock = std::chrono::high_resolution_clock;
using time_point            = std::chrono::high_resolution_clock::time_point;
using milliseconds          = std::chrono::milliseconds;

// Return the time that passed between cstart and cend, in seconds.
// The interval is truncated to whole milliseconds before it is scaled,
// so the result has millisecond resolution. It is negative when cend
// precedes cstart.
double elaspe_time(
    time_point cend,
    time_point cstart)
{
    // duration_cast needs its target duration spelled out explicitly.
    return double (
        std::chrono::duration_cast<milliseconds>(cend - cstart).count()
    ) * 1e-3;
}
// ---------------------------------------------------------------
// Lookup table indexed by byte value: the arabic value of the Roman
// numeral letter with that ASCII code (upper or lower case), zero for
// every other byte. Fewer than 256 initializers are listed; the remaining
// elements (130-255) are zero-initialized by the language.
static const int romtab[256] = {
    0,0,0,0,0,0,  0,   0,   0,   0,  //   0-  9
    0,0,0,0,0,0,  0,   0,   0,   0,  //  10- 19
    0,0,0,0,0,0,  0,   0,   0,   0,  //  20- 29
    0,0,0,0,0,0,  0,   0,   0,   0,  //  30- 39
    0,0,0,0,0,0,  0,   0,   0,   0,  //  40- 49
    0,0,0,0,0,0,  0,   0,   0,   0,  //  50- 59
    0,0,0,0,0,0,  0, 100, 500,   0,  //  60- 69
    0,0,0,1,0,0, 50,1000,   0,   0,  //  70- 79
    0,0,0,0,0,0,  5,   0,  10,   0,  //  80- 89
    0,0,0,0,0,0,  0,   0,   0, 100,  //  90- 99
  500,0,0,0,0,1,  0,   0,  50,1000,  // 100-109
    0,0,0,0,0,0,  0,   0,   5,   0,  // 110-119
   10,0,0,0,0,0,  0,   0,   0,   0   // 120-129
};

// Return the arabic number for a roman letter c.
// Return zero if the roman letter c is invalid.
// The argument is reduced to an unsigned byte before indexing, so a
// negative value (a byte >= 0x80 where plain char is signed) can never
// index outside the table.
inline int urtoa(int c) { return romtab[static_cast<unsigned char>(c)]; }

// Fold step for roman_to_dec: combine the running total t with letter c.
// The letter's value v is added; t % v is the part of the total made up of
// smaller numerals that directly precede c (already added), so it is
// subtracted twice to turn that addition into a subtraction
// (e.g. "IV": 1, then 1 + 5 - 2*1 = 4).
// A byte that is not a Roman letter leaves the total unchanged; without
// this guard v would be zero and t % v would divide by zero.
inline int accfn(int t, char c) {
    const int v = urtoa(c);
    if (v == 0) return t;
    return t + v - t % v * 2;
}

// Convert a Roman numeral string to its decimal value.
// Bytes that are not Roman letters are skipped; an empty string yields 0.
inline int roman_to_dec(std::string_view st) {
    return std::accumulate(st.begin(), st.end(), 0, accfn);
}
// ---------------------------------------------------------------
// Number of bytes requested from the input stream per read (1 MiB).
inline constexpr int CHUNK_SIZE = 1024 * 1024;
// Size limit passed to getline() when a read stops in the middle of a line.
inline constexpr int LINE_LENGTH = 0xFF;
// Locate the first line feed in the half-open range [first, last).
// Returns a pointer to that '\n', or last when the range contains none.
inline constexpr char const* find_lf(char const* first, char const* last)
{
    for (char const* cur = first; cur != last; ++cur) {
        if (*cur == '\n')
            return cur;
    }
    return last;
}
// Read an input file of Roman Numerals and do it all.
static void do_it_all(
std::ifstream& fin, // in: file input stream containing a list of Roman Numerals
int nthds // in: number of threads
)
{
fast_io::out_buf_type obf{fast_io::out()};
std::binary_semaphore *sem[nthds];
size_t next_chunk_id = 0;
// Create semaphores for orderly output.
for (int i = 0; i < nthds; ++i)
sem[i] = new std::binary_semaphore{0};
#pragma omp parallel
{
std::string buf; buf.resize(CHUNK_SIZE + LINE_LENGTH + 1, '\0');
const char *first, *last;
size_t chunk_id, len;
while (fin.good()) {
std::string output;
len = 0;
// Read the next chunk serially.
//
#pragma omp critical
{
fin.read(&buf[0], CHUNK_SIZE);
if ((len = fin.gcount()) > 0) {
chunk_id = ++next_chunk_id;
if (buf[len - 1] != '\n' && fin.getline(&buf[len], LINE_LENGTH)) {
// Getline discards the newline char and appends null char.
// Therefore, change '\0' to '\n'.
len += fin.gcount();
buf[len - 1] = '\n';
}
}
}
if (!len) break;
buf[len] = '\0';
first = &buf[0];
last = &buf[len];
// Process max Nthreads chunks concurrently.
//
while (first < last) {
auto beg_ptr{first}; first = find_lf(first, last);
auto end_ptr{first};
int dec = roman_to_dec(std::string_view(beg_ptr, end_ptr - beg_ptr));
output.append(fast_io::concatln(dec));
++first;
}
// Output completed chunk, orderly by chunk_id.
//
if (nthds > 1 && chunk_id > 1)
sem[chunk_id % nthds]->acquire();
fast_io::io::print(obf, output);
if (nthds > 1)
sem[(chunk_id + 1) % nthds]->release();
}
}
// Destroy the dynamically allocated semaphores.
for (int i = 0; i < nthds; ++i)
delete sem[i];
}
int main(int argc, char* argv[])
{
if (argc < 2) {
if (argc > 0)
std::cerr << "Usage: rtoa-pgatram-allinone2d file... >out.txt\n";
return 1;
}
std::cerr << std::setprecision(3) << std::setiosflags(std::ios::fixed);
time_point cstartall, cendall;
cstartall = high_resolution_clock::now();
#ifdef _OPENMP
// Determine the number of threads.
int nthds = std::thread::hardware_concurrency();
const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
const char* env_nthds2 = std::getenv("NUM_THREADS");
if (env_nthds1 && strlen(env_nthds1))
nthds = ::atoi(env_nthds1);
else if (env_nthds2 && strlen(env_nthds2))
nthds = ::atoi(env_nthds2);
omp_set_dynamic(false);
omp_set_num_threads(nthds);
#else
int nthds = 1;
#endif
int nfiles = argc - 1;
char** fname = &argv[1];
for (int i = 0; i < nfiles; ++i) {
std::ifstream fin(fname[i], std::ifstream::binary);
if (!fin.is_open()) {
std::cerr << "Error opening '" << fname[i] << "' : " << strerror(errno) << '\n';
continue;
}
do_it_all(fin, nthds);
fin.close();
}
cendall = high_resolution_clock::now();
double ctakenall = elaspe_time(cendall, cstartall);
std::cerr << "do_it_all time : " << std::setw(8) << ctakenall << " secs\n";
return 0;
}