in reply to Re^5: Rosetta Code: Long List is Long (faster - vec - fast_io)
in thread Rosetta Code: Long List is Long
So far I haven't found any statistically significant speed-ups from employing this library ...
After a closer look, it was the -std=c++20 language mode that made vectors faster, by 0.2 ~ 0.4 seconds versus -std=c++11.
Update 1: Support variable length words.
Update 2: Enable parallel sort. See Using Parallel Mode.
I tried OpenMP. Unfortunately, strtok is not thread-safe: e.g. strtok(NULL, "\n") caused a segfault, so I factored out strtok. The OpenMP result improved by 0.1 seconds. That's because the actual reading is already fast. It takes a minimum of 2 threads to run faster than the non-OpenMP results, due to the cost of populating vec_ret from the thread-local copies.
Building:
clang++ -o llil4vec -std=c++11 -Wall -O3 llil4vec.cpp
clang++ -o llil4vec -std=c++20 -Wall -O3 llil4vec.cpp   # faster

# enable parallel via -fopenmp
clang++ -o llil4vec-omp -std=c++11 -fopenmp -Wall -O3 llil4vec.cpp
clang++ -o llil4vec-omp -std=c++20 -fopenmp -Wall -O3 llil4vec.cpp   # +faster
Running - Real time results:
$ time ./llil4vec big1.txt big2.txt big3.txt >out.txt
std:c++11: 2.901 secs
std:c++20: 2.850 secs

$ time OMP_NUM_THREADS=3 ./llil4vec-omp big1.txt big2.txt big3.txt >out.txt
std:c++11: 2.308 secs
std:c++20: 2.267 secs
llil4vec.cpp modification, OpenMP-aware:
#if defined(_OPENMP) #include <omp.h> #include <parallel/algorithm> #else #include <algorithm> #endif ... static void get_properties( int nfiles, // in: the number of input files char* fname[], // in: the input file names vec_int_str_type& vec_ret) // out: a vector of properties { #if defined(_OPENMP) omp_set_dynamic(0); omp_set_max_active_levels(1); #pragma omp parallel { vec_int_str_type vec_loc; // thread local copy #pragma omp for nowait schedule(static,1) #endif for (int i = 0; i < nfiles; ++i) { char line[MAX_LINE_LEN_L+1]; char* found; llil_int_type count; FILE* fh = ::fopen(fname[i], "r"); if (fh == NULL) { std::cerr << "Error opening '" << fname[i] << "' : errno=" + << errno << "\n"; continue; } while ( ::fgets(line, MAX_LINE_LEN_L, fh) != NULL ) { found = ::strchr(line, '\t'); count = ::atoll( &line[found - line + 1] ); line[found - line] = '\0'; // word #ifdef MAX_STR_LEN_L str_type fixword { { '\0', '\0', '\0', '\0', '\0', '\0' } +}; ::memcpy( fixword.data(), line, found - line ); #if defined(_OPENMP) vec_loc.emplace_back( -count, fixword ); #else vec_ret.emplace_back( -count, fixword ); #endif #else #if defined(_OPENMP) vec_loc.emplace_back( -count, line ); #else vec_ret.emplace_back( -count, line ); #endif #endif } ::fclose(fh); } #if defined(_OPENMP) #pragma omp critical vec_ret.insert(vec_ret.end(), vec_loc.begin(), vec_loc.end()); } #endif // Needs to be sorted by word for later sum of adjacent count field +s to work #if defined(_OPENMP) __gnu_parallel::sort( vec_ret.begin(), vec_ret.end(), [](const int_str_type& left, const int_str_type& right) { return + left.second < right.second; } ); #else std::sort( vec_ret.begin(), vec_ret.end(), [](const int_str_type& left, const int_str_type& right) { return + left.second < right.second; } ); #endif }
|
---|