clang++ -o llil4vec -std=c++11 -Wall -O3 llil4vec.cpp
clang++ -o llil4vec -std=c++20 -Wall -O3 llil4vec.cpp # faster
# enable parallel via -fopenmp
clang++ -o llil4vec-omp -std=c++11 -fopenmp -Wall -O3 llil4vec.cpp
clang++ -o llil4vec-omp -std=c++20 -fopenmp -Wall -O3 llil4vec.cpp # faster
####
$ time ./llil4vec big1.txt big2.txt big3.txt >out.txt
std:c++11: 2.901 secs
std:c++20: 2.850 secs
$ time OMP_NUM_THREADS=3 ./llil4vec-omp big1.txt big2.txt big3.txt >out.txt
std:c++11: 2.308 secs
std:c++20: 2.267 secs
####
#if defined(_OPENMP)
#include <omp.h>
#include <parallel/algorithm>
#else
#include <algorithm>
#endif
...
// Read every input file, append one (negated count, word) entry per input
// line to vec_ret, then sort vec_ret by word.
//
// Each line is expected to have the form "word<TAB>count". Lines that contain
// no tab character are skipped, because there is no field separator to split
// on. A file that cannot be opened is reported on std::cerr and skipped.
//
// When compiled with OpenMP, the files are distributed over the threads of a
// parallel region; each thread collects its entries in a private vector that
// is appended to vec_ret inside a critical section.
static void get_properties(
   int               nfiles,    // in:  the number of input files
   char*             fname[],   // in:  the input file names
   vec_int_str_type& vec_ret)   // out: a vector of properties
{
#if defined(_OPENMP)
   omp_set_dynamic(0);
   omp_set_max_active_levels(1);
   #pragma omp parallel
   {
      vec_int_str_type vec_loc;   // thread local copy
      #pragma omp for nowait schedule(static,1)
#endif
      for (int i = 0; i < nfiles; ++i) {
         char line[MAX_LINE_LEN_L+1];
         FILE* fh = ::fopen(fname[i], "r");
         if (fh == NULL) {
            std::cerr << "Error opening '" << fname[i] << "' : errno=" << errno << "\n";
            continue;
         }
         while ( ::fgets(line, MAX_LINE_LEN_L, fh) != NULL ) {
            char* found = ::strchr(line, '\t');
            if (found == NULL) {
               // No tab separator (blank or malformed line, or the tail of
               // an over-long line split by fgets): nothing to parse.
               continue;
            }
            llil_int_type count = ::atoll(found + 1);
            *found = '\0';   // terminate the word in place
#ifdef MAX_STR_LEN_L
            str_type fixword { { '\0', '\0', '\0', '\0', '\0', '\0' } };
            // Never copy more bytes than the fixed-size word can hold;
            // a longer word is truncated instead of overflowing fixword.
            size_t word_len = static_cast<size_t>(found - line);
            if (word_len > fixword.size()) {
               word_len = fixword.size();
            }
            ::memcpy( fixword.data(), line, word_len );
   #if defined(_OPENMP)
            vec_loc.emplace_back( -count, fixword );
   #else
            vec_ret.emplace_back( -count, fixword );
   #endif
#else
   #if defined(_OPENMP)
            vec_loc.emplace_back( -count, line );
   #else
            vec_ret.emplace_back( -count, line );
   #endif
#endif
         }
         ::fclose(fh);
      }
#if defined(_OPENMP)
      // Merge this thread's entries into the shared result, one thread at a time.
      #pragma omp critical
      vec_ret.insert(vec_ret.end(), vec_loc.begin(), vec_loc.end());
   }
#endif
   // Needs to be sorted by word for later sum of adjacent count fields to work
#if defined(_OPENMP)
   __gnu_parallel::sort( vec_ret.begin(), vec_ret.end(),
      [](const int_str_type& left, const int_str_type& right) { return left.second < right.second; }
   );
#else
   std::sort( vec_ret.begin(), vec_ret.end(),
      [](const int_str_type& left, const int_str_type& right) { return left.second < right.second; }
   );
#endif
}