in reply to Rosetta Test: Long List is Long
After testing the correctness of the new get_properties interface with a simple Catch2 unit test, I was curious to learn of its performance characteristics. So I removed Catch2 and converted the unit test into a simple get_properties test driver, tdriver.cpp.
tdriver.cpp
// tdriver.cpp #include <cstdio> #include <cstddef> #include <cstdint> #include <cstdlib> #include <cstring> #include <ctime> #include <string> #include <array> #include <vector> #include <utility> #include <iterator> #include <execution> #include <algorithm> #include <boost/container_hash/hash.hpp> #include <iostream> #include <iomanip> #include <fstream> #include <sstream> // ------------------------------------------------------------------- +--------- typedef int_fast64_t llil_int_type; // All words in big1.txt, big2.txt, big3.txt are <= 6 chars in length. // big.txt max word length is 6 // // To use (limited length) fixed length strings uncomment the next lin +e. #define MAX_STR_LEN_L 6 #ifdef MAX_STR_LEN_L struct str_type : std::array<char, MAX_STR_LEN_L> { bool operator==( const str_type& o ) const { return ::memcmp(this->data(), o.data(), MAX_STR_LEN_L) == 0; } bool operator<( const str_type& o ) const { return ::memcmp(this->data(), o.data(), MAX_STR_LEN_L) < 0; } }; #else struct str_type : std::basic_string<char> { bool operator==( const str_type& o ) const { return ::strcmp(this->data(), o.data()) == 0; } bool operator<( const str_type& o ) const { return ::strcmp(this->data(), o.data()) < 0; } }; #endif using str_int_type = std::pair<str_type, llil_int_type>; using vec_str_int_type = std::vector<str_int_type>; // inject specialization of std::hash for str_type into namespace std namespace std { template<> struct hash<str_type> { std::size_t operator()( str_type const& v ) const noexcept { #if 0 return boost::hash_range( v.cbegin(), v.cend() ); #else std::basic_string_view<char> bv { reinterpret_cast<const char*>(v.data()), v.size() * sizeof +(char) }; return std::hash<std::basic_string_view<char>>()(bv); #endif } }; } // Test with std::map, std::unordered_map or phmap::parallel_flat_hash +_map // ... or boost::unordered_map or ankerl::unordered_dense::map #define MT_STD_MAP_L 0 #define MT_STD_UNORDERED_MAP_L 1 #define MT_PARALLEL_FLAT_HASH_MAP_L 2 #define MT_BOOST_UNORDERED_MAP_L 3 #define MT_ANKERL_UNORDERED_DENSE_MAP_L 6 // Uncomment one of the map types below // #define MAP_TYPE_L MT_STD_MAP_L // #define MAP_TYPE_L MT_STD_UNORDERED_MAP_L #define MAP_TYPE_L MT_PARALLEL_FLAT_HASH_MAP_L // #define MAP_TYPE_L MT_BOOST_UNORDERED_MAP_L // #define MAP_TYPE_L MT_ANKERL_UNORDERED_DENSE_MAP_L #if MAP_TYPE_L == MT_STD_MAP_L #include <map> using map_str_int_type = std::map<str_type, llil_int_type>; #elif MAP_TYPE_L == MT_STD_UNORDERED_MAP_L #include <unordered_map> using map_str_int_type = std::unordered_map<str_type, llil_int_type>; #elif MAP_TYPE_L == MT_PARALLEL_FLAT_HASH_MAP_L #include <parallel_hashmap/phmap.h> // create the parallel_flat_hash_map without internal mutexes using map_str_int_type = phmap::parallel_flat_hash_map< str_type, llil_int_type, phmap::priv::hash_default_hash<str_type>, phmap::priv::hash_default_eq<str_type>, phmap::priv::Allocator<phmap::priv::Pair<const str_type, llil_int_t +ype>>, 8, phmap::NullMutex >; #elif MAP_TYPE_L == MT_BOOST_UNORDERED_MAP_L #include <boost/unordered_map.hpp> using map_str_int_type = boost::unordered_map<str_type, llil_int_type> +; #elif MAP_TYPE_L == MT_ANKERL_UNORDERED_DENSE_MAP_L #include <ankerl/unordered_dense.h> using map_str_int_type = ankerl::unordered_dense::map<str_type, llil_i +nt_type>; #else #error "Unsupported map_str_int_type" #endif // Simple RAII timer ------------------------------------------------- +---------- // Create a MyTimer object in a scope: // { // MyTimer tt; // ... // } // to automatically print the time taken in the block to stderr #include <chrono> inline double elaspe_time( std::chrono::high_resolution_clock::time_point cend, std::chrono::high_resolution_clock::time_point cstart) { return double( std::chrono::duration_cast<std::chrono::milliseconds +>(cend - cstart).count() ) * 1e-3; } class MyTimer { public: MyTimer() { stnow_m = std::chrono::high_resolution_clock::now(); } ~MyTimer() { auto endnow = std::chrono::high_resolution_clock::now(); std::cerr << " (" << elaspe_time(endnow, stnow_m) << " seconds)\ +n"; } private: std::chrono::time_point<std::chrono::high_resolution_clock> stnow_m +; }; // ------------------------------------------------------------------- +-- #include "get_properties.inl" int main(int argc, char* argv[]) { if (argc < 2) { std::cerr << "usage: tdriver file1 file2 ... >out.txt\n"; return 1; } #ifdef MAX_STR_LEN_L std::cerr << "tdriver (fixed string length=" << MAX_STR_LEN_L << ") + start\n"; #else std::cerr << "tdriver start\n"; #endif #if MAP_TYPE_L == MT_STD_MAP_L std::cerr << "use std::map\n"; #elif MAP_TYPE_L == MT_STD_UNORDERED_MAP_L std::cerr << "use std::unordered_map\n"; #elif MAP_TYPE_L == MT_PARALLEL_FLAT_HASH_MAP_L std::cerr << "use phmap::parallel_flat_hash_map\n"; #elif MAP_TYPE_L == MT_BOOST_UNORDERED_MAP_L std::cerr << "use boost::unordered_map\n"; #elif MAP_TYPE_L == MT_ANKERL_UNORDERED_DENSE_MAP_L std::cerr << "use ankerl::unordered_dense::map\n"; #else #error "Unsupported map_str_int_type" #endif // Get the list of input files from the command line int nfiles = argc - 1; char** fname = &argv[1]; map_str_int_type mymap; for ( int i = 0; i < nfiles; ++i ) { MyTimer tt; llil_int_type nlines = get_properties(fname[i], mymap); std::cerr << fname[i] << ": nlines=" << nlines; } // Store the properties into a vector vec_str_int_type propvec( mymap.begin(), mymap.end() ); mymap.clear(); // Sort the vector by (count) in reverse order, (name) in lexical o +rder std::sort( // Standard sort propvec.begin(), propvec.end(), [](const str_int_type& left, const str_int_type& right) { return left.second != right.second ? left.second > right.second : left.first < right.first; } ); // Output the sorted vector #ifdef MAX_STR_LEN_L for ( auto const& n : propvec ) ::printf("%.*s\t%ld\n", MAX_STR_LEN_L, n.first.data(), n.second) +; #else for ( auto const& n : propvec ) std::cout << n.first << "\t" << n.second << "\n"; #endif return 0; }
Update: added support for MT_ANKERL_UNORDERED_DENSE_MAP_L via a github clone:
and adding "-I $HOME/myabseil/unordered_dense/include" to the clang++ command line as shown below.~/myabseil$ git clone https://github.com/martinus/unordered_dense
After building on Ubuntu with:
clang++ -o tdriver -std=c++20 -Wall -O3 -I "$HOME/local-parallel-hashm +ap/parallel-hashmap" -I "$HOME/local-boost/boost_1_81_0" -I "$HOME/m +yabseil/unordered_dense/include" tdriver.cpp
a typical run is:
$ ./tdriver big1.txt big2.txt big3.txt big4.txt big5.txt big6.txt >f.t +mp $ cmp f.tmp good.tmp
with cmp used to verify that the tdriver output is correct.
Results
I show here just some basic results of running this program (with MAX_STR_LEN defined) processing six shuffled big.txt files, where each of the six files was generated using gen-llil.pl from Re^3: Rosetta Code: Long List is Long (Test File Generators) with:
and shuffled with shuffle.pl from Re: Rosetta Code: Long List is Long - JudySL code with:perl gen-llil.pl big1.txt 200 3 1 perl gen-llil.pl big2.txt 200 3 1 ...
perl shuffle.pl big1.txt >tmp && mv tmp big1.txt perl shuffle.pl big2.txt >tmp && mv tmp big2.txt ...
$ ./tdriver big1.txt big2.txt big3.txt big4.txt big5.txt big6.txt >f.t +mp tdriver (fixed string length=6) start use std::map big1.txt: nlines=3515200 (7.492 seconds) big2.txt: nlines=3515200 (9.953 seconds) big3.txt: nlines=3515200 (11.089 seconds) big4.txt: nlines=3515200 (10.696 seconds) big5.txt: nlines=3515200 (11.449 seconds) big6.txt: nlines=3515200 (11.765 seconds) $ cmp f.tmp good.tmp $ ./tdriver big1.txt big2.txt big3.txt big4.txt big5.txt big6.txt >f.t +mp tdriver (fixed string length=6) start use std::unordered_map big1.txt: nlines=3515200 (2.134 seconds) big2.txt: nlines=3515200 (2.859 seconds) big3.txt: nlines=3515200 (2.244 seconds) big4.txt: nlines=3515200 (3.666 seconds) big5.txt: nlines=3515200 (2.184 seconds) big6.txt: nlines=3515200 (1.882 seconds) $ cmp f.tmp good.tmp $ ./tdriver big1.txt big2.txt big3.txt big4.txt big5.txt big6.txt >f.t +mp tdriver (fixed string length=6) start use phmap::parallel_flat_hash_map big1.txt: nlines=3515200 (0.597 seconds) big2.txt: nlines=3515200 (1.08 seconds) big3.txt: nlines=3515200 (1.604 seconds) big4.txt: nlines=3515200 (0.795 seconds) big5.txt: nlines=3515200 (2.317 seconds) big6.txt: nlines=3515200 (0.894 seconds) $ cmp f.tmp good.tmp $ ./tdriver big1.txt big2.txt big3.txt big4.txt big5.txt big6.txt >f.t +mp tdriver (fixed string length=6) start use boost::unordered_map big1.txt: nlines=3515200 (2.1 seconds) big2.txt: nlines=3515200 (2.037 seconds) big3.txt: nlines=3515200 (1.327 seconds) big4.txt: nlines=3515200 (2.935 seconds) big5.txt: nlines=3515200 (1.436 seconds) big6.txt: nlines=3515200 (1.553 seconds) $ cmp f.tmp good.tmp UPDATE: $ ./tdriver big1.txt big2.txt big3.txt big4.txt big5.txt big6.txt >f.t +mp tdriver (fixed string length=6) start use ankerl::unordered_dense::map big1.txt: nlines=3515200 (0.994 seconds) big2.txt: nlines=3515200 (1.11 seconds) big3.txt: nlines=3515200 (0.767 seconds) big4.txt: nlines=3515200 (1.784 seconds) big5.txt: nlines=3515200 (0.866 seconds) big6.txt: nlines=3515200 (0.772 seconds) $ cmp f.tmp good.tmp
Though surprised by the wide variation in times with different files of the same size, I was relieved to note that this result agrees with marioroy's experiences with llil4map. Though there is quite a bit of variation between runs, phmap::parallel_flat_hash_map and ankerl::unordered_dense::map are clearly the fastest.
Abseil
After doing some random searching for other C++ hash implementations, I stumbled upon Google's Open Source Abseil C++ library. In particular, its absl::flat_hash_map looked worth a try given the following references:
I was unable to build absl::flat_hash_map by just dropping in a header file, as I'd done with most of the other third-party code. So I built the Abseil library in the official way, using CMake ... which wasn't easy, given I was a complete Abseil and CMake ignoramus. :)
Note that you don't need to build the whole Abseil library in order to use it. I created a new $HOME/myabseil directory then performed the following initial steps in this directory:
ClangOverrides.txt
I also created this ClangOverrides.txt file in my $HOME directory to specify the compiler flags to use for the build:
SET (CMAKE_C_FLAGS_INIT "-Wall -std=c11") SET (CMAKE_C_FLAGS_DEBUG_INIT "-g") SET (CMAKE_C_FLAGS_MINSIZEREL_INIT "-Os -DNDEBUG") SET (CMAKE_C_FLAGS_RELEASE_INIT "-O3 -DNDEBUG") SET (CMAKE_C_FLAGS_RELWITHDEBINFO_INIT "-O2 -g") SET (CMAKE_CXX_FLAGS_INIT "-Wall -std=c++20") SET (CMAKE_CXX_FLAGS_DEBUG_INIT "-g") SET (CMAKE_CXX_FLAGS_MINSIZEREL_INIT "-Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE_INIT "-O3 -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "-O2 -g")
and further set these two environment variables to get the build to use the clang compiler:
$ export CC=/usr/bin/clang $ export CXX=/usr/bin/clang++
CMakeLists.txt
Finally, I created CMakeLists.txt in my $HOME/myabseil directory containing:
cmake_minimum_required(VERSION 3.10) project(my_tdriver) # Abseil is ok with either C++14 or C++17 or C++20 set(CMAKE_CXX_STANDARD 20) # Process Abseil's CMake build system add_subdirectory(abseil-cpp) add_executable(tdriver tdriver.cc) # Declare dependencies on the absl libraries # See: https://github.com/abseil/abseil-cpp/blob/master/absl/container +/CMakeLists.txt (flat_hash_map) target_link_libraries(tdriver absl::container_memory absl::core_headers absl::hash_function_defaults absl::raw_hash_map absl::algorithm_container absl::memory )
Building tdriver
With that done, I set up a recommended "out of source" CMake build:
$ cd $HOME/myabseil $ mkdir build $ cd build $ cmake -DCMAKE_USER_MAKE_RULES_OVERRIDE=~/ClangOverrides.txt -DCMAKE_ +BUILD_TYPE=Release ..
That generated a Makefile in the build directory, allowing me to kiss CMake goodbye, and just use the good old Unix make command from now on. :)
To see everything that happens during the build, I took to running:
make VERBOSE=1 2>&1 | tee makebuild.tmp
tdriver.cc
Finally, here is the tdriver.cc I used, very similar to tdriver.cpp but with get_properties.inl embedded in it:
// tdriver.cc #include <cstdio> #include <cstddef> #include <cstdint> #include <cstdlib> #include <cstring> #include <ctime> #include <string> #include <array> #include <vector> #include <utility> #include <iterator> #include <execution> #include <algorithm> // #include <boost/container_hash/hash.hpp> #include <iostream> #include <iomanip> #include <fstream> #include <sstream> // ------------------------------------------------------------------- +--------- typedef int_fast64_t llil_int_type; // All words in big1.txt, big2.txt, big3.txt are <= 6 chars in length. // big.txt max word length is 6 // long.txt max word length is 208 // // Based on rough benchmarking, the short fixed string hack below is o +nly // worth trying for MAX_STR_LEN_L up to about 22. // See also https://backlinko.com/google-keyword-study // // To use (limited length) fixed length strings uncomment the next lin +e. #define MAX_STR_LEN_L 6 #ifdef MAX_STR_LEN_L struct str_type : std::array<char, MAX_STR_LEN_L> { bool operator==( const str_type& o ) const { return ::memcmp(this->data(), o.data(), MAX_STR_LEN_L) == 0; } bool operator<( const str_type& o ) const { return ::memcmp(this->data(), o.data(), MAX_STR_LEN_L) < 0; } }; #else struct str_type : std::basic_string<char> { bool operator==( const str_type& o ) const { return ::strcmp(this->data(), o.data()) == 0; } bool operator<( const str_type& o ) const { return ::strcmp(this->data(), o.data()) < 0; } }; #endif using str_int_type = std::pair<str_type, llil_int_type>; using vec_str_int_type = std::vector<str_int_type>; // inject specialization of std::hash for str_type into namespace std namespace std { template<> struct hash<str_type> { std::size_t operator()( str_type const& v ) const noexcept { #if 0 return boost::hash_range( v.cbegin(), v.cend() ); #else std::basic_string_view<char> bv { reinterpret_cast<const char*>(v.data()), v.size() * sizeof +(char) }; return std::hash<std::basic_string_view<char>>()(bv); #endif } }; } // Test with std::map, std::unordered_map or phmap::parallel_flat_hash +_map // or boost::unorderedmap or absl::flat_hash_map #define MT_STD_MAP_L 0 #define MT_STD_UNORDERED_MAP_L 1 #define MT_PARALLEL_FLAT_HASH_MAP_L 2 #define MT_BOOST_UNORDERED_MAP_L 3 #define MT_GOOGLE_HASH_MAP_L 4 #define MT_GOOGLE_NODE_MAP_L 5 // Uncomment to use absl::Hash function object // #define MT_USE_ABSL_HASH_L 1 // Uncomment one of the map types below // #define MAP_TYPE_L MT_STD_MAP_L // #define MAP_TYPE_L MT_STD_UNORDERED_MAP_L // #define MAP_TYPE_L MT_PARALLEL_FLAT_HASH_MAP_L // #define MAP_TYPE_L MT_BOOST_UNORDERED_MAP_L #define MAP_TYPE_L MT_GOOGLE_HASH_MAP_L // #define MAP_TYPE_L MT_GOOGLE_NODE_MAP_L #if MAP_TYPE_L == MT_STD_MAP_L #include <map> using map_str_int_type = std::map<str_type, llil_int_type>; #elif MAP_TYPE_L == MT_STD_UNORDERED_MAP_L #include <unordered_map> using map_str_int_type = std::unordered_map<str_type, llil_int_type>; #elif MAP_TYPE_L == MT_PARALLEL_FLAT_HASH_MAP_L #include <parallel_hashmap/phmap.h> // create the parallel_flat_hash_map without internal mutexes using map_str_int_type = phmap::parallel_flat_hash_map< str_type, llil_int_type, phmap::priv::hash_default_hash<str_type>, phmap::priv::hash_default_eq<str_type>, phmap::priv::Allocator<phmap::priv::Pair<const str_type, llil_int_t +ype>>, 8, phmap::NullMutex >; #elif MAP_TYPE_L == MT_BOOST_UNORDERED_MAP_L #include <boost/unordered_map.hpp> using map_str_int_type = boost::unordered_map<str_type, llil_int_type> +; #elif MAP_TYPE_L == MT_GOOGLE_HASH_MAP_L #include <absl/container/flat_hash_map.h> #include <absl/container/node_hash_map.h> #ifdef MT_USE_ABSL_HASH_L using map_str_int_type = absl::flat_hash_map<str_type, llil_int_type, +absl::Hash<str_type>>; #else using map_str_int_type = absl::flat_hash_map<str_type, llil_int_type>; #endif #elif MAP_TYPE_L == MT_GOOGLE_NODE_MAP_L #include <absl/hash/hash.h> using map_str_int_type = absl::node_hash_map<str_type, llil_int_type>; #else #error "Unsupported map_str_int_type" #endif // Simple RAII timer ------------------------------------------------- +---------- // Create a MyTimer object in a scope: // { // MyTimer tt; // ... // } // to automatically print the time taken in the block to stderr #include <chrono> inline double elaspe_time( std::chrono::high_resolution_clock::time_point cend, std::chrono::high_resolution_clock::time_point cstart) { return double( std::chrono::duration_cast<std::chrono::milliseconds +>(cend - cstart).count() ) * 1e-3; } class MyTimer { public: MyTimer() { stnow_m = std::chrono::high_resolution_clock::now(); } ~MyTimer() { auto endnow = std::chrono::high_resolution_clock::now(); std::cerr << " (" << elaspe_time(endnow, stnow_m) << " seconds)\ +n"; } private: std::chrono::time_point<std::chrono::high_resolution_clock> stnow_m +; }; // ------------------------------------------------------------------- +-- // #include "get_properties.inl" // get_properties.inl // Note: str_type, llil_int_type and map_str_int_type are not defined +here. // llil_int_type is normally defined as: typedef int_fast64_t llil_int +_type; // Note: int_fast64_t is defined in <cstdint> // map_str_int_type is keyed by str_type with value of llil_int_type. // These three types must be defined by the code that includes this .i +nl file. // map_str_int_type can be many different types, std::map, std::unorde +red_map, ... // so long as all operations on map_upd below are supported. #include <cstdint> #include <cstdio> #include <array> #include <algorithm> inline llil_int_type fast_atoll64( const char* str ) { llil_int_type val = 0; // int sign = 0; // if ( *str == '-' ) { // sign = 1, ++str; // } uint8_t digit; while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit +; // return sign ? -val : val; return val; } // Limit line length and use ANSI C functions to try to boost performa +nce #define MAX_LINE_LEN_L 255 // Update map_upd with the properties found in file fname // Return the number of properties in file fname (or -1 if fname could + not be opened) static llil_int_type get_properties( const char* fname, // in : the input file name map_str_int_type& map_upd // inout: the map to be updated ) { std::array<char, MAX_LINE_LEN_L + 1> line; char* found; llil_int_type count; llil_int_type nprop = 0; FILE* fh = ::fopen(fname, "r"); if ( fh == NULL ) return -1; while ( ::fgets( line.data(), static_cast<int>(MAX_LINE_LEN_L), fh +) != NULL ) { ++nprop; found = std::find( line.begin(), line.end(), '\t' ); count = fast_atoll64( found + 1 ); // Note: using emplace() is faster than map_upd[fixword] += coun +t; #ifdef MAX_STR_LEN_L str_type fixword {}; // {} initializes all elements of fixword +to '\0' std::copy( line.begin(), found, fixword.begin() ); const auto [it, success] = map_upd.emplace( fixword, count ); #else *found = '\0'; const auto [it, success] = map_upd.emplace(str_type{ line.data() + }, count); #endif if (!success) it->second += count; } ::fclose(fh); return nprop; } // ------------------------------------------------------------------- +-- int main(int argc, char* argv[]) { if (argc < 2) { std::cerr << "usage: tdriver file1 file2 ... >out.txt\n"; return 1; } #ifdef MAX_STR_LEN_L std::cerr << "tdriver (fixed string length=" << MAX_STR_LEN_L << ") + start\n"; #else std::cerr << "tdriver start\n"; #endif #if MAP_TYPE_L == MT_STD_MAP_L std::cerr << "use std::map\n"; #elif MAP_TYPE_L == MT_STD_UNORDERED_MAP_L std::cerr << "use std::unordered_map\n"; #elif MAP_TYPE_L == MT_PARALLEL_FLAT_HASH_MAP_L std::cerr << "use phmap::parallel_flat_hash_map\n"; #elif MAP_TYPE_L == MT_BOOST_UNORDERED_MAP_L std::cerr << "use boost::unordered_map\n"; #elif MAP_TYPE_L == MT_GOOGLE_HASH_MAP_L std::cerr << "use absl::flat_hash_map\n"; #elif MAP_TYPE_L == MT_GOOGLE_NODE_MAP_L std::cerr << "use absl::node_hash_map\n"; #else #error "Unsupported map_str_int_type" #endif #ifdef MT_USE_ABSL_HASH_L std::cerr << "use absl::Hash function object in ctor\n"; #endif // Get the list of input files from the command line int nfiles = argc - 1; char** fname = &argv[1]; map_str_int_type mymap; for ( int i = 0; i < nfiles; ++i ) { MyTimer tt; llil_int_type nlines = get_properties(fname[i], mymap); std::cerr << fname[i] << ": nlines=" << nlines; } // Store the properties into a vector vec_str_int_type propvec( mymap.begin(), mymap.end() ); mymap.clear(); // Sort the vector by (count) in reverse order, (name) in lexical o +rder std::sort( // Standard sort propvec.begin(), propvec.end(), [](const str_int_type& left, const str_int_type& right) { return left.second != right.second ? left.second > right.second : left.first < right.first; } ); // Output the sorted vector #ifdef MAX_STR_LEN_L for ( auto const& n : propvec ) ::printf("%.*s\t%ld\n", MAX_STR_LEN_L, n.first.data(), n.second) +; #else for ( auto const& n : propvec ) std::cout << n.first << "\t" << n.second << "\n"; #endif return 0; }
Results
Abseil's absl::flat_hash_map ran very slightly faster than phmap::parallel_flat_hash_map and ankerl::unordered_dense::map (while being more painful to build :).
$ ./tdriver big1.txt big2.txt big3.txt big4.txt big5.txt big6.txt >f.t +mp tdriver (fixed string length=6) start use phmap::parallel_flat_hash_map big1.txt: nlines=3515200 (0.597 seconds) big2.txt: nlines=3515200 (1.08 seconds) big3.txt: nlines=3515200 (1.604 seconds) big4.txt: nlines=3515200 (0.795 seconds) big5.txt: nlines=3515200 (2.317 seconds) big6.txt: nlines=3515200 (0.894 seconds) $ cmp f.tmp good.tmp $ ./tdriver big1.txt big2.txt big3.txt big4.txt big5.txt big6.txt >f.t +mp tdriver (fixed string length=6) start use absl::flat_hash_map big1.txt: nlines=3515200 (0.568 seconds) big2.txt: nlines=3515200 (0.626 seconds) big3.txt: nlines=3515200 (1.067 seconds) big4.txt: nlines=3515200 (0.767 seconds) big5.txt: nlines=3515200 (1.057 seconds) big6.txt: nlines=3515200 (0.820 seconds) $ cmp f.tmp good.tmp
Updated 14-03-2023: Added support for MT_ANKERL_UNORDERED_DENSE_MAP_L to tdriver.cpp (ankerl::unordered_dense::map seems to have similar performance to phmap::parallel_flat_hash_map).
Minor formatting and wording improvements were made long after the original post was made.
|
---|
Replies are listed 'Best First'. | |
---|---|
Re^2: Rosetta Test: Long List is Long - Abseil, tdriver.cc
by marioroy (Prior) on Mar 14, 2023 at 11:27 UTC |