// tdriver.cpp
//
// NOTE(review): the header names and template arguments in this section were
// missing from the text under review. They are reconstructed here from how
// each name is used; confirm against the upstream copy of this file.

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <functional>
#include <iostream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

// ----------------------------------------------------------------------------

// Integer type used for the per-word counts.
using llil_int_type = std::int_fast64_t;

// All words in big1.txt, big2.txt, big3.txt are <= 6 chars in length.
// big.txt  max word length is 6
//
// To use (limited length) fixed length strings uncomment the next line.
#define MAX_STR_LEN_L 6

#ifdef MAX_STR_LEN_L
// Fixed-width key: exactly MAX_STR_LEN_L chars. Comparison always looks at all
// MAX_STR_LEN_L bytes, so any unused trailing bytes take part in both equality
// and ordering.
struct str_type : std::array<char, MAX_STR_LEN_L> {
   bool operator==( const str_type& o ) const {
      return ::memcmp(this->data(), o.data(), MAX_STR_LEN_L) == 0;
   }
   bool operator<( const str_type& o ) const {
      return ::memcmp(this->data(), o.data(), MAX_STR_LEN_L) < 0;
   }
};
#else
// Variable-length key. Comparison uses strcmp, i.e. it stops at the first NUL.
struct str_type : std::basic_string<char> {
   bool operator==( const str_type& o ) const {
      return ::strcmp(this->data(), o.data()) == 0;
   }
   bool operator<( const str_type& o ) const {
      return ::strcmp(this->data(), o.data()) < 0;
   }
};
#endif

// (word, count) record and the vector of records that gets sorted for output.
using str_int_type     = std::pair<str_type, llil_int_type>;
using vec_str_int_type = std::vector<str_int_type>;

// inject specialization of std::hash for str_type into namespace std
namespace std {
   template<> struct hash<str_type> {
      // Hashes the v.size() chars starting at v.data() by delegating to the
      // standard string_view hash.
      std::size_t operator()( str_type const& v ) const noexcept {
#if 0
         return boost::hash_range( v.cbegin(), v.cend() );
#else
         std::basic_string_view<char> bv {
            reinterpret_cast<const char*>(v.data()), v.size() * sizeof(char) };
         return std::hash<std::basic_string_view<char>>()(bv);
#endif
      }
   };
}

// Test with std::map, std::unordered_map or phmap::parallel_flat_hash_map
// ...
or boost::unordered_map or ankerl::unordered_dense::map #define MT_STD_MAP_L 0 #define MT_STD_UNORDERED_MAP_L 1 #define MT_PARALLEL_FLAT_HASH_MAP_L 2 #define MT_BOOST_UNORDERED_MAP_L 3 #define MT_ANKERL_UNORDERED_DENSE_MAP_L 6 // Uncomment one of the map types below // #define MAP_TYPE_L MT_STD_MAP_L // #define MAP_TYPE_L MT_STD_UNORDERED_MAP_L #define MAP_TYPE_L MT_PARALLEL_FLAT_HASH_MAP_L // #define MAP_TYPE_L MT_BOOST_UNORDERED_MAP_L // #define MAP_TYPE_L MT_ANKERL_UNORDERED_DENSE_MAP_L #if MAP_TYPE_L == MT_STD_MAP_L #include using map_str_int_type = std::map; #elif MAP_TYPE_L == MT_STD_UNORDERED_MAP_L #include using map_str_int_type = std::unordered_map; #elif MAP_TYPE_L == MT_PARALLEL_FLAT_HASH_MAP_L #include // create the parallel_flat_hash_map without internal mutexes using map_str_int_type = phmap::parallel_flat_hash_map< str_type, llil_int_type, phmap::priv::hash_default_hash, phmap::priv::hash_default_eq, phmap::priv::Allocator>, 8, phmap::NullMutex >; #elif MAP_TYPE_L == MT_BOOST_UNORDERED_MAP_L #include using map_str_int_type = boost::unordered_map; #elif MAP_TYPE_L == MT_ANKERL_UNORDERED_DENSE_MAP_L #include using map_str_int_type = ankerl::unordered_dense::map; #else #error "Unsupported map_str_int_type" #endif // Simple RAII timer ----------------------------------------------------------- // Create a MyTimer object in a scope: // { // MyTimer tt; // ... 
// } // to automatically print the time taken in the block to stderr #include inline double elaspe_time( std::chrono::high_resolution_clock::time_point cend, std::chrono::high_resolution_clock::time_point cstart) { return double( std::chrono::duration_cast(cend - cstart).count() ) * 1e-3; } class MyTimer { public: MyTimer() { stnow_m = std::chrono::high_resolution_clock::now(); } ~MyTimer() { auto endnow = std::chrono::high_resolution_clock::now(); std::cerr << " (" << elaspe_time(endnow, stnow_m) << " seconds)\n"; } private: std::chrono::time_point stnow_m; }; // --------------------------------------------------------------------- #include "get_properties.inl" int main(int argc, char* argv[]) { if (argc < 2) { std::cerr << "usage: tdriver file1 file2 ... >out.txt\n"; return 1; } #ifdef MAX_STR_LEN_L std::cerr << "tdriver (fixed string length=" << MAX_STR_LEN_L << ") start\n"; #else std::cerr << "tdriver start\n"; #endif #if MAP_TYPE_L == MT_STD_MAP_L std::cerr << "use std::map\n"; #elif MAP_TYPE_L == MT_STD_UNORDERED_MAP_L std::cerr << "use std::unordered_map\n"; #elif MAP_TYPE_L == MT_PARALLEL_FLAT_HASH_MAP_L std::cerr << "use phmap::parallel_flat_hash_map\n"; #elif MAP_TYPE_L == MT_BOOST_UNORDERED_MAP_L std::cerr << "use boost::unordered_map\n"; #elif MAP_TYPE_L == MT_ANKERL_UNORDERED_DENSE_MAP_L std::cerr << "use ankerl::unordered_dense::map\n"; #else #error "Unsupported map_str_int_type" #endif // Get the list of input files from the command line int nfiles = argc - 1; char** fname = &argv[1]; map_str_int_type mymap; for ( int i = 0; i < nfiles; ++i ) { MyTimer tt; llil_int_type nlines = get_properties(fname[i], mymap); std::cerr << fname[i] << ": nlines=" << nlines; } // Store the properties into a vector vec_str_int_type propvec( mymap.begin(), mymap.end() ); mymap.clear(); // Sort the vector by (count) in reverse order, (name) in lexical order std::sort( // Standard sort propvec.begin(), propvec.end(), [](const str_int_type& left, const str_int_type& 
right) { return left.second != right.second ? left.second > right.second : left.first < right.first; } ); // Output the sorted vector #ifdef MAX_STR_LEN_L for ( auto const& n : propvec ) ::printf("%.*s\t%ld\n", MAX_STR_LEN_L, n.first.data(), n.second); #else for ( auto const& n : propvec ) std::cout << n.first << "\t" << n.second << "\n"; #endif return 0; }