// llil.cpp. C++ 11 version of Perl llil.pl.
// g++ compile on Linux:
//   g++ -o llil -std=c++11 -Wall -O3 llil.cpp
// This g++ command also works with mingw C++ compiler (https://sourceforge.net/projects/mingw-w64)
// that comes bundled with Strawberry Perl (C:\Strawberry\c\bin\g++.exe).
// Example run:  llil tt1.txt tt2.txt >out.txt

// Uncomment next line to sort by creating a multimap (instead of via the sort function)
// #define LLIL_SORT_VIA_MULTIMAP_L 1

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <cstdio>

#include <string>
#include <vector>
#include <set>
#include <map>
#include <unordered_map>

#include <algorithm>
#include <stdexcept>
#include <utility>

#include <iostream>
#include <fstream>
#include <sstream>

// ------------------------------------------------------------
// Performance hacks to speed up IO.
// See https://www.reddit.com/r/rust/comments/9xedap/how_to_achieve_fast_stdinstdout_io_suitable_for/
// Avoid flush by using "\n" instead of std::endl (this made a big difference in this program!)
// This one made almost no difference in this program:
// const auto io_speed_up =[](){
//    std::ios::sync_with_stdio(false);
//    std::cin.tie(nullptr);
//    return nullptr;
// }();
// ------------------------------------------------------------

using str_int_type     = std::pair<std::string, int>;
using map_str_int_type = std::unordered_map<std::string, int>;
using vec_str_int_type = std::vector<str_int_type>;

// Mimic the Perl get_properties subroutine.
//
// Reads each input file line by line. A line is split at its first tab into
// a name (before the tab) and a count (after the tab); the count is added to
// the running total stored under that name in hash_ret.
//
// Lines that have no tab, or whose count cannot be parsed as an int, are
// skipped instead of letting std::stoi's exception terminate the program; the
// number of skipped lines is reported on stderr once per file.
//
// Returns true if every file could be opened. On the first file that cannot
// be opened an error is written to stderr and false is returned; hash_ret then
// holds only the data read so far.
static bool get_properties(
   int               nfiles,    // in: the number of input files
   char*             fname[],   // in: the input file names
   map_str_int_type& hash_ret)  // out: a hash of properties
{
   for (int i = 0; i < nfiles; ++i) {
      std::ifstream llfile(fname[i]);
      if (!llfile) {
         std::cerr << "Error opening '" << fname[i] << "'\n";
         return false;
      }

      std::size_t nskipped = 0;
      for (std::string line; std::getline(llfile, line); ) {
         const std::string::size_type tab = line.find('\t');
         if (tab == std::string::npos) {
            ++nskipped;
            continue;
         }

         int count = 0;
         try {
            count = std::stoi(line.substr(tab + 1));
         }
         catch (const std::logic_error&) {
            // std::invalid_argument (not a number) or std::out_of_range
            ++nskipped;
            continue;
         }

         hash_ret[line.substr(0, tab)] += count;
      }

      if (nskipped != 0) {
         std::cerr << "Skipped " << nskipped << " malformed line(s) in '" << fname[i] << "'\n";
      }
   }
   return true;
}

#ifdef LLIL_SORT_VIA_MULTIMAP_L
// Convert a std::unordered_map<key, value> to a std::multimap<value, key>
// so that the entries become ordered by count.
static std::multimap<int, std::string> invert_map(const std::unordered_map<std::string, int>& m)
{
   std::multimap<int, std::string> mm;
   for (const auto& kv : m) {
      mm.insert( std::make_pair(kv.second, kv.first) );
   }
   return mm;
}
#endif

// Program entry point: accumulate the counts from all files named on the
// command line, then write "name<TAB>count" lines to stdout, highest count
// first. Progress and timing information goes to stderr.
// Exit status is 1 on a usage error or if an input file cannot be opened.
int main(int argc, char* argv[])
{
   if (argc < 2) {
      std::cerr << "usage: llil file1 file2 ... >out.txt\n";
      return 1;
   }
#ifdef LLIL_SORT_VIA_MULTIMAP_L
   std::cerr << "llil start (multimap version)\n";
#else
   std::cerr << "llil start (sort version)\n";
#endif
   const std::time_t tstart1 = std::time(nullptr);

   // Create the hash of properties
   map_str_int_type hash_ret;
   if (!get_properties(argc - 1, &argv[1], hash_ret)) {
      // Do not emit partial results as if the run had succeeded.
      return 1;
   }

   const std::time_t tend1 = std::time(nullptr);
   const long taken1 = static_cast<long>(std::difftime(tend1, tstart1) + 0.5);
   std::cerr << "get_properties : " << taken1 << " secs\n";

   // Sort descending by value, i.e. mimic this Perl code in C++:
   //   sort { $href->{$b} <=> $href->{$a} || $a cmp $b } keys %{$href}
   const std::time_t tstart2 = std::time(nullptr);
#ifdef LLIL_SORT_VIA_MULTIMAP_L
   // NOTE: entries with equal counts keep the multimap's insertion order,
   // which comes from iterating the unordered_map, so ties are NOT ordered
   // by name here (unlike the sort version below).
   const std::multimap<int, std::string> newmap = invert_map(hash_ret);
   for (std::multimap<int, std::string>::const_reverse_iterator it = newmap.rbegin(); it != newmap.rend(); ++it) {
      std::cout << it->second << '\t' << it->first << "\n";
   }
#else
   vec_str_int_type v( hash_ret.begin(), hash_ret.end() );
   std::sort( v.begin(), v.end(),
      [](const str_int_type& left, const str_int_type& right) {
         // Higher count first; equal counts fall back to ascending name.
         return right.second != left.second
            ? right.second < left.second
            : left.first < right.first;
      }
   );
   for ( const str_int_type& n : v ) {
      std::cout << n.first << '\t' << n.second << "\n";
   }
#endif
   const std::time_t tend2 = std::time(nullptr);
   const long taken2 = static_cast<long>(std::difftime(tend2, tstart2) + 0.5);
   const long taken  = static_cast<long>(std::difftime(tend2, tstart1) + 0.5);
   std::cerr << "sort + output : " << taken2 << " secs\n";
   std::cerr << "total : " << taken << " secs\n";

   return 0;
}