// ---------------------------------------------------------------------------- // tally-count.cpp // Tally adjacent count fields of duplicate key names. // // Obtain the fast_io library (required dependency): // git clone --depth=1 https://github.com/cppfastio/fast_io // // clang++ compile on Linux: // clang++ -o tally-count -std=c++20 -Wall -O3 tally-count.cpp // // Example run: // LC_ALL=C parsort -k1 big{1,2,3}.txt | ./tally-count |\ // LC_ALL=C parsort -k2nr > out.txt // ---------------------------------------------------------------------------- #include // The fast_io header must come after chrono, else build error: // "no member named 'concatln' in namespace 'fast_io'" #include "fast_io/include/fast_io.h" #include #include #include #include #include #include #include // fast_atoll64 // https://stackoverflow.com/questions/16826422/ // c-most-efficient-way-to-convert-string-to-int-faster-than-atoi inline int64_t fast_atoll64( const char* str ) { int64_t val = 0; int sign = 0; if ( *str == '-' ) { sign = 1, ++str; } uint8_t digit; while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit; return sign ? -val : val; } #define MAX_LINE_LEN_L 255 int main(int argc, char* argv[]) { std::array line, name; char* found; long long count, sum; int flag = 0; // obtain the first key-value pair delimited by tab while ( ::fgets( line.data(), static_cast(MAX_LINE_LEN_L), ::stdin ) != NULL ) { if ( ( found = std::find( line.begin(), line.end(), '\t' ) ) == line.end() ) continue; sum = fast_atoll64( found + 1 ); *found = '\0'; // key name ::memcpy( name.data(), line.data(), found - line.data() + 1 ); flag = 1; break; } // process the rest of standard input while ( ::fgets( line.data(), static_cast(MAX_LINE_LEN_L), ::stdin ) != NULL ) { if ( ( found = std::find( line.begin(), line.end(), '\t' ) ) == line.end() ) continue; count = fast_atoll64( found + 1 ); *found = '\0'; // key name if ( ! ::strcmp( line.data(), name.data() ) ) { sum += count; } else { fast_io::io::println( fast_io::mnp::os_c_str(name.data()), "\t", sum ); ::memcpy( name.data(), line.data(), found - line.data() + 1 ); sum = count; } } if ( flag ) fast_io::io::println( fast_io::mnp::os_c_str(name.data()), "\t", sum ); return 0; }