PerlMonks
Re: Rosetta Code: Long List is Long (Updated Solutions)

by eyepopslikeamosquito (Archbishop)
on Dec 05, 2022 at 09:32 UTC ( [id://11148563] )


in reply to Rosetta Code: Long List is Long

As you might expect, I've created improved versions of my original Perl and C++ solutions posted in the root node. Since I'm all out of fresh ideas to try, I'll post my improved solutions here. Further performance improvement suggestions are welcome.

Improved Perl Version llil2.pl

# llil2.pl
# Example run:  perl llil2.pl tt1.txt tt2.txt tt3.txt >out.txt
use strict;
use warnings;

# ----------------------------------------------------------------------
# LLiL specification
# ------------------
# A LLiL-format file is a text file.
# Each line consists of a lowercase name a TAB character and a non-negative integer count.
# That is, each line must match : ^[a-z]+\t\d+$
# For example, reading the LLiL-format files, tt1.txt containing:
#   camel\t42
#   pearl\t94
#   dromedary\t69
# and tt2.txt containing:
#   camel\t8
#   hello\t12345
#   dromedary\t1
# returns this hashref:
#   $hash_ret{"camel"}     = 50
#   $hash_ret{"dromedary"} = 70
#   $hash_ret{"hello"}     = 12345
#   $hash_ret{"pearl"}     = 94
# That is, values are added for items with the same key.
#
# To get the required LLiL text, you must sort the returned hashref
# descending by value and insert a TAB separator:
#   hello\t12345
#   pearl\t94
#   dromedary\t70
#   camel\t50
# To make testing via diff easier, we further sort ascending by name
# for lines with the same value.
# ----------------------------------------------------------------------

# Function get_properties
# Read a list of LLiL-format files
# Return a reference to a hash of properties
sub get_properties
{
   my $files = shift;   # in: reference to a list of LLiL-format files
   my %hash_ret;        # out: reference to a hash of properties
   for my $fname ( @{$files} ) {
      open( my $fh, '<', $fname ) or die "error: open '$fname': $!";
      while (<$fh>) {
         chomp;
         my ($word, $count) = split /\t/;
         $hash_ret{$word} += $count;
      }
      close($fh) or die "error: close '$fname': $!";
   }
   return \%hash_ret;
}

# ----------------- mainline ------------------------------------------

@ARGV or die "usage: $0 file...\n";
my @llil_files = @ARGV;

warn "llil2 start\n";
my $tstart1 = time;

my $href = get_properties( \@llil_files );

my $tend1  = time;
my $taken1 = $tend1 - $tstart1;
warn "get_properties : $taken1 secs\n";

my $tstart2 = time;

# Using two sorts is waaay faster than one in Perl for some reason! (see [id://11148545])
for my $key ( sort { $href->{$b} <=> $href->{$a} } sort keys %{$href} ) {
   print "$key\t$href->{$key}\n";
}

my $tend2  = time;
my $taken2 = $tend2 - $tstart2;
my $taken  = $tend2 - $tstart1;
warn "sort + output : $taken2 secs\n";
warn "total         : $taken secs\n";

Improved C++ Version llil2.cpp

// llil2.cpp.  C++ 11 version of Perl llil.pl.
// llil2.cpp is faster than llil.cpp while also clarifying limits:
//  - all keys should be less than 200 or so characters in length
//  - numbers are 64 bit integers (max: 9,223,372,036,854,775,807)
// g++ compile on Linux:
//   g++ -o llil2 -std=c++11 -Wall -O3 llil2.cpp
// This g++ command also works with mingw C++ compiler (https://sourceforge.net/projects/mingw-w64)
// that comes bundled with Strawberry Perl (C:\Strawberry\c\bin\g++.exe).
// Example run:  llil2 tt1.txt tt2.txt tt3.txt >out.txt

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <cstdio>

#include <string>
#include <vector>
#include <set>
#include <map>
#include <algorithm>
#include <utility>
#include <iostream>
#include <fstream>
#include <sstream>

static_assert(sizeof(size_t) == sizeof(int64_t), "size_t too small, need a 64-bit compile");

// ----------------------------------------------------------------------------
// Crude hack to see Windows Private Bytes in Task Manager by sleeping at
// program end (see also sleep hack at end of main)
// #include <chrono>
// #include <thread>

// For some performance hacks to speed up C++ I/O see:
//   https://www.reddit.com/r/rust/comments/9xedap/how_to_achieve_fast_stdinstdout_io_suitable_for/
// The only one we use here is to prefer "\n" to std::endl to reduce stdout flushing
// ----------------------------------------------------------------------------

typedef long long llil_int_type;
using str_int_type     = std::pair<std::string, llil_int_type>;
using map_str_int_type = std::map<std::string, llil_int_type>;
using vec_str_int_type = std::vector<str_int_type>;

// Mimic the Perl get_properties subroutine ----------------------------
// Limit line length and use lower level ANSI C functions to try to boost I/O performance
// TODO (maybe):
//  - reading: Try ::setvbuf(fh, NULL, _IOFBF, 65536) or some such on input files
//  - writing: Try ::setvbuf(stdout, stdout_buf, _IOFBF, sizeof(stdout_buf)) on stdout
//             ... or instead of writing to stdout, take an output file as a program argument
#define MAX_LINE_LEN_L 255
static void get_properties(
   int               nfiles,     // in: the number of input files
   char*             fname[],    // in: the input file names
   map_str_int_type& hash_ret)   // out: a hash of properties
{
   FILE* fh;
   char line[MAX_LINE_LEN_L+1];
   char* word;
   char* count;
   for (int i = 0; i < nfiles; ++i) {
      fh = ::fopen(fname[i], "r");
      if (fh == NULL) {
         std::cerr << "Error opening '" << fname[i] << "'\n";
         return;
      }
      while ( ::fgets(line, MAX_LINE_LEN_L, fh) != NULL ) {
         word  = ::strtok(line, "\t");
         count = ::strtok(NULL, "\n");
         hash_ret[word] += ::atoll(count);
      }
      ::fclose(fh);
   }
}

// ---------------------------------------------------------------------

int main(int argc, char* argv[])
{
   if (argc < 2) {
      std::cerr << "usage: llil2 file1 file2 ... >out.txt\n";
      return 1;
   }
   std::cerr << "llil2 start\n";
   time_t tstart1 = ::time(NULL);

   // Create the hash of properties
   map_str_int_type hash_ret;
   get_properties(argc - 1, &argv[1], hash_ret);

   time_t tend1  = ::time(NULL);
   long   taken1 = static_cast<long>(::difftime(tend1, tstart1) + 0.5);
   std::cerr << "get_properties : " << taken1 << " secs\n";

   // Sort descending by value, i.e. mimic this Perl code in C++:
   //   sort { $href->{$b} <=> $href->{$a} || $a cmp $b } keys %{$href}
   time_t tstart2 = ::time(NULL);

   vec_str_int_type v( hash_ret.begin(), hash_ret.end() );
   std::sort( v.begin(), v.end(),
      [](const str_int_type& left, const str_int_type& right) {
         return right.second != left.second ? right.second < left.second
                                            : left.first   < right.first;
      }
   );

   // Output the merged properties
   for ( auto const& n : v ) { std::cout << n.first << '\t' << n.second << '\n'; }

   time_t tend2  = ::time(NULL);
   long   taken2 = static_cast<long>(::difftime(tend2, tstart2) + 0.5);
   long   taken  = static_cast<long>(::difftime(tend2, tstart1) + 0.5);
   std::cerr << "sort + output  : " << taken2 << " secs\n";
   std::cerr << "total          : " << taken  << " secs\n";

   // Hack to see Private Bytes in Windows Task Manager (uncomment next line so process doesn't exit too quickly)
   // std::this_thread::sleep_for(std::chrono::milliseconds(90000000));

   return 0;
}

Performance Analysis

Performance of my second Perl version improved from:

get_properties : 11 secs
sort + output  : 74 secs
total          : 85 secs

to:

get_properties : 11 secs
sort + output  : 25 secs
total          : 36 secs

update (much later in llil2grt.pl) to:

get_properties : 10 secs
sort + output  : 20 secs
total          : 30 secs

Performance of my second C++ version improved from:

get_properties : 9 secs
sort + output  : 7 secs
total          : 16 secs

to:

get_properties : 6 secs
sort + output  : 6 secs
total          : 12 secs

Memory use (Windows Private Bytes) was 2,896,104K for the Perl version (update: 2,657,968K), 1,176,048K for the C++ std::unordered_map version (update: 1,218,720K for std::map).

Update: Surprisingly, making a one line change in llil2.cpp above from:

using map_str_int_type = std::unordered_map<std::string, llil_int_type>;

// to (llil2a.cpp):

using map_str_int_type = std::map<std::string, llil_int_type>;
resulted in a significant speed improvement in llil2a (with similar Windows Private Bytes):
get_properties : 4 secs
sort + output  : 5 secs
total          : 9 secs

My second Perl version improved from 85 seconds to 36 seconds (update: much later to 30 seconds in llil2grt). Incredibly, this spectacular performance improvement is entirely due to a surprising one-line change from:

sort { $href->{$b} <=> $href->{$a} || $a cmp $b } keys %{$href}
to:
sort { $href->{$b} <=> $href->{$a} } sort keys %{$href}
When more is known about the reason for this incredible difference, I'll update this node.
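For anyone who wants to reproduce the effect, here is a small benchmark sketch on synthetic data (the hash contents, sizes and sub names below are made up for illustration, not taken from the timings above):

    # Hypothetical benchmark sketch: one combined sort vs the two-sort trick.
    use strict;
    use warnings;
    use Benchmark qw(cmpthese);

    my %h;
    $h{ sprintf 'key%07d', $_ } = int rand 1000 for 1 .. 100_000;
    my $href = \%h;

    cmpthese( -5, {
        one_sort  => sub {
            my @k = sort { $href->{$b} <=> $href->{$a} || $a cmp $b } keys %{$href};
        },
        two_sorts => sub {
            # Relies on Perl's stable sort: ties on count keep the name
            # order produced by the inner plain sort.
            my @k = sort { $href->{$b} <=> $href->{$a} } sort keys %{$href};
        },
    });

Both subs produce the same ordering; only the shape of the comparison blocks differs.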

In contrast, I had to work a lot harder to improve the performance of my C++ version, switching in desperation to some ugly lower-level ANSI C functions in its get_properties function. For cheap thrills, I also switched from a 32-bit to a 64-bit integer counter llil_int_type. I suspect future minor performance tweaks may come from further improving I/O (e.g. by fiddling with file buffering and buffer sizes).
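For instance, the setvbuf experiment mentioned in the TODO comments of llil2.cpp might look roughly like this (a hypothetical sketch with an arbitrary 1 MiB buffer size, not code from llil2.cpp):

    // Hypothetical sketch of the setvbuf() buffering idea from the TODO comments.
    #include <cstdio>

    static char in_buf[1 << 20];   // 1 MiB read buffer; the best size needs measuring

    static FILE* open_input(const char* fname)
    {
       FILE* fh = std::fopen(fname, "r");
       if (fh != NULL) {
          // Must be called before the first read; on failure (nonzero return)
          // the stream simply keeps its default buffer.
          std::setvbuf(fh, in_buf, _IOFBF, sizeof(in_buf));
       }
       return fh;
    }

Whether this actually helps would need to be measured against the default stdio buffering.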

As described here, this is by far Perl's best performance so far in my three serious Rosetta nodes:

  • For the simple GoL algorithm, C++ was 12.5 times faster; memory use was 2.8 times less.
  • For the complex GoL algorithm, C++ was 212.5 times faster; memory use was 10.1 times less.
  • For the Long List is Long algorithm (this node), C++ was 3 times faster; memory use was 2.2 times less.
I suspect this is because Long List is Long seems to be mostly I/O bound, while the GoL algorithms were mostly CPU bound.

Updated: Changed std::unordered_map to std::map in llil2.cpp above.

Replies are listed 'Best First'.
Re^2: Rosetta Code: Long List is Long (Updated Solutions)
by Tux (Canon) on Dec 05, 2022 at 15:33 UTC

    Some wiser monk might correct me, but I think this difference is because "simple" sort blocks can be internally optimized.

    (edited: typoes corrected after reading the replies)

    my @sorted = sort @unsorted;
    my @sorted = sort { $a <=> $b } @unsorted;
    my @sorted = sort { $a cmp $b } @unsorted;

    These are all internally optimized to something simple and efficient. Once the block gets more complicated, like having multiple statements and/or multiple expressions, every block has to be wrapped in scopes *for each call* to that sub. This is one of the reasons why the Guttman-Rosler Transform (GRT) is so efficient.

    my @sorted = sort { $a->{id} <=> $b->{id} || $a->{foo} cmp $b->{foo} } @unsorted;   # SLOW

    my @sorted = map  { $_->[1] }
                 sort { $a->[0] cmp $b->[0] }
                 map  { pack "l>A*", $_->{id}, $_->{foo} } @unsorted;                   # FAST

    Enjoy, Have FUN! H.Merijn
      This is one of the reasons why the Larry-Rossler is so efficient.

      I think that you are thinking of the Guttman-Rosler Transform?

      my @sorted = sort { $b->{id} <=> $b->{id} || $a->{foo} cmp $b->{foo} } @unsorted;   # SLOW

      Comparing $b->{id} to itself will not work very well.

        I think that you are thinking of the Guttman-Rosler Transform?

        Agreed. BTW I believe the GRT is generally faster than the Schwartzian Transform (this node has more detail on sorting history in Perl).

        Though fond of the GRT, I couldn't make it work for this problem because of the unusual requirement to sort descending by count yet ascending by name (it would work nicely to sort both fields ascending via the classic pack "NA*" trick). If anyone can see a way to make GRT work for this problem, please let us know.
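      For illustration, here is a minimal sketch of the both-fields-ascending GRT mentioned above, using the classic pack "NA*" trick (toy data; note it does not satisfy this problem's descending-by-count requirement):

        use strict;
        use warnings;

        my %h = ( camel => 50, dromedary => 70, hello => 12345, pearl => 94 );

        # GRT: pack a big-endian 32-bit count in front of each name, sort the
        # packed strings lexically, then strip the 4-byte prefix afterwards.
        # Assumes counts fit in 32 bits and names contain no "\0" bytes.
        my @sorted =
            map  { substr $_, 4 }
            sort
            map  { pack 'NA*', $h{$_}, $_ }
            keys %h;

        print "$_\t$h{$_}\n" for @sorted;   # ascending by count, then by name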

Re^2: Rosetta Code: Long List is Long (Updated Solutions)
by marioroy (Prior) on Dec 05, 2022 at 18:04 UTC

    You may have missed the dualvar solution. Sorting an array of dualvars involves zero hash lookups.

    use Scalar::Util 'dualvar';

    our @data;

    while (my ($k, $v) = each %{$href}) {
        push @data, dualvar($v, $k);
    }

    # output array of dualvars; sorted by number, string
    print "$_\t".(0+$_)."\n" for sort { $b <=> $a } sort @data;
    # llil2.pl  sort + output : 19 secs
    # llil3.pl  sort + output : 16 secs  -- dualvar

      I noticed it, but (wrongly) assumed applying its remarkable two-sort trick to my original solution would have the same effect.

      For completeness, I created llil2d.pl (shown below) by applying your dualvar array trick to my original llil2.pl two-sort solution, with minimal changes. I can confirm that it is indeed about 3 seconds faster and with slightly lower memory use. Despite using Perl for 20 years, I'd never heard of dualvar before (update: oops, turns out I had :). Huge kudos to marioroy for unearthing this!

      llil2d start
      get_properties : 11 secs
      sort + output  : 22 secs
      total          : 33 secs

      Memory use (Windows Private Bytes): 2,824,184K (slightly lower than 2,896,104K for llil2.pl)

      For completeness, here is my adjusted llil2d.pl:

# llil2d.pl. Remarkable dualvar version based on [marioroy]'s concoction.
# Example run:  perl llil2d.pl tt1.txt tt2.txt tt3.txt >out.txt
use strict;
use warnings;
use feature qw{say};
use Scalar::Util qw{dualvar};

# ----------------------------------------------------------------------
# LLiL specification
# ------------------
# A LLiL-format file is a text file.
# Each line consists of a lowercase name a TAB character and a non-negative integer count.
# That is, each line must match : ^[a-z]+\t\d+$
# For example, reading the LLiL-format files, tt1.txt containing:
#   camel\t42
#   pearl\t94
#   dromedary\t69
# and tt2.txt containing:
#   camel\t8
#   hello\t12345
#   dromedary\t1
# returns this hashref:
#   $hash_ret{"camel"}     = 50
#   $hash_ret{"dromedary"} = 70
#   $hash_ret{"hello"}     = 12345
#   $hash_ret{"pearl"}     = 94
# That is, values are added for items with the same key.
#
# To get the required LLiL text, you must sort the returned hashref
# descending by value and insert a TAB separator:
#   hello\t12345
#   pearl\t94
#   dromedary\t70
#   camel\t50
# To make testing via diff easier, we further sort ascending by name
# for lines with the same value.
# ----------------------------------------------------------------------

# Function get_properties
# Read a list of LLiL-format files
# Return a reference to a hash of properties
sub get_properties
{
   my $files = shift;   # in: reference to a list of LLiL-format files
   my %hash_ret;        # out: reference to a hash of properties
   for my $fname ( @{$files} ) {
      open( my $fh, '<', $fname ) or die "error: open '$fname': $!";
      while (<$fh>) {
         chomp;
         my ($word, $count) = split /\t/;
         $hash_ret{$word} += $count;
      }
      close($fh) or die "error: close '$fname': $!";
   }
   return \%hash_ret;
}

# ----------------- mainline ------------------------------------------

@ARGV or die "usage: $0 file...\n";
my @llil_files = @ARGV;

warn "llil2d start\n";
my $tstart1 = time;

my $href = get_properties( \@llil_files );

my $tend1  = time;
my $taken1 = $tend1 - $tstart1;
warn "get_properties : $taken1 secs\n";

my $tstart2 = time;

my @data;
while ( my ($k, $v) = each %{$href} ) { push @data, dualvar($v, $k) }

# Using two sorts is waaay faster than one! (see [id://11148545])
for my $key ( sort { $b <=> $a } sort @data ) {
   say "$key\t" . (0 + $key);
}

my $tend2  = time;
my $taken2 = $tend2 - $tstart2;
my $taken  = $tend2 - $tstart1;
warn "sort + output : $taken2 secs\n";
warn "total         : $taken secs\n";

      Update: llil2grt.pl is about three seconds faster than llil2d.pl above, while using slightly less memory.

      References Added Later

      Dualvar:

      Some ideas to try in the future:

      See also:

Re^2: Rosetta Code: Long List is Long (Updated C++ Solutions)
by eyepopslikeamosquito (Archbishop) on Dec 19, 2022 at 05:45 UTC

    After being surprised in the extreme by marioroy's astonishing two-sort dualvar Perl solution and my accidental jwkrahn-inspired llil2grt.pl Perl GRT solution, I couldn't restrain myself from trying to steal ideas from these two nodes to improve the (9 second) speed of my fastest llil2a C++ solution.

    llil2m.cpp

    Here is my C++ solution inspired by marioroy's famous two-sort Perl concoction.

    The above program can be compiled with or without MARIOROY_TWO_SORT_TRICK_L defined.

    Without MARIOROY_TWO_SORT_TRICK_L defined I saw:

    > llil2m big1.txt big2.txt big3.txt >llil2m.tmp
    llil2m start
    get_properties   CPU time : 4.187 secs
    vector copy      CPU time : 0.612 secs
    vector sort      CPU time : 2.89 secs
    write stdout     CPU time : 1.383 secs
    total            CPU time : 9.074 secs
    total wall clock          : 9 secs
    So far so good. This is just the fastest C++ solution found previously.

    Beside myself with excitement, I defined MARIOROY_TWO_SORT_TRICK_L ... but sobbed when I saw:

    > llil2m big1.txt big2.txt big3.txt >llil2m.tmp
    llil2m start
    get_properties     CPU time : 4.286 secs
    vector copy        CPU time : 0.613 secs
    vector stable sort CPU time : 2.977 secs
    write stdout       CPU time : 1.363 secs
    total              CPU time : 9.244 secs
    total wall clock            : 9 secs

    Memory use (Windows Private Bytes): 1,218,716K

    No improvement. At all. Aaaargh! When I changed stable_sort to sort, the sort speed improved dramatically from 2.977 secs to just 0.750 secs ... but of course the output was wrong. It seems there is a high performance price to pay for a stable sort in this case.

    Oh well, at least we now have an alternative nine second solution in C++ using a stable sort.
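    For readers without the llil2m.cpp listing to hand, here is a minimal standalone sketch of the MARIOROY_TWO_SORT_TRICK_L idea (a hypothetical toy program, not the actual llil2m.cpp): the vector copied from std::map is already ascending by key, so a stable_sort on the count alone reproduces the required order, while swapping in plain sort is faster but forfeits the tie-break.

    // Sketch of the two-sort idea in C++: rely on std::map's key order, then
    // stable_sort descending by count so equal counts keep ascending key order.
    #include <algorithm>
    #include <iostream>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    int main()
    {
       typedef std::pair<std::string, long long> str_int;

       std::map<std::string, long long> hash_ret;
       hash_ret["camel"] = 50;     hash_ret["dromedary"] = 70;
       hash_ret["hello"] = 12345;  hash_ret["pearl"]     = 94;

       // The copy is already ascending by key (std::map iterates in key order)
       std::vector<str_int> v( hash_ret.begin(), hash_ret.end() );

       // Descending by count only; stability preserves key order among equal counts.
       // Replacing stable_sort with sort here is faster but loses that guarantee.
       std::stable_sort( v.begin(), v.end(),
          [](const str_int& a, const str_int& b) { return b.second < a.second; } );

       for (const str_int& kv : v) std::cout << kv.first << '\t' << kv.second << '\n';
       return 0;
    }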

    Sorting Algorithms

    BTW, from sort - perl pragma to control sort() behaviour

    The sort pragma is now a no-op, and its use is discouraged. ... The default sort has been stable since v5.8.0, and given this consistent behaviour for almost two decades, everyone has come to assume stability.

    Given the above performance difference between stable and non-stable sorts, this looks like an unfortunate decision to me. After all, there may be times when you really want the performance boost of a non-stable sort in Perl.

    Update: Stroustrup, in his C++ Programming Language book, notes that stable_sort improves towards N*log(N) when the system has sufficient memory, but degrades to N*log(N)*log(N) otherwise ... so I guess the use case for needing a non-stable sort in Perl is when you are sorting huge arrays. See also Sorting algorithm (wikipedia).
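    As a quick reminder of what stability buys (a tiny made-up illustration, not from the original node): ties under the comparator keep their original relative order, which is exactly what the two-sort tricks above rely on.

    use strict;
    use warnings;

    # The two elements with count 2 compare equal, so a stable sort must
    # keep their original relative order ('b' before 'a' here).
    my @pairs  = ( [ 2, 'b' ], [ 1, 'x' ], [ 2, 'a' ], [ 1, 'y' ] );
    my @by_num = sort { $a->[0] <=> $b->[0] } @pairs;
    print join( ' ', map { "$_->[0]$_->[1]" } @by_num ), "\n";   # prints: 1x 1y 2b 2a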

    llil2grt.cpp

    Here is my C++ solution inspired by this llil2grt.pl Perl GRT solution.

    Desperate to avoid the sort function after being burnt during my llil2m.cpp adventure, and remembering jwkrahn's cute GRT trick of achieving a descending sort via an ascending sort of negative numbers, I had to try a similar stunt in C++.

    In llil2grt below, I avoided sort simply by creating an inverted set_int_str_type container from the existing map_str_int_type container.

// llil2grt.cpp.
// Inspired by llilgrt.pl: improve sort performance via a negative count.
// g++ compile on Linux:
//   g++ -o llil2grt -std=c++11 -Wall -O3 llil2grt.cpp
// This g++ command also works with mingw C++ compiler (https://sourceforge.net/projects/mingw-w64)
// that comes bundled with Strawberry Perl (C:\Strawberry\c\bin\g++.exe).
// Example run:  llil2grt tt1.txt tt2.txt tt3.txt >out.txt

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <cstdio>

#include <string>
#include <vector>
#include <set>
#include <map>
#include <algorithm>
#include <utility>
#include <iostream>
#include <fstream>
#include <sstream>

static_assert(sizeof(size_t) == sizeof(int64_t), "size_t too small, need a 64-bit compile");

// ----------------------------------------------------------------------------
// Crude hack to see Windows Private Bytes in Task Manager by sleeping at
// program end (see also sleep hack at end of main)
// #include <chrono>
// #include <thread>
// ----------------------------------------------------------------------------

typedef long long llil_int_type;
using str_int_type     = std::pair<std::string, llil_int_type>;
using int_str_type     = std::pair<llil_int_type, std::string>;
using map_str_int_type = std::map<std::string, llil_int_type>;
using vec_str_int_type = std::vector<str_int_type>;
using set_int_str_type = std::set<int_str_type>;

// Mimic the Perl get_properties subroutine ----------------------------
// Limit line length and use ANSI C functions to try to boost performance
#define MAX_LINE_LEN_L 255
static void get_properties(
   int               nfiles,     // in: the number of input files
   char*             fname[],    // in: the input file names
   map_str_int_type& hash_ret)   // out: a hash of properties
{
   FILE* fh;
   char line[MAX_LINE_LEN_L+1];
   char* word;
   llil_int_type count;
   for (int i = 0; i < nfiles; ++i) {
      fh = ::fopen(fname[i], "r");
      if (fh == NULL) {
         std::cerr << "Error opening '" << fname[i] << "' : errno=" << errno << "\n";
         continue;
      }
      while ( ::fgets(line, MAX_LINE_LEN_L, fh) != NULL ) {
         word  = ::strtok(line, "\t");
         count = ::atoll( ::strtok(NULL, "\n") );
         hash_ret[word] -= count;
      }
      ::fclose(fh);
   }
}

// ---------------------------------------------------------------------

int main(int argc, char* argv[])
{
   if (argc < 2) {
      std::cerr << "usage: llil2grt file1 file2 ... >out.txt\n";
      return 1;
   }
   std::cerr << "llil2grt start\n";
   time_t  tstart1 = ::time(NULL);
   clock_t cstart1 = ::clock();

   // Create the hash of properties
   map_str_int_type hash_ret;
   get_properties(argc - 1, &argv[1], hash_ret);

   clock_t cend1   = ::clock();
   double  ctaken1 = (double) (cend1 - cstart1) / (double)CLOCKS_PER_SEC;
   std::cerr << "get_properties   CPU time : " << ctaken1 << " secs\n";

   clock_t cstart2 = ::clock();

   // To avoid calling sort(), try creating an inverted std::set container
   // Note: negative count gives desired ordering (just like Perl GRT sort :)
   set_int_str_type s;
   for ( auto const& kv : hash_ret ) {
      // Note: emplace_hint() was a lot faster than emplace()
      s.emplace_hint( s.end(), kv.second, kv.first );
   }
   clock_t cend2s = ::clock();

   // Output the (already sorted) std::set - no sort() function required!
   // Note: fix up negative count via -n.first
   for ( auto const& n : s ) { std::cout << n.second << '\t' << -n.first << '\n'; }

   clock_t cend2 = ::clock();
   time_t  tend2 = ::time(NULL);
   long   ttaken   = static_cast<long>(::difftime(tend2, tstart1) + 0.5);
   double ctaken   = (double) (cend2  - cstart1) / (double)CLOCKS_PER_SEC;
   double ctaken2s = (double) (cend2s - cstart2) / (double)CLOCKS_PER_SEC;
   double ctaken2o = (double) (cend2  - cend2s)  / (double)CLOCKS_PER_SEC;
   std::cerr << "emplace set sort CPU time : " << ctaken2s << " secs\n";
   std::cerr << "write stdout     CPU time : " << ctaken2o << " secs\n";
   std::cerr << "total            CPU time : " << ctaken   << " secs\n";
   std::cerr << "total wall clock time     : " << ttaken   << " secs\n";

   // Hack to see Private Bytes in Windows Task Manager (uncomment next line so process doesn't exit too quickly)
   // std::this_thread::sleep_for(std::chrono::milliseconds(90000000));

   return 0;
}

    I was pleasantly surprised to see this improved my fastest C++ solution from 9 seconds down to 7:

    > llil2grt big1.txt big2.txt big3.txt >llil2grt.tmp
    llil2grt start
    get_properties   CPU time : 4.252 secs
    emplace set sort CPU time : 1.282 secs
    write stdout     CPU time : 1.716 secs
    total            CPU time : 7.254 secs
    total wall clock time     : 7 secs

    Memory use (Windows Private Bytes): 1,626,728K

    Though faster, it's no surprise that memory use increased: instead of sorting in place, we're creating a new set_int_str_type container.

    Updated: Minor cosmetic changes were made to llil2grt.cpp after posting. Added "Sorting Algorithms" section with a bit more information on stable sorts.
