def sort_native( cat_count ): once = sorted( cat_count.keys() ) return sorted( once, key = lambda k: cat_count[ k ], reverse = True ) #### sort { $href->{$b} <=> $href->{$a} } sort keys %{$href} #### def sort_native( cat_count :dict[ str, int ] ) ->list[ str ]: """ Returns: A `list` of sorted keys by decreasing order of values & increasing order of keys. Args: cat_count: A `dict` with string key & integer value. """ once = sorted( cat_count.keys() ) return sorted( once, key = lambda k: cat_count[ k ], reverse = True ) #### #!/usr/local/bin/python3.10 # Source: https://perlmonks.org/index.pl?node_id=11148702 # # This is one Python implementation based on the problem specification ... # # Rosetta Code: Long List is Long, 20221130, # by eyepopslikeamosquito # https://perlmonks.org/?node_id=11148465 import sys from collections import defaultdict from hashlib import sha256 from pathlib import PosixPath from timeit import default_timer # Takes ~0.3 s. def verify( output :str, # The digest of the output produced from eyepopslikeamosquito's # program, "gen-llil.pl", after generating 3 input files. Update it # as needed. expected_sum :str = '70a1644743e9b9d8d73094ed1826527f27a7f3f131c3f28a63aaeb85e1af8fef' ) ->None: """ Prints a message about output being different than expected. Args: output : Stringified output of sorted input. expected_sum: SHA-256 digest sum in hexadecimal of expected ASCII output. """ # Need to encode the resulting string with encoding of "ascii" to match up # of the input. # Also make sure that each category-count pair ends with a newline. sum = sha256( output.encode( encoding = 'ascii' ) ).hexdigest() if sum != expected_sum: sys.stderr.write( f"OUTPUT is DIFFERENT!\n {sum}\n" ) return # Takes ~7-11 s. def sort_val_desc_key_asc( cat_count :dict[ str, int ] ) ->list[ str ]: """ Returns: A `list` of sorted keys by decreasing order of values & increasing order of keys. Args: cat_count: A `dict` with string key & integer value. """ once = sorted( cat_count.keys() ) return sorted( once, key = lambda k: cat_count[ k ], reverse = True ) # Takes ~12-16 s. def collect( data_list :list ) ->dict[ str, int ]: """ Returns: A `dict` with category as key & total count as value. Args: data_list: list of file paths as `pathlib.PosixPath` objects. Side effect: Updates `time_stat` with the time taken to collect the data. """ cat_count = defaultdict( lambda: 0 ) delimiter = '\t' open_prop = { 'mode' : 'rt', 'encoding' : 'ascii', 'newline' : '\n' } for path in data_list: with path.open( **open_prop ) as fh: for line in fh: category, number = line.split( delimiter, 1 ) cat_count[ category ] += int( number ) return cat_count # Collect file paths. if sys.argv[1:]: data_list = [ PosixPath( p ) for p in sys.argv[1:] ] else: sys.exit( 'Give a file list with data to process' ) start = default_timer() # Process. cat_count = collect( data_list ) end_collect = default_timer() # Sort. sorted_key = sort_val_desc_key_asc( cat_count ) end_sort = default_timer() # Format; take ~7 s. stringified = ''.join( f'{k}\t{cat_count[ k ]}\n' for k in sorted_key ) end_stringification = default_timer() # Either verify or print to verifiy outside of the program. # Verification is slightly slower than dumping string on standard output. verify_or_print = 'NOT verify' if verify_or_print == 'verify': output_label = 'sha256 verification' verify( stringified ) else: output_label = 'output' print( stringified, end = '' ) end = default_timer() # Print time taken. stat = { 'collect' : end_collect - start, 'sort' : end_sort - end_collect, 'stringification' : end_stringification - end_sort, output_label : end - end_stringification, # ~28-36 s. 'total' : end - start } max_label_width = max( len( k ) for k in stat.keys() ) # decimal_place = 1 max_time_width = max( len( f'{t:0.{decimal_place}f}' ) for t in stat.values() ) time_format = f'{max_time_width}.{decimal_place}f' # out_format = '{label:{label_pad}}: {time:{time_format}} s\n' # sys.stderr.write( f'# {__file__}\n' ) for step, time in stat.items(): mess = out_format.format( label = step, label_pad = max_label_width, time = time, time_format = time_format ) sys.stderr.write( mess ) #### # parv-20221210-two-sorts.py collect : 15.9 s sort : 7.8 s stringification: 7.5 s output : 0.1 s total : 31.3 s