Good point about using LANG=C: it makes for a fairer comparison, since in my Python version I had set ASCII encoding to parse the input (but not during sorting 🤔).
With that change alone, the script takes ~99 s; with that change plus 2 sorts, it takes ~60 s.
#!/bin/sh
# Source: https://perlmonks.org/?node_id=11148801
#
# This is one shell implementation based on the problem specification
# ...
#
# Rosetta Code: Long List is Long, 20221130,
# by eyepopslikeamosquito
# https://perlmonks.org/?node_id=11148465
#
# Changed to set LANG=C & 2 sorts per marioroy's observations ...
# https://perlmonks.org/?node_id=11148782
# At least one input file must be named on the command line; otherwise
# report the problem on stderr and stop with a failure status.
if [ "$#" -eq 0 ]; then
  printf "Give a list of files to sort.\n" >&2
  exit 1
fi
# Pin the plain "C" locale and export it so that every child process
# started afterwards inherits it (byte-wise, locale-independent
# comparisons).
LC_ALL=C LANG=C
export LC_ALL LANG
# Sum the per-key counts found in the files named on the command line and
# print the totals, highest count first.
#
# Input:  every line of every file contributes its second whitespace-
#         separated field to the running total of its first field.
# Output: one "key<TAB>total" line per distinct key on stdout; the elapsed
#         wall-clock time in whole seconds is reported on stderr.
start=$( date '+%s' )
# Takes ~60 s (original author's measurement).
#
# "$@" is quoted so that each file name reaches awk as exactly one
# argument, even when it contains blanks or glob characters.
#
# Two chained sorts are used instead of a single multi-key sort, following
# the observations linked in the file header: the first orders by key, the
# second by count (numeric, descending).  Lines with equal counts still
# come out in ascending key order because sort falls back to a whole-line
# comparison when the key compares equal.
awk '
  { cat_count[ $1 ] += $2 }
  END {
    for ( cat in cat_count )
      printf "%s\t%s\n", cat, cat_count[ cat ]
  }
' "$@" \
  | sort -k1,1 \
  | sort -k2,2rn
end=$( date '+%s' )
printf 'total time: %d s\n' "$(( end - start ))" >&2