#!/usr/bin/env perl
# time NUM_THREADS=3 perl pcount.pl big
use v5.36;
use autodie;
use MCE;
exit 1 if not @ARGV;
my $mul_pattern = qr/mul\(\d{1,3},\d{1,3}\)/;   # precompiled pattern: mul(a,b) with 1-3 digit operands
my $filename = shift;
my $count = 0;
# Gather callback: runs in the manager process, accumulating the
# per-chunk counts sent back by the workers.
sub reduce_count ($worker_count) {
    $count += $worker_count;
}
my $mce = MCE->new(
    max_workers => $ENV{NUM_THREADS} // MCE::Util::get_ncpu(),
    chunk_size  => 65536 * 16,      # read the input in ~1 MiB chunks
    use_slurpio => 1,               # hand each chunk to the worker as one scalar ref
    gather      => \&reduce_count,
    user_func   => sub {
        my ($mce, $slurp_ref, $chunk_id) = @_;
        # count matches in this chunk and send the tally back to the manager
        my $count = () = $$slurp_ref =~ m/$mul_pattern/g;
        $mce->gather($count);
    }
)->spawn;
$mce->process({ input_data => $filename });
$mce->shutdown;
print "Found $count matches.\n";
####
Found 1999533 matches.
workers : wall-clock time
1: 4.420s
2: 2.263s   (Perl needs two workers to catch up with single-worker Python, 3.131s)
3: 1.511s
4: 1.154s
5: 0.940s
6: 0.788s
7: 0.680s
8: 0.600s
9: 0.538s
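The per-worker timings above come from re-running the script with NUM_THREADS set to 1 through 9, as in the "time NUM_THREADS=3 perl pcount.pl big" comment at the top. A minimal sketch of such a sweep follows; the loop itself is an assumption, not the harness actually used for these numbers:

#!/usr/bin/env perl
use v5.36;
use Time::HiRes ();
for my $n (1 .. 9) {
    $ENV{NUM_THREADS} = $n;                     # worker count under test
    my $t0 = Time::HiRes::time();
    system('perl', 'pcount.pl', 'big') == 0
        or die "pcount.pl exited with status $?";
    printf "%d: %.3fs\n", $n, Time::HiRes::time() - $t0;
}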
##
##
#!/usr/bin/env python
# time NUM_THREADS=3 python pcount.py big
import os, re, sys
from multiprocessing import Pool, cpu_count
if len(sys.argv) < 2: sys.exit(1)
def read_file(file, chunk_size=65536*16):
    """
    Lazy generator that reads a file in chunks of roughly chunk_size
    characters, extending each chunk to the end of the current line so
    a match never straddles two chunks.
    """
    while True:
        chunk = file.read(chunk_size)
        if not chunk:
            break
        if not chunk.endswith('\n'):
            chunk += file.readline()
        yield chunk
def process_chunk(chunk):
    """
    Worker function: count the pattern matches in one chunk.
    """
    return len(mul_re.findall(chunk))
mul_re = re.compile(r"mul\(\d{1,3},\d{1,3}\)")
num_processes = int(os.getenv('NUM_THREADS') or cpu_count())
p = Pool(num_processes)
file_name = sys.argv[1]
try:
    with open(file_name, "r") as file:
        # map() drains the lazy chunk generator, farming chunks out to the pool
        results = p.map(process_chunk, read_file(file))
    p.close()
    p.join()
except Exception as e:
    print(e, file=sys.stderr)
    sys.exit(1)
print(f"Found {sum(results)} matches.")
##
##
Found 1999533 matches.
workers : wall-clock time
1: 3.131s
2: 1.824s
3: 1.408s
4: 1.178s
5: 1.187s
6: 1.187s
7: 1.172s
8: 1.008s
9: 0.995s
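The input file "big" is not included here. As a purely hypothetical sketch, assuming it is ordinary line-oriented text sprinkled with well-formed mul(a,b) tokens, something like the following produces a file of comparable scale for trying both scripts (it will not reproduce the exact 1999533 count):

#!/usr/bin/env perl
# hypothetical generator for a test file similar in spirit to "big"
use v5.36;
use autodie;
open my $fh, '>', 'big';
for (1 .. 2_000_000) {
    # some filler, then one mul(a,b) token per line
    print {$fh} 'x' x int(rand 40),
        sprintf('mul(%d,%d)', int(rand 1000), int(rand 1000)),
        "\n";
}
close $fh;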