#!/usr/bin/env python
"""Count occurrences of ``mul(X,Y)`` (1-3 digit arguments) in a file, in parallel.

Usage::

    time NUM_THREADS=3 python pcount.py big
"""
import io
import os
import re
import sys
from multiprocessing import Pool, cpu_count

# Compiled once at module level so spawned workers get it on import.
mul_re = re.compile(r"mul\(\d{1,3},\d{1,3}\)")


def read_file(fh, chunk_size=65536 * 16):
    """Lazily yield chunks of *fh*, extending each chunk to the end of a line.

    Extending to the next newline guarantees a match (which never spans a
    newline) is never split across two chunks.

    :param fh: an open text-mode file object
    :param chunk_size: approximate size of each chunk, in characters
    """
    while True:
        chunk = fh.read(chunk_size)
        if not chunk:
            break
        if not chunk.endswith('\n'):
            chunk += fh.readline()
        yield chunk


def process_chunk(chunk):
    """Worker function: return the number of ``mul(X,Y)`` matches in *chunk*."""
    return len(mul_re.findall(chunk))


def main():
    """Entry point: validate argv, fan chunks out to a process pool, report.

    Exits with status 1 when no file argument is given or when reading /
    processing fails (error text goes to stderr, as before).
    """
    if len(sys.argv) < 2:
        sys.exit(1)
    # NUM_THREADS env var overrides the CPU count (see usage in docstring).
    num_processes = int(os.getenv('NUM_THREADS') or cpu_count())
    file_name = sys.argv[1]
    try:
        # 'with Pool' guarantees the pool is torn down even if map() raises;
        # the original leaked live worker processes on the exception path.
        with open(file_name, "r") as fh:
            with Pool(num_processes) as pool:
                results = pool.map(process_chunk, read_file(fh))
                pool.close()
                pool.join()
    except Exception as e:
        print(e, file=sys.stderr)
        sys.exit(1)
    print(f"Found {sum(results)} matches.")


if __name__ == "__main__":
    # Guard is required by multiprocessing: under the 'spawn' start method
    # (Windows, and macOS default since 3.8) workers re-import this module,
    # and an unguarded module-level Pool() would fail or recurse.
    main()