#!/usr/local/bin/perl

# Parallel "grep and reformat" over a set of gzip-compressed log files.
#
# All files matching $dir are decompressed through a single `zcat` pipe.
# MCE::Loop hands the stream to the workers in chunks; each worker scans
# its chunk for $pattern, turns every matching line into a comma-separated
# row and gathers the result, tagged with the chunk id, so that the manager
# process can print the rows to STDOUT in order.

use strict;
use warnings;

# IO performance in 1.699_010 is faster than MCE 1.608.
# https://metacpan.org/release/MARIOROY/MCE-1.699_010

use MCE::Loop;
use MCE::Candy;

my $dir     = 'logs/*.log.gz';
my @files   = sort( glob($dir) );
my $pattern = "some_string";

# Compile the pattern once instead of on every chunk and every line.
# /m is required for the whole-chunk test (a chunk holds many lines) and
# makes no difference when the same regex is applied to a single line.
my $pattern_re = qr/$pattern/m;

# Without any file argument zcat would silently block reading STDIN.
die "no files match '$dir'\n" unless @files;

MCE::Loop::init {
    gather      => MCE::Candy::out_iter_fh( \*STDOUT ),
    chunk_size  => '240k',
    max_workers => 24,
    use_slurpio => 1,
};

# Build one output row from a matching log line.
# The line is split into tokens (a double-quoted string or a run of
# non-whitespace characters); tokens 0, 1, 3 and 4 are joined with commas.
# Tokens that do not exist on a short line become empty fields rather
# than triggering "uninitialized value" warnings.
sub _format_row {
    my ($line) = @_;

    my @tokens = $line =~ /".*?"|\S+/g;

    return join( ',', map { defined $_ ? $_ : '' } @tokens[ 0, 1, 3, 4 ] )
        . "\n";
}

# List-form pipe open: the file names never pass through a shell.
open( my $fh, "-|", "zcat", @files ) or die "open error: $!\n";

mce_loop {
    # With use_slurpio the chunk arrives as a reference to one scalar.
    my ( $mce, $slurp_ref, $chunk_id ) = @_;
    my $buf = '';

    # Quickly determine if a match is found...
    # ...and process the slurped chunk line by line only if true.
    if ( $$slurp_ref =~ $pattern_re ) {

        # The following is fast on Unix, but performance degrades
        # drastically on Windows beyond 4 workers.
        open my $mem_fh, '<', $slurp_ref
            or die "in-memory open error: $!\n";

        while ( my $line = <$mem_fh> ) {
            $buf .= _format_row($line) if $line =~ $pattern_re;
        }

        close $mem_fh;

        # Therefore, use the following construction on Windows.
        # while ( $$slurp_ref =~ /([^\n]+\n)/mg ) {
        #     my $line = $1;    # save $1 before running another match
        #     $buf .= _format_row($line) if $line =~ $pattern_re;
        # }
    }

    # Always gather, even when $buf is empty: the chunk id is sent along
    # with the buffer so the manager can emit the output in order.
    $mce->gather( $chunk_id, $buf );

} $fh;

# Closing a pipe handle waits for the child; a false return with $! == 0
# means zcat itself exited with a non-zero status (reported in $?).
close $fh
    or warn $!
    ? "close error: $!\n"
    : "zcat exited with status " . ( $? >> 8 ) . "\n";