#! perl -slw
use strict;

our $NBUF //= 5000;     # lines buffered per output file before flushing
our $IBUF //= 2e6;      # bytes per disk read

my $start = time;

my @outFHs;             # lazily opened output filehandles, one per bucket
my @outBufs;            # per-bucket line buffers
my $n = 0;              # records processed
my( $o, $buf ) = ( 0, '' );

open DISK, '<', $ARGV[0] or die $!;

while( read( DISK, $buf, $IBUF, $o ) ) {
    $o = 0;             # reset the append offset; a trailing partial line sets it again
    open RAM, '<', \$buf;               # parse the block line-by-line via an in-memory filehandle
    while( my $line = <RAM> ) {
        unless( $line =~ /\n$/ ) {      # incomplete last line: carry it over
            $buf = $line;               # and have the next read() append
            $o = length $buf;           # its data after it
            next;
        }
        ++$n;
        my $key = substr( $line, 7, 3 ) % 600;  # bucket on the 3-digit field at offset 7
        if( push( @{ $outBufs[ $key ] }, $line ) > $NBUF ) {
            unless( defined $outFHs[ $key ] ) { # open output files lazily
                open $outFHs[ $key ], '>', "$key.out" or die $!;
            }
            print { $outFHs[ $key ] } @{ $outBufs[ $key ] };
            @{ $outBufs[ $key ] } = ();
        }
    }
}
## NB: assumes the input ends with a newline; a trailing unterminated record would be dropped here.

## Flush the remaining buffers, opening any file whose buffer never filled.
for my $key ( 0 .. $#outBufs ) {
    next unless $outBufs[ $key ] and @{ $outBufs[ $key ] };
    unless( defined $outFHs[ $key ] ) {
        open $outFHs[ $key ], '>', "$key.out" or die $!;
    }
    print { $outFHs[ $key ] } @{ $outBufs[ $key ] };
}
close $_ for grep defined, @outFHs;
close DISK;

printf "Took %d seconds for %d records\n", time() - $start, $n;

__END__
C:\test>Ibufd.pl 1GB.csv
Took 132 seconds for 16777216 records
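For anyone unfamiliar with the open RAM, '<', \$buf idiom above: Perl can open a filehandle on a scalar reference, so a large block read from disk in one call can then be split into lines with the ordinary readline operator instead of a manual index/substr loop. A minimal, self-contained sketch of just that trick (the sample data and variable names are illustrative only):

    use strict;
    use warnings;

    my $buf = "alpha\nbeta\ngamma";        # pretend this came from a big read()

    open my $ram, '<', \$buf or die $!;    # filehandle over the in-memory buffer
    while( my $line = <$ram> ) {
        if( $line =~ /\n$/ ) {
            print "complete line: $line";
        }
        else {                             # no trailing newline: the block ended mid-record,
            print "partial line '$line' -- carry it into the next read\n";
        }
    }
    close $ram;

Note also that the -s on the shebang line enables Perl's command-line switch parsing, so the two tunables can be overridden at run time, e.g. Ibufd.pl -NBUF=10000 -IBUF=4e6 1GB.csv; the //= assignments then only supply the defaults when no switch was given.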
In reply to Re: how to split huge file (16.7 million lines in; 600 output files; 132 seconds) by BrowserUk
in thread how to split huge file reading into multiple threads by sagarika