# Target layout: twelve file names spread over five groups, where the
# first two groups hold three names and the remaining three hold two.
my @set = (
    [ 'file00.csv', 'file01.csv', 'file02.csv', ],
    [ 'file03.csv', 'file04.csv', 'file05.csv', ],
    [ 'file06.csv', 'file07.csv', ],
    [ 'file08.csv', 'file09.csv', ],
    [ 'file10.csv', 'file11.csv', ],
);
####
# my $n = 5;
# Move the names held in @file into consecutive groups in @set.
# @file is consumed (emptied) in the process.
my @set;
my $file_count = scalar @file;    # 12

# With fewer files than groups, every file gets a group of its own.
my $partition_size = 1;
my $remainder      = 0;
if ( $file_count >= $n ) {
    $partition_size = int( $file_count / $n );    # 2
    $remainder      = $file_count % $n;           # 2
}

my $i = 0;
while (@file) {

    # The first $remainder groups each take one extra file.
    my $take = $partition_size;
    $take++ if $i < $remainder;
    push @{ $set[$i] }, shift @file for 1 .. $take;
    $i++;
}
##
##
#!/usr/bin/perl
use strict;
use warnings;
use Cwd;
use Data::Dumper;
use Getopt::Long;
use IO::Compress::Gzip qw( $GzipError );
use Text::CSV;
# Data::Dumper settings for any debugging output.
$Data::Dumper::Deepcopy = 1;
$Data::Dumper::Sortkeys = 1;
$| = 1;    # flush the selected output handle after every write
srand();

# Defaults; each can be overridden on the command line.
my $output_files = 5;
my $outfile_name = $0 . q{.csv};
my $path         = q{./};

# Drop ".pl" and everything after it, so the default output name is the
# script name without its extension.
$outfile_name =~ s/\.pl.*$//g;

# GetOptions returns false when the command line could not be parsed;
# stop instead of continuing with silently ignored options.
GetOptions(
    q{help} => sub {
        help(
            output_files => $output_files,
            outfile_name => $outfile_name,
            path         => $path,
        );
    },
    q{output_files:i} => \$output_files,
    q{outfile_name:s} => \$outfile_name,
    q{path:s}         => \$path,
) or die qq{Error in command line arguments\n};

# ":i" makes the value optional, so a bare --output_files yields 0.
# Zero or negative counts cannot be partitioned.
if ( $output_files < 1 ) {
    die qq{--output_files must be at least 1 (got $output_files)\n};
}

# NOTE(review): $start_dir is not used by any code visible here; kept in
# case code outside this view relies on it.
my $start_dir = getcwd;
if ( !-d $path ) {
    die qq{Directory $path not found: $!\n};
}

# Collect the input files, group them, and write one output per group.
my @file = get_files( path => $path, );
my @set =
  partition_files( files => \@file, n => $output_files, );
write_subfiles( set => \@set, prefix => $outfile_name, );
#
# Subroutines
#
# NOTE(review): this region appears to be truncated. The statement that
# starts with "print sprintf" breaks off at "<", and what follows
# ("-s $b } @file; return @file;") does not belong to help(): it looks
# like the tail of a sort by file size. No "sub get_files" header is
# visible even though get_files( path => ... ) is called earlier in the
# file, so presumably the rest of help() and the start of get_files()
# were lost. Restore both from the original script before running.
sub help {
my ( %param, ) = @_;
print sprintf
< -s $b } @file;
return @file;
}
# Split a list of file names into consecutive groups.
#
# Named parameters:
#   files - array ref of file names (the caller's array is not modified)
#   n     - maximum number of groups; must be at least 1
#
# Returns a list of array refs. When there are at least n files, exactly
# n groups are returned and the first (count % n) groups hold one extra
# file. When there are fewer files than n, each file gets its own group.
# An empty input yields an empty list.
#
# Dies if n is undefined or less than 1. Previously n == 0 died with a
# division-by-zero error and a negative n never terminated.
sub partition_files {
    my (%param) = @_;
    my $n       = $param{n};
    my @file    = @{ $param{files} || [] };

    if ( !defined $n || $n < 1 ) {
        die qq{partition_files: n must be a positive integer\n};
    }
    $n = int $n;

    my $file_count = scalar @file;    # 12
    my ( $partition_size, $remainder ) =
        $file_count >= $n
      ? ( int( $file_count / $n ), $file_count % $n )    # 2, 2
      : ( 1, 0 );

    my @set;
    my $i = 0;
    while (@file) {

        # The first $remainder groups each absorb one leftover file.
        my $take = $partition_size + ( $i < $remainder ? 1 : 0 );
        push @set, [ splice @file, 0, $take ];
        $i++;
    }
    return @set;
}
# Merge each group of input CSV files into one gzip-compressed CSV file.
#
# Named parameters:
#   set    - array ref of array refs; each inner array lists the input
#            files whose rows go into one output file
#   prefix - leading part of every output file name
#
# Output files are named "<prefix>-<index>.csv.gz", with the index
# counted from 0 and zero-padded. The first row of every input file is
# skipped, so no first rows are written to the output.
#
# Dies when an input cannot be opened, an output cannot be created or
# closed, or a row cannot be written. Returns nothing.
sub write_subfiles {
    my (%param) = @_;
    my @set     = @{ $param{set} };
    my $prefix  = $param{prefix};

    # Nothing to write for an empty set.
    return if !@set;

    # One digit more than the group count needs. Counting digits avoids
    # the rounding of log()/log(10) at exact powers of ten.
    my $width = length( scalar @set ) + 1;

    my $csv =
      Text::CSV->new(
        { binary => 1, auto_diag => 1, eol => $/, } );

    foreach my $i ( 0 .. $#set ) {

        # Only the index goes through sprintf, so a "%" in the prefix
        # is taken literally.
        my $fn =
          $prefix . q{-} . sprintf( qq{%0${width}d}, $i ) . q{.csv} . q{.gz};

        my $z = IO::Compress::Gzip->new(
            $fn,
            -Level => IO::Compress::Gzip::Z_BEST_COMPRESSION,
          )
          or die
          qq{IO::Compress::Gzip failed: $GzipError\n};

        foreach my $ifn ( @{ $set[$i] } ) {
            open my $ifh, q{<:encoding(utf8)}, $ifn
              or die qq{$ifn: $!};

            # Discard the first row of each input file.
            $csv->getline($ifh);

            while ( my $row = $csv->getline($ifh) ) {
                $csv->print( $z, $row, )
                  or die qq{$fn: write failed: }
                  . $csv->error_diag . qq{\n};
            }
            close $ifh;
        }

        # Compressed data is flushed on close; a failure here means the
        # output file is incomplete.
        $z->close
          or die qq{$fn: close failed: $GzipError\n};
    }
    return;
}