use strict;
use warnings;
use Text::CSV;
use DBI;

# Input filename, and output file template with %d for the interval number
my $input_filename  = 'td.data';
my $output_filename = 'split_%d.data';

# Divide loci into groups of one million per output file
sub calculate_interval { return int((shift) / 1000000) }

my $dbh = DBI->connect("dbi:CSV:", undef, undef, {
    csv_eol      => "\n",
    csv_sep_char => "\t",
    csv_class    => "Text::CSV_XS",
    csv_null     => 1,
    csv_tables   => {
        genetics => {
            f_file    => $input_filename,
            col_names => [qw(a b c d locus f g h i j k l m n o)],
        },
    },
    RaiseError => 1,
    PrintError => 1,
}) or die $DBI::errstr;

# Magic: let DBD::CSV do the sorting by locus
my $sth = $dbh->prepare("select * from genetics order by locus");
$sth->execute;

# Grunt work to output into separate files, switching files whenever
# the locus crosses into a new one-million interval
my $output;
my $output_interval = -1;
while (my @row = $sth->fetchrow_array) {
    my $interval = calculate_interval($row[4]);
    if ($interval != $output_interval) {
        $output_interval = $interval;
        my $filename = sprintf $output_filename, $interval;
        open $output, '>', $filename or die "$filename: $!";
    }
    print {$output} join("\t", @row), "\n";
}
close $output if $output;
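# Example (hypothetical data, purely for illustration): for input rows whose
# fifth column (locus) is 999999, 1000000 and 2500000, calculate_interval
# returns 0, 1 and 2, so those rows are written to split_0.data, split_1.data
# and split_2.data respectively. Because the query returns rows ordered by
# locus, the intervals are non-decreasing and each output file is opened
# exactly once.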