use strict;
use warnings;
use Text::CSV;
use DBI;

# Input filename, and output file template with %d for the interval number
my $input_filename  = 'td.data';
my $output_filename = 'split_%d.data';

# Divide loci into groups of one million per output file
sub calculate_interval { return int((shift) / 1000000) }

my $dbh = DBI->connect("dbi:CSV:", undef, undef, {
    csv_eol      => "\n",
    csv_sep_char => "\t",
    csv_class    => "Text::CSV_XS",
    csv_null     => 1,
    csv_tables   => {
        genetics => {
            f_file    => $input_filename,
            col_names => [qw(a b c d locus f g h i j k l m n o)],
        },
    },
    RaiseError => 1,
    PrintError => 1,
}) or die $DBI::errstr;

# Magic: let DBD::CSV do the sorting by locus
my $sth = $dbh->prepare("select * from genetics order by locus");
$sth->execute;

# Grunt work to output into separate files, switching files whenever
# the locus crosses into a new one-million interval
my $output;
my $output_interval = -1;
while (my @row = $sth->fetchrow_array) {
    my $interval = calculate_interval($row[4]);
    if ($interval != $output_interval) {
        $output_interval = $interval;
        my $filename = sprintf $output_filename, $interval;
        open $output, '>', $filename or die "$filename: $!";
    }
    print {$output} join("\t", @row), "\n";
}
close $output if $output;
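# Example (hypothetical data, purely for illustration): for input rows whose
# fifth column (locus) is 999999, 1000000 and 2500000, calculate_interval
# returns 0, 1 and 2, so those rows are written to split_0.data, split_1.data
# and split_2.data respectively. Because the query returns rows ordered by
# locus, the intervals are non-decreasing and each output file is opened
# exactly once.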