Postgres may not be portable than SQLite. Postgres has geometric data type and functions, and if you take begin and end for a box of no height on the x axis, Postgres's box operator will work. For exmaple, Begin=100 and End=200 record is coordinate of ((100,0),(200,0)).
http://www.postgresql.org/docs/8.2/static/functions-geometry.html
Your sample data of 5301598 records took me an hour to load into default installation of Postgres, but it gives me very quick response.
###SQL###
drop table chromosomes;
create table chromosomes(
score integer
,div numeric(6,1)
,del numeric(6,1)
,ins numeric(6,1)
,q_sequence text
--,begin1 integer
--,end1 integer
,pos box
,left_paren text
,repeat1 text
,repeat2 text
,class_family text
,begin2 text
,end2 text
,left_paren2 text
,ID integer
);
create index idx_chromosomes_1 on chromosomes(q_sequence);
create index idx_chromosomes_2 on chromosomes using gist(pos);
###PERL###
use strict;
use warnings;
use DBI;
my($dbh);
system('date');
$dbh = DBI->connect('dbi:Pg:dbname=test1','','',{AutoCommit=>0})
or die $dbh->errstr;
#populate($dbh);
select_test($dbh);
$dbh->disconnect;
system('date');
exit 0;
sub populate{
my($dbh)=@_;
my($sth,$sql,$cnt);
$cnt=0;
$sql="insert into chromosomes values (?,?,?,?,?,?,?,?,?,?,?,?,
+?,?)";
$sth=$dbh->prepare($sql) or die $dbh->errstr;
eval{
open my $fh , "<", "hg19.fa.out" or die $!;
while(my $line=<$fh>){
$cnt++;
next if $cnt <= 3;
chomp $line;
$line =~ s/^\s+//;
my @vals=split(/\s+/, $line);
#begin and end to box type
$vals[5]="(($vals[5],0),($vals[6],0))";
splice(@vals,6,1);
$sth->execute(@vals);
}
close $fh;
$sth->finish;
};
if($@){
$dbh->rollback;
}else {
$dbh->commit;
}
}
sub select_test{
my($dbh)=@_;
my($sth,$sql);
#SQL for overlaps
#$sql="select * from chromosomes where pos && box '((10000,0),
+(20000,0))'";
#SQL for include
$sql="select * from chromosomes where pos @ box '((10000,0),(2
+0000,0))'";
$sth=$dbh->prepare($sql) or die $dbh->errstr;
$sth->execute or die $dbh->errstr;
while(my $href= $sth->fetchrow_hashref){
print $href->{pos} . "\n";
}
$sth->finish;
}
|