sub iterblast{ # settings my $myblast = "blastall -p blastn -m 8"; my $starterseq = "/share/home/tjuh/Thema12-2011/Project/testblast/matchedhits.fasta"; my $blastdb = "/share/home/tjuh/Thema12-2011/Project/databases/termite_hindgut/hindgut_db"; my $outputroot = "/share/home/tjuh/Thema12-2011/Project/testblast/test1"; my $evalue = 0.01; my $wordsize = 11; my $roundlimit = 4; # output files: my $seq_db_fasta = $outputroot . ".tmp"; my $seq_db_all = $outputroot . ".all"; my $logfile = $outputroot . ".log"; # remove existing files system("rm -rf $seq_db_fasta"); system("rm -rf $seq_db_all"); system("rm -rf $logfile"); open(LOG,">$logfile"); print LOG "Configurations:\n"; print LOG "Input file : $starterseq\n"; print LOG "Output files : $outputroot\-\n"; print LOG "Blast Engine : $myblast\n"; print LOG "Target database : $blastdb\n"; print LOG "E-value cut-off : $evalue\n"; print LOG "Blast word size : $wordsize\n"; print LOG "Blast round limit: $roundlimit\n\n"; print LOG "Runs:\n"; # hits hash my %hits = (); my %queryids = (); # Start the iterative blastn rounds for (my $round = 1; $round <= $roundlimit; $round++) { my $hitcount = 0; my $infile = ""; # the initial fasta file is only run once if ($round > 1) { $infile = $seq_db_fasta; } else { $infile = $starterseq; } my $saveblst = $outputroot . ".txt"; system("rm -rf $saveblst"); # print LOG info my $curtime = `date`; chomp($curtime); print LOG "$curtime - Blast seq: $infile\n"; print LOG "$curtime - Blast db : $blastdb\n"; # run blast system("$myblast -i $infile -d $blastdb -o $saveblst -e $evalue -W $wordsize"); open(PARSE, "<$saveblst"); open(FASTA, ">$seq_db_fasta"); # parse blast output (tab form) and add matched id's in hits hash my @lines = (); while (my $line = ){ chomp $line; @lines = split(/\t/, $line); $queryids{$lines[0]}++; $hits{$lines[1]}++; } #print "Hash size queryids: ", scalar keys %queryids, "\n"; #print "Hash size hits: ", scalar keys %hits, "\n"; # get header and sequence with 'fastacmd' and write to new fasta file while (my $key = each %hits){ #print $key . "\n"; if (exists($queryids{$key})){ # only writes uniq ids to file #print $key . "\n"; $queryids{$key} = 1; } else { system("fastacmd -d $blastdb -s 'lcl|$key' >> $seq_db_fasta"); $hitcount++; } } print LOG "Round $round has $hitcount unique hits\n\n"; # evaluate if to run for next blast round if ($hitcount == 0) { print LOG "Hitcount: $hitcount through Round $round ==> STOP\n"; $round = $roundlimit; } close(FASTA); close(PARSE); } print "Total uniq hits found in all rounds: ", scalar keys %hits, "\n"; close(LOG); }