or download this
#Given a GFF file and an EMBL file for a genome, extract the upstream sequences that correspond to intergenic regions
#STEP1: Parse a GFF file to get upstream regions
...
#Extract sequences from 200 nts upstream up to 5 nts after the start codon using sfetch
# Build one sfetch command per record of file.upstream.coord.txt, run the
# generated commands through sh, and collect the FASTA output in
# file.upstream.fa.
#
# Input: tab-separated records; the columns used are
#   0  label used as the prefix of the output sequence name
#      (presumably the sequence/gene id -- confirm against STEP1 output)
#   1  region start coordinate
#   2  region end coordinate
#   3  strand, "+" or "-"
#
# Window selection:
#   "+" strand: from (end - 200), or from the region start when the region is
#               shorter than 200, to (end + 7).
#   "-" strand: from (start + 200), or from the region end when the region is
#               shorter than 200, to (start - 7); here from > to.
# Records whose strand is neither "+" nor "-" are skipped.
#
# NOTE(review): the comment above says "5 nts after the start codon" while the
# code extends 7 positions past the region boundary -- confirm which is meant.
#
# Fixes relative to the earlier version:
#   * the short "-" strand case emitted a command without the leading
#     "sfetch -d", so sh tried to execute "bacteria.embl";
#   * regions of length exactly 200 matched no branch and were dropped
#     (both formulas give the same coordinates at that length, so >= is safe).
perl -F'\t' -lane '
    my ($label, $start, $end, $strand) = @F[0 .. 3];
    my $length = $end - $start;
    my ($from, $to);

    if ($strand eq "+") {
        # Clamp the window to the region when it is shorter than 200.
        $from = $length >= 200 ? $end - 200 : $start;
        $to   = $end + 7;
    }
    elsif ($strand eq "-") {
        # Mirror image of the "+" case: coordinates run from high to low.
        $from = $length >= 200 ? $start + 200 : $end;
        $to   = $start - 7;
    }
    else {
        next;    # unknown strand: emit nothing, as before
    }

    my $name = join("_", $label, $from, $to, $strand);

    # Single print shared by all cases so the command text cannot diverge.
    print qq{sfetch -d bacteria.embl -F "fasta" -f $from -t $to -r "$name" .};
' file.upstream.coord.txt | sh > file.upstream.fa