Help for this page

Select Code to Download


  1. or download this
    #Given a GFF file and an EMBL file for a genome, extract the upstream
    #sequences that correspond to intergenic regions
    #STEP1: Parse a GFF file to get upstream regions
    ...
    
    #extract sequences 200 nts upstream upto 5 nts after the start codon t
    +hrough sfetch
    cat file.upstream.coord.txt | perl -F'\t' -lane '$length=$F[2]-$F[1]; 
    +if($length>200 && $F[3] eq "+"){$from=$F[2]-200;$to=$F[2]+7; $name = 
    +join("_",$F[0],$from,$to,$F[3]); print "sfetch -d bacteria.embl -F \"
    +fasta\" -f $from -t $to -r \"$name\" ." }elsif($length < 200 && $F[3]
    + eq "+"){$from=$F[1];$to=$F[2]+7; $name=join("_", $F[0],$from,$to, $F
    +[3]); print "sfetch -d bacteria.embl -F \"fasta\" -f $from -t $to -r 
    +\"$name\" ." }elsif($length > 200 && $F[3] eq "-"){$from=$F[1]+200; $
    +to=$F[1]-7; $name =join("_", $F[0],$from,$to, $F[3]); print "sfetch -
    +d bacteria.embl -F \"fasta\" -f $from -t $to -r \"$name\" ." }elsif($
    +length <200 && $F[3] eq "-"){$from=$F[2];$to=$F[1]-7;$name=join("_", 
    +$F[0],$from,$to, $F[3]); print "bacteria.embl -F \"fasta\" -f $from -
    +t $to -r \"$name\" ."} ' | sh > file.upstream.fa
    
  2. or download this