Query 10550 CTTGGTTAGTACTGAATCCCATATATACTATGTTTTTCCTATACATATGTACTTATGATA 10609
||||| ||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 74391 CTTGGATAGTACTGAATCCCATATATACTATGTTTTTCCTATACATATGTACTTATGATA 74332
####
Query 16319 CCCACTCGGGCCCGGCTCCAGCTCCTGCACCGCCTGGGCCAGCCTCCGCATGTTAAGGGC 16378
||||||||||||| |||||||||||||||||||||||||||||| |||||||||||||||
Sbjct 140831 CCCACTCGGGCCCCGCTCCAGCTCCTGCACCGCCTGGGCCAGCCACCGCATGTTAAGGGC 140772
####
# Scan a BLASTN pairwise-alignment text report and, for every mismatch column
# (a blank in the match line between a Query row and its Sbjct row), print a
# tab-separated record and pass the computed coordinate to snpdetails().
#
# Output columns: ID, position, variation (query base/subject base),
# reference-genome coordinate. No newline is printed after the record here;
# presumably snpdetails() (defined elsewhere) finishes the line -- TODO confirm.
my $registry = 'Bio::EnsEMBL::Registry';
$registry->load_registry_from_db(
    -host => 'ensembldb.ensembl.org',
    -user => 'anonymous'
);
my $home = $ENV{'HOME'};
# File-scoped state; kept declared here because code outside this section
# may refer to these names.
my($ID, $query, $off, $idi, $subject, $ref, $st);
print "ID\tposition\tvariation\tRef Genome coordinates\n";

# Three-argument open with a lexical handle: avoids mode injection through the
# file name and does not shadow Perl's special DATA (__DATA__) handle.
my $input_file = "Input_files/Contig_Alignment_Selected_3.txt";
open(my $blast_fh, '<', $input_file)
    or die "Cannot open the file $input_file: $! \n";

while (defined(my $line = <$blast_fh>)) {
    chomp $line;

    # Start of an alignment: ">lcl|14079 ref|NC_...|..." -> second '|' field.
    if ($line =~ m[^>]) {
        ($ID) = (split /\|/, $line)[1];
        $ref = undef;
        ($ref) = $ID =~ /(\d+)\s+ref$/ if defined $ID;
    }

    # Percentage of identity, e.g. " Identities = 9014/9037 (99%), Gaps = ...".
    # The counts can have any number of digits, so match \d+ rather than \d{3}.
    if ($line =~ /^\s+Identities/) {
        my ($identity) = split /,/, $line;
        ($idi) = $identity =~ /\sIdentities\s\=\s\d+\/\d+\s\((\d{1,3}\%)\)$/;
    }

    # Strand line, e.g. " Strand=Plus/Minus": keep the subject strand.
    if ($line =~ /^\s+Strand/) {
        ($st) = $line =~ /^\s\w+\=\w{4}\/(\w{4,5})$/;
    }

    # An alignment row: "Query <start> <sequence> <end>". Requiring the start
    # coordinate keeps the "Query= <name>" header line from being treated as a
    # row (which would swallow the two lines that follow it).
    if ($line =~ /^(Query\s+(\d+)\s+)/) {
        # Column where the sequence starts. Measured from the row itself
        # instead of assuming 15, so coordinates of any width work.
        my $seq_col = length $1;
        $query = $2;
        my $top = substr $line, $seq_col;

        # The match line and the Sbjct row follow the Query row directly.
        my $pipes_line = <$blast_fh>;
        my $sbjct_line = <$blast_fh>;
        last unless defined $pipes_line && defined $sbjct_line;   # truncated report
        chomp($pipes_line, $sbjct_line);

        # BLAST aligns the match line under the Query sequence, so the same
        # column offset applies to it.
        my $pipes = length($pipes_line) > $seq_col
                  ? substr($pipes_line, $seq_col)
                  : '';

        # The Sbjct row gets its own measured offset in case its prefix
        # width differs from the Query row's.
        my ($sbjct_prefix, $value) = $sbjct_line =~ /^(Sbjct\s+(\d+)\s+)/
            or next;
        my $bot = substr $sbjct_line, length $sbjct_prefix;

        # $p is the 1-based column of each blank in the match line; index()
        # returns -1 when there are no more blanks, which makes $p zero and
        # ends the loop.
        my $p = 0;
        while ($p = 1 + index($pipes, ' ', $p)) {
            my $pos1 = $value - $p;
            my $pos2 = $value + $p;
            my $var1 = substr($top, $p - 1, 1);
            my $var2 = substr($bot, $p - 1, 1);

            # NOTE(review): 4899999 looks tied to a subject region starting at
            # 4900000 (as in "ref|NC_000009.11|:4900000-5300000") -- confirm.
            # NOTE(review): the coordinate is derived from $pos2 on both
            # strands, and column $p maps to $value +/- $p rather than
            # $value +/- ($p - 1); arithmetic kept as-is, verify intent.
            my $genomref2 = 4899999 + $pos2;

            my $reported = (defined $st && $st eq "Minus") ? $pos1 : $pos2;
            print join "\t", $ref, $reported, $var1 . "/" . $var2, $genomref2;
            snpdetails($genomref2);
        }
    }
}
close $blast_fh;
####
BLASTN 2.2.24+
Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and
Webb Miller (2000), "A greedy algorithm for aligning DNA
sequences", J Comput Biol 2000; 7(1-2):203-14.
RID: 5ZHMGK7311R
Query= NODE_16_length_35408_cov_15.061031
Length=35478
Score E
Sequences producing significant alignments: (Bits) Value
lcl|14079 ref|NC_000009.11|:4900000-5300000 Homo sapiens chro... 1.655e+04 0.0
ALIGNMENTS
>lcl|14079 ref|NC_000009.11|:4900000-5300000 Homo sapiens chromosome 9,
GRCh37 primary reference assembly
Length=400001
Score = 1.655e+04 bits (8960), Expect = 0.0
Identities = 9014/9037 (99%), Gaps = 15/9037 (0%)
Strand=Plus/Minus
Query 10190 TGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAAGCATCGCCTCCTGGGTTCACGCCATT 10249
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 74751 TGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAAGCATCGCCTCCTGGGTTCACGCCATT 74692
Query 10250 CTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCATCTGCCACCATGCCCCACTAA 10309
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 74691 CTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCATCTGCCACCATGCCCCACTAA 74632
Query 10310 ttttttctattttttAGTAGAGACGGGGTTTCACCATGTTAGCCAGGATGGTCTCGATCT 10369
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 74631 TTTTTTCTATTTTTTAGTAGAGACGGGGTTTCACCATGTTAGCCAGGATGGTCTCGATCT 74572
Query 10370 CCTGACCTCGTGATCCGCCCACCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCC 10429
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 74571 CCTGACCTCGTGATCCGCCCACCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCC 74512
Query 36624 aTGTTTTGAGCATATAGGGAAAATTTATAAAAATTGGCCATGATGaaacataagctcaaa 36683
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 100670 ATGTTTTGAGCATATAGGGAAAATTTATAAAAATTGGCCATGATGAAACATAAGCTCAAA 100611
Query 36684 aagtttaaaaagaaaactcctaaaagttggcataacaaagcctaaaaaTCATTTCAAACT 36743
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 100610 AAGTTTAAAAAGAAAACTCCTAAAAGTTGGCATAACAAAGCCTAAAAATCATTTCAAACT 100551
Query 36744 TGGTATAACTGTTACTAGAAAACCATCTACACAATGACTATATATATGCCTTTATTTCAT 36803
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 100550 TGGTATAACTGTTACTAGAAAACCATCTACACAATGACTATATATATGCCTTTATTTCAT 100491
Query 36804 TTTTATGTTACGCTTCTCTTTATATTTGAATCATTCCTTTAAACTACATAAACATTTTCA 36863
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 100490 TTTTATGTTACGCTTCTCTTTATATTTGAATCATTCCTTTAAACTACATAAACATTTTCA 100431
Query 36864 AGTGTTTGTAAATACCCTTTTAAAAATTACTGCTGTTAGCTGTTCTTCATGATTTTCTTA 36923
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 100430 AGTGTTTGTAAATACCCTTTTAAAAATTACTGCTGTTAGCTGTTCTTCATGATTTTCTTA 100371
Query 36924 CTGGTCTCCTTACACATTCGAAATTGGACATTTCCGACTATTTCCTTGGTATGTTTTATA 36983
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 100370 CTGGTCTCCTTACACATTCGAAATTGGACATTTCCGACTATTTCCTTGGTATGTTTTATA 100311