40 50 60 70 80 90
HAHU TTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVN
... ..... . : ::: :.. ..: :.
CG1674 MDSTLNIENVNDPTSIASDLSAENTKADLVS
10 20 30
####
#!/usr/bin/perl -w
# Extract the aligned region from each alignment block of the FASTA-style
# search output stored after __DATA__.
#
# A block is five consecutive lines: the sample scale, the sample
# sequence, the match line (dots and colons), the library sequence and
# the library scale.  The columns spanned by the dots and colons on the
# match line mark where the two sequences overlap; that same column range
# is cut out of the sample and library lines and printed.
use strict;
use warnings;

# Return the characters of $text in columns [$start, $end), or an empty
# string when the line is too short to reach the aligned region (substr
# would otherwise warn and return undef).
sub aligned_region {
    my ( $text, $start, $end ) = @_;
    return '' if $start >= length $text;
    return substr $text, $start, $end - $start;
}

while ( defined( my $line = <DATA> ) ) {
    print "---------------\n";

    # Only a scale line (blank-separated two or three digit numbers)
    # starts a block.  Everything else appears to be commentary and is
    # skipped.
    # Debug code, thus commented out but left behind.
    # print "Skip:\n$line";
    next unless $line =~ /^(?:\s+\d{2,3})+/;

    print "Analyze:\n$line";

    # Here I'm just grabbing individual lines from the fasta output
    # into variables.  The scale line itself is already in $line; the
    # next four lines are the sample, the match (dots and colons), the
    # library and the library scale.  The library scale is not used, but
    # it must still be consumed so the outer loop does not see it.
    my ( $sample, $match, $library, $libScale ) =
        map { scalar <DATA> } 1 .. 4;

    # Input that ends in the middle of a block cannot be analyzed.
    if ( grep { !defined } $sample, $match, $library, $libScale ) {
        warn "Incomplete alignment block at end of input; stopping\n";
        last;
    }

    # Drop line endings so a short sequence line cannot leak its newline
    # into the printed region.
    chomp( $sample, $library );

    # I'm using a regular expression to figure out how long the leading
    # blanks are and where the last match character sits.  The outer
    # capture runs from the start of the line through the last non-blank
    # character; the inner capture is just the leading blanks.
    my ( $endBlanks, $startBlanks ) = $match =~ /^((\s*).*\S)/;

    # A match line with no dots or colons gives nothing to extract.
    unless ( defined $endBlanks ) {
        warn "Match line is blank; skipping block\n";
        next;
    }

    # Since the regular expression grabbed the relevant pieces of the
    # string but we just want the lengths, we do that conversion here.
    my ( $start, $end ) = ( length($startBlanks), length($endBlanks) );

    print "Start at $start";
    print ", end at $end\n";

    # Done .. print out the matching parts.
    print "Sample match is: "
        . aligned_region( $sample, $start, $end ) . "\n";
    print "Library match is: "
        . aligned_region( $library, $start, $end ) . "\n";
}
__DATA__
40 50 60 70 80 90
HAHU TTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVN
... ..... . : ::: :.. ..: :.
CG1674 MDSTLNIENVNDPTSIASDLSAENTKADLVS
10 20 30
100 110 120 130 140
HAHU FKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR
.. . .. :. : :: : : : ::.:
CG1674 LNEPNVNDQTSSASDLTAENTKADHDSLNKPKDFNNQILNIISDIDINIKAQEKITQLKE
40 50 60 70 80 90
>>CG11153-PA type=protein; loc=4:complement(821536..8223 (580 aa)
initn: 43 init1: 43 opt: 69 Z-score: 84.3 bits: 23.5 E(): 1.3
Smith-Waterman score: 69; 45.455% identity (48.387% ungapped) in 33 a
+a overlap (57-89:513-543)
30 40 50 60 70 80
HAHU EALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDL
: ...:: : . :: :..:: : :: :
CG1115 AEMRQLWCRTGGVSGGSGSLCADACPKGSGGSNSQVAVAAAAAVYHLQDM--ASSAASTA
490 500 510 520 530 540
####
---------------
Analyze:
40 50 60 70 80 90
Start at 37, end at 67
Sample match is: NAVAHVDDMPNALSALSDLHAHKLRVDPVN
Library match is: DSTLNIENVNDPTSIASDLSAENTKADLVS
---------------
---------------
Analyze:
100 110 120 130 140
Start at 7, end at 37
Sample match is: FKLLSHCLLVTLAAHLPAEFTPAVHASLDK
Library match is: LNEPNVNDQTSSASDLTAENTKADHDSLNK
---------------
---------------
---------------
---------------
---------------
---------------
---------------
Analyze:
30 40 50 60 70 80
Start at 37, end at 65
Sample match is: GHGKKVADALTNAVAHVDDMPNALSALS
Library match is: GSNSQVAVAAAAAVYHLQDM--ASSAAS