#!/usr/bin/perl -w
use strict;
use warnings;

# Scan FASTA-style pairwise alignment output (stored after __DATA__) and, for
# every alignment block, print the part of the sample and library sequences
# that sits underneath the match markers (the dots and colons).
#
# An alignment block is five consecutive lines:
#
#   1. sample scale    (column numbers, e.g. "  40   50   60")
#   2. sample          (name field followed by residues)
#   3. match           (blanks, dots and colons)
#   4. library         (name field followed by residues)
#   5. library scale   (column numbers)
#
# The lines of a block are column-aligned, so the character offsets of the
# first and last marker on the match line can be applied directly to the
# sample and library lines.

LINE:
while ( my $line = <DATA> ) {

    # The separator is printed for every input line, skipped ones included.
    print "---------------\n";

    # Only a line made of blank-separated 2-3 digit numbers starts a block.
    # Everything else appears to be commentary and is skipped.
    # Debug code, thus commented out but left behind.
    # print "Skip:\n$line";
    next LINE if $line !~ /^(?:\s+\d{2,3})+/;

    print "Analyze:\n$line";

    # Pull in the remaining four lines of the block.  The fourth one (the
    # library scale) only has to be consumed, so the list assignment simply
    # drops it.
    my ( $sample, $match, $library ) = map { scalar <DATA> } 1 .. 4;

    # A block cut short by end-of-input cannot be analysed.
    if ( !defined $library ) {
        warn "Input ended inside an alignment block (line $.)\n";
        last LINE;
    }

    # Split the match line into its leading blanks and the marker run that
    # extends from the first to the last non-blank character.  A match line
    # without any marker yields no captures and the block is skipped, rather
    # than reporting offsets that point at nothing.
    my ( $indent, $markers ) = $match =~ /^(\s*)(\S(?:.*\S)?)\s*$/;
    if ( !defined $markers ) {
        warn "No match markers in alignment block ending at line $.\n";
        next LINE;
    }

    # Only the offsets are needed: where the markers start, and where they end.
    my $start = length($indent);
    my $end   = $start + length($markers);

    print "Start at $start, end at $end\n";

    # Done .. print out the matching parts.
    print "Sample match is: " . substr( $sample, $start, $end - $start ) . "\n";
    print "Library match is: " . substr( $library, $start, $end - $start ) . "\n";
}

# NOTE(review): the records below rely on exact column alignment (a 6-character
# name field plus one blank, then the residues).  Confirm the spacing against
# real FASTA output before trusting the reported offsets.
__DATA__
        40        50        60        70        80        90
HAHU   TTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVN
                                     ... ..... .  :  ::: :.. ..: :.
CG1674                              MDSTLNIENVNDPTSIASDLSAENTKADLVS
                                            10        20        30
       100       110       120       130       140
HAHU   FKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR
       ..  .    .. :. : :: : : : ::.:
CG1674 LNEPNVNDQTSSASDLTAENTKADHDSLNKPKDFNNQILNIISDIDINIKAQEKITQLKE
              40        50        60        70        80        90
>>CG11153-PA type=protein; loc=4:complement(821536..8223 (580 aa)
 initn: 43 init1: 43 opt: 69 Z-score: 84.3 bits: 23.5 E(): 1.3
Smith-Waterman score: 69; 45.455% identity (48.387% ungapped) in 33 aa overlap (57-89:513-543)
         30        40        50        60        70        80
HAHU   EALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDL
                                     : ...:: : . :: :..::  : :: :
CG1115 AEMRQLWCRTGGVSGGSGSLCADACPKGSGGSNSQVAVAAAAAVYHLQDM--ASSAASTA
            490       500       510       520       530         540