#!/usr/bin/perl -w

use strict;

#  Walk through the fasta output one line at a time.  A separator is
#  printed for every line read.  When a line looks like the start of
#  an alignment block, the next four lines are read as the rest of
#  that block and the aligned region of the sample and library
#  sequences is printed.

while ( defined( my $samScale = <DATA> ) ) {
  print "---------------\n";

  #  A block starts with the sample scale: a line that begins with
  #  one or more whitespace-separated 2-3 digit numbers.  Anything
  #  else appears to be commentary and is skipped.
  #  (Debug aid, left disabled:  print "Skip:\n$samScale";)

  next unless $samScale =~ /^(?:\s+\d{2,3})+/;
  print "Analyze:\n$samScale";

  #  Grab the remaining lines of the block: the sample, the match
  #  (dots and colons), the library and the library scale.  The
  #  library scale is not used; it is read only to move past it.

  my $sample   = <DATA>;
  my $match    = <DATA>;
  my $library  = <DATA>;
  my $libScale = <DATA>;

  #  <DATA> yields undef at end of input.  If the block is cut short
  #  there is nothing sensible to extract, so report it and stop
  #  rather than running substr() on undefined values.

  unless ( defined $library ) {
    warn "Truncated alignment block";
    last;
  }

  #  Split the match line into its leading blanks and the marker
  #  run, which spans from the first to the last non-blank
  #  character.  Trailing whitespace (including the newline) is
  #  optional, and a match line with no markers at all does not
  #  match and is skipped.

  my ( $leadBlanks, $markers ) =
    $match =~ /^(\s*)(\S(?:.*\S)?)\s*$/;
  unless ( defined $markers ) {
    warn "No match markers found in alignment block";
    next;
  }

  #  Only the column positions are needed: the markers start right
  #  after the leading blanks and end after the last marker.

  my $start = length($leadBlanks);
  my $end   = $start + length($markers);

  print "Start at " . $start;
  print ", end at " . $end . "\n";

  #  Done .. print out the matching parts.

  print "Sample match is: " .
    substr($sample, $start, $end - $start) . "\n";
  print "Library match is: " .
    substr($library, $start, $end - $start) . "\n";
}

__DATA__
        40        50        60        70        80        90       
HAHU   TTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVN
                                     ... ..... .  :  ::: :.. ..: :.
CG1674                              MDSTLNIENVNDPTSIASDLSAENTKADLVS
                                            10        20        30 

       100       110       120       130       140                 
HAHU   FKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR                
       ..  .    .. :. : :: : : : ::.:                              
CG1674 LNEPNVNDQTSSASDLTAENTKADHDSLNKPKDFNNQILNIISDIDINIKAQEKITQLKE
              40        50        60        70        80        90 

>>CG11153-PA type=protein; loc=4:complement(821536..8223  (580 aa)
 initn:  43 init1:  43 opt:  69  Z-score: 84.3  bits: 23.5 E():  1.3
Smith-Waterman score: 69;  45.455% identity (48.387% ungapped) in 33 a
+a overlap (57-89:513-543)

         30        40        50        60        70        80      
HAHU   EALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDL
                                     : ...:: : . :: :..::  : :: :  
CG1115 AEMRQLWCRTGGVSGGSGSLCADACPKGSGGSNSQVAVAAAAAVYHLQDM--ASSAASTA
            490       500       510       520       530         540