ID SNP
FT SNP 433
FT /note="refAllele: T SNPstrains: 7083_1#5=C 7414_8#8=C 7480_8#49=C "
FT /colour=1
FT SNP 442
FT /note="refAllele: T SNPstrains: 7065_8#2=C 7065_8#94=C 7083_1#2=C 7083_1#3=C 7083_1#41=C 7083_1#42=C 7083_1#43=C "
FT /colour=1
FT SNP 460
FT /note="refAllele: T SNPstrains: 7564_8#14=C "
FT /colour=1
FT SNP 703
FT /note="refAllele: G SNPstrains: 7521_5#39=A (non-synonymous) (AA Ala->Thr) "
FT /colour=2
FT SNP 937
FT /note="refAllele: G SNPstrains: 7414_8#30=T (non-synonymous) (AA Val->Leu) "
FT /colour=2
FT SNP 1269
FT /note="refAllele: G SNPstrains: 7480_7#22=A (synonymous) 7480_7#62=A (synonymous) "
FT /colour=3
FT SNP 1804
FT /note="refAllele: T SNPstrains: 7414_7#66=A (non-synonymous) (AA Ser->Thr) 7414_8#44=A (non-synonymous) (AA Ser->Thr) 7521_6#54=A (non-synonymous) (AA Ser->Thr) "
FT /colour=2
etc etc...
####
use strict;
use warnings;
use feature qw(say);

# Scan a feature table made of three-line SNP records
#
#     FT SNP <coordinate>
#     FT /note="refAllele: X SNPstrains: <strain>=<allele> ... "
#     FT /colour=<code>
#
# and write one report line (coordinate, number of strains, amino-acid
# change) for every record whose colour code maps to "non" in %cod.
# Debug tracing of every input line goes to STDOUT; the report itself
# goes to output.txt.

my $file   = "BSAC.pl";
my $report = "output.txt";

# Colour code => label. Only the "non" label is acted upon below.
my %cod = ( 1 => "red", 2 => "non", 3 => "green" );

open my $in, "<", $file
    or die "Cannot open '$file' for reading: $!";
open my $out, ">", $report
    or die "Cannot open '$report' for writing: $!";

say $out "Coordinate No of Strains AA Change";

# State of the record currently being read.
my $SNP;
my $count;
my $change;

while ( my $line = <$in> ) {
    chomp $line;
    say qq(DEBUG: Line = "$line");

    if ( $line =~ /^FT\s+SNP\s+(\d+)/ ) {
        $SNP = $1;
        say qq(\$SNP = $SNP;);

        # A new record starts here: forget the previous record's note
        # data so that it cannot leak into this one if the /note line
        # is missing.
        $count  = undef;
        $change = undef;
    }
    elsif ( $line =~ /^FT\s+\/note="(.*)"/ ) {
        my $note = $1;
        say qq(my \$note = $note);

        # Every strain entry has the form <strain>=<allele>, so the
        # number of '=' characters is the number of strains.
        $count = ( $note =~ tr/=// );

        # Capture in list context: on a failed match $1 would still
        # hold the note text from the match above, so it must not be
        # read unconditionally.
        my ($aa_change) = $note =~ /\((AA \w+->\w+)\)\s*$/;
        $change = $aa_change // "";
    }
    elsif ( $line =~ /^FT\s+\/colour=(\d+)/ ) {
        my $code = $1;
        say qq(Code = $code);

        # Unknown colour codes are simply not reported (and no longer
        # trigger an "uninitialized value" warning).
        if ( ( $cod{$code} // "" ) eq "non" ) {
            printf $out "%-12.12s %-15.15s %s\n",
                $SNP // "", $count // 0, $change // "";
        }
    }
}

close $in;

# Buffered write errors only surface when the handle is closed.
close $out
    or die "Error while writing '$report': $!";
####
Coordinates No of Strains AA Change
703 1 AA Ala->Thr
937 1 AA Val->Leu
1804 3 AA Ser->Thr