>GTM1_RAT GLUTATHIONE S-TRANSFERASE YB1 (EC 2.5.1.18) (  (217 aa) 
 WFAGDKVTYVDFLAYDILDQYHIFEPKCLDAFPNLKDFLARFEGLKKISAYMKSSRYLST 
 PIFSKLAQWSNK                                                 

>GTMU_CRILO GLUTATHIONE S-TRANSFERASE Y1 (EC 2.5.1.18)   (217 aa) 
 FAGDKVTLCGFLAYDVLDQYQMFEPKCLDPFPNLKDFLARFEGLKKISAYMKTSRFLRRP 
 IFSKMAQWSNK                                                  

>GTM1_HUMAN GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18  (217 aa) 
 LPEKLKLYSEFLGKRPWFAGNKITFVDFLVYDVLDLHRIFEPKCLDAFPNLKDFISRFEG 
 LEKISAYMKSSRFLPRPVFSKMAVWGNK                                 

>GLNA_ANASP GLUTAMINE SYNTHETASE (EC 6.3.1.2) (GLUTAMAT  (473 aa) 
 SLELALEALENDHAFLTDTGVFTEDFIQNWIDYKLANEVKQMQLRPH-PYEFSIYYDV   

>GTM4_HUMAN GLUTATHIONE S-TRANSFERASE MU 4 (EC 2.5.1.18  (218 aa) 
 LPTMMQHFSQFLGKRPWFVGDKITFVDFLAYDVLDLHRIFEPNCLDAFPNLKDFISRFEG 
 LEKISAYMKSSRFLPKPLYTRVAVWGNK                                 

##</code><code>##

#!/usr/bin/perl

# Check fasta file format, report cases that don't meet expectations

use strict;
use warnings;

my %records;        # header line => sequence text of the first record seen with that header
my $checked = 0;    # every record read, including ones whose header was already seen

$/ = '';   # paragraph mode: an empty $/ makes each run of one or more
           # blank lines act as a single record separator.
           # NOTE(review): only runs of "\n" count as blank lines here; a file
           # with CRLF line endings read without CRLF translation would
           # presumably arrive as one big record -- confirm on your platform.

# Read every record from the files named on the command line (or STDIN),
# warn about anything that does not look like a FASTA-style record, and
# remember the sequence text under its header line so that repeated
# headers with different data can be reported.
while (<>) 
{
    $checked++;

    my @lines = split /[\r\n]+/;  # split on line-breaks

    # First line should be the identifier.  A record made of nothing but
    # line-break characters splits to an empty list, so fall back to ''
    # rather than testing an undefined value.
    my $key = shift @lines;
    $key = '' unless defined $key;

    if ( $key !~ /^>/ )
    {
        warn "record $. of $ARGV does not start with '>':\n$_\n";
    }

    # Every remaining line must be one space, then residues/gaps, then
    # optional trailing whitespace.
    my $data = '';
    for my $line ( @lines )
    {
        warn "record $. of $ARGV contains odd data:\n$_\n" unless ( $line =~ /^ [-A-Z]+\s*$/ );
        $data .= $line."\n";  # (remember to put linefeeds back)
    }

    if ( exists( $records{$key} ))   # have we seen this identifier before?
    {
        warn "key $key found on different data:\n$data\n\n$records{$key}\n\n"
            if ( $records{$key} ne $data );
    }
    else  # we haven't seen this value of $key before
    {
        $records{$key} = $data;
    }
}
continue
{
    # Closing ARGV at the end of each input file resets $., so the record
    # numbers in the warnings are relative to the file named by $ARGV.
    close ARGV if eof;
}

# Report the number of records actually read; counting the keys of
# %records would silently leave out records with a repeated header.
printf STDERR "%d records checked\n", $checked;

# Just for fun, output some of the stored data: every record whose header
# line starts with ">GTM1_", each followed by a blank line.
#
# keys() returns hash keys in no guaranteed order, so sort them first;
# otherwise the redirected output can differ from one run to the next.
for my $chosen ( grep { /^>GTM1_/ } sort keys %records )
{
    # $records{$chosen} already ends in a newline; the trailing '' makes
    # join add one more, which yields the blank separator line.
    print join "\n", $chosen, $records{$chosen}, '';
}

##</code><code>##

# Run the checker over every .fasta file in the current directory
# (assuming test-format.pl is the Perl script shown above).
# Only the selected ">GTM1_" records are printed to STDOUT and end up in
# test.out; the warnings and the record count go to STDERR and are not
# captured by this redirect.
perl test-format.pl *.fasta > test.out