use strict;
use warnings;
use autodie;

# Matches the header part of a line: everything from the start of the line
# up to and including the first ']'. Whatever follows the ']' on that line is
# kept as the beginning of the record's sequence.
use constant START_FASTA_REC => qr{ \A [^\]]+ \] }xms;

# The sub-sequence (domain) searched for in every record.
use constant SUB_SEQUENCE => qr{ CDECGKEFSQGAHLQTHQKVH }xms;
# use constant SUB_SEQUENCE => qr{ NOT_PRESENT }xms;  # for debug

# Reads the input file line by line, assembles each record's sequence from
# the header remainder plus all following non-header lines, and hands every
# non-empty record to process_fasta_record().
#
# Usage: script.pl [fasta_file]
#   fasta_file  optional path to the input; defaults to 'file.fasta'.
#               NOTE(review): the previous hard-coded name was 'file,fasta'
#               (comma), which looked like a typo for the extension dot.
MAIN: {
    my $filename = @ARGV ? $ARGV[0] : 'file.fasta';

    # autodie turns a failed open/close into an exception, so no "or die".
    open my $fh_fasta, '<', $filename;

    my $header_re = START_FASTA_REC;
    my $fasta_record;

    LINE:
    while (my $line = <$fh_fasta>) {
        # Strip the line terminator, including the "\r" of CRLF files;
        # a leftover "\r" would end up inside the joined sequence and
        # could break a match that spans a line break.
        $line =~ s{ [\r\n]+ \z }{}xms;

        # A successful substitution means this line starts a new record:
        # the header is removed and the rest of the line is sequence data.
        if ($line =~ s{$header_re}{}xms) {
            process_fasta_record($fasta_record)
                if defined $fasta_record and length $fasta_record;
            $fasta_record = $line;
            next LINE;
        }

        # Continuation line: append to the record being assembled.
        $fasta_record .= $line;
    }

    close $fh_fasta;

    # Process the final record, which has no following header to flush it.
    process_fasta_record($fasta_record)
        if defined $fasta_record and length $fasta_record;

    exit;    # normal exit from MAIN block
}    # end MAIN block

die "unexpected exit from MAIN block";

# subroutines ######################################################

# Prints the record (debug output) followed by one line saying whether the
# record contains SUB_SEQUENCE.
#
#   $fasta_record  the concatenated sequence text of one record.
#
# Returns nothing.
sub process_fasta_record {
    my ($fasta_record) = @_;

    print "'$fasta_record' \n";    # for debug

    # Each verdict ends with a newline so it does not run into the next
    # record's debug line.
    if ($fasta_record =~ SUB_SEQUENCE) {
        print "The protein contains the domain\n";
    }
    else {
        print "The protein doesn't contain the domain\n";
    }

    return;
}