#!/usr/bin/perl
use strict;
use warnings;

# Scan the FASTA record stored in the __DATA__ section for runs of eight or
# more consecutive residues drawn from the set V, M, F, W, L, C, A, and print
# each run together with its position.
#
# The FASTA header line is printed once, immediately before the first hit;
# nothing is printed when there are no hits.
#
# "Position" is the 0-based character offset of the first residue of the run
# within the slurped text, so it counts the header line and the newline
# characters as well as the residues themselves.
#
# Limitation: the pattern is anchored to whole lines, so at most one run (the
# first one) is reported per sequence line, and a run that is split across a
# line break is not detected.
#
# Output recorded for an earlier version of this script:
#
#   >P30450 | Homo sapiens (Human). | NCBI_TaxID=9606; | 365 | Name=HLA-A; Synonyms=HLAA;M
#   contains AVVAAVMW at position 420
#   contains VVAVLMLCLAV at position 461
#
# That version computed the position as pos() minus the run length, but pos()
# is at the end of the *line* after a match, so each number was too high by
# the count of residues following the run on its line (4 in both cases above:
# "RRKS" and "IFLC").  The position is now taken from the start offset of the
# capture group instead, so the numbers printed are correspondingly lower.

# Slurp the whole DATA section; $/ is only undefined inside the do block so
# the rest of the program keeps normal line-at-a-time semantics.
my $content = do { local $/; <DATA> };
$content = '' unless defined $content;

# First line starting with '>' (including its newline), or the empty string
# when the data carries no header, so that printing it never warns.
my $header = $content =~ /^(>.*?\n)/m ? $1 : '';

# A line qualifies only if it consists entirely of word characters, which
# keeps the header (it contains spaces and punctuation) out of the search.
while ($content =~ /^[\w]*?([VMFWLCA]{8,})[\w]*?$/mg) {
    my $sequence = $1;
    my $position = $-[1];    # start offset of capture group 1 within $content

    print $header, "contains $sequence at position $position\n";
    $header = '';            # header is emitted only before the first hit
}

__DATA__
>P30450 | Homo sapiens (Human). | NCBI_TaxID=9606; | 365 | Name=HLA-A; Synonyms=HLAA;M
MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFYTSVSRPGRGEPRFIAVGYVDDT
QFVRFDSDAASQRMEPRAPWIEQEGPEYWDRNTRNVKAHSQTDRANLGTLRGYYNQSEDGS
TIQRMYGCDVGPDGRFLRGYQQDAYDGKDYIALNEDLRSWTAADMAAQITQRKW
ETAHEAEQWRAYLEGRCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATL
RCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWASVVVPSGQEQ
RYTCHVQHEGLPKPLTLRWEPSSQPTIPIVGIIAGLVLFGAVIAGAVVAAVMWRRKS
SDRKGGSYSQAASSDSAQGSDMSLTACKVVAVLMLCLAVIFLC