#!/usr/bin/env perl

# Copy selected records from one FASTA file to another.
#
# A record is kept when its sequence ends in "any 10 characters followed
# by GG" AND that 12-character tail does not also occur earlier in the
# same sequence.  Input and output file names are fixed below.
#
# Sequences that are wrapped over several lines in the input are joined
# before testing and are written back as a single line.

use strict;
use warnings;
use autodie;

my ($fasta_in, $fasta_out) = qw{
    pm_1205271_bio_fasta_extract_dummy_in.fasta
    pm_1205271_bio_fasta_extract_dummy_out.fasta
};

# Captures the final 12 characters of a sequence: 10 of anything, then
# "GG", anchored to the very end of the string.
my $re = qr[(.{10}GG)\z];

{
    open my $in_fh,  '<', $fasta_in;
    open my $out_fh, '>', $fasta_out;

    # Read one whole FASTA record per iteration: records are separated by
    # a newline immediately followed by the next record's '>'.
    local $/ = "\n>";

    RECORD:
    while (my $record = <$in_fh>) {
        chomp $record;

        # Only the first record still starts with '>'; for every later
        # record it was consumed as part of the preceding separator.
        $record =~ s/\A>// if $. == 1;

        # Header is the first line; everything after it is sequence data.
        my ($head, $seq) = split /\n/, $record, 2;

        # Header-only (or empty) record: nothing to test.
        next RECORD unless defined $seq;

        # Join wrapped sequence lines and drop any trailing newline left
        # on the final record of the file.
        $seq =~ tr/\n//d;

        next RECORD unless $seq =~ $re;
        my $tail = $1;

        # The tail always occurs at the very end.  If its first occurrence
        # is earlier than that, it is not unique, so skip the record.
        next RECORD if index($seq, $tail) < length($seq) - length($tail);

        print {$out_fh} ">$head\n$seq\n";
    }

    # Close explicitly so that autodie reports a failed flush/close
    # instead of leaving it to the implicit close at scope exit.
    close $out_fh;
    close $in_fh;
}

__END__

Sample run:

$ cat pm_1205271_bio_fasta_extract_dummy_in.fasta
>1: not wanted - too short
xxxGG
>2: wanted - exact match
xxxxxxxxxxGG
>3: not wanted - not unique
xxxxxxxxxxGGxxxxxxxxxxGG
>4: wanted - unique (but only just)
xxxxxxxxxGGxxxxxxxxxxGG
>5: not wanted - no GG at end
xxxxxxxxxxGGx
>6: not wanted - no match at all
xxxxxxxxxxAA
>7: wanted - match unique
yyyyyyyyyyGGxxxxxxxxxxGG

$ pm_1205271_bio_fasta_extract.pl

$ cat pm_1205271_bio_fasta_extract_dummy_out.fasta
>2: wanted - exact match
xxxxxxxxxxGG
>4: wanted - unique (but only just)
xxxxxxxxxGGxxxxxxxxxxGG
>7: wanted - match unique
yyyyyyyyyyGGxxxxxxxxxxGG