#!/usr/bin/perl use warnings; $a=">gi1 cds ATG fun >gi2 cds ATG fun >gi3 cds GGG fun"; while ($a=~ m/(>.*?fun)/gs) { $b1=$&; $b2=$&; while ($b1=~ />.*?cds/gs) { $h=$&; $b2=~ s/$h//g; $b2=~ s/fun//g; $seq=$b2; $seq=~ s/\n//; $hdr_seq="$h\n"."$seq\n"; push @hdr_seq1,$hdr_seq; push @only_seq1,$seq; } } # To remove multiple copies (if any): my %seen; # declare a hash my @only_seq=(); my @hdr_seq=(); @only_seq=grep{!$seen{$_}++}@only_seq1; @hdr_seq=grep{!$seen{$_}++}@hdr_seq1; print "\n\n A. Header & sequences are:\n\n"; print join ("\n", @hdr_seq); print "\n"; print "\n B. Only sequences are:\n\n"; print join ("\n", @only_seq); print "\n\n"; $num=0; foreach my $item1 (@only_seq) {$num++; # No.1 curly $seq1=$item1; foreach my $item2 (@hdr_seq) { # No.2 curly if (defined $item2) { $item2=$item2; $item3=$item2;} while ($item2=~ m/>.*cds/gs) { $hdr2=$&; $item3=~ s/$hdr2//; $item3=~ s/\s//; $seq2=$item3; $ele2="$hdr2\n"."$seq2\n"; if ($seq1 eq $seq2) {push @result1,$ele2;} else {push @result1,$ele2;} } } # No.2 curly } # No.1 curly ###################################### my @result=(); @result=grep{!$seen{$_}++}@result1; print "\n C. Non-redundant sequences are:\n\n"; print join ("",@result); print "\n"; exit; #### Microsoft Windows [Version 6.1.7600] Copyright (c) 2009 Microsoft Corporation. All rights reserved. C:\Users\x\Desktop>try.pl A. Header & sequences are: >gi1 cds ATG >gi2 cds ATG >gi3 cds GGG B. Only sequences are: ATG GGG C. Non-redundant sequences are: (This is wrong) >gi1 cds ATG >gi2 cds ATG >gi3 cds GGG #### >gi1 cds ATG >gi3 cds GGG