#!/usr/bin/perl
# Collects every line of the input file that contains an ID of the form
# <digits>_<digits>_<digits>, drops duplicate ID lines, and then, for each
# unique ID line (in reverse order of first appearance), prints every line of
# the input file that contains that ID line as a substring.
#
# Usage: script.pl FILE
#
# Side effects: an existing "new_alignment_FILE" is deleted and a fresh, empty
# one is created.
# NOTE(review): nothing is ever written to that results file -- matching lines
# go to STDOUT, as in the original script. Confirm whether the output was
# meant to be written to the results file instead.
use strict;
use warnings;

if (scalar(@ARGV) != 1) {
    print "\n";
    print "Usage: script.pl ";
    print "\n";
    exit();
}

my ($FILENAME) = @ARGV;

# Three-argument open with a lexical handle; stop right away if the input
# cannot be read instead of silently looping over an unopened handle.
open(my $in_fh, '<', $FILENAME)
    or die "Cannot open file \"$FILENAME\" to read from: $!\n";

## remove any existing results file
my $outputfile = "new_alignment_" . $FILENAME;
if (unlink($outputfile) == 1) {
    print "Existing \"$outputfile\" file was removed\n";
}

## generate storage file
my $out_fh;
unless (open($out_fh, '>>', $outputfile)) {
    print "Cannot open file \"$outputfile\" to write to!!\n\n";
    exit;
}

## first pass: collect the unique ID lines in order of first appearance
my @unique_ids;
my %seen;
while (my $line = <$in_fh>) {
    next unless $line =~ /\d+_\d+_\d+/;
    chomp $line;

    # The post-increment makes the test false only the first time an ID line
    # is met, so each ID line is stored once, in a single pass over the file.
    next if $seen{$line}++;
    push @unique_ids, $line;
}

## second pass: for every unique ID line, print all lines that contain it
# reverse() keeps the order the original pop() loop produced (last ID first).
for my $protein_id (reverse @unique_ids) {

    # The first pass left the handle at end-of-file; rewind before each scan,
    # otherwise the inner loop would never read a single line.
    seek($in_fh, 0, 0)
        or die "Cannot rewind \"$FILENAME\": $!\n";

    while (my $line2 = <$in_fh>) {

        # \Q...\E matches the ID line literally: FASTA headers often contain
        # regex metacharacters such as '|', '.' or '('.
        # (The original "needs explicit package name" error came from the
        # misspelled variable $proitein_id.)
        print $line2 if $line2 =~ /\Q$protein_id\E/;
    }
}

close($in_fh);
close($out_fh)
    or die "Cannot close file \"$outputfile\": $!\n";