# - get the user-specified sequence file, library file and cut-off percentage
# - run fasta34.exe with the sequence file and library file
# - open the file that was output by fasta34
# - set INPUT_RECORD_SEPARATOR ($/) to ">>"
# - reading each record of the fasta34 output file:
#   -- the first read will contain just ">>", so skip it
#   -- match /(\d+\.\d+)% identity/ and save the captured string as $per
#   -- push the record onto an "org" array
#   -- if $per > cut-off value, push the record onto an "input" array
#
# You now have all the original fasta output in @org, and
# all the "above-threshold" records in @input. At this point,
# you want to run fasta again, but I can't figure out what
# input you want to give it, how many times you really need
# to run it, or how you should use the subsequent output.
####
#!/usr/bin/perl
use strict;
use warnings;

# Usage: script seq.file lib.file cutoff
#
# Runs fasta34.exe on the given sequence and library files, splits its
# output into ">>"-delimited records, and writes every record whose
# "% identity" value exceeds the cutoff to nextinput.fasta.

chdir "c:/perl/sam" or die "can't chdir to c:/perl/sam: $!";

# get the user-specified sequence file, library file
# and cut-off percentage
my ( $seqfile, $libfile, $cutoff ) = @ARGV;

# The argument-count test comes first so the file tests and the regex
# are never applied to undefined values.
die "Usage: $0 seq.file lib.file cutoff\n"
    unless ( @ARGV == 3
    and -f $seqfile
    and -f $libfile
    and $cutoff =~ /\A\d+\.?\d*\z/ );

# run fasta34.exe with the sequence file and library file
my $main = "main.fasta";

# List form of system() hands the arguments to the program directly, so
# file names containing spaces or shell metacharacters are passed intact.
my $result = system( 'fasta34.exe', '-O', $main, '-Q', $seqfile, $libfile );

# Test the whole status word, not just the exit code in the high byte:
# a failure to launch (-1) or death by signal must be reported as well.
if ( $result != 0 ) {
    warn "fasta34.exe ended with non-zero exit status \n";
}

my @org;      # every non-blank record from the fasta34 output
my @input;    # only the records whose identity exceeds $cutoff

# open the file that was output by fasta34
open( my $fasta, '<', $main ) or die "cant open fasta34 output file: $!";
{
    # set INPUT_RECORD_SEPARATOR to ">>", for this block only
    local $/ = '>>';

    while ( my $record = <$fasta> ) {
        chomp $record;    # this removes ">>" from the end of the string
        next if $record =~ /^\s*$/;    # skip reads that are empty/blank

        push @org, ">>$record";

        # A record without a "% identity" figure leaves $per undefined;
        # it is kept in @org but can never qualify for @input.
        my ($per) = $record =~ /(\d+\.\d+)% identity/;
        push @input, ">>$record" if defined $per and $per > $cutoff;
    }
}
close $fasta;

# if you want to save @input and/or @org to a file and run fasta34 again:
open( my $out, '>', 'nextinput.fasta' )
    or die "can't open nextinput.fasta: $!";
print {$out} join '', @input;

# Buffered write errors only surface at close, so check it.
close $out or die "can't close nextinput.fasta: $!";

# and likewise for @org