## Read every record of a FASTA file and return the raw sequence strings.
##
## Parameters:
##   $file - path of the FASTA file to read.
##
## Returns:
##   The sequence string of each record, in file order. The result is
##   returned as an array, so a scalar-context call yields the record count.
##
## Dies if the file cannot be opened.
sub getSeqfromfasta2lmers
{
    my $file = shift;
    my @seqs = ();

    ## Three-argument open keeps mode characters in $file from being
    ## interpreted, and the lexical handle stays private to this call
    ## instead of living in a package-global bareword.
    open my $fh, '<', $file or die "$0: Can't open file $file: $!";

    my $in = Bio::SeqIO->new(-format  => 'fasta',
                             -noclose => 1,
                             -fh      => $fh);
    while ( my $seq = $in->next_seq() ) {
        push @seqs, $seq->seq();
    } #end while

    ## -noclose asks Bio::SeqIO to leave the handle open, so release it here
    ## rather than keeping the file open after the sub returns.
    close $fh;

    return @seqs;
}
####
## Call the parser in scalar context. With a sub that ends in
## `return @seqs;` this yields the number of sequences, but the whole file
## is read and parsed just to obtain that number.
my $nofseq = scalar( getSeqfromfasta2lmers( $file ) );
####
## The same call in list context: every element the sub returns is kept.
my @input_seqs = getSeqfromfasta2lmers($file);
####
## An array assigned to a scalar yields its element count.
my $ip = @input_seqs;
####
## Read every record of a FASTA file and return the raw sequence strings.
##
## Parameters:
##   $file - path of the FASTA file to read.
##
## Returns:
##   The sequence string of each record, in file order. The result is
##   returned as an array, so a scalar-context call yields the record count
##   instead of a single record.
##
## Dies if the file cannot be opened.
sub getSeqfromfasta2lmers {
    my $file = shift;

    ## Use a lexical file handle so that the file is closed automatically
    ## once nothing refers to the handle any more.
    open my $fh, '<', $file or die "$0: Can't open file $file: $!";
    my $in = Bio::SeqIO->newFH( -format => 'fasta', -fh => $fh );

    ## Reading <$in> in list context returns one sequence object per record;
    ## convert each to its sequence string so callers receive plain strings.
    my @seqs = map { $_->seq() } <$in>;
    return @seqs;
}
####
## Get the sequences: call the parser once, in list context, and keep the
## result.
my @input_seqs = getSeqfromfasta2lmers($file);
## And a count of them: an array assigned to a scalar yields its length, so
## the file does not have to be parsed a second time.
my $nofseq = @input_seqs;
####
sub main_process {
....
## Just reusing the array from outside the loop/sub will save a lot of time
## and some space.
## DELETE my @input_seqs = getSeqfromfasta2lmers($file);
## $ip doesn't appear to be used anywhere, but if I missed a use,
## replace references to $ip with $nofseq.
## DELETE my $ip = @input_seqs;
####
## Collect the distinct l-mers found across a set of sequences.
##
## Parameters:
##   $seqsarr - reference to an array of sequences.
##   $l       - passed to substrings() as its second argument (presumably
##              the l-mer length; confirm against substrings()).
##
## Returns:
##   The list produced by running uniq() over the substrings of all
##   sequences.
sub getlmersfromseq {
    my ( $seqsarr, $l ) = @_;

    ## Flatten the substrings of every sequence into a single list.
    my @all_lmers = map { substrings $_, $l } @{$seqsarr};

    ## Drop the repeats before handing the result back.
    my @distinct_lmers = uniq @all_lmers;
    return @distinct_lmers;
}
####
## Collect the distinct l-mers found across a set of sequences.
##
## Parameters:
##   $seqsarr - reference to an array of sequences.
##   $l       - passed to substrings() as its second argument (presumably
##              the l-mer length; confirm against substrings()).
##
## Returns:
##   Whatever uniq() returns for the combined substrings of all sequences;
##   uniq() is called in the caller's context.
sub getlmersfromseq {
    my $seqsarr = shift;
    my $l       = shift;

    ## Flatten the substrings of every sequence into a single list.
    my @all_lmers = map { substrings $_, $l } @{$seqsarr};

    return uniq @all_lmers;
}
####
## Build the grid of parameter groups to run.
##
## Parameters:
##   $file     - stored unchanged under the 'file' key of every group.
##   $file_neg - stored unchanged under the 'file_neg' key of every group.
##   $nofseq   - base value for the threshold settings; each combination is
##               generated once with $nofseq and once with $nofseq * 1.5.
##
## Returns:
##   A list of hash references, one per combination of window size
##   (8, 15, 20), fraction (0.8, 0.5) and threshold, i.e. twelve groups.
##   Window size varies slowest and threshold fastest.
sub gen_param {
    my ( $file, $file_neg, $nofseq ) = @_;

    ## Settings that are identical in every group.
    my %common = (
        file       => $file,
        file_neg   => $file_neg,
        submt_len  => 5,
        submt_d    => 1,
        e          => 0,
        polyTA_lim => 0.8,
        poly_lim   => 0.8,
    );

    ## Values swept by the grid, outermost loop first.
    my @window_sizes = ( 8, 15, 20 );
    my @fractions    = ( 0.8, 0.5 );
    my @thresholds   = ( $nofseq, $nofseq * 1.5 );

    my @param_groups;
    for my $w_size (@window_sizes) {
        for my $fraction (@fractions) {

            ## lp is the chosen fraction of the window size.
            my $lp = $fraction * $w_size;

            for my $threshold (@thresholds) {
                my %group = (
                    %common,
                    W_size            => $w_size,
                    lp                => $lp,
                    support_threshold => $threshold,
                    min_inst_lower    => $threshold,
                    min_inst_upper    => ( 3 * $threshold ),
                );
                push @param_groups, \%group;
            }
        }
    }
    return @param_groups;
}
####
## Store a reference to a new anonymous array that holds a copy of the
## elements of @matches.
$hash{ 'some complicated key' } = [@matches];
####
## Store a reference to @matches itself: no elements are copied, and later
## changes to @matches are visible through the hash entry.
$hash{ 'some complicated key' } = \@matches;
####
## Take a hash slice of the parameter group referenced by $_ and pass the
## twelve values to main_process() as positional arguments, in the order the
## keys are listed.
my $output = main_process( @{$_}{
qw/ file file_neg submt_len submt_d e W_size lp support_threshold min_inst_lower min_inst_upper polyTA_lim poly_lim /
});
## File the result under 'ParamGroup<n>'; $count is post-incremented, so the
## key uses its current value and the next group gets the following number.
$result{ 'ParamGroup' . $count++ } = $output;
####
} # ----- end foreach $mcands -----
return;
}