sub WEED { my ($span, %matches) = @_; my %sets; my @newfast; my @sortedKeys; my $i = 0; while ($i < $num) { $sortedKeys[$i] = $i; $i++; } # Rather than the unsorted method of (keys %matches), I iterate through # the fastheaders array, which stores the fasta_ids of interest, in their # order within the file. This array will need updating at the end of this # subroutine WEEDFAST: for my $fasta_id (@fastheaders) { my $setscounter = 0; # We only want to keep groups that contain matches for all patterns next unless defined %{$matches{$fasta_id}}; # keep the user updated on program progression print "Currently analysing results from sequence:\n $fasta_id\n"; # The key $site below is an index value (0, 1, 2, ..), so incrementing # in numerical order makes the most sense. It corresponds to the user- # input patterns that were searched for for my $site ( @sortedKeys ) { # We only want to keep groups that contain matches for all patterns last unless defined @{$matches{$fasta_id}{$site}}; # We are looking for all matches within $span of EVERY match, so # for each match, find it's position, and the position $span away WEEDLOW: for my $low (@{$matches{$fasta_id}{$site}}) { my $lowerlimit = $low + 0; my $upperlimit = $span + $lowerlimit; # Now, any and all matches between $lowerlimit and $upperlimit should be # saved into an %HoA for my $sitekey ( @sortedKeys ) { next unless defined @{$matches{$fasta_id}{$sitekey}}; # this part should be self-explanatory, though it is the core of the # sub. For each element, where is it located? If it is below the lower # limit (only occurring when $lowerlimit defined by a different $sitkey) # then proceed to the next. If it is above the upperlimit, then there # won't be any more within the array, so skip to next sitekey @{$sets{$fasta_id}[$setscounter]{$sitekey}} = grep { $_ >= $lowerlimit and $_ <= $upperlimit } @{$matches{$fasta_id}{$sitekey}}; unless (@{$sets{$fasta_id}[$setscounter]{$sitekey}}) { %{$sets{$fasta_id}[$setscounter]} = (); next WEEDLOW; } } # a possibly redundant check to make sure we have matches from ALL patterns if (scalar keys %{$sets{$fasta_id}[$setscounter]} < $num) { %{$sets{$fasta_id}[$setscounter]} = (); } # Finally, if we are sure this is a hit, then increment setscounter # to get the next set $setscounter++ if scalar %{$sets{$fasta_id}[$setscounter]}; # closes for my low } #closes for my site } # Originally, I was getting arrays of empty subhashes, since $setscounter was # being initialized, though never having a subhash assigned to it. Therefore, # I threw this in. If there is a subhash present as the last element of the array, # the scalar call will come back with the bits being used (1/8, etc). Otherwise, # it will return 0. If it's 0, we pop that out, and there's no array remaining. unless (defined @{$sets{$fasta_id}}) { delete $sets{$fasta_id}; next WEEDFAST; } if (@{$sets{$fasta_id}}) { pop @{$sets{$fasta_id}} unless scalar %{$sets{$fasta_id}[$#{$sets{$fasta_id}}]}; } # this will reset the @fastarray array, since we have possibly removed fasta_id keys from # our hash of matches. if (@{$sets{$fasta_id}}) { push(@newfast,$fasta_id); } # similarly, we want to get rid of empty hash elements. else { delete $sets{$fasta_id}; } #closes for fastarray } # as @fastarray is global, this will have global effect, and we do not need to return it. @fastarray=@newfast; return %sets; #closes subroutine }