in reply to Re: Equation - code review
in thread Equation - code review

Hi,
Thanks for your valuable comments.
For your first two question,
>>
I just copied the method from my module and created a dummy arguments around it. I can't paste the entire module, since dependency is more and the security. I apologize for that.

Here I have Updated my code based on your comments, please review it
#!c:\perl\bin\perl use strict; use warnings; use Math::BigInt; use Data::Dumper; my %index_val = ( A=> 2, C => 3, D => 4, N => 2, M => 2, T => 1, G => 4); my $sequence="ACTGACN"; my $distance_index= int(rand(4)); my $window=int(rand(3)); my $arr_ref = alpha_index({ sequence => $sequence, distance => $distance_index, aa_index => \%index_val, window_size => $window }); print STDERR Dumper($arr_ref); sub alpha_index { my $self=shift; my $sequence=$self->{sequence}; my $distance=$self->{distance}; my $aa_index=$self->{aa_index}; my @carbon_index; my $window_size= $self->{window_size}; if($self->{window_size} ==1) { $window_size=1;} elsif($self->{window_size} > 1 ) { $window_size = int(($self->{window_size}-1)/2); } else { $window_size=0; } foreach (0.. (length($sequence)-1)) { my %carbon_aa; my ($left,$middle,$right) = $sequence =~ m/(\w{$_})?(\w{1} +)?(\w*)/; $carbon_aa{$middle}=$aa_index->{$middle} if($middle); $left=reverse($left); my $initial_index=2; my $length_index= ($window_size) ? $window_size:length($left); if($self->{window_size} ==1 ) { push @carbon_index, \%carbon_aa if(%carbon_aa); next; } foreach my $increment (0..$length_index-1) { my $left_aa=substr($left,$increment,1); next if not exists $aa_index->{$left_aa}; my $power=Math::BigInt->new($initial_index); my $distance_index=($power->bpow($distance))->numify() +; $carbon_aa{$middle}+= ( $aa_index->{$left_aa} * (1/$distance_index)); $initial_index++; } $initial_index=2; $length_index= ($window_size)?$window_size:length($right); foreach my $increment (0..$length_index-1) { my $right_aa=substr($right,$increment,1); next if not exists $aa_index->{$right_aa}; my $power=Math::BigInt->new($initial_index); my $distance_index=($power->bpow($distance))->numify() +; $carbon_aa{$middle}+= ( $aa_index->{$right_aa} * ( 1/$distance_index)); $initial_index++; } push @carbon_index, \%carbon_aa if(%carbon_aa); } return \@carbon_index; }

Thanks in advance.
- kulls

Replies are listed 'Best First'.
Re^3: Equation - code review
by imp (Priest) on Aug 16, 2006 at 08:53 UTC
    Before you start looking for areas that you can optimize you should look for areas that should be optimized. You can do this using Benchmark and splitting the function into working pieces, and then testing those pieces.

    This line strikes me as possibly inefficient because it creates a new regex for each iteration, which means 1000 regex for a 1000 character sequence.

    ($left,$middle,$right) = $sequence =~ m/(\w{$_})?(\w{1})?(\w*)/;
    So I wrote a benchmark to try a few strategies: Results for running 500 iterations with length 7:
              Rate re_orig  re_set bsubstr
    re_orig  681/s      --    -12%    -92%
    re_set   774/s     14%      --    -91%
    bsubstr 8475/s   1144%    995%      --
    
    Results for running 500 iterations with length 100:
              Rate re_orig  re_set bsubstr
    re_orig 48.1/s      --    -12%    -93%
    re_set  54.9/s     14%      --    -92%
    bsubstr  718/s   1392%   1209%      --
    
    Results for running 100 iterations with length 1000:
              Rate  re_set bsubstr
    re_set  4.00/s      --    -93%
    bsubstr 59.5/s   1387%      --
    
    
Re^3: Equation - code review
by GrandFather (Saint) on Aug 16, 2006 at 11:13 UTC

    With a little tidying (to my eye anyway) and applying suggestions from imp and BrowserUK the following code gives the same answers as the original for the small range of values I've tested and ought to be somewhat faster.

    #!c:\perl\bin\perl use strict; use warnings; my %index_val = ( A=> 2, C => 3, D => 4, N => 2, M => 2, T => 1, G => 4); my $sequence="ACTGACN"; my $distance_index = 2; my $window = 2; my $arr_ref = &alpha_index({ sequence => $sequence, distance => $distance_index, aa_index => \%index_val, window_size => $window }); for my $hashRef (@$arr_ref) { print join "\n", map {"$_ => $hashRef->{$_}"} keys %$hashRef; print "\n"; } sub alpha_index { my $self=shift; my $sequence=$self->{sequence}; my $distance=$self->{distance}; my $aa_index=$self->{aa_index}; my @carbon_index; my $window_size = int $self->{window_size}; $window_size = int ($window_size - 1) / 2 if $window_size > 1; foreach my $len (0 .. (length($sequence)-1)) { my %carbon_aa; my ($left, $middle, $right) = splitStr ($sequence, $len); $carbon_aa{$middle} = $aa_index->{$middle} if $middle; $left = reverse $left; my $initial_index = 2; my $length_index= $window_size ? $window_size : length $left; if($self->{window_size} == 1 ) { push @carbon_index, \%carbon_aa if %carbon_aa; next; } foreach my $increment (0 .. $length_index-1) { my $left_aa=substr($left,$increment,1); next if not exists $aa_index->{$left_aa}; my $distance_index= $initial_index ** $distance; $carbon_aa{$middle} += $aa_index->{$left_aa} * (1/$distanc +e_index); $initial_index++; } $initial_index=2; $length_index= $window_size ? $window_size : length $right; foreach my $increment (0 .. $length_index-1) { my $right_aa=substr ($right, $increment, 1); next if not exists $aa_index->{$right_aa}; my $distance_index= $initial_index ** $distance; $carbon_aa{$middle} += $aa_index->{$right_aa} * (1 / $dist +ance_index); $initial_index++; } push @carbon_index, \%carbon_aa if %carbon_aa; } return \@carbon_index; } sub splitStr { return ( substr($_[0], 0, $_[1]), substr($_[0], $_[1], 1), substr($_[0], $_[1] + 1) ); }

    DWIM is Perl's answer to Gödel