comment on

The following code may help. It uses a binary search to find the closest value in the spectrum data to each value in the subspectrum data. Because this requires a sort and elimination of duplicates pass there would be some virtue in factoring that code out so it is done once may be of advantage. The code below uses the sub gf2 to preprocess @spectrum for gf1.


sub gf2 {
    my ($spectrum, $subsp) = @_;
    my %unique;
    
    @unique{@spectrum} = ();
    
    my @uspectrum = sort {$a <=> $b} keys %unique;
    
    return gf1(\@uspectrum, $subsp);
}


sub gf1 {
    my ($spectrum, $subsp) = @_;
    my $sum = 0;

    for my $value (@$subsp) {
        my $mid = int (@$spectrum / 2);
        my $span = int (@$spectrum / 2);
        
        while (int $span) {
            if ($value < $spectrum->[$mid]) {
                $mid -= int (($span /= 2) + 0.5);
            } elsif ($value > $spectrum->[$mid]) {
                $mid += int (0.5 + ($span /= 2));
            } else {
                last;
            }
        }
        
        my $delta;
        my $left = $mid ? abs ($value - $spectrum->[$mid - 1]) : 1e6;
        my $right = $mid < $#$spectrum ? abs ($value - $spectrum->[$mi
+d + 1]) : 1e6;
        my $cent = abs ($value - $spectrum->[$mid]);
        $sum += (sort {$a <=> $b} ($left, $right, $cent))[0];
    }

    my $result = 1000;
    $result = @$subsp / $sum if $sum != 0;
    
    return $result;
}
[download]

Benchmark results are:

gf1 result: 2.11267605633804
gf2 result: 2.11267605633804
op result: 2.11267605633804

      Rate      op     gf2     gf1
op  7.57/s      --   -100%   -100%
gf2 7732/s 102012%      --    -10%
gf1 8582/s 113238%     11%      --
[download]

use warnings;
use strict;
use Benchmark qw(cmpthese);

my @spectrum = qw/174.0 102.99 71.62 70.53 82.49 69.05 18.94 102.99 71
+.62 70.53 82.49 69.05 18.94 102.99 71.62 70.53 82.49 69.05 18.94 102.
+99 71.62 70.53 82.49 69.05 18.94/;
my @subsp = qw(102.0 18.94 71.61 82.0 173.0 68.7);
my %unique;

@unique{@spectrum} = ();

my @uspectrum = sort {$a <=> $b} keys %unique;

print "gf1 result: ", gf1(\@uspectrum, \@subsp), "\n";
print "gf2 result: ", gf2(\@spectrum, \@subsp), "\n";
print "op result: ", op(\@subsp, \@spectrum, 3), "\n\n";


cmpthese (-5,
        {
        gf1 => sub {gf1(\@uspectrum, \@subsp)},
        gf2 => sub {gf1(\@spectrum, \@subsp)},
        op => sub {op(\@subsp, \@spectrum, 3)},
        }
    );

sub gf2 {
    my ($spectrum, $subsp) = @_;
    my %unique;
    
    @unique{@spectrum} = ();
    
    my @uspectrum = sort {$a <=> $b} keys %unique;
    
    return gf1(\@uspectrum, $subsp);
}


sub gf1 {
    my ($spectrum, $subsp) = @_;
    my $sum = 0;

    for my $value (@$subsp) {
        my $mid = int (@$spectrum / 2);
        my $span = int (@$spectrum / 2);
        
        while (int $span) {
            if ($value < $spectrum->[$mid]) {
                $mid -= int (($span /= 2) + 0.5);
            } elsif ($value > $spectrum->[$mid]) {
                $mid += int (0.5 + ($span /= 2));
            } else {
                last;
            }
        }
        
        my $delta;
        my $left = $mid ? abs ($value - $spectrum->[$mid - 1]) : 1e6;
        my $right = $mid < $#$spectrum ? abs ($value - $spectrum->[$mi
+d + 1]) : 1e6;
        my $cent = abs ($value - $spectrum->[$mid]);
        $sum += (sort {$a <=> $b} ($left, $right, $cent))[0];
    }

    my $result = 1000;
    $result = @$subsp / $sum if $sum != 0;
    
    return $result;
}

sub op {
    my ($request, $spectrum, $limit) = @_[0..3];

    my @spectrum = @$spectrum;
    my @request  = sort {$a <=> $b} @$request;

    return 0, [] unless @request;
    return 0, [] if @spectrum < @request;
   
    my %good;
    foreach my $signal (@request) {
        my @good;
        return 0 unless (@good = grep { abs($_-$signal) <= $limit } @s
+pectrum);
        %good = (%good, map { ($_ => 1) } @good);
    }
    @spectrum = sort {$a <=> $b} grep { $good{$_} } @spectrum;
   
    my $alpha;
    my $best_subsp;
    my $alphabeta;
    $alphabeta = sub {
        my ($beta, $spectrum, $subsp, $found) = @_;
        unless (@$subsp) {
            $alpha = $beta;
            $best_subsp = $found;
            return;
        }

        my $compared = $subsp->[0];
        foreach my $sign_id (0..$#{$spectrum}) {
            my $diff = abs($spectrum->[$sign_id] - $compared);
            next if $diff > $limit;
            next if defined($alpha) && ($diff + $beta) >= $alpha;
            $alphabeta->($diff + $beta,
                [@{$spectrum}[0..$sign_id-1, $sign_id+1..$#{$spectrum}
+]],
                [@{$subsp}[1..$#{$subsp}]],
                [@$found, $spectrum->[$sign_id]],
            );
        }
        return;
    };
    $alphabeta->(0, \@spectrum, \@request, []);

    return 0 unless defined $alpha;
    return $alpha ? @request/$alpha : 1000;    
}
[download]

DWIM is Perl's answer to Gödel

In reply to Re: Calculate the similarity of two arrays of numbers by GrandFather
in thread Calculate the similarity of two arrays of numbers by Ieronim

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.