in reply to Re: Average Price Algorithm
in thread Average Price Algorithm
The algorithm below is a hybrid of the one proposed by Limbic-Region and the one I originally proposed. Smallest buckets are filled first (as per Limbic-Region), but when we encounter N equal sized buckets we allocate items N (one per bucket) at a time until we can't. Then we go back to allocating items to buckets one by one. This prevents the first of N buckets from hogging all the "good" values.
I'd still very much appreciate it if someone could find an example or two that doesn't work with this algorithm. Though I'm not convinced that this problem is anything like NP complete, I'm sure the code I've posted has room for improvement.
use strict; use warnings; sub demoAllocation($$$); demoAllocation ("Distribution: all at mean" , {a=>1000,b=>2000,c=>3000} , { '6.0' => 6000 } ); demoAllocation ("Distribution: unskewed" , {a=>1000,b=>2000,c=>3000} , { '3.0' => 300, '4.0' => 600, '5.0' => 700, '5.5' => 900 , '6.0' => 1000 , '6.5' => 900, '7.0' => 700, '8.0' => 600, '9.0' => 300 } ); demoAllocation ("Distribution: skewed" , {a=>1000,b=>2000,c=>3000} , { '3.0' => 4000, '12.0' => 2000 } ); demoAllocation ("Distribution: skewed: Nasty (Relatively prime)" , {a=>3, b=>3} , {'1.0' => 4, '2.0' => 1, '4.0' => 1 } ); demoAllocation ("Distribution: skewed: Nasty (Grandfather)" , {a=>30, b=>30} , {'1.0' => 40, '2.0' => 10, '4.0' => 10 } ); demoAllocation ("Distribution: Original poster" , {A=>65, B=>12, C=>24, D=>19, E=>30} , {'93.8' => 5, '93.81' => 20, '93.82' => 10 , '93.83' => 15, '93.84' => 25, '93.85' => 5 , '93.87'=>20, '94.0' => 5, '94.1' => 35 , '94.2'=> 10 } ); demoAllocation ("Distribution: camelback: Nasty mark II (Grandfather)/2 " , { a => 15, b => 10, c => 5 } , {'1.0' => 15, '2.0' => 6, '4.0' => 3, '8.0' => 6 } ); demoAllocation ("Distribution: camelback: Nasty mark II (Grandfather)" , { a => 30, b => 20, c => 10 } , {'1.0' => 30, '2.0' => 12, '4.0' => 6, '8.0' => 12 } ); demoAllocation ("Distribution: flat: Limbic~Region" , { a => 3, b => 4, c => 2, d => 2 } , {'1.0' => 1, '2.0' => 1, '3.0' => 1, '4.0' => 1 , '5.0' => 1, '6.0' => 1, '7.0' => 1, '8.0' => 1 , '9.0' => 1, '10.0' => 1, '11.0' => 1 } ); #------------------------------------------------------------ sub demoAllocation($$$) { my ($sDescription, $hBuckets, $hFrequency) = @_; print "$sDescription\n"; my ($dAvg, $hAllocation) = allocate($hBuckets, $hFrequency); foreach my $sId (sort keys %$hAllocation) { my $hItems = $hAllocation->{$sId}; my $dSum = 0; my $iCount = 0; my ($dBucketAvg, $dDeviation); my $iBucketSize = $hBuckets->{$sId}; print "$sId:"; foreach my $dValue (sort keys %$hItems) { my $iFreq = $hItems->{$dValue}; printf "\t%s \@ \$%.2f\n", $iFreq, $dValue; $dSum += $dValue*$iFreq; $iCount += $iFreq; } $dBucketAvg = $dSum/$iCount; $dDeviation = $dBucketAvg - $dAvg; if ($iBucketSize != $iCount) { printf "\t**ERROR**: bucket size: %d, actual allocation: %d\n" , $iBucketSize, $iCount; } printf "\tcount(e/a): %d/%d, bucket avg: \$%.2f, " ."deviation: \$%.3f\n\n" , $iBucketSize, $iCount, $dBucketAvg, $dDeviation; } print "\n"; } #------------------------------------------------------------ sub allocate($$) { my ($hBuckets, $hFrequency) = @_; #calculate deviations from the mean my $dAvg=calcWeightedAvg($hFrequency); my ($iFreqAvg, $aAbove, $aBelow) = calcDeviations($hFrequency, $dAvg); #sort buckets by size: smallest first my $hBucketsBySize = groupBucketsBySize($hBuckets); my @aBuckets = map { [ $_, $hBucketsBySize->{$_} ] } sort keys %$hBucketsBySize; #my @aBuckets = sort { $hBuckets->{$a} <=> $hBuckets->{$b} # } keys %$hBuckets; #allocate items to buckets, smallest first my %hAllocations; my $iFirstAbove = 0; my $iFirstBelow = 0; foreach my $aSameSizeBuckets (@aBuckets) { my $iSize = $aSameSizeBuckets->[0]; my $aIds = $aSameSizeBuckets->[1]; #my $iSize = $hBuckets->{$sId}; fillBucket($iSize, \%hAllocations, $aIds , $dAvg, \$iFreqAvg , $aAbove, \$iFirstAbove , $aBelow, \$iFirstBelow); } return ($dAvg, \%hAllocations); } #------------------------------------------------------------ # SUPPORTING FUNCTIONS - alphabetical order #------------------------------------------------------------ sub calcDeviations($$) { my ($hFrequency, $dAvg) = @_; my @aAbove; my @aBelow; my $iFreqAvg = 0; #calculate deviations from mean while (my ($dValue,$iFreq) = each(%$hFrequency)) { if ($dValue == $dAvg) { $iFreqAvg+=$iFreq; next; } my $dDeviation = $dValue - $dAvg; if (0 < $dDeviation) { push @aAbove, [ $dDeviation, $dValue, $iFreq ]; } else { push @aBelow, [ -$dDeviation, $dValue, $iFreq ]; } } #sort with smallest deviations first return ( $iFreqAvg , [ sort { compareDeviations($a,$b) } @aAbove ] , [ sort { compareDeviations($a,$b) } @aBelow ] ); } #------------------------------------------------------------ sub compareDeviations($$) { my ($x, $y) = @_; return $x->[0] <=> $y->[0]; } #------------------------------------------------------------ sub calcWeightedAvg($) { my $hFrequency = shift @_; my $dSum=0; my $iCount=0; while (my ($dValue,$iFreq) = each(%$hFrequency)) { $dSum+=$dValue*$iFreq; $iCount+=$iFreq; } return $dSum/$iCount; } #------------------------------------------------------------ sub fillBucket($$$$$$$) { my ($iSize, $hAllocations, $aIds , $dAvg, $rFreqAvg , $aAbove, $rFirstAbove , $aBelow, $rFirstBelow) = @_; my $iFilled = 0; #Phase I - allocate to all buckets simultaneously #fillBucketOneByOne($iSize, $hAllocations, $aIds, \$iFilled # , $dAvg, $rFreqAvg # , $aAbove, $rFirstAbove # , $aBelow, $rFirstBelow); #$iSize -= $iFilled; #Phase II - allocate to each bucket individually foreach my $sId (@$aIds) { next unless $iSize; $iFilled = 0; fillBucketOneByOne($iSize , $hAllocations, [$sId], \$iFilled , $dAvg, $rFreqAvg , $aAbove, $rFirstAbove , $aBelow, $rFirstBelow); } } #------------------------------------------------------------ sub fillBucketOneByOne($$$$$$$$$) { my ($iNeeded, $hAllocations, $aIds, $rFilled , $dAvg, $rFreqAvg , $aAbove, $rFirstAbove , $aBelow, $rFirstBelow) = @_; my $iBucketCount = scalar(@$aIds); #take items that are at the mean, if we can if ($iNeeded*$iBucketCount <= $$rFreqAvg) { foreach (@$aIds) { $hAllocations->{$_}->{$dAvg} = $iNeeded; $$rFreqAvg-=$iNeeded; } $$rFilled = $iNeeded; return; } elsif ($iNeeded % $iBucketCount) { return; } while ($iBucketCount <= $$rFreqAvg) { foreach (@$aIds) { $hAllocations->{$_}->{$dAvg}++; $$rFreqAvg--; } $$rFilled++; $iNeeded--; } my $aUp = $aAbove->[$$rFirstAbove]; my $aDown = $aBelow->[$$rFirstBelow]; my $dNetDeviation = 0; #take whatever creates the smallest net deviation # [0] deviation # [1] value # [2] frequency while ($iNeeded > 0) { my $bUseUp = 0; if ($aUp) { if ($aDown) { my $dNetUp = $dNetDeviation + $aUp->[0]; my $dNetDown = $dNetDeviation - $aDown->[0]; if (abs($dNetUp) < abs($dNetDown)) { $bUseUp = 1; $dNetDeviation = $dNetUp; } else { $bUseUp = 0; $dNetDeviation = $dNetDown; } } else { $bUseUp = 1; } } elsif ($aDown) { $bUseUp = 0; } else { return; } if ($bUseUp) { #$hItems->{$aUp->[1]} ++; return if ($aUp->[2] % $iBucketCount); foreach (@$aIds) { $hAllocations->{$_}->{$aUp->[1]} ++; $aUp->[2]--; } $$rFirstAbove++ unless $aUp->[2]; $aUp = $aAbove->[$$rFirstAbove]; } else { #$hItems->{$aDown->[1]} ++; return if ($aDown->[2] % $iBucketCount); foreach (@$aIds) { $hAllocations->{$_}->{$aDown->[1]} ++; $aDown->[2]--; } $$rFirstBelow++ unless $aDown->[2]; $aDown = $aBelow->[$$rFirstBelow]; } $$rFilled++; $iNeeded--; } #return $hItems; } #------------------------------------------------------------ sub groupBucketsBySize($) { my $hBuckets = shift @_; my $hBucketsBySize = {}; while (my ($sId, $iSize) = each (%$hBuckets)) { my $aIds = $hBucketsBySize->{$iSize}; $aIds = $hBucketsBySize->{$iSize} = [] unless $aIds; push @$aIds, $sId; } return $hBucketsBySize; }
Best, beth
Update 1: fixed bug in code that was originally posted.
Update 2: (Feb 3, 8:00 UTC) fixed bucket count bug identified below by Grandfather and Limbic~Region. I also made the output scream **ERROR** if this kind of mistake happens again. This is only a bug fix, however. The bucket counts are now right but the distributions discussed below - "Nasty mark II (Grandfather)" and "Limbic~Region" are still suboptimal.
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^3: Average Price Algorithm
by GrandFather (Saint) on Feb 03, 2009 at 01:13 UTC | |
|
Re^3: Average Price Algorithm
by ELISHEVA (Prior) on Feb 03, 2009 at 08:48 UTC | |
|
Re^3: Average Price Algorithm
by Limbic~Region (Chancellor) on Feb 03, 2009 at 02:50 UTC | |
by GrandFather (Saint) on Feb 03, 2009 at 03:03 UTC |