Discarding the required number of the largest or smallest values is the only way to meet your "efficiency" definition. By avoiding recalculating the average from scratch after each discard, this takes a second or two for random lists of 100,000 files:
#! perl -slw
use strict;
use List::Util qw[ sum ];
use Math::Random::MT qw[ rand srand ];

our $MAX    //= 2**24;
our $N      //= 1e5;
our $S      //= 1;
our $TARGET //= 5e6;

srand( $S );

## Build a hash of filename => size pairs and sort the names by size.
my $nameRoot = 'fileAAAAA';
my %files = map { $nameRoot++ => int( rand $MAX ) } 1 .. $N;

my @namesByValue  = sort { $files{ $a } <=> $files{ $b } } keys %files;
my @orderedValues = @files{ @namesByValue };

my $ave = sum( @orderedValues ) / @orderedValues;
print "Original average: ", $ave;

my @namesToDiscard;
if( $ave > $TARGET ) {  ## Lose some big ones.
    while( $ave > ( $TARGET + 500 ) ) {
        push @namesToDiscard, pop @namesByValue;
        my $change = ( pop( @orderedValues ) - $ave ) / @orderedValues;
        $ave -= $change;
    }
}
else {                  ## Lose some small ones.
    while( $ave < ( $TARGET + 500 ) ) {
        push @namesToDiscard, shift @namesByValue;
        my $change = ( $ave - shift @orderedValues ) / @orderedValues;
        $ave += $change;
    }
}

printf "TARGET: $TARGET; Achieved ave:$ave (using %d files)\n",
    scalar @orderedValues;

delete @files{ @namesToDiscard };

__END__
c:\test>858276 -TARGET=5e6
Original average: 8374785.96135
TARGET: 5e6; Achieved ave:5000496.12061175 (using 59762 files)

c:\test>858276 -TARGET=10e6
Original average: 8374785.96135
TARGET: 10e6; Achieved ave:10000541.8121829 (using 80621 files)

c:\test>858276 -TARGET=7.5e6
Original average: 8374785.96135
TARGET: 7.5e6; Achieved ave:7500462.85185227 (using 89593 files)
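The speed comes from updating the running average incrementally rather than re-summing the list after every discard: removing a value x from n values averaging a leaves an average of a - (x - a)/(n - 1), which is exactly what the $change lines above compute. A minimal standalone sketch of that update step (the sub name and sample values here are illustrative, not part of the script above):

#! perl -slw
use strict;

## Removing $x from $n values averaging $ave leaves an
## average of $ave - ( $x - $ave ) / ( $n - 1 ).
sub remove_from_average {
    my( $ave, $n, $x ) = @_;
    return $ave - ( $x - $ave ) / ( $n - 1 );
}

my @values  = ( 10, 20, 30, 40 );
my $ave     = 25;                       ## average of @values
my $largest = pop @values;              ## discard the 40
$ave = remove_from_average( $ave, scalar( @values ) + 1, $largest );
print $ave;                             ## prints 20, the average of (10, 20, 30)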