# randomly choose subsets of items from a set, without replacement. # items in the set are not equally probable. use strict; use warnings; # recursively build a balanced binary search tree of weighted items. # this is not terribly efficient, and could be improved. sub build_tree { my ($weights) = @_; my @items = sort keys %$weights; return build_tree_rec($weights, @items); } # build_tree sub build_tree_rec { my ($weights, @items) = @_; return unless @items; my $mid = int(@items / 2); my $tree = { }; $tree->{item} = $items[$mid]; $tree->{left} = build_tree_rec($weights, @items[0..($mid-1)]); $tree->{right} = build_tree_rec($weights, @items[($mid+1)..$#items] +); $tree->{weight} = $weights->{ $tree->{item} }; calc_weight($tree); return $tree; } # build_tree_rec # change the weight of an item in the tree. # this is written non-recursively for speed. sub change_weight { my ($item, $weight, $tree) = @_; my @path; while ($tree && $item ne $tree->{item}) { push @path, $tree; if ($item lt $tree->{item}) { $tree = $tree->{left}; } else { $tree = $tree->{right}; } } return unless $tree; # item not found $tree->{weight} = $weight; # recalculate weights as necessary calc_weight($tree); while (@path) { $tree = pop @path; calc_weight($tree); } } # change_weight # calculate the total weight of a tree. assumes that the weights # of the left and right subtrees have already been calculated. sub calc_weight { my ($tree) = @_; my $left = $tree->{left}; my $right = $tree->{right}; $tree->{total_weight} = $tree->{weight}; $tree->{total_weight} += $left->{total_weight} if $left; $tree->{total_weight} += $right->{total_weight} if $right; } # calc_weight # randomly choose an item from the tree. sub choose_item { my ($tree) = @_; my $val = rand($tree->{total_weight}); while ($val >= $tree->{weight}) { $val -= $tree->{weight}; my $left = $tree->{left}; if ($left && $val < $left->{total_weight}) { $tree = $left; } else { $val -= $left->{total_weight} if $left; my $right = $tree->{right}; if ($right && $val < $right->{total_weight}) { $tree = $right; } else { # if we get here, there has been some funny round-off in $val. # this should be very unlikely. don't search any further # or we might move into a branch with zero weight. last; } } } # while $val return $tree->{item}; } # choose_item # randomly choose several different items from the tree. # you need to pass in the weights of the items, so they can be # restored at the end. this is inconvenient, but could be fixed. # this runs in O(k*log n) time, where there are n items # in the tree and k are being chosen. sub choose_items { my ($num, $weights, $tree) = @_; my @items; for (1 .. $num) { my $item = choose_item($tree); push @items, $item; change_weight($item, 0, $tree); # don't pick this item again! } foreach my $item (@items) { change_weight($item, $weights->{$item}, $tree); } return @items; } # choose_items # exercise the trees a bit. my %weights = ( a => 1, b => 1, c => 1.5, d => .5, ); my $tree = build_tree(\%weights); for (1..20) { my @items = choose_items(2, \%weights, $tree); print "@items\n"; }