in reply to Points on a line and associated intervals
The goal is to produce the highest quality set of points at the most uniform spacing possible.
Your algorithm as described will not achieve this. It will effectively (though not quite), select the 10 highest quality points, but do it in a very complicated way.
Although you are selecting the smallest interval to discard a point from each time, when you consolidate the remaining point to a new interval, you are taking no account of the relative size of the new intervals you are creating.
This is a (fairly crude) implementation of your algorithm, and the results it typically produces from a randomly generated set of data. I've marked the values retained from the input dataset with an asterix:
#! perl -slw use strict; use Data::Dumper; use List::Util qw[ reduce first ]; our $N ||= 100; our $MIN ||= 10; my @data = sort{ $a->[ 0 ] <=> $b->[ 0 ] } map{ [ rand 1000, int rand +10 ] } 1 .. $N; print Dumper \@data; my @intervals; reduce { push @intervals, [ abs( $data[ $a ][ 0 ] - $data[ $b ][ 0 ] ), $a, + $b ]; $b; } 0 .. $#data; @intervals = sort { $a->[0] <=> $b->[0] } @intervals; while( grep( defined, @data ) > $MIN ) { my( $i, $x, $y ) = @{ shift @intervals }; next unless defined $data[ $x ] and defined $data[ $y ]; my $discard = $data[ $x ][ 1] < $data[ $y ][ 1 ] ? $x : $y; delete $data[ $discard ]; if( $discard == $x ) { --$x while $x > 0 and not defined $data[ $x ]; next if $x == 0 or not defined $data[ $x ]; } else { ++$y while $y < $#data and not defined $data[ $y ]; next if $y == $#data or not defined $data[ $y ]; } my $newInterval = [ abs( $data[ $x ] - $data[ $y ] ), $x, $y ]; my $insertPoint = first{ $intervals[ $_ ][0] > $newInterval->[0] } + 0 .. $#intervals; $insertPoint = $#intervals unless defined $insertPoint; splice @intervals, $insertPoint, 0, $newInterval; } @data = grep defined, @data; print Dumper \@data; __END__ $VAR1 = [ [ '10.101318359375', 0 ], [ '15.472412109375', 8 ], * [ '21.209716796875', 9 ], [ '41.80908203125', 1 ], [ '45.379638671875', 0 ], * [ '72.6318359375', 7 ], [ '80.047607421875', 1 ], [ '89.019775390625', 0 ], * [ '94.90966796875', 8 ], [ '101.470947265625', 6 ], [ '133.36181640625', 5 ], [ '142.913818359375', 6 ], [ '151.2451171875', 6 ], [ '168.15185546875', 2 ], [ '169.403076171875', 7 ], [ '174.01123046875', 5 ], [ '182.037353515625', 8 ], [ '185.9130859375', 7 ], [ '223.57177734375', 1 ], [ '231.99462890625', 7 ], [ '236.02294921875', 5 ], [ '262.359619140625', 3 ], [ '263.031005859375', 7 ], [ '279.1748046875', 1 ], [ '280.303955078125', 0 ], [ '289.73388671875', 8 ], [ '308.41064453125', 3 ], [ '317.657470703125', 1 ], [ '324.5849609375', 6 ], * [ '341.033935546875', 9 ], [ '343.170166015625', 0 ], [ '353.271484375', 3 ], [ '355.438232421875', 6 ], [ '371.27685546875', 3 ], [ '379.241943359375', 3 ], [ '382.62939453125', 0 ], * [ '386.23046875', 9 ], [ '389.34326171875', 3 ], [ '394.47021484375', 0 ], [ '399.4140625', 1 ], [ '407.440185546875', 2 ], [ '425.201416015625', 6 ], [ '426.361083984375', 0 ], [ '438.65966796875', 3 ], [ '447.93701171875', 5 ], [ '455.810546875', 7 ], [ '457.45849609375', 1 ], [ '465.51513671875', 5 ], [ '468.8720703125', 6 ], [ '475.799560546875', 4 ], [ '491.14990234375', 5 ], [ '496.2158203125', 9 ], [ '503.570556640625', 7 ], [ '511.566162109375', 2 ], [ '543.3349609375', 9 ], [ '561.767578125', 6 ], [ '563.690185546875', 0 ], [ '571.8994140625', 2 ], [ '574.89013671875', 3 ], [ '595.21484375', 7 ], [ '600.03662109375', 3 ], [ '608.062744140625', 1 ], [ '615.081787109375', 8 ], [ '631.195068359375', 9 ], [ '634.33837890625', 2 ], [ '638.031005859375', 5 ], [ '639.251708984375', 7 ], [ '672.30224609375', 8 ], [ '672.698974609375', 5 ], [ '678.64990234375', 3 ], [ '690.73486328125', 7 ], [ '710.63232421875', 5 ], [ '718.475341796875', 4 ], [ '761.3525390625', 2 ], [ '770.69091796875', 9 ], [ '790.6494140625', 1 ], * [ '796.417236328125', 8 ], [ '807.647705078125', 4 ], [ '809.38720703125', 1 ], [ '809.9365234375', 3 ], * [ '813.873291015625', 5 ], [ '819.76318359375', 4 ], [ '821.6552734375', 4 ], [ '833.80126953125', 5 ], * [ '843.170166015625', 8 ], [ '843.505859375', 2 ], [ '861.99951171875', 4 ], [ '865.6005859375', 5 ], [ '866.14990234375', 6 ], [ '871.124267578125', 4 ], * [ '890.960693359375', 9 ], [ '911.102294921875', 1 ], [ '912.353515625', 7 ], [ '919.708251953125', 1 ], [ '922.91259765625', 8 ], [ '956.695556640625', 2 ], [ '973.388671875', 5 ], * [ '977.142333984375', 8 ], [ '993.438720703125', 8 ], [ '998.1689453125', 5 ] ]; $VAR1 = [ [ '21.209716796875', 9 ], [ '72.6318359375', 7 ], [ '94.90966796875', 8 ], [ '341.033935546875', 9 ], [ '386.23046875', 9 ], [ '796.417236328125', 8 ], [ '813.873291015625', 5 ], [ '843.170166015625', 8 ], [ '890.960693359375', 9 ], [ '977.142333984375', 8 ] ];
As you can see, not quite the results you are after.
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Points on a line and associated intervals
by srdst13 (Pilgrim) on May 18, 2006 at 10:56 UTC | |
by BrowserUk (Patriarch) on May 18, 2006 at 12:28 UTC |