in reply to Re: How to best eliminate values in a list that are outliers
in thread How to best eliminate values in a list that are outliers

Here is an attempt at implementing the Hampel identifier method that is mentioned in the article. However, reliable identification of outliers is problematic with so few datapoints.
#!/usr/bin/env perl
use strict;
use warnings;

use Statistics::Descriptive;
use List::Util qw/min max/;

# Demonstration of the Hampel identifier for rejecting outliers: a value is
# treated as an outlier when it lies more than $t median absolute deviations
# (MAD) away from the median of the data set.
#
# References
# http://exploringdatablog.blogspot.com/2013/02/finding-outliers-in-numerical-data.html
# https://en.wikipedia.org/wiki/Median_absolute_deviation

# Return the median of the given list of numbers, as computed by
# Statistics::Descriptive::Full.
sub _median {
    my @values = @_;

    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@values);

    return $stat->median();
}

# Compute the Hampel acceptance interval for a data set.
#
# Parameters:
#   $data_ref - reference to an array of numbers (must not be empty)
#   $t        - threshold multiplier applied to the MAD
#
# Returns the list ($median, $MAD, $lower_limit, $upper_limit), where the
# limits are $median -/+ $t * $MAD.
#
# Dies when the data set is empty, because no median can be computed.
#
# NOTE: the raw MAD is used. The Wikipedia article describes scaling the MAD
# by roughly 1.4826 to estimate the standard deviation of normally
# distributed data; that factor is deliberately not applied here, so the
# limits are tighter than they would be with the scaled estimate.
sub hampel_limits {
    my ($data_ref, $t) = @_;

    die "hampel_limits: no data supplied\n" unless @{$data_ref};

    my $median = _median(@{$data_ref});

    # Absolute residuals of every data point from the median; the MAD is
    # the median of these residuals.
    my @abs_res = map { abs($median - $_) } @{$data_ref};
    my $MAD     = _median(@abs_res);

    return ($median, $MAD, $median - $t * $MAD, $median + $t * $MAD);
}

#my @data = (4, 4, 3, 2);                  # "A" data
#my @data = (1, 5, 6);                     # "B" data
#my @data = (1, 80000, 2, 4, 1200);        # "C" data
my @data = (0.1, 1500, 1700, 2100, 3200);  # "D" data

print "Starting data: ", join(", ", @data), "\n\n";

my $t = 3;
my ($median, $MAD, $lower_limit, $upper_limit) = hampel_limits(\@data, $t);

# When at least half of the residuals are zero the MAD is zero, both limits
# collapse onto the median, and every other value is rejected. Tell the user
# rather than silently discarding most of the data.
warn "MAD is zero: every value that differs from the median will be treated as an outlier\n"
    if $MAD == 0;

print " Median: $median\n";
print " MAD: $MAD\n";
print " t: $t\n\n";
print "Lower limit: $lower_limit\n";
print "Upper Limit: $upper_limit\n\n";

# Keep only the values inside the closed interval [lower, upper].
my @filtered_data = grep { $_ >= $lower_limit and $_ <= $upper_limit } @data;

print "Filtered data: ", join(", ", @filtered_data), "\n\n";
print "Minimum value of filtered data is: ", min(@filtered_data), "\n";