#!/usr/bin/env perl

# Filters outliers from a small hard-coded data set using a median /
# median-absolute-deviation (MAD) rule: any value further than $t MADs from
# the median is dropped.  Prints the intermediate statistics, the surviving
# values, and the minimum of the surviving values.

use strict;
use warnings;

use Statistics::Descriptive;
use List::Util qw/min max/;

my $stat = Statistics::Descriptive::Full->new();

# Alternative sample sets; enable exactly one "my @data" line.
#my @data = (4, 4, 3, 2);                  # "A" data
#my @data = (1, 5, 6);                     # "B" data
#my @data = (1, 80000, 2, 4, 1200);        # "C" data
my @data = (0.1, 1500, 1700, 2100, 3200);  # "D" data

print "Starting data: ", join(", ", @data), "\n\n";

# With no values there is nothing to take a median or a minimum of, so stop
# here instead of printing limits computed from undefined values.
die "No data to filter\n" unless @data;

$stat->add_data(@data);

# References
# http://exploringdatablog.blogspot.com/2013/02/finding-outliers-in-numerical-data.html
# https://en.wikipedia.org/wiki/Median_absolute_deviation

my $median = $stat->median();

# MAD is the median of the absolute distances of each value from the median.
my @abs_res      = map { abs($median - $_) } @data;
my $abs_res_stat = Statistics::Descriptive::Full->new();
$abs_res_stat->add_data(@abs_res);
my $MAD = $abs_res_stat->median();

# NOTE(review): the raw MAD is used as the scale here.  The references above
# also describe multiplying MAD by ~1.4826 to make it comparable to a standard
# deviation for normally distributed data -- confirm the unscaled form is the
# intended one.

# Number of MADs either side of the median that is still accepted.
my $t = 3;

my $lower_limit = $median - $t * $MAD;
my $upper_limit = $median + $t * $MAD;

print " Median: $median\n";
print " MAD: $MAD\n";
print " t: $t\n\n";

print "Lower limit: $lower_limit\n";
print "Upper Limit: $upper_limit\n\n";

# Keep every value that is not strictly outside the limits; values sitting
# exactly on a limit are retained.
my @filtered_data =
    grep { !( $_ < $lower_limit || $_ > $upper_limit ) } @data;

print "Filtered data: ", join(", ", @filtered_data), "\n\n";

print "Minimum value of filtered data is: ", min(@filtered_data), "\n";

exit;