The algorithm was generating some false positives, so I added fine-tuning by averaging the cluster centers over several iterations.

use warnings;
use strict;
use Data::Dumper;
use List::Util qw(shuffle sum);

# Demo driver: cluster a small 1-D data set into three groups, label the
# groups low/medium/high by their centers, then classify two sample values.

#my @data = map {rand} 1..100;
my @dt = (1,2,3,40,40,40,40,42,43,45,80,85,90,91,91,91,91,4,9,10);

my @clustercenters = getClusterCenters(3, @dt);
@clustercenters = sort { $a <=> $b } @clustercenters;
my ($low, $medium, $high) = @clustercenters;

# Keys are the stringified centers; closest() returns one of these same
# values, so the lookup below always hits an existing key.
my %tags = (
    $low    => "low",
    $medium => "medium",
    $high   => "high",
);

print "\n\n $low \t$medium \t$high\n";
print "\nclosest(12): ", $tags{ closest(12, @clustercenters) };
print "\nclosest(43): ", $tags{ closest(43, @clustercenters) };
print "\n";

# closest($val, @arr)
# Returns the element of @arr with the smallest absolute distance to $val.
# On a tie the element that appears first in @arr wins. Returns undef when
# @arr is empty.
sub closest {
    my ($val, @arr) = @_;

    my $best;
    for my $candidate (@arr) {
        # Strict '<' keeps the earliest element on ties.
        $best = $candidate
            if !defined $best
            || abs($candidate - $val) < abs($best - $val);
    }
    return $best;
}

# getClusterCenters($n, @data)
# Runs get1DClusterCenters() several times (each run starts from random
# seeds) and returns the position-wise average of the sorted centers, to
# smooth out the effect of an unlucky random start.
# Prints each run's centers and the accumulated sums as progress output.
sub getClusterCenters {
    my ($n, @data) = @_;
    my $iter = 4;    # number of independent clustering runs to average

    my @centers;
    for (1 .. $iter) {
        my @run_centers = sort { $a <=> $b } get1DClusterCenters($n, @data);
        print "\n", join("\t", @run_centers);

        # Accumulate smallest with smallest, next with next, and so on.
        for my $i (0 .. $#run_centers) {
            $centers[$i] += $run_centers[$i];
        }
    }
    print "\n", join("\t", @centers);

    return map { $_ / $iter } @centers;
}

# get1DClusterCenters($num_clust, @data)
# It takes a 1D array of values and returns centers of clusters sorted.
# Points are assigned to the nearest center and each center is moved to
# the mean of its points, repeating until the total movement of the centers
# is at most the tolerance. Each center is truncated with int() before
# being returned. Returns an empty list when there is no data or when
# $num_clust is less than 1.
sub get1DClusterCenters {
    my ($num_clust, @data) = @_;
    return if !@data || $num_clust < 1;

    my $tol = 0.001;    # stopping tolerance

    # Initialize from randomly chosen *distinct* data values. Two identical
    # starting centers would leave one of them permanently without members
    # (ties go to the lower index), silently reducing the cluster count.
    my %seen;
    my @center = shuffle grep { !$seen{$_}++ } @data;
    $#center = $num_clust - 1 if @center > $num_clust;

    # Fewer distinct values than clusters: pad with random data points so
    # the caller still receives $num_clust centers.
    push @center, $data[ rand @data ] while @center < $num_clust;

    my $diff;
    do {
        $diff = 0;

        # Assign points to nearest center, bucketing them per center.
        my @members_of = map { [] } @center;
        for my $point (@data) {
            my $closest = 0;
            my $dist    = abs($point - $center[$closest]);
            for my $idx (1 .. $#center) {
                my $candidate_dist = abs($point - $center[$idx]);
                if ($candidate_dist < $dist) {
                    $dist    = $candidate_dist;
                    $closest = $idx;
                }
            }
            push @{ $members_of[$closest] }, $point;
        }

        # Compute new centers; a center with no members stays where it is.
        for my $center_idx (0 .. $#center) {
            my @members    = @{ $members_of[$center_idx] };
            my $new_center = @members
                ? sum(@members) / @members
                : $center[$center_idx];

            $diff += abs($center[$center_idx] - $new_center);
            $center[$center_idx] = $new_center;
        }
    } while ($diff > $tol);

    my @cluster_means = sort { $a <=> $b } map { int($_) } @center;
    return @cluster_means;
}

In reply to Re^2: Making sense of data: Clustering OR A coding challenge by mahesh557
in thread Making sense of data: Clustering OR A coding challenge by belg4mit

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.