comment on

The algorithm was generating some false positives, so I added fine-tuning by averaging the cluster centers over several iterations.

use warnings;
use strict;
use Data::Dumper;



# Demo driver: cluster a fixed sample into three groups and label two probe
# values by the group whose center they are nearest to.
#my @data = map {rand} 1..100;
my @dt = (
    1, 2, 3,
    40, 40, 40, 40, 42, 43, 45,
    80, 85, 90, 91, 91, 91, 91,
    4, 9, 10,
);

# Averaged centers, ordered from smallest to largest.
my @clustercenters = sort { $a <=> $b } getClusterCenters(3, @dt);
my ($low, $medium, $high) = @clustercenters;

# Map each center value to a human-readable label.
my %tags;
$tags{$low}    = "low";
$tags{$medium} = "medium";
$tags{$high}   = "high";

print "\n\n $low \t$medium \t$high\n";

# Label the probe values 12 and 43 by their nearest center.
my $tag_for_12 = $tags{ closest(12, @clustercenters) };
my $tag_for_43 = $tags{ closest(43, @clustercenters) };
print "\nclosest(12): ", $tag_for_12;
print "\nclosest(43): ", $tag_for_43;
print "\n";

# Return the element of the candidate list that is numerically nearest to
# the given value.
#
#   closest($val, @arr)
#
# Candidates are ordered by absolute distance from $val and the first one is
# returned; with no candidates the result is undef.
sub closest {
    my $target     = shift;
    my @candidates = @_;

    my ($nearest) = sort {
        abs($a - $target) <=> abs($b - $target)
    } @candidates;

    return $nearest;
}

# Smooth out run-to-run variation by averaging cluster centers over several
# clustering runs.
#
#   getClusterCenters($n, @data)
#
# Calls get1DClusterCenters($n, @data) a fixed number of times, sorts each
# run's centers ascending, sums them position by position and returns the
# per-position averages.  Each run's centers and the final sums are printed
# to STDOUT as tab-separated lines.
sub getClusterCenters{
    my ($n, @data) = @_;
    my $iter = 4;    # number of clustering runs to average over
    my @centers;

    foreach my $run (1..$iter) {
        my @run_centers = sort { $a <=> $b } get1DClusterCenters($n, @data);
        print "\n",join("\t", @run_centers);

        # Add this run's i-th smallest center onto the i-th running total.
        $centers[$_] += $run_centers[$_] for 0..$#run_centers;
    }

    print "\n",join("\t", @centers );

    # Turn the running totals into averages.
    $_ /= $iter for @centers;

    return @centers;
}

# Cluster a 1D list of values with k-means and return the cluster centers.
#
#   get1DClusterCenters($num_clust, @data)
#
# Arguments:
#   $num_clust - number of clusters to look for
#   @data      - the numeric values to cluster
#
# Returns the cluster centers, each truncated with int(), sorted in
# ascending numeric order.  Returns an empty list when @data is empty or
# $num_clust is less than 1.
#
# The initial centers are data points picked at random (the same point may
# be picked more than once), so results can differ between calls.  A cluster
# that ends up with no members keeps its previous center.
sub get1DClusterCenters{    
    my ($num_clust, @data) = @_;

    # Nothing to cluster: bail out instead of averaging undefined values.
    return if !@data || !defined $num_clust || $num_clust < 1;

    my $tol = 0.001;    # stopping tolerance

    # initialize by choosing random points from the data
    my @center = @data[ map { int rand @data } 1..$num_clust ];

    my $diff;

    do {
        $diff = 0;

        # Assign points to nearest center; each entry is [value, center index]
        my @cluster;
        foreach my $point (@data) {
            my $closest = 0;
            my $dist = abs($point - $center[ $closest ]);
            for my $idx (1..$#center) {
                my $candidate = abs($point - $center[ $idx ]);
                # Strict comparison: on a tie the lower-indexed center wins.
                if ($candidate < $dist) {
                    $dist = $candidate;
                    $closest = $idx;
                }
            }
            push @cluster, [$point, $closest];
        }

        # compute new centers as the mean of each cluster's members
        foreach my $center_idx (0..$#center) {
            my @members = grep {$_->[1] == $center_idx} @cluster;
            my $sum = 0;
            $sum += $_->[0] for @members;

            # An empty cluster has no mean; leave its center where it was.
            my $new_center = @members ? $sum / @members
                                      : $center[ $center_idx ];

            # Total movement of all centers drives the stopping test.
            $diff += abs($center[ $center_idx ] - $new_center);
            $center[ $center_idx ] = $new_center;
        }

    } while ($diff > $tol);

    # Truncate to integers and order from smallest to largest.
    my @cluster_means = sort { $a <=> $b } map { int($_) } @center;

    return @cluster_means;

}
[download]

In reply to Re^2: Making sense of data: Clustering OR A coding challenge by mahesh557
in thread Making sense of data: Clustering OR A coding challenge by belg4mit

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&amp;`
	<		`&lt;`
	>		`&gt;`
	[		`&#91;`
	]		`&#93;`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.