#!/usr/bin/perl
################################################################################################
#   QUINLANS ALGORITHM FOR DECISION TREE
#   Author : Sandhya Manickavasagam email- smanicak@syr.edu
################################################################################################

use DBI;
use Tree::DAG_Node;
use AI::DecisionTree;
use Graph::Easy;
# ------------------------------------------------------------------------------
# Setup: report file, decision-tree object and database connection.
# $table, $dbh and the MYFILE handle are package globals and $dtree is a
# file-scoped lexical; the code further down the script uses all of them, so
# their names must not change.
# ------------------------------------------------------------------------------
$table ="income";
my $file_to_append = "C:\\Documents and Settings\\smanicka\\Desktop\\quinlans\\output_quinlans.txt";

# Three-argument open keeps the mode separate from the path. Note that '>'
# truncates any previous report; despite the variable name nothing is appended.
# A failure stays a warning (the tree is also echoed to STDOUT), but the reason
# is now reported through $!.
open(MYFILE, '>', $file_to_append)
    or warn "Can't open file to append ($file_to_append): $!";

# Report banner.
print MYFILE "\t\t\tCSE 787 - ANALYTICAL DATA MINING - PROJECT 2\n================================================================================\n\n";
print MYFILE "\n\n\t\t\tQUINLAN'S DECISION TREE ALGORITHM\n---------------------------------------------------------------------------------\n";
print MYFILE "\n\t\t\tName :\t Sandhya Manickavasagam\n\t\t\tSU-ID:660185882\n================================================================================\n\n";

# Direct method call instead of the ambiguous indirect-object form "new Class(...)".
my $dtree = AI::DecisionTree->new(noise_mode=>'pick_best');

# Without a connection every later $dbh->prepare would die with an unhelpful
# "Can't call method on an undefined value", so stop here with the real reason.
# (The original chained a second "or warn" that could never run because warn
# returns true.)
$dbh = DBI->connect('dbi:ODBC:driver=microsoft access driver (*.mdb);dbq=C:\Documents and Settings\smanicka\Desktop\quinlans\sample.mdb')
    or die("Sorry,Cant connect to the table\n$DBI::errstr \n");
# ------------------------------------------------------------------------------
# Table summary: number of rows, number of columns, and the list of columns
# that take part in the mining (every column except the first one).
# Sets the globals $count_row, $count_col and @columns_list, which the
# following sections read.
# ------------------------------------------------------------------------------
my %data;    # filled in by the frequency section further down

# Row count. The list assignment matters: DBI documents fetchrow_array in
# scalar context as driver-dependent, so take the first column explicitly.
my $query = qq(select count(*) from $table);
$sth = $dbh->prepare($query)
    or die sprintf("cannot prepare \"%s\": %s\n", $query, $DBI::errstr);
$sth->execute()
    or die sprintf("cannot execute \"%s\": %s\n", $query, $DBI::errstr);
($count_row) = $sth->fetchrow_array();
$count_row = 0 unless defined $count_row;
$sth->finish();

# Column metadata. The statement is executed only so the driver fills in
# NUM_OF_FIELDS and NAME; no rows need to be fetched to count the columns,
# and this also works when the table is empty.
$query = "select * from $table";
print MYFILE "\nExecuting $query \n";
$sth = $dbh->prepare($query)
    or die sprintf("cannot prepare \"%s\": %s\n", $query, $DBI::errstr);
$sth->execute()
    or die sprintf("cannot execute \"%s\": %s\n", $query, $DBI::errstr);

$count_col = $sth->{NUM_OF_FIELDS} || 0;

# Skip index 0: the first column is assumed to be the transaction number.
@columns_list = ();
foreach my $index (1 .. $count_col - 1) {
    push(@columns_list, "$sth->{NAME}->[$index]");
}
$sth->finish();

print MYFILE "There are $count_row rows of data in this table\n";
print MYFILE "\nThere are $count_col columns\n\nAssuming that the first column is the transaction number and hence not a part of the data used for mining : \n\nThe columns under consideration are :\n\n";

# List however many columns the table really has instead of a fixed five.
foreach my $column_name (@columns_list) {
    print MYFILE "\t$column_name\n";
}
# ------------------------------------------------------------------------------
# Distinct values and their relative frequencies, per mined column.
#
# One "GROUP BY" query per column returns every distinct value together with
# its number of occurrences. This replaces the earlier scheme of a
# "select distinct" followed by one string-built query per value (tried with
# quotes, then retried without), which broke on values containing an
# apostrophe and issued far more queries than necessary.
#
# Results are kept in a lexical hash rather than in package arrays named after
# the table's columns, so a column name can never clobber a script variable.
#
# Reads : @columns_list, $table, $dbh, $count_row
# Writes: %data, keyed "column=value" => occurrences / $count_row. Keying by
#         the value alone lost entries whenever two columns shared a value
#         (for example "yes"/"no").
# NOTE(review): a NULL value is reported as an empty string with its real row
# count - confirm this is the desired presentation.
# ------------------------------------------------------------------------------
my %value_counts_for;    # column name => [ [value, occurrences], ... ]

print MYFILE "\nThe Distinct entries are : \n ";
foreach my $column (@columns_list) {
    print MYFILE "\n$column\n================\n";

    my $group_sql = "select $column, count(*) from $table group by $column";
    my $group_sth = $dbh->prepare($group_sql)
        or die sprintf("cannot prepare \"%s\": %s\n", $group_sql, $DBI::errstr);
    $group_sth->execute()
        or die sprintf("cannot execute \"%s\": %s\n", $group_sql, $DBI::errstr);

    my @pairs;
    while (my ($value, $occurrences) = $group_sth->fetchrow_array()) {
        my $label = defined $value ? $value : '';
        print MYFILE "$label\n";
        push(@pairs, [ $label, $occurrences ]);
    }
    $value_counts_for{$column} = \@pairs;
}

print MYFILE "\n";
foreach my $column (@columns_list) {
    foreach my $pair (@{ $value_counts_for{$column} }) {
        my ($label, $occurrences) = @{$pair};
        print MYFILE "$label occurs $occurrences times\n";

        # Guard the division in case the row count could not be determined.
        $data{"$column=$label"} = $count_row ? $occurrences / $count_row : 0;
    }
    print MYFILE "\n";
}

print MYFILE "\nStoring the entries into a hash table\n------------------------------------------------------\n";
# Sorted so that the report is reproducible from run to run.
foreach my $key (sort keys %data) {
    print MYFILE $key.", ".$data{$key}."\n";
}

# log2($n) - base-2 logarithm, the building block of the entropy calculation.
#
#   $n : a positive number. When the sub is called with no argument at all it
#        falls back to the current value of $_, which is what the previous
#        implementation always read (it ignored its argument entirely).
#
# Returns log($n) / log(2). Perl's log() dies for zero or negative input, and
# that error is deliberately left to propagate to the caller.
sub log2 {
    my $n = @_ ? $_[0] : $_;
    return log($n)/log(2);
}

#gain of each column should be calculated and column with max gain should become root

#my $root = Tree::DAG_Node->new();
#$root->name("Outlook");
#$new_daughter = Tree::DAG_Node->new();
#  $new_daughter->name("");
#  $root->add_daughter($new_daughter);


# ------------------------------------------------------------------------------
# Load the training set: every row of $table becomes one instance of $dtree.
# Column 0 is skipped, columns 1-3 are the attributes and column 4 is the class
# to predict. Each row is also echoed to STDOUT.
# ------------------------------------------------------------------------------
$query = qq(select * from $table);
print MYFILE "\nExecuting $query \n";

# Stop with the driver's own message rather than continuing with an unusable
# statement handle and training on nothing.
$sth = $dbh->prepare($query)
    or die sprintf("cannot prepare \"%s\": %s\n", $query, $DBI::errstr);
$sth->execute()
    or die sprintf("cannot execute \"%s\": %s\n", $query, $DBI::errstr);

while (my @record = $sth->fetchrow_array()) {

    # Echo the raw row, one field after another, on a single line.
    foreach my $field (@record) {
        print "$field \t\t";
    }
    print "\n";

    ################################################################################
    # For the Most part, the entire program will work for any given data.If a new  #
    # table needs to be used, please change the mappings below to reflect the new  #
    # table values.                                                                #
    ################################################################################
    # The values are interpolated into strings so that the tree always receives
    # plain strings, exactly as before.
    $dtree->add_instance(
        attributes => {
            Income        => "$record[1]",
            Student       => "$record[2]",
            Credit_Rating => "$record[3]",
        },
        result => "$record[4]",
    );
}


# ------------------------------------------------------------------------------
# Train the tree, then report it three ways: the raw rule statements, the root
# attribute, and a flat tag-style listing of every rule's branches and class.
#
# The parsing below assumes rule strings shaped like
#     if Attr='value' and Other='value' -> 'class'
# which is the form shown in the AI::DecisionTree documentation for
# rule_statements(); values containing whitespace would be split into several
# tokens - TODO confirm the data never contains such values.
#
# Sets the globals @rules and $root_of_tree.
# ------------------------------------------------------------------------------
$dtree->train();
@rules = $dtree->rule_statements();

print MYFILE "\n Mining the Data \n==========================================\n";
print MYFILE "\n\nDecision Rules \n================================================\n";

# The attribute tested by the first condition of a rule is the root of the
# tree. Only tokens before the first "and" are inspected.
$root_of_tree = '';
foreach my $rule (@rules) {
    print MYFILE "$rule \n";

    foreach my $token (split(/\s+/, $rule)) {
        next if $token eq "if";
        last if $token eq "and";
        if ($token =~ m/=/) {
            my ($attribute) = split(/=/, $token);
            $root_of_tree = "<root>$attribute</root> \n";
            last;    # first condition found; nothing else in this rule matters
        }
    }
}

# "\n" after the heading (the original literal contained the typo "|N").
print MYFILE "\n\n DECISION TREE \n=======================================\n\n$root_of_tree\n";
print "\n$root_of_tree\n";

foreach my $rule (@rules) {
    my @tokens = split(/\s+/, $rule);

    # Stop one short of the last token: the final token is the class label and
    # is printed when the "->" in front of it is reached.
    foreach my $position (0 .. $#tokens - 1) {
        my $token = $tokens[$position];

        my $branch;
        if ($token =~ m/=/) {
            # Limit 2 keeps a value that itself contains '=' in one piece.
            my ($name, $value) = split(/=/, $token, 2);
            $value = '' unless defined $value;
            $branch = " <branch node = \"$name\" attribute =\"$value\" \/>";
        }

        if ($token =~ m/\-\>/) {
            print "\n <Class value>\"$tokens[$position+1]\"</Class value>\n\n";
            print MYFILE  "\n <Class value>\"$tokens[$position+1]\"</Class value>\n\n";
        }

        if (defined $branch) {
            print "$branch \n";
            print MYFILE "$branch \n";
        }
    }
}

# Flush the report explicitly so a write error (disk full, handle never
# opened) is reported instead of being lost at interpreter exit, and release
# the database connection.
close(MYFILE) or warn "Problem closing the report file: $!";
$dbh->disconnect() if $dbh;


#if ($rule_arr[$t] =~ m/\=/){
#  @leaf = split (/\=/,$rule_arr);
#  print "Leaf node : $leaf[0]\n";
#  }
#}
#}