#!/usr/bin/env perl

use strict;
use warnings;

# Uses:
#   Algorithm::Evolutionary
#   Perl module for performing paradigm-free evolutionary algorithms
#   by J. J. Merelo, jmerelo (at) geneura.ugr.es
# for estimating the optimal combination of base weights so that their
# sum makes a set target weight sum.
# there are some parameters to set like the target, the base weights
# and the maximum difference between an optimal solution and target.
# Solution for:
#     http://perlmonks.org/?node_id=1216334
# tested with small (4) and large (17) number of weights.
# Caveat: We are asked to minimise the number of base weights used
# as well as the discrepancy from the target sum weight.
# The fitness function must be crafted carefully in order to
# reflect the tradeoff between minimising the discrepancy
# and minimising the number of base weights used.
# What's more important?
# If the problem is laid out with details on how tolerances
# are to be used then we can work out a better fitness function
# right now it is a sum of gauss(discrepancy)
# and IMPORTANCE_OF_MINIMISING_NUM_WEIGHT_USED_OVER_DISCREPANCY/num_weights used
# IMPORTANCE_OF_MINIMISING_NUM_WEIGHT_USED_OVER_DISCREPANCY reflects said tradeoff.
# Author: bliako
# Date: 11/06/2018

use IO::Handle;
use Algorithm::Evolutionary::Experiment;
use Algorithm::Evolutionary::Op::Easy;
use Algorithm::Evolutionary::Op::Bitflip;
use Algorithm::Evolutionary::Op::Crossover;
use Algorithm::Evolutionary::Op::Mutation;
use Algorithm::Evolutionary::Op::Gene_Boundary_Crossover;

STDOUT->autoflush(1);
STDERR->autoflush(1);

my $time_started = time;
my $seed = $time_started;
srand($seed);

# break if no change in fitness for so many iterations (set to 0 for no effect)
use constant BREAK_ON_STALE => 20;
# 0 is a good setting:
use constant VERBOSE => 0;

# the available weights to combine with tolerances
# (which have no influence with this version)
# exact solution exists for these exact weights:
# [242,189,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0] -> discr=0, fitne=0.399275613734753, weights used=3 : 242*0.01 + 189*0.12 + 0*1.1 + 0*2.2 + 33*5.3 + 0*8.3 + 0*9.3 + 0*11.3 + 0*13.3 + 0*15.3 + 0*17.3 + 0*19.5 + 0*29.5 + 0*39.5 + 0*45.5 + 0*50.1 + 0*100.6 = 200
my $weights = [
        [0.01, 0.0],
        [0.12, 0.0],
        [1.1, 0.0],
        [2.2, 0.0],
        [5.3, 0.0],
        [8.3, 0.0],
        [9.3, 0.0],
        [11.3, 0.0],
        [13.3, 0.0],
        [15.3, 0.0],
        [17.3, 0.0],
        [19.5, 0.0],
        [29.5, 0.0],
        [39.5, 0.0],
        [45.5, 0.0],
        [50.1, 0.0],
        [100.6, 0.0]
]; my $num_weights = scalar(@$weights);
# set the target sum weight
my $TARGET = 200.0;

# set the maximum weight discrepancy from target we will call a solution
my $MAXIMUM_DISCREPANCY = 0.04;

# set a max number of iterations in case adequate solution is not found until
# then, then the best of all solutions found will be given
my $MAX_ITERATIONS = 7000;

# increase this if minimising number of base weights is important
# compared to discrepancy from target sum weight:
# WARNING: increasing it too much will minimise weights and give damn all
# about sum of weights!!! 5e-7 is ok
my $IMPORTANCE_OF_MINIMISING_NUM_WEIGHT_USED_OVER_DISCREPANCY = 5e-07;

# not anything of general interest below

use constant M_PI => 3.14159265359;
my $sigma1 = 1.0;
my $sigma2 = 100000.0;
my $_bell_curve_coeff1 = 1.0/($sigma1*sqrt(2.0*M_PI));
my $_bell_curve_coeff2 = 1.0/(2.0*$sigma2*$sigma2);
# a zero-mean bell-curve with specified sigma
# the input will be a discrepancy (-100000 to +100000!)
# and the output will be fitness (0-1) peaking at zero:
# control the disc spread with sigma1 and peak with sigma2 (above)
sub bell_curve_with_2_sigma { return $_bell_curve_coeff1 * exp(-$_bell_curve_coeff2 * ($_[0]) * ($_[0]) ) }

# the error of a good solution, set as appropriate
# else it will loop after a good solution found. no harm really.
# this is a trial-and-error
my $MINIMUM_FITNESS = bell_curve_with_2_sigma($MAXIMUM_DISCREPANCY) + num_weights_used_fitness(1);

# nothing to change below

my $fitness = sub {
        my $individual = shift;
        my $genes = chromosome2genes($individual->Chrom());
        # discrepancy can be positive or negative.
        # GA aims to max fitness, so find a function which converts
        # discrepancy to fitness which peaks at discrepancy=0 and zeroes at
        # the tails (discr negative or positive). One such function is the zero-mean bell curve
        # Here is modified to have two control coefficients one for spread and one for height,
        # see earlier for sigma1 and sigma2:
        my ($disc, $num_weights_used) = @{calculate_discrepancy($genes, $weights)};
        return bell_curve_with_2_sigma($disc) + num_weights_used_fitness($num_weights_used);
};

sub     num_weights_used_fitness {
        my $nw = $_[0];
        if( $nw == 0 ){ return 0 }
        return $IMPORTANCE_OF_MINIMISING_NUM_WEIGHT_USED_OVER_DISCREPANCY / $nw
}
my $num_genes = $num_weights;

print "$0 : starting with:\n"
        ."\t$num_weights base weights,\n"
        ."\ta target sum weight of $TARGET,\n"
        ."\ttarget maximum discrepancy of $MAXIMUM_DISCREPANCY (which makes minimum fitness $MINIMUM_FITNESS).\n"
        ."\tThis are my weights/tolerances:\n";
my $i = 1;
for(@$weights){
        print "\t\t".($i++).") ".join(", +/- ", @$_)."\n";
}

# tests the output of the bell curve func
#for(-10000..10000){ print $_ . " " . bell_curve_with_2_sigma($_) . "\n"; } exit(0);

# crossover with 2 points and 1 prob of mutation
my $m = Algorithm::Evolutionary::Op::Bitflip->new(2, 0.18); # flip this number of bits randomly
my $c = Algorithm::Evolutionary::Op::Crossover->new(2);
my $bm = Algorithm::Evolutionary::Op::Mutation->new(0.05);
my $bc = Algorithm::Evolutionary::Op::Gene_Boundary_Crossover->new(3, 3);
my $ez = new Algorithm::Evolutionary::Op::Easy(
        $fitness,
        0.8,
        [$m, $c, $bc]
);
my $popSize = 35; # population size, each individual in this pop has a chromosome which consists of 2 genes
my $indiType = 'BitString'; # the chromosome is a sequence of bits as a string
my $indiSize = 8*$num_genes; # 8 bits per gene to make an integer, 1 gene = 1 weight multiplier (so we can have mults up to 255)
my $e = new Algorithm::Evolutionary::Experiment(
        $popSize,
        $indiType,
        $indiSize,
        $ez
);
my $populationRef;
my $previous_fitness = 0;
my ($current_fitness, $best);
my $nochange = 0;
my $iters = 0;
my $best_solution_overall = undef;
my $best_fitness_overall = 0;
my $best_str = "<NA>";
while($iters++ < $MAX_ITERATIONS){
        $populationRef = $e->go();
        if( VERBOSE > 0 ){
                for($i=scalar(@$populationRef);$i-->0;){
                        print $populationRef->[$i]->asString()." ".individual2string($populationRef->[$i], $weights)." pop=$i\n";
                }
        }
        $best = $populationRef->[0];
        #print "Best so far after $iters iterations: ", $best->asString(), " (", individual2string($best, $weights),"), ".solution2string($best, $weights)."\n";
        print "Best so far after $iters iterations: ". individual2string($best, $weights).": ".solution2string($best, $weights)."\n";

        $current_fitness = $best->Fitness();

        # keep a record of best solution found:
        if( $current_fitness > $best_fitness_overall ){
                $best_solution_overall = $best;
                $best_str = "Best so far after $iters iterations: ". individual2string($best, $weights).": ".solution2string($best, $weights);
        }

        # break if solution good enough (see MINIMUM_FITNESS) is found
        if( $current_fitness > $MINIMUM_FITNESS ){
                $best_solution_overall = $best;
                print "\n$0 : bingo! minimum acceptable fitness reached : $current_fitness > $MINIMUM_FITNESS\n\n";
                last
        }

        if( BREAK_ON_STALE > 0 ){
                if( $previous_fitness == $current_fitness ){
                        $nochange++
                } else { $nochange = 0 }
                if( $nochange > BREAK_ON_STALE ){
                        # for too many iterations we are stuck in local minimum, how to get out:
                        if( $m->{rate} > 0.99 ){
                                # rate change has no effect, reset the population and start afresh
                                print "$0 : reseting the population in order to get out of the local minimum...\n";
                                $e = new Algorithm::Evolutionary::Experiment(
                                        $popSize,
                                        $indiType,
                                        $indiSize,
                                        $ez
                                );
                        } else {
                                # change GA params hoping to get out of the local min
                                $m->{rate} = $m->{rate}*1.01;
                                print "$0 : increasing bitflit rate in order to get out of the local minimum, new value : ".$m->{rate}."\n";
                                $nochange = 0;
                        }
                }
        }
        $previous_fitness = $current_fitness;
}
$best = $best_solution_overall;
print "\n$0 : Final solution found after $iters iterations: ".individual2string($best, $weights)." : ".solution2string($best, $weights)."\n";
print "$0 : seed = $seed\n";
print "$0 : done in ".(time-$time_started)." seconds.\n";
exit(0);

sub     calculate_discrepancy {
        my $genes = $_[0]; # genes as numbers (not bits): num weights for specifc weight
        my $we = $_[1]; # the description of weights

        my ($i, $amult);
        my $num_nonzero_multipliers = 0;
        my $sum = 0.0; # total weight
        for($i=$num_genes;$i-->0;){
                $amult = $genes->[$i];
                if( $amult > 0.0 ){
                        $sum += $genes->[$i] * $weights->[$i]->[0];
                        $num_nonzero_multipliers++;
                }
        }
        # note: this is not fitness, this is discrepancy
        # as the difference of the current sum of weights from target total weight
        return [$TARGET-$sum, $num_nonzero_multipliers];
}
sub solution2string {
        my $individual = $_[0];
        my $we = $_[1]; # the weights description
        my $genes = chromosome2genes($individual->Chrom());
        my $ret = "";
        my $sum = 0.0;
        my $i;
        for($i=0;$i<$num_genes;$i++){
                $sum += $genes->[$i] * $weights->[$i]->[0];
                $ret .= $genes->[$i] . '*' . $weights->[$i]->[0] . ' + '
        }
        $ret =~ s/ \+ $/ = $sum/;
        return $ret
}
sub individual2string {
        my $individual = $_[0];
        my $we = $_[1]; # the weights description
        my $genes = chromosome2genes($individual->Chrom());
        my ($disc, $num_nonzero_mult) = @{calculate_discrepancy($genes, $we)};
        my $fit = $fitness->($individual);
        return genes2string($genes) . " -> discr=$disc, fitne=$fit, weights used=$num_nonzero_mult"
}
# interpret an array of genes wrt our problem, i.e. a set weight multipliers
sub genes2string {
        my $genes = $_[0];
        my $ret = "[";
        my $i;
        for($i=0;$i<$num_genes;$i++){ $ret .= $genes->[$i]."," }
        $ret =~ s/,$/]/;
        return $ret
}
# convert a huge bit string into an array of genes
# the array to place the genes in is given
sub chromosome2genes {
        my $achromosome = $_[0]; # chromosome bit string containing all genes as 10101

        my @retgenes = (0)x$num_genes;

        # convert a chromosome which consists of genes which consist of bits(alleles)
        # into a set of numbers to be applied to our problem.
        # each chromosome below consists of a number of genes (equal to the number of variables
        # we are looking for) which consist of 8 bits (0sign+8)
        # these 8bits are interpreted as integers in 0-255 range (which is enough for our problem
        # however if solution involved bigger numbers we need to increase range/bits)
        my $i=0;
        while( $achromosome =~ /([01]{8})/g ){
                my $g2 = $1;

                # Here is how a sequence of 8bits is converted to unsigned integers.
                # I am sure there is a better way using pack which i refuse to understand.
                my $g = 0;
                my $j = 1;
                map { $g += $_*$j; $j*=2; } split(//, $g2);
                $retgenes[$i++] = $g;
                #print "$g2->num=$g\n";
        }
        return \@retgenes
}
#     _   _      
#    '\\-//`
#     (o o) 
# ooO--(_)--Ooo--
#----------------
     __END__