package EA::GA; # Author: Stefan Kamphausen # Copyright 2001 Stefan Kamphausen. # This implements a Simple Somewhat Generalized Genetic Algorithm # See the bottom of this file for the POD documentation. Search for the # DOCS-Header. # You can run this file through either pod2man or pod2html to produce pretty # documentation in manual or html file format (these utilities are part of the # Perl 5 distribution). #################################################################### ## LICENSE #################################################################### # This program is free software; you can redistribute it # and/or modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA. # Please Visit # http://www.skamphausen.de/software # for recent versions, other (free) software written by this author # and whatever else you might expect from such a page. use strict; #use Data::Dumper; $GA::VERSION='0.6'; sub new { my ($class,$fitness,$tokenref) = @_; my $self = {}; # An array which contains all the individuals as arrays of tokens $self->{pop} = (); # A Hash which contains all the fitness-values adressed by # the concatenated tokens $self->{fitvals} = (); # the 'alphabet' of allowed symbols $self->{tokens} = $tokenref; # user provided fitness function, gets array of tokens as arg $self->{fitness} = $fitness; # default value for mutation probability; may be overridden $self->{mut_prob} = 0.05; # a counter for the generation $self->{generation} = 0; bless $self, $class; return $self; } sub init_pop { my ($self,$size,$length) = @_; my ($i,$j); for ($i=0;$i<$size;$i++) { for ($j=0;$j<$length;$j++) { my $rtok = $self->random_token(); #print "RTOK: $rtok\n"; push @{$self->{pop}[$i]}, $rtok; } } #print Dumper(@{$self->{pop}}); $self->calculate_fitness(); $self->sort_pop(); } sub sort_pop { my ($self) = @_; @{$self->{pop}} = sort {$self->{fitvals}{join "",@{$a}} <=> $self->{fitvals}{join "",@{$b}}} @{$self->{pop}}; } sub calculate_fitness { my ($self) = @_; my ($i,$f); %{$self->{fitvals}} = (); foreach $i (@{$self->{pop}}) { #print "CALC: Indiv:",@{$i},"\n"; $f = &{$self->{fitness}}(@{$i}); $self->{fitvals}{join "",@{$i}} = $f; } } ### Return some values sub best_fit { my $self = shift; return $self->{fitvals}{join("",@{$self->{pop}[0]})}; } sub generation { my $self = shift; return $self->{generation}; } sub random_token { my ($self) = @_; my $max = scalar(@{$self->{tokens}}); my $ran = int rand $max; my $tok = @{$self->{tokens}}[$ran]; #print "Ran: $ran, Max: $max "; #print "Token: $tok\n"; return $tok; } ### Dawn of The Next Generation # combines mutation and crossover sub breed { # FIXME: optional mut_prob my $self = shift; my $opt_mutation_rate = shift; my @new_pop = (); my ($p1,$p2,$c,$i); # mutation my $p_mut = $opt_mutation_rate || $self->{mut_prob}; # prepare for roulette wheel my $a_sum = 0; my @fit = (); foreach (@{$self->{pop}}) { # my $f = $self->{fitvals}{join "",@{$_}}; # $a_sum += 1.0/($f+1); # push @fit, $f; my $f = $self->{fitvals}{join "",@{$_}}; my $f2 = 1.0/($f+1); $a_sum += $f2; push @fit, $f2; } @fit = sort {$b <=> $a} @fit; #print "BREED: Fitness\n",Dumper(@fit),"\n\n"; my $length = scalar(@{$self->{pop}}); # Golden Cage $new_pop[0] = @{$self->{pop}}[0]; # Choose Parents for ($i=1;$i<$length;$i++) { $p1 = rwheel(\@fit,$a_sum); $p2 = rwheel(\@fit,$a_sum); # print "P1: $p1 P2: $p2\n"; push @new_pop, $self->crossover_mut($p1,$p2,$p_mut); } @{$self->{pop}} = @new_pop; $self->calculate_fitness(); $self->sort_pop(); return ++$self->{generation}; } sub mutate { my ($self,$rate) = @_; my ($ran,$i,$t); my $the_rate = $rate || $self->{mut_prob}; foreach $i (@{$self->{pop}}) { for ($t=0;$trandom_token(); } } } } sub crossover_mut { my $self = shift; my $p1 = shift; my $p2 = shift; my $opt_mutation_rate = shift; my ($ran,$t,$new_size); $ran = rand(); my $pmut = $opt_mutation_rate || $self->{mut_prob}; my $pp1 = (1.0-$pmut)/2.0; # 50:50 for the size of the new one my @new = (); if ($ran < 0.5) { $new_size = scalar(@{$self->{pop}[$p1]}) } else { $new_size = scalar(@{$self->{pop}[$p2]}) } for($t=0;$t<$new_size;$t++) { $ran = rand(); # 50:50 to take gene from p1 or p2 unless mutation if ($ran < $pmut) { #print "M"; $new[$t] = $self->random_token(); } elsif ($ran < $pp1) { #print "1"; $new[$t] = @{@{$self->{pop}}[$p1]}[$t]; } else { #print "2"; $new[$t] = @{@{$self->{pop}}[$p2]}[$t]; } } return \@new; } sub crossover { my $self = shift; my $p1 = shift; my $p2 = shift; my ($ran,$t,$new_size); $ran = rand(); # 50:50 for the size of the new one my @new = (); if ($ran < 0.5) { $new_size = scalar(@{$self->{pop}[$p1]}) } else { $new_size = scalar(@{$self->{pop}[$p2]}) } for($t=0;$t<$new_size;$t++) { $ran = rand(); # 50:50 to take gene from p1 or p2 unless mutation if ($ran < 0.5) { $new[$t] = @{@{$self->{pop}}[$p1]}[$t]; } else { $new[$t] = @{@{$self->{pop}}[$p2]}[$t]; } } return \@new; } ### Print-Outs sub dump_indivs { my $self = shift; my $i; my $len = scalar(@{$self->{pop}}); for ($i=0;$i<$len;$i++) { my $s = join("",@{$self->{pop}[$i]}); printf "%4d ",$i; print $s; printf " {%5d}\n",$self->{fitvals}{$s}; } } sub dump_best { my $self = shift; my $s = join("",@{$self->{pop}[0]}); print $s; printf " {%f}\n",$self->{fitvals}{$s}; } ### Random sub rwheel { # random element of an array according to it's value # aka roulette wheel my ($a_ref,$a_sum) = @_; my @arr = @{$a_ref}; my $sum = 0; my $i; # print "RWHEEL: length = ",scalar(@arr),"\n"; # print "RWHEEL ARRAY: ",join(" ",@arr),"\n"; my $ran = rand $a_sum; # print "RWHEEL: RAN $ran < $a_sum\n"; for ($i=0;$i $ran ) { return $i; } } die "ARGH! I never should have reached this point!\n"; } 1; __END__ ############################################################ # DOCS # ############################################################ =head1 NAME EA::GA - a general genetic algorithm library =head1 SYNOPSIS # This is a little example use EA::GA; # evolve a string that matches this target $target = "Hello_World"; $len = length $target; # create an array of allowed tokens @token = (); for ('a'..'z') { push @token, $_; } for ('A'..'Z') { push @token, $_; } push @token, "_"; # New GA object that sets the alphabet and the # fitness function $p = EA::GA->new(\&fitness_function,\@token); # initialise the population $p->init_pop(100,$len); do { # breed the next generation using crossover and mutation $gen = $p->breed(); printf "[%5d] ", $gen; # built in data dumper $p->dump_best(); # best_fit return the fitness of the best } while ($p->best_fit() > 0 && $gen < 2000); $p->dump_best(); exit(0); # Now all we need is the fitness function that needs to understand # the representation of an individual sub fitness_function { my @indiv_tokens = @_ ; # Representation my $s1 = join "", @indiv_tokens; my $sum = 0; my $f; for($f=0;$f<$len;$f++) { my $z1=substr($s1,$f,1); my $z2=substr($target,$f,1); my $a=(ord($z1)-ord($z2))*(ord($z1)-ord($z2)); $sum +=$a; } return $sum; } =pod =head1 DESCRIPTION C implements a (hopefully) generalized genetic algorithm. It does this by using an array of allowed tokens as individuals. The user has to provide a fitness function. There the actual representation is implemented. If you got a string of chars it is quite easy: simply join them. If you want to have real numbers you should probably use a bitwise representation and calculate the real values in your fitness function. =head2 The Easy Way The easy setup is pretty easy. With $p = EA::GA->new(\&fitness_function,\@token); you create a new GA object which knows all the allowed tokens and how to calculate the fitness of an individual. Then use $p->init_pop($pop_size,$length_of_individual); to initialise a random population of I<$pop_size> individuals, each of length I<$length_of_individual>. I do not know how to make them of variable length right now. The main thing to do now is use the simplified C-method $gen = $p->breed(); You can give an optional argument to the C method which will be interpreted as the mutation probabiliy. This method combines mutation and crossover (for each token there is a decision from which parent to take the token) and returns the number of the generation. =head2 The Detailed Way There are methods that provide mutation, crossover and other functionality and can be called directly in case you do not want to use the built in C method. These and other methods will soon be listed in alphabetical order. Right before that again the note that you probably don't need this. =over 4 =item best_fit() Returns the fitness of the best individual of the whole population if the population is sorted (actually returns the first element of the internal population array). =item calculate_fitness() Updates the (internal) fitness values by calling the user provided fitness function for each individual. =item crossover($p1,$p2) Does a simple crossover schema. All individuals are internally represented as an array of tokens. This crossover needs the numbers of two parents (I<$p1> and I<$p2>), usually drawn using the Roulette Wheel technique. For each token of the offspring there is a fifty:fifty decision whether to take from parent one or parent two. =item crossover_mut($p1,$p2,$optional_mutation_prob) Almost the same as C just that there is a little probabiliy that a new random token is used instead of on of the parents. =item dump_best() This prints the best individual to stdout in a somewhat reasonable way. =item dump_indivs() Prints the whole population including their fitness values. =item generation() Returns the number of the current generation. =item mutate($optional_mutation_prob) Performs a mutation on the whole generation. =item sort_pop() Whenever a new population has been created and the fitness values have been calculated it is necessary to sort the population. Some routines rely on that. =item random_token() Return a random token from the user provided alphabet of allowed tokens. =back =head1 AUTHORS Stefan Kamphausen Imail@skamphausen.deE> I =cut