use strict; use warnings; my %match = ( 'ABC.com' => { 'XYZ.com' => { company => 1, # reg. only contact => 0, # no match address => 3, # reg. and admin. phone => 2, # admin. only email => 2, # admin. only }, 'BBC.com' => { company => 3, contact => 3, address => 3, phone => 3, email => 3, }, }, 'FOO.com' => { 'BAR.com' => { company => 1, contact => 3, address => 3, phone => 2, email => 1, }, 'BAZ.com' => { company => 3, contact => 2, address => 2, phone => 2, email => 2, }, }, ); # I'm sure there is a faster way to count bits. sub bit_count { my ( $number ) = @_; my $bits = unpack 'b*', pack 'C', $number; my $bits_set = ( $bits =~ tr/1// ); return $bits_set; } sub confidence_by_bits { my ( $href ) = @_; my $weight = 0; foreach ( values %{ $href } ) { $weight += bit_count($_); } return $weight; } sub confidence_by_weighted_position { my ( $href ) = @_; my $weight = 0; foreach ( values %{ $href } ) { # Reg - slightly important if ( $_ | 1 ) { $weight += 1; } # Admin - very important if ( $_ | 2 ) { $weight += 5; } # Bonus for both if ( $_ == 3 ) { $weight += 20; } } return $weight; } sub confidence_by_weighted_field { my ( $href ) = @_; my $weight = 0; while ( my ( $key, $val ) = each %{ $href } ) { my $bits = bit_count($val); if ( $key eq 'company' ) { $bits *= 5; } elsif ( $key eq 'phone' ) { $bits *= 0.8; } $weight += $bits; } return $weight; } print_with_confidence( \%match, \&confidence_by_bits ); print_with_confidence( \%match, \&confidence_by_weighted_position ); print_with_confidence( \%match, \&confidence_by_weighted_field ); sub print_with_confidence { my ( $match_href, $conf_func ) = @_; while ( my ( $domain1, $v1 ) = each %match ) { my @candidates; while ( my ( $domain2, $v2 ) = each %$v1 ) { my $confidence = $conf_func->( $v2 ); push @candidates, [ $domain2, $confidence ]; } @candidates = sort { $b->[1] <=> $a->[1] || $b->[0] cmp $a->[0] } @candidates; print "$domain1\n"; foreach ( @candidates ) { my ( $domain2, $confidence ) = @{ $_ }; printf "\t%7d\t%s\n", $confidence, $domain2; } } print "\n"; }