use strict;
use warnings;
use Data::Dumper;
use Scalar::Util qw(looks_like_number);

# Purpose: for every attribute row in Attributes.txt, count how many of its
# flagged (== 1) columns belong to each category named in Attributes_ID.txt,
# then dump the result as a list of { row_id => { category => hit_count } }.
#
# Layout relied on by the code below:
#   Attributes_ID.txt : one tab-separated line per column; field 1 is a file
#                       name, field 2 is that column's category ("type").
#   Attributes.txt    : one tab-separated line per row; field 1 is the row id,
#                       fields 2..N are flags. Flag column i is matched with
#                       the i-th entry read from Attributes_ID.txt.

open my $dataIn1, "<", "Attributes_ID.txt" or die "NO ID FILE: $!";
open my $dataIn2, "<", "Attributes.txt"    or die "NO ATTR FILE: $!";

# Start as real (empty) array references so an empty input file yields an
# empty result instead of dying on an undefined-value dereference.
my $data  = [];    # category of each flag column, in column order
my $attrs = [];    # one array ref per attribute row: [ id, flag, flag, ... ]

# getdata( $line )
# Parse one line of the ID file and append its category to $data.
# Blank lines (no fields at all) are skipped. A line with a file name but no
# category still appends a placeholder (undef) so that later positions keep
# lining up with the flag columns of Attributes.txt.
sub getdata {
    my ($line) = @_;
    my ( $fileName, $type ) = split /\t/, $line;
    return if !defined $fileName;
    push @{$data}, $type;
    return;
}

# getattrs( $line )
# Parse one line of the attribute file and append its fields, as an array
# reference, to $attrs. Lines that yield no fields are skipped.
sub getattrs {
    my ($line) = @_;
    my @fields = split /\t/, $line;
    return if !defined $fields[0];
    push @{$attrs}, \@fields;
    return;
}

# calcPercentages( \%counts )
# Placeholder - currently does nothing. Intended implementation:
#   INPUT: Hash reference
#   Determine the total amount of attributes
#   Walk through each category: Circle, Triangle, ...
#   Take the hit count divided by the total amount of attributes
#   (multiplied by 100?)
#   For each category add something to the hash to store the percentage
#   e.g. CircleChance, TriangleChance, ....
#   askQuestions could potentially be called here
sub calcPercentages {
    return;
}

# askQuestions( \%counts )
# Placeholder - currently does nothing. Intended implementation:
#   INPUT: Hash reference
#   my $h = ...
#   Question 1: Does this attribute occur in Circle more than 50% of the
#   time, and less than 10% of the time in Triangle
#   if ( $h->{ CircleChance } > 50 && $h->{ TriangleChance } < 10 ) {
#       Do something here.
#       E.g. Store another result $h
#   }
sub askQuestions {
    return;
}

while ( my $line = <$dataIn1> ) {
    chomp $line;
    getdata($line);
}
close $dataIn1;

while ( my $line = <$dataIn2> ) {
    chomp $line;
    getattrs($line);
}
close $dataIn2;

# Categories that actually have a name; placeholders are excluded so they are
# never used as hash keys.
my @categories = grep { defined } @{$data};

my @result;
for my $row ( @{$attrs} ) {
    my ( $id, @flags ) = @{$row};

    # Every known category starts at zero, regardless of how many columns
    # this (or any other) row happens to have.
    my %subres = map { $_ => 0 } @categories;

    if ( @flags > @{$data} ) {
        warn "Row '$id' has " . scalar(@flags)
            . " flag columns but only " . scalar( @{$data} )
            . " categories are known; extra columns ignored\n";
    }

    for my $col ( 0 .. $#flags ) {
        last if $col > $#{$data};          # no category for this column

        my $category = $data->[$col];
        next if !defined $category;        # ID line without a type

        # Header cells, empty cells and other text are not hits; skipping
        # them here avoids "isn't numeric" warnings from ==.
        my $flag = $flags[$col];
        next if !looks_like_number($flag);

        ++$subres{$category} if $flag == 1;
    }

    # You could potentially start calculating hit count percentages per
    # category here:
    calcPercentages( \%subres );

    push @result, { $id => \%subres };
}

# Stable key order makes the dump reproducible between runs.
$Data::Dumper::Sortkeys = 1;
print Dumper( \@result );