Anonymous Monk has asked for the wisdom of the Perl Monks concerning the following question:

Hi Monks, I would like to compare the values of two hashes, to check whether a value is the same in both, and to report whether they match or not. I have given my code (as far as I could get, that is) and the Data::Dumper output of the hashes.

#!/software/bin/perl
# Load two genotype lists for one sample -- Sequenom calls from the database
# and CaveMan SNP calls from a file -- and print their chromosome values side
# by side.
use strict;
use warnings;

use Getopt::Long;
use Benchmark;
use Config::Config qw(Sequenom.ini);
use Database::Conn;
use Data::Dumper;

GetOptions( "sam=s" => \my $sample );
defined $sample
    or die "Usage: $0 --sam SAMPLE\n";

my $geno_seq_list = getseqgenotypes($sample);
my $cave_snp_list = getcavemansnpfile($sample);

#print Dumper($geno_seq_list);
# Number of samples loaded. A bare `scalar %$hash` reports bucket usage
# (e.g. "3/8") on perls before 5.26, so count the keys explicitly.
print scalar( keys %{$geno_seq_list} ), "\n";

foreach my $sam ( keys %{$geno_seq_list} ) {
    my $seq_used = $geno_seq_list->{$sam};

    # The two lists are keyed independently, so a sample may be missing here.
    my $cave_used = $cave_snp_list->{$sam} || [];

    foreach my $seq2com ( @{$seq_used} ) {
        foreach my $cave2com ( @{$cave_used} ) {

            # The problem is here: how do I check and print without going
            # through this loop? $cave2com->{chromosome} changes whereas
            # $seq2com->{chromosome} stays the same.
            print $seq2com->{chromosome}, ":", $cave2com->{chromosome}, "\n";
        }
    }
}

# getseqgenotypes($sample_name)
#
# Looks up the genotype calls for $sample_name through the 'pret' connection.
# When called without an argument it falls back to the file-level $sample.
#
# Returns a hash ref keyed by column 1 of each returned row; every value is an
# array ref of { chromosome, position, genotype } hashes taken from columns
# 2, 3 and 4 of that row.
#
# NOTE(review): DBI is never loaded explicitly in this file; presumably
# Database::Conn loads it -- confirm.
# NOTE(review): the connection credentials are hard-coded below.
sub getseqgenotypes {
    my ($sample_name) = @_;
    $sample_name = $sample unless defined $sample_name;

    my $gen_list = {};
    my $snpconn  = Database::Conn->new('live');
    $snpconn->addConnection(
        DBI->connect(
            'dbi:Oracle:ps.world', 'pwq', 'pwq',
            { RaiseError => 1, AutoCommit => 0 }
        ),
        'pret'
    );

    my $id_ind = $snpconn->execute(
        'snp::Sequenom::getIdIndforExomeSample', $sample_name );
    my $genotype = $snpconn->executeArrRef(
        'snp::Sequenom::getGenotypeCallsPosition', $id_ind );

    foreach my $geno ( @{$genotype} ) {
        push @{ $gen_list->{ $geno->[1] } },
            {
            chromosome => $geno->[2],
            position   => $geno->[3],
            genotype   => $geno->[4],
            };
    }

    return ($gen_list);
}    #end of sub getseqgenotypes

# getcavemansnpfile($sample_name)
#
# Asks the 'tes' connection for the SNP file location of $sample_name and
# parses that file. When called without an argument it falls back to the
# file-level $sample.
#
# Lines starting with '>' are skipped. Every other line is split on
# whitespace: field 0 is stored as chromosome, field 1 as position, and the
# part of field 5 before the first '/' as genotype.
#
# Returns a hash ref with a single key, the sample name, whose value is an
# array ref of { chromosome, position, genotype } hashes.
# Dies if the file cannot be opened.
#
# NOTE(review): the connection credentials are hard-coded below.
sub getcavemansnpfile {
    my ($sample_name) = @_;
    $sample_name = $sample unless defined $sample_name;

    my $caveman_list = {};
    my $nstconn      = Database::Conn->new('live');
    $nstconn->addConnection(
        DBI->connect(
            'dbi:Oracle:peqt.world', 'read', 'read',
            { RaiseError => 1, AutoCommit => 0 }
        ),
        'tes'
    );

    my $id_sample =
        $nstconn->execute( 'nst::Caveman::getSampleid', $sample_name );

    #print "IDSample: $id_sample\n";
    my $file_location =
        $nstconn->execute( 'nst::Caveman::getCaveManSNPSFile', $id_sample );

    # Three-argument open with a lexical handle: the mode cannot be injected
    # through the file name and the handle is scoped to this sub.
    open( my $snp_fh, '<', $file_location )
        or die "Error: Cannot open the file $file_location:$!\n";

    while ( my $line = <$snp_fh> ) {
        chomp $line;
        next if $line =~ /^>/;

        my @data = split ' ', $line;

        # Field 5 holds two genotypes separated by '/'; only the first one
        # is kept.
        my ($nor_geno) = split m{/}, $data[5];

        # array of hash
        push @{ $caveman_list->{$sample_name} },
            {
            chromosome => $data[0],
            position   => $data[1],
            genotype   => $nor_geno,
            };
    }    #end of while loop
    close($snp_fh);

    return ($caveman_list);
}
Data::Dumper output of $geno_seq_list:
$VAR1 = { '42-MG-BA' => [ { 'chromosome' => '19', 'position' => '35770059', 'genotype' => 'TC' }, { 'chromosome' => '2', 'position' => '68019584', 'genotype' => 'G' }, { 'chromosome' => '16', 'position' => '9561557', 'genotype' => 'G' },
Data::Dumper output of $cave_snp_list:
$VAR1 = { '42-MG-BA' => [ { 'chromosome' => '7', 'position' => '158751481', 'genotype' => 'CC' }, { 'chromosome' => '7', 'position' => '158773210', 'genotype' => 'CG' }, { 'chromosome' => '7', 'position' => '158773232', 'genotype' => 'AG' }, { 'chromosome' => '7', 'position' => '158773936', 'genotype' => 'AA' },

I want to compare the genotypes of two hashes when the chromosome and position match. They both belong to 42-MG-BA (which is the sample).
# For each sample, pair every Sequenom record with every CaveMan record and
# print the two chromosome values.
foreach my $sam ( keys %{$geno_seq_list} ) {
    my $seq_used  = $geno_seq_list->{$sam};
    my $cave_used = $cave_snp_list->{$sam};

    foreach my $seq2com ( @{$seq_used} ) {

        # This is the nested loop in question: $cave2com only exists while
        # it is running, so it has to be active for the print to compile.
        foreach my $cave2com ( @{$cave_used} ) {
            print $seq2com->{chromosome}, ":", $cave2com->{chromosome}, "\n";
        }
    }
}
Could I do this without going through the nested foreach loop? Suggestions are received with thanks! :) Thanks, monks.

Replies are listed 'Best First'.
Re: comparing values of two hashes
by hbm (Hermit) on Sep 13, 2011 at 14:34 UTC
    I want to compare the genotypes of two hashes when the chromosome and position match.

    Within one hash, is it possible for two or more records to have the same chromosome and position? If not, you might change your underlying record structure to a hash of genotypes, keyed by a merged string of chromosome and position. Something like this:

    # Genotypes for one sample, keyed by "chromosome_position".
    # The value must be an anonymous hash ({ ... }); a parenthesised list
    # assigned to a single hash element would keep only its last item.
    $gen_seq_list->{'42-MG-BA'} = {
        '19_35770059' => 'TC',
        '2_68019584'  => 'G',
        '16_9561557'  => 'G',
    };

    And then perhaps:

    # Walk every sample and every "chromosome_position" key of the first
    # structure and compare its genotype with the one stored under the same
    # key in the second structure. Both variables are hash references.
    for my $seq (keys %{$gen_seq_list}) {
        my $gen_for  = $gen_seq_list->{$seq};
        my $cave_for = $cave_snp_list->{$seq} or next;    # sample only in one list

        for my $locus (keys %{$gen_for}) {
            next unless exists $cave_for->{$locus};       # position only in one list

            # Genotypes are strings, so compare with eq; == would compare
            # them numerically ('TC' == 'G' is 0 == 0, i.e. true).
            if ($gen_for->{$locus} eq $cave_for->{$locus}) {
                ...
            }
        }
    }

    Another thought is to build one hash instead of two like this:

    use strict;
    use warnings;

    # One combined structure instead of two:
    #   sample => "chromosome_position" => { source => genotype }
    # where source is 'cave' and/or 'gen'.
    my %genes = (
        '42-MG-BA' => {
            '19_35770059' => { cave => 'TC', gen => 'AA' },
            '2_68019584'  => { cave => 'G' },
            '16_9561557'  => { gen  => 'AG' },
        },
        '41-OK-DZ' => {
            '7_158773210' => { gen => 'AA', cave => 'G' },
        },
    );

    # Print every position that has a genotype from more than one source.
    # All key lists are sorted so the output order is reproducible; plain
    # hash iteration order is not.
    for my $sample (sort keys %genes) {
        my $calls_for = $genes{$sample};

        for my $locus (sort keys %{$calls_for}) {
            my $href = $calls_for->{$locus};
            next unless keys %{$href} > 1;

            print "${sample}:${locus}:",
                join(",", map { "$_=$href->{$_}" } sort keys %{$href}),
                "\n";
        }
    }
    __OUTPUT__
    41-OK-DZ:7_158773210:cave=G,gen=AA
    42-MG-BA:19_35770059:cave=TC,gen=AA