in reply to Hash - Compare - Multiple value keys
But to make things easier for others, I've re-indented it.
Once I get back, if no one else has figured this out, I'll take a look during the F1 race.
my %file1=(); while(<INPUT1>){ (my $id, my $number) = split("\t", $_); if ($id=~ m/^(CLS_S3_Contig[0-9]+)([-]?)([0-9]+)([_]?)([0-9]+)$/i) + { my $matched_id=$id; # breaks the CLS_Contig1000_200-202 to its + componenents for (my $i=$3-8;$i<$5+9;$i++){ push (@{$file1{$1}}, $i); # This makes a hash of names and + each position # plus/minus 8 - this is the has +h with each key # have multiple values. } }#end for if else {my $mismatch_id=$id; # print "$mismatch_id does not match with CLS_S3_Contigs!\n"; } # end for else } # end for while #reading the file 2 with several columns. columns 2, 5 make a hash of +multiple #value and $mismatch column is the one that need to meet the condition while(<INPUT2>){ my ($serial_no ,$contig_id, $position_with_gap, $gap, $position_corrected, $ATGCN, $depth, $consensus, $mismatch, $star, $geno_A, $geno_B) = split("\t", $_) +; push (@{$file2{$contig_id}}, $position_corrected); #Make a hash of + contig ID #and base posit +ion one key #multiple value +s push (@{$file2_2{$position_corrected}}, $mismatch); } #end for while # Here we are going to have access each element of hash of hash my $rHoH = foo();my %hash_1; my( $contig_id, $position_corrected, $mismatch ); for my $serial_no ( keys %$rHoH ) { $contig_id = $rHoH->{ $serial_no }->{ 'contigID' }; $position_corrected = $rHoH->{ $serial_no}->{ 'position_corrected' + }; $mismatch = $rHoH->{ $serial_no }->{ 'mismatch' }; # Now we want to know how many of contigs contain more than three mism +atch #Hash_1 here if ($mismatch >= 3) { #$hash_1{$contig_id} = $position_corrected; ## this will result a hash with name of contig and only one va +lue per contig # that is why it is commented here push (@{$hash_1{$contig_id}}, $position_corrected); # This mak +e a hash # with one + key with # multiple + values #print RESULTS "$contig_id\t$position_corrected\t$mismatch\n"; + # This prints all contige that have more than 3 mismatch. } } # here is where I messed up for the query. I cannot control this loop. # it finds the things but fails to print them only once. foreach $1 (sort keys %file1){ foreach my $position1 (@{$file1{$1}}){ $found =0; foreach $contig_id(sort keys %hash_1){ foreach my $position (@{$hash_1{$contig_id}}){ $found = 1 if $1 =~ /^$contig_id/ && $contig_id=~ /^$1 +/ && $position1==$position; print RESULTS "$position1\t$1\n" if $found; print "not matched\n" if !$found; } } } } ############################################################## sub foo { my ( $serial_no ,$contig_id, $position_with_gap, $gap, $position_corrected, $ATGCN, $depth, $consensus, $mismatch, $star, $geno_A, $geno_B); my %HoH = (); open(INPUT2,$ARGV[1]) || die "Cannot open file \"$ARGV[1]\""; # MA +P file while( <INPUT2> ) { ( $serial_no ,$contig_id, $position_with_gap, $gap, $position_ +corrected, $ATGCN, $depth, $consensus, $mismatch, $star, $geno_A, $gen +o_B) = split("\t", $_); $HoH{$serial_no} {'contigID'} = $contig_id; $HoH{$serial_no} {'position_corrected'} = $position_corrected; $HoH{$serial_no} {'mismatch'} = $mismatch; } return \%HoH; }
|
|---|