in reply to Re^3: Data munging
in thread Data munging

Hi BroswerUk,
I am not trying to fool anyone to solve my problem! There has been a copy & paste error. I had commented use strict; at the beginning because it was giving me errors and that part was not copied in the code block that I posted! Initially I used %hash1 and %hash2, but then I changed them to references. Apparently I had missed the last block of code in the copy & paste. Here is the updated version with all variables initialized and tested.
use strict; use warnings; use Data::Dumper; # if (@ARGV < 1) # { # print "Usage: $0 inputDir \n"; #e.g ./ # exit; # } my $file1 = "ref.txt"; my $file2 = "query.txt"; # my($key1, $key2, $hash1, $hash2); open (IN1,'<'.$file1) || die "***can't open the file $!\n"; my @lines1 = <IN1>; close IN1; #$i=0; for (@lines1) { chomp; my @a1 = split(/\t/, $_); my $key1 = $a1[0]; my $rs = $a1[1]; my $re = $a1[2]; #push(@{ $hash1->{$key1} } , "$rs\t$re" ); push(@{ $hash1->{$key1} } , $_ ); } open (IN2,'<'.$file2) || die "***can't open the file $!\n"; my @lines2 = <IN2>; close IN1; for (@lines2) { chomp; my @a2 = split(/\t/, $_); my $qs = $a2[1]; my $qe = $a2[2]; my $key2 = $a2[0]; #push(@{ $hash2->{$key2} } , "$qs\t$qe"); push(@{ $hash2->{$key2} } , $_ ); } #print Dumper(\%$hash2); my @common_keys = grep { exists $hash1->{$_} } sort keys %$hash2; my %seen; for (sort @common_keys) { for my $r (0..$#{ $hash1->{$_} }) { for my $q (0..$#{ $hash2->{$_} }) { #print "@{ $hash1->{$_} }[$i]\t@{ $hash2->{$_} }[$i]\t"; + my ($query_key, $query_start, $query_end, @qtail) = split( +/\t/, $hash2->{$_}[$q]); my ($ref_key, $ref_start, $ref_end, @rtail) = split(/\t/, +$hash1->{$_}[$r]); if( ($query_start >= $ref_start && $query_start < += $ref_end) || ($query_end >= $ref_start && $query_end <= $r +ef_end) || ($ref_start >= $query_start && $ref_start <= +$query_end) || ($ref_end >= $query_start && $ref_end <= $que +ry_end) ) { $seen{$_}{$query_start}++; } } } } #print Dumper(\%seen); my $overlap_count; for my $key (sort keys %$hash2) { for my $i (0..$#{ $hash2->{$key} } ) { my @s = split(/\t/, $hash2->{$key}[$i]); my @head = @s[0..2]; my @tail = @s[3..$#s]; #print "***$tail[0]\n"; my $start = $s[1]; # print "***$start\n"; if( exists $seen{$key}{$start} ) { $overlap_count = $seen{$key}{$start}; #print "$hash2->{$key}[$i]\t$overlap_count\n" print map {"$_\t"} insert_field(\@head, \$overlap_count, \ +@tail); print "\n" } else { $overlap_count = 0; #print "$hash2->{$key}[$i]\t$overlap_count\n"; print map {"$_\t"} insert_field(\@head, \$overlap_count, \ +@tail); print "\n" } #print "\n"; } } sub insert_field { my ($head, $insert, $tail) = @_; my @line; push(@line, @$head, $$insert, @$tail); return @line; }
Your comment about me fooling is unacceptable to me. Please turn off the use strict at the beginning and check if you get an answer! Everyone can make mistakes when copying and pasting even if we check before pasting!