comment on

Copy & paste errors. Here is the code with `use strict` enabled and all variables initialized; it has been tested.


use strict;
use warnings;
use Data::Dumper;

# if (@ARGV < 1)
# {
    # print "Usage: $0 inputDir \n";
    #e.g ./
    # exit;
# }
my $file1 = "ref.txt";
my $file2 = "query.txt";

# Lines of each input file, grouped by their first tab-separated field.
# Each hash value is an array ref holding the chomped lines in file order.
# Both are always hash refs (possibly empty), never undef.
my $hash1 = read_lines_by_key($file1);
my $hash2 = read_lines_by_key($file2);

# read_lines_by_key($path)
#
# Reads the tab-separated text file at $path and groups its lines by the
# value of the first field.
#
# Parameters:
#   $path - name of the file to read.
#
# Returns:
#   A hash ref mapping each first-field value to an array ref of the full
#   (chomped) lines that start with it, in the order they appear in the file.
#
# Dies if the file cannot be opened. Lines that contain no fields at all
# (empty lines, or lines made only of tabs) are skipped, because they have
# no key to be stored under.
sub read_lines_by_key
{
    my ($path) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # misread as a mode, and each call owns (and closes) its own handle.
    open(my $fh, '<', $path) or die "***can't open the file $path: $!\n";

    my %lines_for;
    while (my $line = <$fh>)
    {
        chomp $line;

        # Only the first field is needed here; the whole line is stored.
        my ($key) = split(/\t/, $line);
        next unless defined $key;

        push(@{ $lines_for{$key} }, $line);
    }
    close($fh);

    return \%lines_for;
}

#print Dumper(\%$hash2);

# For every query line, count how many reference lines with the same key
# (first column) overlap it, then print the query line with that count
# inserted as a new fourth column.
#
# Columns 2 and 3 of each line are used as the start and end of a range and
# are compared numerically. The count is computed per query line, so two
# query lines that share a key and a start value but have different ends
# each get their own count.
#
# Output order: keys in sorted order; within a key, query lines in the order
# they were stored.
for my $key (sort keys %$hash2)
{
    # Reference lines for this key; an empty list when the key does not
    # occur in the reference data (every such query line then reports 0).
    my $ref_lines = ($hash1 && $hash1->{$key}) || [];

    for my $query_line (@{ $hash2->{$key} })
    {
        my @s    = split(/\t/, $query_line);
        my @head = @s[0..2];
        my @tail = @s[3..$#s];
        my ($query_start, $query_end) = @s[1, 2];

        my $overlap_count = 0;
        for my $ref_line (@$ref_lines)
        {
            my (undef, $ref_start, $ref_end) = split(/\t/, $ref_line);

            # Two ranges overlap when an endpoint of either one lies inside
            # the other (bounds inclusive).
            my $overlaps =
                   ($query_start >= $ref_start   && $query_start <= $ref_end)
                || ($query_end   >= $ref_start   && $query_end   <= $ref_end)
                || ($ref_start   >= $query_start && $ref_start   <= $query_end)
                || ($ref_end     >= $query_start && $ref_end     <= $query_end);

            $overlap_count++ if $overlaps;
        }

        # Every field, including the last one, is followed by a tab; the
        # line therefore ends with a trailing tab before the newline.
        print map {"$_\t"} insert_field(\@head, \$overlap_count, \@tail);
        print "\n";
    }
}





# insert_field(\@head, \$insert, \@tail)
#
# Builds a new list made of the elements of @$head, followed by the scalar
# that $insert refers to, followed by the elements of @$tail. None of the
# arguments is modified.
#
# Parameters:
#   $head   - array ref: fields placed before the inserted value.
#   $insert - scalar ref: the value to insert.
#   $tail   - array ref: fields placed after the inserted value.
#
# Returns the combined fields as a list (in scalar context, their count).
sub insert_field
{
    my ($head, $insert, $tail) = @_;

    # Assemble into a named array so that scalar context yields the number
    # of fields rather than the last element.
    my @fields = (@{$head}, ${$insert}, @{$tail});

    return @fields;
}
[download]

In reply to Re^4: Data munging by umasuresh
in thread Data munging by umasuresh

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&amp;`
	<		`&lt;`
	>		`&gt;`
	[		`&#91;`
	]		`&#93;`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.