Yes, that would ruin my approach. In this case I would introduce and array to the hash. It complicates things, but only a little bit :-)
Update: Corrected a wrong hash key in the printout.
Update2: Removed incorrect next statement.
Here is the modified version. The algo can get heavy if the data is large, so maybe wiser monks could help a bit here?
#! /usr/bin/perl
use strict;
use warnings;
my @d1 = qw{
CLS_S3_Contig2721-139_168
CLS_S3_Contig2722-375_390
CLS_S3_Contig2725-323_362
CLS_S3_Contig2725-455_480
CLS_S3_Contig2728-117_144
CLS_S3_Contig2728-437_472
CLS_S3_Contig2729-119_130
CLS_S3_Contig2729-163_220
CLS_S3_Contig2730-181_202
CLS_S3_Contig2730-361_384
CLS_S3_Contig2731-824_843
CLS_S3_Contig2731-1150_1201
CLS_S3_Contig2735-571_636
CLS_S3_Contig2735-677_710
CLS_S3_Contig2735-775_810
};
my @d2 = qw{
CLS_S3_Contig2721-142_169
CLS_S3_Contig6525-509_514
CLS_S3_Contig6525-493_502
CLS_S3_Contig6525-503_508
CLS_S3_Contig2977-365_376
CLS_S3_Contig2977-77_82
CLS_S3_Contig2977-83_90
CLS_S3_Contig4978-271_274
CLS_S3_Contig4978-385_388
CLS_S3_Contig2730-365_389
};
{
my %d_one = ();
my %d_two = ();
for( @d1 ) {
my ($key, $start, $end) = $_ =~ /(.*)-(\d+)_(\d+)/;
push @{ $d_one{$key} }, { 'start' => $start, 'end' => $end };
}
for( @d2 ) {
my ($key, $start, $end) = $_ =~ /(.*)-(\d+)_(\d+)/;
push @{ $d_two{$key} }, { 'start' => $start, 'end' => $end };
+
}
use Data::Dumper;
print Dumper(\%d_one, \%d_two);
print "Genename(file1)\t\tstart\tend\t**\tGenename(file2)\t\tstart
+\tend\n\n";
foreach my $key_one ( keys %d_one ) {
if( $d_two{$key_one} ) { # We found a matching key!
foreach my $limits_one ( @{ $d_one{$key_one} } ) {
foreach my $limits_two ( @{ $d_two{$key_one} } ) {
if( $limits_two->{'start'} >= $limits_one->{'start
+'} &&
$limits_two->{'start'} <= $limits_one->{'end'}
+ ) {
print "$key_one\t$limits_one->{'start'}\t$
+limits_one->{'end'}"
. "\t**\t"
. "$key_one\t$limits_two->{'start'}\t$
+limits_two->{'end'}\n";
}
}
}
}
}
}
Note, write test cases for this or test otherwise extensively. Below is the current output of my script:
cpt2jmo@phantom:~/misc$ ./2_hashes_with_arrays.pl
$VAR1 = {
'CLS_S3_Contig2722' => [
{
'end' => '390',
'start' => '375'
}
],
'CLS_S3_Contig2725' => [
{
'end' => '362',
'start' => '323'
},
{
'end' => '480',
'start' => '455'
}
],
'CLS_S3_Contig2729' => [
{
'end' => '130',
'start' => '119'
},
{
'end' => '220',
'start' => '163'
}
],
'CLS_S3_Contig2731' => [
{
'end' => '843',
'start' => '824'
},
{
'end' => '1201',
'start' => '1150'
}
],
'CLS_S3_Contig2728' => [
{
'end' => '144',
'start' => '117'
},
{
'end' => '472',
'start' => '437'
}
],
'CLS_S3_Contig2721' => [
{
'end' => '168',
'start' => '139'
}
],
'CLS_S3_Contig2735' => [
{
'end' => '636',
'start' => '571'
},
{
'end' => '710',
'start' => '677'
},
{
'end' => '810',
'start' => '775'
}
],
'CLS_S3_Contig2730' => [
{
'end' => '202',
'start' => '181'
},
{
'end' => '384',
'start' => '361'
}
]
};
$VAR2 = {
'CLS_S3_Contig2721' => [
{
'end' => '169',
'start' => '142'
}
],
'CLS_S3_Contig6525' => [
{
'end' => '514',
'start' => '509'
},
{
'end' => '502',
'start' => '493'
},
{
'end' => '508',
'start' => '503'
}
],
'CLS_S3_Contig2730' => [
{
'end' => '389',
'start' => '365'
}
],
'CLS_S3_Contig4978' => [
{
'end' => '274',
'start' => '271'
},
{
'end' => '388',
'start' => '385'
}
],
'CLS_S3_Contig2977' => [
{
'end' => '376',
'start' => '365'
},
{
'end' => '82',
'start' => '77'
},
{
'end' => '90',
'start' => '83'
}
]
};
Genename(file1) start end ** Genename(file2)
+ start end
CLS_S3_Contig2721 139 168 ** CLS_S3_Contig2721
+ 142 169
CLS_S3_Contig2730 361 384 ** CLS_S3_Contig2730
+ 365 389
--
seek $her, $from, $everywhere if exists $true{love};
|