push @sort_array, $trial_1;
push @pointer_array, $key_r;
####
push @sort_array, [ $trial_1, $key_r ];
####
my @sorted = sort{ $a->[ 0 ] <=> $b->[ 0 ] } @sort_array;
####
my( $closest, $pointer ) = @{ $sorted[ 0 ] };
####
#!/usr/bin/perl -slw
use strict;
use integer;
use vars qw( %tf1 %tf2 );
## Establish an array of all chromosome files.
my @files = ("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9",
"chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17",
"chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY");
## Loop through each file, calculate the overlaps, and output to file.
foreach (@files) {
print STDOUT scalar localtime;
open TF1, "<", "TF1.$_.gff" or die "Cannot open the TF1 hit file!"; ## tf1 hit file
open TF2, "<", "TF2.$_.gff" or die "Cannot open the TF2 hit file!"; ## tf2 hit file
open OUT, ">", "both.$_.gff" or die "Cannot open the output file!"; ## output file
while () {
chomp;
my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse
my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome
my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against
my $new_start = $target_start + $start; ## Use this for true genomic location
my $new_end = $target_start + $end; ## Use thie for true genomic location
## Build a hash and put the data inside;
$tf1{ $new_start }[0] = $chr;
$tf1{ $new_start }[1] = $type; ## Transcription factor
$tf1{ $new_start }[2] = $new_end;
$tf1{ $new_start }[3] = $score; ## Alignment score from PWM blast
$tf1{ $new_start }[4] = $strand; ## Strand where the match is found
}
while () {
chomp;
my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse
my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome
my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against
my $new_start = $target_start + $start; ## Use this for true genomic location
my $new_end = $target_start + $end; ## Use thie for true genomic location
## Build a hash and put the data inside;
$tf2{ $new_start }[0] = $chr;
$tf2{ $new_start }[1] = $type; ## Transcription factor
$tf2{ $new_start }[2] = $new_end;
$tf2{ $new_start }[3] = $score; ## Alignment score from PWM blast
$tf2{ $new_start }[4] = $strand; ## Strand where the match is found
}
## Close the unused files
close TF1;
close TF2;
OUTER:foreach my $key_c (sort keys %tf2) {
my @sort_array = ();
my @pointer_array = ();
my $start_c = $key_c;
my $end_c = $tf2{ $key_c }[2];
INNER:foreach my $key_r (sort keys %tf1) {
my $start_r = $key_r;
my $end_r = $tf1{ $key_r }[2];
my $trial_1 = $start_r - $end_c;
my $trial_2 = $start_c - $end_r;
if ($trial_1 >= 0) {
push @sort_array, $trial_1;
push @pointer_array, $key_r;
next INNER;
}
elsif ($trial_2 >=0) {
push @sort_array, $trial_2;
push @pointer_array, $key_r;
next INNER;
}
else { next INNER; }
}
my @sorted_indexes = sort{ $sort_array[ $a ] <=> $sort_array[ $b ] } 0 .. $#sortedArray;
my $closest = $sort_array[ $sorted_indexes[ 0 ] ];
my $pointer = $pointer_array[ $sorted_indexes[ 0 ] ];
if ($closest < 50) {
print OUT join "\t",
$tf2{ $key_c }[0], $tf2{ $key_c }[1], 'tf2', $start_c, $end_c,
$tf2{ $key_c }[3], $tf2{ $key_c }[4], 'tf1', $pointer,
$tf1{ $pointer }[2], $tf1{ $pointer }[3], $tf1{ $pointer }[4];
}
else { next OUTER; }
}
## Close the output file
close OUT;
print STDOUT scalar localtime;
} ## cycle through again til done with all files
####
#!/usr/bin/perl -slw
use strict;
use integer;
use vars qw( %tf1 %tf2 );
## Establish an array of all chromosome files.
my @files = ("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9",
"chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17",
"chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY");
## Loop through each file, calculate the overlaps, and output to file.
foreach (@files) {
print STDOUT scalar localtime;
open TF1, "<", "TF1.$_.gff" or die "Cannot open the TF1 hit file!"; ## tf1 hit file
open TF2, "<", "TF2.$_.gff" or die "Cannot open the TF2 hit file!"; ## tf2 hit file
open OUT, ">", "both.$_.gff" or die "Cannot open the output file!"; ## output file
while () {
chomp;
my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse
my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome
my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against
my $new_start = $target_start + $start; ## Use this for true genomic location
my $new_end = $target_start + $end; ## Use thie for true genomic location
## Build a hash and put the data inside;
$tf1{ $new_start }[0] = $chr;
$tf1{ $new_start }[1] = $type; ## Transcription factor
$tf1{ $new_start }[2] = $new_end;
$tf1{ $new_start }[3] = $score; ## Alignment score from PWM blast
$tf1{ $new_start }[4] = $strand; ## Strand where the match is found
}
while () {
chomp;
my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse
my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome
my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against
my $new_start = $target_start + $start; ## Use this for true genomic location
my $new_end = $target_start + $end; ## Use thie for true genomic location
## Build a hash and put the data inside;
$tf2{ $new_start }[0] = $chr;
$tf2{ $new_start }[1] = $type; ## Transcription factor
$tf2{ $new_start }[2] = $new_end;
$tf2{ $new_start }[3] = $score; ## Alignment score from PWM blast
$tf2{ $new_start }[4] = $strand; ## Strand where the match is found
}
## Close the unused files
close TF1;
close TF2;
foreach my $key_c ( keys %tf2 ) {
my( $best, $best_key ) = ( 1e99, '' );
my $start_c = $key_c;
my $end_c = $tf2{ $key_c }[2];
foreach my $key_r ( keys %tf1) {
my $start_r = $key_r;
my $end_r = $tf1{ $key_r }[2];
my $trial_1 = $start_r - $end_c;
my $trial_2 = $start_c - $end_r;
if ($trial_1 >= 0) {
if( $trial_1 < $best ) {
$best = $trial_1;
$best_key = $key_r;
}
}
elsif ($trial_2 >=0) {
if( $trial_2 < $best ) {
$best = $trial_2;
$best_key = $key_r;
}
}
}
if( $best < 50 ) {
print OUT join "\t",
$tf2{ $key_c }[0], $tf2{ $key_c }[1], 'tf2', $start_c, $end_c,
$tf2{ $key_c }[3], $tf2{ $key_c }[4], 'tf1', $best_key,
$tf1{ $best_key}[2], $tf1{ $best_key}[3], $tf1{ $best_key}[4];
}
}
## Close the output file
close OUT;
print STDOUT scalar localtime;
} ## cycle through again til done with all files