#!/usr/bin/perl -slw use strict; use integer; use vars qw( %tf1 %tf2 ); ## Establish an array of all chromosome files. my @files = ("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY"); ## Loop through each file, calculate the overlaps, and output to file. foreach (@files) { print STDOUT scalar localtime; open TF1, "<", "TF1.$_.gff" or die "Cannot open the TF1 hit file!"; ## tf1 hit file open TF2, "<", "TF2.$_.gff" or die "Cannot open the TF2 hit file!"; ## tf2 hit file open OUT, ">", "both.$_.gff" or die "Cannot open the output file!"; ## output file while () { chomp; my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against my $new_start = $target_start + $start; ## Use this for true genomic location my $new_end = $target_start + $end; ## Use thie for true genomic location ## Build a hash and put the data inside; $tf1{ $new_start }[0] = $chr; $tf1{ $new_start }[1] = $type; ## Transcription factor $tf1{ $new_start }[2] = $new_end; $tf1{ $new_start }[3] = $score; ## Alignment score from PWM blast $tf1{ $new_start }[4] = $strand; ## Strand where the match is found } while () { chomp; my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against my $new_start = $target_start + $start; ## Use this for true genomic location my $new_end = $target_start + $end; ## Use thie for true genomic location ## Build a hash and put the data inside; $tf2{ $new_start }[0] = $chr; $tf2{ $new_start }[1] = $type; ## Transcription factor $tf2{ $new_start }[2] = $new_end; $tf2{ $new_start }[3] = $score; ## Alignment score from PWM blast $tf2{ $new_start }[4] = $strand; ## Strand where the match is found } ## Close the unused files close TF1; close TF2; OUTER:foreach my $key_c (sort keys %tf2) { my @sort_array = (); my @pointer_array = (); my $start_c = $key_c; my $end_c = $tf2{ $key_c }[2]; INNER:foreach my $key_r (sort keys %tf1) { my $start_r = $key_r; my $end_r = $tf1{ $key_r }[2]; my $trial_1 = $start_r - $end_c; my $trial_2 = $start_c - $end_r; if ($trial_1 >= 0) { push @sort_array, $trial_1; push @pointer_array, $key_r; next INNER; } elsif ($trial_2 >=0) { push @sort_array, $trial_2; push @pointer_array, $key_r; next INNER; } else { next INNER; } } my @sorted_indexes = sort{ $sort_array[ $a ] <=> $sort_array[ $b ] } 0 .. $#sortedArray; my $closest = $sort_array[ $sorted_indexes[ 0 ] ]; my $pointer = $pointer_array[ $sorted_indexes[ 0 ] ]; if ($closest < 50) { print OUT join "\t", $tf2{ $key_c }[0], $tf2{ $key_c }[1], 'tf2', $start_c, $end_c, $tf2{ $key_c }[3], $tf2{ $key_c }[4], 'tf1', $pointer, $tf1{ $pointer }[2], $tf1{ $pointer }[3], $tf1{ $pointer }[4]; } else { next OUTER; } } ## Close the output file close OUT; print STDOUT scalar localtime; } ## cycle through again til done with all files