#!/usr/bin/perl -slw use strict; use integer; use vars qw( %tf1 %tf2 ); ## Establish an array of all chromosome files. my @files = ("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY"); ## Loop through each file, calculate the overlaps, and output to file. foreach (@files) { print STDOUT scalar localtime; open TF1, "<", "TF1.$_.gff" or die "Cannot open the TF1 hit file!"; ## tf1 hit file open TF2, "<", "TF2.$_.gff" or die "Cannot open the TF2 hit file!"; ## tf2 hit file open OUT, ">", "both.$_.gff" or die "Cannot open the output file!"; ## output file while () { chomp; my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against my $new_start = $target_start + $start; ## Use this for true genomic location my $new_end = $target_start + $end; ## Use thie for true genomic location ## Build a hash and put the data inside; $tf1{ $new_start }[0] = $chr; $tf1{ $new_start }[1] = $type; ## Transcription factor $tf1{ $new_start }[2] = $new_end; $tf1{ $new_start }[3] = $score; ## Alignment score from PWM blast $tf1{ $new_start }[4] = $strand; ## Strand where the match is found } while () { chomp; my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against my $new_start = $target_start + $start; ## Use this for true genomic location my $new_end = $target_start + $end; ## Use thie for true genomic location ## Build a hash and put the data inside; $tf2{ $new_start }[0] = $chr; $tf2{ $new_start }[1] = $type; ## Transcription factor $tf2{ $new_start }[2] = $new_end; $tf2{ $new_start }[3] = $score; ## Alignment score from PWM blast $tf2{ $new_start }[4] = $strand; ## Strand where the match is found } ## Close the unused files close TF1; close TF2; foreach my $key_c ( keys %tf2 ) { my( $best, $best_key ) = ( 1e99, '' ); my $start_c = $key_c; my $end_c = $tf2{ $key_c }[2]; foreach my $key_r ( keys %tf1) { my $start_r = $key_r; my $end_r = $tf1{ $key_r }[2]; my $trial_1 = $start_r - $end_c; my $trial_2 = $start_c - $end_r; if ($trial_1 >= 0) { if( $trial_1 < $best ) { $best = $trial_1; $best_key = $key_r; } } elsif ($trial_2 >=0) { if( $trial_2 < $best ) { $best = $trial_2; $best_key = $key_r; } } } if( $best < 50 ) { print OUT join "\t", $tf2{ $key_c }[0], $tf2{ $key_c }[1], 'tf2', $start_c, $end_c, $tf2{ $key_c }[3], $tf2{ $key_c }[4], 'tf1', $best_key, $tf1{ $best_key}[2], $tf1{ $best_key}[3], $tf1{ $best_key}[4]; } } ## Close the output file close OUT; print STDOUT scalar localtime; } ## cycle through again til done with all files