#!/usr/bin/perl -slw use strict; use integer; use vars qw( %tf1 %tf2 ); ## Establish an array of all chromosome files. my @files = ("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY"); ## Loop through each file, calculate the overlaps, and output to file. foreach (@files) { print STDOUT scalar localtime; open TF1, "<", "TF1.$_.gff" or die "Cannot open the TF1 hit file!"; ## tf1 hit file open TF2, "<", "TF2.$_.gff" or die "Cannot open the TF2 hit file!"; ## tf2 hit file open OUT, ">", "both.$_.gff" or die "Cannot open the output file!"; ## output file while () { chomp; my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against my $new_start = $target_start + $start; ## Use this for true genomic location my $new_end = $target_start + $end; ## Use thie for true genomic location ## Build a hash and put the data inside; $tf1{ $new_start }[0] = $chr; $tf1{ $new_start }[1] = $type; ## Transcription factor $tf1{ $new_start }[2] = $new_end; $tf1{ $new_start }[3] = $score; ## Alignment score from PWM blast $tf1{ $new_start }[4] = $strand; ## Strand where the match is found } while () { chomp; my ($location, undef, $type, $start, $end, $score, $strand, undef) = split "\t"; ## Parse my ($chr, $location_numbers) = split "_", $location; ## Get the chromosome my ($target_start, $target_end) = split "-", $location_numbers; ## Get the start site for the 10kb sequence "blasted" against my $new_start = $target_start + $start; ## Use this for true genomic location my $new_end = $target_start + $end; ## Use thie for true genomic location ## Build a hash and put the data inside; $tf2{ $new_start }[0] = $chr; $tf2{ $new_start }[1] = $type; ## Transcription factor $tf2{ $new_start }[2] = $new_end; $tf2{ $new_start }[3] = $score; ## Alignment score from PWM blast $tf2{ $new_start }[4] = $strand; ## Strand where the match is found } ## Close the unused files close TF1; close TF2; OUTER:foreach my $key_c (sort keys %tf2) { my @sort_array = (); my @pointer_array = (); my $start_c = $key_c; my $end_c = $tf2{ $key_c }[2]; INNER:foreach my $key_r (sort keys %tf1) { my $start_r = $key_r; my $end_r = $tf1{ $key_r }[2]; my $trial_1 = $start_r - $end_c; my $trial_2 = $start_c - $end_r; if ($trial_1 >= 0) { push @sort_array, $trial_1; push @pointer_array, $key_r; next INNER; } elsif ($trial_2 >=0) { push @sort_array, $trial_2; push @pointer_array, $key_r; next INNER; } else { next INNER; } } my $array = \@sort_array; ## create array ref my $array_which = \@pointer_array; ## know which tf1 binding site my @closest = bubblesmart($array, $array_which); ## pipe to sub and get the closest value my $closest = shift @closest; my $pointer = shift @closest; if ($closest < 50) { print OUT "$tf2{ $key_c }[0]\t$tf2{ $key_c }[1]\ttf2\t$start_c\t$end_c\t$tf2{ $key_c }[3]\t$tf2{ $key_c }[4]", "tf1\t$pointer\t$tf1{ $pointer }[2]\t$tf1{ $pointer }[3]\t$tf1{ $pointer }[4]\t"; } else { next OUTER; } } ## Close the output file close OUT; print STDOUT scalar localtime; } ## cycle through again til done with all files ## Code taken from Mastering Algorithms with Perl, with a small modification sub bubblesmart { my $array=shift; my $array_which = shift; my $start = 0; ## The start index of the bubbling scan. my $ncomp = 0; ## The number of comparisons. my $nswap = 0; ## The number of swaps. my $i = $#$array; while (1) { my $new_start; ## The new start index of the bubbling scan. my $new_end = 0; ## The new end index of the bubbling scan. for (my $j = $start || 1; $j <= $i; $j++) { $ncomp++; if ($array->[ $j -1 ] gt $array->[ $j ]) { @$array[ $j, $j -1 ] = @$array[ $j - 1, $j ]; @$array_which[ $j, $j -1 ] = @$array_which[ $j - 1, $j ]; $nswap++; $new_end = $j - 1; $new_start = $j - 1 unless defined $new_start; } } last unless defined $new_start; ## No swaps: we're done. $i = $new_end; $start = $new_start; } my $return_value = pop @$array; my $return_point = pop @$array_which; #print STDOUT "bubblesmart: ", scalar @$array, " elements, $ncomp comparisons, $nswap swaps\n"; return $return_value, $return_point; }