#!/usr/bin/perl -w use strict; our (@window, @probe)=(); our @main = &open_file_main(); our @annot = &open_file_annot(); &outfile; # loop through the main data file OLC: foreach my $md (@main) { # remove newlines chomp $md; # pull out chromosome #, window start, end my ($main_chrom, $winl, $winr) = split(/\t/, $md); # put the window start, end into array for further processing @window = ($winl, $winr); # loop through the "annotation" file ILC: foreach my $ad (@annot) { # pull out the chromosome #, window start, end my ($an_chrom, undef, undef, $prol, $pror, undef, undef, undef, $mess) = split(/\t/, $ad); # make sure the chromosomes match to save processor time, skip if no match if ($main_chrom ne $an_chrom) {next ILC;} # get the gene name fromt the $mess variable my (undef, undef, $name) = split(/\;/, $mess); # load the window start sites for the individual probes @probe = ($prol, $pror); # call the range_finding sub to look for matches my $return = range_find(); if ($return eq 1) { # upon matching, print out the name of the gene along with the original values # and print OUTPUT "$name\t $md\n"; next OLC; } else {next ILC;} } } close OUTPUT; exit; sub open_file_main { # open the file, pull in the data print " What is the name of the ChIPOTle file\?"; chomp (my $file = ); open (FILE, $file) || die "Cannot open $!"; my @data = ; close FILE; return @data; } sub open_file_annot { # open the file, pull in the data print " What is the name of the annotation file\?"; chomp (my $file = ); open (FILE, $file) || die "Cannot open $!"; my @data = ; close FILE; return @data; } sub outfile { # open the outputfile open (OUTPUT, ">output.txt")|| die "Cannot open $!"; } sub calc_range { # simple sub to create the full window from the start and end sites my @peaks = @_; my @peak_range = ($peaks[0] .. $peaks[1]); return @peak_range; } sub range_find { # loop in loop to look for ANY overlap of the values; will be a true/false return my @range1 = &calc_range(@window); my @range2 = &calc_range(@probe); my $test = pop @range2; my $test2 = pop @range1; OLC2: foreach my $value1 (@range1) { # look to see if the windows don't overlap if ($test lt $window[0]) {return 0;} # look to see if the windows don't overlap elsif ($test2 lt $probe[0]) {return 0;} ILC2: foreach my $value2 (@range2) { if ( $value1 eq $value2) {return 1;} else {next ILC2;} } } return 0; } #### chr10 726178 726428 1.121753867297440e-012 1.160706397628387e-015 6 4.81000 chr10 4922028 4922428 2.163402534569952e-012 7.067018499345696e-014 9 4.43000 chr10 5126478 5127178 8.348017333255820e-013 8.243249541245406e-021 15 4.42000 chr10 14649778 14650028 2.090598548899472e-013 6.000728326245207e-018 6 5.04000 chr10 14651328 14651428 2.915017653920210e-012 1.890051416361198e-014 3 4.20000 #### chr1 NGS primary_transcript 4224 7502 -1 - . ID=00003; accession=BC063682; Name=FLJ25222; ncbi_gene_id=374666; synonyms=-; description=CXYorf1-related protein; url=http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db%3Dnucleotide%26cmd%3Dsearch%26term%3DBC063682 chr1 NGS transcription_start_site 7502 7502 -1 - . Parent=00003; accession=BC063682; Name=FLJ25222; ncbi_gene_id=374666; synonyms=-; description=CXYorf1-related protein chr1 NGS primary_transcript 4268 7438 -2 - . ID=00005; accession=BC073913; Name=MGC52000; ncbi_gene_id=375260; synonyms=CXYorf1|MGC104889|MGC111476|MGC117230|MGC90409; description=CXYorf1-related protein; url=http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db%3Dnucleotide%26cmd%3Dsearch%26term%3DBC073913 chr1 NGS transcription_start_site 7438 7438 -2 - . Parent=00005; accession=BC073913; Name=MGC52000; ncbi_gene_id=375260; synonyms=CXYorf1|MGC104889|MGC111476|MGC117230|MGC90409; description=CXYorf1-related protein chr1 NGS primary_transcript 4268 14754 -3 - . ID=00004; accession=BC048328; Name=MGC52000; ncbi_gene_id=375260; synonyms=CXYorf1|MGC104889|MGC111476|MGC117230|MGC90409; description=CXYorf1-related protein; url=http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db%3Dnucleotide%26cmd%3Dsearch%26term%3DBC048328 chr1 NGS transcription_start_site 14754 14754 -3 - . Parent=00004; accession=BC048328; Name=MGC52000; ncbi_gene_id=375260; synonyms=CXYorf1|MGC104889|MGC111476|MGC117230|MGC90409; description=CXYorf1-related protein chr1 NGS primary_transcript 4268 19697 -4 - . ID=00006; accession=BC110996; Name=MGC52000; ncbi_gene_id=375260; synonyms=CXYorf1|MGC104889|MGC111476|MGC117230|MGC90409; description=CXYorf1-related protein; url=http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db%3Dnucleotide%26cmd%3Dsearch%26term%3DBC110996