#!/usr/bin/perl
use strict;
use warnings;

# Groups microsatellite records whose coordinates chain together.
#
# Usage: script.pl <microsatellite_file> <output_file>
#
# Input is a tab-separated file, one record per line.  Field 5 holds the start
# coordinate and field 7 the end coordinate (0-based field indices), e.g.:
#   "NA1182988952620.b.scf dinucleotide AG 555 65 6000 : 6002 a-g"
#
# Pass 1 indexes every record by its start coordinate.
# Pass 2 walks the file again; each record not yet emitted seeds a group.  Every
# integer position from (start - 1) to (end + 1) is probed in the index, and any
# record starting at a probed position joins the group and widens the window.
# Each group is written as one output line: its records joined by tabs.
#
# Records without two integer coordinates cannot be indexed or used as a seed
# window; they are written out unchanged as single-record groups.

my ($microsats, $orths) = @ARGV;
die "Usage: $0 <microsatellite_file> <output_file>\n"
    unless defined $microsats && defined $orths;

# Three-argument open with lexical handles: the filename can never be
# interpreted as an open mode, and the handles are scoped to this file.
open(my $mic_fh, '<', $microsats) or die "Cannot open file $microsats: $!";
open(my $orths_fh, '>', $orths) or die "Cannot open file $orths: $!";

my $startcord = 5;    # index of the start-coordinate field
my $endcord   = 7;    # index of the end-coordinate field

# Returns true when the record (array ref of fields) carries integer start and
# end coordinates in the expected columns.
sub _has_coords {
    my ($fields) = @_;
    for my $col ($startcord, $endcord) {
        my $value = $fields->[$col];
        return 0 unless defined $value && $value =~ /\A-?\d+\z/;
    }
    return 1;
}

# Pass 1: index every record by start coordinate.
my %starthash;
while (my $line = <$mic_fh>) {
    chomp $line;
    my @fields = split /\t/, $line;
    next unless _has_coords(\@fields);

    # "+ 0" normalises the key (e.g. "06000" -> "6000") so that the numeric
    # probes in pass 2 find it.
    push @{ $starthash{ $fields[$startcord] + 0 } }, $line;
}

# Pass 1 left the handle at EOF; rewind so pass 2 sees the records again.
seek($mic_fh, 0, 0) or die "Cannot rewind file $microsats: $!";

# Pass 2: build and emit the groups.
my %preused;    # records already written, keyed by the full line text
while (my $line = <$mic_fh>) {
    chomp $line;
    next if exists $preused{$line};
    $preused{$line} = 1;

    my @group  = ($line);
    my @fields = split /\t/, $line;

    if (_has_coords(\@fields)) {
        my $searchstart = $fields[$startcord] - 1;
        my $searchend   = $fields[$endcord] + 1;

        # C-style loop on purpose: both the bound ($searchend) and the cursor
        # ($i) are adjusted from inside the body as the window grows.
        for (my $i = $searchstart; $i <= $searchend; $i++) {
            # Each start position is consumed the first time it is probed.
            my $candidates = delete $starthash{$i} or next;

            for my $single (@{$candidates}) {
                next if exists $preused{$single};
                $preused{$single} = 1;
                push @group, $single;

                my @sields     = split /\t/, $single;
                my $cand_start = $sields[$startcord] - 1;
                my $cand_end   = $sields[$endcord] + 1;

                $searchend = $cand_end if $cand_end > $searchend;

                if ($cand_start < $searchstart) {
                    # Window grew to the left: step the cursor back so the
                    # loop increment lands on the new lower bound.
                    $i           = $cand_start - 1;
                    $searchstart = $cand_start;
                }
            }
        }
    }

    print {$orths_fh} join("\t", @group), "\n";
}

close($mic_fh) or die "Cannot close file $microsats: $!";

# Buffered write errors only surface at close, so check it.
close($orths_fh) or die "Cannot close file $orths: $!";