# Report, for every pair of adjacent pathway members, the pathway(s) in which
# that pair occurs that have the fewest members.
#
# Usage: script.pl [pathways.col] [organism]
#
# Example given with the original script (pathway ID followed by its nodes):
#   pwy  nodes
#   A    a b c d e f
#   B    a b c
#
# Desired output stated for that example (node pair, then pathway):
#   de  A
#   ef  A
#   ab  B
#   ac  B
#   bc  B
#
# NOTE(review): this script only pairs *adjacent* members, so for the example
# above it would also report "cd A" and would not report "ac B" -- confirm the
# intended pairing rule against the desired output.
#
# Input: tab-separated lines; column 0 is the pathway/complex ID, column 1 is
# read as its name (unused here), and the remaining columns are scanned for IDs
# starting with "GCXG-". Lines starting with "#" or "UNIQUE-ID" are skipped.

use strict;
use warnings;
use Data::Dumper;
use List::Util qw(min);

my $in = $ARGV[0] || "pathways.col";
open(my $in_fh, '<', $in) or die "cannot open $in: $!\n";

my %HoCplx2ID;    # pathway ID => ordered list of GCXG- member IDs
my %HoPwyPair;    # "idA\tidB" => { pathway ID => number of members in it }

while (my $lines = <$in_fh>) {
    next if ($lines =~ /^#/);
    next if ($lines =~ /^UNIQUE-ID/);
    chomp $lines;

    my @cols = split(/\t/, $lines);
    next unless @cols;    # blank line: no pathway ID to key on

    my $cmplxID = $cols[0];
    #print $cmplxID."\n";

    # Column 1 is the pathway name; only the GCXG- IDs after it are used.
    my @restCols = @cols[2 .. $#cols];
    my @cycIDs   = grep { /^GCXG-/ } @restCols;

    print "cycIDs array\n";
    print Dumper(@cycIDs);

    my $pwySize = scalar(@cycIDs);
    push(@{ $HoCplx2ID{$cmplxID} }, @cycIDs);

    # Record every adjacent pair together with the size of this pathway.
    for my $i (0 .. $pwySize - 2) {
        my $pair = join("\t", $cycIDs[$i], $cycIDs[$i + 1]);
        $HoPwyPair{$pair}{$cmplxID} = $pwySize;
    }
}
close($in_fh);

########## print out pairwise with PA01 locusIDs ######
my $org     = $ARGV[1] || "PA01";
my $outfile = "$in.$org.pairwise.nxtNeighb.tab";
#open (OUT,">",$outfile);

### step 1 for each pair find smallest pathway
# Take the true minimum rather than starting from a fixed sentinel, so
# pathways with 100 or more members are handled correctly.
my %HoSmPwy;
foreach my $pair (keys %HoPwyPair) {
    $HoSmPwy{$pair} = min(values %{ $HoPwyPair{$pair} });
}
print "hash of smallest pathways\n";
#print Dumper(%HoSmPwy);

### step 2 for each pathway, look at each pair if that pwy size = smallest pathway , then print ##
print "output\n";
foreach my $pwy (sort keys %HoCplx2ID) {
    my @units   = @{ $HoCplx2ID{$pwy} };
    my $pwySize = scalar(@units);

    for my $i (0 .. $pwySize - 2) {
        my $pair = join("\t", $units[$i], $units[$i + 1]);

        # A pathway ID repeated on several input lines has its members
        # concatenated, which can form pairs that were never recorded.
        next unless defined $HoSmPwy{$pair};

        # Numeric comparison: the original used "=", which assigned to
        # $pwySize and corrupted the loop bound.
        if ($pwySize == $HoSmPwy{$pair}) {
            print join("\t", $pair, $pwy), "\n";
        }
    }
}