in reply to Finding all connected nodes in an all-against-all comparison
This is a somewhat tricky problem because later data may require that separate groups that were formed by earlier data need to be merged. Where the code is a little tricky it's worth being a little long winded, use explicit identifier names and comment each case. Consider:
use strict; use List::Util; my %nodes; my @groups; while (<DATA>) { chomp; my ($node1, $node2) = split; if (! exists $nodes{$node1} && ! exists $nodes{$node2}) { # New group $nodes{$node1} = @groups; $nodes{$node2} = @groups; push @groups, [$node1, $node2]; next; } if (! exists $nodes{$node1}) { # node1 is part of node2's group push @{$groups[$nodes{$node2}]}, $node1; $nodes{$node1} = $nodes{$node2}; } if (! exists $nodes{$node2}) { # node2 is part of node1's group push @{$groups[$nodes{$node1}]}, $node2; $nodes{$node2} = $nodes{$node1}; } next # Already met this pairing if $nodes{$node1} == $nodes{$node2}; # node1 and node2 are in different groups. Merge the groups my ($group, $nulGroup) = ($nodes{$node1}, $nodes{$node2}); push @{$groups[$group]}, @{$groups[$nulGroup]}; $nodes{$_} = $group for @{$groups[$nulGroup]}; $groups[$nulGroup] = undef; } @groups = grep {defined} @groups; for my $group (0 .. $#groups) { print 'Group', $group + 1, ': ', join (', ', @{$groups[$group]}), +"\n"; } __DATA__ Contig1 Contig2 Contig1 Contig3 Contig2 Contig1 Contig2 Contig3 Contig3 Contig1 Contig3 Contig2 Contig3 Contig4 Contig4 Contig3 Contig4 Contig5 Contig6 Contig7 Contig7 Contig6 Contig8 Contig9 Contig9 Contig10 Contig10 Contig8 Contig10 Contig11 Contig11 Contig10 C12 Contig11 C12 Contig5
Prints:
Group1: Contig6, Contig7 Group2: Contig8, Contig9, Contig10, Contig11, C12, Contig1, Contig2, C +ontig3, Contig4, Contig5
|
|---|