use strict; use warnings; my %overlays; open my $DATA, '<', 'PAIRS2Clean.DAT'; while (<$DATA>) { chomp; my @next = grep $_, split; while (@next > 1) { my $top = shift @next; $overlays{$top}{$next[0]} = 1; } } close $DATA; my @chains = buildChains (\%overlays); my %assdTops; # Top (group id) associated with a node # Run through chains and build groupings for my $chain (@chains) { my $top = shift @$chain; # Use first item in chain as group id my @priorTops = map {$assdTops{$_}} grep {exists $assdTops{$_}} @$chain; if (@priorTops) { # Found groups that can be amalgamated my %seen; # remove duplicates @priorTops = grep {! $seen{$_}++} @priorTops; # make sure old id node is included in super group $assdTops{$top} = $priorTops[0] unless $priorTops[0] == $top; # First item becomes super group id $top = shift @priorTops; # Merge nodes into super group for my $oldTop (@priorTops) { $assdTops{$_} = $top for grep {$assdTops{$_} == $oldTop} keys %assdTops; } } $assdTops{$_} = $top for @$chain; } my $done; # Find group ids that belong to another group until ($done) { $done = 1; for my $key (sort keys %assdTops) { next unless exists $assdTops{$assdTops{$key}} && $assdTops{$assdTops{$key}} != $assdTops{$key}; # Found nested node. Move it. $assdTops{$key} = $assdTops{$assdTops{$key}}; $done = 0; } } my %groups; # Build hash of group ids and group members $groups{$assdTops{$_}}{$_}++ > 1 and die "$_ is in more than one group" for keys %assdTops; # print groups for my $group (sort {$a <=> $b} keys %groups) { print join (', ', $group, sort {$a <=> $b} keys %{$groups{$group}}), "\n\n"; } sub buildChains { # Build chains my $overs = shift; my @chains; for my $top (sort {$a <=> $b} keys %$overs) { if (! exists $overlays{$top}) { push @chains, [$top]; next; } my @subChains = buildChains ($overlays{$top}); push @chains, [$top, @$_] for @subChains; } return @chains; }