#!/usr/bin/perl use strict; use warnings; my %lookup_ids_by_line; my %lookup_lines_by_id; chomp (my @lines = ); foreach my $line (@lines) { my ($id1, $id2) = $line =~ m/\.(\S+)/g; push(@{$lookup_ids_by_line{$line}}, $id1, $id2); push(@{$lookup_lines_by_id{$id1}}, $line); push(@{$lookup_lines_by_id{$id2}}, $line); } my $cluster_number = 0; foreach my $line (@lines) { next unless(exists($lookup_ids_by_line{$line})); $cluster_number++; my @lines_in_cluster = ($line); my @ids_to_check = @{$lookup_ids_by_line{$line}}; delete($lookup_ids_by_line{$line}); while(@ids_to_check) { my $id = pop(@ids_to_check); next unless(exists($lookup_lines_by_id{$id})); foreach my $line (@{$lookup_lines_by_id{$id}}) { if(exists($lookup_ids_by_line{$line})) { push(@lines_in_cluster, $line); push(@ids_to_check, @{$lookup_ids_by_line{$line}}); delete($lookup_ids_by_line{$line}); } } delete($lookup_lines_by_id{$id}); } print "cluster number $cluster_number:\n\t" . join("\n\t",@lines_in_cluster) . "\n"; } __DATA__ ID5141.C1665 ID5141.C2448 ID5141.C1253 ID5144.C2039 ID5141.C1596 ID5144.C1956 ID5141.C1906 ID5144.C2149 ID5141.C1221 ID5144.C1956 ID5141.C2149 ID5141.C2386 ID5141.C2039 ID5142.C1221 ID5141.C5887 ID5141.C7685 ID5141.C1005 ID5142.C2808 ID5141.C1046 ID5141.C1596 ID5141.C2386 ID5141.C4990 ID5141.C7685 ID5141.C4888