#!/usr/bin/perl
use strict;
use warnings;

# Groups input lines into clusters: two lines belong to the same cluster when
# they share at least one id, directly or through a chain of other lines.
#
# An "id" is whatever follows a dot up to the next whitespace character, so
# for a record such as "ID5141.C1665" only "C1665" takes part in matching;
# the text in front of the dot is ignored.

# cluster_lines(@lines)
#
# Takes the input lines and returns a list of array references, one per
# cluster. Each array holds the zero-based indices of the lines in that
# cluster, in ascending order. Clusters are returned in the order in which
# their earliest surviving group was created.
#
# Lines that contain no id cannot be related to anything and are skipped.
sub cluster_lines {
    my @lines = @_;

    # Each cluster is { lines => [line indices], ids => { id => 1, ... } }.
    my @clusters;

    LINE:
    for my $line_no (0 .. $#lines) {
        my @ids = $lines[$line_no] =~ m/\.(\S+)/g;
        next LINE unless @ids;

        # Collect every existing cluster that already knows one of these ids.
        my @hits;
        for my $idx (0 .. $#clusters) {
            my $known = $clusters[$idx]{ids};
            push @hits, $idx if grep { exists $known->{$_} } @ids;
        }

        # The line joins the first matching cluster, or founds a new one.
        my $home;
        if (@hits) {
            $home = $clusters[ shift @hits ];
        }
        else {
            $home = { lines => [], ids => {} };
            push @clusters, $home;
        }

        push @{ $home->{lines} }, $line_no;
        $home->{ids}{$_} = 1 for @ids;

        # This line may bridge several previously separate clusters: fold all
        # of them into $home. Removing from the highest index downwards keeps
        # the remaining indices in @hits valid.
        for my $idx (reverse @hits) {
            my ($absorbed) = splice @clusters, $idx, 1;
            push @{ $home->{lines} }, @{ $absorbed->{lines} };
            $home->{ids}{$_} = 1 for keys %{ $absorbed->{ids} };
        }
    }

    return map { [ sort { $a <=> $b } @{ $_->{lines} } ] } @clusters;
}

# Reads the records from the DATA section, clusters them and prints every
# cluster with its member lines in input order. Returns the exit status.
sub main {
    chomp(my @lines = <DATA>);

    my @clusters = cluster_lines(@lines);

    for my $number (0 .. $#clusters) {
        print "Cluster number " . ($number + 1) . ":\n";
        for my $line_no (@{ $clusters[$number] }) {
            print "\t$lines[$line_no]\n";
        }
        print "\n";
    }

    return 0;
}

# Run only when executed as a script, so the file can also be loaded with
# require (for example from a test) without side effects.
exit(main()) unless caller;

1;

# NOTE(review): the sample data below is laid out as one pair of records per
# line; that layout is an assumption about the intended input - confirm.
__DATA__
ID5141.C1665 ID5141.C2448
ID5141.C1253 ID5144.C2039
ID5141.C1596 ID5144.C1956
ID5141.C1906 ID5144.C2149
ID5141.C1221 ID5144.C1956
ID5141.C2149 ID5141.C2386
ID5141.C2039 ID5142.C1221
ID5141.C5887 ID5141.C7685
ID5141.C1005 ID5142.C2808
ID5141.C1046 ID5141.C1596
ID5141.C2386 ID5141.C4990
ID5141.C7685 ID5141.C4888