...
# Greedy pairwise clustering over $arrayDocs.
#
# Each document that has not yet been absorbed into a cluster acts as a
# "seed". Every later, still-unclustered document whose similarity to the
# seed reaches $self->{THRESHOLD} is appended to $clusters->{seed} and is
# marked as processed so that it can neither seed nor join another cluster.
#
# Reads:  $arrayDocs (array ref of documents), $self->{THRESHOLD}
# Writes: $clusters  (hash ref: seed document => array ref of members)
#         $similarity (assigned here; NOTE(review): presumably declared in
#         the surrounding code that is not shown - confirm)

# Documents already placed in a cluster, keyed by the document's string form.
my $docsProcessed = {};
my $size = scalar @{$arrayDocs};

# The last document has no later partner, so seeds stop at index $size - 2.
# With fewer than two documents the range is empty and nothing happens.
SEED:
for my $i (0 .. $size - 2) {
    my $seed = $arrayDocs->[$i];

    # A document that already belongs to a cluster cannot start a new one.
    next SEED if defined $docsProcessed->{$seed};

    # Candidates run through the LAST index ($size - 1). The previous bound
    # of "$j < $size - 1" skipped the final document, so it was never
    # compared with any seed and could never be clustered.
    CANDIDATE:
    for my $j ($i + 1 .. $size - 1) {
        my $candidate = $arrayDocs->[$j];

        next CANDIDATE if defined $docsProcessed->{$candidate};

        $similarity = similarity($seed, $candidate, $self->{THRESHOLD});

        if ($similarity >= $self->{THRESHOLD}) {
            # Add the matching document to the seed's cluster ...
            push @{ $clusters->{$seed} }, $candidate;

            # ... and remember it so it is not clustered a second time.
            $docsProcessed->{$candidate} = 1;
        }
    }
}
...