#!/usr/bin/perl
use strict;
use warnings;

use Compress::Zlib 'compress';

# Demonstrates a compression-based string similarity measure: builds the
# similarity table for a handful of song titles and prints it, one row per
# title.  The output of a sample run is kept after __END__.

# Returns the length in bytes of the zlib-compressed form of STRING.
#
# zlib operates on bytes, so a string holding characters above 0xFF cannot
# be handed to compress() as is.  Strings that fit in single bytes are
# downgraded (which leaves their bytes, and therefore the result, exactly
# as before); anything else is UTF-8 encoded first.
sub _compressed_length {
    my ($string) = @_;    # a copy, so the caller's scalar is never modified

    utf8::downgrade($string, 1) or utf8::encode($string);

    return length compress($string);
}

# Usage:   $arrayref = similarity(LIST)
# Returns: AoA reference to string similarity table for LIST
#
# Cell [i][j] of the table is
#
#     compressed_length(LIST[i] . LIST[j])
#         / (compressed_length(LIST[i]) + compressed_length(LIST[j]))
#
# The more two strings have in common, the better their concatenation
# compresses, so smaller values indicate greater similarity.  The table is
# not symmetric: [i][j] and [j][i] compress different concatenations.
# An empty LIST yields a reference to an empty array.
sub similarity {
    my @strings = @_;

    # Compressed size of every distinct string on its own, computed once
    # up front because each one is needed in a whole row and a whole column.
    my %single;
    for my $string (@strings) {
        next if exists $single{$string};
        $single{$string} = _compressed_length($string);
    }

    my @table;
    for my $this (@strings) {
        push @table, [
            map {
                _compressed_length($this . $_)
                    / ($single{$this} + $single{$_})
            } @strings
        ];
    }

    return \@table;
}

my @titles = (
    q(The Last Public Hanging In Old West Virginia - Flatt and Scruggs),
    q(Flatt_and_Scruggs__The_Last_Public_Hanging_In_Old_West_Virginia),
    q(Rainy Day Woman Number 12 and 35 - Flatt and Scruggs),
    q(Rainy Day Woman Number Twelve and Thirty-five - Bob Dylan),
);

my $results = similarity(@titles);

for my $row (@$results) {
    # Each value is printed with three decimals in a 6-character,
    # space-padded column.
    print pack('A6' x @$row, map { sprintf '%4.3f', $_ } @$row), "\n";
}

__END__
0.529 0.715 0.784 0.841
0.708 0.529 0.887 0.870
0.784 0.863 0.536 0.748
0.848 0.863 0.739 0.532