#!/usr/bin/perl -w use strict; use warnings; my @LIST = qw(2E4H9A ACD1WB 10DLT4 EFC4Y0 1XAZ9B 3S6UDA AX79C2 CQAJ5F DGAK9F 0JQ0A9 1A9AP9 CH9FA3); my @SEARCHSTR = qw(*A 1* 3*A A*9C*2 *9C2); print "\n\n"; foreach my $s (@LIST) { print "\n"; foreach my $ss (@SEARCHSTR) { print( " $s==$ss ", CalcMatch($s, $ss)); } } exit; ################################################## # String | v2023.12.19 # This function compares two strings and returns # a number between 0 to 9 that shows how similar # they are. Zero means there are no similarities # at all, while 9 means the two match perfectly. # The comparison is not case sensitive. # The search string may contain asterisks which # will match a string of any length. # # Usage: INTEGER = CalcMatch(String, SearchString) # sub CalcMatch { defined $_[0] && defined $_[1] or return 1; my $SL = length($_[0]); my $FL = length($_[1]); $SL && $FL or return 1; my $STR = uc(shift); my $FIND = uc(shift); my $SAME = 9; my $COUNT = CountMatchingBytes($STR, $FIND) or return 0; my $SIMILAR = int($COUNT / length($STR) * ($SAME - 1)); $FIND =~ tr|*||s; # Delete double asterisks my $ACOUNT = $FIND =~ tr|*|*|; # Count asterisks if ($ACOUNT == 0) { return $STR eq $FIND ? $SAME : $SIMILAR; } if ($ACOUNT == length($FIND)) { return $SAME; } # Locate first and last asterisk my $FIRST = index($FIND, '*'); my $LAST = ($ACOUNT > 1) ? rindex($FIND, '*') : $FIRST; # Here we're going to "cut" both strings into three parts: # 1) the part before first asterisk # 2) the part between two asterisks # 3) the part after last asterisk # And then we're going to compare each except the middle one, # because we just do a search on the middle string. # Okay, so let's compare the first section # IF the pattern doesn't start with an asterisk: if ($FIRST > 0) { substr($STR, 0, $FIRST) eq substr($FIND, 0, $FIRST) or return $SIMILAR; } # Let's compare the last section # IF the pattern doesn't end with an asterisk. if ($LAST < length($FIND) - 1) { my $LEN = length($FIND) - $LAST - 1; substr($STR, -$LEN) eq substr($FIND, -$LEN) or return $SIMILAR; } # Now, the part between the two asterisks must appear # somewhere in the middle of the string. if ($ACOUNT > 1 && $LAST > $FIRST) { my $MIDLEN = length($STR) - $FIRST - (length($FIND) - $LAST) + 1; return index(substr($STR, $FIRST, $MIDLEN), substr($FIND, $FIRST+1, $LAST - $FIRST - 1)) < 0 ? $SIMILAR : $SAME; } # More than 2 asterisk are not supported. if ($ACOUNT > 2) { return $SIMILAR; # I just noticed that this line will probably not get executed ever. lol } return $SAME; } ################################################## # String | v2023.12.19 # This function counts how many of the characters in # STR1 match characters listed in STR2. # # Usage: COUNT = CountMatchingBytes(STR1, STR2) # sub CountMatchingBytes { defined $_[0] && defined $_[1] or return 0; my $P = length($_[0]) or return 0; my $C = 0; length($_[1]) or return 0; while ($P--) { index($_[1], substr($_[0], $P, 1)) < 0 or $C++; } return $C; }