# Halving approach: if the first half of $field equals its last half, keep a
# single copy.  For an odd length the middle character is ignored, so
# "X X" and "X!X" both collapse to "X"; more than one separator character,
# or any surrounding text, leaves $field untouched.
# When the length is 0 or 1 the half-length is 0 and substr($field, -0)
# yields the whole string, so such values are never changed.
# The temporaries live in a bare block so they do not leak into file scope.
{
    my $half  = int(length($field) / 2);
    my $front = substr($field, 0, $half);
    my $back  = substr($field, -$half);
    $field = $front if $front eq $back;
}
####
# Anchored regex approach: if $field starts with some text that is
# immediately repeated (optionally after whitespace), keep one copy.
# The repeated text can be any prefix, not a whole word, so
# "John Johnson" is turned into "Johnson".
$field =~ s/^(.+)\s*\1/$1/;
####
# Unanchored regex approach: anywhere in $field, collapse a run of two or
# more characters that is immediately repeated (optionally after
# whitespace), for every such occurrence (/g).
# There is no word-boundary check, so "John Johnson" still becomes "Johnson".
$field =~ s/(.{2,})\s*\1/$1/g;
####
# Word-boundary regex approach: collapse repeated text only when the first
# copy starts and ends on a word boundary and the second copy ends on one,
# with nothing but optional whitespace between them.  The trailing \b keeps
# "John Johnson" intact because the second "John" is followed by "s".
$field =~ s/\b(.+)\b\s*\1\b/$1/g;
####
# test1 -- halving approach.
# Edits its first argument in place (through the @_ alias): when the first
# half of the string equals the last half, the argument is replaced by that
# half.  For an odd length the middle character is simply skipped.
sub test1 {
    my $half = int(length($_[0]) / 2);

    # A half-length of 0 makes substr(..., -0) return the whole string, so
    # one-character values never compare equal and are left alone.
    my $head = substr($_[0], 0, $half);
    my $tail = substr($_[0], -$half);

    if ($head eq $tail) {
        $_[0] = $head;
    }
}
# test2 -- anchored regex approach.
# Edits its first argument in place: if the string begins with some text
# that is immediately repeated (optionally after whitespace), one copy is
# kept.  Returns the result of the substitution.
# The repeated text may be a partial word, so "John Johnson" -> "Johnson".
sub test2 {
    my $field_ref = \$_[0];    # reference to the caller's variable
    return ${$field_ref} =~ s/^(.+)\s*\1/$1/;
}
# test3 -- unanchored regex approach.
# Edits its first argument in place: every run of two or more characters
# that is immediately repeated (optionally after whitespace) is collapsed
# to one copy.  Returns the result of the substitution.
# No word boundaries are checked, so "John Johnson" -> "Johnson".
sub test3 {
    my $field_ref = \$_[0];    # reference to the caller's variable
    return ${$field_ref} =~ s/(.{2,})\s*\1/$1/g;
}
# test4 -- word-boundary regex approach.
# Edits its first argument in place: repeated text is collapsed only when
# the first copy starts and ends on a word boundary and the second copy
# ends on one, with nothing but optional whitespace in between.  Returns
# the result of the substitution.
# The trailing \b is what protects "John Johnson" from being shortened.
# NOTE(review): the \b after the first copy appears to rule out copies that
# run straight into each other ("John SmithJohn Smith"); confirm against the
# recorded output.
sub test4 {
    my $field_ref = \$_[0];    # reference to the caller's variable
    return ${$field_ref} =~ s/\b(.+)\b\s*\1\b/$1/g;
}
# Driver: run each candidate de-duplication routine over the same sample
# strings and print the results, one group per routine, headed by its name.
for my $name (qw( test1 test2 test3 test4 )) {
    # Resolve the routine once.  \&{$name} is the one by-name lookup that
    # 'strict refs' permits, so this keeps working if strictures are enabled.
    my $dedupe = \&{$name};

    print($name, "\n");
    for my $sample (
        'John SmithJohn Smith',
        'John Smith John Smith',
        'John Smith  John Smith',    # two spaces: multi-character separator
        'foo John Smith John Smith bar',
        'John Johnson',
        'foo John Johnson bar',
        'John Smith!John Smith',
    ) {
        # Work on a copy: the routines edit their argument in place, and
        # the literals in the list above cannot be modified.
        my $field = $sample;
        $dedupe->($field);
        print($field, "\n");
    }
    print("\n");
}
__END__
output
======
test1
John Smith
John Smith
John Smith John Smith <-- case not covered
foo John Smith John Smith bar <-- case not covered
John Johnson
foo John Johnson bar <-- case not covered
John Smith <-- slightly buggy
test2
John Smith
John Smith
John Smith
foo John Smith John Smith bar <-- case not covered
Johnson <-- buggy
foo John Johnson bar <-- case not covered
John Smith!John Smith
test3
John Smith
John Smith
John Smith
foo John Smith bar
Johnson <-- buggy
foo Johnson bar <-- buggy
John Smith!John Smith
test4
John Smith
John Smith
John Smith
foo John Smith bar
John Johnson
foo John Johnson bar
John Smith!John Smith