use strict;
use warnings;

# Compares four strategies for collapsing a field whose content was entered
# twice in a row (e.g. 'John Smith John Smith' -> 'John Smith').
#
# Every strategy receives the field as its first argument and edits it IN
# PLACE through the @_ alias; callers must pass a modifiable scalar. The
# driver at the bottom runs each strategy over the same sample fields and
# prints the results. The output recorded by the author is kept after
# __END__.

# test1: split the string into a leading and a trailing half of
# int(length / 2) characters each and keep only the leading half when both
# halves are equal.
# Known limitations (see recorded output): only works when the duplicate
# spans the whole string, and for odd lengths the middle character is
# dropped whatever it is ('John Smith!John Smith' becomes 'John Smith').
sub test1 {
    my $len   = length($_[0]);
    my $half  = int($len / 2);
    my $part1 = substr($_[0], 0, $half);
    my $part2 = substr($_[0], -$half);
    $_[0] = $part1 if ($part1 eq $part2);
}

# test2: regex version anchored at the start of the string; collapses a
# leading run that is immediately repeated, optionally separated by
# whitespace.
# Known limitations: a duplicate that does not start at position 0 is
# ignored, and a mere prefix repetition also matches
# ('John Johnson' becomes 'Johnson').
sub test2 {
    $_[0] =~ s/^(.+)\s*\1/$1/;
}

# test3: unanchored, global version; collapses any run of two or more
# characters that is immediately repeated, optionally separated by
# whitespace.
# Known limitation: still matches prefix repetitions
# ('foo John Johnson bar' becomes 'foo Johnson bar').
sub test3 {
    $_[0] =~ s/(.{2,})\s*\1/$1/g;
}

# test4: like test3, but the captured run and its repetition must both start
# and end on word boundaries, so 'John Johnson' is left alone.
# NOTE(review): the recorded output shows 'John SmithJohn Smith' collapsed by
# this strategy, but there is no \b between 'h' and 'J', so the pattern as
# written appears to leave that string unchanged -- re-run to confirm.
sub test4 {
    $_[0] =~ s/\b(.+)\b\s*\1\b/$1/g;
}

# Strategies in the order they are reported. Explicit code references
# replace the former call through a sub-name string (&$test), which is a
# symbolic reference and is rejected under "use strict".
my @strategies = (
    [ test1 => \&test1 ],
    [ test2 => \&test2 ],
    [ test3 => \&test3 ],
    [ test4 => \&test4 ],
);

# Sample fields fed to every strategy.
# NOTE(review): the second and third entries are identical here, yet the
# recorded output shows test1 leaving the third one untouched; the original
# third entry presumably had extra whitespace between the two names --
# confirm against the author's copy.
my @cases = (
    'John SmithJohn Smith',
    'John Smith John Smith',
    'John Smith John Smith',
    'foo John Smith John Smith bar',
    'John Johnson',
    'foo John Johnson bar',
    'John Smith!John Smith',
);

for my $strategy (@strategies) {
    my ($name, $dedupe) = @{$strategy};
    print($name, "\n");
    for my $case (@cases) {
        # Work on a copy: the strategies modify their argument in place.
        my $field = $case;
        $dedupe->($field);
        print($field, "\n");
    }
    print("\n");
}

__END__

output
======

test1
John Smith
John Smith
John Smith John Smith <-- case not covered
foo John Smith John Smith bar <-- case not covered
John Johnson
foo John Johnson bar <-- case not covered
John Smith <-- slightly buggy

test2
John Smith
John Smith
John Smith
foo John Smith John Smith bar <-- case not covered
Johnson <-- buggy
foo John Johnson bar <-- case not covered
John Smith!John Smith

test3
John Smith
John Smith
John Smith
foo John Smith bar
Johnson <-- buggy
foo Johnson bar <-- buggy
John Smith!John Smith

test4
John Smith
John Smith
John Smith
foo John Smith bar
John Johnson
foo John Johnson bar
John Smith!John Smith