For completeness, here are the answers given above, gathered into a single script and checked with Test::More.

"use utf8" is needed because of the accented characters in the test data at the bottom of the script (the __DATA__ section).

# Compare several name-normalisation routines against the expected results
# listed in the __DATA__ section.  Each data line has the form
#
#     <raw name> ** <expected normalised name>
#
# Only translate_eily2() is exercised by the test loop; the other translate_*
# subs are the earlier answers from the thread, kept for reference.

use strict;
use warnings;
use utf8;    # the source (and therefore __DATA__) contains accented letters

# The output layers must be set BEFORE Test::More is loaded: Test::Builder
# duplicates STDOUT/STDERR, so layers applied afterwards are not seen by the
# handles it prints test names to ("Wide character in print").
use open qw(:std :encoding(UTF-8));
use Test::More;

my $pos = 1;
while ( my $t = <DATA> ) {
    chomp $t;
    next unless $t =~ /\S/;    # ignore blank lines

    my @data = split /\s\*\*\s/, $t;
    if ( @data < 2 ) {
        diag("skipping malformed DATA line $.: $t");
        next;
    }
    my ( $input, $expected ) = @data;

    my $res = translate_eily2($input);

    # is() reports got/expected on failure, unlike ok( $res eq $expected ).
    is( $res, $expected, "*$input* -> *$res* / *$expected* " . $pos++ );
}
done_testing;

# translate($str)
# Splits $str into words on anything that is not an ASCII letter.  A word
# containing a lower-case letter directly followed by an upper-case letter is
# broken into its (optional upper-case run + lower-case run) pieces, joined
# with spaces as captured; any other word is lower-cased and then ucfirst'ed.
# Returns the words joined with single spaces.
sub translate {
    my $str = shift;

    $str =~ tr/-/ /;           # replace - with a space
    $str =~ tr/a-zA-Z/ /cs;    # replace each run of non-letters with one space

    my @words = split( /\s+/, $str );
    foreach my $w (@words) {

        # insert a space when an upper-case letter is inside a word
        if ( $w =~ /\p{isLower}\p{isUpper}/ ) {
            my @all;
            while ( $w =~ m/\G(\p{isUpper}*\p{isLower}+)/g ) {
                push @all, $1;
            }
            $w = join( " ", @all );
        }
        else {
            # $w aliases the array element, so this updates @words in place
            $w = ucfirst( lc($w) );
        }
    }
    return join( ' ', @words );
}

# translate_eily($name)
# ASCII-only version: replaces every run of non-[a-zA-Z] characters, and every
# position between an ASCII letter and a following ASCII upper-case letter,
# with a single space; then capitalises each \w+ word and strips trailing
# whitespace.  Returns the resulting string.
sub translate_eily {
    my $name = shift;

    $name =~ s/
        [^a-zA-Z]+          # Non letter chars
        |                   # or
        (?<= [a-zA-Z] )     # Something that comes after a letter
        (?= [A-Z] )         # and comes before an uppercase letter
    / /xg;

    # \u is short for ucfirst and \L for lc
    $name =~ s/(\w+)/\u\L$1/g;
    $name =~ s/\s+$//g;
    return $name;
}

# translate_hippo($str)
# Replaces runs of non-ASCII-letters with a space and splits into words.  In
# each word a space is inserted between a lower-case letter and a following
# upper-case letter; if no such pair exists the word is lower-cased and
# ucfirst'ed instead.  Returns the words joined with single spaces.
sub translate_hippo {
    my $str = shift;

    $str =~ tr/a-zA-Z/ /cs;    # replace each run of non-letters with one space

    my @words = split( /\s+/, $str );
    foreach my $w (@words) {

        # s///g returns false when nothing was replaced, which triggers the
        # plain capitalisation on the right-hand side of "or"
        $w =~ s/(\p{isLower})(\p{isUpper})/$1 $2/g
            or $w = ucfirst( lc($w) );
    }
    return join( ' ', @words );
}

# translate_choroba($str)
# Replaces runs of non-ASCII-letters with a space, inserts a space between a
# lower-case letter and a following upper-case letter, upper-cases a lower-case
# letter that starts the string or follows whitespace, and returns the result
# with trailing whitespace removed.
sub translate_choroba {
    my ($str) = @_;

    $str =~ tr/-/ /;
    $str =~ tr/a-zA-Z/ /cs;    # replace each run of non-letters with one space
    $str =~ s/(?<=\p{isLower})(?=\p{isUpper})/ /g;
    $str =~ s/(?:(?<=\s)|(?<=^))(\p{isLower})/\u$1/g;

    # s///r returns the modified copy; make the return explicit
    return $str =~ s/\s+$//r;
}

# translate_eily2($name)
# Unicode-aware version of translate_eily(): replaces every run of non-letter
# characters, and every position between a letter and a following upper-case
# letter, with a single space; then capitalises each \w+ word and strips
# trailing whitespace.  Returns the resulting string.
sub translate_eily2 {
    my $name = shift;

    $name =~ s/
        \P{isLetter}+           # Non letter chars
        |                       # or
        (?<= \p{isLetter} )     # Something that comes after a letter
        (?= \p{isUpper} )       # and comes before an uppercase letter
    / /xg;

    # \u is short for ucfirst and \L for lc
    $name =~ s/(\w+)/\u\L$1/g;
    $name =~ s/\s+$//g;
    return $name;
}

__DATA__
Acierno James S., Jr. ** Acierno James S Jr
Ackermann-Hirschi L. ** Ackermann Hirschi L
Alba-Castro Jose-Luis ** Alba Castro Jose Luis
Boulangère Françoise ** Boulangère Françoise
AlconadaVerzini M. J. ** Alconada Verzini M J
AmorDosSantos S. P ** Amor Dos Santos S P
da Costa F. Barreiro Guimaraes ** Da Costa F Barreiro Guimaraes
deRenstrom P. A. Bruckman ** De Renstrom P A Bruckman
Fauccigiannelli M. ** Fauccigiannelli M
FaucciGiannelli M. ** Faucci Giannelli M
Yao W-M ** Yao W M
Yao W-M. ** Yao W M
Yao W. -M ** Yao W M
Yao W. -M. ** Yao W M

In reply to Re^4: regex: help for improvement by frazap
in thread regex: help for improvement by frazap

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.