# Rewrite every run of "U" characters in a copy of the given string and
# return the copy.  Three substitutions are applied, in this order:
#
#   1. a run of Us with the same non-U character on both sides is filled
#      with that character                          ("AUUA" -> "AAAA");
#   2. a run of Us at the start of the string becomes Is
#                                                   ("UUA"  -> "IIA");
#   3. a run of Us at the end of the string (or just before one final
#      newline) becomes Os                          ("AUU"  -> "AOO").
#
# A run whose two neighbours differ ("AUUB") is left untouched.  A string
# made up only of Us is turned into Is by step 2, which leaves nothing for
# step 3 to match.
sub _resolve_u_runs {
    my ($text) = @_;

    # The replacement below is Perl code (/e).  It is deliberately kept free
    # of "#" comments: such a comment runs to the end of the physical line,
    # so it would swallow the expression if the line breaks were ever lost.
    $text =~ s{
        ([^U])    # bracketing character: anything except U
        (U+)      # the run of Us it encloses
        (?=\1)    # the same character again; a lookahead, so it is not
                  # consumed and can also open the next run
    }{ $1 x (1 + length($2)) }xeg;

    # Both patterns below are anchored, so each can match at most once and
    # needs no /g.  \A and \Z match exactly where ^ and $ do without /m.
    $text =~ s{\A(U+)}{ "I" x length($1) }e;
    $text =~ s{(U+)\Z}{ "O" x length($1) }e;

    return $text;
}

# Apply all three rewrites to $seq.
$seq = _resolve_u_runs($seq);