# Replace every run of "U" characters that is bracketed on both sides by the
# same non-"U" character with copies of that bracketing character, so that
# e.g. "AUUA" becomes "AAAA".  Runs at either end of the string, and runs
# whose two neighbours differ (e.g. "AUUC"), are left untouched.
#
# The replacement side is a /e expression, so its explanatory comments must
# each sit on a line of their own: a "#" comment sharing a line with the
# expression would swallow it, and every match would then be replaced by an
# empty string instead of the repeated bracketing character.
$seq =~ s{
    ([^U])      # capture 1: the bracketing character (anything other than U)
    (U+)        # capture 2: the run of one or more Us
    (?=\1)      # lookahead only: the same bracketing character must follow;
                # it is not consumed, so it can also open the next run
}{
    # Emit the bracketing character once for itself plus once for each U,
    # which keeps the overall length of the string unchanged.
    $1 x (1 + length($2))
}xeg;    # /x: free-form pattern, /e: evaluate replacement, /g: every run