use strict;
use warnings;

# ---------------------------------------------------------------------------
# Snippet 1: walk the "<tag>content" runs of a string and re-emit them.
#
# NOTE(review): this file arrived collapsed onto a single line, so everything
# after the first '#' was a comment and none of the code below was compiled.
# Line breaks are restored here. Several literals in this snippet also look
# as if markup was stripped from them during extraction; they are reproduced
# exactly as found, because the missing text cannot be recovered from this
# file alone.
# ---------------------------------------------------------------------------

# NOTE(review): this sample contains no "<word>" tag, so the loop below never
# matches it as written -- confirm the intended sample text.
my $str = 'Data < ';

# this code only works if same name tags are never nested
# in your XML-like samples.
$str =~ s/^\s+//;    # drop leading whitespace
my $sResult = '';

# $1 is the tag name. $2 is the content: any run of characters that are
# either not '<', or a '<' that is not followed by "/NAME>" for the same
# tag name.
while ($str =~ m{<(\w+)>((?:[^<]|<(?!/\1>))*)\s*}g) {
    my $tag     = $1;
    my $innards = $2;

    # NOTE(review): the search pattern here is empty. Perl treats an empty
    # pattern as "re-use the last successfully matched pattern", which is
    # very unlikely to be the intent -- the original pattern needs restoring.
    $innards =~ s//>/;

    $sResult .= "<$tag>$innards";
}
print STDERR "output: $sResult\n";

####

# NOTE(review): $AllowXHTML is read by PolishHTML but is not declared in the
# visible source. It is declared here so the file compiles under strict;
# confirm where the real setting lives.
our $AllowXHTML;

# PolishHTML($str)
#
# Escapes the free text of $str while passing existing markup through
# untouched, and returns the result (the argument is copied, not modified).
#
# Passed through unchanged:
#   * named entities (&name;) and decimal character references (&#123;)
#   * opening tags with optional attributes (unquoted, '...' or "..." values)
#   * closing tags (</name>)
#   * self-closing tags (<br/>) only when $AllowXHTML is true
#
# Everything between those pieces is handed to HTML::Entities::encode with
# an explicit "do not encode" character set.
#
# HTML::Entities is not loaded anywhere in the visible source; it must have
# been loaded before this sub is called.
sub PolishHTML {
    my $str = shift;

    my $name       = qr{\w[\w\d:\-]*};
    my $attr_value = qr{[^" '><\s]+|(?:'[^']*')+|(?:"[^"]*")+};
    my $attribute  = qr{\s+${name}(?:\s*=\s*(?:${attr_value}))?};

    # The XHTML switch only decides whether a '/' may precede the final '>'.
    my $tag_close = $AllowXHTML ? qr{\s*/?>} : qr{\s*>};
    my $open_tag  = qr{<${name}(?:${attribute})*${tag_close}};

    # Without this alternative "<b>" was kept but "</b>" was escaped, since
    # the opening-tag pattern requires a word character right after '<'.
    my $end_tag = qr{</${name}\s*>};

    # NOTE(review): the source had two empty alternatives ("|||$") before the
    # final '$'. They matched the empty string at every position and are
    # dropped. One is presumably where the closing-tag pattern above was
    # lost; the other is unknown (possibly a comment pattern).
    #
    # $1 = free text up to the next preserved piece or end of line (encoded)
    # $2 = the preserved piece itself, or empty at end of line (kept as is)
    $str =~ s{(.*?)(&\w+;|&#\d+;|$open_tag|$end_tag|$)}
             {HTML::Entities::encode($1, '^\r\n\t !\#\$%\"\'-;=?-~') . $2}gem;

    return $str;
}