# Re-indent every *.xml file in the current directory, in place.
#
# This is a regex-based formatter, not an XML parser:
#   * whitespace that is the only thing between two tags is discarded;
#   * whitespace immediately following a tag (including a trailing newline
#     at the end of the document) is discarded;
#   * each nesting level is indented by two spaces;
#   * tags whose body contains "/" or ">" (for example an attribute value
#     holding a URL) are not recognised and are left exactly as they are.
#
# Uses Perl's built-in named captures (perl 5.10+), so no CPAN module is
# required.
use 5.010;
use strict;
use warnings;

# Return the indentation string for a nesting level.
# Levels of zero or below yield the empty string, so unbalanced input never
# triggers a negative repeat count.
sub _pad {
    my ($level) = @_;
    return $level > 0 ? '  ' x $level : '';
}

# Re-emit one matched tag followed by whatever line break and indentation
# should precede the next tag, updating the running depth on the way.
#
#   $depth_ref - reference to the current nesting depth (updated in place)
#   $close     - "/" if the tag is a closing tag, "" otherwise
#   $name      - tag name together with any attributes
#   $empty     - "/" if the tag is self-closing, "" otherwise
#   $next      - "<" or "</" when another tag follows directly, else undef
sub _render_tag {
    my ($depth_ref, $close, $name, $empty, $next) = @_;

    # <?xml ...?>, <!DOCTYPE ...>, <!-- ... --> and friends never nest.
    my $is_declaration = index('?!', substr($name, 0, 1)) >= 0;
    my $is_opening     = !$close && !$empty && !$is_declaration;

    if ($empty || $is_declaration) {
        # Self-contained markup: the depth does not change.
    }
    elsif ($close) {
        # Clamp at zero so a stray closing tag cannot drive the depth negative.
        $$depth_ref-- if $$depth_ref > 0;
    }
    else {
        $$depth_ref++;
    }

    my $tag = "<$close$name$empty>";

    # Text (or the end of the document) follows: keep it on the same line.
    return $tag unless defined $next && length $next;

    if ($next eq '</') {
        # Keep an element without content together: <a></a>.
        return $tag if $is_opening;

        # The upcoming closing tag belongs one level further out.
        return $tag . "\n" . _pad($$depth_ref - 1);
    }

    # Another opening, self-closing or declaration tag follows.
    return $tag . "\n" . _pad($$depth_ref);
}

# Return a re-indented copy of an XML document held in a string.
# An undefined or empty argument yields the empty string.
sub indent_xml {
    my ($xml) = @_;
    return '' unless defined $xml && length $xml;

    # Remove whitespace between > and < if that is the only thing
    # separating them.
    $xml =~ s/(?<=>)\s+(?=<)//g;

    my $depth = 0;
    $xml =~ s{
        <
        (?<close> /? )          # "/" when this is a closing tag
        (?<name>  [^/>]+ )      # tag name plus any attributes
        (?<empty> /? )          # "/" when the tag is self-closing
        >
        \s*                     # whitespace after the tag is dropped
        (?= (?<next> </? )? )   # peek at the start of the next tag, if any
    }{
        _render_tag(\$depth, $+{close}, $+{name}, $+{empty}, $+{next})
    }gex;

    return $xml;
}

# Read a whole file into a string; dies with a descriptive message on error.
sub _slurp {
    my ($file) = @_;

    open my $in, '<', $file
        or die "Couldn't open $file for reading: $!";
    my $content = do { local $/; <$in> };    # slurp mode, for this read only
    close $in or die "Couldn't close $file: $!";

    return $content // '';
}

# Replace the contents of a file; dies with a descriptive message on error.
sub _spew {
    my ($file, $content) = @_;

    open my $out, '>', $file
        or die "Couldn't open $file for writing: $!";
    print {$out} $content or die "Couldn't write to $file: $!";
    # Buffered write errors only surface here, so close must be checked too.
    close $out or die "Couldn't close $file: $!";

    return;
}

# Rewrite every *.xml file in the current directory with fresh indentation.
sub main {
    for my $file (glob '*.xml') {
        _spew($file, indent_xml(_slurp($file)));
    }
    return;
}

# Only touch files when run as a script, not when loaded by other code.
main() unless caller;

1;