use Regexp::NamedCaptures;
use strict;
use warnings;

# Re-indent every *.xml file in the current directory, rewriting each file in
# place.  Each nesting level is indented by one space.
#
# The formatting is done with regular expressions, not an XML parser.
# NOTE(review): markup that contains a literal ">" inside an attribute value,
# a comment or a CDATA section is therefore likely to be formatted
# incorrectly -- confirm the inputs never contain such constructs.

# indent_xml($xml) -> $pretty
#
# Takes the text of an XML document and returns a re-indented copy:
#   * whitespace that is the only thing between two tags is discarded;
#   * whitespace directly after a tag is discarded (this includes leading
#     whitespace of text content and a newline after the last tag);
#   * whenever a tag is directly followed by another tag, a newline is
#     inserted and the following tag is indented to its nesting depth;
#   * a tag followed by text is left on the same line as that text.
sub indent_xml {
    my ($xml) = @_;

    # Remove whitespace between > and < if that is the only thing separating
    # them, so the previous layout does not influence the result.
    $xml =~ s/(?<=>)\s+(?=<)//g;

    # Nesting depth *after* the tag that is currently being processed.
    my $depth = 0;

    # Builds the replacement text for one matched tag and keeps $depth
    # up to date.  Arguments are the four capture groups of the pattern below.
    my $render = sub {
        my ( $close_tag, $body, $empty_tag, $next_tag_start ) = @_;

        # Processing instructions (<?...?>) and declarations/comments (<!...>)
        # are not elements and must not change the nesting depth.
        my $is_element = $body !~ /\A[?!]/;

        # Self-closing tag: depth unchanged.  Closing tag: one level up.
        # Opening tag: one level down.
        if ( $is_element && !$empty_tag ) {
            $depth += $close_tag ? -1 : 1;
        }

        # Put the captured tag back into place unchanged.
        my $tag = "<$close_tag$body$empty_tag>";

        # Followed by text or by the end of the input: no line break.
        return $tag unless defined $next_tag_start;

        # A following closing tag belongs one level above the content it
        # closes; a following opening or self-closing tag sits at the
        # current depth.
        my $next_depth = $next_tag_start eq '</' ? $depth - 1 : $depth;

        # Unbalanced input could drive the depth below zero; clamp so the
        # repetition count is never negative.
        $next_depth = 0 if $next_depth < 0;

        return $tag . "\n" . ( ' ' x $next_depth );
    };

    # Plain numbered groups are used here so the pattern does not depend on
    # any rewriting of named groups by modules loaded at the top of the file.
    # The tag body is matched lazily so that a trailing slash ends up in
    # group 3 while slashes inside attribute values stay in group 2.
    # (No sigils in the pattern comments: they would be interpolated.)
    $xml =~ s{
        <
        (/?)            # group 1: slash of a closing tag, or empty
        ([^>]+?)        # group 2: tag name plus any attributes
        (/?)            # group 3: slash of a self-closing tag, or empty
        >
        \s*             # whitespace after the tag is dropped
        (?= (</?)? )    # group 4: start of a directly following tag, if any
    }{
        $render->( $1, $2, $3, $4 )
    }gex;

    return $xml;
}

for my $file ( glob '*.xml' ) {
    # Three-argument open: the file name can never be mistaken for a mode.
    open my $in, '<', $file or die "Couldn't open $file for reading: $!";

    # Slurp the whole file; $/ is only changed inside this block.
    my $xml = do { local $/; <$in> };
    defined $xml or die "Couldn't read $file: $!";
    close $in or die "Couldn't close $file: $!";

    my $pretty = indent_xml($xml);

    open my $out, '>', $file or die "Couldn't open $file for writing: $!";
    print {$out} $pretty or die "Couldn't write to $file: $!";

    # Buffered write errors only surface at close, so check it.
    close $out or die "Couldn't close $file: $!";
}