#!/usr/bin/perl
use strict;
use warnings;

# Scan XML-ish text given on the command line (or STDIN) and report every
# input record that still contains a '<', '>' or '&' after all well-formed
# tags and entity references have been removed from a scratch copy of it.
#
# Usage: perl this_script.pl file.xml [more files ...]

# An element / processing-instruction name.
my $NAME_RE = qr{[A-Za-z_:][\w:.\-]*};

# Markup that is considered "good" and is stripped before looking for stray
# angle brackets.  This accepts any syntactically well-formed tag; to accept
# only a known set of tags, replace the name part with an alternation of the
# allowed names.  Attribute values containing a literal '>' are not
# recognised and will be reported.
my $GOOD_TAG_RE = qr{
      <!--.*?-->                           # comment
    | <!\[CDATA\[.*?\]\]>                  # CDATA section
    | <![A-Za-z][^<>]*>                    # declaration such as DOCTYPE
    | <\?$NAME_RE(?:\s[^<>]*?)?\?>         # processing instruction
    | </$NAME_RE\s*>                       # closing tag
    | <$NAME_RE(?:\s[^<>]*)?/?>            # opening or self-closing tag
}xs;

# Named entities plus decimal and hexadecimal character references.
my $GOOD_ENTITY_RE = qr{ & (?: \w+ | \# \d+ | \# x [0-9A-Fa-f]+ ) ; }x;

# Return a description of the markup problems found in one record, or the
# empty string when the record is clean.
sub _find_problems {
    my ($record) = @_;

    my $chk = $record;    # work on a copy; the caller prints the original
    my @problems;

    $chk =~ s{$GOOD_TAG_RE}{}g;
    push @problems, 'stray angle bracket(s)' if $chk =~ /[<>]/;

    $chk =~ s{$GOOD_ENTITY_RE}{}g;
    push @problems, 'stray ampersand(s)' if $chk =~ /&/;

    return join ' ', @problems;
}

# Read records from the files named in @ARGV (or STDIN) and print each
# problematic record, prefixed by its record number and the problem found.
sub main {
    # One record per line.  To check whole multi-line structures instead,
    # set this to the text that terminates a record (for example the
    # record's closing tag followed by a newline).
    local $/ = "\n";

    while ( my $record = <> ) {
        my $prob = _find_problems($record);
        print "Record $. has $prob:\n$record" if $prob;
    }
    return;
}

# Run only when executed as a script, so the helpers can be loaded by tests.
main() unless caller;

####
# perl 1-liner to output one "tag" per line (run from the shell; it is not
# part of this script):
#   perl -pe 's{^.*?<}{}; s{>[^<]*}{>\n}g;' file.xml | sort | uniq -c