#!/usr/bin/perl -w use strict; use XML::Parser; my $file= 'crap.xml'; my $fixes=0; my @tags; # stack of tags used to figure out the last non closed tag my $p= XML::Parser->new( Handlers => { Start => sub { push @tags, $_[1]; }, End => sub { pop @tags; }, }, ErrorContext => 1, ); my( $error, $last_error); do { $last_error= $error||''; undef $@; eval{ $p->parsefile( $file); }; #warn "error: $@ => close $tags[-1]\n" if( $@ && ($@ ne $last_error)); if( $@=~ m{^\s*mismatched tag at line (\d+), column (\d+)}) { close_tag( $file, $tags[-1], $1, $2); $fixes++; } # you could add other types of fixes below } until( !$@ || ($@ eq $last_error)); if( $@) { print "could not fix the file: $@\n"; } else { print "success! ($fixes tags fixed)\n"; } sub close_tag { my( $file, $tag, $line, $column)= @_; my $temp= "crap.new"; open( my $in, '<', $file) or die "cannot open file (r) '$file': $!\n"; open( my $out, '>', $temp) or die "cannot open file (w) '$temp': $!\n"; # print the beginning of the file (untouched) for (1..$line-1) { print {$out} scalar <$in>; } # close the tag my $faulty_line=<$in>; # the reported column seems to be off by 3, but I suspect this might # vary depending on the xml prefix, so this looks safer my $real_column= rindex( $faulty_line, '<', $column) - 1; substr( $faulty_line, $real_column, 0)= "\n"; print {$out} $faulty_line; # finish printing while( <$in>) { print {$out} $_; } close $in; close $out; rename $temp, $file or die "cannot replace file '$file' by new version in '$temp'"; }