Ms Foo Mr Bar Mss Toto Dr Tata #### Ms Foo Mr Bar Mss Toto Dr Tata #### my %change=( 'CommitteeList' => 'person', # CommitteeList tag should become person 'Ch_Chair' => 'person[officer]'); # Ch_Chair tag should become person too # but consecutive officers should then be # wrapped in an officer tag my %wrap= ( officers => 'person[officer]+', # this defines the wrapping perslist => 'officers?, person+'); # then we wrap the whole list in a perslist tag my @wrap=( 'officers', 'perslist'); # to process them in the proper order #### my %wrapper; # stores the regular expression generated from %wrap local $/=""; # I have previously inserted those tags so I can split my file in independant chunks while(my $bit= <$infile>) { # the chunk tag is removed here, amongst other irrelevant things foreach my $tag ( keys %change) { change_tag( $bit, $tag, $change{$tag}); } # looks easy isn't it? foreach my $tag (@wrap) { wrap( $bit, $tag, $wrap{$tag}); } # easy too! # remove the extra er attributes (amongst other things) here print $bit; # spit it out } # this one is easy # change source_tag into target_tag, adding an er attribute if specified sub change_tag($$$) { my $source_tag = $_[1] || warn "no source_tag"; my $target_tag= $_[2] || warn "no target_tag"; my $target_att=''; # check if an attribute should be included (in brackets) if( $target_tag=~ /(\w+)\[(\w+)\]/) { $target_tag=$1; $target_att=$2 }; # replace opening tags, existing attributes are untouched if( $target_att) { $_[0]=~ s{<$source_tag\b} {<$target_tag er="$target_att"}gs; } else { $_[0]=~ s{<$source_tag\b} {<$target_tag}gs; } # replace end tags $_[0]=~ s{} {}gs; } # this one is a little scarier (especially the last line!) sub wrap($$$) { my $tag= $_[1]; my $expr=$_[2]; $wrapper{$tag}||= make_wrapper( $expr, $tag); &{$wrapper{$tag}}; # Gee, this looks weird! Did I unknowingly use the # "call this function with the same @_" trick? I guess so } # this one is the one that does the real work sub make_wrapper($$) { my( $expr, $tag)= @_; my $att= ''; my $subr; # figure out whether an attribute should be included if( $tag=~ /(\w+)\[(\w+)\]/) { $tag=$1; $att=$2 }; # build regexp from the nicer syntax $expr=~ s{(\w+)\b(?![\[\]])}{(<$1.*?\\\s*)}g; # no attribute given $expr=~ s{(\w+)\[(\w+)\]}{(<$1 er=\"$2\".*?\\\s*)}g; # attribute given $expr=~ s{,\s*} {\\\s*}g; # now build the wrapper subroutine, replacing the expression by the tag if( $att) { $subr= "{ ".'$_[0]'."=~ s{($expr)}{<$tag er=\"$att\">\n".'$1'."\n}sgo;} "; } else { $subr= "{ ".'$_[0]'."=~ s{($expr)}{<$tag>\n".'$1'."\n}sgo;} "; } return eval "sub { $subr }"; }