#!/usr/bin/perl
#
# Reorder the <ResultItem> children of every <results> element so that they
# are sorted by <category>, then <subCategory>, then <code> (string order),
# while copying the rest of the document through unchanged.
#
# Input is read from "test.msg"; the filtered XML is written to
# "test-result.msg".

use strict;
use warnings;

# A <ResultItem> that lacks one of the sort-key tags yields undef in the
# comparator below; let it compare as an empty string without a warning.
no warnings 'uninitialized';

use XML::Rules;

my $parser = XML::Rules->new(
    # Filter (modify) the XML rather than extract data from it.
    style => 'filter',
    rules => {
        # Copy most tags intact, including the whitespace in and around
        # them. Their data ends up in the _content pseudo-attribute of the
        # parent tag.
        _default => 'raw',

        # These three are copied as well, but "raw extended" additionally
        # exposes each one in the parent's hash under ":category",
        # ":subCategory" and ":code", so the sort below does not have to
        # search through the parent's _content array.
        'category,subCategory,code' => 'raw extended',

        # Several <ResultItem> tags are expected; collect the data of each
        # into an array stored under the 'ResultItem' key of the parent
        # tag's hash.
        'ResultItem' => 'as array',

        # Called once a <results> element has been fully parsed and the
        # rules for its child tags have been evaluated.
        #   $tag   - the name of the element being closed
        #   $attrs - hash ref with the element's attributes and child data
        # Returns [$tag => $attrs] so the (reordered) element is written
        # to the output.
        'results' => sub {
            my ($tag, $attrs) = @_;

            my $items = $attrs->{ResultItem};

            # Sorting is only worthwhile when there are at least two items.
            if ($items and @{$items} > 1) {
                # $a and $b are the hashes built from the child tags of two
                # <ResultItem> elements; compare key by key, falling through
                # to the next key only when the previous one ties.
                @{$items} = sort {
                    $a->{':category'}{_content} cmp $b->{':category'}{_content}
                        or
                    $a->{':subCategory'}{_content} cmp $b->{':subCategory'}{_content}
                        or
                    $a->{':code'}{_content} cmp $b->{':code'}{_content}
                } @{$items};
            }

            # Remove the accumulated whitespace that was present between
            # the child tags (only when _content is plain text, not a
            # reference).
            $attrs->{_content} =~ s/^\s+// if (!ref $attrs->{_content});

            return [$tag => $attrs];
        },
    },
);

$parser->filterfile("test.msg", "test-result.msg");