in reply to Re^2: getting ancestors of element
in thread getting ancestors of element

XML::Simple folds (by default) on the name= attribute. At lines -3, -8 from the end you have two <members> lines with the same name="AttributeDesc". This fooled XML::Simple into treating the two blocks as the same, allowing the second instance to overwrite the first. By adding KeyAttr=>[] as an option to XMLin, this default behaviour is suppressed and the expected result is obtained.

use feature ":5.14"; use warnings FATAL => qw(all); use strict; use Data::Dump qw(dump pp); use XML::Simple; my $x = XMLin(<<'END', KeyAttr=>[]); <classes name="Panoply::AttributeDesc"> <all_members name="AttributeDesc" protection="public" scope="Panop +ly::AttributeDesc" virtualness="non_virtual"/> <all_members name="AttributeDesc" protection="public" scope="Panop +ly::AttributeDesc" virtualness="non_virtual"/> <all_members name="description" protection="public" scope="Panoply +::AttributeDesc" virtualness="non_virtual"/> <all_members name="name" protection="public" scope="Panoply::Attri +buteDesc" virtualness="non_virtual"/> <all_members name="value" protection="private" scope="Panoply::Att +ributeDesc" virtualness="non_virtual"/> <public_members> <members kind="variable" name="name" protection="public" static= +"no" type="std::string" virtualness="non_virtual"></members> <members kind="variable" name="value" protection="public" static +="no" type="std::string" virtualness="non_virtual"></members> <members kind="variable" name="description" protection="public" +static="no" type="std::string" virtualness="non_virtual"></members> </public_members> <public_methods> <members const="no" kind="function" name="AttributeDesc" protect +ion="public" static="no" virtualness="non_virtual" volatile="no"> <parameters declaration_name="name" type="const std::string &a +mp;"/> <parameters declaration_name="value" type="const std::string & +amp;"/> <parameters declaration_name="desc" default_value="&quot;&quot +;" type="const std::string &amp;"/> </members> <members const="no" kind="function" name="AttributeDesc" protect +ion="public" static="no" virtualness="non_virtual" volatile="no"></me +mbers> </public_methods> </classes> END #pp($x); sub r($$$;$); sub r($$$;$) {my ($r, $l, $e, $a) = @_; $a = [] unless $a; return unless $l and ref($l); if (ref($l) =~ /HASH/) {for(sort keys %$l) {unless (/$e/) {push @$a, $_; r($r, $l->{$_}, $e, $a); pop @$a; } else {&$r(@$a); } } } elsif (ref($l) =~ /ARRAY/) {for(1..@$l) {unless ($l->[$_-1] =~ /$e/) {push @$a, $_; r($r, $l->[$_-1], $e, $a); pop @$a; } else {&$r(@$a); } } } } r sub {say "@_"}, $x, "parameters";

Produces

public_methods members 1

Replies are listed 'Best First'.
Re^4: getting ancestors of element
by jccunning (Acolyte) on Sep 08, 2012 at 16:40 UTC
    Thanks to you and anonymous monk I have almost hacked together an api comparison script. I don't have much experience in perl but here is what I have so far. Maybe you can help clean it up or approach it from a better angle. Providing script and two example xml files that need to be named oldapi.xml and newapi.xml. Run script and provide two xml files as argument to produce apichanges.html. Also can you tell me why I am getting uninitialized value $e in regexp error.
    #!/usr/bin/perl -- use strict; use warnings; use feature ":5.14"; use XML::Simple; use XML::Twig; use Data::Dump qw' dd '; use Data::Dumper; use List::Compare; use HTML::Table; my $ndiff; my $odiff; my @oapi; my @napi; my @oldclasses; my @newclasses; my @oldfiles; my @newfiles; my @oldnsp; my @newnsp; my @newonly; my @oldonly; my @names_in_newapi; my @names_in_oldapi; my @memnew; my @memold; my @list; my $i = 0; my $j = 0; my $apichanges = 'apichanges.html'; my ($tablec, $tablef, $tablen, $tablemst, $tname); my $tr = 2; my $crow = 2; my $ccol = 3; ############################################## # doxygen generates DoxyDocs.pm by default that represents the complet +e API # rename older version of api to oldDoxyDocs.pm and rename new version + to newDoxyDocs.pm # inside these two files rename the default $doxydocs variable to $old +doxydocs # and $newdoxydocs in their related file. ############################################## # require "oldDoxyDocs.pm"; # our $olddoxydocs; # require "newDoxyDocs.pm"; # our $newdoxydocs; ############################################## # Script takes oldDoxyDocs.pm and newDoxyDocs.pm and converts to xml # then filters out unneeded tags from xml then puts all classes, # files, namespaces into hash containing two arrays with each element # of array containing all related properties of each class, etc. # # USAGE: apixml.pl oldapi.xml newapi.xml # Then open apichanges.html # # following lines convert pm files to xml files oldapi.xml and newapi. +xml ############################################## # my $ofh = 'oldapi.xml'; # my $oxs = new XML::Simple(RootName => "apiroot"); # $oxs->XMLout($olddoxydocs, XMLDecl => 1, OutputFile => $ofh); # add +this option to convert attributes to elements NoAttr => 1, # my $nfh = 'newapi.xml'; # my $nxs = new XML::Simple(RootName => "apiroot"); # $nxs->XMLout($newdoxydocs, XMLDecl => 1, OutputFile => $nfh); Main( @ARGV ); exit( 0 ); sub Main { my %files; my %class; my %results; my $item_to_compare; my $filename; my $ssprint = sub { my( $twig, $_ ) = @_; push @{ $files{ $filename }}, $_->sprint; # push all classes, + files, namespaces into files hash with separate array for each file return; }; my $twig = XML::Twig->new( ignore_elts => { brief => 'discard', detailed => 'discard', in +cludes => 'discard', included_by => 'discard', reimplemented_by => 'd +iscard' }, pretty_print => 'indented', escape_gt => 1, keep_encoding => 1, TwigHandlers => { 'apiroot/classes' => $ssprint, 'apiroot/files' => $ssprint, 'apiroot/namespaces' => $ssprint, }, ); for my $file( @_ ) { $filename = $file; eval { $twig->parsefile( $file ); 1; } or warn "ERROR parsefile($file): $@ "; # following code gathers names of all classes, files, namespac +es # from oldapi and newapi xml files and puts in arrays my $root = $twig->root; my @class = $root->children( 'classes' ); foreach my $cls (@class) { my $clsname = $cls->{'att'}->{'name'}; if ($filename eq 'oldapi.xml') { push (\@oldclasses, $clsname); } else { push (\@newclasses, $clsname); } } my @hfiles = $root->children( 'files' ); foreach my $hfile (@hfiles) { if ($filename eq 'oldapi.xml') { push (\@oldfiles, $hfile->{'att'}->{'name'}); } else { push (\@newfiles, $hfile->{'att'}->{'name'}); } } my @namesp = $root->children( 'namespaces' ); foreach my $nsp (@namesp) { if ($filename eq 'oldapi.xml') { push (\@oldnsp, $nsp->{'att'}->{'name'}); } else { push (\@newnsp, $nsp->{'att'}->{'name'}); } } $twig->purge; } # dd \%files; open (OUTFILE, ">$apichanges") or die "Cannot open $apichanges for + writing \n"; print OUTFILE <<EOF; <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <style type="text/css"> h1 {text-align:center} table.api { font-family: verdana,arial,sans-serif; font-size:11px; color:#333333; border-width: 1px; border-color: #666666; border-collapse: collapse; } table.api th { border-width: 1px; padding: 8px; border-style: solid; border-color: #666666; background-color: #dedede; } table.api td { border-width: 1px; padding: 8px; border-style: solid; border-color: #666666; background-color: #ffffff; } </style> </head> <body> <h1>API Changes</h1> EOF $tablec = new HTML::Table(-rows=>1, -cols=>2, -align=>'center', -rules=>'rows,cols', -border=>2, -bgcolor=>'white', -width=>'85%', -spacing=>0, -padding=>0, -style=>'color: green', -class=>'api', -evenrowclass=>'even', -oddrowclass=>'odd', -head=> ['Classes Added', 'Classes Removed +'], ); # following code compares arrays to find new classes, # files, namespaces added and old ones removed my $cc = List::Compare->new(\@oldclasses, \@newclasses); @newonly = $cc->get_complement; @oldonly = $cc->get_unique; for my $row (@newonly) { $tablec->addRow($row); } for my $row (@oldonly) { $tablec->setCell($tr++,2,$row); } print OUTFILE $tablec; undef @newonly; undef @oldonly; $tr = 2; $tablef = new HTML::Table(-rows=>1, -cols=>2, -align=>'center', -rules=>'rows,cols', -border=>2, -bgcolor=>'white', -width=>'85%', -spacing=>0, -padding=>0, -style=>'color: green', -class=>'api', -evenrowclass=>'even', -oddrowclass=>'odd', -head=> ['Header Files Added', 'Header Fil +es Removed'], ); my $fc = List::Compare->new(\@oldfiles, \@newfiles); @newonly = $fc->get_complement; @oldonly = $fc->get_unique; for my $row (@newonly) { $tablef->addRow($row); } for my $row (@oldonly) { $tablef->setCell($tr++,2,$row); } print OUTFILE $tablef; undef @newonly; undef @oldonly; $tr = 2; $tablen = new HTML::Table(-rows=>1, -cols=>2, -align=>'center', -rules=>'rows,cols', -border=>2, -bgcolor=>'white', -width=>'85%', -spacing=>0, -padding=>0, -style=>'color: green', -class=>'api', -evenrowclass=>'even', -oddrowclass=>'odd', -head=> ['Namespaces Added', 'Namespaces R +emoved'], ); my $nc = List::Compare->new(\@oldnsp, \@newnsp); @newonly = $nc->get_complement; @oldonly = $nc->get_unique; for my $row (@newonly) { $tablen->addRow($row); } for my $row (@oldonly) { $tablen->setCell($tr++,2,$row); } print OUTFILE $tablen; undef @newonly; undef @oldonly; $tr = 2; # following code references the two arrays in hash that contain # all info about classes, files, and namespaces in old and new api # then compares arrays to find what is new, changed or removed. my $hashref = \%files; my $oldapi = $hashref->{"oldapi.xml"}; my $newapi = $hashref->{"newapi.xml"}; my $lc = List::Compare->new(\@{$oldapi}, \@{$newapi}); @oldonly = $lc->get_unique; # unique to old version @newonly = $lc->get_complement; # get all new items includes all i +tem prop info $tablemst = new HTML::Table(-rows=>1, -cols=>3, -align=>'center', -rules=>'rows,cols', -border=>2, -bgcolor=>'white', -width=>'85%', -spacing=>0, -padding=>0, -style=>'color: green', -class=>'api', -evenrowclass=>'even', -oddrowclass=>'odd', -head=> ['Modified Class, File, or Namespa +ce', 'Locations found', 'Specific Change'], ); # following code takes each new, changed, or removed item and # filters through regex to remove xml markup to make it easier to +read # also takes just the name of each item to be used for later compa +risons for $odiff (@oldonly) { if($odiff =~ m/(.*name.*?\>)/) { push (@names_in_oldapi, $1); } } for $ndiff (@newonly) { if($ndiff =~ m/(.*name.*?\>)/) { push (@names_in_newapi, $1); } } # this subroutine finds index of element representing item that # was modified in oldonly and newonly arrays so they can be extrac +ted # and broken down and compared to find exactly what changed sub findindex { 1 while $_[0] ne pop; @_-1; } sub ret_ancestors($$$;$); sub ret_ancestors($$$;$) {my ($r, $l, $e, $a) = @_; $a = [] unless $a; return unless $l and ref($l); if (ref($l) =~ /HASH/) {for(sort keys %$l) {unless (/$e/) {push @$a, $_; ret_ancestors($r, $l->{$_}, $e, $a); pop @$a;} else {&$r(@$a);} } } elsif (ref($l) =~ /ARRAY/) {for(1..@$l) {unless ($l->[$_-1] =~ /$e/) {push @$a, $_; ret_ancestors($r, $l->[$_-1], $e, $a); pop @$a;} else {&$r(@$a);} } } } # following code finds indexes of elements that exist in both arra +ys after lc compare is done, # -1 means that item does not exist in oldapi array for $item_to_compare (@names_in_newapi) { my $oindex = findindex($item_to_compare, @names_in_oldapi); my $nindex = findindex($item_to_compare, @names_in_newapi); push (@oapi, (split '\n', $oldonly[$oindex])) if $oindex != -1; +# ignore items that are new or removed push (@napi, (split '\n', $newonly[$nindex])) if $oindex != -1; +# only split on items that have changed my $oitemstr = XMLin($oldonly[$oindex], KeyAttr=>['name', 'decla +ration_name']); # convert to hash my $nitemstr = XMLin($newonly[$nindex], KeyAttr=>['name', 'decla +ration_name']); # convert to hash map(s/\W<([^>]+)>/$1/g, $item_to_compare); map(s/(\/|<\/\w+>)//g, $item_to_compare); my $memc = List::Compare->new(\@oapi, \@napi); @memnew = $memc->get_complement; # item property changes that ex +ist in new version or unique if prop added in new api @memold = $memc->get_unique; # item property as it appears in ol +d api or unique if prop removed in new api if (@memold) { map(s/\W<([^>]+)>/$1/, @memold); } # strips out x +ml tags for easier viewing if (@memold) { map(s/(\/|<\/\w+>)//, @memold); } if (@memnew) { map(s/\W<([^>]+)>/$1/, @memnew); } if (@memnew) { map(s/(\/|<\/\w+>)//, @memnew); } if (@memnew) { if (@memold) { for $tname (@memold) { $tname =~ m/[declaration_]?name="(\w+[:]?.\w+[:]?.\w+) +/; #extract name from name attribute $tablemst->addRow($item_to_compare,$1); ret_ancestors sub {@list = join(" => ", @_)}, $nitemstr, + $1; $tablemst->setCell($tr++,2,@list); $tablemst->setCell($crow++,$ccol,"<b>From this:</b><br\> +$memold[$i++]<br\><br\><b>To this:</b><br\>$memnew[$j++]"); } } elsif (!@memold) { for $tname (@memnew) { $tname =~ m/[declaration_]?name="(\w+[:]?.\w+[:]?.\w+)/; $tablemst->addRow($item_to_compare,$1); ret_ancestors sub {@list = join(" => ", @_)}, $nitemstr, +$1; $tablemst->setCell($tr++,2,@list); $tablemst->setCell($crow++,$ccol,"<b>From this:</b><br\>N +EW ITEM ADDED<br\><br\><b>To this:</b><br\>$memnew[$j++]"); } } } elsif (@memold) { if (!@memnew) { for $tname (@memold) { $tname =~ m/[declaration_]?name="(\w+[:]?.\w+[:]?.\w+) +/; #extract name from name attribute $tablemst->addRow($item_to_compare,$1); ret_ancestors sub {@list = join(" => ", @_)}, $oitemst +r, $1; $tablemst->setCell($tr++,2,@list); $tablemst->setCell($crow++,$ccol,"<b>From this:</b><br +\>$memold[$i++]<br\><br\><b>To this:</b><br\>ITEM REMOVED<br\><br\>") +; } } } else { $tablemst->setCell(2, 1, 'Nothing Changed'); } $i = 0; $j = 0; undef @oapi; undef @napi; undef @memold; undef @memnew; } undef @oldonly; undef @newonly; print OUTFILE $tablemst; print OUTFILE <<COF; </body> </html> COF close OUTFILE; }
    <?xml version='1.0' standalone='yes'?> <apiroot> <classes name="Panoply::AttributeDesc"> <all_members name="AttributeDesc" protection="public" scope="Panop +ly::AttributeDesc" virtualness="non_virtual" /> <public_members> <members name="xname" kind="variable" protection="public" static +="no" type="std::string" virtualness="non_virtual"> </members> <members name="value" kind="variable" protection="public" static +="no" type="std::string" virtualness="non_virtual"> </members> </public_members> </classes> <classes name="Panoply::BAR"> <all_members name="BAR" protection="public" scope="Panoply::BAR" v +irtualness="non_virtual" /> <all_members name="type" protection="public" scope="Panoply::BAR" +virtualness="pure_virtual" /> <public_methods> <members name="BAR" const="no" kind="function" protection="publi +c" static="no" virtualness="non_virtual" volatile="no"> <parameters declaration_name="pciReg" type="Register::Ptr" /> </members> <members name="~BAR" const="no" kind="function" protection="publ +ic" static="no" type="virtual" virtualness="virtual" volatile="no"> </members> </public_methods> </classes> </apiroot>
    <?xml version='1.0' standalone='yes'?> <apiroot> <classes name="Panoply::AttributeDesc"> <all_members name="AttributeDesc" protection="public" scope="Panop +ly::AttributeDesc" virtualness="non_virtual" /> <public_members> <members name="zxname" kind="variable" protection="public" stati +c="no" type="std::string" virtualness="non_virtual"> </members> <members name="value" kind="variable" protection="public" static +="no" type="std::string" virtualness="non_virtual"> </members> </public_members> </classes> <classes name="Panoply::BAR"> <all_members name="BAR" protection="public" scope="Panoply::BAR" v +irtualness="non_virtual" /> <all_members name="type" protection="public" scope="Panoply::BAR" +virtualness="pure_virtual" /> <all_members name="~BAR" protection="public" scope="Panoply::BAR" +virtualness="virtual" /> <public_methods> <members name="BAR" const="no" kind="function" protection="publi +c" static="no" virtualness="non_virtual" volatile="no"> <parameters declaration_name="pciReg" type="Register::Ptr" /> </members> <members name="~BAR" const="no" kind="function" protection="publ +ic" static="no" type="virtual" virtualness="virtual" volatile="no"> </members> </public_methods> </classes> </apiroot>

      I think it should be unless($e), not unless(/$e/). When this change is made the code runs well. Please consider using the here file syntax for test scripts as it makes it easier to test them:

      my @l = << 'END'; aaaa bbbb END

      Thanks!

        I discovered that $1 is sometimes undefined on lines 381 and 394 when calling ret_ancestors and caused the error. Need to rethink my logic in the For loop on line 349.
        I discovered my problem is in regex
        $tname =~ m/[declaration_]?name="(\w+[:]?.\w+[:]?.\w+);
        where I am trying to extract the value of the name attribute which could be single word all lowercase or all uppercase, could also be Panoply::Blah or Panoply::Blah::Blah2 or have special character in name such as ~BAR In script $tname would contain something like
        all_members name="~BAR" protection="public" scope="Panoply::BAR"
        Also noticed from test that passing ~BAR in the argument $1 in
        ret_ancestors sub {@list = join(" => ", @_)}, $nitemstr, $1;
        Does not return a list because of ~ character. Any help with regex appreciated.
Re^4: getting ancestors of element
by jccunning (Acolyte) on Sep 12, 2012 at 15:58 UTC
    Can you tell me why the following only returns something if I remove 'name' from the KeyAttr list. Can you provide list of keys to XMLin. Is it also possible to return something like: public_methods => members => AttributeDesc => parameters for 'desc'.
    #!/usr/bin/perl use feature ":5.14"; use warnings FATAL => qw(all); use strict; use Data::Dump qw(dump pp); use XML::Simple; my @list; my $x = XMLin(<<'END', KeyAttr=>['name', 'declaration_name']); <classes> <public_methods> <members const="no" kind="function" name="AttributeDesc" protect +ion="public" static="no" virtualness="non_virtual" volatile="no"> <parameters declaration_name="name" type="const std::string &a +mp;"/> <parameters declaration_name="value" type="const std::string & +amp;"/> <parameters declaration_name="desc" default_value="&quot;&quot +;" type="const std::string &amp;"/> </members> <members const="no" kind="function" name="AttributeDesc" protect +ion="public" static="no" virtualness="non_virtual" volatile="no"></me +mbers> </public_methods> </classes> END # pp($x); sub r($$$;$); sub r($$$;$) {my ($r, $l, $e, $a) = @_; $a = [] unless $a; return unless $l and ref($l); if (ref($l) =~ /HASH/) {for(sort keys %$l) {unless (/$e/) {push @$a, $_; r($r, $l->{$_}, $e, $a); pop @$a; } else {&$r(@$a); } } } elsif (ref($l) =~ /ARRAY/) {for(1..@$l) {unless ($l->[$_-1] =~ /$e/) {push @$a, $_; r($r, $l->[$_-1], $e, $a); pop @$a; } else {&$r(@$a); } } } } r sub {push (@list, join(" => ", @_))}, $x, "desc"; # ret_ancestors sub {say "@_"}, $x, "desc"; my $elist = join("\n", @list); print "$elist\n";