in reply to need a xml parser module

You may want to have a look at Ways to Rome 2 - Kourallinen Dollareita for a dozen ways to do this, with various XML modules.

Replies are listed 'Best First'.
Re^2: need a xml parser module
by Jenda (Abbot) on Dec 13, 2007 at 14:10 UTC

    As it seems that mirod still did not have time to update those pages and to make the comparison more complete ... here's the solution using XML::Rules:

    #!/usr/bin/perl -w use strict; use XML::Rules; use FindBin qw($Bin); use lib $Bin; use wtr2_base; my $DEBUG=0; init_db(); my $parser = XML::Rules->new( start_rules => [ Finvoice => sub { $_[1]->{errors} = []; # initialization reset_default_row_id(); return 1; # necessary so that the tag is actually processe +d! }, 'PaymentTermsDetails,VatSpecificationDetails,EpiDetails,Seller +AccountDetails,BuyerPostalAddressDetails,SellerInformationDetails' => + 'skip', # I don't care about those at all ], rules => [ _default => 'content', # unless I say otherwise I'm only interested in the conten +t 'PaymentStatusDetails,BuyerCommunicationDetails,SellerPostalAd +dressDetails,SellerCommunicationDetails' => 'pass no content', # I want to dissolve those into their parent tag. I'm not +interested in the whitespace around the child nodes 'DeliveredQuantity,OrderedQuantity' => 'as is', # I need both the content and the attributes of those two BuyerPartyDetails => sub { delete $_[1]->{_content}; check_buyer( $_[1]->{BuyerPartyIdentifier}, $_[1]->{Buyer +OrganisationName}, $_[3]->[0]{errors} ); return $_[0] => $_[1]; }, OrderIdentifier => sub { check_po( $_[1]->{_content}, $_[3]->[0]{errors}); return OrderIdentifier => $_[1]->{_content}; }, InvoiceRow => sub { delete $_[1]->{_content}; $_[1]->{RowIdentifier} = default_row_id() unless $_[1]->{R +owIdentifier}; print "checking row $_[1]->{RowIdentifier}\n" if $DEBUG; check_qtty( $_[1]->{RowIdentifier}, $_[1]->{DeliveredQuantity}->{_content}, $_[1]->{DeliveredQuantity}->{QuantityUnitCode}, $_[1]->{OrderedQuantity}->{_content}, $_[1]->{OrderedQuantity}->{QuantityUnitCode}, $_[3]->[0]{errors} ); return '@invoicerow' => { row_id => $_[1]->{RowIdentifier}, sku => $_[1]->{ArticleIdentifier}, name => $_[1]->{ArticleName}, qty => $_[1]->{DeliveredQuantity}->{_content +}, qty_unit => $_[1]->{DeliveredQuantity}->{Quantity +UnitCode}, unit_price => $_[1]->{UnitPriceAmount}, amount_no_tax => $_[1]->{RowVatExcludedAmount}, tax => $_[1]->{RowVatAmount}, amount => $_[1]->{RowAmount}, } }, InvoiceDetails => sub { return invoice => { number => $_[1]->{InvoiceNumber}, date => $_[1]->{InvoiceDate}, po => $_[1]->{OrderIdentifier}, amount_no_tax => $_[1]->{InvoiceTotalVatExcludedAmoun +t}, tax => $_[1]->{InvoiceTotalVatAmount}, amount => $_[1]->{InvoiceTotalVatIncludedAmoun +t}, }, }, SellerPartyDetails => sub { return seller => { identifier => $_[1]->{SellerPartyIdentifier}, name => $_[1]->{SellerOrganisationName}, tax_code => $_[1]->{SellerOrganisationTaxCode}, }, address => { street => $_[1]->{SellerStreetName}, town => $_[1]->{SellerTownName}, zip => $_[1]->{SellerPostCodeIdentifier}, country_code => $_[1]->{CountryCode}, po_box => $_[1]->{SellerPostOfficeBoxIdentifie +r}, } }, Finvoice => sub { delete $_[1]->{_content}; $_[1]->{invoice}{payment_status} = delete $_[1]->{PaymentS +tatusCode}; return invoice => $_[1]->{invoice}, seller => $_[1]->{seller}, address => $_[1]->{address}, invoicerow => $_[1]->{invoicerow}, errors => $_[1]->{errors}, contact => { name => $_[1]->{SellerContactPersonName} +, phone => $_[1]->{SellerPhoneNumberIdentif +ier}, email => $_[1]->{SellerEmailaddressIdenti +fier}, }; }, ], ); my $filter = XML::Rules->new( style => 'filter', rules => [ _default => 'raw', Finvoice => sub { push @{$_[1]->{_content}}, " ", [errors => {error => $_[4 +]->{parameters}}], "\n"; return ($_[0] => $_[1]); } ] ); my @files= @ARGV || (<$dir{invoices}/*.xml>); foreach my $file (@files) { print "Processing $file\n" if $DEBUG; my $data = $parser->parsefile($file); if (@{$data->{errors}}) { my $errors = $data->{errors}; print "ERROR in $file\n ", join( "\n ", @$errors), "\n"; my $rejected_file= rejected( $file); print "adding errors in $rejected_file\n" if( $DEBUG); open my $OUT, '>', $rejected_file or die "Can't open '$rejecte +d_file' for writing: $^E\n"; $filter->filterfile( $file, $OUT, $errors); close $OUT; } else { print "storing invoice $data->{invoice}->{number}\n"; store_all( $data); } }