Re: parse XML huge file using cpan modules

To add to the list of options...

use strict;
use warnings;
use XML::Rules;
use Data::Dumper qw(Dumper);

# Stream the statistics log and print one pipe-separated line per
# <resourceGroup>. This variant keeps the statistic "type" (Startup,
# Lifetime, ...) as an extra hash level, so equally named statistics of
# different types cannot overwrite each other.
my $parser = XML::Rules->new(
    # Whitespace-stripping bit flags; see the XML::Rules documentation
    # for the meaning of the individual bits.
    stripspaces => 15,
    rules => {
        # <name> and <value> contribute only their text content to the
        # enclosing <statistic>.
        'name,value' => 'content',

        # Returning a '%'-prefixed key asks XML::Rules to merge the hash
        # into the parent's data under that key, giving
        # $group->{<type>}{<name>} = <value>.
        statistic => sub {
            my (undef, $attrs) = @_;
            return '%' . $attrs->{type}
                => { $attrs->{name} => $attrs->{value} };
        },

        # Collect every <resourceGroup> into an array on the parent.
        resourceGroup => 'no content array',

        # Emit the rows for one <statRecord>, then return nothing so the
        # record's data is not accumulated in the parent.
        statRecord => sub {
            my (undef, $record) = @_;
            #print Dumper($record);

            foreach my $group (@{ $record->{resourceGroup} || [] }) {
                my @fields = (
                    $record->{time},
                    $group->{name},
                    $group->{Lifetime}{LCONNFAIL},
                    $group->{Lifetime}{LLOSTCONN},
                    $group->{Lifetime}{LIDLETIMEOUT},
                    $group->{Startup}{SIPADDR},
                    $group->{Startup}{SIPPORT},
                );

                # A statistic missing from the input prints as an empty
                # column instead of raising an "uninitialized" warning.
                print join('|', map { defined $_ ? $_ : '' } @fields), "\n";
            }
            return;
        },
    },
);

print "time|resourceGroup name|LCONNFAIL|LLOSTCONN|LIDLETIMEOUT|SIPADDR|SIPPORT\n";

$parser->parse(\*DATA);

__DATA__
<?xml version="1.0" encoding="UTF-8"?>
<ctgStatistics xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
   xsi:noNamespaceSchemaLocation="ctgstatslog.xsd">
   <statRecord type="interval" length="60" time="2019-07-16T08:23:59">
      <resourceGroup name="CSCS1SVGM1">
         <statistic type="Startup">
...
[download]

use strict;
use warnings;
use XML::Rules;
use Data::Dumper qw(Dumper);

# Stream the statistics log and print one pipe-separated line per
# <resourceGroup>. This variant ignores the statistic "type" and stores
# each statistic directly on its group, so it relies on statistic names
# being unique within a <resourceGroup>.
my $parser = XML::Rules->new(
    # Whitespace-stripping bit flags; see the XML::Rules documentation
    # for the meaning of the individual bits.
    stripspaces => 15,
    rules => {
        # <name> and <value> contribute only their text content to the
        # enclosing <statistic>.
        'name,value' => 'content',

        # Each <statistic> becomes a single name => value pair on the
        # enclosing <resourceGroup>.
        statistic => sub {
            my (undef, $attrs) = @_;
            return $attrs->{name} => $attrs->{value};
        },

        # Collect every <resourceGroup> into an array on the parent.
        resourceGroup => 'no content array',

        # Emit the rows for one <statRecord>, then return nothing so the
        # record's data is not accumulated in the parent.
        statRecord => sub {
            my (undef, $record) = @_;
            #print Dumper($record);

            foreach my $group (@{ $record->{resourceGroup} || [] }) {
                my @fields = (
                    $record->{time},
                    $group->{name},
                    $group->{LCONNFAIL},
                    $group->{LLOSTCONN},
                    $group->{LIDLETIMEOUT},
                    $group->{SIPADDR},
                    $group->{SIPPORT},
                );

                # A statistic missing from the input prints as an empty
                # column instead of raising an "uninitialized" warning.
                print join('|', map { defined $_ ? $_ : '' } @fields), "\n";
            }
            return;
        },
    },
);

print "time|resourceGroup name|LCONNFAIL|LLOSTCONN|LIDLETIMEOUT|SIPADDR|SIPPORT\n";

$parser->parse(\*DATA);

__DATA__
<?xml version="1.0" encoding="UTF-8"?>
<ctgStatistics xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
   xsi:noNamespaceSchemaLocation="ctgstatslog.xsd">
   <statRecord type="interval" length="60" time="2019-07-16T08:23:59">
      <resourceGroup name="CSCS1SVGM1">
         <statistic type="Startup">
...
[download]

The first version preserves the statistic's type in the data provided to the handler of the statRecord tag. The second assumes there will be no duplicate statistic names within a resourceGroup and ignores the types.

There's only the data from one <statRecord> in memory at any time.

Jenda
Enoch was right!
Enjoy the last years of Rome.

Comment on Re: parse XML huge file using cpan modules Select or Download Code


Clear questions and runnable code get the best and fastest answer
	PerlMonks