I have a massive (600Mb+) xml file i need to process to extract some data from. all the line breaks have been removed and the file is one massive line.
I'm not what you would call extremely experienced with XML, and see my machine consume all available (2.7gb) of ram before running out of memory on a pretty simple script.
#!/usr/bin/perl -w
use strict;
use XML::Twig;
use Data::Dumper;
$|++;
my $t = XML::Twig->new(
#twig_roots => { 'Person' => 1}, # uncommen
+t to dump entire XML in a hr form
twig_handlers => { 'Person' => \&person },
pretty_print => 'indented',
keep_encoding => 1,
);
$t->parsefile('./File.xml');
$t->flush;
sub person {
my ($t, $section) = @_;
# my $root = $section->root(); # uncomment do dump entire xml in
+a hr form
my $id= $section->att('id');
my (@firstname, @middlename, @lastname, $description);
my @para= $section->getElementsByTagName('Name');
foreach my $obj (@para) {
if ($obj->att('NameType') eq 'Primary Name' ) {
my $child = $obj->first_child('NameValue');
@firstname = $child->fields('FirstName');
@middlename= $child->fields('MiddleName');
@lastname = $child->fields('Surname');
}
}
my @list= $section->getElementsByTagName('Descriptions');
foreach my $obj (@list) {
my $child = $obj->first_child('Description');
$description = $child->{'att'}->{'Description2'} if ($child->{'att
+'}->{'Description2'});
}
print "$id,$firstname[0],$middlename[0],$lastname[0],$description\
+n" if ($description);
}
if someone could provide some insight or alternative(s) it would be appreciated!