in reply to XML to HashRef and then to JSON
"All the above scripts are NOT properly converting the child "Emphasis""
That's not quite true: as far as the parser is concerned, "Emphasis" is a valid XML tag, so it is parsed as a child element of "Title". You will have to do a bit of manual labour to achieve your desired output.
I couldn't find a way to get the inner content of a node without getting the node's own tags as well, so I needed to use a regular expression to remove them. Hopefully this will get you on your way:
# See XML::LibXML::Node for an explanation of the node methods used below
# (findnodes, findvalue, getAttribute, toString).
#
# Purpose: parse a small <Publisher> document and build a hash reference in
# which the Title's inline markup (e.g. <Emphasis>) is kept as literal XML
# text instead of being split into a nested structure.
use strict;
use warnings;

use Data::Dumper;
use XML::LibXML;

# Sample input. The Title is kept on a single line so that the serialized
# form printed below matches the input exactly.
my $xml = <<'END_XML';
<Publisher>
  <UniqueDOI>978-3-642-123456</UniqueDOI>
  <ChapterInfo ChapterType="OriginalPaper">
    <Title Language="En">Is Light Blue (<Emphasis Type="Italic">azzurro</Emphasis>) Color Name Universal in the Italian Language?</Title>
  </ChapterInfo>
</Publisher>
END_XML

my $doc        = XML::LibXML->load_xml( string => $xml );
my @publishers = $doc->findnodes('//Publisher');

PUBLISHER:
for my $publisher (@publishers) {

    # findnodes returns a list; only the first match of each is used.
    my ($chapter_info) = $publisher->findnodes('ChapterInfo');
    if ( !defined $chapter_info ) {
        warn "Publisher has no ChapterInfo element; skipping\n";
        next PUBLISHER;
    }

    my ($title) = $chapter_info->findnodes('Title');
    if ( !defined $title ) {
        warn "ChapterInfo has no Title element; skipping\n";
        next PUBLISHER;
    }

    # Get the Title node as literal XML, including its own start/end tags.
    my $content = $title->toString();
    print "Title content:\n$content\n";

    # Remove the first and last XML tags, keeping everything in between.
    # The greedy (.*) is intentional: it backtracks only far enough to leave
    # the final tag for the trailing <[^>]*>.  /s lets '.' match newlines so
    # that titles spanning several lines are handled too.
    # If nothing was substituted there is no start/end tag pair to strip
    # (for instance an empty, self-closing element), so the inner content
    # is treated as empty.
    if ( $content !~ s{^<[^>]*>(.*)<[^>]*>$}{$1}s ) {
        $content = '';
    }

    # Construct the hash reference.
    my $hash = {
        UniqueDOI   => $publisher->findvalue('UniqueDOI'),
        ChapterInfo => {
            ChapterType => $chapter_info->getAttribute('ChapterType'),
            Title       => {
                Language => $title->getAttribute('Language'),
                content  => $content,
            },
        },
    };

    print Dumper($hash);
}
Output:
Title content:
<Title Language="En">Is Light Blue (<Emphasis Type="Italic">azzurro</Emphasis>) Color Name Universal in the Italian Language?</Title>
$VAR1 = {
  'UniqueDOI' => '978-3-642-123456',
  'ChapterInfo' => {
    'ChapterType' => 'OriginalPaper',
    'Title' => {
      'Language' => 'En',
      'content' => 'Is Light Blue (<Emphasis Type="Italic">azzurro</Emphasis>) Color Name Universal in the Italian Language?'
    },
  },
};
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: XML to HashRef and then to JSON
by dominic01 (Sexton) on Mar 15, 2016 at 05:07 UTC |