frazap has asked for the wisdom of the Perl Monks concerning the following question:
{"status":"ok","message-type":"work-list","message-version":"1.0.0","m +essage":{"facets":{},"total-results":1,"items":[{"author":[{"given":" +Brahim","family":"Oubaha","sequence":"first","affiliation":[{"name":" +Laboratory of Biology and Biotechnology of Microorganisms Faculty of +Sciences Semlalia Cadi Ayyad University Marrakech Morocco"},{"name":" +Laboratory of Interaction Plant\u2010Microorganisms, Department of Bi +ology University of Fribourg Fribourg Switzerland"}]},{"given":"Ahmed +","family":"Nafis","sequence":"additional","affiliation":[{"name":"La +boratory of Biology and Biotechnology of Microorganisms Faculty of Sc +iences Semlalia Cadi Ayyad University Marrakech Morocco"}]},{"given": +"Mohamed","family":"Baz","sequence":"additional","affiliation":[{"nam +e":"Laboratory of Biology and Biotechnology of Microorganisms Faculty + of Sciences Semlalia Cadi Ayyad University Marrakech Morocco"}]},{"g +iven":"Felix","family":"Mauch","sequence":"additional","affiliation": +[{"name":"Laboratory of Interaction Plant\u2010Microorganisms, Depart +ment of Biology University of Fribourg Fribourg Switzerland"}]},{"ORC +ID":"http:\/\/orcid.org\/0000-0001-7949-5592","authenticated-orcid":f +alse,"given":"Mustapha","family":"Barakate","sequence":"additional"," +affiliation":[{"name":"Laboratory of Biology and Biotechnology of Mic +roorganisms Faculty of Sciences Semlalia Cadi Ayyad University Marrak +ech Morocco"},{"name":"Agrobiosciences & Fertilizers Program Univ +ersity Mohammed IV Polytechnic (UM6P) Benguerir Morocco"}]}],"title": +["The potential of antagonistic moroccan\n Streptomyces\n + isolates for the biological control of damping\u2010off di +sease of pea (\n Pisum sativum\n L.) caused by\ +n Aphanomyces euteiches"]}],"items-per-page":20,"query":{" +start-index":0,"search-terms":null}}}
I want to extract the names and affiliations with the format name, firstname; affiliation1 - affiliation2 - ...
I have the following code that workssub unfoldauthors { my ($data_ar) = @_; return unless ($data_ar); $data_ar = $data_ar->[0]; # die Dumper $data_ar; my @paths = qw( $..family $..given $..affiliation ); my @selectors; for my $p (@paths) { push @selectors, JSON::Path->new($p); } my $aff_name = JSON::Path->new('$..name'); my $name; my @lines; my @sep = ( ", ", "; ", " " ); my $col = 0; for my $s (@selectors) { my @text = $s->values($data_ar); #print $col, " ", Dumper(@text), "\n"; my $authors_limit = 50; my $last = $authors_limit - 1; if ( @text > $authors_limit ) { @text = @text[ 0 .. $last ]; $text[$last] .= " ..." if ( $col == 1 ); } my $pos = 0; for my $name (@text) { #print "$col $name\n"; if ( $col == 2 ) { my @aff = $aff_name->values( $text[$pos] ); #print "aff:", Dumper( @aff ), "\n"; $name = join( " - ", @aff ); #print "\$name $name\n"; } $lines[ $pos++ ] .= $name . $sep[$col]; } $col++; } return \@lines; }
I first iterate over the nodes at the "upper level" family, given, affiliation, and for the affiliation array ref, I extract the name values.
My question: could I have done this more directly in one step ?
Thanks
François
Here is a complete working example
use strict; use warnings; use open qw<:std :encoding(UTF-8) >; use Data::Dumper; use REST::Client::CrossRef; use Log::Any::Adapter( 'File', './log_auth.txt', "log_level" => "info" + ); use JSON::Path; my $cr = REST::Client::CrossRef->new( mailto => 'd...h', spit_raw_data => 0, add_end_flag => 1, json_path => [ ['$.title'], ['$.author'], ], json_path_callback => { '$.author' => \&unfoldauthors }, ); $cr->init_cache( { BasePath => "C:\\Windows\\Temp\\perl", NoUpdate => 15 * 60, verbose => 0 } ); sub unfoldauthors { my ($data_ar) = @_; return unless ($data_ar); $data_ar = $data_ar->[0]; my @paths = qw( $..family $..given $..affiliation ); my @selectors; for my $p (@paths) { push @selectors, JSON::Path->new($p); } my $aff_name = JSON::Path->new('$..name'); my $name; my @lines; my @sep = ( ", ", "; ", " " ); my $col = 0; for my $s (@selectors) { my @text = $s->values($data_ar); my $authors_limit = 50; my $last = $authors_limit - 1; if ( @text > $authors_limit ) { @text = @text[ 0 .. $last ]; $text[$last] .= " ..." if ( $col == 1 ); } my $pos = 0; for my $name (@text) { print "$col $name\n"; if ( $col == 2 ) { my @aff = $aff_name->values( $text[$pos] ); $name = join( " - ", @aff ); } $lines[ $pos++ ] .= $name . $sep[$col]; } $col++; } return \@lines; } $cr->init_cache( { BasePath => "C:\\Windows\\Temp\\perl", NoUpdate => 15 * 60, verbose => 0 } ); my $select = "author,title"; while (<DATA>) { chomp; my $data = $cr->works_from_doi( $_, { 'has-affiliation' => 'true' }, $sel +ect ); next unless ($data); for my $row (@$data) { print "\n" unless ($row); while ( my ( $f, $v ) = each %$row ) { if ( $f eq '$.title' ) { print "***$v\n\n"; } else { print "$v \n"; } } } } __DATA__ 10.1111/jph.12775
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: JSON::Path and node iteration
by haukex (Archbishop) on Mar 21, 2019 at 15:31 UTC | |
by frazap (Monk) on Mar 21, 2019 at 15:46 UTC | |
by haukex (Archbishop) on Mar 21, 2019 at 15:51 UTC | |
by frazap (Monk) on Mar 22, 2019 at 06:53 UTC | |
by hdb (Monsignor) on Mar 22, 2019 at 14:15 UTC | |
|