use XML::Simple;
use Data::Dump "pp";
use Text::CSV;
# Scan database-01.xml for drugs whose categories include any of the wanted
# terms, and append one CSV row per matching drug to file1.csv with the
# columns: Drug Name, Drug Bank ID, Category, Pubmed ID.
# Multi-valued fields (IDs, categories, PubMed IDs) are comma-joined inside
# a single CSV cell.
use strict;
use warnings;

my $input_file  = 'database-01.xml';
my $output_file = 'file1.csv';

# Categories of interest, stored as a lookup table so each category found in
# the XML is tested with a single exact-match hash access.
my %is_wanted = map { $_ => 1 } (
    "Blood",       "Thrombus",       "Thrombosis", "Anticoagulation",
    "Coagulation", "Anticoagulants", "Antithrombins",
);

# Normalise a parsed node that may be undef, a single item, or an array
# reference of items into a flat list (the script has to cope with both the
# array and the single-value shape for every repeated element).
sub _as_list {
    my ($node) = @_;
    return ()       if !defined $node;
    return @{$node} if ref $node eq 'ARRAY';
    return ($node);
}

# Walk a chain of hash keys starting at $node. Returns the value found, or
# nothing as soon as a level is missing or is not a hash reference. Unlike a
# chained ->{a}->{b} lookup this never autovivifies intermediate hashes.
sub _dig {
    my ($node, @path) = @_;
    for my $key (@path) {
        return if ref $node ne 'HASH';
        $node = $node->{$key};
    }
    return $node;
}

my $csv = Text::CSV->new({ binary => 1, auto_diag => 1, eol => "\n" })
    or die "Cannot use CSV: " . Text::CSV->error_diag();

# The output is opened in append mode, so only emit the header row when the
# file does not exist yet or is empty; otherwise every run would insert
# another header line in the middle of the data.
my $needs_header = !-s $output_file;
open my $fh, ">>", $output_file or die "Failed to open file: $!";
$csv->print($fh, [ "Drug Name", "Drug Bank ID", "Category", "Pubmed ID" ])
    if $needs_header;

my $simple = XML::Simple->new();
my $tree   = $simple->XMLin($input_file);

# The parsed tree is expected to expose the drugs as a hash keyed by drug
# name; anything else is treated as "no drugs".
my $drug_for = $tree->{drug};
$drug_for = {} if ref $drug_for ne 'HASH';

# Sorted so repeated runs produce rows in the same order.
for my $name (sort keys %{$drug_for}) {
    my $drug = $drug_for->{$name};

    # Collect the drug's categories that are in the wanted set.
    my @matched;
    for my $entry (_as_list(_dig($drug, qw(categories category)))) {
        my $cat = ref $entry eq 'HASH' ? $entry->{category} : undef;
        push @matched, $cat
            if defined $cat && !ref $cat && $is_wanted{$cat};
    }
    next if !@matched;

    # Each drugbank-id entry is either a hash carrying the ID under
    # 'content' or a plain string holding the ID itself.
    my @ids;
    for my $entry (_as_list(_dig($drug, 'drugbank-id'))) {
        my $id = ref $entry eq 'HASH' ? $entry->{content} : $entry;
        push @ids, $id if defined $id && !ref $id;
    }

    # PubMed IDs are gathered fresh for every drug, so a drug without
    # articles yields an empty cell rather than inheriting the previous
    # drug's references.
    my @pubmed_ids;
    for my $article (
        _as_list(_dig($drug, qw(general-references articles article))))
    {
        my $pid = ref $article eq 'HASH' ? $article->{'pubmed-id'} : undef;
        push @pubmed_ids, $pid if defined $pid && !ref $pid;
    }

    $csv->print(
        $fh,
        [
            $name,
            join(',', @ids),
            join(',', @matched),
            join(',', @pubmed_ids),
        ]
    );
}

# Buffered write errors only surface at close time, so check it.
close $fh or die "Failed to close file: $!";
print "DONE.";