#!/usr/bin/perl
#
# Convert an eSummaryResult XML document into a tab-separated table.
#
# The input document is validated against the eSummary DTD, then one row is
# written per /eSummaryResult/DocSum element with the text of its Id, TaxId,
# Length, Status and ReplacedBy child elements. A child element that is
# absent from a DocSum yields an empty field rather than aborting the run.
#
# Input and output paths are hard-coded below.

use strict;
use warnings;
use XML::LibXML;

# DTD identifiers used to validate the input document.
my $public_id = "-//NLM//DTD eSummaryResult, 29 October 2004//EN";
my $system_id = "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSummary_041029.dtd";
my $dtd       = XML::LibXML::Dtd->new($public_id, $system_id);

my $filename = '/g/Washu_PopGen/test_gi_docsumms_delVer4.xml';
my $outfile  = '/g/Washu_PopGen/test_gi_taxid_table.txt';

my $parser = XML::LibXML->new();
my $doc    = $parser->parse_file($filename);

# validate() throws an exception if the document does not conform to the
# DTD, so nothing is written for an invalid input file.
$doc->validate($dtd);

# Column names double as the child element names looked up in each DocSum,
# which keeps the header row and the data rows in step.
my @columns = qw(Id TaxId Length Status ReplacedBy);

open(my $out_fh, '>', $outfile)
    or die "Cannot open '$outfile' for writing: $!\n";

# Header row: tab-separated, matching the data rows.
print {$out_fh} join("\t", @columns), "\n";

foreach my $DocSum ($doc->findnodes('/eSummaryResult/DocSum')) {
    my @values;
    foreach my $column (@columns) {
        # Only the first matching child is used; findnodes returns an empty
        # list when the element is missing, leaving $node undefined.
        my ($node) = $DocSum->findnodes("./$column");
        push @values, defined $node ? $node->to_literal : '';
    }
    print {$out_fh} join("\t", @values), "\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close($out_fh)
    or die "Error closing '$outfile': $!\n";