Hello: I recently wrote a script that extracts medical data from XML. I've been using XML::DOM; however, upon further testing, I realize that for my needs (this will be an automated script handling up to seven 20k XML documents at one time), DOM isn't flexible enough.

Upon further research, I realize that XML::SAX (or any general event-driven XML parser) is probably my best bet for my requirements. However, I've never used SAX before and I have no idea how to begin changing over my code. I've been poring over web documents (including an online PDF from the Perl and XML books from O'Reilly and New Riders), and I am still lost (or maybe it's panic taking hold?).

Here is my original XML::DOM code:

package CathReport;

# CathReport - pulls patient, staff, lab, procedure, log-note, vital-sign and
# Aldrete data out of a cath-lab XML export using XML::DOM and stores the
# results on the object as plain Perl data structures.

use strict;
use warnings;

require XML::DOM;
use Date::Manip;
use POSIX qw(ceil floor);
use Carp qw(croak);

# new - construct an empty report.
#
# May be called as a class method (CathReport->new), on an existing instance
# ($report->new), or as a plain function (CathReport::new()), which is how the
# one-argument-bless version of this constructor could also be called.
# Subclasses are blessed into their own class.
#
# Returns a blessed hash reference with every data slot set to undef.
sub new {
    my $invocant = shift;
    my $class    = ref($invocant) || $invocant || __PACKAGE__;

    my $self = {
        CASE_HEADER => undef,
        PATIENT     => undef,
        STAFF       => undef,
        PROC        => undef,
        LAB         => undef,
        ALDRETE     => undef,
        MED         => undef,
        CONDITIONS  => undef,
        NOTES       => undef,
        ONASS       => undef,
        EVENTS      => undef,
    };

    return bless $self, $class;
}

# CamPatData - parse one XML file and populate the report.
#
# Parameters:
#   $file (optional) - path of the XML document to read. Without it the
#                      object is returned untouched.
#
# Slots filled on success:
#   PATIENT - array ref: last name, first name, birth date (mm/dd/YYYY), sex,
#             height, height units, weight, weight units, study id
#   STAFF   - hash ref: Role attribute => "First Last"
#   LAB     - hash ref: BCMeasurement attribute => value with two decimals
#   PROC    - hash ref: ProcNum attribute => Procedure attribute
#   NOTES   - hash ref: HH:MM => event text
#   ONASS   - hash ref: HH:MM => [systolic, diastolic, heart rate, SaO2,
#             respiration rate, comment (always "")]
#   ALDRETE - hash ref: AldreteType => [activity, respiration, LOC, colour,
#             total score, circulation]
#   EVENTS  - hash ref: DefinedEventText attribute => raw EventTime text
#             (this table was always built but previously thrown away)
#
# Returns the object itself. Dies if the file cannot be parsed or if an
# element the extraction relies on is missing or empty.
sub CamPatData {
    my $obj = shift;

    Date_Init("TZ=EST");

    return $obj unless @_;
    my $file = shift;

    my $parser = XML::DOM::Parser->new();
    my $doc    = $parser->parsefile($file);

    # Extract inside eval so the DOM tree is always released, even when the
    # document turns out to be malformed half-way through.
    my $report = eval { _extract_report($doc) };
    my $error  = $@;

    # XML::DOM trees contain circular references; without dispose() every
    # parsed document stays in memory for the life of the process.
    $doc->dispose;

    die( $error || "CathReport: extraction from '$file' failed\n" )
        unless $report;

    $obj->{$_} = $report->{$_} for keys %$report;

    return $obj;
}

# _extract_report - walk a parsed document and return a hash ref of the
# slots described under CamPatData. Only plain strings are copied out, so the
# result stays valid after the document has been disposed.
sub _extract_report {
    my ($doc) = @_;

    my %events;
    foreach my $event ( $doc->getElementsByTagName('CathDefinedEvents') ) {
        my $label = $event->getAttribute('DefinedEventText');
        $events{$label} = _child_text( $event, 'EventTime' );
    }

    # Patient demographics. If several SA_PATIENT / CathStudy elements are
    # present the last one wins.
    my ( $last_name, $first_name, $birth_date, $gender );
    foreach my $patient ( $doc->getElementsByTagName('SA_PATIENT') ) {
        $last_name  = _child_text( $patient, 'MY_LAST_NAME' );
        $first_name = _child_text( $patient, 'MY_FIRST_NAME' );
        $birth_date = _child_text( $patient, 'MY_BIRTH_DATE' );
        $gender     = _child_text( $patient, 'MY_GENDER' );
    }

    my ( $height, $height_units, $weight, $weight_units, $study_id );
    foreach my $study ( $doc->getElementsByTagName('CathStudy') ) {
        $height       = _child_text( $study, 'PatHeight' );
        $height_units = _required_child( $study, 'PatHeight' )->getAttribute('Units');
        $weight       = _child_text( $study, 'PatWeight' );
        $weight_units = _required_child( $study, 'PatWeight' )->getAttribute('Units');
        $study_id     = _child_text( $study, 'StudyID' );
    }

    # Round measurements up; a missing measurement is reported as 0.
    $weight = ceil( defined $weight ? $weight : 0 );
    $height = ceil( defined $height ? $height : 0 );

    $birth_date = UnixDate( ParseDate($birth_date), "%m/%d/%Y" );

    my @patient = (
        $last_name, $first_name, $birth_date,   $gender, $height,
        $height_units, $weight,  $weight_units, $study_id,
    );

    my %staff;
    foreach my $member ( $doc->getElementsByTagName('CathStaff') ) {
        my $surname = _child_text( $member, 'LastName' );
        my $given   = _child_text( $member, 'FirstName' );
        my $role    = $member->getAttribute('Role');
        $staff{$role} = "$given $surname";
    }

    my %lab;
    foreach my $sample ( $doc->getElementsByTagName('BloodComposition') ) {
        my $measurement = $sample->getAttribute('BCMeasurement');
        my $value       = _child_text( $sample, 'Value' );
        $lab{$measurement} = sprintf( "%.2f", $value );
    }

    my %procedure;
    foreach my $proc ( $doc->getElementsByTagName('CathProcedure') ) {
        my $name   = $proc->getAttribute('Procedure');
        my $number = $proc->getAttribute('ProcNum');
        $procedure{$number} = "$name";
    }

    # NOTE(review): notes and vitals are keyed by HH:MM, so two entries that
    # fall in the same minute overwrite each other. Left unchanged because
    # callers depend on this key format.
    my %notes;
    foreach my $entry ( $doc->getElementsByTagName('CathLogEvent') ) {
        my $stamp = _child_text( $entry, 'EventTime' );
        my $text  = _child_text( $entry, 'EventText' );
        $notes{ _clock_time($stamp) } = $text;
    }

    my %vital;
    foreach my $oa ( $doc->getElementsByTagName('CathOA') ) {
        my $stamp     = _child_text( $oa, 'OAtime' );
        my $systolic  = _child_text( $oa, 'Systolic' );
        my $diastolic = _child_text( $oa, 'Diastolic' );
        my $heart     = _child_text( $oa, 'HeartRate' );
        my $sao2      = _child_text( $oa, 'SaO2' );
        my $resp      = _child_text( $oa, 'RespRate' );
        my $comment   = "";    # OAText is deliberately not read

        $vital{ _clock_time($stamp) }
            = [ $systolic, $diastolic, $heart, $sao2, $resp, $comment ];
    }

    my %aldrete;
    foreach my $score ( $doc->getElementsByTagName('CathAldrete') ) {
        my $type        = $score->getAttribute('AldreteType');
        my $activity    = _child_text( $score, 'Activity' );
        my $respiration = _child_text( $score, 'Respiration' );
        my $circulation = _child_text( $score, 'Circulation' );
        my $loc         = _child_text( $score, 'LOC' );
        my $colour      = _child_text( $score, 'Color' );
        my $total       = _child_text( $score, 'TotalScore' );

        # Circulation comes last in the stored list; callers index by position.
        $aldrete{$type}
            = [ $activity, $respiration, $loc, $colour, $total, $circulation ];
    }

    return {
        PATIENT => \@patient,
        STAFF   => \%staff,
        LAB     => \%lab,
        PROC    => \%procedure,
        NOTES   => \%notes,
        ONASS   => \%vital,
        ALDRETE => \%aldrete,
        EVENTS  => \%events,
    };
}

# _required_child - return the first descendant element of $node named $tag,
# or die with a message naming both elements.
sub _required_child {
    my ( $node, $tag ) = @_;

    # Scalar context yields a NodeList; item(0) is undef when it is empty.
    my $matches = $node->getElementsByTagName($tag);
    my $child   = $matches->item(0);

    croak "CathReport: <" . $node->getNodeName . "> has no <$tag> element"
        unless defined $child;

    return $child;
}

# _child_text - return the value of the first child node of the first <$tag>
# element under $node, or die if the element is missing or empty.
sub _child_text {
    my ( $node, $tag ) = @_;

    my $content = _required_child( $node, $tag )->getFirstChild;

    croak "CathReport: <$tag> inside <" . $node->getNodeName . "> is empty"
        unless defined $content;

    return $content->getNodeValue;
}

# _clock_time - turn a timestamp whose date and time are joined by "T" into
# an HH:MM string.
sub _clock_time {
    my ($stamp) = @_;

    $stamp =~ s/T/ /g;

    return UnixDate( ParseDate($stamp), "%R" );
}

1;

Any ideas would be appreciated.

Cappadonna

Readmore tags added by GrandFather


In reply to Rewriting XML::DOM based module as XML::SAX by Cappadonna3030

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.