# createMarkup builds a 'mytxt' element for the given text in which dates are
# wrapped in 'date' child elements and numbers in 'number' child elements.
#
# For example, assuming findDate and findNumber (defined elsewhere) recognise
# "Oct. 21" and "10043", the text
#   'On Oct. 21, the Dow Jones rose to 10043 points'
# should become
#   <mytxt>On <date>Oct. 21</date>, the Dow Jones rose to
#   <number>10043</number> points</mytxt>
#
# Parameters:
#   $text - plain text to mark up
#   $doc  - XML::DOM document that will own the created nodes
# Returns: the new 'mytxt' element.
sub createMarkup {
    my ($text, $doc) = @_;

    # Parent element that holds the text and all generated markup.
    my $node = XML::DOM::Element->new($doc, 'mytxt');

    # Dates are marked up first, on the single text node holding all of $text.
    my $textNode = $node->addText($text);
    markupElement($textNode, $node, \&findDate, 'date');

    # Numbers are then marked up only in the children that are still text
    # nodes, so anything already wrapped in a 'date' element is left alone.
    # The loop walks the list returned by this one getChildNodes call;
    # markupElement itself handles the text following each match it finds.
    foreach my $child ($node->getChildNodes()) {
        next unless $child->isTextNode();
        markupElement($child, $node, \&findNumber, 'number');
    }

    return $node;
}

# markupElement splits a text node around every match reported by a finder
# function, wrapping each match in a new element named $elemName.
#
# Parameters:
#   $textNode  - text node to scan; dies if it is not a text node
#   $parent    - parent of $textNode; new nodes are inserted into it
#   $rFindFunc - code ref called with the remaining text; it must return
#                ($before, $match, $after) on a match and an empty list when
#                there is nothing (more) to find, which ends the loop
#   $elemName  - tag name for the elements wrapping each match
# Returns: nothing useful; the tree under $parent is modified in place.
sub markupElement {
    my ($textNode, $parent, $rFindFunc, $elemName) = @_;

    die 'markupElement: expected a text node' unless $textNode->isTextNode();

    my $doc = $parent->getOwnerDocument();

    # Everything created below is inserted in front of the original node's
    # next sibling, so document order is preserved. If there is no next
    # sibling this is undef; insertBefore is then expected to append at the
    # end (DOM semantics for a null reference node) -- confirm for XML::DOM.
    my $nextNode = $textNode->getNextSibling();

    # Text nodes are read and written through the generic node-value
    # accessors, matching what createMarkup uses on the same kind of node.
    my $text = $textNode->getNodeValue();

    while (my ($before, $elem, $after) = $rFindFunc->($text)) {
        # The current text node keeps only what precedes the match.
        $textNode->setNodeValue($before);

        # An element has no value of its own, so the matched text becomes
        # its child text node.
        my $elemNode = XML::DOM::Element->new($doc, $elemName);
        $elemNode->addText($elem);
        $parent->insertBefore($elemNode, $nextNode);

        # The remainder goes into a fresh text node, which becomes the node
        # to split on the next iteration.
        $textNode = $doc->createTextNode($after);
        $parent->insertBefore($textNode, $nextNode);

        $text = $after;
    }

    return;
}