and an example input file:
#!/usr/bin/perl
# Reads the XML file named on the command line and prints it back to STDOUT,
# renaming <identifier> elements according to their text content:
#   - text starting with "http://arxiv.org" -> <identifier.url>
#   - text starting with "doi:"             -> <identifier.doi>
# Any other <identifier> element is left untouched.
use strict;
use warnings;
use File::Basename;
use XML::Twig;

binmode(STDOUT, ":utf8");

my ($inFile) = @ARGV;
die "Usage: $0 <input.xml>\n" unless defined $inFile;

# Fail early, reporting the actual OS error rather than a hard-coded
# message. The handle is only used as a readability probe; XML::Twig
# opens the file itself in parsefile() below.
open(my $in_fh, "<", $inFile) or die "$inFile: $!";
close($in_fh);

my $t = XML::Twig->new(
    twig_handlers => {
        # Rewrite each <identifier> as soon as it has been parsed.
        "identifier" => \&section,
        # Flush once a whole <record> has been parsed.
        "record"     => sub { $_[0]->flush; },
    },
);
$t->set_pretty_print("nice");
$t->set_keep_encoding;
$t->parsefile($inFile);

exit 0;

# Twig handler for <identifier> elements.
#
# Arguments: the twig and the element that was just parsed.
# If the element text starts with "http://arxiv.org" or "doi:", the element
# is replaced by a new one whose tag is the original tag plus ".url" or
# ".doi" and whose content is the original text. Otherwise nothing changes.
sub section {
    my ($twig, $elt) = @_;

    my $elt_txt = $elt->text;
    my $new_elt;

    if ($elt_txt =~ m{^http://arxiv\.org}) {
        $new_elt = XML::Twig::Elt->new($elt->tag . ".url" => $elt_txt);
    }
    elsif ($elt_txt =~ /^doi:/) {
        $new_elt = XML::Twig::Elt->new($elt->tag . '.doi' => $elt_txt);
    }

    $elt->replace_with($new_elt) if $new_elt;
}
<?xml version="1.0" encoding="UTF-8"?> <harvest> <record> <header> <datestamp>2005-09-18</datestamp> <setSpec>cs</setSpec> </header> <metadata> <title>Memory-Based Lexical Acquisition and Processing</title> <creator>Daelemans, Walter</creator> <subject>Computation &amp; Language</subject> <subject>Computer Science - Computation &amp; Language</subject> <description>Comment: 18 pages</description> <date>1994-05-16</date> <type>text</type> <identifier>http://arxiv.org/abs/cmp-lg/9405018</identifier> <identifier>Steffens (ed.) Machine Translation &amp; Lexion. Springer, 1995</identifier> </metadata> </record> </harvest>
In reply to XML::Twig::flush() and html/xml entities by mandarin
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |