#!/usr/bin/perl
use strict;
use warnings;
# Driver: reads the sample text from DATA one line at a time, cleans each
# line, and groups the text into sections.  A line that consists solely of
# an opening tag (e.g. "<Root>") ends the current section, which is handed
# to xml_output(), and starts a new one.

# State carried from one input line to the next.
my $tag;            # name of the most recent line that held only an opening tag
my $output = "";    # text collected for the section opened by $tag
my $fh;             # handle handed back by xml_output(), reused between calls

while (<DATA>)
{
    chomp;
    s/\^[A-Z]//g;       # drop a literal caret + capital letter (the "^M" in the sample data)
    s/[\cA-\cZ]/ /g;    # turn real control characters Ctrl-A..Ctrl-Z into spaces
    s/\s+$//;           # trim trailing whitespace
    s/\s+/ /g;          # squeeze every whitespace run to a single space

    # Delete the empty tags.  Only a tag immediately followed by its own
    # closing tag counts as empty; the back-reference stops a line such as
    # "<Root><ID>1</ID>" from being wiped out together with its content.
    # This runs before newlines are inserted so the pair is still adjacent.
    s/<(\w+)><\/\1>//g;

    s/<\/(\w+)>/<\/$1>\n/g;    # Add a new line after each closing tag

    if (/^<(\w+)>$/)
    {
        # Save the capture before calling a sub that runs its own matches.
        my $next_tag = $1;
        $fh = xml_output($output, $tag, $fh);
        $output = "";
        $tag = $next_tag;
    }
    else
    { # not a {TAG} line
        next unless defined $tag;    # ignore text that precedes the first tag line
        next if /^\s*$/;             # ignore blank lines
        chomp;                       # drop the newline added after a line-final closing tag
        $output .= ($output ne "") ? " $_" : "<$tag>$_";
    }
} # End of While loop

# Flush whatever was collected after the last tag line.
$fh = xml_output($output, $tag, $fh);

# Close the last file explicitly so buffered write errors are reported.
if ($fh)
{
    close($fh) or die "close: $!";
}
# xml_output($output, $tag, $fh)
#
# Writes one accumulated section of text to an XML file and returns the
# filehandle that later sections should keep using.
#
#   $output - section text; nothing happens when it is undefined or empty
#   $tag    - name of the tag that opened the section (accepted so existing
#             callers keep working; not used here)
#   $fh     - handle returned by the previous call, or undef
#
# A section containing <ID>value</ID> starts a new file named "value.xml":
# a handle passed in is closed first, and an XML declaration is written
# ahead of the section.  A section without an <ID> is appended to the
# handle passed in; when no handle is open there is nowhere to write it
# and it is skipped.
#
# Returns the handle now in use (undef if no file has been opened yet).
# Dies when a file cannot be opened, written or closed.
sub xml_output
{
    my ($output, $tag, $fh) = @_;

    return $fh unless defined $output && length $output;

    # Store the value of StoryId for the filename.  The non-greedy capture
    # stops at the first closing </ID>.
    # NOTE(review): the ID is used verbatim as a file name; validate it if
    # the input can come from an untrusted source.
    if ($output =~ m{<ID>(.*?)</ID>})
    {
        my $file = "$1.xml";    # copy now; $1 changes on the next successful match

        if ($fh)
        {
            close($fh) or die "close: $!";
        }
        open($fh, '>', $file) or die "$file: $!";
        print {$fh} "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
            or die "$file: $!";
    }

    # No file has been opened so far: nothing to append to.
    return $fh unless $fh;

    # Format the section: every tag that directly follows another tag
    # (ignoring whitespace between them) starts on its own line.
    $output =~ s/>\s*</>\n</g;
    print {$fh} "$output\n" or die "write: $!";

    return $fh;
} # End of sub xml_output
__DATA__
<Root>^M
<ID>3592467</ID>^M
<P>World War II vets need Social Security</P>^M
<type>LETTERS</type> <word>FINANCE</word> ^M
<Text>
The majority of World War II vets, which includes me, who are still li
+ving are on Social Security
</Text>^M
</Root>^M
The script above writes its output to a file named after the value of the <ID> tag.
In that file the tags are not formatted. How can I format the text?