You don't need to use a CDATA section, as Base64 encoding does not use '<' or '&'.
Darn! I don't know what to say, especially as I am not able to reproduce the bug. What versions of perl, XML::Parser and expat are you using? On which OS?
The code below works just fine for me (I actually get a single call for each long element).
#!/usr/bin/perl
use strict;
use warnings;
use XML::Parser;
# Build an in-memory document holding two long, newline-free text nodes made
# of characters from the base64 alphabet, then parse it and let the Char
# handler report how the character data is delivered.
my $size= 500000;    # number of characters in each long text node
my @base64_chars=('a'..'z','A'..'Z','0'..'9','+','/','=');

# $size characters, each picked at random from @base64_chars
my $string= join( '', map { $base64_chars[rand(@base64_chars)] } (1..$size));

my $doc= qq{<doc>
<elt>foo</elt>
<long1>$string</long1>
<long2>$string</long2>
<elt>bar</elt>
</doc>};

# only character data is of interest here, so Char is the sole handler
my $p= XML::Parser->new( Handlers => { Char => \&char, });
$p->parse( $doc);
exit;
# Char handler: prints one line per call with the name of the current
# element and the lengths of the three views of the text that was just
# parsed.
#   $expat - the parser object handed to the handler
#   $char  - the character data for this call
sub char
{ my( $expat, $char)= @_;
  print "in ", $expat->current_element, " - ",
        "length char: ", length( $char), " - ",
        "length recognized: ", length( $expat->recognized_string), " - ",
        "length original: ", length( $expat->original_string), "\n",
        ;
}
I also tried getting the data from a file, and including "\n" to get several calls to the Char handler, and it all worked nicely:
#!/usr/bin/perl
use strict;
use warnings;
use XML::Parser;
use Fatal qw(open);
# Same experiment as a single long string, but the text nodes contain
# newlines and the document is read back from a file with parsefile.
my $line_size= 100000;    # characters per line, not counting the "\n"
my $nb_lines= 5;          # lines in each long text node
my @base64_chars=('a'..'z','A'..'Z','0'..'9','+','/','=');

# one line of $line_size characters picked at random from @base64_chars
my $line= join( '', map { $base64_chars[rand(@base64_chars)] } (1..$line_size));
$line.= "\n";
my $string= $line x $nb_lines;

# running totals filled in by char(); they start at 0 so the report below
# is always defined, even if the handler is never called for an element
my( $long1_length, $long2_length)= ( 0, 0);

my $doc= qq{<doc><elt>foo</elt><long1>$string</long1><long2>$string</long2><elt>bar</elt></doc>};

# the document is written next to the script, as "<script name>.xml"
my $xml_file= "$0.xml";
open( my $xml, '>', $xml_file);    # failure is fatal through 'use Fatal qw(open)'
print {$xml} $doc;
# buffered write errors only show up at close, which Fatal was not asked to cover
close $xml or die "cannot close $xml_file: $!";

my $p= XML::Parser->new( Handlers => { Char => \&char, });
$p->parsefile( $xml_file);

# total character data seen in each long element, summed over all calls
print "long1: $long1_length\n";
print "long2: $long2_length\n";
exit;
# Char handler: prints one line per call with the name of the current
# element and the lengths of the three views of the text that was just
# parsed, then adds the length of the character data to the running total
# of the long element it belongs to.
#   $expat - the parser object handed to the handler
#   $char  - the character data for this call
# Updates the file-level counters $long1_length and $long2_length.
sub char
{ my( $expat, $char)= @_;
  print "in ", $expat->current_element, " - ",
        "length char: ", length( $char), " - ",
        "length recognized: ", length( $expat->recognized_string), " - ",
        "length original: ", length( $expat->original_string), "\n",
        ;
  # a text node may arrive in several calls, so the lengths are summed
  if( $expat->in_element( 'long1')) { $long1_length+= length( $char); }
  if( $expat->in_element( 'long2')) { $long2_length+= length( $char); }
}
|