Thanks to kind advice given by various nice folks around here I've been working on a different approach. This seems pretty fast, but is it fast enough me wonders. This version only deals with standard < > tags.
#!/usr/bin/perl; use Modern::Perl; use Time::HiRes qw( gettimeofday tv_interval ); my $qd; $qd->{'a'} = 'b'; $qd->{'b'} = 'c'; $qd->{'c'} = 21; $qd->{'action'} = "default"; $qd->{'lang'} = "en"; my $knowncmds = "(qd|double|use)"; say aXML_eval(qq@<html lang="<qd a="b">lang</qd>"> <head> <title><qd a="b" b="c">action</qd></title> </head> <body> orphan data 1 <double><qd><qd><qd>a</qd></qd></qd></double> <use>actions/<qd>action</qd>/body.aXML</use> orphan data 2 </body></html>@ ); sub aXML_qd { $qd->{$_[1]}; } sub aXML_double { $_[1]*2; } sub aXML_use { "opened file :: $_[1]"; } sub aXML_eval { my $aXML = shift; $aXML =~ s@</@{endtag}@gs; $aXML =~ s@/@{bs}@gs; $aXML =~ s@{endtag}@</@gs; $aXML =~ s@:@{colon}@gs; $aXML =~ s@;@{semicolon}@gs; my @chars = split //, $aXML; my $level = 0; my $dropping_level; my $dropping = 0; my $dropnext = 0; my @finalletters; foreach my $char (@chars) { if ($dropnext) { $dropping = 1; $dropnext = 0; } if ($char eq "<") { $level++; } if ($char eq ">") { $level--; } if ($char eq "/") { unless($dropping) { $dropping_level = $level; $dropnext = 1; } } if ($dropping) { if ($level < $dropping_level) { $dropping = 0; } + } push (@finalletters,$char) unless $dropping; } $aXML = join('',@finalletters); while ($aXML =~ m@<([^<>]+?)>(.*?)</>@gs) { my $original_tag = $1; my $original_data = $2; my $original_string = "<$1>$2</>"; my $replace_string; if ($original_tag =~ m@([^\s]+?)\s(.*)@gs) { my $tag_name = $1; my $tag_attr = $2; $tag_attr =~ s@(.*?)="(.*?)"@ $1 =` '$2',@gs; chop($tag_attr); $replace_string = ";$tag_name:{$tag_attr },'$original_data':;" +; } else { $replace_string = ";$original_tag:{},'$original_data':;"; } $aXML =~ s@$original_string@$replace_string@gs; } $aXML =~ s@:;@\)\]@gs; $aXML =~ s@;([^:;]+?):@\[$1\(@gs; $aXML =~ s@`@>@gs; $aXML =~ s@{bs}@/@gs; $aXML =~ s@{colon}@:@gs; $aXML =~ s@{semicolon}@;@gs; say "save this to disc...\n"; say "$aXML\n\n\n"; say "then process to get this...\n\n"; my $start = [ gettimeofday ]; while ($aXML =~ m@\[([^\[\]]*?)\]@gs) { my $tag = $1; my $original_string = "\[$1\]"; my $replace_string = ""; if ($tag =~ m@(.*)\(\{(.*?)\},'(.*)'\)@gs) { my $cmd = $1; my $args = $2; my $data = $3; if ($cmd =~ m@$knowncmds@s) { eval("\$replace_string = aXML_$tag;"); } else { my $new_args; if ($args =~ m@\S@g) { my $arg_string = "\$temp_arg_hash = \{ $args \}"; my $temp_arg_hash; eval ($arg_string); while (my ($key,$val) = each(%$temp_arg_hash) ) { $new_args .= " $key=\"$val\""; } $args = $new_args; } $replace_string = "<$cmd$args>$data</$cmd>"; } } $original_string =~ s@\(@\\\(@gs; $original_string =~ s@\)@\\\)@gs; $original_string =~ s@\[@\\\[@gs; $original_string =~ s@\]@\\\]@gs; $original_string =~ s@\{@\\\{@gs; $original_string =~ s@\}@\\\}@gs; $aXML =~ s@$original_string@$replace_string@gs; } my $end = [ gettimeofday ]; my $total_elapsed = tv_interval($start,$end); $aXML .= "\n\nprocessing took : $total_elapsed seconds"; return $aXML; }
In reply to Fast enough yet? by Logicus
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |