The "cleaner" way is to work out which text() nodes, and which positions inside them, need the tags inserted, and then to replace each of those text() nodes with three siblings: the text before the position, the tag, and the text after the position:
#!/usr/bin/perl
use warnings;
use strict;
use feature qw{ say };

use XML::LibXML;
use List::Util qw{ sum };

# insert_tag($text, $pos, $tag_name, $query)
#
# Splits the text node $text at character offset $pos and puts an empty
# element named $tag_name, carrying a "query" attribute set to $query,
# between the two halves.  The original node is removed from the tree and
# replaced by three siblings: text before, the new element, text after.
#
# Returns the new text node that holds the part before $pos, so a caller
# that has to place a second tag at a smaller offset of the same original
# node can keep working on a node that is still attached to the tree.
sub insert_tag {
    my ($text, $pos, $tag_name, $query) = @_;

    # Use the raw character data.  Stringifying the node would go through
    # toString(), which returns the serialised form (escaped entities,
    # CDATA wrappers), so offsets computed on it would be wrong.
    my $content = $text->data;
    my $before  = 'XML::LibXML::Text'->new(substr $content, 0, $pos);
    my $after   = 'XML::LibXML::Text'->new(substr $content, $pos);

    my $tag = 'XML::LibXML::Element'->new($tag_name);
    $tag->setAttribute(query => $query);

    my $parent = $text->parentNode;
    $parent->insertBefore($before, $text);
    $parent->insertAfter($after, $text);
    $parent->replaceChild($tag, $text);

    return $before;
}

my $xml = "<foo>The quick br<bar>o</bar>wn <baz>f<bar>o</bar>x</baz>";
$xml .= " jumps over the lazy d<bar>o</bar>g.</foo>";

# Not used below: the script marks matches with empty <start/> and <end/>
# elements rather than wrapping them in an element of this name.
my $new_element = "canid";

my @queried = ("lazy dog", "quick brown fox",);

my $dom = 'XML::LibXML'->load_xml(string => $xml);

QUERY:
for my $query (@queried) {
    # An empty query matches everywhere and nowhere; there is nothing to mark.
    next QUERY if $query eq '';

    # Re-read the text nodes for every query, because the previous
    # iteration replaced some of them.
    my @texts    = $dom->findnodes('//text()');
    my @contents = map { $_->data } @texts;

    # Search the document text as one string, so a match may span any
    # number of text nodes and is never hidden by an earlier partial
    # match inside the same node.  Only the first occurrence is marked.
    my $start = index join('', @contents), $query;
    next QUERY if $start < 0;
    my $end = $start + length $query;

    # Map the global offsets back to (node index, offset inside node).
    my ($from, $from_pos, $to, $to_pos);
    my $offset = 0;
    NODE:
    for my $i (0 .. $#texts) {
        my $next_offset = $offset + length $contents[$i];

        # The node containing the first character of the match.
        if (!defined $from && $start < $next_offset) {
            ($from, $from_pos) = ($i, $start - $offset);
        }

        # The node containing the last character of the match.
        if (defined $from && $end <= $next_offset) {
            ($to, $to_pos) = ($i, $end - $offset);
            last NODE;
        }

        $offset = $next_offset;
    }

    # Insert the end marker first.  If the match starts in the same node,
    # that node has just been replaced, so continue with the returned
    # "before" part; the start offset is still valid there because it is
    # smaller than the end offset.
    my $head = insert_tag($texts[$to], $to_pos, 'end', $query);
    my $start_node = $from == $to ? $head : $texts[$from];
    insert_tag($start_node, $from_pos, 'start', $query);
}

print $dom->toString;
($q=q:Sq=~/;[c](.)(.)/;chr(-||-|5+lengthSq)`"S|oS2"`map{chr |+ord }map{substrSq`S_+|`|}3E|-|`7**2-3:)=~y+S|`+$1,++print+eval$q,q,a,
In reply to Re: LiBXML: New markup while preserving earlier tags?
by choroba
in thread LiBXML: New markup while preserving earlier tags?
by Samantabhadra
| For: | Use: |
| --- | --- |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |