in reply to LiBXML: New markup while preserving earlier tags?
The "cleaner" way is to find out in which text() nodes, and at which positions inside them, you need to insert the tags, and then to replace each of those text nodes with the text before the position, the tag, and the text after the position:
#!/usr/bin/perl
use warnings;
use strict;
use feature qw{ say };

use XML::LibXML;
use List::Util qw{ sum };

# Split the text node $text at character offset $pos and put an empty
# <$tag_name query="$query"/> element between the two halves.
#
# Parameters:
#   $text     - XML::LibXML::Text node that is attached to a parent
#   $pos      - character offset inside the node's data
#   $tag_name - name of the element to insert
#   $query    - value stored in the element's "query" attribute
#
# Returns the new text node holding the characters before $pos, so the
# caller can split that part again (needed when both milestones fall into
# the same original text node).
sub insert_tag {
    my ($text, $pos, $tag_name, $query) = @_;

    # Work on the raw character data: stringifying the node would yield
    # its serialised form instead.
    my $content = $text->data // '';
    my $parent  = $text->parentNode;

    my $before = 'XML::LibXML::Text'->new(substr $content, 0, $pos);
    my $after  = 'XML::LibXML::Text'->new(substr $content, $pos);
    my $tag    = 'XML::LibXML::Element'->new($tag_name);
    $tag->setAttribute(query => $query);

    # Same order of DOM operations as before: surround the old node with
    # the two halves, then swap the old node for the new element.
    $parent->insertBefore($before, $text);
    $parent->insertAfter($after, $text);
    $parent->replaceChild($tag, $text);

    return $before;
}

# Locate the first occurrence of $query in the concatenation of the
# strings in @$contents.
#
# Returns ($start_idx, $start_pos, $end_idx, $end_pos): the index of the
# string in which the match starts and the offset inside it, and the index
# of the string in which the match ends and the offset just past its last
# character. Returns an empty list if $query is empty or does not occur.
sub find_span {
    my ($contents, $query) = @_;

    return if '' eq $query;

    my $match_start = index join('', @$contents), $query;
    return if -1 == $match_start;
    my $match_end = $match_start + length $query;

    my ($start_idx, $start_pos);
    my $offset = 0;    # global offset at which the current string begins
    for my $idx (0 .. $#$contents) {
        my $node_end = $offset + length $contents->[$idx];

        # The match starts in the first string that extends past
        # $match_start; empty strings can never satisfy this.
        ($start_idx, $start_pos) = ($idx, $match_start - $offset)
            if !defined $start_idx && $match_start < $node_end;

        # The match ends in the first string that reaches $match_end, which
        # keeps the end milestone right behind the last matched character.
        return ($start_idx, $start_pos, $idx, $match_end - $offset)
            if $match_end <= $node_end;

        $offset = $node_end;
    }

    return;
}

my $xml = "<foo>The quick br<bar>o</bar>wn <baz>f<bar>o</bar>x</baz>";
$xml .= " jumps over the lazy d<bar>o</bar>g.</foo>";
my $new_element = "canid";    # Not referenced anywhere below.
my @queried = ("lazy dog", "quick brown fox",);

my $dom = 'XML::LibXML'->load_xml(string => $xml);

for my $query (@queried) {
    # Re-read the text nodes for every query, because the previous
    # iteration replaced some of them.
    my @texts = $dom->findnodes('//text()');

    my @span = find_span([ map { $_->data // '' } @texts ], $query);
    next unless @span;
    my ($start_idx, $start_pos, $end_idx, $end_pos) = @span;

    # Insert the end milestone first so that $start_pos stays valid.
    my $head = insert_tag($texts[$end_idx], $end_pos, 'end', $query);

    # If the whole match sits in one text node, that node has just been
    # removed from the tree; its leading part lives on in $head.
    my $start_node = $start_idx == $end_idx ? $head : $texts[$start_idx];
    insert_tag($start_node, $start_pos, 'start', $query);
}

print $dom;
($q=q:Sq=~/;[c](.)(.)/;chr(-||-|5+lengthSq)`"S|oS2"`map{chr |+ord }map{substrSq`S_+|`|}3E|-|`7**2-3:)=~y+S|`+$1,++print+eval$q,q,a,
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: LiBXML: New markup while preserving earlier tags?
by Samantabhadra (Acolyte) on Jun 23, 2018 at 18:15 UTC | |
by choroba (Cardinal) on Jun 24, 2018 at 15:21 UTC | |
by Samantabhadra (Acolyte) on Jun 26, 2018 at 17:56 UTC | |
by choroba (Cardinal) on Jun 26, 2018 at 20:23 UTC | |
by Samantabhadra (Acolyte) on Jun 27, 2018 at 09:16 UTC |