The code works by checking each word in the string, and seeing if it could be the start of a match. In cases where the same word could start several matches, the longer matches are tried first. When a match is found, the link is inserted, and the search continues at the next word after the new link.
Enjoy!
#!/usr/local/bin/perl -w use strict; my %table = ( "lines_of_text" => "foo.html", "this" => "bar.html", "its_full" => "foobar.html", "this_thing" => "baz.html", ); # create a lookup table, based on the first words of each key my %lookup; while (my($key, $val) = each %table) { # clean lookup keys, just in case $key =~ tr/A-Za-z_//cd; $key =~ tr/_//s; my($first) = split /_/, $key; push @{$lookup{$first}}, [$key, $val]; } # sort each lookup array by length of matching text while (my($first, $aref) = each %lookup) { $lookup{$first} = [ sort {length $b->[0] <=> length $a->[0]} @{$lookup{$first}} ]; } my $string; { local $/; $string = <DATA>; } $string =~ /^\s*/g; # Here's where the fun starts! my $begin = pos($string); # for each word, see if it's the start of any of the matching texts FIRST: while ($string =~ /(\S+)(\s*)/g) { my $end = pos($string) - length $2; my $first = lc $1; $first =~ tr/A-Za-z//cd; my $matches = $lookup{$first}; next unless $matches; # for each possible matching text, see if a match occurs MATCHES: for my $m (0 .. $#$matches) { my $match = $matches->[$m]; my $words = $first; my $space = 0; # get the appropriate number of next words for (1 .. $match->[0] =~ tr/_//) { last unless $string =~ /(\S+)(\s*)/g; $words .= "_$1"; $space = length $2; } if ($words eq $match->[0]) { # match found: put a link around the text my $text = substr($string, $begin, pos($string) - $begin - + $space); my $link = qq{<A HREF="$match->[1]">$text</A>}; substr($string, $begin, pos($string) - $begin - $space) = +$link; pos($string) = $begin + length $link; # remove this match, so only the first occurence will be l +inked splice(@$matches, $m, 1); next FIRST; } pos($string) = $end; } } continue { $begin = pos($string); } print $string; __DATA__ This is just lines of text here, and also there. Consider this human readable text; it's full of letters and punctuation.
In reply to Re: Mass Text Replacement
by chipmunk
in thread Mass Text Replacement
by tedv
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |