#!/usr/bin/perl
use strict;
use warnings;

# Convert named (pseudo) entities such as "&Adieresis;" into numeric
# character references such as "&#196;", using a small lookup table.

# Unicode entity text => Unicode decimal number
my %lookup = (
    "Adieresis" => 196,
    "Aring"     => 197,
    "Ccedilla"  => 199,
    "Eacute"    => 201,
    "Ntilde"    => 209,
    "Odieresis" => 214,
);

# A few lines of text to test.
# Only elements 4 and 6 (counting from 0) should be converted: element 3
# looks like an entity, but "adieresis" is not a key of %lookup.
my @source = (
    "fred",
    "adieresis",
    "Adieresis",
    "&adieresis;",
    "&Adieresis;",
    "",
    "fr&Adieresis;ed",
);

# A (pseudo) entity: '&', then one or more characters that are neither
# ';' nor '&' (captured in $1), then a mandatory ';'.
# Excluding '&' from the name stops a stray ampersand earlier in the text
# from swallowing a real entity that follows it.
my $entity_re = qr/&([^;&]+);/;

# entity_to_numeric($text)
#   Returns a copy of $text in which every "&name;" whose name is a key of
#   %lookup has been replaced by "&#NNN;".  Entities with unknown names are
#   left exactly as they were.
sub entity_to_numeric {
    my ($text) = @_;

    # Inside an /e replacement the capture is $1.  "\1" would be a
    # *reference* to the constant 1 there, which is never a key of %lookup.
    $text =~ s{$entity_re}{
        exists $lookup{$1} ? '&#' . $lookup{$1} . ';' : '&' . $1 . ';'
    }ge;

    return $text;
}

# first_entity_name($text)
#   Returns the name of the first (pseudo) entity found in $text, or the
#   empty string when there is none.  Only used for the diagnostic output.
sub first_entity_name {
    my ($text) = @_;

    # Read $1 only when this very match succeeded; after a failed match $1
    # still holds the capture of an earlier successful match.
    return $text =~ $entity_re ? $1 : '';
}

foreach my $text (@source) {
    print "(", first_entity_name($text), ") ", entity_to_numeric($text), "\n";
}

#### Expected output:
# () fred
# () adieresis
# () Adieresis
# (adieresis) &adieresis;
# (Adieresis) &#196;
# () 
# (Adieresis) fr&#196;ed