use strict;
use warnings;
use feature 'state';
use feature 'say';
#use utf8; #refer to node id=11154836 below
use autodie;
use Encode qw(decode);

# Filter script: reads UTF-8 text line by line from the files named on the
# command line (or from STDIN when none are given), replaces a fixed set of
# typographic symbols with HTML named character references, and prints
# each converted line to STDOUT.
#
# The driver runs only when this file is executed as a script, so the file
# can also be loaded with require/do just to obtain smblRpr().
unless (caller) {
    main();
    exit;
}

# main()
#   Read loop of the filter. Takes no arguments and returns nothing.
sub main {
    # Input is decoded explicitly below, so output must be encoded as well.
    # Without this layer, characters in U+0080..U+00FF would be written as
    # single Latin-1 bytes and wider characters would trigger "Wide
    # character" warnings.
    binmode STDOUT, ':encoding(UTF-8)';

    while (my $clmnVal = <>) {
        $clmnVal = decode('UTF-8', $clmnVal);
        chomp $clmnVal;
        $clmnVal = smblRpr($clmnVal);
        #say unpack('U0H*', $clmnVal);
        say $clmnVal;
    }

    return;
}

# smblRpr($strng)
#   Returns a copy of $strng in which every character listed in the table
#   below is replaced by its HTML named character reference. All other
#   characters are left untouched and the argument itself is not modified.
#   $strng is expected to be a decoded (character) string.
sub smblRpr {
    my ($strng) = @_;

    # A hash reference is used because list-initialising a state hash
    # ("state %h = (...)") is only accepted by perl 5.28 and later.
    #
    # NOTE(review): the values used to be the literal characters themselves,
    # which contradicts the "HTML description" intent of this table; named
    # character references are used instead -- confirm this matches the
    # consumer of the output.
    state $entityFor = {
        # code point  => HTML named character reference
        "\N{U+201A}" => '&sbquo;',     # SINGLE LOW-9 QUOTATION MARK
        "\N{U+201C}" => '&ldquo;',     # LEFT DOUBLE QUOTATION MARK
        "\N{U+201D}" => '&rdquo;',     # RIGHT DOUBLE QUOTATION MARK
        "\N{U+201E}" => '&bdquo;',     # DOUBLE LOW-9 QUOTATION MARK
        "\N{U+2013}" => '&ndash;',     # EN DASH
        "\N{U+2014}" => '&mdash;',     # EM DASH
        "\N{U+2018}" => '&lsquo;',     # LEFT SINGLE QUOTATION MARK
        "\N{U+2019}" => '&rsquo;',     # RIGHT SINGLE QUOTATION MARK
        "\N{U+2020}" => '&dagger;',    # DAGGER
        "\N{U+2026}" => '&hellip;',    # HORIZONTAL ELLIPSIS
        "\N{U+2122}" => '&trade;',     # TRADE MARK SIGN
        "\N{U+00A9}" => '&copy;',      # COPYRIGHT SIGN (U+00A8 is DIAERESIS)
        "\N{U+00AE}" => '&reg;',       # REGISTERED SIGN
        "\N{U+00B1}" => '&plusmn;',    # PLUS-MINUS SIGN
        "\N{U+00BC}" => '&frac14;',    # VULGAR FRACTION ONE QUARTER
        "\N{U+00BD}" => '&frac12;',    # VULGAR FRACTION ONE HALF
        "\N{U+00BE}" => '&frac34;',    # VULGAR FRACTION THREE QUARTERS
        "\N{U+00B0}" => '&deg;',       # DEGREE SIGN
    };

    # Every key is a single character, so one bracketed class matches them
    # all. \Q...\E keeps the class safe if a metacharacter is added later.
    state $regex = do {
        my $chars = join '', sort keys %{$entityFor};
        qr/[\Q$chars\E]/;
    };

    return $strng =~ s/($regex)/$entityFor->{$1}/gr;
}