#!/usr/bin/perl -CSD
#
# Replace precomposed characters in the range U+1EA0..U+1EFF (the block
# holding the precomposed Vietnamese letters) with the canonical
# decomposition listed for them in UnicodeData.txt.
#
# Input is read from the files named on the command line, or from STDIN
# when none are given; the converted text is written to STDOUT.
# UnicodeData.txt must be present in the current directory.
#
# -CS marks STDIN/STDOUT/STDERR as UTF-8.  The additional D makes UTF-8 the
# default layer for other streams as well, so that files read through <>
# are decoded too; with -CS alone only STDIN would be decoded.
use strict;
use warnings;

my $unicode_data = "UnicodeData.txt";

open(my $db, '<', $unicode_data) or die "$unicode_data: $!";
my @lines = grep { /^1E[A-F].;/ } <$db>;
close $db;

# Load the decomp hash: keys are precomposed Vietnamese characters,
# values are the decomposition sequences from field 5 of UnicodeData.txt.
my %decomp;
for my $line (@lines) {
    my ($code, $mapping) = (split /;/, $line)[0, 5];

    # Only accept a plain list of hex code points.  Code points in this
    # range that have no decomposition (empty field) would otherwise map to
    # the empty string and be deleted from the text; tagged compatibility
    # mappings such as "<compat> ..." are not canonical and are left alone.
    next unless defined $mapping
        && $mapping =~ /\A[0-9A-Fa-f]+(?: [0-9A-Fa-f]+)*\z/;

    my $composed   = chr(hex($code));
    my $decomposed = join '', map { chr(hex($_)) } split / /, $mapping;
    $decomp{$composed} = $decomposed;
}

# An empty table would yield the invalid pattern "[]"; fail clearly instead.
die "$unicode_data: no decomposition entries found for U+1EA0..U+1EFF\n"
    unless %decomp;

# Character class matching every character that has a mapping, compiled once.
my $todecomp = join '', sort keys %decomp;
my $composed_re = qr/[$todecomp]/;

# Now apply decomposition to the data, line by line.
while (my $text = <>) {
    $text =~ s/($composed_re)/$decomp{$1}/g;
    print $text;
}