#! perl

# Reads whitespace-separated records from test1.txt, one per line:
#
#     category  forename  surname  password
#
# and prints one pipe-delimited line per record:
#
#     <squashed forename> <squashed surname>|pass|forename surname|FOO|BAR|BAZ|FOO forename surname
#
# where "squashed" means accents stripped and lowercased. The category
# field is read but not used. Blank lines are skipped; a line with more
# than four fields is a fatal error.

use strict;
use warnings;

use Unicode::Normalize;    # for the NFKD function

my ($FOO, $BAR, $BAZ) = qw(FOO BAR BAZ);

my $INPUT_FILE = 'test1.txt';

# Returns $name with combining marks (accents) removed and lowercased.
sub squash_name {
    my ($name) = @_;

    # NFKD separates accented letters into letters + combining marks,
    # so the marks can be deleted on their own afterwards.
    my $squashed = NFKD($name);
    $squashed =~ s/\pM//g;    # remove marks

    return lc $squashed;
}

# No encoding layer is given, so the file is read as raw bytes and each
# byte is treated as one character (effectively iso-latin-1). For input
# in another encoding, name it explicitly, e.g. '<:encoding(UTF-8)', and
# give STDOUT a matching layer with binmode.
open my $in, '<', $INPUT_FILE
    or die "Cannot open $INPUT_FILE: $!";

LINE:
while (my $line = <$in>) {
    chomp $line;

    # A blank line has no fields to report on.
    next LINE if $line !~ /\S/;

    # The first field is the category, which is not needed here.
    my (undef, $fornom, $surnom, $pass, @rest) = split ' ', $line;
    die "Extra crud at the end of the line: @rest" if @rest;

    my $squashed_names = squash_name($fornom) . ' ' . squash_name($surnom);

    print "$squashed_names";
    print "|$pass|$fornom $surnom|$FOO|$BAR|$BAZ|$FOO $fornom $surnom\n";
}

close $in
    or die "Cannot close $INPUT_FILE: $!";