## ... %tags as defined in the OP ...

# Pivot "codepoint<TAB>tag<TAB>content" input lines into one tab-separated
# output row per codepoint: the decimal codepoint followed by one column per
# tag.  $tags{$tag} is used as the index of that tag's column in the row.
#
# A row is flushed whenever the codepoint changes, so all lines belonging to
# one codepoint must be contiguous in the input.  Progress and start/end
# timestamps go to STDERR; data rows go to STDOUT.

my $tagcnt = scalar keys %tags;    # number of tag columns in every output row

warn scalar( localtime() ), "\n";  # initial time mark

my $prev_codepoint;                # undef until the first data line is seen,
                                   # so U+0000 and empty input are handled
my @outrec;                        # columns of the row being assembled
my $ndone    = 0;                  # data lines merged into rows so far
my $nskipped = 0;                  # lines dropped as malformed / unknown tag

LINE:
while ( my $line = <> ) {
    next LINE if $line =~ /^#/;    # skip comments
    chomp $line;
    next LINE if $line !~ /\S/;    # skip blank lines

    my ( $codepoint, $tag, $content ) = split /\t/, $line;

    $codepoint =~ s/^U\+//;        # strip the "U+" prefix; hex() takes bare digits

    # Drop lines that cannot be placed in a row.  Previously a missing field
    # or a tag absent from %tags produced an undef index, which silently
    # overwrote column 0 of the current row.
    my $is_usable = defined $content
        && exists $tags{$tag}
        && $codepoint =~ /\A(?:0[xX])?[0-9A-Fa-f]+\z/;
    if ( !$is_usable ) {
        ++$nskipped;
        next LINE;
    }

    $codepoint = hex $codepoint;   # hex string -> decimal number

    if ( !defined $prev_codepoint || $codepoint != $prev_codepoint ) {

        # Codepoint changed: emit the finished row and start an empty one.
        printrec( $prev_codepoint, \@outrec ) if defined $prev_codepoint;
        @outrec         = ('') x $tagcnt;
        $prev_codepoint = $codepoint;
    }
    $outrec[ $tags{$tag} ] = $content;

    # Ongoing time-stamping: the condition (and its increment) is evaluated
    # before the message is built, so the message shows the updated count.
    warn "$ndone done at " . scalar( localtime() ) . "\n"
        if ++$ndone % 1000 == 0;
}

# Flush the last row, but only if any data line was actually read.
printrec( $prev_codepoint, \@outrec ) if defined $prev_codepoint;

warn "$nskipped malformed or unknown-tag line(s) skipped\n" if $nskipped;
warn scalar( localtime() ), "\n";  # final timestamp

# printrec( $cp, $rec )
#   $cp  - decimal codepoint that leads the row
#   $rec - array ref holding the row's column values
# Prints one tab-separated, newline-terminated row to STDOUT.  Any character
# in the range U+10000..U+1FFFFF is replaced by the literal text \x{HEX}.
# NOTE(review): that substitution can only match if the input was decoded to
# characters (e.g. perl -CS or an :encoding layer); on raw bytes no element
# exceeds 0xFF.  Confirm how the script is invoked.
sub printrec {
    my ( $cp, $rec ) = @_;

    my $s = join( "\t", $cp, @$rec ) . "\n";
    $s =~ s/([\x{10000}-\x{1FFFFF}])/sprintf '\x{%X}', ord $1/ge;
    print $s;
    return;
}