use strict;
use warnings;
use utf8;    # this source file itself contains UTF-8 literals
# force file to save as unicode: 日本語

# Rewrite every "kanji(reading)" annotation in $text as LaTeX-style ruby
# markup, "\ruby{kanji}{reading}".
#
# A match is a run of one or more Han characters immediately followed by a
# run of one or more Hiragana characters enclosed in ASCII parentheses.
# Text that does not match is left untouched.
#
# Parameters: $text - a decoded (character, not byte) string.
# Returns:    a modified copy of $text; the argument is not changed.
sub add_ruby_markup {
    my ($text) = @_;

    # "\\" in the replacement yields a single literal backslash.
    $text =~ s/(\p{Han}+)\((\p{Hiragana}+)\)/\\ruby{$1}{$2}/g;

    return $text;
}

# Read test.txt line by line and print each converted line to STDOUT.
# Dies with the OS error message if the file cannot be opened or closed.
sub main {
    my $input_file = 'test.txt';

    # The Unicode properties in the pattern only match decoded characters,
    # so the input must be decoded on read and re-encoded on output.
    # NOTE(review): assumes test.txt is UTF-8 encoded -- confirm.
    open my $in, '<:encoding(UTF-8)', $input_file
        or die "Cannot open $input_file: $!";
    binmode STDOUT, ':encoding(UTF-8)';

    while (my $line = <$in>) {
        print add_ruby_markup($line);
    }

    close $in or die "Cannot close $input_file: $!";
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without touching test.txt.
main() unless caller;