#!/usr/bin/perl
#
# Scan the numbered HTML files K:\1.html .. K:\610.html for "sentences"
# (records delimited by ".") that contain a word read from standard input.
# Every hit is printed to STDOUT and appended to K:\sentences.txt, prefixed
# with the name of the file it was found in.
#
use strict;
use warnings;
use locale;
use POSIX qw(locale_h);

# Query and save the old locale.
my $old_locale = setlocale(LC_CTYPE);

# Try to switch the character-type locale. "ISO8859" is not a locale name
# most systems know, so report the failure instead of ignoring it. The
# sentence pattern below spells out the accented letters explicitly and
# therefore does not depend on this call succeeding.
my $new_locale = setlocale(LC_CTYPE, "ISO8859");
warn "setlocale(LC_CTYPE, \"ISO8859\") failed; keeping the current locale\n"
    unless defined $new_locale;

print "Input word: ";
my $word = <STDIN>;
die "No input word given\n" unless defined $word;    # EOF on STDIN
chomp($word);
die "No input word given\n" unless length $word;

# ... (code elided in the original listing)

# Accented letters accepted inside a sentence, written as hex escapes so the
# script does not depend on its own source encoding: a/e/i/o/u with acute,
# n with tilde and u with diaeresis, lower case first, then upper case.
# NOTE(review): assumes the HTML files are ISO-8859-1 / Windows-1252 bytes --
# TODO confirm. The listing showed Greek letters here, which is how exactly
# these byte values render when mis-read as ISO-8859-7.
my $accented = '\xE1\xE9\xED\xF3\xFA\xF1\xFC\xC1\xC9\xCD\xD3\xDA\xD1\xDC';

# The plain [a-z0-9...] classes do not match accented letters, which is why
# they are listed explicitly.
my $lead_class  = "[a-z${accented}0-9,:;\"' ]";
my $trail_class = "[a-z${accented}0-9,:;\"'. ]";

# The word is matched literally; quotemeta keeps regex metacharacters in the
# input from changing the pattern, and the braces stop "$word[" from being
# parsed as an array subscript.
my $quoted = quotemeta $word;

# Group 2 captures the sentence containing the word.
# NOTE(review): the second alternative of group 1 matches the literal text
# ". ]" exactly as in the original pattern; "[. ]" or ". " may have been
# intended -- confirm before changing.
my $sentence_re = qr/(^|\. \])(${lead_class}* ${quoted}${trail_class}*)$/i;

# HTML filenames have already been converted to numbers (e.g. 1.html etc).
my $last_file_no = 610;
my $out_path     = "K:\\sentences.txt";

# Opened once in append mode; every input file adds to the same output.
open my $out, '>>', $out_path or die "Can't open $out_path: $!";

for my $fileNo (1 .. $last_file_no) {
    my $in_path = "K:\\$fileNo.html";
    open my $in, '<', $in_path or die "Can't open $in_path: $!";

    # Read "sentence" sized records: everything up to and including the next
    # full stop. Localised so the change ends with this iteration.
    local $/ = ".";

    # The record is deliberately left in $_ for the elided processing below.
    while (<$in>) {
        # ... (code elided in the original listing)

        if (/$sentence_re/) {
            my $hit = "\n$fileNo.html: $2\n";
            print {$out} $hit;
            print $hit;
        }
    }

    close $in;
}

# Buffered write errors only surface at close time.
close $out or die "Can't close $out_path: $!";