#!/usr/bin/perl
#
# Read a Greek text file (WE_EX.txt) saved as UTF-8, divide each line into
# words, and inspect every character of every word to see whether it is a
# vowel.  For each word the output file (WE_EX2.out) receives:
#   - the word itself,
#   - each vowel found in it, one per line,
#   - a line giving the number of vowels.
#
use strict;
use warnings;
use Encode;
use feature 'unicode_strings';
use utf8;

# Body of a bracketed character class listing the code points treated as
# Greek vowels.  It is assembled from single-quoted pieces so that each range
# can carry a comment; the \x{...} escapes are interpreted by the regex
# engine when the class is interpolated into $GREEK_VOWEL_RE below.
my $GREEK_VOWEL_CLASS = join '',
    # Greek Extended block.  U+1FE4/U+1FE5 (small rho with breathing) and
    # U+1FEC (capital rho with dasia) are left out: rho is not a vowel.
    '\x{1F00}-\x{1FE3}',
    '\x{1FE6}-\x{1FEB}',
    '\x{1FED}-\x{1FFE}',
    # Greek and Coptic block: accented capitals, then the plain capitals.
    '\x{0386}-\x{038F}',
    '\x{0390}',
    '\x{0391}',             # Alpha
    '\x{0395}',             # Epsilon
    '\x{0397}',             # Eta
    '\x{0399}',             # Iota
    '\x{039F}',             # Omicron
    '\x{03A5}',             # Upsilon
    '\x{03A9}',             # Omega
    # Capitals with dialytika, accented small letters, small alpha.
    '\x{03AA}-\x{03B1}',
    '\x{03B5}',             # epsilon
    '\x{03B7}',             # eta
    '\x{03B9}',             # iota
    '\x{03BF}',             # omicron
    '\x{03C5}',             # upsilon
    # Small omega, small letters with dialytika, accented o/u/omega.
    '\x{03C9}-\x{03CE}';

# Matches a string that consists of exactly one Greek vowel code point.
my $GREEK_VOWEL_RE = qr/\A[${GREEK_VOWEL_CLASS}]\z/;

#
# SUBROUTINE: IS_VOWEL
# Checks whether a single decoded (character, not byte) Unicode code point
# is a Greek vowel.
#
# Argument: one character.
# Returns:  the character itself if it is a vowel, otherwise 0 (FALSE).
#
# The argument is copied and never modified.  The caller's variable must stay
# a character string; encoding to UTF-8 is the job of the output layer.
#
sub is_vowel {
    my ($char) = @_;

    return 0 unless defined $char;
    return $char =~ $GREEK_VOWEL_RE ? $char : 0;
}

#
# SUBROUTINE: VOWELS_IN
# Returns the list of vowels in a word, in order of appearance.  In scalar
# context the result of the grep is the number of vowels.
#
sub vowels_in {
    my ($word) = @_;

    return grep { is_vowel($_) } split //, $word;
}

#
# MAIN PROGRAM
# Reads WE_EX.txt line by line and writes the per-word report to WE_EX2.out.
# Both handles carry a UTF-8 layer, so all strings in between are characters.
#
sub main {
    open my $in, '<:encoding(UTF-8)', 'WE_EX.txt'
        or die "Can't open file WE_EX.txt for reading: $!";

    open my $out, '>:encoding(UTF-8)', 'WE_EX2.out'
        or die "Can't open file WE_EX2.out for writing: $!";

    while (my $line = <$in>) {
        # Split on runs of non-word characters and drop the empty leading
        # field that appears when a line starts with a separator.
        for my $word (grep { length } split /\W+/, $line) {
            print {$out} "$word\n";

            my @vowels = vowels_in($word);
            print {$out} "$_\n" for @vowels;

            my $count = scalar @vowels;
            print {$out} "The number of vowels is: $count.\n";
        }
    }

    close $in
        or die "Can't close file WE_EX.txt: $!";
    # Buffered write errors only surface at close time, so check it.
    close $out
        or die "Can't close file WE_EX2.out: $!";

    return;
}

# Run the program only when executed as a script; this lets the subroutines
# above be loaded (e.g. by a test) without touching any files.
main() unless caller;

1;