Thanks for taking this question farther. The list of non-fixes for File::Slurp made me willing to try Path::Tiny. Where it ended up is having the routines that get english and russian captions completely analogous to each other:
sub get_eng_text { use 5.010; use HTML::FromText; use File::Slurp; use Path::Tiny; use utf8; ### revision for better utf8 encodings 7/18 # using Path::Tiny instead of deprecated File::Slurp # now analagous to get_rus_text my $rvars = shift; my %vars = %$rvars; my %content; my $refc = \%content; opendir my $eh, $vars{"eng_captions"} or warn "no eng captions $!\n"; while (defined ($_ = readdir($eh))){ next if m/~$/; next if -d; if (m/txt$/){ my $file = path($vars{"eng_captions"},$_); my $guts = $file->slurp_utf8; my $temp = text2html( $guts, urls => 1, email => 1, paras => 1, ); # surround by divs my $oitop = read_file($vars{"oitop"}); my $oibottom = read_file($vars{"oibottom"}); my $text = $oitop.$temp.$oibottom; #say "text is $text"; $content{$_} = $text; } } closedir $eh; #important to sort my @return; foreach my $key (sort keys %content) { print $content{$key} . "\n"; push @return, $content{$key}; } return \@return; } sub get_rus_text { use 5.010; use HTML::FromText; use File::Slurp; use Path::Tiny; use utf8; ### revision for better russian use 7/18 # run cyrillic through HTML::FromText # using Path::Tiny instead of deprecated File::Slurp # use utf8 allows use of cyrillic from within this coding unit my $rvars = shift; my %vars = %$rvars; my %content; my $refc = \%content; opendir my $eh, $vars{"rus_captions"} or warn "no rus captions $!\n"; while (defined ($_ = readdir($eh))){ next if m/~$/; next if -d; if (m/txt$/){ my $file = path($vars{"rus_captions"},$_); my $guts = $file->slurp_utf8; my $temp = text2html( $guts, urls => 1, email => 1, paras => 1, ); # surround by divs my $oitop = read_file($vars{"oitop"}); my $oibottom = read_file($vars{"oibottom"}); my $text = $oitop.$temp.$oibottom; #say "text is $text"; $content{$_} = $text; } } closedir $eh; #important to sort my @return; foreach my $key (sort keys %content) { print $content{$key} . "\n"; push @return, $content{$key}; } return \@return; }
I tried to combine these two into one function and call it slightly differently, but I didn't succeed on the first try. Sometimes I just have to go with what I've got and call the template good enough for now.
In reply to Re^2: dealing with cyrillic characters (perlunitut)
by Aldebaran
in thread dealing with cyrillic characters
by Aldebaran
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |