# get_eng_text(\%vars)
#
# Builds one HTML fragment per English caption file.
#
# Parameter: a hash reference from which these keys are read:
#   eng_captions - directory that is scanned for caption files
#   oitop        - file whose content is prepended to every fragment
#   oibottom     - file whose content is appended to every fragment
#                  (presumably opening/closing div markup - confirm with caller)
#
# Every directory entry whose name ends in "txt" is read as UTF-8, run
# through HTML::FromText::text2html (urls, email and paras enabled) and
# wrapped between the oitop and oibottom content.
#
# Returns: an array reference of the fragments, ordered by sorted file
# name. An empty array reference is returned when the directory cannot be
# opened (a warning is emitted) or holds no caption files.
#
# Side effect: each fragment is also printed to STDOUT followed by a newline.
sub get_eng_text {
    use 5.010;
    use strict;
    use warnings;
    use HTML::FromText;
    use File::Slurp;    # no longer called here; import kept in case other code in this package uses read_file
    use Path::Tiny;
    use utf8;

    my $rvars = shift;
    my $dir   = $rvars->{"eng_captions"};

    # Without a readable directory there is nothing to do; bail out instead
    # of calling readdir/closedir on an invalid handle.
    opendir( my $eh, $dir ) or do {
        warn "no eng captions $!\n";
        return [];
    };

    # readdir yields bare names, so the directory test has to be made on the
    # full path rather than relative to the current working directory.
    # grep localizes $_, so the caller's $_ is left untouched.
    # Sorting matters: the caller relies on a stable order.
    my @names = sort
        grep { !m/~$/ && m/txt$/ && !path( $dir, $_ )->is_dir }
        readdir $eh;
    closedir $eh;

    return [] unless @names;

    # The wrappers are loop-invariant, so read them once. They are decoded
    # as UTF-8 like the captions, so that concatenation never mixes raw
    # bytes with decoded characters.
    my $oitop    = path( $rvars->{"oitop"} )->slurp_utf8;
    my $oibottom = path( $rvars->{"oibottom"} )->slurp_utf8;

    my @return;
    for my $name (@names) {
        my $guts = path( $dir, $name )->slurp_utf8;
        my $html = text2html(
            $guts,
            urls  => 1,
            email => 1,
            paras => 1,
        );
        push @return, $oitop . $html . $oibottom;
    }

    # Echo of every fragment, kept from the original implementation.
    for my $text (@return) {
        print $text . "\n";
    }

    return \@return;
}
# get_rus_text(\%vars)
#
# Builds one HTML fragment per Russian caption file.
#
# Parameter: a hash reference from which these keys are read:
#   rus_captions - directory that is scanned for caption files
#   oitop        - file whose content is prepended to every fragment
#   oibottom     - file whose content is appended to every fragment
#                  (presumably opening/closing div markup - confirm with caller)
#
# Every directory entry whose name ends in "txt" is read as UTF-8 (so
# Cyrillic text arrives as decoded characters), run through
# HTML::FromText::text2html (urls, email and paras enabled) and wrapped
# between the oitop and oibottom content.
#
# Returns: an array reference of the fragments, ordered by sorted file
# name. An empty array reference is returned when the directory cannot be
# opened (a warning is emitted) or holds no caption files.
#
# Side effect: each fragment is also printed to STDOUT followed by a newline.
sub get_rus_text {
    use 5.010;
    use strict;
    use warnings;
    use HTML::FromText;
    use File::Slurp;    # no longer called here; import kept in case other code in this package uses read_file
    use Path::Tiny;
    use utf8;           # allows Cyrillic literals inside this coding unit

    my $rvars = shift;
    my $dir   = $rvars->{"rus_captions"};

    # Without a readable directory there is nothing to do; bail out instead
    # of calling readdir/closedir on an invalid handle.
    opendir( my $eh, $dir ) or do {
        warn "no rus captions $!\n";
        return [];
    };

    # readdir yields bare names, so the directory test has to be made on the
    # full path rather than relative to the current working directory.
    # grep localizes $_, so the caller's $_ is left untouched.
    # Sorting matters: the caller relies on a stable order.
    my @names = sort
        grep { !m/~$/ && m/txt$/ && !path( $dir, $_ )->is_dir }
        readdir $eh;
    closedir $eh;

    return [] unless @names;

    # The wrappers are loop-invariant, so read them once. They are decoded
    # as UTF-8 like the captions, so that concatenation never mixes raw
    # bytes with decoded characters.
    my $oitop    = path( $rvars->{"oitop"} )->slurp_utf8;
    my $oibottom = path( $rvars->{"oibottom"} )->slurp_utf8;

    my @return;
    for my $name (@names) {
        my $guts = path( $dir, $name )->slurp_utf8;
        my $html = text2html(
            $guts,
            urls  => 1,
            email => 1,
            paras => 1,
        );
        push @return, $oitop . $html . $oibottom;
    }

    # Echo of every fragment, kept from the original implementation.
    for my $text (@return) {
        print $text . "\n";
    }

    return \@return;
}