# Build the HTML caption blocks from the directory named by
# $rvars->{eng_captions}.
#
# Argument: $rvars - hashref; the keys read are "eng_captions", "oitop"
#                    and "oibottom".
# Returns:  arrayref of HTML strings, ordered by caption file name.
# Side effect: prints every returned string, newline-terminated, to the
#              currently selected output handle.
sub get_eng_text {
    my $rvars = shift;
    return _get_caption_text( $rvars, 'eng_captions', 'eng' );
}

# Build the HTML caption blocks from the directory named by
# $rvars->{rus_captions}.  Same contract as get_eng_text; only the
# directory key and the wording of the warning differ.
sub get_rus_text {
    my $rvars = shift;
    return _get_caption_text( $rvars, 'rus_captions', 'rus' );
}

# Shared implementation behind get_eng_text and get_rus_text.
#
# Arguments:
#   $rvars   - hashref of settings.  $rvars->{$dir_key} is the captions
#              directory; $rvars->{oitop} and $rvars->{oibottom} name the
#              files whose contents are placed before and after each
#              converted caption (presumably the opening and closing div
#              markup - confirm against those files).
#   $dir_key - which key of $rvars holds the captions directory.
#   $label   - short tag interpolated into the warning message only.
#
# Returns an arrayref of HTML strings sorted by caption file name.  When
# the directory cannot be opened a warning is issued and an empty arrayref
# is returned.  Failures while reading a caption or wrapper file propagate
# as exceptions from Path::Tiny.
sub _get_caption_text {
    use 5.010;
    use HTML::FromText;
    use File::Slurp;    # not called here any more; kept in case other code
                        # in this package relies on its imports - TODO confirm
    use Path::Tiny;
    use utf8;

    my ( $rvars, $dir_key, $label ) = @_;
    my $dir = $rvars->{$dir_key};

    my $dh;
    unless ( opendir $dh, $dir ) {
        warn "no $label captions $!\n";
        return [];
    }

    # Names ending in "txt" only; this also leaves out editor backups
    # ("name.txt~").  Sorting here fixes the order of the result.
    my @names = sort grep { m/txt$/ } readdir $dh;
    closedir $dh;

    # readdir yields bare names, so the directory test has to be made on
    # the full path rather than relative to the current working directory.
    my @files = grep { !$_->is_dir } map { path( $dir, $_ ) } @names;
    return [] unless @files;

    # Read the wrappers once, decoded the same way as the captions so that
    # character strings are never concatenated with raw bytes.
    my $oitop    = path( $rvars->{oitop} )->slurp_utf8;
    my $oibottom = path( $rvars->{oibottom} )->slurp_utf8;

    my @return;
    for my $file (@files) {
        my $html = text2html(
            $file->slurp_utf8,
            urls  => 1,
            email => 1,
            paras => 1,
        );
        push @return, $oitop . $html . $oibottom;
    }

    print "$_\n" for @return;
    return \@return;
}