#!/usr/bin/perl -CSDA
use Modern::Perl;
use Data::Dump qw{dd};
use Path::Tiny;
# Read the essay as a list of UTF-8 decoded lines with line endings removed.
my @essay = path('./essay.txt')->lines_utf8({chomp => 1});
print "\n\n\nessay is: "; dd(\@essay);

# Build every requested ordering in one hash, keyed by a task description.
my %stats = (
    # 1. Sort alphabetically, ignoring capitalization.
    #    Note that this orders whole lines of the essay, not single words.
    Sort_alphabetically_ignoring_capitalization => [
        sort { lc($a) cmp lc($b) } @essay
    ],

    # 2. Sort alphabetically with upper case words just in front of lower
    #    case words with the same initial characters.
    #    --- needs a sophisticated algorithm to extract the "same initial
    #    characters" first... Not implemented here.

    # 3. Sort by frequency, from high to low (any order for equal
    #    frequency). Frequency of words or lines? Words are assumed.
    #    The key spelling is kept unchanged so the dumped output is stable.
    Sort_by_fequency => do {
        # Word count: every line is broken into whitespace-separated words.
        # split ' ' (unlike split /\s+/) skips leading whitespace, so an
        # indented line does not contribute an empty-string "word".
        # NOTE(review): punctuation stays attached ("ut," and "ut" are
        # counted as different words) -- confirm this is intended.
        my %wc;
        $wc{$_}++ for map { split(' ', $_) } @essay;

        # Return an array ref of [word, count] pairs: highest count first,
        # words with the same count in plain string order.
        [
            map  { [ $_, $wc{$_} ] }
            sort { $wc{$b} <=> $wc{$a} or $a cmp $b }
            keys %wc
        ];
    },

    # 4. Sort by frequency, with alphabetical order for words with the
    #    same frequency: same as 3. above, which already breaks ties by word.
);
print "\n\n\nstats are: "; dd(\%stats);
essay is: [
"Lorem ipsum dolor sit amet, eos ei nihil feugait, ius id sonet volumus molestiae, no nonumes vivendo nam. Mea diam",
"putant te. Volumus euripidis instructior id pro, et accusata instructior quo. Et sed facete alienum, duo cu audire",
"expetendis. Pro nibh nostrum efficiendi te.",
"",
"Unum viderer mnesarchum eos no, dico liberavisse ius eu, ad dicant aliquid partiendo sed. Mea no vivendo persecuti",
"abhorreant. Enim possim mei ut, nibh noluisse delectus ei his. Atqui convenire vituperatoribus his at, ut meliore",
"senserit usu. Idque verear latine mel id, everti latine et qui, in alia erat vix. Ex his accusata elaboraret, quem illud",
"in eam.",
"",
]
stats are: {
Sort_alphabetically_ignoring_capitalization => [
"",
"",
"abhorreant. Enim possim mei ut, nibh noluisse delectus ei his. Atqui convenire vituperatoribus his at, ut meliore",
"expetendis. Pro nibh nostrum efficiendi te.",
"in eam.",
"Lorem ipsum dolor sit amet, eos ei nihil feugait, ius id sonet volumus molestiae, no nonumes vivendo nam. Mea diam",
"putant te. Volumus euripidis instructior id pro, et accusata instructior quo. Et sed facete alienum, duo cu audire",
"senserit usu. Idque verear latine mel id, everti latine et qui, in alia erat vix. Ex his accusata elaboraret, quem illud",
"Unum viderer mnesarchum eos no, dico liberavisse ius eu, ad dicant aliquid partiendo sed. Mea no vivendo persecuti",
],
Sort_by_fequency => [
["Mea", 2],
["accusata", 2],
["ei", 2],
["eos", 2],
["et", 2],
["his", 2],
["id", 2],
["in", 2],
["instructior", 2],
["ius", 2],
["latine", 2],
["nibh", 2],
.......
["ut,", 1],
["verear", 1],
["viderer", 1],
["vituperatoribus", 1],
["vix.", 1],
["volumus", 1],
],
}