#!/usr/bin/env perl # for https://perlmonks.org/?node_id=1221711 use strict; use warnings; select(STDERR); $| = 1; select(STDOUT); $| = 1; srand(1234); # read a text or a dictionary # find each word and filter out rubbish (numbers, initials, etc) # enter the words in a hash keyed on their last N chars # - an attempt to rhyme. # TeX::Hyphen hyphenates the words so it's better but misses # out a lot of words. # create a pattern for the sentence, (0,1,0,0,1,0) # meaning: pick random word for first word, # second word change the ending, (1) so pick up # a random word with different ending. # 3rd word continue with same ending as previous (0) # etc. my $num_sentences = 100; #my $dict_filename = '/usr/share/dict/words'; #my $dict_filename = './words'; #my $dict_filename = './dict.txt'; #my $dict_filename = './KingJamesBible.txt'; my $dict_filename = './ShelleyFrankenstein.txt'; #my $dict_filename = './wordlist'; open(my $dfh, '<', $dict_filename) || die "dict file: $!"; my %words_keyed_on_ending = (); my ($aword, $aline); my $progress = 1; print "reading dict: "; while( $aline = <$dfh> ){ chomp($aline); $aline =~ s/[0-9]+\:[0-9]*//g; foreach my $aword (split/\W+/, $aline){ if( $aword =~ /^[0-9A-Z\.,]+$/ ){ next } my $last_syl = substr($aword, -3); $words_keyed_on_ending{$last_syl} = [] unless exists $words_keyed_on_ending{$last_syl}; push(@{$words_keyed_on_ending{$last_syl}}, $aword); } print "$progress " if ++$progress % 10000 == 0; # if( $progress > 100 ){ last } } print "\n"; my @keys_of_words_keyed_on_ending = keys %words_keyed_on_ending; #for (sort @keys_of_words_keyed_on_ending){ print "$_ : ".join("---", @{$words_keyed_on_ending{$_}})."\n"; } exit(0); for(1..$num_sentences){ print join(" ", @{make_sentence()})."\n"; } exit(0); sub make_sentence { my @pat = (0,1,0,0,1,1,0,0); my $current_ending = $keys_of_words_keyed_on_ending[rand @keys_of_words_keyed_on_ending]; my $bag_of_words = $words_keyed_on_ending{$current_ending}; my $current_word = ucfirst $bag_of_words->[rand @$bag_of_words]; my @sent = ($current_word); #print "current word: $current_word (ending in $current_ending)\n"; #print "bag of words: ".join(", ", @$bag_of_words)."\n"; my $last_word = $current_word; for(1..$#pat){ if( $pat[$_] ){ #print "changing ending to "; # change the ending $current_ending = $keys_of_words_keyed_on_ending[rand @keys_of_words_keyed_on_ending]; $bag_of_words = $words_keyed_on_ending{$current_ending}; $current_word = $bag_of_words->[rand @$bag_of_words]; } else { # use same ending $current_word = $bag_of_words->[rand @$bag_of_words]; } if( $current_word ne $last_word ){ $last_word = $current_word; push(@sent, $current_word); } #print "current word: $current_word (ending in $current_ending)\n"; #print "bag of words: ".join(", ", @$bag_of_words)."\n"; } #print "num words: ".scalar(@sent)."\n"; return \@sent; }