comment on

0: #!/usr/bin/perl
1: # This script was inspired by the interesting book <i>What is Random?</i>, by
2: # Edward Beltrami.  (As well as [Be a Monkey!] by [KM]). Basically, the idea is 
3: # to randomly generate text one character at the time. The probability of a
4: # letter being picked is based on the frequency it occurs after the preceding 
5: # letters. (u is more likely to come after q, h is more likely to come after t,
6: # and so on.) The result is text that is random yet surprisingly familiar.
7: #
8: # The script works by processing a sample text file. It stores combinations of
9: # four letters in a hash, with keys being the first three letters, and values 
10: # being array references possible fourth letters. For example:
11: #
12: #    Well, then this is the sample sentence?
13: #
14: # would have a data structure including these entries, among others:
15: #
16: #    key           array
17: #
18: # { 'wel' }  ->  ('l')
19: # { 'the' }  ->  ('n', ' ')
20: # { ' th' }  ->  ('e', 'i', 'e')
21: #
22: # To generate the next character in the sequence, the script looks at the last
23: # three characters it generated, and randomly picks a character from the 
24: # character array of that key.
25: #
26: # I also included some quick text processing features, because most text is available
27: # in HTML form.
28: #
29: # I ran it one several texts, including The Adventures of Huckleberry Finn, 
30: # another Perl script, and the Gnome GPL. It will generate gibberish in any 
31: # language, depending on what you feed it. A good place to find texts is 
32: # etext.virginia.edu.
33: #
34: # The larger the file, the more random the result will be (and the more RAM
35: # you need!). Here is some text the script generated when given a copy of 
36: # Hamlet. The only modification I made to it was to add line breaks. (The names 
37: # were preserved because they were the only things in upper case.)
38: #
39: #   HAMLET  On we would sance is with ther, his let's his fathe gainst the roth 
40: #   goody see you now, on of him coldese with hool not.
41: #
42: #   OPHELIA  Sir, year seek his dothnessumentis in thanks; And be to us of the 
43: #   nigh which most vacannot desenday 'Tis rights, minders -- here the 
44: #   commissess assural spect of sift much deed, I much say, my sition ser 
45: #   willain the mine.
46: #
47: #   LORD POLONIUS  She garbages, and beward.
48: #
49: #   LAERTES  Go truth the that woe imattend, to crossinews is as of words!
50: 
51: use strict;
52: my %strings;      #a hash to store the letter groups
53: srand;            #needed in older perl versions
54: 
55: &process_file;    #process the stinkin' file
56: &gibberish;       #make gibberish
57: 
58: sub process_file
59:    {
60:    my @quartet;
61:    my $groups;    #number of four character groups found
62:    OPEN:          #loop until a valid filename is entered
63:       {
64:       print "Enter file path: ";
65:       chomp (my $file = <STDIN>);
66:       open (FILE, $file) && last OPEN;
67:       print "File not found, you loser.\n";
68:       redo OPEN;
69:       }
70:    
71:    do { local $/; $_ = <FILE>; };
72:    print "Loaded file into memory.\n";
73:    
74:    print "\nQuick and dirty strip of HTML tags? >> ";
75:    chomp (my $htmlstrip = <STDIN>);
76:    if ($htmlstrip =~ /y/)
77:       {
78:       s/<br.{,2}>/\n/g;
79:       print "Converted breaks into newlines.\n";
80:       s/<[^>]+>//g;
81:       print "Devoured HTML tags.\n";
82:       s/&\w{1,4};//g;
83:       print "Pulverised special characters.\n";
84:       }
85:    
86:    print "\nTrash newlines and tabs? >> ";
87:    chomp (my $ntstrip = <STDIN>);
88:    if ($ntstrip =~ /y/)
89:       {
90:       s/\t//g;
91:       s/\n//g;
92:       print "Stripped newlines and tabs.\n";
93:       }
94:    
95:    print "\nSquash multiple spaces? >> ";
96:    chomp (my $space = <STDIN>);
97:    if ($space =~ /y/)
98:       {
99:       s/\s+/ /g;
100:       print "Destroyed redundant spaces.\n";
101:       }
102:    
103:    print "\nIgnore x or more of the same char in a row? >> x = ";
104:    chomp (my $repeats = <STDIN>);
105:    if ($repeats)
106:       {
107:       $repeats--;
108:       s/(.)\1{$repeats,}//g;
109:       print "Slaughtered repeating characters.\n";
110:       }
111:    
112:    print 
113:       "\nEnter regexes of stuff you want to ignore,\n", 
114:       "separated by spaces. example: \\n \\t{1,3} \\W >> ";
115:    chomp (my $ignore = <STDIN>);
116:    my @chars = split /\s/, $ignore;
117:    foreach my $char (@chars)
118:       {
119:       eval 
120:          "s/$char//g || die \"$!\"" ?
121:       print "Executed Regex: $char.\n" :
122:       print "\nRegex failed: $char.\nReason: $@\n";
123:       }
124:    
125:    print "\nCase insensitive? (allows more randomness with smaller file) >> ";
126:    chomp (my $insensitive = <STDIN>);
127:    if ($insensitive =~ /y/) { $_ = lc $_; }
128:    
129:    close FILE;
130:       
131:    s/(.)((.)(.)(.))/$2/s;
132:    @quartet = ($1,$3,$4,$5);
133:    $groups = 1;
134:    $_ = reverse $_;
135:    my $time = time;
136:    print "Reversed the text, so I can use chop instead of a regexp.\n";
137:    print "One dot equals 1000 chars processed.\n";
138:    
139:    while ($_)                          #go through the file
140:       {
141:       my $char = pop @quartet;         #get the last of the 4 chars
142:       my $pre = join '', @quartet;     #join the other three
143:       push @{$strings{$pre}}, $char;   #put them in a hash of arrays
144:       push @quartet, $char;            #put the last char back on
145:       shift @quartet;                  #remove the first character
146:       push (@quartet, (chop $_));      #get a new char on the end
147:       $groups++;
148:       ($groups % 1000) or print ".";   #every 1000 times print a dot
149:       }  
150:    $time = time - $time;               #find out the time elapsed
151:    print "\n$groups combinations logged in $time seconds.\n";
152:    }
153: 
154: sub gibberish
155:    {
156:    GIBBERISH:
157:       {
158:       print "Enter amount of gibberish (0 to quit): ";
159:       chomp (my $length = <STDIN>);
160:       $length || last GIBBERISH;
161:       my @keys = keys %strings;
162:       my @last3 = split ('', $keys[int rand($#keys)]);
163:       undef @keys;
164:       print @last3;
165:       $length -= 3;
166:       for (0..$length)
167:          {
168:          my $curr = join ('', @last3);
169:          my @currarray = @{$strings{$curr}};
170:          my $element = $currarray[int rand($#currarray - 1)];
171:          shift @last3;
172:          push @last3, $element;
173:          print $element;
174:          }
175:       print "\n\n";
176:       redo GIBBERISH;
177:       }
178:    }

In reply to Is it random? by ColonelPanic

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.