0: #!/usr/bin/perl
1: # This script was inspired by the interesting book <i>What is Random?</i>, by
2: # Edward Beltrami. (As well as [Be a Monkey!] by [KM]). Basically, the idea is
3: # to randomly generate text one character at the time. The probability of a
4: # letter being picked is based on the frequency it occurs after the preceding
5: # letters. (u is more likely to come after q, h is more likely to come after t,
6: # and so on.) The result is text that is random yet surprisingly familiar.
7: #
8: # The script works by processing a sample text file. It stores combinations of
9: # four letters in a hash, with keys being the first three letters, and values
10: # being array references possible fourth letters. For example:
11: #
12: # Well, then this is the sample sentence?
13: #
14: # would have a data structure including these entries, among others:
15: #
16: # key array
17: #
18: # { 'wel' } -> ('l')
19: # { 'the' } -> ('n', ' ')
20: # { ' th' } -> ('e', 'i', 'e')
21: #
22: # To generate the next character in the sequence, the script looks at the last
23: # three characters it generated, and randomly picks a character from the
24: # character array of that key.
25: #
26: # I also included some quick text processing features, because most text is available
27: # in HTML form.
28: #
29: # I ran it one several texts, including The Adventures of Huckleberry Finn,
30: # another Perl script, and the Gnome GPL. It will generate gibberish in any
31: # language, depending on what you feed it. A good place to find texts is
32: # etext.virginia.edu.
33: #
34: # The larger the file, the more random the result will be (and the more RAM
35: # you need!). Here is some text the script generated when given a copy of
36: # Hamlet. The only modification I made to it was to add line breaks. (The names
37: # were preserved because they were the only things in upper case.)
38: #
39: # HAMLET On we would sance is with ther, his let's his fathe gainst the roth
40: # goody see you now, on of him coldese with hool not.
41: #
42: # OPHELIA Sir, year seek his dothnessumentis in thanks; And be to us of the
43: # nigh which most vacannot desenday 'Tis rights, minders -- here the
44: # commissess assural spect of sift much deed, I much say, my sition ser
45: # willain the mine.
46: #
47: # LORD POLONIUS She garbages, and beward.
48: #
49: # LAERTES Go truth the that woe imattend, to crossinews is as of words!
50:
51: use strict;
52: my %strings; #a hash to store the letter groups
53: srand; #needed in older perl versions
54:
55: &process_file; #process the stinkin' file
56: &gibberish; #make gibberish
57:
58: sub process_file
59: {
60: my @quartet;
61: my $groups; #number of four character groups found
62: OPEN: #loop until a valid filename is entered
63: {
64: print "Enter file path: ";
65: chomp (my $file = <STDIN>);
66: open (FILE, $file) && last OPEN;
67: print "File not found, you loser.\n";
68: redo OPEN;
69: }
70:
71: do { local $/; $_ = <FILE>; };
72: print "Loaded file into memory.\n";
73:
74: print "\nQuick and dirty strip of HTML tags? >> ";
75: chomp (my $htmlstrip = <STDIN>);
76: if ($htmlstrip =~ /y/)
77: {
78: s/<br.{,2}>/\n/g;
79: print "Converted breaks into newlines.\n";
80: s/<[^>]+>//g;
81: print "Devoured HTML tags.\n";
82: s/&\w{1,4};//g;
83: print "Pulverised special characters.\n";
84: }
85:
86: print "\nTrash newlines and tabs? >> ";
87: chomp (my $ntstrip = <STDIN>);
88: if ($ntstrip =~ /y/)
89: {
90: s/\t//g;
91: s/\n//g;
92: print "Stripped newlines and tabs.\n";
93: }
94:
95: print "\nSquash multiple spaces? >> ";
96: chomp (my $space = <STDIN>);
97: if ($space =~ /y/)
98: {
99: s/\s+/ /g;
100: print "Destroyed redundant spaces.\n";
101: }
102:
103: print "\nIgnore x or more of the same char in a row? >> x = ";
104: chomp (my $repeats = <STDIN>);
105: if ($repeats)
106: {
107: $repeats--;
108: s/(.)\1{$repeats,}//g;
109: print "Slaughtered repeating characters.\n";
110: }
111:
112: print
113: "\nEnter regexes of stuff you want to ignore,\n",
114: "separated by spaces. example: \\n \\t{1,3} \\W >> ";
115: chomp (my $ignore = <STDIN>);
116: my @chars = split /\s/, $ignore;
117: foreach my $char (@chars)
118: {
119: eval
120: "s/$char//g || die \"$!\"" ?
121: print "Executed Regex: $char.\n" :
122: print "\nRegex failed: $char.\nReason: $@\n";
123: }
124:
125: print "\nCase insensitive? (allows more randomness with smaller file) >> ";
126: chomp (my $insensitive = <STDIN>);
127: if ($insensitive =~ /y/) { $_ = lc $_; }
128:
129: close FILE;
130:
131: s/(.)((.)(.)(.))/$2/s;
132: @quartet = ($1,$3,$4,$5);
133: $groups = 1;
134: $_ = reverse $_;
135: my $time = time;
136: print "Reversed the text, so I can use chop instead of a regexp.\n";
137: print "One dot equals 1000 chars processed.\n";
138:
139: while ($_) #go through the file
140: {
141: my $char = pop @quartet; #get the last of the 4 chars
142: my $pre = join '', @quartet; #join the other three
143: push @{$strings{$pre}}, $char; #put them in a hash of arrays
144: push @quartet, $char; #put the last char back on
145: shift @quartet; #remove the first character
146: push (@quartet, (chop $_)); #get a new char on the end
147: $groups++;
148: ($groups % 1000) or print "."; #every 1000 times print a dot
149: }
150: $time = time - $time; #find out the time elapsed
151: print "\n$groups combinations logged in $time seconds.\n";
152: }
153:
154: sub gibberish
155: {
156: GIBBERISH:
157: {
158: print "Enter amount of gibberish (0 to quit): ";
159: chomp (my $length = <STDIN>);
160: $length || last GIBBERISH;
161: my @keys = keys %strings;
162: my @last3 = split ('', $keys[int rand($#keys)]);
163: undef @keys;
164: print @last3;
165: $length -= 3;
166: for (0..$length)
167: {
168: my $curr = join ('', @last3);
169: my @currarray = @{$strings{$curr}};
170: my $element = $currarray[int rand($#currarray - 1)];
171: shift @last3;
172: push @last3, $element;
173: print $element;
174: }
175: print "\n\n";
176: redo GIBBERISH;
177: }
178: }
In reply to Is it random? by ColonelPanic
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |