1: #!/usr/bin/perl
2: # This script was inspired by the interesting book <i>What is Random?</i>, by
3: # Edward Beltrami. (As well as [Be a Monkey!] by [KM]). Basically, the idea is
4: # to randomly generate text one character at the time. The probability of a
5: # letter being picked is based on the frequency it occurs after the preceding
6: # letters. (u is more likely to come after q, h is more likely to come after t,
7: # and so on.) The result is text that is random yet surprisingly familiar.
8: #
9: # The script works by processing a sample text file. It stores combinations of
10: # four letters in a hash, with keys being the first three letters, and values
11: # being array references possible fourth letters. For example:
12: #
13: # Well, then this is the sample sentence?
14: #
15: # would have a data structure including these entries, among others:
16: #
17: # key array
18: #
19: # { 'wel' } -> ('l')
20: # { 'the' } -> ('n', ' ')
21: # { ' th' } -> ('e', 'i', 'e')
22: #
23: # To generate the next character in the sequence, the script looks at the last
24: # three characters it generated, and randomly picks a character from the
25: # character array of that key.
26: #
27: # I also included some quick text processing features, because most text is available
28: # in HTML form.
29: #
30: # I ran it one several texts, including The Adventures of Huckleberry Finn,
31: # another Perl script, and the Gnome GPL. It will generate gibberish in any
32: # language, depending on what you feed it. A good place to find texts is
33: # etext.virginia.edu.
34: #
35: # The larger the file, the more random the result will be (and the more RAM
36: # you need!). Here is some text the script generated when given a copy of
37: # Hamlet. The only modification I made to it was to add line breaks. (The names
38: # were preserved because they were the only things in upper case.)
39: #
40: # HAMLET On we would sance is with ther, his let's his fathe gainst the roth
41: # goody see you now, on of him coldese with hool not.
42: #
43: # OPHELIA Sir, year seek his dothnessumentis in thanks; And be to us of the
44: # nigh which most vacannot desenday 'Tis rights, minders -- here the
45: # commissess assural spect of sift much deed, I much say, my sition ser
46: # willain the mine.
47: #
48: # LORD POLONIUS She garbages, and beward.
49: #
50: # LAERTES Go truth the that woe imattend, to crossinews is as of words!
51:
52: use strict;
53: my %strings; #a hash to store the letter groups
54: srand; #needed in older perl versions
55:
56: &process_file; #process the stinkin' file
57: &gibberish; #make gibberish
58:
59: sub process_file
60: {
61: my @quartet;
62: my $groups; #number of four character groups found
63: OPEN: #loop until a valid filename is entered
64: {
65: print "Enter file path: ";
66: chomp (my $file = <STDIN>);
67: open (FILE, $file) && last OPEN;
68: print "File not found, you loser.\n";
69: redo OPEN;
70: }
71:
72: do { local $/; $_ = <FILE>; };
73: print "Loaded file into memory.\n";
74:
75: print "\nQuick and dirty strip of HTML tags? >> ";
76: chomp (my $htmlstrip = <STDIN>);
77: if ($htmlstrip =~ /y/)
78: {
79: s/<br.{,2}>/\n/g;
80: print "Converted breaks into newlines.\n";
81: s/<[^>]+>//g;
82: print "Devoured HTML tags.\n";
83: s/&\w{1,4};//g;
84: print "Pulverised special characters.\n";
85: }
86:
87: print "\nTrash newlines and tabs? >> ";
88: chomp (my $ntstrip = <STDIN>);
89: if ($ntstrip =~ /y/)
90: {
91: s/\t//g;
92: s/\n//g;
93: print "Stripped newlines and tabs.\n";
94: }
95:
96: print "\nSquash multiple spaces? >> ";
97: chomp (my $space = <STDIN>);
98: if ($space =~ /y/)
99: {
100: s/\s+/ /g;
101: print "Destroyed redundant spaces.\n";
102: }
103:
104: print "\nIgnore x or more of the same char in a row? >> x = ";
105: chomp (my $repeats = <STDIN>);
106: if ($repeats)
107: {
108: $repeats--;
109: s/(.)\1{$repeats,}//g;
110: print "Slaughtered repeating characters.\n";
111: }
112:
113: print
114: "\nEnter regexes of stuff you want to ignore,\n",
115: "separated by spaces. example: \\n \\t{1,3} \\W >> ";
116: chomp (my $ignore = <STDIN>);
117: my @chars = split /\s/, $ignore;
118: foreach my $char (@chars)
119: {
120: eval
121: "s/$char//g || die \"$!\"" ?
122: print "Executed Regex: $char.\n" :
123: print "\nRegex failed: $char.\nReason: $@\n";
124: }
125:
126: print "\nCase insensitive? (allows more randomness with smaller file) >> ";
127: chomp (my $insensitive = <STDIN>);
128: if ($insensitive =~ /y/) { $_ = lc $_; }
129:
130: close FILE;
131:
132: s/(.)((.)(.)(.))/$2/s;
133: @quartet = ($1,$3,$4,$5);
134: $groups = 1;
135: $_ = reverse $_;
136: my $time = time;
137: print "Reversed the text, so I can use chop instead of a regexp.\n";
138: print "One dot equals 1000 chars processed.\n";
139:
140: while ($_) #go through the file
141: {
142: my $char = pop @quartet; #get the last of the 4 chars
143: my $pre = join '', @quartet; #join the other three
144: push @{$strings{$pre}}, $char; #put them in a hash of arrays
145: push @quartet, $char; #put the last char back on
146: shift @quartet; #remove the first character
147: push (@quartet, (chop $_)); #get a new char on the end
148: $groups++;
149: ($groups % 1000) or print "."; #every 1000 times print a dot
150: }
151: $time = time - $time; #find out the time elapsed
152: print "\n$groups combinations logged in $time seconds.\n";
153: }
154:
155: sub gibberish
156: {
157: GIBBERISH:
158: {
159: print "Enter amount of gibberish (0 to quit): ";
160: chomp (my $length = <STDIN>);
161: $length || last GIBBERISH;
162: my @keys = keys %strings;
163: my @last3 = split ('', $keys[int rand($#keys)]);
164: undef @keys;
165: print @last3;
166: $length -= 3;
167: for (0..$length)
168: {
169: my $curr = join ('', @last3);
170: my @currarray = @{$strings{$curr}};
171: my $element = $currarray[int rand($#currarray - 1)];
172: shift @last3;
173: push @last3, $element;
174: print $element;
175: }
176: print "\n\n";
177: redo GIBBERISH;
178: }
179: }