#!/usr/bin/perl # This script was inspired by the interesting book What is Random?, by # Edward Beltrami. (As well as [Be a Monkey!] by [KM]). Basically, the idea is # to randomly generate text one character at the time. The probability of a # letter being picked is based on the frequency it occurs after the preceding # letters. (u is more likely to come after q, h is more likely to come after t, # and so on.) The result is text that is random yet surprisingly familiar. # # The script works by processing a sample text file. It stores combinations of # four letters in a hash, with keys being the first three letters, and values # being array references possible fourth letters. For example: # # Well, then this is the sample sentence? # # would have a data structure including these entries, among others: # # key array # # { 'wel' } -> ('l') # { 'the' } -> ('n', ' ') # { ' th' } -> ('e', 'i', 'e') # # To generate the next character in the sequence, the script looks at the last # three characters it generated, and randomly picks a character from the # character array of that key. # # I also included some quick text processing features, because most text is available # in HTML form. # # I ran it one several texts, including The Adventures of Huckleberry Finn, # another Perl script, and the Gnome GPL. It will generate gibberish in any # language, depending on what you feed it. A good place to find texts is # etext.virginia.edu. # # The larger the file, the more random the result will be (and the more RAM # you need!). Here is some text the script generated when given a copy of # Hamlet. The only modification I made to it was to add line breaks. (The names # were preserved because they were the only things in upper case.) # # HAMLET On we would sance is with ther, his let's his fathe gainst the roth # goody see you now, on of him coldese with hool not. # # OPHELIA Sir, year seek his dothnessumentis in thanks; And be to us of the # nigh which most vacannot desenday 'Tis rights, minders -- here the # commissess assural spect of sift much deed, I much say, my sition ser # willain the mine. # # LORD POLONIUS She garbages, and beward. # # LAERTES Go truth the that woe imattend, to crossinews is as of words! use strict; my %strings; #a hash to store the letter groups srand; #needed in older perl versions &process_file; #process the stinkin' file &gibberish; #make gibberish sub process_file { my @quartet; my $groups; #number of four character groups found OPEN: #loop until a valid filename is entered { print "Enter file path: "; chomp (my $file = ); open (FILE, $file) && last OPEN; print "File not found, you loser.\n"; redo OPEN; } do { local $/; $_ = ; }; print "Loaded file into memory.\n"; print "\nQuick and dirty strip of HTML tags? >> "; chomp (my $htmlstrip = ); if ($htmlstrip =~ /y/) { s//\n/g; print "Converted breaks into newlines.\n"; s/<[^>]+>//g; print "Devoured HTML tags.\n"; s/&\w{1,4};//g; print "Pulverised special characters.\n"; } print "\nTrash newlines and tabs? >> "; chomp (my $ntstrip = ); if ($ntstrip =~ /y/) { s/\t//g; s/\n//g; print "Stripped newlines and tabs.\n"; } print "\nSquash multiple spaces? >> "; chomp (my $space = ); if ($space =~ /y/) { s/\s+/ /g; print "Destroyed redundant spaces.\n"; } print "\nIgnore x or more of the same char in a row? >> x = "; chomp (my $repeats = ); if ($repeats) { $repeats--; s/(.)\1{$repeats,}//g; print "Slaughtered repeating characters.\n"; } print "\nEnter regexes of stuff you want to ignore,\n", "separated by spaces. example: \\n \\t{1,3} \\W >> "; chomp (my $ignore = ); my @chars = split /\s/, $ignore; foreach my $char (@chars) { eval "s/$char//g || die \"$!\"" ? print "Executed Regex: $char.\n" : print "\nRegex failed: $char.\nReason: $@\n"; } print "\nCase insensitive? (allows more randomness with smaller file) >> "; chomp (my $insensitive = ); if ($insensitive =~ /y/) { $_ = lc $_; } close FILE; s/(.)((.)(.)(.))/$2/s; @quartet = ($1,$3,$4,$5); $groups = 1; $_ = reverse $_; my $time = time; print "Reversed the text, so I can use chop instead of a regexp.\n"; print "One dot equals 1000 chars processed.\n"; while ($_) #go through the file { my $char = pop @quartet; #get the last of the 4 chars my $pre = join '', @quartet; #join the other three push @{$strings{$pre}}, $char; #put them in a hash of arrays push @quartet, $char; #put the last char back on shift @quartet; #remove the first character push (@quartet, (chop $_)); #get a new char on the end $groups++; ($groups % 1000) or print "."; #every 1000 times print a dot } $time = time - $time; #find out the time elapsed print "\n$groups combinations logged in $time seconds.\n"; } sub gibberish { GIBBERISH: { print "Enter amount of gibberish (0 to quit): "; chomp (my $length = ); $length || last GIBBERISH; my @keys = keys %strings; my @last3 = split ('', $keys[int rand($#keys)]); undef @keys; print @last3; $length -= 3; for (0..$length) { my $curr = join ('', @last3); my @currarray = @{$strings{$curr}}; my $element = $currarray[int rand($#currarray - 1)]; shift @last3; push @last3, $element; print $element; } print "\n\n"; redo GIBBERISH; } }