#!/usr/bin/env -S perl -w
##!/usr/bin/env -S perl -wd

# Experiments with Perl's empty pattern (//), which re-applies the last
# successfully matched regular expression:
#   1. count character classes in the text stored after __END__;
#   2. extract the text between a pair of HTML tags in a string.

use v5.30.0;
use strict;
use warnings;

use List::Util qw( sum0 );

# Slurp the whole DATA section (everything after __END__) in a single read.
my $slurpee = do { local $/; <DATA> };
$slurpee = '' unless defined $slurpee;
my $length = length $slurpee;

# Each entry: [ pattern, label used in the report, running count ].
my @regexes = (
    [ qr/[A-Z]/, "uppercase characters",  0 ],
    [ qr/[a-z]/, "lowercase characters",  0 ],
    [ qr/\d/,    "digits",                0 ],
    [ qr/\s/,    "whitespace characters", 0 ],
    #
    # Note: $ must be \$, and - must be first to avoid range interpretation.
    #
    [ qr/[-~`!@#\$%^&*()_+={}\[\]|\\:;"'<>,.?\/]/, "punctuation characters", 0 ],
);

#for my $c (split //, $slurpee) { print $c; }

for my $case (@regexes) {
    say "seeding // with: $case->[0]";

    # Seed the // iteration. The empty pattern reuses the last *successful*
    # match, so a seed that fails would silently count with the previous
    # entry's pattern; treat that as a fatal error instead.
    "Aa5: " =~ $case->[0]
        or die "seed string does not match $case->[0]\n";
    say "matched: '$&'" if $&;

    # Test every character of the input against the seeded pattern.
    for (split //, $slurpee) {
        // and $case->[2]++;
    }
}

for my $case (@regexes) {
    printf("%4d %s\n", $case->[2], $case->[1]);
}

# Print the total of all class counts next to the input length.
my $sum = sum0 map { $_->[2] } @regexes;
printf(" sum and length: %3d and %3d\n", $sum, $length);

say "\nNow extract the string between HTML tags with //...";
my $str = "Before tag<b>between tags</b>after tag";
say "\n$str";

# Put a space wherever a word character touches a tag boundary so the tags
# become separate tokens when the string is split on spaces.
$str =~ s{ (?: (?<= \w) (?= <) | (?<= >) (?= \w) ) }{ }xg;    # insert whitespace
say $str;

my @tokens = split / /, $str;
say "Tokens...\n";
say for @tokens;

# The flip-flop is true from the opening-tag token through the closing-tag
# token. Inside it, // re-applies whichever tag pattern matched last, so the
# two tag tokens themselves are skipped and only the words between are kept.
my @inside;
for (@tokens) {
    if (/<\w>/ .. /<\/\w>/) {
        push @inside, $_ unless // and $&;
    }
}
my $between = join ' ', @inside;    # empty string when no tag pair was found
say "'$between'";

$str = "\n'Before tag<b>between tags</b>after tag'";
say $str;
say "Parse it again with...";
my $regex = qr/ (<\w+>) (.*) (<\/\w+>) /x;
say $regex;

# Only report the captures when the match succeeded; after a failed match
# $1..$3 would hold stale or undefined values.
if ($str =~ $regex) {
    say "\$1: '$1'";
    say "\$2: '$2'";
    say "\$3: '$3'";
}
else {
    say "no match";
}

exit(0);

__END__
Last night I dreamt I went to Manderley again. This will come as a surprise to Daphne since she did not write these lines. Here is a line containing stuff ,?- ! : that should/must be deleted/// ; : ! before using it as a one-time-pad.
A one-time-pad should contain only characters, no punctuation, no parentheticals like (this is bogus) or [(this is bogus, too)], or {also this}; no contractions, such as I'll or it's or digits such as 0, 123, -75 or 8 P.M., and no numbers, such as $1,234.69. If you want to use numbers in your message, spell them out; one-hundred dollars and sixty-nine cents, or theeepm. These non-alpha characters in the one-time-pad will be discarded, but they must be entered eactly as represented in the book used as the pad. Let the encoding program decide what to use and what to skip. Some of the text is from "Rebecca", an out-of copyright but not out-of-print fictional work that can be freely downloaded as an eBook from Project Gutenberg. I use it as the raw source for one-time pads in a cryptologic research study; i.e., extract potential pad bits from somewhere in the text, randomly chosen with seek from EOF. Munge the characters, encrypt the message and delete the characters used for the pad. Since both encoder and decoder use the same seek expression, both pads are guaranteed to be identical, and since the characters used to create the pad are deleted, never to be seen again, the pad is guaranteed to be used exactly once. Does not scale for large organizations but works flawlessly for a small group of conspirators.