#! perl -slw use strict; use Time::HiRes qw[ time ]; sub uniq{ my %x; @x{@_} = (); keys %x } $|++; our $W //= 200; our $P //= 1000; my @idx = ( uniq( map int( rand 60000 ), 1 .. 2*$W ) )[ 0 .. $W-1 ]; print STDERR "Words: ", ~~@idx; my %paras; for my $p ( 1 .. $P ) { local $_ = join ' ', uniq map $idx[ rand @idx ], 0 .. rand( $W/2 ); $paras{ $p } //= ''; for my $i ( 0 .. $#idx ) { my $word = $idx[ $i ]; if( m[\b\Q$word]i ) { vec( $paras{ $p }, $i, 1 ) = 1; } } } print STDERR "Paras: ", scalar keys %paras; my $start = time; my @parasByWordCount = map{ unpack 'x[N]A*', $_ } sort map { pack 'NA*', unpack( '%32b*', $paras{ $_ } ), $_ } keys %paras; my @set = shift @parasByWordCount; my $mask = $paras{ $set[ 0 ] }; while( @parasByWordCount ) { my $next = shift @parasByWordCount; if( ( $mask | $paras{ $next } ) ne $mask ) { ## $Next contains new paras, so add to set push @set, $next; $mask |= $paras{ $next } } ## otherwise just discard it } printf STDERR "Took %.9f seconds\n", time() - $start; printf STDERR "All the words are covered by %d paras:\n", scalar @set; printf "%3d : %s\n", $_, unpack 'b*', $paras{ $_ } for sort{ $a<=>$b } @set; printf "All : %s\n", unpack 'b*', $mask; __END__ [ 5:17:45.68]C:\test>845818 -W=200 -P=600000 > nul Words: 200 Paras: 600000 Took 71.555999994 seconds All the words are covered by 200 paras: