#!/usr/bin/perl use strict; use warnings; my $maxw = 600; # 200 my $maxp = 612; # 600_000 sub uniq { my %uniq; @uniq{@_} = (); return keys %uniq; } # uniq print {*STDERR} "Generate fake paragraphs\n"; my @para = map [uniq map int(rand $maxw),0..int(rand $maxw)],1..$maxp; print {*STDERR} "Sort\n"; @para = sort {@$a <=> @$b} @para; print {*STDERR} "Count reversed hash\n"; my %words_in_para; for(my $i=0;$i<=$#para;$i++){ foreach my $word(@{ $para[$i] }){ push @{ $words_in_para{$word} },$i; } } print {*STDERR} "Solving\n"; my $start_time = time(); my $i = 0; while ($i <= $#para){ my $para = $para[$i]; unless(grep 1 == @{ $words_in_para{$_} },@$para){ # all words appear somewhere else, delete the paragraph foreach my $word (@$para){ $words_in_para{$word} = [ grep $_ ne $i,@{ $words_in_para{$word} } ]; } $para[$i] = []; } print {*STDERR} "$i/$maxp\r"; $i++; } my $end_time = time(); print "\n"; print join "\n",map {'['.join(' ',sort {$a<=>$b} @$_).']'} grep @$_,@para; print "\n"; print $end_time - $start_time,"s\n";