Using jethro/tye's interpretation of your description:
#! perl -slw
use strict;
use Data::Dump qw[ pp ];
use Algorithm::Combinatorics qw[ combinations ];

# Slurp the CSV rows into an array of arrays.
my @lines = map [ do{ chomp; split ',' } ], <DATA>;
my $w = $#{ $lines[0] };

my @keyFields;

# Try column combinations, smallest first; stop at the first combination
# whose joined values are unique across every row.
WIDTH: for my $k ( 1 .. $w ) {
    my $iter = combinations( [ 0 .. $w ], $k );

    COMB: while( my $c = $iter->next ) {
        my %uniq;
        for my $a ( @lines ) {
            # A duplicate means this column set cannot be a key;
            # abandon it and move on to the next combination.
            next COMB if exists $uniq{ join ',', @{ $a }[ @$c ] };
            $uniq{ join ',', @{ $a }[ @$c ] } = 1;
        }
        @keyFields = @$c;
        last WIDTH;
    }
}

print "All lines are unique using a combination of fields:[ @keyFields ]";

__DATA__
2,6,5,2,1,1,7,9,8,6
5,8,5,0,9,3,8,9,0,2
2,3,1,1,5,2,9,7,8,3
0,1,3,7,6,2,4,3,7,5
4,6,2,8,6,4,1,5,4,3
4,6,7,2,0,9,6,5,0,9
5,6,2,4,3,7,1,9,3,5
2,5,7,1,0,0,0,5,8,5
3,8,1,4,9,2,5,8,1,0
5,2,2,2,0,7,2,8,3,1
7,1,2,6,5,4,0,9,2,5
1,6,3,7,3,8,7,0,7,7
0,0,8,9,9,8,3,3,6,0
0,2,5,3,8,4,1,8,9,4
5,6,9,0,6,4,9,5,0,7
9,0,9,3,2,6,3,2,4,6
3,3,0,4,8,5,7,7,2,4
3,1,3,0,0,3,1,7,3,8
0,6,7,0,8,9,4,8,4,8
0,2,0,3,7,4,6,8,4,5
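The search starts with single columns and only widens the candidate key once every smaller combination has failed, so the first hit is guaranteed to be a minimal-width key; the next COMB early exit abandons a combination as soon as the first duplicate row is seen, so bad combinations are rejected cheaply.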
Output:
c:\test>896650-2
All lines are unique using a combination of fields:[ 3 6 ]
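For the larger test below, the rows were read from a file named on the command line rather than from the __DATA__ section. That exact variant isn't reproduced here, but the change amounts to something like this sketch (the file name and usage message are illustrative only):

#! perl -slw
use strict;
use Algorithm::Combinatorics qw[ combinations ];

# Take the CSV file name from the command line instead of using __DATA__.
my $file = shift @ARGV or die "Usage: $0 <csvfile>\n";
open my $fh, '<', $file or die "Cannot open '$file': $!\n";
my @lines = map [ do{ chomp; split ',' } ], <$fh>;
close $fh;

# The search over column combinations then proceeds exactly as above.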
On a randomly generated dataset of the scale you've suggested, it takes around 30 seconds to complete:
[19:42:44.71] c:\test>head -2 896650.dat
357,67,815,493,516,302,810,899,672,91,542,795,527,429,217,502,811,554,345,312,689,336,710,194,778,869,711,413,402,313,960,351,264,511,558,301,184,414,955,528,44,839,786,363,629,599,611,623,488,534,948,140,489,474,511,662,217,665,58,930,879,683,764,203,863,384,509,5,612,903,0,382,969,240,130,460,652,474,478,562,7,117,360,688,702,657,329,626,521,808,547,477,903,510,913,883,398,201,375,729
675,393,313,605,265,95,415,813,667,95,945,188,2,646,803,534,842,589,299,934,395,429,34,733,684,412,799,463,896,130,896,505,530,669,793,750,527,865,514,55,25,659,668,780,892,119,532,163,707,132,85,841,327,314,408,284,714,166,344,425,165,998,843,272,612,671,873,772,134,665,331,726,356,522,838,22,875,191,835,965,373,304,885,7,586,908,588,623,764,702,4,656,794,289,18,872,639,495,513,35

[19:54:27.66] c:\test>wc -l 896650.dat
10000 896650.dat

[19:54:40.64] c:\test>896650-2 896650.dat
All lines are unique using a combination of fields:[ 0 1 2 ]

[19:55:10.04] c:\test>
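The script that generated 896650.dat isn't shown above; a minimal sketch that would produce a file of the same shape (10,000 rows of roughly 100 comma-separated random integers below 1000) is:

#! perl -slw
use strict;

# Emit 10,000 comma-separated rows of 100 random integers in 0..999.
# Redirect to a file:  perl gen896650.pl > 896650.dat
for ( 1 .. 10_000 ) {
    print join ',', map { int rand 1000 } 1 .. 100;
}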
In reply to Re: Finding Keys from data
by BrowserUk
in thread Finding Keys from data
by aartist