use strict; use warnings; use Fcntl qw(SEEK_SET SEEK_CUR SEEK_END); my $data =<< "END"; head1|head2|head3 val1|val2|val3 val1|val4|val5 val6|val4|val5 val2|val7|val5 val3|val7|val3 END open my $fh , "<", \$data or die "can't open input file $!"; my $header_line = <$fh>; # get first line chomp $header_line; my @headers = split(/\|/,$header_line); my $num_columns = @headers; # scalar value of an array is number of elements my $begin_of_data = tell($fh); #save byte position of start of data rows my %seen; foreach my $column_nr (0..$num_columns-1) { # read whole file looking only at one column while (<$fh>) { chomp; my $field = (split(/\|/,$_))[$column_nr]; $seen{$field}++; # counts num times "seen" } # unique values are those which were only "seen" once print "UNIQUE VALUES for ",shift @headers,":\n"; foreach my $field_value (sort keys %seen) { print "$field_value\n" if $seen{$field_value} == 1; } # reset file pointer to beginning and do everything again for # the next column seek ($fh,$begin_of_data,SEEK_SET); %seen=(); # clear the hash for calculating next column's data print "\n"; } __END__ UNIQUE VALUES for head1: val2 val3 val6 UNIQUE VALUES for head2: val2 UNIQUE VALUES for head3: #### use strict; use warnings; use Data::Dump qw (dump dd); my $data =<< "END"; head1|head2|head3 val1|val2|val3 val1|val4|val5 val6|val4|val5 val2|val7|val5 val3|val7|val3 END open my $fh , "<", \$data or die "can't open input file $!"; my $header_line = <$fh>; # get first line chomp $header_line; my @headers = split(/\|/,$header_line); my %seen; # value => [array of the positive column numbers it's unique in] # like "somevalue" => [1,2] # a negative column number means it was seen at least twice # and therefore that value is not unique, "one of a kind" # in that column # like "somevalue" =>[1,2,-3] while (defined (my $line=<$fh>)) { chomp $line; next if $line =~ /^\s*$/; my @fields = split(/\|/,$line); my $col=1; # cols are 1...n # can't have a negative sero! foreach my $field (@fields) { if (my ($col_seen_before) = grep{$col == abs($_)}@{$seen{$field}}) { if ($col_seen_before <0) { # nothing to do...#this is nth time this value seen in this col } else { # flip the sign of column number to negative to indicate 2nd # this value has been seen in this column. @{$seen{$field}} = map{$_ = -$_ if $_==$col_seen_before; $_}@{$seen{$field}}; } } else { push @{$seen{$field}}, $col; #first time seen in this column } $col++; #dd \%seen; } } # unique values are those which have a positive value for that col number foreach my $value_seen (sort keys %seen) { if (my @unique_cols = sort{$a<=>$b} grep{$_>0}@{$seen{$value_seen}}) { print "$value_seen is unique in cols: @unique_cols\n"; } } __END__ Previous program output: UNIQUE VALUES for head1: val2 val3 val6 UNIQUE VALUES for head2: val2 UNIQUE VALUES for head3: (NONE) This program's output: There could be many formats to present this information... val2 is unique in cols: 1 2 val3 is unique in cols: 1 val6 is unique in cols: 1 This means nothing was unique in column 3. column 1 has 3 unique values. column 2 has one unique value.