use strict;
use warnings;
use Fcntl qw(SEEK_SET SEEK_CUR SEEK_END);

# Demo: for every column of a pipe-delimited table, list the values that
# occur exactly once in that column.
#
# The data rows are re-read once per column, so only the counts for a
# single column are held in memory at any time.

my $data = <<"END";
head1|head2|head3
val1|val2|val3
val1|val4|val5
val6|val4|val5
val2|val7|val5
val3|val7|val3
END

open my $fh, "<", \$data or die "can't open input file $!";

print_unique_values($fh);

# print_unique_values($fh)
#
# $fh must be a seekable read handle positioned at the header line of a
# pipe-delimited table.  For each header, prints "UNIQUE VALUES for <name>:"
# to the currently selected output handle, followed by the sorted values
# that appear exactly once in that column, and then a blank line.
#
# Blank lines are skipped, and a row that is too short to have the current
# column contributes nothing to it.  Empty input prints nothing.
# Dies if the handle cannot be rewound.
sub print_unique_values
{
    my ($fh) = @_;

    my $header_line = <$fh>;               # get first line
    return unless defined $header_line;    # empty input: nothing to report
    chomp $header_line;
    my @headers = split(/\|/, $header_line);

    my $begin_of_data = tell($fh);    # byte position of the first data row

    foreach my $column_nr (0 .. $#headers)
    {
        # Start every pass at the first data row.
        seek($fh, $begin_of_data, SEEK_SET)
            or die "can't rewind input file $!";

        # Read the whole file looking only at one column.
        my %seen;    # value => number of times "seen" in this column
        while (my $line = <$fh>)
        {
            chomp $line;
            next if $line =~ /^\s*$/;

            my $field = (split(/\|/, $line))[$column_nr];
            next unless defined $field;    # row has no such column

            $seen{$field}++;
        }

        # Unique values are those which were only "seen" once.
        print "UNIQUE VALUES for $headers[$column_nr]:\n";
        foreach my $field_value (sort keys %seen)
        {
            print "$field_value\n" if $seen{$field_value} == 1;
        }
        print "\n";
    }

    return;
}

__END__
UNIQUE VALUES for head1:
val2
val3
val6

UNIQUE VALUES for head2:
val2

UNIQUE VALUES for head3:
##

##

use strict;
use warnings;
use Data::Dump qw (dump dd);

# Demo: read a pipe-delimited table once and report, for every value, the
# columns (numbered from 1) in which that value occurs exactly once.

my $data = <<"END";
head1|head2|head3
val1|val2|val3
val1|val4|val5
val6|val4|val5
val2|val7|val5
val3|val7|val3
END

open my $fh, "<", \$data or die "can't open input file $!";

my $unique_cols_for = unique_columns_by_value($fh);

# Values that are not unique in any column are absent from the result,
# so every value listed here has at least one column to show.
foreach my $value_seen (sort keys %{$unique_cols_for})
{
    print "$value_seen is unique in cols: @{ $unique_cols_for->{$value_seen} }\n";
}

# unique_columns_by_value($in)
#
# $in is a read handle positioned at the header line of a pipe-delimited
# table.  The header line is consumed and ignored, because the report
# identifies columns by number rather than by name.
#
# Returns a hash reference:
#     value => [ ascending column numbers in which the value occurs once ]
# Columns are numbered 1..n.  A value that occurs more than once in every
# column it appears in has no entry.  Blank lines are skipped.  Empty
# input yields an empty hash.
sub unique_columns_by_value
{
    my ($in) = @_;

    my $header_line = <$in>;    # skip the header row (undef on empty input)

    my %count_for;    # value => { column number => times seen there }
                      # like "somevalue" => { 1 => 1, 3 => 2 }

    while (defined(my $line = <$in>))
    {
        chomp $line;
        next if $line =~ /^\s*$/;

        my $col = 0;
        foreach my $field (split(/\|/, $line))
        {
            $col++;    # cols are 1...n
            $count_for{$field}{$col}++;
        }
        # dd \%count_for;    # debugging aid, needs Data::Dump
    }

    # A value is unique, "one of a kind", in a column when its count
    # for that column is exactly 1.
    my %unique_cols_for;
    foreach my $value (keys %count_for)
    {
        my $times_in = $count_for{$value};
        my @unique_cols = sort { $a <=> $b }
                          grep { $times_in->{$_} == 1 } keys %{$times_in};

        $unique_cols_for{$value} = \@unique_cols if @unique_cols;
    }

    return \%unique_cols_for;
}

__END__
Previous program output:
UNIQUE VALUES for head1:
val2
val3
val6

UNIQUE VALUES for head2:
val2

UNIQUE VALUES for head3:
(NONE)   <- annotation only: the previous program prints nothing after this header

This program's output
(there are many possible formats for presenting this information):
val2 is unique in cols: 1 2
val3 is unique in cols: 1
val6 is unique in cols: 1

This means nothing was unique in column 3.
Column 1 has three unique values.
Column 2 has one unique value.