while (<>) { print unless seen_bucket($_); } __END__ Ten sets of 1..400 should get uniq'd to 1..400 $ perl -le 'for(1..10){for(1..400){print}}' | perl dup.pl | wc -l 400 Ten sets of 1..401 should get uniq'd to (1..401) x 10 because 2 buckets of 200 lines hold upto 400 $ perl -le 'for(1..10){for(1..401){print}}' | perl dup.pl | wc -l 4010