in reply to Cout & parsing
use strict;
use warnings;

use Email::Address ();

# Report the most common e-mail hosts found in a binary field file.
#
# Usage: perl this_script.pl INPUT_FILE
#
# Input format (as consumed below): a sequence of length-prefixed fields,
# each one a single length byte (0-255) followed by exactly that many bytes
# of data. The total number of fields must be a multiple of three, and the
# first field of every group of three is parsed as an e-mail address list.
#
# Output: one "host (count)" line per host on STDOUT, most frequent first.
# Only the five most frequent hosts are kept, plus any hosts tied with the
# fifth one.
#
# Dies with "Bad input file" when a field is truncated or when the number
# of fields is not a multiple of three.

my $FIELDS_PER_RECORD = 3;    # Fields per group; the first one is the address.
my $TOP_HOSTS         = 5;    # How many of the most common hosts to report.

@ARGV == 1 or die("usage: $0 INPUT_FILE\n");
my $input_path = $ARGV[0];

my @fields;
{
    open(my $fh_in, '<', $input_path)
        or die("Unable to open input file: $!\n");

    # The input is a binary file.
    binmode($fh_in);

    for (;;) {
        # Obtain the length of the field. read() returns 0 at a clean end
        # of file and undef on error, so the two cases are told apart.
        my $len_byte = '';
        my $got = read($fh_in, $len_byte, 1);
        defined($got) or die("Unable to read input file: $!\n");
        last if $got == 0;
        my $len = ord($len_byte);

        # Obtain the field. The byte count is compared against $len rather
        # than tested for truth: a zero-length field is valid (read()
        # returns 0 for it), whereas a short read means a truncated file.
        my $field = '';
        $got = read($fh_in, $field, $len);
        defined($got) or die("Unable to read input file: $!\n");
        $got == $len or die("Bad input file\n");

        push(@fields, $field);
    }

    # $fh_in is closed automatically when it goes out of scope here.
}

@fields % $FIELDS_PER_RECORD == 0 or die("Bad input file\n");

# Get every third field, starting with the first.
my @email_addrs
    = @fields[ grep { $_ % $FIELDS_PER_RECORD == 0 } 0 .. $#fields ];

# Convert to Email::Address objects and group them by lower-cased host.
my %grouped;
foreach my $email_addr (@email_addrs) {
    foreach my $addr (Email::Address->parse($email_addr)) {
        my $host = lc($addr->host());
        push(@{ $grouped{$host} }, $addr);
    }
}

# Determine the most common hosts: descending by address count, with the
# host name as a tie-breaker so the output order is deterministic.
my @highest = sort {
    scalar(@{ $grouped{$b} }) <=> scalar(@{ $grouped{$a} })
        or $a cmp $b
} keys %grouped;

# Filter out unwanted hosts. Keep ties with the last host that makes the cut.
if (@highest >= $TOP_HOSTS) {
    my $min = scalar(@{ $grouped{ $highest[ $TOP_HOSTS - 1 ] } });
    @highest = grep { scalar(@{ $grouped{$_} }) >= $min } @highest;
}

# Print results.
foreach my $host (@highest) {
    my $count = scalar(@{ $grouped{$host} });
    print("$host ($count)\n");
}
Untested.
|
|---|