#!/usr/bin/perl use warnings; use strict; sub mbox_read { my ($fh) = @_; my ($from, $msg, $len, $lines, $line); # read 'From ' line: while ($from = <$fh>) { last if $from =~ /\AFrom /; die 'wrong mailbox format or current position' if $from !~ /\A\r?\n/; } # exit on EOF return if !defined $from; # read email header and detect body size if possible while ($line = <$fh>) { $msg .= $line; last if $line =~ /\A\r?\n/; # end of header if ($line =~ /\AContent-Length:\s+(\d+)\Z/i) { $len = $1; } elsif ($line =~ /\ALines:\s+(\d+)\Z/i) { $lines = $1; } } # read Content-Length: bytes or Lines: lines, if possible if ($len || $lines) { my $read_bytes; if ($len) { $read_bytes = read $fh, $msg, $len, length($msg); die "read: $!" if !defined $read_bytes; } else { $read_bytes = length $msg; $msg .= scalar <$fh> for 1 .. $lines; $read_bytes = length($msg) - $read_bytes; } # check is we really at end of message now, or Content-Length: is wrong my $extra_bytes = read $fh, my $next_lines, 64; die "read: $!" if !defined $extra_bytes; if ($next_lines =~ /\A[\r\n]*From / || (eof($fh) && $next_lines =~ /\A[\r\n]*\z/)) { seek $fh, -$extra_bytes, 1 or die "seek: $!"; } else { substr($msg, -$read_bytes, $read_bytes, q{}); seek $fh, -($extra_bytes+$read_bytes), 1 or die "seek: $!"; goto LINE_BY_LINE; } } # else read line-by-line until 'From ' or EOF else { LINE_BY_LINE: while ($line = <$fh>) { if ($line !~ /\AFrom /) { $msg .= $line; } else { seek $fh, -length($line), 1 or die "seek: $!"; last; } } # remove last empty string, if any, because it usually belong to # mbox format instead of message body $msg =~ s/^\r?\n\z//m; } return $msg; } for my $file (@ARGV) { open my $fh, '<', $file or die "open: $!"; my $count = 0; $count++ while mbox_read($fh); close $fh; printf "%5d %s\n", $count, $file; }