$ time perl -MMail::Box::Manager -le '
$m=Mail::Box::Manager->new;
$f=$m->open(folder=>"Mail/-default");
print scalar $f->messages
'
Unexpected end of header (C parser):
charset="iso-8859-1"
3554
real 0m8.222s
user 0m7.928s
sys 0m0.200s
####
$ time ./mbox_scan Mail/-default
3549 Mail/-default
real 0m0.492s
user 0m0.408s
sys 0m0.080s
####
#!/usr/bin/perl
use warnings;
use strict;
sub mbox_read {
my ($fh) = @_;
my ($from, $msg, $len, $lines, $line);
# read 'From ' line:
while ($from = <$fh>) {
last if $from =~ /\AFrom /;
die 'wrong mailbox format or current position' if $from !~ /\A\r?\n/;
}
# exit on EOF
return if !defined $from;
# read email header and detect body size if possible
while ($line = <$fh>) {
$msg .= $line;
last if $line =~ /\A\r?\n/; # end of header
if ($line =~ /\AContent-Length:\s+(\d+)\Z/i) {
$len = $1;
}
elsif ($line =~ /\ALines:\s+(\d+)\Z/i) {
$lines = $1;
}
}
# read Content-Length: bytes or Lines: lines, if possible
if ($len || $lines) {
my $read_bytes;
if ($len) {
$read_bytes = read $fh, $msg, $len, length($msg);
die "read: $!" if !defined $read_bytes;
}
else {
$read_bytes = length $msg;
$msg .= scalar <$fh> for 1 .. $lines;
$read_bytes = length($msg) - $read_bytes;
}
# check is we really at end of message now, or Content-Length: is wrong
my $extra_bytes = read $fh, my $next_lines, 64;
die "read: $!" if !defined $extra_bytes;
if ($next_lines =~ /\A[\r\n]*From / ||
(eof($fh) && $next_lines =~ /\A[\r\n]*\z/)) {
seek $fh, -$extra_bytes, 1 or die "seek: $!";
}
else {
substr($msg, -$read_bytes, $read_bytes, q{});
seek $fh, -($extra_bytes+$read_bytes), 1 or die "seek: $!";
goto LINE_BY_LINE;
}
}
# else read line-by-line until 'From ' or EOF
else {
LINE_BY_LINE:
while ($line = <$fh>) {
if ($line !~ /\AFrom /) {
$msg .= $line;
}
else {
seek $fh, -length($line), 1 or die "seek: $!";
last;
}
}
# remove last empty string, if any, because it usually belong to
# mbox format instead of message body
$msg =~ s/^\r?\n\z//m;
}
return $msg;
}
for my $file (@ARGV) {
open my $fh, '<', $file or die "open: $!";
my $count = 0;
$count++ while mbox_read($fh);
close $fh;
printf "%5d %s\n", $count, $file;
}