renarios has asked for the wisdom of the Perl Monks concerning the following question:

Hi all great Monks

I am trying to write a excel plugin for spamassassin, since I (my girlfriend!!) got some bizarre excel-like spam.
As an example I took an existing PDF-plugin and modified it a bit. Thing is: It doesn't do a thing!
My Perl-knowledge is miserable, so my hope is pointed at you!
First dilemma is to get a temporary file on filesystem. Can you help me out?
Here's my code: (btw, I leeched an existing xls2txt converter, which works fine from the command line)

# XLS scan, inspired by Ocr.pm # For more details see # http://blog.atmail.com/?p=61 # XLSassassin v0.1 - Beta package Excel; use strict; use warnings; use Mail::SpamAssassin; use Mail::SpamAssassin::Util; use Mail::SpamAssassin::Plugin; our @ISA = qw (Mail::SpamAssassin::Plugin); # constructor: register the eval rule sub new { my ( $class, $mailsa ) = @_; $class = ref($class) || $class; my $self = $class->SUPER::new($mailsa); bless( $self, $class ); $self->register_eval_rule("check_xls"); return $self; } sub check_xls { my ( $self, $pms ) = @_; # Skip reading a XLS over 1024kb my $maxsize = '1024'; my $req = $pms->{main}->{conf}->{required_score}; # If the XLS already created, skip, we are calling ourself again return if(-e "/tmp/spamassassin_$$.xls"); # Check wether xls2csv is installed return if(!-e "/usr/bin/xls2csv"); foreach my $p ( $pms->{msg}->find_parts( qr(application/excel )i ), $pms->{msg}->find_parts( qr(application/vnd.ms-excel)i ) ) { my ( $ctype, $boundary, $charset, $name ) = Mail::SpamAssassin::Util::parse_content_type ( $p->get_header('content-type') ); $ctype = lc($ctype); if ( $ctype eq "application/vnd.ms-excel" ) { #keep raw email for debugging later my $imgfilename = "/tmp/raw.eml"; unless (-e $imgfilename) { if (open RAW, ">$imgfilename") { print RAW $pms->{msg}->get_pristine(); close RAW; debuglog("Saved: $imgfilename"); } } open(XLS, ">/tmp/spamassassin_$$.xls"); foreach $p ( $p->decode() ) { print XLS $p; } close XLS; # Skip attachments larger the X kb my $size = (stat("/tmp/spamassassin_$$.xls"))[7]; $size = int($size / 1024); # For this moment skip this... #next if($size > $maxsize); print STDERR "perl /usr/bin/xls2csv /tmp/spamassassin_$$.xls > /tm +p/spamassassin.txt.$$"; #system("perl /usr/bin/xls2csv /tmp/spamassassin_$$.xls > /tmp/spa +massassin.xls.txt.$$"); system("/usr/bin/xls2csv -x /tmp/spamassassin_$$.xls -c /tmp/spamass +assin.xls.txt.$$"); # #print STDERR "Exec: /usr/bin/pdfimages /tmp/spamassassin_$$.xls +/tmp/spamassassin.xls-img.$$\n"; # system("/usr/bin/pdfimages /tmp/spamassassin_$$.xls /tmp/spamassa +ssin.xls-img.$$"); # Insert the message, and scan back via Spamassassin to get the sc +ore count my $message; $message = "From: sa\@local.com\nContent-type: text/plain\n\n"; opendir(DIR, "/tmp"); my @files = readdir(DIR); # Read each excel sheet extracted, pipe to gocr to get the XLS ima +ge text foreach my $file (@files) { next if($file !~ /spamassassin.xls.txt.$$/); #print STDERR "/usr/local/bin/gocr -i /tmp/$file -o /tmp/msg.txt - +d 0 -s 15"; # NEED TO SANATIZE $file $file =~ /(spamassassin.*)/; $file = $1; # system("/usr/local/bin/gocr -i /tmp/$file -o /tmp/spamassassin.tx +t.$$ -d 0 -s 15"); # Delete the image once done #unlink($file); } # Slurp in the file foreach("/tmp/spamassassin.img.$$.txt", "/tmp/spamassassin.txt.$$") +{ open(XLS, $_); # Now, print if the XLS had while(<XLS>) { $message .= $_; } close(XLS); } # We have our text, parse it my $spamtest = Mail::SpamAssassin->new(); my $mail = $spamtest->parse($message); my $status = $spamtest->check($mail); my $score = $status->get_body_only_points(); # Message is marked as Spam if($score >= $req) { #print STDERR "MARKED"; #print STDERR $status->get_report(); $self->_clean_xls_tmp(); return 1; #$message = $status->rewrite_mail(); } else { # Message is clean clean_xls_tmp(); return 0; } } } # We should not reach here return 0; } sub _clean_xls_tmp { #unlink("/tmp/spamassassin_$$.xls"); #unlink("/tmp/spamassassin.txt.$$ +"); } } 1;
Thanks in advance, Renarios