# Cache of the magic entries parsed from the built-in DATA section.  The DATA
# section is parsed at most once per process; every later no-argument call to
# new() reuses this same array reference instead of re-reading the handle.
my $do_data_magic_only_once;

# new([$filename])
#
# Construct a new object.  May be invoked on a class name or on an existing
# object (in which case the object's class is used).
#
#   * No argument: magic entries come from the File::MMagic DATA handle via
#     readMagicHandle(); the parsed list is cached in
#     $do_data_magic_only_once and shared by later no-argument objects.
#   * $filename:   magic entries are read from that file via
#     readMagicHandle().  If the file cannot be opened a warning is issued
#     and the object is still returned, with an empty {magic} list.
#
# Returns a hash-based object blessed into the invoking class, with the keys
# MF, magic, SPECIALS and FILEEXTS populated.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;

    my $self = {
        MF    => [],
        magic => [],
    };

    if (!@_) {
        if ($do_data_magic_only_once) {
            $self->{magic} = $do_data_magic_only_once;
        }
        else {
            my $fh = *File::MMagic::DATA{IO};
            binmode($fh);
            bless $fh, 'FileHandle' if ref $fh ne 'FileHandle';

            # Remember where the DATA section starts in a per-class package
            # variable, so the handle can be rewound to that offset.
            # Bare block to localise the "no strict" (contributed by
            # Simon Matthews).
            my $dataLoc;
            {
                no strict 'refs';
                my $instance = \${"$class\::_instance"};
                $$instance = $fh->tell() unless $$instance;
                $dataLoc = $$instance;
            }

            $fh->seek($dataLoc, 0);
            readMagicHandle($self, $fh);
            $do_data_magic_only_once = $self->{magic};
        }
    }
    else {
        my $filename = shift;
        my $fh       = FileHandle->new;

        # Pass the mode separately so that characters in $filename (leading
        # '<', '>', '|', surrounding whitespace, "-") are never interpreted
        # as an open mode.  The defined() guard matters: a three-argument
        # open of undef would create an anonymous temporary file instead of
        # failing.
        if (defined $filename && $fh->open($filename, '<')) {
            binmode($fh);
            readMagicHandle($self, $fh);
        }
        else {
            warn __PACKAGE__ . " couldn't load specified file $filename";
        }
    }

    # From the BSD names.h: some tokens for hard-coded checks of different
    # text types.  This isn't rocket science; it is prone to failure, so
    # these checks are only a last resort.  They can be removed afterwards
    # (the original note names a "removSpecials()" helper -- presumably
    # removeSpecials(); verify against the rest of the module).
    #
    # NOTE(review): the text/html entries had been reduced to ten copies of
    # the fragment "]*>" (which matches any '>'); they are restored here as
    # opening-tag patterns -- confirm the tag list against upstream.
    $self->{SPECIALS} = {
        "message/rfc822" => [
            "^Received:",
            "^>From ",
            "^From ",
            "^To: ",
            "^Return-Path: ",
            "^Cc: ",
            "^X-Mailer: ",
        ],
        "message/news" => [
            "^Newsgroups: ",
            "^Path: ",
            "^X-Newsreader: ",
        ],
        "text/html" => [
            "<html[^>]*>",
            "<HTML[^>]*>",
            "<head[^>]*>",
            "<HEAD[^>]*>",
            "<body[^>]*>",
            "<BODY[^>]*>",
            "<title[^>]*>",
            "<TITLE[^>]*>",
            "<h1[^>]*>",
            "<H1[^>]*>",
        ],
        "text/x-roff" => [
            '^\\.\\\\"',
            "^\\.SH ",
            "^\\.PP ",
            "^\\.TH ",
            "^\\.BR ",
            "^\\.SS ",
            "^\\.TP ",
            "^\\.IR ",
        ],
    };

    # Fallback mapping from file-name suffix pattern to MIME type.
    $self->{FILEEXTS} = {
        '\.gz$'   => 'application/x-gzip',
        '\.bz2$'  => 'application/x-bzip2',
        '\.Z$'    => 'application/x-compress',
        '\.txt$'  => 'text/plain',
        '\.html$' => 'text/html',
        '\.htm$'  => 'text/html',
    };

    # Two-argument bless so that subclasses get objects of their own class.
    return bless $self, $class;
}