Here is the base.pm that is causing all the ruckus
package SST::Scanner::Base;
use warnings;
use strict;
use File::Package;
# Cwd - functions to manipulate the current working directory
use Cwd;
# Encode and decode filenames in base64
use MIME::Base64;
# Data compression
use Tie::Gzip;
use constant TRUE => 1;
use constant FALSE => 0;
use Log::Log4perl qw(:easy);
=head1 NAME
SST::Scanner::Base - Base class for scanning; all scanner classes inherit from this class.
=head1 VERSION
Version 0.01
=cut
our $VERSION = '0.01';
=head1 SYNOPSIS
=head1 METHODS
=head2 new()
Returns a new instance of the class. Initializes a number of object variables:
_mount_dir - the location on the local machine where the filesystem is mounted; this is where the object scans from.
_num_errors - int containing the number of directories that couldn't be accessed
_num_files - int containing the number of files scanned
_output_dir - the location where the raw data shall be written
_filer_path - URL to the data on the filer
=cut
# Constructor: builds a scanner object with every attribute at its default.
#
# Parameters:
#   $class - class name to bless the object into, so subclasses can inherit
#            this constructor unchanged.
#   Any further arguments are accepted for compatibility but are not used.
#
# Returns: a blessed hash reference with empty path attributes and zeroed
#          counters.
sub new {
    my $class = shift;

    my $self = {
        '_mount_dir'  => '',    # local mount point the scan starts from
        '_num_errors' => 0,     # directories that could not be accessed
        '_num_files'  => 0,     # plain files recorded so far
        '_output_dir' => '',    # where the raw data is written
        '_filer_path' => '',    # URL of the data on the filer
    };

    return bless( $self, $class );
}
=head2 change_directory()
Attempts to change into the specified directory; accepts both relative and full paths.
On success returns TRUE (1) and logs change at debug level.
On failure returns FALSE (0) and logs failure at error level.
=cut
# Change the process's working directory to $dir.
#
# Parameters:
#   $dir - relative or absolute path to change into.
#
# Returns: TRUE (1) on success, FALSE (0) on failure, as the POD promises
#          (the failure path previously returned undef).
#
# On failure the errno set by chdir() is put back into $! after logging,
# because callers report $! themselves and the logger call may overwrite it.
sub change_directory {
    my ( $self, $dir ) = @_;

    if ( chdir($dir) ) {
        $self->logger()->debug("Changing Directory to $dir");
        return TRUE;
    }

    # Remember the numeric errno before the logger gets a chance to change it.
    my $chdir_errno = $! + 0;
    $self->logger()->error("chdir $dir failed $!\n");
    $! = $chdir_errno;

    return FALSE;
}
=head2 logger()
Returns the global logging object
=cut
# Accessor for the shared Log::Log4perl logger; every scanner object logs
# under the same 'SST::Scanner' category.
sub logger {
    my $category = 'SST::Scanner';
    return Log::Log4perl->get_logger($category);
}
=head2 scan_dir()
This is the actual scanning kernel: it opens the specified directory and calls itself recursively for each subdirectory.
For each file found, the file information is determined and output to the raw data file.
=cut
# Recursively scan one directory and write a record for every plain file.
#
# Parameters:
#   $datadir  - directory on the local filesystem to scan (relative or
#               absolute; the method chdir()s into it).
#   $filerdir - the globally accessible location of that directory,
#               including the filer name; used to build the recorded paths.
#
# Returns: TRUE when the directory was scanned, FALSE when it could not be
#          entered (the failure is written to ERR_FILE and counted in
#          _num_errors).
#
# Dies (via logdie) when the directory was entered but cannot be opened, and
# when a record cannot be written to RAW_DATA.
#
# Side effects: writes to the RAW_DATA and ERR_FILE filehandles, which the
# caller must already have opened; adds to _num_files; temporarily changes
# the working directory and changes back before returning.
sub scan_dir {
    my $self = shift;
    my ( $datadir, $filerdir ) = @_;

    my $cwd       = getcwd();    # The directory the method was called from.
    my $num_files = 0;

    # Attempt to chdir into the directory to scan; if that is not possible,
    # record the error and return.
    unless ( $self->change_directory($datadir) ) {
        print ERR_FILE "Cannot change into directory $datadir $!\n";
        $self->{'_num_errors'}++;
        return FALSE;
    }

    # The directory that is actually being scanned. The value is used as-is:
    # getcwd() adds no trailing newline, so chomp() could only damage a
    # directory name that really ends in one.
    my $dir = getcwd();

    # Open the directory; this sets the access time on the directory but not
    # on the files within it. We are already inside it, so a failure here is
    # treated as fatal. 'or' is required: with '||' the logdie() call bound to
    # $dir and an opendir failure went unnoticed. A lexical handle keeps each
    # recursion level's listing independent.
    opendir( my $dh, $dir )
      or $self->logger()->logdie("opendir $dir failed $! \n");
    my @files = readdir $dh;
    closedir $dh;

    foreach my $file (@files) {

        # Skip the current and parent directory entries.
        next if $file eq '.' || $file eq '..';

        # $file is the local name from the scanner's perspective, e.g. foo.
        # $filerpath is the URL of the file on the filer,
        # e.g. filer:/vol/qtree/foo.
        my $filerpath = $filerdir . "/" . $file;

        # This is the only stat(2) we perform; it fills Perl's "_" handle
        # used by the file tests below. lstat() does not follow symlinks, so
        # a symlink is neither -d nor -f here and is skipped without an
        # explicit test.
        my @fstat = lstat($file);

        if ( -d _ ) {    # directory - so must be scanned
            if ( $file eq ".snapshot" ) {
                $self->logger()->debug("Skipping : $filerpath\n");
            }
            else {
                $self->logger()->debug("Descending to: $filerpath\n");
                $self->scan_dir( $file, $filerpath );
            }
        }
        elsif ( -f _ ) {    # plain file - must be analyzed
            $num_files++;

            # Traditionally the output file's first field was the length of
            # the filename.
            my $length = length($filerpath);

            # Filenames are base64 encoded to keep strange characters out of
            # the output; the empty second argument suppresses the line
            # breaks encode_base64() would otherwise insert.
            my $b_fname = encode_base64( $filerpath, '' );

            # Owner lookup is platform specific and supplied by the child
            # class. $dir is the working directory for this whole loop, so it
            # replaces a getcwd() call per file.
            my $owner = $self->get_file_owner( $dir . "/" . $file, \@fstat );

            # A well-formed row has 16 colon-separated fields: length, name,
            # owner and the 13 lstat values.
            my $line  = join( ":", $length, $b_fname, $owner, @fstat );
            my @items = split /:/, $line;
            if ( @items != 16 ) {
                $self->logger->warn("Invalid Row: $owner, $file");
            }

            unless ( print RAW_DATA "$line,\n" ) {
                $self->logger->fatal("Unable to print filehandle RAW_DATA: $!");
                die $!;
            }
        }
    }

    # Change directory back to where we started.
    # NOTE(review): the result is not checked; if this chdir fails, a calling
    # scan_dir() continues in the wrong directory - confirm whether that
    # should be fatal.
    $self->change_directory($cwd);

    $self->logger()->debug("PROCESS_DIR $filerdir: num_files = $num_files\n");
    $self->{'_num_files'} += $num_files;
    return TRUE;
}
=head2 output_dir()
Gets and sets the directory to store output information for this volume.
=cut
# Combined getter/setter for the output directory.
#
# Parameters:
#   $new_dir - optional; when given and true, it replaces the stored value.
#              A false value (undef, '', 0) leaves the stored value alone.
#
# Returns: the output directory currently stored on the object.
sub output_dir {
    my ( $self, $new_dir ) = @_;

    $self->{'_output_dir'} = $new_dir if $new_dir;

    return $self->{'_output_dir'};
}
=head2 files_scanned()
Returns the number of files scanned successfully by the tool.
=cut
# Read-only accessor: returns the running count of files recorded by the
# scan (the _num_files attribute).
sub files_scanned {
    my ($self) = @_;
    return $self->{'_num_files'};
}
=head2 errors_found()
Returns the number of directories that could not be accessed.
=cut
# Read-only accessor: returns the number of directories the scan failed to
# enter (the _num_errors attribute).
sub errors_found {
    my ($self) = @_;
    return $self->{'_num_errors'};
}
=head2 mount_dir()
Returns the directory that the filesystem is mounted on.
=cut
# Read-only accessor: returns the local directory the filesystem is mounted
# on (the _mount_dir attribute).
sub mount_dir {
    my ($self) = @_;
    return $self->{'_mount_dir'};
}
=head2 filer_path()
Gets and sets the path to the data
=cut
# Combined getter/setter for the path to the data on the filer.
#
# Parameters:
#   $new_filer - optional; when given and true, it replaces the stored value.
#                A false value (undef, '', 0) leaves the stored value alone.
#
# Returns: the filer path currently stored on the object.
sub filer_path {
    my ( $self, $new_filer ) = @_;

    $self->{'_filer_path'} = $new_filer if $new_filer;

    return $self->{'_filer_path'};
}
=head2 scan()
Call the scan method on an object to initiate the scan. It will create the appropriate directory structure, test the filesystem is available, then perform the scanning.
=cut
# Run a complete scan of the volume this object describes.
#
# Steps: create the output directory, open the gzip-compressed raw data and
# access error files inside it, mount the filesystem, scan it with
# scan_dir(), unmount it, and close the output files.
#
# Returns: the status returned by scan_dir(), or FALSE when the output
#          directory cannot be created, an output file cannot be opened, or
#          mount()/unmount() fails.
#
# Dies (via logdie) when no output directory has been set.
#
# Every output file that was opened is closed again on every return path.
# The early returns used to leave the handles open, so buffered compressed
# data was not written out until the process exited.
sub scan {
    my $self = shift;

    $self->logger()->logdie("Output Directory must be set")
      unless $self->output_dir();

    # Test
    #unless ( $self->test() ) {
    #    $self->logger()->error( "Test failed for: " . $self->filer_path() );
    #    return FALSE;
    #}

    # Create the output directory. mkdir also fails when the directory
    # already exists.
    unless ( mkdir $self->output_dir() ) {
        $self->logger->error( "Cannot create output directory "
              . $self->output_dir()
              . " $! \n" );
        return FALSE;
    }

    my $output_file = $self->output_dir() . "/rawdata.txt.gz";
    my $error_file  = $self->output_dir() . "/access_errors.txt.gz";

    # Enable compression on the output file and on the error file.
    tie *RAW_DATA, 'Tie::Gzip';
    tie *ERR_FILE, 'Tie::Gzip';

    # NOTE(review): the two-argument open() form is kept because these
    # handles are tied and the call is dispatched to Tie::Gzip - confirm
    # whether Tie::Gzip accepts the three-argument form before changing it.
    unless ( open( RAW_DATA, ">$output_file" ) ) {
        $self->logger->error(
            "Cannot open output file $output_file for writing: $!");
        return FALSE;
    }

    unless ( open( ERR_FILE, "> $error_file" ) ) {
        $self->logger->error(
            "Cannot open error file $error_file for writing: $!");
        close RAW_DATA;    # do not leak the handle that did open
        return FALSE;
    }

    $self->logger->info( "Scanning Volume: " . $self->filer_path() . "\n" );

    # Mount, scan and unmount. A failed mount skips the scan; a failed
    # unmount overrides the scan status. Either way control falls through to
    # the close calls below.
    my $status = FALSE;
    if ( $self->mount() ) {
        $status = $self->scan_dir( $self->mount_dir(), $self->filer_path() );
        $status = FALSE unless $self->unmount();
    }

    # Close the output and error files.
    close RAW_DATA;
    close ERR_FILE;

    return $status;
}
|