package SST::Scanner::Base;

use warnings;
use strict;

use File::Package;

# Cwd - functions to manipulate the current working directory
use Cwd;

# Encode and decode filenames in base64
use MIME::Base64;

# Data compression
use Tie::Gzip;

use constant TRUE  => 1;
use constant FALSE => 0;

use Log::Log4perl qw(:easy);

=head1 NAME

SST::Scanner::Base - Base class for scanning, all scanner classes inherit from this class.

=head1 VERSION

Version 0.01

=cut

our $VERSION = '0.01';

=head1 SYNOPSIS

=head1 METHODS

=head2 new()

Returns a new instance of the class it is invoked on and initializes a
number of object variables:

  _mount_dir  - the location on the local machine the filesystem is mounted,
                this is where the object scans from.
  _num_errors - int containing the number of directories that could not be
                accessed
  _num_files  - int containing the number of plain files recorded so far
  _output_dir - the location where the raw data shall be written
  _filer_path - URL to the data on the filer

Any arguments after the class name are accepted but ignored.

=cut

sub new {
    my ($class) = @_;

    my $self = {
        '_mount_dir'  => '',
        '_num_errors' => 0,
        '_num_files'  => 0,
        '_output_dir' => '',
        '_filer_path' => '',
    };

    bless( $self, $class );
    return $self;
}

=head2 change_directory()

Attempts to change directory into the directory specified, accepts both
relative and full paths.

On success returns TRUE (1) and logs change at debug level.
On failure returns FALSE (0) and logs failure at error level.

=cut

sub change_directory {
    my $self = shift;
    my $dir  = shift;

    if ( !chdir($dir) ) {
        $self->logger()->error("chdir $dir failed $!\n");

        # Return the documented FALSE (0) rather than undef, so the result is
        # false in list context as well as in scalar context.
        return FALSE;
    }

    $self->logger()->debug("Changing Directory to $dir");
    return TRUE;
}

=head2 logger()

Returns the global logging object

=cut

sub logger {
    return Log::Log4perl->get_logger('SST::Scanner');
}

=head2 scan_dir()

This is the actual scanning kernel, it opens the specified directory and
calls itself recursively for each subdirectory. For each file found the file
information is determined and output to the raw data file.
=cut

sub scan_dir {
    my $self = shift;

    # Data dir  - The directory on the filesystem to look in
    # Filer dir - The globally accessible location for the dir including the filer name etc
    my ( $datadir, $filerdir ) = @_;

    my $cwd       = getcwd();    # The directory the method was called from.
    my $num_files = 0;

    # Attempt to chdir into the directory to scan, if not possible, log the error and return
    unless ( $self->change_directory($datadir) ) {
        print ERR_FILE "Cannot change into directory $datadir $!\n";
        $self->{'_num_errors'}++;
        return FALSE;
    }

    # The directory that is actually being scanned.
    my $dir = getcwd();

    # Open the directory, this sets the access time on the directory but not
    # the files within it.
    #
    # The previous form, "opendir DIR, $dir || ...", applied "||" to $dir, so
    # a failed opendir was never noticed and the directory was silently
    # treated as empty. The failure is now detected with low-precedence "or"
    # and recorded like any other inaccessible directory; the scan carries on
    # with the siblings instead of aborting the whole volume.
    # A lexical handle is used because this method recurses.
    my $dh;
    unless ( opendir( $dh, $dir ) ) {
        my $err = $!;    # Capture before logging can overwrite it.
        $self->logger()->error("opendir $dir failed $err \n");
        print ERR_FILE "Cannot open directory $dir $err\n";
        $self->{'_num_errors'}++;
        $self->change_directory($cwd);
        return FALSE;
    }
    my @files = readdir $dh;    # @files contains the directory listing
    closedir $dh;

    foreach my $file (@files) {

        # Skip current and higher directories
        next if ( ( $file eq "." ) || ( $file eq ".." ) );

        # File is the local path to the file from the scanners perspective ie /tmp/foo
        # Filer path is the URL to the file on the filer. i.e. filer:/vol/qtree/foo
        my $filerpath = $filerdir . "/" . $file;

        # This is the only stat(2) we perform. This sets the "_" perl
        # internal below. Note also that as this is symlink aware, we
        # don't need to explicitly test for symlink below.
        my @fstat = lstat($file);

        # Check what type of file it is and act appropriately
        if ( -d _ ) {

            # directory - so must be scanned
            if ( $file eq ".snapshot" ) {
                $self->logger()->debug("Skipping : $filerpath\n");
            }
            else {
                $self->logger()->debug("Descending to: $filerpath\n");
                $self->scan_dir( $file, $filerpath );
            }
        }
        elsif ( -f _ ) {

            # plain file - must be analyzed
            $num_files++;

            # Traditionally the output file's first field was the length of the filename.
            my $length = length($filerpath);

            # In order to avoid strange chars in the csv file, filenames are base64 encoded
            my $b_fname = encode_base64($filerpath);

            # New lines must be removed from the base64 filename
            $b_fname =~ s/\n//g;

            # Lookup the owner, this is dealt with by the child class and is
            # platform specific. $dir is the current working directory here
            # (each recursive call changes back before returning), so it is
            # reused instead of calling getcwd() once per file.
            my $owner = $self->get_file_owner( $dir . "/" . $file, \@fstat );

            # length, name and owner plus the 13 lstat fields give 16 columns.
            my $line = join( ":", $length, $b_fname, $owner, @fstat );
            my @items = split /:/, $line;
            if ( @items != 16 ) {
                $self->logger->warn("Invalid Row: $owner, $file");
            }

            unless ( print RAW_DATA "$line,\n" ) {
                my $err = $!;    # Capture before logging can overwrite it.
                $self->logger->fatal("Unable to print filehandle RAW_DATA: $err");
                die $err;
            }
        }
    }

    # Change directory back to where we started.
    # NOTE(review): the result is not checked; if this chdir fails the caller
    # keeps resolving relative names against the wrong directory. The failure
    # is at least logged by change_directory().
    $self->change_directory($cwd);

    $self->logger()->debug("PROCESS_DIR $filerdir: num_files = $num_files\n");
    $self->{'_num_files'} += $num_files;
    return TRUE;
}

=head2 output_dir()

Gets and sets the directory to store output information for this volume.
A false argument (undef, '' or 0) leaves the current value unchanged.

=cut

sub output_dir {
    my $self = shift;
    if ( my $new_dir = shift ) {
        $self->{'_output_dir'} = $new_dir;
    }
    return $self->{'_output_dir'};
}

=head2 files_scanned()

Returns the number of files scanned successfully by the tool.

=cut

sub files_scanned {
    my $self = shift;
    return $self->{'_num_files'};
}

=head2 errors_found()

Returns the number of directories that could not be accessed.

=cut

sub errors_found {
    my $self = shift;
    return $self->{'_num_errors'};
}

=head2 mount_dir()

Returns the directory that the filesystem is mounted on.

=cut

sub mount_dir {
    my $self = shift;
    return $self->{'_mount_dir'};
}

=head2 filer_path()

Gets and sets the path to the data.
A false argument (undef, '' or 0) leaves the current value unchanged.

=cut

sub filer_path {
    my $self = shift;
    if ( my $new_filer = shift ) {
        $self->{'_filer_path'} = $new_filer;
    }
    return $self->{'_filer_path'};
}

=head2 scan()

Call the scan method on an object to initiate the scan. It will create the
appropriate directory structure, test the filesystem is available, then
perform the scanning.
=cut

sub scan {
    my $self = shift;

    $self->logger()->logdie("Output Directory must be set")
      unless $self->output_dir();

    # Test
    #unless ( $self->test() ) {
    #    $self->logger()->error( "Test failed for: " . $self->filer_path() );
    #    return FALSE;
    #}

    # Create the output directory. mkdir also fails when the directory
    # already exists, in which case the scan is refused.
    unless ( mkdir( $self->output_dir() ) ) {
        $self->logger->error(
            "Cannot create output directory " . $self->output_dir() . " $! \n" );
        return FALSE;
    }

    my $output_file = $self->output_dir() . "/rawdata.txt.gz";
    my $error_file  = $self->output_dir() . "/access_errors.txt.gz";

    # Enable compression on the output file
    tie *RAW_DATA, 'Tie::Gzip';

    # Enable compression on the error file.
    tie *ERR_FILE, 'Tie::Gzip';

    # The bareword handles and the two-argument open are kept on purpose:
    # scan_dir() prints to these global handles.
    # NOTE(review): presumably the tied Tie::Gzip handle parses the mode out
    # of the two-argument string - confirm before converting to 3-arg open.
    unless ( open( RAW_DATA, ">$output_file" ) ) {
        $self->logger->error(
            "Cannot open output file $output_file for writing: $!");
        return FALSE;
    }

    # Open the error file
    unless ( open( ERR_FILE, "> $error_file" ) ) {
        $self->logger->error(
            "Cannot open error file $error_file for writing: $!");
        close RAW_DATA;    # Do not leak the handle opened above.
        return FALSE;
    }

    $self->logger->info( "Scanning Volume: " . $self->filer_path() . "\n" );

    # Mount the filesystem
    unless ( $self->mount() ) {
        close RAW_DATA;
        close ERR_FILE;
        return FALSE;
    }

    # Perform the scan and record the status in $status. The scan runs
    # inside eval so that the filesystem is unmounted and the output files
    # are closed even if scan_dir() dies; the exception is re-thrown below.
    my $status     = eval { $self->scan_dir( $self->mount_dir(), $self->filer_path() ) };
    my $scan_error = $@;

    # Unmount the filesystem
    my $unmounted = $self->unmount();

    # Close the output and error files on every path, including a failed
    # unmount, so that the data written so far is not left unflushed.
    close RAW_DATA;
    close ERR_FILE;

    die $scan_error if $scan_error;
    return FALSE unless $unmounted;

    # Return the status
    return $status;
}