Below is the code which I was talking about.
use strict;
use warnings;
use File::Find;
use Time::localtime;
use File::Copy 'cp';
use File::Copy 'mv';
use File::Path qw(make_path);
use threads;
use Thread::Queue;
# Package for debugging need to remove
use diagnostics;
use Data::Dumper;
sub CreateIndividualArchive {
my $srcDir = shift;
my $destDir = shift;
my $arrayFileList = shift;
my $pathDelimiter = "/";
#my $tempFile = "dump.txt";
my @chars = ( '0' .. '9', 'A' .. 'F' );
my $len = 8;
my $string;
while ( $len-- ) { $string .= $chars[ rand @chars ] }
my $tempFile = $string;
#print "Temp fils is $tempFile\n";
my $fh;
my $fileName = $destDir . $pathDelimiter . $tempFile;
open $fh, '>', $fileName or die "Cannot open $tempFile :$!";
my $dirName = $arrayFileList->[0];
chop($dirName);
for ( @$arrayFileList[ 1 .. $#$arrayFileList ] ) {
print $fh "$_\n";
}
close($fh);
my $tarFileList = $destDir . $pathDelimiter . $tempFile;
my $tarExt = ".tar.gz";
my $tarFileName = $destDir . $pathDelimiter . $dirName . $tarExt;
my $cmd = "tar -zcf $tarFileName -C $srcDir -T $tarFileLis
+t";
#print "Cmd is $cmd\n";
print "CMD = $cmd\n";
my @tarOutput = `tar -zcf $tarFileName -C $srcDir -T $tarFileList
+2>&1`;
if ( $? == -1 ) {
print "Archiving of the files fails : $!\n";
unlink $tarFileName;
return 0;
}
unlink $tarFileList;
return 1;
}
sub Thread {
my $hashParm = shift;
my %hashFileList = %$hashParm;
my $sourcePath = shift;
my $destinationPath = shift;
my $keys;
my $values;
my @arrayValues;
my @rowData = ();
my $totalKey = keys %hashFileList;
my $pathDelimiter = "/";
#print "In thread and total keys received $totalKey\n";
while ( ( $keys, $values ) = each(%hashFileList) ) {
push( @arrayValues, $keys . $pathDelimiter );
my @row = ( $keys . $pathDelimiter, $keys . $pathDelimiter, 0
+);
push( @rowData, \@row );
my @arrayParm = @{$values};
foreach my $value (@arrayParm) {
my $fileName = $$value[0];
my $fileSize = $$value[1];
push( @arrayValues, $fileName );
my @row = ( $keys . $pathDelimiter, $fileName, $fileSize )
+;
push( @rowData, \@row );
}
#print "SourcePath $sourcePath and dest $destinationPath\n";
my $error = CreateIndividualArchive( $sourcePath, $destination
+Path,
\@arrayValues );
if ( $error eq 0 ) {
print "Error while doing tar is $error\n";
}
@arrayValues = ();
@rowData = ();
}
}
sub ScanDirWithPattern {
my $sourcePath = shift;
my $hashFileList = shift;
my $pathDelimiter = "/";
my $pattern = ".txt";
if ( 0 eq opendir( DIR, $sourcePath ) ) {
print "Failed to open directory $sourcePath\n";
return 0;
}
my @dirList;
if ( 0 eq ( @dirList = readdir(DIR) ) ) {
print "Failed to read directory $sourcePath\n";
closedir(DIR);
return 0;
}
closedir(DIR);
foreach my $dir (@dirList) {
#print "Current directory is $dir\n";
next if ( $dir eq "." or $dir eq ".." );
my $currentDir = $sourcePath . $pathDelimiter . $dir;
if ( -d $currentDir ) {
if ( 0 eq opendir( DIR, $currentDir ) ) {
print "Failed to open directory $currentDir\n";
return 0;
}
my @fileList;
if ( 0 eq ( @fileList = readdir(DIR) ) ) {
print "Failed to read directory $dir\n";
closedir(DIR);
return 0;
}
closedir(DIR);
my @relativeFileArray;
foreach my $file (@fileList) {
next if ( $file eq "." or $file eq ".." );
my $currentFile =
$sourcePath . $pathDelimiter . $dir . $pathDelimiter
+ . $file;
next if ( -d $currentFile );
if ( -f $currentFile ) {
if ( $currentFile =~ /$pattern/i ) {
my $relativeFile = $dir . $pathDelimiter .
+ $file;
my $size = -s $currentFile;
my @currentFileArray = ( $relativeFile, $size
+);
print "Inserting the $relativeFile in arra
+y\n";
push( @relativeFileArray, \@currentFileArray )
+;
}
}
}
$hashFileList->{$dir} = \@relativeFileArray;
}
}
return 1;
}
sub Create {
my ( $sourcePath, $destinationPath ) = @_;
my $pathDelimiter = "/";
my %hashFileList;
my $folderName = "temp";
my $error = ScanDirWithPattern( $sourcePath, \%hashFileList );
if ( $error eq 0 ) {
print "Error while scaning $sourcePath for files\n";
return 0;
}
my $keys;
my $values;
my @arrayValues;
my @rowData = ();
my $totalKeys = keys %hashFileList;
my $numThreads = 5; #For the time being
if ( $totalKeys le $numThreads ) {
$numThreads = $totalKeys;
}
my $bucketSize = $totalKeys / $numThreads;
my @keys = keys %hashFileList;
my @arrThreads;
my $i = 0;
my @arrHash;
my $tempDir = $destinationPath . $pathDelimiter . $folderName;
make_path($tempDir);
$destinationPath = $tempDir;
while ( my @keys2 = splice @keys, 0, $bucketSize ) {
my %hash1;
@hash1{@keys2} = @hashFileList{@keys2};
push @arrHash, \%hash1;
}
for my $href (@arrHash) {
my $t = threads->create( \&Thread, \%$href, $sourcePath,
$destinationPath );
push( @arrThreads, $t );
}
foreach (@arrThreads) {
my $num = $_->join;
#print "done with $num\n";
}
}
my $srcDir = "";
my $destDir = "";
if ( @ARGV < 2 ) {
die "$0 - Need source and destination directory\n"
. "Usage: perl $0 src dest\n";
}
$srcDir = shift;
$destDir = shift;
Create($srcDir, $destDir);
Now when I run above code and check how many instance of either tar or gzip is running I get below output.
[root@localhost trunk]# ps -eaf | grep "tar -zcf"
root 1962 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test2444.tar.gz -C /root/tests -T /root/dump//temp/48DEB775
root 1987 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1585.tar.gz -C /root/tests -T /root/dump//temp/77415208
root 1994 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1106.tar.gz -C /root/tests -T /root/dump//temp/BFA8D1F4
root 1998 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test636.tar.gz -C /root/tests -T /root/dump//temp/8BED4FA8
root 2016 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test273.tar.gz -C /root/tests -T /root/dump//temp/C228C9E6
root 2021 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test2573.tar.gz -C /root/tests -T /root/dump//temp/044B2F61
root 2149 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test2563.tar.gz -C /root/tests -T /root/dump//temp/9657C48F
root 2150 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1553.tar.gz -C /root/tests -T /root/dump//temp/71BE66D1
root 2152 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1726.tar.gz -C /root/tests -T /root/dump//temp/1B2D081F
root 2200 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test493.tar.gz -C /root/tests -T /root/dump//temp/8932236E
root 2201 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test2274.tar.gz -C /root/tests -T /root/dump//temp/F42D8053
root 2206 25225 0 19:54 pts/1 00:00:00 grep tar -zcf
[root@localhost trunk]# ps -eaf | grep "tar -zcf"
root 1994 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1106.tar.gz -C /root/tests -T /root/dump//temp/BFA8D1F4
root 1998 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test636.tar.gz -C /root/tests -T /root/dump//temp/8BED4FA8
root 2021 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test2573.tar.gz -C /root/tests -T /root/dump//temp/044B2F61
root 2149 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test2563.tar.gz -C /root/tests -T /root/dump//temp/9657C48F
root 2150 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1553.tar.gz -C /root/tests -T /root/dump//temp/71BE66D1
root 2152 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1726.tar.gz -C /root/tests -T /root/dump//temp/1B2D081F
root 2200 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test493.tar.gz -C /root/tests -T /root/dump//temp/8932236E
root 2201 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test2274.tar.gz -C /root/tests -T /root/dump//temp/F42D8053
root 2300 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1508.tar.gz -C /root/tests -T /root/dump//temp/573093A4
root 2301 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test431.tar.gz -C /root/tests -T /root/dump//temp/1A75C3EF
root 2353 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//t
+emp/Test1088.tar.gz -C /root/tests -T /root/dump//temp/02CA6015
root 2368 25225 0 19:54 pts/1 00:00:00 grep tar -zcf
^[[A[root@localhost trunk]# ps -eaf | grep "gzip"
root 2208 2200 2 19:54 pts/0 00:00:01 gzip
root 2209 2149 1 19:54 pts/0 00:00:00 gzip
root 2210 2150 1 19:54 pts/0 00:00:01 gzip
root 2302 2301 0 19:54 pts/0 00:00:00 gzip
root 2303 2300 1 19:54 pts/0 00:00:00 gzip
root 2371 2353 3 19:54 pts/0 00:00:01 gzip
root 2384 2377 0 19:55 pts/0 00:00:00 gzip
root 2387 2386 0 19:55 pts/0 00:00:00 gzip
root 2499 2389 2 19:55 pts/0 00:00:00 gzip
root 2581 2509 0 19:55 pts/0 00:00:00 gzip
root 2663 2583 4 19:55 pts/0 00:00:00 gzip
root 2691 25225 0 19:55 pts/1 00:00:00 grep gzip
root 2700 2665 0 19:55 pts/0 00:00:00 gzip
Now as I have only 5 thread created I expect to have only 5 instance of tar and similarly 5 instance of gzip. But that doesn't seems to be the case.
Any thoughts to fix this issue.
Please note that the actual data is not text files so the archiving will take some time to complete the operation.
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.