# Summarises an flist file and returns its largest entries.
#
# Arguments:
#   $name  - full path to the flist file
#   $limit - maximum number of entries to return
#
# Every line is split on single spaces; the second field is used as the
# file path and the fifth field as the file size in bytes.
#
# Returns a list: the number of files accepted, their cumulative size
# (0 when nothing was accepted), and up to $limit entries formatted as
# "size/path", largest first.
# Dies if the file cannot be opened.
sub process_flist {
    my ($name, $limit) = @_;
    my @lines;
    my $total = 0;

    open(my $fh, '<', $name) or die("Error opening file `$name': $!\n");
    LINE:
    while (my $line = <$fh>) {
        my @f = split / /, $line;

        # a file name containing a space produces extra fields;
        # such entries cannot be parsed reliably, so skip them
        next LINE if @f > 10;

        my ($path, $size) = @f[1, 4];

        # blank or truncated lines have no size field; skip them
        # instead of storing a bogus "/" entry
        next LINE unless defined $size;

        # on a five-field line the size is the last field and still
        # carries the line terminator
        chomp $size;
        next LINE unless length $path and $size =~ /\A[0-9]+\z/;

        # accumulate here so the total never depends on numeric
        # coercion of the "size/path" strings
        $total += $size;

        # store file size in bytes and the full path to the file
        push @lines, $size . '/' . $path;
    }
    close $fh;

    my $nlines = scalar @lines;
    $limit = min($limit, $nlines);
    {
        # the entries look like "12345/path/to/file"; <=> compares them
        # by their leading number, but Perl would warn that they are
        # not numeric. Keeping the strings avoids a second pass
        # through the flist file.
        no warnings 'numeric';
        @lines = (sort { $b <=> $a } @lines)[0 .. ($limit - 1)];
    }

    return ($nlines, $total, @lines);
}
####
find /tgt -type f -name input | xargs wc -l
197898 .../input
213267 .../input
240331 .../input
194063 .../input
191862 .../input
179495 .../input
218041 .../input
1434957 total
####
51 opt/src/t.tar 100444 1247464676 290119680 283320 NA 1 0xbe2d 0x40000006
####
...
# Keep only lines that start with
# "<digits> <name without spaces> <digits> <digits> <digits>"
# and record each one as "<fifth field>/<second field>".
while (<$fh>) {
    my ($path, $size) = m/^[0-9]+ ([^ ]+) [0-9]+ [0-9]+ ([0-9]+)/
        or next;
    push @lines, join('/', $size, $path);
}
...