Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

shloop -- execute shell command on a list

by graff (Chancellor)
on Mar 25, 2002 at 04:15 UTC ( [id://154018]=sourcecode: print w/replies, xml ) Need Help??
Category: Utilities
Author/Contact Info David Graff <graff at ldc.upenn.edu>
Description: I wrote the first version several years ago, and have used/ tweaked it ever since; very handy for running any kind of filtering procedure on a list of files to produce another list of files (also handy for copy/delete/rename/symlink). I've kluged it to work on MS-Windows* (using "system()"), as well as unix (printing to "| /bin/sh"). update: I added a thorough pod at the end. update: (2011-03) added '-o' option and more powerful usage for '-s' options.
#!/usr/bin/perl

# Program:      shloop
# Written by:   Dave Graff <graff@ldc.upenn.edu>
# Purpose:      runs a command line or script on each item in list

use strict;
use Getopt::Long;
use Pod::Usage;

my $bgnbold = "\033[1m";
my $endbold = "\033[0m";

$SIG{INT} = sub { close SH; exit };

my @subopt;
my ( $eopt, $fopt ) = ( "", "");
my ( $verbose, $bold, $noop, $redirect, $output_only, $man ) = (0,0,0,
+0,0,0);
my $cmd_okay = GetOptions( 's=s@' => \@subopt,
                           'e:s' => \$eopt,
                           'f:s' => \$fopt,
                           'v' => \$verbose,
                           'b' => \$bold,
                           'n' => \$noop,
                           'o' => \$output_only,
                           'r' => \$redirect,
                           'man' => \$man
                           );

pod2usage(1) unless ( $cmd_okay );
pod2usage(-exitstatus => 0, -verbose => 2) if $man;

pod2usage( -message => "Exactly one of '-e cmd' or '-f script' is need
+ed\n",
           -exitval => 2, verbose => 1 ) unless (( $eopt ne "" ) ^ ( $
+fopt ne "" ));
my $arg_check = $output_only + $redirect;
if ( $arg_check == 2 ) {
    pod2usage( -message => "Cannot use both '-r' and '-o' in one run\n
+",
               -exitval => 2, verbose => 1 );
}

# Parse -s args, if any

my @subold = my @subnew = my @subrgx = ();

while ( @subopt )
{
    my $subex = shift( @subopt );
    ( my $colon_check = $subex ) =~ s/\\://g;  # remove any colons tha
+t are "escaped"
    my $ncolons = ( $colon_check =~ tr/:/:/ ); # count remaining colon
+s
    if ( $ncolons == 1 ) {
        $subex =~ /(.*?(?:[^\\])):(.*)/;  # simple 'old:new' pattern
        push( @subold, $1 );
        push( @subnew, $2 );
    }
    elsif ( $ncolons == 3 ) {
        push @subrgx, $subex;  # "new-improved complex regex" '[sy]:ol
+d:new:[igsm]?' pattern
    }
    else {
        pod2usage( -message => "Bad argument for -s",
                   -exitval => 2, verbose => 1 );
    }
}

# Place the command line or script in $command

my $command = "";
if ( $eopt ne "" ) {
    $command = $eopt;
} else {
    unless ( -r $fopt ) {  # script file not found in CWD?
        my $base = $0;
        $base =~ s%[^/]*$%%;
        my $sfile = "";
        if ( $ENV{SHLOOP_PATH} ne "" ) {  # is script in SHLOOP_PATH?
            foreach my $p ( split( /:/, $ENV{SHLOOP_PATH} )) {
                if ( -r "$p/$fopt" ) {
                    $sfile = "$p/$fopt";
                    last;
                }
            }
        }
        if ( $sfile eq "" ) {  # how about where $0 is?
            if ( -r "$base$fopt" ) {
                $sfile = "$base$fopt";
            } else {
                my $shldir = ( $ENV{SHLOOP_PATH} ne "" ) ?
                    $ENV{SHLOOP_PATH} : "unset env.variable";
                pod2usage( -message => "No script file $fopt in $ENV{P
+WD}, ".
                           "$base or SHLOOP_PATH ($shldir)\n",
                           -exitval => 2, verbose => 1 );
            }
        }
        $fopt = $sfile;
    }
    $command = "";
    open( SCRPT, "<$fopt" );
    while (<SCRPT>)
    {
        if ( /^\#\# / ) {
            while ( /(\S*)\:(\S*)/ ) {
                push( @subold, $1 );
                push( @subnew, $2 );
                $_ = $';            # $' refers to everything that fol
+lowed the match
            }
        }
        next if ( /^\#/ || /^\s*$/ );
        s/\s*$//;  # remove all trailing whitespace (including \n)

        if ( $command =~ /[\&\|\\]$/ ) {  # if command string ends wit
+h &, | or \
            $command =~ s/\\$//;          # final \ flags line continu
+ation; remove it
            $command .= " ";              # add a space
        } elsif ( $command ne "" ) {
            $command .= "; ";             # add a shell command termin
+ation
        }
        $command .= $_;   # append current line of script to command s
+tring
    }
    close SCRPT;
    if ( $command eq "" ) {
        pod2usage( -message => "No commands found in script file $fopt
+",
                   -exitval => 2, verbose => 1 );
    }
}

# if the user has not provided any arg-editing instructions,
# we need to give up if they also specified '-r', '-o' or '\o' in the 
+command

if ( @subold == 0 and @subrgx == 0 ) {
    pod2usage( -message => "Must have at least one '-s old:new' to use
+ '-r', '-o' or '\\o'\n",
               -exitval => 2, verbose => 1 ) if ( $redirect or $output
+_only or $command =~ /\\o/ );
}

# Open the shell process that will execute commands, if necessary

my $shellop = 0;
unless ( $command eq "rename" ||   # These commands use perl internals
+,
         $command eq "unlink" ||   # so only launch a shell for other 
+things
         $command eq "delete" ||
         $command eq "symlink" ||
         $command =~ /^mode 0[0-7]{3}/ ) {
    unless ( exists( $ENV{winbootdir} )) {
        my $shell = "/bin/sh";
        open( SH, "| $shell" ) or die "Can't launch subshell ($shell);
+ sorry.\n";
        $shellop++;
    }
    $shellop++;
}

# Process the input list

while (<>)
{
    s/[\r\n]+$//;
    my $oldf = my $newf = $_;
    my $i = 0;
    while ( $i < @subold )  # apply string substitutions in the order 
+given
    {               # (subs in a script file are applied after subs on
+ cmdline)
        eval "\$newf =~ s{$subold[$i]}{$subnew[$i]}";
        $i++;
    }
    $i = 0;
    while ( $i < @subrgx ) {
        eval "\$newf =~ $subrgx[$i]";
        $i++;
    }
        
    my $cmd = $command;
    if ( $cmd =~ /\\i/ ) {
        $cmd =~ s/\\i/$oldf/g;
    } elsif ( ! $output_only ) {
        $cmd .= " $oldf";
    }
    if ( @subold or @subrgx ) {
        if ( $cmd =~ /\\o/ ) {
            $cmd =~ s/\\o/$newf/g;
        } else {
            $cmd .= " >" if ( $redirect );
            $cmd .= " $newf";
        }
    }
    if ( $noop ) {
        print "$cmd\n";
    } else {
        if ( $bold || $verbose ) {
            my $rprt = $cmd;          
            $rprt =~ s/(\W)/\\$1/g
                if ( $shellop == 2 ); # escape special chars for shell
+ output
            $rprt = $bgnbold . $rprt . $endbold if ( $bold );
            if ( $shellop == 2 ) {
                print SH "printf '%s\n' $rprt 1>&2\n";  
            } else {
                print STDERR "$rprt\n";
            }
        }
        if ( $shellop == 2 ) {
            print SH "$cmd\n";
        } elsif ( $shellop ) {
            system( $cmd );
        } else {
            if ( $command eq "rename" ) {
                if ( $oldf eq $newf ) { # user didn't specify "-s old:
+new" for rename
                    my @args = split( /\s+/, $oldf );  # see if list i
+tem has two words
                    if ( @args == 2 ) {
                        ( $oldf, $newf ) = @args;
                    } else {
                        warn "rename skipped for $oldf -- need 2 args 
+(or -s old:new)\n";
                        next;
                    }
                }
                rename( $oldf, $newf ) || warn "rename failed for $old
+f -> $newf\n";
            } elsif ( $command eq "symlink" ) {
                if ( $oldf eq $newf ) { # user didn't specify "-s old:
+new" for symlink
                    my @args = map { s:/$::; $_ } split( /\s+/, $oldf 
+);  # 2 words in list item?
                    if ( @args == 2 ) {
                        ( $oldf, $newf ) = @args; # 2 words: use as "t
+arget", "linkname"
                    } elsif ( @args == 1 && $oldf =~ m%/([^/]+)$% ) {
                        $newf = $1;  # 1 word containing ".../name$": 
+use name as linkname
                    } else {
                        warn "symlink skipped for $oldf -- can't estab
+lish link name\n";
                        next;
                    }
                }
                symlink( $oldf, $newf ) || warn "symlink failed for $o
+ldf -> $newf\n";
            } elsif ( $command eq "unlink" || $command eq "delete" ) {
                unlink( $oldf ) || warn "unlink failed for $oldf\n";
            } elsif ( $command =~ /mode (0[0-7]{3})/ ) {
                chmod( oct($1), $oldf ) || warn "chmod failed for $old
+f -> $newf\n";
            }
        }
    }
}
close SH if ( $shellop == 2 );

=head1 NAME

shloop - perform a shell command or script on a list of names

=head1 SYNOPSIS

  shloop -e 'cmd line' [-n|-v|-b] [[-o|-r] -s old:new ...] [list ...]
  shloop -f scriptfile [-n|-v|-b] [[-o|-r] -s old:new ...] [list ...]

   optional form for '-s' argument:  -s '(s|y|tr):old:new:[egicds]*'

=head1 OPTIONS

  -n : do Nothing, just list command lines as they would be run
  -v : (verbose) list command lines to STDERR as they are run
  -b : list command lines to STDERR in BOLD as they are run
  -o : discard the original input string, only use the output string
  -r : (with -s) put ">" (redirect) between input and output args
  -s : for each input string, create an output string by replacing
       "old" with "new" (multiple "-s old:new" args are allowed)

  Use '\i' in 'cmd line' or script to place list item in special posit
+ion(s);
  with -s, use '\o' to place output name in special position(s).

  shloop -m  prints the user manual

=head1 DESCRIPTION

Shloop applies a shell command or script to each element (each line)
of a list.  The list can be provided on stdin via a pipeline, or read
from one or more files named on the command line.

If the "-e 'cmd line'" option is used to provide a shell command, the
default operation is to read each line from the input list, append tha
+t
to the 'cmd line' string, and execute the result in a subshell.

If the "-f scriptfile" option is used, the script file is read
first, each line of the script is joined into a single string with
semi-colons (except for lines ending with "&" or "|"), and then each
element of the input list is appended to this string and passed to the
subshell.  You cannot use "-f" and "-e" together.

The "-n" option ("no-execution") can be used to see how the resulting
command lines will look when passed to the subshell; the commands will
be echoed to stdout and will not be executed.  (Redirecting stdout to
a file will produce a usable shell script for the given list.)

The "-v" ("verbose") or "-b" ("bold verbose") option can be used to
echo each command line to stderr prior to execution, so that any
resulting error messages from the subshell can be traced to the
particular list element(s) involved.

For either command method, one or more "-s old:new" options can be
used to create an "output" list element by means of perl-style string
replacement on each input element.  For each line of the input list,
an output element is created using "s{old}{new}" in perl -- that is,
find the first occurrence of the string "old" (or whatever precedes
the colon in the "-s" argument), and replace it with "new" (or
whatever follows the colon in the "-s" argument).  Multiple "-s"
options are applied to the input string in sequence.  If the colon is
the last character in the argument, the "old" pattern is simply
deleted from each element (replaced with an empty string).  The colon
can be preceded by "^" or "$" to specify the beginning or end of the
input string, respectively.

Certain characters in the "old" part of the "-s" argument (in addition
to the "^$" just mentioned) will have special effects in the regex
replacement.  In particular, the period, asterisk, plus and question
mark characters (".*+?") serve, as they normally do in perl, to define
various "wildcard" matches in regular expressions.  (Briefly stated,
"." will match any single character; the others, "*+?", are
quantifiers on the previous character, matching "0 or more", "one or
more" or "0 or 1" occurrences of that character, respectively.)  The
special effects can be avoided by preceding these characters with a
backslash. The following examples illustrate the basics:

  input element  -s argument   output element
  try/foo.bar     .:_x_        _x_ry/foo.bar
                  /.:_x_       try_x_oo.bar
                  \.:_x_       try/foo_x_bar
                 fo?:_x_       try/_x_o.bar
                 fo*:_x_       try/_x_.bar
                  o+:          try/f.bar
                  \+:          try/foo.bar (no match, no change!)
                   r:          ty/foo.bar
                  r$:          try/foo.ba

Since some of these characters also have special meanings for the unix
shell, it's best to place single-quotes around the argument when they
come into play (see examples below).

All kinds of substitutions are supported, including things like
"(..)(..):$2$1"; slashes within the "old" and "new" strings do not
need to be escaped with backslashes.

In general, it's a very good idea to use the "-n" option
with "-s" to see what the command lines will look like first,
then leave off the "-n" (or change it to "-v") once you've
made sure that the substitutions are working the way you want.

In the case of a script file, it is possible to specify string
replacements within the script, by means of special "comment" lines
containing "old:new" arguments.  For example, placing the line:

   ##  ^:cmp.  $:.Z

in a shloop script file would result in each element of the input list
being modified to have "cmp." at the beginning and ".Z" at the end to
produce an output element for each execution of the script.  The
substitutions are applied in the order presented, but after any "-s"
arguments provided on the command line -- this means the command line
could override the script substitutions, by altering target patterns
before the script operations take effect.

Comment lines in the script that begin with just one "#" are ignored
by shloop, and can be used to provide any sort of commentary or
documentation throughout the script file.

If string replacements are applied, the default operation is to append
both the input list element and the modified "output" list element to
the shell command string (separated by a space), and execute the
result.  With the "-r" option, the input and output elements are
separated by " > " instead, to perform redirection.  For example:

        echo mytxt.txt | shloop  -e wc  -s 'txt$:wc'  -r

will execute this command in the subshell:

        wc mytxt.txt > mytxt.wc

(If you are in a directory containing 200 files named "*.txt", then
replacing "echo mytxt.txt" with "ls *.txt" in the above example will
create 200 files named "*.wc".)

Both "input" and "output" names can be placed at arbitrary points in
the shell command string by using the special placeholders "\i" and
"\o", respectively, in the 'cmd line' string or the script file (the
"-r" option would have no effect in this case).  For example:

  shloop -e "sed 's/-/ /g' \i > \o; wc -w \o" -s txt:nd

will execute commands like this in the subshell:

  sed 's/-/ /g' file.txt > file.nd; wc -w file.nd

Note the use of different quotation marks outside and inside the "cmd
line" argument.  Quoting in shell commands is a vast and subtle topic;
again, it's best to use "-n" first, to see what will happen.

=head2 Alternate (advanced) form of '-s' arguments

In addition to the simple '-s old:new' type of edit directive, shloop
also supports a full-fledged perl-regex specification, allowing you to
use the "tr///" (a.k.a. "y///") operator to transliterate characters
(rather than the "s///" operator used by '-s old:new'), and to use the
"e", "i" and/or "g" modifiers on "s///" (whereas '-s old:new' never
uses these modifiers).  To use this more powerful approach, make sure
to have three colons in the directive, like this:

 -s s:old:new:   # replace first "old" with "new" (same as -s old:new)
 -s s:foo:bar:gi # replace all "foo","Foo","FOO","fOo", etc with "bar"
 -s 's:^:sprintf("ln_%04d\t",$.):e'
                 # prepend line number as "ln_0001\t", etc
 -s tr:abc:def:  # replace all "a" with "d", "b" with "e", "c" with "f
+"
 -s y:x::d       # remove all "x"

If your "old" or "new" string needs to use a colon as part of the
string, put a backslash in front of each "data" colon (and make sure
to single-quote the whole expression on the shell command line):

 -s 's:host\:/path:new/path:'  # change "host:/path" to "new/path"

If you forget the backslash, shloop will exit with an error message
(because it found the wrong number of unescaped colons in the string).

=head2 Use of perl-interal functions instead of shell commands

Some special "cmd line" operations are defined to use perl-internal
function calls, rather than using a subshell: "unlink" or "delete"
(delete files), "mode NNN" (change file permissions), "symlink"
(create symlinks) and "rename".  These cannot be combined with each
other, or with other shell commands, in a single run.  The
"unlink" and "mode" functions will apply only to the input elements
(so don't use "-s old:new" with these); "symlink" and "rename"
need two file names to operate on, which can be handled either by
using "-s old:new"", or by providing two space-separated tokens
on each line in the input list. "symlink" can also work on a single
file name, if that name contains a "/" -- in this case, the "output"
name (applied to the symlink that is created), consists of the string
that follows the last "/" in the input name; that is, a symlink is
created in the current working directory, with the same file name as
the input list item, pointing to the path given in the list.

=head1 EXAMPLES

  ls | shloop -e rename -s '$:.done'

changes the name of every file in the current working directory by
adding the string ".done" at the end; note that "-e mv" would work
too, but might take longer.

  shloop -e 'compress -c \i |
    uuencode \i.Z > \i.Zuu' file.list

for each file named in file.list, compress and uuencode its contents
and save the output in a new file; the resulting output files will
have the same names as the originals, but with ".Zuu" added at the
end.  (When passed through "uudecode" later on, e.g. after being sent
somewhere by email, these output files will produce compressed
versions of the originals, having the original names with ".Z"
appended, and when those ".Z" files are passed to the "uncompress" or
"gunzip" command, the original files will be restored with their
original names.)

  ls *.tex | shloop -e 'latex \i;
    dvips \o; lpr \o.ps' -v -s '\.tex$:'

converts all ".tex" files in the current directory to PostScript
format, and sends the Postscript files to the printer; the
the "dvips" program uses default file name extensions
for its input and output, so a file name with no extension is used.
The same result can be obtained using the following script file:

  ## .tex$:
  latex \i
  dvips \o
  lpr \o.ps

The first line of this script specifies the string substitution to be
done on each name in the input list (replace ".tex" at the end of the
string with nothing), and the following lines are shell commands with
placeholders for the original and modified names as arguments to the
commands.

=head1 AUTHOR

David Graff (graff@ldc.upenn.edu)

=cut

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://154018]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others pondering the Monastery: (4)
As of 2024-03-29 00:51 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found