#! /usr/local/bin/perl -w
## author: parv, parv UNDERSCORE fm AT emailuser DOT net
##
## date: jan 09 2003
##
## version: 0.012
##
## license:
## this software is free to be used in any form only if proper
## credit is given. i am not responsible for any kind of damage or
## loss. use it at your own risk.
##
## name: sanefilename.perl
##
## purpose:
## change characters in file names which are not composed of
## [-_.a-zA-Z0-9] characters. and do other changes; see
## sanitize() sub.
##
## usage:
## sanefilename.perl [-options] file [file2 file3 ...]
##
## for details, issue this command...
##
## sanefilename.perl -usage
##
use strict;
# pod module to generate, well, usage messages
use Pod::Usage;
# module to parse options
use Getopt::Long qw(:config default require_order);
# modules to move & parse file name
use File::Basename;
use File::Copy;
# existent file name handler - sub dispatch table
#
my %existent =
( # option given to handle existent file names
#
'opt' => [ 'notexist' , qr/^(?:ask|force|notexist)$/ ]
# based on $existent{opt}->[0], will contain reference
# to one of the following subs
, 'handle' => undef
# TO BE IMPLEMENTED
, 'ask' => sub { return; }
# force file move
, 'force' =>
sub { my ($path , $old, $new , $exist) = @_;
verbose(" '${path}${new}' already exists, will be overw
+ritten...\n")
if -e "${path}$new";
move_file($path , $old , $new , $exist);
return;
}
# new file does't exist, so move
, 'notexist' =>
sub { my ($path , $old, $new , $exist) = @_;
unless ( -e "${path}$new" )
{ move_file($path , $old , $new , $exist);
return;
}
verbose(" '${path}${new}' already exists, '${path}${old
+}' skipped ...\n");
return;
}
);
# get & check options
# ----
# save existent handling option separately so that it will be easy
# to update valid options, $existent{opt}->[1], and related sub
# references
# ----
my $opt = get_opt( $existent{'opt'} );
# check if any arguments remain which will be file names
#
pod2usage( '-msg' => " give file name(s) to change to sane version(s)\
+n"
, '-exitval' => 0
, '-verbose' => 0
)
unless scalar @ARGV;
# save the appropriate sub reference based on dup option
# to save time in foreach() due to double indirection
#
$existent{'handle'} = $existent{ $existent{'opt'}->[0] };
# regex to specify valid characters
#
my $char_re = valid_char( $opt->{'lower'} );
foreach (@ARGV)
{
my ($old_file , $path) = fileparse($_ , '');
# sanity check
die " '$path' - not a writable directory, exiting... \n"
unless -d $path || -w $path;
unless ( -e $path . $old_file )
{
verbose(" '${path}${old_file}' doesn't exist, skipped...\n");
next;
}
# if the old name is OK, skip
#
if ( $old_file !~ m/$char_re->[1]/ )
{
print " '${path}${old_file}' - okay, skipped...\n"
if $opt->{'verbose'} >= 3;
next;
}
# calculate new file name
my $new_file =
sanitize( \$old_file , $char_re , $opt->{lower} );
# very unlikely scenario that _current_ file name will be same
# as the calculated one from this _current_ name
#
if ( $old_file eq $new_file )
{
verbose(<<_MSG_);
sanitized new name is same as the old.
${path}${old_file} is not moved...
_MSG_
next;
}
# call appropriate sub depending on the -exist option given
#
$existent{handle}->($path , $old_file , $new_file , $existent{'opt'}
+->[0] );
}
print " ...done\n" if $opt->{verbose};
## subroutines
# calculate new file name from the old
sub sanitize
{
# $old - old name
# $char - regex (array ref) specifying valid character set
# $lower - option if to exclude upper case letters
#
my ($old , $char , $lower) = @_;
die "sanitize: expecting \$old and\$char to be defined\n"
unless defined $old
and ref $char eq 'ARRAY';
local $_ = $$old;
tr [A-Z] [a-z] if $lower;
# remove end non [-_.${alpha}\d] characters
#
#s/(?: $char->[1] $ | ^ $char->[1] )//x;
# change all the "wrong" characters to -
#
s/$char->[1]/-/g;
# prefer - to _
s/(?:_-|-_)/-/go;
# prefer . to - or _
s/(?:\.[-_]|[-_]\.)/./go;
# minimize the consecutive occurrence of . - _ to one of each
#
s/([-._]){2,}/$1/go;
return $_;
}
sub move_file
{
my ($path , $old , $new , $exist) = @_;
unless (defined $path and defined $old and defined $new)
{
verbose("move_file: \$path, \$old, \$new is/are undefined");
return;
}
$old = $path . $old;
$new = $path . $new;
my $old_new = sub { printf " '%s' -> '%s'\n", ($old , $new); };
$old_new->()
if $opt->{'nomove'}
or $opt->{'verbose'} >= 2
or ($opt->{'verbose'} && ($exist eq 'force'));
return if $opt->{'nomove'};
move("$old" , "$new")
|| die "couldn't move '$old' to '$new': $!\n" ;
return;
}
sub valid_char
{
my $low = shift;
# create list to be used as regex in file renaming
#
my $char = '-_.0-9a-z';
$char .= 'A-Z' unless $low;
# return two regexen: latter is the complement of the former
#
return [ qr/[$char]/ , qr/[^$char]/ ];
}
sub get_opt
{
# existent handling option is saved in this array at [0]
my $exist = shift;
my %opt = ( # supersedes any option, shows program usage
'usage' => 0
# control amount of output generated
, 'verbose' => 2
# check option if names needed to be lowercased
, 'lower' => 0
# instead of actual move, only show the new name
, 'nomove' => 0
);
# get options
GetOptions( 'usage|help' => \$opt{'usage'}
, 'exist=s' => \$exist->[0]
, 'ask' => sub { $exist->[0] = 'ask' }
, 'force' => sub { $exist->[0] = 'force' }
, 'notexist|skip'
=> sub { $exist->[0] = 'notexist' }
, 'quiet' => sub { $opt{'verbose'} = 0 }
, 'verbose=i' => \$opt{'verbose'}
, 'lower' => \$opt{'lower'}
, 'nomove' => \$opt{'nomove'}
)
|| die pod2usage('-exitval' => 2 , '-verbose' => 1);
# exit normally is asked for usage
#
pod2usage('-exitval' => 0 , '-verbose' => 3)
if $opt{'usage'};
# die horribly due to wrong option given
#
pod2usage( '-msg' => " incorrect (existent and/or verbose options w
+ere given\n"
, '-exitval' => 1
, '-verbose' => 0
)
unless check_opt($exist , $opt{'verbose'});
return \%opt;
}
sub check_opt
{
my ($exist , $verbose) = @_;
return ( $verbose =~ m/^\d+$/ && $exist->[0] =~ m/$exist->[1]/ );
}
sub verbose
{
warn(@_) if $opt->{'verbose'};
return;
}
__DATA__
## start usage documentation
=head1 NAME
sanefilename.perl - undo odd characters in file names
=head1 SYNOPSIS
B<sanefilename.perl> [-options] F<file> [F<file2> F<file3> ...]
B<sanefilename.perl> -usage
=head1 DESCRIPTION
sanefilename.perl changes characters in file names which are
not composed of C<[-_.a-zA-Z0-9]> characters.
all the characters not matching C<[-_.a-zA-Z0-9]> are converted to
I<->.
I<-_> or I<_-> sequence is changed to single I<->.
Any sequence of I<.->, I<._>, I<-.>, I<_.> is changed to single I<.>.
multiple occurrences of [-_.] are changed to one.
=head1 OPTIONS
=over
=item B<-usage>
shows the whole program documentation; supersedes any other option
=item B<-nomove>
do not actually move the files, just print the old given names and
new sanitized ones.
=item B<-lower>
excludes [A-Z] characters from the [-_.a-zA-Z0-9] character set.
=item B<-quiet> same as -verbose=0
=item B<-verbose>=I<0>|I<1>|I<2>|I<3>
controls how much output is generated; larger is the number, more the
output be.
=over
=item 0
only fatal error messages are generated
=item 1
as 0, plus non-fatal error messages are shown
=item 2
default. as 1, plus converted file names are shown
=item 3
as 2, plus OK/unchanged old file names are shown
=back
=item B<-ask> (unimplemented)
=item B<-force>
=item B<-notexist> | B<-skip>
=item B<-exist>=I<ask>|I<force>|I<notexist>|I<skip>
controls the behaviour when sanitized new file name is as an existing
file.
if the new sanitized name is same as old given name, that is not
considered to be an already existent file.
consider there are two files: I<P^Q^R> and I<P-Q-R>. the sanitized
name for the first file, without the B<-lower> option, would be
I<P-Q-R>. since this soon-to-be file already exists, it creates
a good situtation for providing an option.
the file name I<P-Q-R>, without the B<-lower> option, produces the
same new name. any further processing or file moving is skipped,
except for any messages requested via B<-verbose> option.
=over
=item ask (unimplemented)
an alternative name is asked to enter.
=item force
old given file is moved to the new calculated name, obliterating an
already existing file.
=item notexist | skip
default. move old file to new name only if it does not exist.
=back
=back
=head1 See Also
see these fine perlpods too:
=over
=item *
File::Basename
=item *
File::Copy
=back
=head1 Distribution and such
parv, parv UNDERSCORE fm AT emailuser DOT net
jan 09 2003
version 0.012
this software is free to be used in any form only if proper credit is
given. i am not responsible for any kind of damage or loss. use it
at your own risk.
=cut
|