in reply to Unicode file names
2023-01-04 Edit: I updated the WinReadDir() function. It can now look into directories that have special characters in their name.
2023-01-07 Edit: Added character filters to prevent a code-injection vulnerability via $REGEX, shortened the $JSCODE somewhat, and removed an unused variable.
#!/usr/bin/perl
# Perl 5.6 or newer is required: the script relies on the "warnings"
# pragma, lexical file handles and three-argument open().
use 5.006;
use strict;
use warnings;

# Run the demo only when this file is executed directly, so that the
# functions below can also be loaded from another script with require().
main() unless caller;

##################################################
# Demo
# Lists up to 40 entries of the current directory (size, modified
# date, attributes, name). For every entry whose name contains a
# '{' (i.e. an encoded special character), the user is asked for a
# new name and the file is renamed with WinRenameFile().
#
# Usage: main()
#
sub main {
    my $PATH = GetCurrentDirectory();
    my @L = WinReadDir($PATH, 0, 'S|T|M|N', 40);

    print "\n$PATH\n";
    print "\nFile(s) in the current directory:\n";

    foreach (@L) {
        my ($Size, $Attr, $Date, $Name) = split(/\|/, $_);
        print "\n $Size ", TimeStamp($Date), ' ', GetStrAttr($Attr), " $Name";

        if (index($Name, '{') >= 0) {
            print "\n\nType a new file name for this file (without path).",
                  "\nor just press ENTER to skip renaming.\n";
            my $NewName = <STDIN>;
            $NewName = '' unless defined $NewName;    # EOF on STDIN
            $NewName =~ tr|\r\n||d;                   # Drop the line terminator
            if (length($NewName)) {
                my $FullName = $PATH . "\\" . $Name;
                print "RENAME:", WinRenameFile($FullName, $NewName);
            }
        }
    }
    print "\n";
    return;
}

##################################################
# File | v2023.1.4
# This function converts a Windows file attribute
# number to string representation.
#
# The result is always 8 characters long. Each position
# holds a letter when the corresponding attribute bit is
# set, or a '.' when it is not:
#
#   L = 1024 (link)      H = 2  (hidden)
#   C = 2048 (compressed) R = 1  (read-only)
#   A = 32   (archive)   D = 16 (directory)
#   S = 4    (system)    V = 8  (volume label)
#
# An undefined argument is treated as 0.
#
# Usage: STRING = GetStrAttr(ATTRIBUTE)
#
sub GetStrAttr {
    my $A = defined $_[0] ? $_[0] : 0;

    # The bit values are numeric literals, so '&' is always a numeric
    # AND, even when the attribute arrives as a string such as '0016'.
    my @FLAGS = (
        [1024, 'L'], [2048, 'C'], [32, 'A'], [4, 'S'],
        [2,    'H'], [1,    'R'], [16, 'D'], [8, 'V'],
    );

    my $OUTPUT = '';
    foreach my $FLAG (@FLAGS) {
        $OUTPUT .= ($A & $FLAG->[0]) ? $FLAG->[1] : '.';
    }
    return $OUTPUT;
}

##################################################
# File | v2023.1.4
# This function is a customizable readdir() function
# for Windows. It returns the directory contents as
# an array using a tiny JavaScript program to
# collect the data. [Tested with TinyPerl 5.8 on WinXP.]
#
# Usage: LIST = WinReadDir(PATH, [SUBDIR, [PATTRN, [MAX, [REGEX]]]])
#
# NOTE: This function only works in Windows XP or higher!
#       In Linux and other operating systems, an
#       empty list will be returned.
#
# The easiest way to use the function is to simply
# provide a path as the first argument:
#
#    my @LIST = WinReadDir('C:\\HOME\\PERL');
#
# This will return something like this:
#
#    000000000000 1672793716 More\
#    000000000002 1656954558 Myfile{1758}.txt
#    000000000241 1574651186 SPEAK.VBS
#    000000001663 1607229792 Numbers.pl
#    000000002456 1670070972 cut.pl
#    000000059994 1672797060 filelib.pl
#    000000361490 1669902950 lib.pl
#
# The first 12 digits are the file's size. The next 10 digits
# are the file's last modified date given in seconds.
# If a file name ends with backslash "\" character, then
# that means it's a directory name.
#
# Each line is a separate array element.
# If a name contains unicode characters, the special
# characters will appear as a number between brackets.
# For example: Myfile{1758}.txt
# If you press ALT + 1758, it produces a little star icon.
# And that's the code that appears in the file name there.
#
# If a file or directory name contains the '{' character,
# then it will appear as '{123}' Also, if you need to refer
# to a directory that contains the '{' then again,
# you would do this:
#
#    my @LIST = WinReadDir('C:\\TEMP\\x{123}45}');
#
# As a result, the function will look in the directory C:\TEMP\x{45}
#
# The PATH may also contain special characters:
#
#    my @LIST = WinReadDir('C:\\TEMP\\MyWeirdFolder{9700}\\data', 1);
#
# Forward slashes in PATH are converted to backslashes and runs
# of backslashes are squeezed to one. An apostrophe in PATH is
# escaped before it is placed in the generated script.
#
# The second argument is either 1 or 0. Default is zero.
# One means that sub-directories will be scanned as well.
# In that case, the output will look something like this:
#
#    000000000000 1661404204 IMGS\
#    000000000000 1659249130 DOCS\
#    000000014010 1642132972 DOCS\0023.bmp
#    000000014060 1115495874 DOCS\index.html
#    000000015838 1667767236 rr.bmp
#    000000015866 1667184404 MANDEL3.BMP
#    000000016730 1141128000 FeatherTexture.bmp
#    000000017062 1141128000 IMGS\Coffee Bean.bmp
#    000000018938 1666659612 TEMP.jif
#
# Notice that directories' size is always zero.
# Also notice that the list is sorted, and whatever comes
# first will determine how the list is sorted. You can
# change this order using the 3rd argument:
#
#    my @LIST = WinReadDir('C:\\TEMP', 1, 'N S M');
#
# The string 'N S M' will substitute the Name of the file
# first, then the Size and finally the Modified date all
# separated by spaces. You may use a different separator.
# I use space just because it makes it easier to read, but
# you could use the '|' character which then would allow
# you to split the items using the split() function:
#
#    my @LIST = WinReadDir('C:\\TEMP', 1, 'N|S|M');
#    foreach (@LIST)
#    {
#      my @ITEM = split(/\|/, $_);
#      # $ITEM[0] ---> NAME OF FILE
#      # $ITEM[1] ---> FILE SIZE
#      # $ITEM[2] ---> MODIFIED DATE
#    }
#
# There are more values available. For example, if you want
# the full name of the file with path, then use letter 'F':
#
#    my @LIST = WinReadDir('C:\\TEMP', 1, 'S**F');
#
# This will produce a list which starts with the file size,
# followed by the long file name. It will look something like this:
#
#    000000000000**C:\TEMP\MyWeirdFolder{931}{931}\
#    000000000000**C:\TEMP\x{123}45}\
#    000000000000**C:\TEMP\x{123}45}\test.txt
#    000000000002**C:\TEMP\testing{931}.txt
#
# These are more values that you can use:
#
#    S = insert file size
#    N = insert file name
#    M = insert file last modified date
#    C = insert file date of creation
#    A = insert file last accessed date
#    H = insert file's short name (8+3 format)
#    F = insert file's full name with path
#    T = insert file's attributes
#
# You can create your own customized list using a
# combinations of the above letters. Every other character
# is copied to the output as a separator, except apostrophe,
# backslash, CR and LF, which are removed. If nothing is
# left after that, the default 'S M N' is used.
#
# The 4th argument allows you to limit the directory listing
# to only X items. For example, here we request only the
# first 10 files in the directory list. And we want the file
# attribute first, then date of creation, and the full name:
#
#    my @L = WinReadDir("C:\\WINDOWS", 0, 'T|C|F', 10);
#
# Returns the following list:
#
#    0016|1665934687|C:\WINDOWS\Config\
#    0016|1665934687|C:\WINDOWS\Cursors\
#    0016|1665934687|C:\WINDOWS\Help\
#    0016|1665934687|C:\WINDOWS\Media\
#    0016|1665934687|C:\WINDOWS\msagent\
#    0016|1665934687|C:\WINDOWS\repair\
#    0016|1665934687|C:\WINDOWS\system32\
#    0016|1665934687|C:\WINDOWS\system\
#    0018|1665934687|C:\WINDOWS\inf\
#    0021|1665934687|C:\WINDOWS\Fonts\
#
# The attribute is a 12-bit integer.
# The meaning of the bits is described here:
#
#       0 = Normal file
#       1 = Read-only file
#       2 = Hidden file
#       4 = System file
#       8 = Disk drive volume label (Not a real file)
#      16 = Directory
#      32 = Archive (most files)
#    1024 = Link or shortcut
#    2048 = Compressed file
#
# The 5th argument allows you to filter the results using a
# regex enclosed as a string. Now, keep in mind, we are not
# using Perl's regex engine. This is nowhere near as
# sophisticated, but it's better than nothing.
#
# The regex may be written either as a bare pattern ('exe')
# or as '/pattern/flags' ('/exe/i'). Only the flags 'i' and
# 'm' are honored. To prevent code injection, the characters
# / \ ' " < > CR and LF are removed from the pattern, so
# backslash escapes are not available.
#
# Here, for example, we search for all executable files in the
# Windows directory:
#
#    my @L = WinReadDir("C:\\WINDOWS", 0, 'S bytes, name: F', 10, '/exe/i');
#
#    000000010752 bytes, name: C:\WINDOWS\hh.exe
#    000000015360 bytes, name: C:\WINDOWS\TASKMAN.EXE
#    000000025600 bytes, name: C:\WINDOWS\twunk_32.exe
#    000000049680 bytes, name: C:\WINDOWS\twunk_16.exe
#    000000069120 bytes, name: C:\WINDOWS\NOTEPAD.EXE
#    000000069632 bytes, name: C:\WINDOWS\ALCMTR.EXE
#    000000086016 bytes, name: C:\WINDOWS\SOUNDMAN.EXE
#    000000146432 bytes, name: C:\WINDOWS\regedit.exe
#    000000256192 bytes, name: C:\WINDOWS\winhelp.exe
#    000000283648 bytes, name: C:\WINDOWS\winhlp32.exe
#
# In the next example, we want to find all files that have
# some special characters in their file name:
#
#    my @L = WinReadDir("C:\\TEMP", 0, 'S M T N', 0, '/[{]+/i');
#
# So, we get this list:
#
#    000000000000 1672896164 0016 MyWeirdFolder{931}{931}\
#    000000000000 1672896244 0032 testing{931}.txt
#    000000000000 1672896984 0016 x{123}45}\
#
# Usage: LIST = WinReadDir(PATH, [SUBDIR, [PATTRN, [MAX, [REGEX]]]])
#
sub WinReadDir {
    my @DIR;
    $^O =~ m/MSWIN/i or return @DIR;

    my $PATH = _JsPath(defined $_[0] ? $_[0] : 'C:\\');

    # $J becomes the list of JavaScript expressions whose values are
    # joined into one output line for each file or directory.
    my $J = _JsFieldList($_[2]);

    my $RECURSIVE = defined $_[1] && $_[1] ? 'DIR(FullName);' : '';

    my $MAX = defined $_[3] ? $_[3] : 0;
    $MAX =~ tr|0-9||cd;      # Remove everything except numbers
    $MAX =~ s/\A0+//;        # '0', '00', ... all mean "no limit"
    $MAX = length($MAX) ? "if(OUTPUT.length>=$MAX)return;" : '';

    # If the regex match is not true, then we continue reading
    # the directory, otherwise we add the file to our list.
    # The Regex only tests the name of the file, not its path.
    # So, if the path contains the pattern we're looking for,
    # we won't see that.
    # If REGEX is not provided, then it won't become part of the code.
    my $REGEX = _JsRegexLiteral($_[4]);
    if (length($REGEX)) {
        $REGEX = "NameOnly=toASCII(FullName+'').split(BS).pop();"
               . "if(!($REGEX.test(NameOnly)))continue;";
    }

    my $JSCODE = join('',
        "PATH=CNV('$PATH');OUTPUT=[];BS='\\\\';",
        "PATHLEN=PATH.length+((PATH.slice(-1)==BS)?0:1);",
        "try{FSO=new ActiveXObject('Scripting.FileSystemObject');DIR(PATH);",
        "WScript.StdOut.WriteLine(OUTPUT.sort().join('\\n'));}catch(e){}",
        "function PACK(d,n){$MAX var f=d?FSO.GetFolder(n):FSO.GetFile(n);",
        "var s=d?0:f.Size;n+='';OUTPUT.push([$J].join(''));}",
        _JsCnvSource(),
        "function DIR(p){var F=FSO.GetFolder(p),FC,File,FullName;",
        "for(FC=new Enumerator(F.SubFolders);!FC.atEnd();FC.moveNext()){",
        "FullName=FC.item();Folder=FSO.GetFolder(FullName);",
        "$REGEX PACK(BS,FullName);$RECURSIVE}",
        "for(FC=new Enumerator(F.files);!FC.atEnd();FC.moveNext()){",
        "FullName=FC.item();$REGEX PACK('',FullName);}}",
        "function toASCII(s){var i,T=[];s+='';for(i=0;i<s.length;i++){",
        "c=s.charCodeAt(i);",
        "T.push(((c<32&&c!=10&&c!=13)||c>126||c==123)?'{'+c+'}':s.charAt(i));}",
        "return T.join('');}",
        "function fSize(s){return('000000000000'+s).slice(-12);}",
        "function fDate(d){return('0000000000'+(d*1)).slice(-13).substr(0,10);}",
        "function fAttr(f){return('0000'+f.Attributes).slice(-4);}",
    );

    my $JSFILE = _WriteJsFile('GETDIR.JS', $JSCODE);
    length($JSFILE) or return @DIR;

    my $OUT = `CSCRIPT.EXE //NOLOGO $JSFILE`;
    unlink $JSFILE;
    defined $OUT or return @DIR;    # CSCRIPT could not be started

    @DIR = split(/\n/, $OUT);
    return @DIR;
}

##################################################
# File | v2023.1.3
# This function renames a file whose name contains
# special unicode characters. It cannot rename
# directories, only files!
#
# Usage: STATUS = WinRenameFile(FULLPATH, NEWNAME, [FORCE])
#
# Unicode characters must be placed
# between {} brackets in decimal format.
# For example: {9674} is the representation of a
# little diamond shaped character that you can
# replicate by pressing ALT + 9674.
#
# So, if you have a file called "Myfile{9674}.txt"
# and you want to rename it to "Myfile.txt" then simply do:
#
#    WinRenameFile('C:\\HOME\\Myfile{9674}.txt', 'Myfile.txt');
#
# This will rename the file. If you want to make sure that
# the file gets renamed even if there is another file by that name,
# then use 1 for the third argument:
#
#    WinRenameFile('C:\\Users\\Zsolt\\Desktop\\Myfile{9674}.txt', 'Myfile.txt', 1);
#
# And if the new file name exists AND happens to be read-only,
# the file will not be renamed. However, if you specify 2 for the
# third argument, then the read-only "Myfile.txt" will be deleted
# first, and then the file will be renamed anyway:
#
#    WinRenameFile('C:\\HOME\\Myfile{9674}.txt', 'Myfile.txt', 2);
#
# Note: Using 1 or 2 option will not remove "Myfile.txt" if that
# happens to be a directory!
#
# This can be used to remove unicode letters to make
# files accessible to simple command-line applications.
#
# You may use normal forward slash in place of backslash.
# It makes things a bit clearer:
#
#    WinRenameFile('C:/HOME/Myfile{9674}.txt', 'Myfile.txt');
#
# You must not type any slashes in the second name.
# The new name must only contain a file name and extension.
# If you want to move the file to another directory or another
# drive, you should use the builtin rename() function.
#
# Control characters and the characters " | < > are removed
# from both names. An apostrophe is allowed; it is escaped
# before it is placed in the generated script.
#
# This function returns non-zero on success or
# zero if the file could not be renamed.
#
# NOTE: This function only works in Windows XP or higher!
#       In Linux and other operating systems, no change will
#       take place and the function always returns zero.
#
# Usage: STATUS = WinRenameFile(FULLPATH, NEWNAME, [FORCE])
#
sub WinRenameFile {
    $^O =~ m/MSWIN/i or return 0;
    defined $_[0] && defined $_[1] or return 0;
    my ($OLD, $NEW) = @_;

    # FORCE is written into the script as a bare number, so nothing
    # but digits may get through.
    my $FORCE = defined $_[2] ? $_[2] : 0;
    $FORCE =~ tr|0-9||cd;
    length($FORCE) or $FORCE = 0;

    $OLD =~ tr|"\|<>||d;              # Remove illegal characters
    $OLD = _JsPath($OLD);             # Normalize slashes, escape for JS

    $NEW =~ tr|\x00-\x1F"\|<>||d;     # Remove illegal characters
    $NEW =~ tr|\\|\/|;                # Convert \ to /
    if (index($NEW, '/') >= 0) { return 0; }    # No path allowed here
    $NEW =~ s/'/\\'/g;                # Keep ' from ending the JS string

    length($OLD) or return 0;
    length($NEW) or return 0;

    # The script quits with code 1 when the rename succeeded and with
    # code 0 when it did not.
    my $JSCODE = join('',
        "FORCE=$FORCE;OLD=CNV('$OLD');NEW=CNV('$NEW');",
        "try{FSO=new ActiveXObject('Scripting.FileSystemObject');",
        "if(!FSO.FileExists(OLD)){BYE(0);}",
        "if(FORCE){FULL=NEW;if(NEW.indexOf('\\\\')<0){",
        "P=OLD.lastIndexOf('\\\\');if(P>=0)FULL=OLD.substr(0,P+1)+NEW;}",
        "if(FORCE==2)FSO.DeleteFile(FULL,1);else FSO.DeleteFile(FULL);}}",
        "catch(e){}",
        "try{F=FSO.GetFile(OLD);F.name=NEW;BYE(1);}catch(e){BYE(0);}",
        "function BYE(x){WScript.Quit(x);}",
        _JsCnvSource(),
    );

    my $JSFILE = _WriteJsFile('RENAMER.JS', $JSCODE);
    length($JSFILE) or return 0;

    my $STATUS = system("CSCRIPT.EXE //NOLOGO $JSFILE");
    unlink $JSFILE;

    # system() returns -1 when CSCRIPT could not be started at all;
    # that must not be reported as success.
    return ($STATUS > 0) ? $STATUS : 0;
}

##################################################
# File | v2022.7.11
# Returns the current directory. (If a drive letter
# is provided in the first argument, then it returns
# the current directory for that drive. This only
# applies to DOS and Windows where each drive letter
# has its own current directory. If no drive letter
# is provided, then it returns the current directory
# of the current drive under DOS and Windows.)
#
# Only a single letter, optionally followed by a colon,
# is accepted as DRIVE; anything else is ignored because
# the value is passed to the command interpreter.
#
# Usage: STRING = GetCurrentDirectory([DRIVE])
#
sub GetCurrentDirectory {
    if ($^O =~ /DOS|MSWIN/i) {
        my $DRV = '';
        if (defined $_[0] && $_[0] =~ m/\A\s*([A-Za-z]):?/) {
            $DRV = "$1:";
        }
        return Trim(`CD $DRV`);
    }
    return Trim(`pwd`);
}

##################################################
# String
# Removes whitespace (including CR and LF) from both
# ends of a string. An undefined argument yields an
# empty string.
#
# Usage: STRING = Trim(STRING)
#
sub Trim {
    my $S = defined $_[0] ? $_[0] : '';
    $S =~ s/\A\s+//;
    $S =~ s/\s+\z//;
    return $S;
}

##################################################
# Time | v2022.2.11
# This function returns the time given in seconds
# (or the current time) as a string
# in the following format:
#
#    TimeStamp([TIME]) --> YYYY-MM-DD HH:MM:SS
#
# In contrast, the builtin function localtime()
# returns the date and time in the following format:
#    localtime() --> Ddd Mmm D HH:MM:SS YYYY
#
# Usage: STRING = TimeStamp([SECONDS])
#
sub TimeStamp {
    my @D = localtime(defined $_[0] ? $_[0] : time);
    return sprintf('%.04d-%.02d-%.02d %.02d:%.02d:%.02d',
        (1900 + $D[5]), (1 + $D[4]), $D[3], $D[2], $D[1], $D[0]);
}

##################################################
# Private helper.
# Prepares a Windows path for use inside a single-quoted
# JavaScript string literal: control characters are removed,
# '/' becomes '\', runs of '\' are squeezed to one, and then
# each '\' is doubled and each apostrophe is escaped.
#
# Usage: STRING = _JsPath(PATH)
#
sub _JsPath {
    my $S = defined $_[0] ? $_[0] : '';
    $S =~ tr|\x00-\x1F||d;    # A line break would end the JS literal
    $S =~ tr|\/|\\|;          # Convert / to \
    $S =~ tr|\\||s;           # Remove duplicate backslash.
    $S =~ s/\\/\\\\/g;        # Now double each backslash.
    $S =~ s/'/\\'/g;          # Escape ' only after the doubling above
    return $S;
}

##################################################
# Private helper.
# Translates the WinReadDir() output pattern (for example
# 'S|M|N') into a comma separated list of JavaScript
# expressions. The letters N S F C M A T H select a value;
# every run of other characters becomes a quoted separator.
#
# Usage: STRING = _JsFieldList(PATTERN)
#
sub _JsFieldList {
    my $RET = defined $_[0] ? $_[0] : '';
    $RET =~ tr|'\r\n\\||d;    # Filter out unsafe characters
    length($RET) or $RET = 'S M N';

    my %FIELD = (
        N => 'toASCII(n.slice(PATHLEN))+d',
        S => 'fSize(s)',
        F => 'toASCII(n)+d',
        C => 'fDate(f.DateCreated)',
        M => 'fDate(f.DateLastModified)',
        A => 'fDate(f.DateLastAccessed)',
        T => 'fAttr(f)',
        H => 'f.ShortName',
    );

    my @PARTS;
    while ($RET =~ m/([NSFCMATH])|([^NSFCMATH]+)/g) {
        push @PARTS, defined $1 ? $FIELD{$1} : "'$2'";
    }
    return join(',', @PARTS);
}

##################################################
# Private helper.
# Turns the REGEX argument of WinReadDir() into a JavaScript
# regex literal such as /exe/i. The argument may be a bare
# pattern or have the form '/pattern/flags'. Returns an empty
# string when no usable pattern remains.
#
# Usage: STRING = _JsRegexLiteral(REGEX)
#
sub _JsRegexLiteral {
    my $REGEX = defined $_[0] ? $_[0] : '';
    my $FLAGS = '';

    # Take the delimiters and flags apart BEFORE filtering. Deleting
    # the slashes first would glue the flags onto the pattern.
    if ($REGEX =~ m|\A/(.*)/([A-Za-z]*)\z|s) {
        ($REGEX, $FLAGS) = ($1, $2);
    }

    # We need to remove forward slashes and backslashes
    # among other things to prevent code injection vulnerability:
    $REGEX =~ tr|\/\\'"<>\r\n||d;
    $REGEX =~ s/\A[*]+//;     # A leading '*' would make '/*', a JS comment
    length($REGEX) or return '';

    # Keep each supported flag at most once; a repeated or unknown
    # flag is a syntax error in JavaScript.
    $FLAGS = join('', grep { index($FLAGS, $_) >= 0 } ('i', 'm'));

    return "/$REGEX/$FLAGS";
}

##################################################
# Private helper.
# Returns the source of the JavaScript function CNV(), which
# converts each '{number}' in a string to the character with
# that code. Shared by WinReadDir() and WinRenameFile().
#
# Usage: STRING = _JsCnvSource()
#
sub _JsCnvSource {
    return "function CNV(s){var i,P;s=s.split('{');"
         . "for(i=0;i<s.length;i++){P=s[i].indexOf('}');"
         . "if(P>0)s[i]=String.fromCharCode(s[i].substr(0,P)&0xffff)"
         . "+s[i].slice(P+1);}return s.join('');}";
}

##################################################
# Private helper.
# Writes JavaScript code to C:\TEMP\NAME (creating C:\TEMP if
# needed) and returns the full file name. Returns an empty
# string, and leaves no file behind, when the file could not
# be written completely.
#
# Usage: FILENAME = _WriteJsFile(NAME, JSCODE)
#
sub _WriteJsFile {
    my ($NAME, $JSCODE) = @_;
    my $DIR = "C:\\TEMP";
    -d $DIR or mkdir $DIR;

    my $JSFILE = "$DIR\\$NAME";
    open(my $FILE, '>', $JSFILE) or return '';
    binmode $FILE;
    my $OK = print {$FILE} $JSCODE;
    close $FILE or $OK = 0;

    my $SIZE = -s $JSFILE;
    $SIZE = 0 unless defined $SIZE;
    if (!$OK || $SIZE != length($JSCODE)) {
        unlink $JSFILE;
        return '';
    }
    return $JSFILE;
}

1;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Unicode file names
by karlgoethebier (Abbot) on Jan 05, 2023 at 13:47 UTC | |
by harangzsolt33 (Deacon) on Jan 07, 2023 at 14:58 UTC |