Perlfan52 has asked for the wisdom of the Perl Monks concerning the following question:
This must be an internal bug of perl. It is well known that perl had (and probably has) unicode/utf8 issues. I am using Strawberry perl 5.30.0 (built for MSWin32-x64-multi-thread) on windows 10 pro client with recent updates.
Here are the codes and the files that are needed to reproduce this problem.
- The searched files are in http://ftp.freedb.org/pub/freedb/freedb-update-20200201-20200301.tar.bz2
I am extracting this file into C:/MyScripts/freedb-update-20200201-20200301
- The module version of the script C:/MyScripts/search_script_with module.pl is:
- The corresponding module C:/MyScripts/searchFreedb.pm isBEGIN{push(@INC,'C:/MyScripts');} use searchFreedb; mainSearchFreedb('C:/MyScripts/freedb-update-20200201-20200301'); print "End of script\n";
- The standalone version of the script C:/MyScripts/search_script_standalone.pl is:package searchFreedb; use strict; use utf8; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); require Exporter; @ISA = qw(Exporter); @EXPORT = qw(mainSearchFreedb); @EXPORT_OK = qw(mainSearchFreedb); $VERSION = 1.0; $| = 1; ############################################################# # mainSearchFreedb ############################################################# sub mainSearchFreedb { my ($searchdir) = @_; open(FILE, ">C:/MyScripts/sresult_module.txt") || die "$!\n"; binmode FILE, ":utf8"; recursivSearchFreedb($searchdir); close(FILE); } ############################################################# # recursivSearchFreedb ############################################################# sub recursivSearchFreedb { my ($dir) = @_; die "dir $dir!\n" if(!$dir || !(-e $dir && -d $dir)); $dir =~ s/[\/\\]+/\//og; $dir = $dir . '/' if( $dir !~ /\/$/o ); my ($dirname) = ( $dir =~ /^.*\/([^\/]+?)\/*$/o ); opendir(DIR,$dir) || warn __LINE__."$!\n"; my @all_dir_files = readdir(DIR); closedir(DIR); print "Folder: $dir => $dirname\n"; foreach my $dir_file ( sort @all_dir_files ) { $dir_file =~ /^\.+$/o && next; my $abspath = $dir . $dir_file; if( -d $abspath ) { recursivSearchFreedb($abspath); } else { if($dir_file =~ /(^COPYING$|^README$)$)/io) { print "skipping $dir_file\n"; next; } elsif(-z $abspath) { next; } my ($content); open(IN, "<$abspath") || die "$!\n"; while(my $line = <IN>) { next if not $line =~ /^#\s+xmcd/o; $content .= $line; my ($TITLEALL,$DISCID,$GENRE); for(;;) { my $line2 = <IN>; if($line2=~/^\s*DTITLE\s*=(.*)$/o) {$TITLEALL .= $1;} if($line2=~/^\s*DISCID=\s*(.+?)\s*$/o) {$DISCID = $1;} if($line2=~/^\s*DGENRE\s*=(.*)$/o) {$GENRE .= $1;} $content .= $line2; if($line2 =~ /^PLAYORDER=/o) { if( $TITLEALL =~ /Romanti[cqk]/io ) { print FILE "$content\n"; } last; } } } close(IN); } } } ############################################################## # end of package ############################################################## 1;
I am starting the module version withuse strict; use utf8; $| = 1; ############################################################# # recursivSearchFreedb ############################################################# sub recursivSearchFreedb { my ($dir) = @_; die "dir $dir\n" if(!$dir || !(-e $dir && -d $dir)); $dir =~ s/[\/\\]+/\//og; $dir = $dir . '/' if( $dir !~ /\/$/o ); my ($dirname) = ( $dir =~ /^.*\/([^\/]+?)\/*$/o ); opendir(DIR,$dir) || warn __LINE__."$!\n"; my @all_dir_files = readdir(DIR); closedir(DIR); print "Folder: $dir => $dirname\n"; foreach my $dir_file ( sort @all_dir_files ) { $dir_file =~ /^\.+$/o && next; my $abspath = $dir . $dir_file; if( -d $abspath ) { recursivSearchFreedb($abspath); } else { if($dir_file =~ /(^COPYING$|^README$)/io) { print "skipping $dir_file\n"; next; } elsif(-z $abspath) { next; } my ($content); open(IN, "<$abspath") || die "$!\n"; while(my $line = <IN>) { next if not $line =~ /^#\s+xmcd/o; $content .= $line; my ($TITLEALL,$DISCID,$GENRE); for(;;) { my $line2 = <IN>; if($line2=~/^\s*DTITLE\s*=(.*)$/o) {$TITLEALL .= $1;} if($line2=~/^\s*DISCID=\s*(.+?)\s*$/o) {$DISCID = $1;} if($line2=~/^\s*DGENRE\s*=(.*)$/o) {$GENRE .= $1;} $content .= $line2; if($line2 =~ /^PLAYORDER=/o) { if( $TITLEALL =~ /Romanti[cqk]/io ) { print FILE "$content\n"; } last; } } } close(IN); } } } ############################################################ # main starts here ############################################################ open(FILE, ">C:/MyScripts/sresult_standalone.txt") || die "$!\n"; binmode FILE, ":utf8"; recursivSearchFreedb('C:/MyScripts/freedb-update-20200201-20200301'); close(FILE); print "End of script\n";
The result is:
I am starting the standalone version withFolder: C:/MyScripts/freedb-20200201-20200301/ Folder: C:/MyScripts/freedb-20200201-20200301/blues/ Folder: C:/MyScripts/freedb-20200201-20200301/classical/ Folder: C:/MyScripts/freedb-20200201-20200301/country/ Folder: C:/MyScripts/freedb-20200201-20200301/data/ Folder: C:/MyScripts/freedb-20200201-20200301/folk/ Folder: C:/MyScripts/freedb-20200201-20200301/jazz/ Folder: C:/MyScripts/freedb-20200201-20200301/misc/ Folder: C:/MyScripts/freedb-20200201-20200301/newage/ Folder: C:/MyScripts/freedb-20200201-20200301/reggae/ Folder: C:/MyScripts/freedb-20200201-20200301/rock/ Folder: C:/MyScripts/freedb-20200201-20200301/soundtrack/ End of script
The result is (it crashes very quickly):
Any ideas why the standalone version crashes? Can you reproduce the problem on your own pc? Thank you for your answers or ideas.Folder: C:/MyScripts/freedb-20200201-20200301/ Folder: C:/MyScripts/freedb-20200201-20200301/blues/ Malformed UTF-8 character: \xf6\x6e\x20\x26 (unexpected non-continuati +on byte 0x6e, immediately after start byte 0xf6; need 4 bytes, got 1) + in pattern match (m//) at C:\MYSCRI~1\SEARCH~2.PL line 55, <IN> line + 67. Malformed UTF-8 character (fatal) at C:\MYSCRI~1\SEARCH~2.PL line 55, +<IN> line 67.
|
|---|