Balawoo has asked for the wisdom of the Perl Monks concerning the following question:
https://www.developpez.net/forums/d1941017/autres-langages/perl/langage/impossible-convertir-cellule-html-accents-convertis/ When I do a test script like this one, it's working
#!/usr/bin/perl use HTML::Entities; use warnings; #use utf8; #binmode(stdout => utf8); my $unsafe_chars = "< & >"; my $string="<br>àéèçûîùô<>"; print $string, "\n"; print encode_entities($string, "àéèçûîùô");
When I do the same based on an excel file
Block Description, the result is wrong, it' like that#!/usr/bin/perl -w ####################################################### # # Author: Polina Antipova aka Kitsune # # What it does: # This script converts XLS files to XML format compatible with Tes +tlink. # # More info at https://bitbucket.org/kitsuneo/xls2testlink-script/wiki # Suggestion/bugs are also accepted at polina.antipova@gmail.com # # !! Script is very data sensible. Please, be careful with input XLS f +ile !! # ######################################################## use warnings; use Encode qw/encode decode/; use Spreadsheet::ParseExcel; use File::Basename; use feature qw(say); use feature "switch"; #use Text::EtText::EtText2HTML; #use HTML::TextToHTML; use HTML::Entities; sub log_and_print { print (scalar(localtime()), " ", @_, "\n"); print LOGFILE (scalar(localtime()), " ", @_, "\n"); } sub print_log { print LOGFILE (scalar(localtime()), " ", @_, "\n"); } sub print_txt { print TXT (@_); } sub print_xml { print XML (@_); } # Source file and path are taken as script parameter if (@ARGV) { $full_source_file_path = $ARGV[0]; } else { die (" ! Please, enter file name as script parameter. \n See READM +E.txt for mor details"); } # Parse @ARGV, get path and file from script parameter ($source_file_name, $source_file_path, $source_file_suffix) = filepars +e($full_source_file_path, qr/\.[^.]*/); # Define all files and paths $source_file = "$source_file_name$source_file_suffix"; $logs_dir = "logs"; $converted_dir = "converted_files"; # Create folders for logs and converted files chdir ($source_file_path); mkdir ($logs_dir); mkdir ($converted_dir); # Output files get names after source file $txt_file = ("$source_file_name\_parsed.txt"); $xml_file = ("$source_file_name\_resulted.xml"); $file_log = ("$source_file_name\_debug.log"); # Files + folders knocked into variables $txt = "$converted_dir/$txt_file"; $xml = "$converted_dir/$xml_file"; $log = "$logs_dir/$file_log"; open (LOGFILE, "> $log") || die("Could not open file! $log"); open (TXT, "> $txt") || die("Could not open file! $txt"); log_and_print ("**************"); log_and_print ("Start working"); log_and_print ("STEP1: converting $source_file to TXT \n"); # see http://search.cpan.org/~jmcnamara/Spreadsheet-ParseExcel-0.59/li +b/Spreadsheet/ParseExcel.pm#SYNOPSIS # STEP1: The data from XLS file is stored in temp TXT file my $parser = Spreadsheet::ParseExcel->new(); my $workbook = $parser->parse($source_file); if ( !defined $workbook ) { die $parser->error(), ".\n"; } for my $worksheet ( $workbook->worksheets() ) { my ( $row_min, $row_max ) = $worksheet->row_range(); my ( $col_min, $col_max ) = $worksheet->col_range(); for my $row ( $row_min .. $row_max ) { for my $col ( $col_min .. $col_max ) { my $cell = $worksheet->get_cell( $row, $col ); next unless $cell; # Every cell is readed as row in .txt. Output format "R +ow, Col, value" $cell_unformatted = $cell->unformatted(); $cell_unformatted =~ s/\n/<br>/g; print_txt "$row;;$col;;", $cell_unformatted ,"\n"; } } } close(TXT); log_and_print ("STEP1: OK \n Result: file $txt is created \n"); log_and_print ("STEP2: converting $txt to XML \n"); # HERE THE KINGDOM OF CONVERTING BEGINS open (XML, "> $xml") || die("Could not open file! $xml"); # Read TXT to an array open (SOURCE, "$txt") || die ("Could not open file! $txt"); @parsed_data = <SOURCE>; close (SOURCE); # Initialize auxiliary variables $source_row_counter = 0; $commented = "//"; $header_of_xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<req +uirements>\n"; $end_of_kw = "</requirements>"; # Print first row of XML file print_xml $header_of_xml; # Read every row from .txt array. foreach $file_row (@parsed_data){ chomp $file_row; # Cell is processed according to its positions and value @splited_row = split(/;;/, $file_row); $cell_row_position = $splited_row[0]; $cell_col_position = $splited_row[1]; $cell_value = $splited_row[2]; print_log ("Debug: Current position: row - $cell_row_position, col + - $cell_col_position"); # First cell in excel file is expected to be mark of new test case + (one test case - one excel row) # If cell is first, all test case variable are cleaned up if ($cell_col_position == 0){ $test_requirement = ""; $tc_docid_tag = ""; $tc_title_tag = ""; $tc_version_tag = ""; $tc_revision_tag = ""; $tc_node_order_tag = ""; $tc_description_tag = ""; $tc_status_tag = ""; $tc_type_tag = ""; $tc_coverage_tag = ""; # TODO shift positions in arrays: in excel rows starts from 1, + in parsed excel from 0 print_log ("Debug: Start accumulate test case at row position +$cell_row_position"); } # Each value is checked if it is commented or not # Commented if cell is empty or first cell symbol is double slash +"//" no warnings; $if_commented = substr $cell_value, 0, 2; # Skip cells from Row1 and Column A - reserved for Header and comm +ents if (($cell_row_position == 0) and ($cell_col_position == 0)) { print_log ("File: Skip \"Header or Comments\". Cell position: +row - $cell_row_position, col - $cell_col_position"); } # Skip empty cells # Check that 1st test case has a test suite name (!mandatory) # Skip cells started with "//" elsif ($if_commented eq $commented) { print_log ("File: Skip empty cell. Cell begins with \"$if_comm +ented\", cell position: row - $cell_row_position, col - $cell_col_pos +ition"); log_and_print ("! File: Test suite (row2) must not be commente +d, otherwise .xml will be broken!") if (($cell_row_position == 1)&& ( +$cell_col_position ==1)); log_and_print ("! File: Test case name (row", $cell_row_positi +on+1, ") must not be commented, otherwise .xml will be broken!") if ( +$cell_col_position == 2); } # Accumulate test case record from all other values # Script use hard-coded values (columns numbers) and expects preci +se work with source file else { given ($cell_col_position) { # Block <requirement> # Block <docid> when (0) {; my $tc_docid = "$cell_value"; $tc_docid_tag = "<requirement><docid ><![CDATA +[$tc_docid]]></docid>\n"; print_log("Ligne\n $cell_col_position\n $tc_docid_ +tag") } # Block <title> when (1) { my $tc_title = "$cell_value"; $tc_title_tag = "<title><![CDATA[$tc_title]]>< +/title>\n"; } # Block <version> when (2) { my $tc_version = "$cell_value"; $tc_version_tag = "<version>$tc_version</versi +on>\n"; } # Block <revision> when (3) { my $tc_revision = "$cell_value"; $tc_revision_tag = "<revision> $tc_revision </ +revision>\n"; } # Block <node_order> when (4) { my $tc_node_order = "$cell_value"; $tc_node_order_tag = "<node_order>$tc_node +_order</node_order>\n"; } # Block <description> when (5) { my $tc_description = shift; my $tc_description = "$cell_value"; $t = encode_entities($tc_description, "àéèçûîùôâ"); $tc_description_tag = "<description><![CDATA[$ +t]]></description>\n"; } # Block <status> when (6) { my $tc_status = "$cell_value"; $tc_status_tag = "<status><![CDATA[V]]></status>\n +"; } # Block <type> when (7) { my $tc_type = "$cell_value"; $tc_type_tag = "<type><![CDATA[3]]></type>\n"; } # Block <excepted_coverage> when (8) { my $tc_coverage = "$cell_value"; $tc_coverage_tag = "<expected_coverage><![CDATA[1] +]></expected_coverage>\n"; # Final accumulated requirement $test_requirement = "$tc_docid_tag $tc_title_tag $tc_version_t +ag $tc_revision_tag $tc_node_order_tag $tc_description_tag $tc_status +_tag $tc_type_tag $tc_coverage_tag </requirement>\n"; print_xml $test_requirement; } # When cell is last in the excel row - call to print test case to +file if ($cell_col_position == 8) { $source_row_counter++; print_log ("File: Print accumulated keyword at row position $c +ell_row_position"); print_log ("Fichier $test_requirement"); print_xml $test_requirement; } } } } # Close XML file - double </testsuite> print_xml ($end_of_kw, "\n"); # deduct one row from row counter (reserved for headers) for get numb +er of processed Excel test cases $source_row_counter = $source_row_counter -1; log_and_print ("STEP2: OK \n Result: file $xml is created \n"); log_and_print ("Processed $source_row_counter rows from $source_file +\n See debug log for more details.\n\n"); close(XML); close(LOGFILE);
Thanks for your helpéiùûôêçà<><br>aqwzsx
|
|---|