VIR19;VIR20;VIR;B744; VIR19;VIR207;VIR;B744; VIR19;VIR21;VIR;B744; VIR19;AF34B;VIR;B744; VIR19;AF34C;VIR;B744; VIR19;IB580F;VIR;B744; VIR19;IB581F;VIR;B744; VIR19;AL34BC;VIR;B744; VIR19;AL34AC;VIR;B744; VIR19;ADH00A;VIR;B744; VIR19;ADH01A;VIR;B744; AL34BC;IB580F;VIR;B744; AL34BC;IB581F;VIR;B744; AL34BC;AL34BC;VIR;B744; AL34BC;AL34AC;VIR;B744; AL34BC;ADH00A;VIR;B744; AL34BC;ADH01A;VIR;B744; #### AL34BC;AL34AC;VIR;B744; VIR19;VIR20;VIR;B744; #### AL34BC;AL34BC;VIR;B744; (because they are the same name) VIR19;VIR21;VIR;B744; (because the names are too far) #### #!/usr/bin/perl use strict; use warnings; use diagnostics; use Cwd; use Fcntl qw(SEEK_SET); my $Current_Dir = getcwd; print STDOUT "the current directory is $Current_Dir"; ##################################################################### my $file = "$ARGV[0]"; open INFILE_1, '<', $file or die "Can't open '$file' : $!\n"; ########################################################################### my $outfile = "analysis_file"; open OUTFILE, '>', $outfile or die "Can't open '$outfile' : $!\n"; # the hash tables my %decrement; my %increment; # Synonymous with existance. my $start_of_DATA = tell INFILE_1; while (my $line = ) { chomp($line); my @ELEMENTS = split(/;/,$line); # suppresses the /n of the current line my $Key; my $Suffix_1; my $Suffix_2; my $Suffix_next; my $item_next; my $item = $ELEMENTS[0]; # for example : VIR19, VIR20, VIR207 (only letters then two or three numbers) # the suffix will be the two numbers if ( $item =~ m{\A ([A-Z]+) (\d\d | \d\d\d) \z}x) { $Key = $1; $Suffix_1 = $2; # to see in which test the data pass print STDOUT "1\n"; ($Suffix_next = $Suffix_1)++; $item_next = $Key . $Suffix_next; $increment{$item} = $item_next; $decrement{$item_next} = $item; } # L'expression if ... elsif ... else permet d'enchaîner une série d'instructions et évite d'avoir à imbriquer des instructions if. # for example : AF34B,AF34C ... (two letters then two numbers and a letter at the end of the string) # the suffix will be the two numbers and the last letter # because ++($foo = 'B'); => C elsif ($item =~ m{\A ([A-Z][A-Z]\d\d)([A-Z]) \z}x){ $Key = $1; $Suffix_1 = $2; print STDOUT "2\n"; ($Suffix_next = $Suffix_1)++; $item_next = $Key . $Suffix_next . $Suffix_2; $increment{$item} = $item_next; $decrement{$item_next} = $item; } # for example : ADH00A,ADH01A ... (three letters then two numbers then a letter) # the suffix will be the two numbers # because ++($foo = 'B'); => C elsif ($item =~ m{\A ([A-Z][A-Z][A-Z])(\d\d)([A-Z]) \z}x){ $Key = $1; $Suffix_1 = $2; $Suffix_2 = $3; print STDOUT "3\n"; ($Suffix_next = $Suffix_1)++; $item_next = $Key . $Suffix_next . $Suffix_2; print STDOUT "$item_next\n"; $increment{$item} = $item_next; $decrement{$item_next} = $item; } # for example : IB580F,IB581F ... (three numbers and a letter at the end of the string) # the suffix will be the three numbers # because ++($foo = '580'); => 581 elsif ($item =~ m{\A (\w+) (\d\d\d) ([A-Z]) \z}x){ $Key = $1; $Suffix_1 = $2; $Suffix_2 = $3; print STDOUT "4\n"; ($Suffix_next = $Suffix_1)++; $item_next = $Key . $Suffix_next . $Suffix_2; print STDOUT "$item_next\n"; $increment{$item} = $item_next; $decrement{$item_next} = $item; } # for example : AL34BC and AL34AC (two numbers and two letters at the end of the string) # the suffix will be the three numbers # because ++($foo = '34B'); => 34C elsif ($item =~ m{\A (\w+) (\d\d[A-Z]) ([A-Z]) \z}x){ $Key = $1; $Suffix_1 = $2; $Suffix_2 = $3; print STDOUT "5\n"; ($Suffix_next = $Suffix_1)++; $item_next = $Key . $Suffix_next . $Suffix_2; print STDOUT "$item_next\n"; $increment{$item} = $item_next; $decrement{$item_next} = $item; } else { die "Invalid data: $_"; } # Reparse DATA seek(INFILE_1, $start_of_DATA, SEEK_SET); while (my $line_DATA = ) { chomp($line_DATA); my @ELEMENTS_DATA = split(/;/,$line_DATA); my $item_DATA = $ELEMENTS_DATA[1]; print STDOUT "le item de DATA est $item_DATA\n"; print STDOUT "le item est $item\n"; if ($item eq $decrement{ $item_DATA } ) { print OUTFILE "$item;$item_DATA\n"; } elsif ($item eq $increment{$item_DATA } ) { print OUTFILE "$item;$item_DATA\n"; } print OUTFILE "\n"; } #End of the first WHILE } close INFILE_1; close OUTFILE; #### print STDOUT "le item est $item\n"; if ($item eq $decrement{ $item_DATA } ) { print OUTFILE "$item;$item_DATA\n"; } elsif ($item eq $increment{$item_DATA } ) { print OUTFILE "$item;$item_DATA\n";