#!/usr/bin/perl
#
# GetbackIDs.pl -- put the original names back into table files.
#
# Usage example:
#   perl GetbackIDs.pl -p /path_to_files -e [table file extension]
#
# Requires, inside the given directory, a table file NAME.EXT and an
# "IDs database" that shares its base name.  The database is opened as
# NAME.bd and must hold one "ID<TAB>new name" pair per line.
# NOTE(review): the original usage note described the database as being in
# ".txt" format, while the code has always opened NAME.bd -- confirm which
# extension is intended.
#
# Every ID of the database that occurs in the table is replaced by its new
# name; IDs that are not listed in the database are left untouched.  The
# table file is rewritten in place.

use strict;
use warnings;
use Getopt::Long;

my $usage = "Usage: perl $0 -p /path_to_files -e table_file_extension\n";

my ($path, $ext);
GetOptions(
    'path=s'      => \$path,
    'extension=s' => \$ext,
) or die $usage;

# Both options are mandatory: without them chdir and the filename match
# below would operate on undefined values.
die $usage unless defined($path) && defined($ext);

# Accept "-e .tab" as well as "-e tab".
$ext =~ s/\A\.//;
die $usage unless length($ext);

print "$path\n";
chdir $path or die "ERROR: Unable to enter $path: $!\n";

opendir my $dir, '.' or die "ERROR: Unable to read $path: $!\n";
my @files = readdir $dir;
closedir $dir;
print "@files\n";

FILE:
for my $file (@files) {

    # Only names of the exact form NAME.EXT are tables.  The dot is
    # literal and the extension is quoted so that it is never interpreted
    # as a regular expression.
    next FILE unless $file =~ /\A(\w+)\.\Q$ext\E\z/;
    my $name = $1;
    print "This is the Filename: $file\n";

    my $db_file = "$name.bd";
    print "This is the DBname:$db_file\n";

    my %new_name_for = load_id_map($db_file);
    next FILE unless %new_name_for;    # empty database: nothing to rename

    rewrite_table($file, \%new_name_for);
}

# load_id_map($db_file)
#
# Reads the tab-separated database and returns a hash that maps each ID
# (first column) to its new name (second column).  Lines lacking either
# column are skipped; when an ID is listed more than once the first entry
# wins.  Dies if the database cannot be opened.
sub load_id_map {
    my ($db_file) = @_;

    open my $db, '<', $db_file
        or die "cannot open input file $db_file: $!\n";

    my %new_name_for;
    while (my $line = <$db>) {
        $line =~ s/\r?\n\z//;    # tolerate Windows line endings too
        my ($dbid, $firstid) = split /\t/, $line;
        next unless defined($dbid)    && length($dbid);
        next unless defined($firstid) && length($firstid);
        next if exists $new_name_for{$dbid};

        print "This is my $dbid and its $firstid\n";
        $new_name_for{$dbid} = $firstid;
    }
    close $db;

    return %new_name_for;
}

# rewrite_table($table_file, \%new_name_for)
#
# Replaces, in a single pass, every complete ID found in $table_file by its
# new name and writes the result back over the table.  Returns the number
# of replacements made (0 leaves the file untouched).  Dies on I/O errors.
sub rewrite_table {
    my ($table_file, $new_name_for) = @_;

    # One alternation of all IDs, each quoted so it is matched literally.
    # Longer IDs come first and the look-arounds demand that the match is
    # not glued to further word characters; otherwise "..._1" would also
    # rewrite the beginning of "..._10".
    my $alternation = join '|',
        map  { quotemeta($_) }
        sort { length($b) <=> length($a) || $a cmp $b }
        keys %{$new_name_for};
    my $id_re = qr/(?<!\w)($alternation)(?!\w)/;

    open my $in, '<', $table_file
        or die "cannot open input file $table_file: $!\n";
    my @lines = <$in>;
    close $in;

    # A single substitution per line means inserted names are never
    # scanned again, so a new name cannot be altered by a later ID.
    my $replaced = 0;
    for my $line (@lines) {
        my $count = ($line =~ s/$id_re/$new_name_for->{$1}/g);
        $replaced += $count if $count;
    }
    return 0 unless $replaced;

    # Write a sibling temporary file and rename it over the table, so a
    # failure half-way through never leaves a truncated table behind.
    my $tmp_file = "$table_file.tmp.$$";
    my $mode     = (stat $table_file)[2];

    open my $out, '>', $tmp_file
        or die "cannot open output file $tmp_file: $!\n";
    print {$out} @lines
        or die "cannot write to $tmp_file: $!\n";
    close $out
        or die "cannot close $tmp_file: $!\n";

    chmod $mode & 07777, $tmp_file if defined $mode;
    rename $tmp_file, $table_file
        or die "cannot replace $table_file: $!\n";

    return $replaced;
}

__END__

####
Everything below the __END__ marker is sample data and is ignored by perl.
In the database the ID and the new name must be separated by a TAB (the
script splits on "\t"); the columns of the table are presumably
TAB-separated as well, since the new names contain blanks.

#Database of table names and new names
Aspergillus_clavatus_1 XP_001276684.1 pectate lyase, putative [Aspergillus clavatus NRRL 1]
Aspergillus_fumigatus_2 XP_001276694.1 conserved hypothetical protein [Aspergillus fumigatus NRRL 1]
Aspergillus_flavus_3 XP_001276726.1 tyrosinase central domain protein [Aspergillus flavus NRRL 1]
Aspergillus_terreus_4 XP_001276738.1 endoglucanase, putative [Aspergillus terreus NRRL 1]

#Lines of the table to be renamed
Aspergillus_clavatus_1 Aspergillus_flavus_198 Aspergillus_terreus_166 Aspergillus_fumigatus_2
Aspergillus_clavatus_1 Aspergillus_flavus_3 Aspergillus_terreus_4 Aspergillus_fumigatus_2
Aspergillus_clavatus_3 Aspergillus_flavus_198 Aspergillus_terreu_166 Aspergillus_fumigatus_16

#Expected result (See that in some cases there's no replacement to be done, if the ID is not present in the names "database" file)
XP_001276684.1 pectate lyase, putative [Aspergillus clavatus NRRL 1] Aspergillus_flavus_198 Aspergillus_terreus_166
XP_001276694.1 conserved hypothetical protein [Aspergillus fumigatus NRRL 1] XP_001276684.1 pectate lyase, putative [Aspergillus clavatus NRRL 1] XP_001276726.1 tyrosinase central domain protein [Aspergillus flavus NRRL 1] XP_001276738.1 endoglucanase, putative [Aspergillus terreus NRRL 1] XP_001276694.1 conserved hypothetical protein [Aspergillus fumigatus NRRL 1] Aspergillus_clavatus_3 Aspergillus_flavus_198 Aspergillus_terreu_166 Aspergillus_fumigatus_16