# Build the key => value lookup from the tab-separated lines read from $fh.
# Only lines that actually contain a tab contribute a pair: for a line without
# one, split would return a single element and every following key/value pair
# would be shifted out of alignment in the hash.
# The limit of 2 keeps any further tabs inside the value.
my %dict = map { chomp; /\t/ ? split(/\t/, $_, 2) : () } <$fh>;
####
> grpsTbl <- read.csv("Orthogroups_3.csv", header=T, sep = "\t", row.names = 1, stringsAsFactors=F)
####
"PBANKA_0000600, PBANKA_0000701, PBANKA_0000801, PBANKA_0001001, PBANKA_0001101, PBANKA_0001201, PBANKA_0001301, PBANKA_0001401, PBANKA_0001501, PBANKA_0006300, PBANKA_0006401, PBANKA_0006501, PBANKA_0006600, PBANKA_0006701,"
####
"PmUG01_00010100.1-p1, PmUG01_00010200.1-p1, PmUG01_00010400.1-p1, PmUG01_00010500.1-p1, PmUG01_00010600.1-p1, PmUG01_00010700.1-p1, PmUG01_00010800.1-p1, PmUG01_00010900.1-p1, PmUG01_00011000.1-p1, PmUG01_00011300.1-p1, PmUG01_00011400.1-p1, PmUG01_00011600.1-p1, PmUG01_00011700.1-p1, PmUG01_00012100.1-p1, PmUG01_00012200.1-p1,"
####
PVX_088085 Protein processing in endoplasmic reticulum
PVX_114095 Protein processing in endoplasmic reticulum
PVX_123055 Ribosome biogenesis in eukaryotes
PYYM_1032000 -
PYYM_1120600 -
PCYB_031930 Purine metabolism; Metabolic pathways; DNA replication; Pyrimidine metabolism
####
# Diagnostic pass: emit a warning for every dictionary entry whose value
# contains a tab character.
foreach my $key (keys %dict) {
    my $value = $dict{$key};
    next unless $value =~ /\t/;
    warn 'for lookup:' . $key . ' tab in field:' . $value . "\n";
}
####
for lookup:PVX_114095 tab in field:Protein processing in endoplasmic reticulum
for lookup:PYYM_1032000 tab in field:-
for lookup:PVX_088085 tab in field:Protein processing in endoplasmic reticulum
####
# This script was excerpted from http://stackoverflow.com/questions/11678939/replace-text-based-on-a-dictionary
#
# Reads a tab-separated "key<TAB>value" dictionary from kegg_pathway_title.txt,
# then prints every line of Orthogroups_3.csv after the first one with each
# occurrence of a dictionary key replaced by its value.
# Diagnostics (dictionary dump, per-line field dump, tab report) go to STDERR.
use strict;
use warnings;
#use Text::CSV;
use Data::Dumper;
local $Data::Dumper::Deepcopy=1;
local $Data::Dumper::Purity=1;
local $Data::Dumper::Sortkeys=0;
local $Data::Dumper::Indent=3;

my $dict_file  = 'kegg_pathway_title.txt';
my $input_file = 'Orthogroups_3.csv';

# --- Load the dictionary ----------------------------------------------------
open my $dict_fh, '<', $dict_file or die "Cannot open $dict_file: $!";
my %dict;
while (my $entry = <$dict_fh>) {
    # Strip the line ending, including a carriage return from Windows files,
    # so it does not end up inside the replacement text.
    $entry =~ s/\r?\n\z//;

    # Limit of 2: everything after the first tab belongs to the value.
    my ($key, $value) = split /\t/, $entry, 2;

    # Skip blank lines and lines without a tab (no value), and lines with an
    # empty key, which would add an empty alternative matching everywhere.
    next unless defined $key && length $key && defined $value;

    $dict{$key} = $value;
}
# Use a dedicated handle and close it explicitly: reopening a handle without
# an intervening close does not reset $., which the header check below needs.
close $dict_fh or die "Cannot close $dict_file: $!";

warn Dumper \%dict;
for my $k (keys %dict) {
    my $v = $dict{$k};
    warn 'for lookup:'.$k.' tab in field:'.$v."\n" if $v =~ /\t/;
}
#my %dict = map { chomp; split ' ', $_, 2 } <$fh>;

# --- Build the search pattern -----------------------------------------------
# quotemeta makes every key match literally; longest keys come first so that a
# key which is a prefix of another key cannot win the alternation.
my $re = join '|',
    map  { quotemeta }
    sort { length($b) <=> length($a) }
    keys %dict;

# --- Rewrite the input file -------------------------------------------------
open my $in_fh, '<', $input_file or die "Cannot open $input_file: $!";
while (my $line = <$in_fh>) {
    # NOTE(review): this line counter goes to STDOUT and is interleaved with
    # the rewritten data; it looks like debug output - confirm it is wanted.
    print $. ."\n";

    # Skip the header line.
    next if $. < 2;

    my @a0 = split /\t/, $line;
    warn Dumper \@a0;

    # With an empty dictionary there is nothing to replace; an empty pattern
    # would otherwise match at every position.
    $line =~ s/($re)/$dict{$1}/g if length $re;
    print $line;
}
close $in_fh or die "Cannot close $input_file: $!";