#!/usr/bin/perl
# Title:  convertDataToGeneSymbol.pl
# Author: Nicholas Bense
# Date:   11/4/15
#
# Purpose:
#   Adds a gene-symbol column to the FlyRNAi baseline-vs-EGF data table.
#   1. Reads the synonym table and builds a lookup from the value in
#      column 1 (kept only when it starts with "FBgn" followed by digits)
#      to the value in column 2.
#   2. Reads the RNAi data table (tab-separated: gene ID, baseline value,
#      stimulus value) and writes each row to the output file with the
#      looked-up symbol inserted as the second column. Gene IDs that are
#      not in the lookup get the symbol "NA".
#
# Output:
#   FLYRNAi_data_baseline_vs_EGFSymbol.txt in the current working directory.
#
# Dies with the file name and the OS error if any file cannot be opened or
# if the output file cannot be closed cleanly.

use strict;
use warnings;
use diagnostics;

# Input and output locations.
my $synonymPath     = '/scratch/Drosophila/fb_synonym_fb_2014_05.tsv';
my $rnaiDataPath    = '/scratch/Drosophila/FlyRNAi_data_baseline_vs_EGF.txt';
my $associationPath = '/scratch/Drosophila/gene_association.goa_fly';
my $outputPath      = 'FLYRNAi_data_baseline_vs_EGFSymbol.txt';

# Open a filehandle to read file #1 (synonym table).
open( my $synonymFh, '<', $synonymPath )
    or die "Cannot open '$synonymPath' for reading: $!";

# Open a filehandle to read file #2 (RNAi data table).
open( my $rnaiDataFh, '<', $rnaiDataPath )
    or die "Cannot open '$rnaiDataPath' for reading: $!";

# Open a filehandle to read file #3. Nothing in this script reads from it;
# the open is kept so that a missing file still aborts the run.
open( my $associationFh, '<', $associationPath )
    or die "Cannot open '$associationPath' for reading: $!";

# Open a filehandle to write the new file.
open( my $outputFh, '>', $outputPath )
    or die "Cannot open '$outputPath' for writing: $!";

# Hash for the gene symbol conversion: column-1 ID => column-2 symbol.
my %geneSymbolConversion;

# Read input file 1 line by line.
while ( my $line = <$synonymFh> ) {

    # Remove the trailing newline.
    chomp $line;

    # Split the line on tabs.
    my @synonymFields = split /\t/, $line;

    # Keep only rows whose first column starts with FBgn followed by digits.
    # This is a one-shot test per row: looping on it would never terminate,
    # because nothing inside the body changes the tested value.
    next unless defined $synonymFields[0]
        && $synonymFields[0] =~ /^FBgn\d+/;

    # Column 1 is the hash key, column 2 the hash value. If an ID occurs on
    # several rows, the last row wins.
    $geneSymbolConversion{ $synonymFields[0] } = $synonymFields[1];
}

# Read input file 2 line by line.
while ( my $line = <$rnaiDataFh> ) {

    # Remove the trailing newline.
    chomp $line;

    # Split the line on tabs; columns beyond the third are ignored.
    my ( $geneID, $EGF_Baseline, $EGF_Stimulus ) = split /\t/, $line;

    # A blank line has no gene ID to look up, so skip it.
    next unless defined $geneID;

    # Pad missing columns so short rows still produce four output fields
    # without "uninitialized value" warnings.
    $EGF_Baseline = '' unless defined $EGF_Baseline;
    $EGF_Stimulus = '' unless defined $EGF_Stimulus;

    # Default symbol in case the gene ID is not found.
    my $geneSymbol = "NA";

    # Use the symbol from the hash when the gene ID has one.
    if ( defined $geneSymbolConversion{$geneID} ) {
        $geneSymbol = $geneSymbolConversion{$geneID};
    }

    # Join the data and print to the output file.
    print {$outputFh}
        join( "\t", $geneID, $geneSymbol, $EGF_Baseline, $EGF_Stimulus ),
        "\n";
}

# Release the input handles.
close $synonymFh;
close $rnaiDataFh;
close $associationFh;

# Buffered write errors (for example a full disk) only surface at close,
# so check it.
close $outputFh
    or die "Cannot close '$outputPath': $!";