Ellhar has asked for the wisdom of the Perl Monks concerning the following question:
#! /usr/bin/perl -w
# Script to Create Database files
#
# For every Entrez Gene dump named in @inputfiles this script
#   * copies the file to "<name>.txt", writing a fixed tab-separated header
#     row in place of the file's first line (or in front of it, for the
#     dumps listed in %has_no_header_row), and
#   * appends to the summary file that header row followed by the length of
#     the longest value seen in each column.
# The summary text itself states the purpose: choosing varchar field sizes.

use strict;

# Directory holding the input dumps; every output is written next to them.
my $input = "C:\\Elliott\\Database\\Repository\\";

# Files to process, in processing order.
my @inputfiles = (
    "gene2accession", "gene2go", "gene2sts", "gene2unigene",
    "gene2pubmed", "gene2refseq", "gene_history", "gene_info",
    "gene_refseq_uniprotkb_collab", "generifs_basic",
    "hiv_interactions", "interactions",
);

# gene2accession and gene2refseq share one column layout.
my @accession_columns = qw(
    Taxon GeneID Status
    RNA_Nucleotide_Accession RNA_Nucleotide_gi
    Protein_Accession Protein_gi
    Genomic_Nucleotide_Accession Genomic_Nucleotide_gi
    Genomic_Accession_Start_Pos Genomic_Accession_End_Pos
    Orientation Assembly
);

# Column names written as the header row, keyed by input file name.
# Keeping them in one table means the output file and the summary file
# can never drift apart.
my %header_for = (
    gene2accession => [@accession_columns],
    gene2go        => [qw(Taxon GeneID GO_ID Evidence Qualifier GO_term
                          PubMedID Category)],
    gene2pubmed    => [qw(Taxon GeneID PubMedID)],
    gene2refseq    => [@accession_columns],
    gene2sts       => [qw(GeneID UniSTSID)],
    gene2unigene   => [qw(GeneID UnigeneUD)],
    gene_history   => [qw(Taxon GeneID Discontinued_GeneID
                          Discontinued_Symbol)],
    gene_info      => [qw(Taxon GeneID Symbol LocusTag Synonyms dbXrefs
                          Chromosome Map_Location Description Type_Of_Gene
                          Symbol_From_Nomenclature_Authority
                          Full_Name_From_Nomenclature_Authority
                          Nomenclature_Status Other_Designations
                          Modification_Date)],
    gene_refseq_uniprotkb_collab =>
                      [qw(NCBI_Protein_Accession
                          UniProtKB_Protein_Accession)],
    generifs_basic => [qw(Taxon GeneID PubMedID LastUpdate GeneRIFText)],
    hiv_interactions =>
                      [qw(Taxon GeneID ProductAccession ProductName
                          InteractionShortName InteractorTaxon
                          InteractorGeneID InteractorProdictAccession
                          InteractorProductName PubMedID LastUpdate
                          GeneRIFText)],
    interactions   => [qw(Taxon GeneID ProteinAccession GeneName KeyPhrase
                          InteractorTaxon InteractorGeneID InteractionType
                          InteractorProductAccession InteractorProductName
                          ComplexID ComplexIDType ComplexName PubMedID
                          LastUpdate GeneRIFText InteractionID
                          InteractionIDType)],
);

# Files whose first line is already data: it is copied to the output and
# measured instead of being dropped.
my %has_no_header_row = map { $_ => 1 } qw(generifs_basic hiv_interactions);

my $summary = $input . "myentrezgenefilesummary.txt";
open(my $summary_fh, '>', $summary) or die "Cannot open $summary: $!";
print {$summary_fh} "This file lists the processed files their field and maximum field size\n";
print {$summary_fh} "This data can be used to determine the varchar field sizes in the novel therapies SQL database\n";

foreach my $name (@inputfiles) {

    # Look the header up before opening anything, so an unknown file name
    # fails (with a non-zero exit status) without truncating an output file.
    my $columns = $header_for{$name};
    if (!defined $columns) {
        die "Header line for this input file not definaed please contact system administator.\n";
    }
    my $header = join("\t", @$columns) . "\n";

    # get input and output files and open for reading/writing
    my $infile = $input . $name;
    my $output = $input . $name . ".txt";
    open(my $in_fh,  '<', $infile) or die "Cannot open $infile: $!";
    open(my $out_fh, '>', $output) or die "Cannot open $output: $!";
    print "FILE ", $infile, " OPEN", "\n";

    # Debugging aid, if writes to the summary ever appear to go missing:
    # print "SUMMARY is".(is_writable_fh($summary_fh)?"":"n't")." writable.\n";

    my @max_len;             # longest value length seen so far, per column
    my $on_first_line = 1;

    while (my $line = <$in_fh>) {
        chomp $line;

        if ($on_first_line) {
            $on_first_line = 0;

            # Headers are emitted only once a first line has been read, so
            # an empty input still produces no header rows.
            print {$out_fh} $header;
            print {$summary_fh} "Field lengths for file $name\n";
            print {$summary_fh} $header;

            # The first line is the file's own header unless the file is
            # flagged as header-less; in that case fall through and treat
            # it as data.
            next unless $has_no_header_row{$name};
        }

        # print line of data to outfile
        print {$out_fh} "$line\n";

        # Track the widest value per column. Rows may have more columns
        # than any earlier row, hence the defined() guard.
        my @fields = split(/\t/, $line);
        for my $i (0 .. $#fields) {
            my $len = length $fields[$i];
            if (!defined $max_len[$i] || $max_len[$i] < $len) {
                $max_len[$i] = $len;
            }
        }
    }

    # print summary field size information to summary file
    print {$summary_fh} $_, "\t" for @max_len;
    print {$summary_fh} "\n\n";

    print "FINSHED PROCESSING ", $infile, "\n";

    close $in_fh;
    # Buffered write errors only surface at close time.
    close $out_fh or die "Cannot close $output: $!";
}

close $summary_fh or die "Cannot close $summary: $!";

# is_writable_fh($fh)
# Prints an empty string to $fh and returns print's result (true on
# success). The output record separator is emptied for the duration of the
# call so that nothing is appended to the handle.
sub is_writable_fh {
    my ($fh) = @_;
    local $\ = '';
    return print {$fh} '';
}
|
---|
Replies are listed 'Best First'. | |
---|---|
Re: Can't write to open writeable Filehandle
by moritz (Cardinal) on May 09, 2008 at 10:44 UTC | |
Re: Can't write to open writeable Filehandle
by roboticus (Chancellor) on May 09, 2008 at 10:55 UTC | |
by Ellhar (Novice) on May 09, 2008 at 11:20 UTC | |
by roboticus (Chancellor) on May 09, 2008 at 11:32 UTC | |
by Ellhar (Novice) on May 09, 2008 at 11:53 UTC | |
Re: Can't write to open writeable Filehandle
by hipowls (Curate) on May 09, 2008 at 12:09 UTC |