gene 337..2799 /gene="thrA" /locus_tag="t0002" /db_xref="GeneID:1066974" CDS 337..2799 /gene="thrA" /locus_tag="t0002" /note="multifunctional homotetrameric enzyme that catalyzes the phosphorylation of aspartate to form aspartyl-4-phosphate as well as conversion of aspartate semialdehyde to homoserine; functions in a number of amino acid biosynthetic pathways" /codon_start=1 /transl_table=11 /product="bifunctional aspartokinase I/homeserine dehydrogenase I" /protein_id="NP_803887.1" /db_xref="GI:29140545" /db_xref="GeneID:1066974" /translation="MRVLKFGGTSVANAERFLRVADILESNSRQGQVATVLSAPAKIT NHLVAMIEKTIGGQDALPNISDAERIFSDLLAGLASAQPGFPLARLKMVVEQEFAQIK HVLHGISLLGQCPDSINAALICRGEKMSIAIMAGLLEARGHRVTVIDPVEKLLAVGHY LESTVDIAESTRRIAASQIPADHMILMAGFTAGNEKGELVVLGRNGSDYSAAVLAACL RADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQF QIPCLIKNTGNPQAPGTLIGASSDDDNLPVKGISNLNNMAMFSVSGPGMKGMIGMAAR VFAAMSRAGISVVLITQSSSEYSISFCVPQSDCARARRAMQDEFYLELKEGLLEPLAV TERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATT GVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQTWLKNKHIDLRVCGVANSKA LLTNVHGLNLDNWQAELAQANAPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYAD FLREGFHVVTPNKKANTSSMDYYHQLRFAAAQSRRKFLYDTNVGAGLPVIENLQNLLN AGDELQKFSGILSGSLSFIFGKLEEGMSLSQATALAREMGYTEPDPRDDLSGMDVARK LLILARETGRELELSDIVIEPVLPDEFDASGDVTAFMAHLPQLDDAFAARVAKARDEG KVLRYVGNIEEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAG NDVTAAGVFADLLRTLSWKLGV" gene 2801..3730 /gene="thrB" /locus_tag="t0003" /db_xref="GeneID:1066981" CDS 2801..3730 /gene="thrB" /locus_tag="t0003" /note="catalyzes the formation of O-phospho-L-homoserine from L-homoserine in threonine biosynthesis from asparate" /codon_start=1 /transl_table=11 /product="homoserine kinase" /protein_id="NP_803888.1" /db_xref="GI:29140546" /db_xref="GeneID:1066981" /translation="MVKVYAPASSANMSVGFDVLGAAVTPVDGTLLGDVVSVEAADHF RLHNLGRFADKLPPEPRENIVYQCWERFCQALGKTIPVAMTLEKNMPIGSGLGSSACS VVAALVAMNEHCGKPLNDTRLLALMGELEGRISGSIHYDNVAPCFLGGMQLMIEENGI ISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGFIHACYSRQ 
PQLAAALMKDVIAEPYRARLLPGFSQARQAVSEIGALASGISGSGPTLFALCDKPETA QRVADWLSKHYLQNQEGFVHICRLDTAGARVVG" #### ORIGIN 1 agagattacg tctggttgca agagatcata acaggggaaa ttgattgaaa ataaatatat 61 cgccagcagc acatgaacaa gtttcggaat gtgatcaatt taaaaattta ttgacttagg 121 cgggcagata ctttaaccaa tataggaata caagacagac aaataaaaat gacagagtac 181 acaacatcca tgaaccgcat cagcaccacc accattacca ccatcaccat taccacaggt ... 4791781 acgcgcgcgc cttttacgcc tgctaaccac tctggaggcg gccgatgacc acaaattaac 4791841 cgactggcta caacagcgaa tcggcctgct gggacagcga gatacggcaa tgttgcaccg 4791901 tttggtccat gatattgaaa aaaaactaac aaaataacgt gttgtaattt ttaaaataat 4791961 a // #### #!/usr/bin/perl use warnings; use strict; use Getopt::Std; local $/; our %opts; getopts('hf:', \%opts); die("Usage: uv_mutant.pl -f .gbk\nAdd -h for html output\n") unless $opts{f}; my $file = $opts{f}; my $genome; my $total_mutations; open(FH, $file) or die "File couldn't be opened"; my $contents = ; close(FH); #Extract the entire genome $contents =~ m#ORIGIN(.+?)//#s or die "No genome data found."; $genome = $1; # Remove extraneous characters, make it one big long string to use substr position on it $genome =~ s/[\d\s]+//g; # Calculate total possible mutations while( $genome =~ /[ct](?=[ct])/g ) { $total_mutations++; } #print "\nTotal possible mutations (pyramidine dimerizations): $total_mutations\n\n"; # Extract all the gene definitions, end at protein translation. my @genes; @genes = $contents =~ m#(? 
$geneid, prod_pro => $gene_product, gene_mutants => $gene_mutations, mutant_prob => $probability }; #printf "%-20s%-10d%-25d%.5f%% %s\n", $gene_name, $geneid, $gene_mutations, $probability, $gene_product; } } if($opts{h}) { html_out($total_mutations, %mutant_genes) }else{ print "UV Mutation (pyramidine dimerization) Analysis\n"; print "Total possible mutations in genome: $total_mutations\n\n"; print "\nGenes sorted by UV mutation probability:\n", "=" x 65, "\n"; foreach (sort by_descending_probability keys %mutant_genes) { printf "%-20s%.5f%% %s\n", $_, $mutant_genes{$_}{mutant_prob}, $mutant_genes{$_}{prod_pro}; } } sub by_descending_probability { $mutant_genes{$b}{mutant_prob} <=> $mutant_genes{$a}{mutant_prob}; } sub html_out { my $total_muts = shift; print "\n\n\n"; print "

UV Mutant Analysis

\n"; print "Total Possible mutations in Genome: $total_muts
\n"; print "Gene mutations sorted by decending probability of mutation
\n"; print "\n\n"; #my %mutant_genes = shift; #Gives an odd numbered hash assignment error when prototyped foreach (sort by_descending_probability keys %mutant_genes) { print "\n"; } print "
GenePossible Gene MutationsMutation Probability (%)Gene Product
$_$mutant_genes{$_}{gene_mutants}$mutant_genes{$_}{mutant_prob}$mutant_genes{$_}{prod_pro}
\n"; print "\n"; } ##
## #!/usr/bin/perl use warnings; use strict; use Getopt::Std; use Number::Format qw(:subs); undef $/; my $num_precision = 5; our %opts; getopts('hf:', \%opts); die("Usage: uv_mutant.pl -f .gbk\nAdd -h for html output\n") unless $opts{f}; my $file = $opts{f}; my $genome; open(FH, '<', $file) or die "File couldn't be opened: $!"; my $contents = ; close(FH); #Extract the entire genome $contents =~ m#ORIGIN(.+?)//#s or die "No genome data found."; $genome = $1; # Remove extraneous characters, make it one big long string to use substr position on it $genome =~ s/[\d\r\n\s]+//g; # Calculate total possible mutations my %mutations = find_possible_mutations($genome); # Extract all the gene definitions, end at protein translation. my @genes; @genes = $contents =~ m#(? format_number($gene_mutations{tt}/$mutations{tt}*100, $num_precision), pct => format_number($gene_mutations{ct}/$mutations{ct}*100, $num_precision), pcc => format_number($gene_mutations{cc}/$mutations{cc}*100, $num_precision), ptotal => format_number($gene_mutations{total}/$mutations{total}*100, $num_precision) ); #Pull out GeneID (if exists) if( $gene =~ m#/db_xref="GeneID:(\d+)"# ) { $geneid = $1; } #Pull out Protein Product, if exists if( $gene =~ m#/product="([^"]+)"# ) { $gene_product = $1; $gene_product =~ s/\n\s*/ /g; #Clear out newlines and indentation } $mutant_genes{$gene_name} = { gene_id => $geneid, prod_pro => $gene_product, %gene_mutations, %probability }; } } if($opts{h}) { html_out($mutations{total}, \%mutant_genes) }else{ print "UV Mutation (pyramidine dimerization) Analysis\n"; print "Total possible mutations in genome: $mutations{total}\n\n"; print "\nGenes sorted by UV mutation probability:\n", "=" x 65, "\n"; foreach (sort by_descending_probability keys %mutant_genes) { printf "%-20s%.5f%% %s\n", $_, $mutant_genes{$_}{ptt}, $mutant_genes{$_}{prod_pro}; } } sub by_descending_probability { $mutant_genes{$b}{ptt} <=> $mutant_genes{$a}{ptt}; } sub html_out { my ($total_muts, $mutant_ref) = @_; 
my %mutant_genes = %{$mutant_ref}; print "\n\n\n"; print "

UV Mutant Analysis

\n"; print "Total Possible mutations in Genome: $total_muts
\n"; print "Gene mutations sorted by decending probability of mutation
\n"; print "\n\n"; #my %mutant_genes = shift; #Gives an odd numbered hash assignment error when prototyped foreach (sort by_descending_probability keys %mutant_genes) { print "\n"; } print "
GenePossible Gene Mutations TTPossible Gene Mutations CTPossible Gene Mutations CCMutation Probability TT(%)Mutation Probability CT(%)Mutation Probability CC(%)Gene Product
$_$mutant_genes{$_}{tt}$mutant_genes{$_}{ct}$mutant_genes{$_}{cc}$mutant_genes{$_}{ptt}$mutant_genes{$_}{pct}$mutant_genes{$_}{pcc}$mutant_genes{$_}{prod_pro}
\n"; print "\n"; }

# find_possible_mutations($sequence)
#
# Count the adjacent pyrimidine pairs in a nucleotide sequence, i.e. the
# sites at which UV light can form a pyrimidine dimer.
#
# Parameters:
#   $sequence - nucleotide string such as "acgtt". Matching is
#               case-insensitive; undef or "" is treated as an empty
#               sequence and yields all-zero counts.
#
# Returns a flat key/value list meant to be assigned to a hash:
#   tt    - thymine-thymine pairs
#   ct    - mixed pairs, counted in either orientation ("ct" or "tc")
#   cc    - cytosine-cytosine pairs
#   total - tt + ct + cc
#
# Pairs may overlap: "ttt" contains two tt sites.
sub find_possible_mutations {
    my $genome = shift;

    # Every counter starts at zero so all four keys exist in the result even
    # when no site is found.
    my %mutations = ( tt => 0, ct => 0, cc => 0, total => 0 );

    return %mutations unless defined $genome && length $genome;

    # Single pass over the sequence. The second base is captured inside a
    # lookahead so it is not consumed and can start the next pair, which is
    # what makes overlapping sites count.
    while ( $genome =~ /([ct])(?=([ct]))/gi ) {
        my $pair = lc "$1$2";

        # Both orientations of the heterogeneous dimer share one counter.
        $pair = 'ct' if $pair eq 'tc';

        $mutations{$pair}++;
    }

    # Store the total for the caller's later probability calculations.
    $mutations{total} = $mutations{tt} + $mutations{ct} + $mutations{cc};

    return %mutations;
}