##
ORIGIN
1 agagattacg tctggttgca agagatcata acaggggaaa ttgattgaaa ataaatatat
61 cgccagcagc acatgaacaa gtttcggaat gtgatcaatt taaaaattta ttgacttagg
121 cgggcagata ctttaaccaa tataggaata caagacagac aaataaaaat gacagagtac
181 acaacatcca tgaaccgcat cagcaccacc accattacca ccatcaccat taccacaggt
...
4791781 acgcgcgcgc cttttacgcc tgctaaccac tctggaggcg gccgatgacc acaaattaac
4791841 cgactggcta caacagcgaa tcggcctgct gggacagcga gatacggcaa tgttgcaccg
4791901 tttggtccat gatattgaaa aaaaactaac aaaataacgt gttgtaattt ttaaaataat
4791961 a
//
####
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Std;
# Slurp mode: with the input record separator undefined, a single readline
# returns the whole file as one string.
local $/;
our %opts;
getopts('hf:', \%opts);
die("Usage: uv_mutant.pl -f .gbk\nAdd -h for html output\n") unless $opts{f};
my $file = $opts{f};
my $genome;
# Start at zero so the total is a defined number even when no site is found.
my $total_mutations = 0;
# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and the handle is not a package-wide bareword.
open(my $gbk_fh, '<', $file) or die "File couldn't be opened: $!";
my $contents = <$gbk_fh>;
close($gbk_fh);
# Extract the entire genome: everything between the ORIGIN keyword and the
# terminating "//" line.
$contents =~ m#ORIGIN(.+?)//#s or die "No genome data found.";
$genome = $1;
# Remove the position numbers and all whitespace so the sequence becomes one
# long string that substr positions can be applied to.
$genome =~ s/[\d\s]+//g;
# Calculate total possible mutations: every c/t immediately followed by
# another c/t.  The lookahead does not consume the second base, so
# overlapping pairs are each counted.
$total_mutations++ while $genome =~ /[ct](?=[ct])/g;
#print "\nTotal possible mutations (pyramidine dimerizations): $total_mutations\n\n";
# Extract all the gene definitions, end at protein translation.
my @genes;
@genes = $contents =~ m#(? $geneid,
prod_pro => $gene_product,
gene_mutants => $gene_mutations,
mutant_prob => $probability
};
#printf "%-20s%-10d%-25d%.5f%% %s\n", $gene_name, $geneid, $gene_mutations, $probability, $gene_product;
}
}
# Report the results: the html_out() report when -h was given, otherwise a
# plain-text listing of every gene ordered by its mutation probability.
if ( !$opts{h} ) {
    print "UV Mutation (pyramidine dimerization) Analysis\n";
    print "Total possible mutations in genome: $total_mutations\n\n";
    print "\nGenes sorted by UV mutation probability:\n", "=" x 65, "\n";

    my @ranked_names = sort by_descending_probability keys %mutant_genes;
    for my $name (@ranked_names) {
        my $record = $mutant_genes{$name};
        printf "%-20s%.5f%% %s\n", $name, $record->{mutant_prob}, $record->{prod_pro};
    }
}
else {
    html_out($total_mutations, %mutant_genes);
}
# Sort comparator (for use as "sort by_descending_probability LIST"): orders
# gene names so that the one with the larger mutant_prob value in
# %mutant_genes comes first.
sub by_descending_probability {
    my $prob_of_a = $mutant_genes{$a}{mutant_prob};
    my $prob_of_b = $mutant_genes{$b}{mutant_prob};
    return $prob_of_b <=> $prob_of_a;
}
# Print the report for the -h output mode to the currently selected handle.
#
# Arguments:
#   $total_muts - total number of possible mutation sites in the genome
#   %genes      - flattened hash of gene name => hashref with the keys
#                 gene_mutants, mutant_prob and prod_pro
#
# Rows are printed in descending order of mutant_prob; genes with equal
# probability are ordered by name so the output is reproducible.  The gene
# table is taken from the argument list instead of a file-level variable, so
# the function reports exactly what the caller passes in.
#
# NOTE(review): the string literals below contain no HTML markup; presumably
# the tags were lost somewhere along the way -- confirm against the intended
# page layout before relying on this output as HTML.
sub html_out {
    my ($total_muts, %genes) = @_;

    print "\n\n\n";
    print "UV Mutant Analysis\n\n";
    print "Total Possible mutations in Genome: $total_muts\n\n";
    print "Gene mutations sorted by decending probability of mutation\n\n";
    print "\nGene Possible Gene Mutations Mutation Probability (%) Gene Product \n";

    my @ranked_names = sort {
        $genes{$b}{mutant_prob} <=> $genes{$a}{mutant_prob}
            or $a cmp $b
    } keys %genes;

    for my $name (@ranked_names) {
        my $record = $genes{$name};
        print "$name $record->{gene_mutants} $record->{mutant_prob} $record->{prod_pro} \n";
    }

    print "\n\n";
    print "\n";
    return;
}
## ##
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Std;
use Number::Format qw(:subs);
# Slurp mode: with the input record separator undefined, a single readline
# returns the whole file as one string.
undef $/;
# Number of decimal places handed to format_number() for the percentages.
my $num_precision = 5;
our %opts;
getopts('hf:', \%opts);
die("Usage: uv_mutant.pl -f .gbk\nAdd -h for html output\n") unless $opts{f};
my $file = $opts{f};
my $genome;
# Lexical handle instead of a package-wide bareword; the read operator on the
# handle is what actually pulls the file contents in.
open(my $gbk_fh, '<', $file) or die "File couldn't be opened: $!";
my $contents = <$gbk_fh>;
close($gbk_fh);
# Extract the entire genome: everything between the ORIGIN keyword and the
# terminating "//" line.
$contents =~ m#ORIGIN(.+?)//#s or die "No genome data found.";
$genome = $1;
# Remove the position numbers and all whitespace (\s already covers \r and
# \n) so the sequence becomes one long string for substr positions.
$genome =~ s/[\d\s]+//g;
# Calculate total possible mutations, broken down by dimer class.
my %mutations = find_possible_mutations($genome);
# Extract all the gene definitions, end at protein translation.
my @genes;
@genes = $contents =~ m#(? format_number($gene_mutations{tt}/$mutations{tt}*100, $num_precision),
pct => format_number($gene_mutations{ct}/$mutations{ct}*100, $num_precision),
pcc => format_number($gene_mutations{cc}/$mutations{cc}*100, $num_precision),
ptotal => format_number($gene_mutations{total}/$mutations{total}*100, $num_precision)
);
#Pull out GeneID (if exists)
if( $gene =~ m#/db_xref="GeneID:(\d+)"# ) {
$geneid = $1;
}
#Pull out Protein Product, if exists
if( $gene =~ m#/product="([^"]+)"# ) {
$gene_product = $1;
$gene_product =~ s/\n\s*/ /g; #Clear out newlines and indentation
}
$mutant_genes{$gene_name} = { gene_id => $geneid,
prod_pro => $gene_product,
%gene_mutations,
%probability
};
}
}
# Report the results: the html_out() report when -h was given, otherwise a
# plain-text listing of every gene ordered by its ptt value.
if ( !$opts{h} ) {
    print "UV Mutation (pyramidine dimerization) Analysis\n";
    print "Total possible mutations in genome: $mutations{total}\n\n";
    print "\nGenes sorted by UV mutation probability:\n", "=" x 65, "\n";

    my @ranked_names = sort by_descending_probability keys %mutant_genes;
    for my $name (@ranked_names) {
        my $record = $mutant_genes{$name};
        printf "%-20s%.5f%% %s\n", $name, $record->{ptt}, $record->{prod_pro};
    }
}
else {
    html_out($mutations{total}, \%mutant_genes);
}
# Sort comparator (for use as "sort by_descending_probability LIST"): orders
# gene names so that the one with the larger ptt value in %mutant_genes
# comes first.
sub by_descending_probability {
    my $ptt_of_a = $mutant_genes{$a}{ptt};
    my $ptt_of_b = $mutant_genes{$b}{ptt};
    return $ptt_of_b <=> $ptt_of_a;
}
# Print the report for the -h output mode to the currently selected handle.
#
# Arguments:
#   $total_muts - total number of possible mutation sites in the genome
#   $mutant_ref - reference to a hash of gene name => hashref with the keys
#                 tt, ct, cc (site counts), ptt, pct, pcc (percentages) and
#                 prod_pro (gene product)
#
# Rows are printed in descending order of ptt; genes with equal ptt are
# ordered by name so the output is reproducible.  Sorting is done on the hash
# that was passed in: a named sort sub defined outside this function cannot
# see a hash declared with "my" in here, so the comparator is written inline.
#
# NOTE(review): the string literals below contain no HTML markup; presumably
# the tags were lost somewhere along the way -- confirm against the intended
# page layout before relying on this output as HTML.
sub html_out {
    my ($total_muts, $mutant_ref) = @_;

    print "\n\n\n";
    print "UV Mutant Analysis\n\n";
    print "Total Possible mutations in Genome: $total_muts\n\n";
    print "Gene mutations sorted by decending probability of mutation\n\n";
    print "\nGene Possible Gene Mutations TT Possible Gene Mutations CT Possible Gene Mutations CC Mutation Probability TT(%) Mutation Probability CT(%) Mutation Probability CC(%) Gene Product \n";

    my @ranked_names = sort {
        $mutant_ref->{$b}{ptt} <=> $mutant_ref->{$a}{ptt}
            or $a cmp $b
    } keys %{$mutant_ref};

    for my $name (@ranked_names) {
        my $record = $mutant_ref->{$name};
        print "$name $record->{tt} $record->{ct} $record->{cc} $record->{ptt} $record->{pct} $record->{pcc} $record->{prod_pro} \n";
    }

    print "\n\n";
    print "\n";
    return;
}
# Count the adjacent-pyrimidine sites in a nucleotide sequence.
#
# Argument:
#   $genome - sequence string; letter case is ignored.  undef or an empty
#             string yields all-zero counts.
#
# Returns a hash (as a flat list) with the keys:
#   tt    - number of positions where t is followed by t
#   ct    - number of positions where c is followed by t, or t by c
#   cc    - number of positions where c is followed by c
#   total - sum of the three counts above
#
# Each pattern uses a lookahead for the second base, so it is not consumed
# and overlapping pairs are each counted ("ttt" holds two tt sites).
sub find_possible_mutations {
    my $genome = shift;

    # Set all values to zero to start in case no possible sites are found.
    my %mutations = ( tt => 0, ct => 0, cc => 0, total => 0 );
    return %mutations unless defined $genome && length $genome;

    # Normalise case once so upper-case sequence data is counted as well.
    $genome = lc $genome;

    # Dimer class => pattern for one ordered base pair of that class.
    my @site_patterns = (
        [ tt => qr/t(?=t)/ ],    # thymine-thymine
        [ ct => qr/c(?=t)/ ],    # mixed pair, c first
        [ ct => qr/t(?=c)/ ],    # mixed pair, t first
        [ cc => qr/c(?=c)/ ],    # cytosine-cytosine
    );

    for my $site (@site_patterns) {
        my ( $class, $pattern ) = @{$site};
        $mutations{$class}++ while $genome =~ /$pattern/g;
    }

    # Store the total for later percentage calculations.
    $mutations{total} = $mutations{tt} + $mutations{ct} + $mutations{cc};
    return %mutations;
}