#!/usr/local/bin/perl

package BioParser;

use strict;
use warnings;
use autodie;
use Carp qw/croak cluck/;
use DBM::Deep;
use Fcntl;
use feature qw/say switch/;
use Statistics::Descriptive;
use Storable;
use GD::Graph::linespoints;

$|++;

my $file = q{};
my @data = ();

# bp_content method
sub bp_count
{
    my ($data) = @_;

    # obtain the count of each nucleotide in the sequence
    my $a_count = ($data =~ tr/Aa//);
    my $c_count = ($data =~ tr/Cc//);
    my $g_count = ($data =~ tr/Gg//);
    my $t_count = ($data =~ tr/Tt//);

    return ($a_count, $c_count, $g_count, $t_count);
}


# calc_annealing_temp method
sub calc_annealing_temp
{
    my ($data) = @_;

    # returns the result of calling the calc_melting_temp method
    # in order to establish an initial temp
    return (&calc_melting_temp() - 5);
}


# calc_melting_temp method
sub calc_melting_temp
{
   my ($data) = @_;

   # obtain the nucleotide counts
   my ($a_count, $c_count, $g_count, $t_count) = &bp_count($data);

   # calculate approximate melting point temperature
   my $mp = 4 * ($g_count + $c_count) + 2 * ($a_count + $t_count);

   return $mp;
}


# 'codon_to_amino_acid' function
sub codon_to_aa
{
    # store the codon in the 'codon' variable
    my ($codon) = @_;

    # scan the value in the codon variable
    given ($codon)
    {
        # convert the codon to the appropriate amino acid and return it
        when (/GC./i) { 'A'; }
        when (/TG[TC]/i) { 'C'; }
        when (/GA[TC]/i) { 'D'; }
        when (/GA[AG]/i) { 'E'; }
        when (/TT[TC]/i) { 'F'; }
        when (/GG./i)    { 'G'; }
        when (/CA[TC]/i) { 'H'; }
        when (/AT[TCA]/i) {'I'; }
        when (/AA[AG]/i) { 'K'; }
        when (/TT[AG]|CT./i) { 'L'; }
        when (/ATG/i) { 'M'; }
        when (/AA[TC]/i) { 'N'; }
        when (/CC./i) { 'P'; }
        when (/CA[AG]/i) { 'Q'; }
        when (/CG.|AG[AG]/i) { 'R'; }
        when (/TC.|AG[TC]/i) { 'S'; }
        when (/AC./i) { 'T'; }
        when (/GT./i) { 'V'; }
        when (/TGG/i) { 'W'; }
        when (/TA[TC]/i) { 'Y'; }
        when (/TA[AG]|TGA/i) { '*'; }

        # none of the previous regex patterns matched
        default { say "I'm sorry. I don't recognize that codon!"; }
    }
}

# complement method
sub complement
{
    my ($data) = @_;
    my $complement = q{};

    # transliterate the sequence using base pairing rules
    ($complement = $data) =~ tr/acgtACGT/tgcaTGCA/;

    return $complement;
}


sub connect_db
{
    # import the DBI module
    use DBI;

    # accept the necessary parameters for database connectivity
    # $db is the database and $sql is the sql statement
    my ($db, $sql) = @_;

    # connect to the database with no user ID or password specified
    # Handle errors and assign a reference to the $dbh scalar.
    # State the reason for the database related error
    my $dbh = DBI->connect("dbi:SQLite:dbname=$db",
                            "",
                            "",
                            {RaiseError => 1},
    ) or die $DBI::errstr;

    # Prepare the sql statement and store it in the statement handler
    # scalar

    my $sth = $dbh->prepare($sql);

    # Execute the sql statement stored in the statement handler
    $sth->execute();

    # Retrieve the results from the query and store the results in the
    # result scalar
    my $result = $sth->fetch();

    # Loop over the array reference and display the record(s) followed
    # by a new line
    for (@$result)
    {
        print $_, "\n";
    }
                                                                                                                                                                                         
    $sth->finish();

    # Close the database connection
    $dbh->disconnect();
}

# display_graphic method
sub display_graphic
{
    my ($data) = @_;



}

# display_sequence method
sub display_sequence
{
    my ($data) = @_;

    # displays the sequence unaltered
    return $data;
}

# gc_content method
sub gc_content
{
    my ($data) = @_;

    # establish lexical variables
    my ($a_count, $c_count, $g_count, $t_count);

    # obtain the length of the sequence
    my $len1 = length $data;

    # obtain the count of each nucleotide in the sequence
    $a_count = ($data =~ tr/Aa//);
    $c_count = ($data =~ tr/Cc//);
    $g_count = ($data =~ tr/Gg//);
    $t_count = ($data =~ tr/Tt//);

    # calculate the percentage of g/c content
    my $gc_content = (($g_count + $c_count) / $len1) * 100;

    return $gc_content;
}

# multiple_sequence_alignment method
sub perform_multiple_alignment
{

    # receives input and output files
    my ($in_file, $out_file) = @_;

    # display the message as a HERE document, uses the external program, muscle,
    # to perform the multiple sequence alignment using the specified parameters
    # with the clustalw format used by default.
print<<'EOF';

Please wait while I perform your multiple sequence alignment.
Thank you.

EOF

my @alignment = `muscle -in $in_file -clw`;

print<<'EOF';
Alignment complete. Please retrieve your results in the designated
output file. Thank you.

EOF

}

# read_fasta_file method
sub read_fasta_file
{
    # receive the file name
    my ($file) = @_;
    my @data;

    # open the file for reading
    sysopen FH, $file, 'O_RDONLY', 0755;

    # read the entire file and store it in the @data array
    @data = <FH>;

    # close the file handle
    close FH;

    # remove the fasta header from the data
    for (@data)
    {
        s/^\>.*$//;
    }

    # return the modified data to the caller
    return @data;
}


# read_genbank_file method
sub read_genbank_file
{
    # receive the file
    my ($file) = @_;


}

# read_pdb_file method
sub read_pdb_file
{
    # receive the file
    my ($file) = @_;





}


# read_swiss_prot_file method
sub read_swiss_prot_file
{
    # receive the file
    my ($file) = @_;





}


# reverse_complement method
sub reverse_complement
{
    my ($data) = @_;
    my $complement = q{};
    my $revcom = q{};

    # transliterate the sequence using base pairing rules
    # copies the initial sequence into the complement variable
    ($complement = $data) =~ tr/acgtACGT/tgcaTGCA/;

    # reverses the complement, stores the value in the revcom variable
    # and returns it
    $revcom = reverse $complement;
    return $revcom;
}


sub search_sequence
{
    my ($data) = @_;




}


# seq_length method
sub seq_length
{
    my ($data) = @_;

    # return the length of the sequence
    return length ($data);
}


sub store_sequence_info
{
    my ($data, %args) = @_




}


# transcribe method
sub transcribe
{
    my ($data) = @_;
    my $mrna = q{};

    # copies the sequence into the mrna variable then globally substitutes
    # thymine for uracil
    ($mrna = $data) =~ s/t/u/gi;

    return $mrna;
}


# 'translate_sequence' function
sub translate_sequence
{
    #
    my ($data) = @_;

    # declare loop counter variable
    my $i = q{};

    # initialize the 'codon' variable
    my $codon = q{};

    # initialize the 'protein' variable
    my $protein = q{};

    # iterate over the dna sequence and extract each codon it finds
    # pass the codon to the 'codon2aa' function and assemble the protein
    # chain
    for ($i = 0; $i < (length ($data) - 2); $i += 3)
    {
        $codon = substr($data, $i, 3);
        $protein .= codon_to_aa($codon);
    }

    # return the protein to the calling object
    return $protein;
}


sub usage
{
    my ($data) = @_;
    print "Usage forms:\n\n";
    print '$obj = new GCCBioParser(-attribute => value)', "\n\n";
    print '$obj->method(argument)', "\n";
}


# 'AUTOLOAD' function
sub AUTOLOAD
{
   # declare the autoload variable to handle undefined functions
   our $AUTOLOAD;

   # warn the user that the function called does not exist
   warn "\nI don't see a function called $AUTOLOAD.\n";
   warn "Perhaps you intended to call a different function?\n";
}

# 'DESTROY' function
# Note: This function is called automatically upon program completion
sub DESTROY
{
   # Freeing allocated memory...
}


1;


=head1 DESCRIPTION

Allows the user to input a genetic sequence via a file read
where upon the the sequence is parsed and the relevent information
is parsed

=head2 Methods

B<AUTOLOAD> - Called automatically when the user attempts to call a
non-existent method.

B<bp_count> - Helper method (not called directly). Returns the individual
counts of each nucelotide in the biological sequence.

B<calc_annealing_temp> - Displays the estimated annealing point temperature
of the biological sequence. Calls the calc_melting_point method in order to
obtain the starting temperature.

B<calc_melting_temp> - Displays the estimated melting point temperature
of the biological sequence. Calls the bp_count helper method in order to
obtain the necessary frequencies.

B<codon_to_aa> - Receives a condon and returns the appropriate amino
acid.

B<complement> - Returns the complement of the sequence. Adheres to
Chargaff's base pairing rules.

B<connect_db> - Initiates a connection to a local or remote database for
sequence and/or information retrieval.

B<DESTROY> - Called automatically when an object exits scope and the
memory is was using needs to be re-claimed.

B<display_graphic> - Creates a two-dimensional or three-dimensional graphic
representing aspects of the sequences under examination. The type of graphic
is passed to the method in the form of a hash attribute with the image stored
on a physical memory device.

B<display_sequence> - Returns the sequence passed to the constructor in the
form of a file.

B<gc_content> - Calculates the percentage of guanosine and cytosine, GC,
in the biological sequence.

B<perform_multiple_alignment> - Receives two file names, one for input
and another for output. Retrieves two or more biological sequences
contained within the input file and performs a multiple sequence
alignment with the result stored in the output file passed to the method.

B<read_fasta_file> - Receives a fasta file and stores the file contents in
an array.

B<read_genbank_file> - Receives a genbank file and stores the file contents
in an array.

B<read_swiss_prot_file> - Receives a swiss-prot file and stores the file
contents in an array.

B<retrieve_data> - Retrieves a complex data structure, via data
deserialization, from a physical device.

B<reverse_complement> - Returns the reverse complement of the sequence
passed to the constructor using Chargaff's base pairing rules.

B<seq_length> - Returns the length of the sequence.

B<store_data> - Stores a complex data structure, via data serialization, on
a physical device for subsequent retrieval.

B<store_sequence_info> - Creates and stores sequence information, e.g.
Genbank section data (accession, origin, etc.) in a DBM hash for rapid
retrieval.

B<transcribe> - Transcribes and displays the DNA -> mRNA sequence to
standard output.

B<translate_seq> - Translates and displays the mRNA sequence into a
polypeptide chain of amino acids. Uses the IUPAC naming conventions.

B<usage> - Displays examples of how the module can be used for common tasks
pertaining to bioinformatics and/or computational biology.

=head1 REQUIRED ARGUMENTS

=head1 OPTIONS

The user can specify the format of the biological sequence, e.g.,
fasta, Genbank, protein, etc.

=head1 DEPENDENCIES

This library requires the user to have installed several, at the
time of this writing, non-core modules.

=head1 INCOMPATIBILITIES

Every reasonable effort has been made to maintain compatibility
with previous versions of Perl.

=head1 BUGS AND LIMITATIONS

None are known at this time

=head1 AUTHOR


=head1 LICENSE AND COPYRIGHT

Copyright 2013
This library is free software; you may redistribute it
under the same terms as Perl itself.