#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
# FASTAgeneratorDNA.pl
#
# Extracts regions from a DNA sequence and writes them as FASTA records.
#
#   -i  coordinates file; each non-blank line holds four whitespace-separated
#       fields: sequence id, strand (f, r, + or -), start, stop.
#       Start and stop are 1-based and inclusive.
#   -s  FASTA file with the DNA sequence. Header lines are skipped and every
#       other line is concatenated into one sequence, so the ids in the
#       coordinates file are used for output headers only.
#   -o  FASTA file to write. Reverse-strand regions are written as the
#       upper-cased reverse complement; forward-strand regions are written
#       exactly as they appear in the sequence file.

my $inputFile;       # -i: coordinates file
my $sequenceFile;    # -s: FASTA file with the DNA sequence
my $outputFile;      # -o: FASTA file to create

# Accepted strand tokens mapped onto the canonical "+" / "-" form.
my %hashStrand = ("f" => "+", "r" => "-", "+" => "+", "-" => "-");

my $usage = "Usage: FASTAgeneratorDNA.pl -i <input_file> -s <sequence_file> -o <output_file>\n";

Getopt::Long::Configure('bundling');
GetOptions(
    'i|input_file=s'    => \$inputFile,
    's|sequence_file=s' => \$sequenceFile,
    'o|output_file=s'   => \$outputFile,
) or die $usage;

# All three options are mandatory.
for my $required ($inputFile, $sequenceFile, $outputFile) {
    die $usage unless defined $required;
}

my $DNAsequence     = read_sequence($sequenceFile);
my $sequence_length = length $DNAsequence;

# Open the input before the output so that a missing coordinates file does
# not truncate an existing output file.
open(my $in_fh, '<', $inputFile)
    or die "Cannot open file '$inputFile': $!";
open(my $out_fh, '>', $outputFile)
    or die "Cannot open file '$outputFile' for output: $!";

while (my $coordinates = <$in_fh>) {
    next unless $coordinates =~ /\S/;    # ignore blank lines

    # split ' ' also discards leading whitespace, unlike split /\s+/.
    my ($seqid, $strand_token, $first, $last) = split ' ', $coordinates;
    my $where = "$inputFile line $.";

    die "Expected 4 fields (id, strand, start, stop) at $where\n"
        unless defined $last;

    my $strand = $hashStrand{$strand_token};
    die "Please specify the strand with a plus or minus symbol. ($where)\n"
        unless defined $strand;

    for my $coordinate ($first, $last) {
        die "Coordinate '$coordinate' is not a positive integer at $where\n"
            unless $coordinate =~ /\A[0-9]+\z/ && $coordinate > 0;
    }
    die "Start $first is greater than stop $last at $where\n"
        if $first > $last;
    die "Start $first lies beyond the end of the sequence ($sequence_length bases) at $where\n"
        if $first > $sequence_length;
    warn "Stop $last lies beyond the end of the sequence ($sequence_length bases) at $where; region truncated\n"
        if $last > $sequence_length;

    my $start    = $first - 1;        # substr() offsets are 0-based
    my $length   = $last - $start;    # inclusive range => stop - start + 1
    my $template = substr($DNAsequence, $start, $length);

    my $region = $strand eq "+" ? $template : reverse_complement($template);

    # NOTE: each record is written as "\n>id\nsequence", so the file starts
    # with an empty line and has no trailing newline. This layout is kept
    # byte-for-byte for compatibility with existing consumers.
    # The record is written only after every field has been validated, so a
    # bad line never leaves a header without a sequence in the output.
    print {$out_fh} "\n>$seqid", "\n$region"
        or die "Cannot write to file '$outputFile': $!";
}

close($out_fh) or die "Cannot close file '$outputFile': $!";
close($in_fh)  or die "Cannot close file '$inputFile': $!";

# Read a FASTA file and return all of its sequence lines joined into one
# string. Any line containing '>' is treated as a header and skipped.
# Whitespace (including the "\r" of CRLF line endings) is removed so that it
# cannot shift the coordinates of the bases that follow.
sub read_sequence {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Cannot open file '$path': $!";

    my $sequence = "";
    while (my $line = <$fh>) {
        next if $line =~ />/;
        $line =~ s/\s+//g;
        $sequence .= $line;
    }

    close($fh) or die "Cannot close file '$path': $!";
    return $sequence;
}

# Return the upper-cased reverse complement of a DNA string.
# A, C, G, T and the IUPAC ambiguity codes are complemented; any other
# character is left unchanged, so the result always has the input's length.
sub reverse_complement {
    my ($dna) = @_;

    my $revcomp = scalar reverse uc $dna;
    $revcomp =~ tr/ACGTRYKMSWBDHVN/TGCAYRMKSWVHDBN/;
    return $revcomp;
}