#!/usr/bin/perl
#
# FASTAgeneratorDNA.pl
#
# Cuts sub-sequences out of a DNA sequence file and writes them to an output
# file as FASTA records.
#
# Options (all required):
#   -i, --input_file     Coordinates file. Each non-blank line holds four
#                        whitespace-separated fields:
#                            seqid  strand  start  end
#                        strand is one of "+", "-", "f" or "r"; start and end
#                        are 1-based, inclusive positions with start <= end.
#   -s, --sequence_file  FASTA file holding the DNA sequence. Header lines are
#                        skipped and every remaining line is concatenated into
#                        ONE sequence (a multi-record file is therefore treated
#                        as a single contiguous sequence).
#   -o, --output_file    File to write; it is overwritten if it exists.
#
# For "+" records the extracted bases are written exactly as they appear in
# the sequence file. For "-" records the upper-cased reverse complement is
# written.
#
# Output layout: each record is written as "\n>seqid\nSEQUENCE", so the file
# starts with an empty line and does not end with a newline.

use strict;
use warnings;
use Getopt::Long;

my $USAGE = "Usage: FASTAgeneratorDNA.pl -i INPUT_FILE -s SEQUENCE_FILE -o OUTPUT_FILE\n";

# Complement of each supported (upper-case) base.
my %hashComps = ("T" => "A", "G" => "C", "C" => "G", "A" => "T", "N" => "N");

# Accepted strand codes, normalised to "+" / "-".
my %hashStrand = ("f" => "+", "r" => "-", "+" => "+", "-" => "-");

my $inputFile;
my $sequenceFile;
my $outputFile;

Getopt::Long::Configure('bundling');
GetOptions(
    'i|input_file=s'    => \$inputFile,
    's|sequence_file=s' => \$sequenceFile,
    'o|output_file=s'   => \$outputFile,
) or die($USAGE);

if (!defined($inputFile) || !defined($sequenceFile) || !defined($outputFile)) {
    die($USAGE);
}

# Three-argument open with lexical handles: the file name can never be
# interpreted as an open mode or a pipe.
open(my $in_fh, '<', $inputFile)
    or die("Cannot open file $inputFile: $!");
open(my $seq_fh, '<', $sequenceFile)
    or die("Cannot open file $sequenceFile: $!");
open(my $out_fh, '>', $outputFile)
    or die("Cannot open file $outputFile for output: $!");

# Load the whole sequence into memory, skipping FASTA header lines.
my $DNAsequence = "";
while (my $line = <$seq_fh>) {
    next if $line =~ /^\s*>/;    # FASTA header, possibly preceded by whitespace

    # Strip the line ending and any stray whitespace (including the "\r" of
    # CRLF files) so that positions in $DNAsequence match base positions.
    $line =~ s/\s+//g;
    $DNAsequence .= $line;
}
my $sequence_length = length($DNAsequence);

# Emit one FASTA record per coordinates line.
while (my $coordinates = <$in_fh>) {
    next unless $coordinates =~ /\S/;    # ignore blank lines

    # split ' ' ignores leading whitespace and the trailing newline.
    my ($seqid, $strand_code, $first, $last) = split(' ', $coordinates);

    if (!defined($last) || $first !~ /^\d+$/ || $last !~ /^\d+$/) {
        die("Malformed coordinates at line $. of $inputFile: "
            . "expected 'seqid strand start end'\n");
    }

    my $strand = $hashStrand{$strand_code};
    if (!defined($strand)) {
        die("Please specify the strand with a plus or minus symbol.");
    }

    my $start  = $first - 1;        # 0-based offset of the first base
    my $length = $last - $start;    # number of bases, both ends inclusive

    # substr() gives negative offsets and lengths a special meaning (count
    # from the end / leave characters off the end), so reject them instead of
    # silently extracting the wrong region.
    if ($start < 0 || $length < 1 || $start >= $sequence_length) {
        die("Coordinates $first-$last at line $. of $inputFile are outside "
            . "the sequence (length $sequence_length) or reversed\n");
    }

    my $template = substr($DNAsequence, $start, $length);

    print {$out_fh} "\n>$seqid";
    if ($strand eq "+") {
        print {$out_fh} "\n$template";
    }
    else {
        print {$out_fh} "\n" . reverse_complement($template);
    }
}

close($out_fh) or die("Cannot close file $outputFile: $!");
close($seq_fh) or die("Cannot close file $sequenceFile: $!");
close($in_fh)  or die("Cannot close file $inputFile: $!");

# Return the upper-cased reverse complement of a DNA string.
# Bases without an entry in %hashComps are written as "N" so the result
# always has the same length as the input.
sub reverse_complement {
    my ($sequence) = @_;

    my $revcomp = "";
    for my $base (reverse(split(//, uc($sequence)))) {
        $revcomp .= exists($hashComps{$base}) ? $hashComps{$base} : "N";
    }
    return $revcomp;
}