#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use Bio::SeqIO;
use File::Basename;
use Pod::Usage;
# seq-convert
# Copyright 2003 Tex Thompson <tex@mail.rit.edu>
# This is free software released under the Perl artistic license, see the
# RIT Package website at http://bioinformatics.rit.edu/~tex/ritpackage/ for
# more information.
# Main program: parse the command line, open the input file with Bio::SeqIO
# and either convert every sequence to the requested output format or print
# a raw subsequence of the first sequence.  Converted data goes to STDOUT;
# diagnostics go to STDERR so they can never be mixed into the output.

# Show the script under its bare name rather than its full path.
$0 = basename $0;

# Formats listed by --formats.  Bioperl supports more valid file formats,
# but they are currently untested.
our @valid_formats = qw( fasta genbank embl gcg swiss );
my $formats = join '', "Valid sequence file formats:\n",
    map { "\t$_\n" } @valid_formats;

# Report a fatal problem on STDERR and stop with a failure status.
my $bail = sub { print STDERR @_; exit 1 };

# parse the command line options; an unknown option shows the usage summary
my ( $input, $output, $subseq, $help, $print_formats );
GetOptions(
    'input=s'  => \$input,
    'output=s' => \$output,
    'help'     => \$help,
    'subseq=s' => \$subseq,
    'formats'  => \$print_formats,
) or pod2usage( -verbose => 0 );
$output ||= 'fasta';

# print the requested information and stop, if appropriate
if ( $print_formats ) {
    print $formats;
    exit 0;
}
pod2usage( -verbose => 2 ) if $help;
pod2usage( -verbose => 0 ) unless @ARGV;    # pod2usage exits by itself

# if a range is provided, make sure that it is valid; validate_range reports
# the problem itself and returns a true value for a bad range
if ( $subseq ) {
    exit 1 if validate_range( $subseq );
}

# create input/output objects using Bio::SeqIO
my $infile  = $ARGV[0];
my %in_args = ( '-file' => $infile );
$in_args{'-format'} = $input if $input;    # otherwise Bio::SeqIO decides

# catch exceptions from creating the Bio::SeqIO input object; the reason is
# in $@ ($! is not set by a Perl exception), and an undefined object is
# treated as a failure too
my $in = eval { Bio::SeqIO->new( %in_args ) };
if ( $@ || !$in ) {
    my $why = $@ || 'no sequence reader was created';
    $bail->( "Couldn't open file $infile: $why\n" );
}

# catch exceptions from creating the Bio::SeqIO output object
my $out = eval { Bio::SeqIO->new( -fh => \*STDOUT, -format => $output ) };
if ( $@ || !$out ) {
    my $why = $@ || 'no sequence writer was created';
    $bail->( "Error using format $output: $why\n" );
}

if ( $subseq ) {
    # print the raw residues of the requested range of the first sequence
    my ( $start, $end ) = split /-/, $subseq;
    my $seqobj = $in->next_seq();
    $bail->( "No sequence found in $infile\n" ) unless $seqobj;

    my $fragment = eval { $seqobj->subseq( $start, $end ) };
    $bail->( "Couldn't extract range $subseq: $@\n" ) if $@;
    print $fragment;
}
else {
    # convert every sequence in the input file
    while ( my $seq = $in->next_seq() ) {
        $out->write_seq( $seq );
    }
}
print "\n";
###############
# Subroutines #
###############
# validate_range( $range )
#
# Checks a --subseq argument.  An acceptable range has the form "x-y", where
# x and y are positive integers and x is not greater than y.  A rejected
# range is reported on STDERR.
#
# Returns 0 when the range is acceptable and 1 when it is not, so a true
# value means "invalid".
sub validate_range {
    my $range = shift;

    # Anchor the pattern so that stray text around the numbers is rejected.
    if ( defined $range && $range =~ /\A([0-9]+)-([0-9]+)\z/ ) {
        my ( $start, $end ) = ( $1, $2 );
        return 0 if $start >= 1 && $start <= $end;
    }

    # Report the offending argument itself; $_ is not set at this point.
    my $shown = defined $range ? $range : '';
    print STDERR "Bad range: $shown\n";
    return 1;
}
#####################
# Usage Information #
#####################

=head1 NAME

seq-convert - Conversion of biological sequence files.

=head1 SYNOPSIS

seq-convert [options] input-file

 options:

   --input <inputformat>
   --output <outputformat>
   --formats
   --subseq <range>
   --help

=head1 OPTIONS

=over 8

=item B< --input>

Specifies the format of the input file. If omitted, the choice of format is
left to Bio::SeqIO.

=item B< --output>

Specifies the output format. Defaults to fasta.

=item B< --formats>

Prints the sequence file formats available to this program.

=item B< --subseq range>

Selects a subsequence of the sequence contained in the input file. Ranges
should have the form x-y, where x and y are positive integers.

=item B< --help>

Prints a detailed help message.

=item B< --version>

Prints version information. (Not currently implemented: the program does not
accept this option.)

=back

=head1 EXAMPLES

 # print a help message
 $ seq-convert --help

 # convert mySeq.fasta to a GCG formatted file
 $ seq-convert --input fasta --output gcg mySeq.fasta

 # convert mySequence.genbank into a fasta formatted file
 # (fasta is the default output format, so both commands are equivalent)
 $ seq-convert --input genbank --output fasta mySequence.genbank
 $ seq-convert --input genbank mySequence.genbank

 # print the first 100 nucleotides of the sequence in mySequence.genbank
 $ seq-convert --input genbank --subseq 1-100 mySequence.genbank

=head1 DESCRIPTION

Part of the RIT Bioinformatics Package:
http://bioinformatics.sourceforge.net

This program reads a sequence from a file, converts it to another format and
prints the converted file to standard output.

=head1 AUTHOR

Tex Thompson <tex@bioinformatics.rit.edu>

=head1 LICENSE

B<seq-convert> is licensed under the GNU GPL license, available from
http://www.gnu.org/.

=cut