comment on

#!/usr/bin/perl -w

use strict;
use Bio::DB::GenBank;
use Bio::SeqIO;
use File::Basename;
use Pod::Usage;
use Getopt::Long;

# Configuration, filled in from the command line.
my ($verbose,$help,$format,$filename);
# Defaults used when the corresponding option is not given.
$verbose  = 0;
$help     = 0;
$format   = 'GCG';
$filename = 'ncbi_seq';

# GetOptions returns false when it meets an unknown or malformed option;
# show the usage message instead of carrying on with a half-parsed
# command line.
# NOTE(review): --filename is still accepted so existing invocations keep
# working, but output files are named after the accessions, so its value
# is not used below.
GetOptions( 'verbose'    => \$verbose,
            'filename=s' => \$filename,
            'format=s'   => \$format,
            'help'       => \$help,
) or pod2usage(2);
my $progname = basename $0;

# process arguments
if ( $help ) {
   pod2usage( -verbose => 2 );
}
pod2usage(2) unless @ARGV;

# start program flow
# Records are requested from GenBank as FASTA; each one is written back out
# through a Bio::SeqIO writer opened with the user-selected $format.
my $gb = Bio::DB::GenBank->new(
   -retrievaltype => 'tempfile',
   -format => 'fasta'
);

my $seqio = $gb->get_Stream_by_acc( \@ARGV );
my $count = 0;
while ( my $sequence = $seqio->next_seq ) {
   # Name the output file after the accession at the same position on the
   # command line, then advance so the next sequence gets the next name.
   # NOTE(review): this assumes the stream yields sequences in request order
   # and skips none -- confirm against Bio::DB::GenBank.
   my $outfile = make_outputfile( $ARGV[ $count ] );
   $count++;
   print "outfile = $outfile\n";

   # One writer per sequence, so every accession lands in its own file.
   my $seqout = Bio::SeqIO->new( -file => ">$outfile",
                                 -format => $format );

   $seqout->write_seq($sequence);
   if ( $verbose ) {
      print "Successfully fetched ", $sequence->display_id,
            "\n";
   }
}

# An empty stream would otherwise end the program without any output at all.
warn "$progname: no sequences were returned for @ARGV\n" unless $count;

# Pick a file name that does not clash with an existing regular file.
#
# Takes the desired file name. If no regular file of that name exists it is
# returned unchanged; otherwise the first of "<name>.0", "<name>.1", ... that
# is not an existing regular file is returned. Nothing is created on disk.
sub make_outputfile {
   my ($requested) = @_;

   # Nothing in the way: use the name exactly as given.
   return $requested unless -f $requested;

   # Count upwards until a numeric suffix is free.
   my $index = 0;
   $index++ while -f "$requested.$index";

   return "$requested.$index";
}


=head1 NAME

ncbi-fetch - fetch sequences directly from NCBI sequence databases

=head1 SYNOPSIS

ncbi-fetch accession1 [accession2 ...]

 options: 
 --format sequence_format
 --combine
 --help
 --verbose 

=head1 OPTIONS

=over 1

=item --verbose

 Causes ncbi-fetch to print output as it fetches sequences
 (turned off by default).

=item --format

 Specifies the sequence file format, valid formats include GenBank,
 FASTA, EMBL and GCG.

=item --help

 Prints a help message.

=back

=head1 EXAMPLES

 # Fetch the entire E. coli K-12 genome from the NCBI
 $ ncbi-fetch U00096

 # Fetch some cdk7 sequences from the NCBI
 $ ncbi-fetch NM_001239 NM_078489 NM_021128


=head1 DESCRIPTION

B<ncbi-fetch> will fetch sequences from the NCBI using the Bio::DB::GenBank
Perl module (available as part of the BioPerl package). Each sequence is
saved to a separate file named by accession number. This program will
introduce a three-second delay in between successive requests in order to
avoid placing too much stress on the NCBI servers.

=head1 AUTHOR

Tex Thompson <tex@biosysadmin.com>

=head1 LICENSE

B<ncbi-fetch> is licensed under the GNU GPL license, available from
http://www.gnu.org/.

=cut
[download]

In reply to ncbi-fetch by biosysadmin

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&amp;`
	<		`&lt;`
	>		`&gt;`
	[		`&#91;`
	]		`&#93;`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.