#!/usr/bin/perl -w
use strict;
use Data::Dumper;

# Suffix appended to an entry's accession value to form its output file name.
my $fastaSuffix = ".fasta";

# Run the conversion only when this file is executed as a script, so that the
# GBlite package below can also be loaded with require/do without side effects.
unless (caller) {
    # With no arguments, read from standard input ('-').
    @ARGV = qw(-) unless @ARGV;
    foreach my $filename (@ARGV) {
        process_file($filename);
    }
}

# process_file($filename)
#
# Reads entries from $filename through GBlite and writes one output file per
# entry, named "<accession><suffix>". The file name '-' means standard input.
# Dies if a file cannot be opened, written or closed.
#
# NOTE(review): the entry accessors used here (AC_number, gene_id, ...) and
# GBlite::fastForward are not defined in the visible part of this file, and
# nextENTRY as visible returns a plain array reference. Confirm where the
# entry class is supposed to come from before relying on this loop.
sub process_file {
    my ($filename) = @_;

    my $in;
    if ($filename eq '-') {
        # Three-argument open would treat '-' as a literal file name.
        $in = \*STDIN;
    }
    else {
        open $in, '<', $filename
            or die "Couldn't open '$filename': $!";
    }

    my $genbank = GBlite->new($in);
    while (my $entry = $genbank->nextENTRY) {
        my $ac_number            = $entry->AC_number;
        my $gene_id              = $entry->gene_id;
        my $DNA_seq              = $entry->DNA_seq;
        my $start_stop_cs        = $entry->start_stop_cs;
        my $aa_seq               = $entry->aa_seq;
        my $transl_start_site    = $entry->transl_start_site;
        my $protein_prod_names   = $entry->protein_prod_names;
        my $chromosomal_location = $entry->chromosomal_location;

        my $fasta = $ac_number . $fastaSuffix;
        warn "[working '$fasta']\n";

        open my $out, '>', $fasta
            or die "Couldn't open '$fasta': $!";
        print {$out}
            ">$ac_number\n",
            ">$gene_id\n",
            ">$DNA_seq\n",
            ">$start_stop_cs\n",
            ">$aa_seq\n",
            ">$transl_start_site\n",
            ">$protein_prod_names\n",
            ">$chromosomal_location\n"
            or die "Couldn't write '$fasta': $!";
        # Buffered write errors only surface at close time.
        close $out
            or die "Couldn't close '$fasta': $!";
    }

    close $in if $filename ne '-';
    return;
}

package GBlite;
use strict;
use Scalar::Util qw(reftype);

# GBlite->new($fh)
#
# Builds a reader around $fh, which must be a GLOB reference (plain or
# blessed, e.g. \*FILE or a lexical handle from open). Dies otherwise.
# Returns the new object with FH, LASTLINE ("") and DONE (0) initialised.
sub new {
    my ($class, $fh) = @_;

    # The original test "ref $fh !~ /GLOB/" parsed as ref($fh !~ /GLOB/)
    # and therefore never fired; check the underlying reference type instead.
    my $type = reftype($fh);
    unless (defined $type && $type eq 'GLOB') {
        my $shown = defined $fh ? $fh : 'undef';
        die "GBlite error: new expects a GLOB reference not $shown\n";
    }

    my $this = bless {}, ref($class) || $class || __PACKAGE__;
    $this->{FH}       = $fh;
    $this->{LASTLINE} = "";
    $this->{DONE}     = 0;
    return $this;
}

# $reader->nextENTRY
#
# Returns 0 when fastForward reports nothing to read. Otherwise reads lines
# from the handle until one starts with "VERSION", which is stored in
# LASTLINE, and returns an array reference holding the distinct
# whitespace-separated tokens seen before that line (in no particular order).
#
# NOTE(review): only the accession part of the parser is visible here; the
# remaining fields (gene_id, DNA_seq, start_stop_cs, aa_seq,
# transl_start_site, protein_prod_names, chromosomal_location) are not parsed
# and no entry object is built. Depending on where fastForward leaves the
# handle, a leading keyword on the first line may be among the tokens.
sub nextENTRY {
    my ($this) = @_;
    $this->fastForward or return 0;
    my $FH = $this->{FH};

    # The accession list may span several lines; collect each token once.
    my %seen;
    while (my $line = <$FH>) {
        if ($line =~ /^VERSION/) {
            $this->{LASTLINE} = $line;
            last;
        }
        $seen{$_}++ for split ' ', $line;
    }

    return [ keys %seen ];
}

# Backward-compatible alias: the method was originally defined under this
# misspelled name while callers use nextENTRY.
sub netxEntry {
    my $this = shift;
    return $this->nextENTRY(@_);
}

1;