##### not finished, neither the connection with the database is closed #####
# Poster's note: But I do not know if it is a good way to solve such problems.
# Then, I thought to use hashes, where key is a kind of counter and value each
# element I want extract.

#!/usr/bin/perl -w
#
# Extracts fields from a GenBank flat file and stores the accession number and
# gene id of every record in the Chrom_4_AC table.
#
# The file doubles as a small library: the get_* subs are exported, and the
# database work in main() only runs when the file is executed directly.
use strict;
use warnings;
use DBI;
use DBD::mysql;
# NOTE(review): BeginPerlBioinfo may export its own get_file_data, which would
# clash with the one defined below -- confirm and drop one of the two.
use BeginPerlBioinfo;
#use Test::More tests=> 15
use Exporter;

our @ISA    = qw(Exporter);
our @EXPORT = qw(get_file_data get_accession get_gene get_origin get_translation);

# connect_to_db()
#
# Opens the MySQL connection and returns the database handle. Dies when the
# connection cannot be established (RaiseError is enabled, and the explicit
# `or die` covers a false return as well).
sub connect_to_db {
    # Placeholder connection settings, exactly as posted.
    my $dbname   = "1";
    my $dbserver = "";
    my $username = "";
    my $password = "p";

    # No whitespace inside the DSN: "host = x" is not the "host" attribute.
    my $datasource = "dbi:mysql:database=$dbname;host=$dbserver";

    my $dbh = DBI->connect(
        $datasource, $username, $password,
        {
            PrintError => 0,    # disable PrintError
            RaiseError => 1,    # enable RaiseError
        }
    ) or die "Error opening database: $DBI::errstr\n";

    return $dbh;
}

# main()
#
# Reads the GenBank file record by record, inserts (AC_number, Gene_id) for
# each record and closes the database connection afterwards.
#
# TODO: DNA_seq, start_stop_cs, aa_seq, transl_start_site, protein_prod_names
# and chromosomal_location are not stored yet; the table only has two columns
# in the posted INSERT statement.
sub main {
    my $genbank = 'chrom_CDS-4.txt';

    # With "//\n" as input separator every element returned by
    # get_file_data() is one complete record instead of one line.
    my @records = do {
        local $/ = "//\n";
        get_file_data($genbank);
    };

    my $dbh = connect_to_db();

    # use placeholders
    my $sth = $dbh->prepare(
        "INSERT INTO Chrom_4_AC (AC_number, Gene_id) VALUES (? ,? )"
    );

    for my $record (@records) {
        next unless $record =~ /\S/;    # trailing blank chunk after last "//"

        my $AC_number = get_accession($record);
        my $gene_id   = get_gene($record);
        if ($AC_number eq 'error' or $gene_id eq 'error') {
            warn "Skipping record without accession number or gene id\n";
            next;
        }
        $sth->execute($AC_number, $gene_id);
    }

    $dbh->disconnect;
    return;
}

# get_file_data($filename)
#
# Returns the content of $filename as a list of chunks, split according to
# the current input record separator $/ (lines by default). Prints a message
# to STDERR and exits with a non-zero status when the file cannot be opened.
sub get_file_data {
    my ($filename) = @_;

    open(my $fh, '<', $filename) or do {
        print STDERR "CANNOT open file '$filename': $!\n";
        exit 1;
    };
    my @filedata = <$fh>;
    close $fh;

    return @filedata;
}

# get_accession($genbank)
#
# Returns the first word after the ACCESSION keyword in the record text, or
# the string "error" when there is none.
sub get_accession {
    my $genbank = shift;

    if ($genbank =~ /ACCESSION\s*(\w+)/) {
        return $1;
    }
    return "error";
}

# get_gene($genbank)
#
# Returns the value of the first gene="..." qualifier in the record text, or
# the string "error" when there is none.
sub get_gene {
    my $genbank = shift;

    if ($genbank =~ /gene="(.*?)"/s) {
        return $1;
    }
    return "error";
}

# get_origin($genbank)
#
# Returns the sequence between the ORIGIN keyword and the "//" terminator,
# with whitespace and position numbers removed and upper-cased, or the string
# "error" when the record has no such section.
sub get_origin {
    my $genbank = shift;

    # Non-greedy so that a string holding several records stops at the first
    # "//" instead of swallowing everything up to the last one.
    if ($genbank =~ /ORIGIN\s*(.*?)\/\//s) {
        my $seq = $1;
        $seq =~ s/[\s\d]//g;
        return uc($seq);
    }
    return "error";
}

# get_translation($genbank)
#
# Returns the value of the first translation="..." qualifier with all
# whitespace removed and upper-cased, or the string "error" when there is
# none.
sub get_translation {
    my $genbank = shift;

    if ($genbank =~ /translation="(.*?)"/s) {
        my $pro = $1;
        $pro =~ s/[\s]//g;
        return uc($pro);
    }
    return "error";
}

main() unless caller;
######not finished##############
# Poster's note: I am completely lost now.. Could you give me an idea how to
# solve this problem.. I appreciate any help.

#!/usr/bin/perl -w
#
# Writes one "<accession>.fasta" file per GenBank entry found in the files
# named on the command line (or on STDIN when no file is given).
use strict;
use warnings;
use Data::Dumper;

my $fastaSuffix = ".fasta";

# main()
#
# Walks over @ARGV, parses every file with GBlite and writes the fields of
# each entry to its own file. Dies when a file cannot be opened or written.
sub main {
    @ARGV = qw(-) unless @ARGV;

    foreach my $filename (@ARGV) {
        # "-" means STDIN, as it did with the original two-argument open.
        my $in;
        if ($filename eq '-') {
            $in = \*STDIN;
        }
        else {
            open $in, '<', $filename
                or die "Couldn't open '$filename': $!";
        }

        my $genbank = GBlite->new($in);
        while (my $entry = $genbank->nextENTRY) {
            my $ac_number            = $entry->AC_number;
            my $gene_id              = $entry->gene_id;
            my $DNA_seq              = $entry->DNA_seq;
            my $start_stop_cs        = $entry->start_stop_cs;
            my $aa_seq               = $entry->aa_seq;
            my $transl_start_site    = $entry->transl_start_site;
            my $protein_prod_names   = $entry->protein_prod_names;
            my $chromosomal_location = $entry->chromosomal_location;

            if ($ac_number eq '') {
                warn "Skipping entry without accession number\n";
                next;
            }

            my $fasta = $ac_number . $fastaSuffix;
            warn "[working '$fasta']\n";
            open my $out, '>', $fasta or die "Couldn't open '$fasta': $!";
            print {$out} ">$ac_number\n", ">$gene_id\n", ">$DNA_seq\n",
                ">$start_stop_cs\n", ">$aa_seq\n", ">$transl_start_site\n",
                ">$protein_prod_names\n", ">$chromosomal_location\n";
            # Buffered write errors only surface at close.
            close $out or die "Couldn't close '$fasta': $!";
        }

        close $in unless $filename eq '-';
    }
    return;
}

package GBlite;
use strict;

# GBlite->new($fh)
#
# Creates a parser reading from $fh, which must be a GLOB reference (a
# lexical filehandle or \*HANDLE). Dies otherwise.
sub new {
    my ($class, $fh) = @_;

    # Parentheses are required: `ref $fh !~ /GLOB/` parses as
    # ref($fh !~ /GLOB/), which is always false.
    if (ref($fh) !~ /GLOB/) {
        die "GBlite error: new expects a GLOB reference not $fh\n";
    }

    my $this = bless {}, $class;
    $this->{FH}       = $fh;
    $this->{LASTLINE} = "";
    $this->{DONE}     = 0;
    return $this;
}

# $parser->fastForward
#
# Advances the handle to the next line starting with LOCUS. Returns 1 when
# such a line was found, 0 at end of input.
#
# NOTE(review): the posted code called this method without defining it; this
# is a reconstruction based on the LASTLINE/DONE fields set by new().
sub fastForward {
    my ($this) = @_;

    return 0 if $this->{DONE};
    return 1 if $this->{LASTLINE} =~ /^LOCUS/;

    my $FH = $this->{FH};
    while (my $line = <$FH>) {
        if ($line =~ /^LOCUS/) {
            $this->{LASTLINE} = $line;
            return 1;
        }
    }

    $this->{DONE} = 1;
    return 0;
}

# $parser->nextENTRY
#
# Returns the next entry as a GBlite::Entry object, or 0 when the input is
# exhausted. Only the accession numbers are parsed so far.
#
# TODO: gene_id, DNA_seq, start_stop_cs, aa_seq, transl_start_site,
# protein_prod_names and chromosomal_location are returned as empty strings
# until their parsing is written.
sub nextENTRY {
    my ($this) = @_;

    $this->fastForward or return 0;
    my $FH = $this->{FH};
    $this->{LASTLINE} = "";    # the LOCUS line of this entry is consumed

    # get ac_number which may be in several lines
    # (presumably continuation lines are indented -- verify against the data)
    my %seen;
    my @ac_numbers;
    my $in_accession = 0;
    my $terminated   = 0;

    while (my $line = <$FH>) {
        # VERSION ends the accession section; "//" and LOCUS guard against
        # records without a VERSION line running into the next record.
        if ($line =~ /^(?:VERSION|LOCUS|\/\/)/) {
            $this->{LASTLINE} = $line;
            $terminated = 1;
            last;
        }

        if ($line =~ s/^ACCESSION\s*//) {
            $in_accession = 1;
        }
        elsif ($line !~ /^\s/) {
            $in_accession = 0;    # another keyword started at column 0
        }
        next unless $in_accession;

        # Keep first-seen order; a hash alone would shuffle the accessions.
        foreach my $ac_number (split ' ', $line) {
            push @ac_numbers, $ac_number unless $seen{$ac_number}++;
        }
    }
    $this->{DONE} = 1 unless $terminated;    # ran into end of input

    return GBlite::Entry->new(
        AC_number  => (@ac_numbers ? $ac_numbers[0] : ''),
        accessions => \@ac_numbers,
    );
}

# Kept so that code using the originally posted (misspelled) name still works.
sub netxEntry {
    my ($this) = @_;
    return $this->nextENTRY;
}

package GBlite::Entry;
use strict;

# Fields read by the main script; each gets an accessor of the same name.
my @FIELDS = qw(
    AC_number gene_id DNA_seq start_stop_cs aa_seq
    transl_start_site protein_prod_names chromosomal_location
);

# GBlite::Entry->new(%fields)
#
# Creates an entry. Fields that are not passed default to the empty string;
# "accessions" defaults to an empty array reference.
sub new {
    my ($class, %args) = @_;

    my $this = bless {}, $class;
    foreach my $field (@FIELDS) {
        $this->{$field} = defined $args{$field} ? $args{$field} : '';
    }
    $this->{accessions} = $args{accessions} || [];
    return $this;
}

# Returns a reference to the list of all accession numbers of the entry,
# in the order they appeared; AC_number is the first of them.
sub accessions {
    my ($this) = @_;
    return $this->{accessions};
}

foreach my $field (@FIELDS) {
    no strict 'refs';
    *{ __PACKAGE__ . "::$field" } = sub { return $_[0]{$field} };
}

package main;

# Run only when executed as a script, after every package above is set up.
main() unless caller;
In reply to some perl advice by malaguena
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |