Oapples has asked for the wisdom of the Perl Monks concerning the following question:
Hi Perl Monks
I recently was working with a perl script that I'm having trouble with. The error I get is "readline() on closed filehandle I16S at makeTable.pl line 141." Could someone help me with this? Thanks
here is the code
#!/USR/BIN/perl -w use strict; use LWP::Simple; use XML::Simple; use Data::Dumper; use DB_File; use Getopt::Long; my ($infile, $removeSmall, $removeLarge, $cdHit); $infile = $ARGV[0]; my $option = GetOptions( "i=s" => \$infile, "s=i" => \$removeSmall, "l=i" => \$removeLarge, "c=s" => \$cdHit, ); if (! $infile) { print "Usage make_table5 [option]\n"; print "\nOptions\n\n"; print " -i input filename in TinySeq XML format, required\n"; print " -s minimal sequences size, default 10 \n"; print " -l maximal sequences size, defalut 10000\n"; print " -c sequence identity threshold, range from 1 to 0.7, de +falut 1\n"; print "\n\nQuestion, bugs, contact no one\n"; exit; } $removeSmall = ($removeSmall) ? $removeSmall : 10; $removeLarge = ($removeLarge) ? $removeLarge : 10000; $cdHit = ($cdHit) ? $cdHit : 1; #print "$infile $removeSmall $removeLarge $cdHit\n"; my $outtable = "$infile".".tab"; my $outfa = "$infile".".faa"; my $out16S = "$infile".".16S"; open (IF, "<$infile") or die "couldn't open $infile, $!\n"; open (OT, ">$outtable"); open (OF, ">$outfa"); open (O16S, ">$out16S"); #my $data = XMLin($infile); my $taxon = {}; my $genus = {}; my $holder = []; my $abbPool = {}; my $dataHolder = {}; print OT "Abbreviation\tSequenceDescription\tOrganism\tProteinSize\tGe +nBankIndex\#\tGroup\tKingdom\n"; while (<IF>) { push @$holder,$_; if (/\<\/TSeq\>/) { my $record = SimpleXML($holder); $holder = []; if (! exists $record->{'TSeq_taxid'}) { next; } # $record->{'TSeq_taxid'} = (exists $record->{'TSeq_taxid'}) ? +$record->{'TSeq_taxid'} : 32630; $record->{'TSeq_Description'} = $record->{'TSeq_defline'}; $record->{'TSeq_Description'} =~ s/ \[.+\]//; if ($record->{'TSeq_length'} < $removeSmall || $record->{'TSeq +_length'} > $removeLarge) { next; } split_seq(\$record->{'TSeq_sequence'}); $dataHolder->{$record->{'TSeq_gi'}} = $record; } } if ($cdHit < 1) { CdHit($dataHolder, $cdHit); } foreach (values %$dataHolder) { my $record = $_; my $taxid = $record->{'TSeq_taxid'}; if (! exists $taxon->{$record->{'TSeq_taxid'}}) { $taxon->{$record->{'TSeq_taxid'}} = Taxonomy($record->{'TS +eq_taxid'}); } my $abb; if (exists $record->{'TSeq_orgname'}) { if ($record->{'TSeq_orgname'} =~ /([A-Z])\w+\s([a-z][a-z]) +\w*/) { $abb = $1.$2; } else { $abb = 'Orf'; } } else { $record->{'TSeq_orgname'} = 'unknown'; $abb = 'Orf'; } if (! exists $abbPool->{$abb}) { $abbPool->{$abb} = 1; $abb .= '1'; } else { $abbPool->{$abb} ++; $abb .= $abbPool->{$abb}; } if (exists $taxon->{$taxid}->{'genus'}) { $genus->{$taxon->{$taxid}->{'genus'}->{'Name'}} = 'n'; } print OT "$abb\t"; # print OT "$description\t"; print OT "$record->{'TSeq_Description'}\t"; print OT "$record->{'TSeq_orgname'}\t"; print OT "$record->{'TSeq_length'}\t"; print OT "$record->{'TSeq_gi'}\t"; print OT "$taxon->{$taxid}->{'group'}\t"; print OT "$taxon->{$taxid}->{'superkingdom'}->{'Name'}\t"; # print "$taxon->{$taxid}->{'species'}->{'Name'}\t"; # print "$record->{'TSeq_gi'}\t$group\t$taxid\t$genus\t$class\t +$phylum\t$kingdom\t$superkingdom"; print OT "\n"; my $acc = (exists $record->{'TSeq_accver'}) ? $record->{'TSeq_ +accver'} : $record->{'TSeq_sid'}; print OF "\>$abb gi\|$record->{'TSeq_gi'}\|$acc $record->{'TSe +q_defline'}\n"; print OF "$record->{'TSeq_sequence'}\n"; } print "\n\n"; close IF; close OF; close OT; my $in16S = '/Users/saierlab/db/all_16S_rRNA.faa'; open (I16S,"$in16S"); my $inList = 'no'; my $genus16S = ''; while(<I16S>) { if ($_ =~ /\>(\w+)\s/) { $genus16S = $1; } if (exists $genus->{$genus16S}) { print O16S $_; $genus->{$genus16S} = 'p'; next; } } my $no16S = 0; foreach (keys %$genus) { # print "$genus->{$_}\n"; if ($genus->{$_} eq 'n') { $no16S++; print "$_\n"; next; } } if ($no16S) { # print $no16S; print "The genus list above do not have 16S rRNA in our database\n +\n"; } close I16S; close O16S; sub Taxonomy { my ($taxid) = @_; my $taxDB = {}; tie %$taxDB, "DB_File","/Users/saierlab/db/Taxonomy.db"; # tie %$taxDB, "DB_File",".Taxonomy.db"; unless (exists $taxDB->{$taxid}) { print "get taxonomy id $taxid information from NCBI...\n"; my $url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch +.fcgi?db=taxonomy&retmode=xml&id=$taxid"; my $taxOri = XMLin(get($url)); foreach (@{$taxOri->{'Taxon'}->{'LineageEx'}->{'Taxon'}}) { $taxDB->{$taxid} .= "$_->{'Rank'}\t$_->{'ScientificName'}\ +t$_->{'TaxId'}\t"; } if (exists $taxDB->{$taxid}) { $taxDB->{$taxid} =~ s/\t$//; # $taxDB->{$taxid} = "superkingdom\tunknown\t12908"; # } else { # Taxonomy($taxDB); } } return SplitTAX($taxDB->{$taxid}); } sub SplitTAX { my ($string) = @_; my $tax = {}; my @array = split(/\t/,$string); my $name = ''; my $rank = ''; my $id = ''; while (@array) { $rank = shift @array; $name = shift @array; $id = shift @array; if ($rank && $name && $id) { $tax->{$rank}->{'Name'} = $name; $tax->{$rank}->{'TaxId'} = $id; } $rank = ''; $name = ''; $id = ''; } if (exists $tax->{'superkingdom'}) { if (exists $tax->{'kingdom'}) { $tax->{'group'} = $tax->{'kingdom'}->{'Name'}; } elsif (exists $tax->{'phylum'}) { $tax->{'group'} = ($tax->{'phylum'}->{'Name'} eq 'Proteoba +cteria' && exists $tax->{'class'}) ? $tax->{'class'}->{'Name'} : $tax->{'phylum'}->{'Name' +}; } elsif (exists $tax->{'class'}) { $tax->{'group'} = $tax->{'class'}->{'Name'}; } elsif (exists $tax->{'family'}) { $tax->{'group'} = $tax->{'family'}->{'Name'}; } else { $tax->{'group'} = 'none'; } } else { $tax->{'superkingdom'}->{'Name'} = 'Unclassified'; $tax->{'superkingdom'}->{'TaxId'} = 12908; $tax->{'group'} = 'none'; } return $tax; } sub SimpleXML { my ($array) = @_; unless ($array) { return ''; } my $hash; foreach my $in (@$array) { chomp $in; if ($in =~ /^(\s*)\<(\S+)\>(.+)\<\/(\S+)\>/) { $hash->{$2} = $3; # print "$1 $2 $3 $4\n"; next; } if ($in =~ /^(\s*)\<(\S+)\>$/) { # print "$1 $2\n"; next; } } return $hash; } sub split_seq { for (my $i = 1; $i < (length(${$_[0]}) / 71); $i++) { substr ${$_[0]}, 71 * $i -1 , 0, "\n"; } } sub CdHit { my ($dataHolder, $cdHit) = @_; $cdHit = ($cdHit < 0.7 ) ? 0.7 : $cdHit; my $coHitHolder = []; open (OS,">temp_CdHit.in"); foreach my $gi (keys %$dataHolder) { my $record = $dataHolder->{$gi}; my $acc = (exists $record->{'TSeq_accver'}) ? $record->{'TSeq_ +accver'} : $record->{'TSeq_sid'}; print OS "\>gi\|$record->{'TSeq_gi'}\|$acc $record->{'TSeq_def +line'}\n"; print OS "$record->{'TSeq_sequence'}\n"; } system ("cd-hit -i temp_CdHit.in -o temp_CdHit.out -d 50 -c $cdHit +"); open (Ocd, "temp_CdHit.out.clstr"); my $group = -1; while (<Ocd>) { if ($_ =~ /gi\|(\d+)\|.+ (\d+\%|\*)$/) { push @{$coHitHolder->[$group]}, $1; # print "$1 \t$dataHolder->{$1}->{'TSeq_taxid'}\t"; # print "$dataHolder->{$1}->{'TSeq_length'}\t$2\t"; # print "$dataHolder->{$1}->{'TSeq_orgname'}\t"; # print "\n"; # delete $dataHolder->{$1}; } elsif ($_ =~ /\>Cluster/) { # print "\n$_"; $group ++; } } close Ocd; system ("rm -rf temp_CdHit*"); foreach my $cluster (@$coHitHolder) { @$cluster = sort {$dataHolder->{$a}->{'TSeq_taxid'} <=> $dataH +older->{$b}->{'TSeq_taxid'}} @$cluster; my $clsSize = (@$cluster); if ($clsSize > 1 ) { for (my $i = 1; $i < $clsSize; $i++) { # print "$cluster->[$i] "; delete $dataHolder->{$cluster->[$i]}; } } # print $dataHolder->{$cluster->[0]}->{'TSeq_taxid'}; # print $cluster->[0]; # print "@$cluster "; } }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: readline on closed filehandle error
by jwkrahn (Abbot) on Jan 24, 2012 at 00:41 UTC | |
by kielstirling (Scribe) on Jan 24, 2012 at 04:22 UTC | |
|
Re: readline on closed filehandle error
by ww (Archbishop) on Jan 24, 2012 at 00:56 UTC | |
|
Re: readline on closed filehandle error
by Khen1950fx (Canon) on Jan 24, 2012 at 03:38 UTC | |
by jwkrahn (Abbot) on Jan 24, 2012 at 07:23 UTC |