Oapples has asked for the wisdom of the Perl Monks concerning the following question:

Hi Perl Monks

I recently was working with a perl script that I'm having trouble with. The error I get is "readline() on closed filehandle I16S at makeTable.pl line 141." Could someone help me with this? Thanks

here is the code

#!/USR/BIN/perl -w use strict; use LWP::Simple; use XML::Simple; use Data::Dumper; use DB_File; use Getopt::Long; my ($infile, $removeSmall, $removeLarge, $cdHit); $infile = $ARGV[0]; my $option = GetOptions( "i=s" => \$infile, "s=i" => \$removeSmall, "l=i" => \$removeLarge, "c=s" => \$cdHit, ); if (! $infile) { print "Usage make_table5 [option]\n"; print "\nOptions\n\n"; print " -i input filename in TinySeq XML format, required\n"; print " -s minimal sequences size, default 10 \n"; print " -l maximal sequences size, defalut 10000\n"; print " -c sequence identity threshold, range from 1 to 0.7, de +falut 1\n"; print "\n\nQuestion, bugs, contact no one\n"; exit; } $removeSmall = ($removeSmall) ? $removeSmall : 10; $removeLarge = ($removeLarge) ? $removeLarge : 10000; $cdHit = ($cdHit) ? $cdHit : 1; #print "$infile $removeSmall $removeLarge $cdHit\n"; my $outtable = "$infile".".tab"; my $outfa = "$infile".".faa"; my $out16S = "$infile".".16S"; open (IF, "<$infile") or die "couldn't open $infile, $!\n"; open (OT, ">$outtable"); open (OF, ">$outfa"); open (O16S, ">$out16S"); #my $data = XMLin($infile); my $taxon = {}; my $genus = {}; my $holder = []; my $abbPool = {}; my $dataHolder = {}; print OT "Abbreviation\tSequenceDescription\tOrganism\tProteinSize\tGe +nBankIndex\#\tGroup\tKingdom\n"; while (<IF>) { push @$holder,$_; if (/\<\/TSeq\>/) { my $record = SimpleXML($holder); $holder = []; if (! exists $record->{'TSeq_taxid'}) { next; } # $record->{'TSeq_taxid'} = (exists $record->{'TSeq_taxid'}) ? +$record->{'TSeq_taxid'} : 32630; $record->{'TSeq_Description'} = $record->{'TSeq_defline'}; $record->{'TSeq_Description'} =~ s/ \[.+\]//; if ($record->{'TSeq_length'} < $removeSmall || $record->{'TSeq +_length'} > $removeLarge) { next; } split_seq(\$record->{'TSeq_sequence'}); $dataHolder->{$record->{'TSeq_gi'}} = $record; } } if ($cdHit < 1) { CdHit($dataHolder, $cdHit); } foreach (values %$dataHolder) { my $record = $_; my $taxid = $record->{'TSeq_taxid'}; if (! exists $taxon->{$record->{'TSeq_taxid'}}) { $taxon->{$record->{'TSeq_taxid'}} = Taxonomy($record->{'TS +eq_taxid'}); } my $abb; if (exists $record->{'TSeq_orgname'}) { if ($record->{'TSeq_orgname'} =~ /([A-Z])\w+\s([a-z][a-z]) +\w*/) { $abb = $1.$2; } else { $abb = 'Orf'; } } else { $record->{'TSeq_orgname'} = 'unknown'; $abb = 'Orf'; } if (! exists $abbPool->{$abb}) { $abbPool->{$abb} = 1; $abb .= '1'; } else { $abbPool->{$abb} ++; $abb .= $abbPool->{$abb}; } if (exists $taxon->{$taxid}->{'genus'}) { $genus->{$taxon->{$taxid}->{'genus'}->{'Name'}} = 'n'; } print OT "$abb\t"; # print OT "$description\t"; print OT "$record->{'TSeq_Description'}\t"; print OT "$record->{'TSeq_orgname'}\t"; print OT "$record->{'TSeq_length'}\t"; print OT "$record->{'TSeq_gi'}\t"; print OT "$taxon->{$taxid}->{'group'}\t"; print OT "$taxon->{$taxid}->{'superkingdom'}->{'Name'}\t"; # print "$taxon->{$taxid}->{'species'}->{'Name'}\t"; # print "$record->{'TSeq_gi'}\t$group\t$taxid\t$genus\t$class\t +$phylum\t$kingdom\t$superkingdom"; print OT "\n"; my $acc = (exists $record->{'TSeq_accver'}) ? $record->{'TSeq_ +accver'} : $record->{'TSeq_sid'}; print OF "\>$abb gi\|$record->{'TSeq_gi'}\|$acc $record->{'TSe +q_defline'}\n"; print OF "$record->{'TSeq_sequence'}\n"; } print "\n\n"; close IF; close OF; close OT; my $in16S = '/Users/saierlab/db/all_16S_rRNA.faa'; open (I16S,"$in16S"); my $inList = 'no'; my $genus16S = ''; while(<I16S>) { if ($_ =~ /\>(\w+)\s/) { $genus16S = $1; } if (exists $genus->{$genus16S}) { print O16S $_; $genus->{$genus16S} = 'p'; next; } } my $no16S = 0; foreach (keys %$genus) { # print "$genus->{$_}\n"; if ($genus->{$_} eq 'n') { $no16S++; print "$_\n"; next; } } if ($no16S) { # print $no16S; print "The genus list above do not have 16S rRNA in our database\n +\n"; } close I16S; close O16S; sub Taxonomy { my ($taxid) = @_; my $taxDB = {}; tie %$taxDB, "DB_File","/Users/saierlab/db/Taxonomy.db"; # tie %$taxDB, "DB_File",".Taxonomy.db"; unless (exists $taxDB->{$taxid}) { print "get taxonomy id $taxid information from NCBI...\n"; my $url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch +.fcgi?db=taxonomy&retmode=xml&id=$taxid"; my $taxOri = XMLin(get($url)); foreach (@{$taxOri->{'Taxon'}->{'LineageEx'}->{'Taxon'}}) { $taxDB->{$taxid} .= "$_->{'Rank'}\t$_->{'ScientificName'}\ +t$_->{'TaxId'}\t"; } if (exists $taxDB->{$taxid}) { $taxDB->{$taxid} =~ s/\t$//; # $taxDB->{$taxid} = "superkingdom\tunknown\t12908"; # } else { # Taxonomy($taxDB); } } return SplitTAX($taxDB->{$taxid}); } sub SplitTAX { my ($string) = @_; my $tax = {}; my @array = split(/\t/,$string); my $name = ''; my $rank = ''; my $id = ''; while (@array) { $rank = shift @array; $name = shift @array; $id = shift @array; if ($rank && $name && $id) { $tax->{$rank}->{'Name'} = $name; $tax->{$rank}->{'TaxId'} = $id; } $rank = ''; $name = ''; $id = ''; } if (exists $tax->{'superkingdom'}) { if (exists $tax->{'kingdom'}) { $tax->{'group'} = $tax->{'kingdom'}->{'Name'}; } elsif (exists $tax->{'phylum'}) { $tax->{'group'} = ($tax->{'phylum'}->{'Name'} eq 'Proteoba +cteria' && exists $tax->{'class'}) ? $tax->{'class'}->{'Name'} : $tax->{'phylum'}->{'Name' +}; } elsif (exists $tax->{'class'}) { $tax->{'group'} = $tax->{'class'}->{'Name'}; } elsif (exists $tax->{'family'}) { $tax->{'group'} = $tax->{'family'}->{'Name'}; } else { $tax->{'group'} = 'none'; } } else { $tax->{'superkingdom'}->{'Name'} = 'Unclassified'; $tax->{'superkingdom'}->{'TaxId'} = 12908; $tax->{'group'} = 'none'; } return $tax; } sub SimpleXML { my ($array) = @_; unless ($array) { return ''; } my $hash; foreach my $in (@$array) { chomp $in; if ($in =~ /^(\s*)\<(\S+)\>(.+)\<\/(\S+)\>/) { $hash->{$2} = $3; # print "$1 $2 $3 $4\n"; next; } if ($in =~ /^(\s*)\<(\S+)\>$/) { # print "$1 $2\n"; next; } } return $hash; } sub split_seq { for (my $i = 1; $i < (length(${$_[0]}) / 71); $i++) { substr ${$_[0]}, 71 * $i -1 , 0, "\n"; } } sub CdHit { my ($dataHolder, $cdHit) = @_; $cdHit = ($cdHit < 0.7 ) ? 0.7 : $cdHit; my $coHitHolder = []; open (OS,">temp_CdHit.in"); foreach my $gi (keys %$dataHolder) { my $record = $dataHolder->{$gi}; my $acc = (exists $record->{'TSeq_accver'}) ? $record->{'TSeq_ +accver'} : $record->{'TSeq_sid'}; print OS "\>gi\|$record->{'TSeq_gi'}\|$acc $record->{'TSeq_def +line'}\n"; print OS "$record->{'TSeq_sequence'}\n"; } system ("cd-hit -i temp_CdHit.in -o temp_CdHit.out -d 50 -c $cdHit +"); open (Ocd, "temp_CdHit.out.clstr"); my $group = -1; while (<Ocd>) { if ($_ =~ /gi\|(\d+)\|.+ (\d+\%|\*)$/) { push @{$coHitHolder->[$group]}, $1; # print "$1 \t$dataHolder->{$1}->{'TSeq_taxid'}\t"; # print "$dataHolder->{$1}->{'TSeq_length'}\t$2\t"; # print "$dataHolder->{$1}->{'TSeq_orgname'}\t"; # print "\n"; # delete $dataHolder->{$1}; } elsif ($_ =~ /\>Cluster/) { # print "\n$_"; $group ++; } } close Ocd; system ("rm -rf temp_CdHit*"); foreach my $cluster (@$coHitHolder) { @$cluster = sort {$dataHolder->{$a}->{'TSeq_taxid'} <=> $dataH +older->{$b}->{'TSeq_taxid'}} @$cluster; my $clsSize = (@$cluster); if ($clsSize > 1 ) { for (my $i = 1; $i < $clsSize; $i++) { # print "$cluster->[$i] "; delete $dataHolder->{$cluster->[$i]}; } } # print $dataHolder->{$cluster->[0]}->{'TSeq_taxid'}; # print $cluster->[0]; # print "@$cluster "; } }

Replies are listed 'Best First'.
Re: readline on closed filehandle error
by jwkrahn (Abbot) on Jan 24, 2012 at 00:41 UTC
    my $in16S = '/Users/saierlab/db/all_16S_rRNA.faa'; open (I16S,"$in16S"); my $inList = 'no'; my $genus16S = ''; while(<I16S>) {

    You should verify that open worked correctly before trying to use an invalid filehandle:

    my $in16S = '/Users/saierlab/db/all_16S_rRNA.faa'; open I16S, '<', $in16S or die "Cannot open '$in16S' because: $!"; my $inList = 'no'; my $genus16S = ''; while ( <I16S> ) {

      I second this .. if open fails and you don't catch it the read on the filehandle when running with warnings will produce the error "readline() on closed filehandle". Good to see that you have warnings on +1 to that ;)

      Kiel R Stirling
Re: readline on closed filehandle error
by ww (Archbishop) on Jan 24, 2012 at 00:56 UTC
    Cut your code down to a minimal (preferably 15 lines or less), compliable script which reproduces the problem. At that point, you have a far better chance that the (busy) Monks will be able to take the time to help...

    ... and, in doing so, you may well discover why the 317 line versions fails. This is way too much code for a very basic question.

    But, in the interest of demonstrating our desire to help you learn, you should modify your code to test the open at 137. Instead of

    open (I16S,"$in16S");

    test the open (and specify that you are opening for a read):

    open (I16S, "<", "$in16S" or die "Can't open $fh for read: $!;"

    And while you're at it, look at using variables instead of raw filehandles; e.g., $fh as the first element insde the parens) and (perhaps; as I said, 300+ lines is too much of a burden) use a meaningful name for the var, $in16S.

Re: readline on closed filehandle error
by Khen1950fx (Canon) on Jan 24, 2012 at 03:38 UTC

    I condensed your script down to the offending part. It still needs some work, but the readline warning disappeared.

    #!/usr/bin/perl use strict; no strict 'refs'; use warnings; my $in16S = '/root/Desktop/pro.faa'; open I16S, '<', $in16S; my %r_genus16S; my ($genus16s) = <I16S>; while ( defined( ${'genus16S'} = readline(*I16S) ) ) { if ( $_ =~ />(\w+)\s/ ) { ${'genus16S'} = $1; } if ( exists $${'genus16S'}{ ${'genus16S'} } ) { print { 'I16S'; } $_; $${'genus16S'}{ ${'genus16S'} } = 'p'; next; } } my $no16S = 0; foreach $_ ( keys %${ 'r_genus16S'; } ) { if ( $${'genus16S'}{$_} eq 'n' ) { ++$no16S; print "$_\n"; next; } } close I16S;
      open I16S, '<', $in16S;

      You don't test that open succeeded so this code has the same problem as the OP's code.

      my %r_genus16S;

      You don't use this variable anywhere so why are you creating it?

      my ($genus16s) = <I16S>;

      That reads the entire file and assigns the first line to $genus16s.

      while ( defined( ${'genus16S'} = readline(*I16S) ) ) {

      Which means that this loop will never execute because I16S is already at EOF at this point.    And WTF is up with ${'genus16S'}?