uninitialized split value

etheral has asked for the wisdom of the Perl Monks concerning the following question:

####################################################################
##                               MAIN
####################################################################

print "Reading a file...\n";

my $file = get_file('pdb244l.ent'); #execute get_file subroutine for a
+ given input file
                                    #and store the data within $file

print "Parsing a file...\n";

my %record_types = parse_pdb_file($file);

print "Getting data...\n";

$record_types{'HELIX'} ? my %helix = get_helix($record_types{'HELIX'})
+ : ();

$record_types{'SHEET'} ? my %sheet = get_sheet($record_types{'SHEET'})
+ : ();

$record_types{'TURN'}  ? my %turn  = get_turn($record_types{'TURN'})  
+: ();

my %chains = extractSEQRES($record_types{'SEQRES'});

exit;

####################################################################
##                               SUBS
####################################################################

sub get_file { #to read from a file

  my ($input_file) = @_;
  
  open (IN, $input_file) || die "Cannot open $input_file for reading: 
+$OS_ERROR\n"; #open a filehandle or die  
  
  my $sequence = ''; 
  
  foreach my $line (<IN>) { #for each line in the filehandle IN

    $sequence .= $line # add (concatenate) to a string sequence 
      
  }
  
  return $sequence; #return the string sequence
  
  close (IN);

}

sub parse_pdb_file { #to return a hash with keys that are record type 
+names and 
                     #values that are scalar containing lines for that
+ record type
  
  my @file = @_;

  my %record_types = ();
  
  foreach my $line (@file) {
    
    my ($record_type) = ($line =~ /^(\S+)/); #the pattern (\S+) is ret
+urned and saved in $recordtype
    
    if (defined $record_types{$record_type} ) {
      
      $record_types{$record_type} .= $line;
      
    } else {
      
      $record_types{$record_type} = $line; #.= fails if a key is undef
+ined
      
    }
    
  }
  
  return %record_types;
  
}

sub get_helix { #to return a scalar containing H if given a scalar con
+tains HELIX line

  my($helix) = @_;

  my @record = split ( /\n/, $helix);
  
  my %chain_hash = ();
  
  foreach my $line (@record) {
    
    my($this_chain) = stripspaces(substr($line, 19, 1));
    
    my($start) = stripspaces(substr($line, 21, 4));
    
    my($length) = stripspaces(substr($line, 71, 5));
    
    if (defined $chain_hash{$this_chain}) {
      
      $chain_hash{$this_chain} .= ':' . 'H' x $length . ";$start";
      
    } else {
      
      $chain_hash{$this_chain} = 'H' x $length . ";$start";
      
    }
  
  }
  
  return %chain_hash;

}

sub get_sheet  { #to return a scalar containing H if given a scalar co
+ntains SHEET line

  my($sheet) = @_;
  
  my @record = split ( /\n/, $sheet);
  
  my %chain_hash = ();
  
  foreach my $line (@record) {
    
    my($this_chain) = stripspaces(substr($line, 21, 1));
    
    my($start) = stripspaces(substr($line, 22, 4));
    
    my($end) = stripspaces(substr($line, 33, 4));

    my($length)    = $end - $start + 1;
    
    if (defined $chain_hash{$this_chain}) {
     
      $chain_hash{$this_chain} .= ':' . 'S' x $length . ";$start";

    } else {
       
      $chain_hash{$this_chain} = 'S' x $length . ";$start";

    }
   
  }

  return %chain_hash;

}

sub get_turn { #to return a scalar containing H if given a scalar cont
+ains TURN line

  my($turn) = @_;

  my @record = split ( /\n/, $turn);

  my %chain_hash = ();
    
  foreach my $line (@record) {
   
    my($this_chain) = stripspaces(substr($line, 19, 1));
    
    my($start) = stripspaces(substr($line, 20, 4));
    
    my($end) = stripspaces(substr($line, 31, 4));

    my($length)    = $end - $start + 1;
    
    if (defined $chain_hash{$this_chain}) {

      $chain_hash{$this_chain} .= ':' . 'T' x $length . ";$start";

    } else {
       
      $chain_hash{$this_chain} = 'T' x $length . ";$start";

    }
   
  }

  return %chain_hash;

}

sub stripspaces { #to remove leading and trailing spaces

  my($string) = @_;

  $string =~ s/^\s*//;

  $string =~ s/\s*$//;

  return $string;

}

sub extractSEQRES {
  
  my ($seqres) = @_;
  
  my $lastchain;
  
  my $sequence = '';
  
  my %results = ();
  
  my @record = split(/\n/, $seqres);
  
  foreach my $line (@record) {
    
    my ($thischain) = stripspaces(substr($line, 11, 1));
    
    my ($residues)  = substr($line, 19, 52);    
    
    if (not defined $lastchain) {
      
      $sequence = $residues;
      
    } elsif ("$thischain" eq "$lastchain") {
      
      $sequence .= $residues;
      
    } elsif ($sequence) {
      
      $results{$lastchain} = $sequence;
      
      $sequence = $residues;
      
    }
    
    $lastchain = $thischain;
    
  }
  
  $results{$lastchain} = $sequence;
  
  return %results;
  
}
[download]

When I run this code, I get an error Use of uninitialized value $seqres in split at ./PDB_secondary_ex11.5.pl line 255, <IN> line 1726. Use of uninitialized value $lastchain in hash element at ./PDB_secondary_ex11.5.pl line 283, <IN> line 1726. Both errors refer to extractSEQRES sub, the rest of the code is fine. My question is what is wrong, please enlighten me:) Please take notice that

my($turn) = @_;

  my @record = split ( /\n/, $turn);
[download]

is ok, and the same statement in sub extractSEQRES is not, yet BOTH exist in record_types hash.

Comment on uninitialized split value Select or Download Code

Replies are listed 'Best First'.
Re: uninitialized split value by toolic (Bishop) on Oct 17, 2011 at 13:53 UTC
Use of uninitialized value $seqres in split at You would get that error if the `%record_types` hash does not have a `SEQRES` key: `use Data::Dumper; print Dumper(\%record_types);` [download] Basic debugging checklist	[reply] [d/l] [select]
Re^2: uninitialized split value by etheral (Acolyte) on Oct 17, 2011 at 14:45 UTC
I know. The funny thing is it does have those keys. Paranormal activity here??	[reply]
Re^3: uninitialized split value by toolic (Bishop) on Oct 17, 2011 at 14:56 UTC
http://sscce.org	[reply]
Re^4: uninitialized split value by etheral (Acolyte) on Oct 17, 2011 at 16:20 UTC
Re^5: uninitialized split value by Marshall (Canon) on Oct 17, 2011 at 16:35 UTC
Re: uninitialized split value by Anonymous Monk on Oct 17, 2011 at 14:03 UTC
use diagnostics/ splain, (W uninitialized) Basically, you're treating $seqres, $line, and all the substrings you create , as if they're defined Something else that is peculiar, you're calling stripspaces on a single character, is that what you want? And also that tertiary syntax usage seems very bizzare `$ perl -MO=Deparse,-p $record_types{'HELIX'} ? my %helix = get_helix($record_types{'HELIX'}) + : (); __END__ ($record_types{'HELIX'} ? (my(%helix) = get_helix($record_types{'HELIX +'})) : ());` [download] easier written as `$record_types{'HELIX'} and my %helix = get_helix($record_types{'HELIX'} );`	[reply] [d/l] [select]
Re: uninitialized split value by GrandFather (Saint) on Oct 17, 2011 at 22:26 UTC
If I replace the get_file sub with: `sub get_file { #to read from a file return <<SEQUENCE; #return the string sequence SEQUENCE }` [download] I get: `Reading a file... Use of uninitialized value $record_type in hash element at noname1.pl +line 53. Parsing a file... Getting data... Use of uninitialized value $record_type in hash element at noname1.pl +line 59. Use of uninitialized value $seqres in split at noname1.pl line 194. Use of uninitialized value $lastchain in hash element at noname1.pl li +ne 222.` [download] which is consistent with all those replies saying you need to handle blank lines. Adding: `next if $line !~ /\S/;` [download] as the first line of the for loop in parse_pdb_file cleans up 1/2 the warnings. Adding: `return if ! defined $seqres \|\| $seqres !~ /\S/;` [download] as the second code line of extractSEQRES fixes the rest. A better analysis of the code than I have done may suggest better places to detect and handle unexpected file content. True laziness is hard work	[reply] [d/l] [select]
Re: uninitialized split value by jwkrahn (Abbot) on Oct 17, 2011 at 20:58 UTC
my $file = get_file('pdb244l.ent'); #execute get_file subroutine for a + given input file ... my %record_types = parse_pdb_file($file); ... sub get_file { #to read from a file my ($input_file) = @_; open (IN, $input_file) \|\| die "Cannot open $input_file for reading: +$OS_ERROR\n"; #open a filehandle or die my $sequence = ''; foreach my $line (<IN>) { #for each line in the filehandle IN $sequence .= $line # add (concatenate) to a string sequence } return $sequence; #return the string sequence close (IN); } ... sub parse_pdb_file { #to return a hash with keys that are record type +names and #values that are scalar containing lines for that + record type my @file = @_; my %record_types = (); foreach my $line (@file) { my ($record_type) = ($line =~ /^(\S+)/); #the pattern (\S+) is ret +urned and saved in $recordtype if (defined $record_types{$record_type} ) { $record_types{$record_type} .= $line; } else { $record_types{$record_type} = $line; #.= fails if a key is undef +ined } } return %record_types; } [download] You are reading in the file as a single block of data but then you are parsing the file as if it were separated into lines which means that you are only populating `%record_types` from the first line of the file.	[reply] [d/l] [select]