# load_files
#
# Scans the directory named by the file-level variable $dirname and, for every
# regular file whose name does not start with '.', prints one line to STDOUT:
#
#     <weight>\t<name>\t<accession>\t<formal_charge>\n
#
# Each column holds the whitespace-trimmed text found in front of the FIRST
# occurrence of the corresponding closing tag (</weight>, </name>,
# </accession>, </formal_charge>) in that file.  A column whose tag never
# appears is printed as 0.  Files are processed in lexical order ('B' sorts
# before 'a') so the output order is stable between runs.
#
# Takes no arguments and returns nothing useful.
# Dies if the directory cannot be opened, or if a file cannot be opened or
# closed.
#
# NOTE(review): the original patterns started with an empty capture group,
# which looks like an opening tag that was lost at some point.  As written,
# the captured value is everything on the line before the closing tag (so it
# includes the opening tag text if that is on the same line) -- confirm
# whether the opening tags should be reinstated.
sub load_files() {
    # Closing-tag names, in output-column order.  The order also decides
    # which field wins when a single line contains more than one of them.
    my @closing_tags = qw(weight name accession formal_charge);

    # Collect plain files only; skip dot-files and sub directories.
    opendir(my $dh, $dirname) or die "can't opendir $dirname: $!";
    my @files = grep { /^[^\.]/ && -f "$dirname/$_" } readdir($dh);
    closedir $dh;
    @files = sort @files;

    foreach my $file (@files) {
        # Found-ness is tracked with defined-ness rather than truthiness, so
        # a captured value such as "0" (e.g. a formal charge of zero) counts
        # as found and is not overwritten by a later occurrence of the tag.
        my %value_of;

        # Three-argument open: the file name is taken literally, even if it
        # has leading/trailing whitespace or characters such as '|' or '>'.
        open(my $file_fh, '<', "$dirname/$file") or die("$$: Error: failed to open file $dirname/$file. $!\n");

        LINE:
        while (my $line = <$file_fh>) {
            foreach my $tag (@closing_tags) {
                next if defined $value_of{$tag};              # keep first hit only
                next unless $line =~ m{(.+)</\Q$tag\E>};

                my $text = $1;
                $text =~ s/^\s+//;                            # trim leading whitespace
                $text =~ s/\s+$//;                            # trim trailing whitespace
                $value_of{$tag} = $text;
                last;                                         # at most one field per line
            }

            # Nothing left to look for in this file.
            last LINE if keys(%value_of) == @closing_tags;
        }

        # Fields that never appeared are reported as 0.
        print join("\t", map { defined $value_of{$_} ? $value_of{$_} : 0 } @closing_tags), "\n";

        close($file_fh) or die("$$: Error: failed to close file $dirname/$file. $!\n");
    }
}

main();