Hello Monks
I'm going through one to many file(s) that contain chromosome information. I am processing this by chromosome, therefore when I reach a new chromosome in the file, I need to rewind one line in the file in order to not skip processing the first line of the next chromosome. I've tried this several different ways using FileHandle and IO::File, but none of them seem to produce the desired output.
Here's the IO::File version of the code:
use IO::File;
use IO::Seekable;
use Data::Dumper;
use strict;
use warnings;
my @files = qw( test_input.txt);
my @fileHndls;
foreach my $file (@files) {
if ( ! -s $file ) {
die "File $file does not exist! Check the command line or
+the pedigree file for errors!\n";
}
my $fh = new IO::File;
if ( $file =~ /.*\.bz2/ ) {
if ( $fh->open("bzcat $file |")) {
push @fileHndls, $fh;
} else {
die "Could not uncompress file $file on "
. "the fly!\n";
}
} elsif ( $file =~ /.*\.gz/ ) {
if ( $fh->open("gunzip -c $file |")) {
push @fileHndls, $fh;
} else {
die "Could not uncompress file $file on "
. "the fly!\n";
}
} else {
if ( $fh->open("< $file")) {
push @fileHndls, $fh;
} else {
die "Could not open file $file!\n";
}
}
}
my @chromosomes = qw( chr1 chr2 chr3 chr4 chr5 chr6
chr7 chr8 chr9 chr10 chr11 chr12
chr13 chr14 chr15 chr16 chr17
chr18 chr19 chr20 chr21 chr22
chrX chrY chrM );
foreach my $cChrom (@chromosomes) {
print "processing chrom $cChrom\n";
my ($cIndex) = grep { $chromosomes[$_] eq $cChrom } 0..$#chromosome
+s;
print "filehandles for $cChrom:\n";
print Dumper \@fileHndls;
my $fileHndlsReturn = process($cChrom, $cIndex, \@fileHndls);
@fileHndls = @$fileHndlsReturn;
}
exit;
sub process {
my $currChrom = shift;
my $currIndex = shift;
my $fileHandlesRef = shift;
my @fileHandles = @$fileHandlesRef;
my @newFileHandles;
for (my $i = 0; $i <= $#fileHandles; $i++) {
print " processing file $i\n";
my $fh = $fileHandles[$i];
print Dumper $fh;
while (1) {
last if ( $fh->eof() );
$_ = <$fh>;
next if ($_ =~ /^#|^\s*$|^>locus/ );
print "$_";
my @fields = split;
if ( $fields[3] ne $currChrom ) {
my ($chrIndex) = grep { $chromosomes[$_] eq $fields[3] } 0..$#
+chromosomes;
print "chrIndex $chrIndex currIndex $currIndex\n";
if ($chrIndex > $currIndex) {
print "skipping rest of file because greater than current ch
+romosome $currChrom\n";
$fh->seek(-1,1);
$newFileHandles[$i] = $fh;
last;
} else {
print "skipping chrom cuz not current. currChrom $currChrom
+ line chromosome: $fields[3]\n";
next;
}
}
} # end while
} # end foreach fh
return \@newFileHandles;
}
I've also uploaded this at:
here
A test input file is located at:
here
Thanks for your help.