aquinom has asked for the wisdom of the Perl Monks concerning the following question:
#!/usr/bin/perl use strict; use warnings; use Pod::Usage; use Getopt::Long; my $KG_FILE; my $OUTFILE; my $KNOME_FILE; GetOptions( '1kg=s' =>\$KG_FILE, 'knome=s' =>\$KNOME_FILE, 'o=s' =>\$OUTFILE, 'help' =>sub{pod2usage(1);} ) or pod2usage(2); =head SYNOPSIS putLinesBackinVCF.pl Options: -1kg 1000 genomes input vcf file -knome knome file with sites -o output file -help display this message =cut if (!$KG_FILE){ pod2usage(2); die "Please supply an input 1000 genomes file\n"; } if(!$KNOME_FILE){ pod2usage(2); die "Please supply our sites file\n"; } if(!$OUTFILE){ pod2usage(2); die "Please supply an output file\n"; } unless(open( KGFILE, $KG_FILE)){ die "Couldn't create link to 1kg file: $!\n"; } unless(open( KNOME, $KNOME_FILE)){ die "Couldn't create link to knome input file: $!\n"; } unless (open( OUTFILE,">", $OUTFILE)){ die "Couldn't create link to output file: $!\n"; } my $pos = 0; while (<KGFILE>){ if ($_ =~ /^#/){ next; } # skip header lines... my ($kgLine) = $_; chomp $kgLine; my @kgArr = (split("\t", $kgLine))[0,1]; #grab the chrom and posit +ion columns my $chr; my @arr; LOOP: { do{ # go through the KNOME file; # print out the positions till we reach (or exceed) the position f +rom the KGFILE my $line = <KNOME>; if (not defined $line) { last; } chomp $line; @arr = (split("\t", $line))[0,1]; $arr[0] =~ s/^0//; $chr = $arr[0]; #remove leading 0's from chromosome number $pos = $arr[1]+1; #to adjust positions to base 1 rather than + base 0 if ($pos < $kgArr[1]){ print OUTFILE "$chr\t$pos\n"; #probably unncessary if sta +tement but shouldn't hurt anything even if so } } while $pos < $kgArr[1]; } if($kgArr[1] < $pos){ #at this point the position from KNOME is + no longer LESS THAN position from KGFILE print OUTFILE $kgLine,"\n"; while ($kgArr[1] < $pos){ #if last position from KNOME is GREATER THAN position from KGFILE we n +ow need to loop through #and print all the lines from KGFILE until we catch back up to KNOME's + position! my $catchup = (<KGFILE>); chomp $catchup; @kgArr = (split("\t", $catchup))[0,1]; if ($kgArr[1] == $pos){ #If we get to the point where th +ey're equal, good... just print, this while loop will end, and we'll +start the whole process over print OUTFILE $catchup,"\n"; } elsif($kgArr[1] > $pos){ print OUTFILE "$chr\t$pos\n"; redo LOOP ; last; #print OUTFILE "$catchup\n"; #if KGFILE's position has become GREATER than the KNOME $pos again, ho +wever, it gets tricky... # but now we need to go through KNOME file positions again and see if +that position matches later on... # can't just quit the loop or this line will be lost, but we don't rea +lly want to print it here either... # } } } elsif($kgArr[1] == $pos){ print OUTFILE $kgLine,"\n"; } } #if we get to the end of the KGFILE and there's still lines left in th +e KNOME file, just print the rest of those lines chr and pos values; while (<KNOME>){ chomp; my @remainingArr = (split("\t", $_))[0,1]; $remainingArr[0] =~ s/^0//; my $remChr = $remainingArr[0]; my $remPos = $remainingArr[1]+1; print OUTFILE "$remChr\t$remPos\n"; }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Trouble with do{}while LABELS
by ikegami (Patriarch) on Aug 09, 2011 at 22:09 UTC | |
by Anonymous Monk on Aug 09, 2011 at 23:45 UTC | |
|
Re: Trouble with do{}while LABELS
by duyet (Friar) on Aug 09, 2011 at 20:11 UTC | |
by aquinom (Sexton) on Aug 09, 2011 at 20:52 UTC | |
by aquinom (Sexton) on Aug 09, 2011 at 20:23 UTC | |
|
Re: Trouble with do{}while LABELS
by cjb (Friar) on Aug 09, 2011 at 21:56 UTC |