#!/usr/bin/perl
#
# Randomly joins haplotypes two by two, population by population, from the
# output given by Eli's program (any number of populations).  The input must
# be a 2-allele output.  The file written here is the infile for the R
# calculation.
#
# Usage:  perl eli+.pl infile
#   The name of the output file is asked for on standard input.
#
# Input handling:
#   * A line starting with two (optionally negative) integers separated by a
#     tab is one haplotype of the current population.
#   * A line containing "segsites: <number>" closes the current population:
#     its haplotypes are shuffled and the next population starts.
#   * Every other line is ignored.
#
# Output, per population (STDOUT and OUTFILE unless noted):
#   "Population <index>", the number of haplotypes (STDOUT only), then one
#   row per random pair:  h1[0] <tab> h2[0] <tab> h1[1] <tab> h2[1] <tab>
# Finally the number of populations is printed to STDOUT.

use strict;
use warnings;
use List::Util qw(shuffle);

if ( @ARGV != 1 ) {
    print "incorrect usage ---- TYPE IN COMMAND LINE: perl eli+.pl infile\n";
    exit();
}

print "What's the OUTFILE?\n";
my $outfile = <STDIN>;
die "no OUTFILE name given\n" unless defined $outfile;

# Three-argument open takes the name literally, so strip the newline and any
# surrounding blanks ourselves (two-argument open used to do that implicitly).
$outfile =~ s/^\s+//;
$outfile =~ s/\s+$//;
die "no OUTFILE name given\n" if $outfile eq '';

# Open the input first so that an unreadable infile does not leave behind a
# freshly truncated OUTFILE.
open( my $in, '<', $ARGV[0] ) or die "could not open $ARGV[0]: $!\n";
open( my $out, '>', $outfile ) or die "could not create $outfile: $!\n";

my @populations;        # indices of the closed populations, in input order
my %haplotypes_of;      # population index => list of haplotype rows
my $current_pop = 0;    # index of the population currently being read

while ( my $line = <$in> ) {
    chomp $line;
    if ( $line =~ /^-?\d+\t-?\d+/ ) {

        # One haplotype: keep its whitespace-separated fields as a row.
        push @{ $haplotypes_of{$current_pop} }, [ split ' ', $line ];
    }
    elsif ( $line =~ /segsites: \d+/ ) {

        # End of a population: randomise the order once, so that taking
        # consecutive rows below yields random pairs.
        my @shuffled = shuffle @{ $haplotypes_of{$current_pop} || [] };
        $haplotypes_of{$current_pop} = \@shuffled;
        push @populations, $current_pop;
        $current_pop++;
    }
}
close $in;

# Haplotypes read after the last "segsites:" line never become a population.
# NOTE(review): that matches the historical behaviour; confirm that the input
# format always ends a population with a "segsites:" line.
if ( exists $haplotypes_of{$current_pop} ) {
    my $dropped = scalar @{ $haplotypes_of{$current_pop} };
    warn "$dropped haplotype(s) after the last segsites line were ignored\n";
}

for my $pop (@populations) {
    my @rows        = @{ $haplotypes_of{$pop} };
    my $sample_size = scalar @rows;

    print "Population $pop\n";
    print {$out} "Population $pop\n";
    print "$sample_size\n";

    if ( $sample_size % 2 ) {
        warn "Population $pop has an odd number of haplotypes ($sample_size);"
          . " the last one is written without a partner\n";
    }

    # Consume the shuffled rows two at a time.  With an odd count the final
    # "pair" holds a single haplotype and its partner columns stay empty.
    while ( my @pair = splice @rows, 0, 2 ) {
        my ( $first, $second ) = @pair;
        my @fields = (
            $first->[0], ( defined $second ? $second->[0] : '' ),
            $first->[1], ( defined $second ? $second->[1] : '' ),
        );
        my $row = join( "\t", @fields ) . "\t\n";
        print $row;
        print {$out} $row;
    }
}

print "$current_pop\n";

# Buffered write errors only surface when the handle is closed.
close $out or die "could not finish writing $outfile: $!\n";