#!/usr/bin/perl -w use strict; use warnings; use threads; use threads::shared; use Statistics::R; # using R to do t.test if(@ARGV != 3) { print STDERR "Usage: test_mt.pl input_list1 input_list2 output\n"; exit(0); } my ($inf1, $inf2, $outf)=@ARGV; my @inf1=`ls $inf1`; # get all of files in list1 my @inf2=`ls $inf2`; # get all of files in list2 my %hash :shared; sub read_file{ my $inf=shift; $hash{$inf} = &share({}); open(IN, $inf) or die "cannot open $inf\n"; while(){ if($_=~/\w/){ chomp; my @info=split(/\t/, $_); # lock(%hash); $hash{$inf}{$info[0]."_".$info[1]}=$info[2]; } } close IN; } my @threads; my $thread_count=0; for(my $i=0; $i<=$#inf1; $i++){ my $t = threads->create(\&read_file, $inf1[$i]); push(@threads, $t); $thread_count++; } for(my $i=0; $i<=$#inf2; $i++){ my $t = threads->create(\&read_file, $inf2[$i]); push(@threads, $t); $thread_count++; } print STDERR "Total threads to read the files: $thread_count\n"; $_->join foreach @threads; sleep 1; open(OUT, ">$outf") or die "cannot open $outf\n"; # Create a communication bridge with R and start R my $R = Statistics::R->new(); ### below is to do some R related calculation based on the hash; the problem should exist above :) open(IN, $inf1[0]) or die "cannot open $inf1[0]\n"; while(){ if($_=~/\w/){ my @info=split(/\t/, $_); my $cpg_score; my $total1; my $total2; my @list1; my @list2; foreach my $sample (@inf1){ $cpg_score.="\t".$hash{$sample}{$info[0]."_".$info[1]}; $total1+=$hash{$sample}{$info[0]."_".$info[1]}; push(@list1, $hash{$sample}{$info[0]."_".$info[1]}); } foreach my $sample (@inf2){ $cpg_score.="\t".$hash{$sample}{$info[0]."_".$info[1]}; $total2+=$hash{$sample}{$info[0]."_".$info[1]}; push(@list2, $hash{$sample}{$info[0]."_".$info[1]}); } if(abs($total1/@sample1 - $total2/@sample2)>=0.2){ my $mean1=sprintf("%.2f", $total1/@inf1); my $mean2=sprintf("%.2f", $total2/@inf2); my $list1=join",", @list1; my $list2=join",", @list2; ### Run R commands $R->run(qq`x <- t.test(c($list1), c($list2))`); my $p_value= $R -> get('x$p.value'); print OUT "$info[0]\t$info[1]$cpg_score\t$mean1\t$mean2\t$p_value\n"; } } } $R->stop(); close IN; close OUT;