#!/usr/bin/perl
#
# ihcrdb - accumulate per-sequence present/absent call counts.
#
# Reads a tab-separated input file whose first three columns are a sequence
# name, a signal value and a call flag. A row counts as "present" when the
# flag is "P" and the signal is numerically above the background value given
# with -b; every other row counts as "absent". Counts are added to the hash
# stored under the key "hash" in the DBM::Deep file "CRDB", a summary is
# written to the files "present" and "absent" in the current directory, and
# the updated hash is stored back into the DB.
use strict;
use warnings;
use DBM::Deep;
use Getopt::Std;

# Check for pre-existing output files and die if they exist, so that results
# from an earlier run are never overwritten.
if ((-e "present") || (-e "absent")) {
    die "Remove existing ouput files before running script!";
}

# Define command syntax for output to screen in case of user error.
my $syntax = "\nCommand Syntax: \n\nihcrdb -i <input file> -b <background signal>\n\n";

# Define hash for storage of command line arguments; both single-letter
# switches take a value.
my %arghash = ();
getopts("i:b:", \%arghash);

# Both switches are mandatory. Each one is tested on its own so that a value
# such as "0" or "" for -i cannot hide a missing -b.
unless (defined $arghash{i} && defined $arghash{b}) {
    die "Insufficient commmand line arguments supplied! Quitting...\n $syntax";
}

# Input file name and background signal threshold.
my ($input, $background) = ($arghash{i}, $arghash{b});

# Handle on the persistent DBM::Deep store.
my $db = DBM::Deep->new("CRDB");

# Get the stored counts from the DB: name => [present count, absent count].
# Fall back to an empty hash when nothing has been stored yet.
my %pahash = %{ $db->{hash} || {} };

# Open input file or die. Three-argument open keeps characters in the
# user-supplied file name from being interpreted as an open mode.
open(my $in_fh, '<', $input) or die "Cannot open infile!$!";

# Parse the input file line by line.
LINE:
while (my $line = <$in_fh>) {

    # Skip blank lines, header lines and Affy control lines.
    next LINE
        if $line =~ /^\s*$/
        || $line =~ /^Gene/
        || $line =~ /^AFFX/
        || $line =~ /^2000/;

    chomp $line;

    # Extract the 3 required values; any further columns are ignored.
    my ($name, $signal, $affycall) = split /\t/, $line;

    # Increment present count for the sequence if called present and above
    # background, else increment absent count. Rows too short to carry a
    # signal and a call are counted as absent.
    if (   defined $signal
        && defined $affycall
        && $affycall eq "P"
        && $signal > $background)
    {
        $pahash{$name}->[0]++;
    }
    else {
        $pahash{$name}->[1]++;
    }
}

close $in_fh;

# Open present and absent output files.
open(my $present_fh, '>', "present")
    or die "Cannot open output file 'present': $!";
open(my $absent_fh, '>', "absent")
    or die "Cannot open output file 'absent': $!";

# Print sequence name and number of calls to output files. A sequence is
# reported as present if it was EVER called present.
foreach my $key (sort keys %pahash) {
    if (defined $pahash{$key}->[0]) {
        print {$present_fh} "$key\t$pahash{$key}->[0] present calls";
        if (defined $pahash{$key}->[1]) {
            print {$present_fh} "\t$pahash{$key}->[1] absent calls\n";
        }
        else {
            print {$present_fh} "\n";
        }
    }
    else {
        print {$absent_fh} "$key\t($pahash{$key}->[1] absent calls)\n";
    }
}

# Reassociate updated hash with stored DB.
$db->{hash} = \%pahash;

# Close the output files; buffered write errors only surface here.
close $present_fh or die "Cannot close output file 'present': $!";
close $absent_fh  or die "Cannot close output file 'absent': $!";
exit;