#!/usr/bin/env perl
use strict;
use warnings;
use autodie;
use constant {
GENE_FILE => 'pm_1192233_genes.txt',
MISSING_FILE => 'pm_1192233_missing.txt',
OUT_PREFIX => 'pm_1192233_out_',
};
my @ano_files = @ARGV;
my ($gene_list, $gene_re) = get_gene_data(GENE_FILE);
for my $ano_file (@ano_files) {
open my $fh, '<', $ano_file;
while (<$fh>) {
print { get_out_fh($1) } $_ if /$gene_re/;
}
}
gen_missing_report($gene_list);
{
my %fh_for;
sub get_out_fh {
unless (exists $fh_for{$_[0]}) {
open $fh_for{$_[0]}, '>', OUT_PREFIX . $_[0];
}
return $fh_for{$_[0]};
}
sub gen_missing_report {
my ($genes) = @_;
open my $fh, '>', MISSING_FILE;
print $fh "--- START MISSING LIST ---\n";
print $fh "$_\n" for grep { ! exists $fh_for{$_} } @$genes;
print $fh "--- END MISSING LIST ---\n";
}
}
sub get_gene_data {
my ($file) = @_;
my @list;
open my $fh, '<', $file;
push @list, $_ while <$fh>;
chomp @list;
@list = sort { length $b <=> length $a } @list;
my $alt = join '|', @list;
return (\@list, qr{($alt)});
}
####
$ cat pm_1192233_genes.txt
ACMSD
CRYM
ARIB1B
GENE_NOT_ANOTATED
ALSO_ABSENT
$ cat pm_1192233_anot_1.txt
ACMSD 1
XXXX 1
CRYM 1
$ cat pm_1192233_anot_2.txt
ARIB1B 2
YYYY 2
ACMSD 2
$ cat pm_1192233_anot_3.txt
WWWW 3
CRYM 3
ARIB1B 3
ZZZZ 3
####
$ pm_1192233_multi_file_output.pl pm_1192233_anot_*
$
####
$ cat pm_1192233_out_ACMSD
ACMSD 1
ACMSD 2
$ cat pm_1192233_out_ARIB1B
ARIB1B 2
ARIB1B 3
$ cat pm_1192233_out_CRYM
CRYM 1
CRYM 3
$ cat pm_1192233_missing.txt
--- START MISSING LIST ---
GENE_NOT_ANOTATED
ALSO_ABSENT
--- END MISSING LIST ---