#!/usr/bin/env perl
use 5.28.0;
use warnings;
use autodie;
use Text::CSV;
use constant {
SEP_CHAR => "\t",
BED_CHR => 0,
BED_START => 1,
BED_END => 2,
BED_STRAND => 5,
BED_LINE => 6,
};
my (%start_plus, %start_minus, %start_dud);
my %dispatch = (
'+' => sub { ++$start_plus{$_[BED_CHR]}{$_[BED_START]} },
'-' => sub { ++$start_minus{$_[BED_CHR]}{$_[BED_END]} },
'0' => sub { $start_dud{$_[BED_CHR]}{$_[BED_LINE]} = $_[BED_STRAND] },
);
my $csv = Text::CSV::->new({sep_char => SEP_CHAR});
while (my $row = $csv->getline(\*DATA)) {
next if 0 == index $row->[0], '#';
my $sign = exists $dispatch{$row->[BED_STRAND]} ? $row->[BED_STRAND] : 0;
$dispatch{$sign}->(@$row, $.);
}
# This part for testing and demonstration purposes only
use Data::Dump;
say '=' x 10, ' Raw Extracted Data ', '=' x 20;
say 'Plus Strands';
say '-' x 50;
dd \%start_plus;
say '-' x 50;
say 'Minus Strands';
say '-' x 50;
dd \%start_minus;
say '-' x 50;
say 'Problem Strands';
say '-' x 50;
dd \%start_dud;
say '=' x 50;
# End this testing & demo part
my %filehandle_for;
for my $chr (sort keys %start_plus) {
for my $file_id (ALL => $chr) {
my $fh = _get_results_fh(\%filehandle_for, $file_id);
$fh->say(join SEP_CHAR, CHR => 'Plus Strand BedGraph:');
for my $key (sort {$a <=> $b} keys %{$start_plus{$chr}}) {
$fh->say(join SEP_CHAR, $chr, $key + 1, $start_plus{$chr}{$key});
}
}
}
# This part for testing and demonstration purposes only
say "\n", '=' x 10, ' Output Files ', '=' x 26;
for my $outfile (qw{
pm_11135183_bio_bed_modified_chr7_results.bed
pm_11135183_bio_bed_modified_chr8_results.bed
pm_11135183_bio_bed_modified_ALL_results.bed
}) {
say $outfile;
say '-' x 50;
system cat => $outfile;
say '-' x 50;
}
# End this testing & demo part
sub _get_results_fh {
my ($fh_for, $chr) = @_;
return $fh_for->{$chr} || _gen_results_fh($fh_for, $chr);
}
sub _gen_results_fh {
my ($fh_for, $chr) = @_;
my $fmt = 'pm_11135183_bio_bed_modified_%s_results.bed';
my $filename = sprintf $fmt, $chr;
open my $fh, '>', $filename;
$fh_for->{$chr} = $fh;
return $fh;
}
__DATA__
#chromosome start end position col-5 strand
chr7 127471196 127472363 Pos1 0 @
chr7 127471196 127472363 Pos1 0 +
chr7 127471196 127472363 Pos1 0 +
chr7 127471196 127472363 Pos1 0 +
chr7 127471196 127472363 Pos1 0 +
chr7 127471196 127472363 Pos1 0 +
chr7 127472363 127473530 Pos2 0 +
chr7 127473530 127474697 Pos3 0 -
chr7 127474697 127475864 Pos4 0 -
chr7 127474697 127475864 Pos4 0 _
chr8 40000 40500 Pos5 0 +
chr8 40000 40600 Pos5 0 +
chr8 40000 40650 Pos5 0 =
chr8 40000 40650 Pos5 0 +
chr8 41000 41200 Pos6 0 -
####
$ ./pm_11135183_bio_bed_modified.pl
========== Raw Extracted Data ====================
Plus Strands
--------------------------------------------------
{ chr7 => { 127471196 => 5, 127472363 => 1 }, chr8 => { 40000 => 3 } }
--------------------------------------------------
Minus Strands
--------------------------------------------------
{ chr7 => { 127474697 => 1, 127475864 => 1 }, chr8 => { 41200 => 1 } }
--------------------------------------------------
Problem Strands
--------------------------------------------------
{ chr7 => { 2 => "\@", 11 => "_" }, chr8 => { 14 => "=" } }
==================================================
========== Output Files ==========================
pm_11135183_bio_bed_modified_chr7_results.bed
--------------------------------------------------
CHR Plus Strand BedGraph:
chr7 127471197 5
chr7 127472364 1
--------------------------------------------------
pm_11135183_bio_bed_modified_chr8_results.bed
--------------------------------------------------
CHR Plus Strand BedGraph:
chr8 40001 3
--------------------------------------------------
pm_11135183_bio_bed_modified_ALL_results.bed
--------------------------------------------------
CHR Plus Strand BedGraph:
chr7 127471197 5
chr7 127472364 1
CHR Plus Strand BedGraph:
chr8 40001 3
--------------------------------------------------
####
{
my %filehandle_for;
sub _get_results_fh { my ($chr) = @_; ... }
sub _gen_results_fh { my ($chr) = @_; ... }
}