#!/usr/bin/perl

use strict;
use warnings;
use feature qw( say );

#Take input and output names from command line
my $in_qfn          = $ARGV[0];
my $out_qfn         = $ARGV[1];

my $transcripts_qfn = "/path/HumanTranscriptInfo";

my @transcripts;
{
   open(my $transcripts_fh, "<", $transcripts_qfn)
      or die("Can't open \"$transcripts_qfn\": $!\n");
   while (<$transcripts_fh>) {
      chomp;
      push @transcripts, [ split(/\t/, $_) ];
   }    
}

{
   open(my $in_fh, "<", $in_qfn)
      or die("Can't open \"$in_qfn\": $!\n");
   open(my $out_fh, ">", $out_qfn)
      or die("Can't create \"$out_qfn\": $!\n");
while (<$in_fh>) {
      chomp;

      #define what each column is to print later
      my ($ID, $strand, $chr, $start, $sequence, $quality, $numPositions, $mismatches) = split(/\t/, $_);

      #define an end position based on sequence length
      my $end = $start+length($sequence);
      
      #define useful info from reference file
      for my $transcript (@transcripts) {
         my $ref_chr    = $transcript->[0];
         my $ref_strand = $transcript->[6];
         my $ref_start  = $transcript->[3];
         my $ref_end    = $transcript->[4];
         my $info = $transcript->[8];
         
         #now compare values, and if there is a match for strand, chromosome, and the start and stop within reference range range, print values
         if ($chr eq $ref_chr && $strand eq $ref_strand && $end >= $ref_start && $end <= $ref_end) {
            say $out_fh join("\t", $ID, $strand, $chr, $start, $end, $sequence, $numPositions, $mismatches, $info);
         }
         
         }
     }
}
}