#!/usr/bin/perl use strict; use warnings; use feature qw( say ); #Take input and output names from command line my $in_qfn = $ARGV[0]; my $out_qfn = $ARGV[1]; my $transcripts_qfn = "/path/HumanTranscriptInfo"; my @transcripts; { open(my $transcripts_fh, "<", $transcripts_qfn) or die("Can't open \"$transcripts_qfn\": $!\n"); while (<$transcripts_fh>) { chomp; push @transcripts, [ split(/\t/, $_) ]; } } { open(my $in_fh, "<", $in_qfn) or die("Can't open \"$in_qfn\": $!\n"); open(my $out_fh, ">", $out_qfn) or die("Can't create \"$out_qfn\": $!\n"); while (<$in_fh>) { chomp; #define what each column is to print later my ($ID, $strand, $chr, $start, $sequence, $quality, $numPositions, $mismatches) = split(/\t/, $_); #define an end position based on sequence length my $end = $start+length($sequence); #define useful info from reference file for my $transcript (@transcripts) { my $ref_chr = $transcript->[0]; my $ref_strand = $transcript->[6]; my $ref_start = $transcript->[3]; my $ref_end = $transcript->[4]; my $info = $transcript->[8]; #now compare values, and if there is a match for strand, chromosome, and the start and stop within reference range range, print values if ($chr eq $ref_chr && $strand eq $ref_strand && $end >= $ref_start && $end <= $ref_end) { say $out_fh join("\t", $ID, $strand, $chr, $start, $end, $sequence, $numPositions, $mismatches, $info); } } } } }