#!/usr/bin/perl

# Task: Extract GeneID-Number and gene information

use strict;
use warnings;

use Data::Dump;

# Name of the GFF input file (tab-separated columns).
my $gff_file = 'Genomteil.gff';

my $in;

# Result: hash reference mapping GeneID => [ col 3, col 4, col 6, type of the
# following line ] (0-based column indices of the 'gene' line; the last
# element is column 2 of the line that follows it). Starts out as an empty
# hash so that an input without any matching gene dumps as {} instead of undef.
my $hr_data = {};

# 1) Open the .gff input file and read it line by line. Each line is split at
#    the tabs into an array of columns.

open ($in, '<', $gff_file) or die "Cannot open $gff_file: $!";

# A look-ahead line that turned out not to be CDS/exon. It is processed again
# as a regular line so that a 'gene' line directly following another 'gene'
# line is not silently lost.
my $pending;

LINE:
while (1) {
    my $line1 = defined ($pending) ? $pending : readline ($in);
    $pending = undef;
    last LINE if !defined ($line1);

    chomp ($line1);    # Removes trailing \n
    my @a_line1 = split ("\t", $line1);

    # Comment lines, blank lines and short lines have no column 2; skip them
    # together with every feature that is not a 'gene'.
    if (!defined ($a_line1[2]) || $a_line1[2] ne 'gene') {
        next LINE;
    }

    # The GeneID is taken from the attribute column (column 8). A missing
    # column is treated the same as a column without a GeneID.
    my $attributes = defined ($a_line1[8]) ? $a_line1[8] : '';
    my ($GeneID) = $attributes =~ /.*;db_xref=GeneID:(\d+)/;

    if (!defined ($GeneID)) {
        print ("Error: 'gene' textblock found, but no GeneID present at line [$.]\n");
        next LINE;
    }

    # We found a GeneID. Create a record (array reference) with the data from
    # this line; it becomes the hash value for this GeneID.
    my $ar_record = [@a_line1[3, 4, 6]];

    # Read the next line from the file, which we expect to contain CDS or exon.
    my $line2 = readline ($in);
    if (!defined ($line2)) {
        # The 'gene' line was the last line of the file.
        print ("Error: no line follows the 'gene' entry for GeneID $GeneID at end of file [$.]\n");
        last LINE;
    }
    chomp ($line2);
    my @a_line2 = split ("\t", $line2);

    # Exact match on the feature type, i.e. ($a_line2[2] eq 'CDS' or $a_line2[2] eq 'exon').
    if (defined ($a_line2[2]) && $a_line2[2] =~ /\A(?:CDS|exon)\z/) {
        push (@{$ar_record}, $a_line2[2]);
        # A GeneID that occurs more than once overwrites the earlier record.
        $hr_data->{$GeneID} = $ar_record;
    } else {
        print ("Error: next line does not contain CDS or exon [$.]\n");
        # Do not drop the line: it may itself be a 'gene' line.
        $pending = $line2;
    }
} ## end while (1)

close $in;

Data::Dump::dd($hr_data);