#!/usr/bin/perl -w
#
# Reads a tab-delimited file (path given as the first command-line argument),
# keeps the lines that contain the text "splic", and for each of them cuts a
# fixed-length window of characters out of the sequence held in column 1.
# The start of the window is derived from the signed offset found in
# column 3 (e.g. "+7" or "-7") and from the strand direction in column 6.
#
# For every processed line the window is printed to STDOUT and a two-line
# record (">" + column 2, then the window) is appended to a file named
# "fasta" in the current directory.
#
# Columns used (1-based): 1 = sequence, 2 = ID, 3 = variant description,
# 6 = strand direction.
#
# Lines whose column 3 does not contain a parsable offset are skipped with a
# warning, so a sequence is never paired with an offset from another line.

use strict;

# Window arithmetic, kept exactly as in the original script.  The original
# worked example was an offset of -7 on a "+" strand: 10 + (-7) = 3, and
# 23 - 3 = 20 is then the index at which the window starts.
# NOTE(review): the original comment asked for "3 letters to the left of [G]
# and 16 to the right", which does not obviously add up to 22 - confirm the
# intended window length.
my $BASE_LEFT_OFFSET = 10;
my $WINDOW_ANCHOR    = 23;
my $WINDOW_LENGTH    = 22;

my $inputfile1 = $ARGV[0];
die "Usage: $0 <tab-delimited input file>\n" unless defined $inputfile1;

open(my $in_fh, '<', $inputfile1)
    or die "Uh oh.. unable to find file $inputfile1: $!";

# Open the output once instead of re-opening it for every record.
open(my $out_fh, '>>', 'fasta')
    or die "Unable to open 'fasta' for appending: $!";

LINE:
while (my $line = <$in_fh>) {
    next LINE unless $line =~ m/splic/;
    chomp $line;

    my ($sequence, $id, $variant, $strand) = (split /\t/, $line)[0, 1, 2, 5];

    # Pull out the sign and number that precede the bracketed base change,
    # e.g. "-7" from "...-7x[G]".
    # NOTE(review): \w+ needs at least one word character between the number
    # and the "[", so for text like "+17[G]" the last digit is consumed by
    # \w+ and the captured offset becomes "+1" - confirm against real data.
    my ($intron_from_boundary) =
        defined $variant
        ? $variant =~ m/([+-]\d+)\w+\[[ACTG]\]/
        : ();
    unless (defined $intron_from_boundary) {
        warn "Line $. of $inputfile1: no offset/base change in column 3, skipping\n";
        next LINE;
    }

    # A missing strand column is treated like any non-"+" value.
    $strand = '' unless defined $strand;

    my $left_of_boundary =
        $strand =~ m/\+/
        ? $BASE_LEFT_OFFSET + $intron_from_boundary
        : $BASE_LEFT_OFFSET - $intron_from_boundary;
    my $new_left = $WINDOW_ANCHOR - $left_of_boundary;

    my @bases = split //, $sequence;

    # splice() dies on a negative offset that reaches before the start of the
    # array and warns on one past the end; skip such records instead.
    if ($new_left > @bases || -$new_left > @bases) {
        warn "Line $. of $inputfile1: window start $new_left is outside the sequence, skipping\n";
        next LINE;
    }

    my @spliceout = splice @bases, $new_left, $WINDOW_LENGTH;

    # Strip trailing whitespace from every extracted element.
    for (@spliceout) { s/\s+$//; }

    print "@spliceout\n";
    print {$out_fh} ">", "$id\n", "@spliceout", "\n";
}

close $in_fh;
close $out_fh or die "Unable to finish writing 'fasta': $!";