#!/usr/bin/perl -w
use warnings;
use strict;
use diagnostics;

# Extract exon sequences from a contig file.
#
# The work to do is described by a semicolon-separated instruction string:
#
#     NAME;CONTIG;ORIENTATION;START-END[;START-END...]
#
# Coordinates are 1-based and inclusive.  The contig is read from
# "<CONTIG>.txt" in the current directory.  Whitespace and digits are removed
# from it so that the coordinates index directly into the residue string.
# ORIENTATION is only echoed back; no reverse-complementing is performed.
#
# Two sets of instructions that can be interchanged for testing:
#   'CUTR25;AAAB01008851;plus;764935-764946;765050-765289;765372-765659'
#   'CUTR;AAABtest;plus;185-190;438-440;576-579'
#
# An instruction string may also be given as the first command-line argument
# (quote it: the shell treats ';' as a command separator).

# Run only when executed as a script, so the subs below can be loaded and
# exercised on their own.
main() unless caller;

########### SUBS ############

# Entry point: parse the instructions, echo them, load and clean the contig,
# then print every requested exon separated by blank lines.
sub main {
    my $default_input = 'CUTR;AAABtest;plus;185-190;438-440;576-579';
    my $input = @ARGV ? $ARGV[0] : $default_input;

    my ($name, $contig, $ori, @ranges) = parse_instructions($input);

    print "$name\n$contig\n$ori\n";
    for my $range (@ranges) {
        print "@{$range}\n";
    }
    print "\n";

    # get and edit contig so it can be searched and used
    my $goodseq = editcontig(read_contig("$contig.txt"));

    my @exons = map { extract_exon($goodseq, @{$_}) } @ranges;
    print join("\n\n", @exons);

    return;
}

# Split an instruction string into its parts.
#
# Takes:   the instruction string (see the header comment for the format).
# Returns: a list ($name, $contig, $orientation, [$start, $end], ...), with
#          one array reference per exon range.
# Dies:    if a leading field or all ranges are missing, or if a range is not
#          of the form START-END with 1 <= START <= END.
sub parse_instructions {
    my ($input) = @_;

    my ($name, $contig, $ori, @specs) = split /;/, $input;
    die "Malformed instructions '$input': "
      . "expected NAME;CONTIG;ORIENTATION;START-END[;START-END...]\n"
        unless defined $ori && @specs;

    my @ranges;
    for my $spec (@specs) {
        my ($start, $end) = $spec =~ /\A(\d+)-(\d+)\z/
            or die "Malformed exon range '$spec': expected START-END\n";
        die "Invalid exon range '$spec': need 1 <= START <= END\n"
            unless $start >= 1 && $end >= $start;
        push @ranges, [ $start, $end ];
    }

    return ($name, $contig, $ori, @ranges);
}

# Read the first "//"-terminated record from a contig file.
#
# Takes:   the path of the contig file.
# Returns: the raw record text with the trailing "//" (if any) removed.
# Dies:    if the file cannot be opened or contains no data.
sub read_contig {
    my ($contigfile) = @_;

    open(my $fh, '<', $contigfile)
        or die(">Could not open $contigfile!\n$!");

    # "//" is used as the record separator (presumably the GenBank-style
    # end-of-record marker -- confirm against the real input files).  It is
    # localized so that no other read in the program is affected.
    local $/ = "//";
    my $rawseq = <$fh>;
    close $fh;

    die "No sequence data found in $contigfile\n" unless defined $rawseq;

    # chomp honours $/, so this strips the "//" terminator and keeps it from
    # being counted as part of the sequence.
    chomp $rawseq;

    return $rawseq;
}

# Strip everything that is not sequence from a raw contig record.
#
# Takes:   the raw record text.
# Returns: a copy with all whitespace and all digits 0-9 removed, so that
#          position N of the sequence is character N-1 of the result.
sub editcontig {
    my ($rawseq) = @_;

    $rawseq =~ s/[\s0-9]+//g;

    return $rawseq;
}

# Cut one exon out of a cleaned sequence.
#
# Takes:   the cleaned sequence, and the 1-based inclusive START and END.
# Returns: the substring covering positions START..END.
# Dies:    if END lies beyond the end of the sequence.
sub extract_exon {
    my ($seq, $start, $end) = @_;

    my $seq_length = length $seq;
    die "Exon range $start-$end lies outside the sequence "
      . "(length $seq_length)\n"
        if $end > $seq_length;

    # substr is 0-based and takes a length, hence the -1 / +1 adjustments.
    return substr($seq, $start - 1, $end - $start + 1);
}

1;

__END__

Sample contents of AAABtest.txt, the contig file matching the instruction
string 'CUTR;AAABtest;plus;185-190;438-440;576-579'.  Everything after
__END__ is ignored by perl; copy the numbered rows below into AAABtest.txt
to run the script.

####
01 aagaagaagf agaagaagag agagaacttg gaaacagaat tgtagaacag atttatagac
61 agagcaaagt acaattccgt ttcagacaag taacaagagt gaaatagata taagactcag
121 agaaagtaaa aagcgtcaag taaggtacag tgaggagaaa gtaatgagtg tgtatgttca
181 atcgBROOKS tacccttgcg tatctcctcc ttcctcagcc accaacagtt catactcctg
241 cgcagcgtat cgatcccagt cagtaagcta aaaggtcaaa cacatgttat tagatggcat
301 ttcgatagaa tgtcatcgtg ctattcttac atcgtcgttt tcgcgctgtg caaatggatc
361 acgctgctcg tgctccatat acttctccag attgaagaac gtgtcgaaga aaatgggcgt
421 cattttgcat cgctttaWAS ggaccaacgt tacacagcca ggagttgctg gttttatcat
481 atcaagcatc tggaagcaaa cgagtactcc ttagcatatc gaaccaactc catctaccac
541 tccccaaaca aacacatact tgacacaggc agtctHEREa gggtagggtt tcgataccga
601 gcgattccat gcgatgctgc tgctcctcgt agaagtactc cagctcgtac atcgagagga