Anonymous Monk has asked for the wisdom of the Perl Monks concerning the following question:
CDS join(2432..2501,5144..5154,5746..5760,6411..6446, 7558..7650,8929..8982,11919..11963,12056..12109, 12202..12255,12562..12615,13036..13089,13613..136 +66, 15217..15261,15553..15606,15706..15750,16140..161 +93, 16692..16790,16934..16978,17093..17191,17612..176 +65, 17791..17898,18259..18312,18426..18524,19436..194 +89, 19953..20051,20452..20505,21059..21112,21263..213 +16, 21590..21643,22596..22640,23773..23871,25090..251 +97, 25867..25920,26854..26907,27588..27641,27746..277 +99, 27896..28003,28365..28418,29217..29270,30273..304 +34, 31647..31754,32427..32534,32920..32973,33060..331 +67, 33308..33361,33733..33840,34316..34369,34496..346 +03, 34936..35194,35602..35786,36498..36740,37554..377 +00) /gene="COL1A2" /codon_start=1 /product="pro-alpha 2(I) collagen" /protein_id="AAB93981.1" /db_xref="GI:2735715" /translation="MLSFVDTRTLLLLAVTLCLATCQSLQEETVRKGPA +GDRGPRGER GPPGPPGRDGEDGPTGPPGPPGPPGPPGLGGNFAAQYDGKGVGLGPGPM +GLMGPRGPP GAAGAPGPQGFQGPAGEPGEPGQTGPAGARGPAGPPGKAGEDGHPGKPG +RPGERGVVG PQGARGFPGTPGLPGFKGIRGHNGLDGLKGQPGAPGVKGEPGAPGENGT +PGQTGARGL PGERGRVGAPGPAGARGSDGSVGPVGPAGPIGSAGPPGFPGAPGPKGEI +GAIGNAGPA GPAGPRGEVGLPGLSGPVGPPGNPGANGLTGAKGAAGLPGVAGAPGLPG +PRGIPGPVG AAGATGARGLVGEPGPAGSKGESGNKGEPGSAGPQGPPGPSGEEGKRGP +NGEAGSAGP PGPPGLRGSPGSRGLPGADGRAGVMGPPGSRGASGPAGVRGPNGDAGRP +GEPGLMGPR GLPGSPGNIGPAGKEGPVGLPGIDGRPGPIGPVGARGEPGNIGFPGPKG +PTGDPGKNG DKGHAGLAGARGAPGPDGNNGAQGPPGPQGVQGGKGEQGPAGPPGFQGL +PGPSGPAGE VGKPGERGLHGEFGLPGPAGPRGERGPPGESGAAGPTGPIGSRGPSGPP +GPDGNKGEP GVVGAVGTAGPSGPSGLPGERGAAGIPGGKGEKGEPGLRGEIGNPGRDG +ARGAHGAVG APGPAGATGDRGEAGAAGPAGPAGPRGSPGERGEVGPAGPNGFAGPAGA +AGQPGAKGE RGGKGPKGENGVVGPTGPVGAAGPAGPNGPPGPAGSRGDGGPPGMTGFP +GAAGRTGPP GPSGISGPPGPPGPAGKEGLRGPRGDQGPVGRTGEVGAVGPPGFAGEKG +PSGEAGTAG PPGTPGPQGLLGAPGILGLPGSRGERGLPGVAGAVGEPGPLGIAGPPGA +RGPPGAVGS PGVNGAPGEAGRDGNPGNDGPPGRDGQPGHKGERGYPGNIGPVGAAGAP +GPHGPVGPA GKHGNRGETGPSGPVGPAGAVGPRGPSGPQGIRGDKGEPGEKGPRGLPG +FKGHNGLQG LPGIAGHHGDQGAPGSVGPAGPRGPAGPSGPAGKDGRTGHPGTVGPAGI +RGPQGHQGP 
AGPPGPPGPPGPPGVSGGGYDFGYDGDFYRADQPRSAPSLRPKDYEVDA +TLKSLNNQI ETLLTPEGSRKNPARTCRDLRLSHPEWSSGYYWIDPNQGCTMEAIKVYC +DFPTGETCI RAQPENIPAKNWYRSSKDKKHVWLGETINAGSQFEYNVEGVTSKEMATQ +LAFMRLLAN YASQNITYHCKNSIAYMDEETGNLKKAVILQGSNDVELVAEGNSRFTYT +VLVDGCSKK TNEWGKTIIEYKTNKPSRLPFLDIAPLDIGGADHEFFVDIGPVCFK" exon 2432..2501 /gene="COL1A2" /number=1 protein_bind 2487..2500 /gene="COL1A2" /note="putative" /citation=[7] /bound_moiety="NF1" intron 2502..5143 /gene="COL1A2" /citation=[10] /citation=[7] /number=1 protein_bind 3380..3386 /note="putative; bottom strand" /bound_moiety="AP1" protein_bind 3407..3413 /gene="COL1A2" /note="putative" /citation=[7] /bound_moiety="AP1" repeat_region 3716..3747 /citation=[7] /rpt_type=tandem /rpt_unit="gt" exon 5144..5154 /gene="COL1A2" /number=2 intron 5155..5745 /gene="COL1A2" /citation=[10] /number=2 exon 5746..5760 /gene="COL1A2" /number=3
# Extract the amino-acid sequence from the /translation="..." qualifier found
# in $line7, store it in $amino_acid_seq, and print it followed by a newline.
#
# NOTE(review): assumes $line7 holds the feature-table text (possibly spanning
# several lines) and that both variables are declared by the surrounding
# code -- confirm against the caller.
#
# Why the pattern looks the way it does:
#   * No leading ^\s+ anchor: without /m, ^ only matches at the very start of
#     the string, so the qualifier was never found when other text (e.g. the
#     "CDS join(...)" location) preceded it.
#   * The value is delimited by its own double quotes ([^"]*) instead of a
#     greedy (.*) up to "exon", which ran to the LAST "exon" in the string and
#     dragged the closing quote and every following feature into the capture.
#   * No /g: in scalar context /g remembers pos() between matches, which makes
#     a repeated test against the same string fail unexpectedly.
if ($line7 =~ m{/translation="([^"]*)"}) {
    $amino_acid_seq = $1;

    # The qualifier value is wrapped across lines in the flat file; drop the
    # embedded whitespace so the residues form one contiguous string.
    $amino_acid_seq =~ s/\s+//g;
}

# Printed unconditionally, as before: when nothing matched this emits whatever
# $amino_acid_seq already held.
print $amino_acid_seq."\n";
|
|---|