#!/usr/bin/perl -CSDA
# Search a protein sequence file for a handful of literal motifs and report
# (a) whether each motif occurs at all and (b) where every occurrence ends.
#
# -CSDA (see perlrun): UTF-8 on STDIN/STDOUT/STDERR, UTF-8 as the default
# PerlIO layer, and UTF-8 decoding of @ARGV.

use utf8;
use Modern::Perl;
no warnings qw{uninitialized};
use Data::Dumper;
use Path::Tiny;

# Slurp the whole file and strip every whitespace character, newlines
# included, so motifs that straddle a line break in the file still match.
# Side effect visible in the sample output below: the header text is glued
# onto the front of the sequence, so it is searched too and its characters
# count towards the reported offsets.
# NOTE(review): 'file,fasta' (comma) looks like a typo for 'file.fasta' --
# left unchanged because the posted output shows the script ran; confirm.
my $data = path('file,fasta')->slurp_utf8() =~ s/\s//gr;

# Debug dump of the flattened data to STDERR.
warn $data;

# Presence test. \Q...\E makes each motif match as literal text rather than
# being compiled as a regex pattern.
print $data =~ /\Q$_\E/
    ? "The protein contains the domain -- $_\n"
    : "The protein doesn't contain the domain -- $_\n"
    for 'KCKQCGKGFSRRSALNV', 'CGK', 'XXXX';

# Report every occurrence of each motif.
for my $lookfor (qw{CGK SQRLNR SQR PYKC PYKCK}) {
    # With /c a failed match does not reset pos(), so rewind explicitly
    # before scanning for the next motif.
    pos($data) = 0;
    while ($data =~ /\Q$lookfor\E/gc) {
        # pos() is the offset just past the match (its end), not its start:
        # in the sample output SQR is reported at 226 and SQRLNR at 229.
        print "there is $lookfor at ", pos($data), "\n";
    }
}

__END__
result: AAF88103.1zincfingerprotein226[Homosapiens]MNMFKEAVTFKDVAVAFTEEELGLLGP +AXRKLYRDVMVENFRNLLSVGHPPFKQDVSPIERNEQLWIMTTATRRQGNLGEKNQSKLITVQDRESEE +ELSCWQIWQQIANDLTRCQDSMINNSQCHKQGDFPYQVGTELSIQISEDENYIVNKADGPNNTGNPEFP +ILRTQDSWRKTFLTESQRLNRDQQISIKNKLCQCKKGVDPIGWISHHDGHRVHKSEKSYRPNDYEKDNM +KILTFDHNSMIHTGQKSYQCNECKKPFSDLSSFDLHQQLQSGEKSLTCVERGKGFCYSPVLPVHQKVHV +GEKLKCDECGKEFSQGAHLQTHQKVHVIEKPYKCKQCGKGFSRRSALNVHCKVHTAEKPYNCEECGRAF +SQASHLQDHQRLHTGEKPFKCDACGKSFSRNSHLQSHQRVHTGEKPYKCEECGKGFICSSNLYIHQRVH +TGEKPYKCEECGKGFSRPSSLQAHQGVHTGEKSYICTVCGKGFTLSSNLQAHQRVHTGEKPYKCNECGK +SFRRNSHYQVHLVVHTGEKPYKCEICGKGFSQSSYLQIHQKAHSIEKPFKCEECGQGFNQSSRLQIHQL +IHTGEKPYKCEECGKGFSRRADLKIHCRIHTGEKPYNCEECGKVFRQASNLLAHQRVHSGEKPFKCEEC +GKSFGRSAHLQAHQKVHTGDKPYKCDECGKGFKWSLNLDMHQRVHTGEKPYKCGECGKYFSQASSLQLH +QSVHTGEKPYKCDVCGKVFSRSSQLQSHQRVHTGEKPYKCEICGKSFSWRSNLTVHHRIHVGDKSYKSN +RGGKNIRESTQEKKSIK at ./a.pl line 10. 
The protein contains the domain -- KCKQCGKGFSRRSALNV The protein contains the domain -- CGK The protein doesn't contain the domain -- XXXX there is CGK at 357 there is CGK at 385 there is CGK at 441 there is CGK at 469 there is CGK at 497 there is CGK at 525 there is CGK at 553 there is CGK at 581 there is CGK at 637 there is CGK at 665 there is CGK at 693 there is CGK at 721 there is CGK at 749 there is CGK at 777 there is CGK at 805 there is SQRLNR at 229 there is SQR at 226 there is PYKC at 380 there is PYKC at 464 there is PYKC at 492 there is PYKC at 548 there is PYKC at 576 there is PYKC at 632 there is PYKC at 716 there is PYKC at 744 there is PYKC at 772 there is PYKC at 800 there is PYKCK at 381

In reply to Re^2: Finding pattern in a file by leszekdubiel
in thread Finding pattern in a file by shabird

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; this is "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.