in reply to deleting a particular character and its coresponding score in 2 different files!!!

The posting appears to have been updated considerably... and the specification has shifted. Plus ça change :-(

OK. I give up. I really hope this is something useful and not just homework. The program below gives:

  >s1:  A  G  C  T  T  T  T  C  G  G  G  C  A  A  T
       23 43 45 65 76 54  3 34 54 65  7 45 56 87 56
    ->  A  G  C  T  T  T  C  G  G  C  A  A  T -- dropped 6 10
       23 43 45 65 76 54 34 54 65 45 56 87 56
  >s2:  G  C  T  G  C  C  C  C  C  C  A  T  C  T  T
       23 43 23 45 65 45 76 78 34  8 12 32 65 23 25
    ->  G  C  T  G  C  C  C  C  C  A  T  C  T  T -- dropped 9
       23 43 23 45 65 45 76 78 34 12 32 65 23 25
  >s3:  T  C  G  T  A  G  C  T  G  A  A  A  A  T  C
       12 23 34 45 56 54 43 32 65 43 12 34 75 76 45
    ->  T  C  G  T  A  G  C  T  G  A  A  A  A  T  C -- all kept
       12 23 34 45 56 54 43 32 65 43 12 34 75 76 45

#!/usr/bin/perl use strict; use warnings; my $seq_txt = <<FILE1; >s1 AGCTTTTCGGGCAAT >s2 GCTGCCCCCCATCTT >s3 TCGTAGCTGAAAATC FILE1 my $num_txt = <<FILE2; >s1 23 43 45 65 76 54 3 34 54 65 7 45 56 87 56 >s2 23 43 23 45 65 45 76 78 34 8 12 32 65 23 25 >s3 12 23 34 45 56 54 43 32 65 43 12 34 75 76 45 FILE2 open(my $FH1, '<', \$seq_txt) or die "$!" ; open(my $FH2, '<', \$num_txt) or die "$!" ; while (!eof($FH1)) { # Fetch sequence numbers and make sure we are in sync # # For tidyness, trim all leading and trailing whitespace -- which al +so takes care of # the newline. # # Skip blank lines in both files -- mostly so we're not foxed by tra +iling blankness. my $sid_1 = <$FH1> ; $sid_1 =~ s/^\s*// ; $sid_1 =~ s/\s*$// ; next if ($sid_1 eq '') ; # Comfortable with blank lines in file 1 my $sid_2 = '' ; while ($sid_2 eq '') { # Allow blank lines in file 2 also defined($sid_2 = <$FH2>) or die "file 2 -- hit eof expecting '$si +d_1'" ; $sid_2 =~ s/^\s*// ; $sid_2 =~ s/\s*$// ; } ; if ($sid_1 ne $sid_2) { die "File 1 sequence '$sid_1' ne file 2 '$sid_2'" ; } ; # Fetch the sequence -- discard all whitespace and check validity (m +ay not be empty) defined(my $seq = <$FH1>) or die "file 1 -- unexpected eof" ; $seq =~ s/\s+//g ; $seq =~ m/^[ACGT]+$/ or die "file 1 -- invalid sequence" ; my @seq = split(//, $seq) ; # Fetch the scores -- check valid form and length defined(my $num = <$FH2>) or die "file 2 -- unexpected eof" ; $num =~ s/^\s*// ; $num =~ s/\s*$// ; $num =~ m/^(?:\d+(?:\s+|\z))*$/ or die "file 2 -- invalid scores" ; my @num = split(/\s+/, $num) ; if (@num != @seq) { die "sequence $sid_1: sequence and scores don't +match" ; } ; # We pass everything except letters (and the related score) where th +e letter is: # # a. last of repeated sequence of 3 or more. # # b. its score is < 10 my @exam = map { ($num[$_] > 10) # must be OK if scor +e > 10 || ($_ < 2) # too early for repe +at of 3 or more ! || ($seq[$_] ne $seq[$_-1]) # not a repeat || ($seq[$_] ne $seq[$_-2]) # not a long enough +repeat || ( ($_ != $#seq) && ($seq[$_] eq $seq[$_+1]) ) # not the end of a r +epeat } (0..$#seq) ; my @keep = grep { $exam[$_] } (0..$#seq) ; my @drop = grep { !$exam[$_] } (0..$#seq) ; # Crunch printf "%6s%s\n", "$sid_1:", show_seq(@seq) ; printf "%6s%s\n", "" , show_num(@num) ; printf "%6s%s", "->", show_seq(@seq[@keep]) ; if (@drop) { print " -- dropped @drop\n" ; } else { print " -- all kept\n" ; } ; printf "%6s%s\n", "", show_num(@num[@keep]) ; $seq = join(' ', @seq[@keep]) ; $num = join(' ', @num[@keep]) ; # Now do whatever's required with the results .... } ; # Make sure that the two files have finished together. while (<$FH2>) { m/^\s*$/ or die "File 2 contains more than file 1" ; } ; #_____________________________________________________________________ +_ sub show_seq { return ' '.join(' ', @_) ; } ; sub show_num { return join('', map sprintf('%3d', $_), @_) ; } ;
  • Comment on Re: deleting a particular character and its coresponding score in 2 different files!!!
  • Download Code

Replies are listed 'Best First'.
Re^2: deleting a particular character and its coresponding score in 2 different files!!!
by Anonymous Monk on Oct 15, 2008 at 06:21 UTC
    This one gives me in 2 different files..
    #!/usr/bin/perl open FILE,"seq.txt"; open OUT1,">seq.out"; open OUT2,">num.out"; $seq = "";$num = ""; while(<FILE>) { if($_ =~ /^>/) { push(@seq,$seq) if $seq; $seq=''; push(@seq,"\n$_"); } else { $_ =~ s/\s*$//; $seq = $seq.$_; } } push(@seq,$seq); close(FILE); open FILE,"num.txt"; while(<FILE>) { if($_ =~ /^>/) { push(@num,$num) if $num; $num=''; push(@num,"\n$_"); } else { $_ =~ s/\s*$/ /; $num = $num."$_"; } } push(@num,$num); close(FILE); while($_ = shift @seq) { my @marker; if( $_ =~ />/ ) { $shead = $_; $nhead = shift @num; $seq = shift @seq; $num = shift @num; #print $seq,"\n"; #print $num,"\n"; @s = split (//,$seq); @n = split (/ /,$num); $pre = "null"; $cnt = 1; $arrcnt = -1; foreach(@s) { if($_ eq $pre) { $cnt++; } else { if ($cnt >= 3 && $n[$arrcnt] < 10) { push(@marker,$arrcnt); } $cnt = 1; } $pre = $_; $arrcnt++; } $exp = 0; foreach(@marker) { $_ = $_ - $exp; splice(@n,$_,1); splice(@s,$_,1); $exp++; } $seq = join(" ",@s); $num = join(" ",@n); print OUT1 $shead; print OUT1 $seq,"\n"; print OUT2 $shead; print OUT2 $num,"\n"; } else { print "give proper input file"; exit; } }
    THANK U ALL FOR THE HELP...Am learning a lot :)
      well...thats "myself" on the above program... THANK U ALL FOR THE HELP...Am learning a lot :)