mlsmit10 has asked for the wisdom of the Perl Monks concerning the following question:
Hi, guys. I've posted previously about this particular script, but I've recently run into a new problem with it. When running the script, I receive the following error: Use of uninitialized value $_ in pattern match (m//) at /Users/smithcabinets26/Desktop/RAD/Digester/Improving/Triple.pl line 66.
This is line 66:
my @third_fragments = grep !/$rsite3/, $second_fragments[$i];
The script takes a genome, cuts it at two restriction sites, excludes fragments containing a third site, and tells me how many fragments are present in particular size ranges. Line 6 works to remove the fragments with the third site present. The script seems to be functioning correctly, but I am not sure why the above error would occur and am worried that it may be affecting my output in some way. Thanks in advance for your help. The entire script is below.
#! /usr/bin/perl -w # a script to get fragments of a genome based on restriction enzyme # retrieves the fragments greater than XX bp and less than XX bp # whole genome in one gzip file (fasta format)! # calculates the number of fragments you will get per genome for RADse +q # perl genomeFragmentor_doubleDigest.pl genome.gz restriction_site1 re +striction_site2 organism # perl genomeFragmentor_doubleDigest.pl Anolisgenome.gz CCTGCAGG GGATC +C Anolis use strict; my %genome = fasta_read_gzip_alt($ARGV[0]); #my %genome = fasta_read_alt($ARGV[0]); my $rsite1 = $ARGV[1]; my $rsite2 = $ARGV[2]; my $rsite3 = $ARGV[3]; my $totalCount = 0; # count of all fragments in the genome my $totalBP = 0; # the total number of base pairs in the se +lected fragments my $organism = $ARGV[4]; my $radtagfile = $organism."_Radseq_fragments_doubleDigest_".$rsite1." +_".$rsite2."_".$rsite3.".fasta"; my $sizesfile = $organism."_Radseq_fragments_doubleDigest_".$rsite1."_ +".$rsite2."_".$rsite3."_sizes.txt"; open (OUT, ">$radtagfile"); open (SIZE, ">$sizesfile"); print SIZE "length organism\n"; # prepare a summary file my ($Second, $Minute, $Hour, $Day, $Month, $Year, $WeekDay, $DayOfYear +, $IsDST) = localtime(time); $Month++; $Year += 1900; my $date = $Month."_".$Day."_".$Year; my $summaryfile = $organism."_summary_doubleDigest_".$rsite1."_".$rsit +e2."_".$rsite3."_".$date.".txt"; open (RESULTS, ">$summaryfile"); my %final_fragments; my @all_size_fragments; # start creating the fragments foreach my $seqname (keys %genome) { print "Working on sequence $seqname.\n"; my @first_fragments = split("$rsite1", $genome{$seqname}); + # split the fragments up based on the enzyme motif # add the restriction site motif back onto the fragments $first_fragments[0] .= $rsite1; $first_fragments[scalar @first_fragments - 1] = $rsite1.$first_fra +gments[scalar @first_fragments - 1]; for (my $i = 0; $i < scalar @first_fragments; ++$i) { if ($i != 0 or $i != (scalar @first_fragments - 1)) { $first_fragments[$i] = $rsite1.$first_fragments[$i].$rsite +1; } # now split the fragment using $rsite2 # repair the first and last fragments to include $rsite2 # these are the only fragments to contain both restriction sit +es, so keep them in @final_fragments my @second_fragments = split($rsite2, $first_fragments[$i]); $second_fragments[0] .= $rsite2; $second_fragments[scalar @second_fragments - 1] = $rsite2.$sec +ond_fragments[scalar @second_fragments - 1]; foreach my $fragment (@second_fragments) { push(@all_size_fragments, length($fragment)); } my @third_fragments = grep !/$rsite3/, $second_fragments[$i]; $third_fragments[0] .= $rsite3; $third_fragments[scalar @third_fragments - 1] = $rsite3.$third +_fragments[scalar @third_fragments - 1]; foreach my $fragment (@third_fragments) { push(@all_size_fragments, length($fragment)); } my $final_fragment1 = $seqname."_".$i."_1"; my $final_fragment2 = $seqname."_".$i."_2"; $final_fragments{$final_fragment1} = $third_fragments[0]; $final_fragments{$final_fragment2} = $third_fragments[scalar @ +third_fragments - 1]; } } # keep a score of how many fragments fall within a particular size ran +ge my $size_100_150 = 0; my $size_151_200 = 0; my $size_201_250 = 0; my $size_251_300 = 0; my $size_301_350 = 0; my $size_351_400 = 0; my $size_401_450 = 0; my $size_451_500 = 0; my $size_501_550 = 0; my $size_551_600 = 0; my $size_small = 0; my $size_large = 0; foreach my $fragment (keys %final_fragments) { # add on $rsite1 to both sides of the fragment my $fragmentLength = length($final_fragments{$fragment}); print OUT ">$fragment", "_", "1\n"; print OUT substr($final_fragments{$fragment}, 0, 96), "\n"; print OUT ">$fragment", "_", "2rc\n"; print OUT revcom(substr($final_fragments{$fragment}, $fragmentLeng +th - 96, 96)), "\n"; $totalBP += $fragmentLength; if ($fragmentLength >= 100 and $fragmentLength <= 150) { ++$size_100_150; } elsif ($fragmentLength > 150 and $fragmentLength <= 200) { ++$size_151_200; } elsif ($fragmentLength > 200 and $fragmentLength <= 250) { ++$size_201_250; } elsif ($fragmentLength > 250 and $fragmentLength <= 300) { ++$size_251_300; } elsif ($fragmentLength > 300 and $fragmentLength <= 350) { ++$size_301_350; } elsif ($fragmentLength > 350 and $fragmentLength <= 400) { ++$size_351_400; } elsif ($fragmentLength > 400 and $fragmentLength <= 450) { ++$size_401_450; } elsif ($fragmentLength > 450 and $fragmentLength <= 500) { ++$size_451_500; } elsif ($fragmentLength > 500 and $fragmentLength <= 550) { ++$size_501_550; } elsif ($fragmentLength > 550 and $fragmentLength <= 600) { ++$size_551_600; } elsif ($fragmentLength < 100) { ++$size_small; } elsif ($fragmentLength > 600) { ++$size_large; } } $totalCount = scalar keys %final_fragments; # count of all +fragments in the genome print RESULTS "The restriction sites used were:\n"; print RESULTS $ARGV[1], "\n"; print RESULTS $ARGV[2], "\n\n"; print RESULTS "There were ", $totalCount, " fragments from the whole g +enome.\n"; print RESULTS "There were ", $size_100_150, " fragments between 100 an +d 150 bp.\n"; print RESULTS "There were ", $size_151_200, " fragments between 151 an +d 200 bp.\n"; print RESULTS "There were ", $size_201_250, " fragments between 201 an +d 250 bp.\n"; print RESULTS "There were ", $size_251_300, " fragments between 251 an +d 300 bp.\n"; print RESULTS "There were ", $size_301_350, " fragments between 301 an +d 350 bp.\n"; print RESULTS "There were ", $size_351_400, " fragments between 351 an +d 400 bp.\n"; print RESULTS "There were ", $size_401_450, " fragments between 401 an +d 450 bp.\n"; print RESULTS "There were ", $size_451_500, " fragments between 451 an +d 500 bp.\n"; print RESULTS "There were ", $size_501_550, " fragments between 501 an +d 550 bp.\n"; print RESULTS "There were ", $size_551_600, " fragments between 551 an +d 600 bp.\n"; print RESULTS "There were ", $size_small, " fragments smaller than 100 + bp.\n"; print RESULTS "There were ", $size_large, " fragments larger than 600 +bp.\n\n"; print RESULTS "There are ", $totalBP, " base pairs in the fragments.\n +"; print RESULTS "\nJust some notes of reference:\n"; print RESULTS "HiSeq 2000 gives 375,000,000 reads.\n"; print RESULTS "HiSeq 2500 gives 742,000,000 reads.\n"; close OUT; close RESULTS; foreach my $length (@all_size_fragments) { print SIZE $length, "\t", $organism, "\n"; } exit; sub fasta_read_gzip_alt { # reads in a gzip fasta file and pases it into a hash # version 1.0 (my $filename) = @_; # be sure to include the path my %fasta; open(FASTA, "gunzip -c $filename |") || die "can't open pipe to $f +ilename"; my $fastaData; my $sequence = ''; my $name = ''; while(<FASTA>) { $fastaData = $_; $fastaData =~ s/\n//gms; if ($fastaData =~ />/) { if ($sequence) { # if there is a sequence, +then the sequence belongs to the last name $fasta{$name} = $sequence; } # reinitialize everything $sequence = ''; # start over! $name = $fastaData; $name =~ s/>//gms; } elsif (eof FASTA) { $fasta{$name} = $sequence; } else { $sequence .= $fastaData; } } close FASTA; return %fasta; } sub revcom { (my $sequence) = @_; $sequence = reverse($sequence); $sequence =~ tr/AGCTRYMKSWHBVDNagctrymkswhbvdn/TCGAYRKMSWDVBHNtcga +yrkmswdvbhn/; return $sequence; } sub fasta_read_alt { # reads in a fasta file and pases it into a hash # version 1.0 (my $filename) = @_; # be sure to include the path my %fasta; open(FASTA, $filename); my $fastaData; my $sequence = ''; my $name = ''; while(<FASTA>) { $fastaData = $_; $fastaData =~ s/\n//gms; if ($fastaData =~ />/) { if ($sequence) { # if there is a sequence, +then the sequence belongs to the last name $fasta{$name} = $sequence; } # reinitialize everything $sequence = ''; # start over! $name = $fastaData; $name =~ s/>//gms; } elsif (eof FASTA) { $fasta{$name} = $sequence; } else { $sequence .= $fastaData; } } close FASTA; return %fasta; }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Bioinformatics- Use of uninitialized value
by kennethk (Abbot) on Jul 21, 2014 at 21:41 UTC | |
by mlsmit10 (Initiate) on Jul 21, 2014 at 21:51 UTC | |
|
Re: Bioinformatics- Use of uninitialized value
by johngg (Canon) on Jul 21, 2014 at 22:45 UTC | |
|
Re: Bioinformatics- Use of uninitialized value
by GrandFather (Saint) on Jul 22, 2014 at 00:03 UTC | |
by xyzzy (Pilgrim) on Jul 22, 2014 at 03:26 UTC | |
by GrandFather (Saint) on Jul 22, 2014 at 05:11 UTC | |
|
Re: Bioinformatics- Use of uninitialized value
by AnomalousMonk (Archbishop) on Jul 21, 2014 at 22:06 UTC |