use strict; use warnings; use PDL; use Inline with => 'PDL'; use PDL::GSL::RNG; my $genome = ''; my @id_lines; my @runs; ########################## # Read ########################## while ( ) { chomp; if ( /^>/ ) { push @id_lines, $_ } else { push @runs, length; $genome .= $_ } } ########################## # Shuffle ########################## my $rng = PDL::GSL::RNG-> new( 'taus' ); $rng-> set_seed( time ); my $start = 0; my $stop = length( $genome ) - 1; my $window = 3; while ( $start < $stop ) { my $len = $start + $window > $stop ? $stop - $start : $window; my $p = mkpiddle( $genome, $start, $len ); $rng-> ran_shuffle( $p ); $start += $window } ########################## # Output ########################## $start = 0; for ( 0 .. $#runs ) { print $id_lines[ $_ ], "\n", substr( $genome, $start, $runs[ $_ ]), "\n"; $start += $runs[ $_ ] } ########################## # Guts ########################## use Inline C => <<'END_OF_C'; static void default_magic( pdl *p, int pa ) { p-> data = 0; } pdl* mkpiddle( char* data, int ofs, int len ) { PDL_Indx dims[] = { len }; pdl* npdl = PDL-> pdlnew(); PDL-> setdims( npdl, dims, 1 ); npdl-> datatype = PDL_B; npdl-> data = data + ofs; npdl-> state |= PDL_DONTTOUCHDATA | PDL_ALLOCATED; PDL-> add_deletedata_magic( npdl, default_magic, 0 ); return npdl; } END_OF_C __DATA__ >Chr1 CCCTAAACCCTAAACCCTAAACCCTAAACCTCTGAATCCTTAATCCCTAAATCCCTAAAT >Chr2 TATGACGTTTAGGGACGATCTTAATGACGTTTAGGGTTTTATCGATCAGCGACGTAGGGA >Chr3 GTTTAGGGTTTAGGGTTTAGGGTTTAGGGTTTAGGGTTTAGGGTTTAGGGTTTAGGGTTT >Chr4 AACAAGGTACTCTCATCTCTTTACTGGGTAAATAACATATCAACTTGGACCTCATTCATA >Chr5 AACATGATTCACACCTTGATGATGTTTTTAGAGAGTTCTCGTGTGAGGCGATTCTTGAGG