in reply to Re^4: String Compression Optimization (repeated concatenated subsequences)
in thread String Compression Optimization (repeated concatenated subsequences)
For comparison purposes:
u: 1 wallclock secs ( 0.53 usr + 0.00 sys = 0.53 CPU) @ 112.78/s (n=60)
The length and complexity of the real data elements are the cause of all your backtracking woes. So, do away with them :)
What I did was to extract an array of the significant bits of the data ("L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0" etc.), then de-duplicate those using a hash and build a substitution (placeholder) table using 'aa;', 'ab;', 'ac;' etc. I then concatenate the placeholders into a single string and run my findRuns() on it (with appropriately modified regexes).
Once the placeholder string has been compressed, I use a reverse lookup table to put back the original data elements into the compressed string.
#! perl -slw
use strict;
use Data::Dumper;

## Field indices of the run records built inside findRuns().
use constant {
    BASESEQ => 0,    ## the repeated base sequence (text of capture 1)
    SAVED   => 1,    ## length of the repeats that follow the base (capture 2)
    OFFSET  => 2,    ## position in the string where the base starts
    LENGTH  => 3,    ## combined length of the base plus its repeats
    REPEATS => 4,    ## total number of consecutive copies, base included
};

## findRuns( $s )
## Takes a string of placeholders of the form 'xx;' (two lowercase letters
## and a semicolon) and returns a copy in which runs of a repeated
## sub-sequence found outside existing '<...>' groups have been replaced
## by '<N:sequence>'. The sub calls itself again while the result still
## contains an immediately repeated sequence outside a '<...>' group.
sub findRuns {
    my $s = shift;

    ## Collect candidate runs: a base sequence immediately followed by
    ## between 1 and 254 further copies of itself. The copies are matched
    ## in a lookahead, so the next search resumes right after the base.
    my @rls;
    push @rls, [
        $1,
        length $2,
        pos( $s ) - length( $1 ),
        length( $1 ) + length( $2 ),
        length( $2 ) / length( $1 ) + 1,
    ] while $s =~ m[
        .*?
        ( (?: [a-z]{2} ; )+ )
        (?= ( \1{1,254} ) [^>]* (?: < | $ ) )
    ]gx;

    ## Resolve candidates that overlap an earlier candidate.
    for my $p ( 0 .. $#rls ) {
        next unless defined $rls[ $p ];

        for my $q ( $p + 1 .. $#rls ) {
            next unless defined $rls[ $p ] and defined $rls[ $q ];

            ## Candidates are stored in string order, so once $q starts
            ## beyond the end of $p no later candidate is examined for $p.
            last if $rls[ $q ][ OFFSET ]
                  > $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ];

            ## While $q starts inside $p, trim the one that saves less.
            while(     $rls[ $q ][ OFFSET ] >= $rls[ $p ][ OFFSET ]
                    && $rls[ $q ][ OFFSET ] <  $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ]
            ) {
                if( $rls[ $p ][ SAVED ] < $rls[ $q ][ SAVED ] ) {
                    last unless $rls[ $p ][ REPEATS ]-- > 2;
                    ## NOTE(review): this subtracts a string length from the
                    ## repeat count; LENGTH may have been intended - confirm.
                    $rls[ $p ][ REPEATS ] -= length $rls[ $p ][ BASESEQ ];
                }
                else {
                    last unless $rls[ $q ][ REPEATS ]-- > 2;
                    ## NOTE(review): same doubt as above about REPEATS.
                    $rls[ $q ][ REPEATS ] -= length $rls[ $q ][ BASESEQ ];
                    $rls[ $q ][ OFFSET ]  += length $rls[ $q ][ BASESEQ ];
                }
            }

            ## Still overlapping: discard whichever candidate saves less.
            ## The slot is set to undef (rather than using delete on an
            ## array element) and filtered out by the grep below.
            if(     $rls[ $q ][ OFFSET ] >= $rls[ $p ][ OFFSET ]
                 && $rls[ $q ][ OFFSET ] <  $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ]
            ) {
                my $loser = $rls[ $p ][ SAVED ] < $rls[ $q ][ SAVED ] ? $p : $q;
                $rls[ $loser ] = undef;
            }
        }
    }

    @rls = grep defined, @rls;

    ## Replace the first occurrence of each surviving run that lies outside
    ## an existing '<...>' group with its '<N:sequence>' form.
    for my $run ( @rls ) {
        my( $base, $count ) = @{ $run }[ BASESEQ, REPEATS ];
        my $re = qr/(?:$base){$count}/;
        $s =~ s{ ( (?: ^ | > ) [^<]*? ) $re }{ ( $1 || '' ) . '<' . $count . ':' . $base . '>' }ex;
    }

    ## Go round again if an immediately repeated sequence is still present
    ## outside the '<...>' groups.
    $s = findRuns( $s )
        if $s =~ m[ (?: ^ | > ) [^<]*? ( (?: [a-z]{2} ; )+ ) \1 [^>]*? (?: < | $ ) ]x;

    return $s;
}

## Main program

## Extract the significant data
my @seqs = map{ m[^> lst2flex_tset\s+([^;]+);] } grep{ m[^>] } <DATA>;
#print Dumper \@seqs;

## Build a placeholder lookup table
## Using 'aa;', 'ab;' etc. allows for 676 individual elements
## Using 'aaa;' etc. would allow 17576 (26**3) and so on, but the regexes
## in findRuns() would then have to be changed to match.
## The semicolon separator allows us to limit backtracking in the regex
my $placeholder = 'aa';
my %seqs;
@seqs{ @seqs } = ();

## 'zz'++ becomes 'aaa', which the [a-z]{2} patterns would not handle.
die 'Too many distinct elements (' . scalar( keys %seqs )
    . ") for two-letter placeholders\n"
    if scalar( keys %seqs ) > 676;

@seqs{ keys %seqs } = map{ $placeholder++ . ';' } 1 .. keys %seqs;

## Build a reverse lookup table for re-substitution once the
## compression has been performed.
my %r_seqs;
@r_seqs{ values %seqs } = keys %seqs;
#print "Reverse substitution table\n", Dumper \%r_seqs;

## Build the string of placeholders.
my $seq = join '', @seqs{ @seqs };
print "Placeholder sequence\n", $seq;

## Do the compression
my $compressed = findRuns( $seq );
print "Compressed placeholder sequence\n", $compressed;

## Substitute back the original data into the compressed sequence
## Add some tabs and newlines in strategic places to aid verification.
$compressed =~ s[( [a-z]{2} ; )][ "\t" . $r_seqs{ $1 } . "\n" ]xge;
$compressed =~ s[(?=<)|(?<=:|>)][\n]g;
print $compressed;

__DATA__
// header
opcode_mode=normal;
import tset lst2flex_tset;
vector ($tset,dbg0,dbg1,dclk,dfrm,dmiso,dmosi,dpin,dpout,dsclk,dscsn,gpio0,gpio1,gpio2,gpio3,gpio4,gpio5,gpio6,gpio7,rst_n,vstate0,vstate1,vstate2)
{
start_label start_vector:
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 1
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 2
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 3
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 4
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 5
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 6
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 7
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 8
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 9
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 10
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 11
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 12
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 13
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 14
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 15
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 16
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 17
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 18
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 19
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1; // 20
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 21
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 22
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 23
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 24
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 25
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 26
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 27
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 28
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 29
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 30
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 31
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 32
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 33
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 34
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 35
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 36
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 37
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 38
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 39
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 40
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 41
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 42
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 43
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 44
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 45
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 46
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 47
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 48
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 49
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 50
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 51
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 52
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 53
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 54
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 55
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 56
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 57
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 58
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 59
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 60
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 61
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 62
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 63
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 64
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 65
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 66
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 67
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 68
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 69
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 70
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 71
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 72
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 73
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 74
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 75
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 76
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 77
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 78
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 79
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 80
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 81
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 82
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 83
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 84
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 85
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 86
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 87
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 88
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 89
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 90
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 91
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 92
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 93
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 94
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 95
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 96
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 97
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 98
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 99
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 100
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 101
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 102
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 103
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 104
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 105
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 106
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 107
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 108
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 109
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 110
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 111
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 1 1 1; // 112
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 1 1 1; // 113
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 114
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 115
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 116
> lst2flex_tset L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 117
> lst2flex_tset L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1; // 118
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 119
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 120
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 121
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 122
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 123
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 124
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 125
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 126
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 127
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 128
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 129
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 130
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 131
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 132
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 133
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 134
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 135
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 136
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 137
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 138
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 139
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 140
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 141
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 142
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 143
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 144
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 145
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 146
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 147
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 148
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 149
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 150
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 151
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 152
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 153
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 154
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 155
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 156
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 157
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 158
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 159
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 160
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 161
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 162
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 163
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 164
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 165
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 166
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 167
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 168
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 169
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 170
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 171
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 172
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 173
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 174
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 175
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 176
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 177
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 178
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 179
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 180
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 181
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 182
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 183
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 184
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 185
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 186
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0; // 187
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 188
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0; // 189
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 H 1 0 1 0; // 190
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 H 1 0 1 0; // 191
> lst2flex_tset L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 192
> lst2flex_tset L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0; // 193
halt > lst2flex_tset H 0 0 0 0 L 0 L L H L H L H L H L H 1 0 0 0; // 194
}
As you can see below, the placeholder sequence is much shorter, and the regular nature of the placeholders means the regex engine has a much simpler task finding the sequences.
I haven't bothered to re-integrate the compressed sequences with the header and trailer information, as you haven't said what format would be used for the repeat sequences.
I've also injected some newlines and tabs to make visual verification easier.
P:\test>390718 Placeholder sequence ag;ac;ag;ac;ag;ac;ag;ac;ag;ac;ag;ac;ag;ac;ag;ac;ag;ac;ag;ac;aj;af;aj;a +f;aj;af;aj;af;aj;af;aa;ab;ak;ah;an;ah;an;ah;an;ah;an;ah;an;ab;ak;ah;a +n;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;a +h;an;ah;an;ah;an;ah;an;ah;an;ab;ak;ah;an;ah;an;ah;an;ah;an;ah;an;ah;a +n;ah;an;ah;an;ah;an;ah;an;ab;ak;ab;ak;am;aa;ah;an;am;aj;ae;al;af;aj;a +f;aj;af;aa;ah;an;ah;an;ah;an;ah;an;ah;an;am;aa;ab;ak;ah;an;ah;an;ah;a +n;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ab;ak;a +h;an;ah;an;am;aa;ab;ak;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;a +n;ah;an;am;aa;ad;ai;ab;ak; Compressed placeholder sequence <2:ag;ac;ag;ac;ag;ac;ag;ac;ag;ac;><2:aj;af;aj;af;>aj;af;aa;ab;ak;<2:ah +;an;ah;an;>ah;an;ab;ak;<2:ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;ah;an;a +h;an;>ah;an;ab;ak;<2:ah;an;ah;an;ah;an;ah;an;ah;an;><2:ab;ak;>am;aa;a +h;an;am;aj;ae;al;<2:af;aj;>af;aa;<2:ah;an;><3:ah;an;>am;aa;ab;ak;<2:a +h;an;ah;an;ah;an;ah;an;ah;an;ah;an;>ah;an;ab;ak;<2:ah;an;>am;aa;ab;ak +;<2:ah;an;ah;an;ah;an;ah;an;>ah;an;am;aa;ad;ai;ab;ak; <2: L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 0 0 1 1 > <2: L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 > L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 <2: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 
L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 > L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 <2: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 > L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 <2: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 > <2: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 > L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 L 0 1 0 0 L 0 L L H 0 0 0 0 0 
0 0 0 1 1 1 1 L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 1 1 1 <2: L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 L 0 0 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 > L 0 1 0 0 L 0 L L H 0 0 0 0 0 0 0 0 1 0 1 1 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 <2: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 > <3: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 > L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 <2: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 > L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 <2: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 > L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 <2: L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 > L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 0 0 0 L 0 
L L H 0 L 0 L 0 L 0 L 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 0 H 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 H 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 H 1 0 1 0 L 1 1 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0 L 1 0 0 0 L 0 L L H 0 L 0 L 0 L 1 L 1 0 1 0
|
|---|