in reply to Algorithm inspiration required.
This is my take on solving this (if I understand the spec right that is) using a finite state automaton to look for a specific arbitrary-length sequence of characters from a stream. It does not need to store any data at all except the string to look for and an array of bits of same length.
As an example the input stream is a seemingly infinite sequence of random numbers between 0 and 9.The sequence to detect is '0,1,2,3,4,5'.
For a seed of '123' this is encountered on the 151436th rand() call!
./detector.pl
#!/usr/bin/env perl #### # detects a sequence of ascii characters # unicode can be accommodated with minimal effort. # The heart of the detector is a a chain of states # who know how to read a symbol from the stream, # check if same as required, trigger a response if it is # propagate the symbol to the next state. # there are as many states as the length of the sequence # to detect # # Date: 18/06/2018 # Author: bliako # re : https://perlmonks.org/?node_id=1216847 #### use strict; use warnings; use constant DEBUG => 0; srand 123; my @to_detect = split(//, '012345'); ## # nothing to change below ## my $N = scalar(@to_detect); my @buffer_detect = (0) x $N; my $ref_buffer_detect = \@buffer_detect; my ($i); my $state = undef; my $next_state = undef; my $previous_state = undef; for($i=0;$i<$N;$i++){ $next_state = $state; $state = { # specify a state with its own data hash and a few sub +s for its operation 'init' => sub { # called first upon creation and resets the st +ate etc. my ($self, $params) = @_; if( DEBUG ){ print "init() : initialising $sel +f...\n"; } $self->{'data'} = { 'self' => $self, 'buffer-bit' => \$buffer_detect[$i], 'char-to-detect' => $to_detect[$i], 'next-state' => $next_state, 'previous-state' => $previous_state, 'current-symbol' => undef }; }, 'enter' => sub { # called when a new symbol from the stream is +available # calling enter() on the first state, will tri +gger a ripple # of enter()s through all the states my ($self, $newsymbol) = @_; if( DEBUG ){ print "enter() : self=$self, news +ymbol=$newsymbol\n"; } if( defined($self->{'data'}->{'current-symbol' +}) ){ $self->{'propagate'}->($self); } $self->{'data'}->{'current-symbol'} = $newsymb +ol; $self->{'check'}->($self); }, 'check' => sub { # check if current symbol is what we are looki +ng for at this particular state my ($self) = @_; my $d = $self->{'data'}; if( DEBUG ){ print "check() : checking if '".$ +d->{'current-symbol'}."' is the same as '".$d->{'char-to-detect'}."' +... "; } if( $d->{'current-symbol'} eq $d->{'char-to-detect'} ){ if( DEBUG ){ print "yes\n"; } ${$d->{'buffer-bit'}} = 1 } else { if( DEBUG ){ print "no\n"; } ${$d->{'buffer-bit'}} = 0 } }, 'propagate' => sub { # propagate our current symbol to the next sta +te my ($self) = @_; if( DEBUG ){ print "propagate() : self=$self\n +"; } my $d = $self->{'data'}; my $n = $d->{'next-state'}; if( defined($n) ){ if( DEBUG ){ print "propagate() : next + state exists : $n and its self is ".$n->{'data'}->{'self'}." and my +symbol is ".$d->{'current-symbol'}."\n"; } $n->{'enter'}->( $n->{'data'}->{'self'}, $d->{'current-symbol'} ); } }, }; print "i=$i, state created $state\n"; # initialise created state $state->{'init'}->($state); $previous_state = $state; }; my $FSM = $state; my $ch; for(0..50000000){ # our stream is a random ascii char generator # my $symbol = chr(32+int(rand(125-32))); # or random numbers my $symbol = int(rand(10)); # all we need to do is to enter() the symbol in the first stat +e # it will be ripple through all states $FSM->{'enter'}->($FSM, $symbol); # at the end, check the buffer for 1's $ch = 100.0 * check_buffer($ref_buffer_detect, $N); print "$0 : $_ : symbol '$symbol', buffer: ".join('', @buffer_ +detect)." : $ch %\n"; if( $ch == 100 ){ print "$0 : sequence detected, stopping here +.\n"; last } } # takes a reference to a buffer of 1 and 0's and counts the proportion + of 1s # over length. For efficiency supply ref to buffer and length # returns num(1) / total_length # sub check_buffer { my $buf = $_[0]; # the detection buffer as array ref my $l = $_[1]; # length of buffer (so that we don't find it he +re) my $i = 0; foreach(@$buf){ $i++ if $_ == 1; } return $i / $l; } 1;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Algorithm inspiration required.
by BrowserUk (Patriarch) on Jun 18, 2018 at 21:11 UTC | |
by bliako (Abbot) on Jun 19, 2018 at 08:26 UTC |