in reply to Finding patterns in packet data?
Coding it was not as easy as I hoped. :-(
use strict; # Takes the minimum count you are interested in and an array of string +s, # returns the list of patterns of the maximum size that can be found i +n # at least that threshold of the strings, sorted by number of strings. # sub find_max_pat { my $min = shift; # Sanity check return () unless $min <= scalar (@_); # %active_pats are the counts of the size of remaining active patt +erns my %active_pats = ('', scalar (@_)); # @searches contain search information for all strings my @searches = map {new Search::Strings($_)} @_; my %last_pats = (); while (%active_pats) { %last_pats = %active_pats; %active_pats = (); # Get counts at the next length foreach my $search (@searches) { foreach my $pat ($search->inc(\%last_pats)) { ++$active_pats{$pat}; } } # Remove patterns below our threshold foreach my $pat (keys %active_pats) { delete $active_pats{$pat} unless ($min <= $active_pats{$pa +t}); } } return keys %last_pats; } package Search::Strings; use strict; sub inc { my $self = shift; my $is_cont = shift; my $str = $self->{str}; my $len = length($str); my $pat_len = ++$self->{pat_len}; my %cur_pat; foreach my $pat (keys %{$self->{pats}}) { next unless exists $is_cont->{$pat}; my $spots = $self->{pats}{$pat}; if ($len < $pat_len + $spots->[-1]) { # This pattern reached the end of the string pop @$spots; } foreach my $spot (@$spots) { push @{$cur_pat{ substr($str, $spot, $pat_len) }}, $spot; } } $self->{pats} = \%cur_pat; return keys %cur_pat; } sub new { my $class = shift; my $self = {}; $self->{str} = shift; $self->{pat_len} = 0; my %pats = ('', [0..(length($self->{str}) - 1)]); $self->{pats} = \%pats; return bless $self, $class; }
|
|---|