I'd be inclined to use a hash whose values are pairs: a count, and a hash of the words that follow (each with the same structure). The following works for generating data sets of arbitrary order:
use strict;
use warnings;
use Data::Dump::Streamer;

# Number of consecutive words recorded along each path of the tree.
use constant ORDER => 3;

# Word tree. Each entry has the shape
#     $chains{$word} = [ $count, \%following ]
# where %following maps each word seen after $word to an entry of the
# same shape. Entries at depth ORDER carry only the count.
my %chains;

while (my $line = <DATA>) {
    my @words = split ' ', $line;

    # Slide a window of ORDER words along the line. A line with fewer
    # than ORDER words yields an empty range and contributes nothing.
    my $last_start = @words - ORDER;

    for my $start (0 .. $last_start) {
        # Sentinel node whose child table is the top-level hash, so the
        # first word of the window is handled exactly like the others.
        my $node = [undef, \%chains];

        for my $word (@words[$start .. $start + ORDER - 1]) {
            my $children = $node->[1] ||= {};
            $node = $children->{$word} ||= [];
            ++$node->[0];
        }
    }
}

Dump (\%chains);

__DATA__
I will not eat them in a bar.
I will not eat them in a car.
I will not eat them Sam I am.
I will not eat green eggs and ham!
Prints:
$HASH1 = { eat => [ 4, { green => [ 1, { eggs => [ 1 ] } ], them => [ 3, { in => [ 2 ], Sam => [ 1 ] } ] } ], eggs => [ 1, { and => [ 1, { "ham!" => [ 1 ] } ] } ], green => [ 1, { eggs => [ 1, { and => [ 1 ] } ] } ], I => [ 4, { will => [ 4, { not => [ 4 ] } ] } ], in => [ 2, { a => [ 2, { "bar." => [ 1 ], "car." => [ 1 ] } ] } ], not => [ 4, { eat => [ 4, { green => [ 1 ], them => [ 3 ] } ] } ], Sam => [ 1, { I => [ 1, { "am." => [ 1 ] } ] } ], them => [ 3, { in => [ 2, { a => [ 2 ] } ], Sam => [ 1, { I => [ 1 ] } ] } ], will => [ 4, { not => [ 4, { eat => [ 4 ] } ] } ] };
In reply to Re: Trying to select the best data structure
by GrandFather
in thread Trying to select the best data structure
by chinamox
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |