############################################################################### # NAME: recurse_tree # DESCRIPTION: 1) Use recursion and the Tree::Nary module to define @small_arrays of # files and directories, as well as a @top_level_dir array # 2) Inputs to this function must be Dir Nodes, they are the root # of an N-ary tree, or root of some subtree of the N-ary tree. These # Nodes are Tree::Nary nodes. # 3) If the Dir Node repesents a dir from files.txt or dirs.txt that # has subdirecties, or that contains files, these elements will be # in a linked list to which the Dir Node will have a {children} # pointer. # 4) Such {children} lists will be all files, all directories, or a mix # of files and directories with any files appearing first (i.e. leftmost) # in the linked list. A Dir Node could also have an empty {children} # pointer. # 5) Nodes in this linked list are File and/or Dir Nodes. They are connected by # {next} pointers, the next pointer points to the Node to the left of the # current Node. Only the leftmost Node has a connection to the Parent Node # (it is pointed to the parent's {children} pointer. # 6) This function will test the input Dir Node to see if it is a base case or # or a recursive case, and of what type. # 7) If the input Dir Node has a {children} linked list, the function will traverse # the list and make similar tests on each Node in the list. # 8) The rightmost Node in a {children} linked list will have a {next} pointer # that points to NULL, and is used to terminate the traversal of the linked list. ############################################################################### sub recurse_tree { # Address of the Root of an N-ary tree, or root of some subtree # N-ary tree given this function as input. It MUST be a directory node. # This node is passed by REFERENCE so changes occurring in this function # will change global values. my $node_address = shift; # Create deep clone of subtree my $node_tmp = Storable::dclone($node_address); # Freeze this deep clone to protect it against unwanted changes # which may occur in function recurse_tree() my $node = Storable::freeze($node_tmp); ### my $node = $$node_address; my $debug = TRUE; my $tree_size = Tree::Nary->n_nodes($root_advanced, $Tree::Nary::TRAVERSE_ALL); print "ENTERING RECURSE_TREE: The tree has $tree_size nodes\n"; if ($debug) { print "Entering recurse_tree\n"; } # Defines maximum size of a directory @small_array use constant MAX_SIZE_DIR_GROUP => 5; # In addition to pointers to other nodes, an N-ary tree node has a {data} pointer, which is # a pointer to the user-defined variable or data structure which holds the node's data. my $node_data; # For this script, the {data} pointer is for a hash containing various information # on the Dir or File that the node represents my %data_hash; if (defined $node) { # Get address of node data hash $node_data = $node->{data}; # Convert this address into a physical hash %data_hash = %$node_data } else { print "ERROR in recurse_tree, Node was undefined\n"; exit 1; } my $call_recurse_tree = sub { my $node = shift; &recurse_tree(\$node); }; # ERROR: Input node cannot be a File Node, either the N-ary tree is # malformed or the script has malfunctioned if($data_hash{type} == FILE) { print "ERROR: In recurse_tree, Node $data_hash{file_name} is a File\n"; exit 1; } elsif ( !defined($data_hash{file_name}) && !defined($data_hash{dir_name}) ){ print "ERROR: Tree Nary error, this node has no data\n"; exit 1; } else { # Input node is type == DIR, and name of element is known, as expected # RECURSIVE CASE #1: Node needing merge has {dir_merge_count} over 20, # so it needs to be placed in @top_level_dirs (which will be # processed ahead of @small_arrays of dirs) Each of these @small_arrays # will be size 20 or less. Top level dir's {needs_merge} is flipped to FALSE. # Note: {children} pointer of the input Dir Node must == DIR, because if # it was FILE its {dir_merge_count} could be no more than 3. # Then, traverse the {children} list of this Node. if ( ($data_hash{needs_merge} == TRUE) && ($data_hash{dir_merge_count} > MAX_SIZE_DIR_GROUP) ){ if ($debug){ print "\nENTERING RECURSIVE CASE #1: The tree has $tree_size nodes\n"; print "Input Dir Node has name $data_hash{dir_name}\n"; print "Input Dir Node has needs_merge $data_hash{needs_merge}\n"; print "Input Dir Node has dir_merge_count $data_hash{dir_merge_count}\n"; } # Flip the Dir Node's {needs_merge} value to false $node->{data}->{needs_merge} = FALSE; # Add the input Dir Node to @top_level_dirs push(@top_level_dirs, $data_hash{dir_vep}); # PROCESS CHILDREN LIST: recurse_tree will be called on each child of the input Dir Node Tree::Nary->children_foreach($node, $Tree::Nary::TRAVERSE_ALL, $call_recurse_tree); $tree_size = Tree::Nary->n_nodes($root_advanced, $Tree::Nary::TRAVERSE_ALL); print "LEAVING RECURSIVE CASE #1: The tree has $tree_size nodes\n"; } # RECURSIVE CASE #2: A Dir Node that is NOT a merge candidate, but that # has subdirectories, must still be processed by recurse_tree(), otherwise there # would be no way to process elements that are beneath such a Dir Node. # Recursion will continue for such a directory whether or not it's child subdirectories # require a merge. elsif ( ($data_hash{needs_merge} == FALSE) && (defined($node->{children}->{data}->{type})) && ($node->{children}->{data}->{type} == DIR) ){ if ($debug){ print "\nENTERING RECURSIVE CASE #2: The tree has $tree_size nodes\n"; print "Input Dir Node has name $data_hash{dir_name}\n"; print "Input Dir Node has needs_merge $data_hash{needs_merge}\n"; print "Type of Dir Node children list is $node->{children}->{data}->{type}\n"; } # PROCESS CHILDREN LIST: recurse_tree will be called on each child of $node Tree::Nary->children_foreach($node, $Tree::Nary::TRAVERSE_ALL, $call_recurse_tree); $tree_size = Tree::Nary->n_nodes($root_advanced, $Tree::Nary::TRAVERSE_ALL); print "LEAVING RECURSIVE CASE #2: The tree has $tree_size nodes\n"; } # BASE CASE #1: Node needing merge has 0 < {dir_merge_count} <= 20, and has # a {children} list consisting of directories. # This Dir node is a merge candidate and is the parent of a subtree of # directories small enough to form a "safe subtree" that can be loaded to a # @small_array of directories. Any files encountered in this subtree # will be loaded into one or more file @small_arrays of files by find_subtree(). # find_subtree() will also put ALL merge_candidate dirs in this subtree into a single # @small_array for dirs. Note: since the input Dir Node is a merge_candidate it # will have a {dir_merge_count} of at least 1. elsif ( ($data_hash{needs_merge} == TRUE) && ($data_hash{dir_merge_count} <= MAX_SIZE_DIR_GROUP) && (defined($node->{children}->{data}->{type})) && ($node->{children}->{data}->{type} == DIR) ){ if ($debug){ print "\nENTERING BASE CASE #1: The tree has $tree_size nodes\n"; print "Input Dir Node has name $data_hash{dir_name}\n"; print "Input Dir Node has needs_merge $data_hash{needs_merge}\n"; print "Input Dir Node has dir_merge_count $data_hash{dir_merge_count}\n"; print "Type of Dir Node children list is $node->{children}->{data}->{type}\n"; } &find_subtree(\$node); $tree_size = Tree::Nary->n_nodes($root_advanced, $Tree::Nary::TRAVERSE_ALL); print "LEAVING BASE CASE #1: The tree has $tree_size nodes\n"; } # BASE CASE #2: Process a Dir Node, whether or not it is a merge candidate, that has files as children. # The find_subtree function will correctly deal with the children list, whether it is 100% files # or a mixture of files and directories. If the input Dir Node is a merge candidate, it will be # put into a directory @small_array of size 1. elsif ( (defined($node->{children}->{data}->{type})) && ($node->{children}->{data}->{type} == FILE) ){ if ($debug){ print "\nENTERING BASE CASE #2: The tree has $tree_size nodes\n"; print "Input Dir Node has name $data_hash{dir_name}\n"; print "Type of Dir Node children list is $node->{children}->{data}->{type}\n"; } &find_subtree(\$node); $tree_size = Tree::Nary->n_nodes($root_advanced, $Tree::Nary::TRAVERSE_ALL); print "LEAVING BASE CASE #2: The tree has $tree_size nodes\n" } # BASE CASE #3: Process a Dir Node that is a merge candidate and that has no children elsif ( ($data_hash{needs_merge} == TRUE) && !(defined($node->{children}->{data}->{type})) ){ if ($debug){ print "\nENTERING BASE CASE #3: The tree has $tree_size nodes\n"; print "Input Dir Node has name $data_hash{dir_name}\n"; print "This Dir Node has no children\n"; } &no_subtree(\$node); $tree_size = Tree::Nary->n_nodes($root_advanced, $Tree::Nary::TRAVERSE_ALL); print "LEAVING BASE CASE #3: The tree has $tree_size nodes\n" } else { # DEFAULT ERROR CASE print "\nERROR: In DEFAULT ERROR CASE, this Dir Node does not fit any existing base or recursive case\n"; print "Input Dir Node has name $data_hash{dir_name}\n"; print "Input Dir Node has needs_merge $data_hash{needs_merge}\n"; print "Input Dir Node has dir_merge_count $data_hash{dir_merge_count}\n"; print "Type of Dir Node children list is $node->{children}->{data}->{type}\n"; exit 1; } } # end type == DIR else }