comment on

This has been a source of so much frustration, what I orginally thought would be a fun little project has turned out to be a bloated ugly annoyingly defunct task, it's supposed to scan a directory tree and return it as hash so that it can be outputted as XML to be used by the rest of a Content Management system, unfortunately perl isn't behaving as Iexpected it to

I'm considering just rewriting the whole things as it seems to have gone haywire - the following code makes no promises in terms of beauty, it's still got my scruffy commenting all over the place

oh yes, and did I mention, it doesn't work, at all - some basic functionality if you uncomment where I say though

#!/usr/bin/perl
#this is free software, you can distribute it under the same terms as 
+perl itself
# scanfiletree pre alpha
#by Christopher Monahan 
# scans a filetree as defined by a set of parameters passed to it 
# and returns it in the preferred format  eg: XML

use warnings;
use strict;
# use XML::Simple;
use Cwd;
use Data::Dumper;
 
#beginning of variable declarations
local $::path;                 #scalar for the path when in string for
+m
local @::path;                 #array for the path when in list form
local @::direntry;           #array for holding the directory entries 
+of the directory being scanned 
# local %::params;               #basic parameters passed to scantree 
+via arguments and the configuration
local $::root;                 #the virtual root to start scanning fro
+m
local $::troot;                #the true root to start scanning from
local $::outputfile;           #the name of the file that will receive
+ the fatefull treedata
# local %::handler_self_params;  #parameters with some extrapolation i
+nvolved destined for controlling the handler subroutines
# local $::mainflag;             #the main argument passed to scantree
# local $::regdir[0] = 1;        #the value for the register sub
local %::treehash;             #the hash the tree will be stored in
local $::status;               #here to stop things if things go sour,
+ or tell a particular stage to implement advanced mechanisms
# end of variable declarations

#begin sub definitions

sub scan { #for scanning arrays in general
    my @hunt = split (/&/, $_[0]);
    if ($#hunt ne 0){
        my $count = 0;
        my %founda;
        my $length = 0;
        while ($count != @hunt){
            $founda{$count} = grep {/$hunt[$count]/} @_;
            $length = $length + length($founda{$count});
            $count++;
            }
    return $length; #? will this work ?
    }
    else{
        my @foundb = 0;
            @foundb = grep {/$_[0]/} @_;
        return $#foundb;
    }
}


### eatdir section

# sub handle_eatdir_params {
#      if (defined (%::handler_self_params{"eatdir"}{"split"})) {
#          @::path = split (/\//, $_[0])
#      } 
#     if (defined (%::handler_self_params{"eatdir"}{"register"})){
#         my $count = 0;
#         while ($count ne $#path){
#             if ($count == 0){
#             register;
#             $count++
#         }
#     }

#     if (%::handler_self_params {"eatdir"} {"register"}){
#         return @path
#     }
#     else {
#         return $_[0]
#     }
# }

sub eatdir {
    my $seendir = $_[0];
    my @direntries;
    my $direntry;
#     my @seendir = handle_eatdir_params(@_);
#     my @seendir = @_;
#     unless ($#seendir eq 0) {my $seendir = join ("/", @seendir)}
#     else {my $seendir = $seendir[0]}

    opendir SEENDIR, $seendir;
    @direntries = readdir SEENDIR;
    closedir SEENDIR;
    $direntries[0] = "pie";
    $direntries[1] = "pie";
    foreach $direntry (@direntries){
        if (-d "$seendir/$direntry"){
            eatdir ("$seendir/$direntry")
        }
    }
    my $direntries = join ( ':' , @direntries);
    handle_direntries("${seendir}:${direntries}");
}

### direntry subs ###
sub handle_direntries{
    &::handle_direntries_params($_[0]);
    &::handle_direntries_main;
}

sub handle_direntries_params {
    ($::path, @::direntry) = split (/:/, $_[0]);
    @::path = split (/\//, $::path);
    # more argument parsing for options here later
}
    
sub handle_direntries_main{
    my $count = 0;
    local %::pathline;
    while ($count ne $#::path){
        $::pathline{"$::path[$count]:$count"} = {};
        $count ++;
    }
    $count = 0;
    while ($count ne $#::direntry){
        unless (-d "$::path/$::direntry[$count]"){
             $::pathline{"$::path[$#::path]:$#::path"}{"$::direntry[$c
+ount]"} = $::FileLabel;
         }
        $count ++;
     }
    $count = $#::path;
    while ($count gt 0){
        my $lcount = $count - 1;
        $::pathline{"$::path[$lcount]:$lcount"}{"$::path[$count]"} = $
+::pathline{"$::path[$count]"};
        delete $::pathline{"$::path[$count]"};
        $count --;
    }
    local $::mergecount;
    local $::mergepoint;
    &handle_direntries_merge;
}

sub mergepoint{
    if (defined $_[0]){
        my $val = join ('{', $::mergepoint,$_[0]); #}
        $val = "$val}";
        $::mergepoint = $val;
        return $val;
    }
    else{
        return ' ';
    }
}
    

sub handle_direntries_merge{
    no strict 'refs';
#     my $merge = $_[0];
#     my $treeref = $_[1];
    if (defined $_[0]){
        my $mergepoint = mergepoint($_[0]);
    }
    else {
        my $mergepoint = mergepoint;
    }
    my @mergekeys = keys %::pathline$mergepoint};
    my $mergekey;
    foreach ($mergekey, @mergekeys){
        if (exists %::treehash$mergepoint{$mergekey}){
            $::mergecount++;
            handle_direntries_merge($mergekey);
            $::mergecount - 1;
        }
        else {
            $::treehash$mergepoint{$mergekey} = $::pathline$mergepoint
+{$mergekey};
        }
    }
}

    #here should lie some code for taking the hash of a particular pat
+hline, and merging it in with the main treehash        
    #at the moment just dump for diagnostic purposes - comment out the
+ above of the  and uncomment the following line to get some tacky fun
+ctionality
#     print Dumper(%{$_[0]});


# sub handle_direntries_main_alt{
#     my $count = 1;
#     my $treeref = \%::treehash;
#     while ($count != $#::path){
#         $treeref = \%$treeref->{$::path[$count]};
#         $count++;
#         next if $count == $#::path;
#         unless (defined($treeref->{$::path[$count]})) {
#         $treeref->{$::path[$count]} = {"$::DirLabel" => $::path[$cou
+nt]}
#         }

#     $count = 0;
#     while ($count != $#::direntries){
#         $treeref -> {$::path[$count]}{$::direntry[$count]} = "$::Fil
+eLabel" unless $treeref -> {$::path[$count]}{$::direntry[$count]} = "
+$::DirLabel";
#     } 
# }
# }

# sub register { #sub for keeping track of directories: eg - making su
+re we are not in a recursive symlink loop, could use inode numbers he
+re
#     if(){
#         $::regdir[$::regdir[0]]="first $main::regdir[$::regdir[0]]"
#     }
### stage subs ###

sub begin{
    print "Ready to scan $::troot and deposit the resulting XML into $
+$::outputfile \n shall I proceed? (y/n)\n";
    my $confirm = <STDIN>;
    if ($confirm =~ /y|Y/){
        chroot($::troot);
        &eatdir($::troot);
    }
    else{
        die "it was a pleasure anyway \n";
        $::status = 0;
    }
    %::treehash = ("begin","+","root","/$::root");
}



sub handle_main_params{ #{ # this bits a stub
#     $::mainflag = \$ARGV[0] unless defined(@_);
# #     if ($::mainflag =~ /-x|--routine/){
# #         ## routine scheduling bit
# #     }
#     elsif ($::mainflag eq "-c") {
#         ##configuration bit
#     }
#     else {
        $::root = \$ARGV[0];
        $::outputfile = \$ARGV[1];
        my $cd = cwd;
        unless ($::root =~ /[^\/]/){
             $::troot = "$cd\/$$::root";
         }
        else {
            $::troot = $$::root;
         }
}

sub finish{#again another stub
    print Dumper(%::treehash);
}

# end of subroutine definitions

#main

$::FileLabel = "File";
# $::DirLabel = '+';

handle_main_params ();

begin();

finish();

print 'we have reached the end 2 \n';

## end of main

h
[download]

Moved to SoPW by planetscape - and added some rudimentary formatting

( keep:0 edit:24 reap:0 )

In reply to Screwed tree scanning by Maze

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.