Beefy Boxes and Bandwidth Generously Provided by pair Networks
Just another Perl shrine
 
PerlMonks  

Screwed tree scanning

by Maze (Sexton)
on Sep 30, 2006 at 12:22 UTC ( [id://575666]=perlquestion: print w/replies, xml ) Need Help??

Maze has asked for the wisdom of the Perl Monks concerning the following question:

This has been a source of so much frustration, what I orginally thought would be a fun little project has turned out to be a bloated ugly annoyingly defunct task, it's supposed to scan a directory tree and return it as hash so that it can be outputted as XML to be used by the rest of a Content Management system, unfortunately perl isn't behaving as Iexpected it to

I'm considering just rewriting the whole things as it seems to have gone haywire - the following code makes no promises in terms of beauty, it's still got my scruffy commenting all over the place

oh yes, and did I mention, it doesn't work, at all - some basic functionality if you uncomment where I say though

#!/usr/bin/perl #this is free software, you can distribute it under the same terms as +perl itself # scanfiletree pre alpha #by Christopher Monahan # scans a filetree as defined by a set of parameters passed to it # and returns it in the preferred format eg: XML use warnings; use strict; # use XML::Simple; use Cwd; use Data::Dumper; #beginning of variable declarations local $::path; #scalar for the path when in string for +m local @::path; #array for the path when in list form local @::direntry; #array for holding the directory entries +of the directory being scanned # local %::params; #basic parameters passed to scantree +via arguments and the configuration local $::root; #the virtual root to start scanning fro +m local $::troot; #the true root to start scanning from local $::outputfile; #the name of the file that will receive + the fatefull treedata # local %::handler_self_params; #parameters with some extrapolation i +nvolved destined for controlling the handler subroutines # local $::mainflag; #the main argument passed to scantree # local $::regdir[0] = 1; #the value for the register sub local %::treehash; #the hash the tree will be stored in local $::status; #here to stop things if things go sour, + or tell a particular stage to implement advanced mechanisms # end of variable declarations #begin sub definitions sub scan { #for scanning arrays in general my @hunt = split (/&/, $_[0]); if ($#hunt ne 0){ my $count = 0; my %founda; my $length = 0; while ($count != @hunt){ $founda{$count} = grep {/$hunt[$count]/} @_; $length = $length + length($founda{$count}); $count++; } return $length; #? will this work ? } else{ my @foundb = 0; @foundb = grep {/$_[0]/} @_; return $#foundb; } } ### eatdir section # sub handle_eatdir_params { # if (defined (%::handler_self_params{"eatdir"}{"split"})) { # @::path = split (/\//, $_[0]) # } # if (defined (%::handler_self_params{"eatdir"}{"register"})){ # my $count = 0; # while ($count ne $#path){ # if ($count == 0){ # register; # $count++ # } # } # if (%::handler_self_params {"eatdir"} {"register"}){ # return @path # } # else { # return $_[0] # } # } sub eatdir { my $seendir = $_[0]; my @direntries; my $direntry; # my @seendir = handle_eatdir_params(@_); # my @seendir = @_; # unless ($#seendir eq 0) {my $seendir = join ("/", @seendir)} # else {my $seendir = $seendir[0]} opendir SEENDIR, $seendir; @direntries = readdir SEENDIR; closedir SEENDIR; $direntries[0] = "pie"; $direntries[1] = "pie"; foreach $direntry (@direntries){ if (-d "$seendir/$direntry"){ eatdir ("$seendir/$direntry") } } my $direntries = join ( ':' , @direntries); handle_direntries("${seendir}:${direntries}"); } ### direntry subs ### sub handle_direntries{ &::handle_direntries_params($_[0]); &::handle_direntries_main; } sub handle_direntries_params { ($::path, @::direntry) = split (/:/, $_[0]); @::path = split (/\//, $::path); # more argument parsing for options here later } sub handle_direntries_main{ my $count = 0; local %::pathline; while ($count ne $#::path){ $::pathline{"$::path[$count]:$count"} = {}; $count ++; } $count = 0; while ($count ne $#::direntry){ unless (-d "$::path/$::direntry[$count]"){ $::pathline{"$::path[$#::path]:$#::path"}{"$::direntry[$c +ount]"} = $::FileLabel; } $count ++; } $count = $#::path; while ($count gt 0){ my $lcount = $count - 1; $::pathline{"$::path[$lcount]:$lcount"}{"$::path[$count]"} = $ +::pathline{"$::path[$count]"}; delete $::pathline{"$::path[$count]"}; $count --; } local $::mergecount; local $::mergepoint; &handle_direntries_merge; } sub mergepoint{ if (defined $_[0]){ my $val = join ('{', $::mergepoint,$_[0]); #} $val = "$val}"; $::mergepoint = $val; return $val; } else{ return ' '; } } sub handle_direntries_merge{ no strict 'refs'; # my $merge = $_[0]; # my $treeref = $_[1]; if (defined $_[0]){ my $mergepoint = mergepoint($_[0]); } else { my $mergepoint = mergepoint; } my @mergekeys = keys %::pathline$mergepoint}; my $mergekey; foreach ($mergekey, @mergekeys){ if (exists %::treehash$mergepoint{$mergekey}){ $::mergecount++; handle_direntries_merge($mergekey); $::mergecount - 1; } else { $::treehash$mergepoint{$mergekey} = $::pathline$mergepoint +{$mergekey}; } } } #here should lie some code for taking the hash of a particular pat +hline, and merging it in with the main treehash #at the moment just dump for diagnostic purposes - comment out the + above of the and uncomment the following line to get some tacky fun +ctionality # print Dumper(%{$_[0]}); # sub handle_direntries_main_alt{ # my $count = 1; # my $treeref = \%::treehash; # while ($count != $#::path){ # $treeref = \%$treeref->{$::path[$count]}; # $count++; # next if $count == $#::path; # unless (defined($treeref->{$::path[$count]})) { # $treeref->{$::path[$count]} = {"$::DirLabel" => $::path[$cou +nt]} # } # $count = 0; # while ($count != $#::direntries){ # $treeref -> {$::path[$count]}{$::direntry[$count]} = "$::Fil +eLabel" unless $treeref -> {$::path[$count]}{$::direntry[$count]} = " +$::DirLabel"; # } # } # } # sub register { #sub for keeping track of directories: eg - making su +re we are not in a recursive symlink loop, could use inode numbers he +re # if(){ # $::regdir[$::regdir[0]]="first $main::regdir[$::regdir[0]]" # } ### stage subs ### sub begin{ print "Ready to scan $::troot and deposit the resulting XML into $ +$::outputfile \n shall I proceed? (y/n)\n"; my $confirm = <STDIN>; if ($confirm =~ /y|Y/){ chroot($::troot); &eatdir($::troot); } else{ die "it was a pleasure anyway \n"; $::status = 0; } %::treehash = ("begin","+","root","/$::root"); } sub handle_main_params{ #{ # this bits a stub # $::mainflag = \$ARGV[0] unless defined(@_); # # if ($::mainflag =~ /-x|--routine/){ # # ## routine scheduling bit # # } # elsif ($::mainflag eq "-c") { # ##configuration bit # } # else { $::root = \$ARGV[0]; $::outputfile = \$ARGV[1]; my $cd = cwd; unless ($::root =~ /[^\/]/){ $::troot = "$cd\/$$::root"; } else { $::troot = $$::root; } } sub finish{#again another stub print Dumper(%::treehash); } # end of subroutine definitions #main $::FileLabel = "File"; # $::DirLabel = '+'; handle_main_params (); begin(); finish(); print 'we have reached the end 2 \n'; ## end of main h

Moved to SoPW by planetscape - and added some rudimentary formatting

( keep:0 edit:24 reap:0 )

Replies are listed 'Best First'.
Re: Screwed tree scanning
by jdporter (Paladin) on Sep 30, 2006 at 14:38 UTC

    Ah - something like this:

    use File::Find; use XML::Simple; use strict; use warnings; my $root = shift or die "Usage: $0 rootdir\n"; my %tree; find( sub { #print "$File::Find::name\n"; my @path = split m,/,, $File::Find::name; my $tree = \%tree; $tree = (($tree->{'dir'} ||= {})->{shift @path} ||= {}) while @path > ( -d _ ? 0 : 1 ); @path and # it's a file push @{ $tree->{'file'} ||= [] }, shift @path; }, $root ); print XMLout( \%tree );
    We're building the house of the future together.

      yes something like that

      I think my posted code is a prima donna example of what can happen if you throw yourself into a task without first checking to see if it's already been done

      also your code shows me just how much I have yet to learn about perl, it uses syntatic magic I never knew was possible

Re: Screwed tree scanning
by McDarren (Abbot) on Sep 30, 2006 at 12:53 UTC
    # sub register { #sub for keeping track of directories: eg - making su +re we are not in a recursive symlink loop, could use inode numbers he +re

    Actually, what I suspect you _really_ need is File::Find. Check it out - I urge you ;)

      Yes; also Getopt::Long. And probably one of the various config file management modules.

      We're building the house of the future together.

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: perlquestion [id://575666]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others avoiding work at the Monastery: (4)
As of 2024-04-25 15:57 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found