in reply to Need help with subdividing SGML files
#!/usr/bin/perl -w
# (Edited to reflect the code that actually DID work.)
#
# Purpose: Take the name of a file that lists custom SGML files (one file
# name per line) and subdivide each listed file into new files in the
# working directory.  Every <div type="..." id="..."> section is appended to
# a file named "<id>.<type>", with any dots in either attribute value turned
# into underscores.
use strict;
use warnings;

# Read a whole file into one string, collapsing every run of consecutive
# newlines into a single newline so that no blank lines remain.
#   $path   - name of the file to read
# Returns the text ("" for an empty file), or undef when the file cannot be
# opened; $! still holds the reason in that case.
sub ReadFileText {
    my ($path) = @_;

    # Three-argument open: the file name can never be mistaken for a mode.
    open(my $fh, '<', $path) or return;
    my $text = do { local $/ = undef; <$fh> };
    close($fh);

    $text = "" unless defined $text;

    # {2,} rather than {2}: three or more newlines in a row must also end
    # up as exactly one, otherwise blank lines survive.
    $text =~ s/\n{2,}/\n/g;
    return $text;
}

# Get all the text of one input SGML file.
#   $path   - name of the SGML file
# Returns the text.  An unreadable file is reported with a warning and
# treated as empty, so the remaining input files are still processed.
sub OpenFile {
    my ($path) = @_;

    my $text = ReadFileText($path);
    if (!defined $text) {
        warn "Can't open $path: $!\n";
        return "";
    }
    return $text;
}

# Append one section, followed by a newline, to "<name>.<ext>".
#   $name    - output file name without the extension
#   $ext     - output file extension
#   $Content - text to append
# Dies when the file cannot be opened, written or closed.
sub FileAppend {
    my ($name, $ext, $Content) = @_;

    my $Outfile = $name . "." . $ext;
    open(my $out, '>>', $Outfile)
        or die "Can't open $Outfile for appending: $!\n";
    print {$out} $Content, "\n"
        or die "Can't write to $Outfile: $!\n";
    # Buffered write errors only surface at close, so check it as well.
    close($out)
        or die "Can't close $Outfile: $!\n";
    return;
}

# Read the list of input files.
# If the program can't give a list to Muhammed, then Muhammed will give a
# list to the program.
#   $FileDef - name of a file holding one input file name per line
# Returns the non-empty lines of that file.  Dies when it cannot be opened.
sub GetInFileList {
    my ($FileDef) = @_;

    my $text = ReadFileText($FileDef);
    die "That isn't a valid file, Wesley!" unless defined $text;

    # A leading newline would otherwise produce an empty file name.
    return grep { length } split /\n/, $text;
}

# Subdivide the text of a file into its <div type ...> sections.
#   $LinesString - the whole text of one input file
# Returns one string per section, each starting at its own "<div type".
# Text in front of the first such tag belongs to no div and is left out, so
# every returned section carries the tag that names its output file.
sub GetOutFileContent {
    my ($LinesString) = @_;

    # Splitting on a lookahead keeps the tag at the head of each piece and
    # needs no marker character that might also occur in the data.
    return grep { /\A<div type/ } split /(?=<div type)/, $LinesString;
}

# Work out the output file name and extension for one section.
#   $Section - a section as returned by GetOutFileContent
# Returns ($name, $ext): the id and type attributes of the opening tag with
# dots replaced by underscores.  Returns an empty list when the tag does not
# have the form <div type="..." id="..." ...>.
sub GetOutFileName {
    my ($Section) = @_;

    my ($type, $id) =
        $Section =~ m/\A<div type="([^"]*)" id="([^"]*)"[^>]*>/
        or return;

    # The only dot in the output name should be the one before the extension.
    $id   =~ tr/./_/;
    $type =~ tr/./_/;
    return ($id, $type);
}

### Does the job
print "Enter the name of a file containing the list of files you want to work on.\n";
my $TheFile = <STDIN>;
die "No file name was entered.\n" unless defined $TheFile;
chomp($TheFile);

foreach my $InFile (GetInFileList($TheFile)) {
    my $lines = OpenFile($InFile);

    foreach my $Section (GetOutFileContent($lines)) {
        my ($name, $ext) = GetOutFileName($Section);
        if (!defined $name) {
            warn "Skipping a <div> in $InFile without the expected type and id attributes.\n";
            next;
        }
        FileAppend($name, $ext, $Section);
    }
}

#be nice and say it's done
print "Program Finished\n";
|
|---|