#!perl use strict; #use warnings; use File::Spec::Functions ':ALL'; sub escapeSpecial($) { my $str=shift(@_); return(quotemeta($str)); } sub getEntityType($) { my $wholeStr=shift(@_); if(!$wholeStr || $wholeStr=~m{^\s*$}) { return('',''); } my $comparator='(\s*?<(\b\w+?\b:?\b\w+?\b).*?(?:\bname\b.*?=.*?(?: +\"|\')\b\w*?\b(?:\"|\'))?.*?>)'; if(!($wholeStr=~m{$comparator}s)) { return('',''); } return($1,$2); } sub getEntry($) { my $wholeStr=shift(@_); my $entityType=getEntityType($wholeStr); my $entry; my $comparator; my $tail; if(!$entityType) { return($wholeStr,'','','',''); } ($entry,$entityType)=getEntityType($wholeStr); $tail=substr($entry,length($entry)-2,2); if($tail eq '/>') { return getSimpleEntry($wholeStr,$entityType,$entry); } else { return getComplexEntry($wholeStr,$entityType,$entry); } } sub getComplexEntry($$$) { my $wholeStr=shift(@_); my $entityType=shift(@_); my $anyTag='.*?</?(?:[a-z_][a-z0-9.:_-]*?).*?>'; my $closeTag='</(?:xs:|.).*?>'; my $comparator; my $entityName; my $entityDefinition; my $resultStr; my $beginMatch; my $matchLen; my $chunk; my $theHead; my $theTail; my $matches=0; $comparator=$anyTag; if($entityType =~ m{(?:xs.*?|.*?)?:?import}) { $wholeStr=~m{($comparator)}sg; $entityDefinition=$1; $matchLen=pos($wholeStr); $beginMatch=0; if($wholeStr=~m{(?=$entityDefinition).}sg) { $beginMatch=pos($wholeStr)-1; } $resultStr=substr($wholeStr,0,$beginMatch).substr($wholeStr,$m +atchLen); $comparator='namespace.*?=.*?(?:\"|\')(.*?)(?:\"|\')'; $wholeStr=~m{$comparator}s; $entityName=$1; return($resultStr,$entityDefinition,$entityName,$entityType,'c +omplex'); } $chunk=$wholeStr; $chunk=~m{$anyTag}s; $beginMatch=$-[0]; $matchLen=$beginMatch; $comparator='\bname\b.*?=.*?(?:\"|\')(\b\w*?\b)(?:\"|\')'; $chunk=~m{$comparator}s; $entityName=$1; while($chunk=~m{($anyTag)}sg) { my $endOfMatch=$+[0]; my $tmp=$1; $tmp=~m{<}s; my $tmpBegin=$-[0]; $theHead=substr($tmp,$tmpBegin,2); $theTail=substr($tmp,length($tmp)-2); if($theHead ne '</') { if($theHead=~m{<.} and $theTail ne '/>'){$matches++}; } else{$matches--}; if(!$matches) { $closeTag='</'.$entityType.'.*?>'; $chunk=~m{$closeTag}s; $matchLen+=$+[0]; last; } $matchLen+=$endOfMatch; $chunk=substr($chunk,$endOfMatch); } $entityDefinition=substr($wholeStr,$beginMatch,$matchLen); $resultStr=substr($wholeStr,0,$beginMatch).substr($wholeStr,$begin +Match+$matchLen); return($resultStr,$entityDefinition,$entityName,$entityType,'compl +ex'); } sub getSimpleEntry($$$) { my $wholeStr=shift(@_); my $entityType=shift(@_); my $entityDefinition; my $comparator; my $entityName; my $resultStr; my $beginMatch; my $endMatch; $comparator='\s*?<'.$entityType.'.*?(?:\bname\b.*?=.*?(?:\"|\')(\b +\w*?\b)(?:\"|\'))?.*?/>'; if($wholeStr=~m{($comparator)}sg) { $entityDefinition=$1; $beginMatch=$-[0]; $endMatch=$+[0]; $comparator='\bname\b.*?=.*?(?:\"|\')(\b\w*?\b)(?:\"|\')'; if($wholeStr=~m{$comparator}s) { $entityName=$1; } $resultStr=substr($wholeStr,0,$beginMatch).substr($wholeStr,$e +ndMatch); return($resultStr,$entityDefinition,$entityName,$entityType,'s +imple'); } return($wholeStr,'','','',''); } my @myArgs=@ARGV; if(!$myArgs[0]) { my @myPath=splitpath(__FILE__); my $myName=$myPath[2]; print "\nUsage: ".$myName." file(s)\n\n"; print "Reorganizes xsd files to be more human readable. Puts any +header information\n"; print "first, then tries to determine a root entity and places it +next. Should more\n"; print "than one entity be a possible root entity, all are listed, +and if none are\n"; print "found, a comment to that effect is entered. Next, any rema +ining complex\n"; print "elements, then simple elements, then complex entities, and +simple entities.\n"; print "Last whatever is left (if any!;)\n\n"; print "The only requirement is a well-formed file. Qualification +for a root entity\n"; print "is that it not be referenced by any other entity.\n\n"; print "Results of the process are placed in a like named file with + the prefix of\n"; print "'ReorgXSD.\n\n"; print "Shortcomings:\n"; print "\tDoes not analyze DTD entries to determine proper settings + for\n"; print "\ttype/attribute/ref, specifically, prefixes. It merely as +sumes that\n"; print "\tany prefix ending in a colon (:) is valid an has been pro +vided for.\n\n"; print "\tCannot distinguish between relevance of entities if they +are named\n"; print "\talike.\n\n"; print "\tComments are not preserved as they cannot be meaningfully +\n"; print "\tinterpolated. There is no way to tell if a comment goes +with a\n"; print "\tspecific entity or group of entities or the whole file!:- +(\n\n"; print "\tThe concept of non-reference indicating a root element ca +n be\n"; print "\tnaive in the case of very complicated xsd files with impo +rts and\n"; print "\tother outside references.\n\n"; exit 0; } while(@myArgs) { my $inFileName=$myArgs[0]; my @pathMembers=splitpath($inFileName); my $outFileName=$pathMembers[0].$pathMembers[1]."ReorgXSD.".$pathMembe +rs[2]; my ($initFile,$fileHdr,$fileFtr); { local $/=undef; open(INITFILE,"< $inFileName") or die "Could not open initializati +on file '".$inFileName."': $!\n"; $initFile=<INITFILE>; close(INITFILE); } my $comparator; # normalize the parsing input: # remove comments (cannot meaningfully interpolate these) # remove the <?xml... and <xs:schema... header # remove the </xs:schema... footer # reduce newlines to single occurrences. $comparator='(^(?:<\?xml.*?>)*.*?<(?:xs.*?|.*:)schema\b.+?>)'; $initFile=~m{$comparator}s; $fileHdr=$1; $initFile=$'; $comparator='(^</(?:xs.?|.*):?\bschema\b.*?>)'; $initFile=~m{$comparator}ms; $fileFtr=$1; $initFile=$`; $comparator='\s*?<!--.*?-->'; $initFile=~s/$comparator//sg; $comparator='(\r\n|[\r\n]){2,}'; $initFile=~s/$comparator/$1/g; my @AllEntities; my @AllElements; my %UnReferenced; my %ComplexElements; my %SimpleElements; my @xsImports; my %ComplexEntities; my %SimpleEntities; my $fileIterator=$initFile; my $elmDef; my $elmName; my $elmType; my $tagType; ($fileIterator,$elmDef,$elmName,$elmType,$tagType)=getEntry($fileItera +tor); while($elmDef) { if($tagType eq 'complex') { if($elmType =~ m{(?:xs.*?|.*?):?element}) { %ComplexElements=(%ComplexElements,$elmName=>$elmDef); @AllEntities=(@AllEntities,$elmName); @AllElements=(@AllElements,$elmName); } elsif($elmType =~ m{(?:xs.*?|.*?):?import}) { @xsImports=(@xsImports,$elmDef); } elsif($elmName) { %ComplexEntities=(%ComplexEntities,$elmName=>$elmDef); @AllEntities=(@AllEntities,$elmName); } } else { if($elmType =~ m{(?:xs.*?|.*?):?element}) { %SimpleElements=(%SimpleElements,$elmName=>$elmDef); @AllEntities=(@AllEntities,$elmName); @AllElements=(@AllElements,$elmName); } elsif($elmType =~ m{(?:xs.*?|.*?):?import}) { @xsImports=(@xsImports,$elmDef); } elsif($elmName) { %SimpleEntities=(%SimpleEntities,$elmName=>$elmDef); @AllEntities=(@AllEntities,$elmName); } } ($fileIterator,$elmDef,$elmName,$elmType,$tagType)=getEntry($fileI +terator); } my $rootName=''; my @rootDefn; foreach my $entity (sort @AllEntities) { my $refd=0; foreach my $key (sort @AllElements) { next if $key eq $entity; if( ($ComplexEntities{$key}=~m{((?:\b\w+\b)(?<!name)\s*?=\s*?( +?:\"|\')(?:\w+?:)?\b$entity\b(?:\"|\'))}) || ($ComplexElements{$key}=~m{((?:\b\w+\b)(?<!name)\s*?=\s*?( +?:\"|\')(?:\w+?:)?\b$entity\b(?:\"|\'))}) || ($SimpleEntities{$key}=~m{((?:\b\w+\b)(?<!name)\s*?=\s*?(? +:\"|\')(?:\w+?:)?\b$entity\b(?:\"|\'))}) || ($SimpleElements{$key}=~m{((?:\b\w+\b)(?<!name)\s*?=\s*?(? +:\"|\')(?:\w+?:)?\b$entity\b(?:\"|\'))}) ) { $refd=1; last; } } if(!$refd) { %UnReferenced=(%UnReferenced,$entity=>1); } } foreach my $key (sort @AllElements) { if($UnReferenced{$key}) { if($ComplexElements{$key}) { @rootDefn=(@rootDefn,$ComplexElements{$key}); delete $ComplexElements{$key}; } elsif($SimpleElements{$key}) { @rootDefn=(@rootDefn,$SimpleElements{$key}); delete $SimpleElements{$key}; } elsif($ComplexEntities{$key}) { @rootDefn=(@rootDefn,$ComplexEntities{$key}); delete $ComplexEntities{$key}; } elsif($SimpleEntities{$key}) { @rootDefn=(@rootDefn,$SimpleEntities{$key}); delete $SimpleEntities{$key}; } } } open(OUTFILE,"> $outFileName") or die "Could not open output file $!\n +"; print OUTFILE $fileHdr."\n"; if(@xsImports) { print OUTFILE "\n<!-- imports (ReorgXSD) -->\n"; foreach my $entity (@xsImports) { print OUTFILE $entity."\n"; } } if(@rootDefn && @rootDefn == 1) { print OUTFILE "\n<!-- Root Element (ReorgXSD) -->\n"; print OUTFILE $rootDefn[0]."\n"; } elsif(@rootDefn) { my $numRoots=@rootDefn; print OUTFILE "\n<!-- No Root Element Found ($numRoots possible) ( +ReorgXSD) -->\n"; foreach my $defn (@rootDefn) { print OUTFILE $defn."\n"; } } else { print OUTFILE "\n<!-- No Root Element Found (ReorgXSD) -->\n"; } if(%ComplexElements) { print OUTFILE "\n<!-- Complex elements (ReorgXSD) -->\n"; foreach my $key (sort keys %ComplexElements) { print OUTFILE $ComplexElements{$key}."\n"; } } if(%SimpleElements) { print OUTFILE "\n<!-- Simple elements (ReorgXSD) -->\n"; foreach my $key (sort keys %SimpleElements) { print OUTFILE $SimpleElements{$key}."\n"; } } if(%ComplexEntities) { print OUTFILE "\n<!-- other named complex entities (ReorgXSD) -->\ +n"; foreach my $key (sort keys %ComplexEntities) { print OUTFILE $ComplexEntities{$key}."\n"; } } if(%SimpleEntities) { print OUTFILE "\n<!-- other named simple entities (ReorgXSD) -->\n +"; foreach my $key (sort keys %SimpleEntities) { print OUTFILE $SimpleEntities{$key}."\n"; } } if($fileIterator and $fileIterator !~ m{^\s*$}) { print OUTFILE "\n<!-- misc entities (ReorgXSD) -->\n"; print OUTFILE $fileIterator; } print OUTFILE "\n"; print OUTFILE $fileFtr; print OUTFILE "\n"; close(OUTFILE); print "Results are in file '".$outFileName."'\n"; shift(@myArgs); }
In reply to ReorgXSD.pl by dsilvia
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |