#!perl use strict; #use warnings; use File::Spec::Functions ':ALL'; sub escapeSpecial($) { my $str=shift(@_); return(quotemeta($str)); } sub getEntityType($) { my $wholeStr=shift(@_); if(!$wholeStr || $wholeStr=~m{^\s*$}) { return('',''); } my $comparator='(\s*?<(\b\w+?\b:?\b\w+?\b).*?(?:\bname\b.*?=.*?(?:\"|\')\b\w*?\b(?:\"|\'))?.*?>)'; if(!($wholeStr=~m{$comparator}s)) { return('',''); } return($1,$2); } sub getEntry($) { my $wholeStr=shift(@_); my $entityType=getEntityType($wholeStr); my $entry; my $comparator; my $tail; if(!$entityType) { return($wholeStr,'','','',''); } ($entry,$entityType)=getEntityType($wholeStr); $tail=substr($entry,length($entry)-2,2); if($tail eq '/>') { return getSimpleEntry($wholeStr,$entityType,$entry); } else { return getComplexEntry($wholeStr,$entityType,$entry); } } sub getComplexEntry($$$) { my $wholeStr=shift(@_); my $entityType=shift(@_); my $anyTag='.*?'; my $closeTag=''; my $comparator; my $entityName; my $entityDefinition; my $resultStr; my $beginMatch; my $matchLen; my $chunk; my $theHead; my $theTail; my $matches=0; $comparator=$anyTag; if($entityType =~ m{(?:xs.*?|.*?)?:?import}) { $wholeStr=~m{($comparator)}sg; $entityDefinition=$1; $matchLen=pos($wholeStr); $beginMatch=0; if($wholeStr=~m{(?=$entityDefinition).}sg) { $beginMatch=pos($wholeStr)-1; } $resultStr=substr($wholeStr,0,$beginMatch).substr($wholeStr,$matchLen); $comparator='namespace.*?=.*?(?:\"|\')(.*?)(?:\"|\')'; $wholeStr=~m{$comparator}s; $entityName=$1; return($resultStr,$entityDefinition,$entityName,$entityType,'complex'); } $chunk=$wholeStr; $chunk=~m{$anyTag}s; $beginMatch=$-[0]; $matchLen=$beginMatch; $comparator='\bname\b.*?=.*?(?:\"|\')(\b\w*?\b)(?:\"|\')'; $chunk=~m{$comparator}s; $entityName=$1; while($chunk=~m{($anyTag)}sg) { my $endOfMatch=$+[0]; my $tmp=$1; $tmp=~m{<}s; my $tmpBegin=$-[0]; $theHead=substr($tmp,$tmpBegin,2); $theTail=substr($tmp,length($tmp)-2); if($theHead ne ''){$matches++}; } else{$matches--}; if(!$matches) { $closeTag=''; $chunk=~m{$closeTag}s; $matchLen+=$+[0]; last; } $matchLen+=$endOfMatch; $chunk=substr($chunk,$endOfMatch); } $entityDefinition=substr($wholeStr,$beginMatch,$matchLen); $resultStr=substr($wholeStr,0,$beginMatch).substr($wholeStr,$beginMatch+$matchLen); return($resultStr,$entityDefinition,$entityName,$entityType,'complex'); } sub getSimpleEntry($$$) { my $wholeStr=shift(@_); my $entityType=shift(@_); my $entityDefinition; my $comparator; my $entityName; my $resultStr; my $beginMatch; my $endMatch; $comparator='\s*?<'.$entityType.'.*?(?:\bname\b.*?=.*?(?:\"|\')(\b\w*?\b)(?:\"|\'))?.*?/>'; if($wholeStr=~m{($comparator)}sg) { $entityDefinition=$1; $beginMatch=$-[0]; $endMatch=$+[0]; $comparator='\bname\b.*?=.*?(?:\"|\')(\b\w*?\b)(?:\"|\')'; if($wholeStr=~m{$comparator}s) { $entityName=$1; } $resultStr=substr($wholeStr,0,$beginMatch).substr($wholeStr,$endMatch); return($resultStr,$entityDefinition,$entityName,$entityType,'simple'); } return($wholeStr,'','','',''); } my @myArgs=@ARGV; if(!$myArgs[0]) { my @myPath=splitpath(__FILE__); my $myName=$myPath[2]; print "\nUsage: ".$myName." file(s)\n\n"; print "Reorganizes xsd files to be more human readable. Puts any header information\n"; print "first, then tries to determine a root entity and places it next. Should more\n"; print "than one entity be a possible root entity, all are listed, and if none are\n"; print "found, a comment to that effect is entered. Next, any remaining complex\n"; print "elements, then simple elements, then complex entities, and simple entities.\n"; print "Last whatever is left (if any!;)\n\n"; print "The only requirement is a well-formed file. Qualification for a root entity\n"; print "is that it not be referenced by any other entity.\n\n"; print "Results of the process are placed in a like named file with the prefix of\n"; print "'ReorgXSD.\n\n"; print "Shortcomings:\n"; print "\tDoes not analyze DTD entries to determine proper settings for\n"; print "\ttype/attribute/ref, specifically, prefixes. It merely assumes that\n"; print "\tany prefix ending in a colon (:) is valid an has been provided for.\n\n"; print "\tCannot distinguish between relevance of entities if they are named\n"; print "\talike.\n\n"; print "\tComments are not preserved as they cannot be meaningfully\n"; print "\tinterpolated. There is no way to tell if a comment goes with a\n"; print "\tspecific entity or group of entities or the whole file!:-(\n\n"; print "\tThe concept of non-reference indicating a root element can be\n"; print "\tnaive in the case of very complicated xsd files with imports and\n"; print "\tother outside references.\n\n"; exit 0; } while(@myArgs) { my $inFileName=$myArgs[0]; my @pathMembers=splitpath($inFileName); my $outFileName=$pathMembers[0].$pathMembers[1]."ReorgXSD.".$pathMembers[2]; my ($initFile,$fileHdr,$fileFtr); { local $/=undef; open(INITFILE,"< $inFileName") or die "Could not open initialization file '".$inFileName."': $!\n"; $initFile=; close(INITFILE); } my $comparator; # normalize the parsing input: # remove comments (cannot meaningfully interpolate these) # remove the )*.*?<(?:xs.*?|.*:)schema\b.+?>)'; $initFile=~m{$comparator}s; $fileHdr=$1; $initFile=$'; $comparator='(^)'; $initFile=~m{$comparator}ms; $fileFtr=$1; $initFile=$`; $comparator='\s*?'; $initFile=~s/$comparator//sg; $comparator='(\r\n|[\r\n]){2,}'; $initFile=~s/$comparator/$1/g; my @AllEntities; my @AllElements; my %UnReferenced; my %ComplexElements; my %SimpleElements; my @xsImports; my %ComplexEntities; my %SimpleEntities; my $fileIterator=$initFile; my $elmDef; my $elmName; my $elmType; my $tagType; ($fileIterator,$elmDef,$elmName,$elmType,$tagType)=getEntry($fileIterator); while($elmDef) { if($tagType eq 'complex') { if($elmType =~ m{(?:xs.*?|.*?):?element}) { %ComplexElements=(%ComplexElements,$elmName=>$elmDef); @AllEntities=(@AllEntities,$elmName); @AllElements=(@AllElements,$elmName); } elsif($elmType =~ m{(?:xs.*?|.*?):?import}) { @xsImports=(@xsImports,$elmDef); } elsif($elmName) { %ComplexEntities=(%ComplexEntities,$elmName=>$elmDef); @AllEntities=(@AllEntities,$elmName); } } else { if($elmType =~ m{(?:xs.*?|.*?):?element}) { %SimpleElements=(%SimpleElements,$elmName=>$elmDef); @AllEntities=(@AllEntities,$elmName); @AllElements=(@AllElements,$elmName); } elsif($elmType =~ m{(?:xs.*?|.*?):?import}) { @xsImports=(@xsImports,$elmDef); } elsif($elmName) { %SimpleEntities=(%SimpleEntities,$elmName=>$elmDef); @AllEntities=(@AllEntities,$elmName); } } ($fileIterator,$elmDef,$elmName,$elmType,$tagType)=getEntry($fileIterator); } my $rootName=''; my @rootDefn; foreach my $entity (sort @AllEntities) { my $refd=0; foreach my $key (sort @AllElements) { next if $key eq $entity; if( ($ComplexEntities{$key}=~m{((?:\b\w+\b)(?1); } } foreach my $key (sort @AllElements) { if($UnReferenced{$key}) { if($ComplexElements{$key}) { @rootDefn=(@rootDefn,$ComplexElements{$key}); delete $ComplexElements{$key}; } elsif($SimpleElements{$key}) { @rootDefn=(@rootDefn,$SimpleElements{$key}); delete $SimpleElements{$key}; } elsif($ComplexEntities{$key}) { @rootDefn=(@rootDefn,$ComplexEntities{$key}); delete $ComplexEntities{$key}; } elsif($SimpleEntities{$key}) { @rootDefn=(@rootDefn,$SimpleEntities{$key}); delete $SimpleEntities{$key}; } } } open(OUTFILE,"> $outFileName") or die "Could not open output file $!\n"; print OUTFILE $fileHdr."\n"; if(@xsImports) { print OUTFILE "\n\n"; foreach my $entity (@xsImports) { print OUTFILE $entity."\n"; } } if(@rootDefn && @rootDefn == 1) { print OUTFILE "\n\n"; print OUTFILE $rootDefn[0]."\n"; } elsif(@rootDefn) { my $numRoots=@rootDefn; print OUTFILE "\n\n"; foreach my $defn (@rootDefn) { print OUTFILE $defn."\n"; } } else { print OUTFILE "\n\n"; } if(%ComplexElements) { print OUTFILE "\n\n"; foreach my $key (sort keys %ComplexElements) { print OUTFILE $ComplexElements{$key}."\n"; } } if(%SimpleElements) { print OUTFILE "\n\n"; foreach my $key (sort keys %SimpleElements) { print OUTFILE $SimpleElements{$key}."\n"; } } if(%ComplexEntities) { print OUTFILE "\n\n"; foreach my $key (sort keys %ComplexEntities) { print OUTFILE $ComplexEntities{$key}."\n"; } } if(%SimpleEntities) { print OUTFILE "\n\n"; foreach my $key (sort keys %SimpleEntities) { print OUTFILE $SimpleEntities{$key}."\n"; } } if($fileIterator and $fileIterator !~ m{^\s*$}) { print OUTFILE "\n\n"; print OUTFILE $fileIterator; } print OUTFILE "\n"; print OUTFILE $fileFtr; print OUTFILE "\n"; close(OUTFILE); print "Results are in file '".$outFileName."'\n"; shift(@myArgs); }