Because of the enormous amount of duplication in XSD files, writing them by hand can be very tedious. There are tools to reduce the pain, but they are usually quite expensive and still require quite a bit of manual work.
MJ - I have written a small DSL in Tcl to automate the largest part of creating an XSD. I call it TSD, for Tcl Schema Definition.
The DSL is fairly straightforward. For example, this TSD file:
# Example TSD input. The file is sourced as a plain Tcl script, so each
# line below is a call to one of the DSL commands (text, define, extension).

# text copies its argument to the output unchanged, followed by a newline.
text {<?xml version="1.0" encoding="UTF-8"?>}
text {<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">}

# A trailing "+" on a child name means minOccurs="1" maxOccurs="unbounded".
define root item+

# "!" means exactly one, "?" means optional (0..1); a bare name gets no
# occurrence attributes at all.
define item { part1! part2? part3 }

# A single xsd:* type produces a simple element; the optional third
# argument becomes the element's documentation.
define part1 xsd:string
define part2 xsd:decimal {This is optional}

# extension: element name, base type, list of {attribute documentation}
# pairs, and documentation for the generated complex type.
extension part3 xsd:normalizedString {{attr1 {this is attr1}}} { Complex stuff }

text "</xsd:schema>"
will be translated to:
<?xml version="1.0" encoding="UTF-8"?> <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified"> <xsd:element name="root" type="rootType"/> <xsd:complexType name="rootType"> <xsd:sequence> <xsd:element minOccurs="1" maxOccurs="unbounded" ref="item"/> </xsd:sequence> </xsd:complexType> <xsd:element name="item" type="itemType"/> <xsd:complexType name="itemType"> <xsd:sequence> <xsd:element minOccurs="1" maxOccurs="1" ref="part1"/> <xsd:element minOccurs="0" maxOccurs="1" ref="part2"/> <xsd:element ref="part3"/> </xsd:sequence> </xsd:complexType> <xsd:element name="part1" type="xsd:string"/> <xsd:element name="part2" type="xsd:decimal"> <xsd:annotation> <xsd:documentation>This is optional</xsd:documentation> </xsd:annotation> </xsd:element> <xsd:element name="part3" type="part3Type"/> <xsd:complexType name="part3Type"> <xsd:annotation> <xsd:documentation> Complex stuff </xsd:documentation> </xsd:annotation> <xsd:simpleContent> <xsd:extension base="xsd:normalizedString"> <xsd:attribute name="attr1" use="optional"> <xsd:annotation> <xsd:documentation>this is attr1</xsd:documentation> </xsd:annotation> </xsd:attribute> </xsd:extension> </xsd:simpleContent> </xsd:complexType> </xsd:schema>
The script:
# tsd2xsd -- translate a TSD (Tcl Schema Definition) file into an XSD.
#
# Usage: tsd2xsd tsd-file ?xsd-file?
#
# The TSD file is an ordinary Tcl script. It is sourced with the DSL
# commands defined below (define, extension, text, include) in scope; each
# command appends XSD markup to the global variable ::result through emit.
# When the whole file has been processed the accumulated markup is
# pretty-printed with tDOM and written to xsd-file, or to stdout when no
# xsd-file is given.

package require Tcl 8.5
package require tdom

# ----------------------------------------------------------------------
# DSL commands (called from the TSD file)
# ----------------------------------------------------------------------

# define --
#   Declare an element.
#
#   element        name of the element
#   sequence       either a single "xsd:..." type name, or a list of child
#                  element names, each optionally suffixed with +, ? or !
#                  (see emitElement)
#   documentation  optional documentation text
#
#   A single xsd: type yields a simple element carrying the documentation.
#   Anything else yields an element of type <element>Type plus the matching
#   complexType definition, which carries the documentation instead.
proc define {element sequence {documentation {}}} {
    if {[llength $sequence] == 1 && [string range $sequence 0 3] eq "xsd:"} {
        emitElement $element $sequence $documentation
        return
    }
    set typename ${element}Type
    emitElement $element $typename
    emitTypeDef $typename $sequence $documentation
}

# extension --
#   Declare an element whose type extends a simple base type with
#   optional attributes.
#
#   element        name of the element; its type is named <element>Type
#   baseType       base type of the xsd:extension
#   attributes     list of {name documentation} pairs; every attribute is
#                  emitted with use="optional"
#   documentation  optional documentation for the complex type
#
#   Annotations are only emitted for non-empty documentation, in line with
#   emitElement and emitTypeDef.
proc extension {element baseType attributes {documentation {}}} {
    set typename ${element}Type
    emit "<xsd:element name=\"$element\" type=\"$typename\"/>\n"
    emit "<xsd:complexType name=\"$typename\">\n"
    emitAnnotation $documentation
    emit "<xsd:simpleContent>\n"
    emit "<xsd:extension base=\"$baseType\">\n"
    foreach attribute $attributes {
        lassign $attribute name doc
        emit "<xsd:attribute name=\"$name\" use=\"optional\">\n"
        emitAnnotation $doc
        emit "</xsd:attribute>\n"
    }
    emit "</xsd:extension>\n"
    emit "</xsd:simpleContent>\n"
    emit "</xsd:complexType>\n"
}

# text --
#   Copy literal text to the output, followed by a newline.
proc text text {
    emit $text\n
}

# include --
#   Copy the contents of a UTF-8 encoded file to the output, surrounded by
#   newlines. The file is closed even when reading it fails.
proc include {filename} {
    set f [open $filename]
    fconfigure $f -encoding utf-8
    set failed [catch {read $f} content options]
    close $f
    if {$failed} {
        return -options $options $content
    }
    emit \n$content\n
}

# ----------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------

# emitAnnotation --
#   Emit an xsd:annotation holding the given documentation text.
#   Emits nothing when the documentation is empty.
proc emitAnnotation {documentation} {
    if {$documentation eq {}} {
        return
    }
    emit "<xsd:annotation>\n"
    emit "<xsd:documentation>$documentation</xsd:documentation>\n"
    emit "</xsd:annotation>\n"
}

# emitElement --
#   Emit one xsd:element.
#
#   With an empty element name, type is the name of a referenced element
#   and an element reference (ref="...") is emitted. A trailing character
#   on that name selects the cardinality:
#       +   minOccurs="1" maxOccurs="unbounded"
#       ?   minOccurs="0" maxOccurs="1"
#       !   minOccurs="1" maxOccurs="1"
#   Without a suffix no occurrence attributes are written.
#
#   With a non-empty element name, a named element of the given type is
#   emitted.
proc emitElement {element type {documentation {}}} {
    if {$element eq {}} {
        set cardinality {}
        switch -exact -- [string index $type end] {
            + {
                set type [string range $type 0 end-1]
                set cardinality {minOccurs="1" maxOccurs="unbounded"}
            }
            ? {
                set type [string range $type 0 end-1]
                set cardinality {minOccurs="0" maxOccurs="1"}
            }
            ! {
                set type [string range $type 0 end-1]
                set cardinality {minOccurs="1" maxOccurs="1"}
            }
        }
        emit "<xsd:element $cardinality ref=\"$type\">\n"
    } else {
        emit "<xsd:element name=\"$element\" type=\"$type\">\n"
    }
    emitAnnotation $documentation
    emit "</xsd:element>\n"
}

# emitSequence --
#   Emit an xsd:sequence containing one element reference per list item.
proc emitSequence {sequence} {
    emit "<xsd:sequence>\n"
    foreach item $sequence {
        emitElement {} $item
    }
    emit "</xsd:sequence>\n"
}

# emitTypeDef --
#   Emit a named xsd:complexType with optional documentation and a
#   sequence of element references.
proc emitTypeDef {type sequence documentation} {
    emit "<xsd:complexType name=\"$type\">\n"
    emitAnnotation $documentation
    emitSequence $sequence
    emit "</xsd:complexType>\n"
}

# emit --
#   Sink for all generated XSD text. Redefine this to match your use case.
proc emit text {
    error "You'll need to override emit"
}

# beautify --
#   Parse the XML text with tDOM and return it re-serialised with an
#   indent of 2, preceded by an XML declaration. Raises an error when the
#   text cannot be parsed. The DOM document is deleted again before
#   returning so it does not leak.
proc beautify {xml} {
    set doc [dom parse $xml]
    set failed [catch {$doc asXML -indent 2} pretty options]
    $doc delete
    if {$failed} {
        return -options $options $pretty
    }
    return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n$pretty"
}

# ================================
# redefine emit to do the right thing: collect everything in ::result
proc emit {text} {
    append ::result $text
}

if {$argc < 1 || $argc > 2} {
    puts stderr "Usage: tsd2xsd tsd-file ?xsd-file?"
    exit 1
}

lassign $argv tsd xsd

# Make both paths absolute before changing directory, otherwise relative
# paths given on the command line would stop resolving after the cd.
set tsd [file normalize $tsd]
if {$xsd ne {}} {
    set xsd [file normalize $xsd]
}

# Start from an empty result so a TSD file that emits nothing does not
# leave ::result unset.
set ::result {}

# Run the TSD file from its own directory (argv index 0; argv does not
# contain the script name) so relative include paths resolve against it.
set path [pwd]
cd [file dirname $tsd]
source -encoding utf-8 $tsd
cd $path

if {$xsd ne {}} {
    set f [open $xsd w]
    fconfigure $f -encoding utf-8
} else {
    set f stdout
}

# Fall back to the raw, unformatted text when it cannot be parsed, so the
# user can still inspect what was generated.
if {[catch {beautify $result} pretty]} {
    puts stderr $errorInfo
    puts $f $result
} else {
    puts $f $pretty
}

# Only close channels this script opened itself.
if {$f ne "stdout"} {
    close $f
}