Source code for openpyxl.develop.classify

from __future__ import absolute_import, print_function
# Copyright (c) 2010-2017 openpyxl

"""
Generate Python classes from XML Schema
Disclaimer: this is really shabby, "works well enough" code.

The spyne library does a much better job of interpreting the schema.
"""

import argparse
import re
import logging

# Import-time side effect: configure the root logger to write records of
# level DEBUG and above to "classify.log" (a relative path, so it resolves
# against the current working directory).
logging.basicConfig(filename="classify.log", level=logging.DEBUG)

from openpyxl.tests.schema import (
    sheet_src,
    chart_src,
    drawing_main_src,
    drawing_src,
    shared_src,
    )

from lxml.etree import parse


# Namespace URI of XML Schema; interpolated into "{namespace}tag" paths
# for every find()/findall() lookup in this module.
XSD = "http://www.w3.org/2001/XMLSchema"

# Schema simple-type name -> name of the descriptor class written into the
# generated code (and imported from openpyxl.descriptors in the header).
simple_mapping = {
    # XML Schema built-ins
    'xsd:boolean': 'Bool',
    'xsd:unsignedInt': 'Integer',
    'xsd:unsignedShort': 'Integer',
    'xsd:int': 'Integer',
    'xsd:double': 'Float',
    'xsd:string': 'String',
    'xsd:unsignedByte': 'Integer',
    'xsd:byte': 'Integer',
    # NOTE(review): xsd:long is an integer type but maps to Float here,
    # while complex_mapping maps 'Long' to Integer -- confirm intent.
    'xsd:long': 'Float',
    'xsd:token': 'String',
    'xsd:dateTime': 'DateTime',
    # Schema-specific simple types
    's:ST_Panose': 'HexBinary',
    's:ST_Lang': 'String',
    'ST_Percentage': 'String',
    'ST_PositivePercentage': 'Percentage',
    'ST_TextPoint': 'TextPoint',
    'ST_UniversalMeasure': 'UniversalMeasure',
    'ST_Coordinate32': 'Coordinate',
    'ST_Coordinate': 'Coordinate',
    'ST_Coordinate32Unqualified': 'Coordinate',
    's:ST_Xstring': 'String',
    'ST_Angle': 'Integer',
}

# Element type name (after the "CT_" prefix has been stripped by classify)
# -> descriptor class name emitted with nested=True.
complex_mapping = {
    'Boolean': 'Bool',
    'Double': 'Float',
    'Long': 'Integer',
}


# Matches a simple-type reference with an optional one-letter schema prefix
# ("a:", "s:").  Digits are allowed in the name so that types such as
# ST_Coordinate32 are captured in full instead of being cut off at the
# first digit.
ST_REGEX = re.compile(r"(?P<schema>[a-z]:)?(?P<typename>ST_[A-Za-z0-9]+)")


def get_attribute_group(schema, tagname):
    """
    Return the attribute declarations of a named attribute group

    Looks up the ``attributeGroup`` called *tagname* directly below the root
    of *schema* and returns the list of its direct ``attribute`` children.
    """
    group_path = "{%s}attributeGroup[@name='%s']" % (XSD, tagname)
    attribute_path = "{%s}attribute" % XSD
    return schema.find(group_path).findall(attribute_path)
def get_element_group(schema, tagname):
    """
    Return the element declarations of a named element group

    Looks up the ``group`` called *tagname* directly below the root of
    *schema* and returns every ``element`` found at any depth inside it.
    """
    group = schema.find("{%s}group[@name='%s']" % (XSD, tagname))
    elements = group.findall(".//{%s}element" % XSD)
    return elements
def classify(tagname, src=sheet_src, schema=None):
    """
    Generate a Python-class based on the schema definition

    :param tagname: name of the ``complexType`` to convert (e.g. ``CT_Font``);
        the first three characters are dropped to form the class name.
    :param src: schema source handed to ``parse`` when *schema* is not given.
    :param schema: an already parsed schema tree; it is only read, never
        modified.
    :returns: a tuple ``(source, types, children)`` -- the text of the
        generated class, the set of descriptor names it uses, and the list of
        complex type names of child elements that still need generating.
    :raises ValueError: if *schema* has no ``complexType`` named *tagname*.
    """
    if schema is None:
        schema = parse(src)
    node = schema.find("{%s}complexType[@name='%s']" % (XSD, tagname))
    if node is None:
        raise ValueError("Tag {0} not found".format(tagname))

    types = set()
    attrs = []
    # Fragments of the generated source; joined once at the end.
    out = ["\n\nclass %s(Serialisable):\n\n" % tagname[3:]]

    # Descend into simpleContent / extension when present: the attribute and
    # element declarations live there.
    node = derived(node)
    node = extends(node)

    # attributes
    attributes = node.findall("{%s}attribute" % XSD)
    _group = node.find("{%s}attributeGroup" % XSD)
    if _group is not None:
        group_ref = _group.get('ref')
        out.append("    #Using attribute group{0}\n".format(group_ref))
        attributes.extend(get_attribute_group(schema, group_ref))

    for el in attributes:
        # Work on a copy: el.attrib is a live view of the schema tree, and
        # writing "use"/"type" back into it would corrupt the declaration for
        # every other type sharing the attribute group and for repeated runs.
        attr = dict(el.attrib)
        if 'ref' in attr:
            continue
        typename = attr.get("type")
        if typename is None:
            # e.g. an inline simpleType; handled like unresolvable elements
            logging.log(logging.DEBUG, "Cannot resolve {0}".format(el.tag))
            continue
        attrs.append(attr['name'])

        attr["use"] = "allow_none=True" if attr.get("use") == "optional" else ""

        if typename.startswith("ST_"):
            attr['type'] = simple(typename, schema, attr['use'])
            types.add(attr['type'].split("(")[0])
            out.append("    {name} = {type}\n".format(**attr))
        elif typename in simple_mapping:
            attr['type'] = simple_mapping[typename]
            types.add(attr['type'])
            out.append("    {name} = {type}({use})\n".format(**attr))
        else:
            out.append("    {name} = Typed(expected_type={type}, {use})\n".format(**attr))

    # elements
    children = []
    element_names = []
    elements = node.findall(".//{%s}element" % XSD)
    choice = node.findall("{%s}choice" % XSD)
    if choice:
        out.append("    # some elements are choice\n")
    # Alternatives of a choice are optional.  Membership has to be tested
    # against the elements *inside* the choices: an element node is never
    # itself one of the choice nodes.
    choice_elements = [
        alt for c in choice for alt in c.findall(".//{%s}element" % XSD)
    ]

    for group in node.findall("{%s}sequence/{%s}group" % (XSD, XSD)):
        ref = group.get("ref")
        out.append("    # uses element group {0}\n".format(ref))
        elements.extend(get_element_group(schema, ref))

    # Schemas referenced through a prefix, parsed at most once per call.
    foreign_schemas = {}

    for el in elements:
        attr = {'name': el.get("name")}

        typename = el.get("type")
        if typename is None:
            logging.log(logging.DEBUG, "Cannot resolve {0}".format(el.tag))
            continue

        if typename.startswith("xsd:"):
            attr['type'] = simple_mapping[typename]
            types.add(attr['type'])
        else:
            match = ST_REGEX.match(typename)
            if match is not None:
                # Simple type, possibly defined in another schema.  The
                # lookup schema is kept in its own variable so the caller's
                # schema stays in effect for all remaining elements.
                lookup = schema
                src = srcs_mapping.get(match.group('schema'))
                if src is not None:
                    prefix = match.group('schema')
                    if prefix not in foreign_schemas:
                        foreign_schemas[prefix] = parse(src)
                    lookup = foreign_schemas[prefix]
                attr['type'] = simple(match.group('typename'), lookup)
            else:
                # Complex type: strip "a:CT_" / "s:CT_" or plain "CT_" and
                # report it so the caller can generate the class as well.
                if typename.startswith(("a:", "s:")):
                    attr['type'] = typename[5:]
                else:
                    attr['type'] = typename[3:]
                children.append(typename)
                element_names.append(attr['name'])

        attr['use'] = ""
        if el.get("minOccurs") == "0" or el in choice_elements:
            attr['use'] = "allow_none=True"
        attrs.append(attr['name'])
        if attr['type'] in complex_mapping:
            attr['type'] = complex_mapping[attr['type']]
            out.append("    {name} = {type}(nested=True, {use})\n".format(**attr))
        else:
            out.append("    {name} = Typed(expected_type={type}, {use})\n".format(**attr))

    if element_names:
        out.append("\n    __elements__ = {0}\n".format(tuple(element_names)))

    if attrs:
        # The emitted text is indented so that the assignments form the body
        # of __init__ and the whole class is valid Python.
        out.append("\n    def __init__(self,\n")
        for name in attrs:
            out.append("                 %s=None,\n" % name)
        out.append("                ):\n")
        for name in attrs:
            out.append("        self.{0} = {0}\n".format(name))
    else:
        out.append("    pass")

    return "".join(out), types, children
def derived(node):
    """
    Return the ``simpleContent`` child of *node*, or *node* itself

    The result is compared against ``None`` explicitly: the truth value of an
    element reflects whether it has children, so ``base or node`` would
    discard a ``simpleContent`` element that happens to be empty.
    """
    base = node.find("{%s}simpleContent" % XSD)
    return base if base is not None else node
def extends(node):
    """
    Return the ``extension`` child of *node*, or *node* itself

    The result is compared against ``None`` explicitly: the truth value of an
    element reflects whether it has children, so ``base or node`` would
    discard an ``extension`` element that declares nothing of its own.
    """
    base = node.find("{%s}extension" % XSD)
    return base if base is not None else node
def simple(tagname, schema, use=""):
    """
    Return the descriptor expression for a schema ``simpleType``

    :param tagname: name of the ``simpleType`` to look up in *schema*.
    :param schema: parsed schema tree to search.
    :param use: accepted for the callers' convenience but currently not
        included in the result.
    :returns: ``"<Type>()"`` for a plain restriction, ``"Set(values=(...))"``
        for an enumeration, ``"NoneSet(values=(...))"`` for an enumeration
        containing ``"none"`` (which is dropped from the values), or an
        "unknown" marker string when the type or its restriction is missing.
    """
    node = schema.find("{%s}simpleType[@name='%s']" % (XSD, tagname))
    # A type that is not declared in this schema is reported the same way as
    # one without a restriction, instead of failing on None.find().
    constraint = None if node is None else node.find("{%s}restriction" % XSD)
    if constraint is None:
        return "unknown defintion for {0}".format(tagname)

    base = constraint.get("base")
    typ = "{0}()".format(simple_mapping.get(base, base))

    values = [v.get('value') for v in constraint.findall("{%s}enumeration" % XSD)]
    if values:
        descriptor = "Set"
        if "none" in values:
            # "none" is expressed by the descriptor itself
            values.remove("none")
            descriptor = "NoneSet"
        typ = descriptor + "(values=({0}))".format(values)
    return typ
# Type-name prefix -> schema source in which the prefixed type is defined.
srcs_mapping = {
    'a:': drawing_main_src,
    's:': shared_src,
}
class ClassMaker:
    """
    Generate the source for a schema type and all types it depends on

    After construction ``body`` holds the generated classes (dependencies
    first), ``types`` the descriptor names they use and ``classes`` the names
    of the types already handled.  ``str(maker)`` returns the complete module
    text including the import header.
    """

    def __init__(self, tagname, src=sheet_src, classes=None):
        """
        :param tagname: name of the complex type to generate.
        :param src: schema source in which *tagname* is defined.
        :param classes: set of type names that have already been generated.
            It is updated in place and shared with the makers created for
            types from other schemas; a new set is used when omitted.
        """
        self.schema = parse(src)
        self.types = set()
        # Compare with None rather than truthiness: an empty set supplied by
        # the caller must still be the one that is shared and updated.
        self.classes = set() if classes is None else classes
        self.body = ""
        self.create(tagname)

    def create(self, tagname):
        """
        Generate *tagname* and, recursively, every child type not yet seen
        """
        # Mark the type before descending so that self-referential or
        # mutually recursive types do not recurse without end.
        self.classes.add(tagname)

        body, types, children = classify(tagname, schema=self.schema)
        self.body = body + self.body
        self.types = self.types.union(types)

        for child in children:
            if child.startswith(("a:", "s:")):
                # Defined in another schema: delegate to a maker for that
                # schema which shares the set of generated names.
                src = srcs_mapping[child[:2]]
                name = child[2:]
                if name not in self.classes:
                    cm = ClassMaker(name, src=src, classes=self.classes)
                    self.body = cm.body + self.body  # prepend dependent types
                    self.types.update(cm.types)
                    self.classes.add(name)
                continue

            if child not in self.classes:
                self.create(child)

    def __str__(self):
        header = (
            "#Autogenerated schema\n"
            "from openpyxl.descriptors.serialisable import Serialisable\n"
            "from openpyxl.descriptors import (\n    Typed,"
        )
        # Sorted so that the generated import list is reproducible.
        imports = "".join("\n    {0},".format(t) for t in sorted(self.types))
        return header + imports + ")\n" + self.body
def make(element, schema=sheet_src):
    """
    Print the generated module text for *element*

    A ``ClassMaker`` is built for *element* using *schema* as its source and
    its string form is written to standard output.
    """
    print(ClassMaker(element, schema))
# Command line interface: the type to convert plus the name of the schema
# source (one of the module-level *_src names) in which it is defined.
commands = argparse.ArgumentParser(
    description="Generate Python classes for a specific scheme element",
)
commands.add_argument('element', help='The XML type to be converted')
commands.add_argument(
    '--schema',
    default="sheet_src",
    choices=["sheet_src", "chart_src", "shared_src", "drawing_src", "drawing_main_src"],
    help='The relevant schema. The default is for worksheets',
)


if __name__ == "__main__":
    args = commands.parse_args()
    # The option value is the name of a module-level variable.
    schema = globals().get(args.schema)
    make(args.element, schema)