from __future__ import absolute_import
# Copyright (c) 2010-2017 openpyxl
import os
from zipfile import ZipFile
from lxml.etree import XMLSchema
from lxml.etree import parse
# Provide schema based validators, lxml required
# use schema.validate(Element) or schema.assertValid(Element) for messages
SCHEMA_FOLDER = os.path.join(os.path.dirname(__file__), 'schemas')
sheet_src = os.path.join(SCHEMA_FOLDER, 'sml.xsd')
sheet_schema = XMLSchema(file=sheet_src)
chart_src = os.path.join(SCHEMA_FOLDER, 'dml-chart.xsd')
chart_schema = XMLSchema(file=chart_src)
drawing_src = os.path.join(SCHEMA_FOLDER, 'dml-spreadsheetDrawing.xsd')
drawing_schema = XMLSchema(file=drawing_src)
drawing_main_src = os.path.join(SCHEMA_FOLDER, "dml-main.xsd")
shared_src = os.path.join(SCHEMA_FOLDER, "shared-commonSimpleTypes.xsd")
extended_src = os.path.join(SCHEMA_FOLDER, "shared-documentPropertiesExtended.xsd")
rel_src = os.path.join(SCHEMA_FOLDER, "opc-relationships.xsd")
sml_files = ['xl/styles.xml'] # , 'xl/workbook.xml']
[docs]def validate_archive(file_path):
zipfile = ZipFile(file_path)
try:
for entry in zipfile.infolist():
filename = entry.filename
f = zipfile.open(entry)
root = parse(f).getroot()
if filename in sml_files or filename.startswith('xl/worksheets/sheet'):
if root.get('{http://www.w3.org/XML/1998/namespace}space'):
# not allowed by schema
del root.attrib['{http://www.w3.org/XML/1998/namespace}space']
sheet_schema.assertValid(root)
finally:
zipfile.close()