From: Raffael Meyer <14891507+barredterra@users.noreply.github.com> Date: Wed, 22 Jan 2025 16:12:14 +0000 (+0100) Subject: feat: strict mode for parsing (#55) X-Git-Tag: 2025.1.0~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=94eab55532baa0ae141037d85d3e4a5f266b95fa;p=thirdparty%2Fpython-drafthorse.git feat: strict mode for parsing (#55) * feat: strict mode for parsing * docs: lax and strict parsing and serialization --- diff --git a/README.rst b/README.rst index 63d297d..07cac92 100644 --- a/README.rst +++ b/README.rst @@ -45,6 +45,8 @@ Parsing:: doc = Document.parse(samplexml) print(doc.trade.agreement.seller.name) +``Document.parse()`` taskes a boolean parameter ``strict`` which defaults to ``True``. This means that the parser will raise an error if it encounters any unknown element. If you set it to ``False``, the parser will not raise an error and parse whatever it can. + Generating:: import os @@ -136,6 +138,7 @@ Generating:: with open("output.pdf", "wb") as f: f.write(new_pdf_bytes) +``Document.serialize()`` will validate the generated XML against the specified schema and raise an error if it is not valid. If you want to avoid validation, you can set the ``schema`` parameter to ``None``. Development ----------- diff --git a/drafthorse/models/container.py b/drafthorse/models/container.py index eb1755e..f5ec87e 100644 --- a/drafthorse/models/container.py +++ b/drafthorse/models/container.py @@ -19,9 +19,9 @@ class Container: def empty_element(self): return self.child_type() - def add_from_etree(self, root): + def add_from_etree(self, root, strict=True): childel = self.empty_element() - childel.from_etree(root) + childel.from_etree(root, strict) self.add(childel) @@ -46,7 +46,7 @@ class SimpleContainer(Container): self.set_element(el, child) el.append_to(node) - def add_from_etree(self, root): + def add_from_etree(self, root, strict=True): self.add(root.text) @@ -60,7 +60,7 @@ class CurrencyContainer(SimpleContainer): el._amount = child[0] el._currency = child[1] - def add_from_etree(self, root): + def add_from_etree(self, root, strict=True): self.add((root.text, root.attrib.get("currencyID"))) @@ -74,7 +74,7 @@ class IDContainer(SimpleContainer): el._text = child[1] el._scheme_id = child[0] - def add_from_etree(self, root): + def add_from_etree(self, root, strict=True): self.add((root.attrib["schemeID"], root.text)) @@ -87,5 +87,5 @@ class StringContainer(SimpleContainer): def set_element(self, el, child): el._text = child - def add_from_etree(self, root): + def add_from_etree(self, root, strict=True): self.add(root.text) diff --git a/drafthorse/models/elements.py b/drafthorse/models/elements.py index ff27264..2d763b7 100644 --- a/drafthorse/models/elements.py +++ b/drafthorse/models/elements.py @@ -88,7 +88,7 @@ class Element(metaclass=BaseElementMeta): ) return super().__setattr__(key, value) - def from_etree(self, root): + def from_etree(self, root, strict=True): if ( hasattr(self, "Meta") and hasattr(self.Meta, "namespace") @@ -109,19 +109,19 @@ class Element(metaclass=BaseElementMeta): if child.tag in field_index: name, _childel = field_index[child.tag] if isinstance(getattr(self, name), Container): - getattr(self, name).add_from_etree(child) + getattr(self, name).add_from_etree(child, strict) else: - getattr(self, name).from_etree(child) - else: + getattr(self, name).from_etree(child, strict) + elif strict: raise TypeError("Unknown element {}".format(child.tag)) return self @classmethod - def parse(cls, xmlinput): + def parse(cls, xmlinput, strict=True): from lxml import etree root = etree.fromstring(xmlinput) - return cls().from_etree(root) + return cls().from_etree(root, strict) class StringElement(Element): @@ -149,7 +149,7 @@ class StringElement(Element): node.text = self._text return node - def from_etree(self, root): + def from_etree(self, root, strict=True): self._text = root.text self._set_on_input = True return self @@ -168,7 +168,7 @@ class DecimalElement(StringElement): def __str__(self): return self._value - def from_etree(self, root): + def from_etree(self, root, strict=True): self._value = Decimal(root.text) self._set_on_input = True return self @@ -189,7 +189,7 @@ class QuantityElement(StringElement): def __str__(self): return "{} {}".format(self._amount, self._unit_code) - def from_etree(self, root): + def from_etree(self, root, strict=True): self._amount = Decimal(root.text) self._unit_code = root.attrib["unitCode"] self._set_on_input = True @@ -211,7 +211,7 @@ class CurrencyElement(StringElement): del node.attrib["currencyID"] return node - def from_etree(self, root): + def from_etree(self, root, strict=True): self._amount = Decimal(root.text) self._currency = root.attrib.get("currencyID") or None self._set_on_input = True @@ -235,7 +235,7 @@ class ClassificationElement(StringElement): node.attrib["listVersionID"] = self._list_version_id return node - def from_etree(self, root): + def from_etree(self, root, strict=True): self._text = Decimal(root.text) self._list_id = root.attrib["listID"] self._list_version_id = root.attrib["listVersionID"] @@ -260,7 +260,7 @@ class BinaryObjectElement(StringElement): node.text = self._text return node - def from_etree(self, root): + def from_etree(self, root, strict=True): self._mime_code = root.attrib["mimeCode"] self._filename = root.attrib["filename"] self._text = root.text @@ -283,7 +283,7 @@ class AgencyIDElement(StringElement): node.attrib["schemeAgencyID"] = self._scheme_id return node - def from_etree(self, root): + def from_etree(self, root, strict=True): self._text = root.text self._scheme_id = root.attrib["schemeAgencyID"] self._set_on_input = True @@ -306,7 +306,7 @@ class IDElement(StringElement): node.attrib["schemeID"] = self._scheme_id return node - def from_etree(self, root): + def from_etree(self, root, strict=True): self._text = root.text try: self._scheme_id = root.attrib["schemeID"] @@ -346,11 +346,14 @@ class DateTimeElement(StringElement): t.append(node) return t - def from_etree(self, root): + def from_etree(self, root, strict=True): if len(root) != 1: raise TypeError("Date containers should have one child") if root[0].tag != "{%s}%s" % (self._date_time_namespace, "DateTimeString"): - raise TypeError("Tag %s not recognized" % root[0].tag) + if strict: + raise TypeError("Tag %s not recognized" % root[0].tag) + else: + return self self._format = root[0].attrib["format"] if self._format == "102": self._value = datetime.strptime(root[0].text, "%Y%m%d").date() @@ -362,7 +365,7 @@ class DateTimeElement(StringElement): self._value = w.monday() else: self._value = datetime.strptime(root[0].text + "1", "%G%V%u").date() - else: + elif strict: raise TypeError( "Date format %s cannot be parsed" % root[0].attrib["format"] ) @@ -384,7 +387,7 @@ class DirectDateTimeElement(StringElement): t.text = self._value.strftime("%Y-%m-%dT%H:%M:%S") return t - def from_etree(self, root): + def from_etree(self, root, strict=True): try: self._value = datetime.strptime(root.text, "%Y-%m-%dT%H:%M:%S").date() except Exception: @@ -416,7 +419,7 @@ class IndicatorElement(StringElement): def __str__(self): return "{}".format(self._value) - def from_etree(self, root): + def from_etree(self, root, strict=True): if len(root) != 1: raise TypeError("Indicator containers should have one child") if root[0].tag != "{%s}%s" % (NS_UDT, "Indicator"):