git.ipfire.org Git - thirdparty/python-drafthorse.git/commitdiff
Add xml info to pdf metadata2 (#15)
author Marko Luther <marko.luther@gmx.net>
Sun, 10 Mar 2024 15:47:25 +0000 (16:47 +0100)
committer GitHub <noreply@github.com>
Sun, 10 Mar 2024 15:47:25 +0000 (16:47 +0100)
* Bump to 2.3.0

* PR #7 Include XML information in PDF metadata (cmcproject)

* update mustang validator

* fixes fx:DocumentFileName / fx:DocumentType order

* remove `schemas/ZUGFeRD2p2_extension_schema.xmp` (replaced by `xmp_schema.py` generator)

* removes date and seller from pdf metadata subject to simplify the code and remove hard coded English language

* removes doc type date and seller from pdf metadata subject (incl. the restriction to documents of type 380) to simplify the code and remove hard coded English language

* remove the now-unused constant INVOICE_TYPE_CODE and avoid the use of bare `except`

* removes failing "Invalid doc type! XML value for TypeCode shall be 380 for an invoice." test

* allows supplying an explicit profile level and extends profile auto-detection to cover XRECHNUNG

* minor code style improvements like lazy % formatting in logging functions (logging-fstring-interpolation)

* fixes style (black)

* tests for auto-detecting XRechnung v2 and v3 profiles

* blacking again

* tests for en16931 auto profile recognition and auto profile recognition failure

* black again

* typo

* allow users to set custom pdf metadata and the PDF language identifier used by PDF readers for blind people

* black

* spelling

* Update drafthorse/pdf.py

* Run black

---------

Co-authored-by: Raphael Michel <michel@rami.io>
Co-authored-by: Raphael Michel <mail@raphaelmichel.de>
README.rst
drafthorse/__init__.py
drafthorse/models/elements.py
drafthorse/pdf.py
drafthorse/schema/ZUGFeRD2p2_extension_schema.xmp [deleted file]
drafthorse/xmp_schema.py [new file with mode: 0644]
tests/conftest.py [new file with mode: 0644]
tests/samples/empty_pdf16.pdf [new file with mode: 0644]
tests/samples/invoice_pdf17.pdf [new file with mode: 0644]
tests/test_mininal.py

index b540a41883f7a69fd9b834d1b049d7c1aef54486..d248047275c755cdba4eecb56bd742e1e15dc595 100644 (file)
@@ -22,6 +22,8 @@ further abstractions or simplifications. You can set and parse all parameters de
 All output is validated against the official XSDs, but no validation of profile levels
 (basic, comfort, extended) is performed.
 
+The profile level is detected automatically based on the XML data and added to the PDF metadata.
+
 Usage
 -----
 
@@ -112,7 +114,7 @@ Generating::
     # Note that the existing PDF should be compliant to PDF/A-3!
     # You can validate this here: https://www.pdf-online.com/osa/validate.aspx
     with open("input.pdf", "rb") as original_file:
-        new_pdf_bytes = attach_xml(original_file.read(), xml, 'EXTENDED')
+        new_pdf_bytes = attach_xml(original_file.read(), xml)
 
     with open("output.pdf", "wb") as f:
         f.write(new_pdf_bytes)
@@ -135,9 +137,9 @@ To validate files using mustang::
 
     git clone https://github.com/ZUGFeRD/mustangproject.git
     cd mustangproject
-    git checkout core-2.5.1
+    git checkout core-2.9.0
     ./mvnw clean package
-    java -jar Mustang-CLI/target/Mustang-CLI-2.5.1-SNAPSHOT.jar --action validate --source invoice.pdf
+    java -jar Mustang-CLI/target/Mustang-CLI-2.7.4-SNAPSHOT.jar --action validate --source invoice.pdf
 
 
 Credits and License
index 6a86bc70329d83b2a40db1ae2a3474d1fd4a6913..9971c5f8601d499e9228c24ea0a565ecdeb85a0a 100644 (file)
@@ -1 +1 @@
-version = "2.2.2"
+version = "2.3.0"
index 80a68201161114435a79ebbe1d7c16655d9201e5..ff272643d0834081cb47fc7eed515f18bf224699 100644 (file)
@@ -15,6 +15,7 @@ from .fields import Field
 class BaseElementMeta(type):
     @classmethod
     def __prepare__(self, name, bases):
+        del name, bases
         return collections.OrderedDict()
 
     def __new__(mcls, name, bases, attrs):
@@ -50,7 +51,7 @@ class Element(metaclass=BaseElementMeta):
 
     def to_etree(self):
         node = self._etree_node()
-        for k, v in self._data.items():
+        for _, v in self._data.items():
             if v is not None:
                 v.append_to(node)
         return node
@@ -309,7 +310,7 @@ class IDElement(StringElement):
         self._text = root.text
         try:
             self._scheme_id = root.attrib["schemeID"]
-        except:
+        except Exception:
             root.attrib["schemeID"] = ""
             self._scheme_id = root.attrib["schemeID"]
         self._set_on_input = True
@@ -386,7 +387,7 @@ class DirectDateTimeElement(StringElement):
     def from_etree(self, root):
         try:
             self._value = datetime.strptime(root.text, "%Y-%m-%dT%H:%M:%S").date()
-        except:
+        except Exception:
             self._value = ""
         self._set_on_input = True
         return self
index cc7aa4d747c97f45ffc267f92a479fc526920d90..b02f13eaef3193f1b710f5a3e6be49ffb8ca8e1f 100644 (file)
@@ -24,9 +24,8 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import datetime
 import logging
-import os
+from datetime import datetime, timezone
 from io import BytesIO
 from lxml import etree
 from pypdf import PdfReader, PdfWriter
@@ -38,10 +37,43 @@ from pypdf.generic import (
     create_string_object,
 )
 
+from drafthorse.xmp_schema import XMP_SCHEMA
+
+logging.basicConfig()
 logger = logging.getLogger("drafthorse")
+logger.setLevel(logging.INFO)
 
 
-def attach_xml(original_pdf, xml_data, level="BASIC"):
+def attach_xml(original_pdf, xml_data, level=None, metadata=None, lang=None):
+    """
+    Create the ZUGFeRD invoice by attaching
+    the input XML and proper metadata
+    :param original_pdf: Input PDF
+    :param xml_data: Input XML
+    :param level: optional Factur-X profile level
+    one of ``{'MINIMUM', 'BASIC WL', 'BASIC', 'EN 16931', 'EXTENDED', 'XRECHNUNG'}``.
+    If omitted, autodetection is performed
+    :type level: string
+    :param metadata: optional dict with user defined PDF metadata
+    for fields "author", "keywords", "title" and "subject". If metadata is None (default value),
+    this lib will generate some metadata by extracting relevant info from the Factur-X/Order-X XML.
+    Here is an example for the metadata argument:
+    ```
+    pdf_metadata = {
+        'author': 'MyCompany',
+        'keywords': 'Factur-X, Invoice',
+        'title': 'MyCompany: Invoice I1242',
+        'subject':
+          'Factur-X invoice I1242 dated 2017-08-17 issued by MyCompany',
+    }
+    ```
+    :type metadata: dict
+    :param lang: Language identifier in RFC 3066 format to specify the
+    natural language of the PDF document. Used by PDF readers for blind people.
+    Example: en-US or fr-FR
+    :type lang: string
+    :return: Output PDF containing the metadata and XML
+    """
     if not isinstance(original_pdf, bytes):
         raise TypeError("Please supply original PDF as bytes.")
     if not isinstance(xml_data, bytes):
@@ -58,8 +90,14 @@ def attach_xml(original_pdf, xml_data, level="BASIC"):
         output._ID = original_pdf_id
         # else : generate some ?
 
-    _facturx_update_metadata_add_attachment(
-        output, xml_data, {}, level, output_intents=_get_original_output_intents(reader)
+    # Extract metadata from XML
+    pdf_metadata, profile = _extract_xml_info(xml_data, level, metadata)
+
+    # Extract output intents from input PDF
+    output_intents = _get_original_output_intents(reader)
+
+    _update_metadata_add_attachment(
+        output, xml_data, pdf_metadata, profile, output_intents, lang
     )
 
     outbuffer = BytesIO()
@@ -69,6 +107,11 @@ def attach_xml(original_pdf, xml_data, level="BASIC"):
 
 
 def _get_original_output_intents(original_pdf):
+    """
+    Get output intents from input PDF
+    :param original_pdf: Input PDF
+    :return: Output PDF metadata information
+    """
     output_intents = []
     try:
         pdf_root = original_pdf.trailer["/Root"]
@@ -85,8 +128,13 @@ def _get_original_output_intents(original_pdf):
 
 
 def _prepare_pdf_metadata_txt(pdf_metadata):
-    pdf_date = datetime.datetime.utcnow().strftime("D:%Y%m%d%H%M%SZ")
-    info_dict = {
+    """
+    Create PDF info for the Document Properties section
+    :param pdf_metadata: Metadata
+    :return: PDF info
+    """
+    pdf_date = datetime.now(tz=timezone.utc).strftime("D:%Y%m%d%H%M%SZ")
+    return {
         "/Author": pdf_metadata.get("author", ""),
         "/CreationDate": pdf_date,
         "/Creator": "python-drafthorse",
@@ -95,108 +143,52 @@ def _prepare_pdf_metadata_txt(pdf_metadata):
         "/Subject": pdf_metadata.get("subject", ""),
         "/Title": pdf_metadata.get("title", ""),
     }
-    return info_dict
 
 
-def _prepare_pdf_metadata_xml(level, pdf_metadata):
-    nsmap_x = {"x": "adobe:ns:meta/"}
-    nsmap_rdf = {"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}
-    nsmap_dc = {"dc": "http://purl.org/dc/elements/1.1/"}
-    nsmap_pdf = {"pdf": "http://ns.adobe.com/pdf/1.3/"}
-    nsmap_xmp = {"xmp": "http://ns.adobe.com/xap/1.0/"}
-    nsmap_pdfaid = {"pdfaid": "http://www.aiim.org/pdfa/ns/id/"}
-    nsmap_zf = {"zf": "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#"}
-    ns_x = "{%s}" % nsmap_x["x"]
-    ns_dc = "{%s}" % nsmap_dc["dc"]
-    ns_rdf = "{%s}" % nsmap_rdf["rdf"]
-    ns_pdf = "{%s}" % nsmap_pdf["pdf"]
-    ns_xmp = "{%s}" % nsmap_xmp["xmp"]
-    ns_pdfaid = "{%s}" % nsmap_pdfaid["pdfaid"]
-    ns_zf = "{%s}" % nsmap_zf["zf"]
-    ns_xml = "{http://www.w3.org/XML/1998/namespace}"
-
-    root = etree.Element(ns_x + "xmpmeta", nsmap=nsmap_x)
-    rdf = etree.SubElement(root, ns_rdf + "RDF", nsmap=nsmap_rdf)
-    desc_pdfaid = etree.SubElement(rdf, ns_rdf + "Description", nsmap=nsmap_pdfaid)
-    desc_pdfaid.set(ns_rdf + "about", "")
-    etree.SubElement(desc_pdfaid, ns_pdfaid + "part").text = "3"
-    etree.SubElement(desc_pdfaid, ns_pdfaid + "conformance").text = "B"
-    desc_dc = etree.SubElement(rdf, ns_rdf + "Description", nsmap=nsmap_dc)
-    desc_dc.set(ns_rdf + "about", "")
-    dc_title = etree.SubElement(desc_dc, ns_dc + "title")
-    dc_title_alt = etree.SubElement(dc_title, ns_rdf + "Alt")
-    dc_title_alt_li = etree.SubElement(dc_title_alt, ns_rdf + "li")
-    dc_title_alt_li.text = pdf_metadata.get("title", "")
-    dc_title_alt_li.set(ns_xml + "lang", "x-default")
-    dc_creator = etree.SubElement(desc_dc, ns_dc + "creator")
-    dc_creator_seq = etree.SubElement(dc_creator, ns_rdf + "Seq")
-    etree.SubElement(dc_creator_seq, ns_rdf + "li").text = pdf_metadata.get(
-        "author", ""
+def _prepare_xmp_metadata(profile, pdf_metadata):
+    """
+    Prepare pdf metadata using the FACTUR-X XMP extension schema
+    :param profile: Invoice profile
+    :param pdf_metadata: PDF metadata
+    :return: metadata XML
+    """
+    xml_str = XMP_SCHEMA.format(
+        title=pdf_metadata.get("title", ""),
+        author=pdf_metadata.get("author", ""),
+        subject=pdf_metadata.get("subject", ""),
+        producer="pypdf",
+        creator_tool="python-drafthorse",
+        timestamp=datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00"),
+        urn="urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#",
+        documenttype="INVOICE",
+        xml_filename="factur-x.xml",
+        version="1.0",
+        xmp_level=profile,
     )
-    dc_desc = etree.SubElement(desc_dc, ns_dc + "description")
-    dc_desc_alt = etree.SubElement(dc_desc, ns_rdf + "Alt")
-    dc_desc_alt_li = etree.SubElement(dc_desc_alt, ns_rdf + "li")
-    dc_desc_alt_li.text = pdf_metadata.get("subject", "")
-    dc_desc_alt_li.set(ns_xml + "lang", "x-default")
-    desc_adobe = etree.SubElement(rdf, ns_rdf + "Description", nsmap=nsmap_pdf)
-    desc_adobe.set(ns_rdf + "about", "")
-    producer = etree.SubElement(desc_adobe, ns_pdf + "Producer")
-    producer.text = "pypdf"
-    desc_xmp = etree.SubElement(rdf, ns_rdf + "Description", nsmap=nsmap_xmp)
-    desc_xmp.set(ns_rdf + "about", "")
-    creator = etree.SubElement(desc_xmp, ns_xmp + "CreatorTool")
-    creator.text = "python-drafthorse"
-    xmp_date = datetime.datetime.utcnow().replace(microsecond=0).isoformat() + "+00:00"
-    etree.SubElement(desc_xmp, ns_xmp + "CreateDate").text = xmp_date
-    etree.SubElement(desc_xmp, ns_xmp + "ModifyDate").text = xmp_date
+    return xml_str.encode("utf-8")
 
-    # Now is the ZUGFeRD description tag
-    zugferd_desc = etree.SubElement(rdf, ns_rdf + "Description", nsmap=nsmap_zf)
-    zugferd_desc.set(ns_rdf + "about", "")
-    fx_doc_type = etree.SubElement(zugferd_desc, ns_zf + "DocumentType", nsmap=nsmap_zf)
-    fx_doc_type.text = "INVOICE"
-    fx_doc_filename = etree.SubElement(
-        zugferd_desc, ns_zf + "DocumentFileName", nsmap=nsmap_zf
-    )
-    fx_doc_filename.text = "factur-x.xml"
-    fx_doc_version = etree.SubElement(zugferd_desc, ns_zf + "Version", nsmap=nsmap_zf)
-    fx_doc_version.text = "1.0"
-    fx_conformance_level = etree.SubElement(
-        zugferd_desc, ns_zf + "ConformanceLevel", nsmap=nsmap_zf
-    )
-    fx_conformance_level.text = level
 
-    xmp_file = os.path.join(
-        os.path.dirname(__file__),
-        "schema",
-        "ZUGFeRD2p2_extension_schema.xmp",
-    )
-    # Reason for defining a parser below:
-    # http://lxml.de/FAQ.html#why-doesn-t-the-pretty-print-option-reformat-my-xml-output
-    parser = etree.XMLParser(remove_blank_text=True)
-    facturx_ext_schema_root = etree.parse(open(xmp_file), parser)
-    # The Factur-X extension schema must be embedded into each PDF document
-    facturx_ext_schema_desc_xpath = facturx_ext_schema_root.xpath(
-        "//rdf:Description", namespaces=nsmap_rdf
-    )
-    rdf.append(facturx_ext_schema_desc_xpath[1])
-
-    # TODO: should be UTF-16be ??
-    xml_str = etree.tostring(
-        root, pretty_print=True, encoding="UTF-8", xml_declaration=False
-    )
-    head = '<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>'.encode("utf-8")
-    tail = '<?xpacket end="w"?>'.encode("utf-8")
-    xml_final_str = head + xml_str + tail
-    return xml_final_str
-
-
-def _facturx_update_metadata_add_attachment(
-    pdf_filestream, facturx_xml_str, pdf_metadata, facturx_level, output_intents
+def _update_metadata_add_attachment(
+    pdf_filestream,
+    facturx_xml_str,
+    pdf_metadata,
+    facturx_level,
+    output_intents,
+    lang=None,
 ):
+    """
+    Update PDF metadata and attach XML file
+    :param pdf_filestream: PDF data
+    :param facturx_xml_str: XML data
+    :param pdf_metadata: PDF metadata
+    :param facturx_level: Invoice profile
+    :param output_intents: Output intents from input PDF
+    :param lang: Language identifier in RFC 3066 format
+    """
+    # Disable encoding
     # md5sum = hashlib.md5(facturx_xml_str).hexdigest()
     # md5sum_obj = create_string_object(md5sum)
-    pdf_date = datetime.datetime.utcnow().strftime("D:%Y%m%d%H%M%SZ")
+    pdf_date = datetime.now(tz=timezone.utc).strftime("D:%Y%m%d%H%M%SZ")
     params_dict = DictionaryObject(
         {
             # NameObject('/CheckSum'): md5sum_obj,
@@ -206,7 +198,7 @@ def _facturx_update_metadata_add_attachment(
         }
     )
     file_entry = DecodedStreamObject()
-    file_entry.set_data(facturx_xml_str)  # here we integrate the file itself
+    file_entry.set_data(facturx_xml_str)
     file_entry.update(
         {
             NameObject("/Type"): NameObject("/EmbeddedFile"),
@@ -227,7 +219,7 @@ def _facturx_update_metadata_add_attachment(
                 "/Data" if facturx_level in ("BASIC-WL", "MINIMUM") else "/Alternative"
             ),
             NameObject("/Desc"): create_string_object(
-                "Invoice metadata conforming to ZUGFeRD standard (http://www.ferd-net.de/front_content.php?idcat=231&lang=4)"
+                "Invoice metadata conforming to ZUGFeRD standard (http://www.ferd-net.de/)"
             ),
             NameObject("/Type"): NameObject("/Filespec"),
             NameObject("/F"): fname_obj,
@@ -237,9 +229,7 @@ def _facturx_update_metadata_add_attachment(
     )
     filespec_obj = pdf_filestream._add_object(filespec_dict)
     name_arrayobj_cdict = {fname_obj: filespec_obj}
-    name_arrayobj_content_sort = list(
-        sorted(name_arrayobj_cdict.items(), key=lambda x: x[0])
-    )
+    name_arrayobj_content_sort = sorted(name_arrayobj_cdict.items(), key=lambda x: x[0])
     name_arrayobj_content_final = []
     af_list = []
     for fname_obj, filespec_obj in name_arrayobj_content_sort:
@@ -264,7 +254,7 @@ def _facturx_update_metadata_add_attachment(
         output_intent_obj = pdf_filestream._add_object(output_intent_dict)
         res_output_intents.append(output_intent_obj)
     # Update the root
-    metadata_xml_str = _prepare_pdf_metadata_xml(facturx_level, pdf_metadata)
+    metadata_xml_str = _prepare_xmp_metadata(facturx_level, pdf_metadata)
     metadata_file_entry = DecodedStreamObject()
     metadata_file_entry.set_data(metadata_xml_str)
     metadata_file_entry.update(
@@ -288,5 +278,76 @@ def _facturx_update_metadata_add_attachment(
         pdf_filestream._root_object.update(
             {NameObject("/OutputIntents"): ArrayObject(res_output_intents)}
         )
+    if lang:
+        pdf_filestream._root_object.update(
+            {
+                NameObject("/Lang"): create_string_object(lang.replace("_", "-")),
+            }
+        )
     metadata_txt_dict = _prepare_pdf_metadata_txt(pdf_metadata)
     pdf_filestream.add_metadata(metadata_txt_dict)
+
+
+def _extract_xml_info(xml_data, level=None, metadata=None):
+    """
+    Extract metadata and profile from XML further added to the PDF
+    :param xml_data: XML data
+    :param level: optional Factur-X profile level
+       one of {MINIMUM, BASIC WL, BASIC, EN 16931, EXTENDED, XRECHNUNG}
+       if omitted autodetection is performed
+    :param metadata: optional dict with user defined pdf_metadata
+        for fields "author", "keywords", "title" and "subject"
+    :return: Metadata and profile
+    """
+
+    xml_etree = etree.fromstring(xml_data)
+    namespaces = xml_etree.nsmap
+
+    # get metadata
+    number_xpath = xml_etree.xpath(
+        "//rsm:ExchangedDocument/ram:ID", namespaces=namespaces
+    )
+    number = number_xpath[0].text
+    seller_xpath = xml_etree.xpath(
+        "//ram:ApplicableHeaderTradeAgreement/ram:SellerTradeParty/ram:Name",
+        namespaces=namespaces,
+    )
+    seller = seller_xpath[0].text
+
+    if metadata is None:
+        metadata = {}
+    pdf_metadata = {
+        "author": metadata.get("author", seller),
+        "keywords": metadata.get("keywords", "Factur-X"),
+        "title": metadata.get("title", number),
+        "subject": metadata.get("subject", number),
+    }
+
+    # get profile
+    doc_id_xpath = xml_etree.xpath(
+        "//rsm:ExchangedDocumentContext"
+        "/ram:GuidelineSpecifiedDocumentContextParameter"
+        "/ram:ID",
+        namespaces=namespaces,
+    )
+    doc_id = doc_id_xpath[0].text
+
+    if level is None:
+        # autodetection of Factur-X profile
+        profile = doc_id.split(":")[-1]
+        if doc_id.split(":")[-1] in ["basic", "extended"]:
+            profile = doc_id.split(":")[-1]
+        elif doc_id.split(":")[-1].startswith("xrechnung"):
+            profile = "xrechnung"
+        elif doc_id.split(":")[-2] == "en16931":
+            profile = doc_id.split(":")[-2]
+            profile = profile[:2] + " " + profile[2:]
+        else:
+            raise Exception("Invalid XML profile!")
+    else:
+        profile = level
+
+    profile = profile.upper()
+    logger.info("Factur-X profile detected from XML: %s", profile)
+
+    return pdf_metadata, profile
diff --git a/drafthorse/schema/ZUGFeRD2p2_extension_schema.xmp b/drafthorse/schema/ZUGFeRD2p2_extension_schema.xmp
deleted file mode 100644 (file)
index 8b6d1ae..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-<!--
-(c) PDFlib GmbH 2019
-
-Sample Factur-X XMP with the required PDF/A extension schema description
-for the XMP properties of the Factur-X schema and the actual Factur-X properties.
-
-Schema name: Factur-X PDFA Extension Schema
-Preferred schema namespace prefix: fx
-Schema namespace URI: urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#
-
-Based on the Factur-X specification published at http://fnfe-mpe.org/factur-x/factur-x_en/
--->
-
-<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
-
-  <!-- The actual Factur-X properties; adjust if required -->
-  <rdf:Description rdf:about="" xmlns:fx="urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#">
-        <fx:ConformanceLevel>BASIC</fx:ConformanceLevel>
-        <fx:DocumentFileName>factur-x.xml</fx:DocumentFileName>
-        <fx:DocumentType>INVOICE</fx:DocumentType>
-        <fx:Version>1.0</fx:Version>
-  </rdf:Description>
-
-  <!-- PDF/A extension schema description for the Factur-X schema.  -->
-  <rdf:Description rdf:about=""
-               xmlns:pdfaExtension="http://www.aiim.org/pdfa/ns/extension/"
-               xmlns:pdfaSchema="http://www.aiim.org/pdfa/ns/schema#"
-               xmlns:pdfaProperty="http://www.aiim.org/pdfa/ns/property#">
-
-        <pdfaExtension:schemas>
-               <rdf:Bag>
-                  <rdf:li rdf:parseType="Resource">
-                         <pdfaSchema:schema>Factur-X PDF/A Extension Schema</pdfaSchema:schema>
-                         <pdfaSchema:namespaceURI>urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#</pdfaSchema:namespaceURI>
-                         <pdfaSchema:prefix>fx</pdfaSchema:prefix>
-                         <pdfaSchema:property>
-                                <rdf:Seq>
-                                       <rdf:li rdf:parseType="Resource">
-                                          <pdfaProperty:name>DocumentFileName</pdfaProperty:name>
-                                          <pdfaProperty:valueType>Text</pdfaProperty:valueType>
-                                          <pdfaProperty:category>external</pdfaProperty:category>
-                                          <pdfaProperty:description>name of the embedded XML invoice file</pdfaProperty:description>
-                                       </rdf:li>
-                                       <rdf:li rdf:parseType="Resource">
-                                          <pdfaProperty:name>DocumentType</pdfaProperty:name>
-                                          <pdfaProperty:valueType>Text</pdfaProperty:valueType>
-                                          <pdfaProperty:category>external</pdfaProperty:category>
-                                          <pdfaProperty:description>INVOICE</pdfaProperty:description>
-                                       </rdf:li>
-                                       <rdf:li rdf:parseType="Resource">
-                                          <pdfaProperty:name>Version</pdfaProperty:name>
-                                          <pdfaProperty:valueType>Text</pdfaProperty:valueType>
-                                          <pdfaProperty:category>external</pdfaProperty:category>
-                                          <pdfaProperty:description>The actual version of the Factur-X XML schema</pdfaProperty:description>
-                                       </rdf:li>
-                                       <rdf:li rdf:parseType="Resource">
-                                          <pdfaProperty:name>ConformanceLevel</pdfaProperty:name>
-                                          <pdfaProperty:valueType>Text</pdfaProperty:valueType>
-                                          <pdfaProperty:category>external</pdfaProperty:category>
-                                          <pdfaProperty:description>The conformance level of the embedded Factur-X data</pdfaProperty:description>
-                                       </rdf:li>
-                                </rdf:Seq>
-                         </pdfaSchema:property>
-                  </rdf:li>
-               </rdf:Bag>
-        </pdfaExtension:schemas>
-  </rdf:Description>
-</rdf:RDF>
\ No newline at end of file
diff --git a/drafthorse/xmp_schema.py b/drafthorse/xmp_schema.py
new file mode 100644 (file)
index 0000000..c02b38d
--- /dev/null
@@ -0,0 +1,86 @@
+"""
+FACTUR-X XMP with the required PDF/A extension schema description
+"""
+
+XMP_SCHEMA = """
+<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>
+<x:xmpmeta xmlns:x="adobe:ns:meta/">
+  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+    <rdf:Description xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/" rdf:about="">
+      <pdfaid:part>3</pdfaid:part>
+      <pdfaid:conformance>B</pdfaid:conformance>
+    </rdf:Description>
+    <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/" rdf:about="">
+      <dc:title>
+        <rdf:Alt>
+          <rdf:li xml:lang="x-default">{title}</rdf:li>
+        </rdf:Alt>
+      </dc:title>
+      <dc:creator>
+        <rdf:Seq>
+          <rdf:li>{author}</rdf:li>
+        </rdf:Seq>
+      </dc:creator>
+      <dc:description>
+        <rdf:Alt>
+          <rdf:li xml:lang="x-default">{subject}</rdf:li>
+        </rdf:Alt>
+      </dc:description>
+    </rdf:Description>
+    <rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="">
+      <pdf:Producer>{producer}</pdf:Producer>
+    </rdf:Description>
+    <rdf:Description xmlns:xmp="http://ns.adobe.com/xap/1.0/" rdf:about="">
+      <xmp:CreatorTool>{creator_tool}</xmp:CreatorTool>
+      <xmp:CreateDate>{timestamp}</xmp:CreateDate>
+      <xmp:ModifyDate>{timestamp}</xmp:ModifyDate>
+    </rdf:Description>
+    <rdf:Description xmlns:pdfaExtension="http://www.aiim.org/pdfa/ns/extension/" xmlns:pdfaSchema="http://www.aiim.org/pdfa/ns/schema#" xmlns:pdfaProperty="http://www.aiim.org/pdfa/ns/property#" rdf:about="">
+      <pdfaExtension:schemas>
+        <rdf:Bag>
+          <rdf:li rdf:parseType="Resource">
+            <pdfaSchema:schema>Factur-X PDFA Extension Schema</pdfaSchema:schema>
+            <pdfaSchema:namespaceURI>{urn}</pdfaSchema:namespaceURI>
+            <pdfaSchema:prefix>fx</pdfaSchema:prefix>
+            <pdfaSchema:property>
+              <rdf:Seq>
+                <rdf:li rdf:parseType="Resource">
+                  <pdfaProperty:name>DocumentFileName</pdfaProperty:name>
+                  <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                  <pdfaProperty:category>external</pdfaProperty:category>
+                  <pdfaProperty:description>The name of the embedded XML document</pdfaProperty:description>
+                </rdf:li>
+                <rdf:li rdf:parseType="Resource">
+                  <pdfaProperty:name>DocumentType</pdfaProperty:name>
+                  <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                  <pdfaProperty:category>external</pdfaProperty:category>
+                  <pdfaProperty:description>The type of the hybrid document in capital letters, e.g. INVOICE or ORDER</pdfaProperty:description>
+                </rdf:li>
+                <rdf:li rdf:parseType="Resource">
+                  <pdfaProperty:name>Version</pdfaProperty:name>
+                  <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                  <pdfaProperty:category>external</pdfaProperty:category>
+                  <pdfaProperty:description>The actual version of the standard applying to the embedded XML document</pdfaProperty:description>
+                </rdf:li>
+                <rdf:li rdf:parseType="Resource">
+                  <pdfaProperty:name>ConformanceLevel</pdfaProperty:name>
+                  <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                  <pdfaProperty:category>external</pdfaProperty:category>
+                  <pdfaProperty:description>The conformance level of the embedded XML document</pdfaProperty:description>
+                </rdf:li>
+              </rdf:Seq>
+            </pdfaSchema:property>
+          </rdf:li>
+        </rdf:Bag>
+      </pdfaExtension:schemas>
+    </rdf:Description>
+    <rdf:Description xmlns:fx="{urn}" rdf:about="">
+      <fx:DocumentFileName>{xml_filename}</fx:DocumentFileName>
+      <fx:DocumentType>{documenttype}</fx:DocumentType>
+      <fx:Version>{version}</fx:Version>
+      <fx:ConformanceLevel>{xmp_level}</fx:ConformanceLevel>
+    </rdf:Description>
+  </rdf:RDF>
+</x:xmpmeta>
+<?xpacket end="w"?>
+"""
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644 (file)
index 0000000..04abdb4
--- /dev/null
@@ -0,0 +1,94 @@
+import os
+import pytest
+from datetime import date, datetime, timezone
+from decimal import Decimal
+
+from drafthorse.models.accounting import ApplicableTradeTax
+from drafthorse.models.document import Document
+from drafthorse.models.note import IncludedNote
+from drafthorse.models.tradelines import LineItem
+
+
+@pytest.fixture
+def invoice_document(request):
+    """
+    Build a sample ``Document`` that the PDF attachment tests serialize.
+
+    The header type code is read from ``request.param``, so the fixture has to
+    be parametrized indirectly, e.g.
+    ``@pytest.mark.parametrize("invoice_document", ["380"], indirect=True)``.
+    The guideline id is preset to the Factur-X "extended" URN; callers may
+    overwrite it on the returned object before serializing.
+    """
+    doc = Document()
+    doc.context.guideline_parameter.id = (
+        "urn:cen.eu:en16931:2017#conformant#urn:factur-x.eu:1p0:extended"
+    )
+    doc.header.id = "RE1337"
+    # type code is supplied by the indirect parametrization of the test
+    doc.header.type_code = request.param
+    doc.header.name = "RECHNUNG"
+    doc.header.issue_date_time = date.today()
+    doc.header.languages.add("de")
+
+    note = IncludedNote()
+    note.content.add("Test Node 1")
+    doc.header.notes.add(note)
+
+    doc.trade.agreement.seller.name = "Lieferant GmbH"
+    doc.trade.settlement.payee.name = "Kunde GmbH"
+
+    doc.trade.agreement.buyer.name = "Kunde GmbH"
+    doc.trade.settlement.invoicee.name = "Kunde GmbH"
+
+    doc.trade.settlement.currency_code = "EUR"
+    doc.trade.settlement.payment_means.type_code = "ZZZ"
+
+    doc.trade.agreement.seller.address.country_id = "DE"
+    doc.trade.agreement.seller.address.country_subdivision = "Bayern"
+
+    doc.trade.agreement.seller_order.issue_date_time = datetime.now(timezone.utc)
+    doc.trade.agreement.buyer_order.issue_date_time = datetime.now(timezone.utc)
+    doc.trade.settlement.advance_payment.received_date = datetime.now(timezone.utc)
+    doc.trade.agreement.customer_order.issue_date_time = datetime.now(timezone.utc)
+
+    # single line item of the document
+    li = LineItem()
+    li.document.line_id = "1"
+    li.product.name = "Rainbow"
+    li.agreement.gross.amount = Decimal("999.00")
+    li.agreement.gross.basis_quantity = (Decimal("1.0000"), "C62")  # C62 == pieces
+    li.agreement.net.amount = Decimal("999.00")
+    li.agreement.net.basis_quantity = (Decimal("999.00"), "EUR")
+    li.delivery.billed_quantity = (Decimal("1.0000"), "C62")  # C62 == pieces
+    li.settlement.trade_tax.type_code = "VAT"
+    li.settlement.trade_tax.category_code = "E"
+    li.settlement.trade_tax.rate_applicable_percent = Decimal("0.00")
+    li.settlement.monetary_summation.total_amount = Decimal("999.00")
+    doc.trade.items.add(li)
+
+    # document-level tax entry; uses the same category and rate as the line item
+    trade_tax = ApplicableTradeTax()
+    trade_tax.calculated_amount = Decimal("0.00")
+    trade_tax.basis_amount = Decimal("999.00")
+    trade_tax.type_code = "VAT"
+    trade_tax.category_code = "E"
+    trade_tax.rate_applicable_percent = Decimal("0.00")
+    doc.trade.settlement.trade_tax.add(trade_tax)
+
+    # document totals; they match the single 999.00 line item with 0.00 tax
+    doc.trade.settlement.monetary_summation.line_total = Decimal("999.00")
+    doc.trade.settlement.monetary_summation.charge_total = Decimal("0.00")
+    doc.trade.settlement.monetary_summation.allowance_total = Decimal("0.00")
+    doc.trade.settlement.monetary_summation.tax_basis_total = Decimal("999.00")
+    doc.trade.settlement.monetary_summation.tax_total = Decimal("0.00")
+    doc.trade.settlement.monetary_summation.grand_total = Decimal("999.00")
+    doc.trade.settlement.monetary_summation.due_amount = Decimal("999.00")
+
+    return doc
+
+
+@pytest.fixture
+def empty_pdf16_bytes():
+    """Return the raw bytes of the bundled ``samples/empty_pdf16.pdf`` file."""
+    path = os.path.join(os.path.dirname(__file__), "samples", "empty_pdf16.pdf")
+    # Use a context manager so the file handle is closed deterministically.
+    with open(path, "rb") as pdf_file:
+        return pdf_file.read()
+
+
+@pytest.fixture
+def invoice_pdf17_bytes():
+    """Return the raw bytes of the bundled ``samples/invoice_pdf17.pdf`` file."""
+    path = os.path.join(os.path.dirname(__file__), "samples", "invoice_pdf17.pdf")
+    # Use a context manager so the file handle is closed deterministically
+    # instead of being left open until garbage collection.
+    with open(path, "rb") as pdf_file:
+        return pdf_file.read()
diff --git a/tests/samples/empty_pdf16.pdf b/tests/samples/empty_pdf16.pdf
new file mode 100644 (file)
index 0000000..eae1b7a
Binary files /dev/null and b/tests/samples/empty_pdf16.pdf differ
diff --git a/tests/samples/invoice_pdf17.pdf b/tests/samples/invoice_pdf17.pdf
new file mode 100644 (file)
index 0000000..2b18f66
Binary files /dev/null and b/tests/samples/invoice_pdf17.pdf differ
index aeaa34b1e2667f94b1c0df3909eec8d3aaf86906..d21a3b4ad7de58549c9ae77f483cc7e34665a775 100644 (file)
-import os
-from datetime import date, datetime, timezone
-from decimal import Decimal
-
-from drafthorse.models.accounting import ApplicableTradeTax
-from drafthorse.models.document import Document
-from drafthorse.models.note import IncludedNote
-from drafthorse.models.tradelines import LineItem
+import pytest
+
 from drafthorse.pdf import attach_xml
 
 
-def test_readme_construction_example():
-    doc = Document()
+@pytest.mark.parametrize("invoice_document", ["380"], indirect=True)
+def test_readme_construction_example_pdf16(invoice_document, empty_pdf16_bytes):
+    """
+    Attach the serialized invoice XML to a PDF 1.6 file and expect a
+    truthy result
+    """
+    xml_bytes = invoice_document.serialize(schema="FACTUR-X_EXTENDED")
+
+    assert attach_xml(empty_pdf16_bytes, xml_bytes)
+
+
+@pytest.mark.parametrize("invoice_document", ["380"], indirect=True)
+def test_readme_construction_example_pdf17(invoice_document, invoice_pdf17_bytes):
+    """
+    Attach the serialized invoice XML to a PDF 1.7 file, which also covers
+    the output intents handling, and expect a truthy result
+    """
+    xml_bytes = invoice_document.serialize(schema="FACTUR-X_EXTENDED")
+
+    assert attach_xml(invoice_pdf17_bytes, xml_bytes)
+
+
+@pytest.mark.parametrize("invoice_document", ["380"], indirect=True)
+def test_readme_construction_example_pdf17_en16931(
+    invoice_document, invoice_pdf17_bytes
+):
+    """
+    Test that attach_xml accepts a plain EN 16931 guideline id when no
+    explicit profile level is passed (PDF 1.7 input)
+    """
+    doc = invoice_document
+    doc.context.guideline_parameter.id = "urn:cen.eu:en16931:2017"
+    xml = doc.serialize(schema="FACTUR-X_EXTENDED")
+    output_pdf = attach_xml(invoice_pdf17_bytes, xml)
+
+    assert output_pdf
+
+
+@pytest.mark.parametrize("invoice_document", ["380"], indirect=True)
+def test_readme_construction_example_pdf17_xrechnung2(
+    invoice_document, invoice_pdf17_bytes
+):
+    """
+    Test that attach_xml accepts an XRechnung 2.3 guideline id when no
+    explicit profile level is passed (PDF 1.7 input)
+    """
+    doc = invoice_document
     doc.context.guideline_parameter.id = (
-        "urn:cen.eu:en16931:2017#conformant#urn:factur-x.eu:1p0:extended"
+        "urn:cen.eu:en16931:2017#compliant#urn:xoev-de:kosit:standard:xrechnung_2.3"
     )
-    doc.header.id = "RE1337"
-    doc.header.type_code = "380"
-    doc.header.name = "RECHNUNG"
-    doc.header.issue_date_time = date.today()
-    doc.header.languages.add("de")
-
-    note = IncludedNote()
-    note.content.add("Test Node 1")
-    doc.header.notes.add(note)
-
-    doc.trade.agreement.seller.name = "Lieferant GmbH"
-    doc.trade.settlement.payee.name = "Kunde GmbH"
-
-    doc.trade.agreement.buyer.name = "Kunde GmbH"
-    doc.trade.settlement.invoicee.name = "Kunde GmbH"
-
-    doc.trade.settlement.currency_code = "EUR"
-    doc.trade.settlement.payment_means.type_code = "ZZZ"
-
-    doc.trade.agreement.seller.address.country_id = "DE"
-    doc.trade.agreement.seller.address.country_subdivision = "Bayern"
-
-    doc.trade.agreement.seller_order.issue_date_time = datetime.now(timezone.utc)
-    doc.trade.agreement.buyer_order.issue_date_time = datetime.now(timezone.utc)
-    doc.trade.settlement.advance_payment.received_date = datetime.now(timezone.utc)
-    doc.trade.agreement.customer_order.issue_date_time = datetime.now(timezone.utc)
-
-    li = LineItem()
-    li.document.line_id = "1"
-    li.product.name = "Rainbow"
-    li.agreement.gross.amount = Decimal("999.00")
-    li.agreement.gross.basis_quantity = (Decimal("1.0000"), "C62")  # C62 == pieces
-    li.agreement.net.amount = Decimal("999.00")
-    li.agreement.net.basis_quantity = (Decimal("999.00"), "EUR")
-    li.delivery.billed_quantity = (Decimal("1.0000"), "C62")  # C62 == pieces
-    li.settlement.trade_tax.type_code = "VAT"
-    li.settlement.trade_tax.category_code = "E"
-    li.settlement.trade_tax.rate_applicable_percent = Decimal("0.00")
-    li.settlement.monetary_summation.total_amount = Decimal("999.00")
-    doc.trade.items.add(li)
-
-    trade_tax = ApplicableTradeTax()
-    trade_tax.calculated_amount = Decimal("0.00")
-    trade_tax.basis_amount = Decimal("999.00")
-    trade_tax.type_code = "VAT"
-    trade_tax.category_code = "AE"
-    trade_tax.exemption_reason_code = "VATEX-EU-AE"
-    trade_tax.rate_applicable_percent = Decimal("0.00")
-    doc.trade.settlement.trade_tax.add(trade_tax)
-
-    doc.trade.settlement.monetary_summation.line_total = Decimal("999.00")
-    doc.trade.settlement.monetary_summation.charge_total = Decimal("0.00")
-    doc.trade.settlement.monetary_summation.allowance_total = Decimal("0.00")
-    doc.trade.settlement.monetary_summation.tax_basis_total = Decimal("999.00")
-    doc.trade.settlement.monetary_summation.tax_total = Decimal("0.00")
-    doc.trade.settlement.monetary_summation.grand_total = Decimal("999.00")
-    doc.trade.settlement.monetary_summation.due_amount = Decimal("999.00")
+    xml = doc.serialize(schema="FACTUR-X_EXTENDED")
+    output_pdf = attach_xml(invoice_pdf17_bytes, xml)
 
+    assert output_pdf
+
+
+@pytest.mark.parametrize("invoice_document", ["380"], indirect=True)
+def test_readme_construction_example_pdf17_xrechnung3(
+    invoice_document, invoice_pdf17_bytes
+):
+    """
+    Test that attach_xml accepts an XRechnung 3.0 guideline id when no
+    explicit profile level is passed (PDF 1.7 input)
+    """
+    doc = invoice_document
+    doc.context.guideline_parameter.id = (
+        "urn:cen.eu:en16931:2017#compliant#urn:xeinkauf.de:kosit:xrechnung_3.0"
+    )
+    xml = doc.serialize(schema="FACTUR-X_EXTENDED")
+    output_pdf = attach_xml(invoice_pdf17_bytes, xml)
+
+    assert output_pdf
+
+
+@pytest.mark.parametrize("invoice_document", ["380"], indirect=True)
+def test_invalid_invoice_XML_profile_exceptions(invoice_document, invoice_pdf17_bytes):
+    """
+    Test that attach_xml raises "Invalid XML profile!" when the guideline id
+    of the XML is not a recognised profile
+    """
+    doc = invoice_document
+    doc.context.guideline_parameter.id = (
+        "urn:cen.eu:en16932:2017#conformant#urn:factur-x.eu:1p0:wrong"
+    )
     xml = doc.serialize(schema="FACTUR-X_EXTENDED")
-    with open(
-        os.path.join(os.path.dirname(__file__), "samples", "Empty.pdf"), "rb"
-    ) as original_file:
-        assert attach_xml(original_file.read(), xml, "EXTENDED")
+    # unrecognised guideline id, no explicit profile level passed
+    with pytest.raises(Exception) as exc_info:
+        attach_xml(invoice_pdf17_bytes, xml)
+
+    assert str(exc_info.value) == "Invalid XML profile!"
+
+
+@pytest.mark.parametrize("invoice_document", ["220"], indirect=True)
+def test_invalid_invoice_exceptions(invoice_document, invoice_pdf17_bytes):
+    """
+    Test the error messages attach_xml raises when the PDF or the XML
+    argument is passed as str instead of bytes
+    """
+    xml = invoice_document.serialize(schema="FACTUR-X_EXTENDED")
+
+    # PDF passed as str instead of bytes
+    with pytest.raises(Exception) as exc_info:
+        attach_xml("invalid_pdf_type", xml)
+
+    assert str(exc_info.value) == "Please supply original PDF as bytes."
+
+    # XML passed as str instead of bytes
+    with pytest.raises(Exception) as exc_info:
+        attach_xml(invoice_pdf17_bytes, "invalid_xml_type")
+
+    assert str(exc_info.value) == "Please supply XML data as bytes."