]> git.ipfire.org Git - thirdparty/python-drafthorse.git/commitdiff
Fix validation errors for PDF/A-3
author Raphael Michel <mail@raphaelmichel.de>
Wed, 12 Dec 2018 15:00:30 +0000 (16:00 +0100)
committer Raphael Michel <mail@raphaelmichel.de>
Wed, 12 Dec 2018 15:00:30 +0000 (16:00 +0100)
README.rst
drafthorse/pdf.py

index 7bb80a81bedc9e3a78e2d87f670c1908645e8d84..3c02389e0a069c37a25464ac784cab5690ae0ab6 100644 (file)
@@ -53,6 +53,9 @@ Generating::
     >>> xml
     b'<?xml version="1.0" encoding="UTF-8"?><rsm:CrossIndustryDocument …'
 
+    # Attach XML to an existing PDF.
+    # Note that the existing PDF should be compliant with PDF/A-3!
+    # You can validate this here: https://www.pdf-online.com/osa/validate.aspx
     >>> new_pdf_bytes = attach_xml(original_pdf_bytes, xml, 'BASIC')
 
 
index 9aa4b5d81e36f61238b18717d35f4634b5addcb4..e2b6bbe507f2e092675ae8108adea4bf21436675 100644 (file)
@@ -46,7 +46,7 @@ def attach_xml(original_pdf, xml_data, level='BASIC'):
     # for page in reader.pages:
     #    output.addPage(page)
 
-    output._header = "%PDF-1.6".encode()
+    output._header = "%PDF-1.6\r\n%\xc7\xec\x8f\xa2".encode()
     output.appendPagesFromReader(reader)
 
     original_pdf_id = reader.trailer.get('/ID')
@@ -82,7 +82,7 @@ def _get_original_output_intents(original_pdf):
 
 
 def _prepare_pdf_metadata_txt(pdf_metadata):
-    pdf_date = datetime.datetime.now().isoformat()
+    pdf_date = datetime.datetime.utcnow().strftime('D:%Y%m%d%H%M%SZ')
     info_dict = {
         '/Author': pdf_metadata.get('author', ''),
         '/CreationDate': pdf_date,
@@ -141,9 +141,9 @@ def _prepare_pdf_metadata_xml(level, pdf_metadata):
     desc_xmp.set(ns_rdf + 'about', '')
     creator = etree.SubElement(desc_xmp, ns_xmp + 'CreatorTool')
     creator.text = 'python-drafthorse'
-    timestamp = datetime.datetime.now().isoformat()
-    etree.SubElement(desc_xmp, ns_xmp + 'CreateDate').text = timestamp
-    etree.SubElement(desc_xmp, ns_xmp + 'ModifyDate').text = timestamp
+    xmp_date = datetime.datetime.utcnow().replace(microsecond=0).isoformat() + '+00:00'
+    etree.SubElement(desc_xmp, ns_xmp + 'CreateDate').text = xmp_date
+    etree.SubElement(desc_xmp, ns_xmp + 'ModifyDate').text = xmp_date
 
     # Now is the ZUGFeRD description tag
     zugferd_desc = etree.SubElement(rdf, ns_rdf + 'Description', nsmap=nsmap_zf)
@@ -178,10 +178,11 @@ def _facturx_update_metadata_add_attachment(pdf_filestream, facturx_xml_str, pdf
                                             output_intents):
     md5sum = hashlib.md5(facturx_xml_str).hexdigest()
     md5sum_obj = createStringObject(md5sum)
+    pdf_date = datetime.datetime.utcnow().strftime('D:%Y%m%d%H%M%SZ')
     params_dict = DictionaryObject({
         #NameObject('/CheckSum'): md5sum_obj,
-        NameObject('/ModDate'): createStringObject(datetime.datetime.now().isoformat()),
-        NameObject('/CreationDate'): createStringObject(datetime.datetime.now().isoformat()),
+        NameObject('/ModDate'): createStringObject(pdf_date),
+        NameObject('/CreationDate'): createStringObject(pdf_date),
         NameObject('/Size'): NameObject(str(len(facturx_xml_str))),
     })
     file_entry = DecodedStreamObject()