]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Enhancement: add layout options for email conversion (#8907)
authorSilvia Bigler <silvia@silvia-bigler.dev>
Fri, 7 Feb 2025 18:32:35 +0000 (19:32 +0100)
committerGitHub <noreply@github.com>
Fri, 7 Feb 2025 18:32:35 +0000 (18:32 +0000)
---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
16 files changed:
docs/configuration.md
src-ui/messages.xlf
src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html
src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts
src-ui/src/app/data/mail-rule.ts
src/documents/consumer.py
src/documents/tests/test_consumer.py
src/documents/tests/test_migration_workflows.py
src/documents/tests/utils.py
src/locale/en_US/LC_MESSAGES/django.po
src/paperless/settings.py
src/paperless_mail/migrations/0029_mailrule_pdf_layout.py [new file with mode: 0644]
src/paperless_mail/models.py
src/paperless_mail/parsers.py
src/paperless_mail/serialisers.py
src/paperless_mail/tests/test_parsers.py

index 359a51482b8b03e22e16b20e1fbae8e4ed8cd359..b81c10b0d9930754372632b7defbc0d037bf7020 100644 (file)
@@ -198,6 +198,18 @@ Docker, this may be the `environment` key of the webserver or a
 containing the configuration parameters. Be sure to use the correct format
 and watch out for indentation if editing the YAML file.
 
+### Email Parsing
+
+#### [`PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT=<int>`](#PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT) {#PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT}
+
+: The default layout to use for emails that are consumed as documents. Must be one of the integer choices below. Note that mail
+rules can set their own layout; this value is used when a rule is left at the system default and for .eml files consumed by other means.
+
+    - `1` = Text, then HTML
+    - `2` = HTML, then text
+    - `3` = HTML only
+    - `4` = Text only
+
 ## Paths and folders
 
 #### [`PAPERLESS_CONSUMPTION_DIR=<path>`](#PAPERLESS_CONSUMPTION_DIR) {#PAPERLESS_CONSUMPTION_DIR}
index 1072e89c6e713424fef64598783871787e36325e..34959e4a0fa30244fa8a80fd1d9681e1121d8a6a 100644 (file)
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">75</context>
+          <context context-type="linenumber">76</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">74</context>
+          <context context-type="linenumber">75</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">64</context>
+          <context context-type="linenumber">88</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
           <context context-type="linenumber">43</context>
         </context-group>
       </trans-unit>
+      <trans-unit id="3842519365862452117" datatype="html">
+        <source>PDF layout</source>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
+          <context context-type="linenumber">44</context>
+        </context-group>
+      </trans-unit>
       <trans-unit id="2873939123535615966" datatype="html">
         <source>Include only files matching</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">46</context>
+          <context context-type="linenumber">47</context>
         </context-group>
       </trans-unit>
       <trans-unit id="7233407036155150477" datatype="html">
         <source>Optional. Wildcards e.g. *.pdf or *invoice* allowed. Can be comma-separated list. Case insensitive.</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">46</context>
+          <context context-type="linenumber">47</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">47</context>
+          <context context-type="linenumber">48</context>
         </context-group>
       </trans-unit>
       <trans-unit id="1546332577833742677" datatype="html">
         <source>Exclude files matching</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">47</context>
+          <context context-type="linenumber">48</context>
         </context-group>
       </trans-unit>
       <trans-unit id="9216117865911519658" datatype="html">
         <source>Action</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">53</context>
+          <context context-type="linenumber">54</context>
         </context-group>
       </trans-unit>
       <trans-unit id="7841986067387421166" datatype="html">
         <source>Only performed if the mail is processed.</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">53</context>
+          <context context-type="linenumber">54</context>
         </context-group>
       </trans-unit>
       <trans-unit id="1261794314435932203" datatype="html">
         <source>Action parameter</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">55</context>
+          <context context-type="linenumber">56</context>
         </context-group>
       </trans-unit>
       <trans-unit id="6093797930511670257" datatype="html">
         <source>Assign title from</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">57</context>
+          <context context-type="linenumber">58</context>
         </context-group>
       </trans-unit>
       <trans-unit id="5232720756589450549" datatype="html">
         <source>Assign owner from rule</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">58</context>
+          <context context-type="linenumber">59</context>
         </context-group>
       </trans-unit>
       <trans-unit id="6695990587380209737" datatype="html">
         <source>Assign document type</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">62</context>
+          <context context-type="linenumber">63</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
         <source>Assign correspondent from</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">63</context>
+          <context context-type="linenumber">64</context>
         </context-group>
       </trans-unit>
       <trans-unit id="4875491778188965469" datatype="html">
         <source>Assign correspondent</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">65</context>
+          <context context-type="linenumber">66</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
         <source>Error</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">72</context>
+          <context context-type="linenumber">73</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
         <source>Only process attachments</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">38</context>
+          <context context-type="linenumber">39</context>
         </context-group>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">49</context>
+          <context context-type="linenumber">50</context>
         </context-group>
       </trans-unit>
       <trans-unit id="936923743212522897" datatype="html">
         <source>Process all files, including &apos;inline&apos; attachments</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">42</context>
+          <context context-type="linenumber">43</context>
         </context-group>
       </trans-unit>
       <trans-unit id="9025522236384167767" datatype="html">
         <source>Process message as .eml</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">53</context>
+          <context context-type="linenumber">54</context>
         </context-group>
       </trans-unit>
       <trans-unit id="7411485377918318115" datatype="html">
         <source>Process message as .eml and attachments separately</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">57</context>
+          <context context-type="linenumber">58</context>
+        </context-group>
+      </trans-unit>
+      <trans-unit id="8776300244268604360" datatype="html">
+        <source>System default</source>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
+          <context context-type="linenumber">65</context>
+        </context-group>
+      </trans-unit>
+      <trans-unit id="4812910224365219000" datatype="html">
+        <source>Text, then HTML</source>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
+          <context context-type="linenumber">69</context>
+        </context-group>
+      </trans-unit>
+      <trans-unit id="3181744476823286470" datatype="html">
+        <source>HTML, then text</source>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
+          <context context-type="linenumber">73</context>
+        </context-group>
+      </trans-unit>
+      <trans-unit id="9048933760263399623" datatype="html">
+        <source>HTML only</source>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
+          <context context-type="linenumber">77</context>
+        </context-group>
+      </trans-unit>
+      <trans-unit id="3835211125655594627" datatype="html">
+        <source>Text only</source>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
+          <context context-type="linenumber">81</context>
         </context-group>
       </trans-unit>
       <trans-unit id="2784260611081866636" datatype="html">
         <source>Move to specified folder</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">68</context>
+          <context context-type="linenumber">92</context>
         </context-group>
       </trans-unit>
       <trans-unit id="4593278936733161020" datatype="html">
         <source>Mark as read, don&apos;t process read mails</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">72</context>
+          <context context-type="linenumber">96</context>
         </context-group>
       </trans-unit>
       <trans-unit id="2378921144019636516" datatype="html">
         <source>Flag the mail, don&apos;t process flagged mails</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">76</context>
+          <context context-type="linenumber">100</context>
         </context-group>
       </trans-unit>
       <trans-unit id="6457024618858980302" datatype="html">
         <source>Tag the mail with specified tag, don&apos;t process tagged mails</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">80</context>
+          <context context-type="linenumber">104</context>
         </context-group>
       </trans-unit>
       <trans-unit id="4673329664686432878" datatype="html">
         <source>Use subject as title</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">87</context>
+          <context context-type="linenumber">111</context>
         </context-group>
       </trans-unit>
       <trans-unit id="8645471396972938185" datatype="html">
         <source>Use attachment filename as title</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">91</context>
+          <context context-type="linenumber">115</context>
         </context-group>
       </trans-unit>
       <trans-unit id="2881879110886196973" datatype="html">
         <source>Do not assign title from this rule</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">95</context>
+          <context context-type="linenumber">119</context>
         </context-group>
       </trans-unit>
       <trans-unit id="1568902914205618549" datatype="html">
         <source>Do not assign a correspondent</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">102</context>
+          <context context-type="linenumber">126</context>
         </context-group>
       </trans-unit>
       <trans-unit id="3567746385454588269" datatype="html">
         <source>Use mail address</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">106</context>
+          <context context-type="linenumber">130</context>
         </context-group>
       </trans-unit>
       <trans-unit id="445154175758965852" datatype="html">
         <source>Use name (or mail address if not available)</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">110</context>
+          <context context-type="linenumber">134</context>
         </context-group>
       </trans-unit>
       <trans-unit id="1258862217749148424" datatype="html">
         <source>Use correspondent selected below</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">114</context>
+          <context context-type="linenumber">138</context>
         </context-group>
       </trans-unit>
       <trans-unit id="3147349817770432927" datatype="html">
         <source>Create new mail rule</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">166</context>
+          <context context-type="linenumber">190</context>
         </context-group>
       </trans-unit>
       <trans-unit id="3374331029704382439" datatype="html">
         <source>Edit mail rule</source>
         <context-group purpose="location">
           <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
-          <context context-type="linenumber">170</context>
+          <context context-type="linenumber">194</context>
         </context-group>
       </trans-unit>
       <trans-unit id="8911059720204770105" datatype="html">
index a9ad3040bd8486c9f110f1a306f45bc35e4462bb..afe6c2ab93f2e4b9d1d9abbd9ff7a9305d92f818 100644 (file)
@@ -41,6 +41,7 @@
       <div class="col-md-6">
         <pngx-input-select [horizontal]="true" i18n-title title="Consumption scope" [items]="consumptionScopeOptions" formControlName="consumption_scope" i18n-hint hint="See docs for .eml processing requirements"></pngx-input-select>
         <pngx-input-select [horizontal]="true" i18n-title title="Attachment type" [items]="attachmentTypeOptions" formControlName="attachment_type"></pngx-input-select>
+        <pngx-input-select [horizontal]="true" i18n-title title="PDF layout" [items]="pdfLayoutOptions" formControlName="pdf_layout"></pngx-input-select>
       </div>
       <div class="col-md-6">
         <pngx-input-text [horizontal]="true" i18n-title title="Include only files matching" formControlName="filter_attachment_filename_include" i18n-hint hint="Optional. Wildcards e.g. *.pdf or *invoice* allowed. Can be comma-separated list. Case insensitive." [error]="error?.filter_attachment_filename_include"></pngx-input-text>
index f2d8236bc22995323e25ce4653c9efb45e8b5cc2..3d4924c0b67ed778fa6d57fe772a5f6c4b22174b 100644 (file)
@@ -18,6 +18,7 @@ import {
   MailMetadataTitleOption,
   MailRule,
   MailRuleConsumptionScope,
+  MailRulePdfLayout,
 } from 'src/app/data/mail-rule'
 import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
 import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
@@ -58,6 +59,29 @@ const CONSUMPTION_SCOPE_OPTIONS = [
   },
 ]
 
+const PDF_LAYOUT_OPTIONS = [
+  {
+    id: MailRulePdfLayout.Default,
+    name: $localize`System default`,
+  },
+  {
+    id: MailRulePdfLayout.TextHtml,
+    name: $localize`Text, then HTML`,
+  },
+  {
+    id: MailRulePdfLayout.HtmlText,
+    name: $localize`HTML, then text`,
+  },
+  {
+    id: MailRulePdfLayout.HtmlOnly,
+    name: $localize`HTML only`,
+  },
+  {
+    id: MailRulePdfLayout.TextOnly,
+    name: $localize`Text only`,
+  },
+]
+
 const ACTION_OPTIONS = [
   {
     id: MailAction.Delete,
@@ -184,6 +208,7 @@ export class MailRuleEditDialogComponent extends EditDialogComponent<MailRule> {
       filter_attachment_filename_exclude: new FormControl(null),
       maximum_age: new FormControl(null),
       attachment_type: new FormControl(MailFilterAttachmentType.Attachments),
+      pdf_layout: new FormControl(MailRulePdfLayout.Default),
       consumption_scope: new FormControl(MailRuleConsumptionScope.Attachments),
       order: new FormControl(null),
       action: new FormControl(MailAction.MarkRead),
@@ -232,4 +257,8 @@ export class MailRuleEditDialogComponent extends EditDialogComponent<MailRule> {
   get consumptionScopeOptions() {
     return CONSUMPTION_SCOPE_OPTIONS
   }
+
+  get pdfLayoutOptions() {
+    return PDF_LAYOUT_OPTIONS
+  }
 }
index 6e2c468a2d2400a0555b730d116c8fd05eeb0f2a..4c47b65006d5f679400658d327375f0636483be8 100644 (file)
@@ -11,6 +11,14 @@ export enum MailRuleConsumptionScope {
   Everything = 3,
 }
 
+// Numeric ids for a mail rule's `pdf_layout` field. Values 1-4 correspond to
+// the layout choices documented for PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT;
+// Default (0) is shown as "System default" in the mail rule edit dialog.
+export enum MailRulePdfLayout {
+  Default = 0,
+  TextHtml = 1,
+  HtmlText = 2,
+  HtmlOnly = 3,
+  TextOnly = 4,
+}
+
 export enum MailAction {
   Delete = 1,
   Move = 2,
@@ -59,6 +67,8 @@ export interface MailRule extends ObjectWithPermissions {
 
   attachment_type: MailFilterAttachmentType
 
+  pdf_layout: MailRulePdfLayout
+
   action: MailAction
 
   action_parameter?: string
index 35c18ac7b0a361bc76ddd4b0ef1dbcf8c668afef..81739fa7a8839e7ec2e442e7b42102453a16bb78 100644 (file)
@@ -48,6 +48,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
+from paperless_mail.parsers import MailDocumentParser
 
 
 class WorkflowTriggerPlugin(
@@ -479,7 +480,18 @@ class ConsumerPlugin(
                 ConsumerStatusShortMessage.PARSING_DOCUMENT,
             )
             self.log.debug(f"Parsing {self.filename}...")
-            document_parser.parse(self.working_copy, mime_type, self.filename)
+            if (
+                isinstance(document_parser, MailDocumentParser)
+                and self.input_doc.mailrule_id
+            ):
+                document_parser.parse(
+                    self.working_copy,
+                    mime_type,
+                    self.filename,
+                    self.input_doc.mailrule_id,
+                )
+            else:
+                document_parser.parse(self.working_copy, mime_type, self.filename)
 
             self.log.debug(f"Generating thumbnail for {self.filename}...")
             self._send_progress(
index a862d7fa0b738f5b1db6e5eb19b56e2a498afad3..6f576ab24a280d84fa10fe2daef5ac95d84daf7d 100644 (file)
@@ -21,6 +21,7 @@ from guardian.core import ObjectPermissionChecker
 
 from documents.consumer import ConsumerError
 from documents.data_models import DocumentMetadataOverrides
+from documents.data_models import DocumentSource
 from documents.models import Correspondent
 from documents.models import CustomField
 from documents.models import Document
@@ -35,6 +36,8 @@ from documents.tasks import sanity_check
 from documents.tests.utils import DirectoriesMixin
 from documents.tests.utils import FileSystemAssertsMixin
 from documents.tests.utils import GetConsumerMixin
+from paperless_mail.models import MailRule
+from paperless_mail.parsers import MailDocumentParser
 
 
 class TestAttributes(UnittestTestCase):
@@ -243,6 +246,8 @@ def fake_magic_from_file(file, *, mime=False):
             return "image/png"
         elif os.path.splitext(file)[1] == ".webp":
             return "image/webp"
+        elif os.path.splitext(file)[1] == ".eml":
+            return "message/rfc822"
         else:
             return "unknown"
     else:
@@ -975,6 +980,59 @@ class TestConsumer(
             self.assertEqual(command[0], "qpdf")
             self.assertEqual(command[1], "--replace-input")
 
+    @mock.patch("paperless_mail.models.MailRule.objects.get")
+    @mock.patch("paperless_mail.parsers.MailDocumentParser.parse")
+    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    def test_mail_parser_receives_mailrule(
+        self,
+        mock_consumer_declaration_send: mock.Mock,
+        mock_mail_parser_parse: mock.Mock,
+        mock_mailrule_get: mock.Mock,
+    ):
+        """
+        GIVEN:
+            - A mail document from a mail rule
+        WHEN:
+            - The consumer is run
+        THEN:
+            - The mail parser should receive the mail rule
+        """
+        mock_consumer_declaration_send.return_value = [
+            (
+                None,
+                {
+                    "parser": MailDocumentParser,
+                    "mime_types": {"message/rfc822": ".eml"},
+                    "weight": 0,
+                },
+            ),
+        ]
+        mock_mailrule_get.return_value = mock.Mock(
+            pdf_layout=MailRule.PdfLayout.HTML_ONLY,
+        )
+        with self.get_consumer(
+            filepath=(
+                Path(__file__).parent.parent.parent
+                / Path("paperless_mail")
+                / Path("tests")
+                / Path("samples")
+            ).resolve()
+            / "html.eml",
+            source=DocumentSource.MailFetch,
+            mailrule_id=1,
+        ) as consumer:
+            # fails because no gotenberg
+            with self.assertRaises(
+                ConsumerError,
+            ):
+                consumer.run()
+                mock_mail_parser_parse.assert_called_once_with(
+                    consumer.working_copy,
+                    "message/rfc822",
+                    file_name="sample.pdf",
+                    mailrule=mock_mailrule_get.return_value,
+                )
+
 
 @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
 class TestConsumerCreatedDate(DirectoriesMixin, GetConsumerMixin, TestCase):
index 69f5ed5fbe95cb9a65704913db67a2ef3fb6faeb..9895188188a4e42c4169fe7640824ec546b76e68 100644 (file)
@@ -8,7 +8,7 @@ class TestMigrateWorkflow(TestMigrations):
     dependencies = (
         (
             "paperless_mail",
-            "0028_alter_mailaccount_password_and_more",
+            "0029_mailrule_pdf_layout",
         ),
     )
 
index 739433bb67195df011250e3a0e4ee26b89a0111a..fc50b3948a0702f3f53967ed53ed07127547fc64 100644 (file)
@@ -340,11 +340,16 @@ class GetConsumerMixin:
         filepath: Path,
         overrides: DocumentMetadataOverrides | None = None,
         source: DocumentSource = DocumentSource.ConsumeFolder,
+        mailrule_id: int | None = None,
     ) -> Generator[ConsumerPlugin, None, None]:
         # Store this for verification
         self.status = DummyProgressManager(filepath.name, None)
         reader = ConsumerPlugin(
-            ConsumableDocument(source, original_file=filepath),
+            ConsumableDocument(
+                source,
+                original_file=filepath,
+                mailrule_id=mailrule_id or None,
+            ),
             overrides or DocumentMetadataOverrides(),
             self.status,  # type: ignore
             self.dirs.scratch_dir,
index 0ef4c1dc8203fa60d1db359b3a09d3c83c12ebdd..57494a5c25e389a7f1e30322683014906a44c3af 100644 (file)
@@ -2,7 +2,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: paperless-ngx\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2025-01-27 08:19-0800\n"
+"POT-Creation-Date: 2025-01-28 12:17-0800\n"
 "PO-Revision-Date: 2022-02-17 04:17\n"
 "Last-Translator: \n"
 "Language-Team: English\n"
@@ -90,7 +90,7 @@ msgid "Automatic"
 msgstr ""
 
 #: documents/models.py:67 documents/models.py:433 documents/models.py:1493
-#: paperless_mail/models.py:23 paperless_mail/models.py:136
+#: paperless_mail/models.py:23 paperless_mail/models.py:143
 msgid "name"
 msgstr ""
 
@@ -276,7 +276,7 @@ msgstr ""
 msgid "warning"
 msgstr ""
 
-#: documents/models.py:387 paperless_mail/models.py:350
+#: documents/models.py:387 paperless_mail/models.py:363
 msgid "error"
 msgstr ""
 
@@ -818,7 +818,7 @@ msgstr ""
 msgid "filter filename"
 msgstr ""
 
-#: documents/models.py:1066 paperless_mail/models.py:193
+#: documents/models.py:1066 paperless_mail/models.py:200
 msgid ""
 "Only consume documents which entirely match this filename if specified. "
 "Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
@@ -988,15 +988,15 @@ msgid ""
 "Assign a document title, can include some placeholders, see documentation."
 msgstr ""
 
-#: documents/models.py:1287 paperless_mail/models.py:261
+#: documents/models.py:1287 paperless_mail/models.py:274
 msgid "assign this tag"
 msgstr ""
 
-#: documents/models.py:1296 paperless_mail/models.py:269
+#: documents/models.py:1296 paperless_mail/models.py:282
 msgid "assign this document type"
 msgstr ""
 
-#: documents/models.py:1305 paperless_mail/models.py:283
+#: documents/models.py:1305 paperless_mail/models.py:296
 msgid "assign this correspondent"
 msgstr ""
 
@@ -1112,7 +1112,7 @@ msgstr ""
 msgid "workflow actions"
 msgstr ""
 
-#: documents/models.py:1495 paperless_mail/models.py:138
+#: documents/models.py:1495 paperless_mail/models.py:145
 msgid "order"
 msgstr ""
 
@@ -1124,7 +1124,7 @@ msgstr ""
 msgid "actions"
 msgstr ""
 
-#: documents/models.py:1511 paperless_mail/models.py:147
+#: documents/models.py:1511 paperless_mail/models.py:154
 msgid "enabled"
 msgstr ""
 
@@ -1838,161 +1838,185 @@ msgid "Process all files, including 'inline' attachments."
 msgstr ""
 
 #: paperless_mail/models.py:119
-msgid "Delete"
+msgid "System default"
 msgstr ""
 
 #: paperless_mail/models.py:120
-msgid "Move to specified folder"
+msgid "Text, then HTML"
 msgstr ""
 
 #: paperless_mail/models.py:121
-msgid "Mark as read, don't process read mails"
+msgid "HTML, then text"
 msgstr ""
 
 #: paperless_mail/models.py:122
-msgid "Flag the mail, don't process flagged mails"
+msgid "HTML only"
 msgstr ""
 
 #: paperless_mail/models.py:123
-msgid "Tag the mail with specified tag, don't process tagged mails"
+msgid "Text only"
 msgstr ""
 
 #: paperless_mail/models.py:126
-msgid "Use subject as title"
+msgid "Delete"
 msgstr ""
 
 #: paperless_mail/models.py:127
-msgid "Use attachment filename as title"
+msgid "Move to specified folder"
 msgstr ""
 
 #: paperless_mail/models.py:128
+msgid "Mark as read, don't process read mails"
+msgstr ""
+
+#: paperless_mail/models.py:129
+msgid "Flag the mail, don't process flagged mails"
+msgstr ""
+
+#: paperless_mail/models.py:130
+msgid "Tag the mail with specified tag, don't process tagged mails"
+msgstr ""
+
+#: paperless_mail/models.py:133
+msgid "Use subject as title"
+msgstr ""
+
+#: paperless_mail/models.py:134
+msgid "Use attachment filename as title"
+msgstr ""
+
+#: paperless_mail/models.py:135
 msgid "Do not assign title from rule"
 msgstr ""
 
-#: paperless_mail/models.py:131
+#: paperless_mail/models.py:138
 msgid "Do not assign a correspondent"
 msgstr ""
 
-#: paperless_mail/models.py:132
+#: paperless_mail/models.py:139
 msgid "Use mail address"
 msgstr ""
 
-#: paperless_mail/models.py:133
+#: paperless_mail/models.py:140
 msgid "Use name (or mail address if not available)"
 msgstr ""
 
-#: paperless_mail/models.py:134
+#: paperless_mail/models.py:141
 msgid "Use correspondent selected below"
 msgstr ""
 
-#: paperless_mail/models.py:144
+#: paperless_mail/models.py:151
 msgid "account"
 msgstr ""
 
-#: paperless_mail/models.py:150 paperless_mail/models.py:305
+#: paperless_mail/models.py:157 paperless_mail/models.py:318
 msgid "folder"
 msgstr ""
 
-#: paperless_mail/models.py:154
+#: paperless_mail/models.py:161
 msgid ""
 "Subfolders must be separated by a delimiter, often a dot ('.') or slash "
 "('/'), but it varies by mail server."
 msgstr ""
 
-#: paperless_mail/models.py:160
+#: paperless_mail/models.py:167
 msgid "filter from"
 msgstr ""
 
-#: paperless_mail/models.py:167
+#: paperless_mail/models.py:174
 msgid "filter to"
 msgstr ""
 
-#: paperless_mail/models.py:174
+#: paperless_mail/models.py:181
 msgid "filter subject"
 msgstr ""
 
-#: paperless_mail/models.py:181
+#: paperless_mail/models.py:188
 msgid "filter body"
 msgstr ""
 
-#: paperless_mail/models.py:188
+#: paperless_mail/models.py:195
 msgid "filter attachment filename inclusive"
 msgstr ""
 
-#: paperless_mail/models.py:200
+#: paperless_mail/models.py:207
 msgid "filter attachment filename exclusive"
 msgstr ""
 
-#: paperless_mail/models.py:205
+#: paperless_mail/models.py:212
 msgid ""
 "Do not consume documents which entirely match this filename if specified. "
 "Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
 msgstr ""
 
-#: paperless_mail/models.py:212
+#: paperless_mail/models.py:219
 msgid "maximum age"
 msgstr ""
 
-#: paperless_mail/models.py:214
+#: paperless_mail/models.py:221
 msgid "Specified in days."
 msgstr ""
 
-#: paperless_mail/models.py:218
+#: paperless_mail/models.py:225
 msgid "attachment type"
 msgstr ""
 
-#: paperless_mail/models.py:222
+#: paperless_mail/models.py:229
 msgid ""
 "Inline attachments include embedded images, so it's best to combine this "
 "option with a filename filter."
 msgstr ""
 
-#: paperless_mail/models.py:228
+#: paperless_mail/models.py:235
 msgid "consumption scope"
 msgstr ""
 
-#: paperless_mail/models.py:234
+#: paperless_mail/models.py:241
+msgid "pdf layout"
+msgstr ""
+
+#: paperless_mail/models.py:247
 msgid "action"
 msgstr ""
 
-#: paperless_mail/models.py:240
+#: paperless_mail/models.py:253
 msgid "action parameter"
 msgstr ""
 
-#: paperless_mail/models.py:245
+#: paperless_mail/models.py:258
 msgid ""
 "Additional parameter for the action selected above, i.e., the target folder "
 "of the move to folder action. Subfolders must be separated by dots."
 msgstr ""
 
-#: paperless_mail/models.py:253
+#: paperless_mail/models.py:266
 msgid "assign title from"
 msgstr ""
 
-#: paperless_mail/models.py:273
+#: paperless_mail/models.py:286
 msgid "assign correspondent from"
 msgstr ""
 
-#: paperless_mail/models.py:287
+#: paperless_mail/models.py:300
 msgid "Assign the rule owner to documents"
 msgstr ""
 
-#: paperless_mail/models.py:313
+#: paperless_mail/models.py:326
 msgid "uid"
 msgstr ""
 
-#: paperless_mail/models.py:321
+#: paperless_mail/models.py:334
 msgid "subject"
 msgstr ""
 
-#: paperless_mail/models.py:329
+#: paperless_mail/models.py:342
 msgid "received"
 msgstr ""
 
-#: paperless_mail/models.py:336
+#: paperless_mail/models.py:349
 msgid "processed"
 msgstr ""
 
-#: paperless_mail/models.py:342
+#: paperless_mail/models.py:355
 msgid "status"
 msgstr ""
index a817abd70ebb693f31a117c376f0fcc1cb9c0245..846b9e0ee85d1de622e8e2ca01e424d8a277791f 100644 (file)
@@ -1030,6 +1030,11 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
 
 GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
 
+# Layout for .eml files when no mail rule supplies one (see MailRule.PdfLayout)
+EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
+    "PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
+    1,  # == MailRule.PdfLayout.TEXT_HTML, which can't be imported here
+)
 
 # Pre-2.x versions of Paperless stored your documents locally with GPG
 # encryption, but that is no longer the default.  This behaviour is still
diff --git a/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py b/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py
new file mode 100644 (file)
index 0000000..fe7a93b
--- /dev/null
@@ -0,0 +1,28 @@
+# Generated by Django 5.1.3 on 2024-11-24 12:39
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("paperless_mail", "0028_alter_mailaccount_password_and_more"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="mailrule",
+            name="pdf_layout",
+            field=models.PositiveIntegerField(
+                choices=[
+                    (0, "System default"),
+                    (1, "Text, then HTML"),
+                    (2, "HTML, then text"),
+                    (3, "HTML only"),
+                    (4, "Text only"),
+                ],
+                default=0,  # 0 = "System default", also applied to existing rules
+                verbose_name="pdf layout",
+            ),
+        ),
+    ]
index 46b9db1ff44490bdb200c183b1fdaa2cdec94d94..cf33a056b6508cf02841cd427651a37c604edb7c 100644 (file)
@@ -115,6 +115,13 @@ class MailRule(document_models.ModelWithOwner):
         ATTACHMENTS_ONLY = 1, _("Only process attachments.")
         EVERYTHING = 2, _("Process all files, including 'inline' attachments.")
 
+    class PdfLayout(models.IntegerChoices):
+        DEFAULT = 0, _("System default")  # falsy: EMAIL_PARSE_DEFAULT_LAYOUT applies
+        TEXT_HTML = 1, _("Text, then HTML")
+        HTML_TEXT = 2, _("HTML, then text")
+        HTML_ONLY = 3, _("HTML only")
+        TEXT_ONLY = 4, _("Text only")
+
     class MailAction(models.IntegerChoices):
         DELETE = 1, _("Delete")
         MOVE = 2, _("Move to specified folder")
@@ -230,6 +237,12 @@ class MailRule(document_models.ModelWithOwner):
         default=ConsumptionScope.ATTACHMENTS_ONLY,
     )
 
+    pdf_layout = models.PositiveIntegerField(
+        _("pdf layout"),
+        choices=PdfLayout.choices,
+        default=PdfLayout.DEFAULT,
+    )
+
     action = models.PositiveIntegerField(
         _("action"),
         choices=MailAction.choices,
index d98fb72382e74641a00189423506d3b238bc6da8..44032a2e93180b1ced9e04a26c2be8e54bfb9e16 100644 (file)
@@ -22,6 +22,7 @@ from documents.parsers import DocumentParser
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
 from paperless.models import OutputTypeChoices
+from paperless_mail.models import MailRule
 
 
 class MailDocumentParser(DocumentParser):
@@ -121,7 +122,13 @@ class MailDocumentParser(DocumentParser):
         result.sort(key=lambda item: (item["prefix"], item["key"]))
         return result
 
-    def parse(self, document_path: Path, mime_type: str, file_name=None):
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        file_name=None,
+        mailrule_id: int | None = None,
+    ):
         """
         Parses the given .eml into formatted text, based on the decoded email.
 
@@ -180,7 +187,11 @@ class MailDocumentParser(DocumentParser):
             self.date = mail.date
 
         self.log.debug("Creating a PDF from the email")
-        self.archive_path = self.generate_pdf(mail)
+        if mailrule_id:
+            rule = MailRule.objects.get(pk=mailrule_id)
+            self.archive_path = self.generate_pdf(mail, rule.pdf_layout)
+        else:
+            self.archive_path = self.generate_pdf(mail)
 
     @staticmethod
     def parse_file_to_message(filepath: Path) -> MailMessage:
@@ -217,11 +228,19 @@ class MailDocumentParser(DocumentParser):
                 f"{settings.TIKA_ENDPOINT}: {err}",
             ) from err
 
-    def generate_pdf(self, mail_message: MailMessage) -> Path:
+    def generate_pdf(
+        self,
+        mail_message: MailMessage,
+        pdf_layout: MailRule.PdfLayout | None = None,
+    ) -> Path:
         archive_path = Path(self.tempdir) / "merged.pdf"
 
         mail_pdf_file = self.generate_pdf_from_mail(mail_message)
 
+        pdf_layout = (
+            pdf_layout or settings.EMAIL_PARSE_DEFAULT_LAYOUT
+        )  # None/DEFAULT (0) use the setting, a plain int rather than a PdfLayout member
+
         # If no HTML content, create the PDF from the message
         # Otherwise, create 2 PDFs and merge them with Gotenberg
         if not mail_message.html:
@@ -246,7 +265,15 @@ class MailDocumentParser(DocumentParser):
                 if pdf_a_format is not None:
                     route.pdf_format(pdf_a_format)
 
-                route.merge([mail_pdf_file, pdf_of_html_content])
+                match pdf_layout:
+                    case MailRule.PdfLayout.HTML_TEXT:
+                        route.merge([pdf_of_html_content, mail_pdf_file])
+                    case MailRule.PdfLayout.HTML_ONLY:
+                        route.merge([pdf_of_html_content])
+                    case MailRule.PdfLayout.TEXT_ONLY:
+                        route.merge([mail_pdf_file])
+                    case MailRule.PdfLayout.TEXT_HTML | _:
+                        route.merge([mail_pdf_file, pdf_of_html_content])
 
                 try:
                     response = route.run()
index 5623f62c386ef4d9c40ba9a3039fd9c246f5d1e2..e9836b421d2437357f9e184a485114f8e1ec5c17 100644 (file)
@@ -96,6 +96,7 @@ class MailRuleSerializer(OwnedObjectSerializer):
             "order",
             "attachment_type",
             "consumption_scope",
+            "pdf_layout",
             "owner",
             "user_can_change",
             "permissions",
index e8186ea0f6cdb3ab97c855dcbf2ad49d2a72034f..dbd2c82cd0403380333036b613504044d662228d 100644 (file)
@@ -1,6 +1,7 @@
 import datetime
 import logging
 from pathlib import Path
+from unittest import mock
 
 import httpx
 import pytest
@@ -662,3 +663,67 @@ class TestParser:
         request = httpx_mock.get_request()
 
         assert str(request.url) == "http://localhost:3000/forms/chromium/convert/html"
+
+    @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
+    @mock.patch("gotenberg_client._merge.MergeRoute.merge")
+    @mock.patch("paperless_mail.models.MailRule.objects.get")
+    def test_generate_pdf_layout_options(
+        self,
+        mock_mailrule_get: mock.Mock,
+        mock_merge_route: mock.Mock,
+        httpx_mock: HTTPXMock,
+        mail_parser: MailDocumentParser,
+        html_email_file: Path,
+        html_email_pdf_file: Path,
+    ):
+        """
+        GIVEN:
+            - Email message with HTML content
+        WHEN:
+            - Email is parsed with different layout options
+        THEN:
+            - The merge route receives the expected PDFs in the expected order
+        """
+        httpx_mock.add_response(
+            url="http://localhost:9998/tika/text",
+            method="PUT",
+            json={
+                "Content-Type": "text/html",
+                "X-TIKA:Parsed-By": [],
+                "X-TIKA:content": "This is some Tika HTML text",
+            },
+        )
+        httpx_mock.add_response(
+            url="http://localhost:3000/forms/chromium/convert/html",
+            method="POST",
+            content=html_email_pdf_file.read_bytes(),
+        )
+        httpx_mock.add_response(
+            url="http://localhost:3000/forms/pdfengines/merge",
+            method="POST",
+            content=b"Pretend merged PDF content",
+        )
+
+        def test_layout_option(layout_option, expected_calls, expected_pdf_names):
+            mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option)
+            mail_parser.parse(
+                document_path=html_email_file,
+                mime_type="message/rfc822",
+                mailrule_id=1,
+            )
+            args, _ = mock_merge_route.call_args  # latest merge() call
+            assert len(args[0]) == expected_calls  # number of PDFs passed to merge()
+            for i, pdf in enumerate(expected_pdf_names):
+                assert args[0][i].name == pdf
+
+        # 1 = MailRule.PdfLayout.TEXT_HTML
+        test_layout_option(1, 2, ["email_as_pdf.pdf", "html.pdf"])
+
+        # 2 = MailRule.PdfLayout.HTML_TEXT
+        test_layout_option(2, 2, ["html.pdf", "email_as_pdf.pdf"])
+
+        # 3 = MailRule.PdfLayout.HTML_ONLY
+        test_layout_option(3, 1, ["html.pdf"])
+
+        # 4 = MailRule.PdfLayout.TEXT_ONLY
+        test_layout_option(4, 1, ["email_as_pdf.pdf"])