From: Stephen Finucane <stephen@that.guru>
Date: Sat, 6 Jun 2026 11:33:39 +0000 (+0100)
Subject: parser: Strip NUL bytes
X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=758b99b149891e0e93caf0dc0f77475e3e4f4730;p=thirdparty%2Fpatchwork.git

parser: Strip NUL bytes

psycopg (v3) strictly rejects NUL bytes (0x00) in PostgreSQL text
fields, unlike psycopg2, which handled them silently. Malformed emails
(such as the codec-null.mbox fuzz fixture in our test suite) contain NUL
bytes in headers and body content, causing a DataError on insert.

Strip NUL bytes from all text fields before they reach the database.
This is safe for all fields:

* Headers are restricted to printable ASCII by RFC 5322, meaning NUL
  bytes there always indicate corruption.

* Email body / commit message content is plain text, so NUL bytes are
  equally invalid here.

* Unified diffs should also never contain NUL bytes. Git uses their
  presence as the heuristic to classify a file as binary, at which point
  it either emits 'Binary files ... differ' or, with --binary, a
  base85-encoded binary patch, both of which are entirely printable
  ASCII. A legitimate patch produced by git format-patch will therefore
  never carry NUL bytes in the diff text itself.

Signed-off-by: Stephen Finucane <stephen@that.guru>
---

diff --git a/patchwork/parser.py b/patchwork/parser.py
index c33ada8d..75a6bf33 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -151,6 +151,7 @@ def clean_header(header):
         return None
 
     header_str = str(sane_header)
+    header_str = header_str.replace('\x00', '')
 
     return normalise_space(header_str)
 
@@ -498,7 +499,7 @@ def find_headers(mail):
         if header is not None
     ]
 
-    return '\n'.join(strings)
+    return '\n'.join(strings).replace('\x00', '')
 
 
 def find_message_id(mail):
@@ -697,6 +698,9 @@ def find_patch_content(mail):
 
     commentbuf = clean_content(commentbuf)
 
+    if patchbuf:
+        patchbuf = patchbuf.replace('\x00', '')
+
     return patchbuf, commentbuf
 
 
@@ -873,7 +877,7 @@ def clean_content(content):
     sig_re = re.compile(r'^(-- |_+)\n.*', re.S | re.M)
     content = sig_re.sub('', content)
 
-    return content.strip()
+    return content.strip().replace('\x00', '')
 
 
 def parse_patch(content):