From: Stephen Finucane Date: Sat, 6 Jun 2026 11:33:39 +0000 (+0100) Subject: parser: Strip NUL bytes X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=758b99b149891e0e93caf0dc0f77475e3e4f4730;p=thirdparty%2Fpatchwork.git parser: Strip NUL bytes psycopg (v3) strictly rejects NUL bytes (0x00) in PostgreSQL text fields, unlike psycopg2, which handled them silently. Malformed emails (such as the codec-null.mbox fuzz fixture in our test suite) contain NUL bytes in headers and body content, causing a DataError on insert. Strip NUL bytes from all text fields before they reach the database. This is safe for all fields: * Headers are restricted to printable ASCII by RFC 5322, meaning NUL bytes there always indicate corruption. * Email body / commit message content is plain text, so NUL bytes are equally invalid here. * Unified diffs should also never contain NUL bytes. Git uses their presence as the heuristic to classify a file as binary, at which point it either emits 'Binary files ... differ' or, with --binary, a base85-encoded binary patch, both of which are entirely printable ASCII. A legitimate patch produced by git format-patch will therefore never carry NUL bytes in the diff text itself. 
Signed-off-by: Stephen Finucane --- diff --git a/patchwork/parser.py b/patchwork/parser.py index c33ada8d..75a6bf33 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -151,6 +151,7 @@ def clean_header(header): return None header_str = str(sane_header) + header_str = header_str.replace('\x00', '') return normalise_space(header_str) @@ -498,7 +499,7 @@ def find_headers(mail): if header is not None ] - return '\n'.join(strings) + return '\n'.join(strings).replace('\x00', '') def find_message_id(mail): @@ -697,6 +698,9 @@ def find_patch_content(mail): commentbuf = clean_content(commentbuf) + if patchbuf: + patchbuf = patchbuf.replace('\x00', '') + return patchbuf, commentbuf @@ -873,7 +877,7 @@ def clean_content(content): sig_re = re.compile(r'^(-- |_+)\n.*', re.S | re.M) content = sig_re.sub('', content) - return content.strip() + return content.strip().replace('\x00', '') def parse_patch(content):