Correctly fold unknown-8bit originating from encoded words. (#142517)

author R. David Murray <rdmurray@bitdance.com>

Wed, 24 Dec 2025 14:14:39 +0000 (09:14 -0500)

committer GitHub <noreply@github.com>

Wed, 24 Dec 2025 14:14:39 +0000 (09:14 -0500)
author R. David Murray <rdmurray@bitdance.com>
Wed, 24 Dec 2025 14:14:39 +0000 (09:14 -0500)
committer GitHub <noreply@github.com>
Wed, 24 Dec 2025 14:14:39 +0000 (09:14 -0500)
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py

index 6795a606de037e2e428f95087e394e9e16a5ebbb..05a34a4c10523369712c7134a9f092d11cad562b 100644 (file)
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py
@@ -219,7 +219,7 @@ def encode(string, charset='utf-8', encoding=None, lang=''):
  
      """
      if charset == 'unknown-8bit':
-        bstring = string.encode('ascii', 'surrogateescape')
+        bstring = string.encode('utf-8', 'surrogateescape')
      else:
          bstring = string.encode(charset)
      if encoding is None:
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py

index f33844910beee4de08ff69e9aca777e1d4c8b18f..426ec4644e3096124d0e1ef660113e9bd279be2a 100644 (file)
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -3340,5 +3340,13 @@ class TestFolding(TestEmailBase):
          token = parser.get_address_list(text)[0]
          self._test(token, expected, policy=policy)
  
+    def test_encoded_word_with_undecodable_bytes(self):
+        self._test(parser.get_address_list(
+            ' =?utf-8?Q?=E5=AE=A2=E6=88=B6=E6=AD=A3=E8=A6=8F=E4=BA=A4=E7?='
+                )[0],
+            ' =?unknown-8bit?b?5a6i5oi25q2j6KaP5Lqk5w==?=\n',
+            )
+
+
  if __name__ == '__main__':
      unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst b/Misc/NEWS.d/next/Library/2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst

new file mode 100644 (file)

index 0000000..388fff0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst
@@ -0,0 +1,4 @@
+The non-``compat32`` :mod:`email` policies now correctly handle refolding
+encoded words that contain bytes that can not be decoded in their specified
+character set.  Previously this resulted in an encoding exception during
+folding.
author	R. David Murray <rdmurray@bitdance.com>
	Wed, 24 Dec 2025 14:14:39 +0000 (09:14 -0500)
committer	GitHub <noreply@github.com>
	Wed, 24 Dec 2025 14:14:39 +0000 (09:14 -0500)
Lib/email/_encoded_words.py		patch \| blob \| blame \| history
Lib/test/test_email/test__header_value_parser.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst	[new file with mode: 0644]	patch \| blob