]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-130631: Make join_header_words() more similar to the original Perl version (GH...
authorSerhiy Storchaka <storchaka@gmail.com>
Wed, 9 Apr 2025 08:08:04 +0000 (11:08 +0300)
committerGitHub <noreply@github.com>
Wed, 9 Apr 2025 08:08:04 +0000 (11:08 +0300)
* Always quote strings with non-ASCII characters.
* Allow some non-separator and non-control characters (like "." or "-")
  to be unquoted.
* Always quote strings that end with "\n".
* Use the fullmatch() method for clarity and optimization.

Lib/http/cookiejar.py
Lib/test/test_http_cookiejar.py
Misc/NEWS.d/next/Library/2025-02-27-14-25-01.gh-issue-130631.dmZcZM.rst [new file with mode: 0644]

index fb0fd2e97999af1af515a20d8d1f465677fb714e..68cf16c93cc1c83b5fb54d79a09bde1d18f22578 100644 (file)
@@ -430,6 +430,7 @@ def split_header_words(header_values):
         if pairs: result.append(pairs)
     return result
 
+HEADER_JOIN_TOKEN_RE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
 HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
 def join_header_words(lists):
     """Do the inverse (almost) of the conversion done by split_header_words.
@@ -437,10 +438,10 @@ def join_header_words(lists):
     Takes a list of lists of (key, value) pairs and produces a single header
     value.  Attribute values are quoted if needed.
 
-    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
-    'text/plain; charset="iso-8859-1"'
-    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
-    'text/plain, charset="iso-8859-1"'
+    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
+    'text/plain; charset="iso-8859/1"'
+    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
+    'text/plain, charset="iso-8859/1"'
 
     """
     headers = []
@@ -448,7 +449,7 @@ def join_header_words(lists):
         attr = []
         for k, v in pairs:
             if v is not None:
-                if not re.search(r"^\w+$", v):
+                if not HEADER_JOIN_TOKEN_RE.fullmatch(v):
                     v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                     v = '"%s"' % v
                 k = "%s=%s" % (k, v)
index 25a671809d4499a697bf6cdec85be509d2de2c7a..cf02c5b43a2e43cbbb4d726a2a153fce21a0a3bc 100644 (file)
@@ -285,11 +285,21 @@ Got:          '%s'
             ("foo=bar;bar=baz", "foo=bar; bar=baz"),
             ('foo bar baz', "foo; bar; baz"),
             (r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'),
+            ("föo=bär", 'föo="bär"'),
             ('foo,,,bar', 'foo, bar'),
             ('foo=bar,bar=baz', 'foo=bar, bar=baz'),
+            ("foo=\n", 'foo=""'),
+            ('foo="\n"', 'foo="\n"'),
+            ('foo=bar\n', 'foo=bar'),
+            ('foo="bar\n"', 'foo="bar\n"'),
+            ('foo=bar\nbaz', 'foo=bar; baz'),
+            ('foo="bar\nbaz"', 'foo="bar\nbaz"'),
 
             ('text/html; charset=iso-8859-1',
-             'text/html; charset="iso-8859-1"'),
+             'text/html; charset=iso-8859-1'),
+
+            ('text/html; charset="iso-8859/1"',
+             'text/html; charset="iso-8859/1"'),
 
             ('foo="bar"; port="80,81"; discard, bar=baz',
              'foo=bar; port="80,81"; discard, bar=baz'),
@@ -297,8 +307,8 @@ Got:          '%s'
             (r'Basic realm="\"foo\\\\bar\""',
              r'Basic; realm="\"foo\\\\bar\""'),
 
-            ('n; foo="foo;_", bar=foo!_',
-             'n; foo="foo;_", bar="foo!_"'),
+            ('n; foo="foo;_", bar="foo,_"',
+             'n; foo="foo;_", bar="foo,_"'),
             ]
 
         for arg, expect in tests:
@@ -553,7 +563,7 @@ class CookieTests(unittest.TestCase):
         self.assertIsNone(cookie.value)
         self.assertEqual(cookie.name, '"spam"')
         self.assertEqual(lwp_cookie_str(cookie), (
-            r'"spam"; path="/foo/"; domain="www.acme.com"; '
+            r'"spam"; path="/foo/"; domain=www.acme.com; '
             'path_spec; discard; version=0'))
         old_str = repr(c)
         c.save(ignore_expires=True, ignore_discard=True)
diff --git a/Misc/NEWS.d/next/Library/2025-02-27-14-25-01.gh-issue-130631.dmZcZM.rst b/Misc/NEWS.d/next/Library/2025-02-27-14-25-01.gh-issue-130631.dmZcZM.rst
new file mode 100644 (file)
index 0000000..c9dc9ba
--- /dev/null
@@ -0,0 +1,3 @@
+:func:`!http.cookiejar.join_header_words` is now more similar to the original
+Perl version. It now quotes the same set of characters and always quotes
+values that end with ``"\n"``.