]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-121645: Add PyBytes_Join() function (#121646)
authorVictor Stinner <vstinner@python.org>
Fri, 30 Aug 2024 12:57:33 +0000 (14:57 +0200)
committerGitHub <noreply@github.com>
Fri, 30 Aug 2024 12:57:33 +0000 (12:57 +0000)
* Replace _PyBytes_Join() with PyBytes_Join().
* Keep _PyBytes_Join() as an alias to PyBytes_Join().

Doc/c-api/bytes.rst
Doc/whatsnew/3.14.rst
Include/cpython/bytesobject.h
Lib/test/test_capi/test_bytes.py
Misc/NEWS.d/next/C_API/2024-07-12-13-40-59.gh-issue-121645.96QvD3.rst [new file with mode: 0644]
Modules/_io/bufferedio.c
Modules/_io/iobase.c
Modules/_sre/sre.c
Modules/_testcapi/bytes.c
Objects/bytesobject.c

index bca78a9c369385a20b33cb08b1ef9fb7b8560877..3d0501c8253f714cbec136b9c2b3e7c914a5e37a 100644 (file)
@@ -189,6 +189,24 @@ called with a non-bytes parameter.
    to *newpart* (i.e. decrements its reference count).
 
 
+.. c:function:: PyObject* PyBytes_Join(PyObject *sep, PyObject *iterable)
+
+   Similar to ``sep.join(iterable)`` in Python.
+
+   *sep* must be a Python :class:`bytes` object.
+   (Note that :c:func:`PyUnicode_Join` accepts a ``NULL`` separator and treats
+   it as a space, whereas :c:func:`PyBytes_Join` doesn't accept a ``NULL``
+   separator.)
+
+   *iterable* must be an iterable object yielding objects that implement the
+   :ref:`buffer protocol <bufferobjects>`.
+
+   On success, return a new :class:`bytes` object.
+   On error, set an exception and return ``NULL``.
+
+   .. versionadded:: 3.14
+
+
 .. c:function:: int _PyBytes_Resize(PyObject **bytes, Py_ssize_t newsize)
 
    Resize a bytes object. *newsize* will be the new length of the bytes object.
index 44b373ac95d35150821da2161eb22951a4fbd3b3..975af420f9b375ce4a1176b0ef4192fe137532fe 100644 (file)
@@ -485,6 +485,11 @@ New Features
 
   (Contributed by Victor Stinner in :gh:`120389`.)
 
+* Add :c:func:`PyBytes_Join(sep, iterable) <PyBytes_Join>` function,
+  similar to ``sep.join(iterable)`` in Python.
+  (Contributed by Victor Stinner in :gh:`121645`.)
+
+
 Porting to Python 3.14
 ----------------------
 
index 41537210b748a1c51b15f9226d14e33cdcfd43b2..cf3f0387ecf32381a592b0ee263a71af701e7ef5 100644 (file)
@@ -32,6 +32,7 @@ static inline Py_ssize_t PyBytes_GET_SIZE(PyObject *op) {
 }
 #define PyBytes_GET_SIZE(self) PyBytes_GET_SIZE(_PyObject_CAST(self))
 
-/* _PyBytes_Join(sep, x) is like sep.join(x).  sep must be PyBytesObject*,
-   x must be an iterable object. */
-PyAPI_FUNC(PyObject*) _PyBytes_Join(PyObject *sep, PyObject *x);
+PyAPI_FUNC(PyObject*) PyBytes_Join(PyObject *sep, PyObject *iterable);
+
+// Alias kept for backward compatibility
+#define _PyBytes_Join PyBytes_Join
index d5f047bcf182779f1638d3f210ecd346cc995ef9..5908d79e14029bdabb731ac008c536a251f843f4 100644 (file)
@@ -249,6 +249,46 @@ class CAPITest(unittest.TestCase):
         # CRASHES resize(NULL, 0, False)
         # CRASHES resize(NULL, 3, False)
 
+    def test_join(self):
+        """Test PyBytes_Join()"""
+        bytes_join = _testcapi.bytes_join
+
+        self.assertEqual(bytes_join(b'', []), b'')
+        self.assertEqual(bytes_join(b'sep', []), b'')
+
+        self.assertEqual(bytes_join(b'', [b'a', b'b', b'c']), b'abc')
+        self.assertEqual(bytes_join(b'-', [b'a', b'b', b'c']), b'a-b-c')
+        self.assertEqual(bytes_join(b' - ', [b'a', b'b', b'c']), b'a - b - c')
+        self.assertEqual(bytes_join(b'-', [bytearray(b'abc'),
+                                           memoryview(b'def')]),
+                         b'abc-def')
+
+        self.assertEqual(bytes_join(b'-', iter([b'a', b'b', b'c'])), b'a-b-c')
+
+        # invalid 'sep' argument
+        with self.assertRaises(TypeError):
+            bytes_join(bytearray(b'sep'), [])
+        with self.assertRaises(TypeError):
+            bytes_join(memoryview(b'sep'), [])
+        with self.assertRaises(TypeError):
+            bytes_join('', [])  # empty Unicode string
+        with self.assertRaises(TypeError):
+            bytes_join('unicode', [])
+        with self.assertRaises(TypeError):
+            bytes_join(123, [])
+        with self.assertRaises(SystemError):
+            self.assertEqual(bytes_join(NULL, [b'a', b'b', b'c']), b'abc')
+
+        # invalid 'iterable' argument
+        with self.assertRaises(TypeError):
+            bytes_join(b'', [b'bytes', 'unicode'])
+        with self.assertRaises(TypeError):
+            bytes_join(b'', [b'bytes', 123])
+        with self.assertRaises(TypeError):
+            bytes_join(b'', 123)
+        with self.assertRaises(SystemError):
+            bytes_join(b'', NULL)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/C_API/2024-07-12-13-40-59.gh-issue-121645.96QvD3.rst b/Misc/NEWS.d/next/C_API/2024-07-12-13-40-59.gh-issue-121645.96QvD3.rst
new file mode 100644 (file)
index 0000000..1cca9b2
--- /dev/null
@@ -0,0 +1,2 @@
+Add :c:func:`PyBytes_Join(sep, iterable) <PyBytes_Join>` function, similar to
+``sep.join(iterable)`` in Python. Patch by Victor Stinner.
index e45323c93a17ef83e7a94f96e539b2866abbf24d..bc5fff54a62b6dac3abfd54797839a3764c12f57 100644 (file)
@@ -1283,7 +1283,7 @@ found:
         Py_CLEAR(res);
         goto end;
     }
-    Py_XSETREF(res, _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks));
+    Py_XSETREF(res, PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks));
 
 end:
     LEAVE_BUFFERED(self)
@@ -1736,7 +1736,7 @@ _bufferedreader_read_all(buffered *self)
                 goto cleanup;
             }
             else {
-                tmp = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks);
+                tmp = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks);
                 res = tmp;
                 goto cleanup;
             }
index 184e0b7d1aa7f1a12ca62e594cec1336fe0c79f9..419e5516b5c11ef9c6ff019ecaed0ac37dcc1501 100644 (file)
@@ -999,7 +999,7 @@ _io__RawIOBase_readall_impl(PyObject *self)
             return NULL;
         }
     }
-    result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks);
+    result = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks);
     Py_DECREF(chunks);
     return result;
 }
index 01420d1a10b1cff1ae936194b6647512e002a4dc..32f91af8dcf869e2ffc0aef37d8a46c8481d154f 100644 (file)
@@ -1287,7 +1287,7 @@ pattern_subx(_sremodulestate* module_state,
     }
     else {
         if (state.isbytes)
-            item = _PyBytes_Join(joiner, list);
+            item = PyBytes_Join(joiner, list);
         else
             item = PyUnicode_Join(joiner, list);
         Py_DECREF(joiner);
@@ -2918,7 +2918,7 @@ expand_template(TemplateObject *self, MatchObject *match)
     }
     else {
         Py_SET_SIZE(list, count);
-        result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
+        result = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
     }
 
 cleanup:
index 02294d8887abb786cf25bb11609642ed2baaa5bd..33903de14ba68d693da9b9646bdfe71b47b569e6 100644 (file)
@@ -37,8 +37,23 @@ bytes_resize(PyObject *Py_UNUSED(module), PyObject *args)
 }
 
 
+/* Test PyBytes_Join() */
+static PyObject *
+bytes_join(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *sep, *iterable;
+    if (!PyArg_ParseTuple(args, "OO", &sep, &iterable)) {
+        return NULL;
+    }
+    NULLABLE(sep);
+    NULLABLE(iterable);
+    return PyBytes_Join(sep, iterable);
+}
+
+
 static PyMethodDef test_methods[] = {
     {"bytes_resize", bytes_resize, METH_VARARGS},
+    {"bytes_join", bytes_join, METH_VARARGS},
     {NULL},
 };
 
index e88b199d89f75801551acc1a7c56c5368c739d87..c467b242b4cfc24090e0ae7e4f4a303c0e51d93a 100644 (file)
@@ -1867,11 +1867,19 @@ bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
 }
 
 PyObject *
-_PyBytes_Join(PyObject *sep, PyObject *x)
+PyBytes_Join(PyObject *sep, PyObject *iterable)
 {
-    assert(sep != NULL && PyBytes_Check(sep));
-    assert(x != NULL);
-    return bytes_join((PyBytesObject*)sep, x);
+    if (sep == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    if (!PyBytes_Check(sep)) {
+        PyErr_Format(PyExc_TypeError,
+                     "sep: expected bytes, got %T", sep);
+        return NULL;
+    }
+
+    return stringlib_bytes_join(sep, iterable);
 }
 
 /*[clinic input]