gh-111140: Adds PyLong_AsNativeBytes and PyLong_FromNative[Unsigned]Bytes functions...

author Steve Dower <steve.dower@python.org>

Mon, 12 Feb 2024 20:13:13 +0000 (20:13 +0000)

committer GitHub <noreply@github.com>

Mon, 12 Feb 2024 20:13:13 +0000 (20:13 +0000)
author Steve Dower <steve.dower@python.org>
Mon, 12 Feb 2024 20:13:13 +0000 (20:13 +0000)
committer GitHub <noreply@github.com>
Mon, 12 Feb 2024 20:13:13 +0000 (20:13 +0000)
diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst

index f42e23db89ae399acc17cc81259ba5fa60e17c93..c39823e5e6787f0e14052e7e5ecc2161ee061d4a 100644 (file)
--- a/Doc/c-api/long.rst
+++ b/Doc/c-api/long.rst
@@ -113,6 +113,28 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
     retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`.
  
  
+.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness)
+
+   Create a Python integer from the value contained in the first *n_bytes* of
+   *buffer*, interpreted as a two's-complement signed number.
+
+   *endianness* may be passed ``-1`` for the native endian that CPython was
+   compiled with, or else ``0`` for big endian and ``1`` for little.
+
+   .. versionadded:: 3.13
+
+
+.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int endianness)
+
+   Create a Python integer from the value contained in the first *n_bytes* of
+   *buffer*, interpreted as an unsigned number.
+
+   *endianness* may be passed ``-1`` for the native endian that CPython was
+   compiled with, or else ``0`` for big endian and ``1`` for little.
+
+   .. versionadded:: 3.13
+
+
  .. XXX alias PyLong_AS_LONG (for now)
  .. c:function:: long PyLong_AsLong(PyObject *obj)
  
@@ -332,6 +354,50 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
     Returns ``NULL`` on error.  Use :c:func:`PyErr_Occurred` to disambiguate.
  
  
+.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int endianness)
+
+   Copy the Python integer value to a native *buffer* of size *n_bytes*::
+
+      int value;
+      Py_ssize_t bytes = PyLong_AsNativeBytes(pylong, &value, sizeof(value), -1);
+      if (bytes < 0) {
+          // Error occurred
+          return NULL;
+      }
+      else if (bytes > sizeof(value)) {
+          // Overflow occurred, but 'value' contains as much as could fit
+      }
+
+   *endianness* may be passed ``-1`` for the native endian that CPython was
+   compiled with, or ``0`` for big endian and ``1`` for little.
+
+   Return ``-1`` with an exception raised if *pylong* cannot be interpreted as
+   an integer. Otherwise, return the size of the buffer required to store the
+   value. If this is equal to or less than *n_bytes*, the entire value was
+   copied.
+
+   Unless an exception is raised, all *n_bytes* of the buffer will be written
+   with as much of the value as can fit. This allows the caller to ignore all
+   non-negative results if the intent is to match the typical behavior of a
+   C-style downcast.
+
+   Values are always copied as two's-complement, and sufficient size will be
+   requested for a sign bit. For example, this may cause a value that fits into
+   8 bytes when treated as unsigned to request 9 bytes, even though all eight
+   bytes were copied into the buffer. What has been omitted is the zero sign
+   bit, which is redundant when the intention is to treat the value as unsigned.
+
+   Passing *n_bytes* of zero will always return the requested buffer size.
+
+   .. note::
+
+      When the value does not fit in the provided buffer, the requested size
+      returned from the function may be larger than necessary. Passing 0 to this
+      function is not an accurate way to determine the bit length of a value.
+
+   .. versionadded:: 3.13
+
+
  .. c:function:: int PyUnstable_Long_IsCompact(const PyLongObject* op)
  
     Return 1 if *op* is compact, 0 otherwise.
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst

index 191657061f74033fdd356024d0ac49bb572c7eb2..b96720df0a2f2d1b1b5e7ae43feced81811e7f61 100644 (file)
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -587,6 +587,7 @@ Tier 2 IR by Mark Shannon and Guido van Rossum.
  Tier 2 optimizer by Ken Jin.)
  
  
+
  Deprecated
  ==========
  
@@ -1526,6 +1527,11 @@ New Features
  
    (Contributed by Victor Stinner and Petr Viktorin in :gh:`110850`.)
  
+* Add :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and
+  :c:func:`PyLong_FromUnsignedNativeBytes` functions to simplify converting
+  between native integer types and Python :class:`int` objects.
+  (Contributed by Steve Dower in :gh:`111140`.)
+
  
  Porting to Python 3.13
  ----------------------
@@ -1585,7 +1591,6 @@ Porting to Python 3.13
    platforms, the ``HAVE_STDDEF_H`` macro is only defined on Windows.
    (Contributed by Victor Stinner in :gh:`108765`.)
  
-
  Deprecated
  ----------
  
diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h

index fd1be29ed397d181bfd67cc155742b20a9c599f6..07251db6bcc203c506f9fd03bde9142f0ce7f955 100644 (file)
--- a/Include/cpython/longobject.h
+++ b/Include/cpython/longobject.h
@@ -4,6 +4,40 @@
  
  PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base);
  
+/* PyLong_AsNativeBytes: Copy the integer value to a native variable.
+   buffer points to the first byte of the variable.
+   n_bytes is the number of bytes available in the buffer. Pass 0 to request
+   the required size for the value.
+   endianness is -1 for native endian, 0 for big endian or 1 for little.
+   Big endian mode will write the most significant byte into the address
+   directly referenced by buffer; little endian will write the least significant
+   byte into that address.
+
+   If an exception is raised, returns a negative value.
+   Otherwise, returns the number of bytes that are required to store the value.
+   To check that the full value is represented, ensure that the return value is
+   equal or less than n_bytes.
+   All n_bytes are guaranteed to be written (unless an exception occurs), and
+   so ignoring a positive return value is the equivalent of a downcast in C.
+   In cases where the full value could not be represented, the returned value
+   may be larger than necessary - this function is not an accurate way to
+   calculate the bit length of an integer object.
+   */
+PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer,
+    Py_ssize_t n_bytes, int endianness);
+
+/* PyLong_FromNativeBytes: Create an int value from a native integer
+   n_bytes is the number of bytes to read from the buffer. Passing 0 will
+   always produce the zero int.
+   PyLong_FromUnsignedNativeBytes always produces a non-negative int.
+   endianness is -1 for native endian, 0 for big endian or 1 for little.
+
+   Returns the int object, or NULL with an exception set. */
+PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes,
+    int endianness);
+PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer,
+    size_t n_bytes, int endianness);
+
  PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op);
  PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op);
  
@@ -50,7 +84,7 @@ PyAPI_FUNC(PyObject *) _PyLong_FromByteArray(
  */
  PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v,
      unsigned char* bytes, size_t n,
-    int little_endian, int is_signed);
+    int little_endian, int is_signed, int with_exceptions);
  
  /* For use by the gcd function in mathmodule.c */
  PyAPI_FUNC(PyObject *) _PyLong_GCD(PyObject *, PyObject *);
diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py

index 8e3ef25d1ff86fc3b471f98bf06ccf7dffb9791c..fc82cbfa66ea7aa98bc1eac465176b75a7c8087a 100644 (file)
--- a/Lib/test/test_capi/test_long.py
+++ b/Lib/test/test_capi/test_long.py
@@ -1,5 +1,6 @@
  import unittest
  import sys
+import test.support as support
  
  from test.support import import_helper
  
@@ -423,6 +424,150 @@ class LongTests(unittest.TestCase):
          self.assertRaises(OverflowError, asvoidptr, -2**1000)
          # CRASHES asvoidptr(NULL)
  
+    def test_long_asnativebytes(self):
+        import math
+        from _testcapi import (
+            pylong_asnativebytes as asnativebytes,
+            SIZE_MAX,
+        )
+
+        # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot
+        SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8)
+        MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1
+        MAX_USIZE = 2 ** (SZ * 8) - 1
+        if support.verbose:
+            print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}")
+
+        # These tests check that the requested buffer size is correct
+        for v, expect in [
+            (0, SZ),
+            (512, SZ),
+            (-512, SZ),
+            (MAX_SSIZE, SZ),
+            (MAX_USIZE, SZ + 1),
+            (-MAX_SSIZE, SZ),
+            (-MAX_USIZE, SZ + 1),
+            (2**255-1, 32),
+            (-(2**255-1), 32),
+            (2**256-1, 33),
+            (-(2**256-1), 33),
+        ]:
+            with self.subTest(f"sizeof-{v:X}"):
+                buffer = bytearray(1)
+                self.assertEqual(expect, asnativebytes(v, buffer, 0, -1),
+                    "PyLong_AsNativeBytes(v, NULL, 0, -1)")
+                # Also check via the __index__ path
+                self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1),
+                    "PyLong_AsNativeBytes(Index(v), NULL, 0, -1)")
+
+        # We request as many bytes as `expect_be` contains, and always check
+        # the result (both big and little endian). We check the return value
+        # independently, since the buffer should always be filled correctly even
+        # if we need more bytes
+        for v, expect_be, expect_n in [
+            (0,         b'\x00',                1),
+            (0,         b'\x00' * 2,            2),
+            (0,         b'\x00' * 8,            min(8, SZ)),
+            (1,         b'\x01',                1),
+            (1,         b'\x00' * 10 + b'\x01', min(11, SZ)),
+            (42,        b'\x2a',                1),
+            (42,        b'\x00' * 10 + b'\x2a', min(11, SZ)),
+            (-1,        b'\xff',                1),
+            (-1,        b'\xff' * 10,           min(11, SZ)),
+            (-42,       b'\xd6',                1),
+            (-42,       b'\xff' * 10 + b'\xd6', min(11, SZ)),
+            # Extracts 255 into a single byte, but requests sizeof(Py_ssize_t)
+            (255,       b'\xff',                SZ),
+            (255,       b'\x00\xff',            2),
+            (256,       b'\x01\x00',            2),
+            # Extracts successfully (unsigned), but requests 9 bytes
+            (2**63,     b'\x80' + b'\x00' * 7,  9),
+            # "Extracts", but requests 9 bytes
+            (-2**63,    b'\x80' + b'\x00' * 7,  9),
+            (2**63,     b'\x00\x80' + b'\x00' * 7, 9),
+            (-2**63,    b'\xff\x80' + b'\x00' * 7, 9),
+
+            (2**255-1,      b'\x7f' + b'\xff' * 31,                 32),
+            (-(2**255-1),   b'\x80' + b'\x00' * 30 + b'\x01',       32),
+            # Request extra bytes, but result says we only needed 32
+            (-(2**255-1),   b'\xff\x80' + b'\x00' * 30 + b'\x01',   32),
+            (-(2**255-1),   b'\xff\xff\x80' + b'\x00' * 30 + b'\x01', 32),
+
+            # Extracting 256 bits of integer will request 33 bytes, but still
+            # copy as many bits as possible into the buffer. So we *can* copy
+            # into a 32-byte buffer, though negative number may be unrecoverable
+            (2**256-1,      b'\xff' * 32,                           33),
+            (2**256-1,      b'\x00' + b'\xff' * 32,                 33),
+            (-(2**256-1),   b'\x00' * 31 + b'\x01',                 33),
+            (-(2**256-1),   b'\xff' + b'\x00' * 31 + b'\x01',       33),
+            (-(2**256-1),   b'\xff\xff' + b'\x00' * 31 + b'\x01',   33),
+
+            # The classic "Windows HRESULT as negative number" case
+            #   HRESULT hr;
+            #   PyLong_AsNativeBytes(<-2147467259>, &hr, sizeof(HRESULT), -1)
+            #   assert(hr == E_FAIL)
+            (-2147467259, b'\x80\x00\x40\x05', 4),
+        ]:
+            with self.subTest(f"{v:X}-{len(expect_be)}bytes"):
+                n = len(expect_be)
+                buffer = bytearray(n)
+                expect_le = expect_be[::-1]
+
+                self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0),
+                    f"PyLong_AsNativeBytes(v, buffer, {n}, <big>)")
+                self.assertEqual(expect_be, buffer[:n], "<big>")
+                self.assertEqual(expect_n, asnativebytes(v, buffer, n, 1),
+                    f"PyLong_AsNativeBytes(v, buffer, {n}, <little>)")
+                self.assertEqual(expect_le, buffer[:n], "<little>")
+
+        # Check a few error conditions. These are validated in code, but are
+        # unspecified in docs, so if we make changes to the implementation, it's
+        # fine to just update these tests rather than preserve the behaviour.
+        with self.assertRaises(SystemError):
+            asnativebytes(1, buffer, 0, 2)
+        with self.assertRaises(TypeError):
+            asnativebytes('not a number', buffer, 0, -1)
+
+    def test_long_fromnativebytes(self):
+        import math
+        from _testcapi import (
+            pylong_fromnativebytes as fromnativebytes,
+            SIZE_MAX,
+        )
+
+        # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot
+        SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8)
+        MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1
+        MAX_USIZE = 2 ** (SZ * 8) - 1
+
+        for v_be, expect_s, expect_u in [
+            (b'\x00', 0, 0),
+            (b'\x01', 1, 1),
+            (b'\xff', -1, 255),
+            (b'\x00\xff', 255, 255),
+            (b'\xff\xff', -1, 65535),
+        ]:
+            with self.subTest(f"{expect_s}-{expect_u:X}-{len(v_be)}bytes"):
+                n = len(v_be)
+                v_le = v_be[::-1]
+
+                self.assertEqual(expect_s, fromnativebytes(v_be, n, 0, 1),
+                    f"PyLong_FromNativeBytes(buffer, {n}, <big>)")
+                self.assertEqual(expect_s, fromnativebytes(v_le, n, 1, 1),
+                    f"PyLong_FromNativeBytes(buffer, {n}, <little>)")
+                self.assertEqual(expect_u, fromnativebytes(v_be, n, 0, 0),
+                    f"PyLong_FromUnsignedNativeBytes(buffer, {n}, <big>)")
+                self.assertEqual(expect_u, fromnativebytes(v_le, n, 1, 0),
+                    f"PyLong_FromUnsignedNativeBytes(buffer, {n}, <little>)")
+
+                # Check native endian when the result would be the same either
+                # way and we can test it.
+                if v_be == v_le:
+                    self.assertEqual(expect_s, fromnativebytes(v_be, n, -1, 1),
+                        f"PyLong_FromNativeBytes(buffer, {n}, <native>)")
+                    self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0),
+                        f"PyLong_FromUnsignedNativeBytes(buffer, {n}, <native>)")
+
  
  if __name__ == "__main__":
      unittest.main()
diff --git a/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst b/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst

new file mode 100644 (file)

index 0000000..a8aa191
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst
@@ -0,0 +1,2 @@
+Adds :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and
+:c:func:`PyLong_FromUnsignedNativeBytes` functions.
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c

index d794af8de2b8f021a5b9db09dcda7af940a66644..a3239ec0f529609aaea23b06b03a1d98112f554c 100644 (file)
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -2393,7 +2393,7 @@ textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
          return -1;
  
      if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
-                            PY_LITTLE_ENDIAN, 0) < 0) {
+                            PY_LITTLE_ENDIAN, 0, 1) < 0) {
          Py_DECREF(cookieLong);
          return -1;
      }
diff --git a/Modules/_pickle.c b/Modules/_pickle.c

index f210c0ca2059911d0339f7360e35cc9084158501..0d83261168185d98010d9a58be5b7dd95a4da5c7 100644 (file)
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -2162,7 +2162,8 @@ save_long(PicklerObject *self, PyObject *obj)
          pdata = (unsigned char *)PyBytes_AS_STRING(repr);
          i = _PyLong_AsByteArray((PyLongObject *)obj,
                                  pdata, nbytes,
-                                1 /* little endian */ , 1 /* signed */ );
+                                1 /* little endian */ , 1 /* signed */ ,
+                                1 /* with exceptions */);
          if (i < 0)
              goto error;
          /* If the int is negative, this may be a byte more than
diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c

index 5481ed9b348ed7754a48d04218aee3f847b0ffb2..4463157d62248ddeebfb8e14c1220cb4d1eca17b 100644 (file)
--- a/Modules/_randommodule.c
+++ b/Modules/_randommodule.c
@@ -342,7 +342,8 @@ random_seed(RandomObject *self, PyObject *arg)
      res = _PyLong_AsByteArray((PyLongObject *)n,
                                (unsigned char *)key, keyused * 4,
                                PY_LITTLE_ENDIAN,
-                              0); /* unsigned */
+                              0, /* unsigned */
+                              1); /* with exceptions */
      if (res == -1) {
          goto Done;
      }
diff --git a/Modules/_sqlite/util.c b/Modules/_sqlite/util.c

index 833a666301d8ff9c979ba13a96b6caad2300d38f..9e8613ef67916e9662e930767a17393702e87537 100644 (file)
--- a/Modules/_sqlite/util.c
+++ b/Modules/_sqlite/util.c
@@ -162,7 +162,7 @@ _pysqlite_long_as_int64(PyObject * py_val)
          sqlite_int64 int64val;
          if (_PyLong_AsByteArray((PyLongObject *)py_val,
                                  (unsigned char *)&int64val, sizeof(int64val),
-                                IS_LITTLE_ENDIAN, 1 /* signed */) >= 0) {
+                                IS_LITTLE_ENDIAN, 1 /* signed */, 0) >= 0) {
              return int64val;
          }
      }
diff --git a/Modules/_struct.c b/Modules/_struct.c

index bd16fa89f189455c90624fed78b992cc979e7291..fa2cd37e003e0a2c4cfde6ef5c9520bc6196b3de 100644 (file)
--- a/Modules/_struct.c
+++ b/Modules/_struct.c
@@ -1000,9 +1000,10 @@ bp_longlong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f)
                                (unsigned char *)p,
                                8,
                                0, /* little_endian */
-                              1  /* signed */);
+                              1, /* signed */
+                              0  /* !with_exceptions */);
      Py_DECREF(v);
-    if (res == -1 && PyErr_Occurred()) {
+    if (res < 0) {
          PyErr_Format(state->StructError,
                       "'%c' format requires %lld <= number <= %lld",
                       f->format,
@@ -1024,9 +1025,10 @@ bp_ulonglong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f
                                (unsigned char *)p,
                                8,
                                0, /* little_endian */
-                              0  /* signed */);
+                              0, /* signed */
+                              0  /* !with_exceptions */);
      Py_DECREF(v);
-    if (res == -1 && PyErr_Occurred()) {
+    if (res < 0) {
          PyErr_Format(state->StructError,
                       "'%c' format requires 0 <= number <= %llu",
                       f->format,
@@ -1260,9 +1262,10 @@ lp_longlong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f)
                                (unsigned char *)p,
                                8,
                                1, /* little_endian */
-                              1  /* signed */);
+                              1, /* signed */
+                              0  /* !with_exceptions */);
      Py_DECREF(v);
-    if (res == -1 && PyErr_Occurred()) {
+    if (res < 0) {
          PyErr_Format(state->StructError,
                       "'%c' format requires %lld <= number <= %lld",
                       f->format,
@@ -1284,9 +1287,10 @@ lp_ulonglong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f
                                (unsigned char *)p,
                                8,
                                1, /* little_endian */
-                              0  /* signed */);
+                              0, /* signed */
+                              0  /* !with_exceptions */);
      Py_DECREF(v);
-    if (res == -1 && PyErr_Occurred()) {
+    if (res < 0) {
          PyErr_Format(state->StructError,
                       "'%c' format requires 0 <= number <= %llu",
                       f->format,
diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c

index 32ad8d32ab852395ea8ab488cf71d83e0e6407ae..dc21cf9f47522895dd0c02e3418b7725c526f995 100644 (file)
--- a/Modules/_testcapi/long.c
+++ b/Modules/_testcapi/long.c
@@ -776,6 +776,51 @@ pylong_asvoidptr(PyObject *module, PyObject *arg)
      return Py_NewRef((PyObject *)value);
  }
  
+/* Test wrapper around PyLong_AsNativeBytes().
+   Python signature: pylong_asnativebytes(v, buffer, n, endianness) -> int
+   'buffer' must be writable and at least 'n' bytes long; it receives the
+   converted bytes. Returns the value returned by PyLong_AsNativeBytes() as
+   a Python int, or NULL (with the exception left set) when that value is
+   negative. */
+static PyObject *
+pylong_asnativebytes(PyObject *module, PyObject *args)
+{
+    PyObject *v;
+    Py_buffer buffer;
+    Py_ssize_t n, endianness;
+    if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &endianness)) {
+        return NULL;
+    }
+    if (buffer.readonly) {
+        PyErr_SetString(PyExc_TypeError, "buffer must be writable");
+        PyBuffer_Release(&buffer);
+        return NULL;
+    }
+    if (buffer.len < n) {
+        PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes");
+        PyBuffer_Release(&buffer);
+        return NULL;
+    }
+    /* A negative 'n' is not rejected here; PyLong_AsNativeBytes() itself
+       reports it as a bad internal call. */
+    Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness);
+    PyBuffer_Release(&buffer);
+    return res >= 0 ? PyLong_FromSsize_t(res) : NULL;
+}
+
+/* Test wrapper around PyLong_FromNativeBytes() and
+   PyLong_FromUnsignedNativeBytes().
+   Python signature: pylong_fromnativebytes(buffer, n, endianness, signed) -> int
+   Reads the first 'n' bytes of 'buffer'. A true 'signed' selects the signed
+   function, a false one the unsigned function.
+   Raises ValueError if 'n' is negative or larger than the buffer. */
+static PyObject *
+pylong_fromnativebytes(PyObject *module, PyObject *args)
+{
+    Py_buffer buffer;
+    Py_ssize_t n, endianness, signed_;
+    if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &endianness, &signed_)) {
+        return NULL;
+    }
+    /* The functions under test take a size_t, so a negative 'n' would be
+       converted to a huge length and read far past the end of the buffer. */
+    if (n < 0) {
+        PyErr_SetString(PyExc_ValueError, "'n' must not be negative");
+        PyBuffer_Release(&buffer);
+        return NULL;
+    }
+    if (buffer.len < n) {
+        PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes");
+        PyBuffer_Release(&buffer);
+        return NULL;
+    }
+    PyObject *res = signed_
+        ? PyLong_FromNativeBytes(buffer.buf, (size_t)n, (int)endianness)
+        : PyLong_FromUnsignedNativeBytes(buffer.buf, (size_t)n, (int)endianness);
+    PyBuffer_Release(&buffer);
+    return res;
+}
+
+
  static PyMethodDef test_methods[] = {
      _TESTCAPI_TEST_LONG_AND_OVERFLOW_METHODDEF
      _TESTCAPI_TEST_LONG_API_METHODDEF
@@ -804,6 +849,8 @@ static PyMethodDef test_methods[] = {
      {"pylong_as_size_t",            pylong_as_size_t,           METH_O},
      {"pylong_asdouble",             pylong_asdouble,            METH_O},
      {"pylong_asvoidptr",            pylong_asvoidptr,           METH_O},
+    {"pylong_asnativebytes",        pylong_asnativebytes,       METH_VARARGS},
+    {"pylong_fromnativebytes",      pylong_fromnativebytes,     METH_VARARGS},
      {NULL},
  };
  
@@ -813,6 +860,5 @@ _PyTestCapi_Init_Long(PyObject *mod)
      if (PyModule_AddFunctions(mod, test_methods) < 0) {
          return -1;
      }
-
      return 0;
  }
diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c

index f6181168a85ae15437c177aabeda47abcfcf7f33..e3789867dc085fde321effba85dca17e198dabd3 100644 (file)
--- a/Modules/_tkinter.c
+++ b/Modules/_tkinter.c
@@ -926,7 +926,8 @@ AsObj(PyObject *value)
                                  (unsigned char *)(void *)&wideValue,
                                  sizeof(wideValue),
                                  PY_LITTLE_ENDIAN,
-                                /* signed */ 1) == 0) {
+                                /* signed */ 1,
+                                /* with_exceptions */ 1) == 0) {
              return Tcl_NewWideIntObj(wideValue);
          }
          PyErr_Clear();
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c

index 5d3c16a98423baaa7dc8da532a9c6225d8c430e6..2125da437963d2deffb3227b85f053408b0356f5 100644 (file)
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -973,7 +973,8 @@ _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEn
  
      if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
                              1 /* little-endian */ ,
-                            0 /* unsigned */ ) < 0) {
+                            0 /* unsigned */ ,
+                            1 /* with_exceptions */) < 0) {
          goto errorexit;
      }
  
@@ -1255,7 +1256,8 @@ _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDe
  
      if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
                              1 /* little-endian */ ,
-                            0 /* unsigned */ ) < 0) {
+                            0 /* unsigned */ ,
+                            1 /* with_exceptions */) < 0) {
          return NULL;
      }
  
diff --git a/Objects/longobject.c b/Objects/longobject.c

index e655ba19e8f1c12d7831a5c138374fcb7a48e991..932111f58425f227e0e8acf52edcf2829ae6bc9b 100644 (file)
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -928,7 +928,8 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n,
  int
  _PyLong_AsByteArray(PyLongObject* v,
                      unsigned char* bytes, size_t n,
-                    int little_endian, int is_signed)
+                    int little_endian, int is_signed,
+                    int with_exceptions)
  {
      Py_ssize_t i;               /* index into v->long_value.ob_digit */
      Py_ssize_t ndigits;         /* number of digits */
@@ -945,8 +946,10 @@ _PyLong_AsByteArray(PyLongObject* v,
      ndigits = _PyLong_DigitCount(v);
      if (_PyLong_IsNegative(v)) {
          if (!is_signed) {
-            PyErr_SetString(PyExc_OverflowError,
-                            "can't convert negative int to unsigned");
+            if (with_exceptions) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "can't convert negative int to unsigned");
+            }
              return -1;
          }
          do_twos_comp = 1;
@@ -967,7 +970,12 @@ _PyLong_AsByteArray(PyLongObject* v,
      /* Copy over all the Python digits.
         It's crucial that every Python digit except for the MSD contribute
         exactly PyLong_SHIFT bits to the total, so first assert that the int is
-       normalized. */
+       normalized.
+       NOTE: PyLong_AsNativeBytes() assumes that this function will fill in 'n'
+       bytes even if it eventually fails to convert the whole number. Make sure
+       you account for that if you are changing this algorithm to return without
+       doing that.
+       */
      assert(ndigits == 0 || v->long_value.ob_digit[ndigits - 1] != 0);
      j = 0;
      accum = 0;
@@ -1052,11 +1060,203 @@ _PyLong_AsByteArray(PyLongObject* v,
      return 0;
  
    Overflow:
-    PyErr_SetString(PyExc_OverflowError, "int too big to convert");
+    if (with_exceptions) {
+        PyErr_SetString(PyExc_OverflowError, "int too big to convert");
+    }
      return -1;
  
  }
  
+// Refactored out for readability, not reuse
+// Returns 1 if 'v' can be stored as a signed two's-complement value that is
+// 'n' bits wide (sign bit included), otherwise 0. 'n' is a count of bits, not
+// bytes, and must be at least 1 because the shift below is by n - 1.
+static inline int
+_fits_in_n_bits(Py_ssize_t v, Py_ssize_t n)
+{
+    // Anything at least as wide as Py_ssize_t can hold every possible 'v'.
+    if (n >= (Py_ssize_t)sizeof(Py_ssize_t) * 8) {
+        return 1;
+    }
+    // If all bits above n are the same, we fit.
+    // (Use n-1 if we require the sign bit to be consistent.)
+    // NOTE(review): this relies on >> of a negative value sign-extending,
+    // which C leaves implementation-defined.
+    Py_ssize_t v_extended = v >> ((int)n - 1);
+    return v_extended == 0 || v_extended == -1;
+}
+
+/* Normalise an 'endianness' argument in place.
+   A negative value is replaced with PY_LITTLE_ENDIAN (the byte order this
+   build was compiled for); afterwards the value must be 0 (big endian) or
+   1 (little endian).
+   Returns 0 on success, or -1 with SystemError set for any other value. */
+static inline int
+_resolve_endianness(int *endianness)
+{
+    if (*endianness < 0) {
+        *endianness = PY_LITTLE_ENDIAN;
+    }
+    if (*endianness != 0 && *endianness != 1) {
+        PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value");
+        return -1;
+    }
+    return 0;
+}
+
+/* Copy the value of 'vv' into 'buffer' as an 'n'-byte two's-complement
+   integer and return the number of bytes needed to hold the whole value.
+
+   vv          an int, or an object that _PyNumber_Index() can convert to one
+   buffer      destination for the bytes; not touched when 'n' is 0
+   n           size of 'buffer' in bytes; 0 only asks for the required size
+   endianness  -1 for native, 0 for big endian, 1 for little endian
+
+   Returns -1 with an exception set on failure. Otherwise all 'n' bytes have
+   been written, and a result greater than 'n' means the value was truncated
+   to fit. */
+Py_ssize_t
+PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness)
+{
+    PyLongObject *v;
+    union {
+        Py_ssize_t v;
+        unsigned char b[sizeof(Py_ssize_t)];
+    } cv;
+    int do_decref = 0;
+    Py_ssize_t res = 0;
+
+    if (vv == NULL || n < 0) {
+        PyErr_BadInternalCall();
+        return -1;
+    }
+
+    int little_endian = endianness;
+    if (_resolve_endianness(&little_endian) < 0) {
+        return -1;
+    }
+
+    if (PyLong_Check(vv)) {
+        v = (PyLongObject *)vv;
+    }
+    else {
+        v = (PyLongObject *)_PyNumber_Index(vv);
+        if (v == NULL) {
+            return -1;
+        }
+        // We own the converted object and must release it before returning
+        do_decref = 1;
+    }
+
+    if (_PyLong_IsCompact(v)) {
+        cv.v = _PyLong_CompactValue(v);
+        /* Most paths result in res = sizeof(compact value). Only the case
+         * where 0 < n < sizeof(compact value) do we need to check and adjust
+         * our return value. */
+        res = (Py_ssize_t)sizeof(cv.b);
+        if (n <= 0) {
+            // nothing to do!
+        }
+        else if (n <= sizeof(cv.b)) {
+            /* The buffer is no larger than the compact value: copy the 'n'
+             * least significant bytes in the requested byte order. */
+#if PY_LITTLE_ENDIAN
+            if (little_endian) {
+                memcpy(buffer, cv.b, n);
+            }
+            else {
+                for (Py_ssize_t i = 0; i < n; ++i) {
+                    ((unsigned char*)buffer)[n - i - 1] = cv.b[i];
+                }
+            }
+#else
+            if (little_endian) {
+                for (Py_ssize_t i = 0; i < n; ++i) {
+                    ((unsigned char*)buffer)[i] = cv.b[sizeof(cv.b) - i - 1];
+                }
+            }
+            else {
+                memcpy(buffer, &cv.b[sizeof(cv.b) - n], n);
+            }
+#endif
+
+            /* If we fit, return the requested number of bytes */
+            if (_fits_in_n_bits(cv.v, n * 8)) {
+                res = n;
+            }
+        }
+        else {
+            /* The buffer is larger than the compact value: copy all of it and
+             * sign-extend into the remaining high-order bytes. */
+            unsigned char fill = cv.v < 0 ? 0xFF : 0x00;
+#if PY_LITTLE_ENDIAN
+            if (little_endian) {
+                memcpy(buffer, cv.b, sizeof(cv.b));
+                memset((char *)buffer + sizeof(cv.b), fill, n - sizeof(cv.b));
+            }
+            else {
+                unsigned char *b = (unsigned char *)buffer;
+                for (Py_ssize_t i = 0; i < n - (int)sizeof(cv.b); ++i) {
+                    *b++ = fill;
+                }
+                for (Py_ssize_t i = sizeof(cv.b); i > 0; --i) {
+                    *b++ = cv.b[i - 1];
+                }
+            }
+#else
+            if (little_endian) {
+                unsigned char *b = (unsigned char *)buffer;
+                for (Py_ssize_t i = sizeof(cv.b); i > 0; --i) {
+                    *b++ = cv.b[i - 1];
+                }
+                for (Py_ssize_t i = 0; i < n - sizeof(cv.b); ++i) {
+                    *b++ = fill;
+                }
+            }
+            else {
+                memset(buffer, fill, n - sizeof(cv.b));
+                memcpy((char *)buffer + n - sizeof(cv.b), cv.b, sizeof(cv.b));
+            }
+#endif
+        }
+    }
+    else {
+        if (n > 0) {
+            /* The result is deliberately ignored: exceptions are disabled
+             * (last argument) and the buffer is filled even on overflow. */
+            _PyLong_AsByteArray(v, buffer, (size_t)n, little_endian, 1, 0);
+        }
+
+        // More efficient calculation for number of bytes required?
+        size_t nb = _PyLong_NumBits((PyObject *)v);
+        if (nb == (size_t)-1) {
+            /* _PyLong_NumBits() failed and has already set an exception;
+             * do not report a size on top of a pending error. */
+            res = -1;
+        }
+        else {
+            /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up
+             * multiples of 8 to the next byte, but we add an implied bit for
+             * the sign and it cancels out. */
+            size_t n_needed = (nb / 8) + 1;
+            /* Compare before converting: casting to Py_ssize_t and back
+             * always round-trips, so it cannot detect overflow. */
+            if (n_needed > (size_t)PY_SSIZE_T_MAX) {
+                PyErr_SetString(PyExc_OverflowError,
+                    "value too large to convert");
+                res = -1;
+            }
+            else {
+                res = (Py_ssize_t)n_needed;
+            }
+        }
+    }
+
+    if (do_decref) {
+        Py_DECREF(v);
+    }
+
+    return res;
+}
+
+
+/* Create an int from the first 'n' bytes of 'buffer', read as a signed
+   two's-complement number.
+   'endianness' is -1 for native, 0 for big endian or 1 for little endian.
+   Returns NULL with an exception set if 'buffer' is NULL or 'endianness' is
+   invalid; otherwise returns whatever _PyLong_FromByteArray() returns. */
+PyObject *
+PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness)
+{
+    if (!buffer) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    int little_endian = endianness;
+    if (_resolve_endianness(&little_endian) < 0) {
+        return NULL;
+    }
+
+    /* The final argument (1) selects the signed interpretation */
+    return _PyLong_FromByteArray((const unsigned char *)buffer, n,
+                                 little_endian, 1);
+}
+
+
+/* Create an int from the first 'n' bytes of 'buffer', read as an unsigned
+   number, so the result is never negative.
+   'endianness' is -1 for native, 0 for big endian or 1 for little endian.
+   Returns NULL with an exception set if 'buffer' is NULL or 'endianness' is
+   invalid; otherwise returns whatever _PyLong_FromByteArray() returns. */
+PyObject *
+PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness)
+{
+    if (!buffer) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    int little_endian = endianness;
+    if (_resolve_endianness(&little_endian) < 0) {
+        return NULL;
+    }
+
+    /* The final argument (0) selects the unsigned interpretation */
+    return _PyLong_FromByteArray((const unsigned char *)buffer, n,
+                                 little_endian, 0);
+}
+
+
  /* Create a new int object from a C pointer */
  
  PyObject *
@@ -1231,7 +1431,7 @@ PyLong_AsLongLong(PyObject *vv)
      }
      else {
          res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)&bytes,
-                                  SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1);
+                                  SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1, 1);
      }
      if (do_decref) {
          Py_DECREF(v);
@@ -1270,7 +1470,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv)
      }
      else {
          res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes,
-                              SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0);
+                              SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0, 1);
      }
  
      /* Plan 9 can't handle long long in ? : expressions */
@@ -6068,7 +6268,7 @@ int_to_bytes_impl(PyObject *self, Py_ssize_t length, PyObject *byteorder,
  
      if (_PyLong_AsByteArray((PyLongObject *)self,
                              (unsigned char *)PyBytes_AS_STRING(bytes),
-                            length, little_endian, is_signed) < 0) {
+                            length, little_endian, is_signed, 1) < 0) {
          Py_DECREF(bytes);
          return NULL;
      }
author	Steve Dower <steve.dower@python.org>
	Mon, 12 Feb 2024 20:13:13 +0000 (20:13 +0000)
committer	GitHub <noreply@github.com>
	Mon, 12 Feb 2024 20:13:13 +0000 (20:13 +0000)
Doc/c-api/long.rst		patch \| blob \| blame \| history
Doc/whatsnew/3.13.rst		patch \| blob \| blame \| history
Include/cpython/longobject.h		patch \| blob \| blame \| history
Lib/test/test_capi/test_long.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst	[new file with mode: 0644]	patch \| blob
Modules/_io/textio.c		patch \| blob \| blame \| history
Modules/_pickle.c		patch \| blob \| blame \| history
Modules/_randommodule.c		patch \| blob \| blame \| history
Modules/_sqlite/util.c		patch \| blob \| blame \| history
Modules/_struct.c		patch \| blob \| blame \| history
Modules/_testcapi/long.c		patch \| blob \| blame \| history
Modules/_tkinter.c		patch \| blob \| blame \| history
Modules/cjkcodecs/multibytecodec.c		patch \| blob \| blame \| history
Objects/longobject.c		patch \| blob \| blame \| history