gh-111140: PyLong_From/AsNativeBytes: Take *flags* rather than just *endianness*...

author Steve Dower <steve.dower@python.org>

Fri, 5 Apr 2024 14:21:16 +0000 (15:21 +0100)

committer GitHub <noreply@github.com>

Fri, 5 Apr 2024 14:21:16 +0000 (16:21 +0200)
author Steve Dower <steve.dower@python.org>
Fri, 5 Apr 2024 14:21:16 +0000 (15:21 +0100)
committer GitHub <noreply@github.com>
Fri, 5 Apr 2024 14:21:16 +0000 (16:21 +0200)
diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst

index 6a7eba7761de1ae1e5ff64bf9a36611a3f4aa83f..1eb8f191c3ca327a04b81b2486be0fbe896016be 100644 (file)
--- a/Doc/c-api/long.rst
+++ b/Doc/c-api/long.rst
@@ -113,24 +113,28 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
     retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`.
  
  
-.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness)
+.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int flags)
  
     Create a Python integer from the value contained in the first *n_bytes* of
     *buffer*, interpreted as a two's-complement signed number.
  
-   *endianness* may be passed ``-1`` for the native endian that CPython was
-   compiled with, or else ``0`` for big endian and ``1`` for little.
+   *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select
+   the native endian that CPython was compiled with and assume that the
+   most-significant bit is a sign bit. Passing
+   ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` will produce the same result as calling
+   :c:func:`PyLong_FromUnsignedNativeBytes`. Other flags are ignored.
  
     .. versionadded:: 3.13
  
  
-.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int endianness)
+.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int flags)
  
     Create a Python integer from the value contained in the first *n_bytes* of
     *buffer*, interpreted as an unsigned number.
  
-   *endianness* may be passed ``-1`` for the native endian that CPython was
-   compiled with, or else ``0`` for big endian and ``1`` for little.
+   *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select
+   the native endian that CPython was compiled with and assume that the
+   most-significant bit is not a sign bit. Flags other than endian are ignored.
  
     .. versionadded:: 3.13
  
@@ -354,14 +358,41 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
     Returns ``NULL`` on error.  Use :c:func:`PyErr_Occurred` to disambiguate.
  
  
-.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int endianness)
+.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int flags)
  
-   Copy the Python integer value to a native *buffer* of size *n_bytes*::
+   Copy the Python integer value *pylong* to a native *buffer* of size
+   *n_bytes*. The *flags* can be set to ``-1`` to behave similarly to a C cast,
+   or to values documented below to control the behavior.
+
+   Returns ``-1`` with an exception raised on error.  This may happen if
+   *pylong* cannot be interpreted as an integer, or if *pylong* was negative
+   and the ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` flag was set.
+
+   Otherwise, returns the number of bytes required to store the value.
+   If this is equal to or less than *n_bytes*, the entire value was copied.
+   All *n_bytes* of the buffer are written: large buffers are padded with
+   zeroes.
+
+   If the returned value is greater than *n_bytes*, the value was
+   truncated: as many of the lowest bits of the value as could fit are written,
+   and the higher bits are ignored. This matches the typical behavior
+   of a C-style downcast.
+
+   .. note::
+
+      Overflow is not considered an error. If the returned value
+      is larger than *n_bytes*, most significant bits were discarded.
+
+   ``0`` will never be returned.
+
+   Values are always copied as two's-complement.
+
+   Usage example::
  
        int32_t value;
        Py_ssize_t bytes = PyLong_AsNativeBytes(pylong, &value, sizeof(value), -1);
        if (bytes < 0) {
-          // A Python exception was set with the reason.
+          // Failed. A Python exception was set with the reason.
            return NULL;
        }
        else if (bytes <= (Py_ssize_t)sizeof(value)) {
@@ -372,19 +403,24 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
            // lowest bits of pylong.
        }
  
-   The above example may look *similar* to
-   :c:func:`PyLong_As* <PyLong_AsSize_t>`
-   but instead fills in a specific caller defined type and never raises an
-   error about of the :class:`int` *pylong*'s value regardless of *n_bytes*
-   or the returned byte count.
+   Passing zero to *n_bytes* will return the size of a buffer that would
+   be large enough to hold the value. This may be larger than technically
+   necessary, but not unreasonably so.
  
-   To get at the entire potentially big Python value, this can be used to
-   reserve enough space and copy it::
+   .. note::
+
+      Passing *n_bytes=0* to this function is not an accurate way to determine
+      the bit length of a value.
+
+   If *n_bytes=0*, *buffer* may be ``NULL``.
+
+   To get at the entire Python value of an unknown size, the function can be
+   called twice: first to determine the buffer size, then to fill it::
  
        // Ask how much space we need.
        Py_ssize_t expected = PyLong_AsNativeBytes(pylong, NULL, 0, -1);
        if (expected < 0) {
-          // A Python exception was set with the reason.
+          // Failed. A Python exception was set with the reason.
            return NULL;
        }
        assert(expected != 0);  // Impossible per the API definition.
@@ -395,11 +431,11 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
        }
        // Safely get the entire value.
        Py_ssize_t bytes = PyLong_AsNativeBytes(pylong, bignum, expected, -1);
-      if (bytes < 0) {  // Exception set.
+      if (bytes < 0) {  // Exception has been set.
            free(bignum);
            return NULL;
        }
-      else if (bytes > expected) {  // Be safe, should not be possible.
+      else if (bytes > expected) {  // This should not be possible.
            PyErr_SetString(PyExc_RuntimeError,
                "Unexpected bignum truncation after a size check.");
            free(bignum);
@@ -409,35 +445,51 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
        // ... use bignum ...
        free(bignum);
  
-   *endianness* may be passed ``-1`` for the native endian that CPython was
-   compiled with, or ``0`` for big endian and ``1`` for little.
-
-   Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as
-   an integer. Otherwise, return the size of the buffer required to store the
-   value. If this is equal to or less than *n_bytes*, the entire value was
-   copied. ``0`` will never be returned.
-
-   Unless an exception is raised, all *n_bytes* of the buffer will always be
-   written. In the case of truncation, as many of the lowest bits of the value
-   as could fit are written. This allows the caller to ignore all non-negative
-   results if the intent is to match the typical behavior of a C-style
-   downcast. No exception is set on truncation.
-
-   Values are always copied as two's-complement and sufficient buffer will be
-   requested to include a sign bit. For example, this may cause an value that
-   fits into 8 bytes when treated as unsigned to request 9 bytes, even though
-   all eight bytes were copied into the buffer. What has been omitted is the
-   zero sign bit -- redundant if the caller's intention is to treat the value
-   as unsigned.
-
-   Passing zero to *n_bytes* will return the size of a buffer that would
-   be large enough to hold the value. This may be larger than technically
-   necessary, but not unreasonably so.
+   *flags* is either ``-1`` (``Py_ASNATIVEBYTES_DEFAULTS``) to select defaults
+   that behave most like a C cast, or a combination of the other flags in
+   the table below.
+   Note that ``-1`` cannot be combined with other flags.
+
+   Currently, ``-1`` corresponds to
+   ``Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_UNSIGNED_BUFFER``.
+
+   ============================================= ======
+   Flag                                          Value
+   ============================================= ======
+   .. c:macro:: Py_ASNATIVEBYTES_DEFAULTS        ``-1``
+   .. c:macro:: Py_ASNATIVEBYTES_BIG_ENDIAN      ``0``
+   .. c:macro:: Py_ASNATIVEBYTES_LITTLE_ENDIAN   ``1``
+   .. c:macro:: Py_ASNATIVEBYTES_NATIVE_ENDIAN   ``3``
+   .. c:macro:: Py_ASNATIVEBYTES_UNSIGNED_BUFFER ``4``
+   .. c:macro:: Py_ASNATIVEBYTES_REJECT_NEGATIVE ``8``
+   ============================================= ======
+
+   Specifying ``Py_ASNATIVEBYTES_NATIVE_ENDIAN`` will override any other endian
+   flags. Passing ``2`` is reserved.
+
+   By default, sufficient buffer will be requested to include a sign bit.
+   For example, when converting 128 with *n_bytes=1*, the function will return
+   2 (or more) in order to store a zero sign bit.
+
+   If ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` is specified, a zero sign bit
+   will be omitted from size calculations. This allows, for example, 128 to fit
+   in a single-byte buffer. If the destination buffer is later treated as
+   signed, a positive input value may become negative.
+   Note that the flag does not affect handling of negative values: for those,
+   space for a sign bit is always requested.
+
+   Specifying ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` causes an exception to be set
+   if *pylong* is negative. Without this flag, negative values will be copied
+   provided there is enough space for at least one sign bit, regardless of
+   whether ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` was specified.
  
     .. note::
  
-      Passing *n_bytes=0* to this function is not an accurate way to determine
-      the bit length of a value.
+      With the default *flags* (``-1``, or *UNSIGNED_BUFFER* without
+      *REJECT_NEGATIVE*), multiple Python integers can map to a single value
+      without overflow. For example, both ``255`` and ``-1`` fit a single-byte
+      buffer and set all its bits.
+      This matches typical C cast behavior.
  
     .. versionadded:: 3.13
  
diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h

index 07251db6bcc203c506f9fd03bde9142f0ce7f955..189229ee1035d837ffefbf68b81b74815cc3d182 100644 (file)
--- a/Include/cpython/longobject.h
+++ b/Include/cpython/longobject.h
@@ -4,11 +4,24 @@
  
  PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base);
  
+#define Py_ASNATIVEBYTES_DEFAULTS -1
+#define Py_ASNATIVEBYTES_BIG_ENDIAN 0
+#define Py_ASNATIVEBYTES_LITTLE_ENDIAN 1
+#define Py_ASNATIVEBYTES_NATIVE_ENDIAN 3
+#define Py_ASNATIVEBYTES_UNSIGNED_BUFFER 4
+#define Py_ASNATIVEBYTES_REJECT_NEGATIVE 8
+
  /* PyLong_AsNativeBytes: Copy the integer value to a native variable.
     buffer points to the first byte of the variable.
     n_bytes is the number of bytes available in the buffer. Pass 0 to request
     the required size for the value.
-   endianness is -1 for native endian, 0 for big endian or 1 for little.
+   flags is a bitfield of the following flags:
+   * 1 - little endian
+   * 2 - native endian
+   * 4 - unsigned destination (e.g. don't reject copying 255 into one byte)
+   * 8 - raise an exception for negative inputs
+   If flags is -1 (all bits set), native endian is used and value truncation
+   behaves most like C (allows negative inputs and allows MSB set).
     Big endian mode will write the most significant byte into the address
     directly referenced by buffer; little endian will write the least significant
     byte into that address.
@@ -24,19 +37,20 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base);
     calculate the bit length of an integer object.
     */
  PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer,
-    Py_ssize_t n_bytes, int endianness);
+    Py_ssize_t n_bytes, int flags);
  
  /* PyLong_FromNativeBytes: Create an int value from a native integer
     n_bytes is the number of bytes to read from the buffer. Passing 0 will
     always produce the zero int.
     PyLong_FromUnsignedNativeBytes always produces a non-negative int.
-   endianness is -1 for native endian, 0 for big endian or 1 for little.
+   flags is the same as for PyLong_AsNativeBytes, but only supports selecting
+   the endianness or forcing an unsigned buffer.
  
     Returns the int object, or NULL with an exception set. */
  PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes,
-    int endianness);
+    int flags);
  PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer,
-    size_t n_bytes, int endianness);
+    size_t n_bytes, int flags);
  
  PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op);
  PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op);
diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py

index d2140154d811b4d777ef78ddabe259c8e5fb8906..83f894e552f9832dcb67514f009bbb2c59ad1f9e 100644 (file)
--- a/Lib/test/test_capi/test_long.py
+++ b/Lib/test/test_capi/test_long.py
@@ -483,8 +483,12 @@ class LongTests(unittest.TestCase):
              (-MAX_USIZE, SZ + 1),
              (2**255-1, 32),
              (-(2**255-1), 32),
+            (2**255, 33),
+            (-(2**255), 33), # if you ask, we'll say 33, but 32 would do
              (2**256-1, 33),
              (-(2**256-1), 33),
+            (2**256, 33),
+            (-(2**256), 33),
          ]:
              with self.subTest(f"sizeof-{v:X}"):
                  buffer = bytearray(b"\x5a")
@@ -523,15 +527,17 @@ class LongTests(unittest.TestCase):
              (-1,        b'\xff' * 10,           min(11, SZ)),
              (-42,       b'\xd6',                1),
              (-42,       b'\xff' * 10 + b'\xd6', min(11, SZ)),
-            # Extracts 255 into a single byte, but requests sizeof(Py_ssize_t)
-            (255,       b'\xff',                SZ),
+            # Extracts 255 into a single byte, but requests 2
+            # (this is currently a special case, and "should" request SZ)
+            (255,       b'\xff',                2),
              (255,       b'\x00\xff',            2),
              (256,       b'\x01\x00',            2),
+            (0x80,      b'\x00' * 7 + b'\x80',  min(8, SZ)),
              # Extracts successfully (unsigned), but requests 9 bytes
              (2**63,     b'\x80' + b'\x00' * 7,  9),
-            # "Extracts", but requests 9 bytes
-            (-2**63,    b'\x80' + b'\x00' * 7,  9),
              (2**63,     b'\x00\x80' + b'\x00' * 7, 9),
+            # Extracts into 8 bytes, but if you provide 9 we'll say 9
+            (-2**63,    b'\x80' + b'\x00' * 7,  8),
              (-2**63,    b'\xff\x80' + b'\x00' * 7, 9),
  
              (2**255-1,      b'\x7f' + b'\xff' * 31,                 32),
@@ -548,10 +554,15 @@ class LongTests(unittest.TestCase):
              (-(2**256-1),   b'\x00' * 31 + b'\x01',                 33),
              (-(2**256-1),   b'\xff' + b'\x00' * 31 + b'\x01',       33),
              (-(2**256-1),   b'\xff\xff' + b'\x00' * 31 + b'\x01',   33),
+            # However, -2**255 precisely will extract into 32 bytes and return
+            # success. For bigger buffers, it will still succeed, but will
+            # return 33
+            (-(2**255),     b'\x80' + b'\x00' * 31,                 32),
+            (-(2**255),     b'\xff\x80' + b'\x00' * 31,             33),
  
              # The classic "Windows HRESULT as negative number" case
              #   HRESULT hr;
-            #   PyLong_CopyBits(<-2147467259>, &hr, sizeof(HRESULT))
+            #   PyLong_AsNativeBytes(<-2147467259>, &hr, sizeof(HRESULT), -1)
              #   assert(hr == E_FAIL)
              (-2147467259, b'\x80\x00\x40\x05', 4),
          ]:
@@ -569,14 +580,102 @@ class LongTests(unittest.TestCase):
                      f"PyLong_AsNativeBytes(v, buffer, {n}, <little>)")
                  self.assertEqual(expect_le, buffer[:n], "<little>")
  
+        # Test cases that do not request size for a sign bit when we pass the
+        # Py_ASNATIVEBYTES_UNSIGNED_BUFFER flag
+        for v, expect_be, expect_n in [
+            (255,       b'\xff',                1),
+            # We pass a 2 byte buffer so it just uses the whole thing
+            (255,       b'\x00\xff',            2),
+
+            (2**63,     b'\x80' + b'\x00' * 7,  8),
+            # We pass a 9 byte buffer so it uses the whole thing
+            (2**63,     b'\x00\x80' + b'\x00' * 7, 9),
+
+            (2**256-1,  b'\xff' * 32,           32),
+            # We pass a 33 byte buffer so it uses the whole thing
+            (2**256-1,  b'\x00' + b'\xff' * 32, 33),
+        ]:
+            with self.subTest(f"{v:X}-{len(expect_be)}bytes-unsigned"):
+                n = len(expect_be)
+                buffer = bytearray(b"\xa5"*n)
+                self.assertEqual(expect_n, asnativebytes(v, buffer, n, 4),
+                    f"PyLong_AsNativeBytes(v, buffer, {n}, <big|unsigned>)")
+                self.assertEqual(expect_n, asnativebytes(v, buffer, n, 5),
+                    f"PyLong_AsNativeBytes(v, buffer, {n}, <little|unsigned>)")
+
+        # Ensure Py_ASNATIVEBYTES_REJECT_NEGATIVE raises on negative value
+        with self.assertRaises(ValueError):
+            asnativebytes(-1, buffer, 0, 8)
+
          # Check a few error conditions. These are validated in code, but are
          # unspecified in docs, so if we make changes to the implementation, it's
          # fine to just update these tests rather than preserve the behaviour.
-        with self.assertRaises(SystemError):
-            asnativebytes(1, buffer, 0, 2)
          with self.assertRaises(TypeError):
              asnativebytes('not a number', buffer, 0, -1)
  
+    def test_long_asnativebytes_fuzz(self):
+        import math
+        from random import Random
+        from _testcapi import (
+            pylong_asnativebytes as asnativebytes,
+            SIZE_MAX,
+        )
+
+        # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot
+        SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8)
+
+        rng = Random()
+        # Allocate bigger buffer than actual values are going to be
+        buffer = bytearray(260)
+
+        for _ in range(1000):
+            n = rng.randrange(1, 256)
+            bytes_be = bytes([
+                # Ensure the most significant byte is nonzero
+                rng.randrange(1, 256),
+                *[rng.randrange(256) for _ in range(n - 1)]
+            ])
+            bytes_le = bytes_be[::-1]
+            v = int.from_bytes(bytes_le, 'little')
+
+            expect_1 = expect_2 = (SZ, n)
+            if bytes_be[0] & 0x80:
+                # All values are positive, so if MSB is set, expect extra bit
+                # when we request the size or have a large enough buffer
+                expect_1 = (SZ, n + 1)
+                # When passing Py_ASNATIVEBYTES_UNSIGNED_BUFFER, we expect the
+                # return to be exactly the right size.
+                expect_2 = (n,)
+
+            try:
+                actual = asnativebytes(v, buffer, 0, -1)
+                self.assertIn(actual, expect_1)
+
+                actual = asnativebytes(v, buffer, len(buffer), 0)
+                self.assertIn(actual, expect_1)
+                self.assertEqual(bytes_be, buffer[-n:])
+
+                actual = asnativebytes(v, buffer, len(buffer), 1)
+                self.assertIn(actual, expect_1)
+                self.assertEqual(bytes_le, buffer[:n])
+
+                actual = asnativebytes(v, buffer, n, 4)
+                self.assertIn(actual, expect_2, bytes_be.hex())
+                actual = asnativebytes(v, buffer, n, 5)
+                self.assertIn(actual, expect_2, bytes_be.hex())
+            except AssertionError as ex:
+                value_hex = ''.join(reversed([
+                    f'{b:02X}{"" if i % 8 else "_"}'
+                    for i, b in enumerate(bytes_le, start=1)
+                ])).strip('_')
+                if support.verbose:
+                    print()
+                    print(n, 'bytes')
+                    print('hex =', value_hex)
+                    print('int =', v)
+                    raise
+                raise AssertionError(f"Value: 0x{value_hex}") from ex
+
      def test_long_fromnativebytes(self):
          import math
          from _testcapi import (
@@ -617,6 +716,11 @@ class LongTests(unittest.TestCase):
                      self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0),
                          f"PyLong_FromUnsignedNativeBytes(buffer, {n}, <native>)")
  
+                # Swap the unsigned request for tests and use the
+                # Py_ASNATIVEBYTES_UNSIGNED_BUFFER flag instead
+                self.assertEqual(expect_u, fromnativebytes(v_be, n, 4, 1),
+                    f"PyLong_FromNativeBytes(buffer, {n}, <big|unsigned>)")
+
  
  if __name__ == "__main__":
      unittest.main()
diff --git a/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst

new file mode 100644 (file)

index 0000000..113db93
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst
@@ -0,0 +1,3 @@
+Add additional flags to :c:func:`PyLong_AsNativeBytes` and
+:c:func:`PyLong_FromNativeBytes` to allow the caller to determine how to handle
+edge cases around values that fill the entire buffer.
diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c

index 28dca01bee09a07201394082a2f8350743d7a0bb..769c3909ea3fb1eeabac105d4369561c64942677 100644 (file)
--- a/Modules/_testcapi/long.c
+++ b/Modules/_testcapi/long.c
@@ -52,8 +52,8 @@ pylong_asnativebytes(PyObject *module, PyObject *args)
  {
      PyObject *v;
      Py_buffer buffer;
-    Py_ssize_t n, endianness;
-    if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &endianness)) {
+    Py_ssize_t n, flags;
+    if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &flags)) {
          return NULL;
      }
      if (buffer.readonly) {
@@ -66,7 +66,7 @@ pylong_asnativebytes(PyObject *module, PyObject *args)
          PyBuffer_Release(&buffer);
          return NULL;
      }
-    Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness);
+    Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)flags);
      PyBuffer_Release(&buffer);
      return res >= 0 ? PyLong_FromSsize_t(res) : NULL;
  }
@@ -76,8 +76,8 @@ static PyObject *
  pylong_fromnativebytes(PyObject *module, PyObject *args)
  {
      Py_buffer buffer;
-    Py_ssize_t n, endianness, signed_;
-    if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &endianness, &signed_)) {
+    Py_ssize_t n, flags, signed_;
+    if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &flags, &signed_)) {
          return NULL;
      }
      if (buffer.len < n) {
@@ -86,8 +86,8 @@ pylong_fromnativebytes(PyObject *module, PyObject *args)
          return NULL;
      }
      PyObject *res = signed_
-        ? PyLong_FromNativeBytes(buffer.buf, n, (int)endianness)
-        : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)endianness);
+        ? PyLong_FromNativeBytes(buffer.buf, n, (int)flags)
+        : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)flags);
      PyBuffer_Release(&buffer);
      return res;
  }
diff --git a/Objects/longobject.c b/Objects/longobject.c

index cc2fe11f31c43052d121303716416a688d8f5de0..c4ab064d688d674b2153bc57f28aa5e451245547 100644 (file)
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1083,18 +1083,17 @@ _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n)
  static inline int
  _resolve_endianness(int *endianness)
  {
-    if (*endianness < 0) {
+    if (*endianness == -1 || (*endianness & 2)) {
          *endianness = PY_LITTLE_ENDIAN;
+    } else {
+        *endianness &= 1;
      }
-    if (*endianness != 0 && *endianness != 1) {
-        PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value");
-        return -1;
-    }
+    assert(*endianness == 0 || *endianness == 1);
      return 0;
  }
  
  Py_ssize_t
-PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness)
+PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int flags)
  {
      PyLongObject *v;
      union {
@@ -1109,7 +1108,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness)
          return -1;
      }
  
-    int little_endian = endianness;
+    int little_endian = flags;
      if (_resolve_endianness(&little_endian) < 0) {
          return -1;
      }
@@ -1125,6 +1124,15 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness)
          do_decref = 1;
      }
  
+    if ((flags != -1 && (flags & Py_ASNATIVEBYTES_REJECT_NEGATIVE))
+        && _PyLong_IsNegative(v)) {
+        PyErr_SetString(PyExc_ValueError, "Cannot convert negative int");
+        if (do_decref) {
+            Py_DECREF(v);
+        }
+        return -1;
+    }
+
      if (_PyLong_IsCompact(v)) {
          res = 0;
          cv.v = _PyLong_CompactValue(v);
@@ -1159,6 +1167,15 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness)
              /* If we fit, return the requested number of bytes */
              if (_fits_in_n_bits(cv.v, n * 8)) {
                  res = n;
+            } else if (cv.v > 0 && _fits_in_n_bits(cv.v, n * 8 + 1)) {
+                /* Positive values with the MSB set do not require an
+                 * additional bit when the caller's intent is to treat them
+                 * as unsigned. */
+                if (flags == -1 || (flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) {
+                    res = n;
+                } else {
+                    res = n + 1;
+                }
              }
          }
          else {
@@ -1199,17 +1216,55 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness)
              _PyLong_AsByteArray(v, buffer, (size_t)n, little_endian, 1, 0);
          }
  
-        // More efficient calculation for number of bytes required?
+        /* Calculates the number of bits required for the *absolute* value
+         * of v. This does not take sign into account, only magnitude. */
          size_t nb = _PyLong_NumBits((PyObject *)v);
-        /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up
-         * multiples of 8 to the next byte, but we add an implied bit for
-         * the sign and it cancels out. */
-        size_t n_needed = (nb / 8) + 1;
-        res = (Py_ssize_t)n_needed;
-        if ((size_t)res != n_needed) {
-            PyErr_SetString(PyExc_OverflowError,
-                "value too large to convert");
+        if (nb == (size_t)-1) {
              res = -1;
+        } else {
+            /* Normally this would be ((nb - 1) / 8) + 1 to avoid rounding up
+             * multiples of 8 to the next byte, but we add an implied bit for
+             * the sign and it cancels out. */
+            res = (Py_ssize_t)(nb / 8) + 1;
+        }
+
+        /* Two edge cases exist that are best handled after extracting the
+         * bits. These may result in us reporting overflow when the value
+         * actually fits.
+         */
+        if (n > 0 && res == n + 1 && nb % 8 == 0) {
+            if (_PyLong_IsNegative(v)) {
+                /* Values of 0x80...00 from negative values that use every
+                 * available bit in the buffer do not require an additional
+                 * bit to store the sign. */
+                int is_edge_case = 1;
+                unsigned char *b = (unsigned char *)buffer;
+                for (Py_ssize_t i = 0; i < n && is_edge_case; ++i, ++b) {
+                    if (i == 0) {
+                        is_edge_case = (*b == (little_endian ? 0 : 0x80));
+                    } else if (i < n - 1) {
+                        is_edge_case = (*b == 0);
+                    } else {
+                        is_edge_case = (*b == (little_endian ? 0x80 : 0));
+                    }
+                }
+                if (is_edge_case) {
+                    res = n;
+                }
+            }
+            else {
+                /* Positive values with the MSB set do not require an
+                 * additional bit when the caller's intent is to treat them
+                 * as unsigned. */
+                unsigned char *b = (unsigned char *)buffer;
+                if (b[little_endian ? n - 1 : 0] & 0x80) {
+                    if (flags == -1 || (flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) {
+                        res = n;
+                    } else {
+                        res = n + 1;
+                    }
+                }
+            }
          }
      }
  
@@ -1222,38 +1277,41 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness)
  
  
  PyObject *
-PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness)
+PyLong_FromNativeBytes(const void* buffer, size_t n, int flags)
  {
      if (!buffer) {
          PyErr_BadInternalCall();
          return NULL;
      }
  
-    int little_endian = endianness;
+    int little_endian = flags;
      if (_resolve_endianness(&little_endian) < 0) {
          return NULL;
      }
  
-    return _PyLong_FromByteArray((const unsigned char *)buffer, n,
-                                 little_endian, 1);
+    return _PyLong_FromByteArray(
+        (const unsigned char *)buffer,
+        n,
+        little_endian,
+        (flags == -1 || !(flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) ? 1 : 0
+    );
  }
  
  
  PyObject *
-PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness)
+PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int flags)
  {
      if (!buffer) {
          PyErr_BadInternalCall();
          return NULL;
      }
  
-    int little_endian = endianness;
+    int little_endian = flags;
      if (_resolve_endianness(&little_endian) < 0) {
          return NULL;
      }
  
-    return _PyLong_FromByteArray((const unsigned char *)buffer, n,
-                                 little_endian, 0);
+    return _PyLong_FromByteArray((const unsigned char *)buffer, n, little_endian, 0);
  }
author	Steve Dower <steve.dower@python.org>
	Fri, 5 Apr 2024 14:21:16 +0000 (15:21 +0100)
committer	GitHub <noreply@github.com>
	Fri, 5 Apr 2024 14:21:16 +0000 (16:21 +0200)
Doc/c-api/long.rst		patch \| blob \| blame \| history
Include/cpython/longobject.h		patch \| blob \| blame \| history
Lib/test/test_capi/test_long.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst	[new file with mode: 0644]	patch \| blob
Modules/_testcapi/long.c		patch \| blob \| blame \| history
Objects/longobject.c		patch \| blob \| blame \| history