gh-111140: Improve PyLong_AsNativeBytes API doc example & improve the test (#115380)

author Gregory P. Smith <greg@krypto.org>

Thu, 22 Feb 2024 03:27:16 +0000 (19:27 -0800)

committer GitHub <noreply@github.com>

Thu, 22 Feb 2024 03:27:16 +0000 (03:27 +0000)
author Gregory P. Smith <greg@krypto.org>
Thu, 22 Feb 2024 03:27:16 +0000 (19:27 -0800)
committer GitHub <noreply@github.com>
Thu, 22 Feb 2024 03:27:16 +0000 (03:27 +0000)
diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst

index f24282e76a33d15a892f038934719ef6f1d05505..582f5c7bf05471c853d8fb78a2516854a7304407 100644 (file)
--- a/Doc/c-api/long.rst
+++ b/Doc/c-api/long.rst
@@ -358,46 +358,86 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
  
     Copy the Python integer value to a native *buffer* of size *n_bytes*::
  
-      int value;
-      Py_ssize_t bytes = PyLong_AsNativeBytes(v, &value, sizeof(value), -1);
+      int32_t value;
+      Py_ssize_t bytes = PyLong_AsNativeBits(pylong, &value, sizeof(value), -1);
        if (bytes < 0) {
-          // Error occurred
+          // A Python exception was set with the reason.
            return NULL;
        }
        else if (bytes <= (Py_ssize_t)sizeof(value)) {
            // Success!
        }
        else {
-          // Overflow occurred, but 'value' contains truncated value
+          // Overflow occurred, but 'value' contains the truncated
+          // lowest bits of pylong.
        }
  
+   The above example may look *similar* to
+   :c:func:`PyLong_As* <PyLong_AsSize_t>`
+   but instead fills in a specific caller defined type and never raises an
+   error about of the :class:`int` *pylong*'s value regardless of *n_bytes*
+   or the returned byte count.
+
+   To get at the entire potentially big Python value, this can be used to
+   reserve enough space and copy it::
+
+      // Ask how much space we need.
+      Py_ssize_t expected = PyLong_AsNativeBits(pylong, NULL, 0, -1);
+      if (expected < 0) {
+          // A Python exception was set with the reason.
+          return NULL;
+      }
+      assert(expected != 0);  // Impossible per the API definition.
+      uint8_t *bignum = malloc(expected);
+      if (!bignum) {
+          PyErr_SetString(PyExc_MemoryError, "bignum malloc failed.");
+          return NULL;
+      }
+      // Safely get the entire value.
+      Py_ssize_t bytes = PyLong_AsNativeBits(pylong, bignum, expected, -1);
+      if (bytes < 0) {  // Exception set.
+          free(bignum);
+          return NULL;
+      }
+      else if (bytes > expected) {  // Be safe, should not be possible.
+          PyErr_SetString(PyExc_RuntimeError,
+              "Unexpected bignum truncation after a size check.");
+          free(bignum);
+          return NULL;
+      }
+      // The expected success given the above pre-check.
+      // ... use bignum ...
+      free(bignum);
+
     *endianness* may be passed ``-1`` for the native endian that CPython was
     compiled with, or ``0`` for big endian and ``1`` for little.
  
-   Return ``-1`` with an exception raised if *pylong* cannot be interpreted as
+   Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as
     an integer. Otherwise, return the size of the buffer required to store the
     value. If this is equal to or less than *n_bytes*, the entire value was
-   copied.
+   copied. ``0`` will never be returned.
  
-   Unless an exception is raised, all *n_bytes* of the buffer will be written
-   with as much of the value as can fit. This allows the caller to ignore all
-   non-negative results if the intent is to match the typical behavior of a
-   C-style downcast. No exception is set for this case.
+   Unless an exception is raised, all *n_bytes* of the buffer will always be
+   written. In the case of truncation, as many of the lowest bits of the value
+   as could fit are written. This allows the caller to ignore all non-negative
+   results if the intent is to match the typical behavior of a C-style
+   downcast. No exception is set on truncation.
  
-   Values are always copied as two's-complement, and sufficient buffer will be
+   Values are always copied as two's-complement and sufficient buffer will be
     requested to include a sign bit. For example, this may cause an value that
     fits into 8 bytes when treated as unsigned to request 9 bytes, even though
     all eight bytes were copied into the buffer. What has been omitted is the
-   zero sign bit, which is redundant when the intention is to treat the value as
-   unsigned.
+   zero sign bit -- redundant if the caller's intention is to treat the value
+   as unsigned.
  
-   Passing zero to *n_bytes* will return the requested buffer size.
+   Passing zero to *n_bytes* will return the size of a buffer that would
+   be large enough to hold the value. This may be larger than technically
+   necessary, but not unreasonably so.
  
     .. note::
  
-      When the value does not fit in the provided buffer, the requested size
-      returned from the function may be larger than necessary. Passing 0 to this
-      function is not an accurate way to determine the bit length of a value.
+      Passing *n_bytes=0* to this function is not an accurate way to determine
+      the bit length of a value.
  
     .. versionadded:: 3.13
  
diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py

index fc82cbfa66ea7aa98bc1eac465176b75a7c8087a..9f5ee507a8eb85e3aabe402e1cc8131d9e740e4e 100644 (file)
--- a/Lib/test/test_capi/test_long.py
+++ b/Lib/test/test_capi/test_long.py
@@ -438,7 +438,12 @@ class LongTests(unittest.TestCase):
          if support.verbose:
              print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}")
  
-        # These tests check that the requested buffer size is correct
+        # These tests check that the requested buffer size is correct.
+        # This matches our current implementation: We only specify that the
+        # return value is a size *sufficient* to hold the result when queried
+        # using n_bytes=0. If our implementation changes, feel free to update
+        # the expectations here -- or loosen them to be range checks.
+        # (i.e. 0 *could* be stored in 1 byte and 512 in 2)
          for v, expect in [
              (0, SZ),
              (512, SZ),
@@ -453,12 +458,25 @@ class LongTests(unittest.TestCase):
              (-(2**256-1), 33),
          ]:
              with self.subTest(f"sizeof-{v:X}"):
-                buffer = bytearray(1)
+                buffer = bytearray(b"\x5a")
                  self.assertEqual(expect, asnativebytes(v, buffer, 0, -1),
-                    "PyLong_AsNativeBytes(v, NULL, 0, -1)")
+                    "PyLong_AsNativeBytes(v, <unknown>, 0, -1)")
+                self.assertEqual(buffer, b"\x5a",
+                    "buffer overwritten when it should not have been")
                  # Also check via the __index__ path
                  self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1),
-                    "PyLong_AsNativeBytes(Index(v), NULL, 0, -1)")
+                    "PyLong_AsNativeBytes(Index(v), <unknown>, 0, -1)")
+                self.assertEqual(buffer, b"\x5a",
+                    "buffer overwritten when it should not have been")
+
+        # Test that we populate n=2 bytes but do not overwrite more.
+        buffer = bytearray(b"\x99"*3)
+        self.assertEqual(2, asnativebytes(4, buffer, 2, 0),  # BE
+            "PyLong_AsNativeBytes(v, <3 byte buffer>, 2, 0)  // BE")
+        self.assertEqual(buffer, b"\x00\x04\x99")
+        self.assertEqual(2, asnativebytes(4, buffer, 2, 1),  # LE
+            "PyLong_AsNativeBytes(v, <3 byte buffer>, 2, 1)  // LE")
+        self.assertEqual(buffer, b"\x04\x00\x99")
  
          # We request as many bytes as `expect_be` contains, and always check
          # the result (both big and little endian). We check the return value
@@ -510,7 +528,9 @@ class LongTests(unittest.TestCase):
          ]:
              with self.subTest(f"{v:X}-{len(expect_be)}bytes"):
                  n = len(expect_be)
-                buffer = bytearray(n)
+                # Fill the buffer with dummy data to ensure all bytes
+                # are overwritten.
+                buffer = bytearray(b"\xa5"*n)
                  expect_le = expect_be[::-1]
  
                  self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0),
author	Gregory P. Smith <greg@krypto.org>
	Thu, 22 Feb 2024 03:27:16 +0000 (19:27 -0800)
committer	GitHub <noreply@github.com>
	Thu, 22 Feb 2024 03:27:16 +0000 (03:27 +0000)
Doc/c-api/long.rst		patch \| blob \| blame \| history
Lib/test/test_capi/test_long.py		patch \| blob \| blame \| history