gh-139353: Rename formatter_unicode.c to unicode_formatter.c (#139723)

author Victor Stinner <vstinner@python.org>

Wed, 8 Oct 2025 12:56:00 +0000 (14:56 +0200)

committer GitHub <noreply@github.com>

Wed, 8 Oct 2025 12:56:00 +0000 (14:56 +0200)
author Victor Stinner <vstinner@python.org>
Wed, 8 Oct 2025 12:56:00 +0000 (14:56 +0200)
committer GitHub <noreply@github.com>
Wed, 8 Oct 2025 12:56:00 +0000 (14:56 +0200)
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h

index c85c01da89a2ffb6e517e84d8f40ff801ed37846..f1c9bcd47888b103f72001cd827f6ae1424537c1 100644 (file)
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -11,6 +11,44 @@ extern "C" {
  #include "pycore_fileutils.h"     // _Py_error_handler
  #include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
  
+// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
+#define _Py_MAX_UNICODE 0x10ffff
+
+// Set code units [start, start+length) of 'data' (of the given kind) to 'value'.
+static inline void
+_PyUnicode_Fill(int kind, void *data, Py_UCS4 value,
+                Py_ssize_t start, Py_ssize_t length)
+{
+    assert(0 <= start);
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND: {
+        assert(value <= 0xff);
+        Py_UCS1 ch = (unsigned char)value;
+        Py_UCS1 *to = (Py_UCS1 *)data + start;
+        memset(to, ch, length);  // NOTE(review): length >= 0 is not asserted
+        break;
+    }
+    case PyUnicode_2BYTE_KIND: {
+        assert(value <= 0xffff);
+        Py_UCS2 ch = (Py_UCS2)value;
+        Py_UCS2 *to = (Py_UCS2 *)data + start;
+        const Py_UCS2 *end = to + length;
+        for (; to < end; ++to) *to = ch;
+        break;
+    }
+    case PyUnicode_4BYTE_KIND: {
+        assert(value <= _Py_MAX_UNICODE);
+        Py_UCS4 ch = value;
+        Py_UCS4 * to = (Py_UCS4 *)data + start;
+        const Py_UCS4 *end = to + length;
+        for (; to < end; ++to) *to = ch;
+        break;
+    }
+    default: Py_UNREACHABLE();  // kind must be one of the three cases above
+    }
+}
+
+
  /* --- Characters Type APIs ----------------------------------------------- */
  
  extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
@@ -240,21 +278,6 @@ extern PyObject* _PyUnicode_XStrip(
      );
  
  
-/* Using explicit passed-in values, insert the thousands grouping
-   into the string pointed to by buffer.  For the argument descriptions,
-   see Objects/stringlib/localeutil.h */
-extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
-    _PyUnicodeWriter *writer,
-    Py_ssize_t n_buffer,
-    PyObject *digits,
-    Py_ssize_t d_pos,
-    Py_ssize_t n_digits,
-    Py_ssize_t min_width,
-    const char *grouping,
-    PyObject *thousands_sep,
-    Py_UCS4 *maxchar,
-    int forward);
-
  /* Dedent a string.
     Behaviour is expected to be an exact match of `textwrap.dedent`.
     Return a new reference on success, NULL with exception set on error.
diff --git a/Makefile.pre.in b/Makefile.pre.in

index 987d55a9bdbd3b574c46a102e38b4b22c1d91426..a5223246845dcf60d6bdd083156c5210876aca92 100644 (file)
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -501,7 +501,6 @@ PYTHON_OBJS=        \
                 Python/pystrtod.o \
                 Python/pystrhex.o \
                 Python/dtoa.o \
-               Python/formatter_unicode.o \
                 Python/fileutils.o \
                 Python/suggestions.o \
                 Python/perf_trampoline.o \
@@ -558,6 +557,7 @@ OBJECT_OBJS=        \
                 Objects/tupleobject.o \
                 Objects/typeobject.o \
                 Objects/typevarobject.o \
+               Objects/unicode_formatter.o \
                 Objects/unicodeobject.o \
                 Objects/unicodectype.o \
                 Objects/unionobject.o \
@@ -2091,7 +2091,6 @@ UNICODE_DEPS = \
                 $(srcdir)/Objects/stringlib/fastsearch.h \
                 $(srcdir)/Objects/stringlib/find.h \
                 $(srcdir)/Objects/stringlib/find_max_char.h \
-               $(srcdir)/Objects/stringlib/localeutil.h \
                 $(srcdir)/Objects/stringlib/partition.h \
                 $(srcdir)/Objects/stringlib/replace.h \
                 $(srcdir)/Objects/stringlib/repr.h \
diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h

deleted file mode 100644 (file)

index a4ab701..0000000
--- a/Objects/stringlib/localeutil.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* _PyUnicode_InsertThousandsGrouping() helper functions */
-
-typedef struct {
-    const char *grouping;
-    char previous;
-    Py_ssize_t i; /* Where we're currently pointing in grouping. */
-} GroupGenerator;
-
-
-static void
-GroupGenerator_init(GroupGenerator *self, const char *grouping)
-{
-    self->grouping = grouping;
-    self->i = 0;
-    self->previous = 0;
-}
-
-
-/* Returns the next grouping, or 0 to signify end. */
-static Py_ssize_t
-GroupGenerator_next(GroupGenerator *self)
-{
-    /* Note that we don't really do much error checking here. If a
-       grouping string contains just CHAR_MAX, for example, then just
-       terminate the generator. That shouldn't happen, but at least we
-       fail gracefully. */
-    switch (self->grouping[self->i]) {
-    case 0:
-        return self->previous;
-    case CHAR_MAX:
-        /* Stop the generator. */
-        return 0;
-    default: {
-        char ch = self->grouping[self->i];
-        self->previous = ch;
-        self->i++;
-        return (Py_ssize_t)ch;
-    }
-    }
-}
-
-
-/* Fill in some digits, leading zeros, and thousands separator. All
-   are optional, depending on when we're called. */
-static void
-InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
-                             PyObject *digits, Py_ssize_t *digits_pos,
-                             Py_ssize_t n_chars, Py_ssize_t n_zeros,
-                             PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
-                             Py_UCS4 *maxchar, int forward)
-{
-    if (!writer) {
-        /* if maxchar > 127, maxchar is already set */
-        if (*maxchar == 127 && thousands_sep) {
-            Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
-            *maxchar = Py_MAX(*maxchar, maxchar2);
-        }
-        return;
-    }
-
-    if (thousands_sep) {
-        if (!forward) {
-            *buffer_pos -= thousands_sep_len;
-        }
-        /* Copy the thousands_sep chars into the buffer. */
-        _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
-                                      thousands_sep, 0,
-                                      thousands_sep_len);
-        if (forward) {
-            *buffer_pos += thousands_sep_len;
-        }
-    }
-
-    if (!forward) {
-        *buffer_pos -= n_chars;
-        *digits_pos -= n_chars;
-    }
-    _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
-                                  digits, *digits_pos,
-                                  n_chars);
-    if (forward) {
-        *buffer_pos += n_chars;
-        *digits_pos += n_chars;
-    }
-
-    if (n_zeros) {
-        if (!forward) {
-            *buffer_pos -= n_zeros;
-        }
-        int kind = PyUnicode_KIND(writer->buffer);
-        void *data = PyUnicode_DATA(writer->buffer);
-        unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
-        if (forward) {
-            *buffer_pos += n_zeros;
-        }
-    }
-}
diff --git a/Python/formatter_unicode.c b/Objects/unicode_formatter.c

similarity index 88%

rename from Python/formatter_unicode.c

rename to Objects/unicode_formatter.c

index 30807f428c7d71b095ed45b5139bf6fa1c01026b..b8604d1355940a516390c31fd917c13202877319 100644 (file)
--- a/Python/formatter_unicode.c
+++ b/Objects/unicode_formatter.c
@@ -8,6 +8,241 @@
  #include "pycore_unicodeobject.h" // PyUnicode_MAX_CHAR_VALUE()
  #include <locale.h>
  
+
+/* _PyUnicode_InsertThousandsGrouping() helper functions */
+
+typedef struct {
+    const char *grouping;  /* group sizes, as in localeconv() grouping */
+    char previous;         /* last size handed out; 0 until then */
+    Py_ssize_t i; /* Where we're currently pointing in grouping. */
+} GroupGenerator;
+
+/* Point the generator at the first entry of 'grouping'. */
+static void
+GroupGenerator_init(GroupGenerator *self, const char *grouping)
+{
+    self->grouping = grouping;
+    self->i = 0;
+    self->previous = 0;
+}
+
+
+/* Returns the next group size, or 0 to signify end of the grouping. */
+static Py_ssize_t
+GroupGenerator_next(GroupGenerator *self)
+{
+    /* Note that we don't really do much error checking here. If a
+       grouping string contains just CHAR_MAX, for example, then just
+       terminate the generator. That shouldn't happen, but at least we
+       fail gracefully. */
+    switch (self->grouping[self->i]) {
+    case 0:  /* end of string: keep repeating the last size (0 if none) */
+        return self->previous;
+    case CHAR_MAX:
+        /* Stop the generator. */
+        return 0;
+    default: {  /* an explicit size: remember it and advance */
+        char ch = self->grouping[self->i];
+        self->previous = ch;
+        self->i++;
+        return (Py_ssize_t)ch;
+    }
+    }
+}
+
+
+/* Write one group: optional separator, n_chars digits, then n_zeros '0's
+   (mirrored when !forward). Counting mode (!writer) only updates *maxchar. */
+static void
+InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
+                             PyObject *digits, Py_ssize_t *digits_pos,
+                             Py_ssize_t n_chars, Py_ssize_t n_zeros,
+                             PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
+                             Py_UCS4 *maxchar, int forward)
+{
+    if (!writer) {  /* counting mode: nothing to copy */
+        /* if *maxchar != 127, an earlier call already raised it */
+        if (*maxchar == 127 && thousands_sep) {
+            Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
+            *maxchar = Py_MAX(*maxchar, maxchar2);
+        }
+        return;
+    }
+
+    if (thousands_sep) {
+        if (!forward) {  /* right to left: step back before writing */
+            *buffer_pos -= thousands_sep_len;
+        }
+        /* Copy the thousands_sep chars into the buffer. */
+        _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
+                                      thousands_sep, 0,
+                                      thousands_sep_len);
+        if (forward) {
+            *buffer_pos += thousands_sep_len;
+        }
+    }
+
+    if (!forward) {
+        *buffer_pos -= n_chars;
+        *digits_pos -= n_chars;
+    }
+    /* Copy n_chars digits; both cursors move together. */
+    _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
+                                  digits, *digits_pos,
+                                  n_chars);
+    if (forward) {
+        *buffer_pos += n_chars;
+        *digits_pos += n_chars;
+    }
+
+    if (n_zeros) {  /* zero padding for this group */
+        if (!forward) {
+            *buffer_pos -= n_zeros;
+        }
+        int kind = PyUnicode_KIND(writer->buffer);
+        void *data = PyUnicode_DATA(writer->buffer);
+        _PyUnicode_Fill(kind, data, '0', *buffer_pos, n_zeros);
+        if (forward) {
+            *buffer_pos += n_zeros;
+        }
+    }
+}
+
+
+/**
+ * _PyUnicode_InsertThousandsGrouping:
+ * @writer: Unicode writer to fill, or NULL to only count (see below).
+ * @n_buffer: Number of output characters; only read when @forward is 0.
+ * @digits: Digits we're reading from. Must be NULL if @writer is NULL.
+ * @d_pos: Start of digits string.
+ * @n_digits: The number of digits in the string, in which we want
+ *            to put the grouping chars.
+ * @min_width: The minimum width of the digits in the output string.
+ *             Output will be zero-padded on the left to fill.
+ * @grouping: see definition in localeconv().
+ * @thousands_sep: see definition in localeconv().
+ * @maxchar: Counting mode only (NULL when filling): set to 127, raised to
+ *           @thousands_sep's max char value when a separator is inserted.
+ * @forward: If non-zero, fill left to right; otherwise right to left.
+ *
+ * There are 2 modes: counting (@writer is NULL) and filling.
+ * If counting, nothing is written: only the required size is computed.
+ * If filling, the buffer must already be large enough; digits, zero
+ *  padding and @thousands_sep separators are copied into @writer->buffer.
+ *
+ * Return value: number of characters; no code path here returns -1.
+ **/
+static Py_ssize_t
+_PyUnicode_InsertThousandsGrouping(
+    _PyUnicodeWriter *writer,
+    Py_ssize_t n_buffer,
+    PyObject *digits,
+    Py_ssize_t d_pos,
+    Py_ssize_t n_digits,
+    Py_ssize_t min_width,
+    const char *grouping,
+    PyObject *thousands_sep,
+    Py_UCS4 *maxchar,
+    int forward)
+{
+    min_width = Py_MAX(0, min_width);  /* clamp negative widths to 0 */
+    if (writer) {  /* filling mode */
+        assert(digits != NULL);
+        assert(maxchar == NULL);
+    }
+    else {  /* counting mode */
+        assert(digits == NULL);
+        assert(maxchar != NULL);
+    }
+    assert(0 <= d_pos);
+    assert(0 <= n_digits);
+    assert(grouping != NULL);
+
+    Py_ssize_t count = 0;
+    Py_ssize_t n_zeros;
+    int loop_broken = 0;
+    int use_separator = 0; /* First time through, don't append the
+                              separator. They only go between
+                              groups. */
+    Py_ssize_t buffer_pos;
+    Py_ssize_t digits_pos;
+    Py_ssize_t len;
+    Py_ssize_t n_chars;
+    Py_ssize_t remaining = n_digits; /* Number of chars remaining to
+                                        be looked at */
+    /* A generator that returns all of the grouping widths, until it
+       returns 0. */
+    GroupGenerator groupgen;
+    GroupGenerator_init(&groupgen, grouping);
+    const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
+
+    /* if digits are not grouped, thousands separator
+       should be an empty string */
+    assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));
+
+    digits_pos = d_pos + (forward ? 0 : n_digits);
+    if (writer) {
+        buffer_pos = writer->pos + (forward ? 0 : n_buffer);
+        assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
+        assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
+    }
+    else {
+        buffer_pos = forward ? 0 : n_buffer;
+    }
+
+    if (!writer) {
+        *maxchar = 127;
+    }
+
+    while ((len = GroupGenerator_next(&groupgen)) > 0) {
+        len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1));
+        n_zeros = Py_MAX(0, len - remaining);
+        n_chars = Py_MAX(0, Py_MIN(remaining, len));
+
+        /* Use n_zeros zeros and n_chars chars */
+
+        /* Always add up the size, in counting and filling mode alike. */
+        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+
+        /* Copy into the writer (counting mode only updates *maxchar). */
+        InsertThousandsGrouping_fill(writer, &buffer_pos,
+                                     digits, &digits_pos,
+                                     n_chars, n_zeros,
+                                     use_separator ? thousands_sep : NULL,
+                                     thousands_sep_len, maxchar, forward);
+
+        /* Use a separator next time. */
+        use_separator = 1;
+
+        remaining -= n_chars;
+        min_width -= len;
+
+        if (remaining <= 0 && min_width <= 0) {
+            loop_broken = 1;
+            break;
+        }
+        min_width -= thousands_sep_len;
+    }
+    if (!loop_broken) {
+        /* The generator stopped (len <= 0): emit the rest as one last group. */
+
+        len = Py_MAX(Py_MAX(remaining, min_width), 1);
+        n_zeros = Py_MAX(0, len - remaining);
+        n_chars = Py_MAX(0, Py_MIN(remaining, len));
+
+        /* Use n_zeros zeros and n_chars chars */
+        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+
+        /* Copy into the writer (counting mode only updates *maxchar). */
+        InsertThousandsGrouping_fill(writer, &buffer_pos,
+                                     digits, &digits_pos,
+                                     n_chars, n_zeros,
+                                     use_separator ? thousands_sep : NULL,
+                                     thousands_sep_len, maxchar, forward);
+    }
+    return count;
+}
+
+
  /* Raises an exception about an unknown presentation type for this
   * type. */
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 11ba147a74469231dd236e07944a34cafd60bc5d..c71f9d3f71dea5e69ea79880c240de0a9f432fcb 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -104,9 +104,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently
  
  */
  
-// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
-// The value must be the same in fileutils.c.
-#define MAX_UNICODE 0x10ffff
+#define MAX_UNICODE _Py_MAX_UNICODE
  
  #ifdef Py_DEBUG
  #  define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0)
@@ -420,39 +418,6 @@ static void clear_global_interned_strings(void)
          return unicode_get_empty();  \
      } while (0)
  
-static inline void
-unicode_fill(int kind, void *data, Py_UCS4 value,
-             Py_ssize_t start, Py_ssize_t length)
-{
-    assert(0 <= start);
-    switch (kind) {
-    case PyUnicode_1BYTE_KIND: {
-        assert(value <= 0xff);
-        Py_UCS1 ch = (unsigned char)value;
-        Py_UCS1 *to = (Py_UCS1 *)data + start;
-        memset(to, ch, length);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND: {
-        assert(value <= 0xffff);
-        Py_UCS2 ch = (Py_UCS2)value;
-        Py_UCS2 *to = (Py_UCS2 *)data + start;
-        const Py_UCS2 *end = to + length;
-        for (; to < end; ++to) *to = ch;
-        break;
-    }
-    case PyUnicode_4BYTE_KIND: {
-        assert(value <= MAX_UNICODE);
-        Py_UCS4 ch = value;
-        Py_UCS4 * to = (Py_UCS4 *)data + start;
-        const Py_UCS4 *end = to + length;
-        for (; to < end; ++to) *to = ch;
-        break;
-    }
-    default: Py_UNREACHABLE();
-    }
-}
-
  
  /* Fast detection of the most frequent whitespace characters */
  const unsigned char _Py_ascii_whitespace[] = {
@@ -9735,142 +9700,6 @@ any_find_slice(PyObject* s1, PyObject* s2,
      return result;
  }
  
-/* _PyUnicode_InsertThousandsGrouping() helper functions */
-#include "stringlib/localeutil.h"
-
-/**
- * InsertThousandsGrouping:
- * @writer: Unicode writer.
- * @n_buffer: Number of characters in @buffer.
- * @digits: Digits we're reading from. If count is non-NULL, this is unused.
- * @d_pos: Start of digits string.
- * @n_digits: The number of digits in the string, in which we want
- *            to put the grouping chars.
- * @min_width: The minimum width of the digits in the output string.
- *             Output will be zero-padded on the left to fill.
- * @grouping: see definition in localeconv().
- * @thousands_sep: see definition in localeconv().
- *
- * There are 2 modes: counting and filling. If @writer is NULL,
- *  we are in counting mode, else filling mode.
- * If counting, the required buffer size is returned.
- * If filling, we know the buffer will be large enough, so we don't
- *  need to pass in the buffer size.
- * Inserts thousand grouping characters (as defined by grouping and
- *  thousands_sep) into @writer.
- *
- * Return value: -1 on error, number of characters otherwise.
- **/
-Py_ssize_t
-_PyUnicode_InsertThousandsGrouping(
-    _PyUnicodeWriter *writer,
-    Py_ssize_t n_buffer,
-    PyObject *digits,
-    Py_ssize_t d_pos,
-    Py_ssize_t n_digits,
-    Py_ssize_t min_width,
-    const char *grouping,
-    PyObject *thousands_sep,
-    Py_UCS4 *maxchar,
-    int forward)
-{
-    min_width = Py_MAX(0, min_width);
-    if (writer) {
-        assert(digits != NULL);
-        assert(maxchar == NULL);
-    }
-    else {
-        assert(digits == NULL);
-        assert(maxchar != NULL);
-    }
-    assert(0 <= d_pos);
-    assert(0 <= n_digits);
-    assert(grouping != NULL);
-
-    Py_ssize_t count = 0;
-    Py_ssize_t n_zeros;
-    int loop_broken = 0;
-    int use_separator = 0; /* First time through, don't append the
-                              separator. They only go between
-                              groups. */
-    Py_ssize_t buffer_pos;
-    Py_ssize_t digits_pos;
-    Py_ssize_t len;
-    Py_ssize_t n_chars;
-    Py_ssize_t remaining = n_digits; /* Number of chars remaining to
-                                        be looked at */
-    /* A generator that returns all of the grouping widths, until it
-       returns 0. */
-    GroupGenerator groupgen;
-    GroupGenerator_init(&groupgen, grouping);
-    const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
-
-    /* if digits are not grouped, thousands separator
-       should be an empty string */
-    assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));
-
-    digits_pos = d_pos + (forward ? 0 : n_digits);
-    if (writer) {
-        buffer_pos = writer->pos + (forward ? 0 : n_buffer);
-        assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
-        assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
-    }
-    else {
-        buffer_pos = forward ? 0 : n_buffer;
-    }
-
-    if (!writer) {
-        *maxchar = 127;
-    }
-
-    while ((len = GroupGenerator_next(&groupgen)) > 0) {
-        len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1));
-        n_zeros = Py_MAX(0, len - remaining);
-        n_chars = Py_MAX(0, Py_MIN(remaining, len));
-
-        /* Use n_zero zero's and n_chars chars */
-
-        /* Count only, don't do anything. */
-        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
-
-        /* Copy into the writer. */
-        InsertThousandsGrouping_fill(writer, &buffer_pos,
-                                     digits, &digits_pos,
-                                     n_chars, n_zeros,
-                                     use_separator ? thousands_sep : NULL,
-                                     thousands_sep_len, maxchar, forward);
-
-        /* Use a separator next time. */
-        use_separator = 1;
-
-        remaining -= n_chars;
-        min_width -= len;
-
-        if (remaining <= 0 && min_width <= 0) {
-            loop_broken = 1;
-            break;
-        }
-        min_width -= thousands_sep_len;
-    }
-    if (!loop_broken) {
-        /* We left the loop without using a break statement. */
-
-        len = Py_MAX(Py_MAX(remaining, min_width), 1);
-        n_zeros = Py_MAX(0, len - remaining);
-        n_chars = Py_MAX(0, Py_MIN(remaining, len));
-
-        /* Use n_zero zero's and n_chars chars */
-        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
-
-        /* Copy into the writer. */
-        InsertThousandsGrouping_fill(writer, &buffer_pos,
-                                     digits, &digits_pos,
-                                     n_chars, n_zeros,
-                                     use_separator ? thousands_sep : NULL,
-                                     thousands_sep_len, maxchar, forward);
-    }
-    return count;
-}
  
  Py_ssize_t
  PyUnicode_Count(PyObject *str,
@@ -10427,7 +10256,7 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
      assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
      assert(start >= 0);
      assert(start + length <= PyUnicode_GET_LENGTH(unicode));
-    unicode_fill(kind, data, fill_char, start, length);
+    _PyUnicode_Fill(kind, data, fill_char, start, length);
  }
  
  Py_ssize_t
@@ -10496,9 +10325,10 @@ pad(PyObject *self,
      kind = PyUnicode_KIND(u);
      data = PyUnicode_DATA(u);
      if (left)
-        unicode_fill(kind, data, fill, 0, left);
+        _PyUnicode_Fill(kind, data, fill, 0, left);
      if (right)
-        unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
+        _PyUnicode_Fill(kind, data, fill,
+                        left + _PyUnicode_LENGTH(self), right);
      _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
      assert(_PyUnicode_CheckConsistency(u, 1));
      return u;
@@ -11910,7 +11740,7 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
              if (tabsize > 0) {
                  incr = tabsize - (line_pos % tabsize);
                  line_pos += incr;
-                unicode_fill(kind, dest_data, ' ', j, incr);
+                _PyUnicode_Fill(kind, dest_data, ' ', j, incr);
                  j += incr;
              }
          }
@@ -15405,7 +15235,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
      /* Pad left with the fill character if needed */
      if (arg->width > len && !(arg->flags & F_LJUST)) {
          sublen = arg->width - len;
-        unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen);
+        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
          writer->pos += sublen;
          arg->width = len;
      }
@@ -15437,7 +15267,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
      /* Pad right with the fill character if needed */
      if (arg->width > len) {
          sublen = arg->width - len;
-        unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen);
+        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
          writer->pos += sublen;
      }
      return 0;
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj

index 5ceddf759b8f3bc124ca33cd812169c90df96f6f..c4a11fa9b242bdfb6f6edc62bd827886ba09f222 100644 (file)
--- a/PCbuild/_freeze_module.vcxproj
+++ b/PCbuild/_freeze_module.vcxproj
@@ -165,6 +165,7 @@
      <ClCompile Include="..\Objects\typeobject.c" />
      <ClCompile Include="..\Objects\typevarobject.c" />
      <ClCompile Include="..\Objects\unicodectype.c" />
+    <ClCompile Include="..\Objects\unicode_formatter.c" />
      <ClCompile Include="..\Objects\unicodeobject.c" />
      <ClCompile Include="..\Objects\unionobject.c" />
      <ClCompile Include="..\Objects\weakrefobject.c" />
@@ -209,7 +210,6 @@
      <ClCompile Include="..\Python\errors.c" />
      <ClCompile Include="..\Python\fileutils.c" />
      <ClCompile Include="..\Python\flowgraph.c" />
-    <ClCompile Include="..\Python\formatter_unicode.c" />
      <ClCompile Include="..\Python\frame.c" />
      <ClCompile Include="..\Python\future.c" />
      <ClCompile Include="..\Python\gc.c" />
diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters

index 332d466b1f7409b0039ec94cd0cc347e4ed4cc82..7bbbec2c9887bf37bae656c06b62d143cd84d909 100644 (file)
--- a/PCbuild/_freeze_module.vcxproj.filters
+++ b/PCbuild/_freeze_module.vcxproj.filters
@@ -160,9 +160,6 @@
      <ClCompile Include="..\Python\flowgraph.c">
        <Filter>Source Files</Filter>
      </ClCompile>
-    <ClCompile Include="..\Python\formatter_unicode.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
      <ClCompile Include="..\Python\frame.c">
        <Filter>Source Files</Filter>
      </ClCompile>
@@ -487,6 +484,9 @@
      <ClCompile Include="..\Objects\unicodectype.c">
        <Filter>Source Files</Filter>
      </ClCompile>
+    <ClCompile Include="..\Objects\unicode_formatter.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
      <ClCompile Include="..\Objects\unicodeobject.c">
        <Filter>Source Files</Filter>
      </ClCompile>
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj

index 71f508a7e8b5fbd63f806d55774f063249adc4a6..e2e1e415827e6f6d37403979d9fafef893c9a9dc 100644 (file)
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -559,6 +559,7 @@
      <ClCompile Include="..\Objects\typeobject.c" />
      <ClCompile Include="..\Objects\typevarobject.c" />
      <ClCompile Include="..\Objects\unicodectype.c" />
+    <ClCompile Include="..\Objects\unicode_formatter.c" />
      <ClCompile Include="..\Objects\unicodeobject.c" />
      <ClCompile Include="..\Objects\unionobject.c" />
      <ClCompile Include="..\Objects\weakrefobject.c" />
@@ -605,7 +606,6 @@
      <ClCompile Include="..\Python\errors.c" />
      <ClCompile Include="..\Python\fileutils.c" />
      <ClCompile Include="..\Python\flowgraph.c" />
-    <ClCompile Include="..\Python\formatter_unicode.c" />
      <ClCompile Include="..\Python\frame.c" />
      <ClCompile Include="..\Python\frozen.c">
        <AdditionalIncludeDirectories>$(GeneratedFrozenModulesDir)Python;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters

index 547e9ee1abf3704f76b16d0d6ecb506c6139d7f0..7e7ed9c2ae6c4393936cd0b3389cbc9de301e261 100644 (file)
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -1274,6 +1274,9 @@
      <ClCompile Include="..\Objects\unicodectype.c">
        <Filter>Objects</Filter>
      </ClCompile>
+    <ClCompile Include="..\Objects\unicode_formatter.c">
+      <Filter>Objects</Filter>
+    </ClCompile>
      <ClCompile Include="..\Objects\unicodeobject.c">
        <Filter>Objects</Filter>
      </ClCompile>
@@ -1382,9 +1385,6 @@
      <ClCompile Include="..\Python\flowgraph.c">
        <Filter>Python</Filter>
      </ClCompile>
-    <ClCompile Include="..\Python\formatter_unicode.c">
-      <Filter>Python</Filter>
-    </ClCompile>
      <ClCompile Include="..\Python\frozen.c">
        <Filter>Python</Filter>
      </ClCompile>
diff --git a/Python/fileutils.c b/Python/fileutils.c

index 2a3f12d4e872f89205f86d1e6101466d1948c58d..b808229716fd9ca6a0fb201a579f104a6fed89ff 100644 (file)
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -2,6 +2,7 @@
  #include "pycore_fileutils.h"     // fileutils definitions
  #include "pycore_runtime.h"       // _PyRuntime
  #include "pycore_pystate.h"       // _Py_AssertHoldsTstate()
+#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
  #include "osdefs.h"               // SEP
  
  #include <stdlib.h>               // mbstowcs()
@@ -50,9 +51,6 @@ extern int winerror_to_errno(int);
  int _Py_open_cloexec_works = -1;
  #endif
  
-// The value must be the same in unicodeobject.c.
-#define MAX_UNICODE 0x10ffff
-
  // mbstowcs() and mbrtowc() errors
  static const size_t DECODE_ERROR = ((size_t)-1);
  #ifdef HAVE_MBRTOWC
@@ -123,7 +121,7 @@ is_valid_wide_char(wchar_t ch)
  {
  #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
      /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
-       for non-Unicode locales, which makes values higher than MAX_UNICODE
+       for non-Unicode locales, which makes values higher than _Py_MAX_UNICODE
         possibly valid. */
      return 1;
  #endif
@@ -132,7 +130,7 @@ is_valid_wide_char(wchar_t ch)
          return 0;
      }
  #if SIZEOF_WCHAR_T > 2
-    if (ch > MAX_UNICODE) {
+    if (ch > _Py_MAX_UNICODE) {
          // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
          // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
          // it creates characters outside the [U+0000; U+10ffff] range:
author	Victor Stinner <vstinner@python.org>
	Wed, 8 Oct 2025 12:56:00 +0000 (14:56 +0200)
committer	GitHub <noreply@github.com>
	Wed, 8 Oct 2025 12:56:00 +0000 (14:56 +0200)
Include/internal/pycore_unicodeobject.h		patch \| blob \| blame \| history
Makefile.pre.in		patch \| blob \| blame \| history
Objects/stringlib/localeutil.h	[deleted file]	patch \| blob \| blame \| history
Objects/unicode_formatter.c	[moved from Python/formatter_unicode.c with 88% similarity]	patch \| blob \| blame \| history
Objects/unicodeobject.c		patch \| blob \| blame \| history
PCbuild/_freeze_module.vcxproj		patch \| blob \| blame \| history
PCbuild/_freeze_module.vcxproj.filters		patch \| blob \| blame \| history
PCbuild/pythoncore.vcxproj		patch \| blob \| blame \| history
PCbuild/pythoncore.vcxproj.filters		patch \| blob \| blame \| history
Python/fileutils.c		patch \| blob \| blame \| history