gh-106320: Remove private _PyUnicode codecs C API functions (#106385)

author Victor Stinner <vstinner@python.org>

Tue, 4 Jul 2023 07:29:52 +0000 (09:29 +0200)

committer GitHub <noreply@github.com>

Tue, 4 Jul 2023 07:29:52 +0000 (07:29 +0000)
author Victor Stinner <vstinner@python.org>
Tue, 4 Jul 2023 07:29:52 +0000 (09:29 +0200)
committer GitHub <noreply@github.com>
Tue, 4 Jul 2023 07:29:52 +0000 (07:29 +0000)
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h

index fcd9c28e030c1f96a8fcf9e6fc7f6e01d5a9fde0..dc8f6437c0e2c4e95fd6345d966f7d6d775ee552 100644 (file)
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -461,112 +461,6 @@ PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
  
  #define _PyUnicode_AsString PyUnicode_AsUTF8
  
-/* --- UTF-7 Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
-    PyObject *unicode,          /* Unicode object */
-    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
-    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
-    const char *errors          /* error handling */
-    );
-
-/* --- UTF-8 Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
-    PyObject *unicode,
-    const char *errors);
-
-/* --- UTF-32 Codecs ------------------------------------------------------ */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
-    PyObject *object,           /* Unicode object */
-    const char *errors,         /* error handling */
-    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
-    );
-
-/* --- UTF-16 Codecs ------------------------------------------------------ */
-
-/* Returns a Python string object holding the UTF-16 encoded value of
-   the Unicode data.
-
-   If byteorder is not 0, output is written according to the following
-   byte order:
-
-   byteorder == -1: little endian
-   byteorder == 0:  native byte order (writes a BOM mark)
-   byteorder == 1:  big endian
-
-   If byteorder is 0, the output string will always start with the
-   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
-   prepended.
-*/
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
-    PyObject* unicode,          /* Unicode object */
-    const char *errors,         /* error handling */
-    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
-    );
-
-/* --- Unicode-Escape Codecs ---------------------------------------------- */
-
-/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
-        const char *string,     /* Unicode-Escape encoded string */
-        Py_ssize_t length,      /* size of string */
-        const char *errors,     /* error handling */
-        Py_ssize_t *consumed    /* bytes consumed */
-);
-/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
-   chars. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
-        const char *string,     /* Unicode-Escape encoded string */
-        Py_ssize_t length,      /* size of string */
-        const char *errors,     /* error handling */
-        Py_ssize_t *consumed,   /* bytes consumed */
-        const char **first_invalid_escape  /* on return, points to first
-                                              invalid escaped char in
-                                              string. */
-);
-
-/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
-
-/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
-        const char *string,     /* Unicode-Escape encoded string */
-        Py_ssize_t length,      /* size of string */
-        const char *errors,     /* error handling */
-        Py_ssize_t *consumed    /* bytes consumed */
-);
-
-/* --- Latin-1 Codecs ----------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
-    PyObject* unicode,
-    const char* errors);
-
-/* --- ASCII Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
-    PyObject* unicode,
-    const char* errors);
-
-/* --- Character Map Codecs ----------------------------------------------- */
-
-/* Translate an Unicode object by applying a character mapping table to
-   it and return the resulting Unicode object.
-
-   The mapping table must map Unicode ordinal integers to Unicode strings,
-   Unicode ordinal integers or None (causing deletion of the character).
-
-   Mapping tables may be dictionaries or sequences. Unmapped character
-   ordinals (ones which cause a LookupError) are left untouched and
-   are copied as-is.
-*/
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
-    PyObject *unicode,          /* Unicode object */
-    PyObject *mapping,          /* encoding mapping */
-    const char *errors          /* error handling */
-    );
-
  /* --- Decimal Encoder ---------------------------------------------------- */
  
  /* Coverts a Unicode object holding a decimal value to an ASCII string
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h

index da01f57f962793f820c95be2cf28825f6dba475e..dd20ac19d413b8a4bff6a08dfab045aa290fe8a4 100644 (file)
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -177,6 +177,106 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
      Py_ssize_t start,
      Py_ssize_t end);
  
+/* --- UTF-7 Codecs ------------------------------------------------------- */
+
+extern PyObject* _PyUnicode_EncodeUTF7(
+    PyObject *unicode,          /* Unicode object */
+    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
+    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
+    const char *errors);        /* error handling */
+
+/* --- UTF-8 Codecs ------------------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
+    PyObject *unicode,
+    const char *errors);
+
+/* --- UTF-32 Codecs ------------------------------------------------------ */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
+    PyObject *object,           /* Unicode object */
+    const char *errors,         /* error handling */
+    int byteorder);             /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+
+/* --- UTF-16 Codecs ------------------------------------------------------ */
+
+/* Returns a Python string object holding the UTF-16 encoded value of
+   the Unicode data.
+
+   If byteorder is not 0, output is written according to the following
+   byte order:
+
+   byteorder == -1: little endian
+   byteorder == 0:  native byte order (writes a BOM mark)
+   byteorder == 1:  big endian
+
+   If byteorder is 0, the output string will always start with the
+   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
+   prepended.
+*/
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
+    PyObject* unicode,          /* Unicode object */
+    const char *errors,         /* error handling */
+    int byteorder);             /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+
+/* --- Unicode-Escape Codecs ---------------------------------------------- */
+
+/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
+extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful(
+    const char *string,     /* Unicode-Escape encoded string */
+    Py_ssize_t length,      /* size of string */
+    const char *errors,     /* error handling */
+    Py_ssize_t *consumed);  /* bytes consumed */
+
+/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+   chars. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
+    const char *string,     /* Unicode-Escape encoded string */
+    Py_ssize_t length,      /* size of string */
+    const char *errors,     /* error handling */
+    Py_ssize_t *consumed,   /* bytes consumed */
+    const char **first_invalid_escape); /* on return, points to first
+                                           invalid escaped char in
+                                           string. */
+
+/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
+
+/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
+extern PyObject* _PyUnicode_DecodeRawUnicodeEscapeStateful(
+    const char *string,     /* Unicode-Escape encoded string */
+    Py_ssize_t length,      /* size of string */
+    const char *errors,     /* error handling */
+    Py_ssize_t *consumed);  /* bytes consumed */
+
+/* --- Latin-1 Codecs ----------------------------------------------------- */
+
+extern PyObject* _PyUnicode_AsLatin1String(
+    PyObject* unicode,
+    const char* errors);
+
+/* --- ASCII Codecs ------------------------------------------------------- */
+
+extern PyObject* _PyUnicode_AsASCIIString(
+    PyObject* unicode,
+    const char* errors);
+
+/* --- Character Map Codecs ----------------------------------------------- */
+
+/* Translate an Unicode object by applying a character mapping table to
+   it and return the resulting Unicode object.
+
+   The mapping table must map Unicode ordinal integers to Unicode strings,
+   Unicode ordinal integers or None (causing deletion of the character).
+
+   Mapping tables may be dictionaries or sequences. Unmapped character
+   ordinals (ones which cause a LookupError) are left untouched and
+   are copied as-is.
+*/
+extern PyObject* _PyUnicode_EncodeCharmap(
+    PyObject *unicode,          /* Unicode object */
+    PyObject *mapping,          /* encoding mapping */
+    const char *errors);        /* error handling */
+
  /* --- Methods & Slots ---------------------------------------------------- */
  
  extern PyObject* _PyUnicode_JoinArray(
diff --git a/Parser/string_parser.c b/Parser/string_parser.c

index 20459e8946349496d3d5a8ca77c9fbae8f627cda..bc1f99d607ae4d6c71527c00abc13b862639ae1f 100644 (file)
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -1,6 +1,7 @@
  #include <stdbool.h>
  
  #include <Python.h>
+#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
  
  #include "tokenizer.h"
  #include "pegen.h"
author	Victor Stinner <vstinner@python.org>
	Tue, 4 Jul 2023 07:29:52 +0000 (09:29 +0200)
committer	GitHub <noreply@github.com>
	Tue, 4 Jul 2023 07:29:52 +0000 (07:29 +0000)
Include/cpython/unicodeobject.h		patch \| blob \| blame \| history
Include/internal/pycore_unicodeobject.h		patch \| blob \| blame \| history
Parser/string_parser.c		patch \| blob \| blame \| history