git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-1635741: _PyUnicode_Name_CAPI moves to internal C API (GH-22713)
author Victor Stinner <vstinner@python.org>
Mon, 26 Oct 2020 15:43:47 +0000 (16:43 +0100)
committer GitHub <noreply@github.com>
Mon, 26 Oct 2020 15:43:47 +0000 (16:43 +0100)
The private _PyUnicode_Name_CAPI structure of the PyCapsule API
unicodedata.ucnhash_CAPI moves to the internal C API. Moreover, the
structure gets a new state member which must be passed to the
getcode() and getname() functions.

* Move Include/ucnhash.h to Include/internal/pycore_ucnhash.h
* unicodedata module is now built with Py_BUILD_CORE_MODULE.
* unicodedata: move hashAPI variable into unicodedata_module_state.

Doc/whatsnew/3.10.rst
Include/internal/pycore_ucnhash.h [moved from Include/ucnhash.h with 60% similarity]
Makefile.pre.in
Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst [new file with mode: 0644]
Modules/Setup
Modules/unicodedata.c
Objects/unicodeobject.c
PCbuild/pythoncore.vcxproj
PCbuild/pythoncore.vcxproj.filters
Python/codecs.c
setup.py

index 6206c949cc59bbe3fca5f4cd1795953a78095484..581d3a57e84573845f1cde8a494340f4b2e8160d 100644 (file)
@@ -407,6 +407,12 @@ Porting to Python 3.10
   Unicode object without initial data.
   (Contributed by Inada Naoki in :issue:`36346`.)
 
+* The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
+  ``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover,
+  the structure gets a new ``state`` member which must be passed to the
+  ``getcode()`` and ``getname()`` functions.
+  (Contributed by Victor Stinner in :issue:`1635741`.)
+
 Deprecated
 ----------
 
similarity index 60%
rename from Include/ucnhash.h
rename to Include/internal/pycore_ucnhash.h
index 45362e997dfa118f30d2c5bb4406105f744ce53b..380b9415d4280077a3c0bf0fe0bacb82734c8378 100644 (file)
@@ -1,11 +1,14 @@
 /* Unicode name database interface */
-#ifndef Py_LIMITED_API
-#ifndef Py_UCNHASH_H
-#define Py_UCNHASH_H
+#ifndef Py_INTERNAL_UCNHASH_H
+#define Py_INTERNAL_UCNHASH_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
 /* revised ucnhash CAPI interface (exported through a "wrapper") */
 
 #define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI"
@@ -15,16 +18,22 @@ typedef struct {
     /* Size of this struct */
     int size;
 
+    // state which must be passed as the first parameter to getname()
+    // and getcode()
+    void *state;
+
     /* Get name for a given character code.  Returns non-zero if
        success, zero if not.  Does not set Python exceptions.
        If self is NULL, data come from the default version of the database.
        If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
-    int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
+    int (*getname)(void *state, PyObject *self, Py_UCS4 code,
+                   char* buffer, int buflen,
                    int with_alias_and_seq);
 
     /* Get character code for a given name.  Same error handling
        as for getname. */
-    int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code,
+    int (*getcode)(void *state, PyObject *self,
+                   const char* name, int namelen, Py_UCS4* code,
                    int with_named_seq);
 
 } _PyUnicode_Name_CAPI;
@@ -32,5 +41,4 @@ typedef struct {
 #ifdef __cplusplus
 }
 #endif
-#endif /* !Py_UCNHASH_H */
-#endif /* !Py_LIMITED_API */
+#endif /* !Py_INTERNAL_UCNHASH_H */
index 921bd08ea505d6c2e58e973aa5a4de5f46e2dfdd..fe226ce45d8e9cbe6e17010c0496314643df60e7 100644 (file)
@@ -1065,7 +1065,6 @@ PYTHON_HEADERS= \
                $(srcdir)/Include/traceback.h \
                $(srcdir)/Include/tracemalloc.h \
                $(srcdir)/Include/tupleobject.h \
-               $(srcdir)/Include/ucnhash.h \
                $(srcdir)/Include/unicodeobject.h \
                $(srcdir)/Include/warnings.h \
                $(srcdir)/Include/weakrefobject.h \
@@ -1129,6 +1128,7 @@ PYTHON_HEADERS= \
                $(srcdir)/Include/internal/pycore_sysmodule.h \
                $(srcdir)/Include/internal/pycore_traceback.h \
                $(srcdir)/Include/internal/pycore_tuple.h \
+               $(srcdir)/Include/internal/pycore_ucnhash.h \
                $(srcdir)/Include/internal/pycore_unionobject.h \
                $(srcdir)/Include/internal/pycore_warnings.h \
                $(DTRACE_HEADERS)
diff --git a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst
new file mode 100644 (file)
index 0000000..5272ad5
--- /dev/null
@@ -0,0 +1,4 @@
+The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
+``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the
+structure gets a new ``state`` member which must be passed to the
+``getcode()`` and ``getname()`` functions. Patch by Victor Stinner.
index 87f3a7cb43a0257bcba8ffc15dc70aca7458736c..6f9bb813cef273283024f718e93dc83d70a6e166 100644 (file)
@@ -185,7 +185,7 @@ _symtable symtablemodule.c
 #_json -I$(srcdir)/Include/internal -DPy_BUILD_CORE_BUILTIN _json.c    # _json speedups
 #_statistics _statisticsmodule.c # statistics accelerator
 
-#unicodedata unicodedata.c    # static Unicode character database
+#unicodedata unicodedata.c -DPy_BUILD_CORE_BUILTIN   # static Unicode character database
 
 
 # Modules with some UNIX dependencies -- on by default:
index 941fd2faa742a60542c7e57ae847ceb3fd01678e..bfd8ab503c8ccdf934aba2d4dec600fe61910d38 100644 (file)
@@ -16,7 +16,7 @@
 #define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
-#include "ucnhash.h"
+#include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
 #include "structmember.h"         // PyMemberDef
 
 #include <stdbool.h>
@@ -97,6 +97,8 @@ typedef struct {
     // Borrowed reference to &UCD_Type. It is used to prepare the code
     // to convert the UCD_Type static type to a heap type.
     PyTypeObject *ucd_type;
+
+    _PyUnicode_Name_CAPI capi;
 } unicodedata_module_state;
 
 // bpo-1635741: Temporary global state until the unicodedata module
@@ -1180,10 +1182,11 @@ _getucname(unicodedata_module_state *state, PyObject *self,
 }
 
 static int
-capi_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
+capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code,
+               char* buffer, int buflen,
                int with_alias_and_seq)
 {
-    unicodedata_module_state *state = &global_module_state;
+    unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
     return _getucname(state, self, code, buffer, buflen, with_alias_and_seq);
 
 }
@@ -1323,21 +1326,15 @@ _getcode(unicodedata_module_state *state, PyObject* self,
 }
 
 static int
-capi_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code,
+capi_getcode(void *state_raw, PyObject* self,
+             const char* name, int namelen, Py_UCS4* code,
              int with_named_seq)
 {
-    unicodedata_module_state *state = &global_module_state;
+    unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
     return _getcode(state, self, name, namelen, code, with_named_seq);
 
 }
 
-static const _PyUnicode_Name_CAPI hashAPI =
-{
-    sizeof(_PyUnicode_Name_CAPI),
-    capi_getucname,
-    capi_getcode
-};
-
 /* -------------------------------------------------------------------- */
 /* Python bindings */
 
@@ -1510,6 +1507,11 @@ PyInit_unicodedata(void)
     PyObject *m, *v;
     unicodedata_module_state *state = &global_module_state;
 
+    state->capi.size = sizeof(_PyUnicode_Name_CAPI);
+    state->capi.state = state;
+    state->capi.getname = capi_getucname;
+    state->capi.getcode = capi_getcode;
+
     Py_SET_TYPE(&UCD_Type, &PyType_Type);
     state->ucd_type = &UCD_Type;
 
@@ -1528,7 +1530,7 @@ PyInit_unicodedata(void)
         PyModule_AddObject(m, "ucd_3_2_0", v);
 
     /* Export C API */
-    v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL);
+    v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL);
     if (v != NULL)
         PyModule_AddObject(m, "ucnhash_CAPI", v);
     return m;
index f963deb0201a48f0de7fac0477c2bc5ebe936b3e..ba48d35aa40b190677e1e18309912845be8b0d1c 100644 (file)
@@ -40,16 +40,16 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
-#include "pycore_abstract.h"       // _PyIndex_Check()
-#include "pycore_bytes_methods.h"  // _Py_bytes_lower()
-#include "pycore_initconfig.h"     // _PyStatus_OK()
-#include "pycore_interp.h"         // PyInterpreterState.fs_codec
-#include "pycore_object.h"         // _PyObject_GC_TRACK()
-#include "pycore_pathconfig.h"     // _Py_DumpPathConfig()
-#include "pycore_pylifecycle.h"    // _Py_SetFileSystemEncoding()
-#include "pycore_pystate.h"        // _PyInterpreterState_GET()
-#include "ucnhash.h"               // _PyUnicode_Name_CAPI
-#include "stringlib/eq.h"          // unicode_eq()
+#include "pycore_abstract.h"      // _PyIndex_Check()
+#include "pycore_bytes_methods.h" // _Py_bytes_lower()
+#include "pycore_initconfig.h"    // _PyStatus_OK()
+#include "pycore_interp.h"        // PyInterpreterState.fs_codec
+#include "pycore_object.h"        // _PyObject_GC_TRACK()
+#include "pycore_pathconfig.h"    // _Py_DumpPathConfig()
+#include "pycore_pylifecycle.h"   // _Py_SetFileSystemEncoding()
+#include "pycore_pystate.h"       // _PyInterpreterState_GET()
+#include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
+#include "stringlib/eq.h"         // unicode_eq()
 
 #ifdef MS_WINDOWS
 #include <windows.h>
@@ -6344,7 +6344,7 @@ PyUnicode_AsUTF16String(PyObject *unicode)
 
 /* --- Unicode Escape Codec ----------------------------------------------- */
 
-static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
+static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
 
 PyObject *
 _PyUnicode_DecodeUnicodeEscape(const char *s,
@@ -6497,11 +6497,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
 
             /* \N{name} */
         case 'N':
-            if (ucnhash_CAPI == NULL) {
+            if (ucnhash_capi == NULL) {
                 /* load the unicode data module */
-                ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
+                ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
                                                 PyUnicodeData_CAPSULE_NAME, 1);
-                if (ucnhash_CAPI == NULL) {
+                if (ucnhash_capi == NULL) {
                     PyErr_SetString(
                         PyExc_UnicodeError,
                         "\\N escapes not supported (can't load unicodedata module)"
@@ -6523,7 +6523,8 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
                     s++;
                     ch = 0xffffffff; /* in case 'getcode' messes up */
                     if (namelen <= INT_MAX &&
-                        ucnhash_CAPI->getcode(NULL, start, (int)namelen,
+                        ucnhash_capi->getcode(ucnhash_capi->state, NULL,
+                                              start, (int)namelen,
                                               &ch, 0)) {
                         assert(ch <= MAX_UNICODE);
                         WRITE_CHAR(ch);
index 266a193c1e86a33f46e5fd0f486d9b049143bdd8..600f33b2c6e3b5e347b944f5265ce6ac837c40e0 100644 (file)
     <ClInclude Include="..\Include\internal\pycore_sysmodule.h" />
     <ClInclude Include="..\Include\internal\pycore_traceback.h" />
     <ClInclude Include="..\Include\internal\pycore_tuple.h" />
+    <ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
     <ClInclude Include="..\Include\internal\pycore_unionobject.h" />
     <ClInclude Include="..\Include\internal\pycore_warnings.h" />
     <ClInclude Include="..\Include\interpreteridobject.h" />
     <ClInclude Include="..\Include\traceback.h" />
     <ClInclude Include="..\Include\tracemalloc.h" />
     <ClInclude Include="..\Include\tupleobject.h" />
-    <ClInclude Include="..\Include\ucnhash.h" />
     <ClInclude Include="..\Include\unicodeobject.h" />
     <ClInclude Include="..\Include\weakrefobject.h" />
     <ClInclude Include="..\Modules\_math.h" />
index 22d9b791576987483e0bd53a7f8e0991ea259500..75b91d8ed87f948fd9cae9a913148117472e0f91 100644 (file)
     <ClInclude Include="..\Include\tupleobject.h">
       <Filter>Include</Filter>
     </ClInclude>
-    <ClInclude Include="..\Include\ucnhash.h">
-      <Filter>Include</Filter>
-    </ClInclude>
     <ClInclude Include="..\Include\unicodeobject.h">
       <Filter>Include</Filter>
     </ClInclude>
     <ClInclude Include="..\Include\internal\pycore_tuple.h">
       <Filter>Include\internal</Filter>
     </ClInclude>
+    <ClInclude Include="..\Include\internal\pycore_ucnhash.h">
+      <Filter>Include\internal</Filter>
+    </ClInclude>
     <ClInclude Include="..\Include\internal\pycore_unionobject.h">
       <Filter>Include\internal</Filter>
     </ClInclude>
index ade14187204f09ab771c9e4a29221bee35aa4cfd..62d1f3f3ac0d3c33d743435679fb6dd3b6c37ec1 100644 (file)
@@ -11,7 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
 #include "Python.h"
 #include "pycore_interp.h"        // PyInterpreterState.codec_search_path
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
-#include "ucnhash.h"
+#include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
 #include <ctype.h>
 
 const char *Py_hexdigits = "0123456789abcdef";
@@ -954,7 +954,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
     return Py_BuildValue("(Nn)", res, end);
 }
 
-static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
+static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
 
 PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
 {
@@ -976,17 +976,19 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        if (!ucnhash_CAPI) {
+        if (!ucnhash_capi) {
             /* load the unicode data module */
-            ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
+            ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
                                             PyUnicodeData_CAPSULE_NAME, 1);
-            if (!ucnhash_CAPI)
+            if (!ucnhash_capi) {
                 return NULL;
+            }
         }
         for (i = start, ressize = 0; i < end; ++i) {
             /* object is guaranteed to be "ready" */
             c = PyUnicode_READ_CHAR(object, i);
-            if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
+            if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
+                                      c, buffer, sizeof(buffer), 1)) {
                 replsize = 1+1+1+(int)strlen(buffer)+1;
             }
             else if (c >= 0x10000) {
@@ -1009,7 +1011,8 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
             i < end; ++i) {
             c = PyUnicode_READ_CHAR(object, i);
             *outp++ = '\\';
-            if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
+            if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
+                                      c, buffer, sizeof(buffer), 1)) {
                 *outp++ = 'N';
                 *outp++ = '{';
                 strcpy((char *)outp, buffer);
index d3fd7bca6438afd327cb85a956771b141ad8139d..8a4abe5a648fd931ff261c3fbfad647caf404c37 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -878,7 +878,8 @@ class PyBuildExt(build_ext):
         self.add(Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']))
         # static Unicode character database
         self.add(Extension('unicodedata', ['unicodedata.c'],
-                           depends=['unicodedata_db.h', 'unicodename_db.h']))
+                           depends=['unicodedata_db.h', 'unicodename_db.h'],
+                           extra_compile_args=['-DPy_BUILD_CORE_MODULE']))
         # _opcode module
         self.add(Extension('_opcode', ['_opcode.c']))
         # asyncio speedups