git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-103583: Add codecs and maps to _codecs_* module state (#103540)
author Erlend E. Aasland <erlend.aasland@protonmail.com>
Mon, 17 Apr 2023 00:41:25 +0000 (02:41 +0200)
committer GitHub <noreply@github.com>
Mon, 17 Apr 2023 00:41:25 +0000 (02:41 +0200)
Modules/cjkcodecs/_codecs_cn.c
Modules/cjkcodecs/_codecs_hk.c
Modules/cjkcodecs/_codecs_iso2022.c
Modules/cjkcodecs/_codecs_jp.c
Modules/cjkcodecs/_codecs_kr.c
Modules/cjkcodecs/_codecs_tw.c
Modules/cjkcodecs/cjkcodecs.h
Modules/cjkcodecs/multibytecodec.c
Tools/c-analyzer/cpython/globals-to-fix.tsv

index 8a62f7e257c6b19c6116f3e396772a50fe689422..e2c7908c9bb2753cddef80b34afd0f1e9a9fc405 100644 (file)
@@ -453,14 +453,14 @@ DECODER(hz)
 }
 
 
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(4)
   MAPPING_DECONLY(gb2312)
   MAPPING_DECONLY(gbkext)
   MAPPING_ENCONLY(gbcommon)
   MAPPING_ENCDEC(gb18030ext)
 END_MAPPINGS_LIST
 
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(4)
   CODEC_STATELESS(gb2312)
   CODEC_STATELESS(gbk)
   CODEC_STATELESS(gb18030)
index 4f21569a0ce73fc9c26baf53e85e906ea413dccc..43593b873733e65c36af906d9924b223d60c8448 100644 (file)
@@ -177,14 +177,13 @@ DECODER(big5hkscs)
     return 0;
 }
 
-
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(3)
   MAPPING_DECONLY(big5hkscs)
   MAPPING_ENCONLY(big5hkscs_bmp)
   MAPPING_ENCONLY(big5hkscs_nonbmp)
 END_MAPPINGS_LIST
 
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(1)
   CODEC_STATELESS_WINIT(big5hkscs)
 END_CODECS_LIST
 
index 7394cf67e0e7dd561f4c7c99d101ce5768fcec52..cf34752e16a52757c961dc6c07f45d4ddc219804 100644 (file)
@@ -1119,18 +1119,19 @@ static const struct iso2022_designation iso2022_jp_ext_designations[] = {
 CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
 
 
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(0)
   /* no mapping table here */
 END_MAPPINGS_LIST
 
-#define ISO2022_CODEC(variation) {              \
+#define ISO2022_CODEC(variation)                \
+NEXT_CODEC = (MultibyteCodec){                  \
     "iso2022_" #variation,                      \
     &iso2022_##variation##_config,              \
     iso2022_codec_init,                         \
     _STATEFUL_METHODS(iso2022)                  \
-},
+};
 
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(7)
   ISO2022_CODEC(kr)
   ISO2022_CODEC(jp)
   ISO2022_CODEC(jp_1)
index 3a332953b957cbdd968d8d293a6ed4a1afa47905..7a8b78a23592eacff699182c5b6421edc71abb7a 100644 (file)
@@ -733,7 +733,7 @@ DECODER(shift_jis_2004)
 }
 
 
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(11)
   MAPPING_DECONLY(jisx0208)
   MAPPING_DECONLY(jisx0212)
   MAPPING_ENCONLY(jisxcommon)
@@ -747,14 +747,19 @@ BEGIN_MAPPINGS_LIST
   MAPPING_ENCDEC(cp932ext)
 END_MAPPINGS_LIST
 
-BEGIN_CODECS_LIST
+#define CODEC_CUSTOM(NAME, N, METH) /* stateless entry with an explicit name; N is stored as (void *)N, methods come from METH */ \
+    NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, _STATELESS_METHODS(METH)};
+
+BEGIN_CODECS_LIST(7)
   CODEC_STATELESS(shift_jis)
   CODEC_STATELESS(cp932)
   CODEC_STATELESS(euc_jp)
   CODEC_STATELESS(shift_jis_2004)
   CODEC_STATELESS(euc_jis_2004)
-  { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
-  { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
+  CODEC_CUSTOM("euc_jisx0213", 2000, euc_jis_2004)
+  CODEC_CUSTOM("shift_jisx0213", 2000, shift_jis_2004)
 END_CODECS_LIST
 
+#undef CODEC_CUSTOM
+
 I_AM_A_MODULE_FOR(jp)
index 72641e495af0b026db332003b045eb04dce5bdc2..fd9a9fd92db1fdd0859161c4465a31c16dfd0fd8 100644 (file)
@@ -453,13 +453,13 @@ DECODER(johab)
 #undef FILL
 
 
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(3)
   MAPPING_DECONLY(ksx1001)
   MAPPING_ENCONLY(cp949)
   MAPPING_DECONLY(cp949ext)
 END_MAPPINGS_LIST
 
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(3)
   CODEC_STATELESS(euc_kr)
   CODEC_STATELESS(cp949)
   CODEC_STATELESS(johab)
index 722b26b128a708c13d92ff1252d66588479c71d4..3e44099141443473318e28f18b67ff223d437314 100644 (file)
@@ -130,12 +130,12 @@ DECODER(cp950)
 
 
 
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(2)
   MAPPING_ENCDEC(big5)
   MAPPING_ENCDEC(cp950ext)
 END_MAPPINGS_LIST
 
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(2)
   CODEC_STATELESS(big5)
   CODEC_STATELESS(cp950)
 END_CODECS_LIST
index d9aeec2ff40b08b5e1b1373e944af8ec9d9a1931..646a9fd255ce201f1dea67950598139a61efeaf2 100644 (file)
@@ -60,8 +60,20 @@ struct pair_encodemap {
     DBCHAR code;
 };
 
-static const MultibyteCodec *codec_list;
-static const struct dbcs_map *mapping_list;
+typedef struct {  /* Per-module state: the mapping and codec tables of one _codecs_* module. */
+    int num_mappings;  /* number of entries in mapping_list */
+    int num_codecs;  /* number of entries in codec_list */
+    struct dbcs_map *mapping_list;  /* allocated in add_mappings(), released in _cjk_free() */
+    MultibyteCodec *codec_list;  /* allocated in add_codecs(), released in _cjk_free() */
+} cjkcodecs_module_state;
+
+static inline cjkcodecs_module_state *  /* Return mod's module state viewed as cjkcodecs_module_state. */
+get_module_state(PyObject *mod)
+{
+    void *state = PyModule_GetState(mod);
+    assert(state != NULL);  /* checked only when assertions are enabled (no NDEBUG) */
+    return (cjkcodecs_module_state *)state;
+}
 
 #define CODEC_INIT(encoding)                                            \
     static int encoding##_codec_init(const void *config)
@@ -202,16 +214,42 @@ static const struct dbcs_map *mapping_list;
 #define TRYMAP_DEC(charset, assi, c1, c2)                     \
     _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
 
-#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
-#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
-#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
-#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
-#define END_MAPPINGS_LIST                               \
-    {"", NULL, NULL} };                                 \
-    static const struct dbcs_map *mapping_list =        \
-        (const struct dbcs_map *)_mapping_list;
+#define BEGIN_MAPPINGS_LIST(NUM)  /* opens add_mappings() */        \
+static int  /* 0 on success, -1 on failure */                       \
+add_mappings(cjkcodecs_module_state *st)                            \
+{                                                                   \
+    int idx = 0;  /* next free slot */                              \
+    (void)idx;  /* no-op reference to idx */                        \
+    st->num_mappings = NUM;                                         \
+    st->mapping_list = PyMem_Calloc(NUM, sizeof(struct dbcs_map));  \
+    if (st->mapping_list == NULL) {                                 \
+        return -1;  /* allocation failed */                         \
+    }
+
+#define MAPPING_ENCONLY(enc) /* store an entry named #enc with only the encode map set */ \
+    st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, NULL};
+#define MAPPING_DECONLY(enc) /* store an entry named #enc with only the decode map set */ \
+    st->mapping_list[idx++] = (struct dbcs_map){#enc, NULL, (void*)enc##_decmap};
+#define MAPPING_ENCDEC(enc) /* store an entry named #enc with both encode and decode maps */ \
+    st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, (void*)enc##_decmap};
+
+#define END_MAPPINGS_LIST  /* closes add_mappings() */ \
+    assert(st->num_mappings == idx);  /* NUM must equal the number of MAPPING_ entries written */ \
+    return 0;                           \
+}
+
+#define BEGIN_CODECS_LIST(NUM)  /* opens add_codecs() */        \
+static int  /* 0 on success, -1 on failure */                   \
+add_codecs(cjkcodecs_module_state *st)                          \
+{                                                               \
+    int idx = 0;  /* next free slot */                          \
+    (void)idx;  /* no-op reference to idx */                    \
+    st->num_codecs = NUM;                                       \
+    st->codec_list = PyMem_Calloc(NUM, sizeof(MultibyteCodec)); \
+    if (st->codec_list == NULL) {                               \
+        return -1;  /* allocation failed */                     \
+    }
 
-#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
 #define _STATEFUL_METHODS(enc)          \
     enc##_encode,                       \
     enc##_encode_init,                  \
@@ -222,23 +260,21 @@ static const struct dbcs_map *mapping_list;
 #define _STATELESS_METHODS(enc)         \
     enc##_encode, NULL, NULL,           \
     enc##_decode, NULL, NULL,
-#define CODEC_STATEFUL(enc) {           \
-    #enc, NULL, NULL,                   \
-    _STATEFUL_METHODS(enc)              \
-},
-#define CODEC_STATELESS(enc) {          \
-    #enc, NULL, NULL,                   \
-    _STATELESS_METHODS(enc)             \
-},
-#define CODEC_STATELESS_WINIT(enc) {    \
-    #enc, NULL,                         \
-    enc##_codec_init,                   \
-    _STATELESS_METHODS(enc)             \
-},
-#define END_CODECS_LIST                                 \
-    {"", NULL,} };                                      \
-    static const MultibyteCodec *codec_list =           \
-        (const MultibyteCodec *)_codec_list;
+
+#define NEXT_CODEC /* lvalue for the next codec_list slot; post-increments idx */ \
+    st->codec_list[idx++]
+
+#define CODEC_STATEFUL(enc) /* store an entry named #enc built from _STATEFUL_METHODS(enc) */ \
+    NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATEFUL_METHODS(enc)};
+#define CODEC_STATELESS(enc) /* store an entry named #enc built from _STATELESS_METHODS(enc) */ \
+    NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATELESS_METHODS(enc)};
+#define CODEC_STATELESS_WINIT(enc) /* like CODEC_STATELESS, but the third field is enc##_codec_init */ \
+    NEXT_CODEC = (MultibyteCodec){#enc, NULL, enc##_codec_init, _STATELESS_METHODS(enc)};
+
+#define END_CODECS_LIST  /* closes add_codecs() */ \
+    assert(st->num_codecs == idx);  /* NUM must equal the number of codec entries written */ \
+    return 0;                       \
+}
 
 
 
@@ -249,53 +285,70 @@ getmultibytecodec(void)
 }
 
 static PyObject *
-getcodec(PyObject *self, PyObject *encoding)
+_getcodec(const MultibyteCodec *codec)
 {
-    PyObject *codecobj, *r, *cofunc;
-    const MultibyteCodec *codec;
-    const char *enc;
-
-    if (!PyUnicode_Check(encoding)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "encoding name must be a string.");
+    PyObject *cofunc = getmultibytecodec();
+    if (cofunc == NULL) {
         return NULL;
     }
-    enc = PyUnicode_AsUTF8(encoding);
-    if (enc == NULL)
-        return NULL;
 
-    cofunc = getmultibytecodec();
-    if (cofunc == NULL)
+    PyObject *codecobj = PyCapsule_New((void *)codec,
+                                       PyMultibyteCodec_CAPSULE_NAME,
+                                       NULL);
+    if (codecobj == NULL) {
+        Py_DECREF(cofunc);
         return NULL;
+    }
 
-    for (codec = codec_list; codec->encoding[0]; codec++)
-        if (strcmp(codec->encoding, enc) == 0)
-            break;
+    PyObject *res = PyObject_CallOneArg(cofunc, codecobj);
+    Py_DECREF(codecobj);
+    Py_DECREF(cofunc);
+    return res;
+}
 
-    if (codec->encoding[0] == '\0') {
-        PyErr_SetString(PyExc_LookupError,
-                        "no such codec is supported.");
+static PyObject *
+getcodec(PyObject *self, PyObject *encoding)
+{
+    if (!PyUnicode_Check(encoding)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "encoding name must be a string.");
         return NULL;
     }
-
-    codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
-    if (codecobj == NULL)
+    const char *enc = PyUnicode_AsUTF8(encoding);
+    if (enc == NULL) {
         return NULL;
+    }
 
-    r = PyObject_CallOneArg(cofunc, codecobj);
-    Py_DECREF(codecobj);
-    Py_DECREF(cofunc);
+    cjkcodecs_module_state *st = get_module_state(self);
+    for (int i = 0; i < st->num_codecs; i++) {
+        const MultibyteCodec *codec = &st->codec_list[i];
+        if (strcmp(codec->encoding, enc) == 0) {
+            return _getcodec(codec);
+        }
+    }
 
-    return r;
+    PyErr_SetString(PyExc_LookupError,
+                    "no such codec is supported.");
+    return NULL;
 }
 
+static int add_mappings(cjkcodecs_module_state *);
+static int add_codecs(cjkcodecs_module_state *);
 
 static int
 register_maps(PyObject *module)
 {
-    const struct dbcs_map *h;
+    // Init module state.
+    cjkcodecs_module_state *st = get_module_state(module);
+    if (add_mappings(st) < 0) {
+        return -1;
+    }
+    if (add_codecs(st) < 0) {
+        return -1;
+    }
 
-    for (h = mapping_list; h->charset[0] != '\0'; h++) {
+    for (int i = 0; i < st->num_mappings; i++) {
+        const struct dbcs_map *h = &st->mapping_list[i];
         char mhname[256] = "__map_";
         strcpy(mhname + sizeof("__map_") - 1, h->charset);
 
@@ -394,6 +447,13 @@ _cjk_exec(PyObject *module)
     return register_maps(module);
 }
 
+static void  /* Installed as m_free in the module definition: releases the tables held in the module state. */
+_cjk_free(void *mod)
+{
+    cjkcodecs_module_state *st = get_module_state((PyObject *)mod);
+    PyMem_Free(st->mapping_list);  /* allocated by add_mappings() */
+    PyMem_Free(st->codec_list);  /* allocated by add_codecs() */
+}
 
 static struct PyMethodDef _cjk_methods[] = {
     {"getcodec", (PyCFunction)getcodec, METH_O, ""},
@@ -409,9 +469,10 @@ static PyModuleDef_Slot _cjk_slots[] = {
     static struct PyModuleDef _cjk_module = {                           \
         PyModuleDef_HEAD_INIT,                                          \
         .m_name = "_codecs_"#loc,                                       \
-        .m_size = 0,                                                    \
+        .m_size = sizeof(cjkcodecs_module_state),                       \
         .m_methods = _cjk_methods,                                      \
         .m_slots = _cjk_slots,                                          \
+        .m_free = _cjk_free,                                            \
     };                                                                  \
                                                                         \
     PyMODINIT_FUNC                                                      \
index 8564494f6262fb3d1e75f5f6d20b0b50def8f4b5..55778cdb59e4dc6b173e3efebcb1d457ee33215c 100644 (file)
@@ -19,26 +19,27 @@ typedef struct {
     PyTypeObject *writer_type;
     PyTypeObject *multibytecodec_type;
     PyObject *str_write;
-} _multibytecodec_state;
+} module_state;
 
-static _multibytecodec_state *
-_multibytecodec_get_state(PyObject *module)
+static module_state *
+get_module_state(PyObject *module)
 {
-    _multibytecodec_state *state = PyModule_GetState(module);
+    module_state *state = PyModule_GetState(module);
     assert(state != NULL);
     return state;
 }
 
 static struct PyModuleDef _multibytecodecmodule;
-static _multibytecodec_state *
-_multibyte_codec_find_state_by_type(PyTypeObject *type)
+
+static module_state *
+find_state_by_def(PyTypeObject *type)
 {
     PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
     assert(module != NULL);
-    return _multibytecodec_get_state(module);
+    return get_module_state(module);
 }
 
-#define clinic_get_state() _multibyte_codec_find_state_by_type(type)
+#define clinic_get_state() find_state_by_def(type)
 /*[clinic input]
 module _multibytecodec
 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
@@ -1040,7 +1041,7 @@ mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     if (codec == NULL)
         goto errorexit;
 
-    _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
+    module_state *state = find_state_by_def(type);
     if (!MultibyteCodec_Check(state, codec)) {
         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
         goto errorexit;
@@ -1315,7 +1316,7 @@ mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     if (codec == NULL)
         goto errorexit;
 
-    _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
+    module_state *state = find_state_by_def(type);
     if (!MultibyteCodec_Check(state, codec)) {
         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
         goto errorexit;
@@ -1630,7 +1631,7 @@ mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     if (codec == NULL)
         goto errorexit;
 
-    _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
+    module_state *state = find_state_by_def(type);
     if (!MultibyteCodec_Check(state, codec)) {
         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
         goto errorexit;
@@ -1735,7 +1736,7 @@ _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *se
                                                  PyObject *strobj)
 /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
 {
-    _multibytecodec_state *state = PyType_GetModuleState(cls);
+    module_state *state = PyType_GetModuleState(cls);
     assert(state != NULL);
     if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
         return NULL;
@@ -1766,7 +1767,7 @@ _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObjec
         return NULL;
     }
 
-    _multibytecodec_state *state = PyType_GetModuleState(cls);
+    module_state *state = PyType_GetModuleState(cls);
     assert(state != NULL);
     for (i = 0; i < PySequence_Length(lines); i++) {
         /* length can be changed even within this loop */
@@ -1817,7 +1818,7 @@ _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *se
 
     assert(PyBytes_Check(pwrt));
 
-    _multibytecodec_state *state = PyType_GetModuleState(cls);
+    module_state *state = PyType_GetModuleState(cls);
     assert(state != NULL);
 
     if (PyBytes_Size(pwrt) > 0) {
@@ -1853,7 +1854,7 @@ mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     if (codec == NULL)
         goto errorexit;
 
-    _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
+    module_state *state = find_state_by_def(type);
     if (!MultibyteCodec_Check(state, codec)) {
         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
         goto errorexit;
@@ -1963,7 +1964,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg)
     if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
         return NULL;
 
-    _multibytecodec_state *state = _multibytecodec_get_state(module);
+    module_state *state = get_module_state(module);
     self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
     if (self == NULL)
         return NULL;
@@ -1976,7 +1977,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg)
 static int
 _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
 {
-    _multibytecodec_state *state = _multibytecodec_get_state(mod);
+    module_state *state = get_module_state(mod);
     Py_VISIT(state->multibytecodec_type);
     Py_VISIT(state->encoder_type);
     Py_VISIT(state->decoder_type);
@@ -1988,7 +1989,7 @@ _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
 static int
 _multibytecodec_clear(PyObject *mod)
 {
-    _multibytecodec_state *state = _multibytecodec_get_state(mod);
+    module_state *state = get_module_state(mod);
     Py_CLEAR(state->multibytecodec_type);
     Py_CLEAR(state->encoder_type);
     Py_CLEAR(state->decoder_type);
@@ -2022,7 +2023,7 @@ _multibytecodec_free(void *mod)
 static int
 _multibytecodec_exec(PyObject *mod)
 {
-    _multibytecodec_state *state = _multibytecodec_get_state(mod);
+    module_state *state = get_module_state(mod);
     state->str_write = PyUnicode_InternFromString("write");
     if (state->str_write == NULL) {
         return -1;
@@ -2056,7 +2057,7 @@ static PyModuleDef_Slot _multibytecodec_slots[] = {
 static struct PyModuleDef _multibytecodecmodule = {
     .m_base = PyModuleDef_HEAD_INIT,
     .m_name = "_multibytecodec",
-    .m_size = sizeof(_multibytecodec_state),
+    .m_size = sizeof(module_state),
     .m_methods = _multibytecodec_methods,
     .m_slots = _multibytecodec_slots,
     .m_traverse = _multibytecodec_traverse,
index 5c173b1041e3e452629f00ebac637bcb489de866..849fd5d9a1e8d529c8a48e96ed119297243de75a 100644 (file)
@@ -506,8 +506,6 @@ Modules/cjkcodecs/_codecs_iso2022.c jisx0208_init   initialized     -
 Modules/cjkcodecs/_codecs_iso2022.c    jisx0212_init   initialized     -
 Modules/cjkcodecs/_codecs_iso2022.c    jisx0213_init   initialized     -
 Modules/cjkcodecs/_codecs_iso2022.c    gb2312_init     initialized     -
-Modules/cjkcodecs/cjkcodecs.h  -       codec_list      -
-Modules/cjkcodecs/cjkcodecs.h  -       mapping_list    -
 Modules/readline.c     -       libedit_append_replace_history_offset   -
 Modules/readline.c     -       using_libedit_emulation -
 Modules/readline.c     -       libedit_history_start   -