git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-46541: Discover the global strings. (gh-31346)
author: Eric Snow <ericsnowcurrently@gmail.com>
Tue, 15 Feb 2022 00:36:51 +0000 (17:36 -0700)
committer: GitHub <noreply@github.com>
Tue, 15 Feb 2022 00:36:51 +0000 (17:36 -0700)
Instead of manually enumerating the global strings in generate_global_objects.py, we extrapolate the list from usage of _Py_ID() and _Py_STR() in the source files.

This is partly inspired by gh-31261.

https://bugs.python.org/issue46541

Include/internal/pycore_global_strings.h
Include/internal/pycore_runtime_init.h
Objects/typeobject.c
Objects/weakrefobject.c
Python/_warnings.c
Python/ast_opt.c
Python/compile.c
Python/pythonrun.c
Tools/scripts/generate_global_objects.py

index 17241b3a3dd166667b8cf3cb94008ef8eda80f85..aa597bc8281a5aa6fda60852fe4d0c8fdede13d2 100644 (file)
@@ -28,13 +28,6 @@ extern "C" {
 /* The following is auto-generated by Tools/scripts/generate_global_objects.py. */
 struct _Py_global_strings {
     struct {
-        STRUCT_FOR_STR(empty, "")
-        STRUCT_FOR_STR(dot, ".")
-        STRUCT_FOR_STR(comma_sep, ", ")
-        STRUCT_FOR_STR(percent, "%")
-        STRUCT_FOR_STR(dbl_percent, "%%")
-
-        // "anonymous" labels
         STRUCT_FOR_STR(anon_dictcomp, "<dictcomp>")
         STRUCT_FOR_STR(anon_genexpr, "<genexpr>")
         STRUCT_FOR_STR(anon_lambda, "<lambda>")
@@ -42,7 +35,12 @@ struct _Py_global_strings {
         STRUCT_FOR_STR(anon_module, "<module>")
         STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
         STRUCT_FOR_STR(anon_string, "<string>")
+        STRUCT_FOR_STR(comma_sep, ", ")
+        STRUCT_FOR_STR(dbl_percent, "%%")
+        STRUCT_FOR_STR(dot, ".")
         STRUCT_FOR_STR(dot_locals, ".<locals>")
+        STRUCT_FOR_STR(empty, "")
+        STRUCT_FOR_STR(percent, "%")
     } literals;
 
     struct {
@@ -330,6 +328,7 @@ struct _Py_global_strings {
 #define _Py_STR(NAME) \
      (_Py_SINGLETON(strings.literals._ ## NAME._ascii.ob_base))
 
+#define _Py_DECLARE_STR(name, str)
 
 #ifdef __cplusplus
 }
index 045ae5d2835b1fb1d9afd3b641eb5633cbafd395..04c1e671235eae60135153471da142c01369875b 100644 (file)
@@ -644,12 +644,6 @@ extern "C" {
         \
         .strings = { \
             .literals = { \
-                INIT_STR(empty, ""), \
-                INIT_STR(dot, "."), \
-                INIT_STR(comma_sep, ", "), \
-                INIT_STR(percent, "%"), \
-                INIT_STR(dbl_percent, "%%"), \
-                \
                 INIT_STR(anon_dictcomp, "<dictcomp>"), \
                 INIT_STR(anon_genexpr, "<genexpr>"), \
                 INIT_STR(anon_lambda, "<lambda>"), \
@@ -657,7 +651,12 @@ extern "C" {
                 INIT_STR(anon_module, "<module>"), \
                 INIT_STR(anon_setcomp, "<setcomp>"), \
                 INIT_STR(anon_string, "<string>"), \
+                INIT_STR(comma_sep, ", "), \
+                INIT_STR(dbl_percent, "%%"), \
+                INIT_STR(dot, "."), \
                 INIT_STR(dot_locals, ".<locals>"), \
+                INIT_STR(empty, ""), \
+                INIT_STR(percent, "%"), \
             }, \
             .identifiers = { \
                 INIT_ID(Py_Repr), \
index 3f8f36a9c4648ae29281309af26513c7c695390f..8c4901119de7df87512b6c45369fddba676abcc0 100644 (file)
@@ -4546,6 +4546,7 @@ object_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
             Py_DECREF(sorted_methods);
             return NULL;
         }
+        _Py_DECLARE_STR(comma_sep, ", ");
         joined = PyUnicode_Join(&_Py_STR(comma_sep), sorted_methods);
         method_count = PyObject_Length(sorted_methods);
         Py_DECREF(sorted_methods);
index 71dfa640ebf5784d28a15e658578fd451dfff61b..1712533a39d802c880e64f32641a0ef41a37633d 100644 (file)
@@ -458,12 +458,12 @@ proxy_checkref(PyWeakReference *proxy)
         return res; \
     }
 
-#define WRAP_METHOD(method, special) \
+#define WRAP_METHOD(method, SPECIAL) \
     static PyObject * \
     method(PyObject *proxy, PyObject *Py_UNUSED(ignored)) { \
             UNWRAP(proxy); \
             Py_INCREF(proxy); \
-            PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(special)); \
+            PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(SPECIAL)); \
             Py_DECREF(proxy); \
             return res; \
         }
index a47e5fef6865f1b9e1a393e430a1cbdb627669cb..03e6ffcee0ac24ab91ab73a5a3893c41c32bc1d6 100644 (file)
@@ -186,8 +186,8 @@ check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg)
     return rc;
 }
 
-#define GET_WARNINGS_ATTR(interp, attr, try_import) \
-    get_warnings_attr(interp, &_Py_ID(attr), try_import)
+#define GET_WARNINGS_ATTR(interp, ATTR, try_import) \
+    get_warnings_attr(interp, &_Py_ID(ATTR), try_import)
 
 /*
    Returns a new reference.
index 2911370649790265b56adced95d0be6f3f962f1b..77ed29d0cdddd81f975cfc17f91beffb8448066d 100644 (file)
@@ -268,6 +268,8 @@ parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena)
     PyObject *str = PyUnicode_Substring(fmt, start, pos);
     /* str = str.replace('%%', '%') */
     if (str && has_percents) {
+        _Py_DECLARE_STR(percent, "%");
+        _Py_DECLARE_STR(dbl_percent, "%%");
         Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent),
                                          &_Py_STR(percent), -1));
     }
index ac4960b5df320ebf7872f16622417cd248cbec90..1cf20d3a36ac116b037ad1f0dffdd07b17b17a79 100644 (file)
@@ -667,6 +667,7 @@ compiler_set_qualname(struct compiler *c)
                 || parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
                 || parent->u_scope_type == COMPILER_SCOPE_LAMBDA)
             {
+                _Py_DECLARE_STR(dot_locals, ".<locals>");
                 base = PyUnicode_Concat(parent->u_qualname,
                                         &_Py_STR(dot_locals));
                 if (base == NULL)
@@ -2022,6 +2023,7 @@ compiler_mod(struct compiler *c, mod_ty mod)
 {
     PyCodeObject *co;
     int addNone = 1;
+    _Py_DECLARE_STR(anon_module, "<module>");
     if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
                               mod, 1)) {
         return NULL;
@@ -2876,6 +2878,7 @@ compiler_lambda(struct compiler *c, expr_ty e)
         return 0;
     }
 
+    _Py_DECLARE_STR(anon_lambda, "<lambda>");
     if (!compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA,
                               (void *)e, e->lineno)) {
         return 0;
@@ -5347,6 +5350,7 @@ static int
 compiler_genexp(struct compiler *c, expr_ty e)
 {
     assert(e->kind == GeneratorExp_kind);
+    _Py_DECLARE_STR(anon_genexpr, "<genexpr>");
     return compiler_comprehension(c, e, COMP_GENEXP, &_Py_STR(anon_genexpr),
                                   e->v.GeneratorExp.generators,
                                   e->v.GeneratorExp.elt, NULL);
@@ -5356,6 +5360,7 @@ static int
 compiler_listcomp(struct compiler *c, expr_ty e)
 {
     assert(e->kind == ListComp_kind);
+    _Py_DECLARE_STR(anon_listcomp, "<listcomp>");
     return compiler_comprehension(c, e, COMP_LISTCOMP, &_Py_STR(anon_listcomp),
                                   e->v.ListComp.generators,
                                   e->v.ListComp.elt, NULL);
@@ -5365,6 +5370,7 @@ static int
 compiler_setcomp(struct compiler *c, expr_ty e)
 {
     assert(e->kind == SetComp_kind);
+    _Py_DECLARE_STR(anon_setcomp, "<setcomp>");
     return compiler_comprehension(c, e, COMP_SETCOMP, &_Py_STR(anon_setcomp),
                                   e->v.SetComp.generators,
                                   e->v.SetComp.elt, NULL);
@@ -5375,6 +5381,7 @@ static int
 compiler_dictcomp(struct compiler *c, expr_ty e)
 {
     assert(e->kind == DictComp_kind);
+    _Py_DECLARE_STR(anon_dictcomp, "<dictcomp>");
     return compiler_comprehension(c, e, COMP_DICTCOMP, &_Py_STR(anon_dictcomp),
                                   e->v.DictComp.generators,
                                   e->v.DictComp.key, e->v.DictComp.value);
index b34a22391822b7c23989889d7d16fb82f88c1eb2..38ca952838a1f4584100407ff22737912e3369fe 100644 (file)
@@ -515,6 +515,7 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename,
         goto finally;
     if (v == Py_None) {
         Py_DECREF(v);
+        _Py_DECLARE_STR(anon_string, "<string>");
         *filename = &_Py_STR(anon_string);
         Py_INCREF(*filename);
     }
@@ -1562,6 +1563,7 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
     if (arena == NULL)
         return NULL;
 
+    _Py_DECLARE_STR(anon_string, "<string>");
     mod = _PyParser_ASTFromString(
             str, &_Py_STR(anon_string), start, flags, arena);
 
index 73068894d974b069dcb0eee41899b1f3f0cbc785..e989f3c086f344f1a52f03ea7a64dce9de4bd743 100644 (file)
@@ -13,298 +13,112 @@ INTERNAL = os.path.join(ROOT, 'Include', 'internal')
 STRING_LITERALS = {
     'empty': '',
     'dot': '.',
-    'comma_sep': ', ',
-    'percent': '%',
-    'dbl_percent': '%%',
-
-    '"anonymous" labels': None,
-    'anon_dictcomp': '<dictcomp>',
-    'anon_genexpr': '<genexpr>',
-    'anon_lambda': '<lambda>',
-    'anon_listcomp': '<listcomp>',
-    'anon_module': '<module>',
-    'anon_setcomp': '<setcomp>',
-    'anon_string': '<string>',
-    'dot_locals': '.<locals>',
+}
+IGNORED = {
+    'ACTION',  # Python/_warnings.c
+    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
+    'DUNDER',  # Objects/typeobject.c
+    'RDUNDER',  # Objects/typeobject.c
+    'SPECIAL',  # Objects/weakrefobject.c
 }
 IDENTIFIERS = [
-    'Py_Repr',
-    'TextIOWrapper',
+    # from ADD() Python/_warnings.c
+    'default',
+    'ignore',
+
+    # from GET_WARNINGS_ATTR() in Python/_warnings.c
     'WarningMessage',
-    '_',
-    '__IOBase_closed',
-    '__abc_tpflags__',
+    '_showwarnmsg',
+    '_warn_unawaited_coroutine',
+    'defaultaction',
+    'filters',
+    'onceregistry',
+
+    # from WRAP_METHOD() in Objects/weakrefobject.c
+    '__bytes__',
+    '__reversed__',
+
+    # from COPY_ATTR() in Objects/funcobject.c
+    '__module__',
+    '__name__',
+    '__qualname__',
+    '__doc__',
+    '__annotations__',
+
+    # from SLOT* in Objects/typeobject.c
     '__abs__',
-    '__abstractmethods__',
     '__add__',
-    '__aenter__',
-    '__aexit__',
-    '__aiter__',
-    '__all__',
     '__and__',
-    '__anext__',
-    '__annotations__',
-    '__args__',
-    '__await__',
-    '__bases__',
-    '__bool__',
-    '__build_class__',
-    '__builtins__',
-    '__bytes__',
-    '__call__',
-    '__cantrace__',
-    '__class__',
-    '__class_getitem__',
-    '__classcell__',
-    '__complex__',
-    '__contains__',
-    '__copy__',
-    '__del__',
-    '__delattr__',
-    '__delete__',
-    '__delitem__',
-    '__dict__',
-    '__dir__',
     '__divmod__',
-    '__doc__',
-    '__enter__',
-    '__eq__',
-    '__exit__',
-    '__file__',
     '__float__',
     '__floordiv__',
-    '__format__',
-    '__fspath__',
-    '__ge__',
-    '__get__',
-    '__getattr__',
-    '__getattribute__',
-    '__getinitargs__',
     '__getitem__',
-    '__getnewargs__',
-    '__getnewargs_ex__',
-    '__getstate__',
-    '__gt__',
-    '__hash__',
     '__iadd__',
     '__iand__',
     '__ifloordiv__',
     '__ilshift__',
     '__imatmul__',
     '__imod__',
-    '__import__',
     '__imul__',
-    '__index__',
-    '__init__',
-    '__init_subclass__',
-    '__instancecheck__',
     '__int__',
     '__invert__',
     '__ior__',
-    '__ipow__',
     '__irshift__',
-    '__isabstractmethod__',
     '__isub__',
-    '__iter__',
     '__itruediv__',
     '__ixor__',
-    '__le__',
-    '__len__',
-    '__length_hint__',
-    '__loader__',
     '__lshift__',
-    '__lt__',
-    '__ltrace__',
-    '__main__',
     '__matmul__',
-    '__missing__',
     '__mod__',
-    '__module__',
-    '__mro_entries__',
     '__mul__',
-    '__name__',
-    '__ne__',
     '__neg__',
-    '__new__',
-    '__newobj__',
-    '__newobj_ex__',
-    '__next__',
-    '__note__',
     '__or__',
-    '__origin__',
-    '__package__',
-    '__parameters__',
-    '__path__',
     '__pos__',
     '__pow__',
-    '__prepare__',
-    '__qualname__',
     '__radd__',
     '__rand__',
     '__rdivmod__',
-    '__reduce__',
-    '__reduce_ex__',
-    '__repr__',
-    '__reversed__',
     '__rfloordiv__',
     '__rlshift__',
     '__rmatmul__',
     '__rmod__',
     '__rmul__',
     '__ror__',
-    '__round__',
     '__rpow__',
     '__rrshift__',
     '__rshift__',
     '__rsub__',
     '__rtruediv__',
     '__rxor__',
-    '__set__',
-    '__set_name__',
-    '__setattr__',
-    '__setitem__',
-    '__setstate__',
-    '__sizeof__',
-    '__slotnames__',
-    '__slots__',
-    '__spec__',
     '__str__',
     '__sub__',
-    '__subclasscheck__',
-    '__subclasshook__',
     '__truediv__',
-    '__trunc__',
-    '__warningregistry__',
-    '__weakref__',
     '__xor__',
-    '_abc_impl',
-    '_blksize',
-    '_dealloc_warn',
-    '_finalizing',
-    '_find_and_load',
-    '_fix_up_module',
-    '_get_sourcefile',
-    '_handle_fromlist',
-    '_initializing',
-    '_is_text_encoding',
-    '_lock_unlock_module',
-    '_showwarnmsg',
-    '_shutdown',
-    '_slotnames',
-    '_strptime_time',
-    '_uninitialized_submodules',
-    '_warn_unawaited_coroutine',
-    '_xoptions',
-    'add',
-    'append',
-    'big',
-    'buffer',
-    'builtins',
-    'clear',
-    'close',
-    'code',
-    'copy',
-    'copyreg',
-    'decode',
-    'default',
-    'defaultaction',
-    'difference_update',
-    'dispatch_table',
-    'displayhook',
-    'enable',
-    'encoding',
-    'end_lineno',
-    'end_offset',
-    'errors',
-    'excepthook',
-    'extend',
-    'filename',
-    'fileno',
-    'fillvalue',
-    'filters',
-    'find_class',
-    'flush',
-    'get',
-    'get_source',
-    'getattr',
-    'ignore',
-    'importlib',
-    'intersection',
-    'isatty',
-    'items',
-    'iter',
-    'keys',
-    'last_traceback',
-    'last_type',
-    'last_value',
-    'latin1',
-    'lineno',
-    'little',
-    'match',
-    'metaclass',
-    'mode',
-    'modules',
-    'mro',
-    'msg',
-    'n_fields',
-    'n_sequence_fields',
-    'n_unnamed_fields',
-    'name',
-    'obj',
-    'offset',
-    'onceregistry',
-    'open',
-    'parent',
-    'partial',
-    'path',
-    'peek',
-    'persistent_id',
-    'persistent_load',
-    'print_file_and_line',
-    'ps1',
-    'ps2',
-    'raw',
-    'read',
-    'read1',
-    'readable',
-    'readall',
-    'readinto',
-    'readinto1',
-    'readline',
-    'reducer_override',
-    'reload',
-    'replace',
-    'reset',
-    'return',
-    'reversed',
-    'seek',
-    'seekable',
-    'send',
-    'setstate',
-    'sort',
-    'stderr',
-    'stdin',
-    'stdout',
-    'strict',
-    'symmetric_difference_update',
-    'tell',
-    'text',
-    'threading',
-    'throw',
-    'unraisablehook',
-    'values',
-    'version',
-    'warnings',
-    'warnoptions',
-    'writable',
-    'write',
-    'zipimporter',
 ]
 
 
 #######################################
 # helpers
 
+def iter_global_strings():
+    id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
+    str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
+    for dirname, _, files in os.walk(ROOT):
+        if os.path.relpath(dirname, ROOT).startswith('Include'):
+            continue
+        for name in files:
+            if not name.endswith(('.c', '.h')):
+                continue
+            filename = os.path.join(dirname, name)
+            with open(os.path.join(filename), encoding='utf-8') as infile:
+                for lno, line in enumerate(infile, 1):
+                    for m in id_regex.finditer(line):
+                        identifier, = m.groups()
+                        yield identifier, None, filename, lno, line
+                    for m in str_regex.finditer(line):
+                        varname, string = m.groups()
+                        yield varname, string, filename, lno, line
+
 def iter_to_marker(lines, marker):
     for line in lines:
         if line.rstrip() == marker:
@@ -354,7 +168,7 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj
 END = '/* End auto-generated code */'
 
 
-def generate_global_strings():
+def generate_global_strings(identifiers, strings):
     filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
 
     # Read the non-generated part of the file.
@@ -371,22 +185,18 @@ def generate_global_strings():
         printer.write(START)
         with printer.block('struct _Py_global_strings', ';'):
             with printer.block('struct', ' literals;'):
-                for name, literal in STRING_LITERALS.items():
-                    if literal is None:
-                        outfile.write('\n')
-                        printer.write(f'// {name}')
-                    else:
-                        printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
+                for name, literal in sorted(strings.items()):
+                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
             outfile.write('\n')
             with printer.block('struct', ' identifiers;'):
-                for name in sorted(IDENTIFIERS):
+                for name in sorted(identifiers):
                     assert name.isidentifier(), name
                     printer.write(f'STRUCT_FOR_ID({name})')
         printer.write(END)
         printer.write(after)
 
 
-def generate_runtime_init():
+def generate_runtime_init(identifiers, strings):
     # First get some info from the declarations.
     nsmallposints = None
     nsmallnegints = None
@@ -432,13 +242,10 @@ def generate_runtime_init():
                 # Global strings.
                 with printer.block('.strings =', ','):
                     with printer.block('.literals =', ','):
-                        for name, literal in STRING_LITERALS.items():
-                            if literal is None:
-                                printer.write('')
-                            else:
-                                printer.write(f'INIT_STR({name}, "{literal}"),')
+                        for name, literal in sorted(strings.items()):
+                            printer.write(f'INIT_STR({name}, "{literal}"),')
                     with printer.block('.identifiers =', ','):
-                        for name in sorted(IDENTIFIERS):
+                        for name in sorted(identifiers):
                             assert name.isidentifier(), name
                             printer.write(f'INIT_ID({name}),')
         printer.write(END)
@@ -507,9 +314,9 @@ TYPESLOTS_RE = re.compile(r'''
      )
 ''', re.VERBOSE)
 
-def check_orphan_strings():
+def check_orphan_strings(identifiers):
     literals = set(n for n, s in STRING_LITERALS.items() if s)
-    identifiers = set(IDENTIFIERS)
+    identifiers = set(identifiers)
     files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
     for i, filename in enumerate(files, start=1):
         print('.', end='')
@@ -586,11 +393,23 @@ def check_orphan_strings():
 # the script
 
 def main(*, check=False) -> None:
-    generate_global_strings()
-    generate_runtime_init()
+    identifiers = set(IDENTIFIERS)
+    strings = dict(STRING_LITERALS)
+    for name, string, filename, lno, _ in iter_global_strings():
+        if string is None:
+            if name not in IGNORED:
+                identifiers.add(name)
+        else:
+            if name not in strings:
+                strings[name] = string
+            elif string != strings[name]:
+                raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}')
+
+    generate_global_strings(identifiers, strings)
+    generate_runtime_init(identifiers, strings)
 
     if check:
-        check_orphan_strings()
+        check_orphan_strings(identifiers)
 
 
 if __name__ == '__main__':