From: Petr Viktorin Date: Fri, 9 Jan 2026 10:16:50 +0000 (+0100) Subject: gh-141004: Improve `make check-c-api-docs` (GH-143564) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=234a15dc4ec2d8f8ababea91532ebe896a96387a;p=thirdparty%2FPython%2Fcpython.git gh-141004: Improve `make check-c-api-docs` (GH-143564) - Gather all documented names into a set in a single pass. This makes the check much faster. - Do not match substrings (e.g. documenting `PyErr_WarnEx` doesn't mean that `PyErr_Warn` is documented) - Consider `PY`-prefixed names (a lot of old macros use this) --- diff --git a/Tools/check-c-api-docs/ignored_c_api.txt b/Tools/check-c-api-docs/ignored_c_api.txt index e81ffd51e193..e0b94edf7488 100644 --- a/Tools/check-c-api-docs/ignored_c_api.txt +++ b/Tools/check-c-api-docs/ignored_c_api.txt @@ -45,6 +45,27 @@ Py_LL Py_SAFE_DOWNCAST Py_ULL Py_VA_COPY +PYLONG_BITS_IN_DIGIT +PY_DWORD_MAX +PY_FORMAT_SIZE_T +PY_INT32_T +PY_INT64_T +PY_LITTLE_ENDIAN +PY_LLONG_MAX +PY_LLONG_MIN +PY_LONG_LONG +PY_SIZE_MAX +PY_UINT32_T +PY_UINT64_T +PY_ULLONG_MAX +# patchlevel.h +PYTHON_ABI_STRING +PYTHON_API_STRING +PY_RELEASE_LEVEL_ALPHA +PY_RELEASE_LEVEL_BETA +PY_RELEASE_LEVEL_FINAL +PY_RELEASE_LEVEL_GAMMA +PY_VERSION # unicodeobject.h Py_UNICODE_SIZE # cpython/methodobject.h @@ -91,3 +112,39 @@ Py_FrozenMain # cpython/unicodeobject.h PyUnicode_IS_COMPACT PyUnicode_IS_COMPACT_ASCII +# pythonrun.h +PyErr_Display +# cpython/objimpl.h +PyObject_GET_WEAKREFS_LISTPTR +# cpython/pythonrun.h +PyOS_Readline +# cpython/warnings.h +PyErr_Warn +# fileobject.h +PY_STDIOTEXTMODE +# structmember.h +PY_WRITE_RESTRICTED +# pythread.h +PY_TIMEOUT_T +PY_TIMEOUT_MAX +# cpython/pyctype.h +PY_CTF_ALNUM +PY_CTF_ALPHA +PY_CTF_DIGIT +PY_CTF_LOWER +PY_CTF_SPACE +PY_CTF_UPPER +PY_CTF_XDIGIT +# cpython/code.h +PY_DEF_EVENT +PY_FOREACH_CODE_EVENT +# cpython/funcobject.h +PY_DEF_EVENT +PY_FOREACH_FUNC_EVENT +# cpython/monitoring.h +PY_MONITORING_EVENT_BRANCH +# cpython/dictobject.h +PY_DEF_EVENT +PY_FOREACH_DICT_EVENT +# cpython/pystats.h +PYSTATS_MAX_UOP_ID diff --git a/Tools/check-c-api-docs/main.py b/Tools/check-c-api-docs/main.py index 6bdf80a9ae89..3debb9ed09da 100644 --- a/Tools/check-c-api-docs/main.py +++ b/Tools/check-c-api-docs/main.py @@ -8,6 +8,7 @@ SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(") SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ") SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)") SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)") +API_NAME_REGEX = re.compile(r'\bP[yY][a-zA-Z0-9_]+') CPYTHON = Path(__file__).parent.parent.parent INCLUDE = CPYTHON / "Include" @@ -72,24 +73,10 @@ def found_ignored_documented(singular: bool) -> str: ) -def is_documented(name: str) -> bool: - """ - Is a name present in the C API documentation? - """ - for path in C_API_DOCS.iterdir(): - if path.is_dir(): - continue - if path.suffix != ".rst": - continue - - text = path.read_text(encoding="utf-8") - if name in text: - return True - - return False - - -def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]: +def scan_file_for_docs( + filename: str, + text: str, + names: set[str]) -> tuple[list[str], list[str]]: """ Scan a header file for C API functions. """ @@ -98,7 +85,7 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]: colors = _colorize.get_colors() def check_for_name(name: str) -> None: - documented = is_documented(name) + documented = name in names if documented and (name in IGNORED): documented_ignored.append(name) elif not documented and (name not in IGNORED): @@ -106,14 +93,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]: for function in SIMPLE_FUNCTION_REGEX.finditer(text): name = function.group(2) - if not name.startswith("Py"): + if not API_NAME_REGEX.fullmatch(name): continue check_for_name(name) for macro in SIMPLE_MACRO_REGEX.finditer(text): name = macro.group(1) - if not name.startswith("Py"): + if not API_NAME_REGEX.fullmatch(name): continue if "(" in name: @@ -123,14 +110,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]: for inline in SIMPLE_INLINE_REGEX.finditer(text): name = inline.group(2) - if not name.startswith("Py"): + if not API_NAME_REGEX.fullmatch(name): continue check_for_name(name) for data in SIMPLE_DATA_REGEX.finditer(text): name = data.group(1) - if not name.startswith("Py"): + if not API_NAME_REGEX.fullmatch(name): continue check_for_name(name) @@ -152,6 +139,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]: def main() -> None: + print("Gathering C API names from docs...") + names = set() + for path in C_API_DOCS.glob('**/*.rst'): + text = path.read_text(encoding="utf-8") + for name in API_NAME_REGEX.findall(text): + names.add(name) + print(f"Got {len(names)} names!") + print("Scanning for undocumented C API functions...") files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()] all_missing: list[str] = [] @@ -162,7 +157,7 @@ def main() -> None: continue assert file.exists() text = file.read_text(encoding="utf-8") - missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text) + missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text, names) all_found_ignored += ignored all_missing += missing