git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-141004: Improve `make check-c-api-docs` (GH-143564)
author: Petr Viktorin <encukou@gmail.com>
Fri, 9 Jan 2026 10:16:50 +0000 (11:16 +0100)
committer: GitHub <noreply@github.com>
Fri, 9 Jan 2026 10:16:50 +0000 (11:16 +0100)
- Gather all documented names into a set in a single pass.
  This makes the check much faster.

- Do not match substrings (e.g. documenting `PyErr_WarnEx`
  doesn't mean that `PyErr_Warn` is documented).

- Consider `PY`-prefixed names (a lot of old macros use this prefix).

Tools/check-c-api-docs/ignored_c_api.txt
Tools/check-c-api-docs/main.py

index e81ffd51e193b2a9f5b623d3aba8ceb93e1d4788..e0b94edf74885383cd9989633b949fbf77b89720 100644 (file)
@@ -45,6 +45,27 @@ Py_LL
 Py_SAFE_DOWNCAST
 Py_ULL
 Py_VA_COPY
+PYLONG_BITS_IN_DIGIT
+PY_DWORD_MAX
+PY_FORMAT_SIZE_T
+PY_INT32_T
+PY_INT64_T
+PY_LITTLE_ENDIAN
+PY_LLONG_MAX
+PY_LLONG_MIN
+PY_LONG_LONG
+PY_SIZE_MAX
+PY_UINT32_T
+PY_UINT64_T
+PY_ULLONG_MAX
+# patchlevel.h
+PYTHON_ABI_STRING
+PYTHON_API_STRING
+PY_RELEASE_LEVEL_ALPHA
+PY_RELEASE_LEVEL_BETA
+PY_RELEASE_LEVEL_FINAL
+PY_RELEASE_LEVEL_GAMMA
+PY_VERSION
 # unicodeobject.h
 Py_UNICODE_SIZE
 # cpython/methodobject.h
@@ -91,3 +112,39 @@ Py_FrozenMain
 # cpython/unicodeobject.h
 PyUnicode_IS_COMPACT
 PyUnicode_IS_COMPACT_ASCII
+# pythonrun.h
+PyErr_Display
+# cpython/objimpl.h
+PyObject_GET_WEAKREFS_LISTPTR
+# cpython/pythonrun.h
+PyOS_Readline
+# cpython/warnings.h
+PyErr_Warn
+# fileobject.h
+PY_STDIOTEXTMODE
+# structmember.h
+PY_WRITE_RESTRICTED
+# pythread.h
+PY_TIMEOUT_T
+PY_TIMEOUT_MAX
+# cpython/pyctype.h
+PY_CTF_ALNUM
+PY_CTF_ALPHA
+PY_CTF_DIGIT
+PY_CTF_LOWER
+PY_CTF_SPACE
+PY_CTF_UPPER
+PY_CTF_XDIGIT
+# cpython/code.h
+PY_DEF_EVENT
+PY_FOREACH_CODE_EVENT
+# cpython/funcobject.h
+PY_DEF_EVENT
+PY_FOREACH_FUNC_EVENT
+# cpython/monitoring.h
+PY_MONITORING_EVENT_BRANCH
+# cpython/dictobject.h
+PY_DEF_EVENT
+PY_FOREACH_DICT_EVENT
+# cpython/pystats.h
+PYSTATS_MAX_UOP_ID
index 6bdf80a9ae8985d8c3cc29f84f0c70d6ebc09685..3debb9ed09da78e6c5d5ace593b73cdc77c1e4f2 100644 (file)
@@ -8,6 +8,7 @@ SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(")
 SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ")
 SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)")
 SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)")
+API_NAME_REGEX = re.compile(r'\bP[yY][a-zA-Z0-9_]+')
 
 CPYTHON = Path(__file__).parent.parent.parent
 INCLUDE = CPYTHON / "Include"
@@ -72,24 +73,10 @@ def found_ignored_documented(singular: bool) -> str:
     )
 
 
-def is_documented(name: str) -> bool:
-    """
-    Is a name present in the C API documentation?
-    """
-    for path in C_API_DOCS.iterdir():
-        if path.is_dir():
-            continue
-        if path.suffix != ".rst":
-            continue
-
-        text = path.read_text(encoding="utf-8")
-        if name in text:
-            return True
-
-    return False
-
-
-def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
+def scan_file_for_docs(
+    filename: str,
+    text: str,
+    names: set[str]) -> tuple[list[str], list[str]]:
     """
     Scan a header file for  C API functions.
     """
@@ -98,7 +85,7 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
     colors = _colorize.get_colors()
 
     def check_for_name(name: str) -> None:
-        documented = is_documented(name)
+        documented = name in names
         if documented and (name in IGNORED):
             documented_ignored.append(name)
         elif not documented and (name not in IGNORED):
@@ -106,14 +93,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
 
     for function in SIMPLE_FUNCTION_REGEX.finditer(text):
         name = function.group(2)
-        if not name.startswith("Py"):
+        if not API_NAME_REGEX.fullmatch(name):
             continue
 
         check_for_name(name)
 
     for macro in SIMPLE_MACRO_REGEX.finditer(text):
         name = macro.group(1)
-        if not name.startswith("Py"):
+        if not API_NAME_REGEX.fullmatch(name):
             continue
 
         if "(" in name:
@@ -123,14 +110,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
 
     for inline in SIMPLE_INLINE_REGEX.finditer(text):
         name = inline.group(2)
-        if not name.startswith("Py"):
+        if not API_NAME_REGEX.fullmatch(name):
             continue
 
         check_for_name(name)
 
     for data in SIMPLE_DATA_REGEX.finditer(text):
         name = data.group(1)
-        if not name.startswith("Py"):
+        if not API_NAME_REGEX.fullmatch(name):
             continue
 
         check_for_name(name)
@@ -152,6 +139,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
 
 
 def main() -> None:
+    print("Gathering C API names from docs...")
+    names = set()
+    for path in C_API_DOCS.glob('**/*.rst'):
+        text = path.read_text(encoding="utf-8")
+        for name in API_NAME_REGEX.findall(text):
+            names.add(name)
+    print(f"Got {len(names)} names!")
+
     print("Scanning for undocumented C API functions...")
     files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()]
     all_missing: list[str] = []
@@ -162,7 +157,7 @@ def main() -> None:
             continue
         assert file.exists()
         text = file.read_text(encoding="utf-8")
-        missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text)
+        missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text, names)
         all_found_ignored += ignored
         all_missing += missing