#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2026: Mauro Carvalho Chehab.
#
# Origin: Linux kernel commit 210a923aab5b81b6930715c4e845d58d406b154d
# ("docs: kdoc_diff: add a helper tool to help checking kdoc regressions"),
# file tools/docs/kdoc_diff.
#
# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917

"""
kdoc_diff - Check differences in kernel-doc output between two commits.

Examples
--------

Compare the kernel-doc output between the v6.18 and v6.19 releases::

    $ kdoc_diff v6.18..v6.19

Both outputs are cached.

Force a complete documentation scan and clean any previous cache from
6.19 to the current HEAD::

    $ kdoc_diff 6.19.. --full --clean

Check differences only on a single driver since origin/main::

    $ kdoc_diff origin/main drivers/media

Generate a YAML file and use it to check for regressions::

    $ kdoc_diff HEAD~ drivers/media --regression
"""

import os
import sys
import argparse
import subprocess
import shutil
import re
import signal

from glob import iglob


SRC_DIR = os.path.dirname(os.path.realpath(__file__))
WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))

KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")

CACHE_DIR = ".doc_diff_cache"
YAML_NAME = "out.yaml"

# Cache sub-directories, relative to the work dir, keyed by scan type.
DIR_NAME = {
    "full": os.path.join(CACHE_DIR, "full"),
    "partial": os.path.join(CACHE_DIR, "partial"),
    "no-cache": os.path.join(CACHE_DIR, "no_cache"),
    "tmp": os.path.join(CACHE_DIR, "__tmp__"),
}


class GitHelper:
    """Handles all Git operations, running git with ``work_dir`` as cwd."""

    def __init__(self, work_dir=None):
        self.work_dir = work_dir

    def is_inside_repository(self):
        """Return True if ``work_dir`` is inside a Git work tree."""
        try:
            output = subprocess.check_output(["git", "rev-parse",
                                              "--is-inside-work-tree"],
                                             cwd=self.work_dir,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)
        except subprocess.CalledProcessError:
            return False

        return output.strip() == "true"

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        try:
            # ^{commit} peels tags and rejects objects that aren't commits
            subprocess.check_output(["git", "rev-parse", "--verify", "--quiet",
                                     f"{commit_hash}^{{commit}}"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            return False

        return True

    def get_short_hash(self, commit_hash):
        """
        Return the abbreviated hash of the commit ``commit_hash`` points to,
        or an empty string if it can't be resolved.
        """
        try:
            # Peel to the commit: for an annotated tag, a plain rev-parse
            # would return the id of the tag object, which never matches
            # HEAD after a checkout.
            return subprocess.check_output(["git", "rev-parse", "--short",
                                            f"{commit_hash}^{{commit}}"],
                                           cwd=self.work_dir,
                                           stderr=subprocess.STDOUT,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def has_uncommitted_changes(self):
        """Return True if tracked files differ from HEAD."""
        try:
            subprocess.check_output(["git", "diff-index",
                                     "--quiet", "HEAD", "--"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            return True

        return False

    def get_current_branch(self):
        """
        Return something ``git checkout`` can use to come back to the
        current state: the branch name or, on a detached HEAD, the full
        commit id.
        """
        branch = subprocess.check_output(["git", "branch", "--show-current"],
                                         cwd=self.work_dir,
                                         universal_newlines=True).strip()
        if branch:
            return branch

        # Detached HEAD: --show-current prints nothing
        return subprocess.check_output(["git", "rev-parse", "HEAD"],
                                       cwd=self.work_dir,
                                       universal_newlines=True).strip()

    def checkout_commit(self, commit_hash, quiet=True):
        """
        Force-checkout ``commit_hash``.

        Returns True on success and False if git fails. Raises RuntimeError
        if, after the checkout, HEAD is not at the requested commit.
        """
        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)

        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            return False

        # Double-check if HEAD actually moved. Compare resolved hashes, so
        # that any kind of ref (tag, branch, long hash) is accepted.
        expected = self.get_short_hash(commit_hash)
        branch = self.get_short_hash("HEAD")
        if expected != branch:
            raise RuntimeError(f"Branch changed to '{branch}' instead of '{commit_hash}'")

        return True


class CacheManager:
    """Manages persistent cache directories"""

    def __init__(self, work_dir):
        self.work_dir = work_dir

    def initialize(self):
        """Create the cache directories under ``work_dir`` if missing."""
        for dir_path in DIR_NAME.values():
            abs_path = os.path.join(self.work_dir, dir_path)
            os.makedirs(abs_path, exist_ok=True, mode=0o755)

    def get_commit_cache(self, commit_hash, path):
        """
        Return the cache directory for a commit inside ``path``.

        The directory is named after the short hash; if the commit can't
        be resolved, ``commit_hash`` itself is used as the name.
        """
        hash_short = GitHelper(self.work_dir).get_short_hash(commit_hash)
        if not hash_short:
            hash_short = commit_hash

        return os.path.join(path, hash_short)


class KernelDocRunner:
    """Runs kernel-doc documentation generator"""

    def __init__(self, work_dir, kdoc_binary):
        self.work_dir = work_dir
        self.kdoc_binary = kdoc_binary
        self.kdoc_files = None

    def find_kdoc_references(self):
        """
        Return the list of arguments of all ``.. kernel-doc::`` directives
        found in Documentation/**/*.rst. The result is cached.
        """
        if self.kdoc_files:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
        kdoc_files = set()

        for file_path in iglob(files, recursive=True):
            try:
                with open(file_path, 'r', encoding='utf-8') as fp:
                    for line in fp:
                        match = pattern.match(line.strip())
                        if match:
                            kdoc_files.add(match.group(1))
            except OSError:
                # Unreadable files are skipped on purpose
                continue

        self.kdoc_files = list(kdoc_files)

        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """Runs kernel-doc to generate a yaml file with man and rst."""
        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
        cmd += kdoc_files

        print(f"YAML regression test file will be stored at: {yaml_file}")

        try:
            subprocess.check_call(cmd, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False

        return True

    def run_unittest(self, yaml_file):
        """
        Run the kernel-doc parser unit tests against ``yaml_file``.

        On failure, prints a hint on how to re-run the tests by hand.
        """
        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        result = subprocess.run(cmd, cwd=self.work_dir, check=False)

        if result.returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k to filter results\n\n\t$", end="")
            print(" ".join(cmd) + "\n")

        # NOTE(review): returns True even when the tests failed, so a
        # regression doesn't change the exit status - confirm if intended.
        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """
        Generate man, rst and errors, storing them at tmp_dir.

        Writes man.log, rst.log and err.log. On success, if ``output_dir``
        is set, ``tmp_dir`` is renamed to it. Returns False if kernel-doc
        exits with an error.
        """
        os.makedirs(tmp_dir, exist_ok=True)

        try:
            with open(os.path.join(tmp_dir, "man.log"), "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=subprocess.DEVNULL)

            with open(os.path.join(tmp_dir, "rst.log"), "w", encoding="utf-8") as out, \
                 open(os.path.join(tmp_dir, "err.log"), "w", encoding="utf-8") as err:
                subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=err)
        except subprocess.CalledProcessError:
            return False

        if output_dir:
            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
            is_end):
        """
        Checkout ``commit_hash`` and run kernel-doc on it.

        Without ``is_regression``, the man/rst output is stored at
        ``output_dir``. With it, the first commit (``is_end`` False)
        generates the YAML file inside ``tmp_dir`` and the last one
        (``is_end`` True) runs the unit tests against that file.
        """
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        git_helper = GitHelper(self.work_dir)
        if not git_helper.checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)

        if not is_end:
            return self.gen_yaml(yaml_file, kdoc_files)

        return self.run_unittest(yaml_file)


class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        self.diff_tool = diff_tool
        # default: unified, no context, ignore whitespace changes
        self.diff_args = diff_args or ["-u0", "-w"]

    @staticmethod
    def _list_files(top):
        """Return the set of files under ``top``, as paths relative to it."""
        return {
            os.path.relpath(os.path.join(root, name), top)
            for root, _, files in os.walk(top)
            for name in files
        }

    def diff_directories(self, dir1, dir2):
        """
        Print the differences between two directories.

        Files present on both sides are compared with the external diff
        tool; files present on only one side are just listed.
        """
        print(f"\nDiffing {dir1} and {dir2}:")

        dir1_files = self._list_files(dir1)
        dir2_files = self._list_files(dir2)
        has_diff = False

        for file in sorted(dir1_files & dir2_files):
            f1 = os.path.join(dir1, file)
            f2 = os.path.join(dir2, file)

            cmd = [self.diff_tool] + self.diff_args + [f1, f2]
            try:
                result = subprocess.run(
                    cmd, capture_output=True, text=True, check=False
                )
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)

            if result.stdout:
                has_diff = True
                print(f"\n{file}")
                print(result.stdout, end="")

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files
        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for f in sorted(only_in_dir1):
                print(f"  - {f} (only in {dir1})")
            for f in sorted(only_in_dir2):
                print(f"  + {f} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")


class SignalHandler():
    """
    Context manager that restores the original git HEAD when the block
    ends or when SIGINT/SIGTERM is received.
    """

    def __init__(self, git_helper, original_head):
        self.git_helper = git_helper
        self.original_head = original_head
        self.old_handler = {}
        self.restored = False

    def restore(self, force_exit=False):
        """
        Restore original HEAD state and the previous signal handlers.

        Runs at most once. With ``force_exit``, exits with status 1
        afterwards.
        """
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")
        try:
            subprocess.check_call(
                ["git", "checkout", "-f", self.original_head],
                cwd=self.git_helper.work_dir,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            print(f"Failed to restore: {e}", file=sys.stderr)

        for sig, handler in self.old_handler.items():
            signal.signal(sig, handler)

        self.restored = True

        if force_exit:
            sys.exit(1)

    def signal_handler(self, sig, _):
        """Handle interrupt signals."""
        print(f"\nSignal {sig} received. Restoring original state...")

        self.restore(force_exit=True)

    def __enter__(self):
        """Install the SIGINT/SIGTERM handlers, saving the previous ones."""
        for sig in [signal.SIGINT, signal.SIGTERM]:
            self.old_handler[sig] = signal.getsignal(sig)
            signal.signal(sig, self.signal_handler)

        return self

    def __exit__(self, *args):
        """Restore HEAD and signals at the end of with block."""
        self.restore()


def parse_commit_range(value):
    """
    Parse ``begin``, ``begin..`` or ``begin..end`` into a (begin, end) tuple.

    A missing end defaults to HEAD. Raises argparse.ArgumentTypeError if
    there's no begin.
    """
    begin, _, end = value.partition("..")
    if not end:
        end = "HEAD"

    if not begin:
        raise argparse.ArgumentTypeError("Need a commit beginning")

    print(f"Range: {begin} to {end}")

    return begin, end


def select_kdoc_files(args, runner, work_dir):
    """
    Pick what will be parsed, based on the command line.

    Returns a ``(diff_type, kdoc_files, cache_msg)`` tuple, where
    ``diff_type`` is a key of DIR_NAME:

    - ``--full``: the whole tree (``"full"``);
    - explicit files: just them, without caching (``"no-cache"``);
    - otherwise: files referenced by kernel-doc directives (``"partial"``).
    """
    cache_msg = " (results will be cached)"

    if args.full:
        print(f"Parsing all files at {work_dir}")
        return "full", ["."], cache_msg

    if args.files:
        return "no-cache", args.files, ""

    kdoc_files = runner.find_kdoc_references()
    print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")

    return "partial", kdoc_files, cache_msg


def main():
    """Main code"""
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process (cannot be combined with --full)")

    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")

    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")

    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")

    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    if args.files and args.full:
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate git repository
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    # Validate the refs as typed by the user, so errors can name them
    for commit in args.commits:
        if not git_helper.is_valid_commit(commit):
            raise RuntimeError(f"Commit '{commit}' does not exist")

    old_commit, new_commit = (git_helper.get_short_hash(commit)
                              for commit in args.commits)

    # Check for uncommitted changes
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed
    diff_type, kdoc_files, cache_msg = select_kdoc_files(args, runner, work_dir)

    # Anchor the cache at work_dir: that's where the directories were
    # created and it is the cwd used when running kernel-doc.
    tmp_dir = os.path.join(work_dir, DIR_NAME["tmp"])
    out_path = os.path.join(work_dir, DIR_NAME[diff_type])

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice
    if not kdoc_files:
        raise RuntimeError("No kernel-doc references found")

    original_head = git_helper.get_current_branch()

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    with SignalHandler(git_helper, original_head):
        if args.clean or diff_type == "no-cache":
            for cache_dir in [old_cache, new_cache]:
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
                                     args.regression, False)
        else:
            old_success = True

        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
                                     args.regression, True)
        else:
            new_success = True

        if not (old_success and new_success):
            raise RuntimeError("Failed to generate documentation")

        if not args.regression:
            diff_manager = DiffManager()
            diff_manager.diff_directories(old_cache, new_cache)


if __name__ == "__main__":
    main()