From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 18:09:41 +0000 (+0100) Subject: docs: kdoc_parser: avoid tokenizing structs everytime X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=79d881beb721d27f679f0dc1cba2d5fe2d7f6d8d;p=thirdparty%2Fkernel%2Flinux.git docs: kdoc_parser: avoid tokenizing structs everytime Most of the rules inside CTransforms are of the type CMatch. Don't re-parse the source code every time. Doing this doesn't change the output, but makes kdoc almost as fast as before the tokenizer patches: # Before tokenizer patches $ time ./scripts/kernel-doc . -man >original 2>&1 real 0m42.933s user 0m36.523s sys 0m1.145s # After tokenizer patches $ time ./scripts/kernel-doc . -man >before 2>&1 real 1m29.853s user 1m23.974s sys 0m1.237s # After this patch $ time ./scripts/kernel-doc . -man >after 2>&1 real 0m48.579s user 0m45.938s sys 0m0.988s $ diff -s before after Files before and after are identical Manually checked the differences between original and after with: $ diff -U0 -prBw original after|grep -v Warning|grep -v "@@"|less They're due to: - whitespace fixes; - struct_group are now better handled; - several badly-generated man pages from broken inline kernel-doc markups are now fixed. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <1cc2a4286ebf7d4b2d03fcaf42a1ba9fa09004b9.1773770483.git.mchehab+huawei@kernel.org> --- diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 8b2c9d0f0c583..f6c4ee3b18c90 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -737,7 +737,6 @@ class KernelDoc: # # Go through the list of members applying all of our transformations. 
# - members = trim_private_members(members) members = self.xforms.apply("struct", members) # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 2056572852fd0..5a62d4a450cbc 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -5,7 +5,7 @@ import re from kdoc.kdoc_re import KernRe -from kdoc.c_lex import CMatch +from kdoc.c_lex import CMatch, CTokenizer struct_args_pattern = r'([^,)]+)' @@ -16,6 +16,12 @@ class CTransforms: into something we can parse and generate kdoc for. """ + # + # NOTE: + # Due to performance reasons, place CMatch rules before KernRe, + # as this avoids running the C parser every time. + # + #: Transforms for structs and unions. struct_xforms = [ # Strip attributes @@ -124,13 +130,25 @@ class CTransforms: "var": var_xforms, } - def apply(self, xforms_type, text): + def apply(self, xforms_type, source): """ - Apply a set of transforms to a block of text. + Apply a set of transforms to a block of source. + + As tokenizer is used here, this function also remove comments + at the end. """ if xforms_type not in self.xforms: - return text + return source + + if isinstance(source, str): + source = CTokenizer(source) for search, subst in self.xforms[xforms_type]: - text = search.sub(subst, text) - return text + # + # KernRe only accept strings. + # + if isinstance(search, KernRe): + source = str(source) + + source = search.sub(subst, source) + return str(source)