summaryrefslogtreecommitdiff
path: root/tools/lib/python/kdoc/kdoc_parser.py
diff options
context:
space:
mode:
authorJonathan Corbet <corbet@lwn.net>2026-03-22 15:06:59 -0600
committerJonathan Corbet <corbet@lwn.net>2026-03-22 15:06:59 -0600
commit781171bec0650c00c642564afcb5cce57abda5bf (patch)
tree31e8cf6d41b40bef9f432ebbe4a8180f32af9fbf /tools/lib/python/kdoc/kdoc_parser.py
parent6108c809f4fd9dbb1a138ba4326d645cc3113a8d (diff)
parent7538df7a2d7d26428803cf8053476169a6d28659 (diff)
downloadlwn-781171bec0650c00c642564afcb5cce57abda5bf.tar.gz
lwn-781171bec0650c00c642564afcb5cce57abda5bf.zip
Merge branch 'mauro' into docs-mw
Mauro says: This patch series changes how kdoc parser handles macro replacements. Instead of heavily relying on regular expressions that can sometimes be very complex, it uses a C lexical tokenizer. This ensures that BEGIN/END blocks on functions and structs are properly handled, even when nested. Checking before/after the patch series, for both man pages and rst only had: - whitespace differences; - struct_group macros now are shown as inner anonymous structs as they should be. Also, I didn't notice any relevant change on the documentation build time. With regard to that, right now, every time a CMatch replacement rule takes place, it does: for each transform: - tokenizes the source code; - handle CMatch; - convert tokens back to a string. A possible optimization would be to do, instead: - tokenizes source code; - for each transform handle CMatch; - convert tokens back to a string. For now, I opted not to do it, because: - too many changes on a single row; - docs build time is taking ~3:30 minutes, which is about the same time it was taking before the changes; - there is a very dirty hack inside function_xforms: (KernRe(r"_noprof"), ""). This is meant to change function prototypes instead of function arguments. So, if ok for you, I would prefer to merge this one first. We can later optimize kdoc_parser to avoid multiple token <-> string conversions. - One important aspect of this series is that it introduces unittests for kernel-doc. I used it a lot during the development of this series, to ensure that the changes I was doing were producing the expected results. Tests are on two separate files that can be executed directly. 
Alternatively, there is a run.py script that runs all of them (and any other python script named tools/unittests/test_*.py"): $ tools/unittests/run.py test_cmatch: TestSearch: test_search_acquires_multiple: OK test_search_acquires_nested_paren: OK test_search_acquires_simple: OK test_search_must_hold: OK test_search_must_hold_shared: OK test_search_no_false_positive: OK test_search_no_function: OK test_search_no_macro_remains: OK TestSubMultipleMacros: test_acquires_multiple: OK test_acquires_nested_paren: OK test_acquires_simple: OK test_mixed_macros: OK test_must_hold: OK test_must_hold_shared: OK test_no_false_positive: OK test_no_function: OK test_no_macro_remains: OK TestSubSimple: test_rise_early_greedy: OK test_rise_multiple_greedy: OK test_strip_multiple_acquires: OK test_sub_count_parameter: OK test_sub_mixed_placeholders: OK test_sub_multiple_placeholders: OK test_sub_no_placeholder: OK test_sub_single_placeholder: OK test_sub_with_capture: OK test_sub_zero_placeholder: OK TestSubWithLocalXforms: test_functions_with_acquires_and_releases: OK test_raw_struct_group: OK test_raw_struct_group_tagged: OK test_struct_group: OK test_struct_group_attr: OK test_struct_group_tagged_with_private: OK test_struct_kcov: OK test_vars_stackdepot: OK test_tokenizer: TestPublicPrivate: test_balanced_inner_private: OK test_balanced_non_greddy_private: OK test_balanced_private: OK test_no private: OK test_unbalanced_inner_private: OK test_unbalanced_private: OK test_unbalanced_struct_group_tagged_with_private: OK test_unbalanced_two_struct_group_tagged_first_with_private: OK test_unbalanced_without_end_of_line: OK TestTokenizer: test_basic_tokens: OK test_depth_counters: OK test_mismatch_error: OK Ran 47 tests
Diffstat (limited to 'tools/lib/python/kdoc/kdoc_parser.py')
-rw-r--r--tools/lib/python/kdoc/kdoc_parser.py35
1 files changed, 20 insertions, 15 deletions
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index edf70ba139a5..f6c4ee3b18c9 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,8 @@ import sys
import re
from pprint import pformat
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.c_lex import CTokenizer, tokenizer_set_log
+from kdoc.kdoc_re import KernRe
from kdoc.kdoc_item import KdocItem
#
@@ -84,15 +85,9 @@ def trim_private_members(text):
"""
Remove ``struct``/``enum`` members that have been marked "private".
"""
- # First look for a "public:" block that ends a private region, then
- # handle the "private until the end" case.
- #
- text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
- text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
- #
- # We needed the comments to do the above, but now we can take them out.
- #
- return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+ tokens = CTokenizer(text)
+ return str(tokens)
class state:
"""
@@ -258,6 +253,8 @@ class KernelDoc:
self.config = config
self.xforms = xforms
+ tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
+
# Initial state for the state machines
self.state = state.NORMAL
@@ -726,6 +723,7 @@ class KernelDoc:
#
# Do the basic parse to get the pieces of the declaration.
#
+ proto = trim_private_members(proto)
struct_parts = self.split_struct_proto(proto)
if not struct_parts:
self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
@@ -739,7 +737,6 @@ class KernelDoc:
#
# Go through the list of members applying all of our transformations.
#
- members = trim_private_members(members)
members = self.xforms.apply("struct", members)
#
@@ -766,6 +763,7 @@ class KernelDoc:
# Strip preprocessor directives. Note that this depends on the
# trailing semicolon we added in process_proto_type().
#
+ proto = trim_private_members(proto)
proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
#
# Parse out the name and members of the enum. Typedef form first.
@@ -773,7 +771,7 @@ class KernelDoc:
r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
if r.search(proto):
declaration_name = r.group(2)
- members = trim_private_members(r.group(1))
+ members = r.group(1)
#
# Failing that, look for a straight enum
#
@@ -781,7 +779,7 @@ class KernelDoc:
r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
if r.match(proto):
declaration_name = r.group(1)
- members = trim_private_members(r.group(2))
+ members = r.group(2)
#
# OK, this isn't going to work.
#
@@ -810,9 +808,10 @@ class KernelDoc:
member_set = set()
members = KernRe(r'\([^;)]*\)').sub('', members)
for arg in members.split(','):
- if not arg:
- continue
arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+ if not arg.strip():
+ continue
+
self.entry.parameterlist.append(arg)
if arg not in self.entry.parameterdescs:
self.entry.parameterdescs[arg] = self.undescribed
@@ -1355,6 +1354,12 @@ class KernelDoc:
elif doc_content.search(line):
self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
self.state = state.PROTO
+
+ #
+ # Don't let it add partial comments at the code, as breaks the
+ # logic meant to remove comments from prototypes.
+ #
+ self.process_proto_type(ln, "/**\n" + line)
# else ... ??
def process_inline_text(self, ln, line):