| field | value | date |
|---|---|---|
| author | Jonathan Corbet <corbet@lwn.net> | 2026-03-22 15:06:59 -0600 |
| committer | Jonathan Corbet <corbet@lwn.net> | 2026-03-22 15:06:59 -0600 |
| commit | 781171bec0650c00c642564afcb5cce57abda5bf (patch) | |
| tree | 31e8cf6d41b40bef9f432ebbe4a8180f32af9fbf /tools/lib/python/kdoc/kdoc_parser.py | |
| parent | 6108c809f4fd9dbb1a138ba4326d645cc3113a8d (diff) | |
| parent | 7538df7a2d7d26428803cf8053476169a6d28659 (diff) | |
| download | lwn-781171bec0650c00c642564afcb5cce57abda5bf.tar.gz, lwn-781171bec0650c00c642564afcb5cce57abda5bf.zip | |
Merge branch 'mauro' into docs-mw
Mauro says:
This patch series changes how the kdoc parser handles macro replacements.
Instead of relying heavily on regular expressions, which can sometimes
get very complex, it uses a C lexical tokenizer. This ensures that
BEGIN/END blocks on functions and structs are properly handled,
even when nested.
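The nesting case is where a tokenizer helps most: a single regular expression cannot, in general, match balanced braces, while a scanner that tracks depth handles it naturally. A rough illustration follows (a character-based toy, not the actual CTokenizer API; the real tokenizer works on C tokens):

```python
# Illustrative only: a toy depth-tracking scan, not the real kdoc tokenizer.
def body_of(source, open_brace="{", close_brace="}"):
    """Return the text between the outermost braces, honoring nesting."""
    depth = 0
    start = None
    for i, ch in enumerate(source):
        if ch == open_brace:
            if depth == 0:
                start = i + 1
            depth += 1
        elif ch == close_brace:
            depth -= 1
            if depth == 0:
                return source[start:i]
    raise ValueError("unbalanced braces")

print(body_of("struct a { int x; struct { int y; } inner; };"))
# -> " int x; struct { int y; } inner; "
```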
Comparing the output before and after the patch series, for both man pages
and rst, the only differences were:
- whitespace differences;
- struct_group macros are now shown as inner anonymous structs,
as they should be.
Also, I didn't notice any relevant change in the documentation build
time. In that regard, right now, every time a CMatch replacement
rule takes place, it does:
for each transform:
- tokenize the source code;
- handle CMatch;
- convert the tokens back to a string.
A possible optimization would be to do, instead (sketched below):
- tokenize the source code;
- for each transform, handle CMatch;
- convert the tokens back to a string.
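The difference between the two shapes, sketched with trivial stand-ins (tokenize, detokenize and apply_cmatch here are placeholder names, not the real kdoc API):

```python
# Illustrative only: placeholder tokenizer and transform, just to show the
# difference between one round trip per transform and a single round trip.
def tokenize(source):
    return source.split()

def detokenize(tokens):
    return " ".join(tokens)

def apply_cmatch(tokens, xform):
    old, new = xform
    return [new if tok == old else tok for tok in tokens]

# Current shape: one tokenize/detokenize round trip per transform.
def apply_all_current(source, transforms):
    for xform in transforms:
        tokens = tokenize(source)
        tokens = apply_cmatch(tokens, xform)
        source = detokenize(tokens)
    return source

# Possible optimization: tokenize once, run every transform on the token
# stream, and convert back to a string only once at the end.
def apply_all_optimized(source, transforms):
    tokens = tokenize(source)
    for xform in transforms:
        tokens = apply_cmatch(tokens, xform)
    return detokenize(tokens)

print(apply_all_current("int foo ( void )", [("foo", "bar")]))    # int bar ( void )
print(apply_all_optimized("int foo ( void )", [("foo", "bar")]))  # int bar ( void )
```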
For now, I opted not to do it, because:
- too many changes in a single go;
- the docs build time is ~3:30 minutes, which is
about the same as it was before the changes;
- there is a very dirty hack inside function_xforms:
(KernRe(r"_noprof"), ""). It is meant to change
function prototypes instead of function arguments (see the example below).
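As an illustration of what that rule does to a prototype (kmalloc_noprof is an assumed example, and plain re.sub stands in for the kdoc transform machinery):

```python
# Illustrative only: the (KernRe(r"_noprof"), "") rule strips the "_noprof"
# suffix from the text it is applied to; kmalloc_noprof is an assumed example.
import re

proto = "void *kmalloc_noprof(size_t size, gfp_t flags)"
print(re.sub(r"_noprof", "", proto))
# -> void *kmalloc(size_t size, gfp_t flags)
```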
So, if it's OK with you, I would prefer to merge this one first. We can later
optimize kdoc_parser to avoid multiple token <-> string conversions.
-
One important aspect of this series is that it introduces unit tests
for kernel-doc. I used them a lot during the development of this series,
to ensure that the changes I was making produced the expected
results. The tests are in two separate files that can be executed directly.
Alternatively, there is a run.py script that runs all of them (and
any other Python script named tools/unittests/test_*.py):
$ tools/unittests/run.py
test_cmatch:
TestSearch:
test_search_acquires_multiple: OK
test_search_acquires_nested_paren: OK
test_search_acquires_simple: OK
test_search_must_hold: OK
test_search_must_hold_shared: OK
test_search_no_false_positive: OK
test_search_no_function: OK
test_search_no_macro_remains: OK
TestSubMultipleMacros:
test_acquires_multiple: OK
test_acquires_nested_paren: OK
test_acquires_simple: OK
test_mixed_macros: OK
test_must_hold: OK
test_must_hold_shared: OK
test_no_false_positive: OK
test_no_function: OK
test_no_macro_remains: OK
TestSubSimple:
test_rise_early_greedy: OK
test_rise_multiple_greedy: OK
test_strip_multiple_acquires: OK
test_sub_count_parameter: OK
test_sub_mixed_placeholders: OK
test_sub_multiple_placeholders: OK
test_sub_no_placeholder: OK
test_sub_single_placeholder: OK
test_sub_with_capture: OK
test_sub_zero_placeholder: OK
TestSubWithLocalXforms:
test_functions_with_acquires_and_releases: OK
test_raw_struct_group: OK
test_raw_struct_group_tagged: OK
test_struct_group: OK
test_struct_group_attr: OK
test_struct_group_tagged_with_private: OK
test_struct_kcov: OK
test_vars_stackdepot: OK
test_tokenizer:
TestPublicPrivate:
test_balanced_inner_private: OK
test_balanced_non_greddy_private: OK
test_balanced_private: OK
test_no_private: OK
test_unbalanced_inner_private: OK
test_unbalanced_private: OK
test_unbalanced_struct_group_tagged_with_private: OK
test_unbalanced_two_struct_group_tagged_first_with_private: OK
test_unbalanced_without_end_of_line: OK
TestTokenizer:
test_basic_tokens: OK
test_depth_counters: OK
test_mismatch_error: OK
Ran 47 tests
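run.py itself is not reproduced here; as a rough sketch of what such a runner can look like (illustrative only, assuming the tests are ordinary unittest cases), discovery boils down to:

```python
#!/usr/bin/env python3
# Illustrative only; not the actual tools/unittests/run.py.
# Discovers and runs every test_*.py next to this script using unittest.
import sys
import unittest
from pathlib import Path

def main():
    here = Path(__file__).resolve().parent
    suite = unittest.defaultTestLoader.discover(str(here), pattern="test_*.py")
    result = unittest.TextTestRunner(verbosity=2).run(suite)
    return 0 if result.wasSuccessful() else 1

if __name__ == "__main__":
    sys.exit(main())
```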
Diffstat (limited to 'tools/lib/python/kdoc/kdoc_parser.py')
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_parser.py | 35 |
1 file changed, 20 insertions, 15 deletions
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index edf70ba139a5..f6c4ee3b18c9 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,8 @@
 import sys
 import re
 from pprint import pformat
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.c_lex import CTokenizer, tokenizer_set_log
+from kdoc.kdoc_re import KernRe
 from kdoc.kdoc_item import KdocItem
 
 #
@@ -84,15 +85,9 @@ def trim_private_members(text):
     """
     Remove ``struct``/``enum`` members that have been marked "private".
     """
-    # First look for a "public:" block that ends a private region, then
-    # handle the "private until the end" case.
-    #
-    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
-    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
-    #
-    # We needed the comments to do the above, but now we can take them out.
-    #
-    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+    tokens = CTokenizer(text)
+    return str(tokens)
 
 class state:
     """
@@ -258,6 +253,8 @@ class KernelDoc:
         self.config = config
         self.xforms = xforms
 
+        tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
+
         # Initial state for the state machines
         self.state = state.NORMAL
 
@@ -726,6 +723,7 @@ class KernelDoc:
         #
         # Do the basic parse to get the pieces of the declaration.
         #
+        proto = trim_private_members(proto)
         struct_parts = self.split_struct_proto(proto)
         if not struct_parts:
             self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
@@ -739,7 +737,6 @@ class KernelDoc:
         #
         # Go through the list of members applying all of our transformations.
         #
-        members = trim_private_members(members)
         members = self.xforms.apply("struct", members)
 
         #
@@ -766,6 +763,7 @@ class KernelDoc:
         # Strip preprocessor directives. Note that this depends on the
         # trailing semicolon we added in process_proto_type().
         #
+        proto = trim_private_members(proto)
         proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
         #
         # Parse out the name and members of the enum. Typedef form first.
@@ -773,7 +771,7 @@ class KernelDoc:
         r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
         if r.search(proto):
             declaration_name = r.group(2)
-            members = trim_private_members(r.group(1))
+            members = r.group(1)
         #
         # Failing that, look for a straight enum
         #
@@ -781,7 +779,7 @@ class KernelDoc:
             r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
             if r.match(proto):
                 declaration_name = r.group(1)
-                members = trim_private_members(r.group(2))
+                members = r.group(2)
         #
         # OK, this isn't going to work.
         #
@@ -810,9 +808,10 @@ class KernelDoc:
         member_set = set()
         members = KernRe(r'\([^;)]*\)').sub('', members)
         for arg in members.split(','):
-            if not arg:
-                continue
             arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+            if not arg.strip():
+                continue
+
             self.entry.parameterlist.append(arg)
             if arg not in self.entry.parameterdescs:
                 self.entry.parameterdescs[arg] = self.undescribed
@@ -1355,6 +1354,12 @@ class KernelDoc:
         elif doc_content.search(line):
             self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
             self.state = state.PROTO
+
+            #
+            # Don't let it add partial comments at the code, as breaks the
+            # logic meant to remove comments from prototypes.
+            #
+            self.process_proto_type(ln, "/**\n" + line)
         # else ... ??
 
     def process_inline_text(self, ln, line):
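For reference, the "private" markers that trim_private_members handles are plain C comments inside a struct body. A hedged, unittest-style sketch of the expected behaviour (illustrative names, not one of the test files listed above; the import path is assumed):

```python
# Illustrative only: members between "/* private: */" and "/* public: */" (or
# until the end of the struct) are expected to be dropped, along with the
# comments themselves, matching what the old regex-based code did.
import unittest

SOURCE = """
struct foo {
    int visible;
    /* private: */
    int hidden;
    /* public: */
    int also_visible;
};
"""

class TestTrimPrivateMembers(unittest.TestCase):
    def test_private_block_is_removed(self):
        from kdoc.kdoc_parser import trim_private_members  # assumed import path
        trimmed = trim_private_members(SOURCE)
        self.assertIn("visible", trimmed)
        self.assertIn("also_visible", trimmed)
        self.assertNotIn("hidden", trimmed)

if __name__ == "__main__":
    unittest.main()
```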
