diff options
Diffstat (limited to 'scripts')
l---------[-rwxr-xr-x] | scripts/kernel-doc | 2440 | ||||
-rwxr-xr-x | scripts/kernel-doc.pl | 2439 | ||||
-rwxr-xr-x | scripts/kernel-doc.py | 315 | ||||
-rw-r--r-- | scripts/lib/kdoc/kdoc_files.py | 282 | ||||
-rwxr-xr-x | scripts/lib/kdoc/kdoc_output.py | 793 | ||||
-rwxr-xr-x | scripts/lib/kdoc/kdoc_parser.py | 1715 | ||||
-rwxr-xr-x | scripts/lib/kdoc/kdoc_re.py | 273 |
7 files changed, 5818 insertions, 2439 deletions
diff --git a/scripts/kernel-doc b/scripts/kernel-doc index af6cf408b96d..3b6ef807791a 100755..120000 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1,2439 +1 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 -# vim: softtabstop=4 - -use warnings; -use strict; - -## Copyright (c) 1998 Michael Zucchi, All Rights Reserved ## -## Copyright (C) 2000, 1 Tim Waugh <twaugh@redhat.com> ## -## Copyright (C) 2001 Simon Huggins ## -## Copyright (C) 2005-2012 Randy Dunlap ## -## Copyright (C) 2012 Dan Luedtke ## -## ## -## #define enhancements by Armin Kuster <akuster@mvista.com> ## -## Copyright (c) 2000 MontaVista Software, Inc. ## -# -# Copyright (C) 2022 Tomasz Warniełło (POD) - -use Pod::Usage qw/pod2usage/; - -=head1 NAME - -kernel-doc - Print formatted kernel documentation to stdout - -=head1 SYNOPSIS - - kernel-doc [-h] [-v] [-Werror] [-Wall] [-Wreturn] [-Wshort-desc[ription]] [-Wcontents-before-sections] - [ -man | - -rst [-enable-lineno] | - -none - ] - [ - -export | - -internal | - [-function NAME] ... | - [-nosymbol NAME] ... - ] - [-no-doc-sections] - [-export-file FILE] ... - FILE ... - -Run `kernel-doc -h` for details. - -=head1 DESCRIPTION - -Read C language source or header FILEs, extract embedded documentation comments, -and print formatted documentation to standard output. - -The documentation comments are identified by the "/**" opening comment mark. - -See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. - -=cut - -# more perldoc at the end of the file - -## init lots of data - -my $errors = 0; -my $warnings = 0; -my $anon_struct_union = 0; - -# match expressions used to find embedded type information -my $type_constant = '\b``([^\`]+)``\b'; -my $type_constant2 = '\%([-_*\w]+)'; -my $type_func = '(\w+)\(\)'; -my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; -my $type_param_ref = '([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; -my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params -my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params -my $type_env = '(\$\w+)'; -my $type_enum = '\&(enum\s*([_\w]+))'; -my $type_struct = '\&(struct\s*([_\w]+))'; -my $type_typedef = '\&(typedef\s*([_\w]+))'; -my $type_union = '\&(union\s*([_\w]+))'; -my $type_member = '\&([_\w]+)(\.|->)([_\w]+)'; -my $type_fallback = '\&([_\w]+)'; -my $type_member_func = $type_member . '\(\)'; - -# Output conversion substitutions. -# One for each output format - -# these are pretty rough -my @highlights_man = ( - [$type_constant, "\$1"], - [$type_constant2, "\$1"], - [$type_func, "\\\\fB\$1\\\\fP"], - [$type_enum, "\\\\fI\$1\\\\fP"], - [$type_struct, "\\\\fI\$1\\\\fP"], - [$type_typedef, "\\\\fI\$1\\\\fP"], - [$type_union, "\\\\fI\$1\\\\fP"], - [$type_param, "\\\\fI\$1\\\\fP"], - [$type_param_ref, "\\\\fI\$1\$2\\\\fP"], - [$type_member, "\\\\fI\$1\$2\$3\\\\fP"], - [$type_fallback, "\\\\fI\$1\\\\fP"] - ); -my $blankline_man = ""; - -# rst-mode -my @highlights_rst = ( - [$type_constant, "``\$1``"], - [$type_constant2, "``\$1``"], - - # Note: need to escape () to avoid func matching later - [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"], - [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"], - [$type_fp_param, "**\$1\\\\(\\\\)**"], - [$type_fp_param2, "**\$1\\\\(\\\\)**"], - [$type_func, "\$1()"], - [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"], - - # in rst this can refer to any type - [$type_fallback, "\\:c\\:type\\:`\$1`"], - [$type_param_ref, "**\$1\$2**"] - ); -my $blankline_rst = "\n"; - -# read arguments -if ($#ARGV == -1) { - pod2usage( - -message => "No arguments!\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); -} - -my $kernelversion; - -my $dohighlight = ""; - -my $verbose = 0; -my $Werror = 0; -my $Wreturn = 0; -my $Wshort_desc = 0; -my $output_mode = "rst"; -my $output_preformatted = 0; -my $no_doc_sections = 0; -my $enable_lineno = 0; -my @highlights = @highlights_rst; -my $blankline = $blankline_rst; -my $modulename = "Kernel API"; - -use constant { - OUTPUT_ALL => 0, # output all symbols and doc sections - OUTPUT_INCLUDE => 1, # output only specified symbols - OUTPUT_EXPORTED => 2, # output exported symbols - OUTPUT_INTERNAL => 3, # output non-exported symbols -}; -my $output_selection = OUTPUT_ALL; -my $show_not_found = 0; # No longer used - -my @export_file_list; - -my @build_time; -if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) && - (my $seconds = `date -d "${ENV{'KBUILD_BUILD_TIMESTAMP'}}" +%s`) ne '') { - @build_time = gmtime($seconds); -} else { - @build_time = localtime; -} - -my $man_date = ('January', 'February', 'March', 'April', 'May', 'June', - 'July', 'August', 'September', 'October', - 'November', 'December')[$build_time[4]] . - " " . ($build_time[5]+1900); - -# Essentially these are globals. -# They probably want to be tidied up, made more localised or something. -# CAVEAT EMPTOR! Some of the others I localised may not want to be, which -# could cause "use of undefined value" or other bugs. -my ($function, %function_table, %parametertypes, $declaration_purpose); -my %nosymbol_table = (); -my $declaration_start_line; -my ($type, $declaration_name, $return_type); -my ($newsection, $newcontents, $prototype, $brcount); - -if (defined($ENV{'KBUILD_VERBOSE'}) && $ENV{'KBUILD_VERBOSE'} =~ '1') { - $verbose = 1; -} - -if (defined($ENV{'KCFLAGS'})) { - my $kcflags = "$ENV{'KCFLAGS'}"; - - if ($kcflags =~ /(\s|^)-Werror(\s|$)/) { - $Werror = 1; - } -} - -# reading this variable is for backwards compat just in case -# someone was calling it with the variable from outside the -# kernel's build system -if (defined($ENV{'KDOC_WERROR'})) { - $Werror = "$ENV{'KDOC_WERROR'}"; -} -# other environment variables are converted to command-line -# arguments in cmd_checkdoc in the build system - -# Generated docbook code is inserted in a template at a point where -# docbook v3.1 requires a non-zero sequence of RefEntry's; see: -# https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html -# We keep track of number of generated entries and generate a dummy -# if needs be to ensure the expanded template can be postprocessed -# into html. -my $section_counter = 0; - -my $lineprefix=""; - -# Parser states -use constant { - STATE_NORMAL => 0, # normal code - STATE_NAME => 1, # looking for function name - STATE_BODY_MAYBE => 2, # body - or maybe more description - STATE_BODY => 3, # the body of the comment - STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line - STATE_PROTO => 5, # scanning prototype - STATE_DOCBLOCK => 6, # documentation block - STATE_INLINE => 7, # gathering doc outside main block -}; -my $state; -my $leading_space; - -# Inline documentation state -use constant { - STATE_INLINE_NA => 0, # not applicable ($state != STATE_INLINE) - STATE_INLINE_NAME => 1, # looking for member name (@foo:) - STATE_INLINE_TEXT => 2, # looking for member documentation - STATE_INLINE_END => 3, # done - STATE_INLINE_ERROR => 4, # error - Comment without header was found. - # Spit a warning as it's not - # proper kernel-doc and ignore the rest. -}; -my $inline_doc_state; - -#declaration types: can be -# 'function', 'struct', 'union', 'enum', 'typedef' -my $decl_type; - -# Name of the kernel-doc identifier for non-DOC markups -my $identifier; - -my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start. -my $doc_end = '\*/'; -my $doc_com = '\s*\*\s*'; -my $doc_com_body = '\s*\* ?'; -my $doc_decl = $doc_com . '(\w+)'; -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -my $doc_sect = $doc_com . - '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$'; -my $doc_content = $doc_com_body . '(.*)'; -my $doc_block = $doc_com . 'DOC:\s*(.*)?'; -my $doc_inline_start = '^\s*/\*\*\s*$'; -my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; -my $doc_inline_end = '^\s*\*/\s*$'; -my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; -my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; -my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*;'; -my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; -my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; - -my %parameterdescs; -my %parameterdesc_start_lines; -my @parameterlist; -my %sections; -my @sectionlist; -my %section_start_lines; -my $sectcheck; -my $struct_actual; - -my $contents = ""; -my $new_start_line = 0; - -# the canonical section names. see also $doc_sect above. -my $section_default = "Description"; # default section -my $section_intro = "Introduction"; -my $section = $section_default; -my $section_context = "Context"; -my $section_return = "Return"; - -my $undescribed = "-- undescribed --"; - -reset_state(); - -while ($ARGV[0] =~ m/^--?(.*)/) { - my $cmd = $1; - shift @ARGV; - if ($cmd eq "man") { - $output_mode = "man"; - @highlights = @highlights_man; - $blankline = $blankline_man; - } elsif ($cmd eq "rst") { - $output_mode = "rst"; - @highlights = @highlights_rst; - $blankline = $blankline_rst; - } elsif ($cmd eq "none") { - $output_mode = "none"; - } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document - $modulename = shift @ARGV; - } elsif ($cmd eq "function") { # to only output specific functions - $output_selection = OUTPUT_INCLUDE; - $function = shift @ARGV; - $function_table{$function} = 1; - } elsif ($cmd eq "nosymbol") { # Exclude specific symbols - my $symbol = shift @ARGV; - $nosymbol_table{$symbol} = 1; - } elsif ($cmd eq "export") { # only exported symbols - $output_selection = OUTPUT_EXPORTED; - %function_table = (); - } elsif ($cmd eq "internal") { # only non-exported symbols - $output_selection = OUTPUT_INTERNAL; - %function_table = (); - } elsif ($cmd eq "export-file") { - my $file = shift @ARGV; - push(@export_file_list, $file); - } elsif ($cmd eq "v") { - $verbose = 1; - } elsif ($cmd eq "Werror") { - $Werror = 1; - } elsif ($cmd eq "Wreturn") { - $Wreturn = 1; - } elsif ($cmd eq "Wshort-desc" or $cmd eq "Wshort-description") { - $Wshort_desc = 1; - } elsif ($cmd eq "Wall") { - $Wreturn = 1; - $Wshort_desc = 1; - } elsif (($cmd eq "h") || ($cmd eq "help")) { - pod2usage(-exitval => 0, -verbose => 2); - } elsif ($cmd eq 'no-doc-sections') { - $no_doc_sections = 1; - } elsif ($cmd eq 'enable-lineno') { - $enable_lineno = 1; - } elsif ($cmd eq 'show-not-found') { - $show_not_found = 1; # A no-op but don't fail - } else { - # Unknown argument - pod2usage( - -message => "Argument unknown!\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); - } - if ($#ARGV < 0){ - pod2usage( - -message => "FILE argument missing\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); - } -} - -# continue execution near EOF; - -sub findprog($) -{ - foreach(split(/:/, $ENV{PATH})) { - return "$_/$_[0]" if(-x "$_/$_[0]"); - } -} - -# get kernel version from env -sub get_kernel_version() { - my $version = 'unknown kernel version'; - - if (defined($ENV{'KERNELVERSION'})) { - $version = $ENV{'KERNELVERSION'}; - } - return $version; -} - -# -sub print_lineno { - my $lineno = shift; - if ($enable_lineno && defined($lineno)) { - print ".. LINENO " . $lineno . "\n"; - } -} - -sub emit_warning { - my $location = shift; - my $msg = shift; - print STDERR "$location: warning: $msg"; - ++$warnings; -} -## -# dumps section contents to arrays/hashes intended for that purpose. -# -sub dump_section { - my $file = shift; - my $name = shift; - my $contents = join "\n", @_; - - if ($name =~ m/$type_param/) { - $name = $1; - $parameterdescs{$name} = $contents; - $sectcheck = $sectcheck . $name . " "; - $parameterdesc_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } elsif ($name eq "@\.\.\.") { - $name = "..."; - $parameterdescs{$name} = $contents; - $sectcheck = $sectcheck . $name . " "; - $parameterdesc_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } else { - if (defined($sections{$name}) && ($sections{$name} ne "")) { - # Only warn on user specified duplicate section names. - if ($name ne $section_default) { - emit_warning("${file}:$.", "duplicate section name '$name'\n"); - } - $sections{$name} .= $contents; - } else { - $sections{$name} = $contents; - push @sectionlist, $name; - $section_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } - } -} - -## -# dump DOC: section after checking that it should go out -# -sub dump_doc_section { - my $file = shift; - my $name = shift; - my $contents = join "\n", @_; - - if ($no_doc_sections) { - return; - } - - return if (defined($nosymbol_table{$name})); - - if (($output_selection == OUTPUT_ALL) || - (($output_selection == OUTPUT_INCLUDE) && - defined($function_table{$name}))) - { - dump_section($file, $name, $contents); - output_blockhead({'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'module' => $modulename, - 'content-only' => ($output_selection != OUTPUT_ALL), }); - } -} - -## -# output function -# -# parameterdescs, a hash. -# function => "function name" -# parameterlist => @list of parameters -# parameterdescs => %parameter descriptions -# sectionlist => @list of sections -# sections => %section descriptions -# - -sub output_highlight { - my $contents = join "\n",@_; - my $line; - -# DEBUG -# if (!defined $contents) { -# use Carp; -# confess "output_highlight got called with no args?\n"; -# } - -# print STDERR "contents b4:$contents\n"; - eval $dohighlight; - die $@ if $@; -# print STDERR "contents af:$contents\n"; - - foreach $line (split "\n", $contents) { - if (! $output_preformatted) { - $line =~ s/^\s*//; - } - if ($line eq ""){ - if (! $output_preformatted) { - print $lineprefix, $blankline; - } - } else { - if ($output_mode eq "man" && substr($line, 0, 1) eq ".") { - print "\\&$line"; - } else { - print $lineprefix, $line; - } - } - print "\n"; - } -} - -## -# output function in man -sub output_function_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - my $func_macro = $args{'func_macro'}; - my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - - print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n"; - - print ".SH NAME\n"; - print $args{'function'} . " \\- " . $args{'purpose'} . "\n"; - - print ".SH SYNOPSIS\n"; - if ($args{'functiontype'} ne "") { - print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n"; - } else { - print ".B \"" . $args{'function'} . "\n"; - } - $count = 0; - my $parenth = "("; - my $post = ","; - foreach my $parameter (@{$args{'parameterlist'}}) { - if ($count == $#{$args{'parameterlist'}}) { - $post = ");"; - } - $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/$function_pointer/) { - # pointer-to-function - print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; - } else { - $type =~ s/([^\*])$/$1 /; - print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n"; - } - $count++; - $parenth = ""; - } - - $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - if ($paramcount >= 0) { - print ".SH ARGUMENTS\n"; - } - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - print ".IP \"" . $parameter . "\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"", uc $section, "\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output enum in man -sub output_enum_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - - print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n"; - - print ".SH SYNOPSIS\n"; - print "enum " . $args{'enum'} . " {\n"; - $count = 0; - foreach my $parameter (@{$args{'parameterlist'}}) { - print ".br\n.BI \" $parameter\"\n"; - if ($count == $#{$args{'parameterlist'}}) { - print "\n};\n"; - last; - } else { - print ", \n.br\n"; - } - $count++; - } - - print ".SH Constants\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - print ".IP \"" . $parameter . "\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output struct in man -sub output_struct_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n"; - - my $declaration = $args{'definition'}; - $declaration =~ s/\t/ /g; - $declaration =~ s/\n/"\n.br\n.BI \"/g; - print ".SH SYNOPSIS\n"; - print $args{'type'} . " " . $args{'struct'} . " {\n.br\n"; - print ".BI \"$declaration\n};\n.br\n\n"; - - print ".SH Members\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - ($parameter =~ /^#/) && next; - - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; - print ".IP \"" . $parameter . "\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output typedef in man -sub output_typedef_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n"; - - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -sub output_blockhead_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - - print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output in restructured text -# - -# -# This could use some work; it's used to output the DOC: sections, and -# starts by putting out the name of the doc section itself, but that tends -# to duplicate a header already in the template file. -# -sub output_blockhead_rst(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - foreach $section (@{$args{'sectionlist'}}) { - next if (defined($nosymbol_table{$section})); - - if ($output_selection != OUTPUT_INCLUDE) { - print ".. _$section:\n\n"; - print "**$section**\n\n"; - } - print_lineno($section_start_lines{$section}); - output_highlight_rst($args{'sections'}{$section}); - print "\n"; - } -} - -# -# Apply the RST highlights to a sub-block of text. -# -sub highlight_block($) { - # The dohighlight kludge requires the text be called $contents - my $contents = shift; - eval $dohighlight; - die $@ if $@; - return $contents; -} - -# -# Regexes used only here. -# -my $sphinx_literal = '^[^.].*::$'; -my $sphinx_cblock = '^\.\.\ +code-block::'; - -sub output_highlight_rst { - my $input = join "\n",@_; - my $output = ""; - my $line; - my $in_literal = 0; - my $litprefix; - my $block = ""; - - foreach $line (split "\n",$input) { - # - # If we're in a literal block, see if we should drop out - # of it. Otherwise pass the line straight through unmunged. - # - if ($in_literal) { - if (! ($line =~ /^\s*$/)) { - # - # If this is the first non-blank line in a literal - # block we need to figure out what the proper indent is. - # - if ($litprefix eq "") { - $line =~ /^(\s*)/; - $litprefix = '^' . $1; - $output .= $line . "\n"; - } elsif (! ($line =~ /$litprefix/)) { - $in_literal = 0; - } else { - $output .= $line . "\n"; - } - } else { - $output .= $line . "\n"; - } - } - # - # Not in a literal block (or just dropped out) - # - if (! $in_literal) { - $block .= $line . "\n"; - if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) { - $in_literal = 1; - $litprefix = ""; - $output .= highlight_block($block); - $block = "" - } - } - } - - if ($block) { - $output .= highlight_block($block); - } - - $output =~ s/^\n+//g; - $output =~ s/\n+$//g; - - foreach $line (split "\n", $output) { - print $lineprefix . $line . "\n"; - } -} - -sub output_function_rst(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $oldprefix = $lineprefix; - - my $signature = ""; - my $func_macro = $args{'func_macro'}; - my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - - if ($func_macro) { - $signature = $args{'function'}; - } else { - if ($args{'functiontype'}) { - $signature = $args{'functiontype'} . " "; - } - $signature .= $args{'function'} . " ("; - } - - my $count = 0; - foreach my $parameter (@{$args{'parameterlist'}}) { - if ($count ne 0) { - $signature .= ", "; - } - $count++; - $type = $args{'parametertypes'}{$parameter}; - - if ($type =~ m/$function_pointer/) { - # pointer-to-function - $signature .= $1 . $parameter . ") (" . $2 . ")"; - } else { - $signature .= $type; - } - } - - if (!$func_macro) { - $signature .= ")"; - } - - if ($args{'typedef'} || $args{'functiontype'} eq "") { - print ".. c:macro:: ". $args{'function'} . "\n\n"; - - if ($args{'typedef'}) { - print_lineno($declaration_start_line); - print " **Typedef**: "; - $lineprefix = ""; - output_highlight_rst($args{'purpose'}); - print "\n\n**Syntax**\n\n"; - print " ``$signature``\n\n"; - } else { - print "``$signature``\n\n"; - } - } else { - print ".. c:function:: $signature\n\n"; - } - - if (!$args{'typedef'}) { - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - } - - # - # Put our descriptive text into a container (thus an HTML <div>) to help - # set the function prototypes apart. - # - $lineprefix = " "; - if ($paramcount >= 0) { - print ".. container:: kernelindent\n\n"; - print $lineprefix . "**Parameters**\n\n"; - } - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - $type = $args{'parametertypes'}{$parameter}; - - if ($type ne "") { - print $lineprefix . "``$type``\n"; - } else { - print $lineprefix . "``$parameter``\n"; - } - - print_lineno($parameterdesc_start_lines{$parameter_name}); - - $lineprefix = " "; - if (defined($args{'parameterdescs'}{$parameter_name}) && - $args{'parameterdescs'}{$parameter_name} ne $undescribed) { - output_highlight_rst($args{'parameterdescs'}{$parameter_name}); - } else { - print $lineprefix . "*undescribed*\n"; - } - $lineprefix = " "; - print "\n"; - } - - output_section_rst(@_); - $lineprefix = $oldprefix; -} - -sub output_section_rst(%) { - my %args = %{$_[0]}; - my $section; - my $oldprefix = $lineprefix; - - foreach $section (@{$args{'sectionlist'}}) { - print $lineprefix . "**$section**\n\n"; - print_lineno($section_start_lines{$section}); - output_highlight_rst($args{'sections'}{$section}); - print "\n"; - } - print "\n"; -} - -sub output_enum_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - my $count; - my $outer; - - my $name = $args{'enum'}; - print "\n\n.. c:enum:: " . $name . "\n\n"; - - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - print ".. container:: kernelindent\n\n"; - $outer = $lineprefix . " "; - $lineprefix = $outer . " "; - print $outer . "**Constants**\n\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - print $outer . "``$parameter``\n"; - - if ($args{'parameterdescs'}{$parameter} ne $undescribed) { - output_highlight_rst($args{'parameterdescs'}{$parameter}); - } else { - print $lineprefix . "*undescribed*\n"; - } - print "\n"; - } - print "\n"; - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -sub output_typedef_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - my $name; - - $name = $args{'typedef'}; - - print "\n\n.. c:type:: " . $name . "\n\n"; - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -sub output_struct_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - - my $name = $args{'struct'}; - if ($args{'type'} eq 'union') { - print "\n\n.. c:union:: " . $name . "\n\n"; - } else { - print "\n\n.. c:struct:: " . $name . "\n\n"; - } - - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - print ".. container:: kernelindent\n\n"; - print $lineprefix . "**Definition**::\n\n"; - my $declaration = $args{'definition'}; - $lineprefix = $lineprefix . " "; - $declaration =~ s/\t/$lineprefix/g; - print $lineprefix . $args{'type'} . " " . $args{'struct'} . " {\n$declaration" . $lineprefix . "};\n\n"; - - $lineprefix = " "; - print $lineprefix . "**Members**\n\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - ($parameter =~ /^#/) && next; - - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; - $type = $args{'parametertypes'}{$parameter}; - print_lineno($parameterdesc_start_lines{$parameter_name}); - print $lineprefix . "``" . $parameter . "``\n"; - $lineprefix = " "; - output_highlight_rst($args{'parameterdescs'}{$parameter_name}); - $lineprefix = " "; - print "\n"; - } - print "\n"; - - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -## none mode output functions - -sub output_function_none(%) { -} - -sub output_enum_none(%) { -} - -sub output_typedef_none(%) { -} - -sub output_struct_none(%) { -} - -sub output_blockhead_none(%) { -} - -## -# generic output function for all types (function, struct/union, typedef, enum); -# calls the generated, variable output_ function name based on -# functype and output_mode -sub output_declaration { - no strict 'refs'; - my $name = shift; - my $functype = shift; - my $func = "output_${functype}_$output_mode"; - - return if (defined($nosymbol_table{$name})); - - if (($output_selection == OUTPUT_ALL) || - (($output_selection == OUTPUT_INCLUDE || - $output_selection == OUTPUT_EXPORTED) && - defined($function_table{$name})) || - ($output_selection == OUTPUT_INTERNAL && - !($functype eq "function" && defined($function_table{$name})))) - { - &$func(@_); - $section_counter++; - } -} - -## -# generic output function - calls the right one based on current output mode. -sub output_blockhead { - no strict 'refs'; - my $func = "output_blockhead_" . $output_mode; - &$func(@_); - $section_counter++; -} - -## -# takes a declaration (struct, union, enum, typedef) and -# invokes the right handler. NOT called for functions. -sub dump_declaration($$) { - no strict 'refs'; - my ($prototype, $file) = @_; - my $func = "dump_" . $decl_type; - &$func(@_); -} - -sub dump_union($$) { - dump_struct(@_); -} - -sub dump_struct($$) { - my $x = shift; - my $file = shift; - my $decl_type; - my $members; - my $type = qr{struct|union}; - # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; - my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; - my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; - - if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { - $decl_type = $1; - $declaration_name = $2; - $members = $3; - } elsif ($x =~ /typedef\s+($type)\s*$definition_body\s*(\w+)\s*;/) { - $decl_type = $1; - $declaration_name = $3; - $members = $2; - } - - if ($members) { - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n"); - return; - } - - # ignore members marked private: - $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; - $members =~ s/\/\*\s*private:.*//gosi; - # strip comments: - $members =~ s/\/\*.*?\*\///gos; - # strip attributes - $members =~ s/\s*$attribute/ /gi; - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; - $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos; - $members =~ s/\s*__counted_by_(le|be)\s*\([^;]*\)/ /gos; - $members =~ s/\s*__packed\s*/ /gos; - $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; - $members =~ s/\s*____cacheline_aligned/ /gos; - # unwrap struct_group(): - # - first eat non-declaration parameters and rewrite for final match - # - then remove macro, outer parens, and trailing semicolon - $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos; - $members =~ s/\bstruct_group_attr\s*\(([^,]*,){2}/STRUCT_GROUP(/gos; - $members =~ s/\bstruct_group_tagged\s*\(([^,]*),([^,]*),/struct $1 $2; STRUCT_GROUP(/gos; - $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos; - $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos; - - my $args = qr{([^,)]+)}; - # replace DECLARE_BITMAP - $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos; - $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; - # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; - # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; - # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; - # replace DECLARE_FLEX_ARRAY - $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos; - #replace DEFINE_DMA_UNMAP_ADDR - $members =~ s/DEFINE_DMA_UNMAP_ADDR\s*\($args\)/dma_addr_t $1/gos; - #replace DEFINE_DMA_UNMAP_LEN - $members =~ s/DEFINE_DMA_UNMAP_LEN\s*\($args\)/__u32 $1/gos; - my $declaration = $members; - - # Split nested struct/union elements as newer ones - while ($members =~ m/$struct_members/) { - my $newmember; - my $maintype = $1; - my $ids = $4; - my $content = $3; - foreach my $id(split /,/, $ids) { - $newmember .= "$maintype $id; "; - - $id =~ s/[:\[].*//; - $id =~ s/^\s*\**(\S+)\s*/$1/; - foreach my $arg (split /;/, $content) { - next if ($arg =~ m/^\s*$/); - if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) { - # pointer-to-function - my $type = $1; - my $name = $2; - my $extra = $3; - next if (!$name); - if ($id =~ m/^\s*$/) { - # anonymous struct/union - $newmember .= "$type$name$extra; "; - } else { - $newmember .= "$type$id.$name$extra; "; - } - } else { - my $type; - my $names; - $arg =~ s/^\s+//; - $arg =~ s/\s+$//; - # Handle bitmaps - $arg =~ s/:\s*\d+\s*//g; - # Handle arrays - $arg =~ s/\[.*\]//g; - # The type may have multiple words, - # and multiple IDs can be defined, like: - # const struct foo, *bar, foobar - # So, we remove spaces when parsing the - # names, in order to match just names - # and commas for the names - $arg =~ s/\s*,\s*/,/g; - if ($arg =~ m/(.*)\s+([\S+,]+)/) { - $type = $1; - $names = $2; - } else { - $newmember .= "$arg; "; - next; - } - foreach my $name (split /,/, $names) { - $name =~ s/^\s*\**(\S+)\s*/$1/; - next if (($name =~ m/^\s*$/)); - if ($id =~ m/^\s*$/) { - # anonymous struct/union - $newmember .= "$type $name; "; - } else { - $newmember .= "$type $id.$name; "; - } - } - } - } - } - $members =~ s/$struct_members/$newmember/; - } - - # Ignore other nested elements, like enums - $members =~ s/(\{[^\{\}]*\})//g; - - create_parameterlist($members, ';', $file, $declaration_name); - check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual); - - # Adjust declaration for better display - $declaration =~ s/([\{;])/$1\n/g; - $declaration =~ s/\}\s+;/};/g; - # Better handle inlined enums - do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/); - - my @def_args = split /\n/, $declaration; - my $level = 1; - $declaration = ""; - foreach my $clause (@def_args) { - $clause =~ s/^\s+//; - $clause =~ s/\s+$//; - $clause =~ s/\s+/ /; - next if (!$clause); - $level-- if ($clause =~ m/(\})/ && $level > 1); - if (!($clause =~ m/^\s*#/)) { - $declaration .= "\t" x $level; - } - $declaration .= "\t" . $clause . "\n"; - $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/)); - } - output_declaration($declaration_name, - 'struct', - {'struct' => $declaration_name, - 'module' => $modulename, - 'definition' => $declaration, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'type' => $decl_type - }); - } else { - print STDERR "${file}:$.: error: Cannot parse struct or union!\n"; - ++$errors; - } -} - - -sub show_warnings($$) { - my $functype = shift; - my $name = shift; - - return 0 if (defined($nosymbol_table{$name})); - - return 1 if ($output_selection == OUTPUT_ALL); - - if ($output_selection == OUTPUT_EXPORTED) { - if (defined($function_table{$name})) { - return 1; - } else { - return 0; - } - } - if ($output_selection == OUTPUT_INTERNAL) { - if (!($functype eq "function" && defined($function_table{$name}))) { - return 1; - } else { - return 0; - } - } - if ($output_selection == OUTPUT_INCLUDE) { - if (defined($function_table{$name})) { - return 1; - } else { - return 0; - } - } - die("Please add the new output type at show_warnings()"); -} - -sub dump_enum($$) { - my $x = shift; - my $file = shift; - my $members; - - # ignore members marked private: - $x =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; - $x =~ s/\/\*\s*private:.*}/}/gosi; - - $x =~ s@/\*.*?\*/@@gos; # strip comments. - # strip #define macros inside enums - $x =~ s@#\s*((define|ifdef|if)\s+|endif)[^;]*;@@gos; - - if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) { - $declaration_name = $2; - $members = $1; - } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) { - $declaration_name = $1; - $members = $2; - } - - if ($members) { - if ($identifier ne $declaration_name) { - if ($identifier eq "") { - emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n"); - } else { - emit_warning("${file}:$.", "expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n"); - } - return; - } - $declaration_name = "(anonymous)" if ($declaration_name eq ""); - - my %_members; - - $members =~ s/\s+$//; - $members =~ s/\([^;]*?[\)]//g; - - foreach my $arg (split ',', $members) { - $arg =~ s/^\s*(\w+).*/$1/; - push @parameterlist, $arg; - if (!$parameterdescs{$arg}) { - $parameterdescs{$arg} = $undescribed; - if (show_warnings("enum", $declaration_name)) { - emit_warning("${file}:$.", "Enum value '$arg' not described in enum '$declaration_name'\n"); - } - } - $_members{$arg} = 1; - } - - while (my ($k, $v) = each %parameterdescs) { - if (!exists($_members{$k})) { - if (show_warnings("enum", $declaration_name)) { - emit_warning("${file}:$.", "Excess enum value '$k' description in '$declaration_name'\n"); - } - } - } - - output_declaration($declaration_name, - 'enum', - {'enum' => $declaration_name, - 'module' => $modulename, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - } else { - print STDERR "${file}:$.: error: Cannot parse enum!\n"; - ++$errors; - } -} - -my $typedef_type = qr { ((?:\s+[\w\*]+\b){1,8})\s* }x; -my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x; -my $typedef_args = qr { \s*\((.*)\); }x; - -my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x; -my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x; - -sub dump_typedef($$) { - my $x = shift; - my $file = shift; - - $x =~ s@/\*.*?\*/@@gos; # strip comments. - - # Parse function typedef prototypes - if ($x =~ $typedef1 || $x =~ $typedef2) { - $return_type = $1; - $declaration_name = $2; - my $args = $3; - $return_type =~ s/^\s+//; - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"); - return; - } - - create_parameterlist($args, ',', $file, $declaration_name); - - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'typedef' => 1, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - return; - } - - while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) { - $x =~ s/\(*.\)\s*;$/;/; - $x =~ s/\[*.\]\s*;$/;/; - } - - if ($x =~ /typedef.*\s+(\w+)\s*;/) { - $declaration_name = $1; - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"); - return; - } - - output_declaration($declaration_name, - 'typedef', - {'typedef' => $declaration_name, - 'module' => $modulename, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - } else { - print STDERR "${file}:$.: error: Cannot parse typedef!\n"; - ++$errors; - } -} - -sub save_struct_actual($) { - my $actual = shift; - - # strip all spaces from the actual param so that it looks like one string item - $actual =~ s/\s*//g; - $struct_actual = $struct_actual . $actual . " "; -} - -sub create_parameterlist($$$$) { - my $args = shift; - my $splitter = shift; - my $file = shift; - my $declaration_name = shift; - my $type; - my $param; - - # temporarily replace commas inside function pointer definition - my $arg_expr = qr{\([^\),]+}; - while ($args =~ /$arg_expr,/) { - $args =~ s/($arg_expr),/$1#/g; - } - - foreach my $arg (split($splitter, $args)) { - # strip comments - $arg =~ s/\/\*.*\*\///; - # ignore argument attributes - $arg =~ s/\sPOS0?\s/ /; - # strip leading/trailing spaces - $arg =~ s/^\s*//; - $arg =~ s/\s*$//; - $arg =~ s/\s+/ /; - - if ($arg =~ /^#/) { - # Treat preprocessor directive as a typeless variable just to fill - # corresponding data structures "correctly". Catch it later in - # output_* subs. - push_parameter($arg, "", "", $file); - } elsif ($arg =~ m/\(.+\)\s*\(/) { - # pointer-to-function - $arg =~ tr/#/,/; - $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/; - $param = $1; - $type = $arg; - $type =~ s/([^\(]+\(\*?)\s*$param/$1/; - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } elsif ($arg =~ m/\(.+\)\s*\[/) { - # array-of-pointers - $arg =~ tr/#/,/; - $arg =~ m/[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)/; - $param = $1; - $type = $arg; - $type =~ s/([^\(]+\(\*?)\s*$param/$1/; - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } elsif ($arg) { - $arg =~ s/\s*:\s*/:/g; - $arg =~ s/\s*\[/\[/g; - - my @args = split('\s*,\s*', $arg); - if ($args[0] =~ m/\*/) { - $args[0] =~ s/(\*+)\s*/ $1/; - } - - my @first_arg; - if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) { - shift @args; - push(@first_arg, split('\s+', $1)); - push(@first_arg, $2); - } else { - @first_arg = split('\s+', shift @args); - } - - unshift(@args, pop @first_arg); - $type = join " ", @first_arg; - - foreach $param (@args) { - if ($param =~ m/^(\*+)\s*(.*)/) { - save_struct_actual($2); - - push_parameter($2, "$type $1", $arg, $file, $declaration_name); - } elsif ($param =~ m/(.*?):(\w+)/) { - if ($type ne "") { # skip unnamed bit-fields - save_struct_actual($1); - push_parameter($1, "$type:$2", $arg, $file, $declaration_name) - } - } else { - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } - } - } - } -} - -sub push_parameter($$$$$) { - my $param = shift; - my $type = shift; - my $org_arg = shift; - my $file = shift; - my $declaration_name = shift; - - if (($anon_struct_union == 1) && ($type eq "") && - ($param eq "}")) { - return; # ignore the ending }; from anon. struct/union - } - - $anon_struct_union = 0; - $param =~ s/[\[\)].*//; - - if ($type eq "" && $param =~ /\.\.\.$/) - { - if (!$param =~ /\w\.\.\.$/) { - # handles unnamed variable parameters - $param = "..."; - } elsif ($param =~ /\w\.\.\.$/) { - # for named variable parameters of the form `x...`, remove the dots - $param =~ s/\.\.\.$//; - } - if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") { - $parameterdescs{$param} = "variable arguments"; - } - } - elsif ($type eq "" && ($param eq "" or $param eq "void")) - { - $param="void"; - $parameterdescs{void} = "no arguments"; - } - elsif ($type eq "" && ($param eq "struct" or $param eq "union")) - # handle unnamed (anonymous) union or struct: - { - $type = $param; - $param = "{unnamed_" . $param . "}"; - $parameterdescs{$param} = "anonymous\n"; - $anon_struct_union = 1; - } - elsif ($param =~ "__cacheline_group" ) - # handle cache group enforcing variables: they do not need be described in header files - { - return; # ignore __cacheline_group_begin and __cacheline_group_end - } - - # warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements); - # Note: It will also ignore void params and unnamed structs/unions - if (!defined $parameterdescs{$param} && $param !~ /^#/) { - $parameterdescs{$param} = $undescribed; - - if (show_warnings($type, $declaration_name) && $param !~ /\./) { - emit_warning("${file}:$.", "Function parameter or struct member '$param' not described in '$declaration_name'\n"); - } - } - - # strip spaces from $param so that it is one continuous string - # on @parameterlist; - # this fixes a problem where check_sections() cannot find - # a parameter like "addr[6 + 2]" because it actually appears - # as "addr[6", "+", "2]" on the parameter list; - # but it's better to maintain the param string unchanged for output, - # so just weaken the string compare in check_sections() to ignore - # "[blah" in a parameter string; - ###$param =~ s/\s*//g; - push @parameterlist, $param; - $org_arg =~ s/\s\s+/ /g; - $parametertypes{$param} = $org_arg; -} - -sub check_sections($$$$$) { - my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_; - my @sects = split ' ', $sectcheck; - my @prms = split ' ', $prmscheck; - my $err; - my ($px, $sx); - my $prm_clean; # strip trailing "[array size]" and/or beginning "*" - - foreach $sx (0 .. $#sects) { - $err = 1; - foreach $px (0 .. $#prms) { - $prm_clean = $prms[$px]; - $prm_clean =~ s/\[.*\]//; - $prm_clean =~ s/$attribute//i; - # ignore array size in a parameter string; - # however, the original param string may contain - # spaces, e.g.: addr[6 + 2] - # and this appears in @prms as "addr[6" since the - # parameter list is split at spaces; - # hence just ignore "[..." for the sections check; - $prm_clean =~ s/\[.*//; - - ##$prm_clean =~ s/^\**//; - if ($prm_clean eq $sects[$sx]) { - $err = 0; - last; - } - } - if ($err) { - if ($decl_type eq "function") { - emit_warning("${file}:$.", - "Excess function parameter " . - "'$sects[$sx]' " . - "description in '$decl_name'\n"); - } elsif (($decl_type eq "struct") or - ($decl_type eq "union")) { - emit_warning("${file}:$.", - "Excess $decl_type member " . - "'$sects[$sx]' " . - "description in '$decl_name'\n"); - } - } - } -} - -## -# Checks the section describing the return value of a function. -sub check_return_section { - my $file = shift; - my $declaration_name = shift; - my $return_type = shift; - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type. (But don't ignore "void *") - if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) { - return; - } - - if (!defined($sections{$section_return}) || - $sections{$section_return} eq "") - { - emit_warning("${file}:$.", - "No description found for return value of " . - "'$declaration_name'\n"); - } -} - -## -# takes a function prototype and the name of the current file being -# processed and spits out all the details stored in the global -# arrays/hashes. -sub dump_function($$) { - my $prototype = shift; - my $file = shift; - my $func_macro = 0; - - print_lineno($new_start_line); - - $prototype =~ s/^static +//; - $prototype =~ s/^extern +//; - $prototype =~ s/^asmlinkage +//; - $prototype =~ s/^inline +//; - $prototype =~ s/^__inline__ +//; - $prototype =~ s/^__inline +//; - $prototype =~ s/^__always_inline +//; - $prototype =~ s/^noinline +//; - $prototype =~ s/^__FORTIFY_INLINE +//; - $prototype =~ s/__init +//; - $prototype =~ s/__init_or_module +//; - $prototype =~ s/__deprecated +//; - $prototype =~ s/__flatten +//; - $prototype =~ s/__meminit +//; - $prototype =~ s/__must_check +//; - $prototype =~ s/__weak +//; - $prototype =~ s/__sched +//; - $prototype =~ s/_noprof//; - $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; - $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; - $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; - $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/; - my $define = $prototype =~ s/^#\s*define\s+//; #ak added - $prototype =~ s/__attribute_const__ +//; - $prototype =~ s/__attribute__\s*\(\( - (?: - [\w\s]++ # attribute name - (?:\([^)]*+\))? # attribute arguments - \s*+,? # optional comma at the end - )+ - \)\)\s+//x; - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2. Function name - # 3. Function parameters. - # - # All the while we have to watch out for function pointer parameters - # (which IIRC is what the two sections are for), C types (these - # regexps don't even start to express all the possibilities), and - # so on. - # - # If you mess with these regexps, it's a good idea to check that - # the following functions' documentation still comes out right: - # - parport_register_device (function pointer parameters) - # - atomic_set (macro) - # - pci_match_device, __copy_to_user (long return type) - my $name = qr{[a-zA-Z0-9_~:]+}; - my $prototype_end1 = qr{[^\(]*}; - my $prototype_end2 = qr{[^\{]*}; - my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; - my $type1 = qr{[\w\s]+}; - my $type2 = qr{$type1\*+}; - - if ($define && $prototype =~ m/^()($name)\s+/) { - # This is an object-like macro, it has no return type and no parameter - # list. - # Function-like macros are not allowed to have spaces between - # declaration_name and opening parenthesis (notice the \s+). - $return_type = $1; - $declaration_name = $2; - $func_macro = 1; - } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || - $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || - $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) { - $return_type = $1; - $declaration_name = $2; - my $args = $3; - - create_parameterlist($args, ',', $file, $declaration_name); - } else { - emit_warning("${file}:$.", "cannot understand function prototype: '$prototype'\n"); - return; - } - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n"); - return; - } - - my $prms = join " ", @parameterlist; - check_sections($file, $declaration_name, "function", $sectcheck, $prms); - - # This check emits a lot of warnings at the moment, because many - # functions don't have a 'Return' doc section. So until the number - # of warnings goes sufficiently down, the check is only performed in - # -Wreturn mode. - # TODO: always perform the check. - if ($Wreturn && !$func_macro) { - check_return_section($file, $declaration_name, $return_type); - } - - # The function parser can be called with a typedef parameter. - # Handle it. - if ($return_type =~ /typedef/) { - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'typedef' => 1, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'func_macro' => $func_macro - }); - } else { - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'func_macro' => $func_macro - }); - } -} - -sub reset_state { - $function = ""; - %parameterdescs = (); - %parametertypes = (); - @parameterlist = (); - %sections = (); - @sectionlist = (); - $sectcheck = ""; - $struct_actual = ""; - $prototype = ""; - - $state = STATE_NORMAL; - $inline_doc_state = STATE_INLINE_NA; -} - -sub tracepoint_munge($) { - my $file = shift; - my $tracepointname = 0; - my $tracepointargs = 0; - - if ($prototype =~ m/TRACE_EVENT\((.*?),/) { - $tracepointname = $1; - } - if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) { - $tracepointname = $1; - } - if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) { - $tracepointname = $2; - } - $tracepointname =~ s/^\s+//; #strip leading whitespace - if ($prototype =~ m/TP_PROTO\((.*?)\)/) { - $tracepointargs = $1; - } - if (($tracepointname eq 0) || ($tracepointargs eq 0)) { - emit_warning("${file}:$.", "Unrecognized tracepoint format: \n". - "$prototype\n"); - } else { - $prototype = "static inline void trace_$tracepointname($tracepointargs)"; - $identifier = "trace_$identifier"; - } -} - -sub syscall_munge() { - my $void = 0; - - $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's -## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) { - if ($prototype =~ m/SYSCALL_DEFINE0/) { - $void = 1; -## $prototype = "long sys_$1(void)"; - } - - $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name - if ($prototype =~ m/long (sys_.*?),/) { - $prototype =~ s/,/\(/; - } elsif ($void) { - $prototype =~ s/\)/\(void\)/; - } - - # now delete all of the odd-number commas in $prototype - # so that arg types & arg names don't have a comma between them - my $count = 0; - my $len = length($prototype); - if ($void) { - $len = 0; # skip the for-loop - } - for (my $ix = 0; $ix < $len; $ix++) { - if (substr($prototype, $ix, 1) eq ',') { - $count++; - if ($count % 2 == 1) { - substr($prototype, $ix, 1) = ' '; - } - } - } -} - -sub process_proto_function($$) { - my $x = shift; - my $file = shift; - - $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line - - if ($x =~ /^#/ && $x !~ /^#\s*define/) { - # do nothing - } elsif ($x =~ /([^\{]*)/) { - $prototype .= $1; - } - - if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) { - $prototype =~ s@/\*.*?\*/@@gos; # strip comments. - $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's. - $prototype =~ s@^\s+@@gos; # strip leading spaces - - # Handle prototypes for function pointers like: - # int (*pcs_config)(struct foo) - $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos; - - if ($prototype =~ /SYSCALL_DEFINE/) { - syscall_munge(); - } - if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ || - $prototype =~ /DEFINE_SINGLE_EVENT/) - { - tracepoint_munge($file); - } - dump_function($prototype, $file); - reset_state(); - } -} - -sub process_proto_type($$) { - my $x = shift; - my $file = shift; - - $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's. - $x =~ s@^\s+@@gos; # strip leading spaces - $x =~ s@\s+$@@gos; # strip trailing spaces - $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line - - if ($x =~ /^#/) { - # To distinguish preprocessor directive from regular declaration later. - $x .= ";"; - } - - while (1) { - if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) { - if( length $prototype ) { - $prototype .= " " - } - $prototype .= $1 . $2; - ($2 eq '{') && $brcount++; - ($2 eq '}') && $brcount--; - if (($2 eq ';') && ($brcount == 0)) { - dump_declaration($prototype, $file); - reset_state(); - last; - } - $x = $3; - } else { - $prototype .= $x; - last; - } - } -} - - -sub map_filename($) { - my $file; - my ($orig_file) = @_; - - if (defined($ENV{'SRCTREE'})) { - $file = "$ENV{'SRCTREE'}" . "/" . $orig_file; - } else { - $file = $orig_file; - } - - return $file; -} - -sub process_export_file($) { - my ($orig_file) = @_; - my $file = map_filename($orig_file); - - if (!open(IN,"<$file")) { - print STDERR "Error: Cannot open file $file\n"; - ++$errors; - return; - } - - while (<IN>) { - if (/$export_symbol/) { - next if (defined($nosymbol_table{$2})); - $function_table{$2} = 1; - } - if (/$export_symbol_ns/) { - next if (defined($nosymbol_table{$2})); - $function_table{$2} = 1; - } - } - - close(IN); -} - -# -# Parsers for the various processing states. -# -# STATE_NORMAL: looking for the /** to begin everything. -# -sub process_normal() { - if (/$doc_start/o) { - $state = STATE_NAME; # next line is always the function name - $declaration_start_line = $. + 1; - } -} - -# -# STATE_NAME: Looking for the "name - description" line -# -sub process_name($$) { - my $file = shift; - my $descr; - - if (/$doc_block/o) { - $state = STATE_DOCBLOCK; - $contents = ""; - $new_start_line = $.; - - if ( $1 eq "" ) { - $section = $section_intro; - } else { - $section = $1; - } - } elsif (/$doc_decl/o) { - $identifier = $1; - my $is_kernel_comment = 0; - my $decl_start = qr{$doc_com}; - # test for pointer declaration type, foo * bar() - desc - my $fn_type = qr{\w+\s*\*\s*}; - my $parenthesis = qr{\(\w*\)}; - my $decl_end = qr{[-:].*}; - if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { - $identifier = $1; - } - if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { - $decl_type = $1; - $identifier = $2; - $is_kernel_comment = 1; - } - # Look for foo() or static void foo() - description; or misspelt - # identifier - elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start$fn_type?(\w+[^-:]*)$parenthesis?\s*$decl_end$/) { - $identifier = $1; - $decl_type = 'function'; - $identifier =~ s/^define\s+//; - $is_kernel_comment = 1; - } - $identifier =~ s/\s+$//; - - $state = STATE_BODY; - # if there's no @param blocks need to set up default section - # here - $contents = ""; - $section = $section_default; - $new_start_line = $. + 1; - if (/[-:](.*)/) { - # strip leading/trailing/multiple spaces - $descr= $1; - $descr =~ s/^\s*//; - $descr =~ s/\s*$//; - $descr =~ s/\s+/ /g; - $declaration_purpose = $descr; - $state = STATE_BODY_MAYBE; - } else { - $declaration_purpose = ""; - } - - if (!$is_kernel_comment) { - emit_warning("${file}:$.", "This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n$_"); - $state = STATE_NORMAL; - } - - if (($declaration_purpose eq "") && $Wshort_desc) { - emit_warning("${file}:$.", "missing initial short description on line:\n$_"); - } - - if ($identifier eq "" && $decl_type ne "enum") { - emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n$_"); - $state = STATE_NORMAL; - } - - if ($verbose) { - print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n"; - } - } else { - emit_warning("${file}:$.", "Cannot understand $_ on line $. - I thought it was a doc line\n"); - $state = STATE_NORMAL; - } -} - - -# -# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. -# -sub process_body($$) { - my $file = shift; - - if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) { - dump_section($file, $section, $contents); - $section = $section_default; - $new_start_line = $.; - $contents = ""; - } - - if (/$doc_sect/i) { # case insensitive for supported section names - $newsection = $1; - $newcontents = $2; - - # map the supported section names to the canonical names - if ($newsection =~ m/^description$/i) { - $newsection = $section_default; - } elsif ($newsection =~ m/^context$/i) { - $newsection = $section_context; - } elsif ($newsection =~ m/^returns?$/i) { - $newsection = $section_return; - } elsif ($newsection =~ m/^\@return$/) { - # special: @return is a section, not a param description - $newsection = $section_return; - } - - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = $section_default; - } - - $state = STATE_BODY; - $contents = $newcontents; - $new_start_line = $.; - while (substr($contents, 0, 1) eq " ") { - $contents = substr($contents, 1); - } - if ($contents ne "") { - $contents .= "\n"; - } - $section = $newsection; - $leading_space = undef; - } elsif (/$doc_end/) { - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - } - # look for doc_com + <text> + doc_end: - if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') { - emit_warning("${file}:$.", "suspicious ending line: $_"); - } - - $prototype = ""; - $state = STATE_PROTO; - $brcount = 0; - $new_start_line = $. + 1; - } elsif (/$doc_content/) { - if ($1 eq "") { - if ($section eq $section_context) { - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - $new_start_line = $.; - $state = STATE_BODY; - } else { - if ($section ne $section_default) { - $state = STATE_BODY_WITH_BLANK_LINE; - } else { - $state = STATE_BODY; - } - $contents .= "\n"; - } - } elsif ($state == STATE_BODY_MAYBE) { - # Continued declaration purpose - chomp($declaration_purpose); - $declaration_purpose .= " " . $1; - $declaration_purpose =~ s/\s+/ /g; - } else { - my $cont = $1; - if ($section =~ m/^@/ || $section eq $section_context) { - if (!defined $leading_space) { - if ($cont =~ m/^(\s+)/) { - $leading_space = $1; - } else { - $leading_space = ""; - } - } - $cont =~ s/^$leading_space//; - } - $contents .= $cont . "\n"; - } - } else { - # i dont know - bad line? ignore. - emit_warning("${file}:$.", "bad line: $_"); - } -} - - -# -# STATE_PROTO: reading a function/whatever prototype. -# -sub process_proto($$) { - my $file = shift; - - if (/$doc_inline_oneline/) { - $section = $1; - $contents = $2; - if ($contents ne "") { - $contents .= "\n"; - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - } - } elsif (/$doc_inline_start/) { - $state = STATE_INLINE; - $inline_doc_state = STATE_INLINE_NAME; - } elsif ($decl_type eq 'function') { - process_proto_function($_, $file); - } else { - process_proto_type($_, $file); - } -} - -# -# STATE_DOCBLOCK: within a DOC: block. -# -sub process_docblock($$) { - my $file = shift; - - if (/$doc_end/) { - dump_doc_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - $function = ""; - %parameterdescs = (); - %parametertypes = (); - @parameterlist = (); - %sections = (); - @sectionlist = (); - $prototype = ""; - $state = STATE_NORMAL; - } elsif (/$doc_content/) { - if ( $1 eq "" ) { - $contents .= $blankline; - } else { - $contents .= $1 . "\n"; - } - } -} - -# -# STATE_INLINE: docbook comments within a prototype. -# -sub process_inline($$) { - my $file = shift; - - # First line (state 1) needs to be a @parameter - if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) { - $section = $1; - $contents = $2; - $new_start_line = $.; - if ($contents ne "") { - while (substr($contents, 0, 1) eq " ") { - $contents = substr($contents, 1); - } - $contents .= "\n"; - } - $inline_doc_state = STATE_INLINE_TEXT; - # Documentation block end */ - } elsif (/$doc_inline_end/) { - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - } - $state = STATE_PROTO; - $inline_doc_state = STATE_INLINE_NA; - # Regular text - } elsif (/$doc_content/) { - if ($inline_doc_state == STATE_INLINE_TEXT) { - $contents .= $1 . "\n"; - # nuke leading blank lines - if ($contents =~ /^\s*$/) { - $contents = ""; - } - } elsif ($inline_doc_state == STATE_INLINE_NAME) { - $inline_doc_state = STATE_INLINE_ERROR; - emit_warning("${file}:$.", "Incorrect use of kernel-doc format: $_"); - } - } -} - - -sub process_file($) { - my $file; - my ($orig_file) = @_; - - $file = map_filename($orig_file); - - if (!open(IN_FILE,"<$file")) { - print STDERR "Error: Cannot open file $file\n"; - ++$errors; - return; - } - - $. = 1; - - $section_counter = 0; - while (<IN_FILE>) { - while (!/^ \*/ && s/\\\s*$//) { - $_ .= <IN_FILE>; - } - # Replace tabs by spaces - while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {}; - # Hand this line to the appropriate state handler - if ($state == STATE_NORMAL) { - process_normal(); - } elsif ($state == STATE_NAME) { - process_name($file, $_); - } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE || - $state == STATE_BODY_WITH_BLANK_LINE) { - process_body($file, $_); - } elsif ($state == STATE_INLINE) { # scanning for inline parameters - process_inline($file, $_); - } elsif ($state == STATE_PROTO) { - process_proto($file, $_); - } elsif ($state == STATE_DOCBLOCK) { - process_docblock($file, $_); - } - } - - # Make sure we got something interesting. - if (!$section_counter && $output_mode ne "none") { - if ($output_selection == OUTPUT_INCLUDE) { - emit_warning("${file}:1", "'$_' not found\n") - for keys %function_table; - } else { - emit_warning("${file}:1", "no structured comments found\n"); - } - } - close IN_FILE; -} - -$kernelversion = get_kernel_version(); - -# generate a sequence of code that will splice in highlighting information -# using the s// operator. -for (my $k = 0; $k < @highlights; $k++) { - my $pattern = $highlights[$k][0]; - my $result = $highlights[$k][1]; -# print STDERR "scanning pattern:$pattern, highlight:($result)\n"; - $dohighlight .= "\$contents =~ s:$pattern:$result:gs;\n"; -} - -if ($output_selection == OUTPUT_EXPORTED || - $output_selection == OUTPUT_INTERNAL) { - - push(@export_file_list, @ARGV); - - foreach (@export_file_list) { - chomp; - process_export_file($_); - } -} - -foreach (@ARGV) { - chomp; - process_file($_); -} -if ($verbose && $errors) { - print STDERR "$errors errors\n"; -} -if ($verbose && $warnings) { - print STDERR "$warnings warnings\n"; -} - -if ($Werror && $warnings) { - print STDERR "$warnings warnings as Errors\n"; - exit($warnings); -} else { - exit($output_mode eq "none" ? 0 : $errors) -} - -__END__ - -=head1 OPTIONS - -=head2 Output format selection (mutually exclusive): - -=over 8 - -=item -man - -Output troff manual page format. - -=item -rst - -Output reStructuredText format. This is the default. - -=item -none - -Do not output documentation, only warnings. - -=back - -=head2 Output format modifiers - -=head3 reStructuredText only - -=head2 Output selection (mutually exclusive): - -=over 8 - -=item -export - -Only output documentation for the symbols that have been exported using -EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. - -=item -internal - -Only output documentation for the symbols that have NOT been exported using -EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. - -=item -function NAME - -Only output documentation for the given function or DOC: section title. -All other functions and DOC: sections are ignored. - -May be specified multiple times. - -=item -nosymbol NAME - -Exclude the specified symbol from the output documentation. - -May be specified multiple times. - -=back - -=head2 Output selection modifiers: - -=over 8 - -=item -no-doc-sections - -Do not output DOC: sections. - -=item -export-file FILE - -Specify an additional FILE in which to look for EXPORT_SYMBOL information. - -To be used with -export or -internal. - -May be specified multiple times. - -=back - -=head3 reStructuredText only - -=over 8 - -=item -enable-lineno - -Enable output of .. LINENO lines. - -=back - -=head2 Other parameters: - -=over 8 - -=item -h, -help - -Print this help. - -=item -v - -Verbose output, more warnings and other information. - -=item -Werror - -Treat warnings as errors. - -=back - -=cut +kernel-doc.py
\ No newline at end of file diff --git a/scripts/kernel-doc.pl b/scripts/kernel-doc.pl new file mode 100755 index 000000000000..5db23cbf4eb2 --- /dev/null +++ b/scripts/kernel-doc.pl @@ -0,0 +1,2439 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 +# vim: softtabstop=4 + +use warnings; +use strict; + +## Copyright (c) 1998 Michael Zucchi, All Rights Reserved ## +## Copyright (C) 2000, 1 Tim Waugh <twaugh@redhat.com> ## +## Copyright (C) 2001 Simon Huggins ## +## Copyright (C) 2005-2012 Randy Dunlap ## +## Copyright (C) 2012 Dan Luedtke ## +## ## +## #define enhancements by Armin Kuster <akuster@mvista.com> ## +## Copyright (c) 2000 MontaVista Software, Inc. ## +# +# Copyright (C) 2022 Tomasz Warniełło (POD) + +use Pod::Usage qw/pod2usage/; + +=head1 NAME + +kernel-doc - Print formatted kernel documentation to stdout + +=head1 SYNOPSIS + + kernel-doc [-h] [-v] [-Werror] [-Wall] [-Wreturn] [-Wshort-desc[ription]] [-Wcontents-before-sections] + [ -man | + -rst [-enable-lineno] | + -none + ] + [ + -export | + -internal | + [-function NAME] ... | + [-nosymbol NAME] ... + ] + [-no-doc-sections] + [-export-file FILE] ... + FILE ... + +Run `kernel-doc -h` for details. + +=head1 DESCRIPTION + +Read C language source or header FILEs, extract embedded documentation comments, +and print formatted documentation to standard output. + +The documentation comments are identified by the "/**" opening comment mark. + +See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. + +=cut + +# more perldoc at the end of the file + +## init lots of data + +my $errors = 0; +my $warnings = 0; +my $anon_struct_union = 0; + +# match expressions used to find embedded type information +my $type_constant = '\b``([^\`]+)``\b'; +my $type_constant2 = '\%([-_*\w]+)'; +my $type_func = '(\w+)\(\)'; +my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; +my $type_param_ref = '([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; +my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params +my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params +my $type_env = '(\$\w+)'; +my $type_enum = '\&(enum\s*([_\w]+))'; +my $type_struct = '\&(struct\s*([_\w]+))'; +my $type_typedef = '\&(typedef\s*([_\w]+))'; +my $type_union = '\&(union\s*([_\w]+))'; +my $type_member = '\&([_\w]+)(\.|->)([_\w]+)'; +my $type_fallback = '\&([_\w]+)'; +my $type_member_func = $type_member . '\(\)'; + +# Output conversion substitutions. +# One for each output format + +# these are pretty rough +my @highlights_man = ( + [$type_constant, "\$1"], + [$type_constant2, "\$1"], + [$type_func, "\\\\fB\$1\\\\fP"], + [$type_enum, "\\\\fI\$1\\\\fP"], + [$type_struct, "\\\\fI\$1\\\\fP"], + [$type_typedef, "\\\\fI\$1\\\\fP"], + [$type_union, "\\\\fI\$1\\\\fP"], + [$type_param, "\\\\fI\$1\\\\fP"], + [$type_param_ref, "\\\\fI\$1\$2\\\\fP"], + [$type_member, "\\\\fI\$1\$2\$3\\\\fP"], + [$type_fallback, "\\\\fI\$1\\\\fP"] + ); +my $blankline_man = ""; + +# rst-mode +my @highlights_rst = ( + [$type_constant, "``\$1``"], + [$type_constant2, "``\$1``"], + + # Note: need to escape () to avoid func matching later + [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"], + [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"], + [$type_fp_param, "**\$1\\\\(\\\\)**"], + [$type_fp_param2, "**\$1\\\\(\\\\)**"], + [$type_func, "\$1()"], + [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"], + + # in rst this can refer to any type + [$type_fallback, "\\:c\\:type\\:`\$1`"], + [$type_param_ref, "**\$1\$2**"] + ); +my $blankline_rst = "\n"; + +# read arguments +if ($#ARGV == -1) { + pod2usage( + -message => "No arguments!\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); +} + +my $kernelversion; + +my $dohighlight = ""; + +my $verbose = 0; +my $Werror = 0; +my $Wreturn = 0; +my $Wshort_desc = 0; +my $output_mode = "rst"; +my $output_preformatted = 0; +my $no_doc_sections = 0; +my $enable_lineno = 0; +my @highlights = @highlights_rst; +my $blankline = $blankline_rst; +my $modulename = "Kernel API"; + +use constant { + OUTPUT_ALL => 0, # output all symbols and doc sections + OUTPUT_INCLUDE => 1, # output only specified symbols + OUTPUT_EXPORTED => 2, # output exported symbols + OUTPUT_INTERNAL => 3, # output non-exported symbols +}; +my $output_selection = OUTPUT_ALL; +my $show_not_found = 0; # No longer used + +my @export_file_list; + +my @build_time; +if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) && + (my $seconds = `date -d "${ENV{'KBUILD_BUILD_TIMESTAMP'}}" +%s`) ne '') { + @build_time = gmtime($seconds); +} else { + @build_time = localtime; +} + +my $man_date = ('January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', + 'November', 'December')[$build_time[4]] . + " " . ($build_time[5]+1900); + +# Essentially these are globals. +# They probably want to be tidied up, made more localised or something. +# CAVEAT EMPTOR! Some of the others I localised may not want to be, which +# could cause "use of undefined value" or other bugs. +my ($function, %function_table, %parametertypes, $declaration_purpose); +my %nosymbol_table = (); +my $declaration_start_line; +my ($type, $declaration_name, $return_type); +my ($newsection, $newcontents, $prototype, $brcount); + +if (defined($ENV{'KBUILD_VERBOSE'}) && $ENV{'KBUILD_VERBOSE'} =~ '1') { + $verbose = 1; +} + +if (defined($ENV{'KCFLAGS'})) { + my $kcflags = "$ENV{'KCFLAGS'}"; + + if ($kcflags =~ /(\s|^)-Werror(\s|$)/) { + $Werror = 1; + } +} + +# reading this variable is for backwards compat just in case +# someone was calling it with the variable from outside the +# kernel's build system +if (defined($ENV{'KDOC_WERROR'})) { + $Werror = "$ENV{'KDOC_WERROR'}"; +} +# other environment variables are converted to command-line +# arguments in cmd_checkdoc in the build system + +# Generated docbook code is inserted in a template at a point where +# docbook v3.1 requires a non-zero sequence of RefEntry's; see: +# https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html +# We keep track of number of generated entries and generate a dummy +# if needs be to ensure the expanded template can be postprocessed +# into html. +my $section_counter = 0; + +my $lineprefix=""; + +# Parser states +use constant { + STATE_NORMAL => 0, # normal code + STATE_NAME => 1, # looking for function name + STATE_BODY_MAYBE => 2, # body - or maybe more description + STATE_BODY => 3, # the body of the comment + STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line + STATE_PROTO => 5, # scanning prototype + STATE_DOCBLOCK => 6, # documentation block + STATE_INLINE => 7, # gathering doc outside main block +}; +my $state; +my $leading_space; + +# Inline documentation state +use constant { + STATE_INLINE_NA => 0, # not applicable ($state != STATE_INLINE) + STATE_INLINE_NAME => 1, # looking for member name (@foo:) + STATE_INLINE_TEXT => 2, # looking for member documentation + STATE_INLINE_END => 3, # done + STATE_INLINE_ERROR => 4, # error - Comment without header was found. + # Spit a warning as it's not + # proper kernel-doc and ignore the rest. +}; +my $inline_doc_state; + +#declaration types: can be +# 'function', 'struct', 'union', 'enum', 'typedef' +my $decl_type; + +# Name of the kernel-doc identifier for non-DOC markups +my $identifier; + +my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start. +my $doc_end = '\*/'; +my $doc_com = '\s*\*\s*'; +my $doc_com_body = '\s*\* ?'; +my $doc_decl = $doc_com . '(\w+)'; +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +my $doc_sect = $doc_com . + '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$'; +my $doc_content = $doc_com_body . '(.*)'; +my $doc_block = $doc_com . 'DOC:\s*(.*)?'; +my $doc_inline_start = '^\s*/\*\*\s*$'; +my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; +my $doc_inline_end = '^\s*\*/\s*$'; +my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; +my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*;'; +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; +my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; + +my %parameterdescs; +my %parameterdesc_start_lines; +my @parameterlist; +my %sections; +my @sectionlist; +my %section_start_lines; +my $sectcheck; +my $struct_actual; + +my $contents = ""; +my $new_start_line = 0; + +# the canonical section names. see also $doc_sect above. +my $section_default = "Description"; # default section +my $section_intro = "Introduction"; +my $section = $section_default; +my $section_context = "Context"; +my $section_return = "Return"; + +my $undescribed = "-- undescribed --"; + +reset_state(); + +while ($ARGV[0] =~ m/^--?(.*)/) { + my $cmd = $1; + shift @ARGV; + if ($cmd eq "man") { + $output_mode = "man"; + @highlights = @highlights_man; + $blankline = $blankline_man; + } elsif ($cmd eq "rst") { + $output_mode = "rst"; + @highlights = @highlights_rst; + $blankline = $blankline_rst; + } elsif ($cmd eq "none") { + $output_mode = "none"; + } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document + $modulename = shift @ARGV; + } elsif ($cmd eq "function") { # to only output specific functions + $output_selection = OUTPUT_INCLUDE; + $function = shift @ARGV; + $function_table{$function} = 1; + } elsif ($cmd eq "nosymbol") { # Exclude specific symbols + my $symbol = shift @ARGV; + $nosymbol_table{$symbol} = 1; + } elsif ($cmd eq "export") { # only exported symbols + $output_selection = OUTPUT_EXPORTED; + %function_table = (); + } elsif ($cmd eq "internal") { # only non-exported symbols + $output_selection = OUTPUT_INTERNAL; + %function_table = (); + } elsif ($cmd eq "export-file") { + my $file = shift @ARGV; + push(@export_file_list, $file); + } elsif ($cmd eq "v") { + $verbose = 1; + } elsif ($cmd eq "Werror") { + $Werror = 1; + } elsif ($cmd eq "Wreturn") { + $Wreturn = 1; + } elsif ($cmd eq "Wshort-desc" or $cmd eq "Wshort-description") { + $Wshort_desc = 1; + } elsif ($cmd eq "Wall") { + $Wreturn = 1; + $Wshort_desc = 1; + } elsif (($cmd eq "h") || ($cmd eq "help")) { + pod2usage(-exitval => 0, -verbose => 2); + } elsif ($cmd eq 'no-doc-sections') { + $no_doc_sections = 1; + } elsif ($cmd eq 'enable-lineno') { + $enable_lineno = 1; + } elsif ($cmd eq 'show-not-found') { + $show_not_found = 1; # A no-op but don't fail + } else { + # Unknown argument + pod2usage( + -message => "Argument unknown!\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); + } + if ($#ARGV < 0){ + pod2usage( + -message => "FILE argument missing\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); + } +} + +# continue execution near EOF; + +sub findprog($) +{ + foreach(split(/:/, $ENV{PATH})) { + return "$_/$_[0]" if(-x "$_/$_[0]"); + } +} + +# get kernel version from env +sub get_kernel_version() { + my $version = 'unknown kernel version'; + + if (defined($ENV{'KERNELVERSION'})) { + $version = $ENV{'KERNELVERSION'}; + } + return $version; +} + +# +sub print_lineno { + my $lineno = shift; + if ($enable_lineno && defined($lineno)) { + print ".. LINENO " . $lineno . "\n"; + } +} + +sub emit_warning { + my $location = shift; + my $msg = shift; + print STDERR "$location: warning: $msg"; + ++$warnings; +} +## +# dumps section contents to arrays/hashes intended for that purpose. +# +sub dump_section { + my $file = shift; + my $name = shift; + my $contents = join "\n", @_; + + if ($name =~ m/$type_param/) { + $name = $1; + $parameterdescs{$name} = $contents; + $sectcheck = $sectcheck . $name . " "; + $parameterdesc_start_lines{$name} = $new_start_line; + $new_start_line = 0; + } elsif ($name eq "@\.\.\.") { + $name = "..."; + $parameterdescs{$name} = $contents; + $sectcheck = $sectcheck . $name . " "; + $parameterdesc_start_lines{$name} = $new_start_line; + $new_start_line = 0; + } else { + if (defined($sections{$name}) && ($sections{$name} ne "")) { + # Only warn on user specified duplicate section names. + if ($name ne $section_default) { + emit_warning("${file}:$.", "duplicate section name '$name'\n"); + } + $sections{$name} .= $contents; + } else { + $sections{$name} = $contents; + push @sectionlist, $name; + $section_start_lines{$name} = $new_start_line; + $new_start_line = 0; + } + } +} + +## +# dump DOC: section after checking that it should go out +# +sub dump_doc_section { + my $file = shift; + my $name = shift; + my $contents = join "\n", @_; + + if ($no_doc_sections) { + return; + } + + return if (defined($nosymbol_table{$name})); + + if (($output_selection == OUTPUT_ALL) || + (($output_selection == OUTPUT_INCLUDE) && + defined($function_table{$name}))) + { + dump_section($file, $name, $contents); + output_blockhead({'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'module' => $modulename, + 'content-only' => ($output_selection != OUTPUT_ALL), }); + } +} + +## +# output function +# +# parameterdescs, a hash. +# function => "function name" +# parameterlist => @list of parameters +# parameterdescs => %parameter descriptions +# sectionlist => @list of sections +# sections => %section descriptions +# + +sub output_highlight { + my $contents = join "\n",@_; + my $line; + +# DEBUG +# if (!defined $contents) { +# use Carp; +# confess "output_highlight got called with no args?\n"; +# } + +# print STDERR "contents b4:$contents\n"; + eval $dohighlight; + die $@ if $@; +# print STDERR "contents af:$contents\n"; + + foreach $line (split "\n", $contents) { + if (! $output_preformatted) { + $line =~ s/^\s*//; + } + if ($line eq ""){ + if (! $output_preformatted) { + print $lineprefix, $blankline; + } + } else { + if ($output_mode eq "man" && substr($line, 0, 1) eq ".") { + print "\\&$line"; + } else { + print $lineprefix, $line; + } + } + print "\n"; + } +} + +## +# output function in man +sub output_function_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + my $count; + my $func_macro = $args{'func_macro'}; + my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty + + print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n"; + + print ".SH NAME\n"; + print $args{'function'} . " \\- " . $args{'purpose'} . "\n"; + + print ".SH SYNOPSIS\n"; + if ($args{'functiontype'} ne "") { + print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n"; + } else { + print ".B \"" . $args{'function'} . "\n"; + } + $count = 0; + my $parenth = "("; + my $post = ","; + foreach my $parameter (@{$args{'parameterlist'}}) { + if ($count == $#{$args{'parameterlist'}}) { + $post = ");"; + } + $type = $args{'parametertypes'}{$parameter}; + if ($type =~ m/$function_pointer/) { + # pointer-to-function + print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; + } else { + $type =~ s/([^\*])$/$1 /; + print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n"; + } + $count++; + $parenth = ""; + } + + $paramcount = $#{$args{'parameterlist'}}; # -1 is empty + if ($paramcount >= 0) { + print ".SH ARGUMENTS\n"; + } + foreach $parameter (@{$args{'parameterlist'}}) { + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + + print ".IP \"" . $parameter . "\" 12\n"; + output_highlight($args{'parameterdescs'}{$parameter_name}); + } + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"", uc $section, "\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +## +# output enum in man +sub output_enum_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + my $count; + + print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n"; + + print ".SH NAME\n"; + print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n"; + + print ".SH SYNOPSIS\n"; + print "enum " . $args{'enum'} . " {\n"; + $count = 0; + foreach my $parameter (@{$args{'parameterlist'}}) { + print ".br\n.BI \" $parameter\"\n"; + if ($count == $#{$args{'parameterlist'}}) { + print "\n};\n"; + last; + } else { + print ", \n.br\n"; + } + $count++; + } + + print ".SH Constants\n"; + foreach $parameter (@{$args{'parameterlist'}}) { + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + + print ".IP \"" . $parameter . "\" 12\n"; + output_highlight($args{'parameterdescs'}{$parameter_name}); + } + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"$section\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +## +# output struct in man +sub output_struct_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + + print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n"; + + print ".SH NAME\n"; + print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n"; + + my $declaration = $args{'definition'}; + $declaration =~ s/\t/ /g; + $declaration =~ s/\n/"\n.br\n.BI \"/g; + print ".SH SYNOPSIS\n"; + print $args{'type'} . " " . $args{'struct'} . " {\n.br\n"; + print ".BI \"$declaration\n};\n.br\n\n"; + + print ".SH Members\n"; + foreach $parameter (@{$args{'parameterlist'}}) { + ($parameter =~ /^#/) && next; + + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + + ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; + print ".IP \"" . $parameter . "\" 12\n"; + output_highlight($args{'parameterdescs'}{$parameter_name}); + } + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"$section\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +## +# output typedef in man +sub output_typedef_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + + print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n"; + + print ".SH NAME\n"; + print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n"; + + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"$section\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +sub output_blockhead_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + my $count; + + print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n"; + + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"$section\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +## +# output in restructured text +# + +# +# This could use some work; it's used to output the DOC: sections, and +# starts by putting out the name of the doc section itself, but that tends +# to duplicate a header already in the template file. +# +sub output_blockhead_rst(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + + foreach $section (@{$args{'sectionlist'}}) { + next if (defined($nosymbol_table{$section})); + + if ($output_selection != OUTPUT_INCLUDE) { + print ".. _$section:\n\n"; + print "**$section**\n\n"; + } + print_lineno($section_start_lines{$section}); + output_highlight_rst($args{'sections'}{$section}); + print "\n"; + } +} + +# +# Apply the RST highlights to a sub-block of text. +# +sub highlight_block($) { + # The dohighlight kludge requires the text be called $contents + my $contents = shift; + eval $dohighlight; + die $@ if $@; + return $contents; +} + +# +# Regexes used only here. +# +my $sphinx_literal = '^[^.].*::$'; +my $sphinx_cblock = '^\.\.\ +code-block::'; + +sub output_highlight_rst { + my $input = join "\n",@_; + my $output = ""; + my $line; + my $in_literal = 0; + my $litprefix; + my $block = ""; + + foreach $line (split "\n",$input) { + # + # If we're in a literal block, see if we should drop out + # of it. Otherwise pass the line straight through unmunged. + # + if ($in_literal) { + if (! ($line =~ /^\s*$/)) { + # + # If this is the first non-blank line in a literal + # block we need to figure out what the proper indent is. + # + if ($litprefix eq "") { + $line =~ /^(\s*)/; + $litprefix = '^' . $1; + $output .= $line . "\n"; + } elsif (! ($line =~ /$litprefix/)) { + $in_literal = 0; + } else { + $output .= $line . "\n"; + } + } else { + $output .= $line . "\n"; + } + } + # + # Not in a literal block (or just dropped out) + # + if (! $in_literal) { + $block .= $line . "\n"; + if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) { + $in_literal = 1; + $litprefix = ""; + $output .= highlight_block($block); + $block = "" + } + } + } + + if ($block) { + $output .= highlight_block($block); + } + + $output =~ s/^\n+//g; + $output =~ s/\n+$//g; + + foreach $line (split "\n", $output) { + print $lineprefix . $line . "\n"; + } +} + +sub output_function_rst(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + my $oldprefix = $lineprefix; + + my $signature = ""; + my $func_macro = $args{'func_macro'}; + my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty + + if ($func_macro) { + $signature = $args{'function'}; + } else { + if ($args{'functiontype'}) { + $signature = $args{'functiontype'} . " "; + } + $signature .= $args{'function'} . " ("; + } + + my $count = 0; + foreach my $parameter (@{$args{'parameterlist'}}) { + if ($count ne 0) { + $signature .= ", "; + } + $count++; + $type = $args{'parametertypes'}{$parameter}; + + if ($type =~ m/$function_pointer/) { + # pointer-to-function + $signature .= $1 . $parameter . ") (" . $2 . ")"; + } else { + $signature .= $type; + } + } + + if (!$func_macro) { + $signature .= ")"; + } + + if ($args{'typedef'} || $args{'functiontype'} eq "") { + print ".. c:macro:: ". $args{'function'} . "\n\n"; + + if ($args{'typedef'}) { + print_lineno($declaration_start_line); + print " **Typedef**: "; + $lineprefix = ""; + output_highlight_rst($args{'purpose'}); + print "\n\n**Syntax**\n\n"; + print " ``$signature``\n\n"; + } else { + print "``$signature``\n\n"; + } + } else { + print ".. c:function:: $signature\n\n"; + } + + if (!$args{'typedef'}) { + print_lineno($declaration_start_line); + $lineprefix = " "; + output_highlight_rst($args{'purpose'}); + print "\n"; + } + + # + # Put our descriptive text into a container (thus an HTML <div>) to help + # set the function prototypes apart. + # + $lineprefix = " "; + if ($paramcount >= 0) { + print ".. container:: kernelindent\n\n"; + print $lineprefix . "**Parameters**\n\n"; + } + foreach $parameter (@{$args{'parameterlist'}}) { + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + $type = $args{'parametertypes'}{$parameter}; + + if ($type ne "") { + print $lineprefix . "``$type``\n"; + } else { + print $lineprefix . "``$parameter``\n"; + } + + print_lineno($parameterdesc_start_lines{$parameter_name}); + + $lineprefix = " "; + if (defined($args{'parameterdescs'}{$parameter_name}) && + $args{'parameterdescs'}{$parameter_name} ne $undescribed) { + output_highlight_rst($args{'parameterdescs'}{$parameter_name}); + } else { + print $lineprefix . "*undescribed*\n"; + } + $lineprefix = " "; + print "\n"; + } + + output_section_rst(@_); + $lineprefix = $oldprefix; +} + +sub output_section_rst(%) { + my %args = %{$_[0]}; + my $section; + my $oldprefix = $lineprefix; + + foreach $section (@{$args{'sectionlist'}}) { + print $lineprefix . "**$section**\n\n"; + print_lineno($section_start_lines{$section}); + output_highlight_rst($args{'sections'}{$section}); + print "\n"; + } + print "\n"; +} + +sub output_enum_rst(%) { + my %args = %{$_[0]}; + my ($parameter); + my $oldprefix = $lineprefix; + my $count; + my $outer; + + my $name = $args{'enum'}; + print "\n\n.. c:enum:: " . $name . "\n\n"; + + print_lineno($declaration_start_line); + $lineprefix = " "; + output_highlight_rst($args{'purpose'}); + print "\n"; + + print ".. container:: kernelindent\n\n"; + $outer = $lineprefix . " "; + $lineprefix = $outer . " "; + print $outer . "**Constants**\n\n"; + foreach $parameter (@{$args{'parameterlist'}}) { + print $outer . "``$parameter``\n"; + + if ($args{'parameterdescs'}{$parameter} ne $undescribed) { + output_highlight_rst($args{'parameterdescs'}{$parameter}); + } else { + print $lineprefix . "*undescribed*\n"; + } + print "\n"; + } + print "\n"; + $lineprefix = $oldprefix; + output_section_rst(@_); +} + +sub output_typedef_rst(%) { + my %args = %{$_[0]}; + my ($parameter); + my $oldprefix = $lineprefix; + my $name; + + $name = $args{'typedef'}; + + print "\n\n.. c:type:: " . $name . "\n\n"; + print_lineno($declaration_start_line); + $lineprefix = " "; + output_highlight_rst($args{'purpose'}); + print "\n"; + + $lineprefix = $oldprefix; + output_section_rst(@_); +} + +sub output_struct_rst(%) { + my %args = %{$_[0]}; + my ($parameter); + my $oldprefix = $lineprefix; + + my $name = $args{'struct'}; + if ($args{'type'} eq 'union') { + print "\n\n.. c:union:: " . $name . "\n\n"; + } else { + print "\n\n.. c:struct:: " . $name . "\n\n"; + } + + print_lineno($declaration_start_line); + $lineprefix = " "; + output_highlight_rst($args{'purpose'}); + print "\n"; + + print ".. container:: kernelindent\n\n"; + print $lineprefix . "**Definition**::\n\n"; + my $declaration = $args{'definition'}; + $lineprefix = $lineprefix . " "; + $declaration =~ s/\t/$lineprefix/g; + print $lineprefix . $args{'type'} . " " . $args{'struct'} . " {\n$declaration" . $lineprefix . "};\n\n"; + + $lineprefix = " "; + print $lineprefix . "**Members**\n\n"; + foreach $parameter (@{$args{'parameterlist'}}) { + ($parameter =~ /^#/) && next; + + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + + ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; + $type = $args{'parametertypes'}{$parameter}; + print_lineno($parameterdesc_start_lines{$parameter_name}); + print $lineprefix . "``" . $parameter . "``\n"; + $lineprefix = " "; + output_highlight_rst($args{'parameterdescs'}{$parameter_name}); + $lineprefix = " "; + print "\n"; + } + print "\n"; + + $lineprefix = $oldprefix; + output_section_rst(@_); +} + +## none mode output functions + +sub output_function_none(%) { +} + +sub output_enum_none(%) { +} + +sub output_typedef_none(%) { +} + +sub output_struct_none(%) { +} + +sub output_blockhead_none(%) { +} + +## +# generic output function for all types (function, struct/union, typedef, enum); +# calls the generated, variable output_ function name based on +# functype and output_mode +sub output_declaration { + no strict 'refs'; + my $name = shift; + my $functype = shift; + my $func = "output_${functype}_$output_mode"; + + return if (defined($nosymbol_table{$name})); + + if (($output_selection == OUTPUT_ALL) || + (($output_selection == OUTPUT_INCLUDE || + $output_selection == OUTPUT_EXPORTED) && + defined($function_table{$name})) || + ($output_selection == OUTPUT_INTERNAL && + !($functype eq "function" && defined($function_table{$name})))) + { + &$func(@_); + $section_counter++; + } +} + +## +# generic output function - calls the right one based on current output mode. +sub output_blockhead { + no strict 'refs'; + my $func = "output_blockhead_" . $output_mode; + &$func(@_); + $section_counter++; +} + +## +# takes a declaration (struct, union, enum, typedef) and +# invokes the right handler. NOT called for functions. +sub dump_declaration($$) { + no strict 'refs'; + my ($prototype, $file) = @_; + my $func = "dump_" . $decl_type; + &$func(@_); +} + +sub dump_union($$) { + dump_struct(@_); +} + +sub dump_struct($$) { + my $x = shift; + my $file = shift; + my $decl_type; + my $members; + my $type = qr{struct|union}; + # For capturing struct/union definition body, i.e. "{members*}qualifiers*" + my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; + my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; + + if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { + $decl_type = $1; + $declaration_name = $2; + $members = $3; + } elsif ($x =~ /typedef\s+($type)\s*$definition_body\s*(\w+)\s*;/) { + $decl_type = $1; + $declaration_name = $3; + $members = $2; + } + + if ($members) { + if ($identifier ne $declaration_name) { + emit_warning("${file}:$.", "expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n"); + return; + } + + # ignore members marked private: + $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; + $members =~ s/\/\*\s*private:.*//gosi; + # strip comments: + $members =~ s/\/\*.*?\*\///gos; + # strip attributes + $members =~ s/\s*$attribute/ /gi; + $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; + $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos; + $members =~ s/\s*__counted_by_(le|be)\s*\([^;]*\)/ /gos; + $members =~ s/\s*__packed\s*/ /gos; + $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; + $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; + $members =~ s/\s*____cacheline_aligned/ /gos; + # unwrap struct_group(): + # - first eat non-declaration parameters and rewrite for final match + # - then remove macro, outer parens, and trailing semicolon + $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos; + $members =~ s/\bstruct_group_attr\s*\(([^,]*,){2}/STRUCT_GROUP(/gos; + $members =~ s/\bstruct_group_tagged\s*\(([^,]*),([^,]*),/struct $1 $2; STRUCT_GROUP(/gos; + $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos; + $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos; + + my $args = qr{([^,)]+)}; + # replace DECLARE_BITMAP + $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; + $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + # replace DECLARE_HASHTABLE + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + # replace DECLARE_KFIFO + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; + # replace DECLARE_KFIFO_PTR + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; + # replace DECLARE_FLEX_ARRAY + $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos; + #replace DEFINE_DMA_UNMAP_ADDR + $members =~ s/DEFINE_DMA_UNMAP_ADDR\s*\($args\)/dma_addr_t $1/gos; + #replace DEFINE_DMA_UNMAP_LEN + $members =~ s/DEFINE_DMA_UNMAP_LEN\s*\($args\)/__u32 $1/gos; + my $declaration = $members; + + # Split nested struct/union elements as newer ones + while ($members =~ m/$struct_members/) { + my $newmember; + my $maintype = $1; + my $ids = $4; + my $content = $3; + foreach my $id(split /,/, $ids) { + $newmember .= "$maintype $id; "; + + $id =~ s/[:\[].*//; + $id =~ s/^\s*\**(\S+)\s*/$1/; + foreach my $arg (split /;/, $content) { + next if ($arg =~ m/^\s*$/); + if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) { + # pointer-to-function + my $type = $1; + my $name = $2; + my $extra = $3; + next if (!$name); + if ($id =~ m/^\s*$/) { + # anonymous struct/union + $newmember .= "$type$name$extra; "; + } else { + $newmember .= "$type$id.$name$extra; "; + } + } else { + my $type; + my $names; + $arg =~ s/^\s+//; + $arg =~ s/\s+$//; + # Handle bitmaps + $arg =~ s/:\s*\d+\s*//g; + # Handle arrays + $arg =~ s/\[.*\]//g; + # The type may have multiple words, + # and multiple IDs can be defined, like: + # const struct foo, *bar, foobar + # So, we remove spaces when parsing the + # names, in order to match just names + # and commas for the names + $arg =~ s/\s*,\s*/,/g; + if ($arg =~ m/(.*)\s+([\S+,]+)/) { + $type = $1; + $names = $2; + } else { + $newmember .= "$arg; "; + next; + } + foreach my $name (split /,/, $names) { + $name =~ s/^\s*\**(\S+)\s*/$1/; + next if (($name =~ m/^\s*$/)); + if ($id =~ m/^\s*$/) { + # anonymous struct/union + $newmember .= "$type $name; "; + } else { + $newmember .= "$type $id.$name; "; + } + } + } + } + } + $members =~ s/$struct_members/$newmember/; + } + + # Ignore other nested elements, like enums + $members =~ s/(\{[^\{\}]*\})//g; + + create_parameterlist($members, ';', $file, $declaration_name); + check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual); + + # Adjust declaration for better display + $declaration =~ s/([\{;])/$1\n/g; + $declaration =~ s/\}\s+;/};/g; + # Better handle inlined enums + do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/); + + my @def_args = split /\n/, $declaration; + my $level = 1; + $declaration = ""; + foreach my $clause (@def_args) { + $clause =~ s/^\s+//; + $clause =~ s/\s+$//; + $clause =~ s/\s+/ /; + next if (!$clause); + $level-- if ($clause =~ m/(\})/ && $level > 1); + if (!($clause =~ m/^\s*#/)) { + $declaration .= "\t" x $level; + } + $declaration .= "\t" . $clause . "\n"; + $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/)); + } + output_declaration($declaration_name, + 'struct', + {'struct' => $declaration_name, + 'module' => $modulename, + 'definition' => $declaration, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose, + 'type' => $decl_type + }); + } else { + print STDERR "${file}:$.: error: Cannot parse struct or union!\n"; + ++$errors; + } +} + + +sub show_warnings($$) { + my $functype = shift; + my $name = shift; + + return 0 if (defined($nosymbol_table{$name})); + + return 1 if ($output_selection == OUTPUT_ALL); + + if ($output_selection == OUTPUT_EXPORTED) { + if (defined($function_table{$name})) { + return 1; + } else { + return 0; + } + } + if ($output_selection == OUTPUT_INTERNAL) { + if (!($functype eq "function" && defined($function_table{$name}))) { + return 1; + } else { + return 0; + } + } + if ($output_selection == OUTPUT_INCLUDE) { + if (defined($function_table{$name})) { + return 1; + } else { + return 0; + } + } + die("Please add the new output type at show_warnings()"); +} + +sub dump_enum($$) { + my $x = shift; + my $file = shift; + my $members; + + # ignore members marked private: + $x =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; + $x =~ s/\/\*\s*private:.*}/}/gosi; + + $x =~ s@/\*.*?\*/@@gos; # strip comments. + # strip #define macros inside enums + $x =~ s@#\s*((define|ifdef|if)\s+|endif)[^;]*;@@gos; + + if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) { + $declaration_name = $2; + $members = $1; + } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) { + $declaration_name = $1; + $members = $2; + } + + if ($members) { + if ($identifier ne $declaration_name) { + if ($identifier eq "") { + emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n"); + } else { + emit_warning("${file}:$.", "expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n"); + } + return; + } + $declaration_name = "(anonymous)" if ($declaration_name eq ""); + + my %_members; + + $members =~ s/\s+$//; + $members =~ s/\([^;]*?[\)]//g; + + foreach my $arg (split ',', $members) { + $arg =~ s/^\s*(\w+).*/$1/; + push @parameterlist, $arg; + if (!$parameterdescs{$arg}) { + $parameterdescs{$arg} = $undescribed; + if (show_warnings("enum", $declaration_name)) { + emit_warning("${file}:$.", "Enum value '$arg' not described in enum '$declaration_name'\n"); + } + } + $_members{$arg} = 1; + } + + while (my ($k, $v) = each %parameterdescs) { + if (!exists($_members{$k})) { + if (show_warnings("enum", $declaration_name)) { + emit_warning("${file}:$.", "Excess enum value '$k' description in '$declaration_name'\n"); + } + } + } + + output_declaration($declaration_name, + 'enum', + {'enum' => $declaration_name, + 'module' => $modulename, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); + } else { + print STDERR "${file}:$.: error: Cannot parse enum!\n"; + ++$errors; + } +} + +my $typedef_type = qr { ((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s* }x; +my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x; +my $typedef_args = qr { \s*\((.*)\); }x; + +my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x; +my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x; + +sub dump_typedef($$) { + my $x = shift; + my $file = shift; + + $x =~ s@/\*.*?\*/@@gos; # strip comments. + + # Parse function typedef prototypes + if ($x =~ $typedef1 || $x =~ $typedef2) { + $return_type = $1; + $declaration_name = $2; + my $args = $3; + $return_type =~ s/^\s+//; + + if ($identifier ne $declaration_name) { + emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"); + return; + } + + create_parameterlist($args, ',', $file, $declaration_name); + + output_declaration($declaration_name, + 'function', + {'function' => $declaration_name, + 'typedef' => 1, + 'module' => $modulename, + 'functiontype' => $return_type, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); + return; + } + + while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) { + $x =~ s/\(*.\)\s*;$/;/; + $x =~ s/\[*.\]\s*;$/;/; + } + + if ($x =~ /typedef.*\s+(\w+)\s*;/) { + $declaration_name = $1; + + if ($identifier ne $declaration_name) { + emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"); + return; + } + + output_declaration($declaration_name, + 'typedef', + {'typedef' => $declaration_name, + 'module' => $modulename, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); + } else { + print STDERR "${file}:$.: error: Cannot parse typedef!\n"; + ++$errors; + } +} + +sub save_struct_actual($) { + my $actual = shift; + + # strip all spaces from the actual param so that it looks like one string item + $actual =~ s/\s*//g; + $struct_actual = $struct_actual . $actual . " "; +} + +sub create_parameterlist($$$$) { + my $args = shift; + my $splitter = shift; + my $file = shift; + my $declaration_name = shift; + my $type; + my $param; + + # temporarily replace commas inside function pointer definition + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; + } + + foreach my $arg (split($splitter, $args)) { + # strip comments + $arg =~ s/\/\*.*\*\///; + # ignore argument attributes + $arg =~ s/\sPOS0?\s/ /; + # strip leading/trailing spaces + $arg =~ s/^\s*//; + $arg =~ s/\s*$//; + $arg =~ s/\s+/ /; + + if ($arg =~ /^#/) { + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + push_parameter($arg, "", "", $file); + } elsif ($arg =~ m/\(.+\)\s*\(/) { + # pointer-to-function + $arg =~ tr/#/,/; + $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/; + $param = $1; + $type = $arg; + $type =~ s/([^\(]+\(\*?)\s*$param/$1/; + save_struct_actual($param); + push_parameter($param, $type, $arg, $file, $declaration_name); + } elsif ($arg =~ m/\(.+\)\s*\[/) { + # array-of-pointers + $arg =~ tr/#/,/; + $arg =~ m/[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)/; + $param = $1; + $type = $arg; + $type =~ s/([^\(]+\(\*?)\s*$param/$1/; + save_struct_actual($param); + push_parameter($param, $type, $arg, $file, $declaration_name); + } elsif ($arg) { + $arg =~ s/\s*:\s*/:/g; + $arg =~ s/\s*\[/\[/g; + + my @args = split('\s*,\s*', $arg); + if ($args[0] =~ m/\*/) { + $args[0] =~ s/(\*+)\s*/ $1/; + } + + my @first_arg; + if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) { + shift @args; + push(@first_arg, split('\s+', $1)); + push(@first_arg, $2); + } else { + @first_arg = split('\s+', shift @args); + } + + unshift(@args, pop @first_arg); + $type = join " ", @first_arg; + + foreach $param (@args) { + if ($param =~ m/^(\*+)\s*(.*)/) { + save_struct_actual($2); + + push_parameter($2, "$type $1", $arg, $file, $declaration_name); + } elsif ($param =~ m/(.*?):(\w+)/) { + if ($type ne "") { # skip unnamed bit-fields + save_struct_actual($1); + push_parameter($1, "$type:$2", $arg, $file, $declaration_name) + } + } else { + save_struct_actual($param); + push_parameter($param, $type, $arg, $file, $declaration_name); + } + } + } + } +} + +sub push_parameter($$$$$) { + my $param = shift; + my $type = shift; + my $org_arg = shift; + my $file = shift; + my $declaration_name = shift; + + if (($anon_struct_union == 1) && ($type eq "") && + ($param eq "}")) { + return; # ignore the ending }; from anon. struct/union + } + + $anon_struct_union = 0; + $param =~ s/[\[\)].*//; + + if ($type eq "" && $param =~ /\.\.\.$/) + { + if (!$param =~ /\w\.\.\.$/) { + # handles unnamed variable parameters + $param = "..."; + } elsif ($param =~ /\w\.\.\.$/) { + # for named variable parameters of the form `x...`, remove the dots + $param =~ s/\.\.\.$//; + } + if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") { + $parameterdescs{$param} = "variable arguments"; + } + } + elsif ($type eq "" && ($param eq "" or $param eq "void")) + { + $param="void"; + $parameterdescs{void} = "no arguments"; + } + elsif ($type eq "" && ($param eq "struct" or $param eq "union")) + # handle unnamed (anonymous) union or struct: + { + $type = $param; + $param = "{unnamed_" . $param . "}"; + $parameterdescs{$param} = "anonymous\n"; + $anon_struct_union = 1; + } + elsif ($param =~ "__cacheline_group" ) + # handle cache group enforcing variables: they do not need be described in header files + { + return; # ignore __cacheline_group_begin and __cacheline_group_end + } + + # warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements); + # Note: It will also ignore void params and unnamed structs/unions + if (!defined $parameterdescs{$param} && $param !~ /^#/) { + $parameterdescs{$param} = $undescribed; + + if (show_warnings($type, $declaration_name) && $param !~ /\./) { + emit_warning("${file}:$.", "Function parameter or struct member '$param' not described in '$declaration_name'\n"); + } + } + + # strip spaces from $param so that it is one continuous string + # on @parameterlist; + # this fixes a problem where check_sections() cannot find + # a parameter like "addr[6 + 2]" because it actually appears + # as "addr[6", "+", "2]" on the parameter list; + # but it's better to maintain the param string unchanged for output, + # so just weaken the string compare in check_sections() to ignore + # "[blah" in a parameter string; + ###$param =~ s/\s*//g; + push @parameterlist, $param; + $org_arg =~ s/\s\s+/ /g; + $parametertypes{$param} = $org_arg; +} + +sub check_sections($$$$$) { + my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_; + my @sects = split ' ', $sectcheck; + my @prms = split ' ', $prmscheck; + my $err; + my ($px, $sx); + my $prm_clean; # strip trailing "[array size]" and/or beginning "*" + + foreach $sx (0 .. $#sects) { + $err = 1; + foreach $px (0 .. $#prms) { + $prm_clean = $prms[$px]; + $prm_clean =~ s/\[.*\]//; + $prm_clean =~ s/$attribute//i; + # ignore array size in a parameter string; + # however, the original param string may contain + # spaces, e.g.: addr[6 + 2] + # and this appears in @prms as "addr[6" since the + # parameter list is split at spaces; + # hence just ignore "[..." for the sections check; + $prm_clean =~ s/\[.*//; + + ##$prm_clean =~ s/^\**//; + if ($prm_clean eq $sects[$sx]) { + $err = 0; + last; + } + } + if ($err) { + if ($decl_type eq "function") { + emit_warning("${file}:$.", + "Excess function parameter " . + "'$sects[$sx]' " . + "description in '$decl_name'\n"); + } elsif (($decl_type eq "struct") or + ($decl_type eq "union")) { + emit_warning("${file}:$.", + "Excess $decl_type member " . + "'$sects[$sx]' " . + "description in '$decl_name'\n"); + } + } + } +} + +## +# Checks the section describing the return value of a function. +sub check_return_section { + my $file = shift; + my $declaration_name = shift; + my $return_type = shift; + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type. (But don't ignore "void *") + if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) { + return; + } + + if (!defined($sections{$section_return}) || + $sections{$section_return} eq "") + { + emit_warning("${file}:$.", + "No description found for return value of " . + "'$declaration_name'\n"); + } +} + +## +# takes a function prototype and the name of the current file being +# processed and spits out all the details stored in the global +# arrays/hashes. +sub dump_function($$) { + my $prototype = shift; + my $file = shift; + my $func_macro = 0; + + print_lineno($new_start_line); + + $prototype =~ s/^static +//; + $prototype =~ s/^extern +//; + $prototype =~ s/^asmlinkage +//; + $prototype =~ s/^inline +//; + $prototype =~ s/^__inline__ +//; + $prototype =~ s/^__inline +//; + $prototype =~ s/^__always_inline +//; + $prototype =~ s/^noinline +//; + $prototype =~ s/^__FORTIFY_INLINE +//; + $prototype =~ s/__init +//; + $prototype =~ s/__init_or_module +//; + $prototype =~ s/__deprecated +//; + $prototype =~ s/__flatten +//; + $prototype =~ s/__meminit +//; + $prototype =~ s/__must_check +//; + $prototype =~ s/__weak +//; + $prototype =~ s/__sched +//; + $prototype =~ s/_noprof//; + $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; + $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; + $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; + $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/; + my $define = $prototype =~ s/^#\s*define\s+//; #ak added + $prototype =~ s/__attribute_const__ +//; + $prototype =~ s/__attribute__\s*\(\( + (?: + [\w\s]++ # attribute name + (?:\([^)]*+\))? # attribute arguments + \s*+,? # optional comma at the end + )+ + \)\)\s+//x; + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{[^\(]*}; + my $prototype_end2 = qr{[^\{]*}; + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; + my $type1 = qr{[\w\s]+}; + my $type2 = qr{$type1\*+}; + + if ($define && $prototype =~ m/^()($name)\s+/) { + # This is an object-like macro, it has no return type and no parameter + # list. + # Function-like macros are not allowed to have spaces between + # declaration_name and opening parenthesis (notice the \s+). + $return_type = $1; + $declaration_name = $2; + $func_macro = 1; + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || + $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) { + $return_type = $1; + $declaration_name = $2; + my $args = $3; + + create_parameterlist($args, ',', $file, $declaration_name); + } else { + emit_warning("${file}:$.", "cannot understand function prototype: '$prototype'\n"); + return; + } + + if ($identifier ne $declaration_name) { + emit_warning("${file}:$.", "expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n"); + return; + } + + my $prms = join " ", @parameterlist; + check_sections($file, $declaration_name, "function", $sectcheck, $prms); + + # This check emits a lot of warnings at the moment, because many + # functions don't have a 'Return' doc section. So until the number + # of warnings goes sufficiently down, the check is only performed in + # -Wreturn mode. + # TODO: always perform the check. + if ($Wreturn && !$func_macro) { + check_return_section($file, $declaration_name, $return_type); + } + + # The function parser can be called with a typedef parameter. + # Handle it. + if ($return_type =~ /typedef/) { + output_declaration($declaration_name, + 'function', + {'function' => $declaration_name, + 'typedef' => 1, + 'module' => $modulename, + 'functiontype' => $return_type, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose, + 'func_macro' => $func_macro + }); + } else { + output_declaration($declaration_name, + 'function', + {'function' => $declaration_name, + 'module' => $modulename, + 'functiontype' => $return_type, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose, + 'func_macro' => $func_macro + }); + } +} + +sub reset_state { + $function = ""; + %parameterdescs = (); + %parametertypes = (); + @parameterlist = (); + %sections = (); + @sectionlist = (); + $sectcheck = ""; + $struct_actual = ""; + $prototype = ""; + + $state = STATE_NORMAL; + $inline_doc_state = STATE_INLINE_NA; +} + +sub tracepoint_munge($) { + my $file = shift; + my $tracepointname = 0; + my $tracepointargs = 0; + + if ($prototype =~ m/TRACE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) { + $tracepointname = $2; + } + $tracepointname =~ s/^\s+//; #strip leading whitespace + if ($prototype =~ m/TP_PROTO\((.*?)\)/) { + $tracepointargs = $1; + } + if (($tracepointname eq 0) || ($tracepointargs eq 0)) { + emit_warning("${file}:$.", "Unrecognized tracepoint format: \n". + "$prototype\n"); + } else { + $prototype = "static inline void trace_$tracepointname($tracepointargs)"; + $identifier = "trace_$identifier"; + } +} + +sub syscall_munge() { + my $void = 0; + + $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's +## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) { + if ($prototype =~ m/SYSCALL_DEFINE0/) { + $void = 1; +## $prototype = "long sys_$1(void)"; + } + + $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name + if ($prototype =~ m/long (sys_.*?),/) { + $prototype =~ s/,/\(/; + } elsif ($void) { + $prototype =~ s/\)/\(void\)/; + } + + # now delete all of the odd-number commas in $prototype + # so that arg types & arg names don't have a comma between them + my $count = 0; + my $len = length($prototype); + if ($void) { + $len = 0; # skip the for-loop + } + for (my $ix = 0; $ix < $len; $ix++) { + if (substr($prototype, $ix, 1) eq ',') { + $count++; + if ($count % 2 == 1) { + substr($prototype, $ix, 1) = ' '; + } + } + } +} + +sub process_proto_function($$) { + my $x = shift; + my $file = shift; + + $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line + + if ($x =~ /^#/ && $x !~ /^#\s*define/) { + # do nothing + } elsif ($x =~ /([^\{]*)/) { + $prototype .= $1; + } + + if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) { + $prototype =~ s@/\*.*?\*/@@gos; # strip comments. + $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's. + $prototype =~ s@^\s+@@gos; # strip leading spaces + + # Handle prototypes for function pointers like: + # int (*pcs_config)(struct foo) + $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos; + + if ($prototype =~ /SYSCALL_DEFINE/) { + syscall_munge(); + } + if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ || + $prototype =~ /DEFINE_SINGLE_EVENT/) + { + tracepoint_munge($file); + } + dump_function($prototype, $file); + reset_state(); + } +} + +sub process_proto_type($$) { + my $x = shift; + my $file = shift; + + $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's. + $x =~ s@^\s+@@gos; # strip leading spaces + $x =~ s@\s+$@@gos; # strip trailing spaces + $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line + + if ($x =~ /^#/) { + # To distinguish preprocessor directive from regular declaration later. + $x .= ";"; + } + + while (1) { + if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) { + if( length $prototype ) { + $prototype .= " " + } + $prototype .= $1 . $2; + ($2 eq '{') && $brcount++; + ($2 eq '}') && $brcount--; + if (($2 eq ';') && ($brcount == 0)) { + dump_declaration($prototype, $file); + reset_state(); + last; + } + $x = $3; + } else { + $prototype .= $x; + last; + } + } +} + + +sub map_filename($) { + my $file; + my ($orig_file) = @_; + + if (defined($ENV{'SRCTREE'})) { + $file = "$ENV{'SRCTREE'}" . "/" . $orig_file; + } else { + $file = $orig_file; + } + + return $file; +} + +sub process_export_file($) { + my ($orig_file) = @_; + my $file = map_filename($orig_file); + + if (!open(IN,"<$file")) { + print STDERR "Error: Cannot open file $file\n"; + ++$errors; + return; + } + + while (<IN>) { + if (/$export_symbol/) { + next if (defined($nosymbol_table{$2})); + $function_table{$2} = 1; + } + if (/$export_symbol_ns/) { + next if (defined($nosymbol_table{$2})); + $function_table{$2} = 1; + } + } + + close(IN); +} + +# +# Parsers for the various processing states. +# +# STATE_NORMAL: looking for the /** to begin everything. +# +sub process_normal() { + if (/$doc_start/o) { + $state = STATE_NAME; # next line is always the function name + $declaration_start_line = $. + 1; + } +} + +# +# STATE_NAME: Looking for the "name - description" line +# +sub process_name($$) { + my $file = shift; + my $descr; + + if (/$doc_block/o) { + $state = STATE_DOCBLOCK; + $contents = ""; + $new_start_line = $.; + + if ( $1 eq "" ) { + $section = $section_intro; + } else { + $section = $1; + } + } elsif (/$doc_decl/o) { + $identifier = $1; + my $is_kernel_comment = 0; + my $decl_start = qr{$doc_com}; + # test for pointer declaration type, foo * bar() - desc + my $fn_type = qr{\w+\s*\*\s*}; + my $parenthesis = qr{\(\w*\)}; + my $decl_end = qr{[-:].*}; + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + $identifier = $1; + } + if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { + $decl_type = $1; + $identifier = $2; + $is_kernel_comment = 1; + } + # Look for foo() or static void foo() - description; or misspelt + # identifier + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+[^-:]*)$parenthesis?\s*$decl_end$/) { + $identifier = $1; + $decl_type = 'function'; + $identifier =~ s/^define\s+//; + $is_kernel_comment = 1; + } + $identifier =~ s/\s+$//; + + $state = STATE_BODY; + # if there's no @param blocks need to set up default section + # here + $contents = ""; + $section = $section_default; + $new_start_line = $. + 1; + if (/[-:](.*)/) { + # strip leading/trailing/multiple spaces + $descr= $1; + $descr =~ s/^\s*//; + $descr =~ s/\s*$//; + $descr =~ s/\s+/ /g; + $declaration_purpose = $descr; + $state = STATE_BODY_MAYBE; + } else { + $declaration_purpose = ""; + } + + if (!$is_kernel_comment) { + emit_warning("${file}:$.", "This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n$_"); + $state = STATE_NORMAL; + } + + if (($declaration_purpose eq "") && $Wshort_desc) { + emit_warning("${file}:$.", "missing initial short description on line:\n$_"); + } + + if ($identifier eq "" && $decl_type ne "enum") { + emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n$_"); + $state = STATE_NORMAL; + } + + if ($verbose) { + print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n"; + } + } else { + emit_warning("${file}:$.", "Cannot understand $_ on line $. - I thought it was a doc line\n"); + $state = STATE_NORMAL; + } +} + + +# +# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. +# +sub process_body($$) { + my $file = shift; + + if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) { + dump_section($file, $section, $contents); + $section = $section_default; + $new_start_line = $.; + $contents = ""; + } + + if (/$doc_sect/i) { # case insensitive for supported section names + $newsection = $1; + $newcontents = $2; + + # map the supported section names to the canonical names + if ($newsection =~ m/^description$/i) { + $newsection = $section_default; + } elsif ($newsection =~ m/^context$/i) { + $newsection = $section_context; + } elsif ($newsection =~ m/^returns?$/i) { + $newsection = $section_return; + } elsif ($newsection =~ m/^\@return$/) { + # special: @return is a section, not a param description + $newsection = $section_return; + } + + if (($contents ne "") && ($contents ne "\n")) { + dump_section($file, $section, $contents); + $section = $section_default; + } + + $state = STATE_BODY; + $contents = $newcontents; + $new_start_line = $.; + while (substr($contents, 0, 1) eq " ") { + $contents = substr($contents, 1); + } + if ($contents ne "") { + $contents .= "\n"; + } + $section = $newsection; + $leading_space = undef; + } elsif (/$doc_end/) { + if (($contents ne "") && ($contents ne "\n")) { + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + } + # look for doc_com + <text> + doc_end: + if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') { + emit_warning("${file}:$.", "suspicious ending line: $_"); + } + + $prototype = ""; + $state = STATE_PROTO; + $brcount = 0; + $new_start_line = $. + 1; + } elsif (/$doc_content/) { + if ($1 eq "") { + if ($section eq $section_context) { + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + $new_start_line = $.; + $state = STATE_BODY; + } else { + if ($section ne $section_default) { + $state = STATE_BODY_WITH_BLANK_LINE; + } else { + $state = STATE_BODY; + } + $contents .= "\n"; + } + } elsif ($state == STATE_BODY_MAYBE) { + # Continued declaration purpose + chomp($declaration_purpose); + $declaration_purpose .= " " . $1; + $declaration_purpose =~ s/\s+/ /g; + } else { + my $cont = $1; + if ($section =~ m/^@/ || $section eq $section_context) { + if (!defined $leading_space) { + if ($cont =~ m/^(\s+)/) { + $leading_space = $1; + } else { + $leading_space = ""; + } + } + $cont =~ s/^$leading_space//; + } + $contents .= $cont . "\n"; + } + } else { + # i dont know - bad line? ignore. + emit_warning("${file}:$.", "bad line: $_"); + } +} + + +# +# STATE_PROTO: reading a function/whatever prototype. +# +sub process_proto($$) { + my $file = shift; + + if (/$doc_inline_oneline/) { + $section = $1; + $contents = $2; + if ($contents ne "") { + $contents .= "\n"; + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + } + } elsif (/$doc_inline_start/) { + $state = STATE_INLINE; + $inline_doc_state = STATE_INLINE_NAME; + } elsif ($decl_type eq 'function') { + process_proto_function($_, $file); + } else { + process_proto_type($_, $file); + } +} + +# +# STATE_DOCBLOCK: within a DOC: block. +# +sub process_docblock($$) { + my $file = shift; + + if (/$doc_end/) { + dump_doc_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + $function = ""; + %parameterdescs = (); + %parametertypes = (); + @parameterlist = (); + %sections = (); + @sectionlist = (); + $prototype = ""; + $state = STATE_NORMAL; + } elsif (/$doc_content/) { + if ( $1 eq "" ) { + $contents .= $blankline; + } else { + $contents .= $1 . "\n"; + } + } +} + +# +# STATE_INLINE: docbook comments within a prototype. +# +sub process_inline($$) { + my $file = shift; + + # First line (state 1) needs to be a @parameter + if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) { + $section = $1; + $contents = $2; + $new_start_line = $.; + if ($contents ne "") { + while (substr($contents, 0, 1) eq " ") { + $contents = substr($contents, 1); + } + $contents .= "\n"; + } + $inline_doc_state = STATE_INLINE_TEXT; + # Documentation block end */ + } elsif (/$doc_inline_end/) { + if (($contents ne "") && ($contents ne "\n")) { + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + } + $state = STATE_PROTO; + $inline_doc_state = STATE_INLINE_NA; + # Regular text + } elsif (/$doc_content/) { + if ($inline_doc_state == STATE_INLINE_TEXT) { + $contents .= $1 . "\n"; + # nuke leading blank lines + if ($contents =~ /^\s*$/) { + $contents = ""; + } + } elsif ($inline_doc_state == STATE_INLINE_NAME) { + $inline_doc_state = STATE_INLINE_ERROR; + emit_warning("${file}:$.", "Incorrect use of kernel-doc format: $_"); + } + } +} + + +sub process_file($) { + my $file; + my ($orig_file) = @_; + + $file = map_filename($orig_file); + + if (!open(IN_FILE,"<$file")) { + print STDERR "Error: Cannot open file $file\n"; + ++$errors; + return; + } + + $. = 1; + + $section_counter = 0; + while (<IN_FILE>) { + while (!/^ \*/ && s/\\\s*$//) { + $_ .= <IN_FILE>; + } + # Replace tabs by spaces + while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {}; + # Hand this line to the appropriate state handler + if ($state == STATE_NORMAL) { + process_normal(); + } elsif ($state == STATE_NAME) { + process_name($file, $_); + } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE || + $state == STATE_BODY_WITH_BLANK_LINE) { + process_body($file, $_); + } elsif ($state == STATE_INLINE) { # scanning for inline parameters + process_inline($file, $_); + } elsif ($state == STATE_PROTO) { + process_proto($file, $_); + } elsif ($state == STATE_DOCBLOCK) { + process_docblock($file, $_); + } + } + + # Make sure we got something interesting. + if (!$section_counter && $output_mode ne "none") { + if ($output_selection == OUTPUT_INCLUDE) { + emit_warning("${file}:1", "'$_' not found\n") + for keys %function_table; + } else { + emit_warning("${file}:1", "no structured comments found\n"); + } + } + close IN_FILE; +} + +$kernelversion = get_kernel_version(); + +# generate a sequence of code that will splice in highlighting information +# using the s// operator. +for (my $k = 0; $k < @highlights; $k++) { + my $pattern = $highlights[$k][0]; + my $result = $highlights[$k][1]; +# print STDERR "scanning pattern:$pattern, highlight:($result)\n"; + $dohighlight .= "\$contents =~ s:$pattern:$result:gs;\n"; +} + +if ($output_selection == OUTPUT_EXPORTED || + $output_selection == OUTPUT_INTERNAL) { + + push(@export_file_list, @ARGV); + + foreach (@export_file_list) { + chomp; + process_export_file($_); + } +} + +foreach (@ARGV) { + chomp; + process_file($_); +} +if ($verbose && $errors) { + print STDERR "$errors errors\n"; +} +if ($verbose && $warnings) { + print STDERR "$warnings warnings\n"; +} + +if ($Werror && $warnings) { + print STDERR "$warnings warnings as Errors\n"; + exit($warnings); +} else { + exit($output_mode eq "none" ? 0 : $errors) +} + +__END__ + +=head1 OPTIONS + +=head2 Output format selection (mutually exclusive): + +=over 8 + +=item -man + +Output troff manual page format. + +=item -rst + +Output reStructuredText format. This is the default. + +=item -none + +Do not output documentation, only warnings. + +=back + +=head2 Output format modifiers + +=head3 reStructuredText only + +=head2 Output selection (mutually exclusive): + +=over 8 + +=item -export + +Only output documentation for the symbols that have been exported using +EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. + +=item -internal + +Only output documentation for the symbols that have NOT been exported using +EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. + +=item -function NAME + +Only output documentation for the given function or DOC: section title. +All other functions and DOC: sections are ignored. + +May be specified multiple times. + +=item -nosymbol NAME + +Exclude the specified symbol from the output documentation. + +May be specified multiple times. + +=back + +=head2 Output selection modifiers: + +=over 8 + +=item -no-doc-sections + +Do not output DOC: sections. + +=item -export-file FILE + +Specify an additional FILE in which to look for EXPORT_SYMBOL information. + +To be used with -export or -internal. + +May be specified multiple times. + +=back + +=head3 reStructuredText only + +=over 8 + +=item -enable-lineno + +Enable output of .. LINENO lines. + +=back + +=head2 Other parameters: + +=over 8 + +=item -h, -help + +Print this help. + +=item -v + +Verbose output, more warnings and other information. + +=item -Werror + +Treat warnings as errors. + +=back + +=cut diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py new file mode 100755 index 000000000000..12ae66f40bd7 --- /dev/null +++ b/scripts/kernel-doc.py @@ -0,0 +1,315 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=C0103,R0915 +# +# Converted from the kernel-doc script originally written in Perl +# under GPLv2, copyrighted since 1998 by the following authors: +# +# Aditya Srivastava <yashsri421@gmail.com> +# Akira Yokosawa <akiyks@gmail.com> +# Alexander A. Klimov <grandmaster@al2klimov.de> +# Alexander Lobakin <aleksander.lobakin@intel.com> +# André Almeida <andrealmeid@igalia.com> +# Andy Shevchenko <andriy.shevchenko@linux.intel.com> +# Anna-Maria Behnsen <anna-maria@linutronix.de> +# Armin Kuster <akuster@mvista.com> +# Bart Van Assche <bart.vanassche@sandisk.com> +# Ben Hutchings <ben@decadent.org.uk> +# Borislav Petkov <bbpetkov@yahoo.de> +# Chen-Yu Tsai <wenst@chromium.org> +# Coco Li <lixiaoyan@google.com> +# Conchúr Navid <conchur@web.de> +# Daniel Santos <daniel.santos@pobox.com> +# Danilo Cesar Lemes de Paula <danilo.cesar@collabora.co.uk> +# Dan Luedtke <mail@danrl.de> +# Donald Hunter <donald.hunter@gmail.com> +# Gabriel Krisman Bertazi <krisman@collabora.co.uk> +# Greg Kroah-Hartman <gregkh@linuxfoundation.org> +# Harvey Harrison <harvey.harrison@gmail.com> +# Horia Geanta <horia.geanta@freescale.com> +# Ilya Dryomov <idryomov@gmail.com> +# Jakub Kicinski <kuba@kernel.org> +# Jani Nikula <jani.nikula@intel.com> +# Jason Baron <jbaron@redhat.com> +# Jason Gunthorpe <jgg@nvidia.com> +# Jérémy Bobbio <lunar@debian.org> +# Johannes Berg <johannes.berg@intel.com> +# Johannes Weiner <hannes@cmpxchg.org> +# Jonathan Cameron <Jonathan.Cameron@huawei.com> +# Jonathan Corbet <corbet@lwn.net> +# Jonathan Neuschäfer <j.neuschaefer@gmx.net> +# Kamil Rytarowski <n54@gmx.com> +# Kees Cook <kees@kernel.org> +# Laurent Pinchart <laurent.pinchart@ideasonboard.com> +# Levin, Alexander (Sasha Levin) <alexander.levin@verizon.com> +# Linus Torvalds <torvalds@linux-foundation.org> +# Lucas De Marchi <lucas.demarchi@profusion.mobi> +# Mark Rutland <mark.rutland@arm.com> +# Markus Heiser <markus.heiser@darmarit.de> +# Martin Waitz <tali@admingilde.org> +# Masahiro Yamada <masahiroy@kernel.org> +# Matthew Wilcox <willy@infradead.org> +# Mauro Carvalho Chehab <mchehab+huawei@kernel.org> +# Michal Wajdeczko <michal.wajdeczko@intel.com> +# Michael Zucchi +# Mike Rapoport <rppt@linux.ibm.com> +# Niklas Söderlund <niklas.soderlund@corigine.com> +# Nishanth Menon <nm@ti.com> +# Paolo Bonzini <pbonzini@redhat.com> +# Pavan Kumar Linga <pavan.kumar.linga@intel.com> +# Pavel Pisa <pisa@cmp.felk.cvut.cz> +# Peter Maydell <peter.maydell@linaro.org> +# Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> +# Randy Dunlap <rdunlap@infradead.org> +# Richard Kennedy <richard@rsk.demon.co.uk> +# Rich Walker <rw@shadow.org.uk> +# Rolf Eike Beer <eike-kernel@sf-tec.de> +# Sakari Ailus <sakari.ailus@linux.intel.com> +# Silvio Fricke <silvio.fricke@gmail.com> +# Simon Huggins +# Tim Waugh <twaugh@redhat.com> +# Tomasz Warniełło <tomasz.warniello@gmail.com> +# Utkarsh Tripathi <utripathi2002@gmail.com> +# valdis.kletnieks@vt.edu <valdis.kletnieks@vt.edu> +# Vegard Nossum <vegard.nossum@oracle.com> +# Will Deacon <will.deacon@arm.com> +# Yacine Belkadi <yacine.belkadi.1@gmail.com> +# Yujie Liu <yujie.liu@intel.com> + +""" +kernel_doc +========== + +Print formatted kernel documentation to stdout + +Read C language source or header FILEs, extract embedded +documentation comments, and print formatted documentation +to standard output. + +The documentation comments are identified by the "/**" +opening comment mark. + +See Documentation/doc-guide/kernel-doc.rst for the +documentation comment syntax. +""" + +import argparse +import logging +import os +import sys + +# Import Python modules + +LIB_DIR = "lib/kdoc" +SRC_DIR = os.path.dirname(os.path.realpath(__file__)) + +sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) + +from kdoc_files import KernelFiles # pylint: disable=C0413 +from kdoc_output import RestFormat, ManFormat # pylint: disable=C0413 + +DESC = """ +Read C language source or header FILEs, extract embedded documentation comments, +and print formatted documentation to standard output. + +The documentation comments are identified by the "/**" opening comment mark. + +See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. +""" + +EXPORT_FILE_DESC = """ +Specify an additional FILE in which to look for EXPORT_SYMBOL information. + +May be used multiple times. +""" + +EXPORT_DESC = """ +Only output documentation for the symbols that have been +exported using EXPORT_SYMBOL() and related macros in any input +FILE or -export-file FILE. +""" + +INTERNAL_DESC = """ +Only output documentation for the symbols that have NOT been +exported using EXPORT_SYMBOL() and related macros in any input +FILE or -export-file FILE. +""" + +FUNCTION_DESC = """ +Only output documentation for the given function or DOC: section +title. All other functions and DOC: sections are ignored. + +May be used multiple times. +""" + +NOSYMBOL_DESC = """ +Exclude the specified symbol from the output documentation. + +May be used multiple times. +""" + +FILES_DESC = """ +Header and C source files to be parsed. +""" + +WARN_CONTENTS_BEFORE_SECTIONS_DESC = """ +Warns if there are contents before sections (deprecated). + +This option is kept just for backward-compatibility, but it does nothing, +neither here nor at the original Perl script. +""" + + +class MsgFormatter(logging.Formatter): + """Helper class to format warnings on a similar way to kernel-doc.pl""" + + def format(self, record): + record.levelname = record.levelname.capitalize() + return logging.Formatter.format(self, record) + +def main(): + """Main program""" + + parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, + description=DESC) + + # Normal arguments + + parser.add_argument("-v", "-verbose", "--verbose", action="store_true", + help="Verbose output, more warnings and other information.") + + parser.add_argument("-d", "-debug", "--debug", action="store_true", + help="Enable debug messages") + + parser.add_argument("-M", "-modulename", "--modulename", + default="Kernel API", + help="Allow setting a module name at the output.") + + parser.add_argument("-l", "-enable-lineno", "--enable_lineno", + action="store_true", + help="Enable line number output (only in ReST mode)") + + # Arguments to control the warning behavior + + parser.add_argument("-Wreturn", "--wreturn", action="store_true", + help="Warns about the lack of a return markup on functions.") + + parser.add_argument("-Wshort-desc", "-Wshort-description", "--wshort-desc", + action="store_true", + help="Warns if initial short description is missing") + + parser.add_argument("-Wcontents-before-sections", + "--wcontents-before-sections", action="store_true", + help=WARN_CONTENTS_BEFORE_SECTIONS_DESC) + + parser.add_argument("-Wall", "--wall", action="store_true", + help="Enable all types of warnings") + + parser.add_argument("-Werror", "--werror", action="store_true", + help="Treat warnings as errors.") + + parser.add_argument("-export-file", "--export-file", action='append', + help=EXPORT_FILE_DESC) + + # Output format mutually-exclusive group + + out_group = parser.add_argument_group("Output format selection (mutually exclusive)") + + out_fmt = out_group.add_mutually_exclusive_group() + + out_fmt.add_argument("-m", "-man", "--man", action="store_true", + help="Output troff manual page format.") + out_fmt.add_argument("-r", "-rst", "--rst", action="store_true", + help="Output reStructuredText format (default).") + out_fmt.add_argument("-N", "-none", "--none", action="store_true", + help="Do not output documentation, only warnings.") + + # Output selection mutually-exclusive group + + sel_group = parser.add_argument_group("Output selection (mutually exclusive)") + sel_mut = sel_group.add_mutually_exclusive_group() + + sel_mut.add_argument("-e", "-export", "--export", action='store_true', + help=EXPORT_DESC) + + sel_mut.add_argument("-i", "-internal", "--internal", action='store_true', + help=INTERNAL_DESC) + + sel_mut.add_argument("-s", "-function", "--symbol", action='append', + help=FUNCTION_DESC) + + # Those are valid for all 3 types of filter + parser.add_argument("-n", "-nosymbol", "--nosymbol", action='append', + help=NOSYMBOL_DESC) + + parser.add_argument("-D", "-no-doc-sections", "--no-doc-sections", + action='store_true', help="Don't outputt DOC sections") + + parser.add_argument("files", metavar="FILE", + nargs="+", help=FILES_DESC) + + args = parser.parse_args() + + if args.wall: + args.wreturn = True + args.wshort_desc = True + args.wcontents_before_sections = True + + logger = logging.getLogger() + + if not args.debug: + logger.setLevel(logging.INFO) + else: + logger.setLevel(logging.DEBUG) + + formatter = MsgFormatter('%(levelname)s: %(message)s') + + handler = logging.StreamHandler() + handler.setFormatter(formatter) + + logger.addHandler(handler) + + if args.man: + out_style = ManFormat(modulename=args.modulename) + elif args.none: + out_style = None + else: + out_style = RestFormat() + + kfiles = KernelFiles(verbose=args.verbose, + out_style=out_style, werror=args.werror, + wreturn=args.wreturn, wshort_desc=args.wshort_desc, + wcontents_before_sections=args.wcontents_before_sections) + + kfiles.parse(args.files, export_file=args.export_file) + + for t in kfiles.msg(enable_lineno=args.enable_lineno, export=args.export, + internal=args.internal, symbol=args.symbol, + nosymbol=args.nosymbol, export_file=args.export_file, + no_doc_sections=args.no_doc_sections): + msg = t[1] + if msg: + print(msg) + + error_count = kfiles.errors + if not error_count: + sys.exit(0) + + if args.werror: + print(f"{error_count} warnings as errors") + sys.exit(error_count) + + if args.verbose: + print(f"{error_count} errors") + + if args.none: + sys.exit(0) + + sys.exit(error_count) + + +# Call main method +if __name__ == "__main__": + main() diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py new file mode 100644 index 000000000000..dd003feefd1b --- /dev/null +++ b/scripts/lib/kdoc/kdoc_files.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=R0903,R0913,R0914,R0917 + +""" +Parse lernel-doc tags on multiple kernel source files. +""" + +import argparse +import logging +import os +import re + +from kdoc_parser import KernelDoc +from kdoc_output import OutputFormat + + +class GlobSourceFiles: + """ + Parse C source code file names and directories via an Interactor. + """ + + def __init__(self, srctree=None, valid_extensions=None): + """ + Initialize valid extensions with a tuple. + + If not defined, assume default C extensions (.c and .h) + + It would be possible to use python's glob function, but it is + very slow, and it is not interactive. So, it would wait to read all + directories before actually do something. + + So, let's use our own implementation. + """ + + if not valid_extensions: + self.extensions = (".c", ".h") + else: + self.extensions = valid_extensions + + self.srctree = srctree + + def _parse_dir(self, dirname): + """Internal function to parse files recursively""" + + with os.scandir(dirname) as obj: + for entry in obj: + name = os.path.join(dirname, entry.name) + + if entry.is_dir(): + yield from self._parse_dir(name) + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if not basename.endswith(self.extensions): + continue + + yield name + + def parse_files(self, file_list, file_not_found_cb): + """ + Define an interator to parse all source files from file_list, + handling directories if any + """ + + if not file_list: + return + + for fname in file_list: + if self.srctree: + f = os.path.join(self.srctree, fname) + else: + f = fname + + if os.path.isdir(f): + yield from self._parse_dir(f) + elif os.path.isfile(f): + yield f + elif file_not_found_cb: + file_not_found_cb(fname) + + +class KernelFiles(): + """ + Parse kernel-doc tags on multiple kernel source files. + + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and + EXPORT_SYMBOL* macros; + - self.process_export_file(): parses only EXPORT_SYMBOL* macros. + """ + + def warning(self, msg): + """Ancillary routine to output a warning and increment error count""" + + self.config.log.warning(msg) + self.errors += 1 + + def error(self, msg): + """Ancillary routine to output an error and increment error count""" + + self.config.log.error(msg) + self.errors += 1 + + def parse_file(self, fname): + """ + Parse a single Kernel source. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.files: + return + + doc = KernelDoc(self.config, fname) + export_table, entries = doc.parse_kdoc() + + self.export_table[fname] = export_table + + self.files.add(fname) + self.export_files.add(fname) # parse_kdoc() already check exports + + self.results[fname] = entries + + def process_export_file(self, fname): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.export_files: + return + + doc = KernelDoc(self.config, fname) + export_table = doc.parse_export() + + if not export_table: + self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") + export_table = set() + + self.export_table[fname] = export_table + self.export_files.add(fname) + + def file_not_found_cb(self, fname): + """ + Callback to warn if a file was not found. + """ + + self.error(f"Cannot find file {fname}") + + def __init__(self, verbose=False, out_style=None, + werror=False, wreturn=False, wshort_desc=False, + wcontents_before_sections=False, + logger=None): + """ + Initialize startup variables and parse all files + """ + + if not verbose: + verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + + if out_style is None: + out_style = OutputFormat() + + if not werror: + kcflags = os.environ.get("KCFLAGS", None) + if kcflags: + match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) + if match: + werror = True + + # reading this variable is for backwards compat just in case + # someone was calling it with the variable from outside the + # kernel's build system + kdoc_werror = os.environ.get("KDOC_WERROR", None) + if kdoc_werror: + werror = kdoc_werror + + # Some variables are global to the parser logic as a whole as they are + # used to send control configuration to KernelDoc class. As such, + # those variables are read-only inside the KernelDoc. + self.config = argparse.Namespace + + self.config.verbose = verbose + self.config.werror = werror + self.config.wreturn = wreturn + self.config.wshort_desc = wshort_desc + self.config.wcontents_before_sections = wcontents_before_sections + + if not logger: + self.config.log = logging.getLogger("kernel-doc") + else: + self.config.log = logger + + self.config.warning = self.warning + + self.config.src_tree = os.environ.get("SRCTREE", None) + + # Initialize variables that are internal to KernelFiles + + self.out_style = out_style + + self.errors = 0 + self.results = {} + + self.files = set() + self.export_files = set() + self.export_table = {} + + def parse(self, file_list, export_file=None): + """ + Parse all files + """ + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in glob.parse_files(file_list, self.file_not_found_cb): + self.parse_file(fname) + + for fname in glob.parse_files(export_file, self.file_not_found_cb): + self.process_export_file(fname) + + def out_msg(self, fname, name, arg): + """ + Return output messages from a file name using the output style + filtering. + + If output type was not handled by the syler, return None. + """ + + # NOTE: we can add rules here to filter out unwanted parts, + # although OutputFormat.msg already does that. + + return self.out_style.msg(fname, name, arg) + + def msg(self, enable_lineno=False, export=False, internal=False, + symbol=None, nosymbol=None, no_doc_sections=False, + filenames=None, export_file=None): + """ + Interacts over the kernel-doc results and output messages, + returning kernel-doc markups on each interaction + """ + + self.out_style.set_config(self.config) + + if not filenames: + filenames = sorted(self.results.keys()) + + for fname in filenames: + function_table = set() + + if internal or export: + if not export_file: + export_file = [fname] + + for f in export_file: + function_table |= self.export_table[f] + + if symbol: + for s in symbol: + function_table.add(s) + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + + msg = "" + for name, arg in self.results[fname]: + msg += self.out_msg(fname, name, arg) + + if msg is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + if msg: + yield fname, msg diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py new file mode 100755 index 000000000000..86102e628d91 --- /dev/null +++ b/scripts/lib/kdoc/kdoc_output.py @@ -0,0 +1,793 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 + +""" +Implement output filters to print kernel-doc documentation. + +The implementation uses a virtual base class (OutputFormat) which +contains a dispatches to virtual methods, and some code to filter +out output messages. + +The actual implementation is done on one separate class per each type +of output. Currently, there are output classes for ReST and man/troff. +""" + +import os +import re +from datetime import datetime + +from kdoc_parser import KernelDoc, type_param +from kdoc_re import KernRe + + +function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) + +# match expressions used to find embedded type information +type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) +type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) +type_func = KernRe(r"(\w+)\(\)", cache=False) +type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# Special RST handling for func ptr params +type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) + +# Special RST handling for structs with func ptr params +type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) + +type_env = KernRe(r"(\$\w+)", cache=False) +type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) +type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) +type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) +type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) +type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) +type_fallback = KernRe(r"\&([_\w]+)", cache=False) +type_member_func = type_member + KernRe(r"\(\)", cache=False) + + +class OutputFormat: + """ + Base class for OutputFormat. If used as-is, it means that only + warnings will be displayed. + """ + + # output mode. + OUTPUT_ALL = 0 # output all symbols and doc sections + OUTPUT_INCLUDE = 1 # output only specified symbols + OUTPUT_EXPORTED = 2 # output exported symbols + OUTPUT_INTERNAL = 3 # output non-exported symbols + + # Virtual member to be overriden at the inherited classes + highlights = [] + + def __init__(self): + """Declare internal vars and set mode to OUTPUT_ALL""" + + self.out_mode = self.OUTPUT_ALL + self.enable_lineno = None + self.nosymbol = {} + self.symbol = None + self.function_table = None + self.config = None + self.no_doc_sections = False + + self.data = "" + + def set_config(self, config): + """ + Setup global config variables used by both parser and output. + """ + + self.config = config + + def set_filter(self, export, internal, symbol, nosymbol, function_table, + enable_lineno, no_doc_sections): + """ + Initialize filter variables according with the requested mode. + + Only one choice is valid between export, internal and symbol. + + The nosymbol filter can be used on all modes. + """ + + self.enable_lineno = enable_lineno + self.no_doc_sections = no_doc_sections + self.function_table = function_table + + if symbol: + self.out_mode = self.OUTPUT_INCLUDE + elif export: + self.out_mode = self.OUTPUT_EXPORTED + elif internal: + self.out_mode = self.OUTPUT_INTERNAL + else: + self.out_mode = self.OUTPUT_ALL + + if nosymbol: + self.nosymbol = set(nosymbol) + + + def highlight_block(self, block): + """ + Apply the RST highlights to a sub-block of text. + """ + + for r, sub in self.highlights: + block = r.sub(sub, block) + + return block + + def out_warnings(self, args): + """ + Output warnings for identifiers that will be displayed. + """ + + warnings = args.get('warnings', []) + + for log_msg in warnings: + self.config.warning(log_msg) + + def check_doc(self, name, args): + """Check if DOC should be output""" + + if self.no_doc_sections: + return False + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode == self.OUTPUT_INCLUDE: + if name in self.function_table: + self.out_warnings(args) + return True + + return False + + def check_declaration(self, dtype, name, args): + """ + Checks if a declaration should be output or not based on the + filtering criteria. + """ + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: + if name in self.function_table: + return True + + if self.out_mode == self.OUTPUT_INTERNAL: + if dtype != "function": + self.out_warnings(args) + return True + + if name not in self.function_table: + self.out_warnings(args) + return True + + return False + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser + """ + + self.data = "" + + dtype = args.get('type', "") + + if dtype == "doc": + self.out_doc(fname, name, args) + return self.data + + if not self.check_declaration(dtype, name, args): + return self.data + + if dtype == "function": + self.out_function(fname, name, args) + return self.data + + if dtype == "enum": + self.out_enum(fname, name, args) + return self.data + + if dtype == "typedef": + self.out_typedef(fname, name, args) + return self.data + + if dtype in ["struct", "union"]: + self.out_struct(fname, name, args) + return self.data + + # Warn if some type requires an output logic + self.config.log.warning("doesn't now how to output '%s' block", + dtype) + + return None + + # Virtual methods to be overridden by inherited classes + # At the base class, those do nothing. + def out_doc(self, fname, name, args): + """Outputs a DOC block""" + + def out_function(self, fname, name, args): + """Outputs a function""" + + def out_enum(self, fname, name, args): + """Outputs an enum""" + + def out_typedef(self, fname, name, args): + """Outputs a typedef""" + + def out_struct(self, fname, name, args): + """Outputs a struct""" + + +class RestFormat(OutputFormat): + """Consts and functions used by ReST output""" + + highlights = [ + (type_constant, r"``\1``"), + (type_constant2, r"``\1``"), + + # Note: need to escape () to avoid func matching later + (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), + (type_member, r":c:type:`\1\2\3 <\1>`"), + (type_fp_param, r"**\1\\(\\)**"), + (type_fp_param2, r"**\1\\(\\)**"), + (type_func, r"\1()"), + (type_enum, r":c:type:`\1 <\2>`"), + (type_struct, r":c:type:`\1 <\2>`"), + (type_typedef, r":c:type:`\1 <\2>`"), + (type_union, r":c:type:`\1 <\2>`"), + + # in rst this can refer to any type + (type_fallback, r":c:type:`\1`"), + (type_param_ref, r"**\1\2**") + ] + blankline = "\n" + + sphinx_literal = KernRe(r'^[^.].*::$', cache=False) + sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) + + def __init__(self): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.lineprefix = "" + + def print_lineno(self, ln): + """Outputs a line number""" + + if self.enable_lineno and ln is not None: + ln += 1 + self.data += f".. LINENO {ln}\n" + + def output_highlight(self, args): + """ + Outputs a C symbol that may require being converted to ReST using + the self.highlights variable + """ + + input_text = args + output = "" + in_literal = False + litprefix = "" + block = "" + + for line in input_text.strip("\n").split("\n"): + + # If we're in a literal block, see if we should drop out of it. + # Otherwise, pass the line straight through unmunged. + if in_literal: + if line.strip(): # If the line is not blank + # If this is the first non-blank line in a literal block, + # figure out the proper indent. + if not litprefix: + r = KernRe(r'^(\s*)') + if r.match(line): + litprefix = '^' + r.group(1) + else: + litprefix = "" + + output += line + "\n" + elif not KernRe(litprefix).match(line): + in_literal = False + else: + output += line + "\n" + else: + output += line + "\n" + + # Not in a literal block (or just dropped out) + if not in_literal: + block += line + "\n" + if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): + in_literal = True + litprefix = "" + output += self.highlight_block(block) + block = "" + + # Handle any remaining block + if block: + output += self.highlight_block(block) + + # Print the output with the line prefix + for line in output.strip("\n").split("\n"): + self.data += self.lineprefix + line + "\n" + + def out_section(self, args, out_docblock=False): + """ + Outputs a block section. + + This could use some work; it's used to output the DOC: sections, and + starts by putting out the name of the doc section itself, but that + tends to duplicate a header already in the template file. + """ + + sectionlist = args.get('sectionlist', []) + sections = args.get('sections', {}) + section_start_lines = args.get('section_start_lines', {}) + + for section in sectionlist: + # Skip sections that are in the nosymbol_table + if section in self.nosymbol: + continue + + if out_docblock: + if not self.out_mode == self.OUTPUT_INCLUDE: + self.data += f".. _{section}:\n\n" + self.data += f'{self.lineprefix}**{section}**\n\n' + else: + self.data += f'{self.lineprefix}**{section}**\n\n' + + self.print_lineno(section_start_lines.get(section, 0)) + self.output_highlight(sections[section]) + self.data += "\n" + self.data += "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + self.out_section(args, out_docblock=True) + + def out_function(self, fname, name, args): + + oldprefix = self.lineprefix + signature = "" + + func_macro = args.get('func_macro', False) + if func_macro: + signature = args['function'] + else: + if args.get('functiontype'): + signature = args['functiontype'] + " " + signature += args['function'] + " (" + + parameterlist = args.get('parameterlist', []) + parameterdescs = args.get('parameterdescs', {}) + parameterdesc_start_lines = args.get('parameterdesc_start_lines', {}) + + ln = args.get('declaration_start_line', 0) + + count = 0 + for parameter in parameterlist: + if count != 0: + signature += ", " + count += 1 + dtype = args['parametertypes'].get(parameter, "") + + if function_pointer.search(dtype): + signature += function_pointer.group(1) + parameter + function_pointer.group(3) + else: + signature += dtype + + if not func_macro: + signature += ")" + + self.print_lineno(ln) + if args.get('typedef') or not args.get('functiontype'): + self.data += f".. c:macro:: {args['function']}\n\n" + + if args.get('typedef'): + self.data += " **Typedef**: " + self.lineprefix = "" + self.output_highlight(args.get('purpose', "")) + self.data += "\n\n**Syntax**\n\n" + self.data += f" ``{signature}``\n\n" + else: + self.data += f"``{signature}``\n\n" + else: + self.data += f".. c:function:: {signature}\n\n" + + if not args.get('typedef'): + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', "")) + self.data += "\n" + + # Put descriptive text into a container (HTML <div>) to help set + # function prototypes apart + self.lineprefix = " " + + if parameterlist: + self.data += ".. container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Parameters**\n\n" + + for parameter in parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + dtype = args['parametertypes'].get(parameter, "") + + if dtype: + self.data += f"{self.lineprefix}``{dtype}``\n" + else: + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.print_lineno(parameterdesc_start_lines.get(parameter_name, 0)) + + self.lineprefix = " " + if parameter_name in parameterdescs and \ + parameterdescs[parameter_name] != KernelDoc.undescribed: + + self.output_highlight(parameterdescs[parameter_name]) + self.data += "\n" + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.lineprefix = " " + + self.out_section(args) + self.lineprefix = oldprefix + + def out_enum(self, fname, name, args): + + oldprefix = self.lineprefix + name = args.get('enum', '') + parameterlist = args.get('parameterlist', []) + parameterdescs = args.get('parameterdescs', {}) + ln = args.get('declaration_start_line', 0) + + self.data += f"\n\n.. c:enum:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', '')) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + outer = self.lineprefix + " " + self.lineprefix = outer + " " + self.data += f"{outer}**Constants**\n\n" + + for parameter in parameterlist: + self.data += f"{outer}``{parameter}``\n" + + if parameterdescs.get(parameter, '') != KernelDoc.undescribed: + self.output_highlight(parameterdescs[parameter]) + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_typedef(self, fname, name, args): + + oldprefix = self.lineprefix + name = args.get('typedef', '') + ln = args.get('declaration_start_line', 0) + + self.data += f"\n\n.. c:type:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + + self.output_highlight(args.get('purpose', '')) + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_struct(self, fname, name, args): + + name = args.get('struct', "") + purpose = args.get('purpose', "") + declaration = args.get('definition', "") + dtype = args.get('type', "struct") + ln = args.get('declaration_start_line', 0) + + parameterlist = args.get('parameterlist', []) + parameterdescs = args.get('parameterdescs', {}) + parameterdesc_start_lines = args.get('parameterdesc_start_lines', {}) + + self.data += f"\n\n.. c:{dtype}:: {name}\n\n" + + self.print_lineno(ln) + + oldprefix = self.lineprefix + self.lineprefix += " " + + self.output_highlight(purpose) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Definition**::\n\n" + + self.lineprefix = self.lineprefix + " " + + declaration = declaration.replace("\t", self.lineprefix) + + self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" + self.data += f"{declaration}{self.lineprefix}" + "};\n\n" + + self.lineprefix = " " + self.data += f"{self.lineprefix}**Members**\n\n" + for parameter in parameterlist: + if not parameter or parameter.startswith("#"): + continue + + parameter_name = parameter.split("[", maxsplit=1)[0] + + if parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.print_lineno(parameterdesc_start_lines.get(parameter_name, 0)) + + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.lineprefix = " " + self.output_highlight(parameterdescs[parameter_name]) + self.lineprefix = " " + + self.data += "\n" + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + +class ManFormat(OutputFormat): + """Consts and functions used by man pages output""" + + highlights = ( + (type_constant, r"\1"), + (type_constant2, r"\1"), + (type_func, r"\\fB\1\\fP"), + (type_enum, r"\\fI\1\\fP"), + (type_struct, r"\\fI\1\\fP"), + (type_typedef, r"\\fI\1\\fP"), + (type_union, r"\\fI\1\\fP"), + (type_param, r"\\fI\1\\fP"), + (type_param_ref, r"\\fI\1\2\\fP"), + (type_member, r"\\fI\1\2\3\\fP"), + (type_fallback, r"\\fI\1\\fP") + ) + blankline = "" + + date_formats = [ + "%a %b %d %H:%M:%S %Z %Y", + "%a %b %d %H:%M:%S %Y", + "%Y-%m-%d", + "%b %d %Y", + "%B %d %Y", + "%m %d %Y", + ] + + def __init__(self, modulename): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.modulename = modulename + + dt = None + tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") + if tstamp: + for fmt in self.date_formats: + try: + dt = datetime.strptime(tstamp, fmt) + break + except ValueError: + pass + + if not dt: + dt = datetime.now() + + self.man_date = dt.strftime("%B %Y") + + def output_highlight(self, block): + """ + Outputs a C symbol that may require being highlighted with + self.highlights variable using troff syntax + """ + + contents = self.highlight_block(block) + + if isinstance(contents, list): + contents = "\n".join(contents) + + for line in contents.strip("\n").split("\n"): + line = KernRe(r"^\s*").sub("", line) + if not line: + continue + + if line[0] == ".": + self.data += "\\&" + line + "\n" + else: + self.data += line + "\n" + + def out_doc(self, fname, name, args): + sectionlist = args.get('sectionlist', []) + sections = args.get('sections', {}) + + if not self.check_doc(name, args): + return + + self.data += f'.TH "{self.modulename}" 9 "{self.modulename}" "{self.man_date}" "API Manual" LINUX' + "\n" + + for section in sectionlist: + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(sections.get(section)) + + def out_function(self, fname, name, args): + """output function in man""" + + parameterlist = args.get('parameterlist', []) + parameterdescs = args.get('parameterdescs', {}) + sectionlist = args.get('sectionlist', []) + sections = args.get('sections', {}) + + self.data += f'.TH "{args["function"]}" 9 "{args["function"]}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{args['function']} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + if args.get('functiontype', ''): + self.data += f'.B "{args["functiontype"]}" {args["function"]}' + "\n" + else: + self.data += f'.B "{args["function"]}' + "\n" + + count = 0 + parenth = "(" + post = "," + + for parameter in parameterlist: + if count == len(parameterlist) - 1: + post = ");" + + dtype = args['parametertypes'].get(parameter, "") + if function_pointer.match(dtype): + # Pointer-to-function + self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" + else: + dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) + + self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" + count += 1 + parenth = "" + + if parameterlist: + self.data += ".SH ARGUMENTS\n" + + for parameter in parameterlist: + parameter_name = re.sub(r'\[.*', '', parameter) + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(parameterdescs.get(parameter_name, "")) + + for section in sectionlist: + self.data += f'.SH "{section.upper()}"' + "\n" + self.output_highlight(sections[section]) + + def out_enum(self, fname, name, args): + + name = args.get('enum', '') + parameterlist = args.get('parameterlist', []) + sectionlist = args.get('sectionlist', []) + sections = args.get('sections', {}) + + self.data += f'.TH "{self.modulename}" 9 "enum {args["enum"]}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"enum {args['enum']} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + self.data += f"enum {args['enum']}" + " {\n" + + count = 0 + for parameter in parameterlist: + self.data += f'.br\n.BI " {parameter}"' + "\n" + if count == len(parameterlist) - 1: + self.data += "\n};\n" + else: + self.data += ", \n.br\n" + + count += 1 + + self.data += ".SH Constants\n" + + for parameter in parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args['parameterdescs'].get(parameter_name, "")) + + for section in sectionlist: + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(sections[section]) + + def out_typedef(self, fname, name, args): + module = self.modulename + typedef = args.get('typedef') + purpose = args.get('purpose') + sectionlist = args.get('sectionlist', []) + sections = args.get('sections', {}) + + self.data += f'.TH "{module}" 9 "{typedef}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"typedef {typedef} \\- {purpose}\n" + + for section in sectionlist: + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(sections.get(section)) + + def out_struct(self, fname, name, args): + module = self.modulename + struct_type = args.get('type') + struct_name = args.get('struct') + purpose = args.get('purpose') + definition = args.get('definition') + sectionlist = args.get('sectionlist', []) + parameterlist = args.get('parameterlist', []) + sections = args.get('sections', {}) + parameterdescs = args.get('parameterdescs', {}) + + self.data += f'.TH "{module}" 9 "{struct_type} {struct_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{struct_type} {struct_name} \\- {purpose}\n" + + # Replace tabs with two spaces and handle newlines + declaration = definition.replace("\t", " ") + declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) + + self.data += ".SH SYNOPSIS\n" + self.data += f"{struct_type} {struct_name} " + "{" + "\n.br\n" + self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" + + self.data += ".SH Members\n" + for parameter in parameterlist: + if parameter.startswith("#"): + continue + + parameter_name = re.sub(r"\[.*", "", parameter) + + if parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(parameterdescs.get(parameter_name)) + + for section in sectionlist: + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(sections.get(section)) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py new file mode 100755 index 000000000000..4f036c720b36 --- /dev/null +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -0,0 +1,1715 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +kdoc_parser +=========== + +Read a C language source or header FILE and extract embedded +documentation comments +""" + +import argparse +import re +from pprint import pformat + +from kdoc_re import NestedMatch, KernRe + + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. +# +# Let's declare them in lowercase outside any class to make easier to +# convert from the python script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. +doc_start = KernRe(r'^/\*\*\s*$', cache=False) + +doc_end = KernRe(r'\*/', cache=False) +doc_com = KernRe(r'\s*\*\s*', cache=False) +doc_com_body = KernRe(r'\s*\* ?', cache=False) +doc_decl = doc_com + KernRe(r'(\w+)', cache=False) + +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +doc_sect = doc_com + \ + KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', + flags=re.I, cache=False) + +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) +attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", + flags=re.I | re.S, cache=False) + +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) + +type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + + +class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. + """ + + # Parser states + STATE_NORMAL = 0 # normal code + STATE_NAME = 1 # looking for function name + STATE_BODY_MAYBE = 2 # body - or maybe more description + STATE_BODY = 3 # the body of the comment + STATE_BODY_WITH_BLANK_LINE = 4 # the body which has a blank line + STATE_PROTO = 5 # scanning prototype + STATE_DOCBLOCK = 6 # documentation block + STATE_INLINE = 7 # gathering doc outside main block + + st_name = [ + "NORMAL", + "NAME", + "BODY_MAYBE", + "BODY", + "BODY_WITH_BLANK_LINE", + "PROTO", + "DOCBLOCK", + "INLINE", + ] + + # Inline documentation state + STATE_INLINE_NA = 0 # not applicable ($state != STATE_INLINE) + STATE_INLINE_NAME = 1 # looking for member name (@foo:) + STATE_INLINE_TEXT = 2 # looking for member documentation + STATE_INLINE_END = 3 # done + STATE_INLINE_ERROR = 4 # error - Comment without header was found. + # Spit a warning as it's not + # proper kernel-doc and ignore the rest. + + st_inline_name = [ + "", + "_NAME", + "_TEXT", + "_END", + "_ERROR", + ] + + # Section names + + section_default = "Description" # default section + section_intro = "Introduction" + section_context = "Context" + section_return = "Return" + + undescribed = "-- undescribed --" + + def __init__(self, config, fname): + """Initialize internal variables""" + + self.fname = fname + self.config = config + + # Initial state for the state machines + self.state = self.STATE_NORMAL + self.inline_doc_state = self.STATE_INLINE_NA + + # Store entry currently being processed + self.entry = None + + # Place all potential outputs into an array + self.entries = [] + + # TODO: rename to emit_message after removal of kernel-doc.pl + def emit_warning(self, ln, msg, warning=True): + """Emit a message""" + + log_msg = f"{self.fname}:{ln} {msg}" + + if not warning: + self.config.log.info(log_msg) + return + + if self.entry: + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.entry.warnings.append(log_msg) + return + + self.config.log.warning(log_msg) + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + + name = self.entry.section + contents = self.entry.contents + + if type_param.match(name): + name = type_param.group(1) + + self.entry.parameterdescs[name] = contents + self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line + + self.entry.sectcheck += name + " " + self.entry.new_start_line = 0 + + elif name == "@...": + name = "..." + self.entry.parameterdescs[name] = contents + self.entry.sectcheck += name + " " + self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line + self.entry.new_start_line = 0 + + else: + if name in self.entry.sections and self.entry.sections[name] != "": + # Only warn on user-specified duplicate section names + if name != self.section_default: + self.emit_warning(self.entry.new_start_line, + f"duplicate section name '{name}'\n") + self.entry.sections[name] += contents + else: + self.entry.sections[name] = contents + self.entry.sectionlist.append(name) + self.entry.section_start_lines[name] = self.entry.new_start_line + self.entry.new_start_line = 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self.entry))) + + if start_new: + self.entry.section = self.section_default + self.entry.contents = "" + + # TODO: rename it to store_declaration after removal of kernel-doc.pl + def output_declaration(self, dtype, name, **args): + """ + Stores the entry into an entry array. + + The actual output and output filters will be handled elsewhere + """ + + # The implementation here is different than the original kernel-doc: + # instead of checking for output filters or actually output anything, + # it just stores the declaration content at self.entries, as the + # output will happen on a separate class. + # + # For now, we're keeping the same name of the function just to make + # easier to compare the source code of both scripts + + args["declaration_start_line"] = self.entry.declaration_start_line + args["type"] = dtype + args["warnings"] = self.entry.warnings + + # TODO: use colletions.OrderedDict to remove sectionlist + + sections = args.get('sections', {}) + sectionlist = args.get('sectionlist', []) + + # Drop empty sections + # TODO: improve empty sections logic to emit warnings + for section in ["Description", "Return"]: + if section in sectionlist: + if not sections[section].rstrip(): + del sections[section] + sectionlist.remove(section) + + self.entries.append((name, args)) + + self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) + + def reset_state(self, ln): + """ + Ancillary routine to create a new entry. It initializes all + variables used by the state machine. + """ + + self.entry = argparse.Namespace + + self.entry.contents = "" + self.entry.function = "" + self.entry.sectcheck = "" + self.entry.struct_actual = "" + self.entry.prototype = "" + + self.entry.warnings = [] + + self.entry.parameterlist = [] + self.entry.parameterdescs = {} + self.entry.parametertypes = {} + self.entry.parameterdesc_start_lines = {} + + self.entry.section_start_lines = {} + self.entry.sectionlist = [] + self.entry.sections = {} + + self.entry.anon_struct_union = False + + self.entry.leading_space = None + + # State flags + self.state = self.STATE_NORMAL + self.inline_doc_state = self.STATE_INLINE_NA + self.entry.brcount = 0 + + self.entry.in_doc_sect = False + self.entry.declaration_start_line = ln + 1 + + def push_parameter(self, ln, decl_type, param, dtype, + org_arg, declaration_name): + """ + Store parameters and their descriptions at self.entry. + """ + + if self.entry.anon_struct_union and dtype == "" and param == "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union = False + + param = KernRe(r'[\[\)].*').sub('', param, count=1) + + if dtype == "" and param.endswith("..."): + if KernRe(r'\w\.\.\.$').search(param): + # For named variable parameters of the form `x...`, + # remove the dots + param = param[:-3] + else: + # Handles unnamed variable parameters + param = "..." + + if param not in self.entry.parameterdescs or \ + not self.entry.parameterdescs[param]: + + self.entry.parameterdescs[param] = "variable arguments" + + elif dtype == "" and (not param or param == "void"): + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif dtype == "" and param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True + + # Handle cache group enforcing variables: they do not need + # to be described in header files + elif "__cacheline_group" in param: + # Ignore __cacheline_group_begin and __cacheline_group_end + return + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith("#"): + self.entry.parameterdescs[param] = self.undescribed + + if "." not in param: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_warning(ln, + f"{dname} '{param}' not described in '{declaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. + + self.entry.parameterlist.append(param) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] = org_arg + + def save_struct_actual(self, actual): + """ + Strip all spaces from the actual param so that it looks like + one string item. + """ + + actual = KernRe(r'\s*').sub("", actual, count=1) + + self.entry.struct_actual += actual + " " + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr = KernRe(r'(\([^\),]+),') + while arg_expr.search(args): + args = arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Strip comments + arg = KernRe(r'\/\*.*\*\/').sub('', arg) + + # Ignore argument attributes + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg = arg.strip() + arg = KernRe(r'\s+').sub(' ', arg, count=1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + + elif KernRe(r'\(.+\)\s*\(').search(arg): + # Pointer-to-function + + arg = arg.replace('#', ',') + + r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + if r.match(arg): + param = r.group(1) + else: + self.emit_warning(ln, f"Invalid param: {arg}") + param = arg + + dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + elif KernRe(r'\(.+\)\s*\[').search(arg): + # Array-of-pointers + + arg = arg.replace('#', ',') + r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + if r.match(arg): + param = r.group(1) + else: + self.emit_warning(ln, f"Invalid param: {arg}") + param = arg + + dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + elif arg: + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) + + args = KernRe(r'\s*,\s*').split(arg) + if args[0] and '*' in args[0]: + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + + first_arg = [] + r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') + if args[0] and r.match(args[0]): + args.pop(0) + first_arg.extend(r.group(1)) + first_arg.append(r.group(2)) + else: + first_arg = KernRe(r'\s+').split(args.pop(0)) + + args.insert(0, first_arg.pop()) + dtype = ' '.join(first_arg) + + for param in args: + if KernRe(r'^(\*+)\s*(.*)').match(param): + r = KernRe(r'^(\*+)\s*(.*)') + if not r.match(param): + self.emit_warning(ln, f"Invalid param: {param}") + continue + + param = r.group(1) + + self.save_struct_actual(r.group(2)) + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + + elif KernRe(r'(.*?):(\w+)').search(param): + r = KernRe(r'(.*?):(\w+)') + if not r.match(param): + self.emit_warning(ln, f"Invalid param: {param}") + continue + + if dtype != "": # Skip unnamed bit-fields + self.save_struct_actual(r.group(1)) + self.push_parameter(ln, decl_type, r.group(1), + f"{dtype}:{r.group(2)}", + arg, declaration_name) + else: + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. + """ + + sects = sectcheck.split() + prms = prmscheck.split() + err = False + + for sx in range(len(sects)): # pylint: disable=C0200 + err = True + for px in range(len(prms)): # pylint: disable=C0200 + prm_clean = prms[px] + prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) + prm_clean = attribute.sub('', prm_clean) + + # ignore array size in a parameter string; + # however, the original param string may contain + # spaces, e.g.: addr[6 + 2] + # and this appears in @prms as "addr[6" since the + # parameter list is split at spaces; + # hence just ignore "[..." for the sections check; + prm_clean = KernRe(r'\[.*').sub('', prm_clean) + + if prm_clean == sects[sx]: + err = False + break + + if err: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_warning(ln, + f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") + + def check_return_section(self, ln, declaration_name, return_type): + """ + If the function doesn't return void, warns about the lack of a + return description. + """ + + if not self.config.wreturn: + return + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type (but not "void *") + if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): + return + + if not self.entry.sections.get("Return", None): + self.emit_warning(ln, + f"No description found for return value of '{declaration_name}'") + + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + + type_pattern = r'(struct|union)' + + qualifiers = [ + "__attribute__", + "__packed", + "__aligned", + "____cacheline_aligned_in_smp", + "____cacheline_aligned", + ] + + definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" + struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') + + # Extract struct/union definition + members = None + declaration_name = None + decl_type = None + + r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) + if r.search(proto): + decl_type = r.group(1) + declaration_name = r.group(2) + members = r.group(3) + else: + r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + + if r.search(proto): + decl_type = r.group(1) + declaration_name = r.group(3) + members = r.group(2) + + if not members: + self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!") + return + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") + return + + args_pattern = r'([^,)]+)' + + sub_prefixes = [ + (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), + (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), + + # Strip comments + (KernRe(r'\/\*.*?\*\/', re.S), ''), + + # Strip attributes + (attribute, ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), + ] + + # Regexes here are guaranteed to have the end limiter matching + # the start delimiter. Yet, right now, only one replace group + # is allowed. + + sub_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), + ] + + for search, sub in sub_prefixes: + members = search.sub(sub, members) + + nested = NestedMatch() + + for search, sub in sub_nested_prefixes: + members = nested.sub(search, sub, members) + + # Keeps the original declaration as-is + declaration = members + + # Split nested struct/union elements + # + # This loop was simpler at the original kernel-doc perl version, as + # while ($members =~ m/$struct_members/) { ... } + # reads 'members' string on each interaction. + # + # Python behavior is different: it parses 'members' only once, + # creating a list of tuples from the first interaction. + # + # On other words, this won't get nested structs. + # + # So, we need to have an extra loop on Python to override such + # re limitation. + + while True: + tuples = struct_members.findall(members) + if not tuples: + break + + for t in tuples: + newmember = "" + maintype = t[0] + s_ids = t[5] + content = t[3] + + oldmember = "".join(t) + + for s_id in s_ids.split(','): + s_id = s_id.strip() + + newmember += f"{maintype} {s_id}; " + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + + for arg in content.split(';'): + arg = arg.strip() + + if not arg: + continue + + r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + if r.match(arg): + # Pointer-to-function + dtype = r.group(1) + name = r.group(2) + extra = r.group(3) + + if not name: + continue + + if not s_id: + # Anonymous struct/union + newmember += f"{dtype}{name}{extra}; " + else: + newmember += f"{dtype}{s_id}.{name}{extra}; " + + else: + arg = arg.strip() + # Handle bitmaps + arg = KernRe(r':\s*\d+\s*').sub('', arg) + + # Handle arrays + arg = KernRe(r'\[.*\]').sub('', arg) + + # Handle multiple IDs + arg = KernRe(r'\s*,\s*').sub(',', arg) + + r = KernRe(r'(.*)\s+([\S+,]+)') + + if r.search(arg): + dtype = r.group(1) + names = r.group(2) + else: + newmember += f"{arg}; " + continue + + for name in names.split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() + + if not name: + continue + + if not s_id: + # Anonymous struct/union + newmember += f"{dtype} {name}; " + else: + newmember += f"{dtype} {s_id}.{name}; " + + members = members.replace(oldmember, newmember) + + # Ignore other nested elements, like enums + members = re.sub(r'(\{[^\{\}]*\})', '', members) + + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type, + self.entry.sectcheck, self.entry.struct_actual) + + # Adjust declaration for better display + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) + + # Better handle inlined enums + while True: + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + if not r.search(declaration): + break + + declaration = r.sub(r'\1,\n\2', declaration) + + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + + clause = clause.strip() + clause = KernRe(r'\s+').sub(' ', clause, count=1) + + if not clause: + continue + + if '}' in clause and level > 1: + level -= 1 + + if not KernRe(r'^\s*#').match(clause): + declaration += "\t" * level + + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + + self.output_declaration(decl_type, declaration_name, + struct=declaration_name, + definition=declaration, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + purpose=self.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Stores an enum inside self.entries array. + """ + + # Ignore members marked private + proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) + proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) + + # Strip comments + proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) + + # Strip #define macros inside enums + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + + members = None + declaration_name = None + + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name = r.group(2) + members = r.group(1).rstrip() + else: + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name = r.group(1) + members = r.group(2).rstrip() + + if not members: + self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") + return + + if self.entry.identifier != declaration_name: + if self.entry.identifier == "": + self.emit_warning(ln, + f"{proto}: wrong kernel-doc identifier on prototype") + else: + self.emit_warning(ln, + f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name = "(anonymous)" + + member_set = set() + + members = KernRe(r'\([^;]*?[\)]').sub('', members) + + for arg in members.split(','): + if not arg: + continue + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] = self.undescribed + self.emit_warning(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") + member_set.add(arg) + + for k in self.entry.parameterdescs: + if k not in member_set: + self.emit_warning(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") + + self.output_declaration('enum', declaration_name, + enum=declaration_name, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + purpose=self.entry.declaration_purpose) + + def dump_declaration(self, ln, prototype): + """ + Stores a data declaration inside self.entries array. + """ + + if self.entry.decl_type == "enum": + self.dump_enum(ln, prototype) + return + + if self.entry.decl_type == "typedef": + self.dump_typedef(ln, prototype) + return + + if self.entry.decl_type in ["union", "struct"]: + self.dump_struct(ln, prototype) + return + + self.output_declaration(self.entry.decl_type, prototype, + entry=self.entry) + + def dump_function(self, ln, prototype): + """ + Stores a function of function macro inside self.entries array. + """ + + func_macro = False + return_type = '' + decl_type = 'function' + + # Prefixes that would be removed + sub_prefixes = [ + (r"^static +", "", 0), + (r"^extern +", "", 0), + (r"^asmlinkage +", "", 0), + (r"^inline +", "", 0), + (r"^__inline__ +", "", 0), + (r"^__inline +", "", 0), + (r"^__always_inline +", "", 0), + (r"^noinline +", "", 0), + (r"^__FORTIFY_INLINE +", "", 0), + (r"__init +", "", 0), + (r"__init_or_module +", "", 0), + (r"__deprecated +", "", 0), + (r"__flatten +", "", 0), + (r"__meminit +", "", 0), + (r"__must_check +", "", 0), + (r"__weak +", "", 0), + (r"__sched +", "", 0), + (r"_noprof", "", 0), + (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), + (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), + (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), + (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), + (r"__attribute_const__ +", "", 0), + + # It seems that Python support for re.X is broken: + # At least for me (Python 3.13), this didn't work +# (r""" +# __attribute__\s*\(\( +# (?: +# [\w\s]+ # attribute name +# (?:\([^)]*\))? # attribute arguments +# \s*,? # optional comma at the end +# )+ +# \)\)\s+ +# """, "", re.X), + + # So, remove whitespaces and comments from it + (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), + ] + + for search, sub, flags in sub_prefixes: + prototype = KernRe(search, flags).sub(sub, prototype) + + # Macros are a special case, as they change the prototype format + new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) + if new_proto != prototype: + is_define_proto = True + prototype = new_proto + else: + is_define_proto = False + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + name = r'[a-zA-Z0-9_~:]+' + prototype_end1 = r'[^\(]*' + prototype_end2 = r'[^\{]*' + prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' + + # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. + # So, this needs to be mapped in Python with (?:...)? or (?:...)+ + + type1 = r'(?:[\w\s]+)?' + type2 = r'(?:[\w\s]+\*+)+' + + found = False + + if is_define_proto: + r = KernRe(r'^()(' + name + r')\s+') + + if r.search(prototype): + return_type = '' + declaration_name = r.group(2) + func_macro = True + + found = True + + if not found: + patterns = [ + rf'^()({name})\s*{prototype_end}', + rf'^({type1})\s+({name})\s*{prototype_end}', + rf'^({type2})\s*({name})\s*{prototype_end}', + ] + + for p in patterns: + r = KernRe(p) + + if r.match(prototype): + + return_type = r.group(1) + declaration_name = r.group(2) + args = r.group(3) + + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + + found = True + break + if not found: + self.emit_warning(ln, + f"cannot understand function prototype: '{prototype}'") + return + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") + return + + prms = " ".join(self.entry.parameterlist) + self.check_sections(ln, declaration_name, "function", + self.entry.sectcheck, prms) + + self.check_return_section(ln, declaration_name, return_type) + + if 'typedef' in return_type: + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=True, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + else: + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=False, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + + def dump_typedef(self, ln, proto): + """ + Stores a typedef inside self.entries array. + """ + + typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + typedef_ident = r'\*?\s*(\w\S+)\s*' + typedef_args = r'\s*\((.*)\);' + + typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) + + # Strip comments + proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type = r.group(1).strip() + declaration_name = r.group(2) + args = r.group(3) + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + decl_type = 'function' + self.create_parameter_list(ln, decl_type, args, ',', declaration_name) + + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=True, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + purpose=self.entry.declaration_purpose) + return + + # Handle nested parentheses or brackets + r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') + while r.search(proto): + proto = r.sub('', proto) + + # Parse simple typedefs + r = KernRe(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name = r.group(1) + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.output_declaration('typedef', declaration_name, + typedef=declaration_name, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + purpose=self.entry.declaration_purpose) + return + + self.emit_warning(ln, "error: Cannot parse typedef!") + + @staticmethod + def process_export(function_set, line): + """ + process EXPORT_SYMBOL* tags + + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. + """ + + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. + + if export_symbol.search(line): + symbol = export_symbol.group(2) + function_set.add(symbol) + return + + if export_symbol_ns.search(line): + symbol = export_symbol_ns.group(2) + function_set.add(symbol) + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the /** to begin everything. + """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln) + self.entry.in_doc_sect = False + + # next line is always the function name + self.state = self.STATE_NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + + if doc_block.search(line): + self.entry.new_start_line = ln + + if not doc_block.group(1): + self.entry.section = self.section_intro + else: + self.entry.section = doc_block.group(1) + + self.entry.identifier = self.entry.section + self.state = self.STATE_DOCBLOCK + return + + if doc_decl.search(line): + self.entry.identifier = doc_decl.group(1) + self.entry.is_kernel_comment = False + + decl_start = str(doc_com) # comment block asterisk + fn_type = r"(?:\w+\s*\*\s*)?" # type (for non-functions) + parenthesis = r"(?:\(\w*\))?" # optional parenthesis on function + decl_end = r"(?:[-:].*)" # end of the name part + + # test for pointer declaration type, foo * bar() - desc + r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") + if r.search(line): + self.entry.identifier = r.group(1) + + # Test for data declaration + r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") + if r.search(line): + self.entry.decl_type = r.group(1) + self.entry.identifier = r.group(2) + self.entry.is_kernel_comment = True + else: + # Look for foo() or static void foo() - description; + # or misspelt identifier + + r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") + r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") + + for r in [r1, r2]: + if r.search(line): + self.entry.identifier = r.group(1) + self.entry.decl_type = "function" + + r = KernRe(r"define\s+") + self.entry.identifier = r.sub("", self.entry.identifier) + self.entry.is_kernel_comment = True + break + + self.entry.identifier = self.entry.identifier.strip(" ") + + self.state = self.STATE_BODY + + # if there's no @param blocks need to set up default section here + self.entry.section = self.section_default + self.entry.new_start_line = ln + 1 + + r = KernRe("[-:](.*)") + if r.search(line): + # strip leading/trailing/multiple spaces + self.entry.descr = r.group(1).strip(" ") + + r = KernRe(r"\s+") + self.entry.descr = r.sub(" ", self.entry.descr) + self.entry.declaration_purpose = self.entry.descr + self.state = self.STATE_BODY_MAYBE + else: + self.entry.declaration_purpose = "" + + if not self.entry.is_kernel_comment: + self.emit_warning(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = self.STATE_NORMAL + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_warning(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_warning(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = self.STATE_NORMAL + + if self.config.verbose: + self.emit_warning(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + + return + + # Failed to find an identifier. Emit a warning + self.emit_warning(ln, f"Cannot find identifier on line:\n{line}") + + def process_body(self, ln, line): + """ + STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. + """ + + if self.state == self.STATE_BODY_WITH_BLANK_LINE: + r = KernRe(r"\s*\*\s?\S") + if r.match(line): + self.dump_section() + self.entry.section = self.section_default + self.entry.new_start_line = ln + self.entry.contents = "" + + if doc_sect.search(line): + self.entry.in_doc_sect = True + newsection = doc_sect.group(1) + + if newsection.lower() in ["description", "context"]: + newsection = newsection.title() + + # Special case: @return is a section, not a param description + if newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + + # Perl kernel-doc has a check here for contents before sections. + # the logic there is always false, as in_doc_sect variable is + # always true. So, just don't implement Wcontents_before_sections + + # .title() + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + + if self.entry.contents.strip("\n"): + self.dump_section() + + self.entry.new_start_line = ln + self.entry.section = newsection + self.entry.leading_space = None + + self.entry.contents = newcontents.lstrip() + if self.entry.contents: + self.entry.contents += "\n" + + self.state = self.STATE_BODY + return + + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + <text> + doc_end: + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + if r.match(line): + self.emit_warning(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = self.STATE_PROTO + return + + if doc_content.search(line): + cont = doc_content.group(1) + + if cont == "": + if self.entry.section == self.section_context: + self.dump_section() + + self.entry.new_start_line = ln + self.state = self.STATE_BODY + else: + if self.entry.section != self.section_default: + self.state = self.STATE_BODY_WITH_BLANK_LINE + else: + self.state = self.STATE_BODY + + self.entry.contents += "\n" + + elif self.state == self.STATE_BODY_MAYBE: + + # Continued declaration purpose + self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() + self.entry.declaration_purpose += " " + cont + + r = KernRe(r"\s+") + self.entry.declaration_purpose = r.sub(' ', + self.entry.declaration_purpose) + + else: + if self.entry.section.startswith('@') or \ + self.entry.section == self.section_context: + if self.entry.leading_space is None: + r = KernRe(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + + # Double-check if leading space are realy spaces + pos = 0 + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + break + pos += 1 + + cont = cont[pos:] + + # NEW LOGIC: + # In case it is different, update it + if self.entry.leading_space != pos: + self.entry.leading_space = pos + + self.entry.contents += cont + "\n" + return + + # Unknown line, ignore + self.emit_warning(ln, f"bad line: {line}") + + def process_inline(self, ln, line): + """STATE_INLINE: docbook comments within a prototype.""" + + if self.inline_doc_state == self.STATE_INLINE_NAME and \ + doc_inline_sect.search(line): + self.entry.section = doc_inline_sect.group(1) + self.entry.new_start_line = ln + + self.entry.contents = doc_inline_sect.group(2).lstrip() + if self.entry.contents != "": + self.entry.contents += "\n" + + self.inline_doc_state = self.STATE_INLINE_TEXT + # Documentation block end */ + return + + if doc_inline_end.search(line): + if self.entry.contents not in ["", "\n"]: + self.dump_section() + + self.state = self.STATE_PROTO + self.inline_doc_state = self.STATE_INLINE_NA + return + + if doc_content.search(line): + if self.inline_doc_state == self.STATE_INLINE_TEXT: + self.entry.contents += doc_content.group(1) + "\n" + if not self.entry.contents.strip(" ").rstrip("\n"): + self.entry.contents = "" + + elif self.inline_doc_state == self.STATE_INLINE_NAME: + self.emit_warning(ln, + f"Incorrect use of kernel-doc format: {line}") + + self.inline_doc_state = self.STATE_INLINE_ERROR + + def syscall_munge(self, ln, proto): # pylint: disable=W0613 + """ + Handle syscall definitions + """ + + is_void = False + + # Strip newlines/CR's + proto = re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void = True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r = KernRe(r'long\s+(sys_.*?),') + if r.search(proto): + proto = KernRe(',').sub('(', proto, count=1) + elif is_void: + proto = KernRe(r'\)').sub('(void)', proto, count=1) + + # Now delete all of the odd-numbered commas in the proto + # so that argument types & names don't have a comma between them + count = 0 + length = len(proto) + + if is_void: + length = 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] == ',': + count += 1 + if count % 2 == 1: + proto = proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = KernRe(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = KernRe(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_warning(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + r = KernRe(r"\/\/.*$", re.S) + line = r.sub('', line) + + if KernRe(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif line.startswith('#'): + # Strip other macros like #ifdef/#ifndef/#endif/... + pass + else: + r = KernRe(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + + if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): + # strip comments + r = KernRe(r'/\*.*?\*/') + self.entry.prototype = r.sub('', self.entry.prototype) + + # strip newlines/cr's + r = KernRe(r'[\r\n]+') + self.entry.prototype = r.sub(' ', self.entry.prototype) + + # strip leading spaces + r = KernRe(r'^\s+') + self.entry.prototype = r.sub('', self.entry.prototype) + + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + + r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) + + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype = self.syscall_munge(ln, + self.entry.prototype) + + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip newlines/cr's. + line = KernRe(r'[\r\n]+', re.S).sub(' ', line) + + # Strip leading spaces + line = KernRe(r'^\s+', re.S).sub('', line) + + # Strip trailing spaces + line = KernRe(r'\s+$', re.S).sub('', line) + + # Strip C99-style comments to the end of the line + line = KernRe(r"\/\/.*$", re.S).sub('', line) + + # To distinguish preprocessor directive from regular declaration later. + if line.startswith('#'): + line += ";" + + r = KernRe(r'([^\{\};]*)([\{\};])(.*)') + while True: + if r.search(line): + if self.entry.prototype: + self.entry.prototype += " " + self.entry.prototype += r.group(1) + r.group(2) + + self.entry.brcount += r.group(2).count('{') + self.entry.brcount -= r.group(2).count('}') + + self.entry.brcount = max(self.entry.brcount, 0) + + if r.group(2) == ';' and self.entry.brcount == 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + break + + line = r.group(3) + else: + self.entry.prototype += line + break + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.section = doc_inline_oneline.group(1) + self.entry.contents = doc_inline_oneline.group(2) + + if self.entry.contents != "": + self.entry.contents += "\n" + self.dump_section(start_new=False) + + elif doc_inline_start.search(line): + self.state = self.STATE_INLINE + self.inline_doc_state = self.STATE_INLINE_NAME + + elif self.entry.decl_type == 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", self.entry.identifier, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.contents += doc_content.group(1) + "\n" + + def parse_export(self): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + + for line in fp: + self.process_export(export_table, line) + + except IOError: + return None + + return export_table + + def parse_kdoc(self): + """ + Open and process each line of a C source file. + The parsing is controlled via a state machine, and the line is passed + to a different process function depending on the state. The process + function may update the state as needed. + + Besides parsing kernel-doc tags, it also parses export symbols. + """ + + cont = False + prev = "" + prev_ln = None + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + for ln, line in enumerate(fp): + + line = line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state == self.STATE_PROTO: + if line.endswith("\\"): + prev += line.rstrip("\\") + cont = True + + if not prev_ln: + prev_ln = ln + + continue + + if cont: + ln = prev_ln + line = prev + line + prev = "" + cont = False + prev_ln = None + + self.config.log.debug("%d %s%s: %s", + ln, self.st_name[self.state], + self.st_inline_name[self.inline_doc_state], + line) + + # This is an optimization over the original script. + # There, when export_file was used for the same file, + # it was read twice. Here, we use the already-existing + # loop to parse exported symbols as well. + # + # TODO: It should be noticed that not all states are + # needed here. On a future cleanup, process export only + # at the states that aren't handling comment markups. + self.process_export(export_table, line) + + # Hand this line to the appropriate state handler + if self.state == self.STATE_NORMAL: + self.process_normal(ln, line) + elif self.state == self.STATE_NAME: + self.process_name(ln, line) + elif self.state in [self.STATE_BODY, self.STATE_BODY_MAYBE, + self.STATE_BODY_WITH_BLANK_LINE]: + self.process_body(ln, line) + elif self.state == self.STATE_INLINE: # scanning for inline parameters + self.process_inline(ln, line) + elif self.state == self.STATE_PROTO: + self.process_proto(ln, line) + elif self.state == self.STATE_DOCBLOCK: + self.process_docblock(ln, line) + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + + return export_table, self.entries diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py new file mode 100755 index 000000000000..e81695b273bf --- /dev/null +++ b/scripts/lib/kdoc/kdoc_re.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. +""" + +import re + +# Local cache for regular expressions +re_cache = {} + + +class KernRe: + """ + Helper class to simplify regex declaration and usage, + + It calls re.compile for a given pattern. It also allows adding + regular expressions and define sub at class init time. + + Regular expressions can be cached via an argument, helping to speedup + searches. + """ + + def _add_regex(self, string, flags): + """ + Adds a new regex or re-use it from the cache. + """ + + if string in re_cache: + self.regex = re_cache[string] + else: + self.regex = re.compile(string, flags=flags) + + if self.cache: + re_cache[string] = self.regex + + def __init__(self, string, cache=True, flags=0): + """ + Compile a regular expression and initialize internal vars. + """ + + self.cache = cache + self.last_match = None + + self._add_regex(string, flags) + + def __str__(self): + """ + Return the regular expression pattern. + """ + return self.regex.pattern + + def __add__(self, other): + """ + Allows adding two regular expressions into one. + """ + + return KernRe(str(self) + str(other), cache=self.cache or other.cache, + flags=self.regex.flags | other.regex.flags) + + def match(self, string): + """ + Handles a re.match storing its results + """ + + self.last_match = self.regex.match(string) + return self.last_match + + def search(self, string): + """ + Handles a re.search storing its results + """ + + self.last_match = self.regex.search(string) + return self.last_match + + def findall(self, string): + """ + Alias to re.findall + """ + + return self.regex.findall(string) + + def split(self, string): + """ + Alias to re.split + """ + + return self.regex.split(string) + + def sub(self, sub, string, count=0): + """ + Alias to re.sub + """ + + return self.regex.sub(sub, string, count=count) + + def group(self, num): + """ + Returns the group results of the last match + """ + + return self.last_match.group(num) + + +class NestedMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parenthesis of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + # TODO: make NestedMatch handle multiple match groups + # + # Right now, regular expressions to match it are defined only up to + # the start delimiter, e.g.: + # + # \bSTRUCT_GROUP\( + # + # is similar to: STRUCT_GROUP\((.*)\) + # except that the content inside the match group is delimiter's aligned. + # + # The content inside parenthesis are converted into a single replace + # group (e.g. r`\1'). + # + # It would be nice to change such definition to support multiple + # match groups, allowing a regex equivalent to. + # + # FOO\((.*), (.*), (.*)\) + # + # it is probably easier to define it not as a regular expression, but + # with some lexical definition like: + # + # FOO(arg1, arg2, arg3) + + DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', + } + + RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') + + def _search(self, regex, line): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and place them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm shoud work fine for properly paired lines, but will + silently ignore end delimiters that preceeds an start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to rise exceptions + to cover such issues. + """ + + stack = [] + + for match_re in regex.finditer(line): + start = match_re.start() + offset = match_re.end() + + d = line[offset - 1] + if d not in self.DELIMITER_PAIRS: + continue + + end = self.DELIMITER_PAIRS[d] + stack.append(end) + + for match in self.RE_DELIM.finditer(line[offset:]): + pos = match.start() + offset + + d = line[pos] + + if d in self.DELIMITER_PAIRS: + end = self.DELIMITER_PAIRS[d] + + stack.append(end) + continue + + # Does the end delimiter match what it is expected? + if stack and d == stack[-1]: + stack.pop() + + if not stack: + yield start, offset, pos + 1 + break + + def search(self, regex, line): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + for t in self._search(regex, line): + + yield line[t[0]:t[2]] + + def sub(self, regex, sub, line, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if r'\1' is used, it works just like re: it places there the + matched paired data with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. + """ + out = "" + + cur_pos = 0 + n = 0 + + for start, end, pos in self._search(regex, line): + out += line[cur_pos:start] + + # Value, ignoring start/end delimiters + value = line[end:pos - 1] + + # replaces \1 at the sub string, if \1 is used there + new_sub = sub + new_sub = new_sub.replace(r'\1', value) + + out += new_sub + + # Drop end ';' if any + if line[pos] == ';': + pos += 1 + + cur_pos = pos + n += 1 + + if count and count >= n: + break + + # Append the remaining string + l = len(line) + out += line[cur_pos:l] + + return out |