diff options
Diffstat (limited to 'tools/perf/Documentation')
-rw-r--r-- | tools/perf/Documentation/perf-bench.txt | 3 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-config.txt | 33 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-dlfilter.txt | 22 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-ftrace.txt | 16 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-record.txt | 95 | ||||
-rw-r--r-- | tools/perf/Documentation/perf.data-file-format.txt | 2 |
6 files changed, 72 insertions, 99 deletions
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index f04f0eaded98..ca5789625cd2 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -67,6 +67,9 @@ SUBSYSTEM 'internals':: Benchmark internal perf functionality. +'uprobe':: + Benchmark overhead of uprobe + BPF. + 'all':: All benchmark subsystems. diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 1478068ad5dd..0b4e79dbd3f6 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -125,9 +125,6 @@ Given a $HOME/.perfconfig like this: group = true skip-empty = true - [llvm] - dump-obj = true - clang-opt = -g You can hide source code of annotate feature setting the config to false with @@ -657,36 +654,6 @@ ftrace.*:: -F option is not specified. Possible values are 'function' and 'function_graph'. -llvm.*:: - llvm.clang-path:: - Path to clang. If omit, search it from $PATH. - - llvm.clang-bpf-cmd-template:: - Cmdline template. Below lines show its default value. Environment - variable is used to pass options. - "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ - "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ - "-Wno-unused-value -Wno-pointer-sign " \ - "-working-directory $WORKING_DIR " \ - "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" - - llvm.clang-opt:: - Options passed to clang. - - llvm.kbuild-dir:: - kbuild directory. If not set, use /lib/modules/`uname -r`/build. - If set to "" deliberately, skip kernel header auto-detector. - - llvm.kbuild-opts:: - Options passed to 'make' when detecting kernel header options. - - llvm.dump-obj:: - Enable perf dump BPF object files compiled by LLVM. - - llvm.opts:: - Options passed to llc. 
- samples.*:: samples.context:: diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt index fb22e3b31dc5..8887cc20a809 100644 --- a/tools/perf/Documentation/perf-dlfilter.txt +++ b/tools/perf/Documentation/perf-dlfilter.txt @@ -64,6 +64,12 @@ internal filtering. If implemented, 'filter_description' should return a one-line description of the filter, and optionally a longer description. +Do not assume the 'sample' argument is valid (dereferenceable) +after 'filter_event' and 'filter_event_early' return. + +Do not assume data referenced by pointers in struct perf_dlfilter_sample +is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return. + The perf_dlfilter_sample structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -150,7 +156,8 @@ struct perf_dlfilter_fns { const char *(*srcline)(void *ctx, __u32 *line_number); struct perf_event_attr *(*attr)(void *ctx); __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); - void *(*reserved[120])(void *); + void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al); + void *(*reserved[119])(void *); }; ---- @@ -161,7 +168,8 @@ struct perf_dlfilter_fns { 'args' returns arguments from --dlarg options. 'resolve_address' provides information about 'address'. al->size must be set -before calling. Returns 0 on success, -1 otherwise. +before calling. Returns 0 on success, -1 otherwise. Call al_cleanup() (if present, +see below) when 'al' data is no longer needed. 'insn' returns instruction bytes and length. @@ -171,6 +179,12 @@ before calling. Returns 0 on success, -1 otherwise. 'object_code' reads object code and returns the number of bytes read. +'al_cleanup' must be called (if present, so check perf_dlfilter_fns.al_cleanup != NULL) +after resolve_address() to free any associated resources. + +Do not assume pointers obtained via perf_dlfilter_fns are valid (dereferenceable) +after 'filter_event' and 'filter_event_early' return. 
+ The perf_dlfilter_al structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -197,9 +211,13 @@ struct perf_dlfilter_al { /* Below members are only populated by resolve_ip() */ __u8 filtered; /* true if this sample event will be filtered out */ const char *comm; + void *priv; /* Private data. Do not change */ }; ---- +Do not assume data referenced by pointers in struct perf_dlfilter_al +is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return. + perf_dlfilter_sample flags ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index df4595563801..d780b93fcf87 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -96,8 +96,9 @@ OPTIONS for 'perf ftrace trace' --func-opts:: List of options allowed to set: - call-graph - Display kernel stack trace for function tracer. - irq-info - Display irq context info for function tracer. + + - call-graph - Display kernel stack trace for function tracer. + - irq-info - Display irq context info for function tracer. -G:: --graph-funcs=:: @@ -118,11 +119,12 @@ OPTIONS for 'perf ftrace trace' --graph-opts:: List of options allowed to set: - nosleep-time - Measure on-CPU time only for function_graph tracer. - noirqs - Ignore functions that happen inside interrupt. - verbose - Show process names, PIDs, timestamps, etc. - thresh=<n> - Setup trace duration threshold in microseconds. - depth=<n> - Set max depth for function graph tracer to follow. + + - nosleep-time - Measure on-CPU time only for function_graph tracer. + - noirqs - Ignore functions that happen inside interrupt. + - verbose - Show process names, PIDs, timestamps, etc. + - thresh=<n> - Setup trace duration threshold in microseconds. + - depth=<n> - Set max depth for function graph tracer to follow. 
OPTIONS for 'perf ftrace latency' diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 680396c56bd1..d5217be012d7 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -99,20 +99,6 @@ OPTIONS If you want to profile write accesses in [0x1000~1008), just set 'mem:0x1000/8:w'. - - a BPF source file (ending in .c) or a precompiled object file (ending - in .o) selects one or more BPF events. - The BPF program can attach to various perf events based on the ELF section - names. - - When processing a '.c' file, perf searches an installed LLVM to compile it - into an object file first. Optional clang options can be passed via the - '--clang-opt' command line option, e.g.: - - perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \ - -e tests/bpf-script-example.c - - Note: '--clang-opt' must be placed before '--event/-e'. - - a group of events surrounded by a pair of brace ("{event1,event2,...}"). Each event is separated by commas and the group should be quoted to prevent the shell interpretation. You also need to use --group on @@ -523,9 +509,10 @@ CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI. Select AUX area tracing Snapshot Mode. This option is valid only with an AUX area tracing event. Optionally, certain snapshot capturing parameters can be specified in a string that follows this option: - 'e': take one last snapshot on exit; guarantees that there is at least one + + - 'e': take one last snapshot on exit; guarantees that there is at least one snapshot in the output file; - <size>: if the PMU supports this, specify the desired snapshot size. + - <size>: if the PMU supports this, specify the desired snapshot size. In Snapshot Mode trace data is captured only when signal SIGUSR2 is received and on exit if the above 'e' option is given. @@ -547,14 +534,6 @@ PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. 
Intel PT, CoreSight or Arm SPE) switch events will be enabled automatically, which can be suppressed by the option --no-switch-events. ---clang-path=PATH:: -Path to clang binary to use for compiling BPF scriptlets. -(enabled when BPF support is on) - ---clang-opt=OPTIONS:: -Options passed to clang when compiling BPF scriptlets. -(enabled when BPF support is on) - --vmlinux=PATH:: Specify vmlinux path which has debuginfo. (enabled when BPF prologue is on) @@ -572,8 +551,9 @@ providing implementation for Posix AIO API. --affinity=mode:: Set affinity mask of trace reading thread according to the policy defined by 'mode' value: - node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer - cpu - thread affinity mask is set to cpu of the processed mmap buffer + + - node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer + - cpu - thread affinity mask is set to cpu of the processed mmap buffer --mmap-flush=number:: @@ -625,16 +605,17 @@ Record timestamp boundary (time of first/last samples). --switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one based on 'mode' value: - "signal" - when receiving a SIGUSR2 (default value) or - <size> - when reaching the size threshold, size is expected to - be a number with appended unit character - B/K/M/G - <time> - when reaching the time threshold, size is expected to - be a number with appended unit character - s/m/h/d - Note: the precision of the size threshold hugely depends - on your configuration - the number and size of your ring - buffers (-m). It is generally more precise for higher sizes - (like >5M), for lower values expect different sizes. 
+ - "signal" - when receiving a SIGUSR2 (default value) or + - <size> - when reaching the size threshold, size is expected to + be a number with appended unit character - B/K/M/G + - <time> - when reaching the time threshold, size is expected to + be a number with appended unit character - s/m/h/d + + Note: the precision of the size threshold hugely depends + on your configuration - the number and size of your ring + buffers (-m). It is generally more precise for higher sizes + (like >5M), for lower values expect different sizes. A possible use case is to, given an external event, slice the perf.data file that gets then processed, possibly via a perf script, to decide if that @@ -680,11 +661,12 @@ choice in this option. For example, --synth=no would have MMAP events for kernel and modules. Available types are: - 'task' - synthesize FORK and COMM events for each task - 'mmap' - synthesize MMAP events for each process (implies 'task') - 'cgroup' - synthesize CGROUP events for each cgroup - 'all' - synthesize all events (default) - 'no' - do not synthesize any of the above events + + - 'task' - synthesize FORK and COMM events for each task + - 'mmap' - synthesize MMAP events for each process (implies 'task') + - 'cgroup' - synthesize CGROUP events for each cgroup + - 'all' - synthesize all events (default) + - 'no' - do not synthesize any of the above events --tail-synthesize:: Instead of collecting non-sample events (for example, fork, comm, mmap) at @@ -736,18 +718,19 @@ ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement. Available commands: - 'enable' : enable events - 'disable' : disable events - 'enable name' : enable event 'name' - 'disable name' : disable event 'name' - 'snapshot' : AUX area tracing snapshot). - 'stop' : stop perf record - 'ping' : ping - - 'evlist [-v|-g|-F] : display all events - -F Show just the sample frequency used for each event. - -v Show all fields. 
- -g Show event group information. + + - 'enable' : enable events + - 'disable' : disable events + - 'enable name' : enable event 'name' + - 'disable name' : disable event 'name' + - 'snapshot' : AUX area tracing snapshot. + - 'stop' : stop perf record + - 'ping' : ping + - 'evlist [-v|-g|-F]' : display all events + + -F Show just the sample frequency used for each event. + -v Show all fields. + -g Show event group information. Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor to synchronize with the @@ -808,10 +791,10 @@ the second monitors CPUs 1 and 5-7 with the affinity mask 5-7. <spec> value can also be a string meaning predefined parallel threads layout: - cpu - create new data streaming thread for every monitored cpu - core - create new thread to monitor CPUs grouped by a core - package - create new thread to monitor CPUs grouped by a package - numa - create new threed to monitor CPUs grouped by a NUMA domain + - cpu - create new data streaming thread for every monitored cpu + - core - create new thread to monitor CPUs grouped by a core + - package - create new thread to monitor CPUs grouped by a package + - numa - create new thread to monitor CPUs grouped by a NUMA domain Predefined layouts can be used on systems with large number of CPUs in order not to spawn multiple per-cpu streaming threads but still avoid LOST diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 635ba043fd7d..010a4edcd384 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -43,7 +43,7 @@ struct perf_file_section { Flags section: -For each of the optional features a perf_file_section it placed after the data +For each of the optional features a perf_file_section is placed after the data section if the feature bit is set in the perf_header flags 
bitset. The respective perf_file_section points to the data of the additional header and defines its size. |