diff options
Diffstat (limited to 'tools/perf/Documentation')
-rw-r--r-- | tools/perf/Documentation/itrace.txt | 14 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-bench.txt | 11 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-config.txt | 5 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-data.txt | 3 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-ftrace.txt | 75 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-intel-pt.txt | 63 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-list.txt | 1 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-record.txt | 44 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-script.txt | 4 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 44 | ||||
-rw-r--r-- | tools/perf/Documentation/perf.data-file-format.txt | 13 |
11 files changed, 247 insertions, 30 deletions
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index e817179c5027..d3740c8f399b 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -18,6 +18,7 @@ l synthesize last branch entries (use with i or x) L synthesize last branch entries on existing event records s skip initial number of events + q quicker (less detailed) decoding The default is all events i.e. the same as --itrace=ibxwpe, except for perf script where it is --itrace=ce @@ -47,3 +48,16 @@ --itrace=i0nss1000000 skips the first million instructions. + + The 'e' option may be followed by flags which affect what errors will or + will not be reported. Each flag must be preceded by either '+' or '-'. + The flags are: + o overflow + l trace data lost + + If supported, the 'd' option may be followed by flags which affect what + debug messages will or will not be logged. Each flag must be preceded + by either '+' or '-'. The flags are: + a all perf events + + If supported, the 'q' option may be repeated to increase the effect. diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index bad16512c48d..a0529c7fa5ef 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -49,6 +49,9 @@ SUBSYSTEM 'sched':: Scheduler and IPC mechanisms. +'syscall':: + System call performance (throughput). + 'mem':: Memory access performance. @@ -137,6 +140,14 @@ Example of *pipe* 59004 ops/sec --------------------- +SUITES FOR 'syscall' +~~~~~~~~~~~~~~~~~~ +*basic*:: +Suite for evaluating performance of core system call throughput (both usecs/op and ops/sec metrics). +This uses a single thread simply doing getppid(2), which is a simple syscall where the result is not +cached by glibc. + + SUITES FOR 'mem' ~~~~~~~~~~~~~~~~ *memcpy*:: diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c7d3df5798e2..76408d986aed 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -614,8 +614,9 @@ trace.*:: ftrace.*:: ftrace.tracer:: - Can be used to select the default tracer. Possible values are - 'function' and 'function_graph'. + Can be used to select the default tracer when neither -G nor + -F option is not specified. Possible values are 'function' and + 'function_graph'. llvm.*:: llvm.clang-path:: diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index c87180764829..726b9bc9e1a7 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -27,6 +27,9 @@ OPTIONS for 'convert' --to-ctf:: Triggers the CTF conversion, specify the path of CTF data directory. +--tod:: + Convert time to wall clock time. + -i:: Specify input perf data file path. diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index b80c84307dc9..78358af9a1c4 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -24,16 +24,28 @@ OPTIONS -t:: --tracer=:: - Tracer to use: function_graph or function. + Tracer to use when neither -G nor -F option is not + specified: function_graph or function. -v:: --verbose=:: Verbosity level. +-F:: +--funcs:: + List all available functions to trace. + -p:: --pid=:: Trace on existing process id (comma separated list). +--tid=:: + Trace on existing thread id (comma separated list). + +-D:: +--delay:: + Time (ms) to wait before starting tracing after program start. + -a:: --all-cpus:: Force system-wide collection. Scripts run without a <command> @@ -48,39 +60,58 @@ OPTIONS Ranges of CPUs are specified with -: 0-2. Default is to trace on all online CPUs. +-m:: +--buffer-size:: + Set the size of per-cpu tracing buffer, <size> is expected to + be a number with appended unit character - B/K/M/G. + +--inherit:: + Trace children processes spawned by our target. + -T:: --trace-funcs=:: - Only trace functions given by the argument. Multiple functions - can be given by using this option more than once. The function - argument also can be a glob pattern. It will be passed to - 'set_ftrace_filter' in tracefs. + Select function tracer and set function filter on the given + function (or a glob pattern). Multiple functions can be given + by using this option more than once. The function argument also + can be a glob pattern. It will be passed to 'set_ftrace_filter' + in tracefs. -N:: --notrace-funcs=:: - Do not trace functions given by the argument. Like -T option, - this can be used more than once to specify multiple functions - (or glob patterns). It will be passed to 'set_ftrace_notrace' - in tracefs. + Select function tracer and do not trace functions given by the + argument. Like -T option, this can be used more than once to + specify multiple functions (or glob patterns). It will be + passed to 'set_ftrace_notrace' in tracefs. + +--func-opts:: + List of options allowed to set: + call-graph - Display kernel stack trace for function tracer. + irq-info - Display irq context info for function tracer. -G:: --graph-funcs=:: - Set graph filter on the given function (or a glob pattern). - This is useful for the function_graph tracer only and enables - tracing for functions executed from the given function. - This can be used more than once to specify multiple functions. - It will be passed to 'set_graph_function' in tracefs. + Select function_graph tracer and set graph filter on the given + function (or a glob pattern). This is useful to trace for + functions executed from the given function. This can be used more + than once to specify multiple functions. It will be passed to + 'set_graph_function' in tracefs. -g:: --nograph-funcs=:: - Set graph notrace filter on the given function (or a glob pattern). - Like -G option, this is useful for the function_graph tracer only - and disables tracing for function executed from the given function. - This can be used more than once to specify multiple functions. - It will be passed to 'set_graph_notrace' in tracefs. + Select function_graph tracer and set graph notrace filter on the + given function (or a glob pattern). Like -G option, this is useful + for the function_graph tracer only and disables tracing for function + executed from the given function. This can be used more than once to + specify multiple functions. It will be passed to 'set_graph_notrace' + in tracefs. --D:: ---graph-depth=:: - Set max depth for function graph tracer to follow +--graph-opts:: + List of options allowed to set: + nosleep-time - Measure on-CPU time only for function_graph tracer. + noirqs - Ignore functions that happen inside interrupt. + verbose - Show process names, PIDs, timestamps, etc. + thresh=<n> - Setup trace duration threshold in microseconds. + depth=<n> - Set max depth for function graph tracer to follow. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index f4cd49a7fcdb..d5a266d7f15b 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -825,6 +825,7 @@ The letters are: l synthesize last branch entries (use with i or x) L synthesize last branch entries on existing event records s skip initial number of events + q quicker (less detailed) decoding "Instructions" events look like they were recorded by "perf record -e instructions". @@ -871,11 +872,24 @@ Developer Manuals. Error events show where the decoder lost the trace. Error events are quite important. Users must know if what they are seeing is a complete -picture or not. +picture or not. The "e" option may be followed by flags which affect what errors +will or will not be reported. Each flag must be preceded by either '+' or '-'. +The flags supported by Intel PT are: + -o Suppress overflow errors + -l Suppress trace data lost errors +For example, for errors but not overflow or data lost errors: + + --itrace=e-o-l The "d" option will cause the creation of a file "intel_pt.log" containing all decoded packets and instructions. Note that this option slows down the decoder -and that the resulting file may be very large. +and that the resulting file may be very large. The "d" option may be followed +by flags which affect what debug messages will or will not be logged. Each flag +must be preceded by either '+' or '-'. The flags support by Intel PT are: + -a Suppress logging of perf events + +a Log all perf events +By default, logged perf events are filtered by any specified time ranges, but +flag +a overrides that. In addition, the period of the "instructions" event can be specified. e.g. @@ -956,6 +970,51 @@ at the beginning. This is useful to ignore initialization code. skips the first million instructions. +The q option changes the way the trace is decoded. The decoding is much faster +but much less detailed. Specifically, with the q option, the decoder does not +decode TNT packets, and does not walk object code, but gets the ip from FUP and +TIP packets. The q option can be used with the b and i options but the period +is not used. The q option decodes more quickly, but is useful only if the +control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or +TIP.PGD packets (refer below). However the q option could be used to find time +ranges that could then be decoded fully using the --time option. + +What will *not* be decoded with the (single) q option: + + - direct calls and jmps + - conditional branches + - non-branch instructions + +What *will* be decoded with the (single) q option: + + - asynchronous branches such as interrupts + - indirect branches + - function return target address *if* the noretcomp config term (refer + config terms section) was used + - start of (control-flow) tracing + - end of (control-flow) tracing, if it is not out of context + - power events, ptwrite, transaction start and abort + - instruction pointer associated with PSB packets + +Note the q option does not specify what events will be synthesized e.g. the p +option must be used also to show power events. + +Repeating the q option (double-q i.e. qq) results in even faster decoding and even +less detail. The decoder decodes only extended PSB (PSB+) packets, getting the +instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and +PSBEND). Note PSB packets occur regularly in the trace based on the psb_period +config term (refer config terms section). There will be a FUP packet if the +PSB+ occurs while control flow is being traced. + +What will *not* be decoded with the qq option: + + - everything except instruction pointer associated with PSB packets + +What *will* be decoded with the qq option: + + - instruction pointer associated with PSB packets + + dump option ~~~~~~~~~~~ diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 376a50b3452d..10ed539a8859 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -119,6 +119,7 @@ It's also possible to use pmu syntax: perf record -e r1a8 -a sleep 1 perf record -e cpu/r1a8/ ... + perf record -e cpu/r0x1a8/ ... You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index fa8a5fcd27ab..3f72d8e261f3 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -407,8 +407,9 @@ if combined with -a or -C options. -D:: --delay=:: -After starting the program, wait msecs before measuring. This is useful to -filter out the startup phase of the program, which is often very different. +After starting the program, wait msecs before measuring (-1: start with events +disabled). This is useful to filter out the startup phase of the program, which +is often very different. -I:: --intr-regs:: @@ -626,6 +627,45 @@ option. The -e option and this one can be mixed and matched. Events can be grouped using the {} notation. endif::HAVE_LIBPFM[] +--control fd:ctl-fd[,ack-fd] +Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, +'disable': disable events). Measurements can be started with events disabled using +--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor +to synchronize with the controlling process. Example of bash shell script to enable and +disable events during measurements: + +#!/bin/bash + +ctl_dir=/tmp/ + +ctl_fifo=${ctl_dir}perf_ctl.fifo +test -p ${ctl_fifo} && unlink ${ctl_fifo} +mkfifo ${ctl_fifo} +exec {ctl_fd}<>${ctl_fifo} + +ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo +test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} +mkfifo ${ctl_ack_fifo} +exec {ctl_fd_ack}<>${ctl_ack_fifo} + +perf record -D -1 -e cpu-cycles -a \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & +perf_pid=$! + +sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" +sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + +exec {ctl_fd_ack}>&- +unlink ${ctl_ack_fifo} + +exec {ctl_fd}>&- +unlink ${ctl_fifo} + +wait -n ${perf_pid} +exit $? + + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 372dfd110e6d..4f712fb8f175 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -322,6 +322,10 @@ OPTIONS --show-cgroup-events Display cgroup events i.e. events of type PERF_RECORD_CGROUP. +--show-text-poke-events + Display text poke events i.e. events of type PERF_RECORD_TEXT_POKE and + PERF_RECORD_KSYMBOL. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index b029ee728a0b..c9bfefc051fb 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -176,6 +176,45 @@ with it. --append may be used here. Examples: 3>results perf stat --log-fd 3 -- $cmd 3>>results perf stat --log-fd 3 --append -- $cmd +--control fd:ctl-fd[,ack-fd] +Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, +'disable': disable events). Measurements can be started with events disabled using +--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor +to synchronize with the controlling process. Example of bash shell script to enable and +disable events during measurements: + +#!/bin/bash + +ctl_dir=/tmp/ + +ctl_fifo=${ctl_dir}perf_ctl.fifo +test -p ${ctl_fifo} && unlink ${ctl_fifo} +mkfifo ${ctl_fifo} +exec {ctl_fd}<>${ctl_fifo} + +ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo +test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} +mkfifo ${ctl_ack_fifo} +exec {ctl_fd_ack}<>${ctl_ack_fifo} + +perf stat -D -1 -e cpu-cycles -a -I 1000 \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & +perf_pid=$! + +sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" +sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + +exec {ctl_fd_ack}>&- +unlink ${ctl_ack_fifo} + +exec {ctl_fd}>&- +unlink ${ctl_fifo} + +wait -n ${perf_pid} +exit $? + + --pre:: --post:: Pre and post measurement hooks, e.g.: @@ -238,8 +277,9 @@ mode, use --per-node in addition to -a. (system-wide). -D msecs:: --delay msecs:: -After starting the program, wait msecs before measuring. This is useful to -filter out the startup phase of the program, which is often very different. +After starting the program, wait msecs before measuring (-1: start with events +disabled). This is useful to filter out the startup phase of the program, +which is often very different. -T:: --transaction:: diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index b6472e463284..9ee96640744e 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -389,6 +389,19 @@ struct { Example: cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake + HEADER_CLOCK_DATA = 29, + + Contains clock id and its reference time together with wall clock + time taken at the 'same time', both values are in nanoseconds. + The format of data is as below. + +struct { + u32 version; /* version = 1 */ + u32 clockid; + u64 wall_clock_ns; + u64 clockid_time_ns; +}; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, |