From 35995a4d815586bc968a857f7235707940a2f755 Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Tue, 19 Aug 2008 20:43:25 +0300 Subject: SLUB: Replace __builtin_return_address(0) with _RET_IP_. This patch replaces __builtin_return_address(0) with _RET_IP_, since a previous patch moved _RET_IP_ and _THIS_IP_ to include/linux/kernel.h and they're widely available now. This makes for shorter and easier to read code. [penberg@cs.helsinki.fi: remove _RET_IP_ casts to void pointer] Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- include/linux/slab.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 000da12b5cf0..c97ed28559ec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -253,9 +253,9 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, * request comes from. */ #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) -extern void *__kmalloc_track_caller(size_t, gfp_t, void*); +extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long); #define kmalloc_track_caller(size, flags) \ - __kmalloc_track_caller(size, flags, __builtin_return_address(0)) + __kmalloc_track_caller(size, flags, _RET_IP_) #else #define kmalloc_track_caller(size, flags) \ __kmalloc(size, flags) @@ -271,10 +271,10 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, void*); * allocation request comes from. */ #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) -extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); +extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ - __builtin_return_address(0)) + _RET_IP_) #else #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node(size, flags, node) -- cgit v1.2.3 From b9ce08c01020eb28bfbfa6faf1c740281c5f418e Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Sun, 10 Aug 2008 20:14:03 +0300 Subject: kmemtrace: Core implementation. kmemtrace provides tracing for slab allocator functions, such as kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected data is then fed to the userspace application in order to analyse allocation hotspots, internal fragmentation and so on, making it possible to see how well an allocator performs, as well as debug and profile kernel code. Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- Documentation/kernel-parameters.txt | 10 ++ MAINTAINERS | 6 + include/linux/kmemtrace.h | 85 +++++++++ init/main.c | 2 + lib/Kconfig.debug | 28 +++ mm/Makefile | 1 + mm/kmemtrace.c | 335 ++++++++++++++++++++++++++++++++++++ 7 files changed, 467 insertions(+) create mode 100644 include/linux/kmemtrace.h create mode 100644 mm/kmemtrace.c (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e0f346d201ed..542c2d8843db 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -49,6 +49,7 @@ parameter is applicable: ISAPNP ISA PnP code is enabled. ISDN Appropriate ISDN support is enabled. JOY Appropriate joystick support is enabled. + KMEMTRACE kmemtrace is enabled. LIBATA Libata driver is enabled LP Printer support is enabled. LOOP Loopback device support is enabled. @@ -1018,6 +1019,15 @@ and is between 256 and 4096 characters. It is defined in the file use the HighMem zone if it exists, and the Normal zone if it does not. + kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no } + Controls whether kmemtrace is enabled + at boot-time. + + kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of + subbufs kmemtrace's relay channel has. Set this + higher than default (KMEMTRACE_N_SUBBUFS in code) if + you experience buffer overruns. + movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter is similar to kernelcore except it specifies the amount of memory used for migratable allocations. diff --git a/MAINTAINERS b/MAINTAINERS index 618c1ef4a397..e2b3c8555051 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2565,6 +2565,12 @@ M: jason.wessel@windriver.com L: kgdb-bugreport@lists.sourceforge.net S: Maintained +KMEMTRACE +P: Eduard - Gabriel Munteanu +M: eduard.munteanu@linux360.ro +L: linux-kernel@vger.kernel.org +S: Maintained + KPROBES P: Ananth N Mavinakayanahalli M: ananth@in.ibm.com diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h new file mode 100644 index 000000000000..2c332010cb4e --- /dev/null +++ b/include/linux/kmemtrace.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include +#include + +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ +}; + +#ifdef CONFIG_KMEMTRACE + +extern void kmemtrace_init(void); + +static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ + trace_mark(kmemtrace_alloc, "type_id %d call_site %lu ptr %lu " + "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d", + type_id, call_site, (unsigned long) ptr, + bytes_req, bytes_alloc, (unsigned long) gfp_flags, node); +} + +static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ + trace_mark(kmemtrace_free, "type_id %d call_site %lu ptr %lu", + type_id, call_site, (unsigned long) ptr); +} + +#else /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_init(void) +{ +} + +static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ +} + +static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ +} + +#endif /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags) +{ + kmemtrace_mark_alloc_node(type_id, call_site, ptr, + bytes_req, bytes_alloc, gfp_flags, -1); +} + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/init/main.c b/init/main.c index 7e117a231af1..be1fe2242a55 100644 --- a/init/main.c +++ b/init/main.c @@ -69,6 +69,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -653,6 +654,7 @@ asmlinkage void __init start_kernel(void) enable_debug_pagealloc(); cpu_hotplug_init(); kmem_cache_init(); + kmemtrace_init(); debug_objects_mem_init(); idr_init_cache(); setup_per_cpu_pageset(); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b0f239e443bc..78d669b461d2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -803,6 +803,34 @@ config FIREWIRE_OHCI_REMOTE_DMA If unsure, say N. +config KMEMTRACE + bool "Kernel memory tracer (kmemtrace)" + depends on RELAY && DEBUG_FS && MARKERS + help + kmemtrace provides tracing for slab allocator functions, such as + kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected + data is then fed to the userspace application in order to analyse + allocation hotspots, internal fragmentation and so on, making it + possible to see how well an allocator performs, as well as debug + and profile kernel code. + + This requires an userspace application to use. See + Documentation/vm/kmemtrace.txt for more information. + + Saying Y will make the kernel somewhat larger and slower. However, + if you disable kmemtrace at run-time or boot-time, the performance + impact is minimal (depending on the arch the kernel is built for). + + If unsure, say N. + +config KMEMTRACE_DEFAULT_ENABLED + bool "Enabled by default at boot" + depends on KMEMTRACE + help + Say Y here to enable kmemtrace at boot-time by default. Whatever + the choice, the behavior can be overridden by a kernel parameter, + as described in documentation. + menuconfig BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK diff --git a/mm/Makefile b/mm/Makefile index c06b45a1ff5f..3782eb66d4b3 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -34,3 +34,4 @@ obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_KMEMTRACE) += kmemtrace.o diff --git a/mm/kmemtrace.c b/mm/kmemtrace.c new file mode 100644 index 000000000000..83ad1cc71a92 --- /dev/null +++ b/mm/kmemtrace.c @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2008 Pekka Enberg, Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define KMEMTRACE_SUBBUF_SIZE 524288 +#define KMEMTRACE_DEF_N_SUBBUFS 20 + +static struct rchan *kmemtrace_chan; +static u32 kmemtrace_buf_overruns; + +static unsigned int kmemtrace_n_subbufs; +#ifdef CONFIG_KMEMTRACE_DEFAULT_ENABLED +static unsigned int kmemtrace_enabled = 1; +#else +static unsigned int kmemtrace_enabled = 0; +#endif + +/* + * The sequence number is used for reordering kmemtrace packets + * in userspace, since they are logged as per-CPU data. + * + * atomic_t should always be a 32-bit signed integer. Wraparound is not + * likely to occur, but userspace can deal with it by expecting a certain + * sequence number in the next packet that will be read. + */ +static atomic_t kmemtrace_seq_num; + +#define KMEMTRACE_ABI_VERSION 1 + +static u32 kmemtrace_abi_version __read_mostly = KMEMTRACE_ABI_VERSION; + +enum kmemtrace_event_id { + KMEMTRACE_EVENT_ALLOC = 0, + KMEMTRACE_EVENT_FREE, +}; + +struct kmemtrace_event { + u8 event_id; + u8 type_id; + u16 event_size; + s32 seq_num; + u64 call_site; + u64 ptr; +} __attribute__ ((__packed__)); + +struct kmemtrace_stats_alloc { + u64 bytes_req; + u64 bytes_alloc; + u32 gfp_flags; + s32 numa_node; +} __attribute__ ((__packed__)); + +static void kmemtrace_probe_alloc(void *probe_data, void *call_data, + const char *format, va_list *args) +{ + unsigned long flags; + struct kmemtrace_event *ev; + struct kmemtrace_stats_alloc *stats; + void *buf; + + local_irq_save(flags); + + buf = relay_reserve(kmemtrace_chan, + sizeof(struct kmemtrace_event) + + sizeof(struct kmemtrace_stats_alloc)); + if (!buf) + goto failed; + + /* + * Don't convert this to use structure initializers, + * C99 does not guarantee the rvalues evaluation order. + */ + + ev = buf; + ev->event_id = KMEMTRACE_EVENT_ALLOC; + ev->type_id = va_arg(*args, int); + ev->event_size = sizeof(struct kmemtrace_event) + + sizeof(struct kmemtrace_stats_alloc); + ev->seq_num = atomic_add_return(1, &kmemtrace_seq_num); + ev->call_site = va_arg(*args, unsigned long); + ev->ptr = va_arg(*args, unsigned long); + + stats = buf + sizeof(struct kmemtrace_event); + stats->bytes_req = va_arg(*args, unsigned long); + stats->bytes_alloc = va_arg(*args, unsigned long); + stats->gfp_flags = va_arg(*args, unsigned long); + stats->numa_node = va_arg(*args, int); + +failed: + local_irq_restore(flags); +} + +static void kmemtrace_probe_free(void *probe_data, void *call_data, + const char *format, va_list *args) +{ + unsigned long flags; + struct kmemtrace_event *ev; + + local_irq_save(flags); + + ev = relay_reserve(kmemtrace_chan, sizeof(struct kmemtrace_event)); + if (!ev) + goto failed; + + /* + * Don't convert this to use structure initializers, + * C99 does not guarantee the rvalues evaluation order. + */ + ev->event_id = KMEMTRACE_EVENT_FREE; + ev->type_id = va_arg(*args, int); + ev->event_size = sizeof(struct kmemtrace_event); + ev->seq_num = atomic_add_return(1, &kmemtrace_seq_num); + ev->call_site = va_arg(*args, unsigned long); + ev->ptr = va_arg(*args, unsigned long); + +failed: + local_irq_restore(flags); +} + +static struct dentry * +kmemtrace_create_buf_file(const char *filename, struct dentry *parent, + int mode, struct rchan_buf *buf, int *is_global) +{ + return debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); +} + +static int kmemtrace_remove_buf_file(struct dentry *dentry) +{ + debugfs_remove(dentry); + + return 0; +} + +static int kmemtrace_subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding) +{ + if (relay_buf_full(buf)) { + /* + * We know it's not SMP-safe, but neither + * debugfs_create_u32() is. + */ + kmemtrace_buf_overruns++; + return 0; + } + + return 1; +} + +static struct rchan_callbacks relay_callbacks = { + .create_buf_file = kmemtrace_create_buf_file, + .remove_buf_file = kmemtrace_remove_buf_file, + .subbuf_start = kmemtrace_subbuf_start, +}; + +static struct dentry *kmemtrace_dir; +static struct dentry *kmemtrace_overruns_dentry; +static struct dentry *kmemtrace_abi_version_dentry; + +static struct dentry *kmemtrace_enabled_dentry; + +static int kmemtrace_start_probes(void) +{ + int err; + + err = marker_probe_register("kmemtrace_alloc", "type_id %d " + "call_site %lu ptr %lu " + "bytes_req %lu bytes_alloc %lu " + "gfp_flags %lu node %d", + kmemtrace_probe_alloc, NULL); + if (err) + return err; + err = marker_probe_register("kmemtrace_free", "type_id %d " + "call_site %lu ptr %lu", + kmemtrace_probe_free, NULL); + + return err; +} + +static void kmemtrace_stop_probes(void) +{ + marker_probe_unregister("kmemtrace_alloc", + kmemtrace_probe_alloc, NULL); + marker_probe_unregister("kmemtrace_free", + kmemtrace_probe_free, NULL); +} + +static int kmemtrace_enabled_get(void *data, u64 *val) +{ + *val = *((int *) data); + + return 0; +} + +static int kmemtrace_enabled_set(void *data, u64 val) +{ + u64 old_val = kmemtrace_enabled; + + *((int *) data) = !!val; + + if (old_val == val) + return 0; + if (val) + kmemtrace_start_probes(); + else + kmemtrace_stop_probes(); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(kmemtrace_enabled_fops, + kmemtrace_enabled_get, + kmemtrace_enabled_set, "%llu\n"); + +static void kmemtrace_cleanup(void) +{ + if (kmemtrace_enabled_dentry) + debugfs_remove(kmemtrace_enabled_dentry); + + kmemtrace_stop_probes(); + + if (kmemtrace_abi_version_dentry) + debugfs_remove(kmemtrace_abi_version_dentry); + if (kmemtrace_overruns_dentry) + debugfs_remove(kmemtrace_overruns_dentry); + + relay_close(kmemtrace_chan); + kmemtrace_chan = NULL; + + if (kmemtrace_dir) + debugfs_remove(kmemtrace_dir); +} + +static int __init kmemtrace_setup_late(void) +{ + if (!kmemtrace_chan) + goto failed; + + kmemtrace_dir = debugfs_create_dir("kmemtrace", NULL); + if (!kmemtrace_dir) + goto cleanup; + + kmemtrace_abi_version_dentry = + debugfs_create_u32("abi_version", S_IRUSR, + kmemtrace_dir, &kmemtrace_abi_version); + kmemtrace_overruns_dentry = + debugfs_create_u32("total_overruns", S_IRUSR, + kmemtrace_dir, &kmemtrace_buf_overruns); + if (!kmemtrace_overruns_dentry || !kmemtrace_abi_version_dentry) + goto cleanup; + + kmemtrace_enabled_dentry = + debugfs_create_file("enabled", S_IRUSR | S_IWUSR, + kmemtrace_dir, &kmemtrace_enabled, + &kmemtrace_enabled_fops); + if (!kmemtrace_enabled_dentry) + goto cleanup; + + if (relay_late_setup_files(kmemtrace_chan, "cpu", kmemtrace_dir)) + goto cleanup; + + printk(KERN_INFO "kmemtrace: fully up.\n"); + + return 0; + +cleanup: + kmemtrace_cleanup(); +failed: + return 1; +} +late_initcall(kmemtrace_setup_late); + +static int __init kmemtrace_set_boot_enabled(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "yes")) + kmemtrace_enabled = 1; + else if (!strcmp(str, "no")) + kmemtrace_enabled = 0; + else + return -EINVAL; + + return 0; +} +early_param("kmemtrace.enable", kmemtrace_set_boot_enabled); + +static int __init kmemtrace_set_subbufs(char *str) +{ + get_option(&str, &kmemtrace_n_subbufs); + return 0; +} +early_param("kmemtrace.subbufs", kmemtrace_set_subbufs); + +void kmemtrace_init(void) +{ + if (!kmemtrace_enabled) + return; + + if (!kmemtrace_n_subbufs) + kmemtrace_n_subbufs = KMEMTRACE_DEF_N_SUBBUFS; + + kmemtrace_chan = relay_open(NULL, NULL, KMEMTRACE_SUBBUF_SIZE, + kmemtrace_n_subbufs, &relay_callbacks, + NULL); + if (unlikely(!kmemtrace_chan)) { + printk(KERN_ERR "kmemtrace: could not open relay channel.\n"); + return; + } + + if (unlikely(kmemtrace_start_probes())) + goto probe_fail; + + printk(KERN_INFO "kmemtrace: early init successful.\n"); + + return; + +probe_fail: + printk(KERN_ERR "kmemtrace: could not register marker probes!\n"); + kmemtrace_cleanup(); +} + -- cgit v1.2.3 From 36555751c6751a5bdfd6d7bdf0648343bb1ef0de Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Sun, 10 Aug 2008 20:14:05 +0300 Subject: kmemtrace: SLAB hooks. This adds hooks for the SLAB allocator, to allow tracing with kmemtrace. We also convert some inline functions to __always_inline to make sure _RET_IP_, which expands to __builtin_return_address(0), always works as expected. Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 68 ++++++++++++++++++++++++++++++++++++++++------ mm/slab.c | 71 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 123 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 39c3a5eb8ebe..7555ce99f6d2 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,6 +14,7 @@ #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include +#include /* Size description struct for general caches. */ struct cache_sizes { @@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[]; void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); -static inline void *kmalloc(size_t size, gfp_t flags) +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags); +extern size_t slab_buffer_size(struct kmem_cache *cachep); +#else +static __always_inline void * +kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) { + return kmem_cache_alloc(cachep, flags); +} +static inline size_t slab_buffer_size(struct kmem_cache *cachep) +{ + return 0; +} +#endif + +static __always_inline void *kmalloc(size_t size, gfp_t flags) +{ + struct kmem_cache *cachep; + void *ret; + if (__builtin_constant_p(size)) { int i = 0; @@ -50,10 +69,17 @@ static inline void *kmalloc(size_t size, gfp_t flags) found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep, - flags); + cachep = malloc_sizes[i].cs_dmacachep; + else #endif - return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags); + cachep = malloc_sizes[i].cs_cachep; + + ret = kmem_cache_alloc_notrace(cachep, flags); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret, + size, slab_buffer_size(cachep), flags); + + return ret; } return __kmalloc(size, flags); } @@ -62,8 +88,25 @@ found: extern void *__kmalloc_node(size_t size, gfp_t flags, int node); extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, + gfp_t flags, + int nodeid); +#else +static __always_inline void * +kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, + gfp_t flags, + int nodeid) +{ + return kmem_cache_alloc_node(cachep, flags, nodeid); +} +#endif + +static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { + struct kmem_cache *cachep; + void *ret; + if (__builtin_constant_p(size)) { int i = 0; @@ -84,11 +127,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep, - flags, node); + cachep = malloc_sizes[i].cs_dmacachep; + else #endif - return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep, - flags, node); + cachep = malloc_sizes[i].cs_cachep; + + ret = kmem_cache_alloc_node_notrace(cachep, flags, node); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, + ret, size, slab_buffer_size(cachep), + flags, node); + + return ret; } return __kmalloc_node(size, flags, node); } diff --git a/mm/slab.c b/mm/slab.c index a14787799014..b6d9b8cdefa9 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -112,6 +112,7 @@ #include #include #include +#include #include #include @@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) #endif +#ifdef CONFIG_KMEMTRACE +size_t slab_buffer_size(struct kmem_cache *cachep) +{ + return cachep->buffer_size; +} +EXPORT_SYMBOL(slab_buffer_size); +#endif + /* * Do not go above this order unless 0 objects fit into the slab. */ @@ -3613,10 +3622,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) */ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { - return __cache_alloc(cachep, flags, __builtin_return_address(0)); + void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, + obj_size(cachep), cachep->buffer_size, flags); + + return ret; } EXPORT_SYMBOL(kmem_cache_alloc); +#ifdef CONFIG_KMEMTRACE +void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) +{ + return __cache_alloc(cachep, flags, __builtin_return_address(0)); +} +EXPORT_SYMBOL(kmem_cache_alloc_notrace); +#endif + /** * kmem_ptr_validate - check if an untrusted pointer might be a slab entry. * @cachep: the cache we're checking against @@ -3661,23 +3683,47 @@ out: #ifdef CONFIG_NUMA void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { - return __cache_alloc_node(cachep, flags, nodeid, - __builtin_return_address(0)); + void *ret = __cache_alloc_node(cachep, flags, nodeid, + __builtin_return_address(0)); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, + obj_size(cachep), cachep->buffer_size, + flags, nodeid); + + return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node); +#ifdef CONFIG_KMEMTRACE +void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, + gfp_t flags, + int nodeid) +{ + return __cache_alloc_node(cachep, flags, nodeid, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); +#endif + static __always_inline void * __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) { struct kmem_cache *cachep; + void *ret; cachep = kmem_find_general_cachep(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; - return kmem_cache_alloc_node(cachep, flags, node); + ret = kmem_cache_alloc_node_notrace(cachep, flags, node); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, + (unsigned long) caller, ret, + size, cachep->buffer_size, flags, node); + + return ret; } -#ifdef CONFIG_DEBUG_SLAB +#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE) void *__kmalloc_node(size_t size, gfp_t flags, int node) { return __do_kmalloc_node(size, flags, node, @@ -3710,6 +3756,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, void *caller) { struct kmem_cache *cachep; + void *ret; /* If you want to save a few bytes .text space: replace * __ with kmem_. @@ -3719,11 +3766,17 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, cachep = __find_general_cachep(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; - return __cache_alloc(cachep, flags, caller); + ret = __cache_alloc(cachep, flags, caller); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, + (unsigned long) caller, ret, + size, cachep->buffer_size, flags); + + return ret; } -#ifdef CONFIG_DEBUG_SLAB +#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE) void *__kmalloc(size_t size, gfp_t flags) { return __do_kmalloc(size, flags, __builtin_return_address(0)); @@ -3762,6 +3815,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) debug_check_no_obj_freed(objp, obj_size(cachep)); __cache_free(cachep, objp); local_irq_restore(flags); + + kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp); } EXPORT_SYMBOL(kmem_cache_free); @@ -3788,6 +3843,8 @@ void kfree(const void *objp) debug_check_no_obj_freed(objp, obj_size(c)); __cache_free(c, (void *)objp); local_irq_restore(flags); + + kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp); } EXPORT_SYMBOL(kfree); -- cgit v1.2.3 From 3eae2cb24a96509e0a38cc48dc1538a2826f4e33 Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Sun, 10 Aug 2008 20:14:07 +0300 Subject: kmemtrace: SLOB hooks. This adds hooks for the SLOB allocator, to allow tracing with kmemtrace. We also convert some inline functions to __always_inline to make sure _RET_IP_, which expands to __builtin_return_address(0), always works as expected. Acked-by: Matt Mackall Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- include/linux/slob_def.h | 9 +++++---- mm/slob.c | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index 59a3fa476ab9..0ec00b39d006 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -3,14 +3,15 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, + gfp_t flags) { return kmem_cache_alloc_node(cachep, flags, -1); } void *__kmalloc_node(size_t size, gfp_t flags, int node); -static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return __kmalloc_node(size, flags, node); } @@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) * kmalloc is the normal method of allocating memory * in the kernel. */ -static inline void *kmalloc(size_t size, gfp_t flags) +static __always_inline void *kmalloc(size_t size, gfp_t flags) { return __kmalloc_node(size, flags, -1); } -static inline void *__kmalloc(size_t size, gfp_t flags) +static __always_inline void *__kmalloc(size_t size, gfp_t flags) { return kmalloc(size, flags); } diff --git a/mm/slob.c b/mm/slob.c index cb675d126791..55de44ae5d30 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -65,6 +65,7 @@ #include #include #include +#include #include /* @@ -463,27 +464,38 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) { unsigned int *m; int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + void *ret; if (size < PAGE_SIZE - align) { if (!size) return ZERO_SIZE_PTR; m = slob_alloc(size + align, gfp, align, node); + if (!m) return NULL; *m = size; - return (void *)m + align; + ret = (void *)m + align; + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, + _RET_IP_, ret, + size, size + align, gfp, node); } else { - void *ret; + unsigned int order = get_order(size); - ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node); + ret = slob_new_page(gfp | __GFP_COMP, order, node); if (ret) { struct page *page; page = virt_to_page(ret); page->private = size; } - return ret; + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, + _RET_IP_, ret, + size, PAGE_SIZE << order, gfp, node); } + + return ret; } EXPORT_SYMBOL(__kmalloc_node); @@ -501,6 +513,8 @@ void kfree(const void *block) slob_free(m, *m + align); } else put_page(&sp->page); + + kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block); } EXPORT_SYMBOL(kfree); @@ -569,10 +583,19 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) { void *b; - if (c->size < PAGE_SIZE) + if (c->size < PAGE_SIZE) { b = slob_alloc(c->size, flags, c->align, node); - else + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, + _RET_IP_, b, c->size, + SLOB_UNITS(c->size) * SLOB_UNIT, + flags, node); + } else { b = slob_new_page(flags, get_order(c->size), node); + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, + _RET_IP_, b, c->size, + PAGE_SIZE << get_order(c->size), + flags, node); + } if (c->ctor) c->ctor(b); @@ -608,6 +631,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b) } else { __kmem_cache_free(b, c->size); } + + kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b); } EXPORT_SYMBOL(kmem_cache_free); -- cgit v1.2.3 From 5b882be4e00e53a44f47ad7eb997cac2938848bf Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Tue, 19 Aug 2008 20:43:26 +0300 Subject: kmemtrace: SLUB hooks. This adds hooks for the SLUB allocator, to allow tracing with kmemtrace. Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 53 ++++++++++++++++++++++++++++++++++++--- mm/slub.c | 65 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 109 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 2f5c16b1aacd..dc28432b5b9a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,6 +10,7 @@ #include #include #include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ @@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags); +#else +static __always_inline void * +kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) +{ + return kmem_cache_alloc(s, gfpflags); +} +#endif + static __always_inline void *kmalloc_large(size_t size, gfp_t flags) { - return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size)); + unsigned int order = get_order(size); + void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret, + size, PAGE_SIZE << order, flags); + + return ret; } static __always_inline void *kmalloc(size_t size, gfp_t flags) { + void *ret; + if (__builtin_constant_p(size)) { if (size > PAGE_SIZE) return kmalloc_large(size, flags); @@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) if (!s) return ZERO_SIZE_PTR; - return kmem_cache_alloc(s, flags); + ret = kmem_cache_alloc_notrace(s, flags); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, + _THIS_IP_, ret, + size, s->size, flags); + + return ret; } } return __kmalloc(size, flags); @@ -231,8 +256,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) void *__kmalloc_node(size_t size, gfp_t flags, int node); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, + gfp_t gfpflags, + int node); +#else +static __always_inline void * +kmem_cache_alloc_node_notrace(struct kmem_cache *s, + gfp_t gfpflags, + int node) +{ + return kmem_cache_alloc_node(s, gfpflags, node); +} +#endif + static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { + void *ret; + if (__builtin_constant_p(size) && size <= PAGE_SIZE && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); @@ -240,7 +281,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) if (!s) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node(s, flags, node); + ret = kmem_cache_alloc_node_notrace(s, flags, node); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, + _THIS_IP_, ret, + size, s->size, flags, node); + + return ret; } return __kmalloc_node(size, flags, node); } diff --git a/mm/slub.c b/mm/slub.c index 06da86654875..4c48a0146afd 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * Lock order: @@ -1613,18 +1614,46 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { - return slab_alloc(s, gfpflags, -1, _RET_IP_); + void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, + s->objsize, s->size, gfpflags); + + return ret; } EXPORT_SYMBOL(kmem_cache_alloc); +#ifdef CONFIG_KMEMTRACE +void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) +{ + return slab_alloc(s, gfpflags, -1, _RET_IP_); +} +EXPORT_SYMBOL(kmem_cache_alloc_notrace); +#endif + #ifdef CONFIG_NUMA void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { - return slab_alloc(s, gfpflags, node, _RET_IP_); + void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, + s->objsize, s->size, gfpflags, node); + + return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node); #endif +#ifdef CONFIG_KMEMTRACE +void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, + gfp_t gfpflags, + int node) +{ + return slab_alloc(s, gfpflags, node, _RET_IP_); +} +EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); +#endif + /* * Slow patch handling. This may still be called frequently since objects * have a longer lifetime than the cpu slabs in most processing loads. @@ -1732,6 +1761,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x) page = virt_to_head_page(x); slab_free(s, page, x, _RET_IP_); + + kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x); } EXPORT_SYMBOL(kmem_cache_free); @@ -2650,6 +2681,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) void *__kmalloc(size_t size, gfp_t flags) { struct kmem_cache *s; + void *ret; if (unlikely(size > PAGE_SIZE)) return kmalloc_large(size, flags); @@ -2659,7 +2691,12 @@ void *__kmalloc(size_t size, gfp_t flags) if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, -1, _RET_IP_); + ret = slab_alloc(s, flags, -1, _RET_IP_); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret, + size, s->size, flags); + + return ret; } EXPORT_SYMBOL(__kmalloc); @@ -2678,16 +2715,30 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *s; + void *ret; - if (unlikely(size > PAGE_SIZE)) - return kmalloc_large_node(size, flags, node); + if (unlikely(size > PAGE_SIZE)) { + ret = kmalloc_large_node(size, flags, node); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, + _RET_IP_, ret, + size, PAGE_SIZE << get_order(size), + flags, node); + + return ret; + } s = get_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, node, _RET_IP_); + ret = slab_alloc(s, flags, node, _RET_IP_); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret, + size, s->size, flags, node); + + return ret; } EXPORT_SYMBOL(__kmalloc_node); #endif @@ -2745,6 +2796,8 @@ void kfree(const void *x) return; } slab_free(page->slab, page, object, _RET_IP_); + + kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x); } EXPORT_SYMBOL(kfree); -- cgit v1.2.3 From 73cd6af0413225b0ada8b8881c3e0cfd26506dfa Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Tue, 19 Aug 2008 20:43:24 +0300 Subject: kmemtrace: Better alternative to "kmemtrace: fix printk format warnings". Fix the problem "kmemtrace: fix printk format warnings" attempted to fix, but resulted in marker-probe format mismatch warnings. Instead of carrying size_t into probes, we get rid of it by casting to unsigned long, just as we did with gfp_t. This way, we don't need to change marker format strings and we don't have to rely on other format specifiers like "%zu", making for consistent use of more generic data types (since there are no format specifiers for gfp_t, for example). Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- include/linux/kmemtrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h index 2c332010cb4e..5bea8ead6a6b 100644 --- a/include/linux/kmemtrace.h +++ b/include/linux/kmemtrace.h @@ -33,7 +33,8 @@ static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, trace_mark(kmemtrace_alloc, "type_id %d call_site %lu ptr %lu " "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d", type_id, call_site, (unsigned long) ptr, - bytes_req, bytes_alloc, (unsigned long) gfp_flags, node); + (unsigned long) bytes_req, (unsigned long) bytes_alloc, + (unsigned long) gfp_flags, node); } static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, -- cgit v1.2.3 From 36994e58a48fb8f9651c7dc845a6de298aba5bfc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Dec 2008 13:42:23 -0800 Subject: tracing/kmemtrace: normalize the raw tracer event to the unified tracing API Impact: new tracer plugin This patch adapts kmemtrace raw events tracing to the unified tracing API. To enable and use this tracer, just do the following: echo kmemtrace > /debugfs/tracing/current_tracer cat /debugfs/tracing/trace You will have the following output: # tracer: kmemtrace # # # ALLOC TYPE REQ GIVEN FLAGS POINTER NODE CALLER # FREE | | | | | | | | # | type_id 1 call_site 18446744071565527833 ptr 18446612134395152256 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345164672 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345164912 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345165152 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 0 call_site 18446744071566144042 ptr 18446612134346191680 bytes_req 1304 bytes_alloc 1312 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 That was to stay backward compatible with the format output produced in inux/tracepoint.h. This is the default ouput, but note that I tried something else. If you change an option: echo kmem_minimalistic > /debugfs/trace_options and then cat /debugfs/trace, you will have the following output: # tracer: kmemtrace # # # ALLOC TYPE REQ GIVEN FLAGS POINTER NODE CALLER # FREE | | | | | | | | # | - C 0xffff88007c088780 file_free_rcu + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc780 -1 d_alloc - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc870 -1 d_alloc - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc960 -1 d_alloc + K 1304 1312 000000d0 0xffff8800791d7340 -1 reiserfs_alloc_inode - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname - C 0xffff88007cad6000 putname + K 992 1000 000000d0 0xffff880079045b58 -1 alloc_inode + K 768 1024 000080d0 0xffff88007c096400 -1 alloc_pipe_info + K 240 240 000000d0 0xffff8800790dca50 -1 d_alloc + K 272 320 000080d0 0xffff88007c088780 -1 get_empty_filp + K 272 320 000080d0 0xffff88007c088000 -1 get_empty_filp Yeah I shall confess kmem_minimalistic should be: kmem_alternative. Whatever, I find it more readable but this a personal opinion of course. We can drop it if you want. On the ALLOC/FREE column, + means an allocation and - a free. On the type column, you have K = kmalloc, C = cache, P = page I would like the flags to be GFP_* strings but that would not be easy to not break the column with strings.... About the node...it seems to always be -1. I don't know why but that shouldn't be difficult to find. I moved linux/tracepoint.h to trace/tracepoint.h as well. I think that would be more easy to find the tracer headers if they are all in their common directory. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/kmemtrace.h | 86 ------------ include/linux/slab_def.h | 2 +- include/linux/slub_def.h | 2 +- include/trace/kmemtrace.h | 75 ++++++++++ init/main.c | 2 +- kernel/trace/Kconfig | 22 +++ kernel/trace/Makefile | 1 + kernel/trace/kmemtrace.c | 343 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 25 ++++ lib/Kconfig.debug | 20 --- mm/kmemtrace.c | 2 +- mm/slob.c | 2 +- mm/slub.c | 2 +- 13 files changed, 472 insertions(+), 112 deletions(-) delete mode 100644 include/linux/kmemtrace.h create mode 100644 include/trace/kmemtrace.h create mode 100644 kernel/trace/kmemtrace.c (limited to 'include/linux') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h deleted file mode 100644 index 5bea8ead6a6b..000000000000 --- a/include/linux/kmemtrace.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include -#include - -enum kmemtrace_type_id { - KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ - KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ - KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ -}; - -#ifdef CONFIG_KMEMTRACE - -extern void kmemtrace_init(void); - -static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ - trace_mark(kmemtrace_alloc, "type_id %d call_site %lu ptr %lu " - "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d", - type_id, call_site, (unsigned long) ptr, - (unsigned long) bytes_req, (unsigned long) bytes_alloc, - (unsigned long) gfp_flags, node); -} - -static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ - trace_mark(kmemtrace_free, "type_id %d call_site %lu ptr %lu", - type_id, call_site, (unsigned long) ptr); -} - -#else /* CONFIG_KMEMTRACE */ - -static inline void kmemtrace_init(void) -{ -} - -static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ -} - -static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ -} - -#endif /* CONFIG_KMEMTRACE */ - -static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags) -{ - kmemtrace_mark_alloc_node(type_id, call_site, ptr, - bytes_req, bytes_alloc, gfp_flags, -1); -} - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 7555ce99f6d2..455f9affea9a 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,7 @@ #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include -#include +#include /* Size description struct for general caches. */ struct cache_sizes { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index dc28432b5b9a..6b657f7dcb2b 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h new file mode 100644 index 000000000000..ad8b7857855a --- /dev/null +++ b/include/trace/kmemtrace.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include +#include + +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ +}; + +#ifdef CONFIG_KMEMTRACE + +extern void kmemtrace_init(void); + +extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node); + +extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr); + +#else /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_init(void) +{ +} + +static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ +} + +static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ +} + +#endif /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags) +{ + kmemtrace_mark_alloc_node(type_id, call_site, ptr, + bytes_req, bytes_alloc, gfp_flags, -1); +} + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/init/main.c b/init/main.c index 9711586aa7c9..beca7aaddb22 100644 --- a/init/main.c +++ b/init/main.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e2a4ff6fc3a6..27fb74b06b3c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -264,6 +264,28 @@ config HW_BRANCH_TRACER This tracer records all branches on the system in a circular buffer giving access to the last N branches for each cpu. +config KMEMTRACE + bool "Trace SLAB allocations" + select TRACING + depends on RELAY + help + kmemtrace provides tracing for slab allocator functions, such as + kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected + data is then fed to the userspace application in order to analyse + allocation hotspots, internal fragmentation and so on, making it + possible to see how well an allocator performs, as well as debug + and profile kernel code. + + This requires an userspace application to use. See + Documentation/vm/kmemtrace.txt for more information. + + Saying Y will make the kernel somewhat larger and slower. However, + if you disable kmemtrace at run-time or boot-time, the performance + impact is minimal (depending on the arch the kernel is built for). + + If unsure, say N. + + config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 349d5a93653f..513dc86b5dfa 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -33,5 +33,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o obj-$(CONFIG_POWER_TRACER) += trace_power.o +obj-$(CONFIG_KMEMTRACE) += kmemtrace.o libftrace-y := ftrace.o diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c new file mode 100644 index 000000000000..d69cbe3c2a4b --- /dev/null +++ b/kernel/trace/kmemtrace.c @@ -0,0 +1,343 @@ +/* + * Memory allocator tracing + * + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * Copyright (C) 2008 Pekka Enberg + * Copyright (C) 2008 Frederic Weisbecker + */ + +#include +#include +#include +#include +#include + +#include "trace.h" +#include "trace_output.h" + +/* Select an alternative, minimalistic output than the original one */ +#define TRACE_KMEM_OPT_MINIMAL 0x1 + +static struct tracer_opt kmem_opts[] = { + /* Default disable the minimalistic output */ + { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) }, + { } +}; + +static struct tracer_flags kmem_tracer_flags = { + .val = 0, + .opts = kmem_opts +}; + + +static bool kmem_tracing_enabled __read_mostly; +static struct trace_array *kmemtrace_array; + +static int kmem_trace_init(struct trace_array *tr) +{ + int cpu; + kmemtrace_array = tr; + + for_each_cpu_mask(cpu, cpu_possible_map) + tracing_reset(tr, cpu); + + kmem_tracing_enabled = true; + + return 0; +} + +static void kmem_trace_reset(struct trace_array *tr) +{ + kmem_tracing_enabled = false; +} + +static void kmemtrace_headers(struct seq_file *s) +{ + /* Don't need headers for the original kmemtrace output */ + if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) + return; + + seq_printf(s, "#\n"); + seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS " + " POINTER NODE CALLER\n"); + seq_printf(s, "# FREE | | | | " + " | | | |\n"); + seq_printf(s, "# |\n\n"); +} + +/* + * The two following functions give the original output from kmemtrace, + * or something close to....perhaps they need some missing things + */ +static enum print_line_t +kmemtrace_print_alloc_original(struct trace_iterator *iter, + struct kmemtrace_alloc_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Taken from the old linux/kmemtrace.h */ + ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu " + "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", + entry->type_id, entry->call_site, (unsigned long) entry->ptr, + (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc, + (unsigned long) entry->gfp_flags, entry->node); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +kmemtrace_print_free_original(struct trace_iterator *iter, + struct kmemtrace_free_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Taken from the old linux/kmemtrace.h */ + ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n", + entry->type_id, entry->call_site, (unsigned long) entry->ptr); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + + +/* The two other following provide a more minimalistic output */ +static enum print_line_t +kmemtrace_print_alloc_compress(struct trace_iterator *iter, + struct kmemtrace_alloc_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Alloc entry */ + ret = trace_seq_printf(s, " + "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Type */ + switch (entry->type_id) { + case KMEMTRACE_TYPE_KMALLOC: + ret = trace_seq_printf(s, "K "); + break; + case KMEMTRACE_TYPE_CACHE: + ret = trace_seq_printf(s, "C "); + break; + case KMEMTRACE_TYPE_PAGES: + ret = trace_seq_printf(s, "P "); + break; + default: + ret = trace_seq_printf(s, "? "); + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Requested */ + ret = trace_seq_printf(s, "%4d ", entry->bytes_req); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Allocated */ + ret = trace_seq_printf(s, "%4d ", entry->bytes_alloc); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Flags + * TODO: would be better to see the name of the GFP flag names + */ + ret = trace_seq_printf(s, "%08x ", entry->gfp_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Pointer to allocated */ + ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Node */ + ret = trace_seq_printf(s, "%4d ", entry->node); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Call site */ + ret = seq_print_ip_sym(s, entry->call_site, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_printf(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +kmemtrace_print_free_compress(struct trace_iterator *iter, + struct kmemtrace_free_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Free entry */ + ret = trace_seq_printf(s, " - "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Type */ + switch (entry->type_id) { + case KMEMTRACE_TYPE_KMALLOC: + ret = trace_seq_printf(s, "K "); + break; + case KMEMTRACE_TYPE_CACHE: + ret = trace_seq_printf(s, "C "); + break; + case KMEMTRACE_TYPE_PAGES: + ret = trace_seq_printf(s, "P "); + break; + default: + ret = trace_seq_printf(s, "? "); + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Skip requested/allocated/flags */ + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Pointer to allocated */ + ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Skip node */ + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Call site */ + ret = seq_print_ip_sym(s, entry->call_site, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_printf(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + + switch (entry->type) { + case TRACE_KMEM_ALLOC: { + struct kmemtrace_alloc_entry *field; + trace_assign_type(field, entry); + if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) + return kmemtrace_print_alloc_compress(iter, field); + else + return kmemtrace_print_alloc_original(iter, field); + } + + case TRACE_KMEM_FREE: { + struct kmemtrace_free_entry *field; + trace_assign_type(field, entry); + if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) + return kmemtrace_print_free_compress(iter, field); + else + return kmemtrace_print_free_original(iter, field); + } + + default: + return TRACE_TYPE_UNHANDLED; + } +} + +/* Trace allocations */ +void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ + struct ring_buffer_event *event; + struct kmemtrace_alloc_entry *entry; + struct trace_array *tr = kmemtrace_array; + unsigned long irq_flags; + + if (!kmem_tracing_enabled) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + + entry->ent.type = TRACE_KMEM_ALLOC; + entry->call_site = call_site; + entry->ptr = ptr; + entry->bytes_req = bytes_req; + entry->bytes_alloc = bytes_alloc; + entry->gfp_flags = gfp_flags; + entry->node = node; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); +} + +void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ + struct ring_buffer_event *event; + struct kmemtrace_free_entry *entry; + struct trace_array *tr = kmemtrace_array; + unsigned long irq_flags; + + if (!kmem_tracing_enabled) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + + entry->ent.type = TRACE_KMEM_FREE; + entry->type_id = type_id; + entry->call_site = call_site; + entry->ptr = ptr; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); +} + +static struct tracer kmem_tracer __read_mostly = { + .name = "kmemtrace", + .init = kmem_trace_init, + .reset = kmem_trace_reset, + .print_line = kmemtrace_print_line, + .print_header = kmemtrace_headers, + .flags = &kmem_tracer_flags +}; + +static int __init init_kmem_tracer(void) +{ + return register_tracer(&kmem_tracer); +} + +device_initcall(init_kmem_tracer); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc7a4f864036..534505bb39b0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,6 +9,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -29,6 +30,8 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, + TRACE_KMEM_ALLOC, + TRACE_KMEM_FREE, TRACE_POWER, __TRACE_LAST_TYPE @@ -170,6 +173,24 @@ struct trace_power { struct power_trace state_data; }; +struct kmemtrace_alloc_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; + size_t bytes_req; + size_t bytes_alloc; + gfp_t gfp_flags; + int node; +}; + +struct kmemtrace_free_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -280,6 +301,10 @@ extern void __ftrace_bad_type(void); TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ + TRACE_KMEM_ALLOC); \ + IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ + TRACE_KMEM_FREE); \ __ftrace_bad_type(); \ } while (0) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b5417e23ba94..b0f239e443bc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -803,26 +803,6 @@ config FIREWIRE_OHCI_REMOTE_DMA If unsure, say N. -config KMEMTRACE - bool "Kernel memory tracer (kmemtrace)" - depends on RELAY && DEBUG_FS && MARKERS - help - kmemtrace provides tracing for slab allocator functions, such as - kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected - data is then fed to the userspace application in order to analyse - allocation hotspots, internal fragmentation and so on, making it - possible to see how well an allocator performs, as well as debug - and profile kernel code. - - This requires an userspace application to use. See - Documentation/vm/kmemtrace.txt for more information. - - Saying Y will make the kernel somewhat larger and slower. However, - if you disable kmemtrace at run-time or boot-time, the performance - impact is minimal (depending on the arch the kernel is built for). - - If unsure, say N. - menuconfig BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK diff --git a/mm/kmemtrace.c b/mm/kmemtrace.c index 2a70a805027c..0573b5080cc4 100644 --- a/mm/kmemtrace.c +++ b/mm/kmemtrace.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #define KMEMTRACE_SUBBUF_SIZE 524288 #define KMEMTRACE_DEF_N_SUBBUFS 20 diff --git a/mm/slob.c b/mm/slob.c index 0f1a49f40690..4d1c0fc33b6b 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include /* diff --git a/mm/slub.c b/mm/slub.c index cc4001fee7ac..7bf8cf8ec082 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From f00012074b1a1a67d9c8603617bbbab267347ca6 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 9 Jan 2009 11:29:42 +0800 Subject: ftrace, ia64: Add macro for ftrace_caller Define FTRACE_ADDR. In IA64, a function pointer isn't a 'unsigned long' but a 'struct {unsigned long ip, unsigned long gp}'. Signed-off-by: Shaohua Li Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 4 ++++ kernel/trace/ftrace.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 677432b9cb7e..054721487574 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -126,6 +126,10 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); + +#ifndef FTRACE_ADDR +#define FTRACE_ADDR ((unsigned long)ftrace_caller) +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 76bb884b6e16..9f536108d3f3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -455,7 +455,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ip, fl; unsigned long ftrace_addr; - ftrace_addr = (unsigned long)ftrace_caller; + ftrace_addr = (unsigned long)FTRACE_ADDR; ip = rec->ip; -- cgit v1.2.3 From b1818748b0cf9427e48acf9713295e829a0d715f Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:31:01 +0100 Subject: x86, ftrace, hw-branch-tracer: dump trace on oops Dump the branch trace on an oops (based on ftrace_dump_on_oops). Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 6 ++++++ include/linux/ftrace.h | 13 +++++++++++++ kernel/trace/trace.h | 1 - kernel/trace/trace_hw_branches.c | 29 ++++++++++++++++++++++------- 4 files changed, 41 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6b1f6f6f8661..077c9ea655fc 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -195,6 +196,11 @@ unsigned __kprobes long oops_begin(void) int cpu; unsigned long flags; + /* notify the hw-branch tracer so it may disable tracing and + add the last trace to the trace buffer - + the earlier this happens, the more useful the trace. */ + trace_hw_branch_oops(); + oops_enter(); /* racy, but better than risking deadlock. */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 054721487574..9f7880d87c39 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -496,4 +496,17 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) #endif /* CONFIG_TRACING */ + +#ifdef CONFIG_HW_BRANCH_TRACER + +void trace_hw_branch(u64 from, u64 to); +void trace_hw_branch_oops(void); + +#else /* CONFIG_HW_BRANCH_TRACER */ + +static inline void trace_hw_branch(u64 from, u64 to) {} +static inline void trace_hw_branch_oops(void) {} + +#endif /* CONFIG_HW_BRANCH_TRACER */ + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54b72781e920..b96037d970df 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -438,7 +438,6 @@ void trace_function(struct trace_array *tr, void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 398195397c75..e56df2c7d679 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -40,6 +40,7 @@ static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); #define this_buffer per_cpu(buffer, smp_processor_id()) static int __read_mostly trace_hw_branches_enabled; +static struct trace_array *hw_branch_trace __read_mostly; /* @@ -128,6 +129,8 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { static int bts_trace_init(struct trace_array *tr) { + hw_branch_trace = tr; + register_hotcpu_notifier(&bts_hotcpu_notifier); tracing_reset_online_cpus(tr); bts_trace_start(tr); @@ -170,8 +173,9 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) return TRACE_TYPE_UNHANDLED; } -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) +void trace_hw_branch(u64 from, u64 to) { + struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; unsigned long irq1, irq2; @@ -204,8 +208,7 @@ void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) local_irq_restore(irq1); } -static void trace_bts_at(struct trace_array *tr, - const struct bts_trace *trace, void *at) +static void trace_bts_at(const struct bts_trace *trace, void *at) { struct bts_struct bts; int err = 0; @@ -220,7 +223,7 @@ static void trace_bts_at(struct trace_array *tr, switch (bts.qualifier) { case BTS_BRANCH: - trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); + trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to); break; } } @@ -236,12 +239,15 @@ static void trace_bts_cpu(void *arg) const struct bts_trace *trace; unsigned char *at; - if (!this_tracer) + if (unlikely(!tr)) return; if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) return; + if (unlikely(!this_tracer)) + return; + ds_suspend_bts(this_tracer); trace = ds_read_bts(this_tracer); if (!trace) @@ -249,11 +255,11 @@ static void trace_bts_cpu(void *arg) for (at = trace->ds.top; (void *)at < trace->ds.end; at += trace->ds.size) - trace_bts_at(tr, trace, at); + trace_bts_at(trace, at); for (at = trace->ds.begin; (void *)at < trace->ds.top; at += trace->ds.size) - trace_bts_at(tr, trace, at); + trace_bts_at(trace, at); out: ds_resume_bts(this_tracer); @@ -268,6 +274,15 @@ static void trace_bts_prepare(struct trace_iterator *iter) mutex_unlock(&bts_tracer_mutex); } +void trace_hw_branch_oops(void) +{ + mutex_lock(&bts_tracer_mutex); + + trace_bts_cpu(hw_branch_trace); + + mutex_unlock(&bts_tracer_mutex); +} + struct tracer bts_tracer __read_mostly = { .name = "hw-branch-tracer", -- cgit v1.2.3 From 7e49fcce1bdadd723ae6a0b3b324c4daced61563 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Jan 2009 19:01:40 -0500 Subject: trace, lockdep: manual preempt count adding for local_bh_disable Impact: fix to preempt trace triggering lockdep check_flag failure In local_bh_disable, the use of add_preempt_count causes the preempt tracer to start recording the time preemption is off. But because it already modified the preempt_count to show softirqs disabled, and before it called the lockdep code to handle this, it causes a state that lockdep can not handle. The preempt tracer will reset the ring buffer on start of a trace, and the ring buffer reset code does a spin_lock_irqsave. This calls into lockdep and lockdep will fail when it detects the invalid state of having softirqs disabled but the internal current->softirqs_enabled is still set. The fix is to manually add the SOFTIRQ_OFFSET to preempt count and call the preempt tracer code outside the lockdep critical area. Thanks to Peter Zijlstra for suggesting this solution. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/sched.c | 8 ++++---- kernel/softirq.c | 13 ++++++++++++- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4cae9b81a1f8..33085b88f87b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -137,6 +137,8 @@ extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern unsigned long get_parent_ip(unsigned long addr); + struct seq_file; struct cfs_rq; struct task_group; diff --git a/kernel/sched.c b/kernel/sched.c index 52bbf1c842a8..c154825ae753 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4399,10 +4399,7 @@ void scheduler_tick(void) #endif } -#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ - defined(CONFIG_PREEMPT_TRACER)) - -static inline unsigned long get_parent_ip(unsigned long addr) +unsigned long get_parent_ip(unsigned long addr) { if (in_lock_functions(addr)) { addr = CALLER_ADDR2; @@ -4412,6 +4409,9 @@ static inline unsigned long get_parent_ip(unsigned long addr) return addr; } +#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ + defined(CONFIG_PREEMPT_TRACER)) + void __kprobes add_preempt_count(int val) { #ifdef CONFIG_DEBUG_PREEMPT diff --git a/kernel/softirq.c b/kernel/softirq.c index bdbe9de9cd8d..6edfc2c11d99 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -79,13 +80,23 @@ static void __local_bh_disable(unsigned long ip) WARN_ON_ONCE(in_irq()); raw_local_irq_save(flags); - add_preempt_count(SOFTIRQ_OFFSET); + /* + * The preempt tracer hooks into add_preempt_count and will break + * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET + * is set and before current->softirq_enabled is cleared. + * We must manually increment preempt_count here and manually + * call the trace_preempt_off later. + */ + preempt_count() += SOFTIRQ_OFFSET; /* * Were softirqs turned off above: */ if (softirq_count() == SOFTIRQ_OFFSET) trace_softirqs_off(ip); raw_local_irq_restore(flags); + + if (preempt_count() == SOFTIRQ_OFFSET) + trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); } #else /* !CONFIG_TRACE_IRQFLAGS */ static inline void __local_bh_disable(unsigned long ip) -- cgit v1.2.3 From 9011262a37cb438f0fa9394b5e83840db8f9680a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 23 Jan 2009 12:06:23 -0200 Subject: ftrace: add ftrace_vprintk Impact: new helper function Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 8 ++++++++ kernel/trace/trace.c | 9 +++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9f7880d87c39..7840e718c6c7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -302,6 +302,9 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); extern int __ftrace_printk(unsigned long ip, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); +# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap) +extern int +__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(void); #else static inline void @@ -317,6 +320,11 @@ ftrace_printk(const char *fmt, ...) { return 0; } +static inline int +ftrace_vprintk(const char *fmt, va_list ap) +{ + return 0; +} static inline void ftrace_dump(void) { } #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2129ab9d2a48..2f8ac1f008f5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2951,6 +2951,15 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) } EXPORT_SYMBOL_GPL(__ftrace_printk); +int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) +{ + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); +} +EXPORT_SYMBOL_GPL(__ftrace_vprintk); + static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { -- cgit v1.2.3 From 157f9c00e88529ed84bd7d581a40d411e5414cf0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Jan 2009 15:00:56 -0200 Subject: tracing/blktrace: fix up checkpatch reported problems in ftrace plugin patch Also make sure sparse (make C=2 block/blktrace.o) is happy too. Reported-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- block/blktrace.c | 40 +++++++++++++++++++++++++--------------- fs/partitions/check.c | 5 +---- include/linux/blktrace_api.h | 5 +++++ 3 files changed, 31 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/block/blktrace.c b/block/blktrace.c index 630f167f8240..1b2267c798b6 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -37,7 +37,7 @@ static int __read_mostly blk_tracer_enabled; static struct tracer_opt blk_tracer_opts[] = { /* Default disable the minimalistic output */ - { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC ) }, + { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) }, { } }; @@ -169,7 +169,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, pid_t pid; int cpu, pc = 0; - if (unlikely(bt->trace_state != Blktrace_running || !blk_tracer_enabled)) + if (unlikely(bt->trace_state != Blktrace_running || + !blk_tracer_enabled)) return; what |= ddir_act[rw & WRITE]; @@ -192,7 +193,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, sizeof(*t) + pdu_len, &flags); if (!event) return; - + ent = ring_buffer_event_data(event); t = (struct blk_io_trace *)ent; pc = preempt_count(); @@ -234,7 +235,7 @@ record_it: if (blk_tr) { ring_buffer_unlock_commit(blk_tr->buffer, event, flags); if (pid != 0 && - (blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC) == 0 && + !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC) && (trace_flags & TRACE_ITER_STACKTRACE) != 0) __trace_stack(blk_tr, NULL, flags, 5, pc); trace_wake_up(); @@ -955,19 +956,27 @@ static void blk_unregister_tracepoints(void) static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) { - int i = 0; + int i = 0; - if (t->action & BLK_TC_DISCARD) rwbs[i++] = 'D'; - else if (t->action & BLK_TC_WRITE) rwbs[i++] = 'W'; - else if (t->bytes) rwbs[i++] = 'R'; - else rwbs[i++] = 'N'; + if (t->action & BLK_TC_DISCARD) + rwbs[i++] = 'D'; + else if (t->action & BLK_TC_WRITE) + rwbs[i++] = 'W'; + else if (t->bytes) + rwbs[i++] = 'R'; + else + rwbs[i++] = 'N'; - if (t->action & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (t->action & BLK_TC_BARRIER) rwbs[i++] = 'B'; - if (t->action & BLK_TC_SYNC) rwbs[i++] = 'S'; - if (t->action & BLK_TC_META) rwbs[i++] = 'M'; + if (t->action & BLK_TC_AHEAD) + rwbs[i++] = 'A'; + if (t->action & BLK_TC_BARRIER) + rwbs[i++] = 'B'; + if (t->action & BLK_TC_SYNC) + rwbs[i++] = 'S'; + if (t->action & BLK_TC_META) + rwbs[i++] = 'M'; - rwbs[i] = '\0'; + rwbs[i] = '\0'; } static inline @@ -1049,7 +1058,8 @@ static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) return trace_seq_printf(s, "[%s]\n", cmd); } -static int blk_log_with_error(struct trace_seq *s, const struct trace_entry *ent) +static int blk_log_with_error(struct trace_seq *s, + const struct trace_entry *ent) { if (t_sec(ent)) return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 01714efdc65a..8a17f7edcc74 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "check.h" @@ -268,10 +269,6 @@ ssize_t part_fail_store(struct device *dev, } #endif -#ifdef CONFIG_BLK_DEV_IO_TRACE -extern struct attribute_group blk_trace_attr_group; -#endif - static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 1dba3493d520..59b4b2e8ab67 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -142,6 +142,9 @@ struct blk_user_trace_setup { #ifdef __KERNEL__ #if defined(CONFIG_BLK_DEV_IO_TRACE) + +#include + struct blk_trace { int trace_state; struct rchan *rchan; @@ -192,6 +195,8 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern struct attribute_group blk_trace_attr_group; + #else /* !CONFIG_BLK_DEV_IO_TRACE */ #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) #define blk_trace_shutdown(q) do { } while (0) -- cgit v1.2.3 From 0a9877514c4fed10a70720293b37213dd172ee3e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 16:12:56 -0200 Subject: ring_buffer: remove unused flags parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: API change, cleanup >From ring_buffer_{lock_reserve,unlock_commit}. $ codiff /tmp/vmlinux.before /tmp/vmlinux.after linux-2.6-tip/kernel/trace/trace.c: trace_vprintk | -14 trace_graph_return | -14 trace_graph_entry | -10 trace_function | -8 __ftrace_trace_stack | -8 ftrace_trace_userstack | -8 tracing_sched_switch_trace | -8 ftrace_trace_special | -12 tracing_sched_wakeup_trace | -8 9 functions changed, 90 bytes removed, diff: -90 linux-2.6-tip/block/blktrace.c: __blk_add_trace | -1 1 function changed, 1 bytes removed, diff: -1 /tmp/vmlinux.after: 10 functions changed, 91 bytes removed, diff: -91 Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Signed-off-by: Ingo Molnar --- block/blktrace.c | 8 +++--- include/linux/ring_buffer.h | 9 +++---- kernel/trace/kmemtrace.c | 12 +++------ kernel/trace/ring_buffer.c | 9 ++----- kernel/trace/trace.c | 56 ++++++++++++++-------------------------- kernel/trace/trace_boot.c | 12 +++------ kernel/trace/trace_branch.c | 7 +++-- kernel/trace/trace_hw_branches.c | 6 ++--- kernel/trace/trace_mmiotrace.c | 12 +++------ kernel/trace/trace_power.c | 12 +++------ 10 files changed, 51 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/block/blktrace.c b/block/blktrace.c index d9d7146ee023..8e52f24cc8f9 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -165,7 +165,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; struct blk_io_trace *t; - unsigned long flags; + unsigned long flags = 0; unsigned long *sequence; pid_t pid; int cpu, pc = 0; @@ -191,7 +191,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, tracing_record_cmdline(current); event = ring_buffer_lock_reserve(blk_tr->buffer, - sizeof(*t) + pdu_len, &flags); + sizeof(*t) + pdu_len); if (!event) return; @@ -241,11 +241,11 @@ record_it: memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); if (blk_tr) { - ring_buffer_unlock_commit(blk_tr->buffer, event, flags); + ring_buffer_unlock_commit(blk_tr->buffer, event); if (pid != 0 && !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC) && (trace_flags & TRACE_ITER_STACKTRACE) != 0) - __trace_stack(blk_tr, flags, 5, pc); + __trace_stack(blk_tr, 0, 5, pc); trace_wake_up(); return; } diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index b3b359660082..3110d92e7d81 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -74,13 +74,10 @@ void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); -struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, - unsigned long length, - unsigned long *flags); +struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, + unsigned long length); int ring_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags); + struct ring_buffer_event *event); int ring_buffer_write(struct ring_buffer *buffer, unsigned long length, void *data); diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index f04c0625f1cd..256749d1032a 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -272,13 +272,11 @@ void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, struct ring_buffer_event *event; struct kmemtrace_alloc_entry *entry; struct trace_array *tr = kmemtrace_array; - unsigned long irq_flags; if (!kmem_tracing_enabled) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -292,7 +290,7 @@ void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, entry->gfp_flags = gfp_flags; entry->node = node; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } @@ -305,13 +303,11 @@ void kmemtrace_mark_free(enum kmemtrace_type_id type_id, struct ring_buffer_event *event; struct kmemtrace_free_entry *entry; struct trace_array *tr = kmemtrace_array; - unsigned long irq_flags; if (!kmem_tracing_enabled) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -322,7 +318,7 @@ void kmemtrace_mark_free(enum kmemtrace_type_id type_id, entry->call_site = call_site; entry->ptr = ptr; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b36d7374ceef..aee76b3eeed2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1257,7 +1257,6 @@ static DEFINE_PER_CPU(int, rb_need_resched); * ring_buffer_lock_reserve - reserve a part of the buffer * @buffer: the ring buffer to reserve from * @length: the length of the data to reserve (excluding event header) - * @flags: a pointer to save the interrupt flags * * Returns a reseverd event on the ring buffer to copy directly to. * The user of this interface will need to get the body to write into @@ -1270,9 +1269,7 @@ static DEFINE_PER_CPU(int, rb_need_resched); * If NULL is returned, then nothing has been allocated or locked. */ struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, - unsigned long length, - unsigned long *flags) +ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -1339,15 +1336,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, * ring_buffer_unlock_commit - commit a reserved * @buffer: The buffer to commit to * @event: The event pointer to commit. - * @flags: the interrupt flags received from ring_buffer_lock_reserve. * * This commits the data to the ring buffer, and releases any locks held. * * Must be paired with ring_buffer_lock_reserve. */ int ring_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags) + struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; int cpu = raw_smp_processor_id(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3536ef41575d..eb453a238a6f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -783,14 +783,12 @@ trace_function(struct trace_array *tr, { struct ring_buffer_event *event; struct ftrace_entry *entry; - unsigned long irq_flags; /* If we are reading the ring buffer, don't trace */ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -798,7 +796,7 @@ trace_function(struct trace_array *tr, entry->ent.type = TRACE_FN; entry->ip = ip; entry->parent_ip = parent_ip; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -809,20 +807,18 @@ static void __trace_graph_entry(struct trace_array *tr, { struct ring_buffer_event *event; struct ftrace_graph_ent_entry *entry; - unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_GRAPH_ENT; entry->graph_ent = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); + ring_buffer_unlock_commit(global_trace.buffer, event); } static void __trace_graph_return(struct trace_array *tr, @@ -832,20 +828,18 @@ static void __trace_graph_return(struct trace_array *tr, { struct ring_buffer_event *event; struct ftrace_graph_ret_entry *entry; - unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_GRAPH_RET; entry->ret = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); + ring_buffer_unlock_commit(global_trace.buffer, event); } #endif @@ -866,10 +860,8 @@ static void __ftrace_trace_stack(struct trace_array *tr, struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -884,7 +876,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -912,13 +904,11 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; - unsigned long irq_flags; if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -933,7 +923,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace_user(&trace); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -950,10 +940,8 @@ ftrace_trace_special(void *__tr, struct ring_buffer_event *event; struct trace_array *tr = __tr; struct special_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -962,9 +950,9 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, irq_flags, 4, pc); - ftrace_trace_userstack(tr, irq_flags, pc); + ring_buffer_unlock_commit(tr->buffer, event); + ftrace_trace_stack(tr, 0, 4, pc); + ftrace_trace_userstack(tr, 0, pc); trace_wake_up(); } @@ -984,10 +972,8 @@ tracing_sched_switch_trace(struct trace_array *tr, { struct ring_buffer_event *event; struct ctx_switch_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -1000,7 +986,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_prio = next->prio; entry->next_state = next->state; entry->next_cpu = task_cpu(next); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 5, pc); ftrace_trace_userstack(tr, flags, pc); } @@ -1013,10 +999,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, { struct ring_buffer_event *event; struct ctx_switch_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -1029,7 +1013,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_prio = wakee->prio; entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); @@ -2841,7 +2825,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) trace_buf[len] = 0; size = sizeof(*entry) + len + 1; - event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, size); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); @@ -2852,7 +2836,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: spin_unlock_irqrestore(&trace_buf_lock, irq_flags); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 1f07895977a0..4e08debf662d 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -132,7 +132,6 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { struct ring_buffer_event *event; struct trace_boot_call *entry; - unsigned long irq_flags; struct trace_array *tr = boot_trace; if (!tr || !pre_initcalls_finished) @@ -144,15 +143,14 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, 0); entry->ent.type = TRACE_BOOT_CALL; entry->boot_call = *bt; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); @@ -164,7 +162,6 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { struct ring_buffer_event *event; struct trace_boot_ret *entry; - unsigned long irq_flags; struct trace_array *tr = boot_trace; if (!tr || !pre_initcalls_finished) @@ -173,15 +170,14 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, 0); entry->ent.type = TRACE_BOOT_RET; entry->boot_ret = *bt; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 027e83690615..770e52acfc10 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -33,7 +33,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_array *tr = branch_tracer; struct ring_buffer_event *event; struct trace_branch *entry; - unsigned long flags, irq_flags; + unsigned long flags; int cpu, pc; const char *p; @@ -52,8 +52,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) goto out; @@ -75,7 +74,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index fff3545fc866..e720c001db2b 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -175,7 +175,7 @@ void trace_hw_branch(u64 from, u64 to) struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; - unsigned long irq1, irq2; + unsigned long irq1; int cpu; if (unlikely(!tr)) @@ -189,7 +189,7 @@ void trace_hw_branch(u64 from, u64 to) if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq2); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -198,7 +198,7 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.cpu = cpu; entry->from = from; entry->to = to; - ring_buffer_unlock_commit(tr->buffer, event, irq2); + ring_buffer_unlock_commit(tr->buffer, event); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index ec78e244242e..104ddebc11d1 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -307,10 +307,8 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, { struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) { atomic_inc(&dropped_count); return; @@ -319,7 +317,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, tracing_generic_entry_update(&entry->ent, 0, preempt_count()); entry->ent.type = TRACE_MMIO_RW; entry->rw = *rw; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } @@ -337,10 +335,8 @@ static void __trace_mmiotrace_map(struct trace_array *tr, { struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) { atomic_inc(&dropped_count); return; @@ -349,7 +345,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, tracing_generic_entry_update(&entry->ent, 0, preempt_count()); entry->ent.type = TRACE_MMIO_MAP; entry->map = *map; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index faa6ab7a1f5c..3b1a292d12d2 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -115,7 +115,6 @@ void trace_power_end(struct power_trace *it) struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; - unsigned long irq_flags; struct trace_array *tr = power_trace; if (!trace_power_enabled) @@ -125,15 +124,14 @@ void trace_power_end(struct power_trace *it) it->end = ktime_get(); data = tr->data[smp_processor_id()]; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, 0); entry->ent.type = TRACE_POWER; entry->state_data = *it; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); @@ -148,7 +146,6 @@ void trace_power_mark(struct power_trace *it, unsigned int type, struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; - unsigned long irq_flags; struct trace_array *tr = power_trace; if (!trace_power_enabled) @@ -162,15 +159,14 @@ void trace_power_mark(struct power_trace *it, unsigned int type, it->end = it->stamp; data = tr->data[smp_processor_id()]; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, 0); entry->ent.type = TRACE_POWER; entry->state_data = *it; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); -- cgit v1.2.3 From 78d904b46a72fcf15ea6a39672bbef92953876b5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Feb 2009 18:43:07 -0500 Subject: ring-buffer: add NMI protection for spinlocks Impact: prevent deadlock in NMI The ring buffers are not yet totally lockless with writing to the buffer. When a writer crosses a page, it grabs a per cpu spinlock to protect against a reader. The spinlocks taken by a writer are not to protect against other writers, since a writer can only write to its own per cpu buffer. The spinlocks protect against readers that can touch any cpu buffer. The writers are made to be reentrant with the spinlocks disabling interrupts. The problem arises when an NMI writes to the buffer, and that write crosses a page boundary. If it grabs a spinlock, it can be racing with another writer (since disabling interrupts does not protect against NMIs) or with a reader on the same CPU. Luckily, most of the users are not reentrant and protects against this issue. But if a user of the ring buffer becomes reentrant (which is what the ring buffers do allow), if the NMI also writes to the ring buffer then we risk the chance of a deadlock. This patch moves the ftrace_nmi_enter called by nmi_enter() to the ring buffer code. It replaces the current ftrace_nmi_enter that is used by arch specific code to arch_ftrace_nmi_enter and updates the Kconfig to handle it. When an NMI is called, it will set a per cpu variable in the ring buffer code and will clear it when the NMI exits. If a write to the ring buffer crosses page boundaries inside an NMI, a trylock is used on the spin lock instead. If the spinlock fails to be acquired, then the entry is discarded. This bug appeared in the ftrace work in the RT tree, where event tracing is reentrant. This workaround solved the deadlocks that appeared there. Signed-off-by: Steven Rostedt --- arch/x86/Kconfig | 1 + arch/x86/kernel/ftrace.c | 8 ++++---- include/linux/ftrace_irq.h | 10 +++++++++- kernel/trace/Kconfig | 8 ++++++++ kernel/trace/ring_buffer.c | 48 ++++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 68 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 73f7fe8fd4d1..a6be725cb049 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,6 +34,7 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE || FUNCTION_GRAPH_TRACER select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 4d33224c055f..4c683587055b 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -113,7 +113,7 @@ static void ftrace_mod_code(void) MCOUNT_INSN_SIZE); } -void ftrace_nmi_enter(void) +void arch_ftrace_nmi_enter(void) { atomic_inc(&in_nmi); /* Must have in_nmi seen before reading write flag */ @@ -124,7 +124,7 @@ void ftrace_nmi_enter(void) } } -void ftrace_nmi_exit(void) +void arch_ftrace_nmi_exit(void) { /* Finish all executions before clearing in_nmi */ smp_wmb(); @@ -376,12 +376,12 @@ int ftrace_disable_ftrace_graph_caller(void) */ static atomic_t in_nmi; -void ftrace_nmi_enter(void) +void arch_ftrace_nmi_enter(void) { atomic_inc(&in_nmi); } -void ftrace_nmi_exit(void) +void arch_ftrace_nmi_exit(void) { atomic_dec(&in_nmi); } diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 366a054d0b05..29de6779a963 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,15 @@ #define _LINUX_FTRACE_IRQ_H -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) +#ifdef CONFIG_FTRACE_NMI_ENTER +extern void arch_ftrace_nmi_enter(void); +extern void arch_ftrace_nmi_exit(void); +#else +static inline void arch_ftrace_nmi_enter(void) { } +static inline void arch_ftrace_nmi_exit(void) { } +#endif + +#ifdef CONFIG_RING_BUFFER extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 28f2644484d9..25131a5d5e4f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -9,6 +9,9 @@ config USER_STACKTRACE_SUPPORT config NOP_TRACER bool +config HAVE_FTRACE_NMI_ENTER + bool + config HAVE_FUNCTION_TRACER bool @@ -37,6 +40,11 @@ config TRACER_MAX_TRACE config RING_BUFFER bool +config FTRACE_NMI_ENTER + bool + depends on HAVE_FTRACE_NMI_ENTER + default y + config TRACING bool select DEBUG_FS diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b36d7374ceef..a60a6a852f42 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4,6 +4,7 @@ * Copyright (C) 2008 Steven Rostedt */ #include +#include #include #include #include @@ -18,6 +19,35 @@ #include "trace.h" +/* + * Since the write to the buffer is still not fully lockless, + * we must be careful with NMIs. The locks in the writers + * are taken when a write crosses to a new page. The locks + * protect against races with the readers (this will soon + * be fixed with a lockless solution). + * + * Because we can not protect against NMIs, and we want to + * keep traces reentrant, we need to manage what happens + * when we are in an NMI. + */ +static DEFINE_PER_CPU(int, rb_in_nmi); + +void ftrace_nmi_enter(void) +{ + __get_cpu_var(rb_in_nmi)++; + /* call arch specific handler too */ + arch_ftrace_nmi_enter(); +} + +void ftrace_nmi_exit(void) +{ + arch_ftrace_nmi_exit(); + __get_cpu_var(rb_in_nmi)--; + /* NMIs are not recursive */ + WARN_ON_ONCE(__get_cpu_var(rb_in_nmi)); +} + + /* * A fast way to enable or disable all ring buffers is to * call tracing_on or tracing_off. Turning off the ring buffers @@ -982,6 +1012,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer *buffer = cpu_buffer->buffer; struct ring_buffer_event *event; unsigned long flags; + bool lock_taken = false; commit_page = cpu_buffer->commit_page; /* we just need to protect against interrupts */ @@ -995,7 +1026,19 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *next_page = tail_page; local_irq_save(flags); - __raw_spin_lock(&cpu_buffer->lock); + /* + * NMIs can happen after we take the lock. + * If we are in an NMI, only take the lock + * if it is not already taken. Otherwise + * simply fail. + */ + if (unlikely(__get_cpu_var(rb_in_nmi))) { + if (!__raw_spin_trylock(&cpu_buffer->lock)) + goto out_unlock; + } else + __raw_spin_lock(&cpu_buffer->lock); + + lock_taken = true; rb_inc_page(cpu_buffer, &next_page); @@ -1097,7 +1140,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (tail <= BUF_PAGE_SIZE) local_set(&tail_page->write, tail); - __raw_spin_unlock(&cpu_buffer->lock); + if (likely(lock_taken)) + __raw_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); return NULL; } -- cgit v1.2.3 From d8b891a2db13c8ed296158d6f8c4e335896d0cef Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Feb 2009 19:54:51 -0500 Subject: ring-buffer: allow tracing_off to be used in core kernel code tracing_off() is the fastest way to stop recording to the ring buffers. This may be used in places like panic and die, just before the ftrace_dump is called. This patch adds the appropriate CPP conditionals to make it a stub function when the ring buffer is not configured it. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index b3b359660082..ac94c066f6e9 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -124,9 +124,18 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(int cpu); void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); +/* + * The below functions are fine to use outside the tracing facility. + */ +#ifdef CONFIG_RING_BUFFER void tracing_on(void); void tracing_off(void); void tracing_off_permanent(void); +#else +static inline void tracing_on(void) { } +static inline void tracing_off(void) { } +static inline void tracing_off_permanent(void) { } +#endif void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); -- cgit v1.2.3 From 375b38b4214f29109a393ab762d468054bf52354 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 6 Feb 2009 00:51:37 -0500 Subject: nmi: add generic nmi tracking state This code adds an in_nmi() macro that uses the current tasks preempt count to track when it is in NMI context. Other parts of the kernel can use this to determine if the context is in NMI context or not. This code was inspired by the -rt patch in_nmi version that was written by Peter Zijlstra, who borrowed that code from Mathieu Desnoyers. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt --- include/linux/hardirq.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f83288347dda..f3cf86e1465b 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -61,6 +61,12 @@ #error PREEMPT_ACTIVE is too low! #endif +#define NMI_OFFSET (PREEMPT_ACTIVE << 1) + +#if NMI_OFFSET >= 0x80000000 +#error PREEMPT_ACTIVE too high! +#endif + #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) @@ -73,6 +79,11 @@ #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) +/* + * Are we in NMI context? + */ +#define in_nmi() (preempt_count() & NMI_OFFSET) + #if defined(CONFIG_PREEMPT) # define PREEMPT_INATOMIC_BASE kernel_locked() # define PREEMPT_CHECK_OFFSET 1 @@ -167,6 +178,8 @@ extern void irq_exit(void); #define nmi_enter() \ do { \ ftrace_nmi_enter(); \ + BUG_ON(in_nmi()); \ + add_preempt_count(NMI_OFFSET); \ lockdep_off(); \ rcu_nmi_enter(); \ __irq_enter(); \ @@ -177,6 +190,8 @@ extern void irq_exit(void); __irq_exit(); \ rcu_nmi_exit(); \ lockdep_on(); \ + BUG_ON(!in_nmi()); \ + sub_preempt_count(NMI_OFFSET); \ ftrace_nmi_exit(); \ } while (0) -- cgit v1.2.3 From a81bd80a0b0a405dc0483e2c428332d69da2c79f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 6 Feb 2009 01:45:16 -0500 Subject: ring-buffer: use generic version of in_nmi Impact: clean up Now that a generic in_nmi is available, this patch removes the special code in the ring_buffer and implements the in_nmi generic version instead. With this change, I was also able to rename the "arch_ftrace_nmi_enter" back to "ftrace_nmi_enter" and remove the code from the ring buffer. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 4 ++-- include/linux/ftrace_irq.h | 8 -------- kernel/trace/ring_buffer.c | 43 +++++++++++++------------------------------ 3 files changed, 15 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 918073c6681b..d74d75e0952d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -113,7 +113,7 @@ static void ftrace_mod_code(void) MCOUNT_INSN_SIZE); } -void arch_ftrace_nmi_enter(void) +void ftrace_nmi_enter(void) { atomic_inc(&nmi_running); /* Must have nmi_running seen before reading write flag */ @@ -124,7 +124,7 @@ void arch_ftrace_nmi_enter(void) } } -void arch_ftrace_nmi_exit(void) +void ftrace_nmi_exit(void) { /* Finish all executions before clearing nmi_running */ smp_wmb(); diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 29de6779a963..dca7bf8cffe2 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -3,14 +3,6 @@ #ifdef CONFIG_FTRACE_NMI_ENTER -extern void arch_ftrace_nmi_enter(void); -extern void arch_ftrace_nmi_exit(void); -#else -static inline void arch_ftrace_nmi_enter(void) { } -static inline void arch_ftrace_nmi_exit(void) { } -#endif - -#ifdef CONFIG_RING_BUFFER extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a60a6a852f42..5ee344417cd5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -19,35 +20,6 @@ #include "trace.h" -/* - * Since the write to the buffer is still not fully lockless, - * we must be careful with NMIs. The locks in the writers - * are taken when a write crosses to a new page. The locks - * protect against races with the readers (this will soon - * be fixed with a lockless solution). - * - * Because we can not protect against NMIs, and we want to - * keep traces reentrant, we need to manage what happens - * when we are in an NMI. - */ -static DEFINE_PER_CPU(int, rb_in_nmi); - -void ftrace_nmi_enter(void) -{ - __get_cpu_var(rb_in_nmi)++; - /* call arch specific handler too */ - arch_ftrace_nmi_enter(); -} - -void ftrace_nmi_exit(void) -{ - arch_ftrace_nmi_exit(); - __get_cpu_var(rb_in_nmi)--; - /* NMIs are not recursive */ - WARN_ON_ONCE(__get_cpu_var(rb_in_nmi)); -} - - /* * A fast way to enable or disable all ring buffers is to * call tracing_on or tracing_off. Turning off the ring buffers @@ -1027,12 +999,23 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, local_irq_save(flags); /* + * Since the write to the buffer is still not + * fully lockless, we must be careful with NMIs. + * The locks in the writers are taken when a write + * crosses to a new page. The locks protect against + * races with the readers (this will soon be fixed + * with a lockless solution). + * + * Because we can not protect against NMIs, and we + * want to keep traces reentrant, we need to manage + * what happens when we are in an NMI. + * * NMIs can happen after we take the lock. * If we are in an NMI, only take the lock * if it is not already taken. Otherwise * simply fail. */ - if (unlikely(__get_cpu_var(rb_in_nmi))) { + if (unlikely(in_nmi())) { if (!__raw_spin_trylock(&cpu_buffer->lock)) goto out_unlock; } else -- cgit v1.2.3 From 57794a9d48b63e34acbe63282628c9f029603308 Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Fri, 6 Feb 2009 17:33:27 +0800 Subject: trace: trivial fixes in comment typos. Impact: clean up Fixed several typos in the comments. Signed-off-by: Wenji Huang Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 6 +++--- kernel/trace/trace.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7840e718c6c7..5e302d636fc2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -140,7 +140,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } #endif /** - * ftrace_make_nop - convert code into top + * ftrace_make_nop - convert code into nop * @mod: module structure if called by module load initialization * @rec: the mcount call site record * @addr: the address that the call site should be calling diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 68610031780b..1796e018fbff 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -465,7 +465,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * it is not enabled then do nothing. * * If this record is not to be traced and - * it is enabled then disabled it. + * it is enabled then disable it. * */ if (rec->flags & FTRACE_FL_NOTRACE) { @@ -485,7 +485,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) return 0; - /* Record is not filtered and is not enabled do nothing */ + /* Record is not filtered or enabled, do nothing */ if (!fl) return 0; @@ -507,7 +507,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) } else { - /* if record is not enabled do nothing */ + /* if record is not enabled, do nothing */ if (!(rec->flags & FTRACE_FL_ENABLED)) return 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5efc4c707f7e..f92aba52a894 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -616,12 +616,12 @@ extern struct tracer nop_trace; * preempt_enable (after a disable), a schedule might take place * causing an infinite recursion. * - * To prevent this, we read the need_recshed flag before + * To prevent this, we read the need_resched flag before * disabling preemption. When we want to enable preemption we * check the flag, if it is set, then we call preempt_enable_no_resched. * Otherwise, we call preempt_enable. * - * The rational for doing the above is that if need resched is set + * The rational for doing the above is that if need_resched is set * and we have yet to reschedule, we are either in an atomic location * (where we do not need to check for scheduling) or we are inside * the scheduler and do not want to resched. @@ -642,7 +642,7 @@ static inline int ftrace_preempt_disable(void) * * This is a scheduler safe way to enable preemption and not miss * any preemption checks. The disabled saved the state of preemption. - * If resched is set, then we were either inside an atomic or + * If resched is set, then we are either inside an atomic or * are inside the scheduler (we would have already scheduled * otherwise). In this case, we do not want to call normal * preempt_enable, but preempt_enable_no_resched instead. -- cgit v1.2.3 From 1292211058aaf872eeb2a0e2677d237916b4501f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Feb 2009 22:16:12 +0100 Subject: tracing/power: move the power trace headers to a dedicated file Impact: cleanup Move the power tracer headers to trace/power.h to keep ftrace.h and power bits more easy to maintain as separated topics. Signed-off-by: Frederic Weisbecker Cc: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 +- arch/x86/kernel/process.c | 2 +- include/linux/ftrace.h | 30 ------------------------- include/trace/power.h | 35 ++++++++++++++++++++++++++++++ kernel/trace/trace.h | 1 + kernel/trace/trace_power.c | 2 +- 6 files changed, 39 insertions(+), 33 deletions(-) create mode 100644 include/trace/power.h (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4b1c319d30c3..7ed925edf4d2 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e30864..026819ffcb0c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5e302d636fc2..106b7909d500 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -339,36 +339,6 @@ ftrace_init_module(struct module *mod, unsigned long *start, unsigned long *end) { } #endif -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { -#ifdef CONFIG_POWER_TRACER - ktime_t stamp; - ktime_t end; - int type; - int state; -#endif -}; - -#ifdef CONFIG_POWER_TRACER -extern void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_end(struct power_trace *it); -#else -static inline void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_end(struct power_trace *it) { } -#endif - - /* * Structure that defines an entry function trace. */ diff --git a/include/trace/power.h b/include/trace/power.h new file mode 100644 index 000000000000..c7cefbcdaea4 --- /dev/null +++ b/include/trace/power.h @@ -0,0 +1,35 @@ +#ifndef _TRACE_POWER_H +#define _TRACE_POWER_H + +#include + +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; + +struct power_trace { +#ifdef CONFIG_POWER_TRACER + ktime_t stamp; + ktime_t end; + int type; + int state; +#endif +}; + +#ifdef CONFIG_POWER_TRACER +extern void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_end(struct power_trace *it); +#else +static inline void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_end(struct power_trace *it) { } +#endif + +#endif /* _TRACE_POWER_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a011ec062225..1ecfb9d2b365 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -10,6 +10,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index bfc21f8079ab..b1d0d087d3a6 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include -- cgit v1.2.3 From c3706f005c3aaf570e71f0f083fdbb59a5a9fa2e Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Tue, 10 Feb 2009 01:03:18 -0500 Subject: tracing: fix typos in comments Impact: clean up. Fix typos in the comments. Signed-off-by: Wenji Huang Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 +- kernel/trace/ring_buffer.c | 8 ++++---- kernel/trace/trace.c | 2 +- kernel/trace/trace_hw_branches.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3c103d636da3..8e6646a54acf 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -8,7 +8,7 @@ struct ring_buffer; struct ring_buffer_iter; /* - * Don't reference this struct directly, use functions below. + * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { u32 type:2, len:3, time_delta:27; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 10d202ea06f3..fa64e1f003eb 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL_GPL(tracing_off); * tracing_off_permanent - permanently disable ring buffers * * This function, once called, will disable all ring buffers - * permanenty. + * permanently. */ void tracing_off_permanent(void) { @@ -210,7 +210,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); struct buffer_data_page { u64 time_stamp; /* page time stamp */ - local_t commit; /* write commited index */ + local_t commit; /* write committed index */ unsigned char data[]; /* data of buffer page */ }; @@ -260,7 +260,7 @@ struct ring_buffer_per_cpu { struct list_head pages; struct buffer_page *head_page; /* read from head */ struct buffer_page *tail_page; /* write to tail */ - struct buffer_page *commit_page; /* commited pages */ + struct buffer_page *commit_page; /* committed pages */ struct buffer_page *reader_page; unsigned long overrun; unsigned long entries; @@ -303,7 +303,7 @@ struct ring_buffer_iter { * check_pages - integrity check of buffer pages * @cpu_buffer: CPU buffer with pages to test * - * As a safty measure we check to make sure the data pages have not + * As a safety measure we check to make sure the data pages have not * been corrupted. */ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d89821283b47..d7c175a442df 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1963,7 +1963,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, struct tracer_opt *trace_opts = current_trace->flags->opts; - /* calulate max size */ + /* calculate max size */ for (i = 0; trace_options[i]; i++) { len += strlen(trace_options[i]); len += 3; /* "no" and space */ diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index e3e7db61c067..0794dd33f27b 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -75,7 +75,7 @@ static void bts_trace_start(struct trace_array *tr) } /* - * Start tracing on the current cpu. + * Stop tracing on the current cpu. * The argument is ignored. * * pre: bts_tracer_mutex must be locked. -- cgit v1.2.3 From 5a5fb7dbe88dd57dc2bef0f3be9da991e789612d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Feb 2009 10:53:37 -0500 Subject: preempt-count: force hardirq-count to max of 10 To add a bit in the preempt_count to be set when in NMI context, we found that some archs did not have enough bits to spare. This is due to the hardirq_count being a mask that can hold NR_IRQS. Some archs allow for over 16000 IRQs, and that would require a mask of 14 bits. The sofitrq mask is 8 bits and the preempt disable mask is also 8 bits. The PREEMP_ACTIVE bit is bit 30, and bit 31 would make the preempt_count (which is type int) a negative number. A negative preempt_count is a sign of failure. Add them up 14+8+8+1+1 you get 32 bits. No room for the NMI bit. But the hardirq_count is to track the number of nested IRQs, not the number of total IRQs. This originally took the paranoid approach of setting the max nesting to NR_IRQS. But when we have archs with over 1000 IRQs, it is not practical to think they will ever all nest on a single CPU. Not to mention that this would most definitely cause a stack overflow. This patch sets a max of 10 bits to be used for IRQ nesting. I did a 'git grep HARDIRQ' to examine all users of HARDIRQ_BITS and HARDIRQ_MASK, and found that making it a max of 10 would not hurt anyone. I did find that the m68k expected it to be 8 bits, so I allow for the archs to set the number to be less than 10. I removed the setting of HARDIRQ_BITS from the archs that set it to more than 10. This includes ALPHA, ia64 and avr32. This will always allow room for the NMI bit, and if we need to allow for NMI nesting, we have 4 bits to play with. Signed-off-by: Steven Rostedt --- arch/alpha/include/asm/hardirq.h | 13 ----------- arch/avr32/include/asm/hardirq.h | 11 --------- arch/ia64/include/asm/hardirq.h | 10 --------- include/linux/hardirq.h | 48 ++++++++++++++++++++-------------------- 4 files changed, 24 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h index d953e234daa8..88971460fa6c 100644 --- a/arch/alpha/include/asm/hardirq.h +++ b/arch/alpha/include/asm/hardirq.h @@ -14,17 +14,4 @@ typedef struct { void ack_bad_irq(unsigned int irq); -#define HARDIRQ_BITS 12 - -/* - * The hardirq mask has to be large enough to have - * space for potentially nestable IRQ sources in the system - * to nest on a single CPU. On Alpha, interrupts are masked at the CPU - * by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode) - * so we really only have 8 nestable IRQs, but allow some overhead - */ -#if (1 << HARDIRQ_BITS) < 16 -#error HARDIRQ_BITS is too low! -#endif - #endif /* _ALPHA_HARDIRQ_H */ diff --git a/arch/avr32/include/asm/hardirq.h b/arch/avr32/include/asm/hardirq.h index 267354356f60..015bc75ea798 100644 --- a/arch/avr32/include/asm/hardirq.h +++ b/arch/avr32/include/asm/hardirq.h @@ -20,15 +20,4 @@ void ack_bad_irq(unsigned int irq); #endif /* __ASSEMBLY__ */ -#define HARDIRQ_BITS 12 - -/* - * The hardirq mask has to be large enough to have - * space for potentially all IRQ sources in the system - * nesting on a single CPU: - */ -#if (1 << HARDIRQ_BITS) < NR_IRQS -# error HARDIRQ_BITS is too low! -#endif - #endif /* __ASM_AVR32_HARDIRQ_H */ diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h index 140e495b8e0e..d514cd9edb49 100644 --- a/arch/ia64/include/asm/hardirq.h +++ b/arch/ia64/include/asm/hardirq.h @@ -20,16 +20,6 @@ #define local_softirq_pending() (local_cpu_data->softirq_pending) -#define HARDIRQ_BITS 14 - -/* - * The hardirq mask has to be large enough to have space for potentially all IRQ sources - * in the system nesting on a single CPU: - */ -#if (1 << HARDIRQ_BITS) < NR_IRQS -# error HARDIRQ_BITS is too low! -#endif - extern void __iomem *ipi_base_addr; void ack_bad_irq(unsigned int irq); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f3cf86e1465b..9841221f53f2 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -15,61 +15,61 @@ * - bits 0-7 are the preemption count (max preemption depth: 256) * - bits 8-15 are the softirq count (max # of softirqs: 256) * - * The hardirq count can be overridden per architecture, the default is: + * The hardirq count can in theory reach the same as NR_IRQS. + * In reality, the number of nested IRQS is limited to the stack + * size as well. For archs with over 1000 IRQS it is not practical + * to expect that they will all nest. We give a max of 10 bits for + * hardirq nesting. An arch may choose to give less than 10 bits. + * m68k expects it to be 8. * - * - bits 16-27 are the hardirq count (max # of hardirqs: 4096) - * - ( bit 28 is the PREEMPT_ACTIVE flag. ) + * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) + * - bit 26 is the NMI_MASK + * - bit 28 is the PREEMPT_ACTIVE flag * * PREEMPT_MASK: 0x000000ff * SOFTIRQ_MASK: 0x0000ff00 - * HARDIRQ_MASK: 0x0fff0000 + * HARDIRQ_MASK: 0x03ff0000 + * NMI_MASK: 0x04000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 +#define NMI_BITS 1 -#ifndef HARDIRQ_BITS -#define HARDIRQ_BITS 12 +#define MAX_HARDIRQ_BITS 10 -#ifndef MAX_HARDIRQS_PER_CPU -#define MAX_HARDIRQS_PER_CPU NR_IRQS +#ifndef HARDIRQ_BITS +# define HARDIRQ_BITS MAX_HARDIRQ_BITS #endif -/* - * The hardirq mask has to be large enough to have space for potentially - * all IRQ sources in the system nesting on a single CPU. - */ -#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU -# error HARDIRQ_BITS is too low! -#endif +#if HARDIRQ_BITS > MAX_HARDIRQ_BITS +#error HARDIRQ_BITS too high! #endif #define PREEMPT_SHIFT 0 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) #define __IRQ_MASK(x) ((1UL << (x))-1) #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#define NMI_OFFSET (1UL << NMI_SHIFT) -#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS)) +#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) #error PREEMPT_ACTIVE is too low! #endif -#define NMI_OFFSET (PREEMPT_ACTIVE << 1) - -#if NMI_OFFSET >= 0x80000000 -#error PREEMPT_ACTIVE too high! -#endif - #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) +#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ + | NMI_MASK)) /* * Are we doing bottom half or hardware interrupt processing? @@ -82,7 +82,7 @@ /* * Are we in NMI context? */ -#define in_nmi() (preempt_count() & NMI_OFFSET) +#define in_nmi() (preempt_count() & NMI_MASK) #if defined(CONFIG_PREEMPT) # define PREEMPT_INATOMIC_BASE kernel_locked() -- cgit v1.2.3 From 2a7b8df04c11a70105c1abe67d006455d3bdc944 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Feb 2009 14:16:46 -0500 Subject: sched: do not account for NMIs Impact: avoid corruption in system time accounting Martin Schwidefsky told me that there was an issue with NMIs and system accounting. The problem is that the accounting code is not reentrant, and if an NMI goes off after an interrupt it can corrupt the accounting. For now, the best we can do is to treat NMIs like SMIs and they are not accounted for. This patch changes nmi_enter to not call __irq_enter and to do the preempt-count and tracing calls directly. Signed-off-by: Steven Rostedt --- include/linux/hardirq.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 9841221f53f2..faa1cf848bcd 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -175,24 +175,24 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() \ - do { \ - ftrace_nmi_enter(); \ - BUG_ON(in_nmi()); \ - add_preempt_count(NMI_OFFSET); \ - lockdep_off(); \ - rcu_nmi_enter(); \ - __irq_enter(); \ +#define nmi_enter() \ + do { \ + ftrace_nmi_enter(); \ + BUG_ON(in_nmi()); \ + add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + lockdep_off(); \ + rcu_nmi_enter(); \ + trace_hardirq_enter(); \ } while (0) -#define nmi_exit() \ - do { \ - __irq_exit(); \ - rcu_nmi_exit(); \ - lockdep_on(); \ - BUG_ON(!in_nmi()); \ - sub_preempt_count(NMI_OFFSET); \ - ftrace_nmi_exit(); \ +#define nmi_exit() \ + do { \ + trace_hardirq_exit(); \ + rcu_nmi_exit(); \ + lockdep_on(); \ + BUG_ON(!in_nmi()); \ + sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + ftrace_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ -- cgit v1.2.3 From f6180773d90595650e11de0118bb112018290915 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Feb 2009 00:40:25 -0500 Subject: ftrace: add command interface for function selection Allow for other tracers to add their own commands for function selection. This interface gives a trace the ability to name a command for function selection. Right now it is pretty limited in what it offers, but this is a building step for more features. The :mod: command is converted to this interface and also serves as a template for other implementations. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 16 ++++++++ kernel/trace/ftrace.c | 106 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 111 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 106b7909d500..f0a0ecc63b5c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -95,6 +95,13 @@ stack_trace_sysctl(struct ctl_table *table, int write, loff_t *ppos); #endif +struct ftrace_func_command { + struct list_head list; + char *name; + int (*func)(char *func, char *cmd, + char *params, int enable); +}; + #ifdef CONFIG_DYNAMIC_FTRACE /* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ #include @@ -119,6 +126,9 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); +int register_ftrace_command(struct ftrace_func_command *cmd); +int unregister_ftrace_command(struct ftrace_func_command *cmd); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern int ftrace_dyn_arch_init(void *data); @@ -202,6 +212,12 @@ extern void ftrace_enable_daemon(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } +static inline int register_ftrace_command(struct ftrace_func_command *cmd) +{ +} +static inline int unregister_ftrace_command(char *cmd_name) +{ +} #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 340f88b68d9e..45a44c402566 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1239,9 +1239,93 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) spin_unlock(&ftrace_lock); } +/* + * We register the module command as a template to show others how + * to register the a command as well. + */ + +static int +ftrace_mod_callback(char *func, char *cmd, char *param, int enable) +{ + char *mod; + + /* + * cmd == 'mod' because we only registered this func + * for the 'mod' ftrace_func_command. + * But if you register one func with multiple commands, + * you can tell which command was used by the cmd + * parameter. + */ + + /* we must have a module name */ + if (!param) + return -EINVAL; + + mod = strsep(¶m, ":"); + if (!strlen(mod)) + return -EINVAL; + + ftrace_match_module_records(func, mod, enable); + return 0; +} + +static struct ftrace_func_command ftrace_mod_cmd = { + .name = "mod", + .func = ftrace_mod_callback, +}; + +static int __init ftrace_mod_cmd_init(void) +{ + return register_ftrace_command(&ftrace_mod_cmd); +} +device_initcall(ftrace_mod_cmd_init); + +static LIST_HEAD(ftrace_commands); +static DEFINE_MUTEX(ftrace_cmd_mutex); + +int register_ftrace_command(struct ftrace_func_command *cmd) +{ + struct ftrace_func_command *p; + int ret = 0; + + mutex_lock(&ftrace_cmd_mutex); + list_for_each_entry(p, &ftrace_commands, list) { + if (strcmp(cmd->name, p->name) == 0) { + ret = -EBUSY; + goto out_unlock; + } + } + list_add(&cmd->list, &ftrace_commands); + out_unlock: + mutex_unlock(&ftrace_cmd_mutex); + + return ret; +} + +int unregister_ftrace_command(struct ftrace_func_command *cmd) +{ + struct ftrace_func_command *p, *n; + int ret = -ENODEV; + + mutex_lock(&ftrace_cmd_mutex); + list_for_each_entry_safe(p, n, &ftrace_commands, list) { + if (strcmp(cmd->name, p->name) == 0) { + ret = 0; + list_del_init(&p->list); + goto out_unlock; + } + } + out_unlock: + mutex_unlock(&ftrace_cmd_mutex); + + return ret; +} + static int ftrace_process_regex(char *buff, int len, int enable) { - char *func, *mod, *command, *next = buff; + struct ftrace_func_command *p; + char *func, *command, *next = buff; + int ret = -EINVAL; func = strsep(&next, ":"); @@ -1250,21 +1334,21 @@ static int ftrace_process_regex(char *buff, int len, int enable) return 0; } - /* command fonud */ + /* command found */ command = strsep(&next, ":"); - if (strcmp(command, "mod") == 0) { - /* only match modules */ - if (!next) - return -EINVAL; - - mod = strsep(&next, ":"); - ftrace_match_module_records(func, mod, enable); - return 0; + mutex_lock(&ftrace_cmd_mutex); + list_for_each_entry(p, &ftrace_commands, list) { + if (strcmp(p->name, command) == 0) { + ret = p->func(func, command, next, enable); + goto out_unlock; + } } + out_unlock: + mutex_unlock(&ftrace_cmd_mutex); - return -EINVAL; + return ret; } static ssize_t -- cgit v1.2.3 From 59df055f1991c9fc0c71a9230663c39188f6972f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Feb 2009 15:29:06 -0500 Subject: ftrace: trace different functions with a different tracer Impact: new feature Currently, the function tracer only gives you an ability to hook a tracer to all functions being traced. The dynamic function trace allows you to pick and choose which of those functions will be traced, but all functions being traced will call all tracers that registered with the function tracer. This patch adds a new feature that allows a tracer to hook to specific functions, even when all functions are being traced. It allows for different functions to call different tracer hooks. The way this is accomplished is by a special function that will hook to the function tracer and will set up a hash table knowing which tracer hook to call with which function. This is the most general and easiest method to accomplish this. Later, an arch may choose to supply their own method in changing the mcount call of a function to call a different tracer. But that will be an exercise for the future. To register a function: struct ftrace_hook_ops { void (*func)(unsigned long ip, unsigned long parent_ip, void **data); int (*callback)(unsigned long ip, void **data); void (*free)(void **data); }; int register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, void *data); glob is a simple glob to search for the functions to hook. ops is a pointer to the operations (listed below) data is the default data to be passed to the hook functions when traced ops: func is the hook function to call when the functions are traced callback is a callback function that is called when setting up the hash. That is, if the tracer needs to do something special for each function, that is being traced, and wants to give each function its own data. The address of the entry data is passed to this callback, so that the callback may wish to update the entry to whatever it would like. free is a callback for when the entry is freed. In case the tracer allocated any data, it is give the chance to free it. To unregister we have three functions: void unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, void *data) This will unregister all hooks that match glob, point to ops, and have its data matching data. (note, if glob is NULL, blank or '*', all functions will be tested). void unregister_ftrace_function_hook_func(char *glob, struct ftrace_hook_ops *ops) This will unregister all functions matching glob that has an entry pointing to ops. void unregister_ftrace_function_hook_all(char *glob) This simply unregisters all funcs. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 18 ++++ kernel/trace/ftrace.c | 247 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 265 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f0a0ecc63b5c..13918c4400ad 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -106,6 +106,24 @@ struct ftrace_func_command { /* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ #include +struct ftrace_hook_ops { + void (*func)(unsigned long ip, + unsigned long parent_ip, + void **data); + int (*callback)(unsigned long ip, void **data); + void (*free)(void **data); +}; + +extern int +register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, + void *data); +extern void +unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, + void *data); +extern void +unregister_ftrace_function_hook_func(char *glob, struct ftrace_hook_ops *ops); +extern void unregister_ftrace_function_hook_all(char *glob); + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 157d4f68b0e0..0b80e325f296 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -1245,6 +1246,252 @@ static int __init ftrace_mod_cmd_init(void) } device_initcall(ftrace_mod_cmd_init); +#define FTRACE_HASH_BITS 7 +#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) +static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; + +struct ftrace_func_hook { + struct hlist_node node; + struct ftrace_hook_ops *ops; + unsigned long flags; + unsigned long ip; + void *data; + struct rcu_head rcu; +}; + +static void +function_trace_hook_call(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_func_hook *entry; + struct hlist_head *hhd; + struct hlist_node *n; + unsigned long key; + int resched; + + key = hash_long(ip, FTRACE_HASH_BITS); + + hhd = &ftrace_func_hash[key]; + + if (hlist_empty(hhd)) + return; + + /* + * Disable preemption for these calls to prevent a RCU grace + * period. This syncs the hash iteration and freeing of items + * on the hash. rcu_read_lock is too dangerous here. + */ + resched = ftrace_preempt_disable(); + hlist_for_each_entry_rcu(entry, n, hhd, node) { + if (entry->ip == ip) + entry->ops->func(ip, parent_ip, &entry->data); + } + ftrace_preempt_enable(resched); +} + +static struct ftrace_ops trace_hook_ops __read_mostly = +{ + .func = function_trace_hook_call, +}; + +static int ftrace_hook_registered; + +static void __enable_ftrace_function_hook(void) +{ + int i; + + if (ftrace_hook_registered) + return; + + for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { + struct hlist_head *hhd = &ftrace_func_hash[i]; + if (hhd->first) + break; + } + /* Nothing registered? */ + if (i == FTRACE_FUNC_HASHSIZE) + return; + + __register_ftrace_function(&trace_hook_ops); + ftrace_startup(0); + ftrace_hook_registered = 1; +} + +static void __disable_ftrace_function_hook(void) +{ + int i; + + if (!ftrace_hook_registered) + return; + + for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { + struct hlist_head *hhd = &ftrace_func_hash[i]; + if (hhd->first) + return; + } + + /* no more funcs left */ + __unregister_ftrace_function(&trace_hook_ops); + ftrace_shutdown(0); + ftrace_hook_registered = 0; +} + + +static void ftrace_free_entry_rcu(struct rcu_head *rhp) +{ + struct ftrace_func_hook *entry = + container_of(rhp, struct ftrace_func_hook, rcu); + + if (entry->ops->free) + entry->ops->free(&entry->data); + kfree(entry); +} + + +int +register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, + void *data) +{ + struct ftrace_func_hook *entry; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + unsigned long key; + int type, len, not; + int count = 0; + char *search; + + type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); + len = strlen(search); + + /* we do not support '!' for function hooks */ + if (WARN_ON(not)) + return -EINVAL; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_FAILED) + continue; + + if (!ftrace_match_record(rec, search, len, type)) + continue; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + /* If we did not hook to any, then return error */ + if (!count) + count = -ENOMEM; + goto out_unlock; + } + + count++; + + entry->data = data; + + /* + * The caller might want to do something special + * for each function we find. We call the callback + * to give the caller an opportunity to do so. + */ + if (ops->callback) { + if (ops->callback(rec->ip, &entry->data) < 0) { + /* caller does not like this func */ + kfree(entry); + continue; + } + } + + entry->ops = ops; + entry->ip = rec->ip; + + key = hash_long(entry->ip, FTRACE_HASH_BITS); + hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]); + + } while_for_each_ftrace_rec(); + __enable_ftrace_function_hook(); + + out_unlock: + mutex_unlock(&ftrace_lock); + + return count; +} + +enum { + HOOK_TEST_FUNC = 1, + HOOK_TEST_DATA = 2 +}; + +static void +__unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, + void *data, int flags) +{ + struct ftrace_func_hook *entry; + struct hlist_node *n, *tmp; + char str[KSYM_SYMBOL_LEN]; + int type = MATCH_FULL; + int i, len = 0; + char *search; + + if (glob && (strcmp(glob, "*") || !strlen(glob))) + glob = NULL; + else { + int not; + + type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); + len = strlen(search); + + /* we do not support '!' for function hooks */ + if (WARN_ON(not)) + return; + } + + mutex_lock(&ftrace_lock); + for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { + struct hlist_head *hhd = &ftrace_func_hash[i]; + + hlist_for_each_entry_safe(entry, n, tmp, hhd, node) { + + /* break up if statements for readability */ + if ((flags & HOOK_TEST_FUNC) && entry->ops != ops) + continue; + + if ((flags & HOOK_TEST_DATA) && entry->data != data) + continue; + + /* do this last, since it is the most expensive */ + if (glob) { + kallsyms_lookup(entry->ip, NULL, NULL, + NULL, str); + if (!ftrace_match(str, glob, len, type)) + continue; + } + + hlist_del(&entry->node); + call_rcu(&entry->rcu, ftrace_free_entry_rcu); + } + } + __disable_ftrace_function_hook(); + mutex_unlock(&ftrace_lock); +} + +void +unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, + void *data) +{ + __unregister_ftrace_function_hook(glob, ops, data, + HOOK_TEST_FUNC | HOOK_TEST_DATA); +} + +void +unregister_ftrace_function_hook_func(char *glob, struct ftrace_hook_ops *ops) +{ + __unregister_ftrace_function_hook(glob, ops, NULL, HOOK_TEST_FUNC); +} + +void unregister_ftrace_function_hook_all(char *glob) +{ + __unregister_ftrace_function_hook(glob, NULL, NULL, 0); +} + static LIST_HEAD(ftrace_commands); static DEFINE_MUTEX(ftrace_cmd_mutex); -- cgit v1.2.3 From 988ae9d6b2bc3ebdc1a488490250a6812f85e9d4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Feb 2009 19:17:02 -0500 Subject: ring-buffer: add tracing_is_on to test if ring buffer is enabled This patch adds the tracing_is_on() interface to tell if the ring buffer is turned on or not. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 ++ kernel/trace/ring_buffer.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 8e6646a54acf..f5e793d69bd3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -128,10 +128,12 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); void tracing_on(void); void tracing_off(void); void tracing_off_permanent(void); +int tracing_is_on(void); #else static inline void tracing_on(void) { } static inline void tracing_off(void) { } static inline void tracing_off_permanent(void) { } +static inline int tracing_is_on(void) { return 0; } #endif void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2b4626ce95d6..8f19f1aa42b0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -98,6 +98,15 @@ void tracing_off_permanent(void) set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); } +/** + * tracing_is_on - show state of ring buffers enabled + */ +int tracing_is_on(void) +{ + return ring_buffer_flags == RB_BUFFERS_ON; +} +EXPORT_SYMBOL_GPL(tracing_is_on); + #include "trace.h" /* Up this if you want to test the TIME_EXTENTS and normalization */ -- cgit v1.2.3 From 809dcf29ce4e1723709910878e050bd187617e0e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Feb 2009 23:06:01 -0500 Subject: ftrace: add pretty print to selected fuction traces This patch adds a call back for the tracers that have hooks to selected functions. This allows the tracer to show better output in the set_ftrace_filter file. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 13918c4400ad..b331e216d8a1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -106,12 +106,18 @@ struct ftrace_func_command { /* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ #include +struct seq_file; + struct ftrace_hook_ops { void (*func)(unsigned long ip, unsigned long parent_ip, void **data); int (*callback)(unsigned long ip, void **data); void (*free)(void **data); + int (*print)(struct seq_file *m, + unsigned long ip, + struct ftrace_hook_ops *ops, + void *data); }; extern int diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1e058848cddb..6533c1d20155 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -834,6 +834,9 @@ static int t_hash_show(struct seq_file *m, void *v) rec = hlist_entry(hnd, struct ftrace_func_hook, node); + if (rec->ops->print) + return rec->ops->print(m, rec->ip, rec->ops, rec->data); + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); seq_printf(m, "%s:", str); -- cgit v1.2.3 From 97d0bb8dcd8c2812e1927cdb51d7b1f9c98352b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 11:47:39 +0100 Subject: ftrace: fix !CONFIG_FTRACE [un_]register_ftrace_command() prototypes Impact: build fix Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b331e216d8a1..63281228ce3e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -238,9 +238,11 @@ extern void ftrace_enable_daemon(void); static inline void ftrace_release(void *start, unsigned long size) { } static inline int register_ftrace_command(struct ftrace_func_command *cmd) { + return -EINVAL; } static inline int unregister_ftrace_command(char *cmd_name) { + return -EINVAL; } #endif /* CONFIG_DYNAMIC_FTRACE */ -- cgit v1.2.3 From b6887d7916e44c1d8913084fb6aa5004d9473f1a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Feb 2009 12:32:04 -0500 Subject: ftrace: rename _hook to _probe Impact: clean up Ingo Molnar did not like the _hook naming convention used by the select function tracer. Luis Claudio R. Goncalves suggested using the "_probe" extension. This patch implements the change of calling the functions and variables "_hook" and replacing them with "_probe". Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 12 +++---- kernel/trace/ftrace.c | 78 +++++++++++++++++++++--------------------- kernel/trace/trace_functions.c | 26 +++++++------- 3 files changed, 58 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 63281228ce3e..9d224c43e634 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -108,7 +108,7 @@ struct ftrace_func_command { struct seq_file; -struct ftrace_hook_ops { +struct ftrace_probe_ops { void (*func)(unsigned long ip, unsigned long parent_ip, void **data); @@ -116,19 +116,19 @@ struct ftrace_hook_ops { void (*free)(void **data); int (*print)(struct seq_file *m, unsigned long ip, - struct ftrace_hook_ops *ops, + struct ftrace_probe_ops *ops, void *data); }; extern int -register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, +register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data); extern void -unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, +unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data); extern void -unregister_ftrace_function_hook_func(char *glob, struct ftrace_hook_ops *ops); -extern void unregister_ftrace_function_hook_all(char *glob); +unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); +extern void unregister_ftrace_function_probe_all(char *glob); enum { FTRACE_FL_FREE = (1 << 0), diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index af9d95c0e4de..330a059f6ed7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -255,9 +255,9 @@ static struct pid * const ftrace_swapper_pid = &init_struct_pid; static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; -struct ftrace_func_hook { +struct ftrace_func_probe { struct hlist_node node; - struct ftrace_hook_ops *ops; + struct ftrace_probe_ops *ops; unsigned long flags; unsigned long ip; void *data; @@ -830,11 +830,11 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos) static int t_hash_show(struct seq_file *m, void *v) { - struct ftrace_func_hook *rec; + struct ftrace_func_probe *rec; struct hlist_node *hnd = v; char str[KSYM_SYMBOL_LEN]; - rec = hlist_entry(hnd, struct ftrace_func_hook, node); + rec = hlist_entry(hnd, struct ftrace_func_probe, node); if (rec->ops->print) return rec->ops->print(m, rec->ip, rec->ops, rec->data); @@ -1351,9 +1351,9 @@ static int __init ftrace_mod_cmd_init(void) device_initcall(ftrace_mod_cmd_init); static void -function_trace_hook_call(unsigned long ip, unsigned long parent_ip) +function_trace_probe_call(unsigned long ip, unsigned long parent_ip) { - struct ftrace_func_hook *entry; + struct ftrace_func_probe *entry; struct hlist_head *hhd; struct hlist_node *n; unsigned long key; @@ -1379,18 +1379,18 @@ function_trace_hook_call(unsigned long ip, unsigned long parent_ip) ftrace_preempt_enable(resched); } -static struct ftrace_ops trace_hook_ops __read_mostly = +static struct ftrace_ops trace_probe_ops __read_mostly = { - .func = function_trace_hook_call, + .func = function_trace_probe_call, }; -static int ftrace_hook_registered; +static int ftrace_probe_registered; -static void __enable_ftrace_function_hook(void) +static void __enable_ftrace_function_probe(void) { int i; - if (ftrace_hook_registered) + if (ftrace_probe_registered) return; for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { @@ -1402,16 +1402,16 @@ static void __enable_ftrace_function_hook(void) if (i == FTRACE_FUNC_HASHSIZE) return; - __register_ftrace_function(&trace_hook_ops); + __register_ftrace_function(&trace_probe_ops); ftrace_startup(0); - ftrace_hook_registered = 1; + ftrace_probe_registered = 1; } -static void __disable_ftrace_function_hook(void) +static void __disable_ftrace_function_probe(void) { int i; - if (!ftrace_hook_registered) + if (!ftrace_probe_registered) return; for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { @@ -1421,16 +1421,16 @@ static void __disable_ftrace_function_hook(void) } /* no more funcs left */ - __unregister_ftrace_function(&trace_hook_ops); + __unregister_ftrace_function(&trace_probe_ops); ftrace_shutdown(0); - ftrace_hook_registered = 0; + ftrace_probe_registered = 0; } static void ftrace_free_entry_rcu(struct rcu_head *rhp) { - struct ftrace_func_hook *entry = - container_of(rhp, struct ftrace_func_hook, rcu); + struct ftrace_func_probe *entry = + container_of(rhp, struct ftrace_func_probe, rcu); if (entry->ops->free) entry->ops->free(&entry->data); @@ -1439,10 +1439,10 @@ static void ftrace_free_entry_rcu(struct rcu_head *rhp) int -register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, +register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data) { - struct ftrace_func_hook *entry; + struct ftrace_func_probe *entry; struct ftrace_page *pg; struct dyn_ftrace *rec; int type, len, not; @@ -1453,7 +1453,7 @@ register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); len = strlen(search); - /* we do not support '!' for function hooks */ + /* we do not support '!' for function probes */ if (WARN_ON(not)) return -EINVAL; @@ -1468,7 +1468,7 @@ register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { - /* If we did not hook to any, then return error */ + /* If we did not process any, then return error */ if (!count) count = -ENOMEM; goto out_unlock; @@ -1498,7 +1498,7 @@ register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]); } while_for_each_ftrace_rec(); - __enable_ftrace_function_hook(); + __enable_ftrace_function_probe(); out_unlock: mutex_unlock(&ftrace_lock); @@ -1507,15 +1507,15 @@ register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, } enum { - HOOK_TEST_FUNC = 1, - HOOK_TEST_DATA = 2 + PROBE_TEST_FUNC = 1, + PROBE_TEST_DATA = 2 }; static void -__unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, +__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data, int flags) { - struct ftrace_func_hook *entry; + struct ftrace_func_probe *entry; struct hlist_node *n, *tmp; char str[KSYM_SYMBOL_LEN]; int type = MATCH_FULL; @@ -1530,7 +1530,7 @@ __unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); len = strlen(search); - /* we do not support '!' for function hooks */ + /* we do not support '!' for function probes */ if (WARN_ON(not)) return; } @@ -1542,10 +1542,10 @@ __unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, hlist_for_each_entry_safe(entry, n, tmp, hhd, node) { /* break up if statements for readability */ - if ((flags & HOOK_TEST_FUNC) && entry->ops != ops) + if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) continue; - if ((flags & HOOK_TEST_DATA) && entry->data != data) + if ((flags & PROBE_TEST_DATA) && entry->data != data) continue; /* do this last, since it is the most expensive */ @@ -1560,27 +1560,27 @@ __unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, call_rcu(&entry->rcu, ftrace_free_entry_rcu); } } - __disable_ftrace_function_hook(); + __disable_ftrace_function_probe(); mutex_unlock(&ftrace_lock); } void -unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops, +unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data) { - __unregister_ftrace_function_hook(glob, ops, data, - HOOK_TEST_FUNC | HOOK_TEST_DATA); + __unregister_ftrace_function_probe(glob, ops, data, + PROBE_TEST_FUNC | PROBE_TEST_DATA); } void -unregister_ftrace_function_hook_func(char *glob, struct ftrace_hook_ops *ops) +unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) { - __unregister_ftrace_function_hook(glob, ops, NULL, HOOK_TEST_FUNC); + __unregister_ftrace_function_probe(glob, ops, NULL, PROBE_TEST_FUNC); } -void unregister_ftrace_function_hook_all(char *glob) +void unregister_ftrace_function_probe_all(char *glob) { - __unregister_ftrace_function_hook(glob, NULL, NULL, 0); + __unregister_ftrace_function_probe(glob, NULL, NULL, 0); } static LIST_HEAD(ftrace_commands); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 021a574c5988..6ea73ed03bfa 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -269,21 +269,21 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data) static int ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, - struct ftrace_hook_ops *ops, void *data); + struct ftrace_probe_ops *ops, void *data); -static struct ftrace_hook_ops traceon_hook_ops = { +static struct ftrace_probe_ops traceon_probe_ops = { .func = ftrace_traceon, .print = ftrace_trace_onoff_print, }; -static struct ftrace_hook_ops traceoff_hook_ops = { +static struct ftrace_probe_ops traceoff_probe_ops = { .func = ftrace_traceoff, .print = ftrace_trace_onoff_print, }; static int ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, - struct ftrace_hook_ops *ops, void *data) + struct ftrace_probe_ops *ops, void *data) { char str[KSYM_SYMBOL_LEN]; long count = (long)data; @@ -291,7 +291,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, kallsyms_lookup(ip, NULL, NULL, NULL, str); seq_printf(m, "%s:", str); - if (ops == &traceon_hook_ops) + if (ops == &traceon_probe_ops) seq_printf(m, "traceon"); else seq_printf(m, "traceoff"); @@ -306,15 +306,15 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, static int ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param) { - struct ftrace_hook_ops *ops; + struct ftrace_probe_ops *ops; /* we register both traceon and traceoff to this callback */ if (strcmp(cmd, "traceon") == 0) - ops = &traceon_hook_ops; + ops = &traceon_probe_ops; else - ops = &traceoff_hook_ops; + ops = &traceoff_probe_ops; - unregister_ftrace_function_hook_func(glob, ops); + unregister_ftrace_function_probe_func(glob, ops); return 0; } @@ -322,7 +322,7 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param) static int ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) { - struct ftrace_hook_ops *ops; + struct ftrace_probe_ops *ops; void *count = (void *)-1; char *number; int ret; @@ -336,9 +336,9 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) /* we register both traceon and traceoff to this callback */ if (strcmp(cmd, "traceon") == 0) - ops = &traceon_hook_ops; + ops = &traceon_probe_ops; else - ops = &traceoff_hook_ops; + ops = &traceoff_probe_ops; if (!param) goto out_reg; @@ -357,7 +357,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) return ret; out_reg: - ret = register_ftrace_function_hook(glob, ops, count); + ret = register_ftrace_function_probe(glob, ops, count); return ret; } -- cgit v1.2.3 From fe1200b63d158b28eef6d4de1e5b5f99c681ba2f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 17 Feb 2009 12:05:07 -0500 Subject: SLUB: Introduce and use SLUB_MAX_SIZE and SLUB_PAGE_SHIFT constants As a preparational patch to bump up page allocator pass-through threshold, introduce two new constants SLUB_MAX_SIZE and SLUB_PAGE_SHIFT and convert mm/slub.c to use them. Reported-by: "Zhang, Yanmin" Tested-by: "Zhang, Yanmin" Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 19 ++++++++++++++++--- mm/slub.c | 16 ++++++++-------- 2 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 2f5c16b1aacd..986e09dcfd8f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -120,11 +120,24 @@ struct kmem_cache { #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) +/* + * Maximum kmalloc object size handled by SLUB. Larger object allocations + * are passed through to the page allocator. The page allocator "fastpath" + * is relatively slow so we need this value sufficiently high so that + * performance critical objects are allocated through the SLUB fastpath. + * + * This should be dropped to PAGE_SIZE / 2 once the page allocator + * "fastpath" becomes competitive with the slab allocator fastpaths. + */ +#define SLUB_MAX_SIZE (PAGE_SIZE) + +#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 1) + /* * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. */ -extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1]; +extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT]; /* * Sorry that the following has to be that ugly but some versions of GCC @@ -212,7 +225,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) static __always_inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { - if (size > PAGE_SIZE) + if (size > SLUB_MAX_SIZE) return kmalloc_large(size, flags); if (!(flags & SLUB_DMA)) { @@ -234,7 +247,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && - size <= PAGE_SIZE && !(flags & SLUB_DMA)) { + size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); if (!s) diff --git a/mm/slub.c b/mm/slub.c index bdc9abb08a23..5a5e7f5bf799 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2475,7 +2475,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; +struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); static int __init setup_slub_min_order(char *str) @@ -2537,7 +2537,7 @@ panic: } #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; +static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT]; static void sysfs_add_func(struct work_struct *w) { @@ -2658,7 +2658,7 @@ void *__kmalloc(size_t size, gfp_t flags) { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -2686,7 +2686,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large_node(size, flags, node); s = get_slab(size, flags); @@ -2985,7 +2985,7 @@ void __init kmem_cache_init(void) caches++; } - for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], "kmalloc", 1 << i, GFP_KERNEL); caches++; @@ -3022,7 +3022,7 @@ void __init kmem_cache_init(void) slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ - for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) kmalloc_caches[i]. name = kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); @@ -3222,7 +3222,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large(size, gfpflags); s = get_slab(size, gfpflags); @@ -3238,7 +3238,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large_node(size, gfpflags, node); s = get_slab(size, gfpflags); -- cgit v1.2.3 From 000ab691172db3921efa3cb7f17fc79235a1de7f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Feb 2009 13:35:06 -0500 Subject: ftrace: allow archs to preform pre and post process for code modification This patch creates the weak functions: ftrace_arch_code_modify_prepare and ftrace_arch_code_modify_post_process that are called before and after the stop machine is called to modify the kernel text. If the arch needs to do pre or post processing, it only needs to define these functions. [ Update: Ingo Molnar suggested using the name ftrace_arch_code_modify_* over using ftrace_arch_modify_* ] Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +++ kernel/trace/ftrace.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 677432b9cb7e..fdb2a89ae543 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -99,6 +99,9 @@ stack_trace_sysctl(struct ctl_table *table, int write, /* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ #include +int ftrace_arch_code_modify_prepare(void); +int ftrace_arch_code_modify_post_process(void); + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fdf913dfc7e8..72316d9647bd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -585,6 +585,24 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) return 1; } +/* + * archs can override this function if they must do something + * before the modifying code is performed. + */ +int __weak ftrace_arch_code_modify_prepare(void) +{ + return 0; +} + +/* + * archs can override this function if they must do something + * after the modifying code is performed. + */ +int __weak ftrace_arch_code_modify_post_process(void) +{ + return 0; +} + static int __ftrace_modify_code(void *data) { int *command = data; @@ -607,7 +625,17 @@ static int __ftrace_modify_code(void *data) static void ftrace_run_update_code(int command) { + int ret; + + ret = ftrace_arch_code_modify_prepare(); + FTRACE_WARN_ON(ret); + if (ret) + return; + stop_machine(__ftrace_modify_code, &command, NULL); + + ret = ftrace_arch_code_modify_post_process(); + FTRACE_WARN_ON(ret); } static ftrace_func_t saved_ftrace_func; -- cgit v1.2.3 From 7c37730cd31ddb2d3a1da142af9b18c29b8c433b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Feb 2009 12:07:53 -0500 Subject: tracing: add DEFINE_TRACE_FMT to tracepoint.h This patch creates a DEFINE_TRACE_FMT to map to DECLARE_TRACE. This allows for the developers to place format strings and args in with their tracepoint declaration. A tracer may now override the DEFINE_TRACE_FMT macro and use it to record a default format. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 757005458366..34ae464effff 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -153,4 +153,7 @@ static inline void tracepoint_synchronize_unregister(void) synchronize_sched(); } +#define DEFINE_TRACE_FMT(name, proto, args, fmt) \ + DECLARE_TRACE(name, TPPROTO(proto), TPARGS(args)) + #endif -- cgit v1.2.3 From eef62a6826b8ab530cefff5aa55c1661a209c803 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Feb 2009 15:49:52 -0500 Subject: tracing: rename DEFINE_TRACE_FMT to just TRACE_FORMAT There's been a bit confusion to whether DEFINE/DECLARE_TRACE_FMT should be a DEFINE or a DECLARE. Ingo Molnar suggested simply calling it TRACE_FORMAT. Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 2 +- include/trace/sched_event_types.h | 26 +++++++++++++------------- kernel/trace/trace_events.h | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 34ae464effff..3de09fa8e01d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -153,7 +153,7 @@ static inline void tracepoint_synchronize_unregister(void) synchronize_sched(); } -#define DEFINE_TRACE_FMT(name, proto, args, fmt) \ +#define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, TPPROTO(proto), TPARGS(args)) #endif diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index a4f662940f4e..a3d3d66a51c8 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -1,72 +1,72 @@ /* use instead */ -#ifndef DEFINE_TRACE_FMT +#ifndef TRACE_FORMAT # error Do not include this file directly. # error Unless you know what you are doing. #endif -DEFINE_TRACE_FMT(sched_kthread_stop, +TRACE_FORMAT(sched_kthread_stop, TPPROTO(struct task_struct *t), TPARGS(t), TPFMT("task %s:%d", t->comm, t->pid)); -DEFINE_TRACE_FMT(sched_kthread_stop_ret, +TRACE_FORMAT(sched_kthread_stop_ret, TPPROTO(int ret), TPARGS(ret), TPFMT("ret=%d", ret)); -DEFINE_TRACE_FMT(sched_wait_task, +TRACE_FORMAT(sched_wait_task, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p), TPFMT("task %s:%d", p->comm, p->pid)); -DEFINE_TRACE_FMT(sched_wakeup, +TRACE_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed")); -DEFINE_TRACE_FMT(sched_wakeup_new, +TRACE_FORMAT(sched_wakeup_new, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d", p->comm, p->pid, success?"succeeded":"failed")); -DEFINE_TRACE_FMT(sched_switch, +TRACE_FORMAT(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next), TPFMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid)); -DEFINE_TRACE_FMT(sched_migrate_task, +TRACE_FORMAT(sched_migrate_task, TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu), TPARGS(p, orig_cpu, dest_cpu), TPFMT("task %s:%d from: %d to: %d", p->comm, p->pid, orig_cpu, dest_cpu)); -DEFINE_TRACE_FMT(sched_process_free, +TRACE_FORMAT(sched_process_free, TPPROTO(struct task_struct *p), TPARGS(p), TPFMT("task %s:%d", p->comm, p->pid)); -DEFINE_TRACE_FMT(sched_process_exit, +TRACE_FORMAT(sched_process_exit, TPPROTO(struct task_struct *p), TPARGS(p), TPFMT("task %s:%d", p->comm, p->pid)); -DEFINE_TRACE_FMT(sched_process_wait, +TRACE_FORMAT(sched_process_wait, TPPROTO(struct pid *pid), TPARGS(pid), TPFMT("pid %d", pid)); -DEFINE_TRACE_FMT(sched_process_fork, +TRACE_FORMAT(sched_process_fork, TPPROTO(struct task_struct *parent, struct task_struct *child), TPARGS(parent, child), TPFMT("parent %s:%d child %s:%d", parent->comm, parent->pid, child->comm, child->pid)); -DEFINE_TRACE_FMT(sched_signal_send, +TRACE_FORMAT(sched_signal_send, TPPROTO(int sig, struct task_struct *p), TPARGS(sig, p), TPFMT("sig: %d task %s:%d", sig, p->comm, p->pid)); diff --git a/kernel/trace/trace_events.h b/kernel/trace/trace_events.h index cb8455b3ac9a..deb95e5006c8 100644 --- a/kernel/trace/trace_events.h +++ b/kernel/trace/trace_events.h @@ -17,8 +17,8 @@ struct ftrace_event_call { #undef TPFMT #define TPFMT(fmt, args...) fmt "\n", ##args -#undef DEFINE_TRACE_FMT -#define DEFINE_TRACE_FMT(call, proto, args, fmt) \ +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) \ static void ftrace_event_##call(proto) \ { \ event_trace_printk(_RET_IP_, "(" #call ") " fmt); \ -- cgit v1.2.3 From 3cdfdf91fcc77cfc82592e2b5c2ab35abe819c41 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Feb 2009 15:54:30 -0500 Subject: tracing: wrap arguments with PARAMS Peter Zijlstra warned that TPPROTO and TPARGS might become something other than a simple copy of itself. To prevent this from having side effects in the TRACE_FORMAT macro in tracepoint.h, we add a PARAMS() macro to be defined as just a wrapper. Reported-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 3de09fa8e01d..62d13391a240 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -153,7 +153,8 @@ static inline void tracepoint_synchronize_unregister(void) synchronize_sched(); } +#define PARAMS(args...) args #define TRACE_FORMAT(name, proto, args, fmt) \ - DECLARE_TRACE(name, TPPROTO(proto), TPARGS(args)) + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif -- cgit v1.2.3 From 14131f2f98ac350ee9e73faed916d2238a8b6a0d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 18:47:11 +0100 Subject: tracing: implement trace_clock_*() APIs Impact: implement new tracing timestamp APIs Add three trace clock variants, with differing scalability/precision tradeoffs: - local: CPU-local trace clock - medium: scalable global clock with some jitter - global: globally monotonic, serialized clock Make the ring-buffer use the local trace clock internally. Acked-by: Peter Zijlstra Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/trace_clock.h | 19 +++++++++ kernel/trace/Makefile | 1 + kernel/trace/ring_buffer.c | 5 +-- kernel/trace/trace_clock.c | 101 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 include/linux/trace_clock.h create mode 100644 kernel/trace/trace_clock.c (limited to 'include/linux') diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h new file mode 100644 index 000000000000..7a8130384087 --- /dev/null +++ b/include/linux/trace_clock.h @@ -0,0 +1,19 @@ +#ifndef _LINUX_TRACE_CLOCK_H +#define _LINUX_TRACE_CLOCK_H + +/* + * 3 trace clock variants, with differing scalability/precision + * tradeoffs: + * + * - local: CPU-local trace clock + * - medium: scalable global clock with some jitter + * - global: globally monotonic, serialized clock + */ +#include +#include + +extern u64 notrace trace_clock_local(void); +extern u64 notrace trace_clock(void); +extern u64 notrace trace_clock_global(void); + +#endif /* _LINUX_TRACE_CLOCK_H */ diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 664b6c0dc75a..c931fe0560cb 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o obj-$(CONFIG_TRACING) += trace.o +obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8f19f1aa42b0..a8c275c01e83 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4,6 +4,7 @@ * Copyright (C) 2008 Steven Rostedt */ #include +#include #include #include #include @@ -12,7 +13,6 @@ #include #include #include -#include /* used for sched_clock() (for now) */ #include #include #include @@ -112,14 +112,13 @@ EXPORT_SYMBOL_GPL(tracing_is_on); /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 -/* FIXME!!! */ u64 ring_buffer_time_stamp(int cpu) { u64 time; preempt_disable_notrace(); /* shift to debug/test normalization and TIME_EXTENTS */ - time = sched_clock() << DEBUG_SHIFT; + time = trace_clock_local() << DEBUG_SHIFT; preempt_enable_no_resched_notrace(); return time; diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c new file mode 100644 index 000000000000..2d4953f93560 --- /dev/null +++ b/kernel/trace/trace_clock.c @@ -0,0 +1,101 @@ +/* + * tracing clocks + * + * Copyright (C) 2009 Red Hat, Inc., Ingo Molnar + * + * Implements 3 trace clock variants, with differing scalability/precision + * tradeoffs: + * + * - local: CPU-local trace clock + * - medium: scalable global clock with some jitter + * - global: globally monotonic, serialized clock + * + * Tracer plugins will chose a default from these clocks. + */ +#include +#include +#include +#include +#include +#include + +/* + * trace_clock_local(): the simplest and least coherent tracing clock. + * + * Useful for tracing that does not cross to other CPUs nor + * does it go through idle events. + */ +u64 notrace trace_clock_local(void) +{ + /* + * sched_clock() is an architecture implemented, fast, scalable, + * lockless clock. It is not guaranteed to be coherent across + * CPUs, nor across CPU idle events. + */ + return sched_clock(); +} + +/* + * trace_clock(): 'inbetween' trace clock. Not completely serialized, + * but not completely incorrect when crossing CPUs either. + * + * This is based on cpu_clock(), which will allow at most ~1 jiffy of + * jitter between CPUs. So it's a pretty scalable clock, but there + * can be offsets in the trace data. + */ +u64 notrace trace_clock(void) +{ + return cpu_clock(raw_smp_processor_id()); +} + + +/* + * trace_clock_global(): special globally coherent trace clock + * + * It has higher overhead than the other trace clocks but is still + * an order of magnitude faster than GTOD derived hardware clocks. + * + * Used by plugins that need globally coherent timestamps. + */ + +static u64 prev_trace_clock_time; + +static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + +u64 notrace trace_clock_global(void) +{ + unsigned long flags; + int this_cpu; + u64 now; + + raw_local_irq_save(flags); + + this_cpu = raw_smp_processor_id(); + now = cpu_clock(this_cpu); + /* + * If in an NMI context then dont risk lockups and return the + * cpu_clock() time: + */ + if (unlikely(in_nmi())) + goto out; + + __raw_spin_lock(&trace_clock_lock); + + /* + * TODO: if this happens often then maybe we should reset + * my_scd->clock to prev_trace_clock_time+1, to make sure + * we start ticking with the local clock from now on? + */ + if ((s64)(now - prev_trace_clock_time) < 0) + now = prev_trace_clock_time + 1; + + prev_trace_clock_time = now; + + __raw_spin_unlock(&trace_clock_lock); + + out: + raw_local_irq_restore(flags); + + return now; +} -- cgit v1.2.3 From 629928041c53771f9902753d50fef6b35f36d33d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 28 Feb 2009 02:47:59 -0500 Subject: tracing: create the C style tracing for the sched subsystem This patch utilizes the TRACE_EVENT_FORMAT macro to enable the C style faster tracing for the sched subsystem trace points. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 + include/trace/sched_event_types.h | 119 ++++++++++++++++++++++++++++++-------- 2 files changed, 97 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 62d13391a240..152b2f03fb86 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -157,4 +157,7 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FORMAT(name, proto, args, fmt, struct, tpfmt) \ + TRACE_FORMAT(name, PARAMS(proto), PARAMS(args), PARAMS(fmt)) + #endif diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index 2ada206565a3..ba059c10b58a 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -1,6 +1,6 @@ /* use instead */ -#ifndef TRACE_FORMAT +#ifndef TRACE_EVENT_FORMAT # error Do not include this file directly. # error Unless you know what you are doing. #endif @@ -8,70 +8,139 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM sched -TRACE_FORMAT(sched_kthread_stop, +TRACE_EVENT_FORMAT(sched_kthread_stop, TPPROTO(struct task_struct *t), TPARGS(t), - TPFMT("task %s:%d", t->comm, t->pid)); + TPFMT("task %s:%d", t->comm, t->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, t->pid) + ), + TPRAWFMT("task %d") + ); -TRACE_FORMAT(sched_kthread_stop_ret, +TRACE_EVENT_FORMAT(sched_kthread_stop_ret, TPPROTO(int ret), TPARGS(ret), - TPFMT("ret=%d", ret)); + TPFMT("ret=%d", ret), + TRACE_STRUCT( + TRACE_FIELD(int, ret, ret) + ), + TPRAWFMT("ret=%d") + ); -TRACE_FORMAT(sched_wait_task, +TRACE_EVENT_FORMAT(sched_wait_task, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p), - TPFMT("task %s:%d", p->comm, p->pid)); + TPFMT("task %s:%d", p->comm, p->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + ), + TPRAWFMT("task %d") + ); -TRACE_FORMAT(sched_wakeup, +TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", - p->comm, p->pid, success?"succeeded":"failed")); + p->comm, p->pid, success ? "succeeded" : "failed"), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + TRACE_FIELD(int, success, success) + ), + TPRAWFMT("task %d success=%d") + ); -TRACE_FORMAT(sched_wakeup_new, +TRACE_EVENT_FORMAT(sched_wakeup_new, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d", - p->comm, p->pid, success?"succeeded":"failed")); + p->comm, p->pid, success ? "succeeded" : "failed"), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + TRACE_FIELD(int, success, success) + ), + TPRAWFMT("task %d success=%d") + ); -TRACE_FORMAT(sched_switch, +TRACE_EVENT_FORMAT(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next), TPFMT("task %s:%d ==> %s:%d", - prev->comm, prev->pid, next->comm, next->pid)); + prev->comm, prev->pid, next->comm, next->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, prev_pid, prev->pid) + TRACE_FIELD(int, prev_prio, prev->prio) + TRACE_FIELD(pid_t, next_pid, next->pid) + TRACE_FIELD(int, next_prio, next->prio) + ), + TPRAWFMT("prev %d:%d ==> next %d:%d") + ); -TRACE_FORMAT(sched_migrate_task, +TRACE_EVENT_FORMAT(sched_migrate_task, TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu), TPARGS(p, orig_cpu, dest_cpu), TPFMT("task %s:%d from: %d to: %d", - p->comm, p->pid, orig_cpu, dest_cpu)); + p->comm, p->pid, orig_cpu, dest_cpu), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + TRACE_FIELD(int, orig_cpu, orig_cpu) + TRACE_FIELD(int, dest_cpu, dest_cpu) + ), + TPRAWFMT("task %d from: %d to: %d") + ); -TRACE_FORMAT(sched_process_free, +TRACE_EVENT_FORMAT(sched_process_free, TPPROTO(struct task_struct *p), TPARGS(p), - TPFMT("task %s:%d", p->comm, p->pid)); + TPFMT("task %s:%d", p->comm, p->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + ), + TPRAWFMT("task %d") + ); -TRACE_FORMAT(sched_process_exit, +TRACE_EVENT_FORMAT(sched_process_exit, TPPROTO(struct task_struct *p), TPARGS(p), - TPFMT("task %s:%d", p->comm, p->pid)); + TPFMT("task %s:%d", p->comm, p->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + ), + TPRAWFMT("task %d") + ); -TRACE_FORMAT(sched_process_wait, +TRACE_EVENT_FORMAT(sched_process_wait, TPPROTO(struct pid *pid), TPARGS(pid), - TPFMT("pid %d", pid)); + TPFMT("pid %d", pid_nr(pid)), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, pid_nr(pid)) + ), + TPRAWFMT("task %d") + ); -TRACE_FORMAT(sched_process_fork, +TRACE_EVENT_FORMAT(sched_process_fork, TPPROTO(struct task_struct *parent, struct task_struct *child), TPARGS(parent, child), TPFMT("parent %s:%d child %s:%d", - parent->comm, parent->pid, child->comm, child->pid)); + parent->comm, parent->pid, child->comm, child->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, parent, parent->pid) + TRACE_FIELD(pid_t, child, child->pid) + ), + TPRAWFMT("parent %d child %d") + ); -TRACE_FORMAT(sched_signal_send, +TRACE_EVENT_FORMAT(sched_signal_send, TPPROTO(int sig, struct task_struct *p), TPARGS(sig, p), - TPFMT("sig: %d task %s:%d", sig, p->comm, p->pid)); + TPFMT("sig: %d task %s:%d", sig, p->comm, p->pid), + TRACE_STRUCT( + TRACE_FIELD(int, sig, sig) + TRACE_FIELD(pid_t, pid, p->pid) + ), + TPRAWFMT("sig: %d task %d") + ); #undef TRACE_SYSTEM -- cgit v1.2.3 From c79a61f55773d2519fd0525bf58385f7d20752d3 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-Koenig Date: Fri, 27 Feb 2009 21:30:03 +0100 Subject: tracing: make CALLER_ADDRx overwriteable The current definition of CALLER_ADDRx isn't suitable for all platforms. E.g. for ARM __builtin_return_address(N) doesn't work for N > 0 and AFAIK for powerpc there are no frame pointers needed to have a working __builtin_return_address. This patch allows defining the CALLER_ADDRx macros in and let these take precedence. Because now is included unconditionally in all archs that don't already had this include get an empty one for free. Signed-off-by: Uwe Kleine-Koenig Cc: Peter Zijlstra Cc: Ingo Molnar Reviewed-by: KOSAKI Motohiro Signed-off-by: Steven Rostedt --- arch/alpha/include/asm/ftrace.h | 1 + arch/avr32/include/asm/ftrace.h | 1 + arch/blackfin/include/asm/ftrace.h | 1 + arch/cris/include/asm/ftrace.h | 1 + arch/h8300/include/asm/ftrace.h | 1 + arch/m68k/include/asm/ftrace.h | 1 + arch/mips/include/asm/ftrace.h | 1 + arch/parisc/include/asm/ftrace.h | 1 + arch/um/include/asm/ftrace.h | 1 + arch/xtensa/include/asm/ftrace.h | 1 + include/asm-frv/ftrace.h | 1 + include/asm-m32r/ftrace.h | 1 + include/asm-mn10300/ftrace.h | 1 + include/linux/ftrace.h | 41 +++++++++++++++++++------------------- 14 files changed, 34 insertions(+), 20 deletions(-) create mode 100644 arch/alpha/include/asm/ftrace.h create mode 100644 arch/avr32/include/asm/ftrace.h create mode 100644 arch/blackfin/include/asm/ftrace.h create mode 100644 arch/cris/include/asm/ftrace.h create mode 100644 arch/h8300/include/asm/ftrace.h create mode 100644 arch/m68k/include/asm/ftrace.h create mode 100644 arch/mips/include/asm/ftrace.h create mode 100644 arch/parisc/include/asm/ftrace.h create mode 100644 arch/um/include/asm/ftrace.h create mode 100644 arch/xtensa/include/asm/ftrace.h create mode 100644 include/asm-frv/ftrace.h create mode 100644 include/asm-m32r/ftrace.h create mode 100644 include/asm-mn10300/ftrace.h (limited to 'include/linux') diff --git a/arch/alpha/include/asm/ftrace.h b/arch/alpha/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/alpha/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/avr32/include/asm/ftrace.h b/arch/avr32/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/avr32/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/blackfin/include/asm/ftrace.h b/arch/blackfin/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/blackfin/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/cris/include/asm/ftrace.h b/arch/cris/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/cris/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/h8300/include/asm/ftrace.h b/arch/h8300/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/h8300/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/m68k/include/asm/ftrace.h b/arch/m68k/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/m68k/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/mips/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/parisc/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/um/include/asm/ftrace.h b/arch/um/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/um/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/arch/xtensa/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/include/asm-frv/ftrace.h b/include/asm-frv/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/include/asm-frv/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/include/asm-m32r/ftrace.h b/include/asm-m32r/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/include/asm-m32r/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/include/asm-mn10300/ftrace.h b/include/asm-mn10300/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/include/asm-mn10300/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 847bb3c48dd0..1f69ac7c1587 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -11,6 +11,8 @@ #include #include +#include + #ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; @@ -103,8 +105,6 @@ struct ftrace_func_command { }; #ifdef CONFIG_DYNAMIC_FTRACE -/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ -#include int ftrace_arch_code_modify_prepare(void); int ftrace_arch_code_modify_post_process(void); @@ -282,24 +282,25 @@ static inline void __ftrace_enabled_restore(int enabled) #endif } -#ifdef CONFIG_FRAME_POINTER -/* TODO: need to fix this for ARM */ -# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) -# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) -# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) -# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) -# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) -# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) -#else -# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -# define CALLER_ADDR1 0UL -# define CALLER_ADDR2 0UL -# define CALLER_ADDR3 0UL -# define CALLER_ADDR4 0UL -# define CALLER_ADDR5 0UL -# define CALLER_ADDR6 0UL -#endif +#ifndef HAVE_ARCH_CALLER_ADDR +# ifdef CONFIG_FRAME_POINTER +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) +# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) +# else +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 0UL +# define CALLER_ADDR2 0UL +# define CALLER_ADDR3 0UL +# define CALLER_ADDR4 0UL +# define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL +# endif +#endif /* ifndef HAVE_ARCH_CALLER_ADDR */ #ifdef CONFIG_IRQSOFF_TRACER extern void time_hardirqs_on(unsigned long a0, unsigned long a1); -- cgit v1.2.3 From ef7a4a161472b952941bf78855a9cd95703c024e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 3 Mar 2009 00:27:49 -0500 Subject: ring-buffer: fix ring_buffer_read_page The ring_buffer_read_page was broken if it were to only copy part of the page. This patch fixes that up as well as adds a parameter to allow a length field, in order to only copy part of the buffer page. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 7 +++- kernel/trace/ring_buffer.c | 92 +++++++++++++++++++++++++++++---------------- 2 files changed, 64 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index f5e793d69bd3..79fcbc4b09d6 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -121,6 +121,9 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(int cpu); void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); +size_t ring_buffer_page_len(void *page); + + /* * The below functions are fine to use outside the tracing facility. */ @@ -138,8 +141,8 @@ static inline int tracing_is_on(void) { return 0; } void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); -int ring_buffer_read_page(struct ring_buffer *buffer, - void **data_page, int cpu, int full); +int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, + size_t len, int cpu, int full); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9baad7ee4b36..2ad6bae95a3d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -234,6 +234,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +size_t ring_buffer_page_len(void *page) +{ + return local_read(&((struct buffer_data_page *)page)->commit); +} + /* * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing * this issue out. @@ -2378,8 +2383,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, */ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) { - unsigned long addr; struct buffer_data_page *bpage; + unsigned long addr; addr = __get_free_page(GFP_KERNEL); if (!addr) @@ -2387,6 +2392,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) bpage = (void *)addr; + rb_init_page(bpage); + return bpage; } @@ -2406,6 +2413,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) * ring_buffer_read_page - extract a page from the ring buffer * @buffer: buffer to extract from * @data_page: the page to use allocated from ring_buffer_alloc_read_page + * @len: amount to extract * @cpu: the cpu of the buffer to extract * @full: should the extraction only happen when the page is full. * @@ -2418,7 +2426,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) * rpage = ring_buffer_alloc_read_page(buffer); * if (!rpage) * return error; - * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); + * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); * if (ret >= 0) * process_page(rpage, ret); * @@ -2435,71 +2443,89 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) * <0 if no data has been transferred. */ int ring_buffer_read_page(struct ring_buffer *buffer, - void **data_page, int cpu, int full) + void **data_page, size_t len, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; struct buffer_data_page *bpage; + struct buffer_page *reader; unsigned long flags; + unsigned int commit; unsigned int read; int ret = -1; if (!data_page) - return 0; + return -1; bpage = *data_page; if (!bpage) - return 0; + return -1; spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - /* - * rb_buffer_peek will get the next ring buffer if - * the current reader page is empty. - */ - event = rb_buffer_peek(buffer, cpu, NULL); - if (!event) + reader = rb_get_reader_page(cpu_buffer); + if (!reader) goto out; - /* check for data */ - if (!local_read(&cpu_buffer->reader_page->page->commit)) - goto out; + event = rb_reader_event(cpu_buffer); + + read = reader->read; + commit = rb_page_commit(reader); - read = cpu_buffer->reader_page->read; /* - * If the writer is already off of the read page, then simply - * switch the read page with the given page. Otherwise - * we need to copy the data from the reader to the writer. + * If len > what's left on the page, and the writer is also off of + * the read page, then simply switch the read page with the given + * page. Otherwise we need to copy the data from the reader to the + * writer. */ - if (cpu_buffer->reader_page == cpu_buffer->commit_page) { - unsigned int commit = rb_page_commit(cpu_buffer->reader_page); + if ((len < (commit - read)) || + cpu_buffer->reader_page == cpu_buffer->commit_page) { struct buffer_data_page *rpage = cpu_buffer->reader_page->page; + unsigned int pos = read; + unsigned int size; if (full) goto out; - /* The writer is still on the reader page, we must copy */ - memcpy(bpage->data + read, rpage->data + read, commit - read); - /* consume what was read */ - cpu_buffer->reader_page->read = commit; + if (len > (commit - read)) + len = (commit - read); + + size = rb_event_length(event); + + if (len < size) + goto out; + + /* Need to copy one event at a time */ + do { + memcpy(bpage->data + pos, rpage->data + pos, size); + + len -= size; + + rb_advance_reader(cpu_buffer); + pos = reader->read; + + event = rb_reader_event(cpu_buffer); + size = rb_event_length(event); + } while (len > size); /* update bpage */ - local_set(&bpage->commit, commit); - if (!read) - bpage->time_stamp = rpage->time_stamp; + local_set(&bpage->commit, pos); + bpage->time_stamp = rpage->time_stamp; + } else { /* swap the pages */ rb_init_page(bpage); - bpage = cpu_buffer->reader_page->page; - cpu_buffer->reader_page->page = *data_page; - local_set(&cpu_buffer->reader_page->write, 0); - cpu_buffer->reader_page->read = 0; + bpage = reader->page; + reader->page = *data_page; + local_set(&reader->write, 0); + reader->read = 0; *data_page = bpage; + + /* update the entry counter */ + rb_remove_entries(cpu_buffer, bpage, read); } ret = read; - /* update the entry counter */ - rb_remove_entries(cpu_buffer, bpage, read); out: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); -- cgit v1.2.3 From 5e1607a00bd082972629d3d68c95c8bcf902b55a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 10:24:48 +0100 Subject: tracing: rename ftrace_printk() => trace_printk() Impact: cleanup Use a more generic name - this also allows the prototype to move to kernel.h and be generally available to kernel developers who want to do some quick tracing. Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 6 +++--- include/linux/ftrace.h | 18 +++++++++--------- kernel/trace/trace.c | 8 ++++---- kernel/trace/trace.h | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 2041ee951c1a..22614bef6359 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -1466,11 +1466,11 @@ want, depending on your needs. You can put some comments on specific functions by using -ftrace_printk() For example, if you want to put a comment inside +trace_printk() For example, if you want to put a comment inside the __might_sleep() function, you just have to include - and call ftrace_printk() inside __might_sleep() + and call trace_printk() inside __might_sleep() -ftrace_printk("I'm a comment!\n") +trace_printk("I'm a comment!\n") will produce: diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f69ac7c1587..fbb9c364e166 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -329,11 +329,11 @@ extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); /** - * ftrace_printk - printf formatting in the ftrace buffer + * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing * - * Note: __ftrace_printk is an internal function for ftrace_printk and - * the @ip is passed in via the ftrace_printk macro. + * Note: __trace_printk is an internal function for trace_printk and + * the @ip is passed in via the trace_printk macro. * * This function allows a kernel developer to debug fast path sections * that printk is not appropriate for. By scattering in various @@ -341,14 +341,14 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); * where problems are occurring. * * This is intended as a debugging tool for the developer only. - * Please refrain from leaving ftrace_printks scattered around in + * Please refrain from leaving trace_printks scattered around in * your code. */ -# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt) +# define trace_printk(fmt...) __trace_printk(_THIS_IP_, fmt) extern int -__ftrace_printk(unsigned long ip, const char *fmt, ...) +__trace_printk(unsigned long ip, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap) +# define ftrace_vprintk(fmt, ap) __trace_printk(_THIS_IP_, fmt, ap) extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(void); @@ -356,13 +356,13 @@ extern void ftrace_dump(void); static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int -ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } static inline int -ftrace_printk(const char *fmt, ...) +trace_printk(const char *fmt, ...) { return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d1ef43999d9e..c0e9c1263393 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -48,7 +48,7 @@ unsigned long __read_mostly tracing_thresh; * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the * entries inserted during the selftest although some concurrent - * insertions into the ring-buffer such as ftrace_printk could occurred + * insertions into the ring-buffer such as trace_printk could occurred * at the same time, giving false positive or negative results. */ static bool __read_mostly tracing_selftest_running; @@ -291,7 +291,7 @@ static const char *trace_options[] = { "block", "stacktrace", "sched-tree", - "ftrace_printk", + "trace_printk", "ftrace_preempt", "branch", "annotate", @@ -3768,7 +3768,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_vprintk); -int __ftrace_printk(unsigned long ip, const char *fmt, ...) +int __trace_printk(unsigned long ip, const char *fmt, ...) { int ret; va_list ap; @@ -3781,7 +3781,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) va_end(ap); return ret; } -EXPORT_SYMBOL_GPL(__ftrace_printk); +EXPORT_SYMBOL_GPL(__trace_printk); int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 12cd119cca32..8beff03fda68 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -115,7 +115,7 @@ struct userstack_entry { }; /* - * ftrace_printk entry: + * trace_printk entry: */ struct print_entry { struct trace_entry ent; -- cgit v1.2.3 From 526211bc58c4b3265352801c5a7f469af5c34711 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 10:28:45 +0100 Subject: tracing: move utility functions from ftrace.h to kernel.h Make common utility functions such as trace_printk() and tracing_start()/tracing_stop() generally available to kernel code. Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 58 ++------------------------------------------------ include/linux/kernel.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fbb9c364e166..5b64303ec9f2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -318,62 +318,6 @@ static inline void __ftrace_enabled_restore(int enabled) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_TRACING -extern int ftrace_dump_on_oops; - -extern void tracing_start(void); -extern void tracing_stop(void); -extern void ftrace_off_permanent(void); - -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); - -/** - * trace_printk - printf formatting in the ftrace buffer - * @fmt: the printf format for printing - * - * Note: __trace_printk is an internal function for trace_printk and - * the @ip is passed in via the trace_printk macro. - * - * This function allows a kernel developer to debug fast path sections - * that printk is not appropriate for. By scattering in various - * printk like tracing in the code, a developer can quickly see - * where problems are occurring. - * - * This is intended as a debugging tool for the developer only. - * Please refrain from leaving trace_printks scattered around in - * your code. - */ -# define trace_printk(fmt...) __trace_printk(_THIS_IP_, fmt) -extern int -__trace_printk(unsigned long ip, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -# define ftrace_vprintk(fmt, ap) __trace_printk(_THIS_IP_, fmt, ap) -extern int -__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); -extern void ftrace_dump(void); -#else -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } -static inline int -trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); - -static inline void tracing_start(void) { } -static inline void tracing_stop(void) { } -static inline void ftrace_off_permanent(void) { } -static inline int -trace_printk(const char *fmt, ...) -{ - return 0; -} -static inline int -ftrace_vprintk(const char *fmt, va_list ap) -{ - return 0; -} -static inline void ftrace_dump(void) { } -#endif - #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); extern void ftrace_init_module(struct module *mod, @@ -542,6 +486,8 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) return tsk->trace & TSK_TRACE_FL_GRAPH; } +extern int ftrace_dump_on_oops; + #endif /* CONFIG_TRACING */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7fa371898e3e..08bf5da86676 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -367,6 +367,64 @@ static inline char *pack_hex_byte(char *buf, u8 byte) ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) #endif +/* + * General tracing related utility functions - trace_printk(), + * tracing_start()/tracing_stop: + */ +#ifdef CONFIG_TRACING +extern void tracing_start(void); +extern void tracing_stop(void); +extern void ftrace_off_permanent(void); + +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); + +/** + * trace_printk - printf formatting in the ftrace buffer + * @fmt: the printf format for printing + * + * Note: __trace_printk is an internal function for trace_printk and + * the @ip is passed in via the trace_printk macro. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving trace_printks scattered around in + * your code. + */ +# define trace_printk(fmt...) __trace_printk(_THIS_IP_, fmt) +extern int +__trace_printk(unsigned long ip, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +# define ftrace_vprintk(fmt, ap) __trace_printk(_THIS_IP_, fmt, ap) +extern int +__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); +extern void ftrace_dump(void); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +static inline int +trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); + +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } +static inline void ftrace_off_permanent(void) { } +static inline int +trace_printk(const char *fmt, ...) +{ + return 0; +} +static inline int +ftrace_vprintk(const char *fmt, va_list ap) +{ + return 0; +} +static inline void ftrace_dump(void) { } +#endif + /* * Display an IP address in readable format. */ -- cgit v1.2.3 From 0012693ad4f636c720fed3802027f9427962f540 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Mar 2009 01:49:22 +0100 Subject: tracing/function-graph-tracer: use the more lightweight local clock Impact: decrease hangs risks with the graph tracer on slow systems Since the function graph tracer can spend too much time on timer interrupts, it's better now to use the more lightweight local clock. Anyway, the function graph traces are more reliable on a per cpu trace. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra LKML-Reference: <49af243d.06e9300a.53ad.ffff840c@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 2 +- include/linux/ftrace.h | 13 +++++++------ kernel/trace/trace_functions_graph.c | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3925ec0184b1..a85da1764b1c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -436,7 +436,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - calltime = cpu_clock(raw_smp_processor_id()); + calltime = trace_clock_local(); if (ftrace_push_return_trace(old, calltime, self_addr, &trace.depth) == -EBUSY) { diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f69ac7c1587..6ea62acbe4b6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1,15 +1,16 @@ #ifndef _LINUX_FTRACE_H #define _LINUX_FTRACE_H -#include -#include -#include -#include -#include -#include +#include #include +#include #include +#include +#include #include +#include +#include +#include #include diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index c009553a8e81..e527f2f66c73 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -112,7 +112,7 @@ unsigned long ftrace_return_to_handler(void) unsigned long ret; ftrace_pop_return_trace(&trace, &ret); - trace.rettime = cpu_clock(raw_smp_processor_id()); + trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); if (unlikely(!ret)) { -- cgit v1.2.3 From 2002c258faaa8f89543df284fdbaa9e4b171547f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Mar 2009 10:35:56 -0500 Subject: tracing: add tracing_on/tracing_off to kernel.h Impact: cleanup The functions tracing_start/tracing_stop have been moved to kernel.h. These are not the functions a developer most likely wants to use when they want to insert a place to stop tracing and restart it from user space. tracing_start/tracing_stop was created to work with things like suspend to ram, where even calling smp_processor_id() can crash the system. The tracing_start/tracing_stop was used to stop the tracer from doing anything. These are still light weight functions, but add a bit more overhead to be able to stop the tracers. They also have no interface back to userland. That is, if the kernel calls tracing_stop, userland can not start tracing. What a developer most likely wants to use is tracing_on/tracing_off. These are very light weight functions (simply sets or clears a bit). These functions just stop recording into the ring buffer. The tracers don't even know that this happens except that they would receive NULL from the ring_buffer_lock_reserve function. Also, there's a way for the user land to enable or disable this bit. In debugfs/tracing/tracing_on, a user may echo "0" (same as tracing_off()) or echo "1" (same as tracing_on()) into this file. This becomes handy when a kernel developer is debugging and wants tracing to turn off when it hits an anomaly. Then the developer can examine the trace, and restart tracing if they want to try again (echo 1 > tracing_on). This patch moves the prototypes for tracing_on/tracing_off to kernel.h and comments their use, so that a kernel developer will know how to use them. Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 29 ++++++++++++++++++++++++++++- include/linux/ring_buffer.h | 15 --------------- 2 files changed, 28 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 08bf5da86676..d4614a8a034b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -369,8 +369,35 @@ static inline char *pack_hex_byte(char *buf, u8 byte) /* * General tracing related utility functions - trace_printk(), - * tracing_start()/tracing_stop: + * tracing_on/tracing_off and tracing_start()/tracing_stop + * + * Use tracing_on/tracing_off when you want to quickly turn on or off + * tracing. It simply enables or disables the recording of the trace events. + * This also corresponds to the user space debugfs/tracing/tracing_on + * file, which gives a means for the kernel and userspace to interact. + * Place a tracing_off() in the kernel where you want tracing to end. + * From user space, examine the trace, and then echo 1 > tracing_on + * to continue tracing. + * + * tracing_stop/tracing_start has slightly more overhead. It is used + * by things like suspend to ram where disabling the recording of the + * trace is not enough, but tracing must actually stop because things + * like calling smp_processor_id() may crash the system. + * + * Most likely, you want to use tracing_on/tracing_off. */ +#ifdef CONFIG_RING_BUFFER +void tracing_on(void); +void tracing_off(void); +/* trace_off_permanent stops recording with no way to bring it back */ +void tracing_off_permanent(void); +int tracing_is_on(void); +#else +static inline void tracing_on(void) { } +static inline void tracing_off(void) { } +static inline void tracing_off_permanent(void) { } +static inline int tracing_is_on(void) { return 0; } +#endif #ifdef CONFIG_TRACING extern void tracing_start(void); extern void tracing_stop(void); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 79fcbc4b09d6..b1a0068a5557 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -124,21 +124,6 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); size_t ring_buffer_page_len(void *page); -/* - * The below functions are fine to use outside the tracing facility. - */ -#ifdef CONFIG_RING_BUFFER -void tracing_on(void); -void tracing_off(void); -void tracing_off_permanent(void); -int tracing_is_on(void); -#else -static inline void tracing_on(void) { } -static inline void tracing_off(void) { } -static inline void tracing_off_permanent(void) { } -static inline int tracing_is_on(void) { return 0; } -#endif - void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, -- cgit v1.2.3 From 0e39ac444636ff5be39b26f1cb56d79594654dda Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 6 Mar 2009 10:35:52 -0500 Subject: tracing, Text Edit Lock - Architecture Independent Code This is an architecture independant synchronization around kernel text modifications through use of a global mutex. A mutex has been chosen so that kprobes, the main user of this, can sleep during memory allocation between the memory read of the instructions it must replace and the memory write of the breakpoint. Other user of this interface: immediate values. Paravirt and alternatives are always done when SMP is inactive, so there is no need to use locks. Signed-off-by: Mathieu Desnoyers LKML-Reference: <49B142D8.7020601@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/memory.h | 6 ++++++ mm/memory.c | 10 ++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 3fdc10806d31..86a6c0f0518d 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -99,4 +99,10 @@ enum mem_add_context { BOOT, HOTPLUG }; #define hotplug_memory_notifier(fn, pri) do { } while (0) #endif +/* + * Kernel text modification mutex, used for code patching. Users of this lock + * can sleep. + */ +extern struct mutex text_mutex; + #endif /* _LINUX_MEMORY_H_ */ diff --git a/mm/memory.c b/mm/memory.c index baa999e87cd2..05fab3bc5b4b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include #include #include @@ -99,6 +101,14 @@ int randomize_va_space __read_mostly = 2; #endif +/* + * mutex protecting text section modification (dynamic code patching). + * some users need to sleep (allocating memory...) while they hold this lock. + * + * NOT exported to modules - patching kernel text is a really delicate matter. + */ +DEFINE_MUTEX(text_mutex); + static int __init disable_randmaps(char *s) { randomize_va_space = 0; -- cgit v1.2.3 From 1427cdf0592368bdec57276edaf714040ee8744f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 6 Mar 2009 17:21:47 +0100 Subject: tracing: infrastructure for supporting binary record Impact: save on memory for tracing Current tracers are typically using a struct(like struct ftrace_entry, struct ctx_switch_entry, struct special_entr etc...)to record a binary event. These structs can only record a their own kind of events. A new kind of tracer need a new struct and a lot of code too handle it. So we need a generic binary record for events. This infrastructure is for this purpose. [fweisbec@gmail.com: rebase against latest -tip, make it safe while sched tracing as reported by Steven Rostedt] Signed-off-by: Lai Jiangshan Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 ++ kernel/trace/Kconfig | 6 +++ kernel/trace/Makefile | 1 + kernel/trace/trace.c | 56 ++++++++++++++++++++++++++++ kernel/trace/trace.h | 12 ++++++ kernel/trace/trace_bprintk.c | 87 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_output.c | 75 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 240 insertions(+) create mode 100644 kernel/trace/trace_bprintk.c (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 498769425eb2..1c9cdca02580 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -223,6 +223,9 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); +#ifdef CONFIG_TRACE_BPRINTK +extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); +#endif /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 058d949a3214..ad8d3617d0a6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -97,6 +97,12 @@ config FUNCTION_GRAPH_TRACER This is done by setting the current return address on the current task structure into a stack of calls. +config TRACE_BPRINTK + bool "Binary printk for tracing" + default y + depends on TRACING + select BINARY_PRINTF + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index f44736c7574a..46557ef4c379 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o +obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e6144acf2b75..ff53509e19f8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3792,6 +3792,62 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) } EXPORT_SYMBOL_GPL(__ftrace_vprintk); +/** + * trace_vbprintk - write binary msg to tracing buffer + * + * Caller must insure @fmt are valid when msg is in tracing buffer. + */ +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) +{ + static DEFINE_SPINLOCK(trace_buf_lock); + static u32 trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + struct bprintk_entry *entry; + unsigned long flags; + int resched; + int cpu, len = 0, size, pc; + + if (tracing_disabled || !trace_bprintk_enable) + return 0; + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + spin_lock_irqsave(&trace_buf_lock, flags); + len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + if (len > TRACE_BUF_SIZE || len < 0) + goto out_unlock; + + size = sizeof(*entry) + sizeof(u32) * len; + event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->fmt = fmt; + + memcpy(entry->buf, trace_buf, sizeof(u32) * len); + ring_buffer_unlock_commit(tr->buffer, event); + +out_unlock: + spin_unlock_irqrestore(&trace_buf_lock, flags); + +out: + ftrace_preempt_enable(resched); + + return len; +} +EXPORT_SYMBOL_GPL(trace_vbprintk); + static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8beff03fda68..0f5077f8f957 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,6 +20,7 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, + TRACE_BPRINTK, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -124,6 +125,16 @@ struct print_entry { char buf[]; }; +struct bprintk_entry { + struct trace_entry ent; + unsigned long ip; + const char *fmt; + u32 buf[]; +}; +#ifdef CONFIG_TRACE_BPRINTK +extern int trace_bprintk_enable; +#endif + #define TRACE_OLD_SIZE 88 struct trace_field_cont { @@ -285,6 +296,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ + IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c new file mode 100644 index 000000000000..1f8e532c3fb9 --- /dev/null +++ b/kernel/trace/trace_bprintk.c @@ -0,0 +1,87 @@ +/* + * trace binary printk + * + * Copyright (C) 2008 Lai Jiangshan + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +/* binary printk basic */ +static DEFINE_MUTEX(btrace_mutex); +static int btrace_metadata_count; + +static inline void lock_btrace(void) +{ + mutex_lock(&btrace_mutex); +} + +static inline void unlock_btrace(void) +{ + mutex_unlock(&btrace_mutex); +} + +static void get_btrace_metadata(void) +{ + lock_btrace(); + btrace_metadata_count++; + unlock_btrace(); +} + +static void put_btrace_metadata(void) +{ + lock_btrace(); + btrace_metadata_count--; + unlock_btrace(); +} + +/* events tracer */ +int trace_bprintk_enable; + +static void start_bprintk_trace(struct trace_array *tr) +{ + get_btrace_metadata(); + tracing_reset_online_cpus(tr); + trace_bprintk_enable = 1; +} + +static void stop_bprintk_trace(struct trace_array *tr) +{ + trace_bprintk_enable = 0; + tracing_reset_online_cpus(tr); + put_btrace_metadata(); +} + +static int init_bprintk_trace(struct trace_array *tr) +{ + start_bprintk_trace(tr); + return 0; +} + +static struct tracer bprintk_trace __read_mostly = +{ + .name = "events", + .init = init_bprintk_trace, + .reset = stop_bprintk_trace, + .start = start_bprintk_trace, + .stop = stop_bprintk_trace, +}; + +static __init int init_bprintk(void) +{ + return register_tracer(&bprintk_trace); +} + +device_initcall(init_bprintk); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 306fef84c503..4ab71201862e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } +static int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (!len) + return 0; + + ret = bstr_printf(s->buffer + s->len, len, fmt, binary); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) + return 0; + + s->len += ret; + + return len; +} + /** * trace_seq_puts - trace sequence printing of simple string * @s: trace sequence descriptor @@ -855,6 +875,60 @@ static struct trace_event trace_print_event = { .raw = trace_print_raw, }; +/* TRACE_BPRINTK */ +static enum print_line_t +trace_bprintk_print(struct trace_iterator *iter, int flags) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct bprintk_entry *field; + + trace_assign_type(field, entry); + + if (!seq_print_ip_sym(s, field->ip, flags)) + goto partial; + + if (!trace_seq_puts(s, ": ")) + goto partial; + + if (!trace_seq_bprintf(s, field->fmt, field->buf)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static enum print_line_t +trace_bprintk_raw(struct trace_iterator *iter, int flags) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct bprintk_entry *field; + + trace_assign_type(field, entry); + + if (!trace_seq_printf(s, ": %lx : ", field->ip)) + goto partial; + + if (!trace_seq_bprintf(s, field->fmt, field->buf)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static struct trace_event trace_bprintk_event = { + .type = TRACE_BPRINTK, + .trace = trace_bprintk_print, + .raw = trace_bprintk_raw, + .hex = trace_nop_print, + .binary = trace_nop_print, +}; + static struct trace_event *events[] __initdata = { &trace_fn_event, &trace_ctx_event, @@ -863,6 +937,7 @@ static struct trace_event *events[] __initdata = { &trace_stack_event, &trace_user_stack_event, &trace_print_event, + &trace_bprintk_event, NULL }; -- cgit v1.2.3 From 1ba28e02a18cbdbea123836f6c98efb09cbf59ec Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 6 Mar 2009 17:21:48 +0100 Subject: tracing: add trace_bprintk() Impact: add a generic printk() for tracing, like trace_printk() trace_bprintk() uses the infrastructure to record events on ring_buffer. [ fweisbec@gmail.com: ported to latest -tip, made it work if !CONFIG_MODULES, never free the format strings from modules because we can't keep track of them and conditionnaly create the ftrace format strings section (reported by Steven Rostedt) ] Signed-off-by: Lai Jiangshan Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 9 ++++ include/linux/ftrace.h | 21 ++++++++++ include/linux/module.h | 5 +++ kernel/module.c | 6 +++ kernel/trace/trace.c | 15 +++++++ kernel/trace/trace_bprintk.c | 87 ++++++++++++++++++++++++++++++++++----- 6 files changed, 133 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0add6b28c366..48ade3168b13 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -69,6 +69,14 @@ #define FTRACE_EVENTS() #endif +#ifdef CONFIG_TRACING +#define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .; \ + *(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \ + VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .; +#else +#define TRACE_PRINTKS() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -100,6 +108,7 @@ *(__vermagic) /* Kernel version magic */ \ *(__markers_strings) /* Markers: strings */ \ *(__tracepoints_strings)/* Tracepoints: strings */ \ + TRACE_PRINTKS() \ } \ \ .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1c9cdca02580..1cc8ca453a9b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -225,6 +225,27 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); #ifdef CONFIG_TRACE_BPRINTK extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); +extern int __trace_bprintk(unsigned long ip, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +static inline void ____trace_bprintk_check_format(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); +static inline void ____trace_bprintk_check_format(const char *fmt, ...) {} +#define __trace_bprintk_check_format(fmt, args...) \ +do { \ + if (0) \ + ____trace_bprintk_check_format(fmt, ##args); \ +} while (0) + +#define trace_bprintk(fmt, args...) \ +do { \ + static char *__attribute__((section("__trace_bprintk_fmt"))) \ + trace_bprintk_fmt = fmt; \ + __trace_bprintk_check_format(fmt, ##args); \ + __trace_bprintk(_THIS_IP_, trace_bprintk_fmt, ##args); \ +} while (0) +#else +#define trace_bprintk trace_printk #endif /* May be defined in arch */ diff --git a/include/linux/module.h b/include/linux/module.h index 145a75528cc1..8cbec972d8e7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -329,6 +329,11 @@ struct module unsigned int num_tracepoints; #endif +#ifdef CONFIG_TRACE_BPRINTK + const char **trace_bprintk_fmt_start; + unsigned int num_trace_bprintk_fmt; +#endif + #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ struct list_head modules_which_use_me; diff --git a/kernel/module.c b/kernel/module.c index 22d7379709da..2dece104f9a1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2158,6 +2158,12 @@ static noinline struct module *load_module(void __user *umod, &mod->num_tracepoints); #endif +#ifdef CONFIG_TRACE_BPRINTK + mod->trace_bprintk_fmt_start = section_objs(hdr, sechdrs, secstrings, + "__trace_bprintk_fmt", sizeof(char *), + &mod->num_trace_bprintk_fmt); +#endif + #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff53509e19f8..46b3cd7a5752 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3848,6 +3848,21 @@ out: } EXPORT_SYMBOL_GPL(trace_vbprintk); +int __trace_bprintk(unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!fmt) + return 0; + + va_start(ap, fmt); + ret = trace_vbprintk(ip, fmt, ap); + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(__trace_bprintk); + static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c index 1f8e532c3fb9..f4c245a5cd33 100644 --- a/kernel/trace/trace_bprintk.c +++ b/kernel/trace/trace_bprintk.c @@ -19,9 +19,21 @@ #include "trace.h" +#ifdef CONFIG_MODULES + /* binary printk basic */ static DEFINE_MUTEX(btrace_mutex); -static int btrace_metadata_count; +/* + * modules trace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt + * which are queued on trace_bprintk_fmt_list. + */ +static LIST_HEAD(trace_bprintk_fmt_list); + +struct trace_bprintk_fmt { + struct list_head list; + char fmt[0]; +}; + static inline void lock_btrace(void) { @@ -33,26 +45,75 @@ static inline void unlock_btrace(void) mutex_unlock(&btrace_mutex); } -static void get_btrace_metadata(void) + +static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) { - lock_btrace(); - btrace_metadata_count++; - unlock_btrace(); + struct trace_bprintk_fmt *pos; + list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { + if (!strcmp(pos->fmt, fmt)) + return pos; + } + return NULL; } -static void put_btrace_metadata(void) +static +void hold_module_trace_bprintk_format(const char **start, const char **end) { + const char **iter; lock_btrace(); - btrace_metadata_count--; + for (iter = start; iter < end; iter++) { + struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); + if (tb_fmt) { + *iter = tb_fmt->fmt; + continue; + } + + tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) + + strlen(*iter) + 1, GFP_KERNEL); + if (tb_fmt) { + list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); + strcpy(tb_fmt->fmt, *iter); + *iter = tb_fmt->fmt; + } else + *iter = NULL; + } unlock_btrace(); } +static int module_trace_bprintk_format_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + if (mod->num_trace_bprintk_fmt) { + const char **start = mod->trace_bprintk_fmt_start; + const char **end = start + mod->num_trace_bprintk_fmt; + + if (val == MODULE_STATE_COMING) + hold_module_trace_bprintk_format(start, end); + } + return 0; +} + +#else /* !CONFIG_MODULES */ +__init static int +module_trace_bprintk_format_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + + +__initdata_or_module static +struct notifier_block module_trace_bprintk_format_nb = { + .notifier_call = module_trace_bprintk_format_notify, +}; + /* events tracer */ int trace_bprintk_enable; static void start_bprintk_trace(struct trace_array *tr) { - get_btrace_metadata(); tracing_reset_online_cpus(tr); trace_bprintk_enable = 1; } @@ -61,7 +122,6 @@ static void stop_bprintk_trace(struct trace_array *tr) { trace_bprintk_enable = 0; tracing_reset_online_cpus(tr); - put_btrace_metadata(); } static int init_bprintk_trace(struct trace_array *tr) @@ -81,7 +141,14 @@ static struct tracer bprintk_trace __read_mostly = static __init int init_bprintk(void) { - return register_tracer(&bprintk_trace); + int ret = register_module_notifier(&module_trace_bprintk_format_nb); + if (ret) + return ret; + + ret = register_tracer(&bprintk_trace); + if (ret) + unregister_module_notifier(&module_trace_bprintk_format_nb); + return ret; } device_initcall(init_bprintk); -- cgit v1.2.3 From 769b0441f438c4bb4872cb8560eb6fe51bcc09ee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Mar 2009 17:21:49 +0100 Subject: tracing/core: drop the old trace_printk() implementation in favour of trace_bprintk() Impact: faster and lighter tracing Now that we have trace_bprintk() which is faster and consume lesser memory than trace_printk() and has the same purpose, we can now drop the old implementation in favour of the binary one from trace_bprintk(), which means we move all the implementation of trace_bprintk() to trace_printk(), so the Api doesn't change except that we must now use trace_seq_bprintk() to print the TRACE_PRINT entries. Some changes result of this: - Previously, trace_bprintk depended of a single tracer and couldn't work without. This tracer has been dropped and the whole implementation of trace_printk() (like the module formats management) is now integrated in the tracing core (comes with CONFIG_TRACING), though we keep the file trace_printk (previously trace_bprintk.c) where we can find the module management. Thus we don't overflow trace.c - changes some parts to use trace_seq_bprintk() to print TRACE_PRINT entries. - change a bit trace_printk/trace_vprintk macros to support non-builtin formats constants, and fix 'const' qualifiers warnings. But this is all transparent for developers. - etc... V2: - Rebase against last changes - Fix mispell on the changelog V3: - Rebase against last changes (moving trace_printk() to kernel.h) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 25 ----- include/linux/kernel.h | 34 +++++- include/linux/module.h | 2 +- kernel/trace/Kconfig | 7 +- kernel/trace/Makefile | 2 +- kernel/trace/trace.c | 212 ++++++++++------------------------- kernel/trace/trace.h | 14 +-- kernel/trace/trace_bprintk.c | 154 ------------------------- kernel/trace/trace_functions_graph.c | 6 +- kernel/trace/trace_mmiotrace.c | 9 +- kernel/trace/trace_output.c | 70 ++---------- kernel/trace/trace_output.h | 2 + kernel/trace/trace_printk.c | 138 +++++++++++++++++++++++ 13 files changed, 262 insertions(+), 413 deletions(-) delete mode 100644 kernel/trace/trace_bprintk.c create mode 100644 kernel/trace/trace_printk.c (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1cc8ca453a9b..e1583f2639b0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -223,31 +223,6 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); -#ifdef CONFIG_TRACE_BPRINTK -extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); -extern int __trace_bprintk(unsigned long ip, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); - -static inline void ____trace_bprintk_check_format(const char *fmt, ...) - __attribute__ ((format (printf, 1, 2))); -static inline void ____trace_bprintk_check_format(const char *fmt, ...) {} -#define __trace_bprintk_check_format(fmt, args...) \ -do { \ - if (0) \ - ____trace_bprintk_check_format(fmt, ##args); \ -} while (0) - -#define trace_bprintk(fmt, args...) \ -do { \ - static char *__attribute__((section("__trace_bprintk_fmt"))) \ - trace_bprintk_fmt = fmt; \ - __trace_bprintk_check_format(fmt, ##args); \ - __trace_bprintk(_THIS_IP_, trace_bprintk_fmt, ##args); \ -} while (0) -#else -#define trace_bprintk trace_printk -#endif - /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7aef15c4645e..4e726b9a71ec 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -423,6 +423,16 @@ extern void ftrace_off_permanent(void); extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +static inline void __attribute__ ((format (printf, 1, 2))) +____trace_printk_check_format(const char *fmt, ...) +{ +} +#define __trace_printk_check_format(fmt, args...) \ +do { \ + if (0) \ + ____trace_printk_check_format(fmt, ##args); \ +} while (0) + /** * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing @@ -439,13 +449,31 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); * Please refrain from leaving trace_printks scattered around in * your code. */ -# define trace_printk(fmt...) __trace_printk(_THIS_IP_, fmt) + +#define trace_printk(fmt, args...) \ +do { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))); \ + trace_printk_fmt = fmt; \ + __trace_printk_check_format(fmt, ##args); \ + __trace_printk(_THIS_IP_, trace_printk_fmt, ##args); \ +} while (0) + extern int __trace_printk(unsigned long ip, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -# define ftrace_vprintk(fmt, ap) __trace_printk(_THIS_IP_, fmt, ap) + +#define ftrace_vprintk(fmt, vargs) \ +do { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))); \ + trace_printk_fmt = fmt; \ + __ftrace_vprintk(_THIS_IP_, trace_printk_fmt, vargs); \ +} while (0) + extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); + extern void ftrace_dump(void); #else static inline void @@ -467,7 +495,7 @@ ftrace_vprintk(const char *fmt, va_list ap) return 0; } static inline void ftrace_dump(void) { } -#endif +#endif /* CONFIG_TRACING */ /* * Display an IP address in readable format. diff --git a/include/linux/module.h b/include/linux/module.h index 8cbec972d8e7..22d9878e868c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -329,7 +329,7 @@ struct module unsigned int num_tracepoints; #endif -#ifdef CONFIG_TRACE_BPRINTK +#ifdef CONFIG_TRACING const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; #endif diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ad8d3617d0a6..8e4a2a61cd75 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -52,6 +52,7 @@ config TRACING select STACKTRACE if STACKTRACE_SUPPORT select TRACEPOINTS select NOP_TRACER + select BINARY_PRINTF # # Minimum requirements an architecture has to meet for us to @@ -97,12 +98,6 @@ config FUNCTION_GRAPH_TRACER This is done by setting the current return address on the current task structure into a stack of calls. -config TRACE_BPRINTK - bool "Binary printk for tracing" - default y - depends on TRACING - select BINARY_PRINTF - config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 46557ef4c379..c7a2943796eb 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -22,7 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o -obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o +obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 46b3cd7a5752..cc94f8642485 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1169,6 +1169,67 @@ void trace_graph_return(struct ftrace_graph_ret *trace) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +/** + * trace_vprintk - write binary msg to tracing buffer + * + */ +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +{ + static DEFINE_SPINLOCK(trace_buf_lock); + static u32 trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + struct print_entry *entry; + unsigned long flags; + int resched; + int cpu, len = 0, size, pc; + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + spin_lock_irqsave(&trace_buf_lock, flags); + len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + if (len > TRACE_BUF_SIZE || len < 0) + goto out_unlock; + + size = sizeof(*entry) + sizeof(u32) * len; + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->depth = depth; + entry->fmt = fmt; + + memcpy(entry->buf, trace_buf, sizeof(u32) * len); + ring_buffer_unlock_commit(tr->buffer, event); + +out_unlock: + spin_unlock_irqrestore(&trace_buf_lock, flags); + +out: + ftrace_preempt_enable(resched); + unpause_graph_tracing(); + + return len; +} +EXPORT_SYMBOL_GPL(trace_vprintk); + enum trace_file_type { TRACE_FILE_LAT_FMT = 1, TRACE_FILE_ANNOTATE = 2, @@ -1564,7 +1625,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%s", field->buf); + ret = trace_seq_bprintf(s, field->fmt, field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -3714,155 +3775,6 @@ static __init int tracer_init_debugfs(void) return 0; } -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) -{ - static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; - static char trace_buf[TRACE_BUF_SIZE]; - - struct ring_buffer_event *event; - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - int cpu, len = 0, size, pc; - struct print_entry *entry; - unsigned long irq_flags; - - if (tracing_disabled || tracing_selftest_running) - return 0; - - pc = preempt_count(); - preempt_disable_notrace(); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - - if (unlikely(atomic_read(&data->disabled))) - goto out; - - pause_graph_tracing(); - raw_local_irq_save(irq_flags); - __raw_spin_lock(&trace_buf_lock); - len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); - - len = min(len, TRACE_BUF_SIZE-1); - trace_buf[len] = 0; - - size = sizeof(*entry) + len + 1; - event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); - if (!event) - goto out_unlock; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->depth = depth; - - memcpy(&entry->buf, trace_buf, len); - entry->buf[len] = 0; - ring_buffer_unlock_commit(tr->buffer, event); - - out_unlock: - __raw_spin_unlock(&trace_buf_lock); - raw_local_irq_restore(irq_flags); - unpause_graph_tracing(); - out: - preempt_enable_notrace(); - - return len; -} -EXPORT_SYMBOL_GPL(trace_vprintk); - -int __trace_printk(unsigned long ip, const char *fmt, ...) -{ - int ret; - va_list ap; - - if (!(trace_flags & TRACE_ITER_PRINTK)) - return 0; - - va_start(ap, fmt); - ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); - va_end(ap); - return ret; -} -EXPORT_SYMBOL_GPL(__trace_printk); - -int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) -{ - if (!(trace_flags & TRACE_ITER_PRINTK)) - return 0; - - return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); -} -EXPORT_SYMBOL_GPL(__ftrace_vprintk); - -/** - * trace_vbprintk - write binary msg to tracing buffer - * - * Caller must insure @fmt are valid when msg is in tracing buffer. - */ -int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) -{ - static DEFINE_SPINLOCK(trace_buf_lock); - static u32 trace_buf[TRACE_BUF_SIZE]; - - struct ring_buffer_event *event; - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - struct bprintk_entry *entry; - unsigned long flags; - int resched; - int cpu, len = 0, size, pc; - - if (tracing_disabled || !trace_bprintk_enable) - return 0; - - pc = preempt_count(); - resched = ftrace_preempt_disable(); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - - if (unlikely(atomic_read(&data->disabled))) - goto out; - - spin_lock_irqsave(&trace_buf_lock, flags); - len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); - - if (len > TRACE_BUF_SIZE || len < 0) - goto out_unlock; - - size = sizeof(*entry) + sizeof(u32) * len; - event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc); - if (!event) - goto out_unlock; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->fmt = fmt; - - memcpy(entry->buf, trace_buf, sizeof(u32) * len); - ring_buffer_unlock_commit(tr->buffer, event); - -out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, flags); - -out: - ftrace_preempt_enable(resched); - - return len; -} -EXPORT_SYMBOL_GPL(trace_vbprintk); - -int __trace_bprintk(unsigned long ip, const char *fmt, ...) -{ - int ret; - va_list ap; - - if (!fmt) - return 0; - - va_start(ap, fmt); - ret = trace_vbprintk(ip, fmt, ap); - va_end(ap); - return ret; -} -EXPORT_SYMBOL_GPL(__trace_bprintk); - static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0f5077f8f957..6140922392c8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,7 +20,6 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, - TRACE_BPRINTK, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -120,16 +119,10 @@ struct userstack_entry { */ struct print_entry { struct trace_entry ent; - unsigned long ip; + unsigned long ip; int depth; - char buf[]; -}; - -struct bprintk_entry { - struct trace_entry ent; - unsigned long ip; - const char *fmt; - u32 buf[]; + const char *fmt; + u32 buf[]; }; #ifdef CONFIG_TRACE_BPRINTK extern int trace_bprintk_enable; @@ -296,7 +289,6 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ - IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c deleted file mode 100644 index f4c245a5cd33..000000000000 --- a/kernel/trace/trace_bprintk.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * trace binary printk - * - * Copyright (C) 2008 Lai Jiangshan - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "trace.h" - -#ifdef CONFIG_MODULES - -/* binary printk basic */ -static DEFINE_MUTEX(btrace_mutex); -/* - * modules trace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt - * which are queued on trace_bprintk_fmt_list. - */ -static LIST_HEAD(trace_bprintk_fmt_list); - -struct trace_bprintk_fmt { - struct list_head list; - char fmt[0]; -}; - - -static inline void lock_btrace(void) -{ - mutex_lock(&btrace_mutex); -} - -static inline void unlock_btrace(void) -{ - mutex_unlock(&btrace_mutex); -} - - -static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) -{ - struct trace_bprintk_fmt *pos; - list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { - if (!strcmp(pos->fmt, fmt)) - return pos; - } - return NULL; -} - -static -void hold_module_trace_bprintk_format(const char **start, const char **end) -{ - const char **iter; - lock_btrace(); - for (iter = start; iter < end; iter++) { - struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); - if (tb_fmt) { - *iter = tb_fmt->fmt; - continue; - } - - tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) - + strlen(*iter) + 1, GFP_KERNEL); - if (tb_fmt) { - list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); - strcpy(tb_fmt->fmt, *iter); - *iter = tb_fmt->fmt; - } else - *iter = NULL; - } - unlock_btrace(); -} - -static int module_trace_bprintk_format_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - if (mod->num_trace_bprintk_fmt) { - const char **start = mod->trace_bprintk_fmt_start; - const char **end = start + mod->num_trace_bprintk_fmt; - - if (val == MODULE_STATE_COMING) - hold_module_trace_bprintk_format(start, end); - } - return 0; -} - -#else /* !CONFIG_MODULES */ -__init static int -module_trace_bprintk_format_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} -#endif /* CONFIG_MODULES */ - - -__initdata_or_module static -struct notifier_block module_trace_bprintk_format_nb = { - .notifier_call = module_trace_bprintk_format_notify, -}; - -/* events tracer */ -int trace_bprintk_enable; - -static void start_bprintk_trace(struct trace_array *tr) -{ - tracing_reset_online_cpus(tr); - trace_bprintk_enable = 1; -} - -static void stop_bprintk_trace(struct trace_array *tr) -{ - trace_bprintk_enable = 0; - tracing_reset_online_cpus(tr); -} - -static int init_bprintk_trace(struct trace_array *tr) -{ - start_bprintk_trace(tr); - return 0; -} - -static struct tracer bprintk_trace __read_mostly = -{ - .name = "events", - .init = init_bprintk_trace, - .reset = stop_bprintk_trace, - .start = start_bprintk_trace, - .stop = stop_bprintk_trace, -}; - -static __init int init_bprintk(void) -{ - int ret = register_module_notifier(&module_trace_bprintk_format_nb); - if (ret) - return ret; - - ret = register_tracer(&bprintk_trace); - if (ret) - unregister_module_notifier(&module_trace_bprintk_format_nb); - return ret; -} - -device_initcall(init_bprintk); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e527f2f66c73..453ebd3b636e 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -742,7 +742,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s, } /* The comment */ - ret = trace_seq_printf(s, "/* %s", trace->buf); + ret = trace_seq_printf(s, "/* "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_bprintf(s, trace->fmt, trace->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index c401b908e805..23e346a734ca 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -254,15 +254,18 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; struct print_entry *print = (struct print_entry *)entry; - const char *msg = print->buf; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; int ret; /* The trailing newline must be in the message. */ - ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); + ret = trace_seq_printf(s, "MARK %u.%06lu ", secs, usec_rem); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_bprintf(s, print->fmt, print->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 4ab71201862e..ef8fd661b217 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -53,8 +53,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } -static int -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { int len = (PAGE_SIZE - 1) - s->len; int ret; @@ -834,54 +833,12 @@ static struct trace_event trace_user_stack_event = { }; /* TRACE_PRINT */ -static enum print_line_t trace_print_print(struct trace_iterator *iter, - int flags) -{ - struct print_entry *field; - struct trace_seq *s = &iter->seq; - - trace_assign_type(field, iter->ent); - - if (!seq_print_ip_sym(s, field->ip, flags)) - goto partial; - - if (!trace_seq_printf(s, ": %s", field->buf)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; -} - -static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) -{ - struct print_entry *field; - - trace_assign_type(field, iter->ent); - - if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; -} - -static struct trace_event trace_print_event = { - .type = TRACE_PRINT, - .trace = trace_print_print, - .raw = trace_print_raw, -}; - -/* TRACE_BPRINTK */ static enum print_line_t -trace_bprintk_print(struct trace_iterator *iter, int flags) +trace_print_print(struct trace_iterator *iter, int flags) { struct trace_entry *entry = iter->ent; struct trace_seq *s = &iter->seq; - struct bprintk_entry *field; + struct print_entry *field; trace_assign_type(field, entry); @@ -900,14 +857,13 @@ trace_bprintk_print(struct trace_iterator *iter, int flags) return TRACE_TYPE_PARTIAL_LINE; } -static enum print_line_t -trace_bprintk_raw(struct trace_iterator *iter, int flags) + +static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) { - struct trace_entry *entry = iter->ent; + struct print_entry *field; struct trace_seq *s = &iter->seq; - struct bprintk_entry *field; - trace_assign_type(field, entry); + trace_assign_type(field, iter->ent); if (!trace_seq_printf(s, ": %lx : ", field->ip)) goto partial; @@ -921,12 +877,11 @@ trace_bprintk_raw(struct trace_iterator *iter, int flags) return TRACE_TYPE_PARTIAL_LINE; } -static struct trace_event trace_bprintk_event = { - .type = TRACE_BPRINTK, - .trace = trace_bprintk_print, - .raw = trace_bprintk_raw, - .hex = trace_nop_print, - .binary = trace_nop_print, + +static struct trace_event trace_print_event = { + .type = TRACE_PRINT, + .trace = trace_print_print, + .raw = trace_print_raw, }; static struct trace_event *events[] __initdata = { @@ -937,7 +892,6 @@ static struct trace_event *events[] __initdata = { &trace_stack_event, &trace_user_stack_event, &trace_print_event, - &trace_bprintk_event, NULL }; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 8a34d688ed63..3b90e6ade1aa 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -18,6 +18,8 @@ struct trace_event { extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); extern int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c new file mode 100644 index 000000000000..a50aea22e929 --- /dev/null +++ b/kernel/trace/trace_printk.c @@ -0,0 +1,138 @@ +/* + * trace binary printk + * + * Copyright (C) 2008 Lai Jiangshan + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +#ifdef CONFIG_MODULES + +/* + * modules trace_printk()'s formats are autosaved in struct trace_bprintk_fmt + * which are queued on trace_bprintk_fmt_list. + */ +static LIST_HEAD(trace_bprintk_fmt_list); + +/* serialize accesses to trace_bprintk_fmt_list */ +static DEFINE_MUTEX(btrace_mutex); + +struct trace_bprintk_fmt { + struct list_head list; + char fmt[0]; +}; + +static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) +{ + struct trace_bprintk_fmt *pos; + list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { + if (!strcmp(pos->fmt, fmt)) + return pos; + } + return NULL; +} + +static +void hold_module_trace_bprintk_format(const char **start, const char **end) +{ + const char **iter; + + mutex_lock(&btrace_mutex); + for (iter = start; iter < end; iter++) { + struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); + if (tb_fmt) { + *iter = tb_fmt->fmt; + continue; + } + + tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) + + strlen(*iter) + 1, GFP_KERNEL); + if (tb_fmt) { + list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); + strcpy(tb_fmt->fmt, *iter); + *iter = tb_fmt->fmt; + } else + *iter = NULL; + } + mutex_unlock(&btrace_mutex); +} + +static int module_trace_bprintk_format_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + if (mod->num_trace_bprintk_fmt) { + const char **start = mod->trace_bprintk_fmt_start; + const char **end = start + mod->num_trace_bprintk_fmt; + + if (val == MODULE_STATE_COMING) + hold_module_trace_bprintk_format(start, end); + } + return 0; +} + +#else /* !CONFIG_MODULES */ +__init static int +module_trace_bprintk_format_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + + +__initdata_or_module static +struct notifier_block module_trace_bprintk_format_nb = { + .notifier_call = module_trace_bprintk_format_notify, +}; + +int __trace_printk(unsigned long ip, const char *fmt, ...) + { + int ret; + va_list ap; + + if (unlikely(!fmt)) + return 0; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + va_start(ap, fmt); + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(__trace_printk); + +int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) + { + if (unlikely(!fmt)) + return 0; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); +} +EXPORT_SYMBOL_GPL(__ftrace_vprintk); + + +static __init int init_trace_printk(void) +{ + return register_module_notifier(&module_trace_bprintk_format_nb); +} + +early_initcall(init_trace_printk); -- cgit v1.2.3 From 7bffc23e56e92c14b787bf4d95249a32085bfed5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Mar 2009 10:11:36 +0100 Subject: tracing: optimize trace_printk() Impact: micro-optimization trace_printk() does this unconditionally: trace_printk_fmt = fmt; Where trace_printk_fmt is an entry into a global array. This is very SMP-unfriendly. So only write it once per bootup. Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4e726b9a71ec..7742798c9208 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -454,7 +454,10 @@ do { \ do { \ static const char *trace_printk_fmt \ __attribute__((section("__trace_printk_fmt"))); \ - trace_printk_fmt = fmt; \ + \ + if (!trace_printk_fmt) \ + trace_printk_fmt = fmt; \ + \ __trace_printk_check_format(fmt, ##args); \ __trace_printk(_THIS_IP_, trace_printk_fmt, ##args); \ } while (0) @@ -467,7 +470,10 @@ __trace_printk(unsigned long ip, const char *fmt, ...) do { \ static const char *trace_printk_fmt \ __attribute__((section("__trace_printk_fmt"))); \ - trace_printk_fmt = fmt; \ + \ + if (!trace_printk_fmt) \ + trace_printk_fmt = fmt; \ + \ __ftrace_vprintk(_THIS_IP_, trace_printk_fmt, vargs); \ } while (0) -- cgit v1.2.3 From 2939b0469d04ba9ac791aca9a81625d7eb50662b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Mar 2009 15:47:18 -0400 Subject: tracing: replace TP with TP_ Impact: clean up The macros TPPROTO, TPARGS, TPFMT, TPRAWFMT, and TPCMD all look a bit ugly. This patch adds an underscore to their names. Signed-off-by: Steven Rostedt --- Documentation/tracepoints.txt | 8 +-- include/linux/tracepoint.h | 6 +-- include/trace/block.h | 70 ++++++++++++------------ include/trace/irq_event_types.h | 16 +++--- include/trace/lockdep_event_types.h | 24 ++++----- include/trace/power.h | 12 ++--- include/trace/sched_event_types.h | 98 +++++++++++++++++----------------- include/trace/workqueue.h | 16 +++--- kernel/trace/trace_event_types.h | 28 +++++----- kernel/trace/trace_events_stage_2.h | 6 +-- kernel/trace/trace_events_stage_3.h | 8 +-- kernel/trace/trace_export.c | 8 +-- kernel/trace/trace_format.h | 55 ------------------- samples/tracepoints/tp-samples-trace.h | 8 +-- 14 files changed, 154 insertions(+), 209 deletions(-) delete mode 100644 kernel/trace/trace_format.h (limited to 'include/linux') diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt index 6f0a044f5b5e..4ff43c6de299 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/tracepoints.txt @@ -45,8 +45,8 @@ In include/trace/subsys.h : #include DECLARE_TRACE(subsys_eventname, - TPPROTO(int firstarg, struct task_struct *p), - TPARGS(firstarg, p)); + TP_PROTO(int firstarg, struct task_struct *p), + TP_ARGS(firstarg, p)); In subsys/file.c (where the tracing statement must be added) : @@ -66,10 +66,10 @@ Where : - subsys is the name of your subsystem. - eventname is the name of the event to trace. -- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the +- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the function called by this tracepoint. -- TPARGS(firstarg, p) are the parameters names, same as found in the +- TP_ARGS(firstarg, p) are the parameters names, same as found in the prototype. Connecting a function (probe) to a tracepoint is done by providing a diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 152b2f03fb86..3bcc3e171443 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,8 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ -#define TPPROTO(args...) args -#define TPARGS(args...) args +#define TP_PROTO(args...) args +#define TP_ARGS(args...) args #ifdef CONFIG_TRACEPOINTS @@ -65,7 +65,7 @@ struct tracepoint { { \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ - TPPROTO(proto), TPARGS(args)); \ + TP_PROTO(proto), TP_ARGS(args)); \ } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ diff --git a/include/trace/block.h b/include/trace/block.h index 25c6a1fd5b77..25b7068b819e 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -5,72 +5,72 @@ #include DECLARE_TRACE(block_rq_abort, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_insert, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_issue, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_requeue, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_complete, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_bio_bounce, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_complete, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_backmerge, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_frontmerge, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_queue, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_getrq, - TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + TP_ARGS(q, bio, rw)); DECLARE_TRACE(block_sleeprq, - TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + TP_ARGS(q, bio, rw)); DECLARE_TRACE(block_plug, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_unplug_timer, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_unplug_io, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_split, - TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TPARGS(q, bio, pdu)); + TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), + TP_ARGS(q, bio, pdu)); DECLARE_TRACE(block_remap, - TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TPARGS(q, bio, dev, from, to)); + TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from, sector_t to), + TP_ARGS(q, bio, dev, from, to)); #endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h index 65850bc5ea06..0147d9eef5f4 100644 --- a/include/trace/irq_event_types.h +++ b/include/trace/irq_event_types.h @@ -9,25 +9,25 @@ #define TRACE_SYSTEM irq TRACE_EVENT_FORMAT(irq_handler_entry, - TPPROTO(int irq, struct irqaction *action), - TPARGS(irq, action), - TPFMT("irq=%d handler=%s", irq, action->name), + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name), TRACE_STRUCT( TRACE_FIELD(int, irq, irq) ), - TPRAWFMT("irq %d") + TP_RAW_FMT("irq %d") ); TRACE_EVENT_FORMAT(irq_handler_exit, - TPPROTO(int irq, struct irqaction *action, int ret), - TPARGS(irq, action, ret), - TPFMT("irq=%d handler=%s return=%s", + TP_PROTO(int irq, struct irqaction *action, int ret), + TP_ARGS(irq, action, ret), + TP_FMT("irq=%d handler=%s return=%s", irq, action->name, ret ? "handled" : "unhandled"), TRACE_STRUCT( TRACE_FIELD(int, irq, irq) TRACE_FIELD(int, ret, ret) ), - TPRAWFMT("irq %d ret %d") + TP_RAW_FMT("irq %d ret %d") ); #undef TRACE_SYSTEM diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h index f713d74a82b4..1f00e8b3543e 100644 --- a/include/trace/lockdep_event_types.h +++ b/include/trace/lockdep_event_types.h @@ -10,32 +10,32 @@ #ifdef CONFIG_LOCKDEP TRACE_FORMAT(lock_acquire, - TPPROTO(struct lockdep_map *lock, unsigned int subclass, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *next_lock, unsigned long ip), - TPARGS(lock, subclass, trylock, read, check, next_lock, ip), - TPFMT("%s%s%s", trylock ? "try " : "", + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", read ? "read " : "", lock->name) ); TRACE_FORMAT(lock_release, - TPPROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TPARGS(lock, nested, ip), - TPFMT("%s", lock->name) + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) ); #ifdef CONFIG_LOCK_STAT TRACE_FORMAT(lock_contended, - TPPROTO(struct lockdep_map *lock, unsigned long ip), - TPARGS(lock, ip), - TPFMT("%s", lock->name) + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) ); TRACE_FORMAT(lock_acquired, - TPPROTO(struct lockdep_map *lock, unsigned long ip), - TPARGS(lock, ip), - TPFMT("%s", lock->name) + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) ); #endif diff --git a/include/trace/power.h b/include/trace/power.h index 38aca537e497..ef204666e983 100644 --- a/include/trace/power.h +++ b/include/trace/power.h @@ -18,15 +18,15 @@ struct power_trace { }; DECLARE_TRACE(power_start, - TPPROTO(struct power_trace *it, unsigned int type, unsigned int state), - TPARGS(it, type, state)); + TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), + TP_ARGS(it, type, state)); DECLARE_TRACE(power_mark, - TPPROTO(struct power_trace *it, unsigned int type, unsigned int state), - TPARGS(it, type, state)); + TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), + TP_ARGS(it, type, state)); DECLARE_TRACE(power_end, - TPPROTO(struct power_trace *it), - TPARGS(it)); + TP_PROTO(struct power_trace *it), + TP_ARGS(it)); #endif /* _TRACE_POWER_H */ diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index a6de5c1601a0..71b14828a957 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -9,143 +9,143 @@ #define TRACE_SYSTEM sched TRACE_EVENT_FORMAT(sched_kthread_stop, - TPPROTO(struct task_struct *t), - TPARGS(t), - TPFMT("task %s:%d", t->comm, t->pid), + TP_PROTO(struct task_struct *t), + TP_ARGS(t), + TP_FMT("task %s:%d", t->comm, t->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, t->pid) ), - TPRAWFMT("task %d") + TP_RAW_FMT("task %d") ); TRACE_EVENT_FORMAT(sched_kthread_stop_ret, - TPPROTO(int ret), - TPARGS(ret), - TPFMT("ret=%d", ret), + TP_PROTO(int ret), + TP_ARGS(ret), + TP_FMT("ret=%d", ret), TRACE_STRUCT( TRACE_FIELD(int, ret, ret) ), - TPRAWFMT("ret=%d") + TP_RAW_FMT("ret=%d") ); TRACE_EVENT_FORMAT(sched_wait_task, - TPPROTO(struct rq *rq, struct task_struct *p), - TPARGS(rq, p), - TPFMT("task %s:%d", p->comm, p->pid), + TP_PROTO(struct rq *rq, struct task_struct *p), + TP_ARGS(rq, p), + TP_FMT("task %s:%d", p->comm, p->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) ), - TPRAWFMT("task %d") + TP_RAW_FMT("task %d") ); TRACE_EVENT_FORMAT(sched_wakeup, - TPPROTO(struct rq *rq, struct task_struct *p, int success), - TPARGS(rq, p, success), - TPFMT("task %s:%d %s", + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success), + TP_FMT("task %s:%d %s", p->comm, p->pid, success ? "succeeded" : "failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), - TPRAWFMT("task %d success=%d") + TP_RAW_FMT("task %d success=%d") ); TRACE_EVENT_FORMAT(sched_wakeup_new, - TPPROTO(struct rq *rq, struct task_struct *p, int success), - TPARGS(rq, p, success), - TPFMT("task %s:%d", + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success), + TP_FMT("task %s:%d", p->comm, p->pid, success ? "succeeded" : "failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), - TPRAWFMT("task %d success=%d") + TP_RAW_FMT("task %d success=%d") ); TRACE_EVENT_FORMAT(sched_switch, - TPPROTO(struct rq *rq, struct task_struct *prev, + TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), - TPARGS(rq, prev, next), - TPFMT("task %s:%d ==> %s:%d", + TP_ARGS(rq, prev, next), + TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, - TPCMD(memcpy(TRACE_ENTRY->next_comm, + TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), - TPRAWFMT("prev %d:%d ==> next %s:%d:%d") + TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); TRACE_EVENT_FORMAT(sched_migrate_task, - TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - TPARGS(p, orig_cpu, dest_cpu), - TPFMT("task %s:%d from: %d to: %d", + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + TP_ARGS(p, orig_cpu, dest_cpu), + TP_FMT("task %s:%d from: %d to: %d", p->comm, p->pid, orig_cpu, dest_cpu), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, orig_cpu, orig_cpu) TRACE_FIELD(int, dest_cpu, dest_cpu) ), - TPRAWFMT("task %d from: %d to: %d") + TP_RAW_FMT("task %d from: %d to: %d") ); TRACE_EVENT_FORMAT(sched_process_free, - TPPROTO(struct task_struct *p), - TPARGS(p), - TPFMT("task %s:%d", p->comm, p->pid), + TP_PROTO(struct task_struct *p), + TP_ARGS(p), + TP_FMT("task %s:%d", p->comm, p->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) ), - TPRAWFMT("task %d") + TP_RAW_FMT("task %d") ); TRACE_EVENT_FORMAT(sched_process_exit, - TPPROTO(struct task_struct *p), - TPARGS(p), - TPFMT("task %s:%d", p->comm, p->pid), + TP_PROTO(struct task_struct *p), + TP_ARGS(p), + TP_FMT("task %s:%d", p->comm, p->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) ), - TPRAWFMT("task %d") + TP_RAW_FMT("task %d") ); TRACE_EVENT_FORMAT(sched_process_wait, - TPPROTO(struct pid *pid), - TPARGS(pid), - TPFMT("pid %d", pid_nr(pid)), + TP_PROTO(struct pid *pid), + TP_ARGS(pid), + TP_FMT("pid %d", pid_nr(pid)), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, pid_nr(pid)) ), - TPRAWFMT("task %d") + TP_RAW_FMT("task %d") ); TRACE_EVENT_FORMAT(sched_process_fork, - TPPROTO(struct task_struct *parent, struct task_struct *child), - TPARGS(parent, child), - TPFMT("parent %s:%d child %s:%d", + TP_PROTO(struct task_struct *parent, struct task_struct *child), + TP_ARGS(parent, child), + TP_FMT("parent %s:%d child %s:%d", parent->comm, parent->pid, child->comm, child->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, parent, parent->pid) TRACE_FIELD(pid_t, child, child->pid) ), - TPRAWFMT("parent %d child %d") + TP_RAW_FMT("parent %d child %d") ); TRACE_EVENT_FORMAT(sched_signal_send, - TPPROTO(int sig, struct task_struct *p), - TPARGS(sig, p), - TPFMT("sig: %d task %s:%d", sig, p->comm, p->pid), + TP_PROTO(int sig, struct task_struct *p), + TP_ARGS(sig, p), + TP_FMT("sig: %d task %s:%d", sig, p->comm, p->pid), TRACE_STRUCT( TRACE_FIELD(int, sig, sig) TRACE_FIELD(pid_t, pid, p->pid) ), - TPRAWFMT("sig: %d task %d") + TP_RAW_FMT("sig: %d task %d") ); #undef TRACE_SYSTEM diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h index 867829df4571..7626523deeba 100644 --- a/include/trace/workqueue.h +++ b/include/trace/workqueue.h @@ -6,20 +6,20 @@ #include DECLARE_TRACE(workqueue_insertion, - TPPROTO(struct task_struct *wq_thread, struct work_struct *work), - TPARGS(wq_thread, work)); + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + TP_ARGS(wq_thread, work)); DECLARE_TRACE(workqueue_execution, - TPPROTO(struct task_struct *wq_thread, struct work_struct *work), - TPARGS(wq_thread, work)); + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + TP_ARGS(wq_thread, work)); /* Trace the creation of one workqueue thread on a cpu */ DECLARE_TRACE(workqueue_creation, - TPPROTO(struct task_struct *wq_thread, int cpu), - TPARGS(wq_thread, cpu)); + TP_PROTO(struct task_struct *wq_thread, int cpu), + TP_ARGS(wq_thread, cpu)); DECLARE_TRACE(workqueue_destruction, - TPPROTO(struct task_struct *wq_thread), - TPARGS(wq_thread)); + TP_PROTO(struct task_struct *wq_thread), + TP_ARGS(wq_thread)); #endif /* __TRACE_WORKQUEUE_H */ diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index fb4eba166433..d94179aa1fc2 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -10,7 +10,7 @@ TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned long, parent_ip, parent_ip) ), - TPRAWFMT(" %lx <-- %lx") + TP_RAW_FMT(" %lx <-- %lx") ); TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, @@ -19,7 +19,7 @@ TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, TRACE_FIELD(unsigned long, graph_ent.func, func) TRACE_FIELD(int, graph_ent.depth, depth) ), - TPRAWFMT("--> %lx (%d)") + TP_RAW_FMT("--> %lx (%d)") ); TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, @@ -28,7 +28,7 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, TRACE_FIELD(unsigned long, ret.func, func) TRACE_FIELD(int, ret.depth, depth) ), - TPRAWFMT("<-- %lx (%d)") + TP_RAW_FMT("<-- %lx (%d)") ); TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, @@ -41,7 +41,7 @@ TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, TRACE_FIELD(unsigned char, next_state, next_state) TRACE_FIELD(unsigned int, next_cpu, next_cpu) ), - TPRAWFMT("%u:%u:%u ==+ %u:%u:%u [%03u]") + TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, @@ -54,7 +54,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, TRACE_FIELD(unsigned char, next_state, next_state) TRACE_FIELD(unsigned int, next_cpu, next_cpu) ), - TPRAWFMT("%u:%u:%u ==+ %u:%u:%u [%03u]") + TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, @@ -63,7 +63,7 @@ TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, TRACE_FIELD(unsigned long, arg2, arg2) TRACE_FIELD(unsigned long, arg3, arg3) ), - TPRAWFMT("(%08lx) (%08lx) (%08lx)") + TP_RAW_FMT("(%08lx) (%08lx) (%08lx)") ); /* @@ -83,7 +83,7 @@ TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, TRACE_FIELD(unsigned long, caller[6], stack6) TRACE_FIELD(unsigned long, caller[7], stack7) ), - TPRAWFMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ); @@ -98,7 +98,7 @@ TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, TRACE_FIELD(unsigned long, caller[6], stack6) TRACE_FIELD(unsigned long, caller[7], stack7) ), - TPRAWFMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ); @@ -108,7 +108,7 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_FIELD(unsigned int, depth, depth) TRACE_FIELD_ZERO_CHAR(buf) ), - TPRAWFMT("%08lx (%d) %s") + TP_RAW_FMT("%08lx (%d) %s") ); TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, @@ -118,7 +118,7 @@ TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) TRACE_FIELD(char, correct, correct) ), - TPRAWFMT("%u:%s:%s (%u)") + TP_RAW_FMT("%u:%s:%s (%u)") ); TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, @@ -126,7 +126,7 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, TRACE_FIELD(u64, from, from) TRACE_FIELD(u64, to, to) ), - TPRAWFMT("from: %llx to: %llx") + TP_RAW_FMT("from: %llx to: %llx") ); TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, @@ -136,7 +136,7 @@ TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, TRACE_FIELD(int, state_data.type, type) TRACE_FIELD(int, state_data.state, state) ), - TPRAWFMT("%llx->%llx type:%u state:%u") + TP_RAW_FMT("%llx->%llx type:%u state:%u") ); TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, @@ -149,7 +149,7 @@ TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) TRACE_FIELD(int, node, node) ), - TPRAWFMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" + TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" " flags:%x node:%d") ); @@ -159,7 +159,7 @@ TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, TRACE_FIELD(unsigned long, call_site, call_site) TRACE_FIELD(const void *, ptr, ptr) ), - TPRAWFMT("type:%u call_site:%lx ptr:%p") + TP_RAW_FMT("type:%u call_site:%lx ptr:%p") ); #undef TRACE_SYSTEM diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index d24a97e74aea..8e2e0f56c2a8 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -20,7 +20,7 @@ * * field = (typeof(field))entry; * - * ret = trace_seq_printf(s, "%s", "\n"); + * ret = trace_seq_printf(s, "%s", "\n"); * if (!ret) * return TRACE_TYPE_PARTIAL_LINE; * @@ -44,8 +44,8 @@ field->item, -#undef TPRAWFMT -#define TPRAWFMT(args...) args +#undef TP_RAW_FMT +#define TP_RAW_FMT(args...) args #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 2c8d76c7dbed..557ca52bcdcd 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -106,8 +106,8 @@ * */ -#undef TPFMT -#define TPFMT(fmt, args...) fmt "\n", ##args +#undef TP_FMT +#define TP_FMT(fmt, args...) fmt "\n", ##args #define _TRACE_FORMAT(call, proto, args, fmt) \ static void ftrace_event_##call(proto) \ @@ -152,8 +152,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD(type, item, assign)\ entry->item = assign; -#undef TPCMD -#define TPCMD(cmd...) cmd +#undef TP_CMD +#define TP_CMD(cmd...) cmd #undef TRACE_ENTRY #define TRACE_ENTRY entry diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 7162ab49d05d..e62bc10f8103 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -26,8 +26,8 @@ return 0; -#undef TPRAWFMT -#define TPRAWFMT(args...) args +#undef TP_RAW_FMT +#define TP_RAW_FMT(args...) args #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ @@ -57,8 +57,8 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_FIELD(type, item, assign)\ entry->item = assign; -#undef TPCMD -#define TPCMD(cmd...) cmd +#undef TP_CMD +#define TP_CMD(cmd...) cmd #undef TRACE_ENTRY #define TRACE_ENTRY entry diff --git a/kernel/trace/trace_format.h b/kernel/trace/trace_format.h deleted file mode 100644 index 97e59a9c82ea..000000000000 --- a/kernel/trace/trace_format.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " size:%d; offset:%d;\n", - * sizeof(field.type), - * offsetof(struct ftrace_raw_##call, - * item)); - * - * } - */ - -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - - -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -static int \ -ftrace_format_##call(struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field; \ - int ret; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ - \ - return ret; \ -} - diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h index 01724e04c556..dffdc49878af 100644 --- a/samples/tracepoints/tp-samples-trace.h +++ b/samples/tracepoints/tp-samples-trace.h @@ -5,9 +5,9 @@ #include DECLARE_TRACE(subsys_event, - TPPROTO(struct inode *inode, struct file *file), - TPARGS(inode, file)); + TP_PROTO(struct inode *inode, struct file *file), + TP_ARGS(inode, file)); DECLARE_TRACE(subsys_eventb, - TPPROTO(void), - TPARGS()); + TP_PROTO(void), + TP_ARGS()); #endif -- cgit v1.2.3 From da4d03020c2af32f73e8bfbab0a66620d85bb9bb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Mar 2009 17:14:30 -0400 Subject: tracing: new format for specialized trace points Impact: clean up and enhancement The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its ability to save data as well as to print the record out. Working with Ingo Molnar, we came up with a new format that is much more pleasing to the eye of C developers. This new macro is more C style than the old macro, and is more obvious to what it does. Here's the example. The only updated macro in this patch is the sched_switch trace point. The old method looked like this: TRACE_EVENT_FORMAT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); The above method is hard to read and requires two format fields. The new method: /* * Tracepoint for task switches, performed by the scheduler: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_printk("task %s:%d [%d] ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->next_comm, __entry->next_pid, __entry->next_prio), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ) ); This macro is called TRACE_EVENT, it is broken up into 5 parts: TP_PROTO: the proto type of the trace point TP_ARGS: the arguments of the trace point TP_STRUCT_entry: the structure layout of the entry in the ring buffer TP_printk: the printk format TP_fast_assign: the method used to write the entry into the ring buffer The structure is the definition of how the event will be saved in the ring buffer. The printk is used by the internal tracing in case of an oops, and the kernel needs to print out the format of the record to the console. This the TP_printk gives a means to show the records in a human readable format. It is also used to print out the data from the trace file. The TP_fast_assign is executed directly. It is basically like a C function, where the __entry is the handle to the record. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 + include/trace/sched_event_types.h | 48 +++++++---- kernel/trace/trace.h | 5 -- kernel/trace/trace_event_types.h | 3 +- kernel/trace/trace_events.c | 159 +----------------------------------- kernel/trace/trace_events_stage_1.h | 28 ++++--- kernel/trace/trace_events_stage_2.h | 89 ++++++++++++++++---- kernel/trace/trace_events_stage_3.h | 34 +++----- kernel/trace/trace_export.c | 23 +++++- 9 files changed, 159 insertions(+), 233 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 3bcc3e171443..6b4f1bb3701e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -160,4 +160,7 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_FORMAT(name, proto, args, fmt, struct, tpfmt) \ TRACE_FORMAT(name, PARAMS(proto), PARAMS(args), PARAMS(fmt)) +#define TRACE_EVENT(name, proto, args, struct, print, assign) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #endif diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index 71b14828a957..aa77fb754038 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -62,25 +62,41 @@ TRACE_EVENT_FORMAT(sched_wakeup_new, TP_RAW_FMT("task %d success=%d") ); -TRACE_EVENT_FORMAT(sched_switch, +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), + struct task_struct *next), + TP_ARGS(rq, prev, next), - TP_FMT("task %s:%d ==> %s:%d", - prev->comm, prev->pid, next->comm, next->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, prev_pid, prev->pid) - TRACE_FIELD(int, prev_prio, prev->prio) - TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], - next_comm, - TP_CMD(memcpy(TRACE_ENTRY->next_comm, - next->comm, - TASK_COMM_LEN))) - TRACE_FIELD(pid_t, next_pid, next->pid) - TRACE_FIELD(int, next_prio, next->prio) + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) ), - TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") - ); + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ) +); TRACE_EVENT_FORMAT(sched_migrate_task, TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2bfb7d11fc17..c5e1d8865fe4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -751,12 +751,7 @@ struct ftrace_event_call { int (*regfunc)(void); void (*unregfunc)(void); int id; - struct dentry *raw_dir; - int raw_enabled; - int type; int (*raw_init)(void); - int (*raw_reg)(void); - void (*raw_unreg)(void); int (*show_format)(struct trace_seq *s); }; diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index d94179aa1fc2..5cca4c978bde 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -106,9 +106,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned int, depth, depth) + TRACE_FIELD(char *, fmt, fmt) TRACE_FIELD_ZERO_CHAR(buf) ), - TP_RAW_FMT("%08lx (%d) %s") + TP_RAW_FMT("%08lx (%d) fmt:%p %s") ); TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index fa32ca320767..1880a6438097 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -59,22 +59,12 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, call->enabled = 0; call->unregfunc(); } - if (call->raw_enabled) { - call->raw_enabled = 0; - call->raw_unreg(); - } break; case 1: - if (!call->enabled && - (call->type & TRACE_EVENT_TYPE_PRINTF)) { + if (!call->enabled) { call->enabled = 1; call->regfunc(); } - if (!call->raw_enabled && - (call->type & TRACE_EVENT_TYPE_RAW)) { - call->raw_enabled = 1; - call->raw_reg(); - } break; } } @@ -300,7 +290,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_call *call = filp->private_data; char *buf; - if (call->enabled || call->raw_enabled) + if (call->enabled) buf = "1\n"; else buf = "0\n"; @@ -346,107 +336,6 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } -static ssize_t -event_type_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[16]; - int r = 0; - - if (call->type & TRACE_EVENT_TYPE_PRINTF) - r += sprintf(buf, "printf\n"); - - if (call->type & TRACE_EVENT_TYPE_RAW) - r += sprintf(buf+r, "raw\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -event_type_write(struct file *filp, const char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[64]; - - /* - * If there's only one type, we can't change it. - * And currently we always have printf type, and we - * may or may not have raw type. - * - * This is a redundant check, the file should be read - * only if this is the case anyway. - */ - - if (!call->raw_init) - return -EPERM; - - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - if (!strncmp(buf, "printf", 6) && - (!buf[6] || isspace(buf[6]))) { - - call->type = TRACE_EVENT_TYPE_PRINTF; - - /* - * If raw enabled, the disable it and enable - * printf type. - */ - if (call->raw_enabled) { - call->raw_enabled = 0; - call->raw_unreg(); - - call->enabled = 1; - call->regfunc(); - } - - } else if (!strncmp(buf, "raw", 3) && - (!buf[3] || isspace(buf[3]))) { - - call->type = TRACE_EVENT_TYPE_RAW; - - /* - * If printf enabled, the disable it and enable - * raw type. - */ - if (call->enabled) { - call->enabled = 0; - call->unregfunc(); - - call->raw_enabled = 1; - call->raw_reg(); - } - } else - return -EINVAL; - - *ppos += cnt; - - return cnt; -} - -static ssize_t -event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[16]; - int r = 0; - - r += sprintf(buf, "printf\n"); - - if (call->raw_init) - r += sprintf(buf+r, "raw\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - #undef FIELD #define FIELD(type, name) \ #type, #name, (unsigned int)offsetof(typeof(field), name), \ @@ -470,6 +359,7 @@ static int trace_write_header(struct trace_seq *s) FIELD(int, pid), FIELD(int, tgid)); } + static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -527,13 +417,6 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = t_stop, }; -static const struct file_operations ftrace_avail_fops = { - .open = ftrace_event_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static const struct file_operations ftrace_set_event_fops = { .open = ftrace_event_seq_open, .read = seq_read, @@ -548,17 +431,6 @@ static const struct file_operations ftrace_enable_fops = { .write = event_enable_write, }; -static const struct file_operations ftrace_type_fops = { - .open = tracing_open_generic, - .read = event_type_read, - .write = event_type_write, -}; - -static const struct file_operations ftrace_available_types_fops = { - .open = tracing_open_generic, - .read = event_available_types_read, -}; - static const struct file_operations ftrace_event_format_fops = { .open = tracing_open_generic, .read = event_format_read, @@ -647,9 +519,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) } } - /* default the output to printf */ - call->type = TRACE_EVENT_TYPE_PRINTF; - call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { pr_warning("Could not create debugfs " @@ -665,21 +534,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) "'%s/enable' entry\n", call->name); } - /* Only let type be writable, if we can change it */ - entry = debugfs_create_file("type", - call->raw_init ? 0644 : 0444, - call->dir, call, - &ftrace_type_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/type' entry\n", call->name); - - entry = debugfs_create_file("available_types", 0444, call->dir, call, - &ftrace_available_types_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/available_types' entry\n", call->name); - /* A trace may not want to export its format */ if (!call->show_format) return 0; @@ -704,13 +558,6 @@ static __init int event_trace_init(void) if (!d_tracer) return 0; - entry = debugfs_create_file("available_events", 0444, d_tracer, - (void *)&show_event_seq_ops, - &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_events' entry\n"); - entry = debugfs_create_file("set_event", 0644, d_tracer, (void *)&show_set_event_seq_ops, &ftrace_set_event_fops); diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index 3830a731424c..edfcbd3a0d1b 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -18,19 +18,23 @@ #define TRACE_FORMAT(call, proto, args, fmt) #undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - }; \ - static struct ftrace_event_call event_##name +#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) + +#undef __array +#define __array(type, item, len) type item[len]; -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +#undef __field +#define __field(type, item) type item; -#define TRACE_FIELD(type, item, assign) \ - type item; -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - type_item; +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, print, assign) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name #include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 8e2e0f56c2a8..d91bf4c56661 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -32,23 +32,14 @@ * in binary. */ -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +#undef __entry +#define __entry field -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - field->item, +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - field->item, - - -#undef TP_RAW_FMT -#define TP_RAW_FMT(args...) args - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ { \ @@ -66,14 +57,76 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ field = (typeof(field))entry; \ \ - ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \ + ret = trace_seq_printf(s, print); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + #include -#include "trace_format.h" +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " size:%d; offset:%d;\n", + * sizeof(field.type), + * offsetof(struct ftrace_raw_##call, + * item)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __entry +#define __entry "REC" + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, func) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 41b82b93c9c7..8e398d864096 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -144,27 +144,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .unregfunc = ftrace_unreg_event_##call, \ } -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TP_CMD -#define TP_CMD(cmd...) cmd - -#undef TRACE_ENTRY -#define TRACE_ENTRY entry +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, raw) \ + TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - cmd; +#undef __entry +#define __entry entry -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ \ static struct ftrace_event_call event_##call; \ \ @@ -185,7 +173,7 @@ static void ftrace_raw_event_##call(proto) \ return; \ entry = ring_buffer_event_data(event); \ \ - tstruct; \ + assign; \ \ trace_current_buffer_unlock_commit(event, irq_flags, pc); \ } \ @@ -226,10 +214,8 @@ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ .raw_init = ftrace_raw_init_event_##call, \ - .raw_reg = ftrace_raw_reg_event_##call, \ - .raw_unreg = ftrace_raw_unreg_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ } diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index e62bc10f8103..23ae78430d58 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -15,7 +15,28 @@ #include "trace_output.h" -#include "trace_format.h" + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) args + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; #undef TRACE_FIELD_ZERO_CHAR #define TRACE_FIELD_ZERO_CHAR(item) \ -- cgit v1.2.3 From 157587d7ac555458da9f682e3250135e468470a6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 00:15:34 -0400 Subject: tracing: remove obsolete TRACE_EVENT_FORMAT macro Impact: clean up The TRACE_EVENT_FORMAT macro is no longer used by trace points and only the DECLARE_TRACE, TRACE_FORMAT or TRACE_EVENT macros should be used by them. Although the TRACE_EVENT_FORMAT macro is still used by the internal tracing utility, it should not be used in core kernel code. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 --- include/trace/lockdep_event_types.h | 2 +- include/trace/sched_event_types.h | 2 +- kernel/trace/trace_events_stage_1.h | 3 --- kernel/trace/trace_events_stage_3.h | 6 +----- 5 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 6b4f1bb3701e..69b56988813d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -157,9 +157,6 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#define TRACE_EVENT_FORMAT(name, proto, args, fmt, struct, tpfmt) \ - TRACE_FORMAT(name, PARAMS(proto), PARAMS(args), PARAMS(fmt)) - #define TRACE_EVENT(name, proto, args, struct, print, assign) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h index 1f00e8b3543e..adccfcd2ec8f 100644 --- a/include/trace/lockdep_event_types.h +++ b/include/trace/lockdep_event_types.h @@ -1,5 +1,5 @@ -#ifndef TRACE_EVENT_FORMAT +#ifndef TRACE_FORMAT # error Do not include this file directly. # error Unless you know what you are doing. #endif diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index 0bbbf410e01f..fb37af672c88 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -1,6 +1,6 @@ /* use instead */ -#ifndef TRACE_EVENT_FORMAT +#ifndef TRACE_EVENT # error Do not include this file directly. # error Unless you know what you are doing. #endif diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index edfcbd3a0d1b..15e9bf965a18 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -17,9 +17,6 @@ #undef TRACE_FORMAT #define TRACE_FORMAT(call, proto, args, fmt) -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) - #undef __array #define __array(type, item, len) type item[len]; diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 8e398d864096..3ba55d4ab073 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -35,7 +35,7 @@ * } * * - * For those macros defined with TRACE_EVENT_FORMAT: + * For those macros defined with TRACE_EVENT: * * static struct ftrace_event_call event_; * @@ -144,10 +144,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .unregfunc = ftrace_unreg_event_##call, \ } -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, raw) \ - TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) - #undef __entry #define __entry entry -- cgit v1.2.3 From 30a8fecc2d34f086df34fe2f2b926f080e002600 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 12:41:38 -0400 Subject: tracing: flip the TP_printk and TP_fast_assign in the TRACE_EVENT macro Impact: clean up In trying to stay consistant with the C style format in the TRACE_EVENT macro, it makes more sense to do the printk after the assigning of the variables. Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 2 +- include/trace/irq_event_types.h | 8 +-- include/trace/sched_event_types.h | 102 ++++++++++++++++++------------------ kernel/trace/trace_events_stage_1.h | 2 +- kernel/trace/trace_events_stage_2.h | 4 +- kernel/trace/trace_events_stage_3.h | 2 +- 6 files changed, 60 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 69b56988813d..c7b09452514b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -157,7 +157,7 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#define TRACE_EVENT(name, proto, args, struct, print, assign) \ +#define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h index 43bcb74dd49f..214bb928fe9e 100644 --- a/include/trace/irq_event_types.h +++ b/include/trace/irq_event_types.h @@ -31,13 +31,13 @@ TRACE_EVENT(irq_handler_exit, __field( int, ret ) ), - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled"), - TP_fast_assign( __entry->irq = irq; __entry->ret = ret; - ) + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") ); #undef TRACE_SYSTEM diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index fb37af672c88..63547dc1125f 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -22,12 +22,12 @@ TRACE_EVENT(sched_kthread_stop, __field( pid_t, pid ) ), - TP_printk("task %s:%d", __entry->comm, __entry->pid), - TP_fast_assign( memcpy(__entry->comm, t->comm, TASK_COMM_LEN); __entry->pid = t->pid; - ) + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) ); /* @@ -43,11 +43,11 @@ TRACE_EVENT(sched_kthread_stop_ret, __field( int, ret ) ), - TP_printk("ret %d", __entry->ret), - TP_fast_assign( __entry->ret = ret; - ) + ), + + TP_printk("ret %d", __entry->ret) ); /* @@ -68,14 +68,14 @@ TRACE_EVENT(sched_wait_task, __field( int, prio ) ), - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio), - TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - ) + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) ); /* @@ -97,16 +97,16 @@ TRACE_EVENT(sched_wakeup, __field( int, success ) ), - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success), - TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - ) + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) ); /* @@ -128,16 +128,16 @@ TRACE_EVENT(sched_wakeup_new, __field( int, success ) ), - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success), - TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - ) + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) ); /* @@ -162,10 +162,6 @@ TRACE_EVENT(sched_switch, __field( int, next_prio ) ), - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio), - TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; @@ -173,7 +169,11 @@ TRACE_EVENT(sched_switch, memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; - ) + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) ); /* @@ -193,17 +193,17 @@ TRACE_EVENT(sched_migrate_task, __field( int, dest_cpu ) ), - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu), - TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; __entry->orig_cpu = orig_cpu; __entry->dest_cpu = dest_cpu; - ) + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) ); /* @@ -221,14 +221,14 @@ TRACE_EVENT(sched_process_free, __field( int, prio ) ), - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio), - TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - ) + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) ); /* @@ -246,14 +246,14 @@ TRACE_EVENT(sched_process_exit, __field( int, prio ) ), - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio), - TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - ) + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) ); /* @@ -271,14 +271,14 @@ TRACE_EVENT(sched_process_wait, __field( int, prio ) ), - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio), - TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); __entry->pid = pid_nr(pid); __entry->prio = current->prio; - ) + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) ); /* @@ -297,16 +297,16 @@ TRACE_EVENT(sched_process_fork, __field( pid_t, child_pid ) ), - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid), - TP_fast_assign( memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); __entry->parent_pid = parent->pid; memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); __entry->child_pid = child->pid; - ) + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) ); /* @@ -324,14 +324,14 @@ TRACE_EVENT(sched_signal_send, __field( pid_t, pid ) ), - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid), - TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->sig = sig; - ) + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) ); #undef TRACE_SYSTEM diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index 15e9bf965a18..82f68443c556 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -27,7 +27,7 @@ #define TP_STRUCT__entry(args...) args #undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, print, assign) \ +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index d91bf4c56661..1ad9f8d2fe45 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -39,7 +39,7 @@ #define TP_printk(fmt, args...) fmt "\n", args #undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ { \ @@ -115,7 +115,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #define TP_fast_assign(args...) args #undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, print, func) \ +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_##call(struct trace_seq *s) \ { \ diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 3ba55d4ab073..d6de06b9201a 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -148,7 +148,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define __entry entry #undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ \ static struct ftrace_event_call event_##call; \ \ -- cgit v1.2.3 From 823f9124fb2e33eeb624d139978a52089f8a02ae Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 12:58:51 -0400 Subject: tracing: document TRACE_EVENT macro in tracepoint.h Impact: clean up / comments Kosaki Motohiro asked about an explanation to the TRACE_EVENT macro. Ingo Molnar replied with a nice description. This patch takes the description that Ingo wrote (with some slight modifications) and adds it to the tracepoint.h file. Reported-by: KOSAKI Motohiro Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 103 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c7b09452514b..119ece224c21 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -157,6 +157,109 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + +/* + * For use with the TRACE_EVENT macro: + * + * We define a tracepoint, its arguments, its printk format + * and its 'fast binay record' layout. + * + * Firstly, name your tracepoint via TRACE_EVENT(name : the + * 'subsystem_event' notation is fine. + * + * Think about this whole construct as the + * 'trace_sched_switch() function' from now on. + * + * + * TRACE_EVENT(sched_switch, + * + * * + * * A function has a regular function arguments + * * prototype, declare it via TP_PROTO(): + * * + * + * TP_PROTO(struct rq *rq, struct task_struct *prev, + * struct task_struct *next), + * + * * + * * Define the call signature of the 'function'. + * * (Design sidenote: we use this instead of a + * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.) + * * + * + * TP_ARGS(rq, prev, next), + * + * * + * * Fast binary tracing: define the trace record via + * * TP_STRUCT__entry(). You can think about it like a + * * regular C structure local variable definition. + * * + * * This is how the trace record is structured and will + * * be saved into the ring buffer. These are the fields + * * that will be exposed to user-space in + * * /debug/tracing/events/<*>/format. + * * + * * The declared 'local variable' is called '__entry' + * * + * * __field(pid_t, prev_prid) is equivalent to a standard declariton: + * * + * * pid_t prev_pid; + * * + * * __array(char, prev_comm, TASK_COMM_LEN) is equivalent to: + * * + * * char prev_comm[TASK_COMM_LEN]; + * * + * + * TP_STRUCT__entry( + * __array( char, prev_comm, TASK_COMM_LEN ) + * __field( pid_t, prev_pid ) + * __field( int, prev_prio ) + * __array( char, next_comm, TASK_COMM_LEN ) + * __field( pid_t, next_pid ) + * __field( int, next_prio ) + * ), + * + * * + * * Assign the entry into the trace record, by embedding + * * a full C statement block into TP_fast_assign(). You + * * can refer to the trace record as '__entry' - + * * otherwise you can put arbitrary C code in here. + * * + * * Note: this C code will execute every time a trace event + * * happens, on an active tracepoint. + * * + * + * TP_fast_assign( + * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + * __entry->prev_pid = prev->pid; + * __entry->prev_prio = prev->prio; + * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + * __entry->next_pid = next->pid; + * __entry->next_prio = next->prio; + * ) + * + * * + * * Formatted output of a trace record via TP_printk(). + * * This is how the tracepoint will appear under ftrace + * * plugins that make use of this tracepoint. + * * + * * (raw-binary tracing wont actually perform this step.) + * * + * + * TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + * __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + * __entry->next_comm, __entry->next_pid, __entry->next_prio), + * + * ); + * + * This macro construct is thus used for the regular printk format + * tracing setup, it is used to construct a function pointer based + * tracepoint callback (this is used by programmatic plugins and + * can also by used by generic instrumentation like SystemTap), and + * it is also used to expose a structured trace record in + * /debug/tracing/events/. + */ + #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -- cgit v1.2.3 From ef18012b248b47ec9a12c3a83ca5e99782d39c5d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 14:10:56 -0400 Subject: tracing: remove funky whitespace in the trace code Impact: clean up There existed a lot of 's in the tracing code. This patch removes them. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 16 +++--- kernel/trace/blktrace.c | 10 ++-- kernel/trace/trace.c | 2 +- kernel/trace/trace_branch.c | 2 +- kernel/trace/trace_events_stage_3.h | 98 ++++++++++++++++++------------------ kernel/trace/trace_export.c | 2 +- kernel/trace/trace_functions_graph.c | 6 +-- kernel/trace/trace_output.c | 14 +++--- kernel/trace/trace_workqueue.c | 6 +-- 9 files changed, 78 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 119ece224c21..d35a7ee7611f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -178,8 +178,8 @@ static inline void tracepoint_synchronize_unregister(void) * * prototype, declare it via TP_PROTO(): * * * - * TP_PROTO(struct rq *rq, struct task_struct *prev, - * struct task_struct *next), + * TP_PROTO(struct rq *rq, struct task_struct *prev, + * struct task_struct *next), * * * * * Define the call signature of the 'function'. @@ -187,7 +187,7 @@ static inline void tracepoint_synchronize_unregister(void) * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.) * * * - * TP_ARGS(rq, prev, next), + * TP_ARGS(rq, prev, next), * * * * * Fast binary tracing: define the trace record via @@ -229,13 +229,13 @@ static inline void tracepoint_synchronize_unregister(void) * * happens, on an active tracepoint. * * * - * TP_fast_assign( - * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - * __entry->prev_pid = prev->pid; - * __entry->prev_prio = prev->prio; + * TP_fast_assign( + * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + * __entry->prev_pid = prev->pid; + * __entry->prev_prio = prev->prio; * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); * __entry->next_pid = next->pid; - * __entry->next_prio = next->prio; + * __entry->next_prio = next->prio; * ) * * * diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e39679a72a3b..bec69d3678c1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -33,7 +33,7 @@ static struct trace_array *blk_tr; static int __read_mostly blk_tracer_enabled; /* Select an alternative, minimalistic output than the original one */ -#define TRACE_BLK_OPT_CLASSIC 0x1 +#define TRACE_BLK_OPT_CLASSIC 0x1 static struct tracer_opt blk_tracer_opts[] = { /* Default disable the minimalistic output */ @@ -564,7 +564,7 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop); /** * blk_trace_ioctl: - handle the ioctls associated with tracing * @bdev: the block device - * @cmd: the ioctl cmd + * @cmd: the ioctl cmd * @arg: the argument data, if any * **/ @@ -1128,9 +1128,9 @@ static void blk_tracer_reset(struct trace_array *tr) static struct { const char *act[2]; - int (*print)(struct trace_seq *s, const struct trace_entry *ent); + int (*print)(struct trace_seq *s, const struct trace_entry *ent); } what2act[] __read_mostly = { - [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, + [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, @@ -1229,7 +1229,7 @@ static struct tracer blk_tracer __read_mostly = { }; static struct trace_event trace_blk_event = { - .type = TRACE_BLK, + .type = TRACE_BLK, .trace = blk_trace_event_print, .binary = blk_trace_event_print_binary, }; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cc94f8642485..8c6a902db40a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -799,7 +799,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->tgid = (tsk) ? tsk->tgid : 0; + entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index aaa0755268b9..ad8c22efff41 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -157,7 +157,7 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter, static struct trace_event trace_branch_event = { - .type = TRACE_BRANCH, + .type = TRACE_BRANCH, .trace = trace_branch_print, }; diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 6ee1de59f19d..ae2e323df0c7 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -5,23 +5,23 @@ * * static void ftrace_event_(proto) * { - * event_trace_printk(_RET_IP_, ": " ); + * event_trace_printk(_RET_IP_, ": " ); * } * * static int ftrace_reg_event_(void) * { - * int ret; + * int ret; * - * ret = register_trace_(ftrace_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; + * ret = register_trace_(ftrace_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; * } * * static void ftrace_unreg_event_(void) * { - * unregister_trace_(ftrace_event_); + * unregister_trace_(ftrace_event_); * } * * For those macros defined with TRACE_FORMAT: @@ -29,9 +29,9 @@ * static struct ftrace_event_call __used * __attribute__((__aligned__(4))) * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, * } * * @@ -41,66 +41,66 @@ * * static void ftrace_raw_event_(proto) * { - * struct ring_buffer_event *event; - * struct ftrace_raw_ *entry; <-- defined in stage 1 - * unsigned long irq_flags; - * int pc; - * - * local_save_flags(irq_flags); - * pc = preempt_count(); - * - * event = trace_current_buffer_lock_reserve(event_.id, - * sizeof(struct ftrace_raw_), - * irq_flags, pc); - * if (!event) - * return; - * entry = ring_buffer_event_data(event); - * - * ; <-- Here we assign the entries by the __field and + * struct ring_buffer_event *event; + * struct ftrace_raw_ *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_.id, + * sizeof(struct ftrace_raw_), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * ; <-- Here we assign the entries by the __field and * __array macros. * - * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * trace_current_buffer_unlock_commit(event, irq_flags, pc); * } * * static int ftrace_raw_reg_event_(void) * { - * int ret; + * int ret; * - * ret = register_trace_(ftrace_raw_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; + * ret = register_trace_(ftrace_raw_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; * } * * static void ftrace_unreg_event_(void) * { - * unregister_trace_(ftrace_raw_event_); + * unregister_trace_(ftrace_raw_event_); * } * * static struct trace_event ftrace_event_type_ = { - * .trace = ftrace_raw_output_, <-- stage 2 + * .trace = ftrace_raw_output_, <-- stage 2 * }; * * static int ftrace_raw_init_event_(void) * { - * int id; + * int id; * - * id = register_ftrace_event(&ftrace_event_type_); - * if (!id) - * return -ENODEV; - * event_.id = id; - * return 0; + * id = register_ftrace_event(&ftrace_event_type_); + * if (!id) + * return -ENODEV; + * event_.id = id; + * return 0; * } * * static struct ftrace_event_call __used * __attribute__((__aligned__(4))) * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", + * .name = "", * .system = "", - * .raw_init = ftrace_raw_init_event_, - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, + * .raw_init = ftrace_raw_init_event_, + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, * .show_format = ftrace_format_, * } * @@ -138,7 +138,7 @@ _TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ + .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .regfunc = ftrace_reg_event_##call, \ .unregfunc = ftrace_unreg_event_##call, \ @@ -163,7 +163,7 @@ static void ftrace_raw_event_##call(proto) \ pc = preempt_count(); \ \ event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call), \ + sizeof(struct ftrace_raw_##call), \ irq_flags, pc); \ if (!event) \ return; \ @@ -208,7 +208,7 @@ static int ftrace_raw_init_event_##call(void) \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ + .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 23ae78430d58..4d9952d3df50 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -94,7 +94,7 @@ ftrace_format_##call(struct trace_seq *s) \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ + .name = #call, \ .id = proto, \ .system = __stringify(TRACE_SYSTEM), \ .show_format = ftrace_format_##call, \ diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 453ebd3b636e..d1493b853e41 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -841,12 +841,12 @@ static void graph_trace_close(struct trace_iterator *iter) } static struct tracer graph_trace __read_mostly = { - .name = "function_graph", + .name = "function_graph", .open = graph_trace_open, .close = graph_trace_close, .wait_pipe = poll_wait_pipe, - .init = graph_trace_init, - .reset = graph_trace_reset, + .init = graph_trace_init, + .reset = graph_trace_reset, .print_line = print_graph_function, .print_header = print_graph_headers, .flags = &tracer_flags, diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index ef8fd661b217..491832af9ba1 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -565,7 +565,7 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) } static struct trace_event trace_fn_event = { - .type = TRACE_FN, + .type = TRACE_FN, .trace = trace_fn_trace, .raw = trace_fn_raw, .hex = trace_fn_hex, @@ -696,7 +696,7 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, } static struct trace_event trace_ctx_event = { - .type = TRACE_CTX, + .type = TRACE_CTX, .trace = trace_ctx_print, .raw = trace_ctx_raw, .hex = trace_ctx_hex, @@ -704,7 +704,7 @@ static struct trace_event trace_ctx_event = { }; static struct trace_event trace_wake_event = { - .type = TRACE_WAKE, + .type = TRACE_WAKE, .trace = trace_wake_print, .raw = trace_wake_raw, .hex = trace_wake_hex, @@ -759,7 +759,7 @@ static enum print_line_t trace_special_bin(struct trace_iterator *iter, } static struct trace_event trace_special_event = { - .type = TRACE_SPECIAL, + .type = TRACE_SPECIAL, .trace = trace_special_print, .raw = trace_special_print, .hex = trace_special_hex, @@ -796,7 +796,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, } static struct trace_event trace_stack_event = { - .type = TRACE_STACK, + .type = TRACE_STACK, .trace = trace_stack_print, .raw = trace_special_print, .hex = trace_special_hex, @@ -825,7 +825,7 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, } static struct trace_event trace_user_stack_event = { - .type = TRACE_USER_STACK, + .type = TRACE_USER_STACK, .trace = trace_user_stack_print, .raw = trace_special_print, .hex = trace_special_hex, @@ -879,7 +879,7 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) static struct trace_event trace_print_event = { - .type = TRACE_PRINT, + .type = TRACE_PRINT, .trace = trace_print_print, .raw = trace_print_raw, }; diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 4664990fe9c5..e542483df623 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -19,14 +19,14 @@ struct cpu_workqueue_stats { /* Useful to know if we print the cpu headers */ bool first_entry; int cpu; - pid_t pid; + pid_t pid; /* Can be inserted from interrupt or user context, need to be atomic */ - atomic_t inserted; + atomic_t inserted; /* * Don't need to be atomic, works are serialized in a single workqueue thread * on a single CPU. */ - unsigned int executed; + unsigned int executed; }; /* List of workqueue threads on one cpu */ -- cgit v1.2.3 From 48ead02030f849d011259244bb4ea9b985479006 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 12 Mar 2009 18:24:49 +0100 Subject: tracing/core: bring back raw trace_printk for dynamic formats strings Impact: fix callsites with dynamic format strings Since its new binary implementation, trace_printk() internally uses static containers for the format strings on each callsites. But the value is assigned once at build time, which means that it can't take dynamic formats. So this patch unearthes the raw trace_printk implementation for the callers that will need trace_printk to be able to carry these dynamic format strings. The trace_printk() macro will use the appropriate implementation for each callsite. Most of the time however, the binary implementation will still be used. The other impact of this patch is that mmiotrace_printk() will use the old implementation because it calls the low level trace_vprintk and we can't guess here whether the format passed in it is dynamic or not. Some parts of this patch have been written by Steven Rostedt (most notably the part that chooses the appropriate implementation for each callsites). Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 40 +++++++++++------ kernel/trace/trace.c | 85 +++++++++++++++++++++++++++++++++--- kernel/trace/trace.h | 13 +++++- kernel/trace/trace_event_types.h | 11 ++++- kernel/trace/trace_functions_graph.c | 6 +-- kernel/trace/trace_mmiotrace.c | 7 +-- kernel/trace/trace_output.c | 57 +++++++++++++++++++++--- kernel/trace/trace_printk.c | 33 +++++++++++--- 8 files changed, 213 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7742798c9208..1daca3b062bb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -452,31 +452,45 @@ do { \ #define trace_printk(fmt, args...) \ do { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))); \ - \ - if (!trace_printk_fmt) \ - trace_printk_fmt = fmt; \ - \ __trace_printk_check_format(fmt, ##args); \ - __trace_printk(_THIS_IP_, trace_printk_fmt, ##args); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) +extern int +__trace_bprintk(unsigned long ip, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + extern int __trace_printk(unsigned long ip, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement. + */ #define ftrace_vprintk(fmt, vargs) \ do { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))); \ - \ - if (!trace_printk_fmt) \ - trace_printk_fmt = fmt; \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ \ - __ftrace_vprintk(_THIS_IP_, trace_printk_fmt, vargs); \ + __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ + } else \ + __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ } while (0) +extern int +__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); + extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62a63b2b33dd..dbb077d8a172 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1179,10 +1179,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace) /** - * trace_vprintk - write binary msg to tracing buffer + * trace_vbprintk - write binary msg to tracing buffer * */ -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args) { static raw_spinlock_t trace_buf_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; @@ -1191,7 +1191,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - struct print_entry *entry; + struct bprint_entry *entry; unsigned long flags; int resched; int cpu, len = 0, size, pc; @@ -1219,7 +1219,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) goto out_unlock; size = sizeof(*entry) + sizeof(u32) * len; - event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc); + event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); @@ -1240,6 +1240,60 @@ out: return len; } +EXPORT_SYMBOL_GPL(trace_vbprintk); + +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +{ + static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; + static char trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + int cpu, len = 0, size, pc; + struct print_entry *entry; + unsigned long irq_flags; + + if (tracing_disabled || tracing_selftest_running) + return 0; + + pc = preempt_count(); + preempt_disable_notrace(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + pause_graph_tracing(); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&trace_buf_lock); + len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + len = min(len, TRACE_BUF_SIZE-1); + trace_buf[len] = 0; + + size = sizeof(*entry) + len + 1; + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->depth = depth; + + memcpy(&entry->buf, trace_buf, len); + entry->buf[len] = 0; + ring_buffer_unlock_commit(tr->buffer, event); + + out_unlock: + __raw_spin_unlock(&trace_buf_lock); + raw_local_irq_restore(irq_flags); + unpause_graph_tracing(); + out: + preempt_enable_notrace(); + + return len; +} EXPORT_SYMBOL_GPL(trace_vprintk); enum trace_file_type { @@ -1628,6 +1682,22 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } +static enum print_line_t print_bprintk_msg_only(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct bprint_entry *field; + int ret; + + trace_assign_type(field, entry); + + ret = trace_seq_bprintf(s, field->fmt, field->buf); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1637,7 +1707,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_bprintf(s, field->fmt, field->buf); + ret = trace_seq_printf(s, "%s", field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1702,6 +1772,11 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return ret; } + if (iter->ent->type == TRACE_BPRINT && + trace_flags & TRACE_ITER_PRINTK && + trace_flags & TRACE_ITER_PRINTK_MSGONLY) + return print_bprintk_msg_only(iter); + if (iter->ent->type == TRACE_PRINT && trace_flags & TRACE_ITER_PRINTK && trace_flags & TRACE_ITER_PRINTK_MSGONLY) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 336324d717f8..cede1ab49d07 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,6 +20,7 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, + TRACE_BPRINT, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -117,7 +118,7 @@ struct userstack_entry { /* * trace_printk entry: */ -struct print_entry { +struct bprint_entry { struct trace_entry ent; unsigned long ip; int depth; @@ -125,6 +126,13 @@ struct print_entry { u32 buf[]; }; +struct print_entry { + struct trace_entry ent; + unsigned long ip; + int depth; + char buf[]; +}; + #define TRACE_OLD_SIZE 88 struct trace_field_cont { @@ -286,6 +294,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ + IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ @@ -570,6 +579,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern long ns2usecs(cycle_t nsec); extern int +trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args); +extern int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); extern unsigned long trace_flags; diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index 5cca4c978bde..d0907d746425 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -102,7 +102,7 @@ TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ); -TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, +TRACE_EVENT_FORMAT(bprint, TRACE_PRINT, bprint_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned int, depth, depth) @@ -112,6 +112,15 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TP_RAW_FMT("%08lx (%d) fmt:%p %s") ); +TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, ip, ip) + TRACE_FIELD(unsigned int, depth, depth) + TRACE_FIELD_ZERO_CHAR(buf) + ), + TP_RAW_FMT("%08lx (%d) fmt:%p %s") +); + TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned int, line, line) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8566c14b3e9a..4c388607ed67 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -684,7 +684,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } static enum print_line_t -print_graph_comment(struct print_entry *trace, struct trace_seq *s, +print_graph_comment(struct bprint_entry *trace, struct trace_seq *s, struct trace_entry *ent, struct trace_iterator *iter) { int i; @@ -781,8 +781,8 @@ print_graph_function(struct trace_iterator *iter) trace_assign_type(field, entry); return print_graph_return(&field->ret, s, entry, iter); } - case TRACE_PRINT: { - struct print_entry *field; + case TRACE_BPRINT: { + struct bprint_entry *field; trace_assign_type(field, entry); return print_graph_comment(field, s, entry, iter); } diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 23e346a734ca..f095916e477f 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -254,6 +254,7 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; struct print_entry *print = (struct print_entry *)entry; + const char *msg = print->buf; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, USEC_PER_SEC); @@ -261,11 +262,7 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter) int ret; /* The trailing newline must be in the message. */ - ret = trace_seq_printf(s, "MARK %u.%06lu ", secs, usec_rem); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_bprintf(s, print->fmt, print->buf); + ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); if (!ret) return TRACE_TYPE_PARTIAL_LINE; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 491832af9ba1..ea9d3b410c7a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -832,13 +832,13 @@ static struct trace_event trace_user_stack_event = { .binary = trace_special_bin, }; -/* TRACE_PRINT */ +/* TRACE_BPRINT */ static enum print_line_t -trace_print_print(struct trace_iterator *iter, int flags) +trace_bprint_print(struct trace_iterator *iter, int flags) { struct trace_entry *entry = iter->ent; struct trace_seq *s = &iter->seq; - struct print_entry *field; + struct bprint_entry *field; trace_assign_type(field, entry); @@ -858,9 +858,10 @@ trace_print_print(struct trace_iterator *iter, int flags) } -static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) +static enum print_line_t +trace_bprint_raw(struct trace_iterator *iter, int flags) { - struct print_entry *field; + struct bprint_entry *field; struct trace_seq *s = &iter->seq; trace_assign_type(field, iter->ent); @@ -878,12 +879,55 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) } +static struct trace_event trace_bprint_event = { + .type = TRACE_BPRINT, + .trace = trace_bprint_print, + .raw = trace_bprint_raw, +}; + +/* TRACE_PRINT */ +static enum print_line_t trace_print_print(struct trace_iterator *iter, + int flags) +{ + struct print_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + if (!seq_print_ip_sym(s, field->ip, flags)) + goto partial; + + if (!trace_seq_printf(s, ": %s", field->buf)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) +{ + struct print_entry *field; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + static struct trace_event trace_print_event = { - .type = TRACE_PRINT, + .type = TRACE_PRINT, .trace = trace_print_print, .raw = trace_print_raw, }; + static struct trace_event *events[] __initdata = { &trace_fn_event, &trace_ctx_event, @@ -891,6 +935,7 @@ static struct trace_event *events[] __initdata = { &trace_special_event, &trace_stack_event, &trace_user_stack_event, + &trace_bprint_event, &trace_print_event, NULL }; diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index a50aea22e929..f307a11e2332 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -99,7 +99,7 @@ struct notifier_block module_trace_bprintk_format_nb = { .notifier_call = module_trace_bprintk_format_notify, }; -int __trace_printk(unsigned long ip, const char *fmt, ...) +int __trace_bprintk(unsigned long ip, const char *fmt, ...) { int ret; va_list ap; @@ -111,13 +111,13 @@ int __trace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + ret = trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap); va_end(ap); return ret; } -EXPORT_SYMBOL_GPL(__trace_printk); +EXPORT_SYMBOL_GPL(__trace_bprintk); -int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) +int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap) { if (unlikely(!fmt)) return 0; @@ -125,11 +125,34 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) if (!(trace_flags & TRACE_ITER_PRINTK)) return 0; + return trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap); +} +EXPORT_SYMBOL_GPL(__ftrace_vbprintk); + +int __trace_printk(unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + va_start(ap, fmt); + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(__trace_printk); + +int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) +{ + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); } EXPORT_SYMBOL_GPL(__ftrace_vprintk); - static __init int init_trace_printk(void) { return register_module_notifier(&module_trace_bprintk_format_nb); -- cgit v1.2.3 From 5d592b44b29a1d73e13d5c9e3426eed843bdc359 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 12 Mar 2009 14:33:36 -0400 Subject: tracing: tracepoints for softirq entry/exit - add softirq-to-name array Create a 'softirq_to_name' array, which is indexed by softirq #, so that we can easily convert between the softirq index # and its name, in order to get more meaningful output messages. LKML-Reference: <20090312183336.GB3352@redhat.com> Signed-off-by: Jason Baron Signed-off-by: Steven Rostedt --- include/linux/interrupt.h | 5 +++++ kernel/softirq.c | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 472f11765f60..9b7e9d743476 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -258,6 +258,11 @@ enum NR_SOFTIRQS }; +/* map softirq index to softirq name. update 'softirq_to_name' in + * kernel/softirq.c when adding a new softirq. + */ +extern char *softirq_to_name[NR_SOFTIRQS]; + /* softirq mask and active fields moved to irq_cpustat_t in * asm/hardirq.h to get better cache usage. KAO */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 7571bcb71be4..9f90fdc039f4 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -53,6 +53,12 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); +char *softirq_to_name[NR_SOFTIRQS] = { + "HI_SOFTIRQ", "TIMER_SOFTIRQ", "NET_TX_SOFTIRQ", "NET_RX_SOFTIRQ", + "BLOCK_SOFTIRQ", "TASKLET_SOFTIRQ", "SCHED_SOFTIRQ", "HRTIMER_SOFTIRQ", + "RCU_SOFTIRQ" +}; + /* * we cannot loop indefinitely here to avoid userspace starvation, * but we also don't want to introduce a worst case 1/HZ latency @@ -209,9 +215,10 @@ restart: h->action(h); if (unlikely(prev_count != preempt_count())) { - printk(KERN_ERR "huh, entered softirq %td %p" + printk(KERN_ERR "huh, entered softirq %td %s %p" "with preempt_count %08x," " exited with %08x?\n", h - softirq_vec, + softirq_to_name[h - softirq_vec], h->action, prev_count, preempt_count()); preempt_count() = prev_count; } -- cgit v1.2.3 From ee08c6eccb7d1295516f7cf420fddf7b14e9146f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Mar 2009 05:52:59 +0100 Subject: tracing/ftrace: syscall tracing infrastructure, basics Provide basic callbacks to do syscall tracing. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Lai Jiangshan LKML-Reference: <1236401580-5758-2-git-send-email-fweisbec@gmail.com> [ simplified it to a trace_printk() for now. ] Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 21 ++++++++ kernel/trace/Kconfig | 10 ++++ kernel/trace/Makefile | 1 + kernel/trace/trace.h | 2 + kernel/trace/trace_syscalls.c | 113 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 147 insertions(+) create mode 100644 kernel/trace/trace_syscalls.c (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e1583f2639b0..c146c1021a29 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -503,4 +503,25 @@ static inline void trace_hw_branch_oops(void) {} #endif /* CONFIG_HW_BRANCH_TRACER */ +/* + * A syscall entry in the ftrace syscalls array. + * + * @syscall_nr: syscall number + */ +struct syscall_trace_entry { + int syscall_nr; +}; + +#ifdef CONFIG_FTRACE_SYSCALLS +extern void start_ftrace_syscalls(void); +extern void stop_ftrace_syscalls(void); +extern void ftrace_syscall_enter(struct pt_regs *regs); +extern void ftrace_syscall_exit(struct pt_regs *regs); +#else +static inline void start_ftrace_syscalls(void) { } +static inline void stop_ftrace_syscalls(void) { } +static inline void ftrace_syscall_enter(struct pt_regs *regs) { } +static inline void ftrace_syscall_exit(struct pt_regs *regs) { } +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8e4a2a61cd75..95a0ad191f19 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -34,6 +34,9 @@ config HAVE_FTRACE_MCOUNT_RECORD config HAVE_HW_BRANCH_TRACER bool +config HAVE_FTRACE_SYSCALLS + bool + config TRACER_MAX_TRACE bool @@ -175,6 +178,13 @@ config EVENT_TRACER allowing the user to pick and choose which trace point they want to trace. +config FTRACE_SYSCALLS + bool "Trace syscalls" + depends on HAVE_FTRACE_SYSCALLS + select TRACING + help + Basic tracer to catch the syscall entry and exit events. + config BOOT_TRACER bool "Trace boot initcalls" select TRACING diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c7a2943796eb..c3feea01c3e0 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -43,5 +43,6 @@ obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o obj-$(CONFIG_EVENT_TRACER) += trace_events.o obj-$(CONFIG_EVENT_TRACER) += events.o obj-$(CONFIG_EVENT_TRACER) += trace_export.o +obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c5e1d8865fe4..3d49daae47dc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -30,6 +30,8 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, + TRACE_SYSCALL_ENTER, + TRACE_SYSCALL_EXIT, TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_POWER, diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c new file mode 100644 index 000000000000..66cf97449af3 --- /dev/null +++ b/kernel/trace/trace_syscalls.c @@ -0,0 +1,113 @@ +#include +#include + +#include + +#include "trace_output.h" +#include "trace.h" + +static atomic_t refcount; + +void start_ftrace_syscalls(void) +{ + unsigned long flags; + struct task_struct *g, *t; + + if (atomic_inc_return(&refcount) != 1) + goto out; + + read_lock_irqsave(&tasklist_lock, flags); + + do_each_thread(g, t) { + set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); + } while_each_thread(g, t); + + read_unlock_irqrestore(&tasklist_lock, flags); +out: + atomic_dec(&refcount); +} + +void stop_ftrace_syscalls(void) +{ + unsigned long flags; + struct task_struct *g, *t; + + if (atomic_dec_return(&refcount)) + goto out; + + read_lock_irqsave(&tasklist_lock, flags); + + do_each_thread(g, t) { + clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); + } while_each_thread(g, t); + + read_unlock_irqrestore(&tasklist_lock, flags); +out: + atomic_inc(&refcount); +} + +void ftrace_syscall_enter(struct pt_regs *regs) +{ + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + + trace_printk("syscall %d enter\n", syscall_nr); +} + +void ftrace_syscall_exit(struct pt_regs *regs) +{ + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + + trace_printk("syscall %d exit\n", syscall_nr); +} + +static int init_syscall_tracer(struct trace_array *tr) +{ + start_ftrace_syscalls(); + + return 0; +} + +static void reset_syscall_tracer(struct trace_array *tr) +{ + stop_ftrace_syscalls(); +} + +static struct trace_event syscall_enter_event = { + .type = TRACE_SYSCALL_ENTER, +}; + +static struct trace_event syscall_exit_event = { + .type = TRACE_SYSCALL_EXIT, +}; + +static struct tracer syscall_tracer __read_mostly = { + .name = "syscall", + .init = init_syscall_tracer, + .reset = reset_syscall_tracer +}; + +__init int register_ftrace_syscalls(void) +{ + int ret; + + ret = register_ftrace_event(&syscall_enter_event); + if (!ret) { + printk(KERN_WARNING "event %d failed to register\n", + syscall_enter_event.type); + WARN_ON_ONCE(1); + } + + ret = register_ftrace_event(&syscall_exit_event); + if (!ret) { + printk(KERN_WARNING "event %d failed to register\n", + syscall_exit_event.type); + WARN_ON_ONCE(1); + } + + return register_tracer(&syscall_tracer); +} +device_initcall(register_ftrace_syscalls); -- cgit v1.2.3 From e94142a67f8bad494c593f0a07c9fc2fbec98c0e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 13 Mar 2009 17:51:27 +0800 Subject: ftrace: remove struct list_head from struct dyn_ftrace Impact: save memory The struct dyn_ftrace table is very large, this patch will save about 50%. Signed-off-by: Lai Jiangshan Cc: Steven Rostedt LKML-Reference: <49BA2C9F.8020009@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 14 ++++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index c146c1021a29..9d598bbf28a6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -145,7 +145,6 @@ enum { }; struct dyn_ftrace { - struct list_head list; unsigned long ip; /* address of mcount call-site */ unsigned long flags; struct dyn_arch_ftrace arch; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bf78a4c75c67..90d5729afeff 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -272,7 +272,7 @@ enum { static int ftrace_filtered; -static LIST_HEAD(ftrace_new_addrs); +static struct dyn_ftrace *ftrace_new_addrs; static DEFINE_MUTEX(ftrace_regex_lock); @@ -409,8 +409,8 @@ ftrace_record_ip(unsigned long ip) return NULL; rec->ip = ip; - - list_add(&rec->list, &ftrace_new_addrs); + rec->flags = (unsigned long)ftrace_new_addrs; + ftrace_new_addrs = rec; return rec; } @@ -716,19 +716,21 @@ unsigned long ftrace_update_tot_cnt; static int ftrace_update_code(struct module *mod) { - struct dyn_ftrace *p, *t; + struct dyn_ftrace *p; cycle_t start, stop; start = ftrace_now(raw_smp_processor_id()); ftrace_update_cnt = 0; - list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { + while (ftrace_new_addrs) { /* If something went wrong, bail without enabling anything */ if (unlikely(ftrace_disabled)) return -1; - list_del_init(&p->list); + p = ftrace_new_addrs; + ftrace_new_addrs = (struct dyn_ftrace *)p->flags; + p->flags = 0L; /* convert record (i.e, patch mcount-call with NOP) */ if (ftrace_code_disable(mod, p)) { -- cgit v1.2.3 From bed1ffca022cc876fb83161d26670e9b5d3cf36b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Mar 2009 15:42:11 +0100 Subject: tracing/syscalls: core infrastructure for syscalls tracing, enhancements Impact: new feature This adds the generic support for syscalls tracing. This is currently exploited through a devoted tracer but other tracing engines can use it. (They just have to play with {start,stop}_ftrace_syscalls() and use the display callbacks unless they want to override them.) The syscalls prototypes definitions are abused here to steal some metadata informations: - syscall name, param types, param names, number of params The syscall addr is not directly saved during this definition because we don't know if its prototype is available in the namespace. But we don't really need it. The arch has just to build a function able to resolve the syscall number to its metadata struct. The current tracer prints the syscall names, parameters names and values (and their types optionally). Currently the value is a raw hex but higher level values diplaying is on my TODO list. Signed-off-by: Frederic Weisbecker LKML-Reference: <1236955332-10133-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 11 ++- include/linux/ftrace.h | 14 +++- include/linux/syscalls.h | 60 +++++++++++++++- kernel/trace/trace.h | 17 +++++ kernel/trace/trace_syscalls.c | 146 +++++++++++++++++++++++++++++++++++--- 5 files changed, 234 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0e0f39be6c8b..d3bc3c86df6a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -77,6 +77,14 @@ #define TRACE_PRINTKS() #endif +#ifdef CONFIG_FTRACE_SYSCALLS +#define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .; \ + *(__syscalls_metadata) \ + VMLINUX_SYMBOL(__stop_syscalls_metadata) = .; +#else +#define TRACE_SYSCALLS() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -99,7 +107,8 @@ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ - FTRACE_EVENTS() + FTRACE_EVENTS() \ + TRACE_SYSCALLS() #define RO_DATA(align) \ . = ALIGN((align)); \ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index c146c1021a29..6dc1c652447e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -506,13 +506,21 @@ static inline void trace_hw_branch_oops(void) {} /* * A syscall entry in the ftrace syscalls array. * - * @syscall_nr: syscall number + * @name: name of the syscall + * @nb_args: number of parameters it takes + * @types: list of types as strings + * @args: list of args as strings (args[i] matches types[i]) */ -struct syscall_trace_entry { - int syscall_nr; +struct syscall_metadata { + const char *name; + int nb_args; + const char **types; + const char **args; }; #ifdef CONFIG_FTRACE_SYSCALLS +extern void arch_init_ftrace_syscalls(void); +extern struct syscall_metadata *syscall_nr_to_meta(int nr); extern void start_ftrace_syscalls(void); extern void stop_ftrace_syscalls(void); extern void ftrace_syscall_enter(struct pt_regs *regs); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f9f900cfd066..0cff9bb80b02 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -65,6 +65,7 @@ struct old_linux_dirent; #include #include #include +#include #define __SC_DECL1(t1, a1) t1 a1 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) @@ -95,7 +96,46 @@ struct old_linux_dirent; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +#ifdef CONFIG_FTRACE_SYSCALLS +#define __SC_STR_ADECL1(t, a) #a +#define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) +#define __SC_STR_ADECL3(t, a, ...) #a, __SC_STR_ADECL2(__VA_ARGS__) +#define __SC_STR_ADECL4(t, a, ...) #a, __SC_STR_ADECL3(__VA_ARGS__) +#define __SC_STR_ADECL5(t, a, ...) #a, __SC_STR_ADECL4(__VA_ARGS__) +#define __SC_STR_ADECL6(t, a, ...) #a, __SC_STR_ADECL5(__VA_ARGS__) + +#define __SC_STR_TDECL1(t, a) #t +#define __SC_STR_TDECL2(t, a, ...) #t, __SC_STR_TDECL1(__VA_ARGS__) +#define __SC_STR_TDECL3(t, a, ...) #t, __SC_STR_TDECL2(__VA_ARGS__) +#define __SC_STR_TDECL4(t, a, ...) #t, __SC_STR_TDECL3(__VA_ARGS__) +#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) +#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) + +#define SYSCALL_METADATA(sname, nb) \ + static const struct syscall_metadata __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("__syscalls_metadata"))) \ + __syscall_meta_##sname = { \ + .name = "sys"#sname, \ + .nb_args = nb, \ + .types = types_##sname, \ + .args = args_##sname, \ + } + +#define SYSCALL_DEFINE0(sname) \ + static const struct syscall_metadata __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("__syscalls_metadata"))) \ + __syscall_meta_##sname = { \ + .name = "sys_"#sname, \ + .nb_args = 0, \ + }; \ + asmlinkage long sys_##sname(void) + +#else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) +#endif + #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) @@ -117,10 +157,26 @@ struct old_linux_dirent; #endif #endif +#ifdef CONFIG_FTRACE_SYSCALLS +#define SYSCALL_DEFINEx(x, sname, ...) \ + static const char *types_##sname[] = { \ + __SC_STR_TDECL##x(__VA_ARGS__) \ + }; \ + static const char *args_##sname[] = { \ + __SC_STR_ADECL##x(__VA_ARGS__) \ + }; \ + SYSCALL_METADATA(sname, x); \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +#else +#define SYSCALL_DEFINEx(x, sname, ...) \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +#endif + #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS #define SYSCALL_DEFINE(name) static inline long SYSC_##name -#define SYSCALL_DEFINEx(x, name, ...) \ + +#define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ @@ -134,7 +190,7 @@ struct old_linux_dirent; #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ #define SYSCALL_DEFINE(name) asmlinkage long sys_##name -#define SYSCALL_DEFINEx(x, name, ...) \ +#define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)) #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3d49daae47dc..d80ca0d464d9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -194,6 +194,19 @@ struct kmemtrace_free_entry { const void *ptr; }; +struct syscall_trace_enter { + struct trace_entry ent; + int nr; + unsigned long args[]; +}; + +struct syscall_trace_exit { + struct trace_entry ent; + int nr; + unsigned long ret; +}; + + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -306,6 +319,10 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ + IF_ASSIGN(var, ent, struct syscall_trace_enter, \ + TRACE_SYSCALL_ENTER); \ + IF_ASSIGN(var, ent, struct syscall_trace_exit, \ + TRACE_SYSCALL_EXIT); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 66cf97449af3..c72e599230ff 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,6 +1,5 @@ -#include #include - +#include #include #include "trace_output.h" @@ -8,6 +7,90 @@ static atomic_t refcount; +/* Our two options */ +enum { + TRACE_SYSCALLS_OPT_TYPES = 0x1, +}; + +static struct tracer_opt syscalls_opts[] = { + { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) }, + { } +}; + +static struct tracer_flags syscalls_flags = { + .val = 0, /* By default: no args types */ + .opts = syscalls_opts +}; + +enum print_line_t +print_syscall_enter(struct trace_iterator *iter, int flags) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *ent = iter->ent; + struct syscall_trace_enter *trace; + struct syscall_metadata *entry; + int i, ret, syscall; + + trace_assign_type(trace, ent); + + syscall = trace->nr; + + entry = syscall_nr_to_meta(syscall); + if (!entry) + goto end; + + ret = trace_seq_printf(s, "%s(", entry->name); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + for (i = 0; i < entry->nb_args; i++) { + /* parameter types */ + if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { + ret = trace_seq_printf(s, "%s ", entry->types[i]); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + /* parameter values */ + ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], + trace->args[i], + i == entry->nb_args - 1 ? ")" : ","); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + +end: + trace_seq_printf(s, "\n"); + return TRACE_TYPE_HANDLED; +} + +enum print_line_t +print_syscall_exit(struct trace_iterator *iter, int flags) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *ent = iter->ent; + struct syscall_trace_exit *trace; + int syscall; + struct syscall_metadata *entry; + int ret; + + trace_assign_type(trace, ent); + + syscall = trace->nr; + + entry = syscall_nr_to_meta(syscall); + if (!entry) { + trace_seq_printf(s, "\n"); + return TRACE_TYPE_HANDLED; + } + + ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, + trace->ret); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + void start_ftrace_syscalls(void) { unsigned long flags; @@ -16,6 +99,7 @@ void start_ftrace_syscalls(void) if (atomic_inc_return(&refcount) != 1) goto out; + arch_init_ftrace_syscalls(); read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, t) { @@ -48,20 +132,63 @@ out: void ftrace_syscall_enter(struct pt_regs *regs) { + struct syscall_trace_enter *entry; + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; + int size; int syscall_nr; + int cpu; syscall_nr = syscall_get_nr(current, regs); - trace_printk("syscall %d enter\n", syscall_nr); + cpu = raw_smp_processor_id(); + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + + event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, + 0, 0); + if (!event) + return; + + entry = ring_buffer_event_data(event); + entry->nr = syscall_nr; + syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); + + trace_current_buffer_unlock_commit(event, 0, 0); + trace_wake_up(); } void ftrace_syscall_exit(struct pt_regs *regs) { + struct syscall_trace_exit *entry; + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; int syscall_nr; + int cpu; syscall_nr = syscall_get_nr(current, regs); - trace_printk("syscall %d exit\n", syscall_nr); + cpu = raw_smp_processor_id(); + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, + sizeof(*entry), 0, 0); + if (!event) + return; + + entry = ring_buffer_event_data(event); + entry->nr = syscall_nr; + entry->ret = syscall_get_return_value(current, regs); + + trace_current_buffer_unlock_commit(event, 0, 0); + trace_wake_up(); } static int init_syscall_tracer(struct trace_array *tr) @@ -77,17 +204,20 @@ static void reset_syscall_tracer(struct trace_array *tr) } static struct trace_event syscall_enter_event = { - .type = TRACE_SYSCALL_ENTER, + .type = TRACE_SYSCALL_ENTER, + .trace = print_syscall_enter, }; static struct trace_event syscall_exit_event = { - .type = TRACE_SYSCALL_EXIT, + .type = TRACE_SYSCALL_EXIT, + .trace = print_syscall_exit, }; static struct tracer syscall_tracer __read_mostly = { - .name = "syscall", + .name = "syscall", .init = init_syscall_tracer, - .reset = reset_syscall_tracer + .reset = reset_syscall_tracer, + .flags = &syscalls_flags, }; __init int register_ftrace_syscalls(void) -- cgit v1.2.3 From 37886f6a9f62d22530ffee8d3f9215c8345b6969 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Mar 2009 17:22:06 -0400 Subject: ring-buffer: add api to allow a tracer to change clock source This patch adds a new function called ring_buffer_set_clock that allows a tracer to assign its own clock source to the buffer. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 7 +++-- kernel/trace/ring_buffer.c | 65 ++++++++++++++++++++++++++------------------- kernel/trace/trace.c | 21 ++++++++++----- 3 files changed, 57 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index b1a0068a5557..9e6052bd1a1c 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -118,8 +118,11 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); -u64 ring_buffer_time_stamp(int cpu); -void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); +u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); +void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, + int cpu, u64 *ts); +void ring_buffer_set_clock(struct ring_buffer *buffer, + u64 (*clock)(void)); size_t ring_buffer_page_len(void *page); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 58128ad2fde0..bbf51922a8ca 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -180,29 +180,6 @@ EXPORT_SYMBOL_GPL(tracing_is_on); #include "trace.h" -/* Up this if you want to test the TIME_EXTENTS and normalization */ -#define DEBUG_SHIFT 0 - -u64 ring_buffer_time_stamp(int cpu) -{ - u64 time; - - preempt_disable_notrace(); - /* shift to debug/test normalization and TIME_EXTENTS */ - time = trace_clock_local() << DEBUG_SHIFT; - preempt_enable_no_resched_notrace(); - - return time; -} -EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); - -void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) -{ - /* Just stupid testing the normalize function and deltas */ - *ts >>= DEBUG_SHIFT; -} -EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); - #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA 28 @@ -374,6 +351,7 @@ struct ring_buffer { #ifdef CONFIG_HOTPLUG_CPU struct notifier_block cpu_notify; #endif + u64 (*clock)(void); }; struct ring_buffer_iter { @@ -394,6 +372,30 @@ struct ring_buffer_iter { _____ret; \ }) +/* Up this if you want to test the TIME_EXTENTS and normalization */ +#define DEBUG_SHIFT 0 + +u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) +{ + u64 time; + + preempt_disable_notrace(); + /* shift to debug/test normalization and TIME_EXTENTS */ + time = buffer->clock() << DEBUG_SHIFT; + preempt_enable_no_resched_notrace(); + + return time; +} +EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); + +void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, + int cpu, u64 *ts) +{ + /* Just stupid testing the normalize function and deltas */ + *ts >>= DEBUG_SHIFT; +} +EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); + /** * check_pages - integrity check of buffer pages * @cpu_buffer: CPU buffer with pages to test @@ -569,6 +571,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); buffer->flags = flags; + buffer->clock = trace_clock_local; /* need at least two pages */ if (buffer->pages == 1) @@ -645,6 +648,12 @@ ring_buffer_free(struct ring_buffer *buffer) } EXPORT_SYMBOL_GPL(ring_buffer_free); +void ring_buffer_set_clock(struct ring_buffer *buffer, + u64 (*clock)(void)) +{ + buffer->clock = clock; +} + static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); static void @@ -1191,7 +1200,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, cpu_buffer->tail_page = next_page; /* reread the time stamp */ - *ts = ring_buffer_time_stamp(cpu_buffer->cpu); + *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu); cpu_buffer->tail_page->page->time_stamp = *ts; } @@ -1334,7 +1343,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) return NULL; - ts = ring_buffer_time_stamp(cpu_buffer->cpu); + ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); /* * Only the first commit can update the timestamp. @@ -2051,7 +2060,8 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) case RINGBUF_TYPE_DATA: if (ts) { *ts = cpu_buffer->read_stamp + event->time_delta; - ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); + ring_buffer_normalize_time_stamp(buffer, + cpu_buffer->cpu, ts); } return event; @@ -2112,7 +2122,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) case RINGBUF_TYPE_DATA: if (ts) { *ts = iter->read_stamp + event->time_delta; - ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); + ring_buffer_normalize_time_stamp(buffer, + cpu_buffer->cpu, ts); } return event; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8f89690230e6..3be2f788e10d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -155,13 +155,6 @@ ns2usecs(cycle_t nsec) return nsec; } -cycle_t ftrace_now(int cpu) -{ - u64 ts = ring_buffer_time_stamp(cpu); - ring_buffer_normalize_time_stamp(cpu, &ts); - return ts; -} - /* * The global_trace is the descriptor that holds the tracing * buffers for the live tracing. For each CPU, it contains @@ -178,6 +171,20 @@ static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); +cycle_t ftrace_now(int cpu) +{ + u64 ts; + + /* Early boot up does not have a buffer yet */ + if (!global_trace.buffer) + return trace_clock_local(); + + ts = ring_buffer_time_stamp(global_trace.buffer, cpu); + ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts); + + return ts; +} + /* * The max_tr is used to snapshot the global_trace when a maximum * latency is reached. Some tracers will use this to store a maximum -- cgit v1.2.3 From 97e7e4f391cac2b00417b581b432533d245d4fd0 Mon Sep 17 00:00:00 2001 From: Witold Baryluk Date: Tue, 17 Mar 2009 21:15:44 +0100 Subject: tracing: optimization of branch tracer Impact: better performance for if branch tracer Use an array to count the hit and misses of a conditional instead of using another conditional. This cuts down on saturation of branch predictions and increases performance of modern pipelined architectures. Signed-off-by: Witold Baryluk Signed-off-by: Steven Rostedt --- include/linux/compiler.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d95da1020f1c..6faa7e549de4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -68,6 +68,7 @@ struct ftrace_branch_data { unsigned long miss; unsigned long hit; }; + unsigned long miss_hit[2]; }; }; @@ -125,10 +126,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .line = __LINE__, \ }; \ ______r = !!(cond); \ - if (______r) \ - ______f.hit++; \ - else \ - ______f.miss++; \ + ______f.miss_hit[______r]++; \ ______r; \ })) #endif /* CONFIG_PROFILE_ALL_BRANCHES */ -- cgit v1.2.3 From 2d622719f1572ef31e0616444a515eba3094d050 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:30:49 -0500 Subject: tracing: add ring_buffer_event_discard() to ring buffer This patch overloads RINGBUF_TYPE_PADDING to provide a way to discard events from the ring buffer, for the event-filtering mechanism introduced in a subsequent patch. I did the initial version but thanks to Steven Rostedt for adding the parts that actually made it work. ;-) Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 11 +++-- kernel/trace/ring_buffer.c | 117 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 105 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 9e6052bd1a1c..e1b7b2173885 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -18,10 +18,13 @@ struct ring_buffer_event { /** * enum ring_buffer_type - internal ring buffer types * - * @RINGBUF_TYPE_PADDING: Left over page padding - * array is ignored - * size is variable depending on how much + * @RINGBUF_TYPE_PADDING: Left over page padding or discarded event + * If time_delta is 0: + * array is ignored + * size is variable depending on how much * padding is needed + * If time_delta is non zero: + * everything else same as RINGBUF_TYPE_DATA * * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta * array[0] = time delta (28 .. 59) @@ -65,6 +68,8 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } +void ring_buffer_event_discard(struct ring_buffer_event *event); + /* * size is in bytes for each per CPU buffer. */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 384ca5d9d729..a09027ec1714 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -189,16 +189,65 @@ enum { RB_LEN_TIME_STAMP = 16, }; -/* inline for ring buffer fast paths */ +static inline int rb_null_event(struct ring_buffer_event *event) +{ + return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; +} + +static inline int rb_discarded_event(struct ring_buffer_event *event) +{ + return event->type == RINGBUF_TYPE_PADDING && event->time_delta; +} + +static void rb_event_set_padding(struct ring_buffer_event *event) +{ + event->type = RINGBUF_TYPE_PADDING; + event->time_delta = 0; +} + +/** + * ring_buffer_event_discard - discard an event in the ring buffer + * @buffer: the ring buffer + * @event: the event to discard + * + * Sometimes a event that is in the ring buffer needs to be ignored. + * This function lets the user discard an event in the ring buffer + * and then that event will not be read later. + * + * Note, it is up to the user to be careful with this, and protect + * against races. If the user discards an event that has been consumed + * it is possible that it could corrupt the ring buffer. + */ +void ring_buffer_event_discard(struct ring_buffer_event *event) +{ + event->type = RINGBUF_TYPE_PADDING; + /* time delta must be non zero */ + if (!event->time_delta) + event->time_delta = 1; +} + static unsigned -rb_event_length(struct ring_buffer_event *event) +rb_event_data_length(struct ring_buffer_event *event) { unsigned length; + if (event->len) + length = event->len * RB_ALIGNMENT; + else + length = event->array[0]; + return length + RB_EVNT_HDR_SIZE; +} + +/* inline for ring buffer fast paths */ +static unsigned +rb_event_length(struct ring_buffer_event *event) +{ switch (event->type) { case RINGBUF_TYPE_PADDING: - /* undefined */ - return -1; + if (rb_null_event(event)) + /* undefined */ + return -1; + return rb_event_data_length(event); case RINGBUF_TYPE_TIME_EXTEND: return RB_LEN_TIME_EXTEND; @@ -207,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event) return RB_LEN_TIME_STAMP; case RINGBUF_TYPE_DATA: - if (event->len) - length = event->len * RB_ALIGNMENT; - else - length = event->array[0]; - return length + RB_EVNT_HDR_SIZE; + return rb_event_data_length(event); default: BUG(); } @@ -845,11 +890,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) } EXPORT_SYMBOL_GPL(ring_buffer_resize); -static inline int rb_null_event(struct ring_buffer_event *event) -{ - return event->type == RINGBUF_TYPE_PADDING; -} - static inline void * __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) { @@ -1219,7 +1259,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (tail < BUF_PAGE_SIZE) { /* Mark the rest of the page with padding */ event = __rb_page_index(tail_page, tail); - event->type = RINGBUF_TYPE_PADDING; + rb_event_set_padding(event); } if (tail <= BUF_PAGE_SIZE) @@ -1969,7 +2009,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) event = rb_reader_event(cpu_buffer); - if (event->type == RINGBUF_TYPE_DATA) + if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) cpu_buffer->entries--; rb_update_read_stamp(cpu_buffer, event); @@ -2052,9 +2092,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) switch (event->type) { case RINGBUF_TYPE_PADDING: - RB_WARN_ON(cpu_buffer, 1); + if (rb_null_event(event)) + RB_WARN_ON(cpu_buffer, 1); + /* + * Because the writer could be discarding every + * event it creates (which would probably be bad) + * if we were to go back to "again" then we may never + * catch up, and will trigger the warn on, or lock + * the box. Return the padding, and we will release + * the current locks, and try again. + */ rb_advance_reader(cpu_buffer); - return NULL; + return event; case RINGBUF_TYPE_TIME_EXTEND: /* Internal data, OK to advance */ @@ -2115,8 +2164,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) switch (event->type) { case RINGBUF_TYPE_PADDING: - rb_inc_iter(iter); - goto again; + if (rb_null_event(event)) { + rb_inc_iter(iter); + goto again; + } + rb_advance_iter(iter); + return event; case RINGBUF_TYPE_TIME_EXTEND: /* Internal data, OK to advance */ @@ -2163,10 +2216,16 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; + again: spin_lock_irqsave(&cpu_buffer->reader_lock, flags); event = rb_buffer_peek(buffer, cpu, ts); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (event && event->type == RINGBUF_TYPE_PADDING) { + cpu_relax(); + goto again; + } + return event; } @@ -2185,10 +2244,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer_event *event; unsigned long flags; + again: spin_lock_irqsave(&cpu_buffer->reader_lock, flags); event = rb_iter_peek(iter, ts); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (event && event->type == RINGBUF_TYPE_PADDING) { + cpu_relax(); + goto again; + } + return event; } @@ -2207,6 +2272,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) struct ring_buffer_event *event = NULL; unsigned long flags; + again: /* might be called in atomic */ preempt_disable(); @@ -2228,6 +2294,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) out: preempt_enable(); + if (event && event->type == RINGBUF_TYPE_PADDING) { + cpu_relax(); + goto again; + } + return event; } EXPORT_SYMBOL_GPL(ring_buffer_consume); @@ -2306,6 +2377,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; unsigned long flags; + again: spin_lock_irqsave(&cpu_buffer->reader_lock, flags); event = rb_iter_peek(iter, ts); if (!event) @@ -2315,6 +2387,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) out: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (event && event->type == RINGBUF_TYPE_PADDING) { + cpu_relax(); + goto again; + } + return event; } EXPORT_SYMBOL_GPL(ring_buffer_read); -- cgit v1.2.3 From c0f92ba99bdeaf35f9c580291b4e1a657c67fbd4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Mar 2009 23:10:44 +0100 Subject: debugfs: function to know if debugfs is initialized Impact: add new debugfs API With ftrace, some tracers are registered in early initcalls and attempt to create files on the debugfs filesystem. Depending on when they are activated, they can try to create their file at any time. Some checks can be done on the tracing area but providing a helper to know if debugfs is registered make it really more easy. Signed-off-by: Frederic Weisbecker Acked-by: Greg Kroah-Hartman Cc: Steven Rostedt LKML-Reference: <1237759847-21025-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- fs/debugfs/inode.c | 16 ++++++++++++++++ include/linux/debugfs.h | 8 ++++++++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 81ae9ea3c6e1..0662ba6de85a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -30,6 +30,7 @@ static struct vfsmount *debugfs_mount; static int debugfs_mount_count; +static bool debugfs_registered; static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev) { @@ -496,6 +497,16 @@ exit: } EXPORT_SYMBOL_GPL(debugfs_rename); +/** + * debugfs_initialized - Tells whether debugfs has been registered + */ +bool debugfs_initialized(void) +{ + return debugfs_registered; +} +EXPORT_SYMBOL_GPL(debugfs_initialized); + + static struct kobject *debug_kobj; static int __init debugfs_init(void) @@ -509,11 +520,16 @@ static int __init debugfs_init(void) retval = register_filesystem(&debug_fs_type); if (retval) kobject_put(debug_kobj); + else + debugfs_registered = true; + return retval; } static void __exit debugfs_exit(void) { + debugfs_registered = false; + simple_release_fs(&debugfs_mount, &debugfs_mount_count); unregister_filesystem(&debug_fs_type); kobject_put(debug_kobj); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index af0e01d4c663..eb5c2ba2f81a 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -71,6 +71,9 @@ struct dentry *debugfs_create_bool(const char *name, mode_t mode, struct dentry *debugfs_create_blob(const char *name, mode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob); + +bool debugfs_initialized(void); + #else #include @@ -183,6 +186,11 @@ static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode, return ERR_PTR(-ENODEV); } +static inline bool debugfs_initialized(void) +{ + return false; +} + #endif #endif -- cgit v1.2.3 From 5d1a03dc541dc6672e60e57249ed22f40654ca47 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 23:38:49 -0400 Subject: function-graph: moved the timestamp from arch to generic code This patch move the timestamp from happening in the arch specific code into the general code. This allows for better control by the tracer to time manipulation. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 6 +----- include/linux/ftrace.h | 3 +-- kernel/trace/trace_functions_graph.c | 8 +++++--- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 57b33edb7ce3..61df77532120 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -410,7 +410,6 @@ int ftrace_disable_ftrace_graph_caller(void) void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - unsigned long long calltime; int faulted; struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) @@ -453,10 +452,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - calltime = trace_clock_local(); - - if (ftrace_push_return_trace(old, calltime, - self_addr, &trace.depth) == -EBUSY) { + if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { *parent = old; return; } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db3fed630db3..1141248c84ee 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -369,8 +369,7 @@ struct ftrace_ret_stack { extern void return_to_handler(void); extern int -ftrace_push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func, int *depth); +ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); extern void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e876816fa8e7..d28687e7b3a7 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -57,9 +57,9 @@ static struct tracer_flags tracer_flags = { /* Add a function return address to the trace stack on thread info.*/ int -ftrace_push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func, int *depth) +ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) { + unsigned long long calltime; int index; if (!current->ret_stack) @@ -71,11 +71,13 @@ ftrace_push_return_trace(unsigned long ret, unsigned long long time, return -EBUSY; } + calltime = trace_clock_local(); + index = ++current->curr_ret_stack; barrier(); current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; - current->ret_stack[index].calltime = time; + current->ret_stack[index].calltime = calltime; *depth = index; return 0; -- cgit v1.2.3 From 8aef2d2856158a36c295a8d1288281e4839bff13 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 01:10:15 -0400 Subject: function-graph: ignore times across schedule Impact: more accurate timings The current method of function graph tracing does not take into account the time spent when a task is not running. This shows functions that call schedule have increased costs: 3) + 18.664 us | } ------------------------------------------ 3) -0 => kblockd-123 ------------------------------------------ 3) | finish_task_switch() { 3) 1.441 us | _spin_unlock_irq(); 3) 3.966 us | } 3) ! 2959.433 us | } 3) ! 2961.465 us | } This patch uses the tracepoint in the scheduling context switch to account for time that has elapsed while a task is scheduled out. Now we see: ------------------------------------------ 3) -0 => edac-po-1067 ------------------------------------------ 3) | finish_task_switch() { 3) 0.685 us | _spin_unlock_irq(); 3) 2.331 us | } 3) + 41.439 us | } 3) + 42.663 us | } Signed-off-by: Steven Rostedt --- include/linux/sched.h | 2 ++ kernel/trace/ftrace.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 89cd308cc7a5..471e36d30123 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1409,6 +1409,8 @@ struct task_struct { int curr_ret_stack; /* Stack of return addresses for return function tracing */ struct ftrace_ret_stack *ret_stack; + /* time stamp for last schedule */ + unsigned long long ftrace_timestamp; /* * Number of functions that haven't been traced * because of depth overrun. diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c81a759fbf76..0b90364d1a2c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,6 +29,8 @@ #include #include +#include + #include #include "trace.h" @@ -2590,6 +2592,31 @@ free: return ret; } +static void +ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, + struct task_struct *next) +{ + unsigned long long timestamp; + int index; + + timestamp = trace_clock_local(); + + prev->ftrace_timestamp = timestamp; + + /* only process tasks that we timestamped */ + if (!next->ftrace_timestamp) + return; + + /* + * Update all the counters in next to make up for the + * time next was sleeping. + */ + timestamp -= next->ftrace_timestamp; + + for (index = next->curr_ret_stack; index >= 0; index--) + next->ret_stack[index].calltime += timestamp; +} + /* Allocate a return stack for each task */ static int start_graph_tracing(void) { @@ -2611,6 +2638,13 @@ static int start_graph_tracing(void) ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); + if (!ret) { + ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); + if (ret) + pr_info("ftrace_graph: Couldn't activate tracepoint" + " probe to kernel_sched_switch\n"); + } + kfree(ret_stack_list); return ret; } @@ -2674,6 +2708,7 @@ void unregister_ftrace_graph(void) mutex_lock(&ftrace_lock); atomic_dec(&ftrace_graph_active); + unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); @@ -2694,6 +2729,7 @@ void ftrace_graph_init_task(struct task_struct *t) t->curr_ret_stack = -1; atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); + t->ftrace_timestamp = 0; } else t->ret_stack = NULL; } -- cgit v1.2.3 From ee000b7f9fe429d2470c674ccec8d344f6789e0d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 24 Mar 2009 13:38:06 +0800 Subject: tracing: use union for multi-usages field Impact: cleanup struct dyn_ftrace::ip has different usages in his lifecycle, we use union for it. And also for struct dyn_ftrace::flags. Signed-off-by: Lai Jiangshan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <49C871BE.3080405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 12 +++++++++--- kernel/trace/ftrace.c | 8 ++++---- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1141248c84ee..015a3d22cf74 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -145,9 +145,15 @@ enum { }; struct dyn_ftrace { - unsigned long ip; /* address of mcount call-site */ - unsigned long flags; - struct dyn_arch_ftrace arch; + union { + unsigned long ip; /* address of mcount call-site */ + struct dyn_ftrace *freelist; + }; + union { + unsigned long flags; + struct dyn_ftrace *newlist; + }; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bb377112b1bb..7b8722baf153 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -341,7 +341,7 @@ static inline int record_frozen(struct dyn_ftrace *rec) static void ftrace_free_rec(struct dyn_ftrace *rec) { - rec->ip = (unsigned long)ftrace_free_records; + rec->freelist = ftrace_free_records; ftrace_free_records = rec; rec->flags |= FTRACE_FL_FREE; } @@ -379,7 +379,7 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) return NULL; } - ftrace_free_records = (void *)rec->ip; + ftrace_free_records = rec->freelist; memset(rec, 0, sizeof(*rec)); return rec; } @@ -411,7 +411,7 @@ ftrace_record_ip(unsigned long ip) return NULL; rec->ip = ip; - rec->flags = (unsigned long)ftrace_new_addrs; + rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; return rec; @@ -731,7 +731,7 @@ static int ftrace_update_code(struct module *mod) return -1; p = ftrace_new_addrs; - ftrace_new_addrs = (struct dyn_ftrace *)p->flags; + ftrace_new_addrs = p->newlist; p->flags = 0L; /* convert record (i.e, patch mcount-call with NOP) */ -- cgit v1.2.3