From c574358e8b48adf646f9d5ef70dc76c5d4ad9387 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Sep 2009 17:01:06 -0700 Subject: proc: document `guest' column in /proc/stat We added a new column in cpuX lines of /proc/stat, to show the amount of time spent by a cpu servicing a guest, without updating Documentation/filesystems/proc.txt Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'Documentation/filesystems/proc.txt') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ffead13f9443..1c96cb6c7972 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1032,9 +1032,9 @@ Various pieces of information about kernel activity are available in the since the system first booted. For a quick look, simply cat the file: > cat /proc/stat - cpu 2255 34 2290 22625563 6290 127 456 0 - cpu0 1132 34 1441 11311718 3675 127 438 0 - cpu1 1123 0 849 11313845 2614 0 18 0 + cpu 2255 34 2290 22625563 6290 127 456 0 0 + cpu0 1132 34 1441 11311718 3675 127 438 0 0 + cpu1 1123 0 849 11313845 2614 0 18 0 0 intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...] ctxt 1990473 btime 1062191376 @@ -1056,6 +1056,7 @@ second). The meanings of the columns are as follows, from left to right: - irq: servicing interrupts - softirq: servicing softirqs - steal: involuntary wait +- guest: running a guest The "intr" line gives counts of interrupts serviced since boot time, for each of the possible system interrupts. The first column is the total of all -- cgit v1.2.3 From 398499d5f3613c47f2143b8c54a04efb5d7a6da9 Mon Sep 17 00:00:00 2001 From: "Moussa A. Ba" Date: Mon, 21 Sep 2009 17:02:29 -0700 Subject: pagemap clear_refs: modify to specify anon or mapped vma clearing The patch makes the clear_refs more versatile in adding the option to select anonymous pages or file backed pages for clearing. This addition has a measurable impact on user space application performance as it decreases the number of pagewalks in scenarios where one is only interested in a specific type of page (anonymous or file mapped). The patch adds anonymous and file backed filters to the clear_refs interface. echo 1 > /proc/PID/clear_refs resets the bits on all pages echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only Any other value is ignored Signed-off-by: Moussa A. Ba Signed-off-by: Jared E. Hulbert Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 13 +++++++++++++ fs/proc/task_mmu.c | 28 ++++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 4 deletions(-) (limited to 'Documentation/filesystems/proc.txt') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 1c96cb6c7972..ae7f8bb1b7bc 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -375,6 +375,19 @@ of memory currently marked as referenced or accessed. This file is only present if the CONFIG_MMU kernel configuration option is enabled. +The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG +bits on both physical and virtual pages associated with a process. +To clear the bits for all the pages associated with the process + > echo 1 > /proc/PID/clear_refs + +To clear the bits for the anonymous pages associated with the process + > echo 2 > /proc/PID/clear_refs + +To clear the bits for the file mapped pages associated with the process + > echo 3 > /proc/PID/clear_refs +Any other value written to /proc/PID/clear_refs will have no effect. + + 1.2 Kernel data --------------- diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9bd8be1d235c..59e98fea34a4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -465,6 +465,10 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } +#define CLEAR_REFS_ALL 1 +#define CLEAR_REFS_ANON 2 +#define CLEAR_REFS_MAPPED 3 + static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -472,13 +476,15 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, char buffer[PROC_NUMBUF], *end; struct mm_struct *mm; struct vm_area_struct *vma; + int type; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) + type = simple_strtol(buffer, &end, 0); + if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) return -EINVAL; if (*end == '\n') end++; @@ -494,9 +500,23 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; - if (!is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_start, vma->vm_end, - &clear_refs_walk); + if (is_vm_hugetlb_page(vma)) + continue; + /* + * Writing 1 to /proc/pid/clear_refs affects all pages. + * + * Writing 2 to /proc/pid/clear_refs only affects + * Anonymous pages. + * + * Writing 3 to /proc/pid/clear_refs only affects file + * mapped pages. + */ + if (type == CLEAR_REFS_ANON && vma->vm_file) + continue; + if (type == CLEAR_REFS_MAPPED && !vma->vm_file) + continue; + walk_page_range(vma->vm_start, vma->vm_end, + &clear_refs_walk); } flush_tlb_mm(mm); up_read(&mm->mmap_sem); -- cgit v1.2.3 From 495789a51a91cb8c015d8d77fecbac1caf20b186 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:03:14 -0700 Subject: oom: make oom_score to per-process value oom-killer kills a process, not task. Then oom_score should be calculated as per-process too. it makes consistency more and makes speed up select_bad_process(). Signed-off-by: KOSAKI Motohiro Cc: Paul Menage Cc: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 2 +- fs/proc/base.c | 2 +- mm/oom_kill.c | 35 +++++++++++++++++++++++++++++------ 3 files changed, 31 insertions(+), 8 deletions(-) (limited to 'Documentation/filesystems/proc.txt') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ae7f8bb1b7bc..75988ba26a51 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1205,7 +1205,7 @@ The following heuristics are then applied: * if the task was reniced, its score doubles * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE or CAP_SYS_RAWIO) have their score divided by 4 - * if oom condition happened in one cpuset and checked task does not belong + * if oom condition happened in one cpuset and checked process does not belong to it, its score is divided by 8 * the resulting score is multiplied by two to the power of oom_adj, i.e. points <<= oom_adj when it is positive and diff --git a/fs/proc/base.c b/fs/proc/base.c index 81cfff82875b..71a34253dcbb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer) do_posix_clock_monotonic_gettime(&uptime); read_lock(&tasklist_lock); - points = badness(task, uptime.tv_sec); + points = badness(task->group_leader, uptime.tv_sec); read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 630b77fe862f..372692294844 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -34,6 +34,23 @@ int sysctl_oom_dump_tasks; static DEFINE_SPINLOCK(zone_scan_lock); /* #define DEBUG */ +/* + * Is all threads of the target process nodes overlap ours? + */ +static int has_intersects_mems_allowed(struct task_struct *tsk) +{ + struct task_struct *t; + + t = tsk; + do { + if (cpuset_mems_allowed_intersects(current, t)) + return 1; + t = next_thread(t); + } while (t != tsk); + + return 0; +} + /** * badness - calculate a numeric value for how bad this task has been * @p: task struct of which task we should calculate @@ -59,6 +76,9 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) struct mm_struct *mm; struct task_struct *child; int oom_adj = p->signal->oom_adj; + struct task_cputime task_time; + unsigned long utime; + unsigned long stime; if (oom_adj == OOM_DISABLE) return 0; @@ -106,8 +126,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * of seconds. There is no particular reason for this other than * that it turned out to work very well in practice. */ - cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime)) - >> (SHIFT_HZ + 3); + thread_group_cputime(p, &task_time); + utime = cputime_to_jiffies(task_time.utime); + stime = cputime_to_jiffies(task_time.stime); + cpu_time = (utime + stime) >> (SHIFT_HZ + 3); + if (uptime >= p->start_time.tv_sec) run_time = (uptime - p->start_time.tv_sec) >> 10; @@ -148,7 +171,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * because p may have allocated or otherwise mapped memory on * this node before. However it will be less likely. */ - if (!cpuset_mems_allowed_intersects(current, p)) + if (!has_intersects_mems_allowed(p)) points /= 8; /* @@ -204,13 +227,13 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, static struct task_struct *select_bad_process(unsigned long *ppoints, struct mem_cgroup *mem) { - struct task_struct *g, *p; + struct task_struct *p; struct task_struct *chosen = NULL; struct timespec uptime; *ppoints = 0; do_posix_clock_monotonic_gettime(&uptime); - do_each_thread(g, p) { + for_each_process(p) { unsigned long points; /* @@ -263,7 +286,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, chosen = p; *ppoints = points; } - } while_each_thread(g, p); + } return chosen; } -- cgit v1.2.3 From d899bf7b55f503ba7d3d07ed27c3a37e270fa7db Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Tue, 22 Sep 2009 16:45:40 -0700 Subject: procfs: provide stack information for threads A patch to give a better overview of the userland application stack usage, especially for embedded linux. Currently you are only able to dump the main process/thread stack usage which is showed in /proc/pid/status by the "VmStk" Value. But you get no information about the consumed stack memory of the the threads. There is an enhancement in the /proc//{task/*,}/*maps and which marks the vm mapping where the thread stack pointer reside with "[thread stack xxxxxxxx]". xxxxxxxx is the maximum size of stack. This is a value information, because libpthread doesn't set the start of the stack to the top of the mapped area, depending of the pthread usage. A sample output of /proc//task//maps looks like: 08048000-08049000 r-xp 00000000 03:00 8312 /opt/z 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/z 0804a000-0806b000 rw-p 00000000 00:00 0 [heap] a7d12000-a7d13000 ---p 00000000 00:00 0 a7d13000-a7f13000 rw-p 00000000 00:00 0 [thread stack: 001ff4b4] a7f13000-a7f14000 ---p 00000000 00:00 0 a7f14000-a7f36000 rw-p 00000000 00:00 0 a7f36000-a8069000 r-xp 00000000 03:00 4222 /lib/libc.so.6 a8069000-a806b000 r--p 00133000 03:00 4222 /lib/libc.so.6 a806b000-a806c000 rw-p 00135000 03:00 4222 /lib/libc.so.6 a806c000-a806f000 rw-p 00000000 00:00 0 a806f000-a8083000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0 a8083000-a8084000 r--p 00013000 03:00 14462 /lib/libpthread.so.0 a8084000-a8085000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0 a8085000-a8088000 rw-p 00000000 00:00 0 a8088000-a80a4000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2 a80a4000-a80a5000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2 a80a5000-a80a6000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2 afaf5000-afb0a000 rw-p 00000000 00:00 0 [stack] ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso] Also there is a new entry "stack usage" in /proc//{task/*,}/status which will you give the current stack usage in kb. A sample output of /proc/self/status looks like: Name: cat State: R (running) Tgid: 507 Pid: 507 . . . CapBnd: fffffffffffffeff voluntary_ctxt_switches: 0 nonvoluntary_ctxt_switches: 0 Stack usage: 12 kB I also fixed stack base address in /proc//{task/*,}/stat to the base address of the associated thread stack and not the one of the main process. This makes more sense. [akpm@linux-foundation.org: fs/proc/array.c now needs walk_page_range()] Signed-off-by: Stefani Seibold Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 5 ++- fs/exec.c | 2 + fs/proc/array.c | 85 +++++++++++++++++++++++++++++++++++++- fs/proc/task_mmu.c | 19 +++++++++ include/linux/sched.h | 1 + kernel/fork.c | 2 + mm/Makefile | 4 +- 7 files changed, 114 insertions(+), 4 deletions(-) (limited to 'Documentation/filesystems/proc.txt') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 75988ba26a51..b5aee7838a00 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -176,6 +176,7 @@ read the file /proc/PID/status: CapBnd: ffffffffffffffff voluntary_ctxt_switches: 0 nonvoluntary_ctxt_switches: 1 + Stack usage: 12 kB This shows you nearly the same information you would get if you viewed it with the ps command. In fact, ps uses the proc file system to obtain its @@ -229,6 +230,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7) Mems_allowed_list Same as previous, but in "list format" voluntary_ctxt_switches number of voluntary context switches nonvoluntary_ctxt_switches number of non voluntary context switches + Stack usage: stack usage high water mark (round up to page size) .............................................................................. Table 1-3: Contents of the statm files (as of 2.6.8-rc3) @@ -307,7 +309,7 @@ address perms offset dev inode pathname 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test 0804a000-0806b000 rw-p 00000000 00:00 0 [heap] a7cb1000-a7cb2000 ---p 00000000 00:00 0 -a7cb2000-a7eb2000 rw-p 00000000 00:00 0 +a7cb2000-a7eb2000 rw-p 00000000 00:00 0 [threadstack:001ff4b4] a7eb2000-a7eb3000 ---p 00000000 00:00 0 a7eb3000-a7ed5000 rw-p 00000000 00:00 0 a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 @@ -343,6 +345,7 @@ is not associated with a file: [stack] = the stack of the main process [vdso] = the "virtual dynamic shared object", the kernel system call handler + [threadstack:xxxxxxxx] = the stack of the thread, xxxxxxxx is the stack size or if empty, the mapping is anonymous. diff --git a/fs/exec.c b/fs/exec.c index 69bb9d899791..5c833c18d0d4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1357,6 +1357,8 @@ int do_execve(char * filename, if (retval < 0) goto out; + current->stack_start = current->mm->start_stack; + /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/proc/array.c b/fs/proc/array.c index 725a650bbbb8..0c6bc602e6c4 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include @@ -321,6 +322,87 @@ static inline void task_context_switch_counts(struct seq_file *m, p->nivcsw); } +struct stack_stats { + struct vm_area_struct *vma; + unsigned long startpage; + unsigned long usage; +}; + +static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct stack_stats *ss = walk->private; + struct vm_area_struct *vma = ss->vma; + pte_t *pte, ptent; + spinlock_t *ptl; + int ret = 0; + + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) { + ptent = *pte; + +#ifdef CONFIG_STACK_GROWSUP + if (pte_present(ptent) || is_swap_pte(ptent)) + ss->usage = addr - ss->startpage + PAGE_SIZE; +#else + if (pte_present(ptent) || is_swap_pte(ptent)) { + ss->usage = ss->startpage - addr + PAGE_SIZE; + pte++; + ret = 1; + break; + } +#endif + } + pte_unmap_unlock(pte - 1, ptl); + cond_resched(); + return ret; +} + +static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma, + struct task_struct *task) +{ + struct stack_stats ss; + struct mm_walk stack_walk = { + .pmd_entry = stack_usage_pte_range, + .mm = vma->vm_mm, + .private = &ss, + }; + + if (!vma->vm_mm || is_vm_hugetlb_page(vma)) + return 0; + + ss.vma = vma; + ss.startpage = task->stack_start & PAGE_MASK; + ss.usage = 0; + +#ifdef CONFIG_STACK_GROWSUP + walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end, + &stack_walk); +#else + walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE, + &stack_walk); +#endif + return ss.usage; +} + +static inline void task_show_stack_usage(struct seq_file *m, + struct task_struct *task) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = get_task_mm(task); + + if (mm) { + down_read(&mm->mmap_sem); + vma = find_vma(mm, task->stack_start); + if (vma) + seq_printf(m, "Stack usage:\t%lu kB\n", + get_stack_usage_in_bytes(vma, task) >> 10); + + up_read(&mm->mmap_sem); + mmput(mm); + } +} + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -340,6 +422,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, task_show_regs(m, task); #endif task_context_switch_counts(m, task); + task_show_stack_usage(m, task); return 0; } @@ -481,7 +564,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, rsslim, mm ? mm->start_code : 0, mm ? mm->end_code : 0, - (permitted && mm) ? mm->start_stack : 0, + (permitted) ? task->stack_start : 0, esp, eip, /* The signal information here is obsolete. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 366b1017a4f1..2a1bef9203c6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -243,6 +243,25 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) } else if (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack) { name = "[stack]"; + } else { + unsigned long stack_start; + struct proc_maps_private *pmp; + + pmp = m->private; + stack_start = pmp->task->stack_start; + + if (vma->vm_start <= stack_start && + vma->vm_end >= stack_start) { + pad_len_spaces(m, len); + seq_printf(m, + "[threadstack:%08lx]", +#ifdef CONFIG_STACK_GROWSUP + vma->vm_end - stack_start +#else + stack_start - vma->vm_start +#endif + ); + } } } else { name = "[vdso]"; diff --git a/include/linux/sched.h b/include/linux/sched.h index 6448bbc6406b..3cbc6c0be666 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1529,6 +1529,7 @@ struct task_struct { /* bitmask of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ + unsigned long stack_start; }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/kernel/fork.c b/kernel/fork.c index 7cf45812ce84..8f45b0ebdda7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1095,6 +1095,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->bts = NULL; + p->stack_start = stack_start; + /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); diff --git a/mm/Makefile b/mm/Makefile index 728a9fde49d1..88193d73cd1a 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -11,10 +11,10 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ - page_isolation.o mm_init.o mmu_context.o $(mmu-y) + page_isolation.o mm_init.o mmu_context.o \ + pagewalk.o $(mmu-y) obj-y += init-mm.o -obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HAS_DMA) += dmapool.o -- cgit v1.2.3 From 296c355cd6443d89fa251885a8d78778fe111dc4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 30 Sep 2009 00:32:42 -0400 Subject: ext4: Use tracepoints for mb_history trace file The /proc/fs/ext4//mb_history was maintained manually, and had a number of problems: it required a largish amount of memory to be allocated for each ext4 filesystem, and the s_mb_history_lock introduced a CPU contention problem. By ripping out the mb_history code and replacing it with ftrace tracepoints, and we get more functionality: timestamps, event filtering, the ability to correlate mballoc history with other ext4 tracepoints, etc. Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/proc.txt | 1 - fs/ext4/ext4.h | 14 +- fs/ext4/mballoc.c | 301 ++----------------------------------- fs/ext4/mballoc.h | 33 ---- fs/ext4/super.c | 18 +-- include/trace/events/ext4.h | 163 ++++++++++++++++++++ 6 files changed, 182 insertions(+), 348 deletions(-) (limited to 'Documentation/filesystems/proc.txt') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index b5aee7838a00..2c48f945546b 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1113,7 +1113,6 @@ Table 1-12: Files in /proc/fs/ext4/ .............................................................................. File Content mb_groups details of multiblock allocator buddy cache of free blocks - mb_history multiblock allocation history .............................................................................. diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b491576e11c3..c508cf7be75c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -65,6 +65,12 @@ typedef __u32 ext4_lblk_t; /* data type for block group number */ typedef unsigned int ext4_group_t; +/* + * Flags used in mballoc's allocation_context flags field. + * + * Also used to show what's going on for debugging purposes when the + * flag field is exported via the traceport interface + */ /* prefer goal again. length */ #define EXT4_MB_HINT_MERGE 0x0001 @@ -971,14 +977,6 @@ struct ext4_sb_info { unsigned long s_mb_last_group; unsigned long s_mb_last_start; - /* history to debug policy */ - struct ext4_mb_history *s_mb_history; - int s_mb_history_cur; - int s_mb_history_max; - int s_mb_history_num; - spinlock_t s_mb_history_lock; - int s_mb_history_filter; - /* stats for buddy allocator */ spinlock_t s_mb_pa_lock; atomic_t s_bal_reqs; /* number of reqs with len > 1 */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3e2320e66721..bba12824defa 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2096,207 +2096,6 @@ out: return err; } -#ifdef EXT4_MB_HISTORY -struct ext4_mb_proc_session { - struct ext4_mb_history *history; - struct super_block *sb; - int start; - int max; -}; - -static void *ext4_mb_history_skip_empty(struct ext4_mb_proc_session *s, - struct ext4_mb_history *hs, - int first) -{ - if (hs == s->history + s->max) - hs = s->history; - if (!first && hs == s->history + s->start) - return NULL; - while (hs->orig.fe_len == 0) { - hs++; - if (hs == s->history + s->max) - hs = s->history; - if (hs == s->history + s->start) - return NULL; - } - return hs; -} - -static void *ext4_mb_seq_history_start(struct seq_file *seq, loff_t *pos) -{ - struct ext4_mb_proc_session *s = seq->private; - struct ext4_mb_history *hs; - int l = *pos; - - if (l == 0) - return SEQ_START_TOKEN; - hs = ext4_mb_history_skip_empty(s, s->history + s->start, 1); - if (!hs) - return NULL; - while (--l && (hs = ext4_mb_history_skip_empty(s, ++hs, 0)) != NULL); - return hs; -} - -static void *ext4_mb_seq_history_next(struct seq_file *seq, void *v, - loff_t *pos) -{ - struct ext4_mb_proc_session *s = seq->private; - struct ext4_mb_history *hs = v; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ext4_mb_history_skip_empty(s, s->history + s->start, 1); - else - return ext4_mb_history_skip_empty(s, ++hs, 0); -} - -static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) -{ - char buf[25], buf2[25], buf3[25], *fmt; - struct ext4_mb_history *hs = v; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " - "%-5s %-2s %-6s %-5s %-5s %-6s\n", - "pid", "inode", "original", "goal", "result", "found", - "grps", "cr", "flags", "merge", "tail", "broken"); - return 0; - } - - if (hs->op == EXT4_MB_HISTORY_ALLOC) { - fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " - "0x%04x %-5s %-5u %-6u\n"; - sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, - hs->result.fe_start, hs->result.fe_len, - hs->result.fe_logical); - sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group, - hs->orig.fe_start, hs->orig.fe_len, - hs->orig.fe_logical); - sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group, - hs->goal.fe_start, hs->goal.fe_len, - hs->goal.fe_logical); - seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, - hs->found, hs->groups, hs->cr, hs->flags, - hs->merged ? "M" : "", hs->tail, - hs->buddy ? 1 << hs->buddy : 0); - } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) { - fmt = "%-5u %-8u %-23s %-23s %-23s\n"; - sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, - hs->result.fe_start, hs->result.fe_len, - hs->result.fe_logical); - sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group, - hs->orig.fe_start, hs->orig.fe_len, - hs->orig.fe_logical); - seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); - } else if (hs->op == EXT4_MB_HISTORY_DISCARD) { - sprintf(buf2, "%u/%d/%u", hs->result.fe_group, - hs->result.fe_start, hs->result.fe_len); - seq_printf(seq, "%-5u %-8u %-23s discard\n", - hs->pid, hs->ino, buf2); - } else if (hs->op == EXT4_MB_HISTORY_FREE) { - sprintf(buf2, "%u/%d/%u", hs->result.fe_group, - hs->result.fe_start, hs->result.fe_len); - seq_printf(seq, "%-5u %-8u %-23s free\n", - hs->pid, hs->ino, buf2); - } - return 0; -} - -static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v) -{ -} - -static const struct seq_operations ext4_mb_seq_history_ops = { - .start = ext4_mb_seq_history_start, - .next = ext4_mb_seq_history_next, - .stop = ext4_mb_seq_history_stop, - .show = ext4_mb_seq_history_show, -}; - -static int ext4_mb_seq_history_open(struct inode *inode, struct file *file) -{ - struct super_block *sb = PDE(inode)->data; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_mb_proc_session *s; - int rc; - int size; - - if (unlikely(sbi->s_mb_history == NULL)) - return -ENOMEM; - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return -ENOMEM; - s->sb = sb; - size = sizeof(struct ext4_mb_history) * sbi->s_mb_history_max; - s->history = kmalloc(size, GFP_KERNEL); - if (s->history == NULL) { - kfree(s); - return -ENOMEM; - } - - spin_lock(&sbi->s_mb_history_lock); - memcpy(s->history, sbi->s_mb_history, size); - s->max = sbi->s_mb_history_max; - s->start = sbi->s_mb_history_cur % s->max; - spin_unlock(&sbi->s_mb_history_lock); - - rc = seq_open(file, &ext4_mb_seq_history_ops); - if (rc == 0) { - struct seq_file *m = (struct seq_file *)file->private_data; - m->private = s; - } else { - kfree(s->history); - kfree(s); - } - return rc; - -} - -static int ext4_mb_seq_history_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct ext4_mb_proc_session *s = seq->private; - kfree(s->history); - kfree(s); - return seq_release(inode, file); -} - -static ssize_t ext4_mb_seq_history_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct ext4_mb_proc_session *s = seq->private; - struct super_block *sb = s->sb; - char str[32]; - int value; - - if (count >= sizeof(str)) { - printk(KERN_ERR "EXT4-fs: %s string too long, max %u bytes\n", - "mb_history", (int)sizeof(str)); - return -EOVERFLOW; - } - - if (copy_from_user(str, buffer, count)) - return -EFAULT; - - value = simple_strtol(str, NULL, 0); - if (value < 0) - return -ERANGE; - EXT4_SB(sb)->s_mb_history_filter = value; - - return count; -} - -static const struct file_operations ext4_mb_seq_history_fops = { - .owner = THIS_MODULE, - .open = ext4_mb_seq_history_open, - .read = seq_read, - .write = ext4_mb_seq_history_write, - .llseek = seq_lseek, - .release = ext4_mb_seq_history_release, -}; - static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { struct super_block *sb = seq->private; @@ -2396,82 +2195,6 @@ static const struct file_operations ext4_mb_seq_groups_fops = { .release = seq_release, }; -static void ext4_mb_history_release(struct super_block *sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (sbi->s_proc != NULL) { - remove_proc_entry("mb_groups", sbi->s_proc); - if (sbi->s_mb_history_max) - remove_proc_entry("mb_history", sbi->s_proc); - } - kfree(sbi->s_mb_history); -} - -static void ext4_mb_history_init(struct super_block *sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - int i; - - if (sbi->s_proc != NULL) { - if (sbi->s_mb_history_max) - proc_create_data("mb_history", S_IRUGO, sbi->s_proc, - &ext4_mb_seq_history_fops, sb); - proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, - &ext4_mb_seq_groups_fops, sb); - } - - sbi->s_mb_history_cur = 0; - spin_lock_init(&sbi->s_mb_history_lock); - i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); - sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL; - /* if we can't allocate history, then we simple won't use it */ -} - -static noinline_for_stack void -ext4_mb_store_history(struct ext4_allocation_context *ac) -{ - struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - struct ext4_mb_history h; - - if (sbi->s_mb_history == NULL) - return; - - if (!(ac->ac_op & sbi->s_mb_history_filter)) - return; - - h.op = ac->ac_op; - h.pid = current->pid; - h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0; - h.orig = ac->ac_o_ex; - h.result = ac->ac_b_ex; - h.flags = ac->ac_flags; - h.found = ac->ac_found; - h.groups = ac->ac_groups_scanned; - h.cr = ac->ac_criteria; - h.tail = ac->ac_tail; - h.buddy = ac->ac_buddy; - h.merged = 0; - if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) { - if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && - ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) - h.merged = 1; - h.goal = ac->ac_g_ex; - h.result = ac->ac_f_ex; - } - - spin_lock(&sbi->s_mb_history_lock); - memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); - if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) - sbi->s_mb_history_cur = 0; - spin_unlock(&sbi->s_mb_history_lock); -} - -#else -#define ext4_mb_history_release(sb) -#define ext4_mb_history_init(sb) -#endif - /* Create and initialize ext4_group_info data for the given group. */ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, @@ -2690,7 +2413,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) sbi->s_mb_stats = MB_DEFAULT_STATS; sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; - sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); @@ -2708,7 +2430,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) spin_lock_init(&lg->lg_prealloc_lock); } - ext4_mb_history_init(sb); + if (sbi->s_proc) + proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_groups_fops, sb); if (sbi->s_journal) sbi->s_journal->j_commit_callback = release_blocks_on_commit; @@ -2788,7 +2512,8 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); - ext4_mb_history_release(sb); + if (sbi->s_proc) + remove_proc_entry("mb_groups", sbi->s_proc); return 0; } @@ -3274,7 +2999,10 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) atomic_inc(&sbi->s_bal_breaks); } - ext4_mb_store_history(ac); + if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) + trace_ext4_mballoc_alloc(ac); + else + trace_ext4_mballoc_prealloc(ac); } /* @@ -3774,7 +3502,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, if (ac) { ac->ac_sb = sb; ac->ac_inode = pa->pa_inode; - ac->ac_op = EXT4_MB_HISTORY_DISCARD; } while (bit < end) { @@ -3794,7 +3521,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, ac->ac_b_ex.fe_start = bit; ac->ac_b_ex.fe_len = next - bit; ac->ac_b_ex.fe_logical = 0; - ext4_mb_store_history(ac); + trace_ext4_mballoc_discard(ac); } trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, @@ -3829,9 +3556,6 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ext4_group_t group; ext4_grpblk_t bit; - if (ac) - ac->ac_op = EXT4_MB_HISTORY_DISCARD; - trace_ext4_mb_release_group_pa(ac, pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); @@ -3846,7 +3570,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ac->ac_b_ex.fe_start = bit; ac->ac_b_ex.fe_len = pa->pa_len; ac->ac_b_ex.fe_logical = 0; - ext4_mb_store_history(ac); + trace_ext4_mballoc_discard(ac); } return 0; @@ -4737,7 +4461,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); if (ac) { - ac->ac_op = EXT4_MB_HISTORY_FREE; ac->ac_inode = inode; ac->ac_sb = sb; } @@ -4804,7 +4527,7 @@ do_more: ac->ac_b_ex.fe_group = block_group; ac->ac_b_ex.fe_start = bit; ac->ac_b_ex.fe_len = count; - ext4_mb_store_history(ac); + trace_ext4_mballoc_free(ac); } err = ext4_mb_load_buddy(sb, block_group, &e4b); diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 14f25f253112..0ca811061bc7 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -52,18 +52,8 @@ extern u8 mb_enable_debug; #define mb_debug(n, fmt, a...) #endif -/* - * with EXT4_MB_HISTORY mballoc stores last N allocations in memory - * and you can monitor it in /proc/fs/ext4//mb_history - */ -#define EXT4_MB_HISTORY #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ #define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ -#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ -#define EXT4_MB_HISTORY_FREE 8 /* free */ - -#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ - EXT4_MB_HISTORY_PREALLOC) /* * How long mballoc can look for a best extent (in found extents) @@ -217,22 +207,6 @@ struct ext4_allocation_context { #define AC_STATUS_FOUND 2 #define AC_STATUS_BREAK 3 -struct ext4_mb_history { - struct ext4_free_extent orig; /* orig allocation */ - struct ext4_free_extent goal; /* goal allocation */ - struct ext4_free_extent result; /* result allocation */ - unsigned pid; - unsigned ino; - __u16 found; /* how many extents have been found */ - __u16 groups; /* how many groups have been scanned */ - __u16 tail; /* what tail broke some buddy */ - __u16 buddy; /* buddy the tail ^^^ broke */ - __u16 flags; - __u8 cr:3; /* which phase the result extent was found at */ - __u8 op:4; - __u8 merged:1; -}; - struct ext4_buddy { struct page *bd_buddy_page; void *bd_buddy; @@ -247,13 +221,6 @@ struct ext4_buddy { #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) -#ifndef EXT4_MB_HISTORY -static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) -{ - return; -} -#endif - #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e5b206a043a5..12e726a7073f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -50,13 +50,6 @@ #define CREATE_TRACE_POINTS #include -static int default_mb_history_length = 1000; - -module_param_named(default_mb_history_length, default_mb_history_length, - int, 0644); -MODULE_PARM_DESC(default_mb_history_length, - "Default number of entries saved for mb_history"); - struct proc_dir_entry *ext4_proc_root; static struct kset *ext4_kset; @@ -1079,7 +1072,7 @@ enum { Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, + Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, @@ -1126,7 +1119,6 @@ static const match_table_t tokens = { {Opt_data_writeback, "data=writeback"}, {Opt_data_err_abort, "data_err=abort"}, {Opt_data_err_ignore, "data_err=ignore"}, - {Opt_mb_history_length, "mb_history_length=%u"}, {Opt_offusrjquota, "usrjquota="}, {Opt_usrjquota, "usrjquota=%s"}, {Opt_offgrpjquota, "grpjquota="}, @@ -1367,13 +1359,6 @@ static int parse_options(char *options, struct super_block *sb, case Opt_data_err_ignore: clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); break; - case Opt_mb_history_length: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - sbi->s_mb_history_max = option; - break; #ifdef CONFIG_QUOTA case Opt_usrjquota: qtype = USRQUOTA; @@ -2435,7 +2420,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; - sbi->s_mb_history_max = default_mb_history_length; set_opt(sbi->s_mount_opt, BARRIER); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 7c6bbb7198a3..b8320256dc5d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -743,6 +743,169 @@ TRACE_EVENT(ext4_alloc_da_blocks, __entry->data_blocks, __entry->meta_blocks) ); +TRACE_EVENT(ext4_mballoc_alloc, + TP_PROTO(struct ext4_allocation_context *ac), + + TP_ARGS(ac), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u16, found ) + __field( __u16, groups ) + __field( __u16, buddy ) + __field( __u16, flags ) + __field( __u16, tail ) + __field( __u8, cr ) + __field( __u32, orig_logical ) + __field( int, orig_start ) + __field( __u32, orig_group ) + __field( int, orig_len ) + __field( __u32, goal_logical ) + __field( int, goal_start ) + __field( __u32, goal_group ) + __field( int, goal_len ) + __field( __u32, result_logical ) + __field( int, result_start ) + __field( __u32, result_group ) + __field( int, result_len ) + ), + + TP_fast_assign( + __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->found = ac->ac_found; + __entry->flags = ac->ac_flags; + __entry->groups = ac->ac_groups_scanned; + __entry->buddy = ac->ac_buddy; + __entry->tail = ac->ac_tail; + __entry->cr = ac->ac_criteria; + __entry->orig_logical = ac->ac_o_ex.fe_logical; + __entry->orig_start = ac->ac_o_ex.fe_start; + __entry->orig_group = ac->ac_o_ex.fe_group; + __entry->orig_len = ac->ac_o_ex.fe_len; + __entry->goal_logical = ac->ac_g_ex.fe_logical; + __entry->goal_start = ac->ac_g_ex.fe_start; + __entry->goal_group = ac->ac_g_ex.fe_group; + __entry->goal_len = ac->ac_g_ex.fe_len; + __entry->result_logical = ac->ac_f_ex.fe_logical; + __entry->result_start = ac->ac_f_ex.fe_start; + __entry->result_group = ac->ac_f_ex.fe_group; + __entry->result_len = ac->ac_f_ex.fe_len; + ), + + TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " + "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x " + "tail %u broken %u", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->orig_group, __entry->orig_start, + __entry->orig_len, __entry->orig_logical, + __entry->goal_group, __entry->goal_start, + __entry->goal_len, __entry->goal_logical, + __entry->result_group, __entry->result_start, + __entry->result_len, __entry->result_logical, + __entry->found, __entry->groups, __entry->cr, + __entry->flags, __entry->tail, + __entry->buddy ? 1 << __entry->buddy : 0) +); + +TRACE_EVENT(ext4_mballoc_prealloc, + TP_PROTO(struct ext4_allocation_context *ac), + + TP_ARGS(ac), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u32, orig_logical ) + __field( int, orig_start ) + __field( __u32, orig_group ) + __field( int, orig_len ) + __field( __u32, result_logical ) + __field( int, result_start ) + __field( __u32, result_group ) + __field( int, result_len ) + ), + + TP_fast_assign( + __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->orig_logical = ac->ac_o_ex.fe_logical; + __entry->orig_start = ac->ac_o_ex.fe_start; + __entry->orig_group = ac->ac_o_ex.fe_group; + __entry->orig_len = ac->ac_o_ex.fe_len; + __entry->result_logical = ac->ac_b_ex.fe_logical; + __entry->result_start = ac->ac_b_ex.fe_start; + __entry->result_group = ac->ac_b_ex.fe_group; + __entry->result_len = ac->ac_b_ex.fe_len; + ), + + TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->orig_group, __entry->orig_start, + __entry->orig_len, __entry->orig_logical, + __entry->result_group, __entry->result_start, + __entry->result_len, __entry->result_logical) +); + +TRACE_EVENT(ext4_mballoc_discard, + TP_PROTO(struct ext4_allocation_context *ac), + + TP_ARGS(ac), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u32, result_logical ) + __field( int, result_start ) + __field( __u32, result_group ) + __field( int, result_len ) + ), + + TP_fast_assign( + __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->result_logical = ac->ac_b_ex.fe_logical; + __entry->result_start = ac->ac_b_ex.fe_start; + __entry->result_group = ac->ac_b_ex.fe_group; + __entry->result_len = ac->ac_b_ex.fe_len; + ), + + TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->result_group, __entry->result_start, + __entry->result_len, __entry->result_logical) +); + +TRACE_EVENT(ext4_mballoc_free, + TP_PROTO(struct ext4_allocation_context *ac), + + TP_ARGS(ac), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u32, result_logical ) + __field( int, result_start ) + __field( __u32, result_group ) + __field( int, result_len ) + ), + + TP_fast_assign( + __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->result_logical = ac->ac_b_ex.fe_logical; + __entry->result_start = ac->ac_b_ex.fe_start; + __entry->result_group = ac->ac_b_ex.fe_group; + __entry->result_len = ac->ac_b_ex.fe_len; + ), + + TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->result_group, __entry->result_start, + __entry->result_len, __entry->result_logical) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.2.3