author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-09 11:46:20 -0800
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-09 11:46:20 -0800
commit | 9f2a635235823cf016eb8af0aeb3c0b2b25cea64 (patch)
tree | 1d22be7599ab78365344f1352bd60723dd680250 /kernel
parent | fb46e22a9e3863e08aef8815df9f17d0f4b9aede (diff)
parent | 6dff315972640bfe542e2d044933751afd8e6c4a (diff)
Merge tag 'mm-nonmm-stable-2024-01-09-10-33' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:
"Quite a lot of kexec work this time around. Many singleton patches in
many places. The notable patch series are:
- nilfs2 folio conversion from Matthew Wilcox in 'nilfs2: Folio
conversions for file paths'.
- Additional nilfs2 folio conversion from Ryusuke Konishi in 'nilfs2:
Folio conversions for directory paths'.
- IA64 remnant removal in Heiko Carstens's 'Remove unused code after
IA-64 removal'.
- Arnd Bergmann has enabled the -Wmissing-prototypes warning
everywhere in 'Treewide: enable -Wmissing-prototypes'; a minimal
illustration of the warning follows this list. This had some
follow-up fixes:
- Nathan Chancellor has cleaned up the hexagon build in the series
'hexagon: Fix up instances of -Wmissing-prototypes'.
- Nathan also addressed some s390 warnings in 's390: A couple of
fixes for -Wmissing-prototypes'.
- Arnd Bergmann addresses the same warnings for MIPS in his series
'mips: address -Wmissing-prototypes warnings'.
- Baoquan He has made kexec_file operate in a top-down-fitting manner
similar to kexec_load in the series 'kexec_file: Load kernel at top
of system RAM if required'; a simplified sketch of the top-down
search follows this list.
- Baoquan He has also added the self-explanatory 'kexec_file: print
out debugging message if required'.
- Some checkstack maintenance work from Tiezhu Yang in the series
'Modify some code about checkstack'.
- Douglas Anderson has disentangled the watchdog code's logging when
multiple reports occur simultaneously, in the series 'watchdog:
Better handling of concurrent lockups'; the serialization pattern is
sketched after this list.
- Yuntao Wang has contributed some maintenance work on the crash code
in 'crash: Some cleanups and fixes'"
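A note on -Wmissing-prototypes: the warning fires whenever a function
with external linkage is defined without a previous prototype in scope,
which normally means the function should either be declared in a header
shared with its callers or be made static. A minimal user-space
illustration (not kernel code; file and function names are invented):

    /* warn.c -- compile with: gcc -Wmissing-prototypes -c warn.c */

    /* gcc warns here: "no previous prototype for 'helper_add'". */
    int helper_add(int a, int b)
    {
            return a + b;
    }

    /* Fix 1: provide a prototype, normally in a header included both
       here and by the callers. */
    int helper_sub(int a, int b);
    int helper_sub(int a, int b)
    {
            return a - b;
    }

    /* Fix 2: mark functions used only in this file as static, so no
       external prototype is expected. */
    static int helper_mul(int a, int b)
    {
            return a * b;
    }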
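The top-down fitting now done by kexec_file (mirroring kexec_load)
amounts to walking System RAM from the highest usable address downward
and taking the first aligned start that keeps the whole buffer inside
the allowed window and clear of already-claimed segments. Below is a
hedged, self-contained sketch of that search; the function and
parameter names are illustrative, not the kernel's:

    #include <stdbool.h>
    #include <stdint.h>

    /* Round v down to a multiple of a; a must be a power of two. */
    static uint64_t align_down(uint64_t v, uint64_t a)
    {
            return v & ~(a - 1);
    }

    /*
     * Find the highest aligned start in [min, max] for a buffer of
     * 'size' bytes that does not overlap any already-claimed region
     * (reported by the 'overlaps' callback). Returns true and sets
     * *load_addr on success.
     */
    static bool find_hole_top_down(uint64_t min, uint64_t max,
                                   uint64_t size, uint64_t align,
                                   bool (*overlaps)(uint64_t start, uint64_t end),
                                   uint64_t *load_addr)
    {
            uint64_t start;

            if (size == 0 || size > max - min + 1)
                    return false;

            /* Highest possible start for a buffer ending at 'max'. */
            start = max - size + 1;

            while (start >= min) {
                    start = align_down(start, align);
                    if (start < min)
                            break;
                    if (!overlaps(start, start + size - 1)) {
                            *load_addr = start;
                            return true;
                    }
                    if (start < align)      /* avoid unsigned wrap-around */
                            break;
                    start -= align;         /* try the next aligned slot below */
            }
            return false;
    }

In the kernel itself this takes the form of the new
walk_system_ram_res_rev() helper added to kernel/resource.c (see the
diff below), which feeds candidate ranges to kexec's hole-locating
code; the candidates are aligned down with ALIGN_DOWN().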
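On the watchdog side, concurrent lockup reports are kept readable by
wrapping the multi-line dump in printk_cpu_sync_get_irqsave() /
printk_cpu_sync_put_irqrestore(), while the one-line headline is
deliberately printed before taking the sync so it can still get out if
another CPU is wedged while holding it. A condensed sketch of that
pattern, following the hunks further down (report_lockup is an
illustrative name, not a kernel function):

    #include <linux/module.h>       /* print_modules() */
    #include <linux/printk.h>
    #include <linux/ptrace.h>       /* struct pt_regs */
    #include <linux/sched/debug.h>  /* show_regs() */

    /* Illustrative condensation of the hard-lockup reporting path. */
    static void report_lockup(unsigned int cpu, struct pt_regs *regs)
    {
            unsigned long flags;

            /* Headline first, outside the sync, so it is never blocked
               by another CPU that locked up holding the printk sync. */
            pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu);

            /* Serialize the multi-line dump against other CPUs. */
            printk_cpu_sync_get_irqsave(flags);
            print_modules();
            if (regs)
                    show_regs(regs);
            else
                    dump_stack();
            printk_cpu_sync_put_irqrestore(flags);
    }

The hardlockup path additionally gates the all-CPU backtrace with
test_and_set_bit_lock() on hard_lockup_nmi_warn so only one CPU at a
time triggers it, mirroring what the softlockup path already did.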
* tag 'mm-nonmm-stable-2024-01-09-10-33' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (157 commits)
crash_core: fix and simplify the logic of crash_exclude_mem_range()
x86/crash: use SZ_1M macro instead of hardcoded value
x86/crash: remove the unused image parameter from prepare_elf_headers()
kdump: remove redundant DEFAULT_CRASH_KERNEL_LOW_SIZE
scripts/decode_stacktrace.sh: strip unexpected CR from lines
watchdog: if panicking and we dumped everything, don't re-enable dumping
watchdog/hardlockup: use printk_cpu_sync_get_irqsave() to serialize reporting
watchdog/softlockup: use printk_cpu_sync_get_irqsave() to serialize reporting
watchdog/hardlockup: adopt softlockup logic avoiding double-dumps
kexec_core: fix the assignment to kimage->control_page
x86/kexec: fix incorrect end address passed to kernel_ident_mapping_init()
lib/trace_readwrite.c:: replace asm-generic/io with linux/io
nilfs2: cpfile: fix some kernel-doc warnings
stacktrace: fix kernel-doc typo
scripts/checkstack.pl: fix no space expression between sp and offset
x86/kexec: fix incorrect argument passed to kexec_dprintk()
x86/kexec: use pr_err() instead of kexec_dprintk() when an error occurs
nilfs2: add missing set_freezable() for freezable kthread
kernel: relay: remove relay_file_splice_read dead code, doesn't work
docs: submit-checklist: remove all of "make namespacecheck"
...
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/crash_core.c | 89
-rw-r--r-- | kernel/fork.c | 28
-rw-r--r-- | kernel/kexec_core.c | 21
-rw-r--r-- | kernel/kexec_file.c | 20
-rw-r--r-- | kernel/ptrace.c | 13
-rw-r--r-- | kernel/reboot.c | 17
-rw-r--r-- | kernel/relay.c | 162
-rw-r--r-- | kernel/resource.c | 57
-rw-r--r-- | kernel/sched/fair.c | 13
-rw-r--r-- | kernel/sched/sched.h | 11
-rw-r--r-- | kernel/signal.c | 28
-rw-r--r-- | kernel/stacktrace.c | 2
-rw-r--r-- | kernel/watchdog.c | 40
13 files changed, 187 insertions, 314 deletions
diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 56cf4ad7abbb..d48315667752 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -13,7 +13,6 @@ #include <linux/memory.h> #include <linux/cpuhotplug.h> #include <linux/memblock.h> -#include <linux/kexec.h> #include <linux/kmemleak.h> #include <asm/page.h> @@ -551,9 +550,11 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; phdr->p_align = 0; ehdr->e_phnum++; - pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", - phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, - ehdr->e_phnum, phdr->p_offset); +#ifdef CONFIG_KEXEC_FILE + kexec_dprintk("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", + phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, + ehdr->e_phnum, phdr->p_offset); +#endif phdr++; } @@ -565,9 +566,8 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mstart, unsigned long long mend) { - int i, j; + int i; unsigned long long start, end, p_start, p_end; - struct range temp_range = {0, 0}; for (i = 0; i < mem->nr_ranges; i++) { start = mem->ranges[i].start; @@ -575,72 +575,51 @@ int crash_exclude_mem_range(struct crash_mem *mem, p_start = mstart; p_end = mend; - if (mstart > end || mend < start) + if (p_start > end) continue; + /* + * Because the memory ranges in mem->ranges are stored in + * ascending order, when we detect `p_end < start`, we can + * immediately exit the for loop, as the subsequent memory + * ranges will definitely be outside the range we are looking + * for. + */ + if (p_end < start) + break; + /* Truncate any area outside of range */ - if (mstart < start) + if (p_start < start) p_start = start; - if (mend > end) + if (p_end > end) p_end = end; /* Found completely overlapping range */ if (p_start == start && p_end == end) { - mem->ranges[i].start = 0; - mem->ranges[i].end = 0; - if (i < mem->nr_ranges - 1) { - /* Shift rest of the ranges to left */ - for (j = i; j < mem->nr_ranges - 1; j++) { - mem->ranges[j].start = - mem->ranges[j+1].start; - mem->ranges[j].end = - mem->ranges[j+1].end; - } - - /* - * Continue to check if there are another overlapping ranges - * from the current position because of shifting the above - * mem ranges. 
- */ - i--; - mem->nr_ranges--; - continue; - } + memmove(&mem->ranges[i], &mem->ranges[i + 1], + (mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i])); + i--; mem->nr_ranges--; - return 0; - } - - if (p_start > start && p_end < end) { + } else if (p_start > start && p_end < end) { /* Split original range */ + if (mem->nr_ranges >= mem->max_nr_ranges) + return -ENOMEM; + + memmove(&mem->ranges[i + 2], &mem->ranges[i + 1], + (mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i])); + mem->ranges[i].end = p_start - 1; - temp_range.start = p_end + 1; - temp_range.end = end; + mem->ranges[i + 1].start = p_end + 1; + mem->ranges[i + 1].end = end; + + i++; + mem->nr_ranges++; } else if (p_start != start) mem->ranges[i].end = p_start - 1; else mem->ranges[i].start = p_end + 1; - break; - } - - /* If a split happened, add the split to array */ - if (!temp_range.end) - return 0; - - /* Split happened */ - if (i == mem->max_nr_ranges - 1) - return -ENOMEM; - - /* Location where new range should go */ - j = i + 1; - if (j < mem->nr_ranges) { - /* Move over all ranges one slot towards the end */ - for (i = mem->nr_ranges - 1; i >= j; i--) - mem->ranges[i + 1] = mem->ranges[i]; } - mem->ranges[j].start = temp_range.start; - mem->ranges[j].end = temp_range.end; - mem->nr_ranges++; return 0; } diff --git a/kernel/fork.c b/kernel/fork.c index 56cf276432c8..b32e323adbbf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -165,7 +165,6 @@ void __weak arch_release_task_struct(struct task_struct *tsk) { } -#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR static struct kmem_cache *task_struct_cachep; static inline struct task_struct *alloc_task_struct_node(int node) @@ -177,9 +176,6 @@ static inline void free_task_struct(struct task_struct *tsk) { kmem_cache_free(task_struct_cachep, tsk); } -#endif - -#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a @@ -412,24 +408,6 @@ void thread_stack_cache_init(void) } # endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ -#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ - -static int alloc_thread_stack_node(struct task_struct *tsk, int node) -{ - unsigned long *stack; - - stack = arch_alloc_thread_stack_node(tsk, node); - tsk->stack = stack; - return stack ? 0 : -ENOMEM; -} - -static void free_thread_stack(struct task_struct *tsk) -{ - arch_free_thread_stack(tsk); - tsk->stack = NULL; -} - -#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ /* SLAB cache for signal_struct structures (tsk->signal) */ static struct kmem_cache *signal_cachep; @@ -1039,7 +1017,6 @@ static void set_max_threads(unsigned int max_threads_suggested) int arch_task_struct_size __read_mostly; #endif -#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR static void task_struct_whitelist(unsigned long *offset, unsigned long *size) { /* Fetch thread_struct whitelist for the architecture. 
*/ @@ -1054,12 +1031,10 @@ static void task_struct_whitelist(unsigned long *offset, unsigned long *size) else *offset += offsetof(struct task_struct, thread); } -#endif /* CONFIG_ARCH_TASK_STRUCT_ALLOCATOR */ void __init fork_init(void) { int i; -#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN 0 #endif @@ -1072,7 +1047,6 @@ void __init fork_init(void) arch_task_struct_size, align, SLAB_PANIC|SLAB_ACCOUNT, useroffset, usersize, NULL); -#endif /* do the arch specific task caches init */ arch_task_cache_init(); @@ -1606,7 +1580,7 @@ static void complete_vfork_done(struct task_struct *tsk) static int wait_for_vfork_done(struct task_struct *child, struct completion *vfork) { - unsigned int state = TASK_UNINTERRUPTIBLE|TASK_KILLABLE|TASK_FREEZABLE; + unsigned int state = TASK_KILLABLE|TASK_FREEZABLE; int killed; cgroup_enter_frozen(); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index be5642a4ec49..a08031b57a61 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -52,6 +52,8 @@ atomic_t __kexec_lock = ATOMIC_INIT(0); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; +bool kexec_file_dbg_print; + int kexec_should_crash(struct task_struct *p) { /* @@ -276,8 +278,8 @@ int kimage_is_destination_range(struct kimage *image, unsigned long mstart, mend; mstart = image->segment[i].mem; - mend = mstart + image->segment[i].memsz; - if ((end > mstart) && (start < mend)) + mend = mstart + image->segment[i].memsz - 1; + if ((end >= mstart) && (start <= mend)) return 1; } @@ -370,7 +372,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, pfn = page_to_boot_pfn(pages); epfn = pfn + count; addr = pfn << PAGE_SHIFT; - eaddr = epfn << PAGE_SHIFT; + eaddr = (epfn << PAGE_SHIFT) - 1; if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || kimage_is_destination_range(image, addr, eaddr)) { list_add(&pages->lru, &extra_pages); @@ -430,7 +432,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, pages = NULL; size = (1 << order) << PAGE_SHIFT; - hole_start = (image->control_page + (size - 1)) & ~(size - 1); + hole_start = ALIGN(image->control_page, size); hole_end = hole_start + size - 1; while (hole_end <= crashk_res.end) { unsigned long i; @@ -447,7 +449,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, mend = mstart + image->segment[i].memsz - 1; if ((hole_end >= mstart) && (hole_start <= mend)) { /* Advance the hole to the end of the segment */ - hole_start = (mend + (size - 1)) & ~(size - 1); + hole_start = ALIGN(mend, size); hole_end = hole_start + size - 1; break; } @@ -455,7 +457,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, /* If I don't overlap any segments I have found my hole! */ if (i == image->nr_segments) { pages = pfn_to_page(hole_start >> PAGE_SHIFT); - image->control_page = hole_end; + image->control_page = hole_end + 1; break; } } @@ -716,7 +718,7 @@ static struct page *kimage_alloc_page(struct kimage *image, /* If the page is not a destination page use it */ if (!kimage_is_destination_range(image, addr, - addr + PAGE_SIZE)) + addr + PAGE_SIZE - 1)) break; /* @@ -1063,9 +1065,10 @@ __bpf_kfunc void crash_kexec(struct pt_regs *regs) * panic(). Otherwise parallel calls of panic() and crash_kexec() * may stop each other. To exclude them, we use panic_cpu here too. 
*/ + old_cpu = PANIC_CPU_INVALID; this_cpu = raw_smp_processor_id(); - old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); - if (old_cpu == PANIC_CPU_INVALID) { + + if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) { /* This is the 1st CPU which comes here, so go ahead. */ __crash_kexec(regs); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f9a419cd22d4..bef2f6f2571b 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -123,6 +123,8 @@ void kimage_file_post_load_cleanup(struct kimage *image) */ kfree(image->image_loader_data); image->image_loader_data = NULL; + + kexec_file_dbg_print = false; } #ifdef CONFIG_KEXEC_SIG @@ -202,6 +204,8 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, if (ret < 0) return ret; image->kernel_buf_len = ret; + kexec_dprintk("kernel: %p kernel_size: %#lx\n", + image->kernel_buf, image->kernel_buf_len); /* Call arch image probe handlers */ ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, @@ -278,6 +282,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, if (!image) return -ENOMEM; + kexec_file_dbg_print = !!(flags & KEXEC_FILE_DEBUG); image->file_mode = 1; if (kexec_on_panic) { @@ -384,13 +389,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, if (ret) goto out; + kexec_dprintk("nr_segments = %lu\n", image->nr_segments); for (i = 0; i < image->nr_segments; i++) { struct kexec_segment *ksegment; ksegment = &image->segment[i]; - pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", - i, ksegment->buf, ksegment->bufsz, ksegment->mem, - ksegment->memsz); + kexec_dprintk("segment[%d]: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", + i, ksegment->buf, ksegment->bufsz, ksegment->mem, + ksegment->memsz); ret = kimage_load_segment(image, &image->segment[i]); if (ret) @@ -403,6 +409,8 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, if (ret) goto out; + kexec_dprintk("kexec_file_load: type:%u, start:0x%lx head:0x%lx flags:0x%lx\n", + image->type, image->start, image->head, flags); /* * Free up any temporary buffers allocated which are not needed * after image has been loaded @@ -426,11 +434,11 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end, unsigned long temp_start, temp_end; temp_end = min(end, kbuf->buf_max); - temp_start = temp_end - kbuf->memsz; + temp_start = temp_end - kbuf->memsz + 1; do { /* align down start */ - temp_start = temp_start & (~(kbuf->buf_align - 1)); + temp_start = ALIGN_DOWN(temp_start, kbuf->buf_align); if (temp_start < start || temp_start < kbuf->buf_min) return 0; @@ -592,6 +600,8 @@ static int kexec_walk_resources(struct kexec_buf *kbuf, IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, crashk_res.start, crashk_res.end, kbuf, func); + else if (kbuf->top_down) + return walk_system_ram_res_rev(0, ULONG_MAX, kbuf, func); else return walk_system_ram_res(0, ULONG_MAX, kbuf, func); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 5c579fb9a5e3..2fabd497d659 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -145,20 +145,9 @@ void __ptrace_unlink(struct task_struct *child) */ if (!(child->flags & PF_EXITING) && (child->signal->flags & SIGNAL_STOP_STOPPED || - child->signal->group_stop_count)) { + child->signal->group_stop_count)) child->jobctl |= JOBCTL_STOP_PENDING; - /* - * This is only possible if this thread was cloned by the - * traced task running in the stopped group, set the signal - * for the future reports. 
- * FIXME: we should change ptrace_init_task() to handle this - * case. - */ - if (!(child->jobctl & JOBCTL_STOP_SIGMASK)) - child->jobctl |= SIGSTOP; - } - /* * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick * @child in the butt. Note that @resume should be used iff @child diff --git a/kernel/reboot.c b/kernel/reboot.c index 395a0ea3c7a8..c3a3b82c4f64 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -59,6 +59,14 @@ struct sys_off_handler { }; /* + * This variable is used to indicate if a halt was initiated instead of a + * reboot when the reboot call was invoked with LINUX_REBOOT_CMD_POWER_OFF, but + * the system cannot be powered off. This allowes kernel_halt() to notify users + * of that. + */ +static bool poweroff_fallback_to_halt; + +/* * Temporary stub that prevents linkage failure while we're in process * of removing all uses of legacy pm_power_off() around the kernel. */ @@ -297,7 +305,10 @@ void kernel_halt(void) kernel_shutdown_prepare(SYSTEM_HALT); migrate_to_reboot_cpu(); syscore_shutdown(); - pr_emerg("System halted\n"); + if (poweroff_fallback_to_halt) + pr_emerg("Power off not available: System halted instead\n"); + else + pr_emerg("System halted\n"); kmsg_dump(KMSG_DUMP_SHUTDOWN); machine_halt(); } @@ -732,8 +743,10 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, /* Instead of trying to make the power_off code look like * halt when pm_power_off is not set do it the easy way. */ - if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !kernel_can_power_off()) + if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !kernel_can_power_off()) { + poweroff_fallback_to_halt = true; cmd = LINUX_REBOOT_CMD_HALT; + } mutex_lock(&system_transition_mutex); switch (cmd) { diff --git a/kernel/relay.c b/kernel/relay.c index 83fe0325cde1..a8e90e98bf2c 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1073,167 +1073,6 @@ static ssize_t relay_file_read(struct file *filp, return written; } -static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) -{ - rbuf->bytes_consumed += bytes_consumed; - - if (rbuf->bytes_consumed >= rbuf->chan->subbuf_size) { - relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1); - rbuf->bytes_consumed %= rbuf->chan->subbuf_size; - } -} - -static void relay_pipe_buf_release(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - struct rchan_buf *rbuf; - - rbuf = (struct rchan_buf *)page_private(buf->page); - relay_consume_bytes(rbuf, buf->private); -} - -static const struct pipe_buf_operations relay_pipe_buf_ops = { - .release = relay_pipe_buf_release, - .try_steal = generic_pipe_buf_try_steal, - .get = generic_pipe_buf_get, -}; - -static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i) -{ -} - -/* - * subbuf_splice_actor - splice up to one subbuf's worth of data - */ -static ssize_t subbuf_splice_actor(struct file *in, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, - unsigned int flags, - int *nonpad_ret) -{ - unsigned int pidx, poff, total_len, subbuf_pages, nr_pages; - struct rchan_buf *rbuf = in->private_data; - unsigned int subbuf_size = rbuf->chan->subbuf_size; - uint64_t pos = (uint64_t) *ppos; - uint32_t alloc_size = (uint32_t) rbuf->chan->alloc_size; - size_t read_start = (size_t) do_div(pos, alloc_size); - size_t read_subbuf = read_start / subbuf_size; - size_t padding = rbuf->padding[read_subbuf]; - size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding; - struct page *pages[PIPE_DEF_BUFFERS]; - struct partial_page partial[PIPE_DEF_BUFFERS]; - struct 
splice_pipe_desc spd = { - .pages = pages, - .nr_pages = 0, - .nr_pages_max = PIPE_DEF_BUFFERS, - .partial = partial, - .ops = &relay_pipe_buf_ops, - .spd_release = relay_page_release, - }; - ssize_t ret; - - if (rbuf->subbufs_produced == rbuf->subbufs_consumed) - return 0; - if (splice_grow_spd(pipe, &spd)) - return -ENOMEM; - - /* - * Adjust read len, if longer than what is available - */ - if (len > (subbuf_size - read_start % subbuf_size)) - len = subbuf_size - read_start % subbuf_size; - - subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; - pidx = (read_start / PAGE_SIZE) % subbuf_pages; - poff = read_start & ~PAGE_MASK; - nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max); - - for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { - unsigned int this_len, this_end, private; - unsigned int cur_pos = read_start + total_len; - - if (!len) - break; - - this_len = min_t(unsigned long, len, PAGE_SIZE - poff); - private = this_len; - - spd.pages[spd.nr_pages] = rbuf->page_array[pidx]; - spd.partial[spd.nr_pages].offset = poff; - - this_end = cur_pos + this_len; - if (this_end >= nonpad_end) { - this_len = nonpad_end - cur_pos; - private = this_len + padding; - } - spd.partial[spd.nr_pages].len = this_len; - spd.partial[spd.nr_pages].private = private; - - len -= this_len; - total_len += this_len; - poff = 0; - pidx = (pidx + 1) % subbuf_pages; - - if (this_end >= nonpad_end) { - spd.nr_pages++; - break; - } - } - - ret = 0; - if (!spd.nr_pages) - goto out; - - ret = *nonpad_ret = splice_to_pipe(pipe, &spd); - if (ret < 0 || ret < total_len) - goto out; - - if (read_start + ret == nonpad_end) - ret += padding; - -out: - splice_shrink_spd(&spd); - return ret; -} - -static ssize_t relay_file_splice_read(struct file *in, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, - unsigned int flags) -{ - ssize_t spliced; - int ret; - int nonpad_ret = 0; - - ret = 0; - spliced = 0; - - while (len && !spliced) { - ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); - if (ret < 0) - break; - else if (!ret) { - if (flags & SPLICE_F_NONBLOCK) - ret = -EAGAIN; - break; - } - - *ppos += ret; - if (ret > len) - len = 0; - else - len -= ret; - spliced += nonpad_ret; - nonpad_ret = 0; - } - - if (spliced) - return spliced; - - return ret; -} const struct file_operations relay_file_operations = { .open = relay_file_open, @@ -1242,6 +1081,5 @@ const struct file_operations relay_file_operations = { .read = relay_file_read, .llseek = no_llseek, .release = relay_file_release, - .splice_read = relay_file_splice_read, }; EXPORT_SYMBOL_GPL(relay_file_operations); diff --git a/kernel/resource.c b/kernel/resource.c index 91be1bc50b60..fcbca39dbc45 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -27,6 +27,8 @@ #include <linux/mount.h> #include <linux/resource_ext.h> #include <uapi/linux/magic.h> +#include <linux/string.h> +#include <linux/vmalloc.h> #include <asm/io.h> @@ -430,6 +432,61 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, } /* + * This function, being a variant of walk_system_ram_res(), calls the @func + * callback against all memory ranges of type System RAM which are marked as + * IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY in reversed order, i.e., from + * higher to lower. 
+ */ +int walk_system_ram_res_rev(u64 start, u64 end, void *arg, + int (*func)(struct resource *, void *)) +{ + struct resource res, *rams; + int rams_size = 16, i; + unsigned long flags; + int ret = -1; + + /* create a list */ + rams = kvcalloc(rams_size, sizeof(struct resource), GFP_KERNEL); + if (!rams) + return ret; + + flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + i = 0; + while ((start < end) && + (!find_next_iomem_res(start, end, flags, IORES_DESC_NONE, &res))) { + if (i >= rams_size) { + /* re-alloc */ + struct resource *rams_new; + + rams_new = kvrealloc(rams, rams_size * sizeof(struct resource), + (rams_size + 16) * sizeof(struct resource), + GFP_KERNEL); + if (!rams_new) + goto out; + + rams = rams_new; + rams_size += 16; + } + + rams[i].start = res.start; + rams[i++].end = res.end; + + start = res.end + 1; + } + + /* go reverse */ + for (i--; i >= 0; i--) { + ret = (*func)(&rams[i], arg); + if (ret) + break; + } + +out: + kvfree(rams); + return ret; +} + +/* * This function calls the @func callback against all memory ranges, which * are ranges marked as IORESOURCE_MEM and IORESOUCE_BUSY. */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b803030c3a03..533547e3c90a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -13097,19 +13097,6 @@ next_cpu: return 0; } -#else /* CONFIG_FAIR_GROUP_SCHED */ - -void free_fair_sched_group(struct task_group *tg) { } - -int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) -{ - return 1; -} - -void online_fair_sched_group(struct task_group *tg) { } - -void unregister_fair_sched_group(struct task_group *tg) { } - #endif /* CONFIG_FAIR_GROUP_SCHED */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e58a54bda77d..001fe047bd5d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -461,10 +461,21 @@ static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) extern int tg_nop(struct task_group *tg, void *data); +#ifdef CONFIG_FAIR_GROUP_SCHED extern void free_fair_sched_group(struct task_group *tg); extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent); extern void online_fair_sched_group(struct task_group *tg); extern void unregister_fair_sched_group(struct task_group *tg); +#else +static inline void free_fair_sched_group(struct task_group *tg) { } +static inline int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) +{ + return 1; +} +static inline void online_fair_sched_group(struct task_group *tg) { } +static inline void unregister_fair_sched_group(struct task_group *tg) { } +#endif + extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, struct sched_entity *se, int cpu, struct sched_entity *parent); diff --git a/kernel/signal.c b/kernel/signal.c index 47a7602dfe8d..c9c57d053ce4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -171,16 +171,6 @@ static bool recalc_sigpending_tsk(struct task_struct *t) return false; } -/* - * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up. - * This is superfluous when called on current, the wakeup is a harmless no-op. 
- */ -void recalc_sigpending_and_wake(struct task_struct *t) -{ - if (recalc_sigpending_tsk(t)) - signal_wake_up(t, 0); -} - void recalc_sigpending(void) { if (!recalc_sigpending_tsk(current) && !freezing(current)) @@ -1348,10 +1338,8 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, action->sa.sa_handler = SIG_DFL; if (handler == HANDLER_EXIT) action->sa.sa_flags |= SA_IMMUTABLE; - if (blocked) { + if (blocked) sigdelset(&t->blocked, sig); - recalc_sigpending_and_wake(t); - } } /* * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect @@ -1361,6 +1349,9 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, (!t->ptrace || (handler == HANDLER_EXIT))) t->signal->flags &= ~SIGNAL_UNKILLABLE; ret = send_signal_locked(sig, info, t, PIDTYPE_PID); + /* This can happen if the signal was already pending and blocked */ + if (!task_sigpending(t)) + signal_wake_up(t, 0); spin_unlock_irqrestore(&t->sighand->siglock, flags); return ret; @@ -1376,12 +1367,12 @@ int force_sig_info(struct kernel_siginfo *info) */ int zap_other_threads(struct task_struct *p) { - struct task_struct *t = p; + struct task_struct *t; int count = 0; p->signal->group_stop_count = 0; - while_each_thread(p, t) { + for_other_threads(p, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); /* Don't require de_thread to wait for the vhost_worker */ if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER) @@ -2465,12 +2456,10 @@ static bool do_signal_stop(int signr) sig->group_exit_code = signr; sig->group_stop_count = 0; - if (task_set_jobctl_pending(current, signr | gstop)) sig->group_stop_count++; - t = current; - while_each_thread(current, t) { + for_other_threads(current, t) { /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, @@ -2966,8 +2955,7 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) if (sigisemptyset(&retarget)) return; - t = tsk; - while_each_thread(tsk, t) { + for_other_threads(tsk, t) { if (t->flags & PF_EXITING) continue; diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 4f65824879ab..afb3c116da91 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -126,7 +126,7 @@ EXPORT_SYMBOL_GPL(stack_trace_save); /** * stack_trace_save_tsk - Save a task stack trace into a storage array - * @task: The task to examine + * @tsk: The task to examine * @store: Pointer to storage array * @size: Size of the storage array * @skipnr: Number of entries to skip at the start of the stack trace diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 5cd6d4e26915..81a8862295d6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -91,7 +91,7 @@ static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts); static DEFINE_PER_CPU(int, hrtimer_interrupts_saved); static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned); static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched); -static unsigned long watchdog_hardlockup_all_cpu_dumped; +static unsigned long hard_lockup_nmi_warn; notrace void arch_touch_nmi_watchdog(void) { @@ -151,12 +151,32 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) */ if (is_hardlockup(cpu)) { unsigned int this_cpu = smp_processor_id(); + unsigned long flags; /* Only print hardlockups once. */ if (per_cpu(watchdog_hardlockup_warned, cpu)) return; + /* + * Prevent multiple hard-lockup reports if one cpu is already + * engaged in dumping all cpu back traces. 
+ */ + if (sysctl_hardlockup_all_cpu_backtrace) { + if (test_and_set_bit_lock(0, &hard_lockup_nmi_warn)) + return; + } + + /* + * NOTE: we call printk_cpu_sync_get_irqsave() after printing + * the lockup message. While it would be nice to serialize + * that printout, we really want to make sure that if some + * other CPU somehow locked up while holding the lock associated + * with printk_cpu_sync_get_irqsave() that we can still at least + * get the message about the lockup out. + */ pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu); + printk_cpu_sync_get_irqsave(flags); + print_modules(); print_irqtrace_events(current); if (cpu == this_cpu) { @@ -164,17 +184,17 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) show_regs(regs); else dump_stack(); + printk_cpu_sync_put_irqrestore(flags); } else { + printk_cpu_sync_put_irqrestore(flags); trigger_single_cpu_backtrace(cpu); } - /* - * Perform multi-CPU dump only once to avoid multiple - * hardlockups generating interleaving traces - */ - if (sysctl_hardlockup_all_cpu_backtrace && - !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped)) + if (sysctl_hardlockup_all_cpu_backtrace) { trigger_allbutcpu_cpu_backtrace(cpu); + if (!hardlockup_panic) + clear_bit_unlock(0, &hard_lockup_nmi_warn); + } if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); @@ -448,6 +468,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) struct pt_regs *regs = get_irq_regs(); int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; + unsigned long flags; if (!watchdog_enabled) return HRTIMER_NORESTART; @@ -514,6 +535,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* Start period for the next softlockup warning. */ update_report_ts(); + printk_cpu_sync_get_irqsave(flags); pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); @@ -523,10 +545,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) show_regs(regs); else dump_stack(); + printk_cpu_sync_put_irqrestore(flags); if (softlockup_all_cpu_backtrace) { trigger_allbutcpu_cpu_backtrace(smp_processor_id()); - clear_bit_unlock(0, &soft_lockup_nmi_warn); + if (!softlockup_panic) + clear_bit_unlock(0, &soft_lockup_nmi_warn); } add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); |