Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.hz | 2
-rw-r--r--  kernel/cgroup/cgroup-v1.c | 2
-rw-r--r--  kernel/cgroup/cgroup.c | 26
-rw-r--r--  kernel/configs/debug.config | 1
-rw-r--r--  kernel/crash_reserve.c | 9
-rw-r--r--  kernel/debug/debug_core.c | 14
-rw-r--r--  kernel/debug/kdb/kdb_io.c | 4
-rw-r--r--  kernel/debug/kdb/kdb_main.c | 85
-rw-r--r--  kernel/events/core.c | 18
-rw-r--r--  kernel/events/uprobes.c | 16
-rw-r--r--  kernel/exit.c | 11
-rw-r--r--  kernel/fork.c | 184
-rwxr-xr-x  kernel/gen_kheaders.sh | 1
-rw-r--r--  kernel/hung_task.c | 38
-rw-r--r--  kernel/irq/irqdomain.c | 14
-rw-r--r--  kernel/irq/migration.c | 2
-rw-r--r--  kernel/kcsan/kcsan_test.c | 2
-rw-r--r--  kernel/kexec_core.c | 10
-rw-r--r--  kernel/kexec_elf.c | 2
-rw-r--r--  kernel/kexec_file.c | 12
-rw-r--r--  kernel/kthread.c | 4
-rw-r--r--  kernel/locking/mutex.c | 14
-rw-r--r--  kernel/locking/percpu-rwsem.c | 2
-rw-r--r--  kernel/module/main.c | 13
-rw-r--r--  kernel/panic.c | 6
-rw-r--r--  kernel/rcu/Kconfig | 2
-rw-r--r--  kernel/rcu/rcutorture.c | 2
-rw-r--r--  kernel/rcu/srcutree.c | 2
-rw-r--r--  kernel/rcu/tasks.h | 2
-rw-r--r--  kernel/rcu/tree_nocb.h | 4
-rw-r--r--  kernel/reboot.c | 140
-rw-r--r--  kernel/relay.c | 3
-rw-r--r--  kernel/resource.c | 18
-rw-r--r--  kernel/sched/core.c | 1
-rw-r--r--  kernel/sched/ext.c | 8
-rw-r--r--  kernel/sched/ext_idle.c | 12
-rw-r--r--  kernel/sched/psi.c | 2
-rw-r--r--  kernel/signal.c | 15
-rw-r--r--  kernel/time/clocksource.c | 2
-rw-r--r--  kernel/time/hrtimer.c | 71
-rw-r--r--  kernel/time/sleep_timeout.c | 2
-rw-r--r--  kernel/time/timekeeping.c | 94
-rw-r--r--  kernel/time/timer.c | 8
-rw-r--r--  kernel/time/timer_list.c | 2
-rw-r--r--  kernel/trace/Kconfig | 3
-rw-r--r--  kernel/trace/ftrace.c | 1
-rw-r--r--  kernel/trace/ring_buffer.c | 252
-rw-r--r--  kernel/trace/rv/rv.c | 3
-rw-r--r--  kernel/trace/trace.c | 461
-rw-r--r--  kernel/trace/trace.h | 41
-rw-r--r--  kernel/trace/trace_events.c | 47
-rw-r--r--  kernel/trace/trace_output.c | 4
-rw-r--r--  kernel/ucount.c | 95
-rw-r--r--  kernel/watchdog_perf.c | 6
-rw-r--r--  kernel/workqueue.c | 14
55 files changed, 1150 insertions, 659 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 38ef6d06888e..ce1435cb08b1 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -30,7 +30,7 @@ choice
250 Hz is a good compromise choice allowing server performance
while also showing good interactive responsiveness even
on SMP and NUMA systems. If you are going to be using NTSC video
- or multimedia, selected 300Hz instead.
+ or multimedia, select 300Hz instead.
config HZ_300
bool "300 HZ"
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 11ea8d24ac72..fa24c032ed6f 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -851,7 +851,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
if (kernfs_type(kn) != KERNFS_DIR)
return -ENOTDIR;
- if (kn->parent != new_parent)
+ if (rcu_access_pointer(kn->__parent) != new_parent)
return -EIO;
/*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index f231fe3a0744..27f08aa17b56 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -633,9 +633,22 @@ int cgroup_task_count(const struct cgroup *cgrp)
return count;
}
+static struct cgroup *kn_priv(struct kernfs_node *kn)
+{
+ struct kernfs_node *parent;
+ /*
+ * The parent can not be replaced due to KERNFS_ROOT_INVARIANT_PARENT.
+ * Therefore it is always safe to dereference this pointer outside of a
+ * RCU section.
+ */
+ parent = rcu_dereference_check(kn->__parent,
+ kernfs_root_flags(kn) & KERNFS_ROOT_INVARIANT_PARENT);
+ return parent->priv;
+}
+
struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
- struct cgroup *cgrp = of->kn->parent->priv;
+ struct cgroup *cgrp = kn_priv(of->kn);
struct cftype *cft = of_cft(of);
/*
@@ -1612,7 +1625,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn)
if (kernfs_type(kn) == KERNFS_DIR)
cgrp = kn->priv;
else
- cgrp = kn->parent->priv;
+ cgrp = kn_priv(kn);
cgroup_unlock();
@@ -1644,7 +1657,7 @@ struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
if (kernfs_type(kn) == KERNFS_DIR)
cgrp = kn->priv;
else
- cgrp = kn->parent->priv;
+ cgrp = kn_priv(kn);
/*
* We're gonna grab cgroup_mutex which nests outside kernfs
@@ -1682,7 +1695,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
cfile->kn = NULL;
spin_unlock_irq(&cgroup_file_kn_lock);
- del_timer_sync(&cfile->notify_timer);
+ timer_delete_sync(&cfile->notify_timer);
}
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
@@ -2118,7 +2131,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
root->kf_root = kernfs_create_root(kf_sops,
KERNFS_ROOT_CREATE_DEACTIVATED |
KERNFS_ROOT_SUPPORT_EXPORTOP |
- KERNFS_ROOT_SUPPORT_USER_XATTR,
+ KERNFS_ROOT_SUPPORT_USER_XATTR |
+ KERNFS_ROOT_INVARIANT_PARENT,
root_cgrp);
if (IS_ERR(root->kf_root)) {
ret = PTR_ERR(root->kf_root);
@@ -4115,7 +4129,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct cgroup_file_ctx *ctx = of->priv;
- struct cgroup *cgrp = of->kn->parent->priv;
+ struct cgroup *cgrp = kn_priv(of->kn);
struct cftype *cft = of_cft(of);
struct cgroup_subsys_state *css;
int ret;
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index 20552f163930..8aafd050b754 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -73,7 +73,6 @@ CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_VM_RB=y
CONFIG_DEBUG_VM_VMACACHE=y
-CONFIG_GENERIC_PTDUMP=y
CONFIG_KASAN=y
CONFIG_KASAN_GENERIC=y
CONFIG_KASAN_INLINE=y
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
index a620fb4b2116..aff7c0fdbefa 100644
--- a/kernel/crash_reserve.c
+++ b/kernel/crash_reserve.c
@@ -375,11 +375,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
return 0;
}
-void __init reserve_crashkernel_generic(char *cmdline,
- unsigned long long crash_size,
- unsigned long long crash_base,
- unsigned long long crash_low_size,
- bool high)
+void __init reserve_crashkernel_generic(unsigned long long crash_size,
+ unsigned long long crash_base,
+ unsigned long long crash_low_size,
+ bool high)
{
unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0;
bool fixed_base = false;
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index ce1bb2301c06..0b9495187fba 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -837,10 +837,6 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
{
struct kgdb_state kgdb_var;
struct kgdb_state *ks = &kgdb_var;
- int ret = 0;
-
- if (arch_kgdb_ops.enable_nmi)
- arch_kgdb_ops.enable_nmi(0);
/*
* Avoid entering the debugger if we were triggered due to an oops
* but panic_timeout indicates the system should automatically
@@ -858,15 +854,11 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
ks->linux_regs = regs;
if (kgdb_reenter_check(ks))
- goto out; /* Ouch, double exception ! */
+ return 0; /* Ouch, double exception ! */
if (kgdb_info[ks->cpu].enter_kgdb != 0)
- goto out;
+ return 0;
- ret = kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);
-out:
- if (arch_kgdb_ops.enable_nmi)
- arch_kgdb_ops.enable_nmi(1);
- return ret;
+ return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);
}
NOKPROBE_SYMBOL(kgdb_handle_exception);
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 6a77f1c779c4..9b11b10b120c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -334,7 +334,7 @@ poll_again:
*cp = '\0';
p_tmp = strrchr(buffer, ' ');
p_tmp = (p_tmp ? p_tmp + 1 : buffer);
- strscpy(tmpbuffer, p_tmp, sizeof(tmpbuffer));
+ strscpy(tmpbuffer, p_tmp);
*cp = tmp;
len = strlen(tmpbuffer);
@@ -452,7 +452,7 @@ poll_again:
char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt)
{
if (prompt && kdb_prompt_str != prompt)
- strscpy(kdb_prompt_str, prompt, CMD_BUFLEN);
+ strscpy(kdb_prompt_str, prompt);
kdb_printf("%s", kdb_prompt_str);
kdb_nextline = 1; /* Prompt and input resets line number */
return kdb_read(buffer, bufsize);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 5f4be507d79f..7a4d2d4689a5 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -25,7 +25,6 @@
#include <linux/smp.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
-#include <linux/atomic.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <linux/init.h>
@@ -105,7 +104,7 @@ static kdbmsg_t kdbmsgs[] = {
KDBMSG(NOENVVALUE, "Environment variable should have value"),
KDBMSG(NOTIMP, "Command not implemented"),
KDBMSG(ENVFULL, "Environment full"),
- KDBMSG(ENVBUFFULL, "Environment buffer full"),
+ KDBMSG(KMALLOCFAILED, "Failed to allocate memory"),
KDBMSG(TOOMANYBPT, "Too many breakpoints defined"),
#ifdef CONFIG_CPU_XSCALE
KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"),
@@ -130,13 +129,9 @@ static const int __nkdb_err = ARRAY_SIZE(kdbmsgs);
/*
- * Initial environment. This is all kept static and local to
- * this file. We don't want to rely on the memory allocation
- * mechanisms in the kernel, so we use a very limited allocate-only
- * heap for new and altered environment variables. The entire
- * environment is limited to a fixed number of entries (add more
- * to __env[] if required) and a fixed amount of heap (add more to
- * KDB_ENVBUFSIZE if required).
+ * Initial environment. This is all kept static and local to this file.
+ * The entire environment is limited to a fixed number of entries
+ * (add more to __env[] if required)
*/
static char *__env[31] = {
@@ -259,35 +254,6 @@ char *kdbgetenv(const char *match)
}
/*
- * kdballocenv - This function is used to allocate bytes for
- * environment entries.
- * Parameters:
- * bytes The number of bytes to allocate in the static buffer.
- * Returns:
- * A pointer to the allocated space in the buffer on success.
- * NULL if bytes > size available in the envbuffer.
- * Remarks:
- * We use a static environment buffer (envbuffer) to hold the values
- * of dynamically generated environment variables (see kdb_set). Buffer
- * space once allocated is never free'd, so over time, the amount of space
- * (currently 512 bytes) will be exhausted if env variables are changed
- * frequently.
- */
-static char *kdballocenv(size_t bytes)
-{
-#define KDB_ENVBUFSIZE 512
- static char envbuffer[KDB_ENVBUFSIZE];
- static int envbufsize;
- char *ep = NULL;
-
- if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) {
- ep = &envbuffer[envbufsize];
- envbufsize += bytes;
- }
- return ep;
-}
-
-/*
* kdbgetulenv - This function will return the value of an unsigned
* long-valued environment variable.
* Parameters:
@@ -348,9 +314,9 @@ static int kdb_setenv(const char *var, const char *val)
varlen = strlen(var);
vallen = strlen(val);
- ep = kdballocenv(varlen + vallen + 2);
- if (ep == (char *)0)
- return KDB_ENVBUFFULL;
+ ep = kmalloc(varlen + vallen + 2, GFP_KDB);
+ if (!ep)
+ return KDB_KMALLOCFAILED;
sprintf(ep, "%s=%s", var, val);
@@ -359,6 +325,7 @@ static int kdb_setenv(const char *var, const char *val)
&& ((strncmp(__env[i], var, varlen) == 0)
&& ((__env[i][varlen] == '\0')
|| (__env[i][varlen] == '=')))) {
+ kfree_const(__env[i]);
__env[i] = ep;
return 0;
}
@@ -2119,32 +2086,6 @@ static int kdb_dmesg(int argc, const char **argv)
return 0;
}
#endif /* CONFIG_PRINTK */
-
-/* Make sure we balance enable/disable calls, must disable first. */
-static atomic_t kdb_nmi_disabled;
-
-static int kdb_disable_nmi(int argc, const char *argv[])
-{
- if (atomic_read(&kdb_nmi_disabled))
- return 0;
- atomic_set(&kdb_nmi_disabled, 1);
- arch_kgdb_ops.enable_nmi(0);
- return 0;
-}
-
-static int kdb_param_enable_nmi(const char *val, const struct kernel_param *kp)
-{
- if (!atomic_add_unless(&kdb_nmi_disabled, -1, 0))
- return -EINVAL;
- arch_kgdb_ops.enable_nmi(1);
- return 0;
-}
-
-static const struct kernel_param_ops kdb_param_ops_enable_nmi = {
- .set = kdb_param_enable_nmi,
-};
-module_param_cb(enable_nmi, &kdb_param_ops_enable_nmi, NULL, 0600);
-
/*
* kdb_cpu - This function implements the 'cpu' command.
* cpu [<cpunum>]
@@ -2836,20 +2777,10 @@ static kdbtab_t maintab[] = {
},
};
-static kdbtab_t nmicmd = {
- .name = "disable_nmi",
- .func = kdb_disable_nmi,
- .usage = "",
- .help = "Disable NMI entry to KDB",
- .flags = KDB_ENABLE_ALWAYS_SAFE,
-};
-
/* Initialize the kdb command table. */
static void __init kdb_inittab(void)
{
kdb_register_table(maintab, ARRAY_SIZE(maintab));
- if (arch_kgdb_ops.enable_nmi)
- kdb_register_table(&nmicmd, 1);
}
/* Execute any commands defined in kdb_cmds. */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0bb21659e252..128db74e9eab 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2451,6 +2451,7 @@ ctx_time_update_event(struct perf_event_context *ctx, struct perf_event *event)
#define DETACH_GROUP 0x01UL
#define DETACH_CHILD 0x02UL
#define DETACH_DEAD 0x04UL
+#define DETACH_EXIT 0x08UL
/*
* Cross CPU call to remove a performance event
@@ -2465,6 +2466,7 @@ __perf_remove_from_context(struct perf_event *event,
void *info)
{
struct perf_event_pmu_context *pmu_ctx = event->pmu_ctx;
+ enum perf_event_state state = PERF_EVENT_STATE_OFF;
unsigned long flags = (unsigned long)info;
ctx_time_update(cpuctx, ctx);
@@ -2473,16 +2475,19 @@ __perf_remove_from_context(struct perf_event *event,
* Ensure event_sched_out() switches to OFF, at the very least
* this avoids raising perf_pending_task() at this time.
*/
- if (flags & DETACH_DEAD)
+ if (flags & DETACH_EXIT)
+ state = PERF_EVENT_STATE_EXIT;
+ if (flags & DETACH_DEAD) {
event->pending_disable = 1;
+ state = PERF_EVENT_STATE_DEAD;
+ }
event_sched_out(event, ctx);
+ perf_event_set_state(event, min(event->state, state));
if (flags & DETACH_GROUP)
perf_group_detach(event);
if (flags & DETACH_CHILD)
perf_child_detach(event);
list_del_event(event, ctx);
- if (flags & DETACH_DEAD)
- event->state = PERF_EVENT_STATE_DEAD;
if (!pmu_ctx->nr_events) {
pmu_ctx->rotate_necessary = 0;
@@ -13731,12 +13736,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
mutex_lock(&parent_event->child_mutex);
}
- perf_remove_from_context(event, detach_flags);
-
- raw_spin_lock_irq(&ctx->lock);
- if (event->state > PERF_EVENT_STATE_EXIT)
- perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
- raw_spin_unlock_irq(&ctx->lock);
+ perf_remove_from_context(event, detach_flags | DETACH_EXIT);
/*
* Child events can be freed.
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 70c84b9d7be3..615b4e6d22c7 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -173,6 +173,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
DEFINE_FOLIO_VMA_WALK(pvmw, old_folio, vma, addr, 0);
int err;
struct mmu_notifier_range range;
+ pte_t pte;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, addr,
addr + PAGE_SIZE);
@@ -192,6 +193,16 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
if (!page_vma_mapped_walk(&pvmw))
goto unlock;
VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
+ pte = ptep_get(pvmw.pte);
+
+ /*
+ * Handle PFN swap PTES, such as device-exclusive ones, that actually
+ * map pages: simply trigger GUP again to fix it up.
+ */
+ if (unlikely(!pte_present(pte))) {
+ page_vma_mapped_walk_done(&pvmw);
+ goto unlock;
+ }
if (new_page) {
folio_get(new_folio);
@@ -206,7 +217,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
inc_mm_counter(mm, MM_ANONPAGES);
}
- flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte)));
+ flush_cache_page(vma, addr, pte_pfn(pte));
ptep_clear_flush(vma, addr, pvmw.pte);
if (new_page)
set_pte_at(mm, addr, pvmw.pte,
@@ -1692,7 +1703,8 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
}
vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
- VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
+ VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO|
+ VM_SEALED_SYSMAP,
&xol_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
diff --git a/kernel/exit.c b/kernel/exit.c
index c2e6c7b7779f..1b51dc099f1e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -268,6 +268,9 @@ repeat:
leader = p->group_leader;
if (leader != p && thread_group_empty(leader)
&& leader->exit_state == EXIT_ZOMBIE) {
+ /* for pidfs_exit() and do_notify_parent() */
+ if (leader->signal->flags & SIGNAL_GROUP_EXIT)
+ leader->exit_code = leader->signal->group_exit_code;
/*
* If we were the last child thread and the leader has
* exited already, and the leader's parent ignores SIGCHLD,
@@ -756,12 +759,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
tsk->exit_state = EXIT_ZOMBIE;
- /*
- * Ignore thread-group leaders that exited before all
- * subthreads did.
- */
- if (!delay_group_leader(tsk))
- do_notify_pidfd(tsk);
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
@@ -774,6 +771,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
do_notify_parent(tsk, tsk->exit_signal);
} else {
autoreap = true;
+ /* untraced sub-thread */
+ do_notify_pidfd(tsk);
}
if (autoreap) {
diff --git a/kernel/fork.c b/kernel/fork.c
index 1b659b07ecd5..c4b26cd8998b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -311,11 +311,9 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
* so memcg accounting is performed manually on assigning/releasing
* stacks to tasks. Drop __GFP_ACCOUNT.
*/
- stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
- VMALLOC_START, VMALLOC_END,
+ stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
THREADINFO_GFP & ~__GFP_ACCOUNT,
- PAGE_KERNEL,
- 0, node, __builtin_return_address(0));
+ node, __builtin_return_address(0));
if (!stack)
return -ENOMEM;
@@ -436,35 +434,6 @@ static struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
-#ifdef CONFIG_PER_VMA_LOCK
-
-/* SLAB cache for vm_area_struct.lock */
-static struct kmem_cache *vma_lock_cachep;
-
-static bool vma_lock_alloc(struct vm_area_struct *vma)
-{
- vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL);
- if (!vma->vm_lock)
- return false;
-
- init_rwsem(&vma->vm_lock->lock);
- vma->vm_lock_seq = UINT_MAX;
-
- return true;
-}
-
-static inline void vma_lock_free(struct vm_area_struct *vma)
-{
- kmem_cache_free(vma_lock_cachep, vma->vm_lock);
-}
-
-#else /* CONFIG_PER_VMA_LOCK */
-
-static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; }
-static inline void vma_lock_free(struct vm_area_struct *vma) {}
-
-#endif /* CONFIG_PER_VMA_LOCK */
-
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
{
struct vm_area_struct *vma;
@@ -474,14 +443,46 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
return NULL;
vma_init(vma, mm);
- if (!vma_lock_alloc(vma)) {
- kmem_cache_free(vm_area_cachep, vma);
- return NULL;
- }
return vma;
}
+static void vm_area_init_from(const struct vm_area_struct *src,
+ struct vm_area_struct *dest)
+{
+ dest->vm_mm = src->vm_mm;
+ dest->vm_ops = src->vm_ops;
+ dest->vm_start = src->vm_start;
+ dest->vm_end = src->vm_end;
+ dest->anon_vma = src->anon_vma;
+ dest->vm_pgoff = src->vm_pgoff;
+ dest->vm_file = src->vm_file;
+ dest->vm_private_data = src->vm_private_data;
+ vm_flags_init(dest, src->vm_flags);
+ memcpy(&dest->vm_page_prot, &src->vm_page_prot,
+ sizeof(dest->vm_page_prot));
+ /*
+ * src->shared.rb may be modified concurrently when called from
+ * dup_mmap(), but the clone will reinitialize it.
+ */
+ data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
+ memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
+ sizeof(dest->vm_userfaultfd_ctx));
+#ifdef CONFIG_ANON_VMA_NAME
+ dest->anon_name = src->anon_name;
+#endif
+#ifdef CONFIG_SWAP
+ memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
+ sizeof(dest->swap_readahead_info));
+#endif
+#ifndef CONFIG_MMU
+ dest->vm_region = src->vm_region;
+#endif
+#ifdef CONFIG_NUMA
+ dest->vm_policy = src->vm_policy;
+#endif
+}
+
struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
{
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
@@ -491,15 +492,8 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
- /*
- * orig->shared.rb may be modified concurrently, but the clone
- * will be reinitialized.
- */
- data_race(memcpy(new, orig, sizeof(*new)));
- if (!vma_lock_alloc(new)) {
- kmem_cache_free(vm_area_cachep, new);
- return NULL;
- }
+ vm_area_init_from(orig, new);
+ vma_lock_init(new, true);
INIT_LIST_HEAD(&new->anon_vma_chain);
vma_numab_state_init(new);
dup_anon_vma_name(orig, new);
@@ -511,35 +505,15 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
return new;
}
-void __vm_area_free(struct vm_area_struct *vma)
+void vm_area_free(struct vm_area_struct *vma)
{
+ /* The vma should be detached while being destroyed. */
+ vma_assert_detached(vma);
vma_numab_state_free(vma);
free_anon_vma_name(vma);
- vma_lock_free(vma);
kmem_cache_free(vm_area_cachep, vma);
}
-#ifdef CONFIG_PER_VMA_LOCK
-static void vm_area_free_rcu_cb(struct rcu_head *head)
-{
- struct vm_area_struct *vma = container_of(head, struct vm_area_struct,
- vm_rcu);
-
- /* The vma should not be locked while being destroyed. */
- VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma);
- __vm_area_free(vma);
-}
-#endif
-
-void vm_area_free(struct vm_area_struct *vma)
-{
-#ifdef CONFIG_PER_VMA_LOCK
- call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb);
-#else
- __vm_area_free(vma);
-#endif
-}
-
static void account_kernel_stack(struct task_struct *tsk, int account)
{
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
@@ -830,6 +804,36 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
+#ifdef CONFIG_MM_ID
+static DEFINE_IDA(mm_ida);
+
+static inline int mm_alloc_id(struct mm_struct *mm)
+{
+ int ret;
+
+ ret = ida_alloc_range(&mm_ida, MM_ID_MIN, MM_ID_MAX, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ mm->mm_id = ret;
+ return 0;
+}
+
+static inline void mm_free_id(struct mm_struct *mm)
+{
+ const mm_id_t id = mm->mm_id;
+
+ mm->mm_id = MM_ID_DUMMY;
+ if (id == MM_ID_DUMMY)
+ return;
+ if (WARN_ON_ONCE(id < MM_ID_MIN || id > MM_ID_MAX))
+ return;
+ ida_free(&mm_ida, id);
+}
+#else /* !CONFIG_MM_ID */
+static inline int mm_alloc_id(struct mm_struct *mm) { return 0; }
+static inline void mm_free_id(struct mm_struct *mm) {}
+#endif /* CONFIG_MM_ID */
+
static void check_mm(struct mm_struct *mm)
{
int i;
@@ -933,6 +937,7 @@ void __mmdrop(struct mm_struct *mm)
WARN_ON_ONCE(mm == current->active_mm);
mm_free_pgd(mm);
+ mm_free_id(mm);
destroy_context(mm);
mmu_notifier_subscriptions_destroy(mm);
check_mm(mm);
@@ -1267,6 +1272,15 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
#endif
}
+static void mmap_init_lock(struct mm_struct *mm)
+{
+ init_rwsem(&mm->mmap_lock);
+ mm_lock_seqcount_init(mm);
+#ifdef CONFIG_PER_VMA_LOCK
+ rcuwait_init(&mm->vma_writer_wait);
+#endif
+}
+
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
struct user_namespace *user_ns)
{
@@ -1308,6 +1322,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
if (mm_alloc_pgd(mm))
goto fail_nopgd;
+ if (mm_alloc_id(mm))
+ goto fail_noid;
+
if (init_new_context(p, mm))
goto fail_nocontext;
@@ -1327,6 +1344,8 @@ fail_pcpu:
fail_cid:
destroy_context(mm);
fail_nocontext:
+ mm_free_id(mm);
+fail_noid:
mm_free_pgd(mm);
fail_nopgd:
free_mm(mm);
@@ -1563,6 +1582,17 @@ struct mm_struct *get_task_mm(struct task_struct *task)
}
EXPORT_SYMBOL_GPL(get_task_mm);
+static bool may_access_mm(struct mm_struct *mm, struct task_struct *task, unsigned int mode)
+{
+ if (mm == current->mm)
+ return true;
+ if (ptrace_may_access(task, mode))
+ return true;
+ if ((mode & PTRACE_MODE_READ) && perfmon_capable())
+ return true;
+ return false;
+}
+
struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
{
struct mm_struct *mm;
@@ -1575,7 +1605,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
mm = get_task_mm(task);
if (!mm) {
mm = ERR_PTR(-ESRCH);
- } else if (mm != current->mm && !ptrace_may_access(task, mode)) {
+ } else if (!may_access_mm(mm, task, mode)) {
mmput(mm);
mm = ERR_PTR(-EACCES);
}
@@ -3183,6 +3213,11 @@ void __init mm_cache_init(void)
void __init proc_caches_init(void)
{
+ struct kmem_cache_args args = {
+ .use_freeptr_offset = true,
+ .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
+ };
+
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3199,11 +3234,10 @@ void __init proc_caches_init(void)
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
NULL);
-
- vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
-#ifdef CONFIG_PER_VMA_LOCK
- vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT);
-#endif
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), &args,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
+ SLAB_ACCOUNT);
mmap_init();
nsproxy_cache_init();
}
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 00529c81cc40..c9e5dc068e85 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -89,7 +89,6 @@ rm -f "${tmpdir}.contents.txt"
# Create archive and try to normalize metadata for reproducibility.
tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
- --exclude=".__afs*" --exclude=".nfs*" \
--owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \
-I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 04efa7a6e69b..dc898ec93463 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -93,6 +93,43 @@ static struct notifier_block panic_block = {
.notifier_call = hung_task_panic,
};
+
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+static void debug_show_blocker(struct task_struct *task)
+{
+ struct task_struct *g, *t;
+ unsigned long owner;
+ struct mutex *lock;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");
+
+ lock = READ_ONCE(task->blocker_mutex);
+ if (!lock)
+ return;
+
+ owner = mutex_get_owner(lock);
+ if (unlikely(!owner)) {
+ pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
+ task->comm, task->pid);
+ return;
+ }
+
+ /* Ensure the owner information is correct. */
+ for_each_process_thread(g, t) {
+ if ((unsigned long)t == owner) {
+ pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
+ task->comm, task->pid, t->comm, t->pid);
+ sched_show_task(t);
+ return;
+ }
+ }
+}
+#else
+static inline void debug_show_blocker(struct task_struct *task)
+{
+}
+#endif
+
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
unsigned long switch_count = t->nvcsw + t->nivcsw;
@@ -152,6 +189,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
+ debug_show_blocker(t);
hung_task_show_lock = true;
if (sysctl_hung_task_all_cpu_backtrace)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 2861f89880af..9d5c8651492d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -398,7 +398,7 @@ void irq_domain_remove(struct irq_domain *domain)
* If the going away domain is the default one, reset it.
*/
if (unlikely(irq_default_domain == domain))
- irq_set_default_host(NULL);
+ irq_set_default_domain(NULL);
mutex_unlock(&irq_domain_mutex);
@@ -573,7 +573,7 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
/**
- * irq_set_default_host() - Set a "default" irq domain
+ * irq_set_default_domain() - Set a "default" irq domain
* @domain: default domain pointer
*
* For convenience, it's possible to set a "default" domain that will be used
@@ -581,16 +581,16 @@ EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
* platforms that want to manipulate a few hard coded interrupt numbers that
* aren't properly represented in the device-tree.
*/
-void irq_set_default_host(struct irq_domain *domain)
+void irq_set_default_domain(struct irq_domain *domain)
{
pr_debug("Default domain set to @0x%p\n", domain);
irq_default_domain = domain;
}
-EXPORT_SYMBOL_GPL(irq_set_default_host);
+EXPORT_SYMBOL_GPL(irq_set_default_domain);
/**
- * irq_get_default_host() - Retrieve the "default" irq domain
+ * irq_get_default_domain() - Retrieve the "default" irq domain
*
* Returns: the default domain, if any.
*
@@ -598,11 +598,11 @@ EXPORT_SYMBOL_GPL(irq_set_default_host);
* systems that cannot implement a firmware->fwnode mapping (which
* both DT and ACPI provide).
*/
-struct irq_domain *irq_get_default_host(void)
+struct irq_domain *irq_get_default_domain(void)
{
return irq_default_domain;
}
-EXPORT_SYMBOL_GPL(irq_get_default_host);
+EXPORT_SYMBOL_GPL(irq_get_default_domain);
static bool irq_domain_is_nomap(struct irq_domain *domain)
{
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 147cabb4c077..f2b2929986ff 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -37,7 +37,7 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
void irq_force_complete_move(struct irq_desc *desc)
{
- for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
+ for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = irqd_get_parent_data(d)) {
if (d->chip && d->chip->irq_force_complete_move) {
d->chip->irq_force_complete_move(d);
return;
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 117d9d4d3c3b..6ce73cceaf53 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -1500,7 +1500,7 @@ static int access_thread(void *arg)
func();
}
} while (!torture_must_stop());
- del_timer_sync(&timer);
+ timer_delete_sync(&timer);
destroy_timer_on_stack(&timer);
torture_kthread_stopping("access_thread");
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index c22ad51c4317..3e62b944c883 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -210,6 +210,16 @@ int sanity_check_segment_list(struct kimage *image)
}
#endif
+ /*
+ * The destination addresses are searched from system RAM rather than
+ * being allocated from the buddy allocator, so they are not guaranteed
+ * to be accepted by the current kernel. Accept the destination
+ * addresses before kexec swaps their content with the segments' source
+ * pages to avoid accessing memory before it is accepted.
+ */
+ for (i = 0; i < nr_segments; i++)
+ accept_memory(image->segment[i].mem, image->segment[i].memsz);
+
return 0;
}
diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index d3689632e8b9..3a5c25b2adc9 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -390,7 +390,7 @@ int kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
struct kexec_buf *kbuf,
unsigned long *lowest_load_addr)
{
- unsigned long lowest_addr = UINT_MAX;
+ unsigned long lowest_addr = ULONG_MAX;
int ret;
size_t i;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 3eedb8c226ad..fba686487e3b 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -464,6 +464,12 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
continue;
}
+ /* Make sure this does not conflict with exclude range */
+ if (arch_check_excluded_range(image, temp_start, temp_end)) {
+ temp_start = temp_start - PAGE_SIZE;
+ continue;
+ }
+
/* We found a suitable memory range */
break;
} while (1);
@@ -498,6 +504,12 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
continue;
}
+ /* Make sure this does not conflict with exclude range */
+ if (arch_check_excluded_range(image, temp_start, temp_end)) {
+ temp_start = temp_start + PAGE_SIZE;
+ continue;
+ }
+
/* We found a suitable memory range */
break;
} while (1);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5dc5b0d7238e..77c44924cf54 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1362,14 +1362,14 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
struct kthread_worker *worker = work->worker;
/*
- * del_timer_sync() must be called to make sure that the timer
+ * timer_delete_sync() must be called to make sure that the timer
* callback is not running. The lock must be temporary released
* to avoid a deadlock with the callback. In the meantime,
* any queuing is blocked by setting the canceling counter.
*/
work->canceling++;
raw_spin_unlock_irqrestore(&worker->lock, *flags);
- del_timer_sync(&dwork->timer);
+ timer_delete_sync(&dwork->timer);
raw_spin_lock_irqsave(&worker->lock, *flags);
work->canceling--;
}
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 19b636f60a24..555e2b3a665a 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -72,6 +72,14 @@ static inline unsigned long __owner_flags(unsigned long owner)
return owner & MUTEX_FLAGS;
}
+/* Do not use the return value as a pointer directly. */
+unsigned long mutex_get_owner(struct mutex *lock)
+{
+ unsigned long owner = atomic_long_read(&lock->owner);
+
+ return (unsigned long)__owner_task(owner);
+}
+
/*
* Returns: __mutex_owner(lock) on failure or NULL on success.
*/
@@ -182,6 +190,9 @@ static void
__mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct list_head *list)
{
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+ WRITE_ONCE(current->blocker_mutex, lock);
+#endif
debug_mutex_add_waiter(lock, waiter, current);
list_add_tail(&waiter->list, list);
@@ -197,6 +208,9 @@ __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
__mutex_clear_flag(lock, MUTEX_FLAGS);
debug_mutex_remove_waiter(lock, waiter, current);
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+ WRITE_ONCE(current->blocker_mutex, NULL);
+#endif
}
/*
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 6083883c4fe0..d6964fc29f51 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -184,7 +184,7 @@ EXPORT_SYMBOL_GPL(__percpu_down_read);
#define per_cpu_sum(var) \
({ \
- typeof(var) __sum = 0; \
+ TYPEOF_UNQUAL(var) __sum = 0; \
int cpu; \
compiletime_assert_atomic_type(__sum); \
for_each_possible_cpu(cpu) \
diff --git a/kernel/module/main.c b/kernel/module/main.c
index debeb0eb15d7..a2859dc3eea6 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -3744,6 +3744,19 @@ bool is_module_text_address(unsigned long addr)
return __module_text_address(addr) != NULL;
}
+void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data)
+{
+ struct module *mod;
+
+ guard(rcu)();
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
+ if (func(mod, data))
+ break;
+ }
+}
+
/**
* __module_text_address() - get the module whose code contains an address.
* @addr: the address.
diff --git a/kernel/panic.c b/kernel/panic.c
index 0c55eec9e874..a3889f38153d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -833,9 +833,15 @@ device_initcall(register_warn_debugfs);
*/
__visible noinstr void __stack_chk_fail(void)
{
+ unsigned long flags;
+
instrumentation_begin();
+ flags = user_access_save();
+
panic("stack-protector: Kernel stack is corrupted in: %pB",
__builtin_return_address(0));
+
+ user_access_restore(flags);
instrumentation_end();
}
EXPORT_SYMBOL(__stack_chk_fail);
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index aa42de4d2768..4d9b21f69eaa 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -68,6 +68,8 @@ config TREE_SRCU
config FORCE_NEED_SRCU_NMI_SAFE
bool "Force selection of NEED_SRCU_NMI_SAFE"
depends on !TINY_SRCU
+ depends on RCU_EXPERT
+ depends on ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select NEED_SRCU_NMI_SAFE
default n
help
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 65095664f5c5..4fa7772be183 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2324,7 +2324,7 @@ rcu_torture_reader(void *arg)
stutter_wait("rcu_torture_reader");
} while (!torture_must_stop());
if (irqreader && cur_ops->irq_capable) {
- del_timer_sync(&t);
+ timer_delete_sync(&t);
destroy_timer_on_stack(&t);
}
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index d2a694944553..9a59b071501b 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -690,7 +690,7 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- del_timer_sync(&sdp->delay_work);
+ timer_delete_sync(&sdp->delay_work);
flush_work(&sdp->work);
if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
return; /* Forgot srcu_barrier(), so just leak it! */
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 466668eb4fad..c0cc7ae41106 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1086,7 +1086,7 @@ static void rcu_tasks_postscan(struct list_head *hop)
}
if (!IS_ENABLED(CONFIG_TINY_RCU))
- del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
+ timer_delete_sync(&tasks_rcu_exit_srcu_stall_timer);
}
/* See if tasks are still holding out, complain if so. */
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 5ff3bc56ff51..fa269d34167a 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -206,7 +206,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- del_timer(&rdp_gp->nocb_timer);
+ timer_delete(&rdp_gp->nocb_timer);
}
if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
@@ -822,7 +822,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- del_timer(&my_rdp->nocb_timer);
+ timer_delete(&my_rdp->nocb_timer);
}
WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
diff --git a/kernel/reboot.c b/kernel/reboot.c
index 41ab9e1ba357..ec087827c85c 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -36,6 +36,8 @@ enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
EXPORT_SYMBOL_GPL(reboot_mode);
enum reboot_mode panic_reboot_mode = REBOOT_UNDEFINED;
+static enum hw_protection_action hw_protection_action = HWPROT_ACT_SHUTDOWN;
+
/*
* This variable is used privately to keep track of whether or not
* reboot_type is still set to its default value (i.e., reboot= hasn't
@@ -229,6 +231,9 @@ EXPORT_SYMBOL(unregister_restart_handler);
/**
* do_kernel_restart - Execute kernel restart handler call chain
*
+ * @cmd: pointer to buffer containing command to execute for restart
+ * or %NULL
+ *
* Calls functions registered with register_restart_handler.
*
* Expected to be called from machine_restart as last step of the restart
@@ -933,61 +938,86 @@ void orderly_reboot(void)
}
EXPORT_SYMBOL_GPL(orderly_reboot);
+static const char *hw_protection_action_str(enum hw_protection_action action)
+{
+ switch (action) {
+ case HWPROT_ACT_SHUTDOWN:
+ return "shutdown";
+ case HWPROT_ACT_REBOOT:
+ return "reboot";
+ default:
+ return "undefined";
+ }
+}
+
+static enum hw_protection_action hw_failure_emergency_action;
+
/**
- * hw_failure_emergency_poweroff_func - emergency poweroff work after a known delay
- * @work: work_struct associated with the emergency poweroff function
+ * hw_failure_emergency_action_func - emergency action work after a known delay
+ * @work: work_struct associated with the emergency action function
*
* This function is called in very critical situations to force
- * a kernel poweroff after a configurable timeout value.
+ * a kernel poweroff or reboot after a configurable timeout value.
*/
-static void hw_failure_emergency_poweroff_func(struct work_struct *work)
+static void hw_failure_emergency_action_func(struct work_struct *work)
{
+ const char *action_str = hw_protection_action_str(hw_failure_emergency_action);
+
+ pr_emerg("Hardware protection timed-out. Trying forced %s\n",
+ action_str);
+
/*
- * We have reached here after the emergency shutdown waiting period has
- * expired. This means orderly_poweroff has not been able to shut off
- * the system for some reason.
+ * We have reached here after the emergency action waiting period has
+ * expired. This means orderly_poweroff/reboot has not been able to
+ * shut off the system for some reason.
*
- * Try to shut down the system immediately using kernel_power_off
- * if populated
+ * Try to shut off the system immediately if possible
*/
- pr_emerg("Hardware protection timed-out. Trying forced poweroff\n");
- kernel_power_off();
+
+ if (hw_failure_emergency_action == HWPROT_ACT_REBOOT)
+ kernel_restart(NULL);
+ else
+ kernel_power_off();
/*
* Worst of the worst case trigger emergency restart
*/
- pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n");
+ pr_emerg("Hardware protection %s failed. Trying emergency restart\n",
+ action_str);
emergency_restart();
}
-static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work,
- hw_failure_emergency_poweroff_func);
+static DECLARE_DELAYED_WORK(hw_failure_emergency_action_work,
+ hw_failure_emergency_action_func);
/**
- * hw_failure_emergency_poweroff - Trigger an emergency system poweroff
+ * hw_failure_emergency_schedule - Schedule an emergency system shutdown or reboot
+ *
+ * @action: The hardware protection action to be taken
+ * @action_delay_ms: Time in milliseconds to elapse before triggering action
*
* This may be called from any critical situation to trigger a system shutdown
- * after a given period of time. If time is negative this is not scheduled.
+ * or reboot after a given period of time.
+ * If time is negative this is not scheduled.
*/
-static void hw_failure_emergency_poweroff(int poweroff_delay_ms)
+static void hw_failure_emergency_schedule(enum hw_protection_action action,
+ int action_delay_ms)
{
- if (poweroff_delay_ms <= 0)
+ if (action_delay_ms <= 0)
return;
- schedule_delayed_work(&hw_failure_emergency_poweroff_work,
- msecs_to_jiffies(poweroff_delay_ms));
+ hw_failure_emergency_action = action;
+ schedule_delayed_work(&hw_failure_emergency_action_work,
+ msecs_to_jiffies(action_delay_ms));
}
/**
- * __hw_protection_shutdown - Trigger an emergency system shutdown or reboot
+ * __hw_protection_trigger - Trigger an emergency system shutdown or reboot
*
* @reason: Reason of emergency shutdown or reboot to be printed.
* @ms_until_forced: Time to wait for orderly shutdown or reboot before
* triggering it. Negative value disables the forced
* shutdown or reboot.
- * @shutdown: If true, indicates that a shutdown will happen
- * after the critical tempeature is reached.
- * If false, indicates that a reboot will happen
- * after the critical tempeature is reached.
+ * @action: The hardware protection action to be taken.
*
* Initiate an emergency system shutdown or reboot in order to protect
* hardware from further damage. Usage examples include a thermal protection.
@@ -995,11 +1025,16 @@ static void hw_failure_emergency_poweroff(int poweroff_delay_ms)
* pending even if the previous request has given a large timeout for forced
* shutdown/reboot.
*/
-void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown)
+void __hw_protection_trigger(const char *reason, int ms_until_forced,
+ enum hw_protection_action action)
{
static atomic_t allow_proceed = ATOMIC_INIT(1);
- pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason);
+ if (action == HWPROT_ACT_DEFAULT)
+ action = hw_protection_action;
+
+ pr_emerg("HARDWARE PROTECTION %s (%s)\n",
+ hw_protection_action_str(action), reason);
/* Shutdown should be initiated only once. */
if (!atomic_dec_and_test(&allow_proceed))
@@ -1009,13 +1044,55 @@ void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shut
* Queue a backup emergency shutdown in the event of
* orderly_poweroff failure
*/
- hw_failure_emergency_poweroff(ms_until_forced);
- if (shutdown)
+ hw_failure_emergency_schedule(action, ms_until_forced);
+ if (action == HWPROT_ACT_REBOOT)
+ orderly_reboot();
+ else
orderly_poweroff(true);
+}
+EXPORT_SYMBOL_GPL(__hw_protection_trigger);
+
+static bool hw_protection_action_parse(const char *str,
+ enum hw_protection_action *action)
+{
+ if (sysfs_streq(str, "shutdown"))
+ *action = HWPROT_ACT_SHUTDOWN;
+ else if (sysfs_streq(str, "reboot"))
+ *action = HWPROT_ACT_REBOOT;
else
- orderly_reboot();
+ return false;
+
+ return true;
+}
+
+static int __init hw_protection_setup(char *str)
+{
+ hw_protection_action_parse(str, &hw_protection_action);
+ return 1;
+}
+__setup("hw_protection=", hw_protection_setup);
+
+#ifdef CONFIG_SYSFS
+static ssize_t hw_protection_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n",
+ hw_protection_action_str(hw_protection_action));
+}
+static ssize_t hw_protection_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf,
+ size_t count)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!hw_protection_action_parse(buf, &hw_protection_action))
+ return -EINVAL;
+
+ return count;
}
-EXPORT_SYMBOL_GPL(__hw_protection_shutdown);
+static struct kobj_attribute hw_protection_attr = __ATTR_RW(hw_protection);
+#endif
static int __init reboot_setup(char *str)
{
@@ -1276,6 +1353,7 @@ static struct kobj_attribute reboot_cpu_attr = __ATTR_RW(cpu);
#endif
static struct attribute *reboot_attrs[] = {
+ &hw_protection_attr.attr,
&reboot_mode_attr.attr,
#ifdef CONFIG_X86
&reboot_force_attr.attr,
diff --git a/kernel/relay.c b/kernel/relay.c
index a8ae436dc77e..5ac7e711e4b6 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -351,10 +351,9 @@ static struct dentry *relay_create_buf_file(struct rchan *chan,
struct dentry *dentry;
char *tmpname;
- tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL);
+ tmpname = kasprintf(GFP_KERNEL, "%s%d", chan->base_filename, cpu);
if (!tmpname)
return NULL;
- snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu);
/* Create file in fs */
dentry = chan->cb->create_buf_file(tmpname, chan->parent,
diff --git a/kernel/resource.c b/kernel/resource.c
index 12004452d999..8d3e6ed0bdc1 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -561,8 +561,7 @@ static int __region_intersects(struct resource *parent, resource_size_t start,
struct resource res, o;
bool covered;
- res.start = start;
- res.end = start + size - 1;
+ res = DEFINE_RES(start, size, 0);
for (p = parent->child; p ; p = p->sibling) {
if (!resource_intersection(p, &res, &o))
@@ -1714,18 +1713,13 @@ static int __init reserve_setup(char *str)
* I/O port space; otherwise assume it's memory.
*/
if (io_start < 0x10000) {
- res->flags = IORESOURCE_IO;
+ *res = DEFINE_RES_IO_NAMED(io_start, io_num, "reserved");
parent = &ioport_resource;
} else {
- res->flags = IORESOURCE_MEM;
+ *res = DEFINE_RES_MEM_NAMED(io_start, io_num, "reserved");
parent = &iomem_resource;
}
- res->name = "reserved";
- res->start = io_start;
- res->end = io_start + io_num - 1;
res->flags |= IORESOURCE_BUSY;
- res->desc = IORES_DESC_NONE;
- res->child = NULL;
if (request_resource(parent, res) == 0)
reserved = x+1;
}
@@ -1975,11 +1969,7 @@ get_free_mem_region(struct device *dev, struct resource *base,
*/
revoke_iomem(res);
} else {
- res->start = addr;
- res->end = addr + size - 1;
- res->name = name;
- res->desc = desc;
- res->flags = IORESOURCE_MEM;
+ *res = DEFINE_RES_NAMED_DESC(addr, size, name, IORESOURCE_MEM, desc);
/*
* Only succeed if the resource hosts an exclusive
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cfaca3040b2f..c81cf642dba0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10703,7 +10703,6 @@ void sched_mm_cid_after_execve(struct task_struct *t)
smp_mb();
t->last_mm_cid = t->mm_cid = mm_cid_get(rq, t, mm);
}
- rseq_set_notify_resume(t);
}
void sched_mm_cid_fork(struct task_struct *t)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 21575d39c376..66bcd40a28ca 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4171,8 +4171,8 @@ static struct scx_dispatch_q *create_dsq(u64 dsq_id, int node)
init_dsq(dsq, dsq_id);
- ret = rhashtable_insert_fast(&dsq_hash, &dsq->hash_node,
- dsq_hash_params);
+ ret = rhashtable_lookup_insert_fast(&dsq_hash, &dsq->hash_node,
+ dsq_hash_params);
if (ret) {
kfree(dsq);
return ERR_PTR(ret);
@@ -5361,6 +5361,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
*/
cpus_read_lock();
+ scx_idle_enable(ops);
+
if (scx_ops.init) {
ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init);
if (ret) {
@@ -5427,8 +5429,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (scx_ops.cpu_acquire || scx_ops.cpu_release)
static_branch_enable(&scx_ops_cpu_preempt);
- scx_idle_enable(ops);
-
/*
* Lock out forks, cgroup on/offlining and moves before opening the
* floodgate so that they don't wander into the operations prematurely.
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 52c36a70a3d0..cb343ca889e0 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -544,7 +544,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
* core.
*/
if (flags & SCX_PICK_IDLE_CORE) {
- cpu = prev_cpu;
+ cpu = -EBUSY;
goto out_unlock;
}
}
@@ -584,8 +584,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
* increasing distance.
*/
cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
- if (cpu >= 0)
- goto out_unlock;
out_unlock:
rcu_read_unlock();
@@ -723,14 +721,14 @@ static void reset_idle_masks(struct sched_ext_ops *ops)
void scx_idle_enable(struct sched_ext_ops *ops)
{
if (!ops->update_idle || (ops->flags & SCX_OPS_KEEP_BUILTIN_IDLE))
- static_branch_enable(&scx_builtin_idle_enabled);
+ static_branch_enable_cpuslocked(&scx_builtin_idle_enabled);
else
- static_branch_disable(&scx_builtin_idle_enabled);
+ static_branch_disable_cpuslocked(&scx_builtin_idle_enabled);
if (ops->flags & SCX_OPS_BUILTIN_IDLE_PER_NODE)
- static_branch_enable(&scx_builtin_idle_per_node);
+ static_branch_enable_cpuslocked(&scx_builtin_idle_per_node);
else
- static_branch_disable(&scx_builtin_idle_per_node);
+ static_branch_disable_cpuslocked(&scx_builtin_idle_per_node);
#ifdef CONFIG_SMP
reset_idle_masks(ops);
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index bb56805e3d47..1396674fa722 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1440,7 +1440,7 @@ void psi_trigger_destroy(struct psi_trigger *t)
group->rtpoll_task,
lockdep_is_held(&group->rtpoll_trigger_lock));
rcu_assign_pointer(group->rtpoll_task, NULL);
- del_timer(&group->rtpoll_timer);
+ timer_delete(&group->rtpoll_timer);
}
}
mutex_unlock(&group->rtpoll_trigger_lock);
diff --git a/kernel/signal.c b/kernel/signal.c
index 86ba66d95da5..f8859faa26c5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -176,9 +176,10 @@ static bool recalc_sigpending_tsk(struct task_struct *t)
void recalc_sigpending(void)
{
- if (!recalc_sigpending_tsk(current) && !freezing(current))
- clear_thread_flag(TIF_SIGPENDING);
-
+ if (!recalc_sigpending_tsk(current) && !freezing(current)) {
+ if (unlikely(test_thread_flag(TIF_SIGPENDING)))
+ clear_thread_flag(TIF_SIGPENDING);
+ }
}
EXPORT_SYMBOL(recalc_sigpending);
@@ -2179,11 +2180,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
WARN_ON_ONCE(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
- /*
- * Notify for thread-group leaders without subthreads.
- */
- if (thread_group_empty(tsk))
- do_notify_pidfd(tsk);
+
+ /* ptraced, or group-leader without sub-threads */
+ do_notify_pidfd(tsk);
if (sig != SIGCHLD) {
/*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e0eeacbe2521..bb48498ebb5a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -619,7 +619,7 @@ static inline void clocksource_stop_watchdog(void)
{
if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
return;
- del_timer(&watchdog_timer);
+ timer_delete(&watchdog_timer);
watchdog_running = 0;
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 22376a1a75b9..517ee2590a29 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -465,19 +465,17 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer,
static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
#endif
-static inline void
-debug_init(struct hrtimer *timer, clockid_t clockid,
- enum hrtimer_mode mode)
+static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode)
{
debug_hrtimer_init(timer);
- trace_hrtimer_init(timer, clockid, mode);
+ trace_hrtimer_setup(timer, clockid, mode);
}
-static inline void debug_init_on_stack(struct hrtimer *timer, clockid_t clockid,
- enum hrtimer_mode mode)
+static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid,
+ enum hrtimer_mode mode)
{
debug_hrtimer_init_on_stack(timer);
- trace_hrtimer_init(timer, clockid, mode);
+ trace_hrtimer_setup(timer, clockid, mode);
}
static inline void debug_activate(struct hrtimer *timer,
@@ -1316,8 +1314,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base;
unsigned long flags;
- if (WARN_ON_ONCE(!timer->function))
- return;
/*
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
@@ -1429,7 +1425,7 @@ static __always_inline bool is_migration_base(struct hrtimer_clock_base *base)
* running.
*
* This prevents priority inversion: if the soft irq thread is preempted
- * in the middle of a timer callback, then calling del_timer_sync() can
+ * in the middle of a timer callback, then calling hrtimer_cancel() can
* lead to two issues:
*
* - If the caller is on a remote CPU then it has to spin wait for the timer
@@ -1592,8 +1588,9 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
}
}
-static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
- enum hrtimer_mode mode)
+static void __hrtimer_setup(struct hrtimer *timer,
+ enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t clock_id, enum hrtimer_mode mode)
{
bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
struct hrtimer_cpu_base *cpu_base;
@@ -1626,39 +1623,12 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
-}
-
-static void __hrtimer_setup(struct hrtimer *timer,
- enum hrtimer_restart (*function)(struct hrtimer *),
- clockid_t clock_id, enum hrtimer_mode mode)
-{
- __hrtimer_init(timer, clock_id, mode);
if (WARN_ON_ONCE(!function))
- timer->function = hrtimer_dummy_timeout;
+ ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout;
else
- timer->function = function;
-}
-
-/**
- * hrtimer_init - initialize a timer to the given clock
- * @timer: the timer to be initialized
- * @clock_id: the clock to be used
- * @mode: The modes which are relevant for initialization:
- * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
- * HRTIMER_MODE_REL_SOFT
- *
- * The PINNED variants of the above can be handed in,
- * but the PINNED bit is ignored as pinning happens
- * when the hrtimer is started
- */
-void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
- enum hrtimer_mode mode)
-{
- debug_init(timer, clock_id, mode);
- __hrtimer_init(timer, clock_id, mode);
+ ACCESS_PRIVATE(timer, function) = function;
}
-EXPORT_SYMBOL_GPL(hrtimer_init);
/**
* hrtimer_setup - initialize a timer to the given clock
@@ -1676,7 +1646,7 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
clockid_t clock_id, enum hrtimer_mode mode)
{
- debug_init(timer, clock_id, mode);
+ debug_setup(timer, clock_id, mode);
__hrtimer_setup(timer, function, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup);
@@ -1695,7 +1665,7 @@ void hrtimer_setup_on_stack(struct hrtimer *timer,
enum hrtimer_restart (*function)(struct hrtimer *),
clockid_t clock_id, enum hrtimer_mode mode)
{
- debug_init_on_stack(timer, clock_id, mode);
+ debug_setup_on_stack(timer, clock_id, mode);
__hrtimer_setup(timer, function, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack);
@@ -1769,7 +1739,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
raw_write_seqcount_barrier(&base->seq);
__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
- fn = timer->function;
+ fn = ACCESS_PRIVATE(timer, function);
/*
* Clear the 'is relative' flag for the TIME_LOW_RES case. If the
@@ -2044,7 +2014,7 @@ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
* Make the enqueue delivery mode check work on RT. If the sleeper
* was initialized for hard interrupt delivery, force the mode bit.
* This is a special case for hrtimer_sleepers because
- * __hrtimer_init_sleeper() determines the delivery mode on RT so the
+ * __hrtimer_setup_sleeper() determines the delivery mode on RT so the
* fiddling with this decision is avoided at the call sites.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
@@ -2054,8 +2024,8 @@ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
}
EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
-static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
- clockid_t clock_id, enum hrtimer_mode mode)
+static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl,
+ clockid_t clock_id, enum hrtimer_mode mode)
{
/*
* On PREEMPT_RT enabled kernels hrtimers which are not explicitly
@@ -2081,8 +2051,7 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
mode |= HRTIMER_MODE_HARD;
}
- __hrtimer_init(&sl->timer, clock_id, mode);
- sl->timer.function = hrtimer_wakeup;
+ __hrtimer_setup(&sl->timer, hrtimer_wakeup, clock_id, mode);
sl->task = current;
}
@@ -2095,8 +2064,8 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl,
clockid_t clock_id, enum hrtimer_mode mode)
{
- debug_init_on_stack(&sl->timer, clock_id, mode);
- __hrtimer_init_sleeper(sl, clock_id, mode);
+ debug_setup_on_stack(&sl->timer, clock_id, mode);
+ __hrtimer_setup_sleeper(sl, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack);
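The sleeper helpers above serve the on-stack sleep pattern used by do_nanosleep() and friends. A hedged sketch of that pattern (error handling trimmed; sleep_until() is a made-up wrapper, not an existing kernel function):

#include <linux/hrtimer.h>
#include <linux/sched.h>

static void sleep_until(ktime_t abs_expires)
{
	struct hrtimer_sleeper sl;

	hrtimer_setup_sleeper_on_stack(&sl, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	hrtimer_set_expires(&sl.timer, abs_expires);

	do {
		set_current_state(TASK_INTERRUPTIBLE);
		hrtimer_sleeper_start_expires(&sl, HRTIMER_MODE_ABS);
		/* hrtimer_wakeup() clears sl.task when the timer fires */
		if (sl.task)
			schedule();
		hrtimer_cancel(&sl.timer);
	} while (sl.task && !signal_pending(current));

	__set_current_state(TASK_RUNNING);
	destroy_hrtimer_on_stack(&sl.timer);
}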
diff --git a/kernel/time/sleep_timeout.c b/kernel/time/sleep_timeout.c
index dfe939f6e4ec..c0e960a5de39 100644
--- a/kernel/time/sleep_timeout.c
+++ b/kernel/time/sleep_timeout.c
@@ -97,7 +97,7 @@ signed long __sched schedule_timeout(signed long timeout)
timer.timer.expires = expire;
add_timer(&timer.timer);
schedule();
- del_timer_sync(&timer.timer);
+ timer_delete_sync(&timer.timer);
/* Remove the timer from the object tracker */
destroy_timer_on_stack(&timer.timer);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 929846b8b45a..1e67d076f195 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -682,19 +682,20 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act
}
/**
- * timekeeping_forward - update clock to given cycle now value
+ * timekeeping_forward_now - update clock to the current time
* @tk: Pointer to the timekeeper to update
- * @cycle_now: Current clocksource read value
*
* Forward the current clock to update its state since the last call to
* update_wall_time(). This is useful before significant clock changes,
* as it avoids having to deal with this time offset explicitly.
*/
-static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
+static void timekeeping_forward_now(struct timekeeper *tk)
{
- u64 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
- tk->tkr_mono.clock->max_raw_delta);
+ u64 cycle_now, delta;
+ cycle_now = tk_clock_read(&tk->tkr_mono);
+ delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
+ tk->tkr_mono.clock->max_raw_delta);
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
@@ -710,21 +711,6 @@ static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
}
/**
- * timekeeping_forward_now - update clock to the current time
- * @tk: Pointer to the timekeeper to update
- *
- * Forward the current clock to update its state since the last call to
- * update_wall_time(). This is useful before significant clock changes,
- * as it avoids having to deal with this time offset explicitly.
- */
-static void timekeeping_forward_now(struct timekeeper *tk)
-{
- u64 cycle_now = tk_clock_read(&tk->tkr_mono);
-
- timekeeping_forward(tk, cycle_now);
-}
-
-/**
* ktime_get_real_ts64 - Returns the time of day in a timespec64.
* @ts: pointer to the timespec to be set
*
@@ -2165,54 +2151,6 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
return offset;
}
-static u64 timekeeping_accumulate(struct timekeeper *tk, u64 offset,
- enum timekeeping_adv_mode mode,
- unsigned int *clock_set)
-{
- int shift = 0, maxshift;
-
- /*
- * TK_ADV_FREQ indicates that adjtimex(2) directly set the
- * frequency or the tick length.
- *
- * Accumulate the offset, so that the new multiplier starts from
- * now. This is required as otherwise for offsets, which are
- * smaller than tk::cycle_interval, timekeeping_adjust() could set
- * xtime_nsec backwards, which subsequently causes time going
- * backwards in the coarse time getters. But even for the case
- * where offset is greater than tk::cycle_interval the periodic
- * accumulation does not have much value.
- *
- * Also reset tk::ntp_error as it does not make sense to keep the
- * old accumulated error around in this case.
- */
- if (mode == TK_ADV_FREQ) {
- timekeeping_forward(tk, tk->tkr_mono.cycle_last + offset);
- tk->ntp_error = 0;
- return 0;
- }
-
- /*
- * With NO_HZ we may have to accumulate many cycle_intervals
- * (think "ticks") worth of time at once. To do this efficiently,
- * we calculate the largest doubling multiple of cycle_intervals
- * that is smaller than the offset. We then accumulate that
- * chunk in one go, and then try to consume the next smaller
- * doubled multiple.
- */
- shift = ilog2(offset) - ilog2(tk->cycle_interval);
- shift = max(0, shift);
- /* Bound shift to one less than what overflows tick_length */
- maxshift = (64 - (ilog2(ntp_tick_length()) + 1)) - 1;
- shift = min(shift, maxshift);
- while (offset >= tk->cycle_interval) {
- offset = logarithmic_accumulation(tk, offset, shift, clock_set);
- if (offset < tk->cycle_interval << shift)
- shift--;
- }
- return offset;
-}
-
/*
* timekeeping_advance - Updates the timekeeper to the current time and
* current NTP tick length
@@ -2222,6 +2160,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
struct timekeeper *tk = &tk_core.shadow_timekeeper;
struct timekeeper *real_tk = &tk_core.timekeeper;
unsigned int clock_set = 0;
+ int shift = 0, maxshift;
u64 offset;
guard(raw_spinlock_irqsave)(&tk_core.lock);
@@ -2238,7 +2177,24 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
return false;
- offset = timekeeping_accumulate(tk, offset, mode, &clock_set);
+ /*
+ * With NO_HZ we may have to accumulate many cycle_intervals
+ * (think "ticks") worth of time at once. To do this efficiently,
+ * we calculate the largest doubling multiple of cycle_intervals
+ * that is smaller than the offset. We then accumulate that
+ * chunk in one go, and then try to consume the next smaller
+ * doubled multiple.
+ */
+ shift = ilog2(offset) - ilog2(tk->cycle_interval);
+ shift = max(0, shift);
+ /* Bound shift to one less than what overflows tick_length */
+ maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
+ shift = min(shift, maxshift);
+ while (offset >= tk->cycle_interval) {
+ offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
+ if (offset < tk->cycle_interval<<shift)
+ shift--;
+ }
/* Adjust the multiplier to correct NTP error */
timekeeping_adjust(tk, offset);
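To make the shift bounding above concrete, a standalone userspace sketch with invented numbers (the real values come from the clocksource and ntp_tick_length()):

#include <stdio.h>

static int ilog2_u64(unsigned long long v)
{
	return 63 - __builtin_clzll(v);		/* valid for v > 0 */
}

int main(void)
{
	unsigned long long cycle_interval = 1000;	/* cycles per tick (made up) */
	unsigned long long offset = 64000;		/* unaccumulated cycles (made up) */
	unsigned long long tick_length = 1ULL << 40;	/* stand-in for ntp_tick_length() */
	int shift, maxshift;

	shift = ilog2_u64(offset) - ilog2_u64(cycle_interval);	/* 15 - 9 = 6 */
	if (shift < 0)
		shift = 0;
	/* one less than the shift that would overflow tick_length in 64 bits */
	maxshift = (64 - (ilog2_u64(tick_length) + 1)) - 1;	/* 22 here */
	if (shift > maxshift)
		shift = maxshift;

	/* first chunk: cycle_interval << 6 = 64000 cycles, covering the whole
	 * offset in one go; smaller leftovers would be consumed with
	 * progressively smaller shifts, as in the loop above */
	printf("shift=%d maxshift=%d chunk=%llu\n",
	       shift, maxshift, cycle_interval << shift);
	return 0;
}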
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index c8f776dc6ee0..4d915c0a263c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -744,7 +744,7 @@ static bool timer_fixup_init(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_ACTIVE:
- del_timer_sync(timer);
+ timer_delete_sync(timer);
debug_object_init(timer, &timer_debug_descr);
return true;
default:
@@ -790,7 +790,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_ACTIVE:
- del_timer_sync(timer);
+ timer_delete_sync(timer);
debug_object_free(timer, &timer_debug_descr);
return true;
default:
@@ -1212,10 +1212,10 @@ EXPORT_SYMBOL(mod_timer_pending);
*
* mod_timer(timer, expires) is equivalent to:
*
- * del_timer(timer); timer->expires = expires; add_timer(timer);
+ * timer_delete(timer); timer->expires = expires; add_timer(timer);
*
* mod_timer() is more efficient than the above open coded sequence. In
- * case that the timer is inactive, the del_timer() part is a NOP. The
+ * case that the timer is inactive, the timer_delete() part is a NOP. The
* timer is in any case activated with the new expiry time @expires.
*
* Note that if there are multiple unserialized concurrent users of the
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index cfbb46cc4e76..b03d0ada6469 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -46,7 +46,7 @@ static void
print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
int idx, u64 now)
{
- SEQ_printf(m, " #%d: <%p>, %ps", idx, taddr, timer->function);
+ SEQ_printf(m, " #%d: <%p>, %ps", idx, taddr, ACCESS_PRIVATE(timer, function));
SEQ_printf(m, ", S:%02x", timer->state);
SEQ_printf(m, "\n");
SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 033fba0633cf..a3f35c7d83b6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -265,8 +265,7 @@ config FUNCTION_GRAPH_RETADDR
config FUNCTION_TRACE_ARGS
bool
- depends on HAVE_FUNCTION_ARG_ACCESS_API
- depends on DEBUG_INFO_BTF
+ depends on PROBE_EVENTS_BTF_ARGS
default y
help
If supported with function argument access API and BTF, then
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 92015de6203d..1a48aedb5255 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6855,6 +6855,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer)
}
}
}
+ cond_resched();
} while_for_each_ftrace_rec();
return fail ? -EINVAL : 0;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9d4d951090d3..c0f877d39a24 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -31,6 +31,7 @@
#include <asm/local64.h>
#include <asm/local.h>
+#include <asm/setup.h>
#include "trace.h"
@@ -48,9 +49,12 @@ static void update_pages_handler(struct work_struct *work);
struct ring_buffer_meta {
int magic;
- int struct_size;
- unsigned long text_addr;
- unsigned long data_addr;
+ int struct_sizes;
+ unsigned long total_size;
+ unsigned long buffers_offset;
+};
+
+struct ring_buffer_cpu_meta {
unsigned long first_buffer;
unsigned long head_buffer;
unsigned long commit_buffer;
@@ -517,7 +521,7 @@ struct ring_buffer_per_cpu {
struct mutex mapping_lock;
unsigned long *subbuf_ids; /* ID to subbuf VA */
struct trace_buffer_meta *meta_page;
- struct ring_buffer_meta *ring_meta;
+ struct ring_buffer_cpu_meta *ring_meta;
/* ring buffer pages to update, > 0 to add, < 0 to remove */
long nr_pages_to_update;
@@ -550,8 +554,7 @@ struct trace_buffer {
unsigned long range_addr_start;
unsigned long range_addr_end;
- long last_text_delta;
- long last_data_delta;
+ struct ring_buffer_meta *meta;
unsigned int subbuf_size;
unsigned int subbuf_order;
@@ -1271,7 +1274,7 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
rb_set_list_to_head(head->list.prev);
if (cpu_buffer->ring_meta) {
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
meta->head_buffer = (unsigned long)head->page;
}
}
@@ -1569,7 +1572,7 @@ out_locked:
static unsigned long
rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs)
{
- addr += sizeof(struct ring_buffer_meta) +
+ addr += sizeof(struct ring_buffer_cpu_meta) +
sizeof(int) * nr_subbufs;
return ALIGN(addr, subbuf_size);
}
@@ -1580,19 +1583,22 @@ rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs)
static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu)
{
int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE;
- unsigned long ptr = buffer->range_addr_start;
- struct ring_buffer_meta *meta;
+ struct ring_buffer_cpu_meta *meta;
+ struct ring_buffer_meta *bmeta;
+ unsigned long ptr;
int nr_subbufs;
- if (!ptr)
+ bmeta = buffer->meta;
+ if (!bmeta)
return NULL;
+ ptr = (unsigned long)bmeta + bmeta->buffers_offset;
+ meta = (struct ring_buffer_cpu_meta *)ptr;
+
/* When nr_pages passed in is zero, the first meta has already been initialized */
if (!nr_pages) {
- meta = (struct ring_buffer_meta *)ptr;
nr_subbufs = meta->nr_subbufs;
} else {
- meta = NULL;
/* Include the reader page */
nr_subbufs = nr_pages + 1;
}
@@ -1624,7 +1630,7 @@ static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu)
}
/* Return the start of subbufs given the meta pointer */
-static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta)
+static void *rb_subbufs_from_meta(struct ring_buffer_cpu_meta *meta)
{
int subbuf_size = meta->subbuf_size;
unsigned long ptr;
@@ -1640,7 +1646,7 @@ static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta)
*/
static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx)
{
- struct ring_buffer_meta *meta;
+ struct ring_buffer_cpu_meta *meta;
unsigned long ptr;
int subbuf_size;
@@ -1666,14 +1672,77 @@ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx)
}
/*
+ * See if the existing memory contains a valid meta section.
+ * If so, use that; otherwise initialize it.
+ */
+static bool rb_meta_init(struct trace_buffer *buffer, int scratch_size)
+{
+ unsigned long ptr = buffer->range_addr_start;
+ struct ring_buffer_meta *bmeta;
+ unsigned long total_size;
+ int struct_sizes;
+
+ bmeta = (struct ring_buffer_meta *)ptr;
+ buffer->meta = bmeta;
+
+ total_size = buffer->range_addr_end - buffer->range_addr_start;
+
+ struct_sizes = sizeof(struct ring_buffer_cpu_meta);
+ struct_sizes |= sizeof(*bmeta) << 16;
+
+ /* The first buffer starts word aligned after the meta data and the scratch area */
+ ptr += sizeof(*bmeta);
+ ptr = ALIGN(ptr, sizeof(long));
+ ptr += scratch_size;
+
+ if (bmeta->magic != RING_BUFFER_META_MAGIC) {
+ pr_info("Ring buffer boot meta mismatch of magic\n");
+ goto init;
+ }
+
+ if (bmeta->struct_sizes != struct_sizes) {
+ pr_info("Ring buffer boot meta mismatch of struct size\n");
+ goto init;
+ }
+
+ if (bmeta->total_size != total_size) {
+ pr_info("Ring buffer boot meta mismatch of total size\n");
+ goto init;
+ }
+
+ if (bmeta->buffers_offset > bmeta->total_size) {
+ pr_info("Ring buffer boot meta mismatch of offset outside of total size\n");
+ goto init;
+ }
+
+ if (bmeta->buffers_offset != (void *)ptr - (void *)bmeta) {
+ pr_info("Ring buffer boot meta mismatch of first buffer offset\n");
+ goto init;
+ }
+
+ return true;
+
+ init:
+ bmeta->magic = RING_BUFFER_META_MAGIC;
+ bmeta->struct_sizes = struct_sizes;
+ bmeta->total_size = total_size;
+ bmeta->buffers_offset = (void *)ptr - (void *)bmeta;
+
+ /* Zero out the scratch pad */
+ memset((void *)bmeta + sizeof(*bmeta), 0, bmeta->buffers_offset - sizeof(*bmeta));
+
+ return false;
+}
+
+/*
* See if the existing memory contains valid ring buffer data.
* As the previous kernel must be the same as this kernel, all
* the calculations (size of buffers and number of buffers)
* must be the same.
*/
-static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
- struct trace_buffer *buffer, int nr_pages,
- unsigned long *subbuf_mask)
+static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
+ struct trace_buffer *buffer, int nr_pages,
+ unsigned long *subbuf_mask)
{
int subbuf_size = PAGE_SIZE;
struct buffer_data_page *subbuf;
@@ -1684,20 +1753,6 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
if (!subbuf_mask)
return false;
- /* Check the meta magic and meta struct size */
- if (meta->magic != RING_BUFFER_META_MAGIC ||
- meta->struct_size != sizeof(*meta)) {
- pr_info("Ring buffer boot meta[%d] mismatch of magic or struct size\n", cpu);
- return false;
- }
-
- /* The subbuffer's size and number of subbuffers must match */
- if (meta->subbuf_size != subbuf_size ||
- meta->nr_subbufs != nr_pages + 1) {
- pr_info("Ring buffer boot meta [%d] mismatch of subbuf_size/nr_pages\n", cpu);
- return false;
- }
-
buffers_start = meta->first_buffer;
buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs);
@@ -1743,7 +1798,7 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
return true;
}
-static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf);
+static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf);
static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu,
unsigned long long *timestamp, u64 *delta_ptr)
@@ -1810,7 +1865,7 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu)
/* If the meta data has been validated, now validate the events */
static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
struct buffer_page *head_page;
unsigned long entry_bytes = 0;
unsigned long entries = 0;
@@ -1891,24 +1946,13 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
}
}
-/* Used to calculate data delta */
-static char rb_data_ptr[] = "";
-
-#define THIS_TEXT_PTR ((unsigned long)rb_meta_init_text_addr)
-#define THIS_DATA_PTR ((unsigned long)rb_data_ptr)
-
-static void rb_meta_init_text_addr(struct ring_buffer_meta *meta)
-{
- meta->text_addr = THIS_TEXT_PTR;
- meta->data_addr = THIS_DATA_PTR;
-}
-
-static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
+static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages, int scratch_size)
{
- struct ring_buffer_meta *meta;
+ struct ring_buffer_cpu_meta *meta;
unsigned long *subbuf_mask;
unsigned long delta;
void *subbuf;
+ bool valid = false;
int cpu;
int i;
@@ -1916,20 +1960,21 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
subbuf_mask = bitmap_alloc(nr_pages + 1, GFP_KERNEL);
/* If subbuf_mask fails to allocate, then rb_meta_valid() will return false */
+ if (rb_meta_init(buffer, scratch_size))
+ valid = true;
+
for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
void *next_meta;
meta = rb_range_meta(buffer, nr_pages, cpu);
- if (rb_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) {
+ if (valid && rb_cpu_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) {
/* Make the mappings match the current address */
subbuf = rb_subbufs_from_meta(meta);
delta = (unsigned long)subbuf - meta->first_buffer;
meta->first_buffer += delta;
meta->head_buffer += delta;
meta->commit_buffer += delta;
- buffer->last_text_delta = THIS_TEXT_PTR - meta->text_addr;
- buffer->last_data_delta = THIS_DATA_PTR - meta->data_addr;
continue;
}
@@ -1940,16 +1985,12 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
memset(meta, 0, next_meta - (void *)meta);
- meta->magic = RING_BUFFER_META_MAGIC;
- meta->struct_size = sizeof(*meta);
-
meta->nr_subbufs = nr_pages + 1;
meta->subbuf_size = PAGE_SIZE;
subbuf = rb_subbufs_from_meta(meta);
meta->first_buffer = (unsigned long)subbuf;
- rb_meta_init_text_addr(meta);
/*
* The buffers[] array holds the order of the sub-buffers
@@ -1971,7 +2012,7 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
static void *rbm_start(struct seq_file *m, loff_t *pos)
{
struct ring_buffer_per_cpu *cpu_buffer = m->private;
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
unsigned long val;
if (!meta)
@@ -1996,7 +2037,7 @@ static void *rbm_next(struct seq_file *m, void *v, loff_t *pos)
static int rbm_show(struct seq_file *m, void *v)
{
struct ring_buffer_per_cpu *cpu_buffer = m->private;
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
unsigned long val = (unsigned long)v;
if (val == 1) {
@@ -2045,7 +2086,7 @@ int ring_buffer_meta_seq_init(struct file *file, struct trace_buffer *buffer, in
static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *bpage)
{
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
if (meta->head_buffer == (unsigned long)bpage->page)
cpu_buffer->head_page = bpage;
@@ -2060,7 +2101,7 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
long nr_pages, struct list_head *pages)
{
struct trace_buffer *buffer = cpu_buffer->buffer;
- struct ring_buffer_meta *meta = NULL;
+ struct ring_buffer_cpu_meta *meta = NULL;
struct buffer_page *bpage, *tmp;
bool user_thread = current->mm != NULL;
gfp_t mflags;
@@ -2184,7 +2225,7 @@ static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct ring_buffer_meta *meta;
+ struct ring_buffer_cpu_meta *meta;
struct buffer_page *bpage;
struct page *page;
int ret;
@@ -2313,6 +2354,7 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
int order, unsigned long start,
unsigned long end,
+ unsigned long scratch_size,
struct lock_class_key *key)
{
struct trace_buffer *buffer;
@@ -2355,10 +2397,23 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
/* If start/end are specified, then that overrides size */
if (start && end) {
+ unsigned long buffers_start;
unsigned long ptr;
int n;
- size = end - start;
+ /* Make sure that start is word aligned */
+ start = ALIGN(start, sizeof(long));
+
+ /* scratch_size needs to be aligned too */
+ scratch_size = ALIGN(scratch_size, sizeof(long));
+
+ /* Subtract the buffer meta data and word aligned */
+ buffers_start = start + sizeof(struct ring_buffer_cpu_meta);
+ buffers_start = ALIGN(buffers_start, sizeof(long));
+ buffers_start += scratch_size;
+
+ /* Calculate the size for the per CPU data */
+ size = end - buffers_start;
size = size / nr_cpu_ids;
/*
@@ -2368,7 +2423,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
* needed, plus account for the integer array index that
* will be appended to the meta data.
*/
- nr_pages = (size - sizeof(struct ring_buffer_meta)) /
+ nr_pages = (size - sizeof(struct ring_buffer_cpu_meta)) /
(subbuf_size + sizeof(int));
/* Need at least two pages plus the reader page */
if (nr_pages < 3)
@@ -2376,8 +2431,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
again:
/* Make sure that the size fits aligned */
- for (n = 0, ptr = start; n < nr_cpu_ids; n++) {
- ptr += sizeof(struct ring_buffer_meta) +
+ for (n = 0, ptr = buffers_start; n < nr_cpu_ids; n++) {
+ ptr += sizeof(struct ring_buffer_cpu_meta) +
sizeof(int) * nr_pages;
ptr = ALIGN(ptr, subbuf_size);
ptr += subbuf_size * nr_pages;
@@ -2394,7 +2449,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
buffer->range_addr_start = start;
buffer->range_addr_end = end;
- rb_range_meta_init(buffer, nr_pages);
+ rb_range_meta_init(buffer, nr_pages, scratch_size);
} else {
/* need at least two pages */
@@ -2447,7 +2502,7 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
struct lock_class_key *key)
{
/* Default buffer page size - one system page */
- return alloc_buffer(size, flags, 0, 0, 0,key);
+ return alloc_buffer(size, flags, 0, 0, 0, 0, key);
}
EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
@@ -2459,6 +2514,7 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
* @order: sub-buffer order
* @start: start of allocated range
* @range_size: size of allocated range
+ * @scratch_size: size of scratch area (for preallocated memory buffers)
* @key: ring buffer reader_lock_key.
*
* Currently the only flag that is available is the RB_FL_OVERWRITE
@@ -2469,32 +2525,29 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags,
int order, unsigned long start,
unsigned long range_size,
+ unsigned long scratch_size,
struct lock_class_key *key)
{
- return alloc_buffer(size, flags, order, start, start + range_size, key);
+ return alloc_buffer(size, flags, order, start, start + range_size,
+ scratch_size, key);
}
-/**
- * ring_buffer_last_boot_delta - return the delta offset from last boot
- * @buffer: The buffer to return the delta from
- * @text: Return text delta
- * @data: Return data delta
- *
- * Returns: The true if the delta is non zero
- */
-bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text,
- long *data)
+void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size)
{
- if (!buffer)
- return false;
+ struct ring_buffer_meta *meta;
+ void *ptr;
- if (!buffer->last_text_delta)
- return false;
+ if (!buffer || !buffer->meta)
+ return NULL;
- *text = buffer->last_text_delta;
- *data = buffer->last_data_delta;
+ meta = buffer->meta;
- return true;
+ ptr = (void *)ALIGN((unsigned long)meta + sizeof(*meta), sizeof(long));
+
+ if (size)
+ *size = (void *)meta + meta->buffers_offset - ptr;
+
+ return ptr;
}
/**
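The new global meta header, the scratch area and the per-CPU metas are laid out back to back; a standalone sketch of the offset math shared by rb_meta_init() and ring_buffer_meta_scratch() above (all sizes below are invented):

#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long range_start  = 0x10000000UL;	/* reserved region start */
	unsigned long meta_size    = 24;		/* stand-in for sizeof(struct ring_buffer_meta) */
	unsigned long scratch_size = 8192;		/* room for the module table */
	unsigned long scratch, buffers;

	scratch = ALIGN_UP(range_start + meta_size, sizeof(long));
	buffers = scratch + ALIGN_UP(scratch_size, sizeof(long));

	printf("meta    @ 0x%lx\n", range_start);
	printf("scratch @ 0x%lx, size %lu\n", scratch, buffers - scratch);
	printf("per-CPU metas/sub-buffers @ 0x%lx, buffers_offset = %lu\n",
	       buffers, buffers - range_start);
	return 0;
}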
@@ -3105,7 +3158,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
}
/* Return the index into the sub-buffers for a given sub-buffer */
-static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf)
+static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf)
{
void *subbuf_array;
@@ -3117,7 +3170,7 @@ static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf)
static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *next_page)
{
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
unsigned long old_head = (unsigned long)next_page->page;
unsigned long new_head;
@@ -3134,7 +3187,7 @@ static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer,
static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *reader)
{
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
void *old_reader = cpu_buffer->reader_page->page;
void *new_reader = reader->page;
int id;
@@ -3763,7 +3816,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(&cpu_buffer->commit_page);
if (cpu_buffer->ring_meta) {
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
meta->commit_buffer = (unsigned long)cpu_buffer->commit_page->page;
}
/* add barrier to keep gcc from optimizing too much */
@@ -5963,7 +6016,7 @@ static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
meta->read = cpu_buffer->read;
/* Some archs do not have data cache coherency between kernel and user-space */
- flush_dcache_folio(virt_to_folio(cpu_buffer->meta_page));
+ flush_kernel_vmap_range(cpu_buffer->meta_page, PAGE_SIZE);
}
static void
@@ -6016,7 +6069,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
if (cpu_buffer->mapped) {
rb_update_meta_page(cpu_buffer);
if (cpu_buffer->ring_meta) {
- struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
meta->commit_buffer = meta->head_buffer;
}
}
@@ -6050,7 +6103,6 @@ static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
- struct ring_buffer_meta *meta;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
@@ -6069,11 +6121,6 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
atomic_dec(&cpu_buffer->record_disabled);
atomic_dec(&cpu_buffer->resize_disabled);
- /* Make sure persistent meta now uses this buffer's addresses */
- meta = rb_range_meta(buffer, 0, cpu_buffer->cpu);
- if (meta)
- rb_meta_init_text_addr(meta);
-
mutex_unlock(&buffer->mutex);
}
EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
@@ -6088,7 +6135,6 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct ring_buffer_meta *meta;
int cpu;
/* prevent another thread from changing buffer sizes */
@@ -6116,11 +6162,6 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
reset_disabled_cpu_buffer(cpu_buffer);
- /* Make sure persistent meta now uses this buffer's addresses */
- meta = rb_range_meta(buffer, 0, cpu_buffer->cpu);
- if (meta)
- rb_meta_init_text_addr(meta);
-
atomic_dec(&cpu_buffer->record_disabled);
atomic_sub(RESET_BIT, &cpu_buffer->resize_disabled);
}
@@ -7278,7 +7319,8 @@ consume:
out:
/* Some archs do not have data cache coherency between kernel and user-space */
- flush_dcache_folio(virt_to_folio(cpu_buffer->reader_page->page));
+ flush_kernel_vmap_range(cpu_buffer->reader_page->page,
+ buffer->subbuf_size + BUF_PAGE_HDR_SIZE);
rb_update_meta_page(cpu_buffer);
@@ -7411,9 +7453,9 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested)
/* Ignore dropped events before test starts. */
if (started) {
if (nested)
- data->bytes_dropped += len;
- else
data->bytes_dropped_nested += len;
+ else
+ data->bytes_dropped += len;
}
return len;
}
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 50344aa9f7f9..968c5c3b0246 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -809,7 +809,8 @@ int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent)
if (p && rv_is_nested_monitor(p)) {
pr_info("Parent monitor %s is already nested, cannot nest further\n",
parent->name);
- return -EINVAL;
+ retval = -EINVAL;
+ goto out_unlock;
}
r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 826267f5b650..b581e388a9d9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -49,6 +49,8 @@
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
+#include <linux/sort.h>
+#include <linux/io.h> /* vmap_page_range() */
#include <asm/setup.h> /* COMMAND_LINE_SIZE */
@@ -3340,10 +3342,9 @@ out_nobuffer:
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
-__printf(3, 0)
-static int
-__trace_array_vprintk(struct trace_buffer *buffer,
- unsigned long ip, const char *fmt, va_list args)
+static __printf(3, 0)
+int __trace_array_vprintk(struct trace_buffer *buffer,
+ unsigned long ip, const char *fmt, va_list args)
{
struct ring_buffer_event *event;
int len = 0, size;
@@ -3393,7 +3394,6 @@ out_nobuffer:
return len;
}
-__printf(3, 0)
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
@@ -3423,7 +3423,6 @@ int trace_array_vprintk(struct trace_array *tr,
* Note, trace_array_init_printk() must be called on @tr before this
* can be used.
*/
-__printf(3, 0)
int trace_array_printk(struct trace_array *tr,
unsigned long ip, const char *fmt, ...)
{
@@ -3468,7 +3467,6 @@ int trace_array_init_printk(struct trace_array *tr)
}
EXPORT_SYMBOL_GPL(trace_array_init_printk);
-__printf(3, 4)
int trace_array_printk_buf(struct trace_buffer *buffer,
unsigned long ip, const char *fmt, ...)
{
@@ -3484,7 +3482,6 @@ int trace_array_printk_buf(struct trace_buffer *buffer,
return ret;
}
-__printf(2, 0)
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
return trace_array_vprintk(printk_trace, ip, fmt, args);
@@ -4206,7 +4203,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
* safe to use if the array has delta offsets
* Force printing via the fields.
*/
- if ((tr->text_delta || tr->data_delta) &&
+ if ((tr->text_delta) &&
event->type > __TRACE_LAST_TYPE)
return print_event_fields(iter, event);
@@ -6001,11 +5998,130 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
return __tracing_resize_ring_buffer(tr, size, cpu_id);
}
+struct trace_mod_entry {
+ unsigned long mod_addr;
+ char mod_name[MODULE_NAME_LEN];
+};
+
+struct trace_scratch {
+ unsigned long text_addr;
+ unsigned long nr_entries;
+ struct trace_mod_entry entries[];
+};
+
+static DEFINE_MUTEX(scratch_mutex);
+
+static int cmp_mod_entry(const void *key, const void *pivot)
+{
+ unsigned long addr = (unsigned long)key;
+ const struct trace_mod_entry *ent = pivot;
+
+ if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
+ return 0;
+ else
+ return addr - ent->mod_addr;
+}
+
+/**
+ * trace_adjust_address() - Adjust prev boot address to current address.
+ * @tr: Persistent ring buffer's trace_array.
+ * @addr: Address in @tr which is adjusted.
+ */
+unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
+{
+ struct trace_module_delta *module_delta;
+ struct trace_scratch *tscratch;
+ struct trace_mod_entry *entry;
+ int idx = 0, nr_entries;
+
+ /* If we don't have last boot delta, return the address */
+ if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
+ return addr;
+
+ /* tr->module_delta must be protected by rcu. */
+ guard(rcu)();
+ tscratch = tr->scratch;
+ /* If there is no tscratch, module_delta must be NULL. */
+ module_delta = READ_ONCE(tr->module_delta);
+ if (!module_delta || tscratch->entries[0].mod_addr > addr)
+ return addr + tr->text_delta;
+
+ /* Note that entries must be sorted. */
+ nr_entries = tscratch->nr_entries;
+ if (nr_entries == 1 ||
+ tscratch->entries[nr_entries - 1].mod_addr < addr)
+ idx = nr_entries - 1;
+ else {
+ entry = __inline_bsearch((void *)addr,
+ tscratch->entries,
+ nr_entries - 1,
+ sizeof(tscratch->entries[0]),
+ cmp_mod_entry);
+ if (entry)
+ idx = entry - tscratch->entries;
+ }
+
+ return addr + module_delta->delta[idx];
+}
+
+#ifdef CONFIG_MODULES
+static int save_mod(struct module *mod, void *data)
+{
+ struct trace_array *tr = data;
+ struct trace_scratch *tscratch;
+ struct trace_mod_entry *entry;
+ unsigned int size;
+
+ tscratch = tr->scratch;
+ if (!tscratch)
+ return -1;
+ size = tr->scratch_size;
+
+ if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
+ return -1;
+
+ entry = &tscratch->entries[tscratch->nr_entries];
+
+ tscratch->nr_entries++;
+
+ entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
+ strscpy(entry->mod_name, mod->name);
+
+ return 0;
+}
+#else
+static int save_mod(struct module *mod, void *data)
+{
+ return 0;
+}
+#endif
+
static void update_last_data(struct trace_array *tr)
{
- if (!tr->text_delta && !tr->data_delta)
+ struct trace_module_delta *module_delta;
+ struct trace_scratch *tscratch;
+
+ if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
+ return;
+
+ if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
return;
+ /* Only if the buffer has previous boot data, clear and update it. */
+ tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
+
+ /* Reset the module list and reload them */
+ if (tr->scratch) {
+ struct trace_scratch *tscratch = tr->scratch;
+
+ memset(tscratch->entries, 0,
+ flex_array_size(tscratch, entries, tscratch->nr_entries));
+ tscratch->nr_entries = 0;
+
+ guard(mutex)(&scratch_mutex);
+ module_for_each_mod(save_mod, tr);
+ }
+
/*
* Need to clear all CPU buffers as there cannot be events
* from the previous boot mixed with events with this boot
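cmp_mod_entry() above treats each pair of neighbouring (sorted) entries as a half-open address range, with the last entry handled separately because it has no upper bound. A userspace sketch of the same lookup with invented module names and addresses (the comparator here returns -1/1 explicitly instead of the address difference):

#include <stdio.h>
#include <stdlib.h>

struct mod_entry {
	unsigned long base;
	const char *name;
};

static int cmp_entry(const void *key, const void *pivot)
{
	unsigned long addr = (unsigned long)key;
	const struct mod_entry *ent = pivot;

	if (addr >= ent[0].base && addr < ent[1].base)
		return 0;
	return addr < ent[0].base ? -1 : 1;
}

int main(void)
{
	struct mod_entry mods[] = {
		{ 0x1000, "mod_a" }, { 0x5000, "mod_b" }, { 0x9000, "mod_c" },
	};
	size_t n = sizeof(mods) / sizeof(mods[0]);
	unsigned long addr = 0x6123;
	struct mod_entry *hit;

	if (addr >= mods[n - 1].base) {
		hit = &mods[n - 1];
	} else {
		/* search only the first n - 1 entries so ent[1] is always valid */
		hit = bsearch((void *)addr, mods, n - 1, sizeof(mods[0]), cmp_entry);
	}
	printf("0x%lx -> %s\n", addr, hit ? hit->name : "none");
	return 0;
}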
@@ -6016,7 +6132,17 @@ static void update_last_data(struct trace_array *tr)
/* Using current data now */
tr->text_delta = 0;
- tr->data_delta = 0;
+
+ if (!tr->scratch)
+ return;
+
+ tscratch = tr->scratch;
+ module_delta = READ_ONCE(tr->module_delta);
+ WRITE_ONCE(tr->module_delta, NULL);
+ kfree_rcu(module_delta, rcu);
+
+ /* Set the persistent ring buffer meta data to this address */
+ tscratch->text_addr = (unsigned long)_text;
}
/**
@@ -6825,19 +6951,102 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static ssize_t
-tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+#define LAST_BOOT_HEADER ((void *)1)
+
+static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct trace_array *tr = filp->private_data;
- struct seq_buf seq;
- char buf[64];
+ struct trace_array *tr = m->private;
+ struct trace_scratch *tscratch = tr->scratch;
+ unsigned int index = *pos;
+
+ (*pos)++;
+
+ if (*pos == 1)
+ return LAST_BOOT_HEADER;
+
+ /* Only show offsets of the last boot data */
+ if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
+ return NULL;
+
+ /* *pos 0 is for the header, 1 is for the first module */
+ index--;
+
+ if (index >= tscratch->nr_entries)
+ return NULL;
+
+ return &tscratch->entries[index];
+}
+
+static void *l_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&scratch_mutex);
+
+ return l_next(m, NULL, pos);
+}
+
+static void l_stop(struct seq_file *m, void *p)
+{
+ mutex_unlock(&scratch_mutex);
+}
+
+static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
+{
+ struct trace_scratch *tscratch = tr->scratch;
+
+ /*
+ * Do not leak KASLR address. This only shows the KASLR address of
+ * the last boot. When the ring buffer is started, the LAST_BOOT
+ * flag gets cleared, and this should only report "current".
+ * Otherwise it shows the KASLR address from the previous boot which
+ * should not be the same as the current boot.
+ */
+ if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
+ seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
+ else
+ seq_puts(m, "# Current\n");
+}
+
+static int l_show(struct seq_file *m, void *v)
+{
+ struct trace_array *tr = m->private;
+ struct trace_mod_entry *entry = v;
+
+ if (v == LAST_BOOT_HEADER) {
+ show_last_boot_header(m, tr);
+ return 0;
+ }
+
+ seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
+ return 0;
+}
+
+static const struct seq_operations last_boot_seq_ops = {
+ .start = l_start,
+ .next = l_next,
+ .stop = l_stop,
+ .show = l_show,
+};
+
+static int tracing_last_boot_open(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+ struct seq_file *m;
+ int ret;
- seq_buf_init(&seq, buf, 64);
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
- seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
- seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
+ ret = seq_open(file, &last_boot_seq_ops);
+ if (ret) {
+ trace_array_put(tr);
+ return ret;
+ }
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
+ m = file->private_data;
+ m->private = tr;
+
+ return 0;
}
static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
@@ -7466,10 +7675,10 @@ static const struct file_operations trace_time_stamp_mode_fops = {
};
static const struct file_operations last_boot_fops = {
- .open = tracing_open_generic_tr,
- .read = tracing_last_boot_read,
- .llseek = generic_file_llseek,
- .release = tracing_release_generic_tr,
+ .open = tracing_last_boot_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_seq_release,
};
#ifdef CONFIG_TRACER_SNAPSHOT
@@ -8292,6 +8501,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
struct trace_iterator *iter = &info->iter;
int ret = 0;
+ /* A memmap'ed buffer is not supported for user space mmap */
+ if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
+ return -ENODEV;
+
/* Currently the boot mapped buffer is not supported for mmap */
if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
return -ENODEV;
@@ -9209,22 +9422,125 @@ static struct dentry *trace_instance_dir;
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
+#ifdef CONFIG_MODULES
+static int make_mod_delta(struct module *mod, void *data)
+{
+ struct trace_module_delta *module_delta;
+ struct trace_scratch *tscratch;
+ struct trace_mod_entry *entry;
+ struct trace_array *tr = data;
+ int i;
+
+ tscratch = tr->scratch;
+ module_delta = READ_ONCE(tr->module_delta);
+ for (i = 0; i < tscratch->nr_entries; i++) {
+ entry = &tscratch->entries[i];
+ if (strcmp(mod->name, entry->mod_name))
+ continue;
+ if (mod->state == MODULE_STATE_GOING)
+ module_delta->delta[i] = 0;
+ else
+ module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
+ - entry->mod_addr;
+ break;
+ }
+ return 0;
+}
+#else
+static int make_mod_delta(struct module *mod, void *data)
+{
+ return 0;
+}
+#endif
+
+static int mod_addr_comp(const void *a, const void *b, const void *data)
+{
+ const struct trace_mod_entry *e1 = a;
+ const struct trace_mod_entry *e2 = b;
+
+ return e1->mod_addr > e2->mod_addr ? 1 : -1;
+}
+
+static void setup_trace_scratch(struct trace_array *tr,
+ struct trace_scratch *tscratch, unsigned int size)
+{
+ struct trace_module_delta *module_delta;
+ struct trace_mod_entry *entry;
+ int i, nr_entries;
+
+ if (!tscratch)
+ return;
+
+ tr->scratch = tscratch;
+ tr->scratch_size = size;
+
+ if (tscratch->text_addr)
+ tr->text_delta = (unsigned long)_text - tscratch->text_addr;
+
+ if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
+ goto reset;
+
+ /* Check if each module name is a valid string */
+ for (i = 0; i < tscratch->nr_entries; i++) {
+ int n;
+
+ entry = &tscratch->entries[i];
+
+ for (n = 0; n < MODULE_NAME_LEN; n++) {
+ if (entry->mod_name[n] == '\0')
+ break;
+ if (!isprint(entry->mod_name[n]))
+ goto reset;
+ }
+ if (n == MODULE_NAME_LEN)
+ goto reset;
+ }
+
+ /* Sort the entries so that we can find the appropriate module from an address. */
+ nr_entries = tscratch->nr_entries;
+ sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
+ mod_addr_comp, NULL, NULL);
+
+ if (IS_ENABLED(CONFIG_MODULES)) {
+ module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
+ if (!module_delta) {
+ pr_info("module_delta allocation failed. Not able to decode module address.");
+ goto reset;
+ }
+ init_rcu_head(&module_delta->rcu);
+ } else
+ module_delta = NULL;
+ WRITE_ONCE(tr->module_delta, module_delta);
+
+ /* Scan loaded modules to compute their text deltas. */
+ module_for_each_mod(make_mod_delta, tr);
+ return;
+ reset:
+ /* Invalid trace modules */
+ memset(tscratch, 0, size);
+}
+
static int
allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
{
enum ring_buffer_flags rb_flags;
+ struct trace_scratch *tscratch;
+ unsigned int scratch_size = 0;
rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
buf->tr = tr;
if (tr->range_addr_start && tr->range_addr_size) {
+ /* Add scratch buffer to handle 128 modules */
buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
tr->range_addr_start,
- tr->range_addr_size);
+ tr->range_addr_size,
+ struct_size(tscratch, entries, 128));
+
+ tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
+ setup_trace_scratch(tr, tscratch, scratch_size);
- ring_buffer_last_boot_delta(buf->buffer,
- &tr->text_delta, &tr->data_delta);
/*
* This is basically the same as a mapped buffer,
* with the same restrictions.
@@ -9293,6 +9609,7 @@ static void free_trace_buffers(struct trace_array *tr)
return;
free_trace_buffer(&tr->array_buffer);
+ kfree(tr->module_delta);
#ifdef CONFIG_TRACER_MAX_TRACE
free_trace_buffer(&tr->max_buffer);
@@ -9458,6 +9775,7 @@ trace_array_create_systems(const char *name, const char *systems,
free_cpumask_var(tr->pipe_cpumask);
free_cpumask_var(tr->tracing_cpumask);
kfree_const(tr->system_names);
+ kfree(tr->range_name);
kfree(tr->name);
kfree(tr);
@@ -9488,29 +9806,27 @@ static int instance_mkdir(const char *name)
return ret;
}
-static u64 map_pages(u64 start, u64 size)
+static u64 map_pages(unsigned long start, unsigned long size)
{
- struct page **pages;
- phys_addr_t page_start;
- unsigned int page_count;
- unsigned int i;
- void *vaddr;
-
- page_count = DIV_ROUND_UP(size, PAGE_SIZE);
+ unsigned long vmap_start, vmap_end;
+ struct vm_struct *area;
+ int ret;
- page_start = start;
- pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
- if (!pages)
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
return 0;
- for (i = 0; i < page_count; i++) {
- phys_addr_t addr = page_start + i * PAGE_SIZE;
- pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
+ vmap_start = (unsigned long) area->addr;
+ vmap_end = vmap_start + size;
+
+ ret = vmap_page_range(vmap_start, vmap_end,
+ start, pgprot_nx(PAGE_KERNEL));
+ if (ret < 0) {
+ free_vm_area(area);
+ return 0;
}
- vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
- kfree(pages);
- return (u64)(unsigned long)vaddr;
+ return (u64)vmap_start;
}
/**
@@ -9584,6 +9900,11 @@ static int __remove_instance(struct trace_array *tr)
free_trace_buffers(tr);
clear_tracing_err_log(tr);
+ if (tr->range_name) {
+ reserve_mem_release_by_name(tr->range_name);
+ kfree(tr->range_name);
+ }
+
for (i = 0; i < tr->nr_topts; i++) {
kfree(tr->topts[i].topts);
}
@@ -9905,6 +10226,24 @@ static void trace_module_remove_evals(struct module *mod)
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
+static void trace_module_record(struct module *mod, bool add)
+{
+ struct trace_array *tr;
+ unsigned long flags;
+
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
+ /* Update any persistent trace array that has already been started */
+ if (flags == TRACE_ARRAY_FL_BOOT && add) {
+ guard(mutex)(&scratch_mutex);
+ save_mod(mod, tr);
+ } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
+ /* Update delta if the module loaded in previous boot */
+ make_mod_delta(mod, tr);
+ }
+ }
+}
+
static int trace_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
@@ -9913,9 +10252,11 @@ static int trace_module_notify(struct notifier_block *self,
switch (val) {
case MODULE_STATE_COMING:
trace_module_add_evals(mod);
+ trace_module_record(mod, true);
break;
case MODULE_STATE_GOING:
trace_module_remove_evals(mod);
+ trace_module_record(mod, false);
break;
}
@@ -10364,6 +10705,7 @@ static inline void do_allocate_snapshot(const char *name) { }
__init static void enable_instances(void)
{
struct trace_array *tr;
+ bool memmap_area = false;
char *curr_str;
char *name;
char *str;
@@ -10381,6 +10723,7 @@ __init static void enable_instances(void)
bool traceoff = false;
char *flag_delim;
char *addr_delim;
+ char *rname __free(kfree) = NULL;
tok = strsep(&curr_str, ",");
@@ -10431,16 +10774,31 @@ __init static void enable_instances(void)
name);
continue;
}
+ memmap_area = true;
} else if (tok) {
if (!reserve_mem_find_by_name(tok, &start, &size)) {
start = 0;
pr_warn("Failed to map boot instance %s to %s\n", name, tok);
continue;
}
+ rname = kstrdup(tok, GFP_KERNEL);
}
if (start) {
- addr = map_pages(start, size);
+ /* Start and size must be page aligned */
+ if (start & ~PAGE_MASK) {
+ pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
+ continue;
+ }
+ if (size & ~PAGE_MASK) {
+ pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
+ continue;
+ }
+
+ if (memmap_area)
+ addr = map_pages(start, size);
+ else
+ addr = (unsigned long)phys_to_virt(start);
if (addr) {
pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
name, &start, (unsigned long)size);
@@ -10467,15 +10825,18 @@ __init static void enable_instances(void)
update_printk_trace(tr);
/*
- * If start is set, then this is a mapped buffer, and
- * cannot be deleted by user space, so keep the reference
- * to it.
+ * memmap'd buffers cannot be freed.
*/
- if (start) {
- tr->flags |= TRACE_ARRAY_FL_BOOT;
+ if (memmap_area) {
+ tr->flags |= TRACE_ARRAY_FL_MEMMAP;
tr->ref++;
}
+ if (start) {
+ tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
+ tr->range_name = no_free_ptr(rname);
+ }
+
while ((tok = strsep(&curr_str, ","))) {
early_enable_events(tr, tok, true);
}
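For reference, the persistent instance set up here is driven from the kernel command line; a hedged example of the two forms handled above, with placeholder name, size and address (consult the tracing documentation for the exact syntax):

	reserve_mem=12M:4096:trace trace_instance=boot_map@trace

	trace_instance=boot_map@0x285400000:12M

The first form uses a reserve_mem region looked up by name (reserve_mem_find_by_name() above); the second maps a fixed physical range and is the memmap case that now gets TRACE_ARRAY_FL_MEMMAP.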
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4a6621e2a0fa..79be1995db44 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -313,6 +313,11 @@ struct trace_func_repeats {
u64 ts_last_call;
};
+struct trace_module_delta {
+ struct rcu_head rcu;
+ long delta[];
+};
+
/*
* The trace array - an array of per-CPU trace arrays. This is the
* highest level data structure that individual tracers deal with.
@@ -349,8 +354,13 @@ struct trace_array {
unsigned int mapped;
unsigned long range_addr_start;
unsigned long range_addr_size;
+ char *range_name;
long text_delta;
- long data_delta;
+ struct trace_module_delta *module_delta;
+ void *scratch; /* pointer in persistent memory */
+ int scratch_size;
+
+ int buffer_disabled;
struct trace_pid_list __rcu *filtered_pids;
struct trace_pid_list __rcu *filtered_no_pids;
@@ -368,7 +378,6 @@ struct trace_array {
* CONFIG_TRACER_MAX_TRACE.
*/
arch_spinlock_t max_lock;
- int buffer_disabled;
#ifdef CONFIG_FTRACE_SYSCALLS
int sys_refcount_enter;
int sys_refcount_exit;
@@ -434,9 +443,11 @@ struct trace_array {
};
enum {
- TRACE_ARRAY_FL_GLOBAL = BIT(0),
- TRACE_ARRAY_FL_BOOT = BIT(1),
- TRACE_ARRAY_FL_MOD_INIT = BIT(2),
+ TRACE_ARRAY_FL_GLOBAL = BIT(0),
+ TRACE_ARRAY_FL_BOOT = BIT(1),
+ TRACE_ARRAY_FL_LAST_BOOT = BIT(2),
+ TRACE_ARRAY_FL_MOD_INIT = BIT(3),
+ TRACE_ARRAY_FL_MEMMAP = BIT(4),
};
#ifdef CONFIG_MODULES
@@ -463,6 +474,8 @@ extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
extern bool trace_clock_in_ns(struct trace_array *tr);
+extern unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr);
+
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.
@@ -785,6 +798,8 @@ extern void trace_find_cmdline(int pid, char comm[]);
extern int trace_find_tgid(int pid);
extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
+extern int trace_events_enabled(struct trace_array *tr, const char *system);
+
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
extern unsigned long ftrace_number_of_pages;
@@ -838,13 +853,15 @@ static inline void __init disable_tracing_selftest(const char *reason)
extern void *head_page(struct trace_array_cpu *data);
extern unsigned long long ns2usecs(u64 nsec);
-extern int
-trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
-extern int
-trace_vprintk(unsigned long ip, const char *fmt, va_list args);
-extern int
-trace_array_vprintk(struct trace_array *tr,
- unsigned long ip, const char *fmt, va_list args);
+
+__printf(2, 0)
+int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
+__printf(2, 0)
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
+__printf(3, 0)
+int trace_array_vprintk(struct trace_array *tr,
+ unsigned long ip, const char *fmt, va_list args);
+__printf(3, 4)
int trace_array_printk_buf(struct trace_buffer *buffer,
unsigned long ip, const char *fmt, ...);
void trace_printk_seq(struct trace_seq *s);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8e7603acca21..069e92856bda 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -470,6 +470,7 @@ static void test_event_printk(struct trace_event_call *call)
case '%':
continue;
case 'p':
+ do_pointer:
/* Find dereferencing fields */
switch (fmt[i + 1]) {
case 'B': case 'R': case 'r':
@@ -498,6 +499,12 @@ static void test_event_printk(struct trace_event_call *call)
continue;
if (fmt[i + j] == '*') {
star = true;
+ /* Handle %*pbl case */
+ if (!j && fmt[i + 1] == 'p') {
+ arg++;
+ i++;
+ goto do_pointer;
+ }
continue;
}
if ((fmt[i + j] == 's')) {
@@ -1820,28 +1827,28 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
return cnt;
}
-static ssize_t
-system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+/*
+ * Returns:
+ * 0 : no events exist?
+ * 1 : all events are disabled
+ * 2 : all events are enabled
+ * 3 : some events are enabled and some are disabled
+ */
+int trace_events_enabled(struct trace_array *tr, const char *system)
{
- const char set_to_char[4] = { '?', '0', '1', 'X' };
- struct trace_subsystem_dir *dir = filp->private_data;
- struct event_subsystem *system = dir->subsystem;
struct trace_event_call *call;
struct trace_event_file *file;
- struct trace_array *tr = dir->tr;
- char buf[2];
int set = 0;
- int ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
+
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
!trace_event_name(call) || !call->class || !call->class->reg)
continue;
- if (system && strcmp(call->class->system, system->name) != 0)
+ if (system && strcmp(call->class->system, system) != 0)
continue;
/*
@@ -1857,7 +1864,23 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
if (set == 3)
break;
}
- mutex_unlock(&event_mutex);
+
+ return set;
+}
+
+static ssize_t
+system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ const char set_to_char[4] = { '?', '0', '1', 'X' };
+ struct trace_subsystem_dir *dir = filp->private_data;
+ struct event_subsystem *system = dir->subsystem;
+ struct trace_array *tr = dir->tr;
+ char buf[2];
+ int set;
+ int ret;
+
+ set = trace_events_enabled(tr, system ? system->name : NULL);
buf[0] = set_to_char[set];
buf[1] = '\n';
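The summary character written to user space is derived from a two-bit set value matching the comment above; a standalone sketch of that mapping (the exact flag test inside trace_events_enabled() is not shown in this hunk, so the helper below is illustrative):

#include <stdio.h>

static char events_state_char(int nr_enabled, int nr_disabled)
{
	const char set_to_char[4] = { '?', '0', '1', 'X' };
	int set = 0;

	if (nr_disabled)
		set |= 1;	/* at least one event disabled */
	if (nr_enabled)
		set |= 2;	/* at least one event enabled */

	return set_to_char[set];	/* '?', '0', '1' or 'X' */
}

int main(void)
{
	printf("%c %c %c %c\n",
	       events_state_char(0, 0),	/* no events    -> '?' */
	       events_state_char(0, 5),	/* all disabled -> '0' */
	       events_state_char(5, 0),	/* all enabled  -> '1' */
	       events_state_char(3, 2));	/* mixed        -> 'X' */
	return 0;
}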
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 72b699f909e8..fee40ffbd490 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -5,6 +5,7 @@
* Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
*/
+#include "trace.h"
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
@@ -1340,7 +1341,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
struct trace_seq *s = &iter->seq;
unsigned long *p;
unsigned long *end;
- long delta = iter->tr->text_delta;
trace_assign_type(field, iter->ent);
end = (unsigned long *)((long)iter->ent + iter->ent_size);
@@ -1357,7 +1357,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
trace_seq_puts(s, "[FTRACE TRAMPOLINE]\n");
continue;
}
- seq_print_ip_sym(s, (*p) + delta, flags);
+ seq_print_ip_sym(s, trace_adjust_address(iter->tr, *p), flags);
trace_seq_putc(s, '\n');
}
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 86c5f1c0bad9..8686e329b8f2 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -11,11 +11,14 @@
struct ucounts init_ucounts = {
.ns = &init_user_ns,
.uid = GLOBAL_ROOT_UID,
- .count = ATOMIC_INIT(1),
+ .count = RCUREF_INIT(1),
};
#define UCOUNTS_HASHTABLE_BITS 10
-static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
+#define UCOUNTS_HASHTABLE_ENTRIES (1 << UCOUNTS_HASHTABLE_BITS)
+static struct hlist_nulls_head ucounts_hashtable[UCOUNTS_HASHTABLE_ENTRIES] = {
+ [0 ... UCOUNTS_HASHTABLE_ENTRIES - 1] = HLIST_NULLS_HEAD_INIT(0)
+};
static DEFINE_SPINLOCK(ucounts_lock);
#define ucounts_hashfn(ns, uid) \
@@ -24,7 +27,6 @@ static DEFINE_SPINLOCK(ucounts_lock);
#define ucounts_hashentry(ns, uid) \
(ucounts_hashtable + ucounts_hashfn(ns, uid))
-
#ifdef CONFIG_SYSCTL
static struct ctl_table_set *
set_lookup(struct ctl_table_root *root)
@@ -127,88 +129,73 @@ void retire_userns_sysctls(struct user_namespace *ns)
#endif
}
-static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
+static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid,
+ struct hlist_nulls_head *hashent)
{
struct ucounts *ucounts;
+ struct hlist_nulls_node *pos;
- hlist_for_each_entry(ucounts, hashent, node) {
- if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
- return ucounts;
+ guard(rcu)();
+ hlist_nulls_for_each_entry_rcu(ucounts, pos, hashent, node) {
+ if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) {
+ if (rcuref_get(&ucounts->count))
+ return ucounts;
+ }
}
return NULL;
}
static void hlist_add_ucounts(struct ucounts *ucounts)
{
- struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
+ struct hlist_nulls_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
+
spin_lock_irq(&ucounts_lock);
- hlist_add_head(&ucounts->node, hashent);
+ hlist_nulls_add_head_rcu(&ucounts->node, hashent);
spin_unlock_irq(&ucounts_lock);
}
-static inline bool get_ucounts_or_wrap(struct ucounts *ucounts)
+struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
{
- /* Returns true on a successful get, false if the count wraps. */
- return !atomic_add_negative(1, &ucounts->count);
-}
+ struct hlist_nulls_head *hashent = ucounts_hashentry(ns, uid);
+ struct ucounts *ucounts, *new;
-struct ucounts *get_ucounts(struct ucounts *ucounts)
-{
- if (!get_ucounts_or_wrap(ucounts)) {
- put_ucounts(ucounts);
- ucounts = NULL;
- }
- return ucounts;
-}
+ ucounts = find_ucounts(ns, uid, hashent);
+ if (ucounts)
+ return ucounts;
-struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
-{
- struct hlist_head *hashent = ucounts_hashentry(ns, uid);
- bool wrapped;
- struct ucounts *ucounts, *new = NULL;
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ new->ns = ns;
+ new->uid = uid;
+ rcuref_init(&new->count, 1);
spin_lock_irq(&ucounts_lock);
ucounts = find_ucounts(ns, uid, hashent);
- if (!ucounts) {
+ if (ucounts) {
spin_unlock_irq(&ucounts_lock);
-
- new = kzalloc(sizeof(*new), GFP_KERNEL);
- if (!new)
- return NULL;
-
- new->ns = ns;
- new->uid = uid;
- atomic_set(&new->count, 1);
-
- spin_lock_irq(&ucounts_lock);
- ucounts = find_ucounts(ns, uid, hashent);
- if (!ucounts) {
- hlist_add_head(&new->node, hashent);
- get_user_ns(new->ns);
- spin_unlock_irq(&ucounts_lock);
- return new;
- }
+ kfree(new);
+ return ucounts;
}
- wrapped = !get_ucounts_or_wrap(ucounts);
+ hlist_nulls_add_head_rcu(&new->node, hashent);
+ get_user_ns(new->ns);
spin_unlock_irq(&ucounts_lock);
- kfree(new);
- if (wrapped) {
- put_ucounts(ucounts);
- return NULL;
- }
- return ucounts;
+ return new;
}
void put_ucounts(struct ucounts *ucounts)
{
unsigned long flags;
- if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
- hlist_del_init(&ucounts->node);
+ if (rcuref_put(&ucounts->count)) {
+ spin_lock_irqsave(&ucounts_lock, flags);
+ hlist_nulls_del_rcu(&ucounts->node);
spin_unlock_irqrestore(&ucounts_lock, flags);
+
put_user_ns(ucounts->ns);
- kfree(ucounts);
+ kfree_rcu(ucounts, rcu);
}
}
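
With lookups done under RCU and references taken via rcuref_get(), readers no longer touch ucounts_lock at all; only insertion of a new entry and the final teardown in put_ucounts() serialise on it. From a caller's point of view the life cycle is unchanged; roughly (a usage sketch built on the functions above, not code from this patch):

	#include <linux/cred.h>
	#include <linux/user_namespace.h>

	static int charge_example(void)
	{
		struct ucounts *uc;

		/* returns with a reference held, to an existing entry or a freshly inserted one */
		uc = alloc_ucounts(current_user_ns(), current_euid());
		if (!uc)
			return -ENOMEM;

		/* ... charge and uncharge resources against uc ... */

		put_ucounts(uc);	/* last put unhashes the entry and frees it after a grace period */
		return 0;
	}
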
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index a78ff092d636..75af12ff774e 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -269,12 +269,10 @@ void __init hardlockup_config_perf_event(const char *str)
} else {
unsigned int len = comma - str;
if (len >= sizeof(buf))
return;
- if (strscpy(buf, str, sizeof(buf)) < 0)
- return;
- buf[len] = 0;
+ strscpy(buf, str, len + 1);
if (kstrtoull(buf, 16, &config))
return;
}
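
For reference, hardlockup_config_perf_event() splits the boot-time string at the first comma and parses the text before the comma as a hexadecimal event config; whatever follows the comma is handled further down the function. A user-space flavoured sketch of that split, using only standard C and illustrative names and buffer size:

	#include <stdlib.h>
	#include <string.h>

	static int parse_event_config(const char *str, unsigned long long *config)
	{
		char buf[64];
		const char *comma = strchr(str, ',');
		size_t len = comma ? (size_t)(comma - str) : strlen(str);

		if (len >= sizeof(buf))		/* need room for len characters plus the NUL */
			return -1;
		memcpy(buf, str, len);
		buf[len] = '\0';
		*config = strtoull(buf, NULL, 16);
		return 0;
	}
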
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index bfe030b443e2..cf6203282737 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2057,11 +2057,11 @@ static int try_to_grab_pending(struct work_struct *work, u32 cflags,
struct delayed_work *dwork = to_delayed_work(work);
/*
- * dwork->timer is irqsafe. If del_timer() fails, it's
+ * dwork->timer is irqsafe. If timer_delete() fails, it's
* guaranteed that the timer is not queued anywhere and not
* running on the local CPU.
*/
- if (likely(del_timer(&dwork->timer)))
+ if (likely(timer_delete(&dwork->timer)))
return 1;
}
@@ -3069,7 +3069,7 @@ restart:
break;
}
- del_timer_sync(&pool->mayday_timer);
+ timer_delete_sync(&pool->mayday_timer);
raw_spin_lock_irq(&pool->lock);
/*
* This is necessary even after a new worker was just successfully
@@ -4281,7 +4281,7 @@ EXPORT_SYMBOL_GPL(flush_work);
bool flush_delayed_work(struct delayed_work *dwork)
{
local_irq_disable();
- if (del_timer_sync(&dwork->timer))
+ if (timer_delete_sync(&dwork->timer))
__queue_work(dwork->cpu, dwork->wq, &dwork->work);
local_irq_enable();
return flush_work(&dwork->work);
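
flush_delayed_work() cancels a still-pending timer with timer_delete_sync(), queues the work right away, and then waits for it via flush_work(), giving callers run-now-if-not-yet-run semantics. Typical usage is untouched by the rename; a small sketch with illustrative names:

	#include <linux/workqueue.h>

	static void cache_writeback_fn(struct work_struct *work)
	{
		/* push buffered state out; body is illustrative */
	}
	static DECLARE_DELAYED_WORK(cache_writeback, cache_writeback_fn);

	static void cache_dirty(void)
	{
		queue_delayed_work(system_wq, &cache_writeback, HZ);	/* write back in ~1s */
	}

	static void cache_shutdown(void)
	{
		/* if the timer has not fired yet, run the writeback now and wait for it */
		flush_delayed_work(&cache_writeback);
	}
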
@@ -4984,9 +4984,9 @@ static void put_unbound_pool(struct worker_pool *pool)
reap_dying_workers(&cull_list);
/* shut down the timers */
- del_timer_sync(&pool->idle_timer);
+ timer_delete_sync(&pool->idle_timer);
cancel_work_sync(&pool->idle_cull_work);
- del_timer_sync(&pool->mayday_timer);
+ timer_delete_sync(&pool->mayday_timer);
/* RCU protected to allow dereferences from get_work_pool() */
call_rcu(&pool->rcu, rcu_free_pool);
@@ -7637,7 +7637,7 @@ notrace void wq_watchdog_touch(int cpu)
static void wq_watchdog_set_thresh(unsigned long thresh)
{
wq_watchdog_thresh = 0;
- del_timer_sync(&wq_watchdog_timer);
+ timer_delete_sync(&wq_watchdog_timer);
if (thresh) {
wq_watchdog_thresh = thresh;