Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup/cgroup.c        |  2
-rw-r--r--  kernel/debug/debug_core.c     | 14
-rw-r--r--  kernel/debug/kdb/kdb_io.c     |  4
-rw-r--r--  kernel/debug/kdb/kdb_main.c   | 85
-rw-r--r--  kernel/events/core.c          | 18
-rw-r--r--  kernel/events/uprobes.c       |  3
-rw-r--r--  kernel/exit.c                 | 11
-rwxr-xr-x  kernel/gen_kheaders.sh        |  1
-rw-r--r--  kernel/irq/irqdomain.c        | 14
-rw-r--r--  kernel/irq/migration.c        |  2
-rw-r--r--  kernel/kcsan/kcsan_test.c     |  2
-rw-r--r--  kernel/kthread.c              |  4
-rw-r--r--  kernel/panic.c                |  6
-rw-r--r--  kernel/rcu/Kconfig            |  2
-rw-r--r--  kernel/rcu/rcutorture.c       |  2
-rw-r--r--  kernel/rcu/srcutree.c         |  2
-rw-r--r--  kernel/rcu/tasks.h            |  2
-rw-r--r--  kernel/rcu/tree_nocb.h        |  4
-rw-r--r--  kernel/sched/core.c           |  1
-rw-r--r--  kernel/sched/ext.c            |  8
-rw-r--r--  kernel/sched/ext_idle.c       | 12
-rw-r--r--  kernel/sched/psi.c            |  2
-rw-r--r--  kernel/signal.c               |  8
-rw-r--r--  kernel/time/clocksource.c     |  2
-rw-r--r--  kernel/time/hrtimer.c         | 71
-rw-r--r--  kernel/time/sleep_timeout.c   |  2
-rw-r--r--  kernel/time/timekeeping.c     | 94
-rw-r--r--  kernel/time/timer.c           |  8
-rw-r--r--  kernel/time/timer_list.c      |  2
-rw-r--r--  kernel/trace/Kconfig          |  3
-rw-r--r--  kernel/trace/ftrace.c         |  1
-rw-r--r--  kernel/trace/ring_buffer.c    |  5
-rw-r--r--  kernel/trace/rv/rv.c          |  3
-rw-r--r--  kernel/trace/trace.c          | 78
-rw-r--r--  kernel/trace/trace.h          | 17
-rw-r--r--  kernel/trace/trace_events.c   |  7
-rw-r--r--  kernel/workqueue.c            | 14
37 files changed, 196 insertions(+), 320 deletions(-)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index ac2db99941ca..27f08aa17b56 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1695,7 +1695,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
cfile->kn = NULL;
spin_unlock_irq(&cgroup_file_kn_lock);
- del_timer_sync(&cfile->notify_timer);
+ timer_delete_sync(&cfile->notify_timer);
}
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
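For reference, the del_timer*() -> timer_delete*() conversions in this hunk and in several hunks below are a tree-wide rename with unchanged semantics. A minimal sketch under that assumption; 'my_timer' and the wrapper functions are hypothetical, not part of this patch:

	#include <linux/timer.h>

	static struct timer_list my_timer;

	static void my_teardown(void)
	{
		/* was del_timer_sync(): deactivate and wait for a running callback to finish */
		timer_delete_sync(&my_timer);
	}

	static void my_fast_disarm(void)
	{
		/* was del_timer(): deactivate only, does not wait for the callback */
		timer_delete(&my_timer);
	}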
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index ce1bb2301c06..0b9495187fba 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -837,10 +837,6 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
{
struct kgdb_state kgdb_var;
struct kgdb_state *ks = &kgdb_var;
- int ret = 0;
-
- if (arch_kgdb_ops.enable_nmi)
- arch_kgdb_ops.enable_nmi(0);
/*
* Avoid entering the debugger if we were triggered due to an oops
* but panic_timeout indicates the system should automatically
@@ -858,15 +854,11 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
ks->linux_regs = regs;
if (kgdb_reenter_check(ks))
- goto out; /* Ouch, double exception ! */
+ return 0; /* Ouch, double exception ! */
if (kgdb_info[ks->cpu].enter_kgdb != 0)
- goto out;
+ return 0;
- ret = kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);
-out:
- if (arch_kgdb_ops.enable_nmi)
- arch_kgdb_ops.enable_nmi(1);
- return ret;
+ return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);
}
NOKPROBE_SYMBOL(kgdb_handle_exception);
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 6a77f1c779c4..9b11b10b120c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -334,7 +334,7 @@ poll_again:
*cp = '\0';
p_tmp = strrchr(buffer, ' ');
p_tmp = (p_tmp ? p_tmp + 1 : buffer);
- strscpy(tmpbuffer, p_tmp, sizeof(tmpbuffer));
+ strscpy(tmpbuffer, p_tmp);
*cp = tmp;
len = strlen(tmpbuffer);
@@ -452,7 +452,7 @@ poll_again:
char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt)
{
if (prompt && kdb_prompt_str != prompt)
- strscpy(kdb_prompt_str, prompt, CMD_BUFLEN);
+ strscpy(kdb_prompt_str, prompt);
kdb_printf("%s", kdb_prompt_str);
kdb_nextline = 1; /* Prompt and input resets line number */
return kdb_read(buffer, bufsize);
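The two-argument strscpy() form used above relies on the destination being a fixed-size array, so the copy size comes from sizeof() at compile time; a plain 'char *' destination still needs the explicit size. A short sketch with a hypothetical buffer:

	char buf[64];

	strscpy(buf, src);			/* size inferred from sizeof(buf) */
	strscpy(buf, src, sizeof(buf));		/* equivalent explicit three-argument form */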
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 5f4be507d79f..7a4d2d4689a5 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -25,7 +25,6 @@
#include <linux/smp.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
-#include <linux/atomic.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <linux/init.h>
@@ -105,7 +104,7 @@ static kdbmsg_t kdbmsgs[] = {
KDBMSG(NOENVVALUE, "Environment variable should have value"),
KDBMSG(NOTIMP, "Command not implemented"),
KDBMSG(ENVFULL, "Environment full"),
- KDBMSG(ENVBUFFULL, "Environment buffer full"),
+ KDBMSG(KMALLOCFAILED, "Failed to allocate memory"),
KDBMSG(TOOMANYBPT, "Too many breakpoints defined"),
#ifdef CONFIG_CPU_XSCALE
KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"),
@@ -130,13 +129,9 @@ static const int __nkdb_err = ARRAY_SIZE(kdbmsgs);
/*
- * Initial environment. This is all kept static and local to
- * this file. We don't want to rely on the memory allocation
- * mechanisms in the kernel, so we use a very limited allocate-only
- * heap for new and altered environment variables. The entire
- * environment is limited to a fixed number of entries (add more
- * to __env[] if required) and a fixed amount of heap (add more to
- * KDB_ENVBUFSIZE if required).
+ * Initial environment. This is all kept static and local to this file.
+ * The entire environment is limited to a fixed number of entries
+ * (add more to __env[] if required)
*/
static char *__env[31] = {
@@ -259,35 +254,6 @@ char *kdbgetenv(const char *match)
}
/*
- * kdballocenv - This function is used to allocate bytes for
- * environment entries.
- * Parameters:
- * bytes The number of bytes to allocate in the static buffer.
- * Returns:
- * A pointer to the allocated space in the buffer on success.
- * NULL if bytes > size available in the envbuffer.
- * Remarks:
- * We use a static environment buffer (envbuffer) to hold the values
- * of dynamically generated environment variables (see kdb_set). Buffer
- * space once allocated is never free'd, so over time, the amount of space
- * (currently 512 bytes) will be exhausted if env variables are changed
- * frequently.
- */
-static char *kdballocenv(size_t bytes)
-{
-#define KDB_ENVBUFSIZE 512
- static char envbuffer[KDB_ENVBUFSIZE];
- static int envbufsize;
- char *ep = NULL;
-
- if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) {
- ep = &envbuffer[envbufsize];
- envbufsize += bytes;
- }
- return ep;
-}
-
-/*
* kdbgetulenv - This function will return the value of an unsigned
* long-valued environment variable.
* Parameters:
@@ -348,9 +314,9 @@ static int kdb_setenv(const char *var, const char *val)
varlen = strlen(var);
vallen = strlen(val);
- ep = kdballocenv(varlen + vallen + 2);
- if (ep == (char *)0)
- return KDB_ENVBUFFULL;
+ ep = kmalloc(varlen + vallen + 2, GFP_KDB);
+ if (!ep)
+ return KDB_KMALLOCFAILED;
sprintf(ep, "%s=%s", var, val);
@@ -359,6 +325,7 @@ static int kdb_setenv(const char *var, const char *val)
&& ((strncmp(__env[i], var, varlen) == 0)
&& ((__env[i][varlen] == '\0')
|| (__env[i][varlen] == '=')))) {
+ kfree_const(__env[i]);
__env[i] = ep;
return 0;
}
@@ -2119,32 +2086,6 @@ static int kdb_dmesg(int argc, const char **argv)
return 0;
}
#endif /* CONFIG_PRINTK */
-
-/* Make sure we balance enable/disable calls, must disable first. */
-static atomic_t kdb_nmi_disabled;
-
-static int kdb_disable_nmi(int argc, const char *argv[])
-{
- if (atomic_read(&kdb_nmi_disabled))
- return 0;
- atomic_set(&kdb_nmi_disabled, 1);
- arch_kgdb_ops.enable_nmi(0);
- return 0;
-}
-
-static int kdb_param_enable_nmi(const char *val, const struct kernel_param *kp)
-{
- if (!atomic_add_unless(&kdb_nmi_disabled, -1, 0))
- return -EINVAL;
- arch_kgdb_ops.enable_nmi(1);
- return 0;
-}
-
-static const struct kernel_param_ops kdb_param_ops_enable_nmi = {
- .set = kdb_param_enable_nmi,
-};
-module_param_cb(enable_nmi, &kdb_param_ops_enable_nmi, NULL, 0600);
-
/*
* kdb_cpu - This function implements the 'cpu' command.
* cpu [<cpunum>]
@@ -2836,20 +2777,10 @@ static kdbtab_t maintab[] = {
},
};
-static kdbtab_t nmicmd = {
- .name = "disable_nmi",
- .func = kdb_disable_nmi,
- .usage = "",
- .help = "Disable NMI entry to KDB",
- .flags = KDB_ENABLE_ALWAYS_SAFE,
-};
-
/* Initialize the kdb command table. */
static void __init kdb_inittab(void)
{
kdb_register_table(maintab, ARRAY_SIZE(maintab));
- if (arch_kgdb_ops.enable_nmi)
- kdb_register_table(&nmicmd, 1);
}
/* Execute any commands defined in kdb_cmds. */
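The kdb_setenv() change above replaces the old allocate-only static buffer with a regular kmalloc() and frees the previous entry with kfree_const(), so the initial __env[] string literals in .rodata are left alone. A sketch of that replace-and-free pattern, assuming a hypothetical 'slot' pointer and using GFP_KERNEL in place of the kdb-specific GFP_KDB from the patch:

	char *ep = kmalloc(strlen(var) + strlen(val) + 2, GFP_KERNEL);

	if (!ep)
		return -ENOMEM;
	sprintf(ep, "%s=%s", var, val);
	kfree_const(*slot);	/* frees kmalloc()ed entries, skips .rodata literals */
	*slot = ep;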
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0bb21659e252..128db74e9eab 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2451,6 +2451,7 @@ ctx_time_update_event(struct perf_event_context *ctx, struct perf_event *event)
#define DETACH_GROUP 0x01UL
#define DETACH_CHILD 0x02UL
#define DETACH_DEAD 0x04UL
+#define DETACH_EXIT 0x08UL
/*
* Cross CPU call to remove a performance event
@@ -2465,6 +2466,7 @@ __perf_remove_from_context(struct perf_event *event,
void *info)
{
struct perf_event_pmu_context *pmu_ctx = event->pmu_ctx;
+ enum perf_event_state state = PERF_EVENT_STATE_OFF;
unsigned long flags = (unsigned long)info;
ctx_time_update(cpuctx, ctx);
@@ -2473,16 +2475,19 @@ __perf_remove_from_context(struct perf_event *event,
* Ensure event_sched_out() switches to OFF, at the very least
* this avoids raising perf_pending_task() at this time.
*/
- if (flags & DETACH_DEAD)
+ if (flags & DETACH_EXIT)
+ state = PERF_EVENT_STATE_EXIT;
+ if (flags & DETACH_DEAD) {
event->pending_disable = 1;
+ state = PERF_EVENT_STATE_DEAD;
+ }
event_sched_out(event, ctx);
+ perf_event_set_state(event, min(event->state, state));
if (flags & DETACH_GROUP)
perf_group_detach(event);
if (flags & DETACH_CHILD)
perf_child_detach(event);
list_del_event(event, ctx);
- if (flags & DETACH_DEAD)
- event->state = PERF_EVENT_STATE_DEAD;
if (!pmu_ctx->nr_events) {
pmu_ctx->rotate_necessary = 0;
@@ -13731,12 +13736,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
mutex_lock(&parent_event->child_mutex);
}
- perf_remove_from_context(event, detach_flags);
-
- raw_spin_lock_irq(&ctx->lock);
- if (event->state > PERF_EVENT_STATE_EXIT)
- perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
- raw_spin_unlock_irq(&ctx->lock);
+ perf_remove_from_context(event, detach_flags | DETACH_EXIT);
/*
* Child events can be freed.
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2746791ce1e2..615b4e6d22c7 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1703,7 +1703,8 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
}
vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
- VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
+ VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO|
+ VM_SEALED_SYSMAP,
&xol_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
diff --git a/kernel/exit.c b/kernel/exit.c
index c2e6c7b7779f..1b51dc099f1e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -268,6 +268,9 @@ repeat:
leader = p->group_leader;
if (leader != p && thread_group_empty(leader)
&& leader->exit_state == EXIT_ZOMBIE) {
+ /* for pidfs_exit() and do_notify_parent() */
+ if (leader->signal->flags & SIGNAL_GROUP_EXIT)
+ leader->exit_code = leader->signal->group_exit_code;
/*
* If we were the last child thread and the leader has
* exited already, and the leader's parent ignores SIGCHLD,
@@ -756,12 +759,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
tsk->exit_state = EXIT_ZOMBIE;
- /*
- * Ignore thread-group leaders that exited before all
- * subthreads did.
- */
- if (!delay_group_leader(tsk))
- do_notify_pidfd(tsk);
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
@@ -774,6 +771,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
do_notify_parent(tsk, tsk->exit_signal);
} else {
autoreap = true;
+ /* untraced sub-thread */
+ do_notify_pidfd(tsk);
}
if (autoreap) {
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 00529c81cc40..c9e5dc068e85 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -89,7 +89,6 @@ rm -f "${tmpdir}.contents.txt"
# Create archive and try to normalize metadata for reproducibility.
tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
- --exclude=".__afs*" --exclude=".nfs*" \
--owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \
-I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 2861f89880af..9d5c8651492d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -398,7 +398,7 @@ void irq_domain_remove(struct irq_domain *domain)
* If the going away domain is the default one, reset it.
*/
if (unlikely(irq_default_domain == domain))
- irq_set_default_host(NULL);
+ irq_set_default_domain(NULL);
mutex_unlock(&irq_domain_mutex);
@@ -573,7 +573,7 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
/**
- * irq_set_default_host() - Set a "default" irq domain
+ * irq_set_default_domain() - Set a "default" irq domain
* @domain: default domain pointer
*
* For convenience, it's possible to set a "default" domain that will be used
@@ -581,16 +581,16 @@ EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
* platforms that want to manipulate a few hard coded interrupt numbers that
* aren't properly represented in the device-tree.
*/
-void irq_set_default_host(struct irq_domain *domain)
+void irq_set_default_domain(struct irq_domain *domain)
{
pr_debug("Default domain set to @0x%p\n", domain);
irq_default_domain = domain;
}
-EXPORT_SYMBOL_GPL(irq_set_default_host);
+EXPORT_SYMBOL_GPL(irq_set_default_domain);
/**
- * irq_get_default_host() - Retrieve the "default" irq domain
+ * irq_get_default_domain() - Retrieve the "default" irq domain
*
* Returns: the default domain, if any.
*
@@ -598,11 +598,11 @@ EXPORT_SYMBOL_GPL(irq_set_default_host);
* systems that cannot implement a firmware->fwnode mapping (which
* both DT and ACPI provide).
*/
-struct irq_domain *irq_get_default_host(void)
+struct irq_domain *irq_get_default_domain(void)
{
return irq_default_domain;
}
-EXPORT_SYMBOL_GPL(irq_get_default_host);
+EXPORT_SYMBOL_GPL(irq_get_default_domain);
static bool irq_domain_is_nomap(struct irq_domain *domain)
{
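The default-domain accessors above are a pure rename; callers only change spelling. Sketch with a hypothetical domain pointer:

	static void my_irq_init(struct irq_domain *my_domain)
	{
		irq_set_default_domain(my_domain);			/* was irq_set_default_host() */
		WARN_ON(irq_get_default_domain() != my_domain);		/* was irq_get_default_host() */
	}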
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 147cabb4c077..f2b2929986ff 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -37,7 +37,7 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
void irq_force_complete_move(struct irq_desc *desc)
{
- for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
+ for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = irqd_get_parent_data(d)) {
if (d->chip && d->chip->irq_force_complete_move) {
d->chip->irq_force_complete_move(d);
return;
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 117d9d4d3c3b..6ce73cceaf53 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -1500,7 +1500,7 @@ static int access_thread(void *arg)
func();
}
} while (!torture_must_stop());
- del_timer_sync(&timer);
+ timer_delete_sync(&timer);
destroy_timer_on_stack(&timer);
torture_kthread_stopping("access_thread");
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5dc5b0d7238e..77c44924cf54 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1362,14 +1362,14 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
struct kthread_worker *worker = work->worker;
/*
- * del_timer_sync() must be called to make sure that the timer
+ * timer_delete_sync() must be called to make sure that the timer
* callback is not running. The lock must be temporary released
* to avoid a deadlock with the callback. In the meantime,
* any queuing is blocked by setting the canceling counter.
*/
work->canceling++;
raw_spin_unlock_irqrestore(&worker->lock, *flags);
- del_timer_sync(&dwork->timer);
+ timer_delete_sync(&dwork->timer);
raw_spin_lock_irqsave(&worker->lock, *flags);
work->canceling--;
}
diff --git a/kernel/panic.c b/kernel/panic.c
index 0c55eec9e874..a3889f38153d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -833,9 +833,15 @@ device_initcall(register_warn_debugfs);
*/
__visible noinstr void __stack_chk_fail(void)
{
+ unsigned long flags;
+
instrumentation_begin();
+ flags = user_access_save();
+
panic("stack-protector: Kernel stack is corrupted in: %pB",
__builtin_return_address(0));
+
+ user_access_restore(flags);
instrumentation_end();
}
EXPORT_SYMBOL(__stack_chk_fail);
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index aa42de4d2768..4d9b21f69eaa 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -68,6 +68,8 @@ config TREE_SRCU
config FORCE_NEED_SRCU_NMI_SAFE
bool "Force selection of NEED_SRCU_NMI_SAFE"
depends on !TINY_SRCU
+ depends on RCU_EXPERT
+ depends on ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select NEED_SRCU_NMI_SAFE
default n
help
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 65095664f5c5..4fa7772be183 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2324,7 +2324,7 @@ rcu_torture_reader(void *arg)
stutter_wait("rcu_torture_reader");
} while (!torture_must_stop());
if (irqreader && cur_ops->irq_capable) {
- del_timer_sync(&t);
+ timer_delete_sync(&t);
destroy_timer_on_stack(&t);
}
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index d2a694944553..9a59b071501b 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -690,7 +690,7 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- del_timer_sync(&sdp->delay_work);
+ timer_delete_sync(&sdp->delay_work);
flush_work(&sdp->work);
if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
return; /* Forgot srcu_barrier(), so just leak it! */
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 466668eb4fad..c0cc7ae41106 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1086,7 +1086,7 @@ static void rcu_tasks_postscan(struct list_head *hop)
}
if (!IS_ENABLED(CONFIG_TINY_RCU))
- del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
+ timer_delete_sync(&tasks_rcu_exit_srcu_stall_timer);
}
/* See if tasks are still holding out, complain if so. */
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 5ff3bc56ff51..fa269d34167a 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -206,7 +206,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- del_timer(&rdp_gp->nocb_timer);
+ timer_delete(&rdp_gp->nocb_timer);
}
if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
@@ -822,7 +822,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- del_timer(&my_rdp->nocb_timer);
+ timer_delete(&my_rdp->nocb_timer);
}
WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cfaca3040b2f..c81cf642dba0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10703,7 +10703,6 @@ void sched_mm_cid_after_execve(struct task_struct *t)
smp_mb();
t->last_mm_cid = t->mm_cid = mm_cid_get(rq, t, mm);
}
- rseq_set_notify_resume(t);
}
void sched_mm_cid_fork(struct task_struct *t)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 21575d39c376..66bcd40a28ca 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4171,8 +4171,8 @@ static struct scx_dispatch_q *create_dsq(u64 dsq_id, int node)
init_dsq(dsq, dsq_id);
- ret = rhashtable_insert_fast(&dsq_hash, &dsq->hash_node,
- dsq_hash_params);
+ ret = rhashtable_lookup_insert_fast(&dsq_hash, &dsq->hash_node,
+ dsq_hash_params);
if (ret) {
kfree(dsq);
return ERR_PTR(ret);
@@ -5361,6 +5361,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
*/
cpus_read_lock();
+ scx_idle_enable(ops);
+
if (scx_ops.init) {
ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init);
if (ret) {
@@ -5427,8 +5429,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (scx_ops.cpu_acquire || scx_ops.cpu_release)
static_branch_enable(&scx_ops_cpu_preempt);
- scx_idle_enable(ops);
-
/*
* Lock out forks, cgroup on/offlining and moves before opening the
* floodgate so that they don't wander into the operations prematurely.
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 52c36a70a3d0..cb343ca889e0 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -544,7 +544,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
* core.
*/
if (flags & SCX_PICK_IDLE_CORE) {
- cpu = prev_cpu;
+ cpu = -EBUSY;
goto out_unlock;
}
}
@@ -584,8 +584,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
* increasing distance.
*/
cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
- if (cpu >= 0)
- goto out_unlock;
out_unlock:
rcu_read_unlock();
@@ -723,14 +721,14 @@ static void reset_idle_masks(struct sched_ext_ops *ops)
void scx_idle_enable(struct sched_ext_ops *ops)
{
if (!ops->update_idle || (ops->flags & SCX_OPS_KEEP_BUILTIN_IDLE))
- static_branch_enable(&scx_builtin_idle_enabled);
+ static_branch_enable_cpuslocked(&scx_builtin_idle_enabled);
else
- static_branch_disable(&scx_builtin_idle_enabled);
+ static_branch_disable_cpuslocked(&scx_builtin_idle_enabled);
if (ops->flags & SCX_OPS_BUILTIN_IDLE_PER_NODE)
- static_branch_enable(&scx_builtin_idle_per_node);
+ static_branch_enable_cpuslocked(&scx_builtin_idle_per_node);
else
- static_branch_disable(&scx_builtin_idle_per_node);
+ static_branch_disable_cpuslocked(&scx_builtin_idle_per_node);
#ifdef CONFIG_SMP
reset_idle_masks(ops);
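Moving scx_idle_enable() inside the cpus_read_lock() section (see the ext.c hunk above) is why the static keys now use the _cpuslocked helpers, which expect the caller to already hold the CPU hotplug lock. A sketch with a hypothetical key:

	DEFINE_STATIC_KEY_FALSE(my_key);

	static void my_enable(void)
	{
		cpus_read_lock();
		static_branch_enable_cpuslocked(&my_key);	/* lock already held by the caller */
		cpus_read_unlock();
	}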
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index bb56805e3d47..1396674fa722 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1440,7 +1440,7 @@ void psi_trigger_destroy(struct psi_trigger *t)
group->rtpoll_task,
lockdep_is_held(&group->rtpoll_trigger_lock));
rcu_assign_pointer(group->rtpoll_task, NULL);
- del_timer(&group->rtpoll_timer);
+ timer_delete(&group->rtpoll_timer);
}
}
mutex_unlock(&group->rtpoll_trigger_lock);
diff --git a/kernel/signal.c b/kernel/signal.c
index 614d78fe3451..f8859faa26c5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2180,11 +2180,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
WARN_ON_ONCE(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
- /*
- * Notify for thread-group leaders without subthreads.
- */
- if (thread_group_empty(tsk))
- do_notify_pidfd(tsk);
+
+ /* ptraced, or group-leader without sub-threads */
+ do_notify_pidfd(tsk);
if (sig != SIGCHLD) {
/*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e0eeacbe2521..bb48498ebb5a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -619,7 +619,7 @@ static inline void clocksource_stop_watchdog(void)
{
if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
return;
- del_timer(&watchdog_timer);
+ timer_delete(&watchdog_timer);
watchdog_running = 0;
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 22376a1a75b9..517ee2590a29 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -465,19 +465,17 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer,
static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
#endif
-static inline void
-debug_init(struct hrtimer *timer, clockid_t clockid,
- enum hrtimer_mode mode)
+static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode)
{
debug_hrtimer_init(timer);
- trace_hrtimer_init(timer, clockid, mode);
+ trace_hrtimer_setup(timer, clockid, mode);
}
-static inline void debug_init_on_stack(struct hrtimer *timer, clockid_t clockid,
- enum hrtimer_mode mode)
+static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid,
+ enum hrtimer_mode mode)
{
debug_hrtimer_init_on_stack(timer);
- trace_hrtimer_init(timer, clockid, mode);
+ trace_hrtimer_setup(timer, clockid, mode);
}
static inline void debug_activate(struct hrtimer *timer,
@@ -1316,8 +1314,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base;
unsigned long flags;
- if (WARN_ON_ONCE(!timer->function))
- return;
/*
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
@@ -1429,7 +1425,7 @@ static __always_inline bool is_migration_base(struct hrtimer_clock_base *base)
* running.
*
* This prevents priority inversion: if the soft irq thread is preempted
- * in the middle of a timer callback, then calling del_timer_sync() can
+ * in the middle of a timer callback, then calling hrtimer_cancel() can
* lead to two issues:
*
* - If the caller is on a remote CPU then it has to spin wait for the timer
@@ -1592,8 +1588,9 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
}
}
-static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
- enum hrtimer_mode mode)
+static void __hrtimer_setup(struct hrtimer *timer,
+ enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t clock_id, enum hrtimer_mode mode)
{
bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
struct hrtimer_cpu_base *cpu_base;
@@ -1626,39 +1623,12 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
-}
-
-static void __hrtimer_setup(struct hrtimer *timer,
- enum hrtimer_restart (*function)(struct hrtimer *),
- clockid_t clock_id, enum hrtimer_mode mode)
-{
- __hrtimer_init(timer, clock_id, mode);
if (WARN_ON_ONCE(!function))
- timer->function = hrtimer_dummy_timeout;
+ ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout;
else
- timer->function = function;
-}
-
-/**
- * hrtimer_init - initialize a timer to the given clock
- * @timer: the timer to be initialized
- * @clock_id: the clock to be used
- * @mode: The modes which are relevant for initialization:
- * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
- * HRTIMER_MODE_REL_SOFT
- *
- * The PINNED variants of the above can be handed in,
- * but the PINNED bit is ignored as pinning happens
- * when the hrtimer is started
- */
-void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
- enum hrtimer_mode mode)
-{
- debug_init(timer, clock_id, mode);
- __hrtimer_init(timer, clock_id, mode);
+ ACCESS_PRIVATE(timer, function) = function;
}
-EXPORT_SYMBOL_GPL(hrtimer_init);
/**
* hrtimer_setup - initialize a timer to the given clock
@@ -1676,7 +1646,7 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
clockid_t clock_id, enum hrtimer_mode mode)
{
- debug_init(timer, clock_id, mode);
+ debug_setup(timer, clock_id, mode);
__hrtimer_setup(timer, function, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup);
@@ -1695,7 +1665,7 @@ void hrtimer_setup_on_stack(struct hrtimer *timer,
enum hrtimer_restart (*function)(struct hrtimer *),
clockid_t clock_id, enum hrtimer_mode mode)
{
- debug_init_on_stack(timer, clock_id, mode);
+ debug_setup_on_stack(timer, clock_id, mode);
__hrtimer_setup(timer, function, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack);
@@ -1769,7 +1739,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
raw_write_seqcount_barrier(&base->seq);
__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
- fn = timer->function;
+ fn = ACCESS_PRIVATE(timer, function);
/*
* Clear the 'is relative' flag for the TIME_LOW_RES case. If the
@@ -2044,7 +2014,7 @@ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
* Make the enqueue delivery mode check work on RT. If the sleeper
* was initialized for hard interrupt delivery, force the mode bit.
* This is a special case for hrtimer_sleepers because
- * __hrtimer_init_sleeper() determines the delivery mode on RT so the
+ * __hrtimer_setup_sleeper() determines the delivery mode on RT so the
* fiddling with this decision is avoided at the call sites.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
@@ -2054,8 +2024,8 @@ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
}
EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
-static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
- clockid_t clock_id, enum hrtimer_mode mode)
+static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl,
+ clockid_t clock_id, enum hrtimer_mode mode)
{
/*
* On PREEMPT_RT enabled kernels hrtimers which are not explicitly
@@ -2081,8 +2051,7 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
mode |= HRTIMER_MODE_HARD;
}
- __hrtimer_init(&sl->timer, clock_id, mode);
- sl->timer.function = hrtimer_wakeup;
+ __hrtimer_setup(&sl->timer, hrtimer_wakeup, clock_id, mode);
sl->task = current;
}
@@ -2095,8 +2064,8 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl,
clockid_t clock_id, enum hrtimer_mode mode)
{
- debug_init_on_stack(&sl->timer, clock_id, mode);
- __hrtimer_init_sleeper(sl, clock_id, mode);
+ debug_setup_on_stack(&sl->timer, clock_id, mode);
+ __hrtimer_setup_sleeper(sl, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack);
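The conversions above drop the two-step hrtimer_init() plus explicit callback assignment in favour of the setup-style API, which also allows the callback field to be marked __private and reached via ACCESS_PRIVATE(). A sketch with hypothetical names:

	static struct hrtimer my_hrtimer;

	static enum hrtimer_restart my_hrtimer_fn(struct hrtimer *t)
	{
		return HRTIMER_NORESTART;
	}

	static void my_init(void)
	{
		/* was: hrtimer_init(&my_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		 *      my_hrtimer.function = my_hrtimer_fn;
		 */
		hrtimer_setup(&my_hrtimer, my_hrtimer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		hrtimer_start(&my_hrtimer, ms_to_ktime(10), HRTIMER_MODE_REL);
	}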
diff --git a/kernel/time/sleep_timeout.c b/kernel/time/sleep_timeout.c
index dfe939f6e4ec..c0e960a5de39 100644
--- a/kernel/time/sleep_timeout.c
+++ b/kernel/time/sleep_timeout.c
@@ -97,7 +97,7 @@ signed long __sched schedule_timeout(signed long timeout)
timer.timer.expires = expire;
add_timer(&timer.timer);
schedule();
- del_timer_sync(&timer.timer);
+ timer_delete_sync(&timer.timer);
/* Remove the timer from the object tracker */
destroy_timer_on_stack(&timer.timer);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 929846b8b45a..1e67d076f195 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -682,19 +682,20 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act
}
/**
- * timekeeping_forward - update clock to given cycle now value
+ * timekeeping_forward_now - update clock to the current time
* @tk: Pointer to the timekeeper to update
- * @cycle_now: Current clocksource read value
*
* Forward the current clock to update its state since the last call to
* update_wall_time(). This is useful before significant clock changes,
* as it avoids having to deal with this time offset explicitly.
*/
-static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
+static void timekeeping_forward_now(struct timekeeper *tk)
{
- u64 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
- tk->tkr_mono.clock->max_raw_delta);
+ u64 cycle_now, delta;
+ cycle_now = tk_clock_read(&tk->tkr_mono);
+ delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
+ tk->tkr_mono.clock->max_raw_delta);
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
@@ -710,21 +711,6 @@ static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
}
/**
- * timekeeping_forward_now - update clock to the current time
- * @tk: Pointer to the timekeeper to update
- *
- * Forward the current clock to update its state since the last call to
- * update_wall_time(). This is useful before significant clock changes,
- * as it avoids having to deal with this time offset explicitly.
- */
-static void timekeeping_forward_now(struct timekeeper *tk)
-{
- u64 cycle_now = tk_clock_read(&tk->tkr_mono);
-
- timekeeping_forward(tk, cycle_now);
-}
-
-/**
* ktime_get_real_ts64 - Returns the time of day in a timespec64.
* @ts: pointer to the timespec to be set
*
@@ -2165,54 +2151,6 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
return offset;
}
-static u64 timekeeping_accumulate(struct timekeeper *tk, u64 offset,
- enum timekeeping_adv_mode mode,
- unsigned int *clock_set)
-{
- int shift = 0, maxshift;
-
- /*
- * TK_ADV_FREQ indicates that adjtimex(2) directly set the
- * frequency or the tick length.
- *
- * Accumulate the offset, so that the new multiplier starts from
- * now. This is required as otherwise for offsets, which are
- * smaller than tk::cycle_interval, timekeeping_adjust() could set
- * xtime_nsec backwards, which subsequently causes time going
- * backwards in the coarse time getters. But even for the case
- * where offset is greater than tk::cycle_interval the periodic
- * accumulation does not have much value.
- *
- * Also reset tk::ntp_error as it does not make sense to keep the
- * old accumulated error around in this case.
- */
- if (mode == TK_ADV_FREQ) {
- timekeeping_forward(tk, tk->tkr_mono.cycle_last + offset);
- tk->ntp_error = 0;
- return 0;
- }
-
- /*
- * With NO_HZ we may have to accumulate many cycle_intervals
- * (think "ticks") worth of time at once. To do this efficiently,
- * we calculate the largest doubling multiple of cycle_intervals
- * that is smaller than the offset. We then accumulate that
- * chunk in one go, and then try to consume the next smaller
- * doubled multiple.
- */
- shift = ilog2(offset) - ilog2(tk->cycle_interval);
- shift = max(0, shift);
- /* Bound shift to one less than what overflows tick_length */
- maxshift = (64 - (ilog2(ntp_tick_length()) + 1)) - 1;
- shift = min(shift, maxshift);
- while (offset >= tk->cycle_interval) {
- offset = logarithmic_accumulation(tk, offset, shift, clock_set);
- if (offset < tk->cycle_interval << shift)
- shift--;
- }
- return offset;
-}
-
/*
* timekeeping_advance - Updates the timekeeper to the current time and
* current NTP tick length
@@ -2222,6 +2160,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
struct timekeeper *tk = &tk_core.shadow_timekeeper;
struct timekeeper *real_tk = &tk_core.timekeeper;
unsigned int clock_set = 0;
+ int shift = 0, maxshift;
u64 offset;
guard(raw_spinlock_irqsave)(&tk_core.lock);
@@ -2238,7 +2177,24 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
return false;
- offset = timekeeping_accumulate(tk, offset, mode, &clock_set);
+ /*
+ * With NO_HZ we may have to accumulate many cycle_intervals
+ * (think "ticks") worth of time at once. To do this efficiently,
+ * we calculate the largest doubling multiple of cycle_intervals
+ * that is smaller than the offset. We then accumulate that
+ * chunk in one go, and then try to consume the next smaller
+ * doubled multiple.
+ */
+ shift = ilog2(offset) - ilog2(tk->cycle_interval);
+ shift = max(0, shift);
+ /* Bound shift to one less than what overflows tick_length */
+ maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
+ shift = min(shift, maxshift);
+ while (offset >= tk->cycle_interval) {
+ offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
+ if (offset < tk->cycle_interval<<shift)
+ shift--;
+ }
/* Adjust the multiplier to correct NTP error */
timekeeping_adjust(tk, offset);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index c8f776dc6ee0..4d915c0a263c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -744,7 +744,7 @@ static bool timer_fixup_init(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_ACTIVE:
- del_timer_sync(timer);
+ timer_delete_sync(timer);
debug_object_init(timer, &timer_debug_descr);
return true;
default:
@@ -790,7 +790,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_ACTIVE:
- del_timer_sync(timer);
+ timer_delete_sync(timer);
debug_object_free(timer, &timer_debug_descr);
return true;
default:
@@ -1212,10 +1212,10 @@ EXPORT_SYMBOL(mod_timer_pending);
*
* mod_timer(timer, expires) is equivalent to:
*
- * del_timer(timer); timer->expires = expires; add_timer(timer);
+ * timer_delete(timer); timer->expires = expires; add_timer(timer);
*
* mod_timer() is more efficient than the above open coded sequence. In
- * case that the timer is inactive, the del_timer() part is a NOP. The
+ * case that the timer is inactive, the timer_delete() part is a NOP. The
* timer is in any case activated with the new expiry time @expires.
*
* Note that if there are multiple unserialized concurrent users of the
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index cfbb46cc4e76..b03d0ada6469 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -46,7 +46,7 @@ static void
print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
int idx, u64 now)
{
- SEQ_printf(m, " #%d: <%p>, %ps", idx, taddr, timer->function);
+ SEQ_printf(m, " #%d: <%p>, %ps", idx, taddr, ACCESS_PRIVATE(timer, function));
SEQ_printf(m, ", S:%02x", timer->state);
SEQ_printf(m, "\n");
SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 033fba0633cf..a3f35c7d83b6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -265,8 +265,7 @@ config FUNCTION_GRAPH_RETADDR
config FUNCTION_TRACE_ARGS
bool
- depends on HAVE_FUNCTION_ARG_ACCESS_API
- depends on DEBUG_INFO_BTF
+ depends on PROBE_EVENTS_BTF_ARGS
default y
help
If supported with function argument access API and BTF, then
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 92015de6203d..1a48aedb5255 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6855,6 +6855,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer)
}
}
}
+ cond_resched();
} while_for_each_ftrace_rec();
return fail ? -EINVAL : 0;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d8d7b28e2c2f..c0f877d39a24 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -6016,7 +6016,7 @@ static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
meta->read = cpu_buffer->read;
/* Some archs do not have data cache coherency between kernel and user-space */
- flush_dcache_folio(virt_to_folio(cpu_buffer->meta_page));
+ flush_kernel_vmap_range(cpu_buffer->meta_page, PAGE_SIZE);
}
static void
@@ -7319,7 +7319,8 @@ consume:
out:
/* Some archs do not have data cache coherency between kernel and user-space */
- flush_dcache_folio(virt_to_folio(cpu_buffer->reader_page->page));
+ flush_kernel_vmap_range(cpu_buffer->reader_page->page,
+ buffer->subbuf_size + BUF_PAGE_HDR_SIZE);
rb_update_meta_page(cpu_buffer);
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 50344aa9f7f9..968c5c3b0246 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -809,7 +809,8 @@ int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent)
if (p && rv_is_nested_monitor(p)) {
pr_info("Parent monitor %s is already nested, cannot nest further\n",
parent->name);
- return -EINVAL;
+ retval = -EINVAL;
+ goto out_unlock;
}
r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 103b193875b3..b581e388a9d9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -50,6 +50,7 @@
#include <linux/irq_work.h>
#include <linux/workqueue.h>
#include <linux/sort.h>
+#include <linux/io.h> /* vmap_page_range() */
#include <asm/setup.h> /* COMMAND_LINE_SIZE */
@@ -3341,10 +3342,9 @@ out_nobuffer:
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
-__printf(3, 0)
-static int
-__trace_array_vprintk(struct trace_buffer *buffer,
- unsigned long ip, const char *fmt, va_list args)
+static __printf(3, 0)
+int __trace_array_vprintk(struct trace_buffer *buffer,
+ unsigned long ip, const char *fmt, va_list args)
{
struct ring_buffer_event *event;
int len = 0, size;
@@ -3394,7 +3394,6 @@ out_nobuffer:
return len;
}
-__printf(3, 0)
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
@@ -3424,7 +3423,6 @@ int trace_array_vprintk(struct trace_array *tr,
* Note, trace_array_init_printk() must be called on @tr before this
* can be used.
*/
-__printf(3, 0)
int trace_array_printk(struct trace_array *tr,
unsigned long ip, const char *fmt, ...)
{
@@ -3469,7 +3467,6 @@ int trace_array_init_printk(struct trace_array *tr)
}
EXPORT_SYMBOL_GPL(trace_array_init_printk);
-__printf(3, 4)
int trace_array_printk_buf(struct trace_buffer *buffer,
unsigned long ip, const char *fmt, ...)
{
@@ -3485,7 +3482,6 @@ int trace_array_printk_buf(struct trace_buffer *buffer,
return ret;
}
-__printf(2, 0)
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
return trace_array_vprintk(printk_trace, ip, fmt, args);
@@ -8505,6 +8501,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
struct trace_iterator *iter = &info->iter;
int ret = 0;
+ /* A memmap'ed buffer is not supported for user space mmap */
+ if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
+ return -ENODEV;
+
/* Currently the boot mapped buffer is not supported for mmap */
if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
return -ENODEV;
@@ -9609,13 +9609,11 @@ static void free_trace_buffers(struct trace_array *tr)
return;
free_trace_buffer(&tr->array_buffer);
+ kfree(tr->module_delta);
#ifdef CONFIG_TRACER_MAX_TRACE
free_trace_buffer(&tr->max_buffer);
#endif
-
- if (tr->range_addr_start)
- vunmap((void *)tr->range_addr_start);
}
static void init_trace_flags_index(struct trace_array *tr)
@@ -9808,29 +9806,27 @@ static int instance_mkdir(const char *name)
return ret;
}
-static u64 map_pages(u64 start, u64 size)
+static u64 map_pages(unsigned long start, unsigned long size)
{
- struct page **pages;
- phys_addr_t page_start;
- unsigned int page_count;
- unsigned int i;
- void *vaddr;
-
- page_count = DIV_ROUND_UP(size, PAGE_SIZE);
+ unsigned long vmap_start, vmap_end;
+ struct vm_struct *area;
+ int ret;
- page_start = start;
- pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
- if (!pages)
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
return 0;
- for (i = 0; i < page_count; i++) {
- phys_addr_t addr = page_start + i * PAGE_SIZE;
- pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
+ vmap_start = (unsigned long) area->addr;
+ vmap_end = vmap_start + size;
+
+ ret = vmap_page_range(vmap_start, vmap_end,
+ start, pgprot_nx(PAGE_KERNEL));
+ if (ret < 0) {
+ free_vm_area(area);
+ return 0;
}
- vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
- kfree(pages);
- return (u64)(unsigned long)vaddr;
+ return (u64)vmap_start;
}
/**
@@ -10709,6 +10705,7 @@ static inline void do_allocate_snapshot(const char *name) { }
__init static void enable_instances(void)
{
struct trace_array *tr;
+ bool memmap_area = false;
char *curr_str;
char *name;
char *str;
@@ -10777,6 +10774,7 @@ __init static void enable_instances(void)
name);
continue;
}
+ memmap_area = true;
} else if (tok) {
if (!reserve_mem_find_by_name(tok, &start, &size)) {
start = 0;
@@ -10787,7 +10785,20 @@ __init static void enable_instances(void)
}
if (start) {
- addr = map_pages(start, size);
+ /* Start and size must be page aligned */
+ if (start & ~PAGE_MASK) {
+ pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
+ continue;
+ }
+ if (size & ~PAGE_MASK) {
+ pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
+ continue;
+ }
+
+ if (memmap_area)
+ addr = map_pages(start, size);
+ else
+ addr = (unsigned long)phys_to_virt(start);
if (addr) {
pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
name, &start, (unsigned long)size);
@@ -10814,10 +10825,13 @@ __init static void enable_instances(void)
update_printk_trace(tr);
/*
- * If start is set, then this is a mapped buffer, and
- * cannot be deleted by user space, so keep the reference
- * to it.
+ * memmap'd buffers can not be freed.
*/
+ if (memmap_area) {
+ tr->flags |= TRACE_ARRAY_FL_MEMMAP;
+ tr->ref++;
+ }
+
if (start) {
tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
tr->range_name = no_free_ptr(rname);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c20f6bcc200a..79be1995db44 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -447,6 +447,7 @@ enum {
TRACE_ARRAY_FL_BOOT = BIT(1),
TRACE_ARRAY_FL_LAST_BOOT = BIT(2),
TRACE_ARRAY_FL_MOD_INIT = BIT(3),
+ TRACE_ARRAY_FL_MEMMAP = BIT(4),
};
#ifdef CONFIG_MODULES
@@ -852,13 +853,15 @@ static inline void __init disable_tracing_selftest(const char *reason)
extern void *head_page(struct trace_array_cpu *data);
extern unsigned long long ns2usecs(u64 nsec);
-extern int
-trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
-extern int
-trace_vprintk(unsigned long ip, const char *fmt, va_list args);
-extern int
-trace_array_vprintk(struct trace_array *tr,
- unsigned long ip, const char *fmt, va_list args);
+
+__printf(2, 0)
+int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
+__printf(2, 0)
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
+__printf(3, 0)
+int trace_array_vprintk(struct trace_array *tr,
+ unsigned long ip, const char *fmt, va_list args);
+__printf(3, 4)
int trace_array_printk_buf(struct trace_buffer *buffer,
unsigned long ip, const char *fmt, ...);
void trace_printk_seq(struct trace_seq *s);
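The __printf() annotations consolidated above follow the usual GCC format-checking convention: the first number is the 1-based position of the format string, the second is the position of the first variadic argument, and 0 marks a va_list variant whose arguments cannot be checked individually. Hypothetical prototypes for illustration:

	__printf(2, 3)
	int my_printk(unsigned long ip, const char *fmt, ...);

	__printf(2, 0)
	int my_vprintk(unsigned long ip, const char *fmt, va_list args);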
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8638b7f7ff85..069e92856bda 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -470,6 +470,7 @@ static void test_event_printk(struct trace_event_call *call)
case '%':
continue;
case 'p':
+ do_pointer:
/* Find dereferencing fields */
switch (fmt[i + 1]) {
case 'B': case 'R': case 'r':
@@ -498,6 +499,12 @@ static void test_event_printk(struct trace_event_call *call)
continue;
if (fmt[i + j] == '*') {
star = true;
+ /* Handle %*pbl case */
+ if (!j && fmt[i + 1] == 'p') {
+ arg++;
+ i++;
+ goto do_pointer;
+ }
continue;
}
if ((fmt[i + j] == 's')) {
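The new do_pointer branch above covers formats such as "%*pbl", where the '*' consumes an extra int argument (the bitmap length in bits) before the pointer argument is read. Typical usage, for reference:

	/* cpumask_pr_args() expands to "nr_cpu_ids, cpumask_bits(mask)" */
	pr_info("affinity: %*pbl\n", cpumask_pr_args(mask));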
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index bfe030b443e2..cf6203282737 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2057,11 +2057,11 @@ static int try_to_grab_pending(struct work_struct *work, u32 cflags,
struct delayed_work *dwork = to_delayed_work(work);
/*
- * dwork->timer is irqsafe. If del_timer() fails, it's
+ * dwork->timer is irqsafe. If timer_delete() fails, it's
* guaranteed that the timer is not queued anywhere and not
* running on the local CPU.
*/
- if (likely(del_timer(&dwork->timer)))
+ if (likely(timer_delete(&dwork->timer)))
return 1;
}
@@ -3069,7 +3069,7 @@ restart:
break;
}
- del_timer_sync(&pool->mayday_timer);
+ timer_delete_sync(&pool->mayday_timer);
raw_spin_lock_irq(&pool->lock);
/*
* This is necessary even after a new worker was just successfully
@@ -4281,7 +4281,7 @@ EXPORT_SYMBOL_GPL(flush_work);
bool flush_delayed_work(struct delayed_work *dwork)
{
local_irq_disable();
- if (del_timer_sync(&dwork->timer))
+ if (timer_delete_sync(&dwork->timer))
__queue_work(dwork->cpu, dwork->wq, &dwork->work);
local_irq_enable();
return flush_work(&dwork->work);
@@ -4984,9 +4984,9 @@ static void put_unbound_pool(struct worker_pool *pool)
reap_dying_workers(&cull_list);
/* shut down the timers */
- del_timer_sync(&pool->idle_timer);
+ timer_delete_sync(&pool->idle_timer);
cancel_work_sync(&pool->idle_cull_work);
- del_timer_sync(&pool->mayday_timer);
+ timer_delete_sync(&pool->mayday_timer);
/* RCU protected to allow dereferences from get_work_pool() */
call_rcu(&pool->rcu, rcu_free_pool);
@@ -7637,7 +7637,7 @@ notrace void wq_watchdog_touch(int cpu)
static void wq_watchdog_set_thresh(unsigned long thresh)
{
wq_watchdog_thresh = 0;
- del_timer_sync(&wq_watchdog_timer);
+ timer_delete_sync(&wq_watchdog_timer);
if (thresh) {
wq_watchdog_thresh = thresh;