diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit_watch.c | 2 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 5 | ||||
-rw-r--r-- | kernel/cpu.c | 27 | ||||
-rw-r--r-- | kernel/events/core.c | 23 | ||||
-rw-r--r-- | kernel/power/qos.c | 8 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 9 | ||||
-rw-r--r-- | kernel/sched/core.c | 5 | ||||
-rw-r--r-- | kernel/sched/fair.c | 29 | ||||
-rw-r--r-- | kernel/sched/idle.c | 24 | ||||
-rw-r--r-- | kernel/signal.c | 2 | ||||
-rw-r--r-- | kernel/time/ntp.c | 2 |
11 files changed, 104 insertions, 32 deletions
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 1f31c2f1e6fc..4508d5e0cf69 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -351,12 +351,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - inode_unlock(d_backing_inode(parent->dentry)); if (d_is_positive(d)) { /* update watch filter fields */ watch->dev = d->d_sb->s_dev; watch->ino = d_backing_inode(d)->i_ino; } + inode_unlock(d_backing_inode(parent->dentry)); dput(d); return 0; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 080561bb8a4b..ef4242e5d4bc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2119,11 +2119,12 @@ int cgroup_do_get_tree(struct fs_context *fc) nsdentry = kernfs_node_dentry(cgrp->kn, sb); dput(fc->root); - fc->root = nsdentry; if (IS_ERR(nsdentry)) { - ret = PTR_ERR(nsdentry); deactivate_locked_super(sb); + ret = PTR_ERR(nsdentry); + nsdentry = NULL; } + fc->root = nsdentry; } if (!ctx->kfc.new_sb_created) diff --git a/kernel/cpu.c b/kernel/cpu.c index fc28e17940e0..e2cad3ee2ead 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2373,7 +2373,18 @@ void __init boot_cpu_hotplug_init(void) this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } -enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; +/* + * These are used for a global "mitigations=" cmdline option for toggling + * optional CPU mitigations. + */ +enum cpu_mitigations { + CPU_MITIGATIONS_OFF, + CPU_MITIGATIONS_AUTO, + CPU_MITIGATIONS_AUTO_NOSMT, +}; + +static enum cpu_mitigations cpu_mitigations __ro_after_init = + CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { @@ -2390,3 +2401,17 @@ static int __init mitigations_parse_cmdline(char *arg) return 0; } early_param("mitigations", mitigations_parse_cmdline); + +/* mitigations=off */ +bool cpu_mitigations_off(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_OFF; +} +EXPORT_SYMBOL_GPL(cpu_mitigations_off); + +/* mitigations=auto,nosmt */ +bool cpu_mitigations_auto_nosmt(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +} +EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); diff --git a/kernel/events/core.c b/kernel/events/core.c index aec8dba2bea4..00a014670ed0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1031,7 +1031,7 @@ perf_cgroup_set_timestamp(struct task_struct *task, { } -void +static inline void perf_cgroup_switch(struct task_struct *task, struct task_struct *next) { } @@ -10535,6 +10535,15 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, goto err_ns; } + /* + * Disallow uncore-cgroup events, they don't make sense as the cgroup will + * be different on other CPUs in the uncore mask. + */ + if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) { + err = -EINVAL; + goto err_pmu; + } + if (event->attr.aux_output && !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) { err = -EOPNOTSUPP; @@ -11323,8 +11332,11 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, int err; /* - * Get the target context (task or percpu): + * Grouping is not supported for kernel events, neither is 'AUX', + * make sure the caller's intentions are adjusted. */ + if (attr->aux_output) + return ERR_PTR(-EINVAL); event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler, context, -1); @@ -11336,6 +11348,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, /* Mark owner so we could distinguish it from user events. */ event->owner = TASK_TOMBSTONE; + /* + * Get the target context (task or percpu): + */ ctx = find_get_context(event->pmu, task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); @@ -11787,7 +11802,7 @@ inherit_event(struct perf_event *parent_event, GFP_KERNEL); if (!child_ctx->task_ctx_data) { free_event(child_event); - return NULL; + return ERR_PTR(-ENOMEM); } } @@ -11890,7 +11905,7 @@ static int inherit_group(struct perf_event *parent_event, if (IS_ERR(child_ctr)) return PTR_ERR(child_ctr); - if (sub->aux_event == parent_event && + if (sub->aux_event == parent_event && child_ctr && !perf_get_aux_event(child_ctr, leader)) return -EINVAL; } diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 04e83fdfbe80..a45cba7df0ae 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -814,6 +814,8 @@ EXPORT_SYMBOL_GPL(freq_qos_update_request); */ int freq_qos_remove_request(struct freq_qos_request *req) { + int ret; + if (!req) return -EINVAL; @@ -821,7 +823,11 @@ int freq_qos_remove_request(struct freq_qos_request *req) "%s() called for unknown object\n", __func__)) return -EINVAL; - return freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); + ret = freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); + req->qos = NULL; + req->type = 0; + + return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_request); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 83105874f255..26b9168321e7 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -734,8 +734,15 @@ zone_found: * We have found the zone. Now walk the radix tree to find the leaf node * for our PFN. */ + + /* + * If the zone we wish to scan is the the current zone and the + * pfn falls into the current node then we do not need to walk + * the tree. + */ node = bm->cur.node; - if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) + if (zone == bm->cur.zone && + ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) goto node_found; node = zone->rtree; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0f2eb3629070..44123b4d14e8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1065,7 +1065,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id) * affecting a valid clamp bucket, the next time it's enqueued, * it will already see the updated clamp bucket value. */ - if (!p->uclamp[clamp_id].active) { + if (p->uclamp[clamp_id].active) { uclamp_rq_dec_id(rq, p, clamp_id); uclamp_rq_inc_id(rq, p, clamp_id); } @@ -6019,10 +6019,11 @@ void init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; + __sched_fork(0, idle); + raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_lock(&rq->lock); - __sched_fork(0, idle); idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); idle->flags |= PF_IDLE; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 22a2fed29054..69a81a5709ff 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7548,6 +7548,19 @@ static void update_blocked_averages(int cpu) update_rq_clock(rq); /* + * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure + * that RT, DL and IRQ signals have been updated before updating CFS. + */ + curr_class = rq->curr->sched_class; + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); + update_irq_load_avg(rq, 0); + + /* Don't need periodic decay once load/util_avg are null */ + if (others_have_blocked(rq)) + done = false; + + /* * Iterates the task_group tree in a bottom up fashion, see * list_add_leaf_cfs_rq() for details. */ @@ -7574,14 +7587,6 @@ static void update_blocked_averages(int cpu) done = false; } - curr_class = rq->curr->sched_class; - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); - update_irq_load_avg(rq, 0); - /* Don't need periodic decay once load/util_avg are null */ - if (others_have_blocked(rq)) - done = false; - update_blocked_load_status(rq, !done); rq_unlock_irqrestore(rq, &rf); } @@ -7642,12 +7647,18 @@ static inline void update_blocked_averages(int cpu) rq_lock_irqsave(rq, &rf); update_rq_clock(rq); - update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); + /* + * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure + * that RT, DL and IRQ signals have been updated before updating CFS. + */ curr_class = rq->curr->sched_class; update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); update_irq_load_avg(rq, 0); + + update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); + update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq)); rq_unlock_irqrestore(rq, &rf); } diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index f65ef1e2f204..80167b1bd98a 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -104,7 +104,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, * update no idle residency and return. */ if (current_clr_polling_and_test()) { - dev->last_residency = 0; + dev->last_residency_ns = 0; local_irq_enable(); return -EBUSY; } @@ -165,7 +165,9 @@ static void cpuidle_idle_call(void) * until a proper wakeup interrupt happens. */ - if (idle_should_enter_s2idle() || dev->use_deepest_state) { + if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) { + u64 max_latency_ns; + if (idle_should_enter_s2idle()) { rcu_idle_enter(); @@ -176,12 +178,16 @@ static void cpuidle_idle_call(void) } rcu_idle_exit(); + + max_latency_ns = U64_MAX; + } else { + max_latency_ns = dev->forced_idle_latency_limit_ns; } tick_nohz_idle_stop_tick(); rcu_idle_enter(); - next_state = cpuidle_find_deepest_state(drv, dev); + next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns); call_cpuidle(drv, dev, next_state); } else { bool stop_tick = true; @@ -311,7 +317,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -void play_idle(unsigned long duration_us) +void play_idle_precise(u64 duration_ns, u64 latency_ns) { struct idle_timer it; @@ -323,29 +329,29 @@ void play_idle(unsigned long duration_us) WARN_ON_ONCE(current->nr_cpus_allowed != 1); WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY)); - WARN_ON_ONCE(!duration_us); + WARN_ON_ONCE(!duration_ns); rcu_sleep_check(); preempt_disable(); current->flags |= PF_IDLE; - cpuidle_use_deepest_state(true); + cpuidle_use_deepest_state(latency_ns); it.done = 0; hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); it.timer.function = idle_inject_timer_fn; - hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC), + hrtimer_start(&it.timer, ns_to_ktime(duration_ns), HRTIMER_MODE_REL_PINNED); while (!READ_ONCE(it.done)) do_idle(); - cpuidle_use_deepest_state(false); + cpuidle_use_deepest_state(0); current->flags &= ~PF_IDLE; preempt_fold_need_resched(); preempt_enable(); } -EXPORT_SYMBOL_GPL(play_idle); +EXPORT_SYMBOL_GPL(play_idle_precise); void cpu_startup_entry(enum cpuhp_state state) { diff --git a/kernel/signal.c b/kernel/signal.c index c4da1ef56fdf..bcd46f547db3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2205,8 +2205,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t */ preempt_disable(); read_unlock(&tasklist_lock); - preempt_enable_no_resched(); cgroup_enter_frozen(); + preempt_enable_no_resched(); freezable_schedule(); cgroup_leave_frozen(true); } else { diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 65eb796610dc..069ca78fb0bf 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -771,7 +771,7 @@ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, /* fill PPS status fields */ pps_fill_timex(txc); - txc->time.tv_sec = (time_t)ts->tv_sec; + txc->time.tv_sec = ts->tv_sec; txc->time.tv_usec = ts->tv_nsec; if (!(time_status & STA_NANO)) txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC; |