diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 19 | ||||
-rw-r--r-- | kernel/panic.c | 6 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 20 | ||||
-rw-r--r-- | kernel/rcutree.c | 2 | ||||
-rw-r--r-- | kernel/rcutree.h | 14 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 165 | ||||
-rw-r--r-- | kernel/sys.c | 6 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace.c | 2 | ||||
-rw-r--r-- | kernel/watchdog.c | 19 |
10 files changed, 168 insertions, 92 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 34867cc5b42a..2f59cc334516 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -72,6 +72,18 @@ static void __unhash_process(struct task_struct *p, bool group_dead) list_del_rcu(&p->tasks); list_del_init(&p->sibling); __this_cpu_dec(process_counts); + /* + * If we are the last child process in a pid namespace to be + * reaped, notify the reaper sleeping zap_pid_ns_processes(). + */ + if (IS_ENABLED(CONFIG_PID_NS)) { + struct task_struct *parent = p->real_parent; + + if ((task_active_pid_ns(parent)->child_reaper == parent) && + list_empty(&parent->children) && + (parent->flags & PF_EXITING)) + wake_up_process(parent); + } } list_del_rcu(&p->thread_group); } @@ -643,6 +655,7 @@ static void exit_mm(struct task_struct * tsk) mm_release(tsk, mm); if (!mm) return; + sync_mm_rss(mm); /* * Serialize with any possible pending coredump. * We must hold mmap_sem around checking core_state @@ -719,12 +732,6 @@ static struct task_struct *find_new_reaper(struct task_struct *father) zap_pid_ns_processes(pid_ns); write_lock_irq(&tasklist_lock); - /* - * We can not clear ->child_reaper or leave it alone. - * There may by stealth EXIT_DEAD tasks on ->children, - * forget_original_parent() must move them somewhere. - */ - pid_ns->child_reaper = init_pid_ns.child_reaper; } else if (father->signal->has_child_subreaper) { struct task_struct *reaper; diff --git a/kernel/panic.c b/kernel/panic.c index 8ed89a175d79..d2a5f4ecc6dd 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -27,7 +27,7 @@ #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 -int panic_on_oops; +int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; static unsigned long tainted_mask; static int pause_on_oops; static int pause_on_oops_flag; @@ -108,8 +108,6 @@ void panic(const char *fmt, ...) */ crash_kexec(NULL); - kmsg_dump(KMSG_DUMP_PANIC); - /* * Note smp_send_stop is the usual smp shutdown function, which * unfortunately means it may not be hardened to work in a panic @@ -117,6 +115,8 @@ void panic(const char *fmt, ...) */ smp_send_stop(); + kmsg_dump(KMSG_DUMP_PANIC); + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); bust_spinlocks(0); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 16b20e38c4a1..b3c7fd554250 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -184,11 +184,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) } read_unlock(&tasklist_lock); + /* Firstly reap the EXIT_ZOMBIE children we may have. */ do { clear_thread_flag(TIF_SIGPENDING); rc = sys_wait4(-1, NULL, __WALL, NULL); } while (rc != -ECHILD); + /* + * sys_wait4() above can't reap the TASK_DEAD children. + * Make sure they all go away, see __unhash_process(). + */ + for (;;) { + bool need_wait = false; + + read_lock(&tasklist_lock); + if (!list_empty(¤t->children)) { + __set_current_state(TASK_UNINTERRUPTIBLE); + need_wait = true; + } + read_unlock(&tasklist_lock); + + if (!need_wait) + break; + schedule(); + } + if (pid_ns->reboot) current->signal->group_exit_code = pid_ns->reboot; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0da7b88d92d0..3b0f1337f75b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1397,6 +1397,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) rdp->qlen_lazy += rsp->qlen_lazy; rdp->qlen += rsp->qlen; rdp->n_cbs_adopted += rsp->qlen; + if (rsp->qlen_lazy != rsp->qlen) + rcu_idle_count_callbacks_posted(); rsp->qlen_lazy = 0; rsp->qlen = 0; diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7f5d138dedf5..ea056495783e 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -84,6 +84,20 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ +#ifdef CONFIG_RCU_FAST_NO_HZ + int dyntick_drain; /* Prepare-for-idle state variable. */ + unsigned long dyntick_holdoff; + /* No retries for the jiffy of failure. */ + struct timer_list idle_gp_timer; + /* Wake up CPU sleeping with callbacks. */ + unsigned long idle_gp_timer_expires; + /* When to wake up CPU (for repost). */ + bool idle_first_pass; /* First pass of attempt to go idle? */ + unsigned long nonlazy_posted; + /* # times non-lazy CBs posted to CPU. */ + unsigned long nonlazy_posted_snap; + /* idle-period nonlazy_posted snapshot. */ +#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; /* RCU's kthread states for tracing. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 2411000d9869..5271a020887e 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ -int rcu_needs_cpu(int cpu) +int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return rcu_cpu_has_callbacks(cpu); } @@ -1962,41 +1963,6 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -/* Loop counter for rcu_prepare_for_idle(). */ -static DEFINE_PER_CPU(int, rcu_dyntick_drain); -/* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */ -static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); -/* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */ -static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer); -/* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */ -static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires); -/* Enable special processing on first attempt to enter dyntick-idle mode. */ -static DEFINE_PER_CPU(bool, rcu_idle_first_pass); -/* Running count of non-lazy callbacks posted, never decremented. */ -static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted); -/* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */ -static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap); - -/* - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no - * callbacks on this CPU, (2) this CPU has not yet attempted to enter - * dyntick-idle mode, or (3) this CPU is in the process of attempting to - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed - * to enter dyntick-idle mode, we refuse to try to enter it. After all, - * it is better to incur scheduling-clock interrupts than to spin - * continuously for the same time duration! - */ -int rcu_needs_cpu(int cpu) -{ - /* Flag a new idle sojourn to the idle-entry state machine. */ - per_cpu(rcu_idle_first_pass, cpu) = 1; - /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) - return 0; - /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ - return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; -} - /* * Does the specified flavor of RCU have non-lazy callbacks pending on * the specified CPU? Both RCU flavor and CPU are specified by the @@ -2040,6 +2006,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu) } /* + * Allow the CPU to enter dyntick-idle mode if either: (1) There are no + * callbacks on this CPU, (2) this CPU has not yet attempted to enter + * dyntick-idle mode, or (3) this CPU is in the process of attempting to + * enter dyntick-idle mode. Otherwise, if we have recently tried and failed + * to enter dyntick-idle mode, we refuse to try to enter it. After all, + * it is better to incur scheduling-clock interrupts than to spin + * continuously for the same time duration! + * + * The delta_jiffies argument is used to store the time when RCU is + * going to need the CPU again if it still has callbacks. The reason + * for this is that rcu_prepare_for_idle() might need to post a timer, + * but if so, it will do so after tick_nohz_stop_sched_tick() has set + * the wakeup time for this CPU. This means that RCU's timer can be + * delayed until the wakeup time, which defeats the purpose of posting + * a timer. + */ +int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +{ + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + /* Flag a new idle sojourn to the idle-entry state machine. */ + rdtp->idle_first_pass = 1; + /* If no callbacks, RCU doesn't need the CPU. */ + if (!rcu_cpu_has_callbacks(cpu)) { + *delta_jiffies = ULONG_MAX; + return 0; + } + if (rdtp->dyntick_holdoff == jiffies) { + /* RCU recently tried and failed, so don't try again. */ + *delta_jiffies = 1; + return 1; + } + /* Set up for the possibility that RCU will post a timer. */ + if (rcu_cpu_has_nonlazy_callbacks(cpu)) + *delta_jiffies = RCU_IDLE_GP_DELAY; + else + *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; + return 0; +} + +/* * Handler for smp_call_function_single(). The only point of this * handler is to wake the CPU up, so the handler does only tracing. */ @@ -2075,21 +2082,24 @@ static void rcu_idle_gp_timer_func(unsigned long cpu_in) */ static void rcu_prepare_for_idle_init(int cpu) { - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; - setup_timer(&per_cpu(rcu_idle_gp_timer, cpu), - rcu_idle_gp_timer_func, cpu); - per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1; - per_cpu(rcu_idle_first_pass, cpu) = 1; + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + rdtp->dyntick_holdoff = jiffies - 1; + setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); + rdtp->idle_gp_timer_expires = jiffies - 1; + rdtp->idle_first_pass = 1; } /* * Clean up for exit from idle. Because we are exiting from idle, there - * is no longer any point to rcu_idle_gp_timer, so cancel it. This will + * is no longer any point to ->idle_gp_timer, so cancel it. This will * do nothing if this timer is not active, so just cancel it unconditionally. */ static void rcu_cleanup_after_idle(int cpu) { - del_timer(&per_cpu(rcu_idle_gp_timer, cpu)); + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + del_timer(&rdtp->idle_gp_timer); trace_rcu_prep_idle("Cleanup after idle"); } @@ -2108,42 +2118,41 @@ static void rcu_cleanup_after_idle(int cpu) * Because it is not legal to invoke rcu_process_callbacks() with irqs * disabled, we do one pass of force_quiescent_state(), then do a * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked - * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. + * later. The ->dyntick_drain field controls the sequencing. * * The caller must have disabled interrupts. */ static void rcu_prepare_for_idle(int cpu) { struct timer_list *tp; + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); /* * If this is an idle re-entry, for example, due to use of * RCU_NONIDLE() or the new idle-loop tracing API within the idle * loop, then don't take any state-machine actions, unless the * momentary exit from idle queued additional non-lazy callbacks. - * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks + * Instead, repost the ->idle_gp_timer if this CPU has callbacks * pending. */ - if (!per_cpu(rcu_idle_first_pass, cpu) && - (per_cpu(rcu_nonlazy_posted, cpu) == - per_cpu(rcu_nonlazy_posted_snap, cpu))) { + if (!rdtp->idle_first_pass && + (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { if (rcu_cpu_has_callbacks(cpu)) { - tp = &per_cpu(rcu_idle_gp_timer, cpu); - mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); + tp = &rdtp->idle_gp_timer; + mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); } return; } - per_cpu(rcu_idle_first_pass, cpu) = 0; - per_cpu(rcu_nonlazy_posted_snap, cpu) = - per_cpu(rcu_nonlazy_posted, cpu) - 1; + rdtp->idle_first_pass = 0; + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; /* * If there are no callbacks on this CPU, enter dyntick-idle mode. * Also reset state to avoid prejudicing later attempts. */ if (!rcu_cpu_has_callbacks(cpu)) { - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; - per_cpu(rcu_dyntick_drain, cpu) = 0; + rdtp->dyntick_holdoff = jiffies - 1; + rdtp->dyntick_drain = 0; trace_rcu_prep_idle("No callbacks"); return; } @@ -2152,36 +2161,37 @@ static void rcu_prepare_for_idle(int cpu) * If in holdoff mode, just return. We will presumably have * refrained from disabling the scheduling-clock tick. */ - if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { + if (rdtp->dyntick_holdoff == jiffies) { trace_rcu_prep_idle("In holdoff"); return; } - /* Check and update the rcu_dyntick_drain sequencing. */ - if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { + /* Check and update the ->dyntick_drain sequencing. */ + if (rdtp->dyntick_drain <= 0) { /* First time through, initialize the counter. */ - per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; - } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && + rdtp->dyntick_drain = RCU_IDLE_FLUSHES; + } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES && !rcu_pending(cpu) && !local_softirq_pending()) { /* Can we go dyntick-idle despite still having callbacks? */ - trace_rcu_prep_idle("Dyntick with callbacks"); - per_cpu(rcu_dyntick_drain, cpu) = 0; - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) - per_cpu(rcu_idle_gp_timer_expires, cpu) = + rdtp->dyntick_drain = 0; + rdtp->dyntick_holdoff = jiffies; + if (rcu_cpu_has_nonlazy_callbacks(cpu)) { + trace_rcu_prep_idle("Dyntick with callbacks"); + rdtp->idle_gp_timer_expires = jiffies + RCU_IDLE_GP_DELAY; - else - per_cpu(rcu_idle_gp_timer_expires, cpu) = + } else { + rdtp->idle_gp_timer_expires = jiffies + RCU_IDLE_LAZY_GP_DELAY; - tp = &per_cpu(rcu_idle_gp_timer, cpu); - mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); - per_cpu(rcu_nonlazy_posted_snap, cpu) = - per_cpu(rcu_nonlazy_posted, cpu); + trace_rcu_prep_idle("Dyntick with lazy callbacks"); + } + tp = &rdtp->idle_gp_timer; + mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; return; /* Nothing more to do immediately. */ - } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { + } else if (--(rdtp->dyntick_drain) <= 0) { /* We have hit the limit, so time to give up. */ - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; + rdtp->dyntick_holdoff = jiffies; trace_rcu_prep_idle("Begin holdoff"); invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ return; @@ -2227,7 +2237,7 @@ static void rcu_prepare_for_idle(int cpu) */ static void rcu_idle_count_callbacks_posted(void) { - __this_cpu_add(rcu_nonlazy_posted, 1); + __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); } #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ @@ -2238,11 +2248,12 @@ static void rcu_idle_count_callbacks_posted(void) static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { - struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu); + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct timer_list *tltp = &rdtp->idle_gp_timer; sprintf(cp, "drain=%d %c timer=%lu", - per_cpu(rcu_dyntick_drain, cpu), - per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', + rdtp->dyntick_drain, + rdtp->dyntick_holdoff == jiffies ? 'H' : '.', timer_pending(tltp) ? tltp->expires - jiffies : -1); } diff --git a/kernel/sys.c b/kernel/sys.c index f0ec44dcd415..e0c8ffc50d7f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2127,9 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else return -EINVAL; break; - case PR_GET_TID_ADDRESS: - error = prctl_get_tid_address(me, (int __user **)arg2); - break; default: return -EINVAL; } @@ -2147,6 +2144,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_MM: error = prctl_set_mm(arg2, arg3, arg4, arg5); break; + case PR_GET_TID_ADDRESS: + error = prctl_get_tid_address(me, (int __user **)arg2); + break; case PR_SET_CHILD_SUBREAPER: me->signal->is_child_subreaper = !!arg2; error = 0; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index da70c6db496c..869997833928 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); static void tick_nohz_stop_sched_tick(struct tick_sched *ts) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; + unsigned long rcu_delta_jiffies; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; @@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) time_delta = timekeeping_max_deferment(); } while (read_seqretry(&xtime_lock, seq)); - if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || + if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || arch_needs_cpu(cpu)) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; @@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) /* Get the next timer wheel timer */ next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; + if (rcu_delta_jiffies < delta_jiffies) { + next_jiffies = last_jiffies + rcu_delta_jiffies; + delta_jiffies = rcu_delta_jiffies; + } } /* * Do not stop the tick, if we are only one off diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 68032c6177db..49249c28690d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -371,7 +371,7 @@ EXPORT_SYMBOL_GPL(tracing_on); void tracing_off(void) { if (global_trace.buffer) - ring_buffer_record_on(global_trace.buffer); + ring_buffer_record_off(global_trace.buffer); /* * This flag is only looked at when buffers haven't been * allocated yet. We don't really care about the race diff --git a/kernel/watchdog.c b/kernel/watchdog.c index e5e1d85b8c7c..4b1dfba70f7c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -372,6 +372,13 @@ static int watchdog(void *unused) #ifdef CONFIG_HARDLOCKUP_DETECTOR +/* + * People like the simple clean cpu node info on boot. + * Reduce the watchdog noise by only printing messages + * that are different from what cpu0 displayed. + */ +static unsigned long cpu0_err; + static int watchdog_nmi_enable(int cpu) { struct perf_event_attr *wd_attr; @@ -390,11 +397,21 @@ static int watchdog_nmi_enable(int cpu) /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); + + /* save cpu0 error for future comparision */ + if (cpu == 0 && IS_ERR(event)) + cpu0_err = PTR_ERR(event); + if (!IS_ERR(event)) { - pr_info("enabled, takes one hw-pmu counter.\n"); + /* only print for cpu0 or different than cpu0 */ + if (cpu == 0 || cpu0_err) + pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n"); goto out_save; } + /* skip displaying the same error again */ + if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) + return PTR_ERR(event); /* vary the KERN level based on the returned errno */ if (PTR_ERR(event) == -EOPNOTSUPP) |