diff options
Diffstat (limited to 'include')
39 files changed, 789 insertions, 348 deletions
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 9a62937c56ca..51969436b8b8 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -4,66 +4,12 @@ #include <linux/time.h> #include <linux/jiffies.h> -typedef unsigned long __nocast cputime_t; - -#define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) -#define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) - -typedef u64 __nocast cputime64_t; - -#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) -#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) - -#define nsecs_to_cputime64(__ct) \ - jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) - - -/* - * Convert cputime to microseconds and back. - */ -#define cputime_to_usecs(__ct) \ - jiffies_to_usecs(cputime_to_jiffies(__ct)) -#define usecs_to_cputime(__usec) \ - jiffies_to_cputime(usecs_to_jiffies(__usec)) -#define usecs_to_cputime64(__usec) \ - jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000)) - -/* - * Convert cputime to seconds and back. - */ -#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) -#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) - -/* - * Convert cputime to timespec and back. - */ -#define timespec_to_cputime(__val) \ - jiffies_to_cputime(timespec_to_jiffies(__val)) -#define cputime_to_timespec(__ct,__val) \ - jiffies_to_timespec(cputime_to_jiffies(__ct),__val) - -/* - * Convert cputime to timeval and back. - */ -#define timeval_to_cputime(__val) \ - jiffies_to_cputime(timeval_to_jiffies(__val)) -#define cputime_to_timeval(__ct,__val) \ - jiffies_to_timeval(cputime_to_jiffies(__ct),__val) - -/* - * Convert cputime to clock and back. - */ -#define cputime_to_clock_t(__ct) \ - jiffies_to_clock_t(cputime_to_jiffies(__ct)) -#define clock_t_to_cputime(__x) \ - jiffies_to_cputime(clock_t_to_jiffies(__x)) +#ifndef CONFIG_VIRT_CPU_ACCOUNTING +# include <asm-generic/cputime_jiffies.h> +#endif -/* - * Convert cputime64 to clock. - */ -#define cputime64_to_clock_t(__ct) \ - jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# include <asm-generic/cputime_nsecs.h> +#endif #endif diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h new file mode 100644 index 000000000000..272ecba9f588 --- /dev/null +++ b/include/asm-generic/cputime_jiffies.h @@ -0,0 +1,72 @@ +#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H +#define _ASM_GENERIC_CPUTIME_JIFFIES_H + +typedef unsigned long __nocast cputime_t; + +#define cputime_one_jiffy jiffies_to_cputime(1) +#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) +#define cputime_to_scaled(__ct) (__ct) +#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) + +typedef u64 __nocast cputime64_t; + +#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) +#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) + + +/* + * Convert nanoseconds to cputime + */ +#define nsecs_to_cputime64(__nsec) \ + jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec)) +#define nsecs_to_cputime(__nsec) \ + jiffies_to_cputime(nsecs_to_jiffies(__nsec)) + + +/* + * Convert cputime to microseconds and back. + */ +#define cputime_to_usecs(__ct) \ + jiffies_to_usecs(cputime_to_jiffies(__ct)) +#define usecs_to_cputime(__usec) \ + jiffies_to_cputime(usecs_to_jiffies(__usec)) +#define usecs_to_cputime64(__usec) \ + jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000)) + +/* + * Convert cputime to seconds and back. + */ +#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) +#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) + +/* + * Convert cputime to timespec and back. + */ +#define timespec_to_cputime(__val) \ + jiffies_to_cputime(timespec_to_jiffies(__val)) +#define cputime_to_timespec(__ct,__val) \ + jiffies_to_timespec(cputime_to_jiffies(__ct),__val) + +/* + * Convert cputime to timeval and back. + */ +#define timeval_to_cputime(__val) \ + jiffies_to_cputime(timeval_to_jiffies(__val)) +#define cputime_to_timeval(__ct,__val) \ + jiffies_to_timeval(cputime_to_jiffies(__ct),__val) + +/* + * Convert cputime to clock and back. + */ +#define cputime_to_clock_t(__ct) \ + jiffies_to_clock_t(cputime_to_jiffies(__ct)) +#define clock_t_to_cputime(__x) \ + jiffies_to_cputime(clock_t_to_jiffies(__x)) + +/* + * Convert cputime64 to clock. + */ +#define cputime64_to_clock_t(__ct) \ + jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) + +#endif diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h new file mode 100644 index 000000000000..b6485cafb7bd --- /dev/null +++ b/include/asm-generic/cputime_nsecs.h @@ -0,0 +1,104 @@ +/* + * Definitions for measuring cputime in nsecs resolution. + * + * Based on <arch/ia64/include/asm/cputime.h> + * + * Copyright (C) 2007 FUJITSU LIMITED + * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _ASM_GENERIC_CPUTIME_NSECS_H +#define _ASM_GENERIC_CPUTIME_NSECS_H + +typedef u64 __nocast cputime_t; +typedef u64 __nocast cputime64_t; + +#define cputime_one_jiffy jiffies_to_cputime(1) + +/* + * Convert cputime <-> jiffies (HZ) + */ +#define cputime_to_jiffies(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define cputime_to_scaled(__ct) (__ct) +#define jiffies_to_cputime(__jif) \ + (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) +#define cputime64_to_jiffies64(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define jiffies64_to_cputime64(__jif) \ + (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) + + +/* + * Convert cputime <-> nanoseconds + */ +#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs)) + + +/* + * Convert cputime <-> microseconds + */ +#define cputime_to_usecs(__ct) \ + ((__force u64)(__ct) / NSEC_PER_USEC) +#define usecs_to_cputime(__usecs) \ + (__force cputime_t)((__usecs) * NSEC_PER_USEC) +#define usecs_to_cputime64(__usecs) \ + (__force cputime64_t)((__usecs) * NSEC_PER_USEC) + +/* + * Convert cputime <-> seconds + */ +#define cputime_to_secs(__ct) \ + ((__force u64)(__ct) / NSEC_PER_SEC) +#define secs_to_cputime(__secs) \ + (__force cputime_t)((__secs) * NSEC_PER_SEC) + +/* + * Convert cputime <-> timespec (nsec) + */ +static inline cputime_t timespec_to_cputime(const struct timespec *val) +{ + u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; + return (__force cputime_t) ret; +} +static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) +{ + val->tv_sec = (__force u64) ct / NSEC_PER_SEC; + val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; +} + +/* + * Convert cputime <-> timeval (msec) + */ +static inline cputime_t timeval_to_cputime(struct timeval *val) +{ + u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; + return (__force cputime_t) ret; +} +static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) +{ + val->tv_sec = (__force u64) ct / NSEC_PER_SEC; + val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; +} + +/* + * Convert cputime <-> clock (USER_HZ) + */ +#define cputime_to_clock_t(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) +#define clock_t_to_cputime(__x) \ + (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) + +/* + * Convert cputime64 to clock. + */ +#define cputime64_to_clock_t(__ct) \ + cputime_to_clock_t((__force cputime_t)__ct) + +#endif diff --git a/include/linux/aer.h b/include/linux/aer.h index 544abdb2238c..ec10e1b24c1c 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } #endif -extern void cper_print_aer(const char *prefix, int cper_severity, - struct aer_capability_regs *aer); +extern void cper_print_aer(const char *prefix, struct pci_dev *dev, + int cper_severity, struct aer_capability_regs *aer); extern int cper_severity_to_aer(int cper_severity); extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, int severity); diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index e24339ccb7f0..b28d161c1091 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -3,12 +3,40 @@ #ifdef CONFIG_CONTEXT_TRACKING #include <linux/sched.h> +#include <linux/percpu.h> + +struct context_tracking { + /* + * When active is false, probes are unset in order + * to minimize overhead: TIF flags are cleared + * and calls to user_enter/exit are ignored. This + * may be further optimized using static keys. + */ + bool active; + enum { + IN_KERNEL = 0, + IN_USER, + } state; +}; + +DECLARE_PER_CPU(struct context_tracking, context_tracking); + +static inline bool context_tracking_in_user(void) +{ + return __this_cpu_read(context_tracking.state) == IN_USER; +} + +static inline bool context_tracking_active(void) +{ + return __this_cpu_read(context_tracking.active); +} extern void user_enter(void); extern void user_exit(void); extern void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next); #else +static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } static inline void context_tracking_task_switch(struct task_struct *prev, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 92691d85c320..e5ca8ef50e9b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * SAVE_REGS - The ftrace_ops wants regs saved at each function called * and passed to the callback. If this flag is set, but the * architecture does not support passing regs - * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the + * (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the * ftrace_ops will fail to register, unless the next flag * is set. * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the @@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command); #endif #ifndef FTRACE_REGS_ADDR -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) #else # define FTRACE_REGS_ADDR FTRACE_ADDR @@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS /** * ftrace_modify_call - convert from one addr to another (no nop) * @rec: the mcount call site record diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index a3d489531d83..13a54d0bdfa8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -49,7 +49,6 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; - int padding; }; #define FTRACE_MAX_EVENT \ @@ -84,6 +83,9 @@ struct trace_iterator { long idx; cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; }; enum trace_iter_flags { @@ -272,7 +274,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type, extern int trace_add_event_call(struct ftrace_event_call *call); extern void trace_remove_event_call(struct ftrace_event_call *call); -#define is_signed_type(type) (((type)(-1)) < 0) +#define is_signed_type(type) (((type)(-1)) < (type)0) int trace_set_clr_event(const char *system, const char *event, int set); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 624ef3f45c8e..29eb805ea4a6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void); */ #define __irq_enter() \ do { \ - vtime_account_irq_enter(current); \ + account_irq_enter_time(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -169,7 +169,7 @@ extern void irq_enter(void); #define __irq_exit() \ do { \ trace_hardirq_exit(); \ - vtime_account_irq_exit(current); \ + account_irq_exit_time(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) @@ -180,10 +180,10 @@ extern void irq_exit(void); #define nmi_enter() \ do { \ + lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ - lockdep_off(); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ } while (0) @@ -192,10 +192,10 @@ extern void irq_exit(void); do { \ trace_hardirq_exit(); \ rcu_nmi_exit(); \ - lockdep_on(); \ BUG_ON(!in_nmi()); \ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ + lockdep_on(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6d087c5f57f7..5cd0f0949927 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -10,7 +10,9 @@ #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/securebits.h> +#include <linux/seqlock.h> #include <net/net_namespace.h> +#include <linux/sched/rt.h> #ifdef CONFIG_SMP # define INIT_PUSHABLE_TASKS(tsk) \ @@ -141,6 +143,15 @@ extern struct task_group root_task_group; # define INIT_PERF_EVENTS(tsk) #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# define INIT_VTIME(tsk) \ + .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ + .vtime_snap = 0, \ + .vtime_snap_whence = VTIME_SYS, +#else +# define INIT_VTIME(tsk) +#endif + #define INIT_TASK_COMM "swapper" /* @@ -210,6 +221,7 @@ extern struct task_group root_task_group; INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ INIT_CPUSET_SEQ \ + INIT_VTIME(tsk) \ } diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6a9e8f5399e2..f5dbce50466e 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -3,6 +3,20 @@ #include <linux/llist.h> +/* + * An entry can be in one of four states: + * + * free NULL, 0 -> {claimed} : free to be used + * claimed NULL, 3 -> {pending} : claimed to be enqueued + * pending next, 3 -> {busy} : queued, pending callback + * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed + */ + +#define IRQ_WORK_PENDING 1UL +#define IRQ_WORK_BUSY 2UL +#define IRQ_WORK_FLAGS 3UL +#define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ + struct irq_work { unsigned long flags; struct llist_node llnode; @@ -16,8 +30,14 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) work->func = func; } -bool irq_work_queue(struct irq_work *work); +void irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); +#ifdef CONFIG_IRQ_WORK +bool irq_work_needs_cpu(void); +#else +static bool irq_work_needs_cpu(void) { return false; } +#endif + #endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 66b70780e910..ed5f6ed6eb77 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t) extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) { vtime_account_user(tsk); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 23755ba42abc..4b6ef4d33cc2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -49,16 +49,6 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 -/* - * If function tracer is enabled and the arch supports full - * passing of pt_regs to function tracing, then kprobes can - * optimize on top of function tracing. - */ -#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \ - && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE) -# define KPROBES_CAN_USE_FTRACE -#endif - /* Attach to insert probes on any functions which should be ignored*/ #define __kprobes __attribute__((__section__(".kprobes.text"))) @@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif #endif /* CONFIG_OPTPROBES */ -#ifdef KPROBES_CAN_USE_FTRACE +#ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c497ab0d03d..b7996a768eb2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -22,6 +22,7 @@ #include <linux/rcupdate.h> #include <linux/ratelimit.h> #include <linux/err.h> +#include <linux/irqflags.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm, } #endif /* CONFIG_IOMMU_API */ -static inline void kvm_guest_enter(void) +static inline void __guest_enter(void) { - BUG_ON(preemptible()); /* * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. */ - vtime_account_system_irqsafe(current); + vtime_account_system(current); current->flags |= PF_VCPU; +} + +static inline void __guest_exit(void) +{ + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); + current->flags &= ~PF_VCPU; +} + +#ifdef CONFIG_CONTEXT_TRACKING +extern void guest_enter(void); +extern void guest_exit(void); + +#else /* !CONFIG_CONTEXT_TRACKING */ +static inline void guest_enter(void) +{ + __guest_enter(); +} + +static inline void guest_exit(void) +{ + __guest_exit(); +} +#endif /* !CONFIG_CONTEXT_TRACKING */ + +static inline void kvm_guest_enter(void) +{ + unsigned long flags; + + BUG_ON(preemptible()); + + local_irq_save(flags); + guest_enter(); + local_irq_restore(flags); + /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspase from rcu point of view. In @@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void) static inline void kvm_guest_exit(void) { - /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. - */ - vtime_account_system_irqsafe(current); - current->flags &= ~PF_VCPU; + unsigned long flags; + + local_irq_save(flags); + guest_exit(); + local_irq_restore(flags); } /* diff --git a/include/linux/llist.h b/include/linux/llist.h index a5199f6d0e82..d0ab98f73d38 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -125,6 +125,31 @@ static inline void init_llist_head(struct llist_head *list) (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** + * llist_for_each_entry_safe - iterate safely against remove over some entries + * of lock-less list of given type. + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as a temporary storage. + * @node: the fist entry of deleted list entries. + * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. This variant allows removal of entries + * as we iterate. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry_safe(pos, n, node, member) \ + for ((pos) = llist_entry((node), typeof(*(pos)), member), \ + (n) = (pos)->member.next; \ + &(pos)->member != NULL; \ + (pos) = llist_entry(n, typeof(*(pos)), member), \ + (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL) + +/** * llist_empty - tests whether a lock-less list is empty * @head: the list to test * diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0108a56f814e..28bd5fa2ff2e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -429,7 +429,7 @@ extern int memcg_limited_groups_array_size; * the slab_mutex must be held when looping through those caches */ #define for_each_memcg_cache_index(_idx) \ - for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++) + for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++) static inline bool memcg_kmem_enabled(void) { diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index bc823c4c028b..deca87452528 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -151,7 +151,7 @@ struct mmu_notifier_ops { * Therefore notifier chains can only be traversed when either * * 1. mmap_sem is held. - * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->mutex). + * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem). * 3. No other concurrent thread can access the list (release) */ struct mmu_notifier { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6bfb2faa0b19..e47ee462c2f2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -135,16 +135,21 @@ struct hw_perf_event { struct { /* software */ struct hrtimer hrtimer; }; + struct { /* tracepoint */ + struct task_struct *tp_target; + /* for tp_event->class */ + struct list_head tp_list; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ - struct arch_hw_breakpoint info; - struct list_head bp_list; /* * Crufty hack to avoid the chicken and egg * problem hw_breakpoint has with context * creation and event initalization. */ struct task_struct *bp_target; + struct arch_hw_breakpoint info; + struct list_head bp_list; }; #endif }; @@ -817,6 +822,17 @@ do { \ } while (0) +struct perf_pmu_events_attr { + struct device_attribute attr; + u64 id; +}; + +#define PMU_EVENT_ATTR(_name, _var, _id, _show) \ +static struct perf_pmu_events_attr _var = { \ + .attr = __ATTR(_name, 0444, _show, NULL), \ + .id = _id, \ +}; + #define PMU_FORMAT_ATTR(_name, _format) \ static ssize_t \ _name##_show(struct device *dev, \ diff --git a/include/linux/printk.h b/include/linux/printk.h index 9afc01e5a0a6..86c4b6294713 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...) extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); -extern int printk_needs_cpu(int cpu); -extern void printk_tick(void); - #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) int vprintk_emit(int facility, int level, diff --git a/include/linux/profile.h b/include/linux/profile.h index a0fc32279fc0..21123902366d 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n); int profile_event_register(enum profile_type, struct notifier_block * n); int profile_event_unregister(enum profile_type, struct notifier_block * n); -int register_timer_hook(int (*hook)(struct pt_regs *)); -void unregister_timer_hook(int (*hook)(struct pt_regs *)); - struct pt_regs; #else @@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_ #define profile_handoff_task(a) (0) #define profile_munmap(a) do { } while (0) -static inline int register_timer_hook(int (*hook)(struct pt_regs *)) -{ - return -ENOSYS; -} - -static inline void unregister_timer_hook(int (*hook)(struct pt_regs *)) -{ - return; -} - #endif /* CONFIG_PROFILING */ #endif /* _LINUX_PROFILE_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 275aa3f1062d..b758ce17b309 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -53,7 +53,10 @@ extern int rcutorture_runnable; /* for sysctl */ extern void rcutorture_record_test_transition(void); extern void rcutorture_record_progress(unsigned long vernum); extern void do_trace_rcu_torture_read(char *rcutorturename, - struct rcu_head *rhp); + struct rcu_head *rhp, + unsigned long secs, + unsigned long c_old, + unsigned long c); #else static inline void rcutorture_record_test_transition(void) { @@ -63,9 +66,13 @@ static inline void rcutorture_record_progress(unsigned long vernum) } #ifdef CONFIG_RCU_TRACE extern void do_trace_rcu_torture_read(char *rcutorturename, - struct rcu_head *rhp); + struct rcu_head *rhp, + unsigned long secs, + unsigned long c_old, + unsigned long c); #else -#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ + do { } while (0) #endif #endif @@ -749,7 +756,7 @@ static inline void rcu_preempt_sleep_check(void) * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may * be preempted, but explicit blocking is illegal. Finally, in preemptible - * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, + * RCU implementations in real-time (with -rt patchset) kernel builds, * RCU read-side critical sections may be preempted and they may also * block, but only when acquiring spinlocks that are subject to priority * inheritance. diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 519777e3fa01..1342e69542f3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, diff --git a/include/linux/sched.h b/include/linux/sched.h index d2112477ff5e..33cc42130371 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -304,19 +304,6 @@ static inline void lockup_detector_init(void) } #endif -#ifdef CONFIG_DETECT_HUNG_TASK -extern unsigned int sysctl_hung_task_panic; -extern unsigned long sysctl_hung_task_check_count; -extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_warnings; -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -#else -/* Avoid need for ifdefs elsewhere in the code */ -enum { sysctl_hung_task_timeout_secs = 0 }; -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) @@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; struct user_namespace; -/* - * Default maximum number of active map areas, this limits the number of vmas - * per mm struct. Users can overwrite this number by sysctl but there is a - * problem. - * - * When a program's coredump is generated as ELF format, a section is created - * per a vma. In ELF, the number of sections is represented in unsigned short. - * This means the number of sections should be smaller than 65535 at coredump. - * Because the kernel adds some informative sections to a image of program at - * generating coredump, we need some margin. The number of extra sections is - * 1-3 now and depends on arch. We use "5" as safe margin, here. - */ -#define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) - -extern int sysctl_max_map_count; - #include <linux/aio.h> #ifdef CONFIG_MMU @@ -1194,6 +1164,7 @@ struct sched_entity { /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; #endif + /* * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be * removed when useful for applications beyond shares distribution (e.g. @@ -1208,6 +1179,7 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; unsigned long timeout; + unsigned long watchdog_stamp; unsigned int time_slice; struct sched_rt_entity *back; @@ -1220,11 +1192,6 @@ struct sched_rt_entity { #endif }; -/* - * default timeslice is 100 msecs (used only for SCHED_RR tasks). - * Timeslices get refilled after they expire. - */ -#define RR_TIMESLICE (100 * HZ / 1000) struct rcu_node; @@ -1368,6 +1335,15 @@ struct task_struct { #ifndef CONFIG_VIRT_CPU_ACCOUNTING struct cputime prev_cputime; #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqlock_t vtime_seqlock; + unsigned long long vtime_snap; + enum { + VTIME_SLEEPING = 0, + VTIME_USER, + VTIME_SYS, + } vtime_snap_whence; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ @@ -1622,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled) } #endif -/* - * Priority of a process goes from 0..MAX_PRIO-1, valid RT - * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH - * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority - * values are inverted: lower p->prio value means higher priority. - * - * The MAX_USER_RT_PRIO value allows the actual maximum - * RT priority to be separate from the value exported to - * user-space. This allows kernel threads to set their - * priority to a value higher than any user task. Note: - * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. - */ - -#define MAX_USER_RT_PRIO 100 -#define MAX_RT_PRIO MAX_USER_RT_PRIO - -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) - -static inline int rt_prio(int prio) -{ - if (unlikely(prio < MAX_RT_PRIO)) - return 1; - return 0; -} - -static inline int rt_task(struct task_struct *p) -{ - return rt_prio(p->prio); -} - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1792,6 +1737,37 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime); +extern void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, cputime_t *stimescaled); +extern cputime_t task_gtime(struct task_struct *t); +#else +static inline void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime) +{ + if (utime) + *utime = t->utime; + if (stime) + *stime = t->stime; +} + +static inline void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, + cputime_t *stimescaled) +{ + if (utimescaled) + *utimescaled = t->utimescaled; + if (stimescaled) + *stimescaled = t->stimescaled; +} + +static inline cputime_t task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); @@ -2033,58 +2009,7 @@ extern void wake_up_idle_cpu(int cpu); static inline void wake_up_idle_cpu(int cpu) { } #endif -extern unsigned int sysctl_sched_latency; -extern unsigned int sysctl_sched_min_granularity; -extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_child_runs_first; - -enum sched_tunable_scaling { - SCHED_TUNABLESCALING_NONE, - SCHED_TUNABLESCALING_LOG, - SCHED_TUNABLESCALING_LINEAR, - SCHED_TUNABLESCALING_END, -}; -extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; - -extern unsigned int sysctl_numa_balancing_scan_delay; -extern unsigned int sysctl_numa_balancing_scan_period_min; -extern unsigned int sysctl_numa_balancing_scan_period_max; -extern unsigned int sysctl_numa_balancing_scan_period_reset; -extern unsigned int sysctl_numa_balancing_scan_size; -extern unsigned int sysctl_numa_balancing_settle_count; - -#ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_time_avg; -extern unsigned int sysctl_timer_migration; -extern unsigned int sysctl_sched_shares_window; - -int sched_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, - loff_t *ppos); -#endif -#ifdef CONFIG_SCHED_DEBUG -static inline unsigned int get_sysctl_timer_migration(void) -{ - return sysctl_timer_migration; -} -#else -static inline unsigned int get_sysctl_timer_migration(void) -{ - return 1; -} -#endif -extern unsigned int sysctl_sched_rt_period; -extern int sysctl_sched_rt_runtime; - -int sched_rt_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - #ifdef CONFIG_SCHED_AUTOGROUP -extern unsigned int sysctl_sched_autogroup_enabled; - extern void sched_autogroup_create_attach(struct task_struct *p); extern void sched_autogroup_detach(struct task_struct *p); extern void sched_autogroup_fork(struct signal_struct *sig); @@ -2100,30 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } #endif -#ifdef CONFIG_CFS_BANDWIDTH -extern unsigned int sysctl_sched_cfs_bandwidth_slice; -#endif - -#ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern void rt_mutex_adjust_pi(struct task_struct *p); -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return tsk->pi_blocked_on != NULL; -} -#else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} -# define rt_mutex_adjust_pi(p) do { } while (0) -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return false; -} -#endif - extern bool yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); @@ -2753,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -extern void normalize_rt_tasks(void); - #ifdef CONFIG_CGROUP_SCHED extern struct task_group root_task_group; diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h new file mode 100644 index 000000000000..94e19ea28fc3 --- /dev/null +++ b/include/linux/sched/rt.h @@ -0,0 +1,58 @@ +#ifndef _SCHED_RT_H +#define _SCHED_RT_H + +/* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH + * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority + * values are inverted: lower p->prio value means higher priority. + * + * The MAX_USER_RT_PRIO value allows the actual maximum + * RT priority to be separate from the value exported to + * user-space. This allows kernel threads to set their + * priority to a value higher than any user task. Note: + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + */ + +#define MAX_USER_RT_PRIO 100 +#define MAX_RT_PRIO MAX_USER_RT_PRIO + +#define MAX_PRIO (MAX_RT_PRIO + 40) +#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + +static inline int rt_prio(int prio) +{ + if (unlikely(prio < MAX_RT_PRIO)) + return 1; + return 0; +} + +static inline int rt_task(struct task_struct *p) +{ + return rt_prio(p->prio); +} + +#ifdef CONFIG_RT_MUTEXES +extern int rt_mutex_getprio(struct task_struct *p); +extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern void rt_mutex_adjust_pi(struct task_struct *p); +static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +{ + return tsk->pi_blocked_on != NULL; +} +#else +static inline int rt_mutex_getprio(struct task_struct *p) +{ + return p->normal_prio; +} +# define rt_mutex_adjust_pi(p) do { } while (0) +static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +{ + return false; +} +#endif + +extern void normalize_rt_tasks(void); + + +#endif /* _SCHED_RT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h new file mode 100644 index 000000000000..d2bb0ae979d0 --- /dev/null +++ b/include/linux/sched/sysctl.h @@ -0,0 +1,110 @@ +#ifndef _SCHED_SYSCTL_H +#define _SCHED_SYSCTL_H + +#ifdef CONFIG_DETECT_HUNG_TASK +extern unsigned int sysctl_hung_task_panic; +extern unsigned long sysctl_hung_task_check_count; +extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_warnings; +extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +#else +/* Avoid need for ifdefs elsewhere in the code */ +enum { sysctl_hung_task_timeout_secs = 0 }; +#endif + +/* + * Default maximum number of active map areas, this limits the number of vmas + * per mm struct. Users can overwrite this number by sysctl but there is a + * problem. + * + * When a program's coredump is generated as ELF format, a section is created + * per a vma. In ELF, the number of sections is represented in unsigned short. + * This means the number of sections should be smaller than 65535 at coredump. + * Because the kernel adds some informative sections to a image of program at + * generating coredump, we need some margin. The number of extra sections is + * 1-3 now and depends on arch. We use "5" as safe margin, here. + */ +#define MAPCOUNT_ELF_CORE_MARGIN (5) +#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) + +extern int sysctl_max_map_count; + +extern unsigned int sysctl_sched_latency; +extern unsigned int sysctl_sched_min_granularity; +extern unsigned int sysctl_sched_wakeup_granularity; +extern unsigned int sysctl_sched_child_runs_first; + +enum sched_tunable_scaling { + SCHED_TUNABLESCALING_NONE, + SCHED_TUNABLESCALING_LOG, + SCHED_TUNABLESCALING_LINEAR, + SCHED_TUNABLESCALING_END, +}; +extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; + +extern unsigned int sysctl_numa_balancing_scan_delay; +extern unsigned int sysctl_numa_balancing_scan_period_min; +extern unsigned int sysctl_numa_balancing_scan_period_max; +extern unsigned int sysctl_numa_balancing_scan_period_reset; +extern unsigned int sysctl_numa_balancing_scan_size; +extern unsigned int sysctl_numa_balancing_settle_count; + +#ifdef CONFIG_SCHED_DEBUG +extern unsigned int sysctl_sched_migration_cost; +extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_time_avg; +extern unsigned int sysctl_timer_migration; +extern unsigned int sysctl_sched_shares_window; + +int sched_proc_update_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, + loff_t *ppos); +#endif +#ifdef CONFIG_SCHED_DEBUG +static inline unsigned int get_sysctl_timer_migration(void) +{ + return sysctl_timer_migration; +} +#else +static inline unsigned int get_sysctl_timer_migration(void) +{ + return 1; +} +#endif + +/* + * control realtime throttling: + * + * /proc/sys/kernel/sched_rt_period_us + * /proc/sys/kernel/sched_rt_runtime_us + */ +extern unsigned int sysctl_sched_rt_period; +extern int sysctl_sched_rt_runtime; + +#ifdef CONFIG_CFS_BANDWIDTH +extern unsigned int sysctl_sched_cfs_bandwidth_slice; +#endif + +#ifdef CONFIG_SCHED_AUTOGROUP +extern unsigned int sysctl_sched_autogroup_enabled; +#endif + +/* + * default timeslice is 100 msecs (used only for SCHED_RR tasks). + * Timeslices get refilled after they expire. + */ +#define RR_TIMESLICE (100 * HZ / 1000) + +extern int sched_rr_timeslice; + +extern int sched_rr_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +extern int sched_rt_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +#endif /* _SCHED_SYSCTL_H */ diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index e0106d8581d3..c65dee059913 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -14,6 +14,8 @@ struct smpboot_thread_data; * @thread_should_run: Check whether the thread should run or not. Called with * preemption disabled. * @thread_fn: The associated thread function + * @create: Optional setup function, called when the thread gets + * created (Not called from the thread context) * @setup: Optional setup function, called when the thread gets * operational the first time * @cleanup: Optional cleanup function, called when the thread @@ -22,6 +24,7 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) + * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ struct smp_hotplug_thread { @@ -29,10 +32,12 @@ struct smp_hotplug_thread { struct list_head list; int (*thread_should_run)(unsigned int cpu); void (*thread_fn)(unsigned int cpu); + void (*create)(unsigned int cpu); void (*setup)(unsigned int cpu); void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); + bool selfparking; const char *thread_comm; }; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 6eb691b08358..04f4121a23ae 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -151,30 +151,14 @@ void srcu_barrier(struct srcu_struct *sp); * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. * - * Note that if the CPU is in the idle loop from an RCU point of view - * (ie: that we are in the section between rcu_idle_enter() and - * rcu_idle_exit()) then srcu_read_lock_held() returns false even if - * the CPU did an srcu_read_lock(). The reason for this is that RCU - * ignores CPUs that are in such a section, considering these as in - * extended quiescent state, so such a CPU is effectively never in an - * RCU read-side critical section regardless of what RCU primitives it - * invokes. This state of affairs is required --- we need to keep an - * RCU-free window in idle where the CPU may possibly enter into low - * power mode. This way we can notice an extended quiescent state to - * other CPUs that started a grace period. Otherwise we would delay any - * grace period as long as we run in the idle task. - * - * Similarly, we avoid claiming an SRCU read lock held if the current - * CPU is offline. + * Note that SRCU is based on its own statemachine and it doesn't + * relies on normal RCU, it can be called from the CPU which + * is in the idle loop from an RCU point of view or offline. */ static inline int srcu_read_lock_held(struct srcu_struct *sp) { if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) - return 0; - if (!rcu_lockdep_current_cpu_online()) - return 0; return lock_is_held(&sp->dep_map); } @@ -236,8 +220,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) int retval = __srcu_read_lock(sp); rcu_lock_acquire(&(sp)->dep_map); - rcu_lockdep_assert(!rcu_is_cpu_idle(), - "srcu_read_lock() used illegally while idle"); return retval; } @@ -251,8 +233,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp) { - rcu_lockdep_assert(!rcu_is_cpu_idle(), - "srcu_read_unlock() used illegally while idle"); rcu_lock_release(&(sp)->dep_map); __srcu_read_unlock(sp, idx); } diff --git a/include/linux/tick.h b/include/linux/tick.h index 1a6567b48492..553272e6af55 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -8,6 +8,8 @@ #include <linux/clockchips.h> #include <linux/irqflags.h> +#include <linux/percpu.h> +#include <linux/hrtimer.h> #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ +DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); + +static inline int tick_nohz_tick_stopped(void) +{ + return __this_cpu_read(tick_cpu_sched.tick_stopped); +} + extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else + +# else /* !CONFIG_NO_HZ */ +static inline int tick_nohz_tick_stopped(void) +{ + return 0; +} + static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 44893e5ec8f7..3251965bf4cc 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns, #ifdef CONFIG_TASK_XACCT extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); extern void acct_update_integrals(struct task_struct *tsk); +extern void acct_account_cputime(struct task_struct *tsk); extern void acct_clear_integrals(struct task_struct *tsk); #else static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) {} static inline void acct_update_integrals(struct task_struct *tsk) {} +static inline void acct_account_cputime(struct task_struct *tsk) +{} static inline void acct_clear_integrals(struct task_struct *tsk) {} #endif /* CONFIG_TASK_XACCT */ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4f628a6fc5b4..02b83db8e2c5 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -35,13 +35,20 @@ struct inode; # include <asm/uprobes.h> #endif +#define UPROBE_HANDLER_REMOVE 1 +#define UPROBE_HANDLER_MASK 1 + +enum uprobe_filter_ctx { + UPROBE_FILTER_REGISTER, + UPROBE_FILTER_UNREGISTER, + UPROBE_FILTER_MMAP, +}; + struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); - /* - * filter is optional; If a filter exists, handler is run - * if and only if filter returns true. - */ - bool (*filter)(struct uprobe_consumer *self, struct task_struct *task); + bool (*filter)(struct uprobe_consumer *self, + enum uprobe_filter_ctx ctx, + struct mm_struct *mm); struct uprobe_consumer *next; }; @@ -94,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -117,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { return -ENOSYS; } +static inline int +uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) +{ + return -ENOSYS; +} static inline void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { diff --git a/include/linux/usb.h b/include/linux/usb.h index 689b14b26c8d..4d22d0f6167a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -357,6 +357,8 @@ struct usb_bus { int bandwidth_int_reqs; /* number of Interrupt requests */ int bandwidth_isoc_reqs; /* number of Isoc. requests */ + unsigned resuming_ports; /* bit array: resuming root-hub ports */ + #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE) struct mon_bus *mon_bus; /* non-null when associated */ int monitored; /* non-zero when monitored */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 608050b2545f..0a78df5f6cfd 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -430,6 +430,9 @@ extern void usb_hcd_poll_rh_status(struct usb_hcd *hcd); extern void usb_wakeup_notification(struct usb_device *hdev, unsigned int portnum); +extern void usb_hcd_start_port_resume(struct usb_bus *bus, int portnum); +extern void usb_hcd_end_port_resume(struct usb_bus *bus, int portnum); + /* The D0/D1 toggle bits ... USE WITH CAUTION (they're almost hcd-internal) */ #define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1) #define usb_dotoggle(dev, ep, out) ((dev)->toggle[out] ^= (1 << (ep))) diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 5de7a220e986..0e5ac93bab10 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -33,6 +33,7 @@ struct usbnet { wait_queue_head_t *wait; struct mutex phy_mutex; unsigned char suspend_count; + unsigned char pkt_cnt, pkt_err; /* i/o info: pipes etc */ unsigned in, out; @@ -70,6 +71,7 @@ struct usbnet { # define EVENT_DEV_OPEN 7 # define EVENT_DEVICE_REPORT_IDLE 8 # define EVENT_NO_RUNTIME_PM 9 +# define EVENT_RX_KILL 10 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -100,7 +102,6 @@ struct driver_info { #define FLAG_LINK_INTR 0x0800 /* updates link (carrier) status */ #define FLAG_POINTTOPOINT 0x1000 /* possibly use "usb%d" names */ -#define FLAG_NOARP 0x2000 /* device can't do ARP */ /* * Indicates to usbnet, that USB driver accumulates multiple IP packets. @@ -108,6 +109,7 @@ struct driver_info { */ #define FLAG_MULTI_PACKET 0x2000 #define FLAG_RX_ASSEMBLE 0x4000 /* rx packets may span >1 frames */ +#define FLAG_NOARP 0x8000 /* device can't do ARP */ /* init device ... can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index ae30ab58431a..71a5782d8c59 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -6,15 +6,46 @@ struct task_struct; #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void vtime_task_switch(struct task_struct *prev); extern void vtime_account_system(struct task_struct *tsk); -extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -extern void vtime_account(struct task_struct *tsk); -#else +extern void vtime_account_irq_enter(struct task_struct *tsk); + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +static inline bool vtime_accounting_enabled(void) { return true; } +#endif + +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ + static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } -static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } -static inline void vtime_account(struct task_struct *tsk) { } +static inline void vtime_account_user(struct task_struct *tsk) { } +static inline void vtime_account_irq_enter(struct task_struct *tsk) { } +static inline bool vtime_accounting_enabled(void) { return false; } +#endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void arch_vtime_task_switch(struct task_struct *tsk); +extern void vtime_account_irq_exit(struct task_struct *tsk); +extern bool vtime_accounting_enabled(void); +extern void vtime_user_enter(struct task_struct *tsk); +static inline void vtime_user_exit(struct task_struct *tsk) +{ + vtime_account_user(tsk); +} +extern void vtime_guest_enter(struct task_struct *tsk); +extern void vtime_guest_exit(struct task_struct *tsk); +extern void vtime_init_idle(struct task_struct *tsk); +#else +static inline void vtime_account_irq_exit(struct task_struct *tsk) +{ + /* On hard|softirq exit we always account to hard|softirq cputime */ + vtime_account_system(tsk); +} +static inline void vtime_user_enter(struct task_struct *tsk) { } +static inline void vtime_user_exit(struct task_struct *tsk) { } +static inline void vtime_guest_enter(struct task_struct *tsk) { } +static inline void vtime_guest_exit(struct task_struct *tsk) { } +static inline void vtime_init_idle(struct task_struct *tsk) { } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk); static inline void irqtime_account_irq(struct task_struct *tsk) { } #endif -static inline void vtime_account_irq_enter(struct task_struct *tsk) +static inline void account_irq_enter_time(struct task_struct *tsk) { - /* - * Hardirq can interrupt idle task anytime. So we need vtime_account() - * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING. - * Softirq can also interrupt idle task directly if it calls - * local_bh_enable(). Such case probably don't exist but we never know. - * Ksoftirqd is not concerned because idle time is flushed on context - * switch. Softirqs in the end of hardirqs are also not a problem because - * the idle time is flushed on hardirq time already. - */ - vtime_account(tsk); + vtime_account_irq_enter(tsk); irqtime_account_irq(tsk); } -static inline void vtime_account_irq_exit(struct task_struct *tsk) +static inline void account_irq_exit_time(struct task_struct *tsk) { - /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_system(tsk); + vtime_account_irq_exit(tsk); irqtime_account_irq(tsk); } diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 498433dd067d..938b7fd11204 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -34,17 +34,17 @@ extern int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); -extern int datagram_recv_ctl(struct sock *sk, - struct msghdr *msg, - struct sk_buff *skb); - -extern int datagram_send_ctl(struct net *net, - struct sock *sk, - struct msghdr *msg, - struct flowi6 *fl6, - struct ipv6_txoptions *opt, - int *hlimit, int *tclass, - int *dontfrag); +extern int ip6_datagram_recv_ctl(struct sock *sk, + struct msghdr *msg, + struct sk_buff *skb); + +extern int ip6_datagram_send_ctl(struct net *net, + struct sock *sk, + struct msghdr *msg, + struct flowi6 *fl6, + struct ipv6_txoptions *opt, + int *hlimit, int *tclass, + int *dontfrag); #define LOOPBACK4_IPV6 cpu_to_be32(0x7f000006) diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h new file mode 100644 index 000000000000..88b878383797 --- /dev/null +++ b/include/trace/events/ras.h @@ -0,0 +1,77 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ras + +#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_AER_H + +#include <linux/tracepoint.h> +#include <linux/edac.h> + + +/* + * PCIe AER Trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event on a PCIe device. The event report has + * the following structure: + * + * char * dev_name - The name of the slot where the device resides + * ([domain:]bus:device.function). + * u32 status - Either the correctable or uncorrectable register + * indicating what error or errors have been seen + * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED + */ + +#define aer_correctable_errors \ + {BIT(0), "Receiver Error"}, \ + {BIT(6), "Bad TLP"}, \ + {BIT(7), "Bad DLLP"}, \ + {BIT(8), "RELAY_NUM Rollover"}, \ + {BIT(12), "Replay Timer Timeout"}, \ + {BIT(13), "Advisory Non-Fatal"} + +#define aer_uncorrectable_errors \ + {BIT(4), "Data Link Protocol"}, \ + {BIT(12), "Poisoned TLP"}, \ + {BIT(13), "Flow Control Protocol"}, \ + {BIT(14), "Completion Timeout"}, \ + {BIT(15), "Completer Abort"}, \ + {BIT(16), "Unexpected Completion"}, \ + {BIT(17), "Receiver Overflow"}, \ + {BIT(18), "Malformed TLP"}, \ + {BIT(19), "ECRC"}, \ + {BIT(20), "Unsupported Request"} + +TRACE_EVENT(aer_event, + TP_PROTO(const char *dev_name, + const u32 status, + const u8 severity), + + TP_ARGS(dev_name, status, severity), + + TP_STRUCT__entry( + __string( dev_name, dev_name ) + __field( u32, status ) + __field( u8, severity ) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + __entry->severity = severity; + ), + + TP_printk("%s PCIe Bus Error: severity=%s, %s\n", + __get_str(dev_name), + __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" : + __entry->severity == HW_EVENT_ERR_FATAL ? + "Fatal" : "Uncorrected", + __entry->severity == HW_EVENT_ERR_CORRECTED ? + __print_flags(__entry->status, "|", aer_correctable_errors) : + __print_flags(__entry->status, "|", aer_uncorrectable_errors)) +); + +#endif /* _TRACE_AER_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d4f559b1ec34..1918e832da4f 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -44,8 +44,10 @@ TRACE_EVENT(rcu_utilization, * of a new grace period or the end of an old grace period ("cpustart" * and "cpuend", respectively), a CPU passing through a quiescent * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" - * and "cpuofl", respectively), and a CPU being kicked for being too - * long in dyntick-idle mode ("kick"). + * and "cpuofl", respectively), a CPU being kicked for being too + * long in dyntick-idle mode ("kick"), a CPU accelerating its new + * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU + * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB"). */ TRACE_EVENT(rcu_grace_period, @@ -393,7 +395,7 @@ TRACE_EVENT(rcu_kfree_callback, */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit), + TP_PROTO(char *rcuname, long qlen_lazy, long qlen, long blimit), TP_ARGS(rcuname, qlen_lazy, qlen, blimit), @@ -401,7 +403,7 @@ TRACE_EVENT(rcu_batch_start, __field(char *, rcuname) __field(long, qlen_lazy) __field(long, qlen) - __field(int, blimit) + __field(long, blimit) ), TP_fast_assign( @@ -411,7 +413,7 @@ TRACE_EVENT(rcu_batch_start, __entry->blimit = blimit; ), - TP_printk("%s CBs=%ld/%ld bl=%d", + TP_printk("%s CBs=%ld/%ld bl=%ld", __entry->rcuname, __entry->qlen_lazy, __entry->qlen, __entry->blimit) ); @@ -523,22 +525,30 @@ TRACE_EVENT(rcu_batch_end, */ TRACE_EVENT(rcu_torture_read, - TP_PROTO(char *rcutorturename, struct rcu_head *rhp), + TP_PROTO(char *rcutorturename, struct rcu_head *rhp, + unsigned long secs, unsigned long c_old, unsigned long c), - TP_ARGS(rcutorturename, rhp), + TP_ARGS(rcutorturename, rhp, secs, c_old, c), TP_STRUCT__entry( __field(char *, rcutorturename) __field(struct rcu_head *, rhp) + __field(unsigned long, secs) + __field(unsigned long, c_old) + __field(unsigned long, c) ), TP_fast_assign( __entry->rcutorturename = rcutorturename; __entry->rhp = rhp; + __entry->secs = secs; + __entry->c_old = c_old; + __entry->c = c; ), - TP_printk("%s torture read %p", - __entry->rcutorturename, __entry->rhp) + TP_printk("%s torture read %p %luus c: %lu %lu", + __entry->rcutorturename, __entry->rhp, + __entry->secs, __entry->c_old, __entry->c) ); /* @@ -608,7 +618,8 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ do { } while (0) -#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ + do { } while (0) #define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 77cdba9df274..bb991dfe134f 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -28,25 +28,16 @@ #define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION /* - * Architectures where both 32- and 64-bit binaries can be executed - * on 64-bit kernels need this. This keeps the structure format - * uniform, and makes sure the wait_queue_token isn't too big to be - * passed back down to the kernel. - * - * This assumes that on these architectures: - * mode 32 bit 64 bit - * ------------------------- - * int 32 bit 32 bit - * long 32 bit 64 bit - * - * If so, 32-bit user-space code should be backwards compatible. + * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed + * back to the kernel via ioctl from userspace. On architectures where 32- and + * 64-bit userspace binaries can be executed it's important that the size of + * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we + * do not break the binary ABI interface by changing the structure size. */ - -#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \ - || defined(__powerpc__) || defined(__s390__) -typedef unsigned int autofs_wqt_t; -#else +#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */ typedef unsigned long autofs_wqt_t; +#else +typedef unsigned int autofs_wqt_t; #endif /* Packet types */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4f63c05d27c9..9fa9c622a7f4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -579,7 +579,8 @@ enum perf_event_type { * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW * - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 nr; + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 50598472dc41..f738e25377ff 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -152,6 +152,12 @@ #define USB_INTRF_FUNC_SUSPEND_LP (1 << (8 + 0)) #define USB_INTRF_FUNC_SUSPEND_RW (1 << (8 + 1)) +/* + * Interface status, Figure 9-5 USB 3.0 spec + */ +#define USB_INTRF_STAT_FUNC_RW_CAP 1 +#define USB_INTRF_STAT_FUNC_RW 2 + #define USB_ENDPOINT_HALT 0 /* IN/OUT will STALL */ /* Bit array elements as returned by the USB_REQ_GET_STATUS request. */ |