diff options
Diffstat (limited to 'include/linux')
32 files changed, 556 insertions, 119 deletions
diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 301de78d65f7..6c502cb13c95 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +/** + * fetch_or - perform *ptr |= mask and return old value of *ptr + * @ptr: pointer to value + * @mask: mask to OR on the value + * + * cmpxchg based fetch_or, macro so it works for different integer types + */ +#ifndef fetch_or +#define fetch_or(ptr, mask) \ +({ typeof(*(ptr)) __old, __val = *(ptr); \ + for (;;) { \ + __old = cmpxchg((ptr), __val, __val | (mask)); \ + if (__old == __val) \ + break; \ + __val = __old; \ + } \ + __old; \ +}) +#endif + + #ifdef CONFIG_GENERIC_ATOMIC64 #include <asm-generic/atomic64.h> #endif diff --git a/include/linux/bio.h b/include/linux/bio.h index cb6888824108..88bc64f00bb5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -320,11 +320,6 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) struct bvec_iter iter = bio->bi_iter; int idx; - if (!bio_flagged(bio, BIO_CLONED)) { - *bv = bio->bi_io_vec[bio->bi_vcnt - 1]; - return; - } - if (unlikely(!bio_multiple_segments(bio))) { *bv = bio_iovec(bio); return; diff --git a/include/linux/cache.h b/include/linux/cache.h index 17e7e82d2aa7..1be04f8c563a 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -12,10 +12,24 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif +/* + * __read_mostly is used to keep rarely changing variables out of frequently + * updated cachelines. If an architecture doesn't support it, ignore the + * hint. + */ #ifndef __read_mostly #define __read_mostly #endif +/* + * __ro_after_init is used to mark things that are read-only after init (i.e. + * after mark_rodata_ro() has been called). These are effectively read-only, + * but may get written to during init, so can't live in .rodata (via "const"). + */ +#ifndef __ro_after_init +#define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) +#endif + #ifndef ____cacheline_aligned #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index bdcf358dfce2..0d442e34c349 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -190,9 +190,9 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void -clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) +clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec) { - return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec); + return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec); } extern void clockevents_suspend(void); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 6013021a3b39..a307bf62974f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -118,6 +118,23 @@ struct clocksource { /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) +static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from) +{ + /* freq = cyc/from + * mult/2^shift = ns/cyc + * mult = ns/cyc * 2^shift + * mult = from/freq * 2^shift + * mult = from * 2^shift / freq + * mult = (from<<shift) / freq + */ + u64 tmp = ((u64)from) << shift_constant; + + tmp += freq/2; /* round for do_div */ + do_div(tmp, freq); + + return (u32)tmp; +} + /** * clocksource_khz2mult - calculates mult from khz and shift * @khz: Clocksource frequency in KHz @@ -128,19 +145,7 @@ struct clocksource { */ static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) { - /* khz = cyc/(Million ns) - * mult/2^shift = ns/cyc - * mult = ns/cyc * 2^shift - * mult = 1Million/khz * 2^shift - * mult = 1000000 * 2^shift / khz - * mult = (1000000<<shift) / khz - */ - u64 tmp = ((u64)1000000) << shift_constant; - - tmp += khz/2; /* round for do_div */ - do_div(tmp, khz); - - return (u32)tmp; + return clocksource_freq2mult(khz, shift_constant, NSEC_PER_MSEC); } /** @@ -154,19 +159,7 @@ static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) */ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) { - /* hz = cyc/(Billion ns) - * mult/2^shift = ns/cyc - * mult = ns/cyc * 2^shift - * mult = 1Billion/hz * 2^shift - * mult = 1000000000 * 2^shift / hz - * mult = (1000000000<<shift) / hz - */ - u64 tmp = ((u64)1000000000) << shift_constant; - - tmp += hz/2; /* round for do_div */ - do_div(tmp, hz); - - return (u32)tmp; + return clocksource_freq2mult(hz, shift_constant, NSEC_PER_SEC); } /** diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 48f5aab117ae..b5ff9881bef8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -20,12 +20,14 @@ # define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) -#else +#else /* CONFIG_SPARSE_RCU_POINTER */ # define __rcu -#endif +#endif /* CONFIG_SPARSE_RCU_POINTER */ +# define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); -#else +# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) +#else /* __CHECKER__ */ # define __user # define __kernel # define __safe @@ -44,7 +46,9 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __percpu # define __rcu # define __pmem -#endif +# define __private +# define ACCESS_PRIVATE(p, member) ((p)->member) +#endif /* __CHECKER__ */ /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ #define ___PASTE(a,b) a##b @@ -263,8 +267,9 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * In contrast to ACCESS_ONCE these two macros will also work on aggregate * data types like structs or unions. If the size of the accessed data * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) - * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a - * compile-time warning. + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at + * least two memcpy()s: one for the __builtin_memcpy() and then one for + * the macro doing the copy of variable - '__u' allocated on the stack. * * Their two major use cases are: (1) Mediating communication between * process-level code and irq/NMI handlers, all running on the same CPU, diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 75857cda38e9..5e45cf930a3f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -386,7 +386,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; - if (!ops->free) + if (!ops->free || !cpu_addr) return; debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); @@ -641,31 +641,40 @@ static inline void dmam_release_declared_memory(struct device *dev) } #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ -static inline void *dma_alloc_writecombine(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t gfp) +static inline void *dma_alloc_wc(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp) { DEFINE_DMA_ATTRS(attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs); } +#ifndef dma_alloc_writecombine +#define dma_alloc_writecombine dma_alloc_wc +#endif -static inline void dma_free_writecombine(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr) +static inline void dma_free_wc(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr) { DEFINE_DMA_ATTRS(attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs); } +#ifndef dma_free_writecombine +#define dma_free_writecombine dma_free_wc +#endif -static inline int dma_mmap_writecombine(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, - size_t size) +static inline int dma_mmap_wc(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, + size_t size) { DEFINE_DMA_ATTRS(attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); } +#ifndef dma_mmap_writecombine +#define dma_mmap_writecombine dma_mmap_wc +#endif #ifdef CONFIG_NEED_DMA_MAP_STATE #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index c2b340e23f62..6d9df3f7e334 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -713,6 +713,18 @@ static inline void __ftrace_enabled_restore(int enabled) #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) +static inline unsigned long get_lock_parent_ip(void) +{ + unsigned long addr = CALLER_ADDR0; + + if (!in_lock_functions(addr)) + return addr; + addr = CALLER_ADDR1; + if (!in_lock_functions(addr)) + return addr; + return CALLER_ADDR2; +} + #ifdef CONFIG_IRQSOFF_TRACER extern void time_hardirqs_on(unsigned long a0, unsigned long a1); extern void time_hardirqs_off(unsigned long a0, unsigned long a1); diff --git a/include/linux/init.h b/include/linux/init.h index b449f378f995..aedb254abc37 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -142,6 +142,10 @@ void prepare_namespace(void); void __init load_default_modules(void); int __init init_rootfs(void); +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + extern void (*late_time_init)(void); extern bool initcall_debug; diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 24bea087e7af..afb45597fb5f 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -20,6 +20,7 @@ struct resource { resource_size_t end; const char *name; unsigned long flags; + unsigned long desc; struct resource *parent, *sibling, *child; }; @@ -49,12 +50,19 @@ struct resource { #define IORESOURCE_WINDOW 0x00200000 /* forwarded by bridge */ #define IORESOURCE_MUXED 0x00400000 /* Resource is software muxed */ +#define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */ +#define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */ + #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ + #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 /* No address assigned yet */ #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ +/* I/O resource extended types */ +#define IORESOURCE_SYSTEM_RAM (IORESOURCE_MEM|IORESOURCE_SYSRAM) + /* PnP IRQ specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IRQ_HIGHEDGE (1<<0) #define IORESOURCE_IRQ_LOWEDGE (1<<1) @@ -105,6 +113,22 @@ struct resource { /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ +/* + * I/O Resource Descriptors + * + * Descriptors are used by walk_iomem_res_desc() and region_intersects() + * for searching a specific resource range in the iomem table. Assign + * a new descriptor when a resource range supports the search interfaces. + * Otherwise, resource.desc must be set to IORES_DESC_NONE (0). + */ +enum { + IORES_DESC_NONE = 0, + IORES_DESC_CRASH_KERNEL = 1, + IORES_DESC_ACPI_TABLES = 2, + IORES_DESC_ACPI_NV_STORAGE = 3, + IORES_DESC_PERSISTENT_MEMORY = 4, + IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, +}; /* helpers to define resources */ #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ @@ -113,6 +137,7 @@ struct resource { .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ + .desc = IORES_DESC_NONE, \ } #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ @@ -170,6 +195,10 @@ static inline unsigned long resource_type(const struct resource *res) { return res->flags & IORESOURCE_TYPE_BITS; } +static inline unsigned long resource_ext_type(const struct resource *res) +{ + return res->flags & IORESOURCE_EXT_TYPE_BITS; +} /* True iff r1 completely contains r2 */ static inline bool resource_contains(struct resource *r1, struct resource *r2) { @@ -239,8 +268,8 @@ extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)); extern int -walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg, - int (*func)(u64, u64, void *)); +walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, + void *arg, int (*func)(u64, u64, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) diff --git a/include/linux/irq.h b/include/linux/irq.h index d5ebd94822d2..c4de62348ff2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -140,7 +140,7 @@ struct irq_domain; * @ipi_offset: Offset of first IPI target cpu in @affinity. Optional. */ struct irq_common_data { - unsigned int state_use_accessors; + unsigned int __private state_use_accessors; #ifdef CONFIG_NUMA unsigned int node; #endif @@ -214,7 +214,7 @@ enum { IRQD_FORWARDED_TO_VCPU = (1 << 20), }; -#define __irqd_to_state(d) ((d)->common->state_use_accessors) +#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) static inline bool irqd_is_setaffinity_pending(struct irq_data *d) { @@ -305,6 +305,8 @@ static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; } +#undef __irqd_to_state + static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) { return d->hwirq; diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4b9f85c963d0..0fdc798e3ff7 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -1,6 +1,7 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H +#include <linux/sched.h> #include <linux/types.h> struct kmem_cache; @@ -13,7 +14,6 @@ struct vm_struct; #include <asm/kasan.h> #include <asm/pgtable.h> -#include <linux/sched.h> extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; @@ -43,6 +43,8 @@ static inline void kasan_disable_current(void) void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_unpoison_task_stack(struct task_struct *task); + void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); @@ -66,6 +68,8 @@ void kasan_free_shadow(const struct vm_struct *vm); static inline void kasan_unpoison_shadow(const void *address, size_t size) {} +static inline void kasan_unpoison_task_stack(struct task_struct *task) {} + static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 861f690aa791..5276fe0916fc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -25,6 +25,7 @@ #include <linux/irqflags.h> #include <linux/context_tracking.h> #include <linux/irqbypass.h> +#include <linux/swait.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -218,7 +219,7 @@ struct kvm_vcpu { int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; unsigned char fpu_counter; - wait_queue_head_t wq; + struct swait_queue_head wq; struct pid *pid; int sigset_active; sigset_t sigset; @@ -782,7 +783,7 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) } #endif -static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) +static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_WQP return vcpu->arch.wqp; diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index e23121f9d82a..59ccab297ae0 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -37,6 +37,9 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter) void clear_all_latency_tracing(struct task_struct *p); +extern int sysctl_latencytop(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + #else static inline void diff --git a/include/linux/list.h b/include/linux/list.h index 30cf4200ab40..5356f4d661a7 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -113,17 +113,6 @@ extern void __list_del_entry(struct list_head *entry); extern void list_del(struct list_head *entry); #endif -#ifdef CONFIG_DEBUG_LIST -/* - * See devm_memremap_pages() which wants DEBUG_LIST=y to assert if one - * of the pages it allocates is ever passed to list_add() - */ -extern void list_force_poison(struct list_head *entry); -#else -/* fallback to the less strict LIST_POISON* definitions */ -#define list_force_poison list_del -#endif - /** * list_replace - replace old entry by new one * @old : the element to be replaced diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 4dca42fd32f5..d026b190c530 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -261,7 +261,6 @@ struct held_lock { /* * Initialization, self-test and debugging-output methods: */ -extern void lockdep_init(void); extern void lockdep_info(void); extern void lockdep_reset(void); extern void lockdep_reset_lock(struct lockdep_map *lock); @@ -392,7 +391,6 @@ static inline void lockdep_on(void) # define lockdep_set_current_reclaim_state(g) do { } while (0) # define lockdep_clear_current_reclaim_state() do { } while (0) # define lockdep_trace_alloc(g) do { } while (0) -# define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) # define lockdep_init_map(lock, name, key, sub) \ do { (void)(name); (void)(key); } while (0) diff --git a/include/linux/mm.h b/include/linux/mm.h index 516e14944339..3579d1e2fe3a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -387,7 +387,8 @@ enum { REGION_MIXED, }; -int region_intersects(resource_size_t offset, size_t size, const char *type); +int region_intersects(resource_size_t offset, size_t size, unsigned long flags, + unsigned long desc); /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); @@ -2138,6 +2139,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr, int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 624b78b848b8..944b2b37313b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -566,10 +566,26 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm) } #endif -struct vm_special_mapping -{ - const char *name; +struct vm_fault; + +struct vm_special_mapping { + const char *name; /* The name, e.g. "[vdso]". */ + + /* + * If .fault is not provided, this points to a + * NULL-terminated array of pages that back the special mapping. + * + * This must not be NULL unless .fault is provided. + */ struct page **pages; + + /* + * If non-NULL, then this is called to resolve page faults + * on the special mapping. If used, .pages is not checked. + */ + int (*fault)(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, + struct vm_fault *vmf); }; enum tlb_flush_reason { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f5c5a3fa2c81..79ec7bbf0155 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -468,6 +468,7 @@ struct perf_event { int group_flags; struct perf_event *group_leader; struct pmu *pmu; + void *pmu_private; enum perf_event_active_state state; unsigned int attach_state; @@ -1109,12 +1110,6 @@ static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } #endif -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL) -extern bool perf_event_can_stop_tick(void); -#else -static inline bool perf_event_can_stop_tick(void) { return true; } -#endif - #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); #else diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 907f3fd191ac..62d44c176071 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer); void run_posix_cpu_timers(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); - -bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk); - void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval); diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 54bf1484d41f..35ac903956c7 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -111,22 +111,17 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, kt->nsec = ts.tv_nsec; } -#ifdef CONFIG_NTP_PPS - static inline void pps_get_ts(struct pps_event_time *ts) { - ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real); -} + struct system_time_snapshot snap; -#else /* CONFIG_NTP_PPS */ - -static inline void pps_get_ts(struct pps_event_time *ts) -{ - ktime_get_real_ts64(&ts->ts_real); + ktime_get_snapshot(&snap); + ts->ts_real = ktime_to_timespec64(snap.real); +#ifdef CONFIG_NTP_PPS + ts->ts_raw = ktime_to_timespec64(snap.raw); +#endif } -#endif /* CONFIG_NTP_PPS */ - /* Subtract known time delay from PPS event time(s) */ static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta) { diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index b8b73066d137..6b15e168148a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -38,6 +38,7 @@ struct ptp_clock_request { }; }; +struct system_device_crosststamp; /** * struct ptp_clock_info - decribes a PTP hardware clock * @@ -67,6 +68,11 @@ struct ptp_clock_request { * @gettime64: Reads the current time from the hardware clock. * parameter ts: Holds the result. * + * @getcrosststamp: Reads the current time from the hardware clock and + * system clock simultaneously. + * parameter cts: Contains timestamp (device,system) pair, + * where system time is realtime and monotonic. + * * @settime64: Set the current time on the hardware clock. * parameter ts: Time value to set. * @@ -105,6 +111,8 @@ struct ptp_clock_info { int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*getcrosststamp)(struct ptp_clock_info *ptp, + struct system_device_crosststamp *cts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 14e6f47ee16f..b5d48bd56e3f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -360,8 +360,6 @@ void rcu_user_exit(void); #else static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } -static inline void rcu_user_hooks_switch(struct task_struct *prev, - struct task_struct *next) { } #endif /* CONFIG_NO_HZ_FULL */ #ifdef CONFIG_RCU_NOCB_CPU diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc3..c617ea12c6b7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -182,8 +182,6 @@ extern void update_cpu_load_nohz(int active); static inline void update_cpu_load_nohz(int active) { } #endif -extern unsigned long get_parent_ip(unsigned long addr); - extern void dump_cpu_task(int cpu); struct seq_file; @@ -719,6 +717,10 @@ struct signal_struct { /* Earliest-expiration cache. */ struct task_cputime cputime_expires; +#ifdef CONFIG_NO_HZ_FULL + unsigned long tick_dep_mask; +#endif + struct list_head cpu_timers[3]; struct pid *tty_old_pgrp; @@ -920,6 +922,10 @@ static inline int sched_info_on(void) #endif } +#ifdef CONFIG_SCHEDSTATS +void force_schedstat_enabled(void); +#endif + enum cpu_idle_type { CPU_IDLE, CPU_NOT_IDLE, @@ -1289,6 +1295,8 @@ struct sched_rt_entity { unsigned long timeout; unsigned long watchdog_stamp; unsigned int time_slice; + unsigned short on_rq; + unsigned short on_list; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED @@ -1329,10 +1337,6 @@ struct sched_dl_entity { * task has to wait for a replenishment to be performed at the * next firing of dl_timer. * - * @dl_new tells if a new instance arrived. If so we must - * start executing it with full runtime and reset its absolute - * deadline; - * * @dl_boosted tells if we are boosted due to DI. If so we are * outside bandwidth enforcement mechanism (but only until we * exit the critical section); @@ -1340,7 +1344,7 @@ struct sched_dl_entity { * @dl_yielded tells if task gave up the cpu before consuming * all its available runtime during the last job. */ - int dl_throttled, dl_new, dl_boosted, dl_yielded; + int dl_throttled, dl_boosted, dl_yielded; /* * Bandwidth enforcement timer. Each -deadline task has its @@ -1542,6 +1546,10 @@ struct task_struct { VTIME_SYS, } vtime_snap_whence; #endif + +#ifdef CONFIG_NO_HZ_FULL + unsigned long tick_dep_mask; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ u64 start_time; /* monotonic time in nsec */ u64 real_start_time; /* boot based time in nsec */ @@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { } #endif #ifdef CONFIG_NO_HZ_FULL -extern bool sched_can_stop_tick(void); extern u64 scheduler_tick_max_deferment(void); -#else -static inline bool sched_can_stop_tick(void) { return false; } #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c9e4731cf10b..4f080ab4f2cd 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -95,4 +95,8 @@ extern int sysctl_numa_balancing(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int sysctl_schedstats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + #endif /* _SCHED_SYSCTL_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f5f80c5643ac..dc8eb63c6568 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -99,8 +99,23 @@ void process_srcu(struct work_struct *work); } /* - * define and init a srcu struct at build time. - * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it. + * Define and initialize a srcu struct at build time. + * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. + * + * Note that although DEFINE_STATIC_SRCU() hides the name from other + * files, the per-CPU variable rules nevertheless require that the + * chosen name be globally unique. These rules also prohibit use of + * DEFINE_STATIC_SRCU() within a function. If these rules are too + * restrictive, declare the srcu_struct manually. For example, in + * each file: + * + * static struct srcu_struct my_srcu; + * + * Then, before the first use of each my_srcu, manually initialize it: + * + * init_srcu_struct(&my_srcu); + * + * See include/linux/percpu-defs.h for the rules on per-CPU variables. */ #define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ diff --git a/include/linux/swait.h b/include/linux/swait.h new file mode 100644 index 000000000000..c1f9c62a8a50 --- /dev/null +++ b/include/linux/swait.h @@ -0,0 +1,172 @@ +#ifndef _LINUX_SWAIT_H +#define _LINUX_SWAIT_H + +#include <linux/list.h> +#include <linux/stddef.h> +#include <linux/spinlock.h> +#include <asm/current.h> + +/* + * Simple wait queues + * + * While these are very similar to the other/complex wait queues (wait.h) the + * most important difference is that the simple waitqueue allows for + * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold + * times. + * + * In order to make this so, we had to drop a fair number of features of the + * other waitqueue code; notably: + * + * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue; + * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right + * sleeper state. + * + * - the exclusive mode; because this requires preserving the list order + * and this is hard. + * + * - custom wake functions; because you cannot give any guarantees about + * random code. + * + * As a side effect of this; the data structures are slimmer. + * + * One would recommend using this wait queue where possible. + */ + +struct task_struct; + +struct swait_queue_head { + raw_spinlock_t lock; + struct list_head task_list; +}; + +struct swait_queue { + struct task_struct *task; + struct list_head task_list; +}; + +#define __SWAITQUEUE_INITIALIZER(name) { \ + .task = current, \ + .task_list = LIST_HEAD_INIT((name).task_list), \ +} + +#define DECLARE_SWAITQUEUE(name) \ + struct swait_queue name = __SWAITQUEUE_INITIALIZER(name) + +#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .task_list = LIST_HEAD_INIT((name).task_list), \ +} + +#define DECLARE_SWAIT_QUEUE_HEAD(name) \ + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name) + +extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name, + struct lock_class_key *key); + +#define init_swait_queue_head(q) \ + do { \ + static struct lock_class_key __key; \ + __init_swait_queue_head((q), #q, &__key); \ + } while (0) + +#ifdef CONFIG_LOCKDEP +# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ + ({ init_swait_queue_head(&name); name; }) +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) +#else +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ + DECLARE_SWAIT_QUEUE_HEAD(name) +#endif + +static inline int swait_active(struct swait_queue_head *q) +{ + return !list_empty(&q->task_list); +} + +extern void swake_up(struct swait_queue_head *q); +extern void swake_up_all(struct swait_queue_head *q); +extern void swake_up_locked(struct swait_queue_head *q); + +extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); +extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); +extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); + +extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); +extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); + +/* as per ___wait_event() but for swait, therefore "exclusive == 0" */ +#define ___swait_event(wq, condition, state, ret, cmd) \ +({ \ + struct swait_queue __wait; \ + long __ret = ret; \ + \ + INIT_LIST_HEAD(&__wait.task_list); \ + for (;;) { \ + long __int = prepare_to_swait_event(&wq, &__wait, state);\ + \ + if (condition) \ + break; \ + \ + if (___wait_is_interruptible(state) && __int) { \ + __ret = __int; \ + break; \ + } \ + \ + cmd; \ + } \ + finish_swait(&wq, &__wait); \ + __ret; \ +}) + +#define __swait_event(wq, condition) \ + (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ + schedule()) + +#define swait_event(wq, condition) \ +do { \ + if (condition) \ + break; \ + __swait_event(wq, condition); \ +} while (0) + +#define __swait_event_timeout(wq, condition, timeout) \ + ___swait_event(wq, ___wait_cond_timeout(condition), \ + TASK_UNINTERRUPTIBLE, timeout, \ + __ret = schedule_timeout(__ret)) + +#define swait_event_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __swait_event_timeout(wq, condition, timeout); \ + __ret; \ +}) + +#define __swait_event_interruptible(wq, condition) \ + ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \ + schedule()) + +#define swait_event_interruptible(wq, condition) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = __swait_event_interruptible(wq, condition); \ + __ret; \ +}) + +#define __swait_event_interruptible_timeout(wq, condition, timeout) \ + ___swait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, timeout, \ + __ret = schedule_timeout(__ret)) + +#define swait_event_interruptible_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __swait_event_interruptible_timeout(wq, \ + condition, timeout); \ + __ret; \ +}) + +#endif /* _LINUX_SWAIT_H */ diff --git a/include/linux/tick.h b/include/linux/tick.h index 97fd4e543846..21f73649a4dc 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void) tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); } +enum tick_dep_bits { + TICK_DEP_BIT_POSIX_TIMER = 0, + TICK_DEP_BIT_PERF_EVENTS = 1, + TICK_DEP_BIT_SCHED = 2, + TICK_DEP_BIT_CLOCK_UNSTABLE = 3 +}; + +#define TICK_DEP_MASK_NONE 0 +#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER) +#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS) +#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) +#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_enabled; extern int tick_nohz_tick_stopped(void); @@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void) return cpumask_any_and(housekeeping_mask, cpu_online_mask); } -extern void tick_nohz_full_kick(void); +extern void tick_nohz_dep_set(enum tick_dep_bits bit); +extern void tick_nohz_dep_clear(enum tick_dep_bits bit); +extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit); +extern void tick_nohz_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit); +extern void tick_nohz_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit); + +/* + * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases + * on top of static keys. + */ +static inline void tick_dep_set(enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set(bit); +} + +static inline void tick_dep_clear(enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear(bit); +} + +static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) +{ + if (tick_nohz_full_cpu(cpu)) + tick_nohz_dep_set_cpu(cpu, bit); +} + +static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) +{ + if (tick_nohz_full_cpu(cpu)) + tick_nohz_dep_clear_cpu(cpu, bit); +} + +static inline void tick_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set_task(tsk, bit); +} +static inline void tick_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear_task(tsk, bit); +} +static inline void tick_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set_signal(signal, bit); +} +static inline void tick_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear_signal(signal, bit); +} + extern void tick_nohz_full_kick_cpu(int cpu); -extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); #else static inline int housekeeping_any_cpu(void) @@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void) static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } + +static inline void tick_dep_set(enum tick_dep_bits bit) { } +static inline void tick_dep_clear(enum tick_dep_bits bit) { } +static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } +static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } +static inline void tick_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit) { } +static inline void tick_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit) { } +static inline void tick_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit) { } +static inline void tick_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit) { } + static inline void tick_nohz_full_kick_cpu(int cpu) { } -static inline void tick_nohz_full_kick(void) { } -static inline void tick_nohz_full_kick_all(void) { } static inline void __tick_nohz_task_switch(void) { } #endif diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 25247220b4b7..e88005459035 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -50,6 +50,7 @@ struct tk_read_base { * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events + * @cs_was_changed_seq: The sequence number of clocksource change events * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval @@ -91,6 +92,7 @@ struct timekeeper { ktime_t offs_tai; s32 tai_offset; unsigned int clock_was_set_seq; + u8 cs_was_changed_seq; ktime_t next_leap_ktime; struct timespec64 raw_time; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ec89d846324c..96f37bee3bc1 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -267,6 +267,64 @@ extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real); /* + * struct system_time_snapshot - simultaneous raw/real time capture with + * counter value + * @cycles: Clocksource counter value to produce the system times + * @real: Realtime system time + * @raw: Monotonic raw system time + * @clock_was_set_seq: The sequence number of clock was set events + * @cs_was_changed_seq: The sequence number of clocksource change events + */ +struct system_time_snapshot { + cycle_t cycles; + ktime_t real; + ktime_t raw; + unsigned int clock_was_set_seq; + u8 cs_was_changed_seq; +}; + +/* + * struct system_device_crosststamp - system/device cross-timestamp + * (syncronized capture) + * @device: Device time + * @sys_realtime: Realtime simultaneous with device time + * @sys_monoraw: Monotonic raw simultaneous with device time + */ +struct system_device_crosststamp { + ktime_t device; + ktime_t sys_realtime; + ktime_t sys_monoraw; +}; + +/* + * struct system_counterval_t - system counter value with the pointer to the + * corresponding clocksource + * @cycles: System counter value + * @cs: Clocksource corresponding to system counter value. Used by + * timekeeping code to verify comparibility of two cycle values + */ +struct system_counterval_t { + cycle_t cycles; + struct clocksource *cs; +}; + +/* + * Get cross timestamp between system clock and device clock + */ +extern int get_device_system_crosststamp( + int (*get_time_fn)(ktime_t *device_time, + struct system_counterval_t *system_counterval, + void *ctx), + void *ctx, + struct system_time_snapshot *history, + struct system_device_crosststamp *xtstamp); + +/* + * Simultaneously snapshot realtime and monotonic raw clocks + */ +extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); + +/* * Persistent clock related interfaces */ extern int persistent_clock_is_local; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index acfdbf353a0b..be586c632a0c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -134,9 +134,6 @@ extern void syscall_unregfunc(void); void *it_func; \ void *__data; \ \ - if (!cpu_online(raw_smp_processor_id())) \ - return; \ - \ if (!(cond)) \ return; \ prercu; \ @@ -343,15 +340,19 @@ extern void syscall_unregfunc(void); * "void *__data, proto" as the callback prototype. */ #define DECLARE_TRACE_NOARGS(name) \ - __DECLARE_TRACE(name, void, , 1, void *__data, __data) + __DECLARE_TRACE(name, void, , \ + cpu_online(raw_smp_processor_id()), \ + void *__data, __data) #define DECLARE_TRACE(name, proto, args) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1, \ - PARAMS(void *__data, proto), \ - PARAMS(__data, args)) + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()), \ + PARAMS(void *__data, proto), \ + PARAMS(__data, args)) #define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) diff --git a/include/linux/wait.h b/include/linux/wait.h index ae71a769b89e..27d7a0ab5da3 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -338,7 +338,7 @@ do { \ schedule(); try_to_freeze()) /** - * wait_event - sleep (or freeze) until a condition gets true + * wait_event_freezable - sleep (or freeze) until a condition gets true * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * |