diff options
Diffstat (limited to 'include/linux')
47 files changed, 818 insertions, 151 deletions
diff --git a/include/linux/ata.h b/include/linux/ata.h index d2992bfa1706..c1a2f345cbe6 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -487,8 +487,8 @@ enum ata_tf_protocols { }; enum ata_ioctls { - ATA_IOC_GET_IO32 = 0x309, - ATA_IOC_SET_IO32 = 0x324, + ATA_IOC_GET_IO32 = 0x309, /* HDIO_GET_32BIT */ + ATA_IOC_SET_IO32 = 0x324, /* HDIO_SET_32BIT */ }; /* core structures */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 301de78d65f7..6c502cb13c95 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +/** + * fetch_or - perform *ptr |= mask and return old value of *ptr + * @ptr: pointer to value + * @mask: mask to OR on the value + * + * cmpxchg based fetch_or, macro so it works for different integer types + */ +#ifndef fetch_or +#define fetch_or(ptr, mask) \ +({ typeof(*(ptr)) __old, __val = *(ptr); \ + for (;;) { \ + __old = cmpxchg((ptr), __val, __val | (mask)); \ + if (__old == __val) \ + break; \ + __val = __old; \ + } \ + __old; \ +}) +#endif + + #ifdef CONFIG_GENERIC_ATOMIC64 #include <asm-generic/atomic64.h> #endif diff --git a/include/linux/bio.h b/include/linux/bio.h index 5349e6816cbb..88bc64f00bb5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -310,6 +310,38 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit) bio->bi_flags &= ~(1U << bit); } +static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) +{ + *bv = bio_iovec(bio); +} + +static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) +{ + struct bvec_iter iter = bio->bi_iter; + int idx; + + if (unlikely(!bio_multiple_segments(bio))) { + *bv = bio_iovec(bio); + return; + } + + bio_advance_iter(bio, &iter, iter.bi_size); + + if (!iter.bi_bvec_done) + idx = iter.bi_idx - 1; + else /* in the middle of bvec */ + idx = iter.bi_idx; + + *bv = bio->bi_io_vec[idx]; + + /* + * iter.bi_bvec_done records actual length of the last bvec + * if this bio ends in the middle of one io vector + */ + if (iter.bi_bvec_done) + bv->bv_len = iter.bi_bvec_done; +} + enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4571ef1a12a9..413c84fbc4ed 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -895,7 +895,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) { struct request_queue *q = rq->q; - if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) + if (unlikely(rq->cmd_type != REQ_TYPE_FS)) return q->limits.max_hw_sectors; if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) @@ -1372,6 +1372,13 @@ static inline void put_dev_sector(Sector p) page_cache_release(p.v); } +static inline bool __bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + return offset || + ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); +} + /* * Check if adding a bio_vec after bprv with offset would create a gap in * the SG list. Most drivers don't care about this, but some do. @@ -1381,18 +1388,22 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, { if (!queue_virt_boundary(q)) return false; - return offset || - ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); + return __bvec_gap_to_prev(q, bprv, offset); } static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, struct bio *next) { - if (!bio_has_data(prev)) - return false; + if (bio_has_data(prev) && queue_virt_boundary(q)) { + struct bio_vec pb, nb; + + bio_get_last_bvec(prev, &pb); + bio_get_first_bvec(next, &nb); - return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], - next->bi_io_vec[0].bv_offset); + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); + } + + return false; } static inline bool req_gap_back_merge(struct request *req, struct bio *bio) diff --git a/include/linux/cache.h b/include/linux/cache.h index 17e7e82d2aa7..1be04f8c563a 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -12,10 +12,24 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif +/* + * __read_mostly is used to keep rarely changing variables out of frequently + * updated cachelines. If an architecture doesn't support it, ignore the + * hint. + */ #ifndef __read_mostly #define __read_mostly #endif +/* + * __ro_after_init is used to mark things that are read-only after init (i.e. + * after mark_rodata_ro() has been called). These are effectively read-only, + * but may get written to during init, so can't live in .rodata (via "const"). + */ +#ifndef __ro_after_init +#define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) +#endif + #ifndef ____cacheline_aligned #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index c1ef6f14e7be..15151f3c4120 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -75,6 +75,7 @@ #define CEPH_FEATURE_CRUSH_TUNABLES5 (1ULL<<58) /* chooseleaf stable mode */ // duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5 #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ +#define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */ /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index bdcf358dfce2..0d442e34c349 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -190,9 +190,9 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void -clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) +clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec) { - return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec); + return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec); } extern void clockevents_suspend(void); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 6013021a3b39..a307bf62974f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -118,6 +118,23 @@ struct clocksource { /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) +static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from) +{ + /* freq = cyc/from + * mult/2^shift = ns/cyc + * mult = ns/cyc * 2^shift + * mult = from/freq * 2^shift + * mult = from * 2^shift / freq + * mult = (from<<shift) / freq + */ + u64 tmp = ((u64)from) << shift_constant; + + tmp += freq/2; /* round for do_div */ + do_div(tmp, freq); + + return (u32)tmp; +} + /** * clocksource_khz2mult - calculates mult from khz and shift * @khz: Clocksource frequency in KHz @@ -128,19 +145,7 @@ struct clocksource { */ static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) { - /* khz = cyc/(Million ns) - * mult/2^shift = ns/cyc - * mult = ns/cyc * 2^shift - * mult = 1Million/khz * 2^shift - * mult = 1000000 * 2^shift / khz - * mult = (1000000<<shift) / khz - */ - u64 tmp = ((u64)1000000) << shift_constant; - - tmp += khz/2; /* round for do_div */ - do_div(tmp, khz); - - return (u32)tmp; + return clocksource_freq2mult(khz, shift_constant, NSEC_PER_MSEC); } /** @@ -154,19 +159,7 @@ static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) */ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) { - /* hz = cyc/(Billion ns) - * mult/2^shift = ns/cyc - * mult = ns/cyc * 2^shift - * mult = 1Billion/hz * 2^shift - * mult = 1000000000 * 2^shift / hz - * mult = (1000000000<<shift) / hz - */ - u64 tmp = ((u64)1000000000) << shift_constant; - - tmp += hz/2; /* round for do_div */ - do_div(tmp, hz); - - return (u32)tmp; + return clocksource_freq2mult(hz, shift_constant, NSEC_PER_SEC); } /** diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 48f5aab117ae..b5ff9881bef8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -20,12 +20,14 @@ # define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) -#else +#else /* CONFIG_SPARSE_RCU_POINTER */ # define __rcu -#endif +#endif /* CONFIG_SPARSE_RCU_POINTER */ +# define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); -#else +# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) +#else /* __CHECKER__ */ # define __user # define __kernel # define __safe @@ -44,7 +46,9 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __percpu # define __rcu # define __pmem -#endif +# define __private +# define ACCESS_PRIVATE(p, member) ((p)->member) +#endif /* __CHECKER__ */ /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ #define ___PASTE(a,b) a##b @@ -263,8 +267,9 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * In contrast to ACCESS_ONCE these two macros will also work on aggregate * data types like structs or unions. If the size of the accessed data * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) - * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a - * compile-time warning. + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at + * least two memcpy()s: one for the __builtin_memcpy() and then one for + * the macro doing the copy of variable - '__u' allocated on the stack. * * Their two major use cases are: (1) Mediating communication between * process-level code and irq/NMI handlers, all running on the same CPU, diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d2ca8c38f9c4..f9b1fab4388a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -16,6 +16,7 @@ #include <linux/node.h> #include <linux/compiler.h> #include <linux/cpumask.h> +#include <linux/cpuhotplug.h> struct device; struct device_node; @@ -27,6 +28,9 @@ struct cpu { struct device dev; }; +extern void boot_cpu_init(void); +extern void boot_cpu_state_init(void); + extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); extern bool cpu_is_hotpluggable(unsigned cpu); @@ -74,7 +78,7 @@ enum { /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, - CPU_PRI_SMPBOOT = 9, + /* bring up workqueues before normal notifiers and down after */ CPU_PRI_WORKQUEUE_UP = 5, CPU_PRI_WORKQUEUE_DOWN = -5, @@ -97,9 +101,7 @@ enum { * Called on the new cpu, just before * enabling interrupts. Must not sleep, * must not fail */ -#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached - * idle loop. */ -#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, +#define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly, * perhaps due to preemption. */ /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend @@ -118,6 +120,7 @@ enum { #ifdef CONFIG_SMP +extern bool cpuhp_tasks_frozen; /* Need to know about CPUs going up/down? */ #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) #define cpu_notifier(fn, pri) { \ @@ -167,7 +170,6 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb) } #endif -void smpboot_thread_init(void); int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); @@ -177,6 +179,7 @@ extern void cpu_maps_update_done(void); #define cpu_notifier_register_done cpu_maps_update_done #else /* CONFIG_SMP */ +#define cpuhp_tasks_frozen 0 #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) #define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) @@ -215,10 +218,6 @@ static inline void cpu_notifier_register_done(void) { } -static inline void smpboot_thread_init(void) -{ -} - #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -265,11 +264,6 @@ static inline int disable_nonboot_cpus(void) { return 0; } static inline void enable_nonboot_cpus(void) {} #endif /* !CONFIG_PM_SLEEP_SMP */ -enum cpuhp_state { - CPUHP_OFFLINE, - CPUHP_ONLINE, -}; - void cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); @@ -280,14 +274,15 @@ void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); void arch_cpu_idle_dead(void); -DECLARE_PER_CPU(bool, cpu_dead_idle); - int cpu_report_state(int cpu); int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); #ifdef CONFIG_HOTPLUG_CPU bool cpu_wait_death(unsigned int cpu, int seconds); bool cpu_report_death(void); +void cpuhp_report_idle_dead(void); +#else +static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h new file mode 100644 index 000000000000..5d68e15e46b7 --- /dev/null +++ b/include/linux/cpuhotplug.h @@ -0,0 +1,93 @@ +#ifndef __CPUHOTPLUG_H +#define __CPUHOTPLUG_H + +enum cpuhp_state { + CPUHP_OFFLINE, + CPUHP_CREATE_THREADS, + CPUHP_NOTIFY_PREPARE, + CPUHP_BRINGUP_CPU, + CPUHP_AP_IDLE_DEAD, + CPUHP_AP_OFFLINE, + CPUHP_AP_NOTIFY_STARTING, + CPUHP_AP_ONLINE, + CPUHP_TEARDOWN_CPU, + CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_SMPBOOT_THREADS, + CPUHP_AP_NOTIFY_ONLINE, + CPUHP_AP_ONLINE_DYN, + CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, + CPUHP_ONLINE, +}; + +int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu)); + +/** + * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks + * @state: The state for which the calls are installed + * @name: Name of the callback (will be used in debug output) + * @startup: startup callback function + * @teardown: teardown callback function + * + * Installs the callback functions and invokes the startup callback on + * the present cpus which have already reached the @state. + */ +static inline int cpuhp_setup_state(enum cpuhp_state state, + const char *name, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu)) +{ + return __cpuhp_setup_state(state, name, true, startup, teardown); +} + +/** + * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the + * callbacks + * @state: The state for which the calls are installed + * @name: Name of the callback. + * @startup: startup callback function + * @teardown: teardown callback function + * + * Same as @cpuhp_setup_state except that no calls are executed are invoked + * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n. + */ +static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state, + const char *name, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu)) +{ + return __cpuhp_setup_state(state, name, false, startup, teardown); +} + +void __cpuhp_remove_state(enum cpuhp_state state, bool invoke); + +/** + * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown + * @state: The state for which the calls are removed + * + * Removes the callback functions and invokes the teardown callback on + * the present cpus which have already reached the @state. + */ +static inline void cpuhp_remove_state(enum cpuhp_state state) +{ + __cpuhp_remove_state(state, true); +} + +/** + * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking + * teardown + * @state: The state for which the calls are removed + */ +static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state) +{ + __cpuhp_remove_state(state, false); +} + +#ifdef CONFIG_SMP +void cpuhp_online_idle(enum cpuhp_state state); +#else +static inline void cpuhp_online_idle(enum cpuhp_state state) { } +#endif + +#endif diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7781ce110503..c4b5f4b3f8f8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -409,9 +409,7 @@ static inline bool d_mountpoint(const struct dentry *dentry) */ static inline unsigned __d_entry_type(const struct dentry *dentry) { - unsigned type = READ_ONCE(dentry->d_flags); - smp_rmb(); - return type & DCACHE_ENTRY_TYPE; + return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 75857cda38e9..5e45cf930a3f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -386,7 +386,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; - if (!ops->free) + if (!ops->free || !cpu_addr) return; debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); @@ -641,31 +641,40 @@ static inline void dmam_release_declared_memory(struct device *dev) } #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ -static inline void *dma_alloc_writecombine(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t gfp) +static inline void *dma_alloc_wc(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp) { DEFINE_DMA_ATTRS(attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs); } +#ifndef dma_alloc_writecombine +#define dma_alloc_writecombine dma_alloc_wc +#endif -static inline void dma_free_writecombine(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr) +static inline void dma_free_wc(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr) { DEFINE_DMA_ATTRS(attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs); } +#ifndef dma_free_writecombine +#define dma_free_writecombine dma_free_wc +#endif -static inline int dma_mmap_writecombine(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, - size_t size) +static inline int dma_mmap_wc(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, + size_t size) { DEFINE_DMA_ATTRS(attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); } +#ifndef dma_mmap_writecombine +#define dma_mmap_writecombine dma_mmap_wc +#endif #ifdef CONFIG_NEED_DMA_MAP_STATE #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index c2b340e23f62..6d9df3f7e334 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -713,6 +713,18 @@ static inline void __ftrace_enabled_restore(int enabled) #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) +static inline unsigned long get_lock_parent_ip(void) +{ + unsigned long addr = CALLER_ADDR0; + + if (!in_lock_functions(addr)) + return addr; + addr = CALLER_ADDR1; + if (!in_lock_functions(addr)) + return addr; + return CALLER_ADDR2; +} + #ifdef CONFIG_IRQSOFF_TRACER extern void time_hardirqs_on(unsigned long a0, unsigned long a1); extern void time_hardirqs_off(unsigned long a0, unsigned long a1); diff --git a/include/linux/init.h b/include/linux/init.h index b449f378f995..aedb254abc37 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -142,6 +142,10 @@ void prepare_namespace(void); void __init load_default_modules(void); int __init init_rootfs(void); +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + extern void (*late_time_init)(void); extern bool initcall_debug; diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 24bea087e7af..afb45597fb5f 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -20,6 +20,7 @@ struct resource { resource_size_t end; const char *name; unsigned long flags; + unsigned long desc; struct resource *parent, *sibling, *child; }; @@ -49,12 +50,19 @@ struct resource { #define IORESOURCE_WINDOW 0x00200000 /* forwarded by bridge */ #define IORESOURCE_MUXED 0x00400000 /* Resource is software muxed */ +#define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */ +#define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */ + #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ + #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 /* No address assigned yet */ #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ +/* I/O resource extended types */ +#define IORESOURCE_SYSTEM_RAM (IORESOURCE_MEM|IORESOURCE_SYSRAM) + /* PnP IRQ specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IRQ_HIGHEDGE (1<<0) #define IORESOURCE_IRQ_LOWEDGE (1<<1) @@ -105,6 +113,22 @@ struct resource { /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ +/* + * I/O Resource Descriptors + * + * Descriptors are used by walk_iomem_res_desc() and region_intersects() + * for searching a specific resource range in the iomem table. Assign + * a new descriptor when a resource range supports the search interfaces. + * Otherwise, resource.desc must be set to IORES_DESC_NONE (0). + */ +enum { + IORES_DESC_NONE = 0, + IORES_DESC_CRASH_KERNEL = 1, + IORES_DESC_ACPI_TABLES = 2, + IORES_DESC_ACPI_NV_STORAGE = 3, + IORES_DESC_PERSISTENT_MEMORY = 4, + IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, +}; /* helpers to define resources */ #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ @@ -113,6 +137,7 @@ struct resource { .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ + .desc = IORES_DESC_NONE, \ } #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ @@ -170,6 +195,10 @@ static inline unsigned long resource_type(const struct resource *res) { return res->flags & IORESOURCE_TYPE_BITS; } +static inline unsigned long resource_ext_type(const struct resource *res) +{ + return res->flags & IORESOURCE_EXT_TYPE_BITS; +} /* True iff r1 completely contains r2 */ static inline bool resource_contains(struct resource *r1, struct resource *r2) { @@ -239,8 +268,8 @@ extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)); extern int -walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg, - int (*func)(u64, u64, void *)); +walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, + void *arg, int (*func)(u64, u64, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) diff --git a/include/linux/irq.h b/include/linux/irq.h index 3c1c96786248..c4de62348ff2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -133,17 +133,23 @@ struct irq_domain; * Use accessor functions to deal with it * @node: node index useful for balancing * @handler_data: per-IRQ data for the irq_chip methods - * @affinity: IRQ affinity on SMP + * @affinity: IRQ affinity on SMP. If this is an IPI + * related irq, then this is the mask of the + * CPUs to which an IPI can be sent. * @msi_desc: MSI descriptor + * @ipi_offset: Offset of first IPI target cpu in @affinity. Optional. */ struct irq_common_data { - unsigned int state_use_accessors; + unsigned int __private state_use_accessors; #ifdef CONFIG_NUMA unsigned int node; #endif void *handler_data; struct msi_desc *msi_desc; cpumask_var_t affinity; +#ifdef CONFIG_GENERIC_IRQ_IPI + unsigned int ipi_offset; +#endif }; /** @@ -208,7 +214,7 @@ enum { IRQD_FORWARDED_TO_VCPU = (1 << 20), }; -#define __irqd_to_state(d) ((d)->common->state_use_accessors) +#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) static inline bool irqd_is_setaffinity_pending(struct irq_data *d) { @@ -299,6 +305,8 @@ static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; } +#undef __irqd_to_state + static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) { return d->hwirq; @@ -341,6 +349,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_get_irqchip_state: return the internal state of an interrupt * @irq_set_irqchip_state: set the internal state of a interrupt * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine + * @ipi_send_single: send a single IPI to destination cpus + * @ipi_send_mask: send an IPI to destination cpus in cpumask * @flags: chip specific flags */ struct irq_chip { @@ -385,6 +395,9 @@ struct irq_chip { int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); + void (*ipi_send_single)(struct irq_data *data, unsigned int cpu); + void (*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest); + unsigned long flags; }; @@ -934,4 +947,12 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc, return readl(gc->reg_base + reg_offset); } +/* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */ +#define INVALID_HWIRQ (~0UL) +irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); +int __ipi_send_single(struct irq_desc *desc, unsigned int cpu); +int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest); +int ipi_send_single(unsigned int virq, unsigned int cpu); +int ipi_send_mask(unsigned int virq, const struct cpumask *dest); + #endif /* _LINUX_IRQ_H */ diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index ce824db48d64..80f89e4a29ac 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -261,9 +261,6 @@ extern void gic_write_compare(cycle_t cnt); extern void gic_write_cpu_compare(cycle_t cnt, int cpu); extern void gic_start_count(void); extern void gic_stop_count(void); -extern void gic_send_ipi(unsigned int intr); -extern unsigned int plat_ipi_call_int_xlate(unsigned int); -extern unsigned int plat_ipi_resched_int_xlate(unsigned int); extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 04579d9fbce4..ed48594e96d2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -74,6 +74,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_PCI_MSI, DOMAIN_BUS_PLATFORM_MSI, DOMAIN_BUS_NEXUS, + DOMAIN_BUS_IPI, }; /** @@ -172,6 +173,12 @@ enum { /* Core calls alloc/free recursive through the domain hierarchy. */ IRQ_DOMAIN_FLAG_AUTO_RECURSIVE = (1 << 1), + /* Irq domain is an IPI domain with virq per cpu */ + IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), + + /* Irq domain is an IPI domain with single virq */ + IRQ_DOMAIN_FLAG_IPI_SINGLE = (1 << 3), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -206,6 +213,8 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); +extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, + irq_hw_number_t hwirq, int node); static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) { @@ -335,6 +344,11 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); +/* IPI functions */ +unsigned int irq_reserve_ipi(struct irq_domain *domain, + const struct cpumask *dest); +void irq_destroy_ipi(unsigned int irq); + /* V2 interfaces to support hierarchy IRQ domains. */ extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); @@ -400,6 +414,22 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY; } + +static inline bool irq_domain_is_ipi(struct irq_domain *domain) +{ + return domain->flags & + (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); +} + +static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_IPI_PER_CPU; +} + +static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE; +} #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline void irq_domain_activate_irq(struct irq_data *data) { } static inline void irq_domain_deactivate_irq(struct irq_data *data) { } @@ -413,6 +443,21 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return false; } + +static inline bool irq_domain_is_ipi(struct irq_domain *domain) +{ + return false; +} + +static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) +{ + return false; +} + +static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) +{ + return false; +} #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* CONFIG_IRQ_DOMAIN */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4b9f85c963d0..0fdc798e3ff7 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -1,6 +1,7 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H +#include <linux/sched.h> #include <linux/types.h> struct kmem_cache; @@ -13,7 +14,6 @@ struct vm_struct; #include <asm/kasan.h> #include <asm/pgtable.h> -#include <linux/sched.h> extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; @@ -43,6 +43,8 @@ static inline void kasan_disable_current(void) void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_unpoison_task_stack(struct task_struct *task); + void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); @@ -66,6 +68,8 @@ void kasan_free_shadow(const struct vm_struct *vm); static inline void kasan_unpoison_shadow(const void *address, size_t size) {} +static inline void kasan_unpoison_task_stack(struct task_struct *task) {} + static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 861f690aa791..5276fe0916fc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -25,6 +25,7 @@ #include <linux/irqflags.h> #include <linux/context_tracking.h> #include <linux/irqbypass.h> +#include <linux/swait.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -218,7 +219,7 @@ struct kvm_vcpu { int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; unsigned char fpu_counter; - wait_queue_head_t wq; + struct swait_queue_head wq; struct pid *pid; int sigset_active; sigset_t sigset; @@ -782,7 +783,7 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) } #endif -static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) +static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_WQP return vcpu->arch.wqp; diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index e23121f9d82a..59ccab297ae0 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -37,6 +37,9 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter) void clear_all_latency_tracing(struct task_struct *p); +extern int sysctl_latencytop(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + #else static inline void diff --git a/include/linux/libata.h b/include/linux/libata.h index bec2abbd7ab2..2c4ebef79d0c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -720,7 +720,7 @@ struct ata_device { union { u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ - }; + } ____cacheline_aligned; /* DEVSLP Timing Variables from Identify Device Data Log */ u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; diff --git a/include/linux/list.h b/include/linux/list.h index 30cf4200ab40..5356f4d661a7 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -113,17 +113,6 @@ extern void __list_del_entry(struct list_head *entry); extern void list_del(struct list_head *entry); #endif -#ifdef CONFIG_DEBUG_LIST -/* - * See devm_memremap_pages() which wants DEBUG_LIST=y to assert if one - * of the pages it allocates is ever passed to list_add() - */ -extern void list_force_poison(struct list_head *entry); -#else -/* fallback to the less strict LIST_POISON* definitions */ -#define list_force_poison list_del -#endif - /** * list_replace - replace old entry by new one * @old : the element to be replaced diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 4dca42fd32f5..d026b190c530 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -261,7 +261,6 @@ struct held_lock { /* * Initialization, self-test and debugging-output methods: */ -extern void lockdep_init(void); extern void lockdep_info(void); extern void lockdep_reset(void); extern void lockdep_reset_lock(struct lockdep_map *lock); @@ -392,7 +391,6 @@ static inline void lockdep_on(void) # define lockdep_set_current_reclaim_state(g) do { } while (0) # define lockdep_clear_current_reclaim_state() do { } while (0) # define lockdep_trace_alloc(g) do { } while (0) -# define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) # define lockdep_init_map(lock, name, key, sub) \ do { (void)(name); (void)(key); } while (0) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 51f1e540fc2b..58eef02edc7e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4245,7 +4245,9 @@ struct mlx5_ifc_modify_tir_bitmask_bits { u8 reserved_at_20[0x1b]; u8 self_lb_en[0x1]; - u8 reserved_at_3c[0x3]; + u8 reserved_at_3c[0x1]; + u8 hash[0x1]; + u8 reserved_at_3e[0x1]; u8 lro[0x1]; }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 516e14944339..3579d1e2fe3a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -387,7 +387,8 @@ enum { REGION_MIXED, }; -int region_intersects(resource_size_t offset, size_t size, const char *type); +int region_intersects(resource_size_t offset, size_t size, unsigned long flags, + unsigned long desc); /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); @@ -2138,6 +2139,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr, int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 624b78b848b8..944b2b37313b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -566,10 +566,26 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm) } #endif -struct vm_special_mapping -{ - const char *name; +struct vm_fault; + +struct vm_special_mapping { + const char *name; /* The name, e.g. "[vdso]". */ + + /* + * If .fault is not provided, this points to a + * NULL-terminated array of pages that back the special mapping. + * + * This must not be NULL unless .fault is provided. + */ struct page **pages; + + /* + * If non-NULL, then this is called to resolve page faults + * on the special mapping. If used, .pages is not checked. + */ + int (*fault)(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, + struct vm_fault *vmf); }; enum tlb_flush_reason { diff --git a/include/linux/notifier.h b/include/linux/notifier.h index d14a4c362465..4149868de4e6 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -47,6 +47,8 @@ * runtime initialization. */ +struct notifier_block; + typedef int (*notifier_fn_t)(struct notifier_block *nb, unsigned long action, void *data); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f5c5a3fa2c81..79ec7bbf0155 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -468,6 +468,7 @@ struct perf_event { int group_flags; struct perf_event *group_leader; struct pmu *pmu; + void *pmu_private; enum perf_event_active_state state; unsigned int attach_state; @@ -1109,12 +1110,6 @@ static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } #endif -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL) -extern bool perf_event_can_stop_tick(void); -#else -static inline bool perf_event_can_stop_tick(void) { return true; } -#endif - #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); #else diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 907f3fd191ac..62d44c176071 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer); void run_posix_cpu_timers(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); - -bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk); - void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval); diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 54bf1484d41f..35ac903956c7 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -111,22 +111,17 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, kt->nsec = ts.tv_nsec; } -#ifdef CONFIG_NTP_PPS - static inline void pps_get_ts(struct pps_event_time *ts) { - ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real); -} + struct system_time_snapshot snap; -#else /* CONFIG_NTP_PPS */ - -static inline void pps_get_ts(struct pps_event_time *ts) -{ - ktime_get_real_ts64(&ts->ts_real); + ktime_get_snapshot(&snap); + ts->ts_real = ktime_to_timespec64(snap.real); +#ifdef CONFIG_NTP_PPS + ts->ts_raw = ktime_to_timespec64(snap.raw); +#endif } -#endif /* CONFIG_NTP_PPS */ - /* Subtract known time delay from PPS event time(s) */ static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta) { diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index b8b73066d137..6b15e168148a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -38,6 +38,7 @@ struct ptp_clock_request { }; }; +struct system_device_crosststamp; /** * struct ptp_clock_info - decribes a PTP hardware clock * @@ -67,6 +68,11 @@ struct ptp_clock_request { * @gettime64: Reads the current time from the hardware clock. * parameter ts: Holds the result. * + * @getcrosststamp: Reads the current time from the hardware clock and + * system clock simultaneously. + * parameter cts: Contains timestamp (device,system) pair, + * where system time is realtime and monotonic. + * * @settime64: Set the current time on the hardware clock. * parameter ts: Time value to set. * @@ -105,6 +111,8 @@ struct ptp_clock_info { int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*getcrosststamp)(struct ptp_clock_info *ptp, + struct system_device_crosststamp *cts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 14e6f47ee16f..2657aff2725b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -332,9 +332,7 @@ void rcu_init(void); void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); -struct notifier_block; -int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); +void rcu_report_dead(unsigned int cpu); #ifndef CONFIG_TINY_RCU void rcu_end_inkernel_boot(void); @@ -360,8 +358,6 @@ void rcu_user_exit(void); #else static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } -static inline void rcu_user_hooks_switch(struct task_struct *prev, - struct task_struct *next) { } #endif /* CONFIG_NO_HZ_FULL */ #ifdef CONFIG_RCU_NOCB_CPU diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc3..c617ea12c6b7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -182,8 +182,6 @@ extern void update_cpu_load_nohz(int active); static inline void update_cpu_load_nohz(int active) { } #endif -extern unsigned long get_parent_ip(unsigned long addr); - extern void dump_cpu_task(int cpu); struct seq_file; @@ -719,6 +717,10 @@ struct signal_struct { /* Earliest-expiration cache. */ struct task_cputime cputime_expires; +#ifdef CONFIG_NO_HZ_FULL + unsigned long tick_dep_mask; +#endif + struct list_head cpu_timers[3]; struct pid *tty_old_pgrp; @@ -920,6 +922,10 @@ static inline int sched_info_on(void) #endif } +#ifdef CONFIG_SCHEDSTATS +void force_schedstat_enabled(void); +#endif + enum cpu_idle_type { CPU_IDLE, CPU_NOT_IDLE, @@ -1289,6 +1295,8 @@ struct sched_rt_entity { unsigned long timeout; unsigned long watchdog_stamp; unsigned int time_slice; + unsigned short on_rq; + unsigned short on_list; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED @@ -1329,10 +1337,6 @@ struct sched_dl_entity { * task has to wait for a replenishment to be performed at the * next firing of dl_timer. * - * @dl_new tells if a new instance arrived. If so we must - * start executing it with full runtime and reset its absolute - * deadline; - * * @dl_boosted tells if we are boosted due to DI. If so we are * outside bandwidth enforcement mechanism (but only until we * exit the critical section); @@ -1340,7 +1344,7 @@ struct sched_dl_entity { * @dl_yielded tells if task gave up the cpu before consuming * all its available runtime during the last job. */ - int dl_throttled, dl_new, dl_boosted, dl_yielded; + int dl_throttled, dl_boosted, dl_yielded; /* * Bandwidth enforcement timer. Each -deadline task has its @@ -1542,6 +1546,10 @@ struct task_struct { VTIME_SYS, } vtime_snap_whence; #endif + +#ifdef CONFIG_NO_HZ_FULL + unsigned long tick_dep_mask; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ u64 start_time; /* monotonic time in nsec */ u64 real_start_time; /* boot based time in nsec */ @@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { } #endif #ifdef CONFIG_NO_HZ_FULL -extern bool sched_can_stop_tick(void); extern u64 scheduler_tick_max_deferment(void); -#else -static inline bool sched_can_stop_tick(void) { return false; } #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c9e4731cf10b..4f080ab4f2cd 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -95,4 +95,8 @@ extern int sysctl_numa_balancing(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int sysctl_schedstats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + #endif /* _SCHED_SYSCTL_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4ce9ff7086f4..d3fcd4591ce4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1985,6 +1985,30 @@ static inline void skb_reserve(struct sk_buff *skb, int len) skb->tail += len; } +/** + * skb_tailroom_reserve - adjust reserved_tailroom + * @skb: buffer to alter + * @mtu: maximum amount of headlen permitted + * @needed_tailroom: minimum amount of reserved_tailroom + * + * Set reserved_tailroom so that headlen can be as large as possible but + * not larger than mtu and tailroom cannot be smaller than + * needed_tailroom. + * The required headroom should already have been reserved before using + * this function. + */ +static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu, + unsigned int needed_tailroom) +{ + SKB_LINEAR_ASSERT(skb); + if (mtu < skb_tailroom(skb) - needed_tailroom) + /* use at most mtu */ + skb->reserved_tailroom = skb_tailroom(skb) - mtu; + else + /* use up to all available space */ + skb->reserved_tailroom = needed_tailroom; +} + #define ENCAP_TYPE_ETHER 0 #define ENCAP_TYPE_IPPROTO 1 diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f5f80c5643ac..dc8eb63c6568 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -99,8 +99,23 @@ void process_srcu(struct work_struct *work); } /* - * define and init a srcu struct at build time. - * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it. + * Define and initialize a srcu struct at build time. + * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. + * + * Note that although DEFINE_STATIC_SRCU() hides the name from other + * files, the per-CPU variable rules nevertheless require that the + * chosen name be globally unique. These rules also prohibit use of + * DEFINE_STATIC_SRCU() within a function. If these rules are too + * restrictive, declare the srcu_struct manually. For example, in + * each file: + * + * static struct srcu_struct my_srcu; + * + * Then, before the first use of each my_srcu, manually initialize it: + * + * init_srcu_struct(&my_srcu); + * + * See include/linux/percpu-defs.h for the rules on per-CPU variables. */ #define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index eead8ab93c0a..881a79d52467 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -100,6 +100,7 @@ struct plat_stmmacenet_data { int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; + struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; diff --git a/include/linux/swait.h b/include/linux/swait.h new file mode 100644 index 000000000000..c1f9c62a8a50 --- /dev/null +++ b/include/linux/swait.h @@ -0,0 +1,172 @@ +#ifndef _LINUX_SWAIT_H +#define _LINUX_SWAIT_H + +#include <linux/list.h> +#include <linux/stddef.h> +#include <linux/spinlock.h> +#include <asm/current.h> + +/* + * Simple wait queues + * + * While these are very similar to the other/complex wait queues (wait.h) the + * most important difference is that the simple waitqueue allows for + * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold + * times. + * + * In order to make this so, we had to drop a fair number of features of the + * other waitqueue code; notably: + * + * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue; + * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right + * sleeper state. + * + * - the exclusive mode; because this requires preserving the list order + * and this is hard. + * + * - custom wake functions; because you cannot give any guarantees about + * random code. + * + * As a side effect of this; the data structures are slimmer. + * + * One would recommend using this wait queue where possible. + */ + +struct task_struct; + +struct swait_queue_head { + raw_spinlock_t lock; + struct list_head task_list; +}; + +struct swait_queue { + struct task_struct *task; + struct list_head task_list; +}; + +#define __SWAITQUEUE_INITIALIZER(name) { \ + .task = current, \ + .task_list = LIST_HEAD_INIT((name).task_list), \ +} + +#define DECLARE_SWAITQUEUE(name) \ + struct swait_queue name = __SWAITQUEUE_INITIALIZER(name) + +#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .task_list = LIST_HEAD_INIT((name).task_list), \ +} + +#define DECLARE_SWAIT_QUEUE_HEAD(name) \ + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name) + +extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name, + struct lock_class_key *key); + +#define init_swait_queue_head(q) \ + do { \ + static struct lock_class_key __key; \ + __init_swait_queue_head((q), #q, &__key); \ + } while (0) + +#ifdef CONFIG_LOCKDEP +# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ + ({ init_swait_queue_head(&name); name; }) +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) +#else +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ + DECLARE_SWAIT_QUEUE_HEAD(name) +#endif + +static inline int swait_active(struct swait_queue_head *q) +{ + return !list_empty(&q->task_list); +} + +extern void swake_up(struct swait_queue_head *q); +extern void swake_up_all(struct swait_queue_head *q); +extern void swake_up_locked(struct swait_queue_head *q); + +extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); +extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); +extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); + +extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); +extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); + +/* as per ___wait_event() but for swait, therefore "exclusive == 0" */ +#define ___swait_event(wq, condition, state, ret, cmd) \ +({ \ + struct swait_queue __wait; \ + long __ret = ret; \ + \ + INIT_LIST_HEAD(&__wait.task_list); \ + for (;;) { \ + long __int = prepare_to_swait_event(&wq, &__wait, state);\ + \ + if (condition) \ + break; \ + \ + if (___wait_is_interruptible(state) && __int) { \ + __ret = __int; \ + break; \ + } \ + \ + cmd; \ + } \ + finish_swait(&wq, &__wait); \ + __ret; \ +}) + +#define __swait_event(wq, condition) \ + (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ + schedule()) + +#define swait_event(wq, condition) \ +do { \ + if (condition) \ + break; \ + __swait_event(wq, condition); \ +} while (0) + +#define __swait_event_timeout(wq, condition, timeout) \ + ___swait_event(wq, ___wait_cond_timeout(condition), \ + TASK_UNINTERRUPTIBLE, timeout, \ + __ret = schedule_timeout(__ret)) + +#define swait_event_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __swait_event_timeout(wq, condition, timeout); \ + __ret; \ +}) + +#define __swait_event_interruptible(wq, condition) \ + ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \ + schedule()) + +#define swait_event_interruptible(wq, condition) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = __swait_event_interruptible(wq, condition); \ + __ret; \ +}) + +#define __swait_event_interruptible_timeout(wq, condition, timeout) \ + ___swait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, timeout, \ + __ret = schedule_timeout(__ret)) + +#define swait_event_interruptible_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __swait_event_interruptible_timeout(wq, \ + condition, timeout); \ + __ret; \ +}) + +#endif /* _LINUX_SWAIT_H */ diff --git a/include/linux/tick.h b/include/linux/tick.h index 97fd4e543846..21f73649a4dc 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void) tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); } +enum tick_dep_bits { + TICK_DEP_BIT_POSIX_TIMER = 0, + TICK_DEP_BIT_PERF_EVENTS = 1, + TICK_DEP_BIT_SCHED = 2, + TICK_DEP_BIT_CLOCK_UNSTABLE = 3 +}; + +#define TICK_DEP_MASK_NONE 0 +#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER) +#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS) +#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) +#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_enabled; extern int tick_nohz_tick_stopped(void); @@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void) return cpumask_any_and(housekeeping_mask, cpu_online_mask); } -extern void tick_nohz_full_kick(void); +extern void tick_nohz_dep_set(enum tick_dep_bits bit); +extern void tick_nohz_dep_clear(enum tick_dep_bits bit); +extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit); +extern void tick_nohz_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit); +extern void tick_nohz_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit); + +/* + * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases + * on top of static keys. + */ +static inline void tick_dep_set(enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set(bit); +} + +static inline void tick_dep_clear(enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear(bit); +} + +static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) +{ + if (tick_nohz_full_cpu(cpu)) + tick_nohz_dep_set_cpu(cpu, bit); +} + +static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) +{ + if (tick_nohz_full_cpu(cpu)) + tick_nohz_dep_clear_cpu(cpu, bit); +} + +static inline void tick_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set_task(tsk, bit); +} +static inline void tick_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear_task(tsk, bit); +} +static inline void tick_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set_signal(signal, bit); +} +static inline void tick_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear_signal(signal, bit); +} + extern void tick_nohz_full_kick_cpu(int cpu); -extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); #else static inline int housekeeping_any_cpu(void) @@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void) static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } + +static inline void tick_dep_set(enum tick_dep_bits bit) { } +static inline void tick_dep_clear(enum tick_dep_bits bit) { } +static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } +static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } +static inline void tick_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit) { } +static inline void tick_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit) { } +static inline void tick_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit) { } +static inline void tick_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit) { } + static inline void tick_nohz_full_kick_cpu(int cpu) { } -static inline void tick_nohz_full_kick(void) { } -static inline void tick_nohz_full_kick_all(void) { } static inline void __tick_nohz_task_switch(void) { } #endif diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 25247220b4b7..e88005459035 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -50,6 +50,7 @@ struct tk_read_base { * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events + * @cs_was_changed_seq: The sequence number of clocksource change events * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval @@ -91,6 +92,7 @@ struct timekeeper { ktime_t offs_tai; s32 tai_offset; unsigned int clock_was_set_seq; + u8 cs_was_changed_seq; ktime_t next_leap_ktime; struct timespec64 raw_time; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ec89d846324c..96f37bee3bc1 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -267,6 +267,64 @@ extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real); /* + * struct system_time_snapshot - simultaneous raw/real time capture with + * counter value + * @cycles: Clocksource counter value to produce the system times + * @real: Realtime system time + * @raw: Monotonic raw system time + * @clock_was_set_seq: The sequence number of clock was set events + * @cs_was_changed_seq: The sequence number of clocksource change events + */ +struct system_time_snapshot { + cycle_t cycles; + ktime_t real; + ktime_t raw; + unsigned int clock_was_set_seq; + u8 cs_was_changed_seq; +}; + +/* + * struct system_device_crosststamp - system/device cross-timestamp + * (syncronized capture) + * @device: Device time + * @sys_realtime: Realtime simultaneous with device time + * @sys_monoraw: Monotonic raw simultaneous with device time + */ +struct system_device_crosststamp { + ktime_t device; + ktime_t sys_realtime; + ktime_t sys_monoraw; +}; + +/* + * struct system_counterval_t - system counter value with the pointer to the + * corresponding clocksource + * @cycles: System counter value + * @cs: Clocksource corresponding to system counter value. Used by + * timekeeping code to verify comparibility of two cycle values + */ +struct system_counterval_t { + cycle_t cycles; + struct clocksource *cs; +}; + +/* + * Get cross timestamp between system clock and device clock + */ +extern int get_device_system_crosststamp( + int (*get_time_fn)(ktime_t *device_time, + struct system_counterval_t *system_counterval, + void *ctx), + void *ctx, + struct system_time_snapshot *history, + struct system_device_crosststamp *xtstamp); + +/* + * Simultaneously snapshot realtime and monotonic raw clocks + */ +extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); + +/* * Persistent clock related interfaces */ extern int persistent_clock_is_local; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 429fdfc3baf5..925730bc9fc1 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -568,6 +568,8 @@ enum { FILTER_DYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, + FILTER_COMM, + FILTER_CPU, }; extern int trace_event_raw_init(struct trace_event_call *call); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index acfdbf353a0b..be586c632a0c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -134,9 +134,6 @@ extern void syscall_unregfunc(void); void *it_func; \ void *__data; \ \ - if (!cpu_online(raw_smp_processor_id())) \ - return; \ - \ if (!(cond)) \ return; \ prercu; \ @@ -343,15 +340,19 @@ extern void syscall_unregfunc(void); * "void *__data, proto" as the callback prototype. */ #define DECLARE_TRACE_NOARGS(name) \ - __DECLARE_TRACE(name, void, , 1, void *__data, __data) + __DECLARE_TRACE(name, void, , \ + cpu_online(raw_smp_processor_id()), \ + void *__data, __data) #define DECLARE_TRACE(name, proto, args) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1, \ - PARAMS(void *__data, proto), \ - PARAMS(__data, args)) + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()), \ + PARAMS(void *__data, proto), \ + PARAMS(__data, args)) #define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) diff --git a/include/linux/wait.h b/include/linux/wait.h index ae71a769b89e..27d7a0ab5da3 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -338,7 +338,7 @@ do { \ schedule(); try_to_freeze()) /** - * wait_event - sleep (or freeze) until a condition gets true + * wait_event_freezable - sleep (or freeze) until a condition gets true * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b333c945e571..d0b5ca5d4e08 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_io(struct writeback_control *wbc, struct page *page, size_t bytes); +void cgroup_writeback_umount(void); /** * inode_attach_wb - associate an inode with its wb @@ -301,6 +302,10 @@ static inline void wbc_account_io(struct writeback_control *wbc, { } +static inline void cgroup_writeback_umount(void) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ /* |