diff options
Diffstat (limited to 'include/linux')
366 files changed, 7640 insertions, 3168 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d2f0bed7a06..bfacb9475aac 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -324,6 +324,17 @@ int acpi_unmap_cpu(int cpu); acpi_handle acpi_get_processor_handle(int cpu); +/** + * acpi_get_cpu_uid() - Get ACPI Processor UID of from MADT table + * @cpu: Logical CPU number (0-based) + * @uid: Pointer to store ACPI Processor UID + * + * Return: 0 on success (ACPI Processor ID stored in *uid); + * -EINVAL if CPU number is invalid or out of range; + * -ENODEV if ACPI Processor UID for the CPU is not found. + */ +int acpi_get_cpu_uid(unsigned int cpu, u32 *uid); + #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif @@ -791,6 +802,14 @@ const char *acpi_get_subsystem_id(acpi_handle handle); int acpi_mrrm_max_mem_region(void); #endif +#define ACPI_CMOS_RTC_IDS \ + { "PNP0B00", }, \ + { "PNP0B01", }, \ + { "PNP0B02", }, \ + { "", } + +extern bool cmos_rtc_platform_device_present; + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 @@ -1116,6 +1135,8 @@ static inline int acpi_mrrm_max_mem_region(void) return 1; } +#define cmos_rtc_platform_device_present false + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HMAT diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 7f00c5285a32..f92a36187a52 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -5,6 +5,7 @@ #define __LINUX_ARM_MPAM_H #include <linux/acpi.h> +#include <linux/resctrl_types.h> #include <linux/types.h> struct mpam_msc; @@ -49,6 +50,37 @@ static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, } #endif +bool resctrl_arch_alloc_capable(void); +bool resctrl_arch_mon_capable(void); + +void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid); +void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid); +void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid); +void resctrl_arch_sched_in(struct task_struct *tsk); +bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid); +bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid); +u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid); +void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid); +u32 resctrl_arch_system_num_rmid_idx(void); + +struct rdt_resource; +void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid); +void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx); + +/* + * The CPU configuration for MPAM is cheap to write, and is only written if it + * has changed. No need for fine grained enables. + */ +static inline void resctrl_arch_enable_mon(void) { } +static inline void resctrl_arch_disable_mon(void) { } +static inline void resctrl_arch_enable_alloc(void) { } +static inline void resctrl_arch_disable_alloc(void) { } + +static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) +{ + return val; +} + /** * mpam_register_requestor() - Register a requestor with the MPAM driver * @partid_max: The maximum PARTID value the requestor can generate. diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 70807c679f1a..82a32526df64 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -309,17 +309,19 @@ struct atm_ioctl { /** * register_atm_ioctl - register handler for ioctl operations + * @ioctl: ioctl handler to register * * Special (non-device) handlers of ioctl's should * register here. If you're a normal device, you should * set .ioctl in your atmdev_ops instead. */ -void register_atm_ioctl(struct atm_ioctl *); +void register_atm_ioctl(struct atm_ioctl *ioctl); /** * deregister_atm_ioctl - remove the ioctl handler + * @ioctl: ioctl handler to deregister */ -void deregister_atm_ioctl(struct atm_ioctl *); +void deregister_atm_ioctl(struct atm_ioctl *ioctl); /* register_atmdevice_notifier - register atm_dev notify events diff --git a/include/linux/audit.h b/include/linux/audit.h index b642b5faca65..803b0183d98d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -15,7 +15,16 @@ #include <uapi/linux/audit.h> #include <uapi/linux/fanotify.h> -#define AUDIT_INO_UNSET ((unsigned long)-1) +#define AUDIT_STATUS_ALL (AUDIT_STATUS_ENABLED | \ + AUDIT_STATUS_FAILURE | \ + AUDIT_STATUS_PID | \ + AUDIT_STATUS_RATE_LIMIT | \ + AUDIT_STATUS_BACKLOG_LIMIT | \ + AUDIT_STATUS_BACKLOG_WAIT_TIME | \ + AUDIT_STATUS_LOST | \ + AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL) + +#define AUDIT_INO_UNSET ((u64)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) struct audit_sig_info { diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 4086afd0cc6b..bc09b55e3682 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -271,6 +271,8 @@ struct auxiliary_device *__devm_auxiliary_device_create(struct device *dev, __devm_auxiliary_device_create(dev, KBUILD_MODNAME, devname, \ platform_data, 0) +bool dev_is_auxiliary(struct device *dev); + /** * module_auxiliary_driver() - Helper macro for registering an auxiliary driver * @__auxiliary_driver: auxiliary driver struct diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index 407f7005e6d6..8bcb9b726262 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -4,6 +4,6 @@ #include <uapi/linux/auxvec.h> -#define AT_VECTOR_SIZE_BASE 22 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 24 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif /* _LINUX_AUXVEC_H */ diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c88fd4d37d1f..a06b93446d10 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -237,7 +237,7 @@ static inline void wb_get(struct bdi_writeback *wb) } /** - * wb_put - decrement a wb's refcount + * wb_put_many - decrement a wb's refcount * @wb: bdi_writeback to put * @nr: number of references to put */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0c8342747cab..5b7d12b40d5e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -136,6 +136,19 @@ static inline bool mapping_can_writeback(struct address_space *mapping) return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; } +/* Must not be used by file systems that support cgroup writeback */ +static inline int bdi_wb_dirty_exceeded(struct backing_dev_info *bdi) +{ + return bdi->wb.dirty_exceeded; +} + +/* Must not be used by file systems that support cgroup writeback */ +static inline void bdi_wb_stat_mod(struct inode *inode, enum wb_stat_item item, + s64 amount) +{ + wb_stat_mod(&inode_to_bdi(inode)->wb, item, amount); +} + #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h index 1476a6ed1bfd..c939cd222730 100644 --- a/include/linux/backing-file.h +++ b/include/linux/backing-file.h @@ -18,10 +18,10 @@ struct backing_file_ctx { void (*end_write)(struct kiocb *iocb, ssize_t); }; -struct file *backing_file_open(const struct path *user_path, int flags, +struct file *backing_file_open(const struct file *user_file, int flags, const struct path *real_path, const struct cred *cred); -struct file *backing_tmpfile_open(const struct path *user_path, int flags, +struct file *backing_tmpfile_open(const struct file *user_file, int flags, const struct path *real_parentpath, umode_t mode, const struct cred *cred); ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index 21e4652dcfd2..af5178434ec6 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -78,7 +78,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter); int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta); void bio_integrity_unmap_user(struct bio *bio); -bool bio_integrity_prep(struct bio *bio); +void bio_integrity_prep(struct bio *bio, unsigned int action); void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); void bio_integrity_trim(struct bio *bio); int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask); @@ -104,9 +104,8 @@ static inline void bio_integrity_unmap_user(struct bio *bio) { } -static inline bool bio_integrity_prep(struct bio *bio) +static inline void bio_integrity_prep(struct bio *bio, unsigned int action) { - return true; } static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, @@ -144,5 +143,12 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer); void bio_integrity_free_buf(struct bio_integrity_payload *bip); +void bio_integrity_setup_default(struct bio *bio); + +unsigned int fs_bio_integrity_alloc(struct bio *bio); +void fs_bio_integrity_free(struct bio *bio); +void fs_bio_integrity_generate(struct bio *bio); +int fs_bio_integrity_verify(struct bio *bio, sector_t sector, + unsigned int size); #endif /* _LINUX_BIO_INTEGRITY_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 36a3f2275ecd..97d747320b35 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -350,8 +350,7 @@ extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, - blk_opf_t opf, gfp_t gfp_mask, - struct bio_set *bs); + blk_opf_t opf, gfp_t gfp, struct bio_set *bs); struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask); extern void bio_put(struct bio *); @@ -433,6 +432,8 @@ extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_reuse(struct bio *bio, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); +void bio_await(struct bio *bio, void *priv, + void (*submit)(struct bio *bio, void *priv)); int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, unsigned off); @@ -474,7 +475,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter); +int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen); void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty); extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index b0395e4ccf90..b007d54a9036 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -46,6 +46,7 @@ struct device; * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 * bitmap_weighted_or(dst, src1, src2, nbits) *dst = *src1 | *src2. Returns Hamming Weight of dst + * bitmap_weighted_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2. Returns Hamming Weight of dst * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) * bitmap_complement(dst, src, nbits) *dst = ~(*src) @@ -57,6 +58,7 @@ struct device; * bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap * bitmap_weight_andnot(src1, src2, nbits) Hamming Weight of andnot'ed bitmap + * bitmap_weight_from(src, start, end) Hamming Weight starting from @start * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area @@ -168,6 +170,8 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); unsigned int __bitmap_weighted_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); +unsigned int __bitmap_weighted_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, @@ -353,6 +357,18 @@ unsigned int bitmap_weighted_or(unsigned long *dst, const unsigned long *src1, } static __always_inline +unsigned int bitmap_weighted_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) { + *dst = *src1 ^ *src2; + return hweight_long(*dst & BITMAP_LAST_WORD_MASK(nbits)); + } else { + return __bitmap_weighted_xor(dst, src1, src2, nbits); + } +} + +static __always_inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { @@ -479,6 +495,38 @@ unsigned long bitmap_weight_andnot(const unsigned long *src1, return __bitmap_weight_andnot(src1, src2, nbits); } +/** + * bitmap_weight_from - Hamming weight for a memory region + * @bitmap: The base address + * @start: The bitnumber to starts weighting + * @end: the bitmap size in bits + * + * Returns the number of set bits in the region. If @start >= @end, + * return >= end. + */ +static __always_inline +unsigned long bitmap_weight_from(const unsigned long *bitmap, + unsigned int start, unsigned int end) +{ + unsigned long w; + + if (unlikely(start >= end)) + return end; + + if (small_const_nbits(end)) + return hweight_long(*bitmap & GENMASK(end - 1, start)); + + bitmap += start / BITS_PER_LONG; + /* Opencode round_down() to not include math.h */ + end -= start & ~(BITS_PER_LONG - 1); + start %= BITS_PER_LONG; + w = bitmap_weight(bitmap, end); + if (start) + w -= hweight_long(*bitmap & BITMAP_LAST_WORD_MASK(start)); + + return w; +} + static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { diff --git a/include/linux/bitops.h b/include/linux/bitops.h index ea7898cc5903..657eab2725ce 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -179,9 +179,11 @@ static inline __u8 ror8(__u8 word, unsigned int shift) /** * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit * @value: value to sign extend - * @index: 0 based bit index (0<=index<32) to sign bit + * @index: 0 based bit index (0 <= index < 32) to sign bit * * This is safe to use for 16- and 8-bit types as well. + * + * Return: 32-bit sign extended value */ static __always_inline __s32 sign_extend32(__u32 value, int index) { @@ -192,7 +194,11 @@ static __always_inline __s32 sign_extend32(__u32 value, int index) /** * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit * @value: value to sign extend - * @index: 0 based bit index (0<=index<64) to sign bit + * @index: 0 based bit index (0 <= index < 64) to sign bit + * + * This is safe to use for 32-, 16- and 8-bit types as well. + * + * Return: 64-bit sign extended value */ static __always_inline __s64 sign_extend64(__u64 value, int index) { @@ -230,7 +236,7 @@ static inline int get_count_order_long(unsigned long l) /** * parity8 - get the parity of an u8 value - * @value: the value to be examined + * @val: the value to be examined * * Determine the parity of the u8 argument. * diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index c15b1ac62765..b1b530613c34 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -8,17 +8,13 @@ struct request; -/* - * Maximum contiguous integrity buffer allocation. - */ -#define BLK_INTEGRITY_MAX_SIZE SZ_2M - enum blk_integrity_flags { BLK_INTEGRITY_NOVERIFY = 1 << 0, BLK_INTEGRITY_NOGENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, BLK_INTEGRITY_REF_TAG = 1 << 3, BLK_INTEGRITY_STACKED = 1 << 4, + BLK_SPLIT_INTERVAL_CAPABLE = 1 << 5, }; const char *blk_integrity_profile_name(struct blk_integrity *bi); @@ -180,4 +176,27 @@ static inline struct bio_vec rq_integrity_vec(struct request *rq) } #endif /* CONFIG_BLK_DEV_INTEGRITY */ +enum bio_integrity_action { + BI_ACT_BUFFER = (1u << 0), /* allocate buffer */ + BI_ACT_CHECK = (1u << 1), /* generate / verify PI */ + BI_ACT_ZERO = (1u << 2), /* zero buffer */ +}; + +/** + * bio_integrity_action - return the integrity action needed for a bio + * @bio: bio to operate on + * + * Returns the mask of integrity actions (BI_ACT_*) that need to be performed + * for @bio. + */ +unsigned int __bio_integrity_action(struct bio *bio); +static inline unsigned int bio_integrity_action(struct bio *bio) +{ + if (!blk_get_integrity(bio->bi_bdev->bd_disk)) + return 0; + if (bio_integrity(bio)) + return 0; + return __bio_integrity_action(bio); +} + #endif /* _LINUX_BLK_INTEGRITY_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d463b9b5a0a5..890128cdea1c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -13,6 +13,7 @@ #include <linux/minmax.h> #include <linux/timer.h> #include <linux/workqueue.h> +#include <linux/completion.h> #include <linux/wait.h> #include <linux/bio.h> #include <linux/gfp.h> @@ -38,6 +39,7 @@ struct blk_flush_queue; struct kiocb; struct pr_ops; struct rq_qos; +struct hd_geometry; struct blk_report_zones_args; struct blk_queue_stats; struct blk_stat_callback; @@ -200,10 +202,14 @@ struct gendisk { u8 __rcu *zones_cond; unsigned int zone_wplugs_hash_bits; atomic_t nr_zone_wplugs; - spinlock_t zone_wplugs_lock; + spinlock_t zone_wplugs_hash_lock; struct mempool *zone_wplugs_pool; struct hlist_head *zone_wplugs_hash; struct workqueue_struct *zone_wplugs_wq; + spinlock_t zone_wplugs_list_lock; + struct list_head zone_wplugs_list; + struct task_struct *zone_wplugs_worker; + struct completion zone_wplugs_worker_bio_done; #endif /* CONFIG_BLK_DEV_ZONED */ #if IS_ENABLED(CONFIG_CDROM) @@ -502,7 +508,7 @@ struct request_queue { /* hw dispatch queues */ unsigned int nr_hw_queues; - struct blk_mq_hw_ctx * __rcu *queue_hw_ctx; + struct blk_mq_hw_ctx * __rcu *queue_hw_ctx __counted_by_ptr(nr_hw_queues); struct percpu_ref q_usage_counter; struct lock_class_key io_lock_cls_key; @@ -668,6 +674,7 @@ enum { QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ QUEUE_FLAG_BIO_ISSUE_TIME, /* record bio->issue_time_ns */ + QUEUE_FLAG_ZONED_QD1_WRITES, /* Limit zoned devices writes to QD=1 */ QUEUE_FLAG_MAX }; @@ -707,6 +714,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags) #define blk_queue_no_elv_switch(q) \ test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags) +#define blk_queue_zoned_qd1_writes(q) \ + test_bit(QUEUE_FLAG_ZONED_QD1_WRITES, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); @@ -1467,24 +1476,23 @@ static inline bool bdev_rot(struct block_device *bdev) return blk_queue_rot(bdev_get_queue(bdev)); } -static inline bool bdev_nonrot(struct block_device *bdev) +static inline bool bdev_synchronous(struct block_device *bdev) { - return !bdev_rot(bdev); + return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS; } -static inline bool bdev_synchronous(struct block_device *bdev) +static inline bool bdev_has_integrity_csum(struct block_device *bdev) { - return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS; + struct queue_limits *lim = bdev_limits(bdev); + + return IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + lim->integrity.csum_type != BLK_INTEGRITY_CSUM_NONE; } static inline bool bdev_stable_writes(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && - q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) - return true; - return q->limits.features & BLK_FEAT_STABLE_WRITES; + return bdev_has_integrity_csum(bdev) || + (bdev_limits(bdev)->features & BLK_FEAT_STABLE_WRITES); } static inline bool blk_queue_write_cache(struct request_queue *q) @@ -1877,6 +1885,24 @@ static inline int bio_split_rw_at(struct bio *bio, return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); } +/* + * Maximum contiguous integrity buffer allocation. + */ +#define BLK_INTEGRITY_MAX_SIZE SZ_2M + +/* + * Maximum size of I/O that needs a block layer integrity buffer. Limited + * by the number of intervals for which we can fit the integrity buffer into + * the buffer size. Because the buffer is a single segment it is also limited + * by the maximum segment size. + */ +static inline unsigned int max_integrity_io_size(struct queue_limits *lim) +{ + return min_t(unsigned int, lim->max_segment_size, + (BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) << + lim->integrity.interval_exp); +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ diff --git a/include/linux/bnxt/ulp.h b/include/linux/bnxt/ulp.h new file mode 100644 index 000000000000..0851ad3394b0 --- /dev/null +++ b/include/linux/bnxt/ulp.h @@ -0,0 +1,144 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2016-2018 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#ifndef BNXT_ULP_H +#define BNXT_ULP_H + +#include <linux/auxiliary_bus.h> + +#define BNXT_MIN_ROCE_CP_RINGS 2 +#define BNXT_MIN_ROCE_STAT_CTXS 1 + +#define BNXT_MAX_ROCE_MSIX_VF 2 +#define BNXT_MAX_ROCE_MSIX_NPAR_PF 5 +#define BNXT_MAX_ROCE_MSIX 64 + +struct hwrm_async_event_cmpl; +struct bnxt; + +enum bnxt_auxdev_type { + BNXT_AUXDEV_RDMA = 0, + BNXT_AUXDEV_FWCTL, + __BNXT_AUXDEV_MAX +}; + +struct bnxt_aux_priv { + struct auxiliary_device aux_dev; + struct bnxt_en_dev *edev; + int id; +}; + +struct bnxt_msix_entry { + u32 vector; + u32 ring_idx; + u32 db_offset; +}; + +struct bnxt_ulp_ops { + /* async_notifier() cannot sleep (in BH context) */ + void (*ulp_async_notifier)(void *, struct hwrm_async_event_cmpl *); + void (*ulp_irq_stop)(void *, bool); + void (*ulp_irq_restart)(void *, struct bnxt_msix_entry *); +}; + +struct bnxt_fw_msg { + void *msg; + int msg_len; + void *resp; + int resp_max_len; + int timeout; +}; + +struct bnxt_ulp { + void *handle; + struct bnxt_ulp_ops __rcu *ulp_ops; + unsigned long *async_events_bmap; + u16 max_async_event_id; + u16 msix_requested; +}; + +struct bnxt_en_dev { + struct net_device *net; + struct pci_dev *pdev; + struct bnxt_msix_entry msix_entries[BNXT_MAX_ROCE_MSIX]; + u32 flags; + #define BNXT_EN_FLAG_ROCEV1_CAP 0x1 + #define BNXT_EN_FLAG_ROCEV2_CAP 0x2 + #define BNXT_EN_FLAG_ROCE_CAP (BNXT_EN_FLAG_ROCEV1_CAP | \ + BNXT_EN_FLAG_ROCEV2_CAP) + #define BNXT_EN_FLAG_ULP_STOPPED 0x8 + #define BNXT_EN_FLAG_VF 0x10 +#define BNXT_EN_VF(edev) ((edev)->flags & BNXT_EN_FLAG_VF) + #define BNXT_EN_FLAG_ROCE_VF_RES_MGMT 0x20 + #define BNXT_EN_FLAG_SW_RES_LMT 0x40 +#define BNXT_EN_SW_RES_LMT(edev) ((edev)->flags & BNXT_EN_FLAG_SW_RES_LMT) + + struct bnxt_ulp *ulp_tbl; + int l2_db_size; /* Doorbell BAR size in + * bytes mapped by L2 + * driver. + */ + int l2_db_size_nc; /* Doorbell BAR size in + * bytes mapped as non- + * cacheable. + */ + int l2_db_offset; /* Doorbell offset in + * bytes within + * l2_db_size_nc. + */ + u16 chip_num; + u16 hw_ring_stats_size; + u16 pf_port_id; + unsigned long en_state; /* Could be checked in + * RoCE driver suspend + * mode only. Will be + * updated in resume. + */ + void __iomem *bar0; + + u16 ulp_num_msix_vec; + u16 ulp_num_ctxs; + + /* serialize ulp operations */ + struct mutex en_dev_lock; +}; + +static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev) +{ + if (edev && rcu_access_pointer(edev->ulp_tbl->ulp_ops)) + return true; + return false; +} + +int bnxt_get_ulp_msix_num(struct bnxt *bp); +int bnxt_get_ulp_msix_num_in_use(struct bnxt *bp); +void bnxt_set_ulp_msix_num(struct bnxt *bp, int num); +int bnxt_get_ulp_stat_ctxs(struct bnxt *bp); +void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ctxs); +int bnxt_get_ulp_stat_ctxs_in_use(struct bnxt *bp); +void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp); +void bnxt_ulp_stop(struct bnxt *bp); +void bnxt_ulp_start(struct bnxt *bp); +void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs); +void bnxt_ulp_irq_stop(struct bnxt *bp); +void bnxt_ulp_irq_restart(struct bnxt *bp, int err); +void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl); +void bnxt_aux_devices_uninit(struct bnxt *bp); +void bnxt_aux_devices_del(struct bnxt *bp); +void bnxt_aux_devices_add(struct bnxt *bp); +void bnxt_aux_devices_init(struct bnxt *bp); +int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt_ulp_ops *ulp_ops, + void *handle); +void bnxt_unregister_dev(struct bnxt_en_dev *edev); +int bnxt_send_msg(struct bnxt_en_dev *edev, struct bnxt_fw_msg *fw_msg); +void bnxt_register_async_events(struct bnxt_en_dev *edev, + unsigned long *events_bmap, u16 max_id); +int bnxt_auxdev_id_alloc(struct bnxt *bp); +void bnxt_auxdev_id_free(struct bnxt *bp, int id); +#endif diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 25df9260d206..692a5acc2ffc 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -36,9 +36,9 @@ bool __init cmdline_has_extra_options(void); * The checksum will be used with the BOOTCONFIG_MAGIC and the size for * embedding the bootconfig in the initrd image. */ -static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size) +static inline __init uint32_t xbc_calc_checksum(const void *data, uint32_t size) { - unsigned char *p = data; + const unsigned char *p = data; uint32_t ret = 0; while (size--) @@ -66,7 +66,7 @@ struct xbc_node { /* Node tree access raw APIs */ struct xbc_node * __init xbc_root_node(void); -int __init xbc_node_index(struct xbc_node *node); +uint16_t __init xbc_node_index(struct xbc_node *node); struct xbc_node * __init xbc_node_get_parent(struct xbc_node *node); struct xbc_node * __init xbc_node_get_child(struct xbc_node *node); struct xbc_node * __init xbc_node_get_next(struct xbc_node *node); diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 2f535331f926..b2e79c2b41d5 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -184,7 +184,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, struct bpf_prog_array *array; array = rcu_access_pointer(cgrp->bpf.effective[type]); - return array != &bpf_empty_prog_array.hdr; + return array != &bpf_empty_prog_array; } /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b78b53198a2e..b4b703c90ca9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -124,7 +124,7 @@ struct bpf_map_ops { u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); - int (*map_check_btf)(const struct bpf_map *map, + int (*map_check_btf)(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); @@ -656,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map) map->ops->map_seq_show_elem; } -int map_check_no_btf(const struct bpf_map *map, +int map_check_no_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); @@ -1541,6 +1541,8 @@ bool bpf_has_frame_pointer(unsigned long ip); int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); +bool bpf_insn_is_indirect_target(const struct bpf_verifier_env *env, const struct bpf_prog *prog, + int insn_idx); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr, @@ -1854,6 +1856,10 @@ struct bpf_link_ops { * target hook is sleepable, we'll go through tasks trace RCU GP and * then "classic" RCU GP; this need for chaining tasks trace and * classic RCU GPs is designated by setting bpf_link->sleepable flag + * + * For non-sleepable tracepoint links we go through SRCU gp instead, + * since RCU is not used in that case. Sleepable tracepoints still + * follow the scheme above. */ void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); @@ -2365,18 +2371,13 @@ struct bpf_prog_array { struct bpf_prog_array_item items[]; }; -struct bpf_empty_prog_array { - struct bpf_prog_array hdr; - struct bpf_prog *null_prog; -}; - /* to avoid allocating empty bpf_prog_array for cgroups that * don't have bpf program attached use one global 'bpf_empty_prog_array' * It will not be modified the caller of bpf_prog_array_alloc() * (since caller requested prog_cnt == 0) * that pointer should be 'freed' by bpf_prog_array_free() */ -extern struct bpf_empty_prog_array bpf_empty_prog_array; +extern struct bpf_prog_array bpf_empty_prog_array; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); @@ -3946,6 +3947,9 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog) return prog->aux->func_idx != 0; } +const struct bpf_line_info *bpf_find_linfo(const struct bpf_prog *prog, u32 insn_off); +void bpf_get_linfo_file_line(struct btf *btf, const struct bpf_line_info *linfo, + const char **filep, const char **linep, int *nump); int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep, const char **linep, int *nump); struct bpf_prog *bpf_prog_find_from_stack(void); diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 85efa9772530..9e4f5c45c974 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -54,7 +54,6 @@ struct bpf_local_storage_map { u32 bucket_log; u16 elem_size; u16 cache_idx; - bool use_kmalloc_nolock; }; struct bpf_local_storage_data { @@ -86,8 +85,7 @@ struct bpf_local_storage_elem { */ }; atomic_t state; - bool use_kmalloc_nolock; - /* 3 bytes hole */ + /* 4 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. */ @@ -104,7 +102,6 @@ struct bpf_local_storage { rqspinlock_t lock; /* Protect adding/removing from the "list" */ u64 mem_charge; /* Copy of mem charged to owner. Protected by "lock" */ refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */ - bool use_kmalloc_nolock; }; /* U16_MAX is much more than enough for sk local storage @@ -137,8 +134,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr); struct bpf_map * bpf_local_storage_map_alloc(union bpf_attr *attr, - struct bpf_local_storage_cache *cache, - bool use_kmalloc_nolock); + struct bpf_local_storage_cache *cache); void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, @@ -176,7 +172,7 @@ u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, struct bpf_local_storage_cache *cache); -int bpf_local_storage_map_check_btf(const struct bpf_map *map, +int bpf_local_storage_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); @@ -192,7 +188,7 @@ int bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, - bool swap_uptrs, gfp_t gfp_flags); + bool swap_uptrs); void bpf_selem_free(struct bpf_local_storage_elem *selem, bool reuse_now); @@ -200,12 +196,11 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem, int bpf_local_storage_alloc(void *owner, struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *first_selem, - gfp_t gfp_flags); + struct bpf_local_storage_elem *first_selem); struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, - void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags); + void *value, u64 map_flags, bool swap_uptrs); u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map); diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index e45162ef59bb..4ce0d27f8ea2 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -14,6 +14,8 @@ struct bpf_mem_alloc { struct obj_cgroup *objcg; bool percpu; struct work_struct work; + void (*dtor_ctx_free)(void *ctx); + void *dtor_ctx; }; /* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects. @@ -32,6 +34,10 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg /* The percpu allocation with a specific unit size. */ int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); +void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, + void (*dtor)(void *obj, void *ctx), + void (*dtor_ctx_free)(void *ctx), + void *ctx); /* Check the allocation size for kmalloc equivalent allocator */ int bpf_mem_alloc_check_size(bool percpu, size_t size); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ef8e45a362d9..b148f816f25b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -38,10 +38,9 @@ struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; /* - * Fixed part of pointer offset, pointer types only. - * Or constant delta between "linked" scalars with the same ID. + * Constant delta between "linked" scalars with the same ID. */ - s32 off; + s32 delta; union { /* valid when type == PTR_TO_PACKET */ int range; @@ -146,9 +145,9 @@ struct bpf_reg_state { * Upper bit of ID is used to remember relationship between "linked" * registers. Example: * r1 = r2; both will have r1->id == r2->id == N - * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10 + * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->delta == 10 * r3 = r2; both will have r3->id == r2->id == N - * w3 += 10; r3->id == N | BPF_ADD_CONST32 and r3->off == 10 + * w3 += 10; r3->id == N | BPF_ADD_CONST32 and r3->delta == 10 */ #define BPF_ADD_CONST64 (1U << 31) #define BPF_ADD_CONST32 (1U << 30) @@ -221,14 +220,67 @@ enum bpf_stack_slot_type { STACK_DYNPTR, STACK_ITER, STACK_IRQ_FLAG, + STACK_POISON, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +/* 4-byte stack slot granularity for liveness analysis */ +#define BPF_HALF_REG_SIZE 4 +#define STACK_SLOT_SZ 4 +#define STACK_SLOTS (MAX_BPF_STACK / BPF_HALF_REG_SIZE) /* 128 */ + +typedef struct { + u64 v[2]; +} spis_t; + +#define SPIS_ZERO ((spis_t){}) +#define SPIS_ALL ((spis_t){{ U64_MAX, U64_MAX }}) + +static inline bool spis_is_zero(spis_t s) +{ + return s.v[0] == 0 && s.v[1] == 0; +} + +static inline bool spis_equal(spis_t a, spis_t b) +{ + return a.v[0] == b.v[0] && a.v[1] == b.v[1]; +} + +static inline spis_t spis_or(spis_t a, spis_t b) +{ + return (spis_t){{ a.v[0] | b.v[0], a.v[1] | b.v[1] }}; +} + +static inline spis_t spis_and(spis_t a, spis_t b) +{ + return (spis_t){{ a.v[0] & b.v[0], a.v[1] & b.v[1] }}; +} + +static inline spis_t spis_not(spis_t s) +{ + return (spis_t){{ ~s.v[0], ~s.v[1] }}; +} + +static inline bool spis_test_bit(spis_t s, u32 slot) +{ + return s.v[slot / 64] & BIT_ULL(slot % 64); +} + +static inline void spis_or_range(spis_t *mask, u32 lo, u32 hi) +{ + u32 w; + + for (w = lo; w <= hi && w < STACK_SLOTS; w++) + mask->v[w / 64] |= BIT_ULL(w % 64); +} + #define BPF_REGMASK_ARGS ((1 << BPF_REG_1) | (1 << BPF_REG_2) | \ (1 << BPF_REG_3) | (1 << BPF_REG_4) | \ (1 << BPF_REG_5)) +#define BPF_MAIN_FUNC (-1) + #define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) #define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) @@ -266,6 +318,7 @@ struct bpf_reference_state { struct bpf_retval_range { s32 minval; s32 maxval; + bool return_32bit; }; /* state of the program: @@ -424,7 +477,6 @@ struct bpf_verifier_state { bool speculative; bool in_sleepable; - bool cleaned; /* first and last insn idx of this verifier state */ u32 first_insn_idx; @@ -578,16 +630,17 @@ struct bpf_insn_aux_data { /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ - bool jmp_point; - bool prune_point; + u32 jmp_point:1; + u32 prune_point:1; /* ensure we check state equivalence and save state checkpoint and * this instruction, regardless of any heuristics */ - bool force_checkpoint; + u32 force_checkpoint:1; /* true if instruction is a call to a helper function that * accepts callback function as a parameter. */ - bool calls_callback; + u32 calls_callback:1; + u32 indirect_target:1; /* if it is an indirect jump target */ /* * CFG strongly connected component this instruction belongs to, * zero if it is a singleton SCC. @@ -595,6 +648,18 @@ struct bpf_insn_aux_data { u32 scc; /* registers alive before this instruction. */ u16 live_regs_before; + /* + * Bitmask of R0-R9 that hold known values at this instruction. + * const_reg_mask: scalar constants that fit in 32 bits. + * const_reg_map_mask: map pointers, val is map_index into used_maps[]. + * const_reg_subprog_mask: subprog pointers, val is subprog number. + * const_reg_vals[i] holds the 32-bit value for register i. + * Populated by compute_const_regs() pre-pass. + */ + u16 const_reg_mask; + u16 const_reg_map_mask; + u16 const_reg_subprog_mask; + u32 const_reg_vals[10]; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -652,7 +717,7 @@ enum priv_stack_mode { }; struct bpf_subprog_info { - /* 'start' has to be the first field otherwise find_subprog() won't work */ + const char *name; /* name extracted from BTF */ u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u32 postorder_start; /* The idx to the env->cfg.insn_postorder */ @@ -787,6 +852,8 @@ struct bpf_verifier_env { const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */ + /* subprog indices sorted in topological order: leaves first, callers last */ + int subprog_topo_order[BPF_MAX_SUBPROGS + 2]; union { struct bpf_idmap idmap_scratch; struct bpf_idset idset_scratch; @@ -805,6 +872,8 @@ struct bpf_verifier_env { } cfg; struct backtrack_state bt; struct bpf_jmp_history_entry *cur_hist_ent; + /* Per-callsite copy of parent's converged at_stack_in for cross-frame fills. */ + struct arg_track **callsite_at_stack; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ @@ -837,7 +906,9 @@ struct bpf_verifier_env { u64 scratched_stack_slots; u64 prev_log_pos, prev_insn_print_pos; /* buffer used to temporary hold constants as scalar registers */ - struct bpf_reg_state fake_reg[2]; + struct bpf_reg_state fake_reg[1]; + /* buffers used to save updated reg states while simulating branches */ + struct bpf_reg_state true_reg1, true_reg2, false_reg1, false_reg2; /* buffer used to generate temporary string representations, * e.g., in reg_type_str() to generate reg_type string */ @@ -863,6 +934,30 @@ static inline struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env return &env->subprog_info[subprog]; } +struct bpf_call_summary { + u8 num_params; + bool is_void; + bool fastcall; +}; + +static inline bool bpf_helper_call(const struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == 0; +} + +static inline bool bpf_pseudo_call(const struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == BPF_PSEUDO_CALL; +} + +static inline bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == BPF_PSEUDO_KFUNC_CALL; +} + __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, @@ -891,6 +986,41 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, bpf_log(&env->log, "verifier bug: " fmt "\n", ##args); \ }) +static inline void mark_prune_point(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].prune_point = true; +} + +static inline bool bpf_is_prune_point(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].prune_point; +} + +static inline void mark_force_checkpoint(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].force_checkpoint = true; +} + +static inline bool bpf_is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].force_checkpoint; +} + +static inline void mark_calls_callback(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].calls_callback = true; +} + +static inline bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].calls_callback; +} + +static inline void mark_jmp_point(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].jmp_point = true; +} + static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; @@ -932,6 +1062,11 @@ static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id) *btf_id = key & 0x7FFFFFFF; } +int bpf_check_btf_info_early(struct bpf_verifier_env *env, + const union bpf_attr *attr, bpfptr_t uattr); +int bpf_check_btf_info(struct bpf_verifier_env *env, + const union bpf_attr *attr, bpfptr_t uattr); + int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *prog, const struct bpf_prog *tgt_prog, @@ -941,6 +1076,93 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab); int mark_chain_precision(struct bpf_verifier_env *env, int regno); +int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx); +int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st); + +void bpf_clear_jmp_history(struct bpf_verifier_state *state); +int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state, + const struct bpf_verifier_state *src); +struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx); +void bpf_free_verifier_state(struct bpf_verifier_state *state, bool free_self); +void bpf_free_backedges(struct bpf_scc_visit *visit); +int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, + int insn_flags, u64 linked_regs); +void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist); +void bpf_mark_reg_not_init(const struct bpf_verifier_env *env, + struct bpf_reg_state *reg); +void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg); +void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env, + struct bpf_verifier_state *st); +void bpf_clear_singular_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st); +int bpf_mark_chain_precision(struct bpf_verifier_env *env, + struct bpf_verifier_state *starting_state, + int regno, bool *changed); + +static inline int bpf_get_spi(s32 off) +{ + return (-off - 1) / BPF_REG_SIZE; +} + +static inline struct bpf_func_state *bpf_func(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg) +{ + struct bpf_verifier_state *cur = env->cur_state; + + return cur->frame[reg->frameno]; +} + +/* Return IP for a given frame in a call stack */ +static inline u32 bpf_frame_insn_idx(struct bpf_verifier_state *st, u32 frame) +{ + return frame == st->curframe + ? st->insn_idx + : st->frame[frame + 1]->callsite; +} + +static inline bool bpf_is_jmp_point(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].jmp_point; +} + +static inline bool bpf_is_spilled_reg(const struct bpf_stack_state *stack) +{ + return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL; +} + +static inline bool bpf_is_spilled_scalar_reg(const struct bpf_stack_state *stack) +{ + return bpf_is_spilled_reg(stack) && stack->spilled_ptr.type == SCALAR_VALUE; +} + +static inline bool bpf_register_is_null(struct bpf_reg_state *reg) +{ + return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); +} + +static inline void bpf_bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg) +{ + bt->reg_masks[frame] |= 1 << reg; +} + +static inline void bpf_bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot) +{ + bt->stack_masks[frame] |= 1ull << slot; +} + +static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg) +{ + return bt->reg_masks[frame] & (1 << reg); +} + +static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot) +{ + return bt->stack_masks[frame] & (1ull << slot); +} + +bool bpf_map_is_rdonly(const struct bpf_map *map); +int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val, + bool is_ldsx); + #define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) /* extract base type from bpf_{arg, return, reg}_type. */ @@ -1077,22 +1299,194 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie u32 frameno, bool print_all); void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, u32 frameno); +u32 bpf_vlog_alignment(u32 pos); struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off); int bpf_jmp_offset(struct bpf_insn *insn); struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx); void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); -bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); +bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog); + +int bpf_find_subprog(struct bpf_verifier_env *env, int off); +int bpf_compute_const_regs(struct bpf_verifier_env *env); +int bpf_prune_dead_branches(struct bpf_verifier_env *env); +int bpf_check_cfg(struct bpf_verifier_env *env); +int bpf_compute_postorder(struct bpf_verifier_env *env); +int bpf_compute_scc(struct bpf_verifier_env *env); + +struct bpf_map_desc { + struct bpf_map *ptr; + int uid; +}; + +struct bpf_kfunc_call_arg_meta { + /* In parameters */ + struct btf *btf; + u32 func_id; + u32 kfunc_flags; + const struct btf_type *func_proto; + const char *func_name; + /* Out parameters */ + u32 ref_obj_id; + u8 release_regno; + bool r0_rdonly; + u32 ret_btf_id; + u64 r0_size; + u32 subprogno; + struct { + u64 value; + bool found; + } arg_constant; + + /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling, + * generally to pass info about user-defined local kptr types to later + * verification logic + * bpf_obj_drop/bpf_percpu_obj_drop + * Record the local kptr type to be drop'd + * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) + * Record the local kptr type to be refcount_incr'd and use + * arg_owning_ref to determine whether refcount_acquire should be + * fallible + */ + struct btf *arg_btf; + u32 arg_btf_id; + bool arg_owning_ref; + bool arg_prog; + + struct { + struct btf_field *field; + } arg_list_head; + struct { + struct btf_field *field; + } arg_rbtree_root; + struct { + enum bpf_dynptr_type type; + u32 id; + u32 ref_obj_id; + } initialized_dynptr; + struct { + u8 spi; + u8 frameno; + } iter; + struct bpf_map_desc map; + u64 mem_size; +}; + +int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id, + const struct bpf_func_proto **ptr); +int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env, s32 func_id, + s16 offset, struct bpf_kfunc_call_arg_meta *meta); +bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn); +bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn); +static inline bool bpf_is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ITER_NEXT; +} + +static inline bool bpf_is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_SLEEPABLE; +} +bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta); +struct bpf_iarray *bpf_iarray_realloc(struct bpf_iarray *old, size_t n_elem); +int bpf_copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off); +bool bpf_insn_is_cond_jump(u8 code); +bool bpf_is_may_goto_insn(struct bpf_insn *insn); + +void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn); +bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call, + struct bpf_call_summary *cs); +s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, + struct bpf_insn *insn, int arg, + int insn_idx); +s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, + struct bpf_insn *insn, int arg, + int insn_idx); +int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env); int bpf_stack_liveness_init(struct bpf_verifier_env *env); void bpf_stack_liveness_free(struct bpf_verifier_env *env); -int bpf_update_live_stack(struct bpf_verifier_env *env); -int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, u64 mask); -void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, u64 mask); -int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx); -int bpf_commit_stack_write_marks(struct bpf_verifier_env *env); int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st); bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi); -void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env); +int bpf_compute_live_registers(struct bpf_verifier_env *env); + +#define BPF_MAP_KEY_POISON (1ULL << 63) +#define BPF_MAP_KEY_SEEN (1ULL << 62) + +static inline bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) +{ + return aux->map_ptr_state.poison; +} + +static inline bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) +{ + return aux->map_ptr_state.unpriv; +} + +static inline bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux) +{ + return aux->map_key_state & BPF_MAP_KEY_POISON; +} + +static inline bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux) +{ + return !(aux->map_key_state & BPF_MAP_KEY_SEEN); +} + +static inline u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux) +{ + return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON); +} + +#define MAX_PACKET_OFF 0xffff +#define CALLER_SAVED_REGS 6 + +enum bpf_reg_arg_type { + SRC_OP, /* register is used as source operand */ + DST_OP, /* register is used as destination operand */ + DST_OP_NO_MARK /* same as above, check only, don't mark */ +}; + +#define MAX_KFUNC_DESCS 256 + +struct bpf_kfunc_desc { + struct btf_func_model func_model; + u32 func_id; + s32 imm; + u16 offset; + unsigned long addr; +}; + +struct bpf_kfunc_desc_tab { + /* Sorted by func_id (BTF ID) and offset (fd_array offset) during + * verification. JITs do lookups by bpf_insn, where func_id may not be + * available, therefore at the end of verification do_misc_fixups() + * sorts this by imm and offset. + */ + struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; + u32 nr_descs; +}; + +/* Functions exported from verifier.c, used by fixups.c */ +bool bpf_is_reg64(struct bpf_insn *insn, u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t); +void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len); +void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog); +bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env); +bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm); +int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset); +int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + struct bpf_insn *insn_buf, int insn_idx, int *cnt); + +/* Functions in fixups.c, called from bpf_check() */ +int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env); +int bpf_optimize_bpf_loop(struct bpf_verifier_env *env); +void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env); +int bpf_opt_remove_dead_code(struct bpf_verifier_env *env); +int bpf_opt_remove_nops(struct bpf_verifier_env *env); +int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, const union bpf_attr *attr); +int bpf_convert_ctx_accesses(struct bpf_verifier_env *env); +int bpf_jit_subprogs(struct bpf_verifier_env *env); +int bpf_fixup_call_args(struct bpf_verifier_env *env); +int bpf_do_misc_fixups(struct bpf_verifier_env *env); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 115a964f3006..174687c4c80a 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -266,6 +266,9 @@ #define BCM54XX_TOP_MISC_IDDQ_SD (1 << 2) #define BCM54XX_TOP_MISC_IDDQ_SR (1 << 3) +#define BCM54XX_TOP_MISC_MII_BUF_CNTL0 (MII_BCM54XX_EXP_SEL_TOP + 0x00) +#define BCM54XX_MII_BUF_CNTL0_AUTOGREEEN_EN BIT(0) + #define BCM54XX_TOP_MISC_LED_CTL (MII_BCM54XX_EXP_SEL_TOP + 0x0C) #define BCM54XX_LED4_SEL_INTR BIT(1) diff --git a/include/linux/bsg.h b/include/linux/bsg.h index ee2df73edf83..162730bfc2d8 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -7,13 +7,17 @@ struct bsg_device; struct device; struct request_queue; +struct io_uring_cmd; typedef int (bsg_sg_io_fn)(struct request_queue *, struct sg_io_v4 *hdr, bool open_for_write, unsigned int timeout); +typedef int (bsg_uring_cmd_fn)(struct request_queue *q, struct io_uring_cmd *ioucmd, + unsigned int issue_flags, bool open_for_write); + struct bsg_device *bsg_register_queue(struct request_queue *q, struct device *parent, const char *name, - bsg_sg_io_fn *sg_io_fn); + bsg_sg_io_fn *sg_io_fn, bsg_uring_cmd_fn *uring_cmd_fn); void bsg_unregister_queue(struct bsg_device *bcd); #endif /* _LINUX_BSG_H */ diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 139bdececdcf..af011db39ab3 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -217,7 +217,7 @@ BTF_SET8_END(name) #else -#define BTF_ID_LIST(name) static u32 __maybe_unused name[64]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[128]; #define BTF_ID(prefix, name) #define BTF_ID_FLAGS(prefix, name, ...) #define BTF_ID_UNUSED diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b16b88bfbc3e..e4939e33b4b5 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -73,8 +73,8 @@ struct buffer_head { bh_end_io_t *b_end_io; /* I/O completion */ void *b_private; /* reserved for b_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ - struct address_space *b_assoc_map; /* mapping this buffer is - associated with */ + struct mapping_metadata_bhs *b_mmb; /* head of the list of metadata bhs + * this buffer is associated with */ atomic_t b_count; /* users using this buffer_head */ spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to * serialise IO completion of other @@ -205,12 +205,12 @@ struct buffer_head *create_empty_buffers(struct folio *folio, void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); -/* Things to do with buffers at mapping->private_list */ -void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); -int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, - bool datasync); -int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, - bool datasync); +/* Things to do with metadata buffers list */ +void mmb_mark_buffer_dirty(struct buffer_head *bh, struct mapping_metadata_bhs *mmb); +int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb, + loff_t start, loff_t end, bool datasync); +int mmb_fsync(struct file *file, struct mapping_metadata_bhs *mmb, + loff_t start, loff_t end, bool datasync); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); static inline void clean_bdev_bh_alias(struct buffer_head *bh) @@ -515,10 +515,10 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio); void buffer_init(void); bool try_to_free_buffers(struct folio *folio); -int inode_has_buffers(struct inode *inode); -void invalidate_inode_buffers(struct inode *inode); -int remove_inode_buffers(struct inode *inode); -int sync_mapping_buffers(struct address_space *mapping); +void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping); +bool mmb_has_buffers(struct mapping_metadata_bhs *mmb); +void mmb_invalidate(struct mapping_metadata_bhs *mmb); +int mmb_sync(struct mapping_metadata_bhs *mmb); void invalidate_bh_lrus(void); void invalidate_bh_lrus_cpu(void); bool has_bh_in_lru(int cpu, void *dummy); @@ -528,10 +528,7 @@ extern int buffer_heads_over_limit; static inline void buffer_init(void) {} static inline bool try_to_free_buffers(struct folio *folio) { return true; } -static inline int inode_has_buffers(struct inode *inode) { return 0; } -static inline void invalidate_inode_buffers(struct inode *inode) {} -static inline int remove_inode_buffers(struct inode *inode) { return 1; } -static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline int mmb_sync(struct mapping_metadata_bhs *mmb) { return 0; } static inline void invalidate_bh_lrus(void) {} static inline void invalidate_bh_lrus_cpu(void) {} static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 2cfbb4c65c78..d3dc5dc5f916 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -32,7 +32,8 @@ /** * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied * error message. - * @condition: the condition which the compiler should know is false. + * @cond: the condition which the compiler should know is false. + * @msg: build-time error message * * See BUILD_BUG_ON for description. */ @@ -60,6 +61,7 @@ /** * static_assert - check integer constant expression at build time + * @expr: expression to be checked * * static_assert() is a wrapper for the C11 _Static_assert, with a * little macro magic to make the message optional (defaulting to the diff --git a/include/linux/bus/stm32_firewall.h b/include/linux/bus/stm32_firewall.h new file mode 100644 index 000000000000..e5fac85fe346 --- /dev/null +++ b/include/linux/bus/stm32_firewall.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023, STMicroelectronics - All Rights Reserved + */ + +#ifndef _STM32_FIREWALL_H +#define _STM32_FIREWALL_H + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/types.h> + +/** + * STM32_PERIPHERAL_FIREWALL: This type of firewall protects peripherals + * STM32_MEMORY_FIREWALL: This type of firewall protects memories/subsets of memory + * zones + * STM32_NOTYPE_FIREWALL: Undefined firewall type + */ + +#define STM32_PERIPHERAL_FIREWALL BIT(1) +#define STM32_MEMORY_FIREWALL BIT(2) +#define STM32_NOTYPE_FIREWALL BIT(3) + +/** + * struct stm32_firewall_controller - Information on firewall controller supplying services + * + * @name: Name of the firewall controller + * @dev: Device reference of the firewall controller + * @mmio: Base address of the firewall controller + * @entry: List entry of the firewall controller list + * @type: Type of firewall + * @max_entries: Number of entries covered by the firewall + * @grant_access: Callback used to grant access for a device access against a + * firewall controller + * @release_access: Callback used to release resources taken by a device when access was + * granted + * @grant_memory_range_access: Callback used to grant access for a device to a given memory region + */ +struct stm32_firewall_controller { + const char *name; + struct device *dev; + void __iomem *mmio; + struct list_head entry; + unsigned int type; + unsigned int max_entries; + + int (*grant_access)(struct stm32_firewall_controller *ctrl, u32 id); + void (*release_access)(struct stm32_firewall_controller *ctrl, u32 id); + int (*grant_memory_range_access)(struct stm32_firewall_controller *ctrl, phys_addr_t paddr, + size_t size); +}; + +/** + * stm32_firewall_controller_register - Register a firewall controller to the STM32 firewall + * framework + * @firewall_controller: Firewall controller to register + * + * Returns 0 in case of success or -ENODEV if no controller was given. + */ +int stm32_firewall_controller_register(struct stm32_firewall_controller *firewall_controller); + +/** + * stm32_firewall_controller_unregister - Unregister a firewall controller from the STM32 + * firewall framework + * @firewall_controller: Firewall controller to unregister + */ +void stm32_firewall_controller_unregister(struct stm32_firewall_controller *firewall_controller); + +/** + * stm32_firewall_populate_bus - Populate device tree nodes that have a correct firewall + * configuration. This is used at boot-time only, as a sanity check + * between device tree and firewalls hardware configurations to + * prevent a kernel crash when a device driver is not granted access + * + * @firewall_controller: Firewall controller which nodes will be populated or not + * + * Returns 0 in case of success or appropriate errno code if error occurred. + */ +int stm32_firewall_populate_bus(struct stm32_firewall_controller *firewall_controller); + +#endif /* _STM32_FIREWALL_H */ diff --git a/include/linux/bus/stm32_firewall_device.h b/include/linux/bus/stm32_firewall_device.h index eaa7a3f54450..6c878f3ca86f 100644 --- a/include/linux/bus/stm32_firewall_device.h +++ b/include/linux/bus/stm32_firewall_device.h @@ -112,6 +112,25 @@ int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsy */ void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id); +/** + * stm32_firewall_get_grant_all_access - Allocate and get all the firewall(s) associated to given + * device. Then, try to grant access rights for each element. + * This function is basically a helper function that wraps + * both stm32_firewall_get_firewall() and + * stm32_firewall_grant_access() on all firewall references of + * a device along with the allocation of the array. + * Realease access using stm32_firewall_release_access* APIs + * when done. + * + * @dev: Device performing the checks + * @firewall: Pointer to the array of firewall references to be allocated + * @nb_firewall: Number of allocated elements in @firewall + * + * Returns 0 on success, or appropriate errno code if error occurred. + */ +int stm32_firewall_get_grant_all_access(struct device *dev, struct stm32_firewall **firewall, + int *nb_firewall); + #else /* CONFIG_STM32_FIREWALL */ static inline int stm32_firewall_get_firewall(struct device_node *np, @@ -141,5 +160,12 @@ static inline void stm32_firewall_release_access_by_id(struct stm32_firewall *fi { } +static inline int stm32_firewall_get_grant_all_access(struct device *dev, + struct stm32_firewall **firewall, + int *nb_firewall) +{ + return -ENODEV; +} + #endif /* CONFIG_STM32_FIREWALL */ #endif /* STM32_FIREWALL_DEVICE_H */ diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 06fb60471aaf..d36dd476feda 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -203,15 +203,6 @@ static inline void bvec_iter_advance_single(const struct bio_vec *bv, ((bvl = mp_bvec_iter_bvec((bio_vec), (iter))), 1); \ bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) -/* for iterating one bio from start to end */ -#define BVEC_ITER_ALL_INIT (struct bvec_iter) \ -{ \ - .bi_sector = 0, \ - .bi_size = UINT_MAX, \ - .bi_idx = 0, \ - .bi_bvec_done = 0, \ -} - static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all) { iter_all->done = 0; diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index bb92f5c169ca..f42563739d2e 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -17,6 +17,7 @@ #include <linux/refcount.h> #include <linux/percpu-refcount.h> #include <linux/percpu-rwsem.h> +#include <linux/sched.h> #include <linux/u64_stats_sync.h> #include <linux/workqueue.h> #include <linux/bpf-cgroup-defs.h> @@ -167,6 +168,7 @@ struct cgroup_file { struct kernfs_node *kn; unsigned long notified_at; struct timer_list notify_timer; + spinlock_t lock; }; /* @@ -609,6 +611,9 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; + /* used by cgroup_rmdir() to wait for dying tasks to leave */ + wait_queue_head_t dying_populated_waitq; + /* used to schedule release agent */ struct work_struct release_agent_work; @@ -624,6 +629,9 @@ struct cgroup { #ifdef CONFIG_BPF_SYSCALL struct bpf_local_storage __rcu *bpf_cgrp_storage; #endif +#ifdef CONFIG_EXT_SUB_SCHED + struct scx_sched __rcu *scx_sched; +#endif /* All ancestors including self */ union { diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bc892e3b37ee..e52160e85af4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -42,6 +42,14 @@ struct kernel_clone_args; #ifdef CONFIG_CGROUPS +/* + * To avoid confusing the compiler (and generating warnings) with code + * that attempts to access what would be a 0-element array (i.e. sized + * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this + * constant expression can be added. + */ +#define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0) + enum css_task_iter_flags { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ @@ -76,6 +84,7 @@ enum cgroup_lifetime_events { extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct blocking_notifier_head cgroup_lifetime_notifier; @@ -103,6 +112,8 @@ extern struct blocking_notifier_head cgroup_lifetime_notifier; #define cgroup_subsys_on_dfl(ss) \ static_branch_likely(&ss ## _on_dfl_key) +bool cgroup_on_dfl(const struct cgroup *cgrp); + bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, @@ -274,6 +285,32 @@ void css_task_iter_end(struct css_task_iter *it); for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ (pos) = css_next_descendant_post((pos), (css))) +/* iterate over child cgrps, lock should be held throughout iteration */ +#define cgroup_for_each_live_child(child, cgrp) \ + list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + cgroup_is_dead(child); })) \ + ; \ + else + +/* walk live descendants in pre order */ +#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \ + css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + (dsct) = (d_css)->cgroup; \ + cgroup_is_dead(dsct); })) \ + ; \ + else + +/* walk live descendants in postorder */ +#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \ + css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + (dsct) = (d_css)->cgroup; \ + cgroup_is_dead(dsct); })) \ + ; \ + else + /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor @@ -337,6 +374,27 @@ static inline u64 cgroup_id(const struct cgroup *cgrp) } /** + * cgroup_css - obtain a cgroup's css for the specified subsystem + * @cgrp: the cgroup of interest + * @ss: the subsystem of interest (%NULL returns @cgrp->self) + * + * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This + * function must be called either under cgroup_mutex or rcu_read_lock() and + * the caller is responsible for pinning the returned css if it wants to + * keep accessing it outside the said locks. This function may return + * %NULL if @cgrp doesn't have @subsys_id enabled. + */ +static inline struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) +{ + if (CGROUP_HAS_SUBSYS_CONFIG && ss) + return rcu_dereference_check(cgrp->subsys[ss->id], + lockdep_is_held(&cgroup_mutex)); + else + return &cgrp->self; +} + +/** * css_is_dying - test whether the specified css is dying * @css: target css * @@ -372,6 +430,11 @@ static inline bool css_is_self(struct cgroup_subsys_state *css) return false; } +static inline bool cgroup_is_dead(const struct cgroup *cgrp) +{ + return !(cgrp->self.flags & CSS_ONLINE); +} + static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); @@ -387,8 +450,6 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } -extern struct mutex cgroup_mutex; - static inline void cgroup_lock(void) { mutex_lock(&cgroup_mutex); diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index dbc4162921e9..ea95ca4bc11c 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -286,15 +286,18 @@ static __always_inline _type class_##_name##_constructor(_init_args) \ __no_context_analysis \ { _type t = _init; return t; } -#define EXTEND_CLASS(_name, ext, _init, _init_args...) \ -typedef lock_##_name##_t lock_##_name##ext##_t; \ +#define EXTEND_CLASS_COND(_name, ext, _cond, _init, _init_args...) \ +typedef lock_##_name##_t lock_##_name##ext##_t; \ typedef class_##_name##_t class_##_name##ext##_t; \ -static __always_inline void class_##_name##ext##_destructor(class_##_name##_t *p) \ -{ class_##_name##_destructor(p); } \ +static __always_inline void class_##_name##ext##_destructor(class_##_name##_t *_T) \ +{ if (_cond) return; class_##_name##_destructor(_T); } \ static __always_inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ __no_context_analysis \ { class_##_name##_t t = _init; return t; } +#define EXTEND_CLASS(_name, ext, _init, _init_args...) \ + EXTEND_CLASS_COND(_name, ext, 0, _init, _init_args) + #define CLASS(_name, var) \ class_##_name##_t var __cleanup(class_##_name##_destructor) = \ class_##_name##_constructor @@ -394,12 +397,12 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond __DEFINE_GUARD_LOCK_PTR(_name, _T) #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, if (!__GUARD_IS_ERR(_T)) { _unlock; }, ({ _lock; _T; }), _type _T); \ + DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ DEFINE_CLASS_IS_GUARD(_name) #define DEFINE_GUARD_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ - EXTEND_CLASS(_name, _ext, \ + EXTEND_CLASS_COND(_name, _ext, __GUARD_IS_ERR(*_T), \ ({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \ class_##_name##_t _T) \ static __always_inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ @@ -488,7 +491,7 @@ typedef struct { \ static __always_inline void class_##_name##_destructor(class_##_name##_t *_T) \ __no_context_analysis \ { \ - if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \ + if (_T->lock) { _unlock; } \ } \ \ __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) @@ -565,7 +568,7 @@ __DEFINE_LOCK_GUARD_0(_name, _lock) #define DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ - EXTEND_CLASS(_name, _ext, \ + EXTEND_CLASS_COND(_name, _ext, __GUARD_IS_ERR(_T->lock), \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ int _RET = (_lock); \ if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\ diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index b0df28ddd394..6adb72761246 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -43,9 +43,9 @@ enum clock_event_state { /* * Clock event features */ -# define CLOCK_EVT_FEAT_PERIODIC 0x000001 -# define CLOCK_EVT_FEAT_ONESHOT 0x000002 -# define CLOCK_EVT_FEAT_KTIME 0x000004 +# define CLOCK_EVT_FEAT_PERIODIC 0x000001 +# define CLOCK_EVT_FEAT_ONESHOT 0x000002 +# define CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED 0x000004 /* * x86(64) specific (mis)features: @@ -73,6 +73,7 @@ enum clock_event_state { * level handler of the event source * @set_next_event: set next event function using a clocksource delta * @set_next_ktime: set next event function using a direct ktime value + * @set_next_coupled: set next event function for clocksource coupled mode * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns @@ -80,6 +81,8 @@ enum clock_event_state { * @shift: nanoseconds to cycles divisor (power of two) * @state_use_accessors:current state of the device, assigned by the core code * @features: features + * @cs_id: Clocksource ID to denote the clocksource for coupled mode + * @next_event_forced: True if the last programming was a forced event * @retries: number of forced programming retries * @set_state_periodic: switch state to periodic * @set_state_oneshot: switch state to oneshot @@ -101,6 +104,7 @@ struct clock_event_device { void (*event_handler)(struct clock_event_device *); int (*set_next_event)(unsigned long evt, struct clock_event_device *); int (*set_next_ktime)(ktime_t expires, struct clock_event_device *); + void (*set_next_coupled)(u64 cycles, struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; @@ -108,6 +112,8 @@ struct clock_event_device { u32 shift; enum clock_event_state state_use_accessors; unsigned int features; + enum clocksource_ids cs_id; + unsigned int next_event_forced; unsigned long retries; int (*set_state_periodic)(struct clock_event_device *); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 65b7c41471c3..7c38190b10bf 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -25,8 +25,7 @@ struct clocksource_base; struct clocksource; struct module; -#if defined(CONFIG_ARCH_CLOCKSOURCE_DATA) || \ - defined(CONFIG_GENERIC_GETTIMEOFDAY) +#if defined(CONFIG_GENERIC_GETTIMEOFDAY) #include <asm/clocksource.h> #endif @@ -44,8 +43,6 @@ struct module; * @shift: Cycle to nanosecond divisor (power of two) * @max_idle_ns: Maximum idle time permitted by the clocksource (nsecs) * @maxadj: Maximum adjustment value to mult (~11%) - * @uncertainty_margin: Maximum uncertainty in nanoseconds per half second. - * Zero says to use default WATCHDOG_THRESHOLD. * @archdata: Optional arch-specific data * @max_cycles: Maximum safe cycle value which won't overflow on * multiplication @@ -105,10 +102,6 @@ struct clocksource { u32 shift; u64 max_idle_ns; u32 maxadj; - u32 uncertainty_margin; -#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA - struct arch_clocksource_data archdata; -#endif u64 max_cycles; u64 max_raw_delta; const char *name; @@ -133,6 +126,7 @@ struct clocksource { struct list_head wd_list; u64 cs_last; u64 wd_last; + unsigned int wd_cpu; #endif struct module *owner; }; @@ -142,13 +136,19 @@ struct clocksource { */ #define CLOCK_SOURCE_IS_CONTINUOUS 0x01 #define CLOCK_SOURCE_MUST_VERIFY 0x02 +#define CLOCK_SOURCE_CALIBRATED 0x04 #define CLOCK_SOURCE_WATCHDOG 0x10 #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 #define CLOCK_SOURCE_UNSTABLE 0x40 #define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80 #define CLOCK_SOURCE_RESELECT 0x100 -#define CLOCK_SOURCE_VERIFY_PERCPU 0x200 +#define CLOCK_SOURCE_CAN_INLINE_READ 0x200 +#define CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT 0x400 + +#define CLOCK_SOURCE_WDTEST 0x800 +#define CLOCK_SOURCE_WDTEST_PERCPU 0x1000 + /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0) @@ -298,21 +298,6 @@ static inline void timer_probe(void) {} #define TIMER_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn) -static inline unsigned int clocksource_get_max_watchdog_retry(void) -{ - /* - * When system is in the boot phase or under heavy workload, there - * can be random big latencies during the clocksource/watchdog - * read, so allow retries to filter the noise latency. As the - * latency's frequency and maximum value goes up with the number of - * CPUs, scale the number of retries with the number of online - * CPUs. - */ - return (ilog2(num_online_cpus()) / 2) + 1; -} - -void clocksource_verify_percpu(struct clocksource *cs); - /** * struct clocksource_base - hardware abstraction for clock on which a clocksource * is based diff --git a/include/linux/cma.h b/include/linux/cma.h index d0793eaaadaa..8555d38a97b1 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -61,14 +61,4 @@ extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data) extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end); extern void cma_reserve_pages_on_error(struct cma *cma); - -#ifdef CONFIG_DMA_CMA -extern bool cma_skip_dt_default_reserved_mem(void); -#else -static inline bool cma_skip_dt_default_reserved_mem(void) -{ - return false; -} -#endif - #endif diff --git a/include/linux/compiler-context-analysis.h b/include/linux/compiler-context-analysis.h index 00c074a2ccb0..a9317571e6af 100644 --- a/include/linux/compiler-context-analysis.h +++ b/include/linux/compiler-context-analysis.h @@ -320,6 +320,38 @@ static inline void _context_unsafe_alias(void **p) { } */ #define __releases(...) __releases_ctx_lock(__VA_ARGS__) +/* + * Clang's analysis does not care precisely about the value, only that it is + * either zero or non-zero. So the __cond_acquires() interface might be + * misleading if we say that @ret is the value returned if acquired. Instead, + * provide symbolic variants which we translate. + */ +#define __cond_acquires_impl_not_true(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) +#define __cond_acquires_impl_not_false(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) +#define __cond_acquires_impl_not_nonzero(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) +#define __cond_acquires_impl_not_0(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) +#define __cond_acquires_impl_not_nonnull(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) +#define __cond_acquires_impl_not_NULL(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) + +/** + * __cond_releases() - function attribute, function conditionally + * releases a context lock exclusively + * @ret: abstract value returned by function if context lock releases + * @x: context lock instance pointer + * + * Function attribute declaring that the function conditionally releases the + * given context lock instance @x exclusively. The associated context(s) must + * be active on entry. The function return value @ret denotes when the context + * lock is released. + * + * @ret may be one of: true, false, nonzero, 0, nonnull, NULL. + * + * NOTE: clang does not have a native attribute for this; instead implement + * it as an unconditional release and a conditional acquire for the + * inverted condition -- which is semantically equivalent. + */ +#define __cond_releases(ret, x) __releases(x) __cond_acquires_impl_not_##ret(x) + /** * __acquire() - function to acquire context lock exclusively * @x: context lock instance pointer diff --git a/include/linux/compiler.h b/include/linux/compiler.h index af16624b29fd..cb2f6050bdf7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -149,10 +149,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif #ifndef RELOC_HIDE -# define RELOC_HIDE(ptr, off) \ - ({ unsigned long __ptr; \ - __ptr = (unsigned long) (ptr); \ - (typeof(ptr)) (__ptr + (off)); }) +# define RELOC_HIDE(ptr, off) ((typeof(ptr))((unsigned long)(ptr) + (off))) #endif #define absolute_pointer(val) RELOC_HIDE((void *)(val), 0) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 890076d0974b..e8fd77593b68 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -432,18 +432,11 @@ struct ftrace_likely_data { #define at_least #endif -/* Do not trap wrapping arithmetic within an annotated function. */ -#ifdef CONFIG_UBSAN_INTEGER_WRAP -# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow"))) -#else -# define __signed_wrap -#endif - /* Section for code which can't be instrumented at all */ #define __noinstr_section(section) \ noinline notrace __attribute((__section__(section))) \ __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \ - __no_sanitize_memory __signed_wrap + __no_sanitize_memory #define noinstr __noinstr_section(".noinstr.text") diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 13b35637bd5a..fe915afdece5 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -13,8 +13,8 @@ #ifndef _LINUX_CONSOLE_STRUCT_H #define _LINUX_CONSOLE_STRUCT_H -#include <linux/wait.h> #include <linux/vt.h> +#include <linux/wait.h> #include <linux/workqueue.h> struct uni_pagedict; @@ -58,6 +58,33 @@ struct vc_state { bool reverse; }; +/** + * struct vc_font - Describes a font + * @width: The width of a single glyph in bits + * @height: The height of a single glyph in scanlines + * @charcount: The number of glyphs in the font + * @data: The raw font data + * + * Font data is organized as an array of glyphs. Each glyph is a bitmap with + * set bits indicating the foreground color. Unset bits indicate background + * color. The fields @width and @height store a single glyph's number of + * horizontal bits and vertical scanlines. If width is not a multiple of 8, + * there are trailing bits to fill up the byte. These bits should not be drawn. + * + * The field @data points to the first glyph's first byte. The value @charcount + * gives the number of glyphs in the font. There are no empty scanlines between + * two adjacent glyphs. + */ +struct vc_font { + unsigned int width; + unsigned int height; + unsigned int charcount; + const unsigned char *data; +}; + +unsigned int vc_font_pitch(const struct vc_font *font); +unsigned int vc_font_size(const struct vc_font *font); + /* * Example: vc_data of a console that was scrolled 3 lines down. * @@ -120,9 +147,9 @@ struct vc_data { unsigned short vc_complement_mask; /* [#] Xor mask for mouse pointer */ unsigned short vc_s_complement_mask; /* Saved mouse pointer mask */ unsigned long vc_pos; /* Cursor address */ - /* fonts */ + /* fonts */ unsigned short vc_hi_font_mask; /* [#] Attribute set for upper 256 chars of font or 0 if not supported */ - struct console_font vc_font; /* Current VC font set */ + struct vc_font vc_font; /* Current VC font set */ unsigned short vc_video_erase_char; /* Background erase character */ /* VT terminal data */ unsigned int vc_state; /* Escape sequence parser state */ @@ -160,6 +187,7 @@ struct vc_data { struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ u32 **vc_uni_lines; /* unicode screen content */ u16 *vc_saved_screen; + u32 **vc_saved_uni_lines; unsigned int vc_saved_cols; unsigned int vc_saved_rows; /* additional information is in vt_kern.h */ diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h new file mode 100644 index 000000000000..5d40ca7a1d89 --- /dev/null +++ b/include/linux/coreboot.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * coreboot.h + * + * Coreboot device and driver interfaces. + * + * Copyright 2014 Gerd Hoffmann <kraxel@redhat.com> + * Copyright 2017 Google Inc. + * Copyright 2017 Samuel Holland <samuel@sholland.org> + */ + +#ifndef _LINUX_COREBOOT_H +#define _LINUX_COREBOOT_H + +#include <linux/compiler_attributes.h> +#include <linux/stddef.h> +#include <linux/types.h> + +typedef __aligned(4) u64 cb_u64; + +/* List of coreboot entry structures that is used */ + +#define CB_TAG_FRAMEBUFFER 0x12 +#define LB_TAG_CBMEM_ENTRY 0x31 + +/* Generic */ +struct coreboot_table_entry { + u32 tag; + u32 size; +}; + +/* Points to a CBMEM entry */ +struct lb_cbmem_ref { + u32 tag; + u32 size; + + cb_u64 cbmem_addr; +}; + +/* Corresponds to LB_TAG_CBMEM_ENTRY */ +struct lb_cbmem_entry { + u32 tag; + u32 size; + + cb_u64 address; + u32 entry_size; + u32 id; +}; + +#define LB_FRAMEBUFFER_ORIENTATION_NORMAL 0 +#define LB_FRAMEBUFFER_ORIENTATION_BOTTOM_UP 1 +#define LB_FRAMEBUFFER_ORIENTATION_LEFT_UP 2 +#define LB_FRAMEBUFFER_ORIENTATION_RIGHT_UP 3 + +/* Describes framebuffer setup by coreboot */ +struct lb_framebuffer { + u32 tag; + u32 size; + + cb_u64 physical_address; + u32 x_resolution; + u32 y_resolution; + u32 bytes_per_line; + u8 bits_per_pixel; + u8 red_mask_pos; + u8 red_mask_size; + u8 green_mask_pos; + u8 green_mask_size; + u8 blue_mask_pos; + u8 blue_mask_size; + u8 reserved_mask_pos; + u8 reserved_mask_size; + u8 orientation; +}; + +/* + * True if the coreboot-provided data is large enough to hold information + * on the linear framebuffer. False otherwise. + */ +#define LB_FRAMEBUFFER_HAS_LFB(__fb) \ + ((__fb)->size >= offsetofend(struct lb_framebuffer, reserved_mask_size)) + +/* + * True if the coreboot-provided data is large enough to hold information + * on the display orientation. False otherwise. + */ +#define LB_FRAMEBUFFER_HAS_ORIENTATION(__fb) \ + ((__fb)->size >= offsetofend(struct lb_framebuffer, orientation)) + +#endif /* _LINUX_COREBOOT_H */ diff --git a/include/linux/count_zeros.h b/include/linux/count_zeros.h index 5b8ff5ac660d..b72ba3faa036 100644 --- a/include/linux/count_zeros.h +++ b/include/linux/count_zeros.h @@ -18,7 +18,7 @@ * * If the MSB of @x is set, the result is 0. * If only the LSB of @x is set, then the result is BITS_PER_LONG-1. - * If @x is 0 then the result is COUNT_LEADING_ZEROS_0. + * If @x is 0 then the result is BITS_PER_LONG. */ static inline int count_leading_zeros(unsigned long x) { @@ -28,8 +28,6 @@ static inline int count_leading_zeros(unsigned long x) return BITS_PER_LONG - fls64(x); } -#define COUNT_LEADING_ZEROS_0 BITS_PER_LONG - /** * count_trailing_zeros - Count the number of zeros from the LSB forwards * @x: The value @@ -38,16 +36,11 @@ static inline int count_leading_zeros(unsigned long x) * * If the LSB of @x is set, the result is 0. * If only the MSB of @x is set, then the result is BITS_PER_LONG-1. - * If @x is 0 then the result is COUNT_TRAILING_ZEROS_0. + * If @x is 0 then the result is BITS_PER_LONG. */ static inline int count_trailing_zeros(unsigned long x) { -#define COUNT_TRAILING_ZEROS_0 (-1) - - if (sizeof(x) == 4) - return ffs(x); - else - return (x != 0) ? __ffs(x) : COUNT_TRAILING_ZEROS_0; + return x ? __ffs(x) : BITS_PER_LONG; } #endif /* _LINUX_BITOPS_COUNT_ZEROS_H_ */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8239cd95a005..9b6b0d87fdb0 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -229,8 +229,8 @@ static inline bool cpu_attack_vector_mitigated(enum cpu_attack_vectors v) #define smt_mitigations SMT_MITIGATIONS_OFF #endif -int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status); -int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status); -int arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status); +int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, unsigned long __user *state); +int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state); +int arch_prctl_lock_branch_landing_pad_state(struct task_struct *t); #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cc894fc38971..2ab691828e48 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -79,8 +79,9 @@ struct cpufreq_policy { * called, but you're in IRQ context */ struct freq_constraints constraints; - struct freq_qos_request *min_freq_req; - struct freq_qos_request *max_freq_req; + struct freq_qos_request min_freq_req; + struct freq_qos_request max_freq_req; + struct freq_qos_request boost_freq_req; struct cpufreq_frequency_table *freq_table; enum cpufreq_table_sorting freq_table_sorted; @@ -232,7 +233,7 @@ static inline bool policy_is_inactive(struct cpufreq_policy *policy) static inline bool policy_is_shared(struct cpufreq_policy *policy) { - return cpumask_weight(policy->cpus) > 1; + return cpumask_nth(1, policy->cpus) < nr_cpumask_bits; } #ifdef CONFIG_CPU_FREQ @@ -372,7 +373,7 @@ struct cpufreq_driver { * conditions) scale invariance can be disabled, which causes the * schedutil governor to fall back to the latter. */ - void (*adjust_perf)(unsigned int cpu, + void (*adjust_perf)(struct cpufreq_policy *policy, unsigned long min_perf, unsigned long target_perf, unsigned long capacity); @@ -617,7 +618,7 @@ struct cpufreq_governor { /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); -void cpufreq_driver_adjust_perf(unsigned int cpu, +void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy, unsigned long min_perf, unsigned long target_perf, unsigned long capacity); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 62cd7b35a29c..22ba327ec227 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -92,7 +92,6 @@ enum cpuhp_state { CPUHP_NET_DEV_DEAD, CPUHP_IOMMU_IOVA_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, - CPUHP_PADATA_DEAD, CPUHP_AP_DTPM_CPU_DEAD, CPUHP_RANDOM_PREPARE, CPUHP_WORKQUEUE_PREP, diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 4073690504a7..a2485348def3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -188,6 +188,7 @@ extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx, extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); extern int cpuidle_register_device(struct cpuidle_device *dev); extern void cpuidle_unregister_device(struct cpuidle_device *dev); +extern void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev); extern int cpuidle_register(struct cpuidle_driver *drv, const struct cpumask *const coupled_cpus); extern void cpuidle_unregister(struct cpuidle_driver *drv); @@ -226,6 +227,7 @@ static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } static inline int cpuidle_register_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { } +static inline void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev) {} static inline int cpuidle_register(struct cpuidle_driver *drv, const struct cpumask *const coupled_cpus) {return -ENODEV; } diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index d35726d6a415..c1dee3f971a9 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -34,13 +34,6 @@ static inline void arch_kexec_protect_crashkres(void) { } static inline void arch_kexec_unprotect_crashkres(void) { } #endif -#ifdef CONFIG_CRASH_DM_CRYPT -int crash_load_dm_crypt_keys(struct kimage *image); -ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos); -#else -static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } -#endif - #ifndef arch_crash_handle_hotplug_event static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { } #endif @@ -96,4 +89,11 @@ static inline void crash_save_cpu(struct pt_regs *regs, int cpu) {}; static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; }; #endif /* CONFIG_CRASH_DUMP*/ +#ifdef CONFIG_CRASH_DM_CRYPT +int crash_load_dm_crypt_keys(struct kimage *image); +ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos); +#else +static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } +#endif + #endif /* LINUX_CRASH_CORE_H */ diff --git a/include/linux/cred.h b/include/linux/cred.h index ed1609d78cd7..c6676265a985 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -33,12 +33,14 @@ struct group_info { /** * get_group_info - Get a reference to a group info structure - * @group_info: The group info to reference + * @gi: The group info to reference * * This gets a reference to a set of supplementary groups. * * If the caller is accessing a task's credentials, they must hold the RCU read * lock when reading. + * + * Returns: @gi */ static inline struct group_info *get_group_info(struct group_info *gi) { @@ -209,6 +211,8 @@ DEFINE_CLASS(override_creds, * usage count. The purpose of this is to attempt to catch at compile time the * accidental alteration of a set of credentials that should be considered * immutable. + * + * Returns: @cred when the references are acquired, NULL otherwise. */ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) { @@ -246,8 +250,8 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) } /** - * put_cred - Release a reference to a set of credentials - * @cred: The credentials to release + * put_cred_many - Release a reference to a set of credentials + * @_cred: The credentials to release * @nr: Number of references to release * * Release a reference to a set of credentials, deleting them when the last ref diff --git a/include/linux/damon.h b/include/linux/damon.h index a4fea23da857..d9a3babbafc1 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -55,6 +55,8 @@ struct damon_size_range { * @list: List head for siblings. * @age: Age of this region. * + * For any use case, @ar should be non-zero positive size. + * * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be * increased for every &damon_attrs->sample_interval if an access to the region * during the last sampling interval is found. The update of this field should @@ -214,11 +216,22 @@ struct damos_quota_goal { }; /** + * enum damos_quota_goal_tuner - Goal-based quota tuning logic. + * @DAMOS_QUOTA_GOAL_TUNER_CONSIST: Aim long term consistent quota. + * @DAMOS_QUOTA_GOAL_TUNER_TEMPORAL: Aim zero quota asap. + */ +enum damos_quota_goal_tuner { + DAMOS_QUOTA_GOAL_TUNER_CONSIST, + DAMOS_QUOTA_GOAL_TUNER_TEMPORAL, +}; + +/** * struct damos_quota - Controls the aggressiveness of the given scheme. * @reset_interval: Charge reset interval in milliseconds. * @ms: Maximum milliseconds that the scheme can use. * @sz: Maximum bytes of memory that the action can be applied. * @goals: Head of quota tuning goals (&damos_quota_goal) list. + * @goal_tuner: Goal-based @esz tuning algorithm to use. * @esz: Effective size quota in bytes. * * @weight_sz: Weight of the region's size for prioritization. @@ -260,6 +273,7 @@ struct damos_quota { unsigned long ms; unsigned long sz; struct list_head goals; + enum damos_quota_goal_tuner goal_tuner; unsigned long esz; unsigned int weight_sz; @@ -647,8 +661,7 @@ struct damon_operations { void (*prepare_access_checks)(struct damon_ctx *context); unsigned int (*check_accesses)(struct damon_ctx *context); int (*get_scheme_score)(struct damon_ctx *context, - struct damon_target *t, struct damon_region *r, - struct damos *scheme); + struct damon_region *r, struct damos *scheme); unsigned long (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, struct damos *scheme, unsigned long *sz_filter_passed); @@ -810,6 +823,12 @@ struct damon_ctx { struct damos_walk_control *walk_control; struct mutex walk_control_lock; + /* + * indicate if this may be corrupted. Currentonly this is set only for + * damon_commit_ctx() failure. + */ + bool maybe_corrupted; + /* Working thread of the given DAMON context */ struct task_struct *kdamond; /* Protects @kdamond field access */ @@ -975,6 +994,7 @@ int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); int damon_set_region_biggest_system_ram_default(struct damon_target *t, unsigned long *start, unsigned long *end, + unsigned long addr_unit, unsigned long min_region_sz); #endif /* CONFIG_DAMON */ diff --git a/include/linux/dax.h b/include/linux/dax.h index bf103f317cac..10a7cc79aea5 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -69,7 +69,7 @@ static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc, const struct inode *inode, struct dax_device *dax_dev) { - if (!vma_desc_test_flags(desc, VMA_SYNC_BIT)) + if (!vma_desc_test(desc, VMA_SYNC_BIT)) return true; if (!IS_DAX(inode)) return false; @@ -115,7 +115,7 @@ static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc, const struct inode *inode, struct dax_device *dax_dev) { - return !vma_desc_test_flags(desc, VMA_SYNC_BIT); + return !vma_desc_test(desc, VMA_SYNC_BIT); } static inline size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 898c60d21c92..c5bd5a74baba 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -264,6 +264,7 @@ extern void d_invalidate(struct dentry *); extern struct dentry * d_make_root(struct inode *); extern void d_mark_tmpfile(struct file *, struct inode *); +int d_mark_tmpfile_name(struct file *file, const struct qstr *name); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 38f625af6ab4..cd4faaf5d427 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -755,4 +755,11 @@ static inline unsigned long to_bytes(sector_t n) return (n << SECTOR_SHIFT); } +static inline void dm_stack_bs_limits(struct queue_limits *limits, unsigned int bs) +{ + limits->logical_block_size = max(limits->logical_block_size, bs); + limits->physical_block_size = max(limits->physical_block_size, bs); + limits->io_min = max(limits->io_min, bs); +} + #endif /* _LINUX_DEVICE_MAPPER_H */ diff --git a/include/linux/device.h b/include/linux/device.h index 0be95294b6e6..67cec9ec0cd0 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -190,6 +190,22 @@ ssize_t device_show_string(struct device *dev, struct device_attribute *attr, struct device_attribute dev_attr_##_name = __ATTR_RW_MODE(_name, 0600) /** + * DEVICE_ATTR_RW_NAMED - Define a read-write device attribute with a sysfs name + * that differs from the function name. + * @_name: Attribute function preface + * @_attrname: Attribute name as it wil be exposed in the sysfs. + * + * Like DEVICE_ATTR_RW(), but allows for reusing names under separate paths in + * the same driver. + */ +#define DEVICE_ATTR_RW_NAMED(_name, _attrname) \ + struct device_attribute dev_attr_##_name = { \ + .attr = { .name = _attrname, .mode = 0644 }, \ + .show = _name##_show, \ + .store = _name##_store, \ + } + +/** * DEVICE_ATTR_RO - Define a readable device attribute. * @_name: Attribute name. * @@ -208,6 +224,21 @@ ssize_t device_show_string(struct device *dev, struct device_attribute *attr, struct device_attribute dev_attr_##_name = __ATTR_RO_MODE(_name, 0400) /** + * DEVICE_ATTR_RO_NAMED - Define a read-only device attribute with a sysfs name + * that differs from the function name. + * @_name: Attribute function preface + * @_attrname: Attribute name as it wil be exposed in the sysfs. + * + * Like DEVICE_ATTR_RO(), but allows for reusing names under separate paths in + * the same driver. + */ +#define DEVICE_ATTR_RO_NAMED(_name, _attrname) \ + struct device_attribute dev_attr_##_name = { \ + .attr = { .name = _attrname, .mode = 0444 }, \ + .show = _name##_show, \ + } + +/** * DEVICE_ATTR_WO - Define an admin-only writable device attribute. * @_name: Attribute name. * @@ -217,6 +248,21 @@ ssize_t device_show_string(struct device *dev, struct device_attribute *attr, struct device_attribute dev_attr_##_name = __ATTR_WO(_name) /** + * DEVICE_ATTR_WO_NAMED - Define a read-only device attribute with a sysfs name + * that differs from the function name. + * @_name: Attribute function preface + * @_attrname: Attribute name as it wil be exposed in the sysfs. + * + * Like DEVICE_ATTR_WO(), but allows for reusing names under separate paths in + * the same driver. + */ +#define DEVICE_ATTR_WO_NAMED(_name, _attrname) \ + struct device_attribute dev_attr_##_name = { \ + .attr = { .name = _attrname, .mode = 0200 }, \ + .store = _name##_store, \ + } + +/** * DEVICE_ULONG_ATTR - Define a device attribute backed by an unsigned long. * @_name: Attribute name. * @_mode: File mode. @@ -483,6 +529,8 @@ struct device_physical_location { * on. This shrinks the "Board Support Packages" (BSPs) and * minimizes board-specific #ifdefs in drivers. * @driver_data: Private pointer for driver specific info. + * @driver_override: Driver name to force a match. Do not touch directly; use + * device_set_driver_override() instead. * @links: Links to suppliers and consumers of this device. * @power: For device power management. * See Documentation/driver-api/pm/devices.rst for details. @@ -576,6 +624,10 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ + struct { + const char *name; + spinlock_t lock; + } driver_override; struct mutex mutex; /* mutex to synchronize calls to * its driver. */ @@ -701,6 +753,54 @@ struct device_link { #define kobj_to_dev(__kobj) container_of_const(__kobj, struct device, kobj) +int __device_set_driver_override(struct device *dev, const char *s, size_t len); + +/** + * device_set_driver_override() - Helper to set or clear driver override. + * @dev: Device to change + * @s: NUL-terminated string, new driver name to force a match, pass empty + * string to clear it ("" or "\n", where the latter is only for sysfs + * interface). + * + * Helper to set or clear driver override of a device. + * + * Returns: 0 on success or a negative error code on failure. + */ +static inline int device_set_driver_override(struct device *dev, const char *s) +{ + return __device_set_driver_override(dev, s, s ? strlen(s) : 0); +} + +/** + * device_has_driver_override() - Check if a driver override has been set. + * @dev: device to check + * + * Returns true if a driver override has been set for this device. + */ +static inline bool device_has_driver_override(struct device *dev) +{ + guard(spinlock)(&dev->driver_override.lock); + return !!dev->driver_override.name; +} + +/** + * device_match_driver_override() - Match a driver against the device's driver_override. + * @dev: device to check + * @drv: driver to match against + * + * Returns > 0 if a driver override is set and matches the given driver, 0 if a + * driver override is set but does not match, or < 0 if a driver override is not + * set at all. + */ +static inline int device_match_driver_override(struct device *dev, + const struct device_driver *drv) +{ + guard(spinlock)(&dev->driver_override.lock); + if (dev->driver_override.name) + return !strcmp(dev->driver_override.name, drv->name); + return -1; +} + /** * device_iommu_mapped - Returns true when the device DMA is translated * by an IOMMU @@ -911,6 +1011,7 @@ static inline void device_unlock(struct device *dev) } DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T)) +DEFINE_GUARD_COND(device, _intr, device_lock_interruptible(_T), _RET == 0) static inline void device_lock_assert(struct device *dev) { @@ -1131,9 +1232,9 @@ device_create_with_groups(const struct class *cls, struct device *parent, dev_t void device_destroy(const struct class *cls, dev_t devt); int __must_check device_add_groups(struct device *dev, - const struct attribute_group **groups); + const struct attribute_group *const *groups); void device_remove_groups(struct device *dev, - const struct attribute_group **groups); + const struct attribute_group *const *groups); static inline int __must_check device_add_group(struct device *dev, const struct attribute_group *grp) diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 99c3c83ea520..c1b463cd6464 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -35,6 +35,8 @@ struct fwnode_handle; * otherwise. It may also return error code if determining that * the driver supports the device is not possible. In case of * -EPROBE_DEFER it will queue the device for deferred probing. + * Note: This callback may be invoked with or without the device + * lock held. * @uevent: Called when a device is added, removed, or a few other things * that generate uevents to add the environment variables. * @probe: Called when a new device or driver add to this bus, and callback @@ -63,6 +65,9 @@ struct fwnode_handle; * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. + * @driver_override: Set to true if this bus supports the driver_override + * mechanism, which allows userspace to force a specific + * driver to bind to a device via a sysfs attribute. * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. * @@ -104,6 +109,7 @@ struct bus_type { const struct dev_pm_ops *pm; + bool driver_override; bool need_parent_lock; }; diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 65880e60c720..78ab8d2b3e30 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -50,8 +50,8 @@ struct fwnode_handle; struct class { const char *name; - const struct attribute_group **class_groups; - const struct attribute_group **dev_groups; + const struct attribute_group *const *class_groups; + const struct attribute_group *const *dev_groups; int (*dev_uevent)(const struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(const struct device *dev, umode_t *mode); @@ -62,7 +62,7 @@ struct class { int (*shutdown_pre)(struct device *dev); const struct kobj_ns_type_operations *ns_type; - const void *(*namespace)(const struct device *dev); + const struct ns_common *(*namespace)(const struct device *dev); void (*get_ownership)(const struct device *dev, kuid_t *uid, kgid_t *gid); @@ -180,9 +180,9 @@ struct class_attribute { struct class_attribute class_attr_##_name = __ATTR_WO(_name) int __must_check class_create_file_ns(const struct class *class, const struct class_attribute *attr, - const void *ns); + const struct ns_common *ns); void class_remove_file_ns(const struct class *class, const struct class_attribute *attr, - const void *ns); + const struct ns_common *ns); static inline int __must_check class_create_file(const struct class *class, const struct class_attribute *attr) diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 133b9e637b55..166933b82e27 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -407,7 +407,7 @@ struct dma_buf { * through the device. * * - Dynamic importers should set fences for any access that they can't - * disable immediately from their &dma_buf_attach_ops.move_notify + * disable immediately from their &dma_buf_attach_ops.invalidate_mappings * callback. * * IMPORTANT: @@ -446,7 +446,7 @@ struct dma_buf_attach_ops { bool allow_peer2peer; /** - * @move_notify: [optional] notification that the DMA-buf is moving + * @invalidate_mappings: [optional] notification that the DMA-buf is moving * * If this callback is provided the framework can avoid pinning the * backing store while mappings exists. @@ -456,14 +456,10 @@ struct dma_buf_attach_ops { * called with this lock held as well. This makes sure that no mapping * is created concurrently with an ongoing move operation. * - * Mappings stay valid and are not directly affected by this callback. - * But the DMA-buf can now be in a different physical location, so all - * mappings should be destroyed and re-created as soon as possible. - * - * New mappings can be created after this callback returns, and will - * point to the new location of the DMA-buf. + * See the kdoc for dma_buf_invalidate_mappings() for details on the + * required behavior. */ - void (*move_notify)(struct dma_buf_attachment *attach); + void (*invalidate_mappings)(struct dma_buf_attachment *attach); }; /** @@ -578,7 +574,8 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, enum dma_data_direction); void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); -void dma_buf_move_notify(struct dma_buf *dma_buf); +void dma_buf_invalidate_mappings(struct dma_buf *dma_buf); +bool dma_buf_attach_revocable(struct dma_buf_attachment *attach); int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); int dma_buf_end_cpu_access(struct dma_buf *dma_buf, diff --git a/include/linux/dma-buf/heaps/cma.h b/include/linux/dma-buf/heaps/cma.h deleted file mode 100644 index e751479e21e7..000000000000 --- a/include/linux/dma-buf/heaps/cma.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef DMA_BUF_HEAP_CMA_H_ -#define DMA_BUF_HEAP_CMA_H_ - -struct cma; - -#ifdef CONFIG_DMABUF_HEAPS_CMA -int dma_heap_cma_register_heap(struct cma *cma); -#else -static inline int dma_heap_cma_register_heap(struct cma *cma) -{ - return 0; -} -#endif // CONFIG_DMABUF_HEAPS_CMA - -#endif // DMA_BUF_HEAP_CMA_H_ diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 079b3dec0a16..370b3d2bba37 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -38,7 +38,6 @@ struct dma_fence_array_cb { struct dma_fence_array { struct dma_fence base; - spinlock_t lock; unsigned num_fences; atomic_t num_pending; struct dma_fence **fences; diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 5cd3ba53b4a1..df3beadf1515 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -46,7 +46,6 @@ struct dma_fence_chain { */ struct irq_work work; }; - spinlock_t lock; }; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index d4c92fd35092..b52ab692b22e 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -34,7 +34,8 @@ struct seq_file; * @ops: dma_fence_ops associated with this fence * @rcu: used for releasing fence with kfree_rcu * @cb_list: list of all callbacks to call - * @lock: spin_lock_irqsave used for locking + * @extern_lock: external spin_lock_irqsave used for locking (deprecated) + * @inline_lock: alternative internal spin_lock_irqsave used for locking * @context: execution context this fence belongs to, returned by * dma_fence_context_alloc() * @seqno: the sequence number of this fence inside the execution context, @@ -48,6 +49,8 @@ struct seq_file; * atomic ops (bit_*), so taking the spinlock will not be needed most * of the time. * + * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized + * DMA_FENCE_FLAG_INLINE_LOCK_BIT - use inline spinlock instead of external one * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called @@ -65,8 +68,11 @@ struct seq_file; * been completed, or never called at all. */ struct dma_fence { - spinlock_t *lock; - const struct dma_fence_ops *ops; + union { + spinlock_t *extern_lock; + spinlock_t inline_lock; + }; + const struct dma_fence_ops __rcu *ops; /* * We clear the callback list on kref_put so that by the time we * release the fence it is unused. No one should be adding to the @@ -98,6 +104,8 @@ struct dma_fence { }; enum dma_fence_flag_bits { + DMA_FENCE_FLAG_INITIALIZED_BIT, + DMA_FENCE_FLAG_INLINE_LOCK_BIT, DMA_FENCE_FLAG_SEQNO64_BIT, DMA_FENCE_FLAG_SIGNALED_BIT, DMA_FENCE_FLAG_TIMESTAMP_BIT, @@ -218,6 +226,10 @@ struct dma_fence_ops { * timed out. Can also return other error values on custom implementations, * which should be treated as if the fence is signaled. For example a hardware * lockup could be reported like that. + * + * Implementing this callback prevents the fence from detaching after + * signaling and so it is necessary for the module providing the + * dma_fence_ops to stay loaded as long as the dma_fence exists. */ signed long (*wait)(struct dma_fence *fence, bool intr, signed long timeout); @@ -229,6 +241,13 @@ struct dma_fence_ops { * Can be called from irq context. This callback is optional. If it is * NULL, then dma_fence_free() is instead called as the default * implementation. + * + * Implementing this callback prevents the fence from detaching after + * signaling and so it is necessary for the module providing the + * dma_fence_ops to stay loaded as long as the dma_fence exists. + * + * If the callback is implemented the memory backing the dma_fence + * object must be freed RCU safe. */ void (*release)(struct dma_fence *fence); @@ -264,6 +283,19 @@ void dma_fence_free(struct dma_fence *fence); void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq); /** + * dma_fence_was_initialized - test if fence was initialized + * @fence: fence to test + * + * Return: True if fence was ever initialized, false otherwise. Works correctly + * only when memory backing the fence structure is zero initialized on + * allocation. + */ +static inline bool dma_fence_was_initialized(struct dma_fence *fence) +{ + return fence && test_bit(DMA_FENCE_FLAG_INITIALIZED_BIT, &fence->flags); +} + +/** * dma_fence_put - decreases refcount of the fence * @fence: fence to reduce refcount of */ @@ -351,6 +383,45 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep) } while (1); } +/** + * dma_fence_spinlock - return pointer to the spinlock protecting the fence + * @fence: the fence to get the lock from + * + * Return either the pointer to the embedded or the external spin lock. + */ +static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence) +{ + return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ? + &fence->inline_lock : fence->extern_lock; +} + +/** + * dma_fence_lock_irqsave - irqsave lock the fence + * @fence: the fence to lock + * @flags: where to store the CPU flags. + * + * Lock the fence, preventing it from changing to the signaled state. + */ +#define dma_fence_lock_irqsave(fence, flags) \ + spin_lock_irqsave(dma_fence_spinlock(fence), flags) + +/** + * dma_fence_unlock_irqrestore - unlock the fence and irqrestore + * @fence: the fence to unlock + * @flags: the CPU flags to restore + * + * Unlock the fence, allowing it to change its state to signaled again. + */ +#define dma_fence_unlock_irqrestore(fence, flags) \ + spin_unlock_irqrestore(dma_fence_spinlock(fence), flags) + +/** + * dma_fence_assert_held - lockdep assertion that fence is locked + * @fence: the fence which should be locked + */ +#define dma_fence_assert_held(fence) \ + lockdep_assert_held(dma_fence_spinlock(fence)); + #ifdef CONFIG_LOCKDEP bool dma_fence_begin_signalling(void); void dma_fence_end_signalling(bool cookie); @@ -439,13 +510,19 @@ dma_fence_test_signaled_flag(struct dma_fence *fence) static inline bool dma_fence_is_signaled_locked(struct dma_fence *fence) { + const struct dma_fence_ops *ops; + if (dma_fence_test_signaled_flag(fence)) return true; - if (fence->ops->signaled && fence->ops->signaled(fence)) { + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (ops && ops->signaled && ops->signaled(fence)) { + rcu_read_unlock(); dma_fence_signal_locked(fence); return true; } + rcu_read_unlock(); return false; } @@ -469,13 +546,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence) static inline bool dma_fence_is_signaled(struct dma_fence *fence) { + const struct dma_fence_ops *ops; + if (dma_fence_test_signaled_flag(fence)) return true; - if (fence->ops->signaled && fence->ops->signaled(fence)) { + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (ops && ops->signaled && ops->signaled(fence)) { + rcu_read_unlock(); dma_fence_signal(fence); return true; } + rcu_read_unlock(); return false; } @@ -680,7 +763,7 @@ extern const struct dma_fence_ops dma_fence_chain_ops; */ static inline bool dma_fence_is_array(struct dma_fence *fence) { - return fence->ops == &dma_fence_array_ops; + return rcu_access_pointer(fence->ops) == &dma_fence_array_ops; } /** @@ -691,7 +774,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence) */ static inline bool dma_fence_is_chain(struct dma_fence *fence) { - return fence->ops == &dma_fence_chain_ops; + return rcu_access_pointer(fence->ops) == &dma_fence_chain_ops; } /** diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 60b63756df82..6a1832a73cad 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -91,14 +91,8 @@ static inline void set_dma_ops(struct device *dev, #endif /* CONFIG_ARCH_HAS_DMA_OPS */ #ifdef CONFIG_DMA_CMA -extern struct cma *dma_contiguous_default_area; - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} +struct cma *dev_get_cma_area(struct device *dev); +struct cma *dma_contiguous_get_area_by_idx(unsigned int idx); void dma_contiguous_reserve(phys_addr_t addr_limit); int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, @@ -117,6 +111,10 @@ static inline struct cma *dev_get_cma_area(struct device *dev) { return NULL; } +static inline struct cma *dma_contiguous_get_area_by_idx(unsigned int idx) +{ + return NULL; +} static inline void dma_contiguous_reserve(phys_addr_t limit) { } @@ -147,9 +145,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, { __free_pages(page, get_order(size)); } -static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) -{ -} #endif /* CONFIG_DMA_CMA*/ #ifdef CONFIG_DMA_DECLARE_COHERENT @@ -361,6 +356,12 @@ static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, } #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ +#ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC +static inline void arch_sync_dma_flush(void) +{ +} +#endif + #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL void arch_sync_dma_for_cpu_all(void); #else diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 29973baa0581..db8ab24a54f4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -9,7 +9,7 @@ #include <linux/bug.h> #include <linux/cache.h> -/** +/* * List of possible attributes associated with a DMA mapping. The semantics * of each attribute should be defined in Documentation/core-api/dma-attributes.rst. */ @@ -80,11 +80,28 @@ #define DMA_ATTR_MMIO (1UL << 10) /* - * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline - * overlapping this buffer while it is mapped for DMA. All mappings sharing - * a cacheline must have this attribute for this to be considered safe. + * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be + * overlapped. All mappings sharing a cacheline must have this attribute for + * this to be considered safe. + */ +#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES (1UL << 11) + +/* + * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required. + * All mappings that carry this attribute can't work with SWIOTLB and cache + * flushing. + */ +#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12) +/* + * DMA_ATTR_CC_SHARED: Indicates the DMA mapping is shared (decrypted) for + * confidential computing guests. For normal system memory the caller must have + * called set_memory_decrypted(), and pgprot_decrypted must be used when + * creating CPU PTEs for the mapping. The same shared semantic may be passed + * to the vIOMMU when it sets up the IOPTE. For MMIO use together with + * DMA_ATTR_MMIO to indicate shared MMIO. Unless DMA_ATTR_MMIO is provided + * a struct page is required. */ -#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11) +#define DMA_ATTR_CC_SHARED (1UL << 13) /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can @@ -248,8 +265,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, { return NULL; } -static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, unsigned long attrs) +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { } static inline void *dmam_alloc_attrs(struct device *dev, size_t size, diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 270b5458aecf..1fafd5b0e315 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -73,6 +73,8 @@ enum dw_edma_chip_flags { * @ll_region_rd: DMA descriptor link list memory for read channel * @dt_region_wr: DMA data memory for write channel * @dt_region_rd: DMA data memory for read channel + * @db_irq: Virtual IRQ dedicated to interrupt emulation + * @db_offset: Offset from DMA register base * @mf: DMA register map format * @dw: struct dw_edma that is filled by dw_edma_probe() */ @@ -94,9 +96,14 @@ struct dw_edma_chip { struct dw_edma_region dt_region_wr[EDMA_MAX_WR_CH]; struct dw_edma_region dt_region_rd[EDMA_MAX_RD_CH]; + /* interrupt emulation */ + int db_irq; + resource_size_t db_offset; + enum dw_edma_map_format mf; struct dw_edma *dw; + bool cfg_non_ll; }; /* Export to the platform drivers */ diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h index 6680dd1a43c6..332be28427e4 100644 --- a/include/linux/dma/qcom-gpi-dma.h +++ b/include/linux/dma/qcom-gpi-dma.h @@ -8,6 +8,9 @@ /** * enum spi_transfer_cmd - spi transfer commands + * @SPI_TX: SPI peripheral TX command + * @SPI_RX: SPI peripheral RX command + * @SPI_DUPLEX: SPI peripheral Duplex command */ enum spi_transfer_cmd { SPI_TX = 1, @@ -64,7 +67,7 @@ enum i2c_op { * @set_config: set peripheral config * @rx_len: receive length for buffer * @op: i2c cmd - * @muli-msg: is part of multi i2c r-w msgs + * @multi_msg: is part of multi i2c r-w msgs */ struct gpi_i2c_config { u8 set_config; diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index c53c0f6e3b1a..3fe19b75ddf7 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -16,8 +16,8 @@ * struct cppi5_desc_hdr_t - Descriptor header, present in all types of * descriptors * @pkt_info0: Packet info word 0 (n/a in Buffer desc) - * @pkt_info0: Packet info word 1 (n/a in Buffer desc) - * @pkt_info0: Packet info word 2 (n/a in Buffer desc) + * @pkt_info1: Packet info word 1 (n/a in Buffer desc) + * @pkt_info2: Packet info word 2 (n/a in Buffer desc) * @src_dst_tag: Packet info word 3 (n/a in Buffer desc) */ struct cppi5_desc_hdr_t { @@ -35,7 +35,7 @@ struct cppi5_desc_hdr_t { * @buf_info1: word 8: Buffer valid data length * @org_buf_len: word 9: Original buffer length * @org_buf_ptr: word 10/11: Original buffer pointer - * @epib[0]: Extended Packet Info Data (optional, 4 words), and/or + * @epib: Extended Packet Info Data (optional, 4 words), and/or * Protocol Specific Data (optional, 0-128 bytes in * multiples of 4), and/or * Other Software Data (0-N bytes, optional) @@ -132,7 +132,7 @@ struct cppi5_desc_epib_t { /** * struct cppi5_monolithic_desc_t - Monolithic-mode packet descriptor * @hdr: Descriptor header - * @epib[0]: Extended Packet Info Data (optional, 4 words), and/or + * @epib: Extended Packet Info Data (optional, 4 words), and/or * Protocol Specific Data (optional, 0-128 bytes in * multiples of 4), and/or * Other Software Data (0-N bytes, optional) @@ -179,7 +179,7 @@ static inline void cppi5_desc_dump(void *desc, u32 size) * cppi5_desc_is_tdcm - check if the paddr indicates Teardown Complete Message * @paddr: Physical address of the packet popped from the ring * - * Returns true if the address indicates TDCM + * Returns: true if the address indicates TDCM */ static inline bool cppi5_desc_is_tdcm(dma_addr_t paddr) { @@ -190,7 +190,7 @@ static inline bool cppi5_desc_is_tdcm(dma_addr_t paddr) * cppi5_desc_get_type - get descriptor type * @desc_hdr: packet descriptor/TR header * - * Returns descriptor type: + * Returns: descriptor type: * CPPI5_INFO0_DESC_TYPE_VAL_HOST * CPPI5_INFO0_DESC_TYPE_VAL_MONO * CPPI5_INFO0_DESC_TYPE_VAL_TR @@ -205,7 +205,7 @@ static inline u32 cppi5_desc_get_type(struct cppi5_desc_hdr_t *desc_hdr) * cppi5_desc_get_errflags - get Error Flags from Desc * @desc_hdr: packet/TR descriptor header * - * Returns Error Flags from Packet/TR Descriptor + * Returns: Error Flags from Packet/TR Descriptor */ static inline u32 cppi5_desc_get_errflags(struct cppi5_desc_hdr_t *desc_hdr) { @@ -307,7 +307,7 @@ static inline void cppi5_desc_set_tags_ids(struct cppi5_desc_hdr_t *desc_hdr, * @psdata_size: PSDATA size * @sw_data_size: SWDATA size * - * Returns required Host Packet Descriptor size + * Returns: required Host Packet Descriptor size * 0 - if PSDATA > CPPI5_INFO0_HDESC_PSDATA_MAX_SIZE */ static inline u32 cppi5_hdesc_calc_size(bool epib, u32 psdata_size, @@ -381,6 +381,8 @@ cppi5_hdesc_update_psdata_size(struct cppi5_host_desc_t *desc, u32 psdata_size) /** * cppi5_hdesc_get_psdata_size - get PSdata size in bytes * @desc: Host packet descriptor + * + * Returns: PSdata size in bytes */ static inline u32 cppi5_hdesc_get_psdata_size(struct cppi5_host_desc_t *desc) { @@ -398,7 +400,7 @@ static inline u32 cppi5_hdesc_get_psdata_size(struct cppi5_host_desc_t *desc) * cppi5_hdesc_get_pktlen - get Packet Length from HDesc * @desc: Host packet descriptor * - * Returns Packet Length from Host Packet Descriptor + * Returns: Packet Length from Host Packet Descriptor */ static inline u32 cppi5_hdesc_get_pktlen(struct cppi5_host_desc_t *desc) { @@ -408,6 +410,7 @@ static inline u32 cppi5_hdesc_get_pktlen(struct cppi5_host_desc_t *desc) /** * cppi5_hdesc_set_pktlen - set Packet Length in HDesc * @desc: Host packet descriptor + * @pkt_len: Packet length to set */ static inline void cppi5_hdesc_set_pktlen(struct cppi5_host_desc_t *desc, u32 pkt_len) @@ -420,7 +423,7 @@ static inline void cppi5_hdesc_set_pktlen(struct cppi5_host_desc_t *desc, * cppi5_hdesc_get_psflags - get Protocol Specific Flags from HDesc * @desc: Host packet descriptor * - * Returns Protocol Specific Flags from Host Packet Descriptor + * Returns: Protocol Specific Flags from Host Packet Descriptor */ static inline u32 cppi5_hdesc_get_psflags(struct cppi5_host_desc_t *desc) { @@ -431,6 +434,7 @@ static inline u32 cppi5_hdesc_get_psflags(struct cppi5_host_desc_t *desc) /** * cppi5_hdesc_set_psflags - set Protocol Specific Flags in HDesc * @desc: Host packet descriptor + * @ps_flags: Protocol Specific flags to set */ static inline void cppi5_hdesc_set_psflags(struct cppi5_host_desc_t *desc, u32 ps_flags) @@ -442,8 +446,10 @@ static inline void cppi5_hdesc_set_psflags(struct cppi5_host_desc_t *desc, } /** - * cppi5_hdesc_get_errflags - get Packet Type from HDesc + * cppi5_hdesc_get_pkttype - get Packet Type from HDesc * @desc: Host packet descriptor + * + * Returns: Packet type */ static inline u32 cppi5_hdesc_get_pkttype(struct cppi5_host_desc_t *desc) { @@ -452,7 +458,7 @@ static inline u32 cppi5_hdesc_get_pkttype(struct cppi5_host_desc_t *desc) } /** - * cppi5_hdesc_get_errflags - set Packet Type in HDesc + * cppi5_hdesc_set_pkttype - set Packet Type in HDesc * @desc: Host packet descriptor * @pkt_type: Packet Type */ @@ -501,7 +507,7 @@ static inline void cppi5_hdesc_reset_to_original(struct cppi5_host_desc_t *desc) /** * cppi5_hdesc_link_hbdesc - link Host Buffer Descriptor to HDesc * @desc: Host Packet Descriptor - * @buf_desc: Host Buffer Descriptor physical address + * @hbuf_desc: Host Buffer Descriptor physical address * * add and link Host Buffer Descriptor to HDesc */ @@ -527,7 +533,7 @@ static inline void cppi5_hdesc_reset_hbdesc(struct cppi5_host_desc_t *desc) * cppi5_hdesc_epib_present - check if EPIB present * @desc_hdr: packet descriptor/TR header * - * Returns true if EPIB present in the packet + * Returns: true if EPIB present in the packet */ static inline bool cppi5_hdesc_epib_present(struct cppi5_desc_hdr_t *desc_hdr) { @@ -538,7 +544,7 @@ static inline bool cppi5_hdesc_epib_present(struct cppi5_desc_hdr_t *desc_hdr) * cppi5_hdesc_get_psdata - Get pointer on PSDATA * @desc: Host packet descriptor * - * Returns pointer on PSDATA in HDesc. + * Returns: pointer on PSDATA in HDesc. * NULL - if ps_data placed at the start of data buffer. */ static inline void *cppi5_hdesc_get_psdata(struct cppi5_host_desc_t *desc) @@ -568,7 +574,7 @@ static inline void *cppi5_hdesc_get_psdata(struct cppi5_host_desc_t *desc) * cppi5_hdesc_get_swdata - Get pointer on swdata * @desc: Host packet descriptor * - * Returns pointer on SWDATA in HDesc. + * Returns: pointer on SWDATA in HDesc. * NOTE. It's caller responsibility to be sure hdesc actually has swdata. */ static inline void *cppi5_hdesc_get_swdata(struct cppi5_host_desc_t *desc) @@ -648,6 +654,7 @@ enum cppi5_tr_types { CPPI5_TR_TYPE11, /* type12-14: Reserved */ CPPI5_TR_TYPE15 = 15, + /* private: */ CPPI5_TR_TYPE_MAX }; @@ -673,6 +680,7 @@ enum cppi5_tr_event_size { CPPI5_TR_EVENT_SIZE_ICNT1_DEC, CPPI5_TR_EVENT_SIZE_ICNT2_DEC, CPPI5_TR_EVENT_SIZE_ICNT3_DEC, + /* private: */ CPPI5_TR_EVENT_SIZE_MAX }; @@ -690,6 +698,7 @@ enum cppi5_tr_trigger { CPPI5_TR_TRIGGER_GLOBAL0, CPPI5_TR_TRIGGER_GLOBAL1, CPPI5_TR_TRIGGER_LOCAL_EVENT, + /* private: */ CPPI5_TR_TRIGGER_MAX }; @@ -711,6 +720,7 @@ enum cppi5_tr_trigger_type { CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC, CPPI5_TR_TRIGGER_TYPE_ICNT3_DEC, CPPI5_TR_TRIGGER_TYPE_ALL, + /* private: */ CPPI5_TR_TRIGGER_TYPE_MAX }; @@ -815,7 +825,7 @@ struct cppi5_tr_type3_t { * destination * @dicnt1: Total loop iteration count for level 1 for destination * @dicnt2: Total loop iteration count for level 2 for destination - * @sicnt3: Total loop iteration count for level 3 (outermost) for + * @dicnt3: Total loop iteration count for level 3 (outermost) for * destination */ struct cppi5_tr_type15_t { @@ -887,6 +897,7 @@ enum cppi5_tr_resp_status_type { CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_ERR, CPPI5_TR_RESPONSE_STATUS_TRANSFER_EXCEPTION, CPPI5_TR_RESPONSE_STATUS__TEARDOWN_FLUSH, + /* private: */ CPPI5_TR_RESPONSE_STATUS_MAX }; @@ -903,6 +914,7 @@ enum cppi5_tr_resp_status_submission { CPPI5_TR_RESPONSE_STATUS_SUBMISSION_ICNT0, CPPI5_TR_RESPONSE_STATUS_SUBMISSION_FIFO_FULL, CPPI5_TR_RESPONSE_STATUS_SUBMISSION_OWN, + /* private: */ CPPI5_TR_RESPONSE_STATUS_SUBMISSION_MAX }; @@ -931,6 +943,7 @@ enum cppi5_tr_resp_status_unsupported { CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_DFMT, CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_SECTR, CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_AMODE_SPECIFIC, + /* private: */ CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_MAX }; @@ -939,7 +952,7 @@ enum cppi5_tr_resp_status_unsupported { * @tr_count: number of TR records * @tr_size: Nominal size of TR record (max) [16, 32, 64, 128] * - * Returns required TR Descriptor size + * Returns: required TR Descriptor size */ static inline size_t cppi5_trdesc_calc_size(u32 tr_count, u32 tr_size) { @@ -955,7 +968,7 @@ static inline size_t cppi5_trdesc_calc_size(u32 tr_count, u32 tr_size) /** * cppi5_trdesc_init - Init TR Descriptor - * @desc: TR Descriptor + * @desc_hdr: TR Descriptor * @tr_count: number of TR records * @tr_size: Nominal size of TR record (max) [16, 32, 64, 128] * @reload_idx: Absolute index to jump to on the 2nd and following passes @@ -1044,7 +1057,7 @@ static inline void cppi5_tr_set_trigger(cppi5_tr_flags_t *flags, } /** - * cppi5_tr_cflag_set - Update the Configuration specific flags + * cppi5_tr_csf_set - Update the Configuration specific flags * @flags: Pointer to the TR's flags * @csf: Configuration specific flags * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 99efe2b9b4ea..b3d251c9734e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -996,7 +996,8 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( * @vecs: The array of DMA vectors that should be transferred * @nents: The number of DMA vectors in the array * @dir: Specifies the direction of the data transfer - * @flags: DMA engine flags + * @flags: DMA engine flags - DMA_PREP_REPEAT can be used to mark a cyclic + * DMA transfer */ static inline struct dma_async_tx_descriptor *dmaengine_prep_peripheral_dma_vec( struct dma_chan *chan, const struct dma_vec *vecs, size_t nents, diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 927f8a8b7a1d..c8700e6a694d 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -60,6 +60,7 @@ enum dmi_entry_type { DMI_ENTRY_OOB_REMOTE_ACCESS, DMI_ENTRY_BIS_ENTRY, DMI_ENTRY_SYSTEM_BOOT, + DMI_ENTRY_64_MEM_ERROR, DMI_ENTRY_MGMT_DEV, DMI_ENTRY_MGMT_DEV_COMPONENT, DMI_ENTRY_MGMT_DEV_THRES, @@ -69,6 +70,10 @@ enum dmi_entry_type { DMI_ENTRY_ADDITIONAL, DMI_ENTRY_ONBOARD_DEV_EXT, DMI_ENTRY_MGMT_CONTROLLER_HOST, + DMI_ENTRY_TPM_DEVICE, + DMI_ENTRY_PROCESSOR_ADDITIONAL, + DMI_ENTRY_FIRMWARE_INVENTORY, + DMI_ENTRY_STRING_PROPERTY, DMI_ENTRY_INACTIVE = 126, DMI_ENTRY_END_OF_TABLE = 127, }; @@ -86,6 +91,21 @@ struct dmi_device { void *device_data; /* Type specific data */ }; +#define DMI_A_INFO_ENT_MIN_SIZE 0x6 +struct dmi_a_info_entry { + u8 length; + u16 handle; + u8 offset; + u8 str_num; + u8 value[]; +} __packed; + +#define DMI_A_INFO_MIN_SIZE 0xB +struct dmi_a_info { + struct dmi_header header; + u8 count; +} __packed; + #ifdef CONFIG_DMI struct dmi_dev_onboard { @@ -115,6 +135,7 @@ extern void dmi_memdev_name(u16 handle, const char **bank, const char **device); extern u64 dmi_memdev_size(u16 handle); extern u8 dmi_memdev_type(u16 handle); extern u16 dmi_memdev_handle(int slot); +const char *dmi_string_nosave(const struct dmi_header *dm, u8 s); #else @@ -148,6 +169,8 @@ static inline u8 dmi_memdev_type(u16 handle) { return 0x0; } static inline u16 dmi_memdev_handle(int slot) { return 0xffff; } static inline const struct dmi_system_id * dmi_first_match(const struct dmi_system_id *list) { return NULL; } +static inline const char * + dmi_string_nosave(const struct dmi_header *dm, u8 s) { return ""; } #endif diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 2ce295b46b8c..b7277a8b484d 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -52,6 +52,12 @@ struct dpll_device_ops { int (*phase_offset_avg_factor_get)(const struct dpll_device *dpll, void *dpll_priv, u32 *factor, struct netlink_ext_ack *extack); + int (*freq_monitor_set)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_feature_state state, + struct netlink_ext_ack *extack); + int (*freq_monitor_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_feature_state *state, + struct netlink_ext_ack *extack); }; struct dpll_pin_ops { @@ -110,6 +116,10 @@ struct dpll_pin_ops { int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, s64 *ffo, struct netlink_ext_ack *extack); + int (*measured_freq_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 *measured_freq, + struct netlink_ext_ack *extack); int (*esync_set)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, u64 freq, struct netlink_ext_ack *extack); diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 53f44b8cd75f..f53c534aba0c 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -87,7 +87,7 @@ */ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, /* "arbitrary" size strings, nla_policy.len = 0 */ - __str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0) + __str_field(1, 0, info_text, 0) ) /* Configuration requests typically need a context to operate on. @@ -96,10 +96,10 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and/or the replication group (aka resource) name, * and the volume id within the resource. */ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, - __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) - __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) - __bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128) - __bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128) + __u32_field(1, 0, ctx_volume) + __str_field(2, 0, ctx_resource_name, 128) + __bin_field(3, 0, ctx_my_addr, 128) + __bin_field(4, 0, ctx_peer_addr, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, @@ -108,86 +108,86 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx) /* use the resize command to try and change the disk_size */ - __u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size) + __u64_field(4, DRBD_F_INVARIANT, disk_size) /* we could change the max_bio_bvecs, * but it won't propagate through the stack */ - __u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs) - - __u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) - __u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - - __u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF) - __u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) - __u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) - __u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) - __u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) - __u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) - __u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) + __u32_field(5, DRBD_F_INVARIANT, max_bio_bvecs) + + __u32_field_def(6, 0, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, 0, fencing, DRBD_FENCING_DEF) + + __u32_field_def(8, 0, resync_rate, DRBD_RESYNC_RATE_DEF) + __s32_field_def(9, 0, resync_after, DRBD_MINOR_NUMBER_DEF) + __u32_field_def(10, 0, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, 0, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, 0, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, 0, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, 0, c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, 0, c_min_rate, DRBD_C_MIN_RATE_DEF) + __u32_field_def(20, 0, disk_timeout, DRBD_DISK_TIMEOUT_DEF) __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) __u32_field_def(25, 0 /* OPTIONAL */, rs_discard_granularity, DRBD_RS_DISCARD_GRANULARITY_DEF) - __flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) - __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) - __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) - __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __flg_field_def(16, 0, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, 0, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, 0, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, 0, md_flushes, DRBD_MD_FLUSHES_DEF) __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) __flg_field_def(24, 0 /* OPTIONAL */, discard_zeroes_if_aligned, DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF) __flg_field_def(26, 0 /* OPTIONAL */, disable_write_same, DRBD_DISABLE_WRITE_SAME_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, - __str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, DRBD_CPU_MASK_SIZE) - __u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) + __str_field_def(1, 0, cpu_mask, DRBD_CPU_MASK_SIZE) + __u32_field_def(2, 0, on_no_data, DRBD_ON_NO_DATA_DEF) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, + __str_field_def(1, DRBD_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) - __u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) - __u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) - __u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) - __u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) - __u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) - __u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) - __u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) - __u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) - __u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) - __u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) - __u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) - __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) - __u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) - __u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) - __u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) - __u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) - __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) - __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) - __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) - /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ - /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */ + __str_field_def(2, 0, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(3, 0, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(4, 0, verify_alg, SHARED_SECRET_MAX) + __str_field_def(5, 0, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(6, 0, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(7, 0, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(8, 0, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(9, 0, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(10, 0, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(11, 0, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(12, 0, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(13, 0, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(14, 0, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(15, 0, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(16, 0, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(17, 0, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(18, 0, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(19, 0, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(20, 0, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(21, 0, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(22, 0, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(23, 0, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(24, 0, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(25, DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(26, 0, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(27, 0, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(28, DRBD_F_INVARIANT, tentative) + __flg_field_def(29, 0, use_rle, DRBD_USE_RLE_DEF) + /* 9: __u32_field_def(30, 0, fencing_policy, DRBD_FENCING_DEF) */ + /* 9: __str_field_def(31, 0, name, SHARED_SECRET_MAX) */ /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */ __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF) __u32_field_def(34, 0 /* OPTIONAL */, sock_check_timeo, DRBD_SOCKET_CHECK_TIMEO_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, - __flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate) + __flg_field(1, 0, assume_uptodate) ) GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, - __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) - __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) - __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) + __u64_field(1, 0, resize_size) + __flg_field(2, 0, resize_force) + __flg_field(3, 0, no_resync) __u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF) __u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF) ) @@ -195,31 +195,31 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, /* the reason of the broadcast, * if this is an event triggered broadcast. */ - __u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason) + __u32_field(1, 0, sib_reason) __u32_field(2, DRBD_F_REQUIRED, current_state) - __u64_field(3, DRBD_GENLA_F_MANDATORY, capacity) - __u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid) + __u64_field(3, 0, capacity) + __u64_field(4, 0, ed_uuid) /* These are for broadcast from after state change work. * prev_state and new_state are from the moment the state change took * place, new_state is not neccessarily the same as current_state, * there may have been more state changes since. Which will be * broadcasted soon, in their respective after state change work. */ - __u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state) - __u32_field(6, DRBD_GENLA_F_MANDATORY, new_state) + __u32_field(5, 0, prev_state) + __u32_field(6, 0, new_state) /* if we have a local disk: */ - __bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) - __u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags) - __u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total) - __u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos) + __bin_field(7, 0, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, 0, disk_flags) + __u64_field(9, 0, bits_total) + __u64_field(10, 0, bits_oos) /* and in case resync or online verify is active */ - __u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total) - __u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed) + __u64_field(11, 0, bits_rs_total) + __u64_field(12, 0, bits_rs_failed) /* for pre and post notifications of helper execution */ - __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) - __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) + __str_field(13, 0, helper, 32) + __u32_field(14, 0, helper_exit_code) __u64_field(15, 0, send_cnt) __u64_field(16, 0, recv_cnt) @@ -233,12 +233,12 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, ) GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, - __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) - __u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector) + __u64_field(1, 0, ov_start_sector) + __u64_field(2, 0, ov_stop_sector) ) GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, - __flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm) + __flg_field(1, 0, clear_bm) ) GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, @@ -246,11 +246,11 @@ GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, ) GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, - __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) + __flg_field(1, 0, force_disconnect) ) GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, - __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) + __flg_field(1, 0, force_detach) ) GENL_struct(DRBD_NLA_RESOURCE_INFO, 15, resource_info, @@ -315,12 +315,12 @@ GENL_struct(DRBD_NLA_PEER_DEVICE_STATISTICS, 22, peer_device_statistics, ) GENL_struct(DRBD_NLA_NOTIFICATION_HEADER, 23, drbd_notification_header, - __u32_field(1, DRBD_GENLA_F_MANDATORY, nh_type) + __u32_field(1, 0, nh_type) ) GENL_struct(DRBD_NLA_HELPER, 24, drbd_helper_info, - __str_field(1, DRBD_GENLA_F_MANDATORY, helper_name, 32) - __u32_field(2, DRBD_GENLA_F_MANDATORY, helper_status) + __str_field(1, 0, helper_name, 32) + __u32_field(2, 0, helper_status) ) /* @@ -333,9 +333,9 @@ GENL_notification( DRBD_EVENT, 1, events, GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_NET_CONF, 0) + GENL_tla_expected(DRBD_NLA_DISK_CONF, 0) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, 0) ) /* query kernel for specific or all info */ @@ -349,7 +349,7 @@ GENL_op( ), /* To select the object .doit. * Or a subset of objects in .dumpit. */ - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0) ) /* add DRBD minor devices as volumes to resources */ @@ -367,7 +367,7 @@ GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource), GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, GENL_doit(drbd_adm_resource_opts), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, 0) ) GENL_op( @@ -403,7 +403,7 @@ GENL_op( DRBD_ADM_RESIZE, 13, GENL_doit(drbd_adm_resize), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, 0) ) GENL_op( @@ -424,18 +424,18 @@ GENL_op( DRBD_ADM_NEW_C_UUID, 16, GENL_doit(drbd_adm_new_c_uuid), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, 0) ) GENL_op( DRBD_ADM_START_OV, 17, GENL_doit(drbd_adm_start_ov), - GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, 0) ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) + GENL_tla_expected(DRBD_NLA_DETACH_PARMS, 0)) GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) @@ -460,36 +460,36 @@ GENL_op(DRBD_ADM_GET_RESOURCES, 30, GENL_op_init( .dumpit = drbd_adm_dump_resources, ), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_GENLA_F_MANDATORY)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0) + GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, 0) + GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, 0)) GENL_op(DRBD_ADM_GET_DEVICES, 31, GENL_op_init( .dumpit = drbd_adm_dump_devices, .done = drbd_adm_dump_devices_done, ), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0) + GENL_tla_expected(DRBD_NLA_DEVICE_INFO, 0) + GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, 0)) GENL_op(DRBD_ADM_GET_CONNECTIONS, 32, GENL_op_init( .dumpit = drbd_adm_dump_connections, .done = drbd_adm_dump_connections_done, ), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_GENLA_F_MANDATORY)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0) + GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, 0) + GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, 0)) GENL_op(DRBD_ADM_GET_PEER_DEVICES, 33, GENL_op_init( .dumpit = drbd_adm_dump_peer_devices, .done = drbd_adm_dump_peer_devices_done, ), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0) + GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, 0) + GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, 0)) GENL_notification( DRBD_RESOURCE_STATE, 34, events, @@ -524,7 +524,7 @@ GENL_op( GENL_op_init( .dumpit = drbd_adm_get_initial_state, ), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0)) GENL_notification( DRBD_HELPER, 40, events, diff --git a/include/linux/dsa/loop.h b/include/linux/dsa/loop.h deleted file mode 100644 index b8fef35591aa..000000000000 --- a/include/linux/dsa/loop.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef DSA_LOOP_H -#define DSA_LOOP_H - -#include <linux/if_vlan.h> -#include <linux/types.h> -#include <linux/ethtool.h> -#include <net/dsa.h> - -struct dsa_loop_vlan { - u16 members; - u16 untagged; -}; - -struct dsa_loop_mib_entry { - char name[ETH_GSTRING_LEN]; - unsigned long val; -}; - -enum dsa_loop_mib_counters { - DSA_LOOP_PHY_READ_OK, - DSA_LOOP_PHY_READ_ERR, - DSA_LOOP_PHY_WRITE_OK, - DSA_LOOP_PHY_WRITE_ERR, - __DSA_LOOP_CNT_MAX, -}; - -struct dsa_loop_port { - struct dsa_loop_mib_entry mib[__DSA_LOOP_CNT_MAX]; - u16 pvid; - int mtu; -}; - -struct dsa_loop_priv { - struct mii_bus *bus; - unsigned int port_base; - struct dsa_loop_vlan vlans[VLAN_N_VID]; - struct net_device *netdev; - struct dsa_loop_port ports[DSA_MAX_PORTS]; -}; - -#endif /* DSA_LOOP_H */ diff --git a/include/linux/edac.h b/include/linux/edac.h index fa32f2aca22f..deba46b3ee25 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -541,17 +541,6 @@ struct mem_ctl_info { struct csrow_info **csrows; unsigned int nr_csrows, num_cschannel; - /* - * Memory Controller hierarchy - * - * There are basically two types of memory controller: the ones that - * sees memory sticks ("dimms"), and the ones that sees memory ranks. - * All old memory controllers enumerate memories per rank, but most - * of the recent drivers enumerate memories per DIMM, instead. - * When the memory controller is per rank, csbased is true. - */ - unsigned int n_layers; - struct edac_mc_layer *layers; bool csbased; /* @@ -609,6 +598,18 @@ struct mem_ctl_info { u8 fake_inject_layer[EDAC_MAX_LAYERS]; bool fake_inject_ue; u16 fake_inject_count; + + /* + * Memory Controller hierarchy + * + * There are basically two types of memory controller: the ones that + * sees memory sticks ("dimms"), and the ones that sees memory ranks. + * All old memory controllers enumerate memories per rank, but most + * of the recent drivers enumerate memories per DIMM, instead. + * When the memory controller is per rank, csbased is true. + */ + unsigned int n_layers; + struct edac_mc_layer layers[] __counted_by(n_layers); }; #define mci_for_each_dimm(mci, dimm) \ diff --git a/include/linux/efi.h b/include/linux/efi.h index 664898d09ff5..72e76ec54641 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -832,27 +832,6 @@ extern int __init parse_efi_signature_list( const void *data, size_t size, efi_element_handler_t (*get_handler_for_guid)(const efi_guid_t *)); -/** - * efi_range_is_wc - check the WC bit on an address range - * @start: starting kvirt address - * @len: length of range - * - * Consult the EFI memory map and make sure it's ok to set this range WC. - * Returns true or false. - */ -static inline int efi_range_is_wc(unsigned long start, unsigned long len) -{ - unsigned long i; - - for (i = 0; i < len; i += (1UL << EFI_PAGE_SHIFT)) { - unsigned long paddr = __pa(start + i); - if (!(efi_mem_attributes(paddr) & EFI_MEMORY_WC)) - return 0; - } - /* The range checked out */ - return 1; -} - /* * We play games with efi_enabled so that the compiler will, if * possible, remove EFI-related code altogether. diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index e7497f804644..c909a8ba22e8 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, struct em_perf_state *ps; int i; - WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n"); + lockdep_assert(rcu_read_lock_any_held()); if (!sum_util) return 0; @@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * Find the lowest performance state of the Energy Model above the * requested performance. */ - em_table = rcu_dereference(pd->em_table); + em_table = rcu_dereference_all(pd->em_table); i = em_pd_get_efficient_state(em_table->state, pd, max_util); ps = &em_table->state[i]; diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index f83ca0abf2cd..e04d67e999a1 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -48,6 +48,7 @@ /** * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper + * @regs: Pointer to the register state at syscall entry * * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry(). * @@ -205,6 +206,8 @@ static __always_inline bool report_single_step(unsigned long work) /** * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit() + * @regs: Pointer to the register state at syscall exit + * @step: Indicates a single-step exit rather than a normal syscall exit * * This allows architecture specific ptrace_report_syscall_exit() * implementations. If not defined by the architecture this falls back to @@ -321,7 +324,7 @@ static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) { instrumentation_begin(); syscall_exit_to_user_mode_work(regs); - local_irq_disable_exit_to_user(); + local_irq_disable(); syscall_exit_to_user_mode_prepare(regs); instrumentation_end(); exit_to_user_mode(); diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 9a1eacf35d37..df8f88f63a70 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -42,7 +42,8 @@ extern const struct header_ops eth_header_ops; int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); -int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); +int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr); int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 798abec67a1b..1cb0740ba331 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -176,6 +176,8 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * struct ethtool_rxfh_context - a custom RSS context configuration * @indir_size: Number of u32 entries in indirection table * @key_size: Size of hash key, in bytes + * @indir_user_size: number of user provided entries for the + * indirection table * @priv_size: Size of driver private data, in bytes * @hfunc: RSS hash function identifier. One of the %ETH_RSS_HASH_* * @input_xfrm: Defines how the input data is transformed. Valid values are one @@ -186,6 +188,7 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) struct ethtool_rxfh_context { u32 indir_size; u32 key_size; + u32 indir_user_size; u16 priv_size; u8 hfunc; u8 input_xfrm; @@ -214,6 +217,13 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) } void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); +void ethtool_rxfh_indir_lost(struct net_device *dev); +bool ethtool_rxfh_indir_can_resize(struct net_device *dev, const u32 *tbl, + u32 old_size, u32 new_size); +void ethtool_rxfh_indir_resize(struct net_device *dev, u32 *tbl, + u32 old_size, u32 new_size); +int ethtool_rxfh_ctxs_can_resize(struct net_device *dev, u32 new_indir_size); +void ethtool_rxfh_ctxs_resize(struct net_device *dev, u32 new_indir_size); struct link_mode_info { int speed; @@ -332,6 +342,8 @@ struct kernel_ethtool_coalesce { u32 tx_aggr_max_bytes; u32 tx_aggr_max_frames; u32 tx_aggr_time_usecs; + u32 rx_cqe_frames; + u32 rx_cqe_nsecs; }; /** @@ -380,7 +392,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS BIT(26) #define ETHTOOL_COALESCE_RX_PROFILE BIT(27) #define ETHTOOL_COALESCE_TX_PROFILE BIT(28) -#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(28, 0) +#define ETHTOOL_COALESCE_RX_CQE_FRAMES BIT(29) +#define ETHTOOL_COALESCE_RX_CQE_NSECS BIT(30) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(30, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) @@ -512,12 +526,14 @@ struct ethtool_eth_ctrl_stats { * * Equivalent to `30.3.4.3 aPAUSEMACCtrlFramesReceived` * from the standard. + * @tx_pause_storm_events: TX pause storm event count (see ethtool.yaml). */ struct ethtool_pause_stats { enum ethtool_mac_stats_src src; struct_group(stats, u64 tx_pause_frames; u64 rx_pause_frames; + u64 tx_pause_storm_events; ); }; @@ -1331,12 +1347,15 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. + * @rss_indir_user_size: Number of user provided entries for the default + * (context 0) indirection table. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; + u32 rss_indir_user_size; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; }; diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index ccb478eb174b..ea9ca0e4172a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -82,11 +82,14 @@ static inline struct epoll_event __user * epoll_put_uevent(__poll_t revents, __u64 data, struct epoll_event __user *uevent) { - if (__put_user(revents, &uevent->events) || - __put_user(data, &uevent->data)) - return NULL; - + scoped_user_write_access_size(uevent, sizeof(*uevent), efault) { + unsafe_put_user(revents, &uevent->events, efault); + unsafe_put_user(data, &uevent->data, efault); + } return uevent+1; + +efault: + return NULL; } #endif diff --git a/include/linux/evm.h b/include/linux/evm.h index ddece4a6b25d..913f4573b203 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -18,6 +18,8 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, size_t xattr_value_len); +int evm_fix_hmac(struct dentry *dentry, const char *xattr_name, + const char *xattr_value, size_t xattr_value_len); int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count); @@ -51,6 +53,12 @@ static inline enum integrity_status evm_verifyxattr(struct dentry *dentry, { return INTEGRITY_UNKNOWN; } + +static inline int evm_fix_hmac(struct dentry *dentry, const char *xattr_name, + const char *xattr_value, size_t xattr_value_len) +{ + return -EOPNOTSUPP; +} #endif static inline int evm_inode_init_security(struct inode *inode, struct inode *dir, diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index d445705ac13c..726054614752 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -37,14 +37,14 @@ * section flag requires it. Use '%progbits' instead of '@progbits' since the * former apparently works on all arches according to the binutils source. */ -#define __KSYMTAB(name, sym, sec, ns) \ +#define __KSYMTAB(name, sym, ns) \ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1" "\n" \ "__kstrtab_" #name ":" "\n" \ " .asciz \"" #name "\"" "\n" \ "__kstrtabns_" #name ":" "\n" \ " .asciz \"" ns "\"" "\n" \ " .previous" "\n" \ - " .section \"___ksymtab" sec "+" #name "\", \"a\"" "\n" \ + " .section \"___ksymtab+" #name "\", \"a\"" "\n" \ __KSYM_ALIGN "\n" \ "__ksymtab_" #name ":" "\n" \ __KSYM_REF(sym) "\n" \ @@ -59,14 +59,22 @@ #define KSYM_FUNC(name) name #endif -#define KSYMTAB_FUNC(name, sec, ns) __KSYMTAB(name, KSYM_FUNC(name), sec, ns) -#define KSYMTAB_DATA(name, sec, ns) __KSYMTAB(name, name, sec, ns) +#define KSYMTAB_FUNC(name, ns) __KSYMTAB(name, KSYM_FUNC(name), ns) +#define KSYMTAB_DATA(name, ns) __KSYMTAB(name, name, ns) -#define SYMBOL_CRC(sym, crc, sec) \ - asm(".section \"___kcrctab" sec "+" #sym "\",\"a\"" "\n" \ - ".balign 4" "\n" \ - "__crc_" #sym ":" "\n" \ - ".long " #crc "\n" \ - ".previous" "\n") +#define SYMBOL_CRC(sym, crc) \ + asm(" .section \"___kcrctab+" #sym "\",\"a\"" "\n" \ + " .balign 4" "\n" \ + "__crc_" #sym ":" "\n" \ + " .long " #crc "\n" \ + " .previous" "\n" \ + ) + +#define SYMBOL_FLAGS(sym, flags) \ + asm(" .section \"___kflagstab+" #sym "\",\"a\"" "\n" \ + "__flags_" #sym ":" "\n" \ + " .byte " #flags "\n" \ + " .previous" "\n" \ + ) #endif /* __LINUX_EXPORT_INTERNAL_H__ */ diff --git a/include/linux/fb.h b/include/linux/fb.h index 6d4a58084fd5..5178a33c752c 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -217,14 +217,12 @@ struct fb_deferred_io { /* delay between mkwrite and deferred handler */ unsigned long delay; bool sort_pagereflist; /* sort pagelist by offset */ - int open_count; /* number of opened files; protected by fb_info lock */ - struct mutex lock; /* mutex that protects the pageref list */ - struct list_head pagereflist; /* list of pagerefs for touched pages */ - struct address_space *mapping; /* page cache object for fb device */ /* callback */ struct page *(*get_page)(struct fb_info *info, unsigned long offset); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; + +struct fb_deferred_io_state; #endif /* @@ -484,9 +482,8 @@ struct fb_info { #ifdef CONFIG_FB_DEFERRED_IO struct delayed_work deferred_work; - unsigned long npagerefs; - struct fb_deferred_io_pageref *pagerefs; struct fb_deferred_io *fbdefio; + struct fb_deferred_io_state *fbdefio_state; #endif const struct fb_ops *fbops; @@ -605,9 +602,9 @@ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); extern int devm_register_framebuffer(struct device *dev, struct fb_info *fb_info); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); -extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, - u32 height, u32 shift_high, u32 shift_low, u32 mod); -extern void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height); +void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, const u8 *src, u32 idx, u32 height, + u32 shift_high, u32 shift_low, u32 mod); +void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, const u8 *src, u32 s_pitch, u32 height); extern void fb_set_suspend(struct fb_info *info, int state); extern int fb_get_color_depth(struct fb_var_screeninfo *var, struct fb_fix_screeninfo *fix); @@ -633,8 +630,8 @@ static inline struct device *dev_of_fbinfo(const struct fb_info *info) #endif } -static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, - u8 *src, u32 s_pitch, u32 height) +static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, const u8 *src, u32 s_pitch, + u32 height) { u32 i, j; diff --git a/include/linux/filter.h b/include/linux/filter.h index 44d7ae95ddbc..1ec6d5ba64cc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -21,7 +21,6 @@ #include <linux/if_vlan.h> #include <linux/vmalloc.h> #include <linux/sockptr.h> -#include <crypto/sha1.h> #include <linux/u64_stats_sync.h> #include <net/sch_generic.h> @@ -1092,22 +1091,25 @@ bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) return set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } -int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap, - enum skb_drop_reason *reason); +enum skb_drop_reason +sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { - enum skb_drop_reason ignore_reason; + enum skb_drop_reason drop_reason; - return sk_filter_trim_cap(sk, skb, 1, &ignore_reason); + drop_reason = sk_filter_trim_cap(sk, skb, 1); + return drop_reason ? -EPERM : 0; } -static inline int sk_filter_reason(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason) +static inline enum skb_drop_reason +sk_filter_reason(struct sock *sk, struct sk_buff *skb) { - return sk_filter_trim_cap(sk, skb, 1, reason); + return sk_filter_trim_cap(sk, skb, 1); } +struct bpf_prog *__bpf_prog_select_runtime(struct bpf_verifier_env *env, struct bpf_prog *fp, + int *err); struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); void bpf_prog_free(struct bpf_prog *fp); @@ -1153,7 +1155,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ (void *)__bpf_call_base) -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_jit_inlines_helper_call(s32 imm); @@ -1184,6 +1186,31 @@ static inline bool bpf_dump_raw_ok(const struct cred *cred) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); + +#ifdef CONFIG_BPF_SYSCALL +struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len); +struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env); +void bpf_restore_insn_aux_data(struct bpf_verifier_env *env, + struct bpf_insn_aux_data *orig_insn_aux); +#else +static inline struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env) +{ + return NULL; +} + +static inline void bpf_restore_insn_aux_data(struct bpf_verifier_env *env, + struct bpf_insn_aux_data *orig_insn_aux) +{ +} +#endif /* CONFIG_BPF_SYSCALL */ + int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); static inline bool xdp_return_frame_no_direct(void) @@ -1310,9 +1337,14 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, const char *bpf_jit_get_prog_name(struct bpf_prog *prog); -struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); +struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bpf_prog *prog); void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); +static inline bool bpf_prog_need_blind(const struct bpf_prog *prog) +{ + return prog->blinding_requested && !prog->blinded; +} + static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { @@ -1451,6 +1483,20 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } +static inline bool bpf_prog_need_blind(const struct bpf_prog *prog) +{ + return false; +} + +static inline +struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bpf_prog *prog) +{ + return prog; +} + +static inline void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other) +{ +} #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); diff --git a/include/linux/find.h b/include/linux/find.h index 9d720ad92bc1..6c2be8ca615d 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -22,8 +22,6 @@ extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long si unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n); unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n); -unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, - unsigned long size, unsigned long n); unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, unsigned long size, unsigned long n); diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 0ec1cdc5585d..4e3baa557068 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -179,6 +179,7 @@ struct cs_dsp { bool booted; bool running; + bool hibernating; struct list_head ctl_list; @@ -354,4 +355,6 @@ int cs_dsp_chunk_write(struct cs_dsp_chunk *ch, int nbits, u32 val); int cs_dsp_chunk_flush(struct cs_dsp_chunk *ch); int cs_dsp_chunk_read(struct cs_dsp_chunk *ch, int nbits); +void cs_dsp_hibernate(struct cs_dsp *dsp, bool hibernating); + #endif diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index d290060f4c73..91013161e9db 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -68,12 +68,12 @@ * timeout value used in Stratix10 FPGA manager driver. * timeout value used in RSU driver */ -#define SVC_RECONFIG_REQUEST_TIMEOUT_MS 300 -#define SVC_RECONFIG_BUFFER_TIMEOUT_MS 720 -#define SVC_RSU_REQUEST_TIMEOUT_MS 300 +#define SVC_RECONFIG_REQUEST_TIMEOUT_MS 5000 +#define SVC_RECONFIG_BUFFER_TIMEOUT_MS 5000 +#define SVC_RSU_REQUEST_TIMEOUT_MS 2000 #define SVC_FCS_REQUEST_TIMEOUT_MS 2000 #define SVC_COMPLETED_TIMEOUT_MS 30000 -#define SVC_HWMON_REQUEST_TIMEOUT_MS 300 +#define SVC_HWMON_REQUEST_TIMEOUT_MS 2000 struct stratix10_svc_chan; diff --git a/include/linux/firmware/samsung/exynos-acpm-protocol.h b/include/linux/firmware/samsung/exynos-acpm-protocol.h index 2091da965a5a..13f17dc4443b 100644 --- a/include/linux/firmware/samsung/exynos-acpm-protocol.h +++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h @@ -14,30 +14,24 @@ struct acpm_handle; struct device_node; struct acpm_dvfs_ops { - int (*set_rate)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, unsigned int clk_id, - unsigned long rate); - unsigned long (*get_rate)(const struct acpm_handle *handle, + int (*set_rate)(struct acpm_handle *handle, unsigned int acpm_chan_id, + unsigned int clk_id, unsigned long rate); + unsigned long (*get_rate)(struct acpm_handle *handle, unsigned int acpm_chan_id, unsigned int clk_id); }; struct acpm_pmic_ops { - int (*read_reg)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 *buf); - int (*bulk_read)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 count, u8 *buf); - int (*write_reg)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 value); - int (*bulk_write)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 count, const u8 *buf); - int (*update_reg)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 value, u8 mask); + int (*read_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 *buf); + int (*bulk_read)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 count, u8 *buf); + int (*write_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 value); + int (*bulk_write)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 count, const u8 *buf); + int (*update_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 value, u8 mask); }; struct acpm_ops { @@ -56,12 +50,12 @@ struct acpm_handle { struct device; #if IS_ENABLED(CONFIG_EXYNOS_ACPM_PROTOCOL) -const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, - struct device_node *np); +struct acpm_handle *devm_acpm_get_by_node(struct device *dev, + struct device_node *np); #else -static inline const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, - struct device_node *np) +static inline struct acpm_handle *devm_acpm_get_by_node(struct device *dev, + struct device_node *np) { return NULL; } diff --git a/include/linux/firmware/thead/thead,th1520-aon.h b/include/linux/firmware/thead/thead,th1520-aon.h index dae132b66873..d81f5f6f5b90 100644 --- a/include/linux/firmware/thead/thead,th1520-aon.h +++ b/include/linux/firmware/thead/thead,th1520-aon.h @@ -97,80 +97,6 @@ struct th1520_aon_rpc_ack_common { #define RPC_GET_SVC_FLAG_ACK_TYPE(MESG) (((MESG)->svc & 0x40) >> 6) #define RPC_SET_SVC_FLAG_ACK_TYPE(MESG, ACK) ((MESG)->svc |= (ACK) << 6) -#define RPC_SET_BE64(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 7] = _set_data & 0xFF; \ - data[_offset + 6] = (_set_data & 0xFF00) >> 8; \ - data[_offset + 5] = (_set_data & 0xFF0000) >> 16; \ - data[_offset + 4] = (_set_data & 0xFF000000) >> 24; \ - data[_offset + 3] = (_set_data & 0xFF00000000) >> 32; \ - data[_offset + 2] = (_set_data & 0xFF0000000000) >> 40; \ - data[_offset + 1] = (_set_data & 0xFF000000000000) >> 48; \ - data[_offset + 0] = (_set_data & 0xFF00000000000000) >> 56; \ - } while (0) - -#define RPC_SET_BE32(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 3] = (_set_data) & 0xFF; \ - data[_offset + 2] = (_set_data & 0xFF00) >> 8; \ - data[_offset + 1] = (_set_data & 0xFF0000) >> 16; \ - data[_offset + 0] = (_set_data & 0xFF000000) >> 24; \ - } while (0) - -#define RPC_SET_BE16(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 1] = (_set_data) & 0xFF; \ - data[_offset + 0] = (_set_data & 0xFF00) >> 8; \ - } while (0) - -#define RPC_SET_U8(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - data[OFFSET] = (SET_DATA) & 0xFF; \ - } while (0) - -#define RPC_GET_BE64(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u32 *)(PTR) = \ - (data[_offset + 7] | data[_offset + 6] << 8 | \ - data[_offset + 5] << 16 | data[_offset + 4] << 24 | \ - data[_offset + 3] << 32 | data[_offset + 2] << 40 | \ - data[_offset + 1] << 48 | data[_offset + 0] << 56); \ - } while (0) - -#define RPC_GET_BE32(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u32 *)(PTR) = \ - (data[_offset + 3] | data[_offset + 2] << 8 | \ - data[_offset + 1] << 16 | data[_offset + 0] << 24); \ - } while (0) - -#define RPC_GET_BE16(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u16 *)(PTR) = (data[_offset + 1] | data[_offset + 0] << 8); \ - } while (0) - -#define RPC_GET_U8(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - *(u8 *)(PTR) = (data[OFFSET]); \ - } while (0) - /* * Defines for SC PM Power Mode */ diff --git a/include/linux/pagevec.h b/include/linux/folio_batch.h index 63be5a451627..b45946adc50b 100644 --- a/include/linux/pagevec.h +++ b/include/linux/folio_batch.h @@ -1,18 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * include/linux/pagevec.h + * include/linux/folio_batch.h * * In many places it is efficient to batch an operation up against multiple * folios. A folio_batch is a container which is used for that. */ -#ifndef _LINUX_PAGEVEC_H -#define _LINUX_PAGEVEC_H +#ifndef _LINUX_FOLIO_BATCH_H +#define _LINUX_FOLIO_BATCH_H #include <linux/types.h> /* 31 pointers + header align the folio_batch structure to a power of two */ -#define PAGEVEC_SIZE 31 +#define FOLIO_BATCH_SIZE 31 struct folio; @@ -29,7 +29,7 @@ struct folio_batch { unsigned char nr; unsigned char i; bool percpu_pvec_drained; - struct folio *folios[PAGEVEC_SIZE]; + struct folio *folios[FOLIO_BATCH_SIZE]; }; /** @@ -58,7 +58,7 @@ static inline unsigned int folio_batch_count(const struct folio_batch *fbatch) static inline unsigned int folio_batch_space(const struct folio_batch *fbatch) { - return PAGEVEC_SIZE - fbatch->nr; + return FOLIO_BATCH_SIZE - fbatch->nr; } /** @@ -93,7 +93,7 @@ static inline struct folio *folio_batch_next(struct folio_batch *fbatch) return fbatch->folios[fbatch->i++]; } -void __folio_batch_release(struct folio_batch *pvec); +void __folio_batch_release(struct folio_batch *fbatch); static inline void folio_batch_release(struct folio_batch *fbatch) { @@ -102,4 +102,4 @@ static inline void folio_batch_release(struct folio_batch *fbatch) } void folio_batch_remove_exceptionals(struct folio_batch *fbatch); -#endif /* _LINUX_PAGEVEC_H */ +#endif /* _LINUX_FOLIO_BATCH_H */ diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index adab609c972e..f6d5f1f127c9 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -14,7 +14,7 @@ #ifndef _LINUX_FOLIO_QUEUE_H #define _LINUX_FOLIO_QUEUE_H -#include <linux/pagevec.h> +#include <linux/folio_batch.h> #include <linux/mm.h> /* @@ -29,12 +29,12 @@ */ struct folio_queue { struct folio_batch vec; /* Folios in the queue segment */ - u8 orders[PAGEVEC_SIZE]; /* Order of each folio */ + u8 orders[FOLIO_BATCH_SIZE]; /* Order of each folio */ struct folio_queue *next; /* Next queue segment or NULL */ struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ -#if PAGEVEC_SIZE > BITS_PER_LONG +#if FOLIO_BATCH_SIZE > BITS_PER_LONG #error marks is not big enough #endif unsigned int rreq_id; @@ -70,7 +70,7 @@ static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id) */ static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) { - return PAGEVEC_SIZE; + return FOLIO_BATCH_SIZE; } /** diff --git a/include/linux/font.h b/include/linux/font.h index fd8625cd76b2..6845f02d739a 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -11,47 +11,123 @@ #ifndef _VIDEO_FONT_H #define _VIDEO_FONT_H +#include <linux/math.h> #include <linux/types.h> +struct console_font; + +/* + * Glyphs + */ + +/** + * font_glyph_pitch - Calculates the number of bytes per scanline + * @width: The glyph width in bits per scanline + * + * A glyph's pitch is the number of bytes in a single scanline, rounded + * up to the next full byte. The parameter @width receives the number + * of visible bits per scanline. For example, if width is 14 bytes per + * scanline, the pitch is 2 bytes per scanline. If width is 8 bits per + * scanline, the pitch is 1 byte per scanline. + * + * Returns: + * The number of bytes in a single scanline of the glyph + */ +static inline unsigned int font_glyph_pitch(unsigned int width) +{ + return DIV_ROUND_UP(width, 8); +} + +/** + * font_glyph_size - Calculates the number of bytes per glyph + * @width: The glyph width in bits per scanline + * @vpitch: The number of scanlines in the glyph + * + * The number of bytes in a glyph depends on the pitch and the number + * of scanlines. font_glyph_size automatically calculates the pitch + * from the given width. The parameter @vpitch gives the number of + * scanlines, which is usually the glyph's height in scanlines. Fonts + * coming from user space can sometimes have a different vertical pitch + * with empty scanlines between two adjacent glyphs. + */ +static inline unsigned int font_glyph_size(unsigned int width, unsigned int vpitch) +{ + return font_glyph_pitch(width) * vpitch; +} + +/* + * font_data_t and helpers + */ + +/** + * font_data_t - Raw font data + * + * Values of type font_data_t store a pointer to raw font data. The format + * is monochrome. Each bit sets a pixel of a stored glyph. Font data does + * not store geometry information for the individual glyphs. Users of the + * font have to store glyph size, pitch and character count separately. + * + * Font data in font_data_t is not equivalent to raw u8. Each pointer stores + * an additional hidden header before the font data. The layout is + * + * +------+-----------------------------+ + * | -16 | CRC32 Checksum (optional) | + * | -12 | <Unused> | + * | -8 | Number of data bytes | + * | -4 | Reference count | + * +------+-----------------------------+ + * | 0 | Data buffer | + * | ... | | + * +------+-----------------------------+ + * + * Use helpers to access font_data_t. Use font_data_buf() to get the stored data. + */ +typedef const unsigned char font_data_t; + +/** + * font_data_buf() - Returns the font data as raw bytes + * @fd: The font data + * + * Returns: + * The raw font data. The provided buffer is read-only. + */ +static inline const unsigned char *font_data_buf(font_data_t *fd) +{ + return (const unsigned char *)fd; +} + +font_data_t *font_data_import(const struct console_font *font, unsigned int vpitch, + u32 (*calc_csum)(u32, const void *, size_t)); +void font_data_get(font_data_t *fd); +bool font_data_put(font_data_t *fd); +unsigned int font_data_size(font_data_t *fd); +bool font_data_is_equal(font_data_t *lhs, font_data_t *rhs); +int font_data_export(font_data_t *fd, struct console_font *font, unsigned int vpitch); + +/* font_rotate.c */ +void font_glyph_rotate_90(const unsigned char *glyph, unsigned int width, unsigned int height, + unsigned char *out); +void font_glyph_rotate_180(const unsigned char *glyph, unsigned int width, unsigned int height, + unsigned char *out); +void font_glyph_rotate_270(const unsigned char *glyph, unsigned int width, unsigned int height, + unsigned char *out); +unsigned char *font_data_rotate(font_data_t *fd, unsigned int width, unsigned int height, + unsigned int charcount, unsigned int steps, + unsigned char *buf, size_t *bufsize); + +/* + * Font description + */ + struct font_desc { int idx; const char *name; unsigned int width, height; unsigned int charcount; - const void *data; + font_data_t *data; int pref; }; -#define VGA8x8_IDX 0 -#define VGA8x16_IDX 1 -#define PEARL8x8_IDX 2 -#define VGA6x11_IDX 3 -#define FONT7x14_IDX 4 -#define FONT10x18_IDX 5 -#define SUN8x16_IDX 6 -#define SUN12x22_IDX 7 -#define ACORN8x8_IDX 8 -#define MINI4x6_IDX 9 -#define FONT6x10_IDX 10 -#define TER16x32_IDX 11 -#define FONT6x8_IDX 12 -#define TER10x18_IDX 13 - -extern const struct font_desc font_vga_8x8, - font_vga_8x16, - font_pearl_8x8, - font_vga_6x11, - font_7x14, - font_10x18, - font_sun_8x16, - font_sun_12x22, - font_acorn_8x8, - font_mini_4x6, - font_6x10, - font_ter_16x32, - font_6x8, - font_ter_10x18; - /* Find a font with a specific name */ extern const struct font_desc *find_font(const char *name); @@ -65,17 +141,23 @@ extern const struct font_desc *get_default_font(int xres, int yres, /* Max. length for the name of a predefined font */ #define MAX_FONT_NAME 32 -/* Extra word getters */ -#define REFCOUNT(fd) (((int *)(fd))[-1]) -#define FNTSIZE(fd) (((int *)(fd))[-2]) -#define FNTCHARCNT(fd) (((int *)(fd))[-3]) -#define FNTSUM(fd) (((int *)(fd))[-4]) - -#define FONT_EXTRA_WORDS 4 +/* + * Built-in fonts + */ -struct font_data { - unsigned int extra[FONT_EXTRA_WORDS]; - const unsigned char data[]; -} __packed; +extern const struct font_desc font_10x18; +extern const struct font_desc font_6x10; +extern const struct font_desc font_6x8; +extern const struct font_desc font_7x14; +extern const struct font_desc font_acorn_8x8; +extern const struct font_desc font_mini_4x6; +extern const struct font_desc font_pearl_8x8; +extern const struct font_desc font_sun_12x22; +extern const struct font_desc font_sun_8x16; +extern const struct font_desc font_ter_10x18; +extern const struct font_desc font_ter_16x32; +extern const struct font_desc font_vga_6x11; +extern const struct font_desc font_vga_8x16; +extern const struct font_desc font_vga_8x8; #endif /* _VIDEO_FONT_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b3dd145b25e..e1d257e6da68 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -55,8 +55,6 @@ struct bdi_writeback; struct bio; struct io_comp_batch; struct fiemap_extent_info; -struct hd_geometry; -struct iovec; struct kiocb; struct kobject; struct pipe_inode_info; @@ -445,6 +443,13 @@ struct address_space_operations { extern const struct address_space_operations empty_aops; +/* Structure for tracking metadata buffer heads associated with the mapping */ +struct mapping_metadata_bhs { + struct address_space *mapping; /* Mapping bhs are associated with */ + spinlock_t lock; /* Lock protecting bh list */ + struct list_head list; /* The list of bhs (b_assoc_buffers) */ +}; + /** * struct address_space - Contents of a cacheable, mappable object. * @host: Owner, either the inode or the block_device. @@ -464,8 +469,6 @@ extern const struct address_space_operations empty_aops; * @flags: Error bits and flags (AS_*). * @wb_err: The most recent error which has occurred. * @i_private_lock: For use by the owner of the address_space. - * @i_private_list: For use by the owner of the address_space. - * @i_private_data: For use by the owner of the address_space. */ struct address_space { struct inode *host; @@ -484,9 +487,7 @@ struct address_space { unsigned long flags; errseq_t wb_err; spinlock_t i_private_lock; - struct list_head i_private_list; struct rw_semaphore i_mmap_rwsem; - void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but @@ -783,7 +784,7 @@ struct inode { #endif /* Stat data, not accessed from path walking */ - unsigned long i_ino; + u64 i_ino; /* * Filesystems may only read i_nlink directly. They shall use the * following functions for modification: @@ -1917,7 +1918,6 @@ struct dir_context { */ #define COPY_FILE_SPLICE (1 << 0) -struct iov_iter; struct io_uring_cmd; struct offset_ctx; @@ -2058,16 +2058,24 @@ static inline bool can_mmap_file(struct file *file) return true; } -int __compat_vma_mmap(const struct file_operations *f_op, - struct file *file, struct vm_area_struct *vma); +void compat_set_desc_from_vma(struct vm_area_desc *desc, const struct file *file, + const struct vm_area_struct *vma); +int __compat_vma_mmap(struct vm_area_desc *desc, struct vm_area_struct *vma); int compat_vma_mmap(struct file *file, struct vm_area_struct *vma); +int __vma_check_mmap_hook(struct vm_area_struct *vma); static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) { + int err; + if (file->f_op->mmap_prepare) return compat_vma_mmap(file, vma); - return file->f_op->mmap(file, vma); + err = file->f_op->mmap(file, vma); + if (err) + return err; + + return __vma_check_mmap_hook(vma); } static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) @@ -2475,6 +2483,19 @@ struct file *dentry_create(struct path *path, int flags, umode_t mode, const struct cred *cred); const struct path *backing_file_user_path(const struct file *f); +#ifdef CONFIG_SECURITY +void *backing_file_security(const struct file *f); +void backing_file_set_security(struct file *f, void *security); +#else +static inline void *backing_file_security(const struct file *f) +{ + return NULL; +} +static inline void backing_file_set_security(struct file *f, void *security) +{ +} +#endif /* CONFIG_SECURITY */ + /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file * stored in ->vm_file is a backing file whose f_inode is on the underlying @@ -2912,65 +2933,63 @@ static inline bool name_contains_dotdot(const char *name) #include <linux/err.h> /* needed for stackable file system support */ -extern loff_t default_llseek(struct file *file, loff_t offset, int whence); +loff_t default_llseek(struct file *file, loff_t offset, int whence); -extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); +loff_t vfs_llseek(struct file *file, loff_t offset, int whence); -extern int inode_init_always_gfp(struct super_block *, struct inode *, gfp_t); +int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp); static inline int inode_init_always(struct super_block *sb, struct inode *inode) { return inode_init_always_gfp(sb, inode, GFP_NOFS); } -extern void inode_init_once(struct inode *); -extern void address_space_init_once(struct address_space *mapping); -extern struct inode * igrab(struct inode *); -extern ino_t iunique(struct super_block *, ino_t); -extern int inode_needs_sync(struct inode *inode); -extern int inode_just_drop(struct inode *inode); +void inode_init_once(struct inode *inode); +void address_space_init_once(struct address_space *mapping); +struct inode *igrab(struct inode *inode); +ino_t iunique(struct super_block *sb, ino_t max_reserved); +int inode_needs_sync(struct inode *inode); +int inode_just_drop(struct inode *inode); static inline int inode_generic_drop(struct inode *inode) { return !inode->i_nlink || inode_unhashed(inode); } -extern void d_mark_dontcache(struct inode *inode); +void d_mark_dontcache(struct inode *inode); -extern struct inode *ilookup5_nowait(struct super_block *sb, - unsigned long hashval, int (*test)(struct inode *, void *), - void *data, bool *isnew); -extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), void *data); -extern struct inode *ilookup(struct super_block *sb, unsigned long ino); +struct inode *ilookup5_nowait(struct super_block *sb, u64 hashval, + int (*test)(struct inode *, void *), void *data, + bool *isnew); +struct inode *ilookup5(struct super_block *sb, u64 hashval, + int (*test)(struct inode *, void *), void *data); +struct inode *ilookup(struct super_block *sb, u64 ino); -extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, - int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), - void *data); -struct inode *iget5_locked(struct super_block *, unsigned long, +struct inode *inode_insert5(struct inode *inode, u64 hashval, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *data); +struct inode *iget5_locked(struct super_block *sb, u64 hashval, int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), void *); -struct inode *iget5_locked_rcu(struct super_block *, unsigned long, + int (*set)(struct inode *, void *), void *data); +struct inode *iget5_locked_rcu(struct super_block *sb, u64 hashval, int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), void *); -extern struct inode * iget_locked(struct super_block *, unsigned long); -extern struct inode *find_inode_nowait(struct super_block *, - unsigned long, - int (*match)(struct inode *, - unsigned long, void *), - void *data); -extern struct inode *find_inode_rcu(struct super_block *, unsigned long, - int (*)(struct inode *, void *), void *); -extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); -extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); -extern int insert_inode_locked(struct inode *); + int (*set)(struct inode *, void *), void *data); +struct inode *iget_locked(struct super_block *sb, u64 ino); +struct inode *find_inode_nowait(struct super_block *sb, u64 hashval, + int (*match)(struct inode *, u64, void *), + void *data); +struct inode *find_inode_rcu(struct super_block *sb, u64 hashval, + int (*test)(struct inode *, void *), void *data); +struct inode *find_inode_by_ino_rcu(struct super_block *sb, u64 ino); +int insert_inode_locked4(struct inode *inode, u64 hashval, + int (*test)(struct inode *, void *), void *data); +int insert_inode_locked(struct inode *inode); #ifdef CONFIG_DEBUG_LOCK_ALLOC -extern void lockdep_annotate_inode_mutex_key(struct inode *inode); +void lockdep_annotate_inode_mutex_key(struct inode *inode); #else static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif -extern void unlock_new_inode(struct inode *); -extern void discard_new_inode(struct inode *); -extern unsigned int get_next_ino(void); -extern void evict_inodes(struct super_block *sb); +void unlock_new_inode(struct inode *inode); +void discard_new_inode(struct inode *inode); +unsigned int get_next_ino(void); +void evict_inodes(struct super_block *sb); void dump_mapping(const struct address_space *); /* @@ -3015,21 +3034,21 @@ int setattr_should_drop_sgid(struct mnt_idmap *idmap, */ #define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp) -extern void __insert_inode_hash(struct inode *, unsigned long hashval); +void __insert_inode_hash(struct inode *inode, u64 hashval); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void __remove_inode_hash(struct inode *); +void __remove_inode_hash(struct inode *inode); static inline void remove_inode_hash(struct inode *inode) { if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash)) __remove_inode_hash(inode); } -extern void inode_sb_list_add(struct inode *inode); -extern void inode_lru_list_add(struct inode *inode); +void inode_sb_list_add(struct inode *inode); +void inode_lru_list_add(struct inode *inode); int generic_file_mmap(struct file *, struct vm_area_struct *); int generic_file_mmap_prepare(struct vm_area_desc *desc); @@ -3295,8 +3314,8 @@ void simple_offset_destroy(struct offset_ctx *octx); extern const struct file_operations simple_offset_dir_operations; -extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); -extern int generic_file_fsync(struct file *, loff_t, loff_t, int); +extern int simple_fsync_noflush(struct file *, loff_t, loff_t, int); +extern int simple_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h index fa7638b81246..383050e7fdf5 100644 --- a/include/linux/fs/super_types.h +++ b/include/linux/fs/super_types.h @@ -338,5 +338,6 @@ struct super_block { #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ #define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ #define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */ +#define SB_I_NO_DATA_INTEGRITY 0x00008000 /* fs cannot guarantee data persistence on sync */ #endif /* _LINUX_FS_SUPER_TYPES_H */ diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 5e8a3b546033..98b83708f92b 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -27,8 +27,8 @@ typedef int fs_param_type(struct p_log *, * The type of parameter expected. */ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, - fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, - fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid, + fs_param_is_enum, fs_param_is_string, fs_param_is_blockdev, + fs_param_is_fd, fs_param_is_uid, fs_param_is_gid, fs_param_is_file_or_string; /* @@ -84,8 +84,6 @@ extern int fs_lookup_param(struct fs_context *fc, extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); -extern const struct constant_table bool_names[]; - #ifdef CONFIG_VALIDATE_FS_PARSER extern bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc); @@ -127,9 +125,7 @@ static inline bool fs_validate_description(const char *name, #define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array) #define fsparam_string(NAME, OPT) \ __fsparam(fs_param_is_string, NAME, OPT, 0, NULL) -#define fsparam_blob(NAME, OPT) __fsparam(fs_param_is_blob, NAME, OPT, 0, NULL) #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) -#define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) #define fsparam_file_or_string(NAME, OPT) \ __fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 516aba5b858b..54712ec61ffb 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -450,8 +450,8 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); /* bio.c */ bool fscrypt_decrypt_bio(struct bio *bio); -int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len); +int fscrypt_zeroout_range(const struct inode *inode, loff_t pos, + sector_t sector, u64 len); /* hooks.c */ int fscrypt_file_open(struct inode *inode, struct file *filp); @@ -755,8 +755,8 @@ static inline bool fscrypt_decrypt_bio(struct bio *bio) return true; } -static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len) +static inline int fscrypt_zeroout_range(const struct inode *inode, loff_t pos, + sector_t sector, u64 len) { return -EOPNOTSUPP; } @@ -865,19 +865,11 @@ static inline void fscrypt_set_ops(struct super_block *sb, bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode); -void fscrypt_set_bio_crypt_ctx(struct bio *bio, - const struct inode *inode, u64 first_lblk, - gfp_t gfp_mask); - -void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, - const struct buffer_head *first_bh, - gfp_t gfp_mask); +void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, + loff_t pos, gfp_t gfp_mask); bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, - u64 next_lblk); - -bool fscrypt_mergeable_bio_bh(struct bio *bio, - const struct buffer_head *next_bh); + loff_t pos); bool fscrypt_dio_supported(struct inode *inode); @@ -892,22 +884,11 @@ static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, - u64 first_lblk, gfp_t gfp_mask) { } - -static inline void fscrypt_set_bio_crypt_ctx_bh( - struct bio *bio, - const struct buffer_head *first_bh, - gfp_t gfp_mask) { } + loff_t pos, gfp_t gfp_mask) { } static inline bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, - u64 next_lblk) -{ - return true; -} - -static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, - const struct buffer_head *next_bh) + loff_t pos) { return true; } diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 897d6211c163..1da63f2d7040 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -178,9 +178,6 @@ struct fsl_mc_obj_desc { * @regions: pointer to array of MMIO region entries * @irqs: pointer to array of pointers to interrupts allocated to this device * @resource: generic resource associated with this MC object device, if any. - * @driver_override: driver name to force a match; do not set directly, - * because core frees it; use driver_set_override() to - * set or clear it. * * Generic device object for MC object devices that are "attached" to a * MC bus. @@ -214,7 +211,6 @@ struct fsl_mc_device { struct fsl_mc_device_irq **irqs; struct fsl_mc_resource *resource; struct device_link *consumer_link; - const char *driver_override; }; #define to_fsl_mc_device(_dev) \ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 28a9cb13fbfa..079c18bcdbde 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -495,19 +495,6 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) fsnotify_dentry(dentry, mask); } -static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, - int error) -{ - struct fs_error_report report = { - .error = error, - .inode = inode, - .sb = sb, - }; - - return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, - NULL, NULL, NULL, 0); -} - static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) { fsnotify_mnt(FS_MNT_ATTACH, ns, mnt); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index c242fe49af4c..28b30c6f1031 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,7 +31,7 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif -#ifdef CONFIG_TRACING +#ifdef CONFIG_TRACER_SNAPSHOT extern void ftrace_boot_snapshot(void); #else static inline void ftrace_boot_snapshot(void) { } diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 097be89487bf..80b38fbf2121 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -15,6 +15,7 @@ #define _LINUX_FWNODE_H_ #include <linux/bits.h> +#include <linux/bitops.h> #include <linux/err.h> #include <linux/list.h> #include <linux/types.h> @@ -42,12 +43,12 @@ struct device; * suppliers. Only enforce ordering with suppliers that have * drivers. */ -#define FWNODE_FLAG_LINKS_ADDED BIT(0) -#define FWNODE_FLAG_NOT_DEVICE BIT(1) -#define FWNODE_FLAG_INITIALIZED BIT(2) -#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) -#define FWNODE_FLAG_BEST_EFFORT BIT(4) -#define FWNODE_FLAG_VISITED BIT(5) +#define FWNODE_FLAG_LINKS_ADDED 0 +#define FWNODE_FLAG_NOT_DEVICE 1 +#define FWNODE_FLAG_INITIALIZED 2 +#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD 3 +#define FWNODE_FLAG_BEST_EFFORT 4 +#define FWNODE_FLAG_VISITED 5 struct fwnode_handle { struct fwnode_handle *secondary; @@ -57,7 +58,7 @@ struct fwnode_handle { struct device *dev; struct list_head suppliers; struct list_head consumers; - u8 flags; + unsigned long flags; }; /* @@ -212,16 +213,37 @@ static inline void fwnode_init(struct fwnode_handle *fwnode, INIT_LIST_HEAD(&fwnode->suppliers); } +static inline void fwnode_set_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + set_bit(bit, &fwnode->flags); +} + +static inline void fwnode_clear_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + clear_bit(bit, &fwnode->flags); +} + +static inline void fwnode_assign_flag(struct fwnode_handle *fwnode, + unsigned int bit, bool value) +{ + assign_bit(bit, &fwnode->flags, value); +} + +static inline bool fwnode_test_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + return test_bit(bit, &fwnode->flags); +} + static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, bool initialized) { if (IS_ERR_OR_NULL(fwnode)) return; - if (initialized) - fwnode->flags |= FWNODE_FLAG_INITIALIZED; - else - fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; + fwnode_assign_flag(fwnode, FWNODE_FLAG_INITIALIZED, initialized); } int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h index 6a9a1acb5aad..fc5d0b5edadc 100644 --- a/include/linux/generic_pt/common.h +++ b/include/linux/generic_pt/common.h @@ -175,6 +175,22 @@ enum { PT_FEAT_VTDSS_FORCE_WRITEABLE, }; +struct pt_riscv_32 { + struct pt_common common; +}; + +struct pt_riscv_64 { + struct pt_common common; +}; + +enum { + /* + * Support the 64k contiguous page size following the Svnapot extension. + */ + PT_FEAT_RISCV_SVNAPOT_64K = PT_FEAT_FMT_START, + +}; + struct pt_x86_64 { struct pt_common common; }; diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h index 9eefbb74efd0..dd0edd02a48a 100644 --- a/include/linux/generic_pt/iommu.h +++ b/include/linux/generic_pt/iommu.h @@ -66,6 +66,13 @@ struct pt_iommu { struct device *iommu_device; }; +static inline struct pt_iommu *iommupt_from_domain(struct iommu_domain *domain) +{ + if (!IS_ENABLED(CONFIG_IOMMU_PT) || !domain->is_iommupt) + return NULL; + return container_of(domain, struct pt_iommu, domain); +} + /** * struct pt_iommu_info - Details about the IOMMU page table * @@ -81,6 +88,56 @@ struct pt_iommu_info { struct pt_iommu_ops { /** + * @map_range: Install translation for an IOVA range + * @iommu_table: Table to manipulate + * @iova: IO virtual address to start + * @paddr: Physical/Output address to start + * @len: Length of the range starting from @iova + * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO + * @gfp: GFP flags for any memory allocations + * + * The range starting at IOVA will have paddr installed into it. The + * rage is automatically segmented into optimally sized table entries, + * and can have any valid alignment. + * + * On error the caller will probably want to invoke unmap on the range + * from iova up to the amount indicated by @mapped to return the table + * back to an unchanged state. + * + * Context: The caller must hold a write range lock that includes + * the whole range. + * + * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA + * that were mapped are added to @mapped, @mapped is not zerod first. + */ + int (*map_range)(struct pt_iommu *iommu_table, dma_addr_t iova, + phys_addr_t paddr, dma_addr_t len, unsigned int prot, + gfp_t gfp, size_t *mapped); + + /** + * @unmap_range: Make a range of IOVA empty/not present + * @iommu_table: Table to manipulate + * @iova: IO virtual address to start + * @len: Length of the range starting from @iova + * @iotlb_gather: Gather struct that must be flushed on return + * + * unmap_range() will remove a translation created by map_range(). It + * cannot subdivide a mapping created by map_range(), so it should be + * called with IOVA ranges that match those passed to map_pages. The + * IOVA range can aggregate contiguous map_range() calls so long as no + * individual range is split. + * + * Context: The caller must hold a write range lock that includes + * the whole range. + * + * Returns: Number of bytes of VA unmapped. iova + res will be the + * point unmapping stopped. + */ + size_t (*unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova, + dma_addr_t len, + struct iommu_iotlb_gather *iotlb_gather); + + /** * @set_dirty: Make the iova write dirty * @iommu_table: Table to manipulate * @iova: IO virtual address to start @@ -194,14 +251,6 @@ struct pt_iommu_cfg { #define IOMMU_PROTOTYPES(fmt) \ phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \ dma_addr_t iova); \ - int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain, \ - unsigned long iova, phys_addr_t paddr, \ - size_t pgsize, size_t pgcount, \ - int prot, gfp_t gfp, size_t *mapped); \ - size_t pt_iommu_##fmt##_unmap_pages( \ - struct iommu_domain *domain, unsigned long iova, \ - size_t pgsize, size_t pgcount, \ - struct iommu_iotlb_gather *iotlb_gather); \ int pt_iommu_##fmt##_read_and_clear_dirty( \ struct iommu_domain *domain, unsigned long iova, size_t size, \ unsigned long flags, struct iommu_dirty_bitmap *dirty); \ @@ -222,9 +271,7 @@ struct pt_iommu_cfg { * iommu_pt */ #define IOMMU_PT_DOMAIN_OPS(fmt) \ - .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \ - .map_pages = &pt_iommu_##fmt##_map_pages, \ - .unmap_pages = &pt_iommu_##fmt##_unmap_pages + .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys #define IOMMU_PT_DIRTY_OPS(fmt) \ .read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty @@ -275,6 +322,17 @@ struct pt_iommu_vtdss_hw_info { IOMMU_FORMAT(vtdss, vtdss_pt); +struct pt_iommu_riscv_64_cfg { + struct pt_iommu_cfg common; +}; + +struct pt_iommu_riscv_64_hw_info { + u64 ppn; + u8 fsc_iosatp_mode; +}; + +IOMMU_FORMAT(riscv_64, riscv_64pt); + struct pt_iommu_x86_64_cfg { struct pt_iommu_cfg common; /* 4 is a 57 bit 5 level table */ diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index d4da060b7532..a7d36c9ea924 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -149,7 +149,8 @@ static int __ ## s_name ## _from_attrs(struct s_name *s, \ if (!tla) \ return -ENOMSG; \ DPRINT_TLA(#s_name, "<=-", #tag_name); \ - err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + err = nla_parse_nested_deprecated(ntb, maxtype, tla, \ + s_name ## _nl_policy, NULL); \ if (err) \ return err; \ \ @@ -292,6 +293,10 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { #endif .maxattr = ARRAY_SIZE(CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, .policy = CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy), +#ifdef GENL_MAGIC_FAMILY_PRE_DOIT + .pre_doit = GENL_MAGIC_FAMILY_PRE_DOIT, + .post_doit = GENL_MAGIC_FAMILY_POST_DOIT, +#endif .ops = ZZZ_genl_ops, .n_ops = ARRAY_SIZE(ZZZ_genl_ops), .mcgrps = ZZZ_genl_mcgrps, diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 621b87a87d74..2200cedd160a 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -26,16 +26,6 @@ extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void); */ /* - * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not - * know about. This flag can be set in nlattr->nla_type to indicate that this - * attribute must not be ignored. - * - * We check and remove this flag in drbd_nla_check_mandatory() before - * validating the attribute types and lengths via nla_parse_nested(). - */ -#define DRBD_GENLA_F_MANDATORY (1 << 14) - -/* * Flags specific to drbd and not visible at the netlink layer, used in * <struct>_from_attrs and <struct>_to_skb: * @@ -52,7 +42,6 @@ extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void); #define DRBD_F_SENSITIVE (1 << 1) #define DRBD_F_INVARIANT (1 << 2) -#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY)) /* }}}1 * MAGIC @@ -158,12 +147,12 @@ enum { \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, type, \ __get, __put, __is_signed) \ - T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), + T_ ## name = (__u16)(attr_nr), #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, \ maxlen, __get, __put, __is_signed) \ - T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), + T_ ## name = (__u16)(attr_nr), #include GENL_MAGIC_INCLUDE_FILE diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2b30a0529d48..51ef13ed756e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -14,8 +14,8 @@ struct vm_area_struct; struct mempolicy; /* Helper macro to avoid gfp flags if they are the default one */ -#define __default_gfp(a,...) a -#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL) +#define __default_gfp(a,b,...) b +#define default_gfp(...) __default_gfp(,##__VA_ARGS__,GFP_KERNEL) /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -339,8 +339,11 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde { return folio_alloc_noprof(gfp, order); } -#define vma_alloc_folio_noprof(gfp, order, vma, addr) \ - folio_alloc_noprof(gfp, order) +static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, + struct vm_area_struct *vma, unsigned long addr) +{ + return folio_alloc_noprof(gfp, order); +} #endif #define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 814bb2892f99..6c75df30a281 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -139,6 +139,8 @@ enum { * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. * * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + * mark_obj_codetag_empty() should be called upon freeing for objects allocated + * with this flag to indicate that their NULL tags are expected and normal. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 0d8408582918..3efb5cb1e1d1 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -6,6 +6,8 @@ #include <linux/err.h> #include <linux/types.h> +#include "defs.h" + struct acpi_device; struct device; struct fwnode_handle; diff --git a/include/linux/gpio/defs.h b/include/linux/gpio/defs.h new file mode 100644 index 000000000000..b69fd7c041b2 --- /dev/null +++ b/include/linux/gpio/defs.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_GPIO_DEFS_H +#define __LINUX_GPIO_DEFS_H + +#define GPIO_LINE_DIRECTION_IN 1 +#define GPIO_LINE_DIRECTION_OUT 0 + +#endif /* __LINUX_GPIO_DEFS_H */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index fabe2baf7b50..17511434ed07 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -20,6 +20,8 @@ #include <asm/msi.h> #endif +#include "defs.h" + struct device; struct irq_chip; struct irq_data; @@ -42,9 +44,6 @@ union gpio_irq_fwspec { #endif }; -#define GPIO_LINE_DIRECTION_IN 1 -#define GPIO_LINE_DIRECTION_OUT 0 - /** * struct gpio_irq_chip - GPIO interrupt controller */ @@ -344,11 +343,17 @@ struct gpio_irq_chip { * @direction_output: configures signal "offset" as output, returns 0 on * success or a negative error number. This can be omitted on input-only * or output-only gpio chips. - * @get: returns value for signal "offset", 0=low, 1=high, or negative error + * @get: returns value for signal "offset", 0=low, 1=high, or negative error. + * the low and high values are defined as physical low on the line + * in/out to the connector such as a physical pad, pin or rail. The GPIO + * library has internal logic to handle lines that are active low, such + * as indicated by overstrike or #name in a schematic, and the driver + * should not try to second-guess the logic value of a line. * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error * @set: assigns output value for signal "offset", returns 0 on success or - * negative error value + * negative error value. The output value follows the same semantic + * rules as for @get. * @set_multiple: assigns output values for multiple signals defined by * "mask", returns 0 on success or negative error value * @set_config: optional hook for all kinds of settings. Uses the same diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index ff566dc9c3cb..de43c06c83ef 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -3,9 +3,15 @@ #ifndef __LINUX_GPIO_GENERIC_H #define __LINUX_GPIO_GENERIC_H +#include <linux/bits.h> +#include <linux/bug.h> #include <linux/cleanup.h> -#include <linux/gpio/driver.h> +#include <linux/container_of.h> +#include <linux/errno.h> #include <linux/spinlock.h> +#include <linux/types.h> + +#include <linux/gpio/driver.h> struct device; diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h index 592a774a53cd..8061b9826361 100644 --- a/include/linux/gpio/gpio-nomadik.h +++ b/include/linux/gpio/gpio-nomadik.h @@ -114,8 +114,7 @@ struct nmk_gpio_chip { } /** - * enum prcm_gpiocr_reg_index - * Used to reference an PRCM GPIOCR register address. + * enum prcm_gpiocr_reg_index - Used to reference a PRCM GPIOCR register address. */ enum prcm_gpiocr_reg_index { PRCM_IDX_GPIOCR1, @@ -123,8 +122,7 @@ enum prcm_gpiocr_reg_index { PRCM_IDX_GPIOCR3 }; /** - * enum prcm_gpiocr_altcx_index - * Used to reference an Other alternate-C function. + * enum prcm_gpiocr_altcx_index - Used to reference an Other alternate-C function. */ enum prcm_gpiocr_altcx_index { PRCM_IDX_GPIOCR_ALTC1, @@ -135,7 +133,7 @@ enum prcm_gpiocr_altcx_index { }; /** - * struct prcm_gpio_altcx - Other alternate-C function + * struct prcm_gpiocr_altcx - Other alternate-C function * @used: other alternate-C function availability * @reg_index: PRCM GPIOCR register index used to control the function * @control_bit: PRCM GPIOCR bit used to control the function @@ -147,7 +145,7 @@ struct prcm_gpiocr_altcx { } __packed; /** - * struct prcm_gpio_altcx_pin_desc - Other alternate-C pin + * struct prcm_gpiocr_altcx_pin_desc - Other alternate-C pin * @pin: The pin number * @altcx: array of other alternate-C[1-4] functions */ @@ -193,7 +191,7 @@ struct nmk_pingroup { * numbering. * @npins: The number of entries in @pins. * @functions: The functions supported on this SoC. - * @nfunction: The number of entries in @functions. + * @nfunctions: The number of entries in @functions. * @groups: An array describing all pin groups the pin SoC supports. * @ngroups: The number of entries in @groups. * @altcx_pins: The pins that support Other alternate-C function on this SoC diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index 44e5f162973e..5eb88f5d0630 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -46,23 +46,6 @@ struct gpiod_lookup_table { struct gpiod_lookup table[]; }; -/** - * struct gpiod_hog - GPIO line hog table - * @chip_label: name of the chip the GPIO belongs to - * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO - * @line_name: consumer name for the hogged line - * @lflags: bitmask of gpio_lookup_flags GPIO_* values - * @dflags: GPIO flags used to specify the direction and value - */ -struct gpiod_hog { - struct list_head list; - const char *chip_label; - u16 chip_hwnum; - const char *line_name; - unsigned long lflags; - int dflags; -}; - /* * Helper for lookup tables with just one single lookup for a device. */ @@ -95,24 +78,10 @@ static struct gpiod_lookup_table _name = { \ .flags = _flags, \ } -/* - * Simple definition of a single GPIO hog in an array. - */ -#define GPIO_HOG(_chip_label, _chip_hwnum, _line_name, _lflags, _dflags) \ -(struct gpiod_hog) { \ - .chip_label = _chip_label, \ - .chip_hwnum = _chip_hwnum, \ - .line_name = _line_name, \ - .lflags = _lflags, \ - .dflags = _dflags, \ -} - #ifdef CONFIG_GPIOLIB void gpiod_add_lookup_table(struct gpiod_lookup_table *table); void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n); void gpiod_remove_lookup_table(struct gpiod_lookup_table *table); -void gpiod_add_hogs(struct gpiod_hog *hogs); -void gpiod_remove_hogs(struct gpiod_hog *hogs); #else /* ! CONFIG_GPIOLIB */ static inline void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {} @@ -120,8 +89,6 @@ static inline void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) {} static inline void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {} -static inline void gpiod_add_hogs(struct gpiod_hog *hogs) {} -static inline void gpiod_remove_hogs(struct gpiod_hog *hogs) {} #endif /* CONFIG_GPIOLIB */ #endif /* __LINUX_GPIO_MACHINE_H */ diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h new file mode 100644 index 000000000000..5fa917ba5450 --- /dev/null +++ b/include/linux/gpu_buddy.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __GPU_BUDDY_H__ +#define __GPU_BUDDY_H__ + +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/rbtree.h> +#include <linux/rbtree_augmented.h> + +/** + * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range + * + * When set, allocation is restricted to the range [start, end) specified + * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored + * and allocation can use any free space. + */ +#define GPU_BUDDY_RANGE_ALLOCATION BIT(0) + +/** + * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space + * + * Allocate starting from high addresses and working down. Useful for + * separating different allocation types (e.g., kernel vs userspace) + * to reduce fragmentation. + */ +#define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1) + +/** + * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks + * + * The allocation must be satisfied with a single contiguous block. + * If the requested size cannot be allocated contiguously, the + * allocation fails with -ENOSPC. + */ +#define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) + +/** + * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory + * + * Attempt to allocate from the clear tree first. If insufficient clear + * memory is available, falls back to dirty memory. Useful when the + * caller needs zeroed memory and wants to avoid GPU clear operations. + */ +#define GPU_BUDDY_CLEAR_ALLOCATION BIT(3) + +/** + * GPU_BUDDY_CLEARED - Mark returned blocks as cleared + * + * Used with gpu_buddy_free_list() to indicate that the memory being + * freed has been cleared (zeroed). The blocks will be placed in the + * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests. + */ +#define GPU_BUDDY_CLEARED BIT(4) + +/** + * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming + * + * By default, if an allocation is smaller than the allocated block, + * excess memory is trimmed and returned to the free pool. This flag + * disables trimming, keeping the full power-of-two block size. + */ +#define GPU_BUDDY_TRIM_DISABLE BIT(5) + +enum gpu_buddy_free_tree { + GPU_BUDDY_CLEAR_TREE = 0, + GPU_BUDDY_DIRTY_TREE, + GPU_BUDDY_MAX_FREE_TREES, +}; + +#define for_each_free_tree(tree) \ + for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++) + +/** + * struct gpu_buddy_block - Block within a buddy allocator + * + * Each block in the buddy allocator is represented by this structure. + * Blocks are organized in a binary tree where each parent block can be + * split into two children (left and right buddies). The allocator manages + * blocks at various orders (power-of-2 sizes) from chunk_size up to the + * largest contiguous region. + * + * @private: Private data owned by the allocator user (e.g., driver-specific data) + * @link: List node for user ownership while block is allocated + */ +struct gpu_buddy_block { +/* private: */ + /* + * Header bit layout: + * - Bits 63:12: block offset within the address space + * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT) + * - Bit 9: clear bit (1 if memory is zeroed) + * - Bits 8:6: reserved + * - Bits 5:0: order (log2 of size relative to chunk_size) + */ +#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) +#define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) +#define GPU_BUDDY_ALLOCATED (1 << 10) +#define GPU_BUDDY_FREE (2 << 10) +#define GPU_BUDDY_SPLIT (3 << 10) +#define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) +/* Free to be used, if needed in the future */ +#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) +#define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) + u64 header; + + struct gpu_buddy_block *left; + struct gpu_buddy_block *right; + struct gpu_buddy_block *parent; +/* public: */ + void *private; /* owned by creator */ + + /* + * While the block is allocated by the user through gpu_buddy_alloc*, + * the user has ownership of the link, for example to maintain within + * a list, if so desired. As soon as the block is freed with + * gpu_buddy_free* ownership is given back to the mm. + */ + union { +/* private: */ + struct rb_node rb; +/* public: */ + struct list_head link; + }; +/* private: */ + struct list_head tmp_link; + unsigned int subtree_max_alignment; +}; + +/* Order-zero must be at least SZ_4K */ +#define GPU_BUDDY_MAX_ORDER (63 - 12) + +/** + * struct gpu_buddy - GPU binary buddy allocator + * + * The buddy allocator provides efficient power-of-two memory allocation + * with fast allocation and free operations. It is commonly used for GPU + * memory management where allocations can be split into power-of-two + * block sizes. + * + * Locking should be handled by the user; a simple mutex around + * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list() + * should suffice. + * + * @n_roots: Number of root blocks in the roots array. + * @max_order: Maximum block order (log2 of largest block size / chunk_size). + * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K. + * @size: Total size of the address space managed by this allocator in bytes. + * @avail: Total free space currently available for allocation in bytes. + * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes. + * This is a subset of @avail. + */ +struct gpu_buddy { +/* private: */ + /* + * Array of red-black trees for free block management. + * Indexed as free_trees[clear/dirty][order] where: + * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content + * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content + * Each tree holds free blocks of the corresponding order. + */ + struct rb_root **free_trees; + /* + * Array of root blocks representing the top-level blocks of the + * binary tree(s). Multiple roots exist when the total size is not + * a power of two, with each root being the largest power-of-two + * that fits in the remaining space. + */ + struct gpu_buddy_block **roots; +/* public: */ + unsigned int n_roots; + unsigned int max_order; + u64 chunk_size; + u64 size; + u64 avail; + u64 clear_avail; +}; + +static inline u64 +gpu_buddy_block_offset(const struct gpu_buddy_block *block) +{ + return block->header & GPU_BUDDY_HEADER_OFFSET; +} + +static inline unsigned int +gpu_buddy_block_order(struct gpu_buddy_block *block) +{ + return block->header & GPU_BUDDY_HEADER_ORDER; +} + +static inline bool +gpu_buddy_block_is_free(struct gpu_buddy_block *block) +{ + return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE; +} + +static inline bool +gpu_buddy_block_is_clear(struct gpu_buddy_block *block) +{ + return block->header & GPU_BUDDY_HEADER_CLEAR; +} + +static inline u64 +gpu_buddy_block_size(struct gpu_buddy *mm, + struct gpu_buddy_block *block) +{ + return mm->chunk_size << gpu_buddy_block_order(block); +} + +int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size); + +void gpu_buddy_fini(struct gpu_buddy *mm); + +int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, + u64 start, u64 end, u64 size, + u64 min_page_size, + struct list_head *blocks, + unsigned long flags); + +int gpu_buddy_block_trim(struct gpu_buddy *mm, + u64 *start, + u64 new_size, + struct list_head *blocks); + +void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear); + +void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block); + +void gpu_buddy_free_list(struct gpu_buddy *mm, + struct list_head *objects, + unsigned int flags); + +void gpu_buddy_print(struct gpu_buddy *mm); +void gpu_buddy_block_print(struct gpu_buddy *mm, + struct gpu_buddy_block *block); +#endif diff --git a/include/linux/hfs_common.h b/include/linux/hfs_common.h index dadb5e0aa8a3..07dfc39630ab 100644 --- a/include/linux/hfs_common.h +++ b/include/linux/hfs_common.h @@ -166,6 +166,11 @@ struct hfsplus_attr_unistr { hfsplus_unichr unicode[HFSPLUS_ATTR_MAX_STRLEN]; } __packed; +enum { + HFS_REGULAR_NAME, + HFS_XATTR_NAME, +}; + struct hfs_extent { __be16 block; __be16 count; @@ -510,7 +515,11 @@ struct hfs_btree_header_rec { #define HFSPLUS_NODE_MXSZ 32768 #define HFSPLUS_ATTR_TREE_NODE_SIZE 8192 #define HFSPLUS_BTREE_HDR_NODE_RECS_COUNT 3 +#define HFSPLUS_BTREE_HDR_MAP_REC_INDEX 2 /* Map (bitmap) record in Header node */ +#define HFSPLUS_BTREE_MAP_NODE_REC_INDEX 0 /* Map record in Map Node */ #define HFSPLUS_BTREE_HDR_USER_BYTES 128 +#define HFSPLUS_BTREE_MAP_NODE_RECS_COUNT 2 +#define HFSPLUS_BTREE_MAP_NODE_RESERVED_BYTES 2 /* btree key type */ #define HFSPLUS_KEY_CASEFOLDING 0xCF /* case-insensitive */ diff --git a/include/linux/hid.h b/include/linux/hid.h index dce862cafbbd..442a80d79e89 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -634,6 +634,38 @@ enum hid_battery_status { HID_BATTERY_REPORTED, /* Device sent unsolicited battery strength report */ }; +/** + * struct hid_battery - represents a single battery power supply + * @dev: pointer to the parent hid_device + * @ps: the power supply instance + * @min: minimum battery value from HID descriptor + * @max: maximum battery value from HID descriptor + * @report_type: HID report type (input/feature) + * @report_id: HID report ID for this battery + * @charge_status: current charging status + * @status: battery reporting status + * @capacity: current battery capacity (0-100) + * @avoid_query: if true, avoid querying battery (e.g., for stylus) + * @present: if true, battery is present (may be dynamic) + * @ratelimit_time: rate limiting for battery reports + * @list: list node for linking into hid_device's battery list + */ +struct hid_battery { + struct hid_device *dev; + struct power_supply *ps; + __s32 min; + __s32 max; + __s32 report_type; + __s32 report_id; + __s32 charge_status; + enum hid_battery_status status; + __s32 capacity; + bool avoid_query; + bool present; + ktime_t ratelimit_time; + struct list_head list; +}; + struct hid_driver; struct hid_ll_driver; @@ -670,19 +702,10 @@ struct hid_device { #ifdef CONFIG_HID_BATTERY_STRENGTH /* * Power supply information for HID devices which report - * battery strength. power_supply was successfully registered if - * battery is non-NULL. + * battery strength. Each battery is tracked separately in the + * batteries list. */ - struct power_supply *battery; - __s32 battery_capacity; - __s32 battery_min; - __s32 battery_max; - __s32 battery_report_type; - __s32 battery_report_id; - __s32 battery_charge_status; - enum hid_battery_status battery_status; - bool battery_avoid_query; - ktime_t battery_ratelimit_time; + struct list_head batteries; #endif unsigned long status; /* see STAT flags above */ @@ -698,6 +721,7 @@ struct hid_device { char name[128]; /* Device name */ char phys[64]; /* Device physical location */ char uniq[64]; /* Device unique identifier (serial #) */ + u64 firmware_version; /* Firmware version */ void *driver_data; @@ -743,6 +767,15 @@ static inline void hid_set_drvdata(struct hid_device *hdev, void *data) dev_set_drvdata(&hdev->dev, data); } +#ifdef CONFIG_HID_BATTERY_STRENGTH +static inline struct hid_battery *hid_get_battery(struct hid_device *hdev) +{ + if (list_empty(&hdev->batteries)) + return NULL; + return list_first_entry(&hdev->batteries, struct hid_battery, list); +} +#endif + #define HID_GLOBAL_STACK_SIZE 4 #define HID_COLLECTION_STACK_SIZE 4 @@ -836,6 +869,12 @@ struct hid_usage_id { * raw_event and event should return negative on error, any other value will * pass the event on to .event() typically return 0 for success. * + * report_fixup must return a report descriptor pointer whose lifetime is at + * least that of the input rdesc. This is usually done by mutating the input + * rdesc and returning it or a sub-portion of it. In case a new buffer is + * allocated and returned, the implementation of report_fixup is responsible for + * freeing it later. + * * input_mapping shall return a negative value to completely ignore this usage * (e.g. doubled or invalid usage), zero to continue with parsing of this * usage by generic code (no special handling needed) or positive to skip diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 51a6dc2b97e9..a6268dc4f7cb 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -102,6 +102,12 @@ #define QM_MIG_REGION_SEL 0x100198 #define QM_MIG_REGION_EN BIT(0) +#define QM_MAX_CHANNEL_NUM 8 +#define QM_CHANNEL_USAGE_OFFSET 0x1100 +#define QM_MAX_DEV_USAGE 100 +#define QM_DEV_USAGE_RATE 100 +#define QM_CHANNEL_ADDR_INTRVL 0x4 + /* uacce mode of the driver */ #define UACCE_MODE_NOUACCE 0 /* don't use uacce */ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ @@ -359,6 +365,11 @@ struct qm_rsv_buf { struct qm_dma qcdma; }; +struct qm_channel { + int channel_num; + const char *channel_name[QM_MAX_CHANNEL_NUM]; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -433,6 +444,7 @@ struct hisi_qm { struct qm_err_isolate isolate_data; struct hisi_qm_cap_tables cap_tables; + struct qm_channel channel_data; }; struct hisi_qp_status { @@ -546,8 +558,6 @@ int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); -int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); -void hisi_qm_stop_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); void hisi_qm_debug_init(struct hisi_qm *qm); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 5e7a63143a4a..1f5f55917d1c 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -453,16 +453,6 @@ void host1x_client_unregister(struct host1x_client *client); int host1x_client_suspend(struct host1x_client *client); int host1x_client_resume(struct host1x_client *client); -struct tegra_mipi_device; - -struct tegra_mipi_device *tegra_mipi_request(struct device *device, - struct device_node *np); -void tegra_mipi_free(struct tegra_mipi_device *device); -int tegra_mipi_enable(struct tegra_mipi_device *device); -int tegra_mipi_disable(struct tegra_mipi_device *device); -int tegra_mipi_start_calibration(struct tegra_mipi_device *device); -int tegra_mipi_finish_calibration(struct tegra_mipi_device *device); - /* host1x memory contexts */ struct host1x_memory_context { diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 74adbd4e7003..9ced498fefaa 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -13,6 +13,7 @@ #define _LINUX_HRTIMER_H #include <linux/hrtimer_defs.h> +#include <linux/hrtimer_rearm.h> #include <linux/hrtimer_types.h> #include <linux/init.h> #include <linux/list.h> @@ -31,6 +32,13 @@ * soft irq context * HRTIMER_MODE_HARD - Timer callback function will be executed in * hard irq context even on PREEMPT_RT. + * HRTIMER_MODE_LAZY_REARM - Avoid reprogramming if the timer was the + * first expiring timer and is moved into the + * future. Special mode for the HRTICK timer to + * avoid extensive reprogramming of the hardware, + * which is expensive in virtual machines. Risks + * a pointless expiry, but that's better than + * reprogramming on every context switch, */ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, @@ -38,6 +46,7 @@ enum hrtimer_mode { HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, HRTIMER_MODE_HARD = 0x08, + HRTIMER_MODE_LAZY_REARM = 0x10, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, @@ -55,33 +64,6 @@ enum hrtimer_mode { HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, }; -/* - * Values to track state of the timer - * - * Possible states: - * - * 0x00 inactive - * 0x01 enqueued into rbtree - * - * The callback state is not part of the timer->state because clearing it would - * mean touching the timer after the callback, this makes it impossible to free - * the timer from the callback function. - * - * Therefore we track the callback state in: - * - * timer->base->cpu_base->running == timer - * - * On SMP it is possible to have a "callback function running and enqueued" - * status. It happens for example when a posix timer expired and the callback - * queued a signal. Between dropping the lock which protects the posix timer - * and reacquiring the base lock of the hrtimer, another CPU can deliver the - * signal and rearm the timer. - * - * All state transitions are protected by cpu_base->lock. - */ -#define HRTIMER_STATE_INACTIVE 0x00 -#define HRTIMER_STATE_ENQUEUED 0x01 - /** * struct hrtimer_sleeper - simple sleeper structure * @timer: embedded timer structure @@ -134,11 +116,6 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) return timer->_softexpires; } -static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) -{ - return ktime_to_ns(timer->node.expires); -} - ktime_t hrtimer_cb_get_time(const struct hrtimer *timer); static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) @@ -146,24 +123,23 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) return ktime_sub(timer->node.expires, hrtimer_cb_get_time(timer)); } -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? - timer->base->cpu_base->hres_active : 0; -} - #ifdef CONFIG_HIGH_RES_TIMERS +extern unsigned int hrtimer_resolution; struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); -extern unsigned int hrtimer_resolution; +extern struct static_key_false hrtimer_highres_enabled_key; -#else +static inline bool hrtimer_highres_enabled(void) +{ + return static_branch_likely(&hrtimer_highres_enabled_key); +} +#else /* CONFIG_HIGH_RES_TIMERS */ #define hrtimer_resolution (unsigned int)LOW_RES_NSEC - -#endif +static inline bool hrtimer_highres_enabled(void) { return false; } +#endif /* !CONFIG_HIGH_RES_TIMERS */ static inline ktime_t __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now) @@ -293,8 +269,8 @@ extern bool hrtimer_active(const struct hrtimer *timer); */ static inline bool hrtimer_is_queued(struct hrtimer *timer) { - /* The READ_ONCE pairs with the update functions of timer->state */ - return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED); + /* The READ_ONCE pairs with the update functions of timer->is_queued */ + return READ_ONCE(timer->is_queued); } /* diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index 02b010df6570..52ed9e46ff13 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -19,21 +19,23 @@ * timer to a base on another cpu. * @clockid: clock id for per_cpu support * @seq: seqcount around __run_hrtimer + * @expires_next: Absolute time of the next event in this clock base * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { - struct hrtimer_cpu_base *cpu_base; - unsigned int index; - clockid_t clockid; - seqcount_raw_spinlock_t seq; - struct hrtimer *running; - struct timerqueue_head active; - ktime_t offset; + struct hrtimer_cpu_base *cpu_base; + const unsigned int index; + const clockid_t clockid; + seqcount_raw_spinlock_t seq; + ktime_t expires_next; + struct hrtimer *running; + struct timerqueue_linked_head active; + ktime_t offset; } __hrtimer_clock_base_align; -enum hrtimer_base_type { +enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, @@ -42,37 +44,36 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME_SOFT, HRTIMER_BASE_BOOTTIME_SOFT, HRTIMER_BASE_TAI_SOFT, - HRTIMER_MAX_CLOCK_BASES, + HRTIMER_MAX_CLOCK_BASES }; /** * struct hrtimer_cpu_base - the per cpu clock bases - * @lock: lock protecting the base and associated clock bases - * and timers - * @cpu: cpu number - * @active_bases: Bitfield to mark bases with active timers - * @clock_was_set_seq: Sequence counter of clock was set events - * @hres_active: State of high resolution mode - * @in_hrtirq: hrtimer_interrupt() is currently executing - * @hang_detected: The last hrtimer interrupt detected a hang - * @softirq_activated: displays, if the softirq is raised - update of softirq - * related settings is not required then. - * @nr_events: Total number of hrtimer interrupt events - * @nr_retries: Total number of hrtimer interrupt retries - * @nr_hangs: Total number of hrtimer interrupt hangs - * @max_hang_time: Maximum time spent in hrtimer_interrupt - * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are - * expired - * @online: CPU is online from an hrtimers point of view - * @timer_waiters: A hrtimer_cancel() invocation waits for the timer - * callback to finish. - * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue; it is the total first expiry time (hard - * and soft hrtimer are taken into account) - * @next_timer: Pointer to the first expiring timer - * @softirq_expires_next: Time to check, if soft queues needs also to be expired - * @softirq_next_timer: Pointer to the first expiring softirq based timer - * @clock_base: array of clock bases for this cpu + * @lock: lock protecting the base and associated clock bases and timers + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events + * @hres_active: State of high resolution mode + * @deferred_rearm: A deferred rearm is pending + * @deferred_needs_update: The deferred rearm must re-evaluate the first timer + * @hang_detected: The last hrtimer interrupt detected a hang + * @softirq_activated: displays, if the softirq is raised - update of softirq + * related settings is not required then. + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are expired + * @online: CPU is online from an hrtimers point of view + * @timer_waiters: A hrtimer_cancel() waiters for the timer callback to finish. + * @expires_next: Absolute time of the next event, is required for remote + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) + * @next_timer: Pointer to the first expiring timer + * @softirq_expires_next: Time to check, if soft queues needs also to be expired + * @softirq_next_timer: Pointer to the first expiring softirq based timer + * @deferred_expires_next: Cached expires next value for deferred rearm + * @clock_base: Array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). * Do not dereference the pointer because it is not reliable on @@ -83,11 +84,12 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - unsigned int hres_active : 1, - in_hrtirq : 1, - hang_detected : 1, - softirq_activated : 1, - online : 1; + bool hres_active; + bool deferred_rearm; + bool deferred_needs_update; + bool hang_detected; + bool softirq_activated; + bool online; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; @@ -102,6 +104,7 @@ struct hrtimer_cpu_base { struct hrtimer *next_timer; ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; + ktime_t deferred_expires_next; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; call_single_data_t csd; } ____cacheline_aligned; diff --git a/include/linux/hrtimer_rearm.h b/include/linux/hrtimer_rearm.h new file mode 100644 index 000000000000..a6f2e5d5e1c7 --- /dev/null +++ b/include/linux/hrtimer_rearm.h @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _LINUX_HRTIMER_REARM_H +#define _LINUX_HRTIMER_REARM_H + +#ifdef CONFIG_HRTIMER_REARM_DEFERRED +#include <linux/thread_info.h> + +void __hrtimer_rearm_deferred(void); + +/* + * This is purely CPU local, so check the TIF bit first to avoid the overhead of + * the atomic test_and_clear_bit() operation for the common case where the bit + * is not set. + */ +static __always_inline bool hrtimer_test_and_clear_rearm_deferred_tif(unsigned long tif_work) +{ + lockdep_assert_irqs_disabled(); + + if (unlikely(tif_work & _TIF_HRTIMER_REARM)) { + clear_thread_flag(TIF_HRTIMER_REARM); + return true; + } + return false; +} + +#define TIF_REARM_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_HRTIMER_REARM) + +/* Invoked from the exit to user before invoking exit_to_user_mode_loop() */ +static __always_inline bool +hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask) +{ + /* Help the compiler to optimize the function out for syscall returns */ + if (!(tif_mask & _TIF_HRTIMER_REARM)) + return false; + /* + * Rearm the timer if none of the resched flags is set before going into + * the loop which re-enables interrupts. + */ + if (unlikely((*tif_work & TIF_REARM_MASK) == _TIF_HRTIMER_REARM)) { + clear_thread_flag(TIF_HRTIMER_REARM); + __hrtimer_rearm_deferred(); + /* Don't go into the loop if HRTIMER_REARM was the only flag */ + *tif_work &= ~TIF_HRTIMER_REARM; + return !*tif_work; + } + return false; +} + +/* Invoked from the time slice extension decision function */ +static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work) +{ + if (hrtimer_test_and_clear_rearm_deferred_tif(tif_work)) + __hrtimer_rearm_deferred(); +} + +/* + * This is to be called on all irqentry_exit() paths that will enable + * interrupts. + */ +static __always_inline void hrtimer_rearm_deferred(void) +{ + hrtimer_rearm_deferred_tif(read_thread_flags()); +} + +/* + * Invoked from the scheduler on entry to __schedule() so it can defer + * rearming after the load balancing callbacks which might change hrtick. + */ +static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void) +{ + return hrtimer_test_and_clear_rearm_deferred_tif(read_thread_flags()); +} + +#else /* CONFIG_HRTIMER_REARM_DEFERRED */ +static __always_inline void __hrtimer_rearm_deferred(void) { } +static __always_inline void hrtimer_rearm_deferred(void) { } +static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work) { } +static __always_inline bool +hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask) { return false; } +static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void) { return false; } +#endif /* !CONFIG_HRTIMER_REARM_DEFERRED */ + +#endif diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h index 8fbbb6bdf7a1..b5dacc8271a4 100644 --- a/include/linux/hrtimer_types.h +++ b/include/linux/hrtimer_types.h @@ -17,7 +17,7 @@ enum hrtimer_restart { /** * struct hrtimer - the basic hrtimer structure - * @node: timerqueue node, which also manages node.expires, + * @node: Linked timerqueue node, which also manages node.expires, * the absolute expiry time in the hrtimers internal * representation. The time is related to the clock on * which the timer is based. Is setup by adding @@ -28,23 +28,26 @@ enum hrtimer_restart { * was armed. * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) - * @state: state information (See bit values above) + * @is_queued: Indicates whether a timer is enqueued or not * @is_rel: Set if the timer was armed relative * @is_soft: Set if hrtimer will be expired in soft interrupt context. * @is_hard: Set if hrtimer will be expired in hard interrupt context * even on RT. + * @is_lazy: Set if the timer is frequently rearmed to avoid updates + * of the clock event device * * The hrtimer structure must be initialized by hrtimer_setup() */ struct hrtimer { - struct timerqueue_node node; + struct timerqueue_linked_node node; + struct hrtimer_clock_base *base; + bool is_queued; + bool is_rel; + bool is_soft; + bool is_hard; + bool is_lazy; ktime_t _softexpires; enum hrtimer_restart (*__private function)(struct hrtimer *); - struct hrtimer_clock_base *base; - u8 state; - u8 is_rel; - u8 is_soft; - u8 is_hard; }; #endif /* _LINUX_HRTIMER_TYPES_H */ diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h index 6ca92bff02c6..ea6bef9b6012 100644 --- a/include/linux/hsi/hsi.h +++ b/include/linux/hsi/hsi.h @@ -271,7 +271,7 @@ struct hsi_controller { struct module *owner; unsigned int id; unsigned int num_ports; - struct hsi_port **port; + struct hsi_port *port[] __counted_by(num_ports); }; #define to_hsi_controller(dev) container_of(dev, struct hsi_controller, device) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a4d9f964dfde..2949e5acff35 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -27,8 +27,8 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf); bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long next); -int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr); +bool zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr); int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr); bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, @@ -83,7 +83,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to * it. Same to PFNMAPs where there's neither page* nor pagecache. */ -#define THP_ORDERS_ALL_SPECIAL \ +#define THP_ORDERS_ALL_SPECIAL_DAX \ (BIT(PMD_ORDER) | BIT(PUD_ORDER)) #define THP_ORDERS_ALL_FILE_DEFAULT \ ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0)) @@ -92,7 +92,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; * Mask of all large folio orders supported for THP. */ #define THP_ORDERS_ALL \ - (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT) + (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL_DAX | THP_ORDERS_ALL_FILE_DEFAULT) enum tva_type { TVA_SMAPS, /* Exposing "THPeligible:" in smaps. */ @@ -771,6 +771,11 @@ static inline bool pmd_is_huge(pmd_t pmd) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline bool is_pmd_order(unsigned int order) +{ + return order == HPAGE_PMD_ORDER; +} + static inline int split_folio_to_list_to_order(struct folio *folio, struct list_head *list, int new_order) { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 65910437be1c..93418625d3c5 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -518,6 +518,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) struct hugetlbfs_inode_info { struct inode vfs_inode; + struct resv_map *resv_map; unsigned int seals; }; @@ -777,10 +778,6 @@ static inline unsigned long huge_page_size(const struct hstate *h) return (unsigned long)PAGE_SIZE << h->order; } -extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); - -extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); - static inline unsigned long huge_page_mask(struct hstate *h) { return h->mask; @@ -796,6 +793,23 @@ static inline unsigned huge_page_shift(struct hstate *h) return h->order + PAGE_SHIFT; } +/** + * hugetlb_linear_page_index() - linear_page_index() but in hugetlb + * page size granularity. + * @vma: the hugetlb VMA + * @address: the virtual address within the VMA + * + * Return: the page offset within the mapping in huge page units. + */ +static inline pgoff_t hugetlb_linear_page_index(struct vm_area_struct *vma, + unsigned long address) +{ + struct hstate *h = hstate_vma(vma); + + return ((address - vma->vm_start) >> huge_page_shift(h)) + + (vma->vm_pgoff >> huge_page_order(h)); +} + static inline bool order_is_gigantic(unsigned int order) { return order > MAX_PAGE_ORDER; @@ -1177,16 +1191,6 @@ static inline unsigned long huge_page_mask(struct hstate *h) return PAGE_MASK; } -static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) -{ - return PAGE_SIZE; -} - -static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) -{ - return PAGE_SIZE; -} - static inline unsigned int huge_page_order(struct hstate *h) { return 0; diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h index 593f5d4e108b..565b473fd135 100644 --- a/include/linux/hugetlb_inline.h +++ b/include/linux/hugetlb_inline.h @@ -13,7 +13,7 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags) static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags) { - return vma_flags_test(flags, VMA_HUGETLB_BIT); + return vma_flags_test_any(flags, VMA_HUGETLB_BIT); } #else @@ -30,7 +30,7 @@ static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags) #endif -static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) +static inline bool is_vm_hugetlb_page(const struct vm_area_struct *vma) { return is_vm_hugetlb_flags(vma->vm_flags); } diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index b77bc55a4cf3..1d3c1927986e 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -46,7 +46,7 @@ struct hwrng { unsigned long priv; unsigned short quality; - /* internal. */ + /* private: internal. */ struct list_head list; struct kref ref; struct work_struct cleanup_work; diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index f35b42e8c5de..74b91244fe0e 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -25,34 +25,6 @@ struct hwspinlock; struct hwspinlock_device; struct hwspinlock_ops; -/** - * struct hwspinlock_pdata - platform data for hwspinlock drivers - * @base_id: base id for this hwspinlock device - * - * hwspinlock devices provide system-wide hardware locks that are used - * by remote processors that have no other way to achieve synchronization. - * - * To achieve that, each physical lock must have a system-wide id number - * that is agreed upon, otherwise remote processors can't possibly assume - * they're using the same hardware lock. - * - * Usually boards have a single hwspinlock device, which provides several - * hwspinlocks, and in this case, they can be trivially numbered 0 to - * (num-of-locks - 1). - * - * In case boards have several hwspinlocks devices, a different base id - * should be used for each hwspinlock device (they can't all use 0 as - * a starting id!). - * - * This platform data structure should be used to provide the base id - * for each device (which is trivially 0 when only a single hwspinlock - * device exists). It can be shared between different platforms, hence - * its location. - */ -struct hwspinlock_pdata { - int base_id; -}; - #ifdef CONFIG_HWSPINLOCK int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index dfc516c1c719..a26fb8e7cedf 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1015,8 +1015,8 @@ struct vmbus_channel { /* The max size of a packet on this channel */ u32 max_pkt_size; - /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ - int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma); + /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ + int (*mmap_prepare_ring_buffer)(struct vmbus_channel *channel, struct vm_area_desc *desc); /* boolean to control visibility of sysfs for ring buffer */ bool ring_sysfs_visible; diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index e3b3b0fa2a8f..2bd9f2157e6c 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -15,38 +15,13 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) -typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr, - const struct inet6_skb_parm *parm); void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm); -#if IS_BUILTIN(CONFIG_IPV6) -static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct inet6_skb_parm *parm) -{ - icmp6_send(skb, type, code, info, NULL, parm); -} -static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn) -{ - BUILD_BUG_ON(fn != icmp6_send); - return 0; -} -static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) -{ - BUILD_BUG_ON(fn != icmp6_send); - return 0; -} -#else -extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct inet6_skb_parm *parm); -extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); -extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -#endif static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { - __icmpv6_send(skb, type, code, info, IP6CB(skb)); + icmp6_send(skb, type, code, info, NULL, IP6CB(skb)); } int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, @@ -58,7 +33,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { struct inet6_skb_parm parm = { 0 }; - __icmpv6_send(skb_in, type, code, info, &parm); + icmp6_send(skb_in, type, code, info, NULL, &parm); } #endif diff --git a/include/linux/ieee80211-eht.h b/include/linux/ieee80211-eht.h index f8e9f5d36d2a..a97b1d01f3ac 100644 --- a/include/linux/ieee80211-eht.h +++ b/include/linux/ieee80211-eht.h @@ -251,8 +251,8 @@ struct ieee80211_eht_operation_info { #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 -#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08 -#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x10 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x20 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 diff --git a/include/linux/ieee80211-ht.h b/include/linux/ieee80211-ht.h index 21bbf470540f..7612b72f9c7c 100644 --- a/include/linux/ieee80211-ht.h +++ b/include/linux/ieee80211-ht.h @@ -281,6 +281,9 @@ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, WLAN_ACTION_ADDBA_RESP = 1, WLAN_ACTION_DELBA = 2, + WLAN_ACTION_NDP_ADDBA_REQ = 128, + WLAN_ACTION_NDP_ADDBA_RESP = 129, + WLAN_ACTION_NDP_DELBA = 130, }; /* BACK (block-ack) parties */ diff --git a/include/linux/ieee80211-nan.h b/include/linux/ieee80211-nan.h index d07959bf8a90..455033955e54 100644 --- a/include/linux/ieee80211-nan.h +++ b/include/linux/ieee80211-nan.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2025 Intel Corporation + * Copyright (c) 2018 - 2026 Intel Corporation */ #ifndef LINUX_IEEE80211_NAN_H @@ -23,6 +23,11 @@ #define NAN_OP_MODE_160MHZ 0x04 #define NAN_OP_MODE_PNDL_SUPPRTED 0x08 +#define NAN_DEV_CAPA_NUM_TX_ANT_POS 0 +#define NAN_DEV_CAPA_NUM_TX_ANT_MASK 0x0f +#define NAN_DEV_CAPA_NUM_RX_ANT_POS 4 +#define NAN_DEV_CAPA_NUM_RX_ANT_MASK 0xf0 + /* NAN Device capabilities, as defined in Wi-Fi Aware (TM) specification * Table 79 */ @@ -32,4 +37,41 @@ #define NAN_DEV_CAPA_NDPE_SUPPORTED 0x08 #define NAN_DEV_CAPA_S3_SUPPORTED 0x10 +/* NAN attributes, as defined in Wi-Fi Aware (TM) specification 4.0 Table 42 */ +#define NAN_ATTR_MASTER_INDICATION 0x00 +#define NAN_ATTR_CLUSTER_INFO 0x01 + +struct ieee80211_nan_attr { + u8 attr; + __le16 length; + u8 data[]; +} __packed; + +struct ieee80211_nan_master_indication { + u8 master_pref; + u8 random_factor; +} __packed; + +struct ieee80211_nan_anchor_master_info { + union { + __le64 master_rank; + struct { + u8 master_addr[ETH_ALEN]; + u8 random_factor; + u8 master_pref; + } __packed; + } __packed; + u8 hop_count; + __le32 ambtt; +} __packed; + +#define for_each_nan_attr(_attr, _data, _datalen) \ + for (_attr = (const struct ieee80211_nan_attr *)(_data); \ + (const u8 *)(_data) + (_datalen) - (const u8 *)_attr >= \ + (int)sizeof(*_attr) && \ + (const u8 *)(_data) + (_datalen) - (const u8 *)_attr >= \ + (int)sizeof(*_attr) + le16_to_cpu(_attr->length); \ + _attr = (const struct ieee80211_nan_attr *) \ + (_attr->data + le16_to_cpu(_attr->length))) + #endif /* LINUX_IEEE80211_NAN_H */ diff --git a/include/linux/ieee80211-uhr.h b/include/linux/ieee80211-uhr.h index 132acced7d79..d199f3ebdba0 100644 --- a/include/linux/ieee80211-uhr.h +++ b/include/linux/ieee80211-uhr.h @@ -12,8 +12,8 @@ #define IEEE80211_UHR_OPER_PARAMS_DPS_ENA 0x0001 #define IEEE80211_UHR_OPER_PARAMS_NPCA_ENA 0x0002 -#define IEEE80211_UHR_OPER_PARAMS_DBE_ENA 0x0004 -#define IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA 0x0008 +#define IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA 0x0004 +#define IEEE80211_UHR_OPER_PARAMS_DBE_ENA 0x0008 struct ieee80211_uhr_operation { __le16 params; @@ -29,11 +29,216 @@ struct ieee80211_uhr_operation { #define IEEE80211_UHR_NPCA_PARAMS_MOPLEN 0x00400000 #define IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES 0x00800000 +/** + * struct ieee80211_uhr_npca_info - npca operation information + * + * This structure is the "NPCA Operation Parameters field format" of "UHR + * Operation Element" fields as described in P802.11bn_D1.3 + * subclause 9.4.2.353. See Figure 9-aa4. + * + * Refer to IEEE80211_UHR_NPCA* + * @params: + * NPCA Primary Channel - NPCA primary channel + * NPCA_Min Duration Threshold - Minimum duration of inter-BSS activity + * NPCA Switching Delay - + * Time needed by an NPCA AP to switch from the + * BSS primary channel to the NPCA primary channel + * in the unit of 4 µs. + * NPCA Switching Back Delay - + * Time to switch from the NPCA primary channel + * to the BSS primary channel in the unit of 4 µs. + * NPCA Initial QSRC - + * Initialize the EDCAF QSRC[AC] variables + * when an NPCA STA in the BSS + * switches to NPCA operation. + * NPCA MOPLEN - + * Indicates which conditions can be used to + * initiate an NPCA operation, + * 1 -> both PHYLEN NPCA operation and MOPLEN + * NPCA operation are + * permitted in the BSS + * 0 -> only PHYLEN NPCA operation is allowed in the BSS. + * NPCA Disabled Subchannel Bitmap Present - + * Indicates whether the NPCA Disabled Subchannel + * Bitmap field is present. A 1 in this field indicates that + * the NPCA Disabled Subchannel Bitmap field is present + * @dis_subch_bmap: + * A bit in the bitmap that lies within the BSS bandwidth is set + * to 1 to indicate that the corresponding 20 MHz subchannel is + * punctured and is set to 0 to indicate that the corresponding + * 20 MHz subchannel is not punctured. A bit in the bitmap that + * falls outside of the BSS bandwidth is reserved. This field is + * present when the value of the NPCA Disabled Subchannel Bitmap + * Field Present field is equal to 1, and not present, otherwise + */ struct ieee80211_uhr_npca_info { __le32 params; __le16 dis_subch_bmap[]; } __packed; +#define IEEE80211_UHR_DPS_PADDING_DELAY 0x0000003F +#define IEEE80211_UHR_DPS_TRANSITION_DELAY 0x00003F00 +#define IEEE80211_UHR_DPS_ICF_REQUIRED 0x00010000 +#define IEEE80211_UHR_DPS_PARAMETERIZED_FLAG 0x00020000 +#define IEEE80211_UHR_DPS_LC_MODE_BW 0x001C0000 +#define IEEE80211_UHR_DPS_LC_MODE_NSS 0x01E00000 +#define IEEE80211_UHR_DPS_LC_MODE_MCS 0x1E000000 +#define IEEE80211_UHR_DPS_MOBILE_AP_DPS_STATIC_HCM 0x20000000 + +/** + * struct ieee80211_uhr_dps_info - DPS operation information + * + * This structure is the "DPS Operation Parameter field" of "UHR + * Operation Element" fields as described in P802.11bn_D1.3 + * subclause 9.4.1.87. See Figure 9-207u. + * + * Refer to IEEE80211_UHR_DPS* + * @params: + * DPS Padding Delay - + * Indicates the minimum MAC padding + * duration that is required by a DPS STA + * in an ICF to cause the STA to transition + * from the lower capability mode to the + * higher capability mode. The DPS Padding + * Delay field is in units of 4 µs. + * DPS Transition Delay - + * Indicates the amount of time required by a + * DPS STA to transition from the higher + * capability mode to the lower capability + * mode. The DPS Transition Delay field is in + * units of 4 µs. + * ICF Required - + * Indicates when the DPS assisting STA needs + * to transmit an ICF frame to the peer DPS STA + * before performing the frame exchanges with + * the peer DPS STA in a TXOP. + * 1 -> indicates that the transmission of the + * ICF frame to the peer DPS STA prior to + * any frame exchange is needed. + * 0 -> ICF transmission before the frame + * exchanges with the peer DPS STA is only + * needed if the frame exchange is performed + * in the HC mode. + * Parameterized Flag - + * 0 -> indicates that only 20 MHz, 1 SS, + * non-HT PPDU format with the data + * rate of 6, 12, and 24 Mb/s as the + * default mode are supported by the + * DPS STA in the LC mode + * 1 -> indicates that a bandwidth up to the + * bandwidth indicated in the LC Mode + * Bandwidth field, a number of spatial + * streams up to the NSS indicated in + * the LC Mode Nss field, and an MCS up + * to the MCS indicated in the LC Mode + * MCS fields are supported by the DPS + * STA in the LC mode as the + * parameterized mode. + * LC Mode Bandwidth - + * Indicates the maximum bandwidth supported + * by the STA in the LC mode. + * LC Mode NSS - + * Indicates the maximum number of the spatial + * streams supported by the STA in the LC mode. + * LC Mode MCS - + * Indicates the highest MCS supported by the STA + * in the LC mode. + * Mobile AP DPS Static HCM - + * 1 -> indicates that it will remain in the DPS high + * capability mode until the next TBTT on that + * link. + * 0 -> otherwise. + */ +struct ieee80211_uhr_dps_info { + __le32 params; +} __packed; + +#define IEEE80211_UHR_DBE_OPER_BANDWIDTH 0x07 +#define IEEE80211_UHR_DBE_OPER_DIS_SUBCHANNEL_BITMAP_PRES 0x08 + +/** + * enum ieee80211_uhr_dbe_oper_bw - DBE Operational Bandwidth + * + * Encoding for the DBE Operational Bandwidth field in the UHR Operation + * element (DBE Operation Parameters). + * + * @IEEE80211_UHR_DBE_OPER_BW_40: 40 MHz operational DBE bandwidth + * @IEEE80211_UHR_DBE_OPER_BW_80: 80 MHz operational DBE bandwidth + * @IEEE80211_UHR_DBE_OPER_BW_160: 160 MHz operational DBE bandwidth + * @IEEE80211_UHR_DBE_OPER_BW_320_1: 320-1 MHz operational DBE bandwidth + * @IEEE80211_UHR_DBE_OPER_BW_320_2: 320-2 MHz operational DBE bandwidth + */ +enum ieee80211_uhr_dbe_oper_bw { + IEEE80211_UHR_DBE_OPER_BW_40 = 1, + IEEE80211_UHR_DBE_OPER_BW_80 = 2, + IEEE80211_UHR_DBE_OPER_BW_160 = 3, + IEEE80211_UHR_DBE_OPER_BW_320_1 = 4, + IEEE80211_UHR_DBE_OPER_BW_320_2 = 5, +}; + +/** + * struct ieee80211_uhr_dbe_info - DBE operation information + * + * This structure is the "DBE Operation Parameters field" of + * "UHR Operation Element" fields as described in P802.11bn_D1.3 + * subclause 9.4.2.353. See Figure 9-aa6. + * + * Refer to IEEE80211_UHR_DBE_OPER* + * @params: + * B0-B2 - DBE Operational Bandwidth field, see + * "enum ieee80211_uhr_dbe_oper_bw" for values. + * Value 0 is reserved. + * Value 1 indicates 40 MHz operational DBE bandwidth. + * Value 2 indicates 80 MHz operational DBE bandwidth. + * Value 3 indicates 160 MHz operational DBE bandwidth. + * Value 4 indicates 320-1 MHz operational DBE bandwidth. + * Value 5 indicates 320-2 MHz operational DBE bandwidth. + * Values 6 to 7 are reserved. + * B3 - DBE Disabled Subchannel Bitmap Present. + * @dis_subch_bmap: DBE Disabled Subchannel Bitmap field is set to indicate + * disabled 20 MHz subchannels within the DBE Bandwidth. + */ +struct ieee80211_uhr_dbe_info { + u8 params; + __le16 dis_subch_bmap[]; +} __packed; + +#define IEEE80211_UHR_P_EDCA_ECWMIN 0x0F +#define IEEE80211_UHR_P_EDCA_ECWMAX 0xF0 +#define IEEE80211_UHR_P_EDCA_AIFSN 0x000F +#define IEEE80211_UHR_P_EDCA_CW_DS 0x0030 +#define IEEE80211_UHR_P_EDCA_PSRC_THRESHOLD 0x01C0 +#define IEEE80211_UHR_P_EDCA_QSRC_THRESHOLD 0x0600 + +/** + * struct ieee80211_uhr_p_edca_info - P-EDCA operation information + * + * This structure is the "P-EDCA Operation Parameters field" of + * "UHR Operation Element" fields as described in P802.11bn_D1.3 + * subclause 9.4.2.353. See Figure 9-aa5. + * + * Refer to IEEE80211_UHR_P_EDCA* + * @p_edca_ec: P-EDCA ECWmin and ECWmax. + * These fields indicate the CWmin and CWmax values used by a + * P-EDCA STA during P-EDCA contention. + * @params: AIFSN, CW DS, PSRC threshold, and QSRC threshold. + * - The AIFSN field indicates the AIFSN value used by a P-EDCA STA + * during P-EDCA contention. + * - The CW DS field indicates the value used for randomization of the + * transmission slot of the DS-CTS frame. The value 3 is reserved. + * The value 0 indicates that randomization is not enabled. + * - The P-EDCA PSRC threshold field indicates the maximum number of + * allowed consecutive DS-CTS transmissions. The value 0 and values + * greater than 4 are reserved. + * - The P-EDCA QSRC threshold field indicates the value of the + * QSRC[AC_VO] counter required to start P-EDCA contention. The + * value 0 is reserved. + */ +struct ieee80211_uhr_p_edca_info { + u8 p_edca_ec; + __le16 params; +} __packed; + static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len, bool beacon) { @@ -47,19 +252,52 @@ static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len, if (beacon) return true; - /* FIXME: DPS, DBE, P-EDCA (consider order, also relative to NPCA) */ + /* DPS Operation Parameters (fixed 4 bytes) */ + if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DPS_ENA)) { + needed += sizeof(struct ieee80211_uhr_dps_info); + if (len < needed) + return false; + } + /* NPCA Operation Parameters (fixed 4 bytes + optional 2 bytes) */ if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_NPCA_ENA)) { const struct ieee80211_uhr_npca_info *npca = - (const void *)oper->variable; + (const void *)(data + needed); needed += sizeof(*npca); - if (len < needed) return false; - if (npca->params & cpu_to_le32(IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES)) + if (npca->params & + cpu_to_le32(IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES)) { needed += sizeof(npca->dis_subch_bmap[0]); + if (len < needed) + return false; + } + } + + /* P-EDCA Operation Parameters (fixed 3 bytes) */ + if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA)) { + needed += sizeof(struct ieee80211_uhr_p_edca_info); + if (len < needed) + return false; + } + + /* DBE Operation Parameters (fixed 1 byte + optional 2 bytes) */ + if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DBE_ENA)) { + const struct ieee80211_uhr_dbe_info *dbe = + (const void *)(data + needed); + + needed += sizeof(*dbe); + if (len < needed) + return false; + + if (dbe->params & + IEEE80211_UHR_DBE_OPER_DIS_SUBCHANNEL_BITMAP_PRES) { + needed += sizeof(dbe->dis_subch_bmap[0]); + if (len < needed) + return false; + } } return len >= needed; @@ -72,12 +310,15 @@ static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len, static inline const struct ieee80211_uhr_npca_info * ieee80211_uhr_npca_info(const struct ieee80211_uhr_operation *oper) { + const u8 *pos = oper->variable; + if (!(oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_NPCA_ENA))) return NULL; - /* FIXME: DPS */ + if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DPS_ENA)) + pos += sizeof(struct ieee80211_uhr_dps_info); - return (const void *)oper->variable; + return (const void *)pos; } static inline const __le16 * @@ -131,6 +372,24 @@ ieee80211_uhr_npca_dis_subch_bitmap(const struct ieee80211_uhr_operation *oper) #define IEEE80211_UHR_MAC_CAP_DBE_EHT_MCS_MAP_160_PRES 0x08 #define IEEE80211_UHR_MAC_CAP_DBE_EHT_MCS_MAP_320_PRES 0x10 +/** + * enum ieee80211_uhr_dbe_max_supported_bw - DBE Maximum Supported Bandwidth + * + * As per spec P802.11bn_D1.3 "Table 9-bb5—Encoding of the DBE Maximum + * Supported Bandwidth field". + * + * @IEEE80211_UHR_DBE_MAX_BW_40: Indicates 40 MHz DBE max supported bw + * @IEEE80211_UHR_DBE_MAX_BW_80: Indicates 80 MHz DBE max supported bw + * @IEEE80211_UHR_DBE_MAX_BW_160: Indicates 160 MHz DBE max supported bw + * @IEEE80211_UHR_DBE_MAX_BW_320: Indicates 320 MHz DBE max supported bw + */ +enum ieee80211_uhr_dbe_max_supported_bw { + IEEE80211_UHR_DBE_MAX_BW_40 = 1, + IEEE80211_UHR_DBE_MAX_BW_80 = 2, + IEEE80211_UHR_DBE_MAX_BW_160 = 3, + IEEE80211_UHR_DBE_MAX_BW_320 = 4, +}; + struct ieee80211_uhr_cap_mac { u8 mac_cap[5]; } __packed; diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0aa2fb8f88de..23f9df9be837 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1046,31 +1046,28 @@ struct ieee80211_mgmt { } __packed probe_resp; struct { u8 category; + u8 action_code; union { struct { - u8 action_code; u8 dialog_token; u8 status_code; u8 variable[]; } __packed wme_action; struct{ - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed chan_switch; struct{ - u8 action_code; struct ieee80211_ext_chansw_ie data; u8 variable[]; } __packed ext_chan_switch; struct{ - u8 action_code; u8 dialog_token; u8 element_id; u8 length; struct ieee80211_msrment_ie msr_elem; } __packed measurement; struct{ - u8 action_code; u8 dialog_token; __le16 capab; __le16 timeout; @@ -1079,7 +1076,6 @@ struct ieee80211_mgmt { u8 variable[]; } __packed addba_req; struct{ - u8 action_code; u8 dialog_token; __le16 status; __le16 capab; @@ -1088,54 +1084,45 @@ struct ieee80211_mgmt { u8 variable[]; } __packed addba_resp; struct{ - u8 action_code; __le16 params; __le16 reason_code; } __packed delba; struct { - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed self_prot; struct{ - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed mesh_action; struct { - u8 action; u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN]; } __packed sa_query; struct { - u8 action; u8 smps_control; } __packed ht_smps; struct { - u8 action_code; u8 chanwidth; } __packed ht_notify_cw; struct { - u8 action_code; u8 dialog_token; __le16 capability; u8 variable[]; } __packed tdls_discover_resp; struct { - u8 action_code; u8 operating_mode; } __packed vht_opmode_notif; struct { - u8 action_code; u8 membership[WLAN_MEMBERSHIP_LEN]; u8 position[WLAN_USER_POSITION_LEN]; } __packed vht_group_notif; struct { - u8 action_code; u8 dialog_token; u8 tpc_elem_id; u8 tpc_elem_length; struct ieee80211_tpc_report_ie tpc; } __packed tpc_report; struct { - u8 action_code; u8 dialog_token; u8 follow_up; u8 tod[6]; @@ -1145,11 +1132,10 @@ struct ieee80211_mgmt { u8 variable[]; } __packed ftm; struct { - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed s1g; struct { - u8 action_code; u8 dialog_token; u8 follow_up; u32 tod; @@ -1158,41 +1144,37 @@ struct ieee80211_mgmt { u8 max_toa_error; } __packed wnm_timing_msr; struct { - u8 action_code; u8 dialog_token; u8 variable[]; } __packed ttlm_req; struct { - u8 action_code; u8 dialog_token; __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { - u8 action_code; + u8 no_fixed_fields[0]; + /* no variable fields either */ } __packed ttlm_tear_down; struct { - u8 action_code; u8 dialog_token; u8 variable[]; } __packed ml_reconf_req; struct { - u8 action_code; u8 dialog_token; u8 count; u8 variable[]; } __packed ml_reconf_resp; struct { - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed epcs; struct { - u8 action_code; u8 dialog_token; u8 control; u8 variable[]; } __packed eml_omn; - } u; + }; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ } u; @@ -1210,9 +1192,15 @@ struct ieee80211_mgmt { #define BSS_MEMBERSHIP_SELECTOR_MIN BSS_MEMBERSHIP_SELECTOR_UHR_PHY -/* mgmt header + 1 byte category code */ -#define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) +#define IEEE80211_MIN_ACTION_SIZE(type) offsetofend(struct ieee80211_mgmt, u.action.type) +/* Link Reconfiguration Status Duple field */ +struct ieee80211_ml_reconf_status { + u8 info; + __le16 status; +} __packed; + +#define IEEE80211_ML_RECONF_LINK_ID_MASK 0xf /* Management MIC information element (IEEE 802.11w) for CMAC */ struct ieee80211_mmie { @@ -1358,6 +1346,7 @@ struct ieee80211_tdls_data { #define WLAN_AUTH_FILS_SK 4 #define WLAN_AUTH_FILS_SK_PFS 5 #define WLAN_AUTH_FILS_PK 6 +#define WLAN_AUTH_IEEE8021X 8 #define WLAN_AUTH_EPPKE 9 #define WLAN_AUTH_LEAP 128 @@ -1500,6 +1489,8 @@ enum ieee80211_statuscode { WLAN_STATUS_REJECT_DSE_BAND = 96, WLAN_STATUS_DENIED_WITH_SUGGESTED_BAND_AND_CHANNEL = 99, WLAN_STATUS_DENIED_DUE_TO_SPECTRUM_MANAGEMENT = 103, + /* 802.11ah */ + WLAN_STATUS_REJECTED_NDP_BLOCK_ACK_SUGGESTED = 109, /* 802.11ai */ WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 112, WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 113, @@ -1507,6 +1498,7 @@ enum ieee80211_statuscode { WLAN_STATUS_SAE_PK = 127, WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133, WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134, + WLAN_STATUS_8021X_AUTH_SUCCESS = 153, }; @@ -1929,6 +1921,11 @@ enum ieee80211_radio_measurement_actioncode { #define PMK_MAX_LEN 64 #define SAE_PASSWORD_MAX_LEN 128 +#define MICHAEL_MIC_LEN 8 + +void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, + const u8 *data, size_t data_len, u8 *mic); + /* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_20_40_BSS_COEX = 0, @@ -2248,6 +2245,7 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_OUI_WFA 0x506f9a #define WLAN_OUI_TYPE_WFA_P2P 9 +#define WLAN_OUI_TYPE_WFA_NAN 0x13 #define WLAN_OUI_TYPE_WFA_DPP 0x1A #define WLAN_OUI_MICROSOFT 0x0050f2 #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 @@ -2389,7 +2387,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) if (!ieee80211_is_action(fc)) return false; - if (skb->len < offsetofend(typeof(*mgmt), u.action.u.ftm.action_code)) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(action_code)) return true; /* action frame - additionally check for non-bufferable FTM */ @@ -2398,8 +2396,8 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) mgmt->u.action.category != WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) return true; - if (mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_REQUEST || - mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_RESPONSE) + if (mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_REQUEST || + mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_RESPONSE) return false; return true; @@ -2449,7 +2447,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { - if (skb->len < IEEE80211_MIN_ACTION_SIZE) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return false; return _ieee80211_is_robust_mgmt_frame((void *)skb->data); } @@ -2465,7 +2463,7 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, { struct ieee80211_mgmt *mgmt = (void *)hdr; - if (len < IEEE80211_MIN_ACTION_SIZE) + if (len < IEEE80211_MIN_ACTION_SIZE(category)) return false; if (!ieee80211_is_action(hdr->frame_control)) return false; @@ -2483,13 +2481,14 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, static inline bool ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) { + struct ieee80211_mgmt *mgmt = (void *)skb->data; u8 action; if (!ieee80211_is_public_action((void *)skb->data, skb->len) || - skb->len < IEEE80211_MIN_ACTION_SIZE + 1) + skb->len < IEEE80211_MIN_ACTION_SIZE(action_code)) return false; - action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); + action = mgmt->u.action.action_code; return action != WLAN_PUB_ACTION_20_40_BSS_COEX && action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && @@ -2528,7 +2527,7 @@ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) */ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) { - if (skb->len < IEEE80211_MIN_ACTION_SIZE) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return false; return _ieee80211_is_group_privacy_action((void *)skb->data); } @@ -2624,8 +2623,7 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) if (!ieee80211_is_action(mgmt->frame_control)) return false; - if (skb->len < IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.tpc_report)) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(tpc_report)) return false; /* @@ -2644,12 +2642,11 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) return false; /* both spectrum mgmt and link measurement have same action code */ - if (mgmt->u.action.u.tpc_report.action_code != - WLAN_ACTION_SPCT_TPC_RPRT) + if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_TPC_RPRT) return false; - if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || - mgmt->u.action.u.tpc_report.tpc_elem_length != + if (mgmt->u.action.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || + mgmt->u.action.tpc_report.tpc_elem_length != sizeof(struct ieee80211_tpc_report_ie)) return false; @@ -2665,16 +2662,15 @@ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; - if (skb->len < IEEE80211_MIN_ACTION_SIZE) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(wnm_timing_msr)) return false; if (!ieee80211_is_action(mgmt->frame_control)) return false; if (mgmt->u.action.category == WLAN_CATEGORY_WNM_UNPROTECTED && - mgmt->u.action.u.wnm_timing_msr.action_code == - WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE && - skb->len >= offsetofend(typeof(*mgmt), u.action.u.wnm_timing_msr)) + mgmt->u.action.action_code == + WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE) return true; return false; @@ -2689,15 +2685,13 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; - if (!ieee80211_is_public_action((void *)mgmt, skb->len)) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(ftm)) return false; - if (mgmt->u.action.u.ftm.action_code == - WLAN_PUB_ACTION_FTM_RESPONSE && - skb->len >= offsetofend(typeof(*mgmt), u.action.u.ftm)) - return true; + if (!ieee80211_is_public_action((void *)mgmt, skb->len)) + return false; - return false; + return mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_RESPONSE; } struct element { diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 61b7335aa037..ca9afa824aa4 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -40,7 +40,8 @@ static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb_inner_mac_header(skb); } -int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); +int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index db45d6f1c4f4..594d6dc3f4c9 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -25,8 +25,6 @@ struct pppoe_opt { struct net_device *dev; /* device associated with socket*/ int ifindex; /* ifindex of device associated with socket */ struct pppoe_addr pa; /* what this socket is bound to*/ - struct sockaddr_pppox relay; /* what socket data will be - relayed to (PPPoE relaying) */ struct work_struct padt_work;/* Work item for handling PADT */ }; @@ -53,16 +51,10 @@ struct pppox_sock { #define pppoe_dev proto.pppoe.dev #define pppoe_ifindex proto.pppoe.ifindex #define pppoe_pa proto.pppoe.pa -#define pppoe_relay proto.pppoe.relay static inline struct pppox_sock *pppox_sk(struct sock *sk) { - return (struct pppox_sock *)sk; -} - -static inline struct sock *sk_pppox(struct pppox_sock *po) -{ - return (struct sock *)po; + return container_of(sk, struct pppox_sock, sk); } struct module; @@ -80,14 +72,11 @@ extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */ extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); extern int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); -#define PPPOEIOCSFWD32 _IOW(0xB1 ,0, compat_size_t) - /* PPPoX socket states */ enum { PPPOX_NONE = 0, /* initial state */ PPPOX_CONNECTED = 1, /* connection established ==TCP_ESTABLISHED */ PPPOX_BOUND = 2, /* bound to ppp device */ - PPPOX_RELAY = 4, /* forwarding is enabled */ PPPOX_DEAD = 16 /* dead, useless, please clean me up!*/ }; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index ce97d891cf72..3d21e06fda67 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -27,10 +27,11 @@ struct team; struct team_port { struct net_device *dev; - struct hlist_node hlist; /* node in enabled ports hash list */ + struct hlist_node tx_hlist; /* node in tx-enabled ports hash list */ struct list_head list; /* node in ordinary list */ struct team *team; - int index; /* index of enabled port. If disabled, it's set to -1 */ + int tx_index; /* index of tx enabled port. If disabled, -1 */ + bool rx_enabled; bool linkup; /* either state.linkup or user.linkup */ @@ -75,14 +76,24 @@ static inline struct team_port *team_port_get_rcu(const struct net_device *dev) return rcu_dereference(dev->rx_handler_data); } +static inline bool team_port_rx_enabled(struct team_port *port) +{ + return READ_ONCE(port->rx_enabled); +} + +static inline bool team_port_tx_enabled(struct team_port *port) +{ + return READ_ONCE(port->tx_index) != -1; +} + static inline bool team_port_enabled(struct team_port *port) { - return port->index != -1; + return team_port_rx_enabled(port) && team_port_tx_enabled(port); } static inline bool team_port_txable(struct team_port *port) { - return port->linkup && team_port_enabled(port); + return port->linkup && team_port_tx_enabled(port); } static inline bool team_port_dev_txable(const struct net_device *port_dev) @@ -121,8 +132,7 @@ struct team_mode_ops { int (*port_enter)(struct team *team, struct team_port *port); void (*port_leave)(struct team *team, struct team_port *port); void (*port_change_dev_addr)(struct team *team, struct team_port *port); - void (*port_enabled)(struct team *team, struct team_port *port); - void (*port_disabled)(struct team *team, struct team_port *port); + void (*port_tx_disabled)(struct team *team, struct team_port *port); }; extern int team_modeop_port_enter(struct team *team, struct team_port *port); @@ -186,16 +196,16 @@ struct team_mode { #define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS) struct team { - struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; const struct header_ops *header_ops_cache; /* - * List of enabled ports and their count + * List of tx-enabled ports and counts of rx and tx-enabled ports. */ - int en_port_count; - struct hlist_head en_port_hlist[TEAM_PORT_HASHENTRIES]; + int tx_en_port_count; + int rx_en_port_count; + struct hlist_head tx_en_port_hlist[TEAM_PORT_HASHENTRIES]; struct list_head port_list; /* list of all ports */ @@ -232,48 +242,50 @@ static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping); skb->dev = port->dev; - if (unlikely(netpoll_tx_running(team->dev))) { + if (unlikely(netpoll_tx_running(netdev_from_priv(team)))) { team_netpoll_send_skb(port, skb); return 0; } return dev_queue_xmit(skb); } -static inline struct hlist_head *team_port_index_hash(struct team *team, - int port_index) +static inline struct hlist_head *team_tx_port_index_hash(struct team *team, + int tx_port_index) { - return &team->en_port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)]; + unsigned int list_entry = tx_port_index & (TEAM_PORT_HASHENTRIES - 1); + + return &team->tx_en_port_hlist[list_entry]; } -static inline struct team_port *team_get_port_by_index(struct team *team, - int port_index) +static inline struct team_port *team_get_port_by_tx_index(struct team *team, + int tx_port_index) { + struct hlist_head *head = team_tx_port_index_hash(team, tx_port_index); struct team_port *port; - struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry(port, head, hlist) - if (port->index == port_index) + hlist_for_each_entry(port, head, tx_hlist) + if (port->tx_index == tx_port_index) return port; return NULL; } static inline int team_num_to_port_index(struct team *team, unsigned int num) { - int en_port_count = READ_ONCE(team->en_port_count); + int tx_en_port_count = READ_ONCE(team->tx_en_port_count); - if (unlikely(!en_port_count)) + if (unlikely(!tx_en_port_count)) return 0; - return num % en_port_count; + return num % tx_en_port_count; } -static inline struct team_port *team_get_port_by_index_rcu(struct team *team, - int port_index) +static inline struct team_port *team_get_port_by_tx_index_rcu(struct team *team, + int tx_port_index) { + struct hlist_head *head = team_tx_port_index_hash(team, tx_port_index); struct team_port *port; - struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry_rcu(port, head, hlist) - if (port->index == port_index) + hlist_for_each_entry_rcu(port, head, tx_hlist) + if (READ_ONCE(port->tx_index) == tx_port_index) return port; return NULL; } diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index a9ecff191bd9..2c91b7659ce9 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -931,6 +931,18 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) #define IIO_DECLARE_DMA_BUFFER_WITH_TS(type, name, count) \ __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(IIO_DMA_MINALIGN) +/** + * IIO_DECLARE_QUATERNION() - Declare a quaternion element + * @type: element type of the individual vectors + * @name: identifier name + * + * Quaternions are a vector composed of 4 elements (W, X, Y, Z). Use this macro + * to declare a quaternion element in a struct to ensure proper alignment in + * an IIO buffer. + */ +#define IIO_DECLARE_QUATERNION(type, name) \ + type name[4] __aligned(sizeof(type) * 4) + struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ diff --git a/include/linux/ima.h b/include/linux/ima.h index abf8923f8fc5..8e08baf16c2f 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -11,6 +11,7 @@ #include <linux/fs.h> #include <linux/security.h> #include <linux/kexec.h> +#include <linux/secure_boot.h> #include <crypto/hash_info.h> struct linux_binprm; @@ -73,14 +74,8 @@ int ima_validate_range(phys_addr_t phys, size_t size); #endif #ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT -extern bool arch_ima_get_secureboot(void); extern const char * const *arch_get_ima_policy(void); #else -static inline bool arch_ima_get_secureboot(void) -{ - return false; -} - static inline const char * const *arch_get_ima_policy(void) { return NULL; diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 35227d47cfc9..0e4340ecd857 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -16,22 +16,26 @@ */ #define INDIRECT_CALL_1(f, f1, ...) \ ({ \ - likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \ + typeof(f) __f1 = (f); \ + likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__); \ }) #define INDIRECT_CALL_2(f, f2, f1, ...) \ ({ \ - likely(f == f2) ? f2(__VA_ARGS__) : \ - INDIRECT_CALL_1(f, f1, __VA_ARGS__); \ + typeof(f) __f2 = (f); \ + likely(__f2 == f2) ? f2(__VA_ARGS__) : \ + INDIRECT_CALL_1(__f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALL_3(f, f3, f2, f1, ...) \ ({ \ - likely(f == f3) ? f3(__VA_ARGS__) : \ - INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \ + typeof(f) __f3 = (f); \ + likely(__f3 == f3) ? f3(__VA_ARGS__) : \ + INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \ ({ \ - likely(f == f4) ? f4(__VA_ARGS__) : \ - INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \ + typeof(f) __f4 = (f); \ + likely(__f4 == f4) ? f4(__VA_ARGS__) : \ + INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALLABLE_DECLARE(f) f @@ -53,7 +57,7 @@ * builtin, this macro simplify dealing with indirect calls with only ipv4/ipv6 * alternatives */ -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) #define INDIRECT_CALL_INET(f, f2, f1, ...) \ INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__) #elif IS_ENABLED(CONFIG_INET) diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index fa1f328d6712..328004f605c3 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -77,7 +77,6 @@ enum rapl_primitives { PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW2, /* below are not raw primitive data */ - AVERAGE_POWER, NR_RAPL_PRIMITIVES, }; @@ -128,6 +127,46 @@ struct reg_action { int err; }; +struct rapl_defaults { + u8 floor_freq_reg_addr; + int (*check_unit)(struct rapl_domain *rd); + void (*set_floor_freq)(struct rapl_domain *rd, bool mode); + u64 (*compute_time_window)(struct rapl_domain *rd, u64 val, bool to_raw); + unsigned int dram_domain_energy_unit; + unsigned int psys_domain_energy_unit; + bool spr_psys_bits; + bool msr_pl4_support; + bool msr_pmu_support; +}; + +#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \ + .name = #p, \ + .mask = m, \ + .shift = s, \ + .id = i, \ + .unit = u, \ + .flag = f \ + } + +enum unit_type { + ARBITRARY_UNIT, /* no translation */ + POWER_UNIT, + ENERGY_UNIT, + TIME_UNIT, +}; + +/* per domain data. used to describe individual knobs such that access function + * can be consolidated into one instead of many inline functions. + */ +struct rapl_primitive_info { + const char *name; + u64 mask; + int shift; + enum rapl_domain_reg_id id; + enum unit_type unit; + u32 flag; +}; + /** * struct rapl_if_priv: private data for different RAPL interfaces * @control_type: Each RAPL interface must have its own powercap @@ -142,8 +181,8 @@ struct reg_action { * registers. * @write_raw: Callback for writing RAPL interface specific * registers. - * @defaults: internal pointer to interface default settings - * @rpi: internal pointer to interface primitive info + * @defaults: pointer to default settings + * @rpi: pointer to interface primitive info */ struct rapl_if_priv { enum rapl_if_type type; @@ -154,8 +193,8 @@ struct rapl_if_priv { int limits[RAPL_DOMAIN_MAX]; int (*read_raw)(int id, struct reg_action *ra, bool pmu_ctx); int (*write_raw)(int id, struct reg_action *ra); - void *defaults; - void *rpi; + const struct rapl_defaults *defaults; + struct rapl_primitive_info *rpi; }; #ifdef CONFIG_PERF_EVENTS @@ -211,6 +250,9 @@ void rapl_remove_package_cpuslocked(struct rapl_package *rp); struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu); struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu); void rapl_remove_package(struct rapl_package *rp); +int rapl_default_check_unit(struct rapl_domain *rd); +void rapl_default_set_floor_freq(struct rapl_domain *rd, bool mode); +u64 rapl_default_compute_time_window(struct rapl_domain *rd, u64 value, bool to_raw); #ifdef CONFIG_PERF_EVENTS int rapl_package_add_pmu(struct rapl_package *rp); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 7a1516011ccf..e19872e37e06 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -53,7 +53,7 @@ struct iommu_flush_ops { * tables. * @ias: Input address (iova) size, in bits. * @oas: Output address (paddr) size, in bits. - * @coherent_walk A flag to indicate whether or not page table walks made + * @coherent_walk: A flag to indicate whether or not page table walks made * by the IOMMU are coherent with the CPU caches. * @tlb: TLB management callbacks for this set of tables. * @iommu_dev: The device representing the DMA configuration for the @@ -136,6 +136,7 @@ struct io_pgtable_cfg { void (*free)(void *cookie, void *pages, size_t size); /* Low-level data specific to the table format */ + /* private: */ union { struct { u64 ttbr; @@ -203,6 +204,9 @@ struct arm_lpae_io_pgtable_walk_data { * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. * @pgtable_walk: (optional) Perform a page table walk for a given iova. + * @read_and_clear_dirty: Record dirty info per IOVA. If an IOVA is dirty, + * clear its dirty state from the PTE unless the + * IOMMU_DIRTY_NO_CLEAR flag is passed in. * * These functions map directly onto the iommu_ops member functions with * the same names. @@ -231,7 +235,9 @@ struct io_pgtable_ops { * the configuration actually provided by the allocator (e.g. the * pgsize_bitmap may be restricted). * @cookie: An opaque token provided by the IOMMU driver and passed back to - * the callback routines in cfg->tlb. + * the callback routines. + * + * Returns: Pointer to the &struct io_pgtable_ops for this set of page tables. */ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, struct io_pgtable_cfg *cfg, diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3e4a82a6f817..244392026c6d 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -8,6 +8,9 @@ #include <linux/llist.h> #include <uapi/linux/io_uring.h> +struct iou_loop_params; +struct io_uring_bpf_ops; + enum { /* * A hint to not wake right away but delay until there are enough of @@ -41,6 +44,8 @@ enum io_uring_cmd_flags { IO_URING_F_COMPAT = (1 << 12), }; +struct iou_loop_params; + struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -268,24 +273,30 @@ struct io_alloc_cache { unsigned int init_clear; }; +enum { + IO_RING_F_DRAIN_NEXT = BIT(0), + IO_RING_F_OP_RESTRICTED = BIT(1), + IO_RING_F_REG_RESTRICTED = BIT(2), + IO_RING_F_OFF_TIMEOUT_USED = BIT(3), + IO_RING_F_DRAIN_ACTIVE = BIT(4), + IO_RING_F_HAS_EVFD = BIT(5), + /* all CQEs should be posted only by the submitter task */ + IO_RING_F_TASK_COMPLETE = BIT(6), + IO_RING_F_LOCKLESS_CQ = BIT(7), + IO_RING_F_SYSCALL_IOPOLL = BIT(8), + IO_RING_F_POLL_ACTIVATED = BIT(9), + IO_RING_F_DRAIN_DISABLED = BIT(10), + IO_RING_F_COMPAT = BIT(11), + IO_RING_F_IOWQ_LIMITS_SET = BIT(12), +}; + struct io_ring_ctx { /* const or read-mostly hot data */ struct { + /* ring setup flags */ unsigned int flags; - unsigned int drain_next: 1; - unsigned int op_restricted: 1; - unsigned int reg_restricted: 1; - unsigned int off_timeout_used: 1; - unsigned int drain_active: 1; - unsigned int has_evfd: 1; - /* all CQEs should be posted only by the submitter task */ - unsigned int task_complete: 1; - unsigned int lockless_cq: 1; - unsigned int syscall_iopoll: 1; - unsigned int poll_activated: 1; - unsigned int drain_disabled: 1; - unsigned int compat: 1; - unsigned int iowq_limits_set : 1; + /* internal state flags IO_RING_F_* flags , mostly read-only */ + unsigned int int_flags; struct task_struct *submitter_task; struct io_rings *rings; @@ -355,6 +366,9 @@ struct io_ring_ctx { struct io_alloc_cache rw_cache; struct io_alloc_cache cmd_cache; + int (*loop_step)(struct io_ring_ctx *ctx, + struct iou_loop_params *); + /* * Any cancelable uring_cmd is added to this list in * ->uring_cmd() by io_uring_cmd_insert_cancelable() @@ -388,6 +402,7 @@ struct io_ring_ctx { * regularly bounce b/w CPUs. */ struct { + struct io_rings __rcu *rings_rcu; struct llist_head work_llist; struct llist_head retry_llist; unsigned long check_cq; @@ -476,6 +491,8 @@ struct io_ring_ctx { DECLARE_HASHTABLE(napi_ht, 4); #endif + struct io_uring_bpf_ops *bpf_ops; + /* * Protection for resize vs mmap races - both the mmap and resize * side will need to grab this lock, to prevent either side from @@ -540,9 +557,11 @@ enum { REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, REQ_F_BUF_NODE_BIT, + REQ_F_BUF_MORE_BIT, REQ_F_HAS_METADATA_BIT, REQ_F_IMPORT_BUFFER_BIT, REQ_F_SQE_COPIED_BIT, + REQ_F_IOPOLL_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -625,6 +644,8 @@ enum { REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), /* buf node is valid */ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), + /* incremental buffer consumption, more space available */ + REQ_F_BUF_MORE = IO_REQ_FLAG(REQ_F_BUF_MORE_BIT), /* request has read/write metadata assigned */ REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT), /* @@ -634,6 +655,8 @@ enum { REQ_F_IMPORT_BUFFER = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT), /* ->sqe_copy() has been called, if necessary */ REQ_F_SQE_COPIED = IO_REQ_FLAG(REQ_F_SQE_COPIED_BIT), + /* request must be iopolled to completion (set in ->issue()) */ + REQ_F_IOPOLL = IO_REQ_FLAG(REQ_F_IOPOLL_BIT), }; struct io_tw_req { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 99b7209dabd7..2c5685adf3a9 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -9,7 +9,7 @@ #include <linux/types.h> #include <linux/mm_types.h> #include <linux/blkdev.h> -#include <linux/pagevec.h> +#include <linux/folio_batch.h> struct address_space; struct fiemap_extent_info; @@ -65,6 +65,8 @@ struct vm_fault; * * IOMAP_F_ATOMIC_BIO indicates that (write) I/O will be issued as an atomic * bio, i.e. set REQ_ATOMIC. + * + * IOMAP_F_INTEGRITY indicates that the filesystems handles integrity metadata. */ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) @@ -79,6 +81,11 @@ struct vm_fault; #define IOMAP_F_BOUNDARY (1U << 6) #define IOMAP_F_ANON_WRITE (1U << 7) #define IOMAP_F_ATOMIC_BIO (1U << 8) +#ifdef CONFIG_BLK_DEV_INTEGRITY +#define IOMAP_F_INTEGRITY (1U << 9) +#else +#define IOMAP_F_INTEGRITY 0 +#endif /* CONFIG_BLK_DEV_INTEGRITY */ /* * Flag reserved for file system specific usage @@ -493,6 +500,7 @@ struct iomap_read_folio_ctx { struct folio *cur_folio; struct readahead_control *rac; void *read_ctx; + loff_t read_ctx_file_offset; }; struct iomap_read_ops { @@ -512,7 +520,14 @@ struct iomap_read_ops { * * This is optional. */ - void (*submit_read)(struct iomap_read_folio_ctx *ctx); + void (*submit_read)(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx); + + /* + * Optional, allows filesystem to specify own bio_set, so new bio's + * can be allocated from the provided bio_set. + */ + struct bio_set *bio_set; }; /* @@ -598,6 +613,9 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, extern struct bio_set iomap_ioend_bioset; #ifdef CONFIG_BLOCK +int iomap_bio_read_folio_range(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, size_t plen); + extern const struct iomap_read_ops iomap_bio_read_ops; static inline void iomap_bio_read_folio(struct folio *folio, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 54b8b48c762e..e587d4ac4d33 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -223,6 +223,7 @@ enum iommu_domain_cookie_type { struct iommu_domain { unsigned type; enum iommu_domain_cookie_type cookie_type; + bool is_iommupt; const struct iommu_domain_ops *ops; const struct iommu_dirty_ops *dirty_ops; const struct iommu_ops *owner; /* Whose domain_alloc we came from */ @@ -271,6 +272,8 @@ enum iommu_cap { */ IOMMU_CAP_DEFERRED_FLUSH, IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */ + /* ATS is supported and may be enabled for this device */ + IOMMU_CAP_PCI_ATS_SUPPORTED, }; /* These are the possible reserved region types */ @@ -980,7 +983,8 @@ static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { - if (domain->ops->iotlb_sync) + if (domain->ops->iotlb_sync && + likely(iotlb_gather->start < iotlb_gather->end)) domain->ops->iotlb_sync(domain, iotlb_gather); iommu_iotlb_gather_init(iotlb_gather); diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index bdd2e0652bc3..53edd69acb9b 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -159,7 +159,7 @@ * * This macro does not rely on timekeeping. Hence it is safe to call even when * timekeeping is suspended, at the expense of an underestimation of wall clock - * time, which is rather minimal with a non-zero delay_us. + * time, which is rather minimal with a non-zero @delay_us. * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. @@ -167,9 +167,9 @@ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either * case, the last read value at @args is stored in @val. */ -#define read_poll_timeout_atomic(op, val, cond, sleep_us, timeout_us, \ - sleep_before_read, args...) \ - poll_timeout_us_atomic((val) = op(args), cond, sleep_us, timeout_us, sleep_before_read) +#define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \ + delay_before_read, args...) \ + poll_timeout_us_atomic((val) = op(args), cond, delay_us, timeout_us, delay_before_read) /** * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5533a5debf3f..3c73c9c0d4f7 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -202,6 +202,7 @@ enum { * typedef resource_alignf - Resource alignment callback * @data: Private data used by the callback * @res: Resource candidate range (an empty resource space) + * @empty_res: Empty resource range without alignment applied * @size: The minimum size of the empty space * @align: Alignment from the constraints * @@ -212,6 +213,7 @@ enum { */ typedef resource_size_t (*resource_alignf)(void *data, const struct resource *res, + const struct resource *empty_res, resource_size_t size, resource_size_t align); @@ -304,14 +306,28 @@ static inline unsigned long resource_ext_type(const struct resource *res) { return res->flags & IORESOURCE_EXT_TYPE_BITS; } -/* True iff r1 completely contains r2 */ -static inline bool resource_contains(const struct resource *r1, const struct resource *r2) + +/* + * For checking if @r1 completely contains @r2 for resources that have real + * addresses but are not yet crafted into the resource tree. Normally + * resource_contains() should be used instead of this function as it checks + * also IORESOURCE_UNSET flag. + */ +static inline bool __resource_contains_unbound(const struct resource *r1, + const struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; + + return r1->start <= r2->start && r1->end >= r2->end; +} +/* True iff r1 completely contains r2 */ +static inline bool resource_contains(const struct resource *r1, const struct resource *r2) +{ if (r1->flags & IORESOURCE_UNSET || r2->flags & IORESOURCE_UNSET) return false; - return r1->start <= r2->start && r1->end >= r2->end; + + return __resource_contains_unbound(r1, r2); } /* True if any part of r1 overlaps r2 */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 443053a76dcf..a7421382a916 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,7 +333,12 @@ struct tcp6_timewait_sock { }; #if IS_ENABLED(CONFIG_IPV6) -bool ipv6_mod_enabled(void); +extern int disable_ipv6_mod; + +static inline bool ipv6_mod_enabled(void) +{ + return disable_ipv6_mod == 0; +} static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h index d26d1b1bcbfb..167fba7dbf04 100644 --- a/include/linux/irq-entry-common.h +++ b/include/linux/irq-entry-common.h @@ -3,6 +3,7 @@ #define __LINUX_IRQENTRYCOMMON_H #include <linux/context_tracking.h> +#include <linux/hrtimer_rearm.h> #include <linux/kmsan.h> #include <linux/rseq_entry.h> #include <linux/static_call_types.h> @@ -33,6 +34,14 @@ _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ | \ ARCH_EXIT_TO_USER_MODE_WORK) +#ifdef CONFIG_HRTIMER_REARM_DEFERRED +# define EXIT_TO_USER_MODE_WORK_SYSCALL (EXIT_TO_USER_MODE_WORK) +# define EXIT_TO_USER_MODE_WORK_IRQ (EXIT_TO_USER_MODE_WORK | _TIF_HRTIMER_REARM) +#else +# define EXIT_TO_USER_MODE_WORK_SYSCALL (EXIT_TO_USER_MODE_WORK) +# define EXIT_TO_USER_MODE_WORK_IRQ (EXIT_TO_USER_MODE_WORK) +#endif + /** * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs * @regs: Pointer to currents pt_regs @@ -101,37 +110,6 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs) } /** - * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() - * @ti_work: Cached TIF flags gathered with interrupts disabled - * - * Defaults to local_irq_enable(). Can be supplied by architecture specific - * code. - */ -static inline void local_irq_enable_exit_to_user(unsigned long ti_work); - -#ifndef local_irq_enable_exit_to_user -static __always_inline void local_irq_enable_exit_to_user(unsigned long ti_work) -{ - local_irq_enable(); -} -#endif - -/** - * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable() - * - * Defaults to local_irq_disable(). Can be supplied by architecture specific - * code. - */ -static inline void local_irq_disable_exit_to_user(void); - -#ifndef local_irq_disable_exit_to_user -static __always_inline void local_irq_disable_exit_to_user(void) -{ - local_irq_disable(); -} -#endif - -/** * arch_exit_to_user_mode_work - Architecture specific TIF work for exit * to user mode. * @regs: Pointer to currents pt_regs @@ -203,6 +181,7 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work /** * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required * @regs: Pointer to pt_regs on entry stack + * @work_mask: Which TIF bits need to be evaluated * * 1) check that interrupts are disabled * 2) call tick_nohz_user_enter_prepare() @@ -212,7 +191,8 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work * * Don't invoke directly, use the syscall/irqentry_ prefixed variants below */ -static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs) +static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs, + const unsigned long work_mask) { unsigned long ti_work; @@ -222,8 +202,10 @@ static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs) tick_nohz_user_enter_prepare(); ti_work = read_thread_flags(); - if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) - ti_work = exit_to_user_mode_loop(regs, ti_work); + if (unlikely(ti_work & work_mask)) { + if (!hrtimer_rearm_deferred_user_irq(&ti_work, work_mask)) + ti_work = exit_to_user_mode_loop(regs, ti_work); + } arch_exit_to_user_mode_prepare(regs, ti_work); } @@ -239,7 +221,7 @@ static __always_inline void __exit_to_user_mode_validate(void) /* Temporary workaround to keep ARM64 alive */ static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs) { - __exit_to_user_mode_prepare(regs); + __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK); rseq_exit_to_user_mode_legacy(); __exit_to_user_mode_validate(); } @@ -253,7 +235,7 @@ static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *reg */ static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) { - __exit_to_user_mode_prepare(regs); + __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_SYSCALL); rseq_syscall_exit_to_user_mode(); __exit_to_user_mode_validate(); } @@ -267,7 +249,7 @@ static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *re */ static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs) { - __exit_to_user_mode_prepare(regs); + __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_IRQ); rseq_irqentry_exit_to_user_mode(); __exit_to_user_mode_validate(); } @@ -335,6 +317,8 @@ static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs) */ static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs) { + lockdep_assert_irqs_disabled(); + instrumentation_begin(); irqentry_exit_to_user_mode_prepare(regs); instrumentation_end(); @@ -366,6 +350,207 @@ typedef struct irqentry_state { #endif /** + * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt + * + * Conditional reschedule with additional sanity checks. + */ +void raw_irqentry_exit_cond_resched(void); + +#ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) +#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched +#define irqentry_exit_cond_resched_dynamic_disabled NULL +DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); +#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +void dynamic_irqentry_exit_cond_resched(void); +#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() +#endif +#else /* CONFIG_PREEMPT_DYNAMIC */ +#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#endif /* CONFIG_PREEMPT_DYNAMIC */ + +/** + * irqentry_enter_from_kernel_mode - Establish state before invoking the irq handler + * @regs: Pointer to currents pt_regs + * + * Invoked from architecture specific entry code with interrupts disabled. + * Can only be called when the interrupt entry came from kernel mode. The + * calling code must be non-instrumentable. When the function returns all + * state is correct and the subsequent functions can be instrumented. + * + * The function establishes state (lockdep, RCU (context tracking), tracing) and + * is provided for architectures which require a strict split between entry from + * kernel and user mode and therefore cannot use irqentry_enter() which handles + * both entry modes. + * + * Returns: An opaque object that must be passed to irqentry_exit_to_kernel_mode(). + */ +static __always_inline irqentry_state_t irqentry_enter_from_kernel_mode(struct pt_regs *regs) +{ + irqentry_state_t ret = { + .exit_rcu = false, + }; + + /* + * If this entry hit the idle task invoke ct_irq_enter() whether + * RCU is watching or not. + * + * Interrupts can nest when the first interrupt invokes softirq + * processing on return which enables interrupts. + * + * Scheduler ticks in the idle task can mark quiescent state and + * terminate a grace period, if and only if the timer interrupt is + * not nested into another interrupt. + * + * Checking for rcu_is_watching() here would prevent the nesting + * interrupt to invoke ct_irq_enter(). If that nested interrupt is + * the tick then rcu_flavor_sched_clock_irq() would wrongfully + * assume that it is the first interrupt and eventually claim + * quiescent state and end grace periods prematurely. + * + * Unconditionally invoke ct_irq_enter() so RCU state stays + * consistent. + * + * TINY_RCU does not support EQS, so let the compiler eliminate + * this part when enabled. + */ + if (!IS_ENABLED(CONFIG_TINY_RCU) && + (is_idle_task(current) || arch_in_rcu_eqs())) { + /* + * If RCU is not watching then the same careful + * sequence vs. lockdep and tracing is required + * as in irqentry_enter_from_user_mode(). + */ + lockdep_hardirqs_off(CALLER_ADDR0); + ct_irq_enter(); + instrumentation_begin(); + kmsan_unpoison_entry_regs(regs); + trace_hardirqs_off_finish(); + instrumentation_end(); + + ret.exit_rcu = true; + return ret; + } + + /* + * If RCU is watching then RCU only wants to check whether it needs + * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick() + * already contains a warning when RCU is not watching, so no point + * in having another one here. + */ + lockdep_hardirqs_off(CALLER_ADDR0); + instrumentation_begin(); + kmsan_unpoison_entry_regs(regs); + rcu_irq_enter_check_tick(); + trace_hardirqs_off_finish(); + instrumentation_end(); + + return ret; +} + +/** + * irqentry_exit_to_kernel_mode_preempt - Run preempt checks on return to kernel mode + * @regs: Pointer to current's pt_regs + * @state: Return value from matching call to irqentry_enter_from_kernel_mode() + * + * This is to be invoked before irqentry_exit_to_kernel_mode_after_preempt() to + * allow kernel preemption on return from interrupt. + * + * Must be invoked with interrupts disabled and CPU state which allows kernel + * preemption. + * + * After returning from this function, the caller can modify CPU state before + * invoking irqentry_exit_to_kernel_mode_after_preempt(), which is required to + * re-establish the tracing, lockdep and RCU state for returning to the + * interrupted context. + */ +static inline void irqentry_exit_to_kernel_mode_preempt(struct pt_regs *regs, + irqentry_state_t state) +{ + if (regs_irqs_disabled(regs) || state.exit_rcu) + return; + + if (IS_ENABLED(CONFIG_PREEMPTION)) + irqentry_exit_cond_resched(); +} + +/** + * irqentry_exit_to_kernel_mode_after_preempt - Establish trace, lockdep and RCU state + * @regs: Pointer to current's pt_regs + * @state: Return value from matching call to irqentry_enter_from_kernel_mode() + * + * This is to be invoked after irqentry_exit_to_kernel_mode_preempt() and before + * actually returning to the interrupted context. + * + * There are no requirements for the CPU state other than being able to complete + * the tracing, lockdep and RCU state transitions. After this function returns + * the caller must return directly to the interrupted context. + */ +static __always_inline void +irqentry_exit_to_kernel_mode_after_preempt(struct pt_regs *regs, irqentry_state_t state) +{ + if (!regs_irqs_disabled(regs)) { + /* + * If RCU was not watching on entry this needs to be done + * carefully and needs the same ordering of lockdep/tracing + * and RCU as the return to user mode path. + */ + if (state.exit_rcu) { + instrumentation_begin(); + hrtimer_rearm_deferred(); + /* Tell the tracer that IRET will enable interrupts */ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(); + instrumentation_end(); + ct_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + return; + } + + instrumentation_begin(); + hrtimer_rearm_deferred(); + /* Covers both tracing and lockdep */ + trace_hardirqs_on(); + instrumentation_end(); + } else { + /* + * IRQ flags state is correct already. Just tell RCU if it + * was not watching on entry. + */ + if (state.exit_rcu) + ct_irq_exit(); + } +} + +/** + * irqentry_exit_to_kernel_mode - Run preempt checks and establish state after + * invoking the interrupt handler + * @regs: Pointer to current's pt_regs + * @state: Return value from matching call to irqentry_enter_from_kernel_mode() + * + * This is the counterpart of irqentry_enter_from_kernel_mode() and combines + * the calls to irqentry_exit_to_kernel_mode_preempt() and + * irqentry_exit_to_kernel_mode_after_preempt(). + * + * The requirement for the CPU state is that it can schedule. After the function + * returns the tracing, lockdep and RCU state transitions are completed and the + * caller must return directly to the interrupted context. + */ +static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs, + irqentry_state_t state) +{ + lockdep_assert_irqs_disabled(); + + instrumentation_begin(); + irqentry_exit_to_kernel_mode_preempt(regs, state); + instrumentation_end(); + + irqentry_exit_to_kernel_mode_after_preempt(regs, state); +} + +/** * irqentry_enter - Handle state tracking on ordinary interrupt entries * @regs: Pointer to pt_regs of interrupted context * @@ -394,33 +579,11 @@ typedef struct irqentry_state { * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit * would not be possible. * - * Returns: An opaque object that must be passed to idtentry_exit() + * Returns: An opaque object that must be passed to irqentry_exit() */ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); /** - * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt - * - * Conditional reschedule with additional sanity checks. - */ -void raw_irqentry_exit_cond_resched(void); - -#ifdef CONFIG_PREEMPT_DYNAMIC -#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched -#define irqentry_exit_cond_resched_dynamic_disabled NULL -DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); -#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() -#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); -void dynamic_irqentry_exit_cond_resched(void); -#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() -#endif -#else /* CONFIG_PREEMPT_DYNAMIC */ -#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() -#endif /* CONFIG_PREEMPT_DYNAMIC */ - -/** * irqentry_exit - Handle return from exception that used irqentry_enter() * @regs: Pointer to pt_regs (exception entry regs) * @state: Return value from matching call to irqentry_enter() diff --git a/include/linux/irq.h b/include/linux/irq.h index 951acbdb9f84..efa514ee562f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -35,6 +35,10 @@ enum irqchip_irq_state; * * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h * + * Note that the first 6 definitions are shadowed by C preprocessor definitions + * in include/dt-bindings/interrupt-controller/irq.h. This is not an issue, as + * the actual values must be the same, due to being part of the stable DT ABI. + * * IRQ_TYPE_NONE - default, unspecified type * IRQ_TYPE_EDGE_RISING - rising edge triggered * IRQ_TYPE_EDGE_FALLING - falling edge triggered diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 70c0948f978e..0225121f3013 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -394,6 +394,7 @@ #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 #define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) +#define GITS_TYPER_IDBITS GENMASK_ULL(12, 8) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index b78488df6c98..40d2fce68294 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -25,6 +25,28 @@ #define GICV5_HWIRQ_TYPE_SPI UL(0x3) /* + * Architected PPIs + */ +#define GICV5_ARCH_PPI_S_DB_PPI 0x0 +#define GICV5_ARCH_PPI_RL_DB_PPI 0x1 +#define GICV5_ARCH_PPI_NS_DB_PPI 0x2 +#define GICV5_ARCH_PPI_SW_PPI 0x3 +#define GICV5_ARCH_PPI_HACDBSIRQ 0xf +#define GICV5_ARCH_PPI_CNTHVS 0x13 +#define GICV5_ARCH_PPI_CNTHPS 0x14 +#define GICV5_ARCH_PPI_PMBIRQ 0x15 +#define GICV5_ARCH_PPI_COMMIRQ 0x16 +#define GICV5_ARCH_PPI_PMUIRQ 0x17 +#define GICV5_ARCH_PPI_CTIIRQ 0x18 +#define GICV5_ARCH_PPI_GICMNT 0x19 +#define GICV5_ARCH_PPI_CNTHP 0x1a +#define GICV5_ARCH_PPI_CNTV 0x1b +#define GICV5_ARCH_PPI_CNTHV 0x1c +#define GICV5_ARCH_PPI_CNTPS 0x1d +#define GICV5_ARCH_PPI_CNTP 0x1e +#define GICV5_ARCH_PPI_TRBIRQ 0x1f + +/* * Tables attributes */ #define GICV5_NO_READ_ALLOC 0b0 @@ -365,6 +387,11 @@ int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type); int gicv5_irs_iste_alloc(u32 lpi); void gicv5_irs_syncr(void); +/* Embedded in kvm.arch */ +struct gicv5_vpe { + bool resident; +}; + struct gicv5_its_devtab_cfg { union { struct { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a53a00d36228..7e785aa6d35d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -429,22 +429,46 @@ struct jbd2_inode { unsigned long i_flags; /** - * @i_dirty_start: + * @i_dirty_start_page: + * + * Dirty range start in PAGE_SIZE units. + * + * The dirty range is empty if @i_dirty_start_page is greater than or + * equal to @i_dirty_end_page. * - * Offset in bytes where the dirty range for this inode starts. * [j_list_lock] */ - loff_t i_dirty_start; + pgoff_t i_dirty_start_page; /** - * @i_dirty_end: + * @i_dirty_end_page: + * + * Dirty range end in PAGE_SIZE units (exclusive). * - * Inclusive offset in bytes where the dirty range for this inode - * ends. [j_list_lock] + * [j_list_lock] */ - loff_t i_dirty_end; + pgoff_t i_dirty_end_page; }; +/* + * Lockless readers treat start_page >= end_page as an empty range. + * Writers publish a new non-empty range by storing i_dirty_end_page before + * i_dirty_start_page. + */ +static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode, + loff_t *start, loff_t *end) +{ + pgoff_t start_page = READ_ONCE(jinode->i_dirty_start_page); + pgoff_t end_page = READ_ONCE(jinode->i_dirty_end_page); + + if (start_page >= end_page) + return false; + + *start = (loff_t)start_page << PAGE_SHIFT; + *end = ((loff_t)end_page << PAGE_SHIFT) - 1; + return true; +} + struct jbd2_revoke_table_s; /** diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fdef2c155c27..bbd57061802c 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -67,10 +67,6 @@ extern void register_refined_jiffies(long clock_tick_rate); /* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ #define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) -#ifndef __jiffy_arch_data -#define __jiffy_arch_data -#endif - /* * The 64-bit value is not atomic on 32-bit systems - you MUST NOT read it * without sampling the sequence number in jiffies_lock. @@ -83,7 +79,7 @@ extern void register_refined_jiffies(long clock_tick_rate); * See arch/ARCH/kernel/vmlinux.lds.S */ extern u64 __cacheline_aligned_in_smp jiffies_64; -extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; +extern unsigned long volatile __cacheline_aligned_in_smp jiffies; #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); @@ -434,8 +430,44 @@ extern unsigned long preset_lpj; /* * Convert various time units to each other: */ -extern unsigned int jiffies_to_msecs(const unsigned long j); -extern unsigned int jiffies_to_usecs(const unsigned long j); + +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) +/** + * jiffies_to_msecs - Convert jiffies to milliseconds + * @j: jiffies value + * + * This inline version takes care of HZ in {100,250,1000}. + * + * Return: milliseconds value + */ +static inline unsigned int jiffies_to_msecs(const unsigned long j) +{ + return (MSEC_PER_SEC / HZ) * j; +} +#else +unsigned int jiffies_to_msecs(const unsigned long j); +#endif + +#if !(USEC_PER_SEC % HZ) +/** + * jiffies_to_usecs - Convert jiffies to microseconds + * @j: jiffies value + * + * Return: microseconds value + */ +static inline unsigned int jiffies_to_usecs(const unsigned long j) +{ + /* + * Hz usually doesn't go much further MSEC_PER_SEC. + * jiffies_to_usecs() and usecs_to_jiffies() depend on that. + */ + BUILD_BUG_ON(HZ > USEC_PER_SEC); + + return (USEC_PER_SEC / HZ) * j; +} +#else +unsigned int jiffies_to_usecs(const unsigned long j); +#endif /** * jiffies_to_nsecs - Convert jiffies to nanoseconds diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index fdb79dd1ebd8..b9c7b0ebf7b9 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -87,13 +87,6 @@ struct static_key { atomic_t enabled; #ifdef CONFIG_JUMP_LABEL /* - * Note: - * To make anonymous unions work with old compilers, the static - * initialization of them requires brackets. This creates a dependency - * on the order of the struct with the initializers. If any fields - * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need - * to be modified. - * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod @@ -238,19 +231,12 @@ extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); extern enum jump_label_type jump_label_init_type(struct jump_entry *entry); -/* - * We should be using ATOMIC_INIT() for initializing .enabled, but - * the inclusion of atomic.h is problematic for inclusion of jump_label.h - * in 'low-level' headers. Thus, we are initializing .enabled with a - * raw value, but have added a BUILD_BUG_ON() to catch any issues in - * jump_label_init() see: kernel/jump_label.c. - */ #define STATIC_KEY_INIT_TRUE \ - { .enabled = { 1 }, \ - { .type = JUMP_TYPE_TRUE } } + { .enabled = ATOMIC_INIT(1), \ + .type = JUMP_TYPE_TRUE } #define STATIC_KEY_INIT_FALSE \ - { .enabled = { 0 }, \ - { .type = JUMP_TYPE_FALSE } } + { .enabled = ATOMIC_INIT(0), \ + .type = JUMP_TYPE_FALSE } #else /* !CONFIG_JUMP_LABEL */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 338a1921a50a..bf233bde68c7 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -352,8 +352,8 @@ bool __kasan_mempool_poison_object(void *ptr, unsigned long ip); * kasan_mempool_unpoison_object(). * * This function operates on all slab allocations including large kmalloc - * allocations (the ones returned by kmalloc_large() or by kmalloc() with the - * size > KMALLOC_MAX_SIZE). + * allocations (i.e. the ones backed directly by the buddy allocator rather + * than kmalloc slab caches). * * Return: true if the allocation can be safely reused; false otherwise. */ @@ -381,8 +381,8 @@ void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip); * original tags based on the pointer value. * * This function operates on all slab allocations including large kmalloc - * allocations (the ones returned by kmalloc_large() or by kmalloc() with the - * size > KMALLOC_MAX_SIZE). + * allocations (i.e. the ones backed directly by the buddy allocator rather + * than kmalloc slab caches). */ static __always_inline void kasan_mempool_unpoison_object(void *ptr, size_t size) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index b5a5f32fdfd1..e21b2f7f4159 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -23,6 +23,7 @@ struct file; struct dentry; struct iattr; +struct ns_common; struct seq_file; struct vm_area_struct; struct vm_operations_struct; @@ -99,8 +100,6 @@ enum kernfs_node_type { #define KERNFS_TYPE_MASK 0x000f #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK -#define KERNFS_MAX_USER_XATTRS 128 -#define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, @@ -209,7 +208,7 @@ struct kernfs_node { struct rb_node rb; - const void *ns; /* namespace tag */ + const struct ns_common *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ unsigned short flags; umode_t mode; @@ -331,7 +330,7 @@ struct kernfs_ops { */ struct kernfs_fs_context { struct kernfs_root *root; /* Root of the hierarchy being mounted */ - void *ns_tag; /* Namespace tag of the mount (or NULL) */ + struct ns_common *ns_tag; /* Namespace tag of the mount (or NULL) */ unsigned long magic; /* File system specific magic number */ /* The following are set/used by kernfs_mount() */ @@ -406,9 +405,11 @@ void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, - const char *name, const void *ns); + const char *name, + const struct ns_common *ns); struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, - const char *path, const void *ns); + const char *path, + const struct ns_common *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); @@ -426,7 +427,8 @@ unsigned int kernfs_root_flags(struct kernfs_node *kn); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns); + void *priv, + const struct ns_common *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, @@ -434,7 +436,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, + void *priv, + const struct ns_common *ns, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, @@ -446,9 +449,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, - const void *ns); + const struct ns_common *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns); + const char *new_name, const struct ns_common *new_ns); int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt); @@ -459,7 +462,7 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name, int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags); -const void *kernfs_super_ns(struct super_block *sb); +const struct ns_common *kernfs_super_ns(struct super_block *sb); int kernfs_get_tree(struct fs_context *fc); void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); @@ -494,11 +497,11 @@ static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, - const void *ns) + const struct ns_common *ns) { return NULL; } static inline struct kernfs_node * kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, - const void *ns) + const struct ns_common *ns) { return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } @@ -526,14 +529,15 @@ static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns) + void *priv, const struct ns_common *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, struct lock_class_key *key) + void *priv, const struct ns_common *ns, + struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * @@ -549,12 +553,14 @@ static inline bool kernfs_remove_self(struct kernfs_node *kn) { return false; } static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, - const char *name, const void *ns) + const char *name, + const struct ns_common *ns) { return -ENOSYS; } static inline int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns) + const char *new_name, + const struct ns_common *new_ns) { return -ENOSYS; } static inline int kernfs_setattr(struct kernfs_node *kn, @@ -575,7 +581,7 @@ static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { return -ENOSYS; } -static inline const void *kernfs_super_ns(struct super_block *sb) +static inline const struct ns_common *kernfs_super_ns(struct super_block *sb) { return NULL; } static inline int kernfs_get_tree(struct fs_context *fc) diff --git a/include/linux/kho/abi/kexec_handover.h b/include/linux/kho/abi/kexec_handover.h index 2201a0d2c159..6b7d8ef550f9 100644 --- a/include/linux/kho/abi/kexec_handover.h +++ b/include/linux/kho/abi/kexec_handover.h @@ -10,8 +10,13 @@ #ifndef _LINUX_KHO_ABI_KEXEC_HANDOVER_H #define _LINUX_KHO_ABI_KEXEC_HANDOVER_H +#include <linux/bits.h> +#include <linux/log2.h> +#include <linux/math.h> #include <linux/types.h> +#include <asm/page.h> + /** * DOC: Kexec Handover ABI * @@ -29,32 +34,32 @@ * compatibility is only guaranteed for kernels supporting the same ABI version. * * FDT Structure Overview: - * The FDT serves as a central registry for physical - * addresses of preserved data structures and sub-FDTs. The first kernel - * populates this FDT with references to memory regions and other FDTs that - * need to persist across the kexec transition. The subsequent kernel then - * parses this FDT to locate and restore the preserved data.:: + * The FDT serves as a central registry for physical addresses of preserved + * data structures. The first kernel populates this FDT with references to + * memory regions and other metadata that need to persist across the kexec + * transition. The subsequent kernel then parses this FDT to locate and + * restore the preserved data.:: * * / { - * compatible = "kho-v1"; + * compatible = "kho-v2"; * * preserved-memory-map = <0x...>; * * <subnode-name-1> { - * fdt = <0x...>; + * preserved-data = <0x...>; * }; * * <subnode-name-2> { - * fdt = <0x...>; + * preserved-data = <0x...>; * }; * ... ... * <subnode-name-N> { - * fdt = <0x...>; + * preserved-data = <0x...>; * }; * }; * * Root KHO Node (/): - * - compatible: "kho-v1" + * - compatible: "kho-v2" * * Indentifies the overall KHO ABI version. * @@ -69,20 +74,20 @@ * is provided by the subsystem that uses KHO for preserving its * data. * - * - fdt: u64 + * - preserved-data: u64 * - * Physical address pointing to a subnode FDT blob that is also + * Physical address pointing to a subnode data blob that is also * being preserved. */ /* The compatible string for the KHO FDT root node. */ -#define KHO_FDT_COMPATIBLE "kho-v1" +#define KHO_FDT_COMPATIBLE "kho-v2" /* The FDT property for the preserved memory map. */ #define KHO_FDT_MEMORY_MAP_PROP_NAME "preserved-memory-map" -/* The FDT property for sub-FDTs. */ -#define KHO_FDT_SUB_TREE_PROP_NAME "fdt" +/* The FDT property for preserved data blobs. */ +#define KHO_FDT_SUB_TREE_PROP_NAME "preserved-data" /** * DOC: Kexec Handover ABI for vmalloc Preservation @@ -160,4 +165,113 @@ struct kho_vmalloc { unsigned short order; }; +/** + * DOC: KHO persistent memory tracker + * + * KHO tracks preserved memory using a radix tree data structure. Each node of + * the tree is exactly a single page. The leaf nodes are bitmaps where each set + * bit is a preserved page of any order. The intermediate nodes are tables of + * physical addresses that point to a lower level node. + * + * The tree hierarchy is shown below:: + * + * root + * +-------------------+ + * | Level 5 | (struct kho_radix_node) + * +-------------------+ + * | + * v + * +-------------------+ + * | Level 4 | (struct kho_radix_node) + * +-------------------+ + * | + * | ... (intermediate levels) + * | + * v + * +-------------------+ + * | Level 0 | (struct kho_radix_leaf) + * +-------------------+ + * + * The tree is traversed using a key that encodes the page's physical address + * (pa) and its order into a single unsigned long value. The encoded key value + * is composed of two parts: the 'order bit' in the upper part and the + * 'shifted physical address' in the lower part.:: + * + * +------------+-----------------------------+--------------------------+ + * | Page Order | Order Bit | Shifted Physical Address | + * +------------+-----------------------------+--------------------------+ + * | 0 | ...000100 ... (at bit 52) | pa >> (PAGE_SHIFT + 0) | + * | 1 | ...000010 ... (at bit 51) | pa >> (PAGE_SHIFT + 1) | + * | 2 | ...000001 ... (at bit 50) | pa >> (PAGE_SHIFT + 2) | + * | ... | ... | ... | + * +------------+-----------------------------+--------------------------+ + * + * Shifted Physical Address: + * The 'shifted physical address' is the physical address normalized for its + * order. It effectively represents the PFN shifted right by the order. + * + * Order Bit: + * The 'order bit' encodes the page order by setting a single bit at a + * specific position. The position of this bit itself represents the order. + * + * For instance, on a 64-bit system with 4KB pages (PAGE_SHIFT = 12), the + * maximum range for the shifted physical address (for order 0) is 52 bits + * (64 - 12). This address occupies bits [0-51]. For order 0, the order bit is + * set at position 52. + * + * The following diagram illustrates how the encoded key value is split into + * indices for the tree levels, with PAGE_SIZE of 4KB:: + * + * 63:60 59:51 50:42 41:33 32:24 23:15 14:0 + * +---------+--------+--------+--------+--------+--------+-----------------+ + * | 0 | Lv 5 | Lv 4 | Lv 3 | Lv 2 | Lv 1 | Lv 0 (bitmap) | + * +---------+--------+--------+--------+--------+--------+-----------------+ + * + * The radix tree stores pages of all orders in a single 6-level hierarchy. It + * efficiently shares higher tree levels, especially due to common zero top + * address bits, allowing a single, efficient algorithm to manage all + * pages. This bitmap approach also offers memory efficiency; for example, a + * 512KB bitmap can cover a 16GB memory range for 0-order pages with PAGE_SIZE = + * 4KB. + * + * The data structures defined here are part of the KHO ABI. Any modification + * to these structures that breaks backward compatibility must be accompanied by + * an update to the "compatible" string. This ensures that a newer kernel can + * correctly interpret the data passed by an older kernel. + */ + +/* + * Defines constants for the KHO radix tree structure, used to track preserved + * memory. These constants govern the indexing, sizing, and depth of the tree. + */ +enum kho_radix_consts { + /* + * The bit position of the order bit (and also the length of the + * shifted physical address) for an order-0 page. + */ + KHO_ORDER_0_LOG2 = 64 - PAGE_SHIFT, + + /* Size of the table in kho_radix_node, in log2 */ + KHO_TABLE_SIZE_LOG2 = const_ilog2(PAGE_SIZE / sizeof(phys_addr_t)), + + /* Number of bits in the kho_radix_leaf bitmap, in log2 */ + KHO_BITMAP_SIZE_LOG2 = PAGE_SHIFT + const_ilog2(BITS_PER_BYTE), + + /* + * The total tree depth is the number of intermediate levels + * and 1 bitmap level. + */ + KHO_TREE_MAX_DEPTH = + DIV_ROUND_UP(KHO_ORDER_0_LOG2 - KHO_BITMAP_SIZE_LOG2, + KHO_TABLE_SIZE_LOG2) + 1, +}; + +struct kho_radix_node { + u64 table[1 << KHO_TABLE_SIZE_LOG2]; +}; + +struct kho_radix_leaf { + DECLARE_BITMAP(bitmap, 1 << KHO_BITMAP_SIZE_LOG2); +}; + #endif /* _LINUX_KHO_ABI_KEXEC_HANDOVER_H */ diff --git a/include/linux/kho/abi/memfd.h b/include/linux/kho/abi/memfd.h index 68cb6303b846..08b10fea2afc 100644 --- a/include/linux/kho/abi/memfd.h +++ b/include/linux/kho/abi/memfd.h @@ -56,10 +56,24 @@ struct memfd_luo_folio_ser { u64 index; } __packed; +/* + * The set of seals this version supports preserving. If support for any new + * seals is needed, add it here and bump version. + */ +#define MEMFD_LUO_ALL_SEALS (F_SEAL_SEAL | \ + F_SEAL_SHRINK | \ + F_SEAL_GROW | \ + F_SEAL_WRITE | \ + F_SEAL_FUTURE_WRITE | \ + F_SEAL_EXEC) + /** * struct memfd_luo_ser - Main serialization structure for a memfd. * @pos: The file's current position (f_pos). * @size: The total size of the file in bytes (i_size). + * @seals: The seals present on the memfd. The seals are uABI so it is safe + * to directly use them in the ABI. + * @flags: Flags for the file. Unused flag bits must be set to 0. * @nr_folios: Number of folios in the folios array. * @folios: KHO vmalloc descriptor pointing to the array of * struct memfd_luo_folio_ser. @@ -67,11 +81,13 @@ struct memfd_luo_folio_ser { struct memfd_luo_ser { u64 pos; u64 size; + u32 seals; + u32 flags; u64 nr_folios; struct kho_vmalloc folios; } __packed; /* The compatibility string for memfd file handler */ -#define MEMFD_LUO_FH_COMPATIBLE "memfd-v1" +#define MEMFD_LUO_FH_COMPATIBLE "memfd-v2" #endif /* _LINUX_KHO_ABI_MEMFD_H */ diff --git a/include/linux/kho_radix_tree.h b/include/linux/kho_radix_tree.h new file mode 100644 index 000000000000..84e918b96e53 --- /dev/null +++ b/include/linux/kho_radix_tree.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_KHO_RADIX_TREE_H +#define _LINUX_KHO_RADIX_TREE_H + +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/mutex_types.h> +#include <linux/types.h> + +/** + * DOC: Kexec Handover Radix Tree + * + * This is a radix tree implementation for tracking physical memory pages + * across kexec transitions. It was developed for the KHO mechanism but is + * designed for broader use by any subsystem that needs to preserve pages. + * + * The radix tree is a multi-level tree where leaf nodes are bitmaps + * representing individual pages. To allow pages of different sizes (orders) + * to be stored efficiently in a single tree, it uses a unique key encoding + * scheme. Each key is an unsigned long that combines a page's physical + * address and its order. + * + * Client code is responsible for allocating the root node of the tree, + * initializing the mutex lock, and managing its lifecycle. It must use the + * tree data structures defined in the KHO ABI, + * `include/linux/kho/abi/kexec_handover.h`. + */ + +struct kho_radix_node; + +struct kho_radix_tree { + struct kho_radix_node *root; + struct mutex lock; /* protects the tree's structure and root pointer */ +}; + +typedef int (*kho_radix_tree_walk_callback_t)(phys_addr_t phys, + unsigned int order); + +#ifdef CONFIG_KEXEC_HANDOVER + +int kho_radix_add_page(struct kho_radix_tree *tree, unsigned long pfn, + unsigned int order); + +void kho_radix_del_page(struct kho_radix_tree *tree, unsigned long pfn, + unsigned int order); + +int kho_radix_walk_tree(struct kho_radix_tree *tree, + kho_radix_tree_walk_callback_t cb); + +#else /* #ifdef CONFIG_KEXEC_HANDOVER */ + +static inline int kho_radix_add_page(struct kho_radix_tree *tree, long pfn, + unsigned int order) +{ + return -EOPNOTSUPP; +} + +static inline void kho_radix_del_page(struct kho_radix_tree *tree, + unsigned long pfn, unsigned int order) { } + +static inline int kho_radix_walk_tree(struct kho_radix_tree *tree, + kho_radix_tree_walk_callback_t cb) +{ + return -EOPNOTSUPP; +} + +#endif /* #ifdef CONFIG_KEXEC_HANDOVER */ + +#endif /* _LINUX_KHO_RADIX_TREE_H */ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c8219505a79f..bcb5d4e32001 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -109,7 +109,7 @@ struct kobject *kobject_get(struct kobject *kobj); struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj); void kobject_put(struct kobject *kobj); -const void *kobject_namespace(const struct kobject *kobj); +const struct ns_common *kobject_namespace(const struct kobject *kobj); void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); char *kobject_get_path(const struct kobject *kobj, gfp_t flag); @@ -118,7 +118,7 @@ struct kobj_type { const struct sysfs_ops *sysfs_ops; const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(const struct kobject *kobj); - const void *(*namespace)(const struct kobject *kobj); + const struct ns_common *(*namespace)(const struct kobject *kobj); void (*get_ownership)(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 150fe2ae1b6b..4f0990e09b93 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -16,6 +16,7 @@ #ifndef _LINUX_KOBJECT_NS_H #define _LINUX_KOBJECT_NS_H +struct ns_common; struct sock; struct kobject; @@ -39,10 +40,10 @@ enum kobj_ns_type { struct kobj_ns_type_operations { enum kobj_ns_type type; bool (*current_may_mount)(void); - void *(*grab_current_ns)(void); - const void *(*netlink_ns)(struct sock *sk); - const void *(*initial_ns)(void); - void (*drop_ns)(void *); + struct ns_common *(*grab_current_ns)(void); + const struct ns_common *(*netlink_ns)(struct sock *sk); + const struct ns_common *(*initial_ns)(void); + void (*drop_ns)(struct ns_common *); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); @@ -51,7 +52,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj); bool kobj_ns_current_may_mount(enum kobj_ns_type type); -void *kobj_ns_grab_current(enum kobj_ns_type type); -void kobj_ns_drop(enum kobj_ns_type type, void *ns); +struct ns_common *kobj_ns_grab_current(enum kobj_ns_type type); +void kobj_ns_drop(enum kobj_ns_type type, struct ns_common *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/ksm.h b/include/linux/ksm.h index c982694c987b..d39d0d5483a2 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -17,8 +17,8 @@ #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, vm_flags_t *vm_flags); -vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file, - vm_flags_t vm_flags); +vma_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file, + vma_flags_t vma_flags); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); int ksm_disable(struct mm_struct *mm); @@ -103,10 +103,10 @@ bool ksm_process_mergeable(struct mm_struct *mm); #else /* !CONFIG_KSM */ -static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm, - const struct file *file, vm_flags_t vm_flags) +static inline vma_flags_t ksm_vma_flags(struct mm_struct *mm, + const struct file *file, vma_flags_t vma_flags) { - return vm_flags; + return vma_flags; } static inline int ksm_disable(struct mm_struct *mm) diff --git a/include/linux/ksysfs.h b/include/linux/ksysfs.h new file mode 100644 index 000000000000..c7dc6e18f28e --- /dev/null +++ b/include/linux/ksysfs.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _KSYSFS_H_ +#define _KSYSFS_H_ + +void ksysfs_init(void); + +#endif /* _KSYSFS_H_ */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c92c1149ee6e..a01a474719a7 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -7,6 +7,24 @@ struct mm_struct; +/* opaque kthread data */ +struct kthread; + +/* + * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will + * always remain a kthread. For kthreads p->worker_private always + * points to a struct kthread. For tasks that are not kthreads + * p->worker_private is used to point to other things. + * + * Return NULL for any task that is not a kthread. + */ +static inline struct kthread *tsk_is_kthread(struct task_struct *p) +{ + if (p->flags & PF_KTHREAD) + return p->worker_private; + return NULL; +} + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_exit(long result) __noreturn; +#define kthread_exit(result) do_exit(result) void kthread_complete_and_exit(struct completion *, long) __noreturn; int kthreads_update_housekeeping(void); +void kthread_do_exit(struct kthread *, long); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dde605cb894e..4c14aee1fb06 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -253,7 +253,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { unsigned long attributes; }; @@ -275,7 +274,6 @@ struct kvm_gfn_range { bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -#endif enum { OUTSIDE_GUEST_MODE, @@ -320,7 +318,8 @@ static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop) struct kvm_mmio_fragment { gpa_t gpa; void *data; - unsigned len; + u64 val; + unsigned int len; }; struct kvm_vcpu { @@ -849,13 +848,12 @@ struct kvm { struct hlist_head irq_ack_notifier_list; #endif -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; gfn_t mmu_invalidate_range_start; gfn_t mmu_invalidate_range_end; -#endif + struct list_head devices; u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; @@ -1032,6 +1030,13 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } +static inline bool kvm_is_vcpu_creation_in_progress(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->lock); + + return kvm->created_vcpus != atomic_read(&kvm->online_vcpus); +} + void kvm_destroy_vcpus(struct kvm *kvm); int kvm_trylock_all_vcpus(struct kvm *kvm); @@ -1631,6 +1636,13 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING /* + * kvm_arch_shutdown() is invoked immediately prior to forcefully disabling + * hardware virtualization on all CPUs via IPI function calls (in preparation + * for shutdown or reboot), e.g. to allow arch code to prepare for disabling + * virtualization while KVM may be actively running vCPUs. + */ +void kvm_arch_shutdown(void); +/* * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of * kvm_usage_count, i.e. at the beginning of the generic hardware enabling @@ -1943,56 +1955,43 @@ enum kvm_stat_kind { struct kvm_stat_data { struct kvm *kvm; - const struct _kvm_stats_desc *desc; + const struct kvm_stats_desc *desc; enum kvm_stat_kind kind; }; -struct _kvm_stats_desc { - struct kvm_stats_desc desc; - char name[KVM_STATS_NAME_SIZE]; -}; - -#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ - .flags = type | unit | base | \ - BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ - BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ - BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ - .exponent = exp, \ - .size = sz, \ +#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ + .flags = type | unit | base | \ + BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ + BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ + BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ + .exponent = exp, \ + .size = sz, \ .bucket_size = bsz -#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vm_stat, generic.stat) \ - }, \ - .name = #stat, \ - } -#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \ - }, \ - .name = #stat, \ - } -#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vm_stat, stat) \ - }, \ - .name = #stat, \ - } -#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vcpu_stat, stat) \ - }, \ - .name = #stat, \ - } +#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vm_stat, generic.stat), \ + .name = #stat, \ +} +#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vcpu_stat, generic.stat), \ + .name = #stat, \ +} +#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vm_stat, stat), \ + .name = #stat, \ +} +#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vcpu_stat, stat), \ + .name = #stat, \ +} /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */ #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz) @@ -2069,7 +2068,7 @@ struct _kvm_stats_desc { STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, - const struct _kvm_stats_desc *desc, + const struct kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset); @@ -2114,11 +2113,10 @@ static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value) extern const struct kvm_stats_header kvm_vm_stats_header; -extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; +extern const struct kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; -extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; +extern const struct kvm_stats_desc kvm_vcpu_stats_desc[]; -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) @@ -2196,7 +2194,6 @@ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; } -#endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING @@ -2318,7 +2315,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING extern bool enable_virt_at_load; -extern bool kvm_rebooting; #endif extern unsigned int halt_poll_ns; @@ -2384,6 +2380,7 @@ void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_arm_vgic_v3_ops; +extern struct kvm_device_ops kvm_arm_vgic_v5_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT @@ -2612,12 +2609,4 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range); #endif -#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -int kvm_enable_virtualization(void); -void kvm_disable_virtualization(void); -#else -static inline int kvm_enable_virtualization(void) { return 0; } -static inline void kvm_disable_virtualization(void) { } -#endif - #endif diff --git a/include/linux/leafops.h b/include/linux/leafops.h index a9ff94b744f2..992cd8bd8ed0 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h @@ -363,6 +363,23 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry) return swp_offset(entry) & SWP_PFN_MASK; } +static inline void softleaf_migration_sync(softleaf_t entry, + struct folio *folio) +{ + /* + * Ensure we do not race with split, which might alter tail pages into new + * folios and thus result in observing an unlocked folio. + * This matches the write barrier in __split_folio_to_order(). + */ + smp_rmb(); + + /* + * Any use of migration entries may only occur while the + * corresponding page is locked + */ + VM_WARN_ON_ONCE(!folio_test_locked(folio)); +} + /** * softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry. * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. @@ -374,11 +391,8 @@ static inline struct page *softleaf_to_page(softleaf_t entry) struct page *page = pfn_to_page(softleaf_to_pfn(entry)); VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding page is locked - */ - VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page)); + if (softleaf_is_migration(entry)) + softleaf_migration_sync(entry, page_folio(page)); return page; } @@ -394,12 +408,8 @@ static inline struct folio *softleaf_to_folio(softleaf_t entry) struct folio *folio = pfn_folio(softleaf_to_pfn(entry)); VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding folio is locked. - */ - VM_WARN_ON_ONCE(softleaf_is_migration(entry) && - !folio_test_locked(folio)); + if (softleaf_is_migration(entry)) + softleaf_migration_sync(entry, folio); return folio; } @@ -597,7 +607,20 @@ static inline bool pmd_is_migration_entry(pmd_t pmd) } /** - * pmd_is_valid_softleaf() - Is this PMD entry a valid leaf entry? + * softleaf_is_valid_pmd_entry() - Is the specified softleaf entry obtained from + * a PMD one that we support at PMD level? + * @entry: Entry to check. + * Returns: true if the softleaf entry is valid at PMD, otherwise false. + */ +static inline bool softleaf_is_valid_pmd_entry(softleaf_t entry) +{ + /* Only device private, migration entries valid for PMD. */ + return softleaf_is_device_private(entry) || + softleaf_is_migration(entry); +} + +/** + * pmd_is_valid_softleaf() - Is this PMD entry a valid softleaf entry? * @pmd: PMD entry. * * PMD leaf entries are valid only if they are device private or migration @@ -610,9 +633,27 @@ static inline bool pmd_is_valid_softleaf(pmd_t pmd) { const softleaf_t entry = softleaf_from_pmd(pmd); - /* Only device private, migration entries valid for PMD. */ - return softleaf_is_device_private(entry) || - softleaf_is_migration(entry); + return softleaf_is_valid_pmd_entry(entry); +} + +/** + * pmd_to_softleaf_folio() - Convert the PMD entry to a folio. + * @pmd: PMD entry. + * + * The PMD entry is expected to be a valid PMD softleaf entry. + * + * Returns: the folio the softleaf entry references if this is a valid softleaf + * entry, otherwise NULL. + */ +static inline struct folio *pmd_to_softleaf_folio(pmd_t pmd) +{ + const softleaf_t entry = softleaf_from_pmd(pmd); + + if (!softleaf_is_valid_pmd_entry(entry)) { + VM_WARN_ON_ONCE(true); + return NULL; + } + return softleaf_to_folio(entry); } #endif /* CONFIG_MMU */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 00346ce3af5e..5c085ef4eda7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1205,7 +1205,6 @@ extern unsigned int ata_do_dev_read_id(struct ata_device *dev, struct ata_taskfile *tf, __le16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern u64 ata_qc_get_active(struct ata_port *ap); -extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); extern int ata_std_bios_param(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]); @@ -1226,7 +1225,8 @@ extern int ata_ncq_prio_enable(struct ata_port *ap, struct scsi_device *sdev, extern struct ata_device *ata_dev_pair(struct ata_device *adev); int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap); -extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q); +int ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, + struct list_head *eh_q); /* * SATA specific code - drivers/ata/libata-sata.c diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h index b72b8cdba765..feb60ba4e30e 100644 --- a/include/linux/lis3lv02d.h +++ b/include/linux/lis3lv02d.h @@ -30,8 +30,8 @@ * @default_rate: Default sampling rate. 0 means reset default * @setup_resources: Interrupt line setup call back function * @release_resources: Interrupt line release call back function - * @st_min_limits[3]: Selftest acceptance minimum values - * @st_max_limits[3]: Selftest acceptance maximum values + * @st_min_limits: Selftest acceptance minimum values (x, y, z) + * @st_max_limits: Selftest acceptance maximum values (x, y, z) * @irq2: Irq line 2 number * * Platform data is used to setup the sensor chip. Meaning of the different diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h index fe82a6c3005f..dd11fdc76a5f 100644 --- a/include/linux/liveupdate.h +++ b/include/linux/liveupdate.h @@ -23,8 +23,11 @@ struct file; /** * struct liveupdate_file_op_args - Arguments for file operation callbacks. * @handler: The file handler being called. - * @retrieved: The retrieve status for the 'can_finish / finish' - * operation. + * @retrieve_status: The retrieve status for the 'can_finish / finish' + * operation. A value of 0 means the retrieve has not been + * attempted, a positive value means the retrieve was + * successful, and a negative value means the retrieve failed, + * and the value is the error code of the call. * @file: The file object. For retrieve: [OUT] The callback sets * this to the new file. For other ops: [IN] The caller sets * this to the file being operated on. @@ -40,7 +43,7 @@ struct file; */ struct liveupdate_file_op_args { struct liveupdate_file_handler *handler; - bool retrieved; + int retrieve_status; struct file *file; u64 serialized_data; void *private_data; diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index eff711bf973f..234be7f12c15 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -315,7 +315,7 @@ do { \ #endif /* CONFIG_PREEMPT_RT */ -#if defined(WARN_CONTEXT_ANALYSIS) +#if defined(WARN_CONTEXT_ANALYSIS) && !defined(__CHECKER__) /* * Because the compiler only knows about the base per-CPU variable, use this * helper function to make the compiler think we lock/unlock the @base variable, diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 382c56a97bba..584db296e43b 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -94,7 +94,7 @@ struct common_audit_data { #endif char *kmod_name; struct lsm_ioctlop_audit *op; - struct file *file; + const struct file *file; struct lsm_ibpkey_audit *ibpkey; struct lsm_ibendport_audit *ibendport; int reason; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 8c42b4bde09c..2b8dfb35caed 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -191,6 +191,9 @@ LSM_HOOK(int, 0, file_permission, struct file *file, int mask) LSM_HOOK(int, 0, file_alloc_security, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) +LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file, + const struct file *user_file) +LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, @@ -198,6 +201,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, LSM_HOOK(int, 0, mmap_addr, unsigned long addr) LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) +LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma, + struct file *backing_file, struct file *user_file) LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd) @@ -317,6 +322,11 @@ LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, LSM_HOOK(int, 0, watch_key, struct key *key) #endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */ +#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH) +LSM_HOOK(int, 0, unix_find, const struct path *path, struct sock *other, + int flags) +#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */ + #ifdef CONFIG_SECURITY_NETWORK LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other, struct sock *newsk) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index d48bf0ad26f4..b4f8cad53ddb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -104,6 +104,7 @@ struct security_hook_list { struct lsm_blob_sizes { unsigned int lbs_cred; unsigned int lbs_file; + unsigned int lbs_backing_file; unsigned int lbs_ib; unsigned int lbs_inode; unsigned int lbs_sock; diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 7b8aad47121e..0c464eade1d6 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -139,6 +139,7 @@ enum maple_type { maple_leaf_64, maple_range_64, maple_arange_64, + maple_copy, }; enum store_type { @@ -154,6 +155,46 @@ enum store_type { wr_slot_store, }; +struct maple_copy { + /* + * min, max, and pivots are values + * start, end, split are indexes into arrays + * data is a size + */ + + struct { + struct maple_node *node; + unsigned long max; + enum maple_type mt; + } dst[3]; + struct { + struct maple_node *node; + unsigned long max; + unsigned char start; + unsigned char end; + enum maple_type mt; + } src[4]; + /* Simulated node */ + void __rcu *slot[3]; + unsigned long gap[3]; + unsigned long min; + union { + unsigned long pivot[3]; + struct { + void *_pad[2]; + unsigned long max; + }; + }; + unsigned char end; + + /*Avoid passing these around */ + unsigned char s_count; + unsigned char d_count; + unsigned char split; + unsigned char data; + unsigned char height; +}; + /** * DOC: Maple tree flags * @@ -299,6 +340,7 @@ struct maple_node { }; struct maple_range_64 mr64; struct maple_arange_64 ma64; + struct maple_copy cp; }; }; diff --git a/include/linux/math.h b/include/linux/math.h index 6dc1d1d32fbc..1e8fb3efbc8c 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -89,23 +89,7 @@ } \ ) -/* - * Divide positive or negative dividend by positive or negative divisor - * and round to closest integer. Result is undefined for negative - * divisors if the dividend variable type is unsigned and for negative - * dividends if the divisor variable type is unsigned. - */ -#define DIV_ROUND_CLOSEST(x, divisor)( \ -{ \ - typeof(x) __x = x; \ - typeof(divisor) __d = divisor; \ - (((typeof(x))-1) > 0 || \ - ((typeof(divisor))-1) > 0 || \ - (((__x) > 0) == ((__d) > 0))) ? \ - (((__x) + ((__d) / 2)) / (__d)) : \ - (((__x) - ((__d) / 2)) / (__d)); \ -} \ -) +#define DIV_ROUND_CLOSEST __KERNEL_DIV_ROUND_CLOSEST /* * Same as above but for u64 dividends. divisor must be a 32-bit * number. diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h deleted file mode 100644 index cea443a672cb..000000000000 --- a/include/linux/mdio-gpio.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_MDIO_GPIO_H -#define __LINUX_MDIO_GPIO_H - -#define MDIO_GPIO_MDC 0 -#define MDIO_GPIO_MDIO 1 -#define MDIO_GPIO_MDO 2 - -#endif diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 5d1203b9af20..f4f9d9609448 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -688,8 +688,6 @@ static inline int mdiodev_c45_write(struct mdio_device *mdiodev, u32 devad, val); } -int mdiobus_register_device(struct mdio_device *mdiodev); -int mdiobus_unregister_device(struct mdio_device *mdiodev); bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 6ec5e9ac0699..9eac4f268359 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -155,6 +155,7 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); +int memblock_reserved_mark_kern(phys_addr_t base, phys_addr_t size); int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size); int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 70b685a85bf4..5173a9f16721 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,10 +35,10 @@ enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, MEMCG_PERCPU_B, - MEMCG_VMALLOC, MEMCG_KMEM, MEMCG_ZSWAP_B, MEMCG_ZSWAPPED, + MEMCG_ZSWAP_INCOMP, MEMCG_NR_STAT, }; diff --git a/include/linux/memfd.h b/include/linux/memfd.h index c328a7b356d0..b4fda09dab9f 100644 --- a/include/linux/memfd.h +++ b/include/linux/memfd.h @@ -18,6 +18,8 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx); */ int memfd_check_seals_mmap(struct file *file, vm_flags_t *vm_flags_ptr); struct file *memfd_alloc_file(const char *name, unsigned int flags); +int memfd_get_seals(struct file *file); +int memfd_add_seals(struct file *file, unsigned int seals); #else static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) { @@ -37,6 +39,16 @@ static inline struct file *memfd_alloc_file(const char *name, unsigned int flags { return ERR_PTR(-EINVAL); } + +static inline int memfd_get_seals(struct file *file) +{ + return -EINVAL; +} + +static inline int memfd_add_seals(struct file *file, unsigned int seals) +{ + return -EINVAL; +} #endif #endif /* __LINUX_MEMFD_H */ diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 96987d9d95a8..7999c58629ee 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -52,7 +52,7 @@ int mt_perf_to_adistance(struct access_coordinate *perf, int *adist); struct memory_dev_type *mt_find_alloc_memory_type(int adist, struct list_head *memory_types); void mt_put_memory_types(struct list_head *memory_types); -#ifdef CONFIG_MIGRATION +#ifdef CONFIG_NUMA_MIGRATION int next_demotion_node(int node, const nodemask_t *allowed_mask); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); bool node_is_toptier(int node); diff --git a/include/linux/memory.h b/include/linux/memory.h index faeaa921e55b..5bb5599c6b2b 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -19,6 +19,7 @@ #include <linux/node.h> #include <linux/compiler.h> #include <linux/mutex.h> +#include <linux/memory_hotplug.h> #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) @@ -77,7 +78,7 @@ enum memory_block_state { struct memory_block { unsigned long start_section_nr; enum memory_block_state state; /* serialized by the dev->lock */ - int online_type; /* for passing data to online routine */ + enum mmop online_type; /* for passing data to online routine */ int nid; /* NID for this memory block */ /* * The single zone of this memory block if all PFNs of this memory block diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index f2f16cdd73ee..815e908c4135 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -16,11 +16,8 @@ struct resource; struct vmem_altmap; struct dev_pagemap; -#ifdef CONFIG_MEMORY_HOTPLUG -struct page *pfn_to_online_page(unsigned long pfn); - /* Types for control the zone type of onlined and offlined memory */ -enum { +enum mmop { /* Offline the memory. */ MMOP_OFFLINE = 0, /* Online the memory. Zone depends, see default_zone_for_pfn(). */ @@ -31,6 +28,9 @@ enum { MMOP_ONLINE_MOVABLE, }; +#ifdef CONFIG_MEMORY_HOTPLUG +struct page *pfn_to_online_page(unsigned long pfn); + /* Flags for add_memory() and friends to specify memory hotplug details. */ typedef int __bitwise mhp_t; @@ -286,8 +286,8 @@ static inline void __remove_memory(u64 start, u64 size) {} #ifdef CONFIG_MEMORY_HOTPLUG /* Default online_type (MMOP_*) when new memory blocks are added. */ -extern int mhp_get_default_online_type(void); -extern void mhp_set_default_online_type(int online_type); +extern enum mmop mhp_get_default_online_type(void); +extern void mhp_set_default_online_type(enum mmop online_type); extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat); extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); @@ -308,10 +308,8 @@ extern int sparse_add_section(int nid, unsigned long pfn, struct dev_pagemap *pgmap); extern void sparse_remove_section(unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap); -extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, - unsigned long pnum); -extern struct zone *zone_for_pfn_range(int online_type, int nid, - struct memory_group *group, unsigned long start_pfn, +extern struct zone *zone_for_pfn_range(enum mmop online_type, + int nid, struct memory_group *group, unsigned long start_pfn, unsigned long nr_pages); extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0fe96f3ab3ef..65c732d440d2 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -55,6 +55,7 @@ struct mempolicy { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ } w; + struct rcu_head rcu; }; /* diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index f72e6d4b14a7..d465dcd8c90a 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -117,11 +117,6 @@ struct arizona_pdata { /** Check for line output with HPDET method */ bool hpdet_acc_id_line; -#ifdef CONFIG_GPIOLIB_LEGACY - /** GPIO used for mic isolation with HPDET */ - int hpdet_id_gpio; -#endif - /** Channel to use for headphone detection */ unsigned int hpdet_channel; @@ -131,11 +126,6 @@ struct arizona_pdata { /** Extra debounce timeout used during initial mic detection (ms) */ unsigned int micd_detect_debounce; -#ifdef CONFIG_GPIOLIB_LEGACY - /** GPIO for mic detection polarity */ - int micd_pol_gpio; -#endif - /** Mic detect ramp rate */ unsigned int micd_bias_start_time; diff --git a/include/linux/mfd/cs42l43-regs.h b/include/linux/mfd/cs42l43-regs.h index c39a49269cb7..68831f113589 100644 --- a/include/linux/mfd/cs42l43-regs.h +++ b/include/linux/mfd/cs42l43-regs.h @@ -1181,4 +1181,80 @@ /* CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG */ #define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL 0xF05AA50F +/* CS42L43B VARIANT REGISTERS */ +#define CS42L43B_DEVID_VAL 0x0042A43B + +#define CS42L43B_DECIM_VOL_CTRL_CH1_CH2 0x00008280 +#define CS42L43B_DECIM_VOL_CTRL_CH3_CH4 0x00008284 + +#define CS42L43B_DECIM_VOL_CTRL_CH5_CH6 0x00008290 +#define CS42L43B_DECIM_VOL_CTRL_UPDATE 0x0000829C + +#define CS42L43B_DECIM_HPF_WNF_CTRL5 0x000082A0 +#define CS42L43B_DECIM_HPF_WNF_CTRL6 0x000082A4 + +#define CS42L43B_SWIRE_DP3_CH3_INPUT 0x0000C320 +#define CS42L43B_SWIRE_DP3_CH4_INPUT 0x0000C330 +#define CS42L43B_SWIRE_DP4_CH3_INPUT 0x0000C340 +#define CS42L43B_SWIRE_DP4_CH4_INPUT 0x0000C350 + +#define CS42L43B_ISRC1DEC3_INPUT1 0x0000C780 +#define CS42L43B_ISRC1DEC4_INPUT1 0x0000C790 +#define CS42L43B_ISRC2DEC3_INPUT1 0x0000C7A0 +#define CS42L43B_ISRC2DEC4_INPUT1 0x0000C7B0 + +#define CS42L43B_FW_MISSION_CTRL_NEED_CONFIGS 0x00117E00 +#define CS42L43B_FW_MISSION_CTRL_HAVE_CONFIGS 0x00117E04 +#define CS42L43B_FW_MISSION_CTRL_PATCH_START_ADDR_REG 0x00117E08 +#define CS42L43B_FW_MISSION_CTRL_MM_CTRL_SELECTION 0x00117E0C +#define CS42L43B_FW_MISSION_CTRL_MM_MCU_CFG_REG 0x00117E10 + +#define CS42L43B_MCU_SW_REV 0x00117314 +#define CS42L43B_PATCH_START_ADDR 0x00117318 +#define CS42L43B_CONFIG_SELECTION 0x0011731C +#define CS42L43B_NEED_CONFIGS 0x00117320 +#define CS42L43B_BOOT_STATUS 0x00117330 + +#define CS42L43B_FW_MISSION_CTRL_NEED_CONFIGS 0x00117E00 +#define CS42L43B_FW_MISSION_CTRL_HAVE_CONFIGS 0x00117E04 +#define CS42L43B_FW_MISSION_CTRL_PATCH_START_ADDR_REG 0x00117E08 +#define CS42L43B_FW_MISSION_CTRL_MM_CTRL_SELECTION 0x00117E0C +#define CS42L43B_FW_MISSION_CTRL_MM_MCU_CFG_REG 0x00117E10 + +#define CS42L43B_MCU_RAM_MAX 0x00117FFF + +/* CS42L43B_DECIM_DECIM_VOL_CTRL_CH5_CH6 */ +#define CS42L43B_DECIM6_MUTE_MASK 0x80000000 +#define CS42L43B_DECIM6_MUTE_SHIFT 31 +#define CS42L43B_DECIM6_VOL_MASK 0x3FC00000 +#define CS42L43B_DECIM6_VOL_SHIFT 22 +#define CS42L43B_DECIM6_PATH1_VOL_FALL_RATE_MASK 0x00380000 +#define CS42L43B_DECIM6_PATH1_VOL_FALL_RATE_SHIFT 19 +#define CS42L43B_DECIM6_PATH1_VOL_RISE_RATE_MASK 0x00070000 +#define CS42L43B_DECIM6_PATH1_VOL_RISE_RATE_SHIFT 16 +#define CS42L43B_DECIM5_MUTE_MASK 0x00008000 +#define CS42L43B_DECIM5_MUTE_SHIFT 15 +#define CS42L43B_DECIM5_VOL_MASK 0x00003FC0 +#define CS42L43B_DECIM5_VOL_SHIFT 6 +#define CS42L43B_DECIM5_PATH1_VOL_FALL_RATE_MASK 0x00000038 +#define CS42L43B_DECIM5_PATH1_VOL_FALL_RATE_SHIFT 3 +#define CS42L43B_DECIM5_PATH1_VOL_RISE_RATE_MASK 0x00000007 +#define CS42L43B_DECIM5_PATH1_VOL_RISE_RATE_SHIFT 0 + +/* CS42L43B_DECIM_VOL_CTRL_UPDATE */ +#define CS42L43B_DECIM6_PATH1_VOL_TRIG_MASK 0x00000800 +#define CS42L43B_DECIM6_PATH1_VOL_TRIG_SHIFT 11 +#define CS42L43B_DECIM5_PATH1_VOL_TRIG_MASK 0x00000100 +#define CS42L43B_DECIM5_PATH1_VOL_TRIG_SHIFT 8 +#define CS42L43B_DECIM4_VOL_UPDATE_MASK 0x00000020 +#define CS42L43B_DECIM4_VOL_UPDATE_SHIFT 5 + +/* CS42L43_ISRC1_CTRL..CS42L43_ISRC2_CTRL */ +#define CS42L43B_ISRC_DEC4_EN_MASK 0x00000008 +#define CS42L43B_ISRC_DEC4_EN_SHIFT 3 +#define CS42L43B_ISRC_DEC4_EN_WIDTH 1 +#define CS42L43B_ISRC_DEC3_EN_MASK 0x00000004 +#define CS42L43B_ISRC_DEC3_EN_SHIFT 2 +#define CS42L43B_ISRC_DEC3_EN_WIDTH 1 + #endif /* CS42L43_CORE_REGS_H */ diff --git a/include/linux/mfd/cs42l43.h b/include/linux/mfd/cs42l43.h index 2239d8585e78..ff0f7e365a19 100644 --- a/include/linux/mfd/cs42l43.h +++ b/include/linux/mfd/cs42l43.h @@ -98,6 +98,7 @@ struct cs42l43 { bool sdw_pll_active; bool attached; bool hw_lock; + long variant_id; }; #endif /* CS42L43_CORE_EXT_H */ diff --git a/include/linux/mfd/kempld.h b/include/linux/mfd/kempld.h index 643c096b93ac..2dbd80abfd1d 100644 --- a/include/linux/mfd/kempld.h +++ b/include/linux/mfd/kempld.h @@ -37,6 +37,7 @@ #define KEMPLD_SPEC_GET_MINOR(x) (x & 0x0f) #define KEMPLD_SPEC_GET_MAJOR(x) ((x >> 4) & 0x0f) #define KEMPLD_IRQ_GPIO 0x35 +#define KEMPLD_IRQ_GPIO_MASK 0x0f #define KEMPLD_IRQ_I2C 0x36 #define KEMPLD_CFG 0x37 #define KEMPLD_CFG_GPIO_I2C_MUX (1 << 0) diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index ca691641788b..9c6f9817383f 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -33,6 +33,7 @@ #define PHY_ID_LAN8804 0x00221670 #define PHY_ID_LAN8841 0x00221650 #define PHY_ID_LAN8842 0x002216C0 +#define PHY_ID_LAN9645X 0x002216D0 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h index 517288da19fd..7da956c666a0 100644 --- a/include/linux/microchipphy.h +++ b/include/linux/microchipphy.h @@ -61,6 +61,11 @@ /* Registers specific to the LAN7800/LAN7850 embedded phy */ #define LAN78XX_PHY_LED_MODE_SELECT (0x1D) +/* PHY Control 3 register (page 1) */ +#define LAN78XX_PHY_CTRL3 (0x14) +#define LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT BIT(4) +#define LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK GENMASK(3, 2) + /* DSP registers */ #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG (0x806A) #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_ (0x2000) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 26ca00c325d9..d5af2b7f577b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, @@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag return -ENOSYS; } +static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) + __releases(ptl) +{ + WARN_ON_ONCE(1); + + spin_unlock(ptl); +} + #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_NUMA_BALANCING diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b37fe39cef27..07a25f264292 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -293,6 +293,7 @@ enum { MLX5_UMR_INLINE = (1 << 7), }; +#define MLX5_UMR_ALIGN (2048) #define MLX5_UMR_FLEX_ALIGNMENT 0x40 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) @@ -1259,6 +1260,7 @@ enum mlx5_cap_type { MLX5_CAP_PORT_SELECTION = 0x25, MLX5_CAP_ADV_VIRTUALIZATION = 0x26, MLX5_CAP_ADV_RDMA = 0x28, + MLX5_CAP_TLP_EMULATION = 0x2a, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1481,6 +1483,14 @@ enum mlx5_qcam_feature_groups { MLX5_GET64(virtio_emulation_cap, \ (mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap) +#define MLX5_CAP_DEV_TLP_EMULATION(mdev, cap)\ + MLX5_GET(tlp_dev_emu_capabilities, \ + (mdev)->caps.hca[MLX5_CAP_TLP_EMULATION]->cur, cap) + +#define MLX5_CAP64_DEV_TLP_EMULATION(mdev, cap)\ + MLX5_GET64(tlp_dev_emu_capabilities, \ + (mdev)->caps.hca[MLX5_CAP_TLP_EMULATION]->cur, cap) + #define MLX5_CAP_IPSEC(mdev, cap)\ MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 04dcd09f7517..e1ded9cf0f70 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -550,7 +550,7 @@ struct mlx5_debugfs_entries { }; enum mlx5_func_type { - MLX5_PF, + MLX5_SELF, MLX5_VF, MLX5_SF, MLX5_HOST_PF, @@ -755,7 +755,6 @@ struct mlx5_core_dev { } caps; struct mlx5_timeouts *timeouts; u64 sys_image_guid; - phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; phys_addr_t bar_addr; enum mlx5_device_state state; @@ -798,6 +797,7 @@ struct mlx5_core_dev { enum mlx5_wc_state wc_state; /* sync write combining state */ struct mutex wc_state_lock; + struct devlink *shd; }; struct mlx5_db { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 9cadb1d5e6df..d8f3b7ef319e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -55,6 +55,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM, MLX5_FLOW_DESTINATION_TYPE_RANGE, MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE, + MLX5_FLOW_DESTINATION_TYPE_VHCA_RX, }; enum { @@ -190,6 +191,9 @@ struct mlx5_flow_destination { struct mlx5_flow_table *ft; struct mlx5_fc *counter; struct { + u16 id; + } vhca; + struct { u16 num; u16 vhca_id; struct mlx5_pkt_reformat *pkt_reformat; @@ -248,9 +252,9 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table_attr *ft_attr, u16 vport); -struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( - struct mlx5_flow_namespace *ns, - int prio, u32 level); +struct mlx5_flow_table * +mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr); int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); /* inbox should be set with the following values: diff --git a/include/linux/mlx5/lag.h b/include/linux/mlx5/lag.h new file mode 100644 index 000000000000..ab9f754664e5 --- /dev/null +++ b/include/linux/mlx5/lag.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LAG_API_H__ +#define __MLX5_LAG_API_H__ + +#include <linux/types.h> + +struct mlx5_core_dev; +struct mlx5_flow_table; +struct mlx5_flow_table_attr; + +int mlx5_lag_demux_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr); +void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev); +int mlx5_lag_demux_rule_add(struct mlx5_core_dev *dev, u16 vport_num, + int vport_index); +void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int vport_index); +int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev); + +#endif /* __MLX5_LAG_API_H__ */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 775cb0c56865..49f3ad4b1a7c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -469,7 +469,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 table_miss_action_domain[0x1]; u8 termination_table[0x1]; u8 reformat_and_fwd_to_table[0x1]; - u8 reserved_at_1a[0x2]; + u8 forward_vhca_rx[0x1]; + u8 reserved_at_1b[0x1]; u8 ipsec_encrypt[0x1]; u8 ipsec_decrypt[0x1]; u8 sw_owner_v2[0x1]; @@ -1389,6 +1390,26 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 reserved_at_1c0[0x640]; }; +struct mlx5_ifc_tlp_dev_emu_capabilities_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x13]; + u8 log_tlp_rsp_gw_page_stride[0x5]; + u8 reserved_at_38[0x8]; + + u8 reserved_at_40[0xc0]; + + u8 reserved_at_100[0xc]; + u8 tlp_rsp_gw_num_pages[0x4]; + u8 reserved_at_110[0x10]; + + u8 reserved_at_120[0xa0]; + + u8 tlp_rsp_gw_pages_bar_offset[0x40]; + + u8 reserved_at_200[0x600]; +}; + enum { MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, @@ -1633,6 +1654,11 @@ enum { MLX5_STEERING_FORMAT_CONNECTX_8 = 3, }; +enum { + MLX5_ID_MODE_FUNCTION_INDEX = 0, + MLX5_ID_MODE_FUNCTION_VHCA_ID = 1, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x6]; u8 page_request_disable[0x1]; @@ -1895,7 +1921,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_2a0[0x7]; + u8 icm_mng_function_id_mode[0x1]; + u8 reserved_at_2a1[0x6]; u8 mkey_pcie_tph[0x1]; u8 reserved_at_2a8[0x1]; u8 tis_tir_td_order[0x1]; @@ -1947,7 +1974,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; - u8 reserved_at_368[0x3]; + u8 ft_alias_sw_vhca_id[0x1]; + u8 reserved_at_369[0x2]; u8 log_max_sq[0x5]; u8 reserved_at_370[0x3]; u8 log_max_tir[0x5]; @@ -1961,7 +1989,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_rqt[0x5]; u8 reserved_at_390[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_at_398[0x1]; + u8 tlp_device_emulation_manager[0x1]; u8 vnic_env_cnt_bar_uar_access[0x1]; u8 vnic_env_cnt_odp_page_fault[0x1]; u8 log_max_tis_per_sq[0x5]; @@ -1992,12 +2020,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 disable_local_lb_mc[0x1]; u8 log_min_hairpin_wq_data_sz[0x5]; u8 reserved_at_3e8[0x1]; - u8 silent_mode[0x1]; + u8 silent_mode_set[0x1]; u8 vhca_state[0x1]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; - u8 reserved_at_3f8[0x3]; + u8 reserved_at_3f8[0x1]; + u8 silent_mode_query[0x1]; + u8 reserved_at_3fa[0x1]; u8 log_max_current_uc_list[0x5]; u8 general_obj_types[0x40]; @@ -2173,7 +2203,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 sf_eq_usage[0x1]; u8 reserved_at_d3[0x5]; u8 multiplane[0x1]; - u8 reserved_at_d9[0x7]; + u8 migration_state[0x1]; + u8 reserved_at_da[0x6]; u8 cross_vhca_object_to_object_supported[0x20]; @@ -2259,6 +2290,7 @@ enum mlx5_ifc_flow_destination_type { MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT = 0x0, MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_IFC_FLOW_DESTINATION_TYPE_TIR = 0x2, + MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX = 0x4, MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK = 0x8, MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE = 0xA, @@ -3830,6 +3862,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_tls_cap_bits tls_cap; struct mlx5_ifc_device_mem_cap_bits device_mem_cap; struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; + struct mlx5_ifc_tlp_dev_emu_capabilities_bits tlp_dev_emu_capabilities; struct mlx5_ifc_macsec_cap_bits macsec_cap; struct mlx5_ifc_crypto_cap_bits crypto_cap; struct mlx5_ifc_ipsec_cap_bits ipsec_cap; @@ -6244,7 +6277,9 @@ struct mlx5_ifc_query_l2_table_entry_out_bits { u8 reserved_at_40[0xa0]; - u8 reserved_at_e0[0x13]; + u8 reserved_at_e0[0x11]; + u8 silent_mode[0x1]; + u8 reserved_at_f2[0x1]; u8 vlan_valid[0x1]; u8 vlan[0xc]; @@ -6260,7 +6295,10 @@ struct mlx5_ifc_query_l2_table_entry_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x60]; + u8 reserved_at_40[0x40]; + + u8 silent_mode_query[0x1]; + u8 reserved_at_81[0x1f]; u8 reserved_at_a0[0x8]; u8 table_index[0x18]; @@ -6927,7 +6965,9 @@ struct mlx5_ifc_create_match_definer_out_bits { struct mlx5_ifc_alias_context_bits { u8 vhca_id_to_be_accessed[0x10]; - u8 reserved_at_10[0xd]; + u8 reserved_at_10[0xb]; + u8 vhca_id_type[0x1]; + u8 reserved_at_1c[0x1]; u8 status[0x3]; u8 object_id_to_be_accessed[0x20]; u8 reserved_at_40[0x40]; @@ -10824,7 +10864,9 @@ struct mlx5_ifc_pcam_enhanced_features_bits { u8 fec_200G_per_lane_in_pplm[0x1]; u8 reserved_at_1e[0x2a]; u8 fec_100G_per_lane_in_pplm[0x1]; - u8 reserved_at_49[0xa]; + u8 reserved_at_49[0x2]; + u8 shp_pbmc_pbsr_support[0x1]; + u8 reserved_at_4c[0x7]; u8 buffer_ownership[0x1]; u8 resereved_at_54[0x14]; u8 fec_50G_per_lane_in_pplm[0x1]; @@ -12069,8 +12111,9 @@ struct mlx5_ifc_pbmc_reg_bits { u8 port_buffer_size[0x10]; struct mlx5_ifc_bufferx_reg_bits buffer[10]; + struct mlx5_ifc_bufferx_reg_bits shared_headroom_pool; - u8 reserved_at_2e0[0x80]; + u8 reserved_at_320[0x40]; }; struct mlx5_ifc_sbpr_reg_bits { @@ -13280,13 +13323,24 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 reserved_at_60[0x20]; }; +enum { + MLX5_QUERY_VHCA_MIG_STATE_UNINITIALIZED = 0x0, + MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_IDLE = 0x1, + MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_READY = 0x2, + MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_DIRTY = 0x3, + MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_INIT = 0x4, +}; + struct mlx5_ifc_query_vhca_migration_state_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + + u8 migration_state[0x4]; + u8 reserved_at_64[0x1c]; u8 required_umem_size[0x20]; diff --git a/include/linux/mm.h b/include/linux/mm.h index 5be3d8a8f806..8260e28205e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -27,7 +27,6 @@ #include <linux/page-flags.h> #include <linux/page_ref.h> #include <linux/overflow.h> -#include <linux/sizes.h> #include <linux/sched.h> #include <linux/pgtable.h> #include <linux/kasan.h> @@ -208,8 +207,6 @@ static inline void __mm_zero_struct_page(struct page *page) #define MAPCOUNT_ELF_CORE_MARGIN (5) #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) -extern int sysctl_max_map_count; - extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; @@ -349,9 +346,9 @@ enum { * if KVM does not lock down the memory type. */ DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39), -#ifdef CONFIG_PPC32 +#if defined(CONFIG_PPC32) DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1), -#else +#elif defined(CONFIG_64BIT) DECLARE_VMA_BIT(DROPPABLE, 40), #endif DECLARE_VMA_BIT(UFFD_MINOR, 41), @@ -466,8 +463,10 @@ enum { #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) || \ defined(CONFIG_RISCV_USER_CFI) #define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK) +#define VMA_STARTGAP_FLAGS mk_vma_flags(VMA_GROWSDOWN_BIT, VMA_SHADOW_STACK_BIT) #else #define VM_SHADOW_STACK VM_NONE +#define VMA_STARTGAP_FLAGS mk_vma_flags(VMA_GROWSDOWN_BIT) #endif #if defined(CONFIG_PPC64) #define VM_SAO INIT_VM_FLAG(SAO) @@ -506,32 +505,41 @@ enum { #endif #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) #define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE) +#define VMA_DROPPABLE mk_vma_flags(VMA_DROPPABLE_BIT) #else #define VM_DROPPABLE VM_NONE +#define VMA_DROPPABLE EMPTY_VMA_FLAGS #endif /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) -#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) +#define TASK_EXEC_BIT ((current->personality & READ_IMPLIES_EXEC) ? \ + VMA_EXEC_BIT : VMA_READ_BIT) /* Common data flag combinations */ -#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ - VM_MAYWRITE | VM_MAYEXEC) -#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ -#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC +#define VMA_DATA_FLAGS_TSK_EXEC mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \ + TASK_EXEC_BIT, VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT, \ + VMA_MAYEXEC_BIT) +#define VMA_DATA_FLAGS_NON_EXEC mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \ + VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT, VMA_MAYEXEC_BIT) +#define VMA_DATA_FLAGS_EXEC mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \ + VMA_EXEC_BIT, VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT, \ + VMA_MAYEXEC_BIT) + +#ifndef VMA_DATA_DEFAULT_FLAGS /* arch can override this */ +#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_EXEC #endif -#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ -#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS +#ifndef VMA_STACK_DEFAULT_FLAGS /* arch can override this */ +#define VMA_STACK_DEFAULT_FLAGS VMA_DATA_DEFAULT_FLAGS #endif -#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) +#define VMA_STACK_FLAGS append_vma_flags(VMA_STACK_DEFAULT_FLAGS, \ + VMA_STACK_BIT, VMA_ACCOUNT_BIT) + +/* Temporary until VMA flags conversion complete. */ +#define VM_STACK_FLAGS vma_flags_to_legacy(VMA_STACK_FLAGS) #ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS #define VM_SEALED_SYSMAP VM_SEALED @@ -539,15 +547,17 @@ enum { #define VM_SEALED_SYSMAP VM_NONE #endif -#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) - /* VMA basic access permission flags */ #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) +#define VMA_ACCESS_FLAGS mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT) /* * Special vmas that are non-mergable, non-mlock()able. */ -#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) + +#define VMA_SPECIAL_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_DONTEXPAND_BIT, \ + VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT) +#define VM_SPECIAL vma_flags_to_legacy(VMA_SPECIAL_FLAGS) /* * Physically remapped pages are special. Tell the @@ -574,6 +584,8 @@ enum { /* This mask represents all the VMA flag bits used by mlock */ #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) +#define VMA_LOCKED_MASK mk_vma_flags(VMA_LOCKED_BIT, VMA_LOCKONFAULT_BIT) + /* These flags can be updated atomically via VMA/mmap read lock. */ #define VM_ATOMIC_SET_ALLOWED VM_MAYBE_GUARD @@ -588,27 +600,32 @@ enum { * possesses it but the other does not, the merged VMA should nonetheless have * applied to it: * - * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its - * references cleared via /proc/$pid/clear_refs, any merged VMA - * should be considered soft-dirty also as it operates at a VMA - * granularity. + * VMA_SOFTDIRTY_BIT - if a VMA is marked soft-dirty, that is has not had its + * references cleared via /proc/$pid/clear_refs, any + * merged VMA should be considered soft-dirty also as it + * operates at a VMA granularity. * - * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that - * mapped page tables may contain metadata not described by the - * VMA and thus any merged VMA may also contain this metadata, - * and thus we must make this flag sticky. + * VMA_MAYBE_GUARD_BIT - If a VMA may have guard regions in place it implies + * that mapped page tables may contain metadata not + * described by the VMA and thus any merged VMA may also + * contain this metadata, and thus we must make this flag + * sticky. */ -#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) +#ifdef CONFIG_MEM_SOFT_DIRTY +#define VMA_STICKY_FLAGS mk_vma_flags(VMA_SOFTDIRTY_BIT, VMA_MAYBE_GUARD_BIT) +#else +#define VMA_STICKY_FLAGS mk_vma_flags(VMA_MAYBE_GUARD_BIT) +#endif /* * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one * of these flags and the other not does not preclude a merge. * - * VM_STICKY - When merging VMAs, VMA flags must match, unless they are - * 'sticky'. If any sticky flags exist in either VMA, we simply - * set all of them on the merged VMA. + * VMA_STICKY_FLAGS - When merging VMAs, VMA flags must match, unless they + * are 'sticky'. If any sticky flags exist in either VMA, + * we simply set all of them on the merged VMA. */ -#define VM_IGNORE_MERGE VM_STICKY +#define VMA_IGNORE_MERGE_FLAGS VMA_STICKY_FLAGS /* * Flags which should result in page tables being copied on fork. These are @@ -747,15 +764,37 @@ struct vm_fault { * to the functions called when a no-page or a wp-page exception occurs. */ struct vm_operations_struct { - void (*open)(struct vm_area_struct * area); + /** + * @open: Called when a VMA is remapped, split or forked. Not called + * upon first mapping a VMA. + * Context: User context. May sleep. Caller holds mmap_lock. + */ + void (*open)(struct vm_area_struct *vma); /** * @close: Called when the VMA is being removed from the MM. * Context: User context. May sleep. Caller holds mmap_lock. */ - void (*close)(struct vm_area_struct * area); + void (*close)(struct vm_area_struct *vma); + /** + * @mapped: Called when the VMA is first mapped in the MM. Not called if + * the new VMA is merged with an adjacent VMA. + * + * The @vm_private_data field is an output field allowing the user to + * modify vma->vm_private_data as necessary. + * + * ONLY valid if set from f_op->mmap_prepare. Will result in an error if + * set from f_op->mmap. + * + * Returns %0 on success, or an error otherwise. On error, the VMA will + * be unmapped. + * + * Context: User context. May sleep. Caller holds mmap_lock. + */ + int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff, + const struct file *file, void **vm_private_data); /* Called any time before splitting to check if it's allowed */ - int (*may_split)(struct vm_area_struct *area, unsigned long addr); - int (*mremap)(struct vm_area_struct *area); + int (*may_split)(struct vm_area_struct *vma, unsigned long addr); + int (*mremap)(struct vm_area_struct *vma); /* * Called by mprotect() to make driver-specific permission * checks before mprotect() is finalised. The VMA must not @@ -767,7 +806,7 @@ struct vm_operations_struct { vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); vm_fault_t (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); - unsigned long (*pagesize)(struct vm_area_struct * area); + unsigned long (*pagesize)(struct vm_area_struct *vma); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -937,22 +976,20 @@ static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_init(vma, flags); } -static inline void vm_flags_reset_once(struct vm_area_struct *vma, - vm_flags_t flags) +static inline void vma_flags_reset_once(struct vm_area_struct *vma, + vma_flags_t *flags) { - vma_assert_write_locked(vma); - /* - * If VMA flags exist beyond the first system word, also clear these. It - * is assumed the write once behaviour is required only for the first - * system word. - */ + const unsigned long word = flags->__vma_flags[0]; + + /* It is assumed only the first system word must be written once. */ + vma_flags_overwrite_word_once(&vma->flags, word); + /* The remainder can be copied normally. */ if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) { - unsigned long *bitmap = vma->flags.__vma_flags; + unsigned long *dst = &vma->flags.__vma_flags[1]; + const unsigned long *src = &flags->__vma_flags[1]; - bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG); + bitmap_copy(dst, src, NUM_VMA_FLAG_BITS - BITS_PER_LONG); } - - vma_flags_overwrite_word_once(&vma->flags, flags); } static inline void vm_flags_set(struct vm_area_struct *vma, @@ -991,7 +1028,8 @@ static inline void vm_flags_mod(struct vm_area_struct *vma, __vm_flags_mod(vma, set, clear); } -static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_t bit) +static __always_inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, + vma_flag_t bit) { const vm_flags_t mask = BIT((__force int)bit); @@ -1006,7 +1044,8 @@ static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_ * Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific * valid flags are allowed to do this. */ -static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit) +static __always_inline void vma_set_atomic_flag(struct vm_area_struct *vma, + vma_flag_t bit) { unsigned long *bitmap = vma->flags.__vma_flags; @@ -1022,7 +1061,8 @@ static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bi * This is necessarily racey, so callers must ensure that serialisation is * achieved through some other means, or that races are permissible. */ -static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit) +static __always_inline bool vma_test_atomic_flag(struct vm_area_struct *vma, + vma_flag_t bit) { if (__vma_atomic_valid_flag(vma, bit)) return test_bit((__force int)bit, &vma->vm_flags); @@ -1031,21 +1071,21 @@ static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t b } /* Set an individual VMA flag in flags, non-atomically. */ -static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit) +static __always_inline void vma_flags_set_flag(vma_flags_t *flags, + vma_flag_t bit) { unsigned long *bitmap = flags->__vma_flags; __set_bit((__force int)bit, bitmap); } -static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) +static __always_inline vma_flags_t __mk_vma_flags(vma_flags_t flags, + size_t count, const vma_flag_t *bits) { - vma_flags_t flags; int i; - vma_flags_clear_all(&flags); for (i = 0; i < count; i++) - vma_flag_set(&flags, bits[i]); + vma_flags_set_flag(&flags, bits[i]); return flags; } @@ -1054,16 +1094,73 @@ static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) * vma_flags_t bitmap value. E.g.: * * vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT, - * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT); + * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT); * * The compiler cleverly optimises away all of the work and this ends up being * equivalent to aggregating the values manually. */ -#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \ - (const vma_flag_t []){__VA_ARGS__}) +#define mk_vma_flags(...) __mk_vma_flags(EMPTY_VMA_FLAGS, \ + COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__}) + +/* + * Helper macro which acts like mk_vma_flags, only appending to a copy of the + * specified flags rather than establishing new flags. E.g.: + * + * vma_flags_t flags = append_vma_flags(VMA_STACK_DEFAULT_FLAGS, VMA_STACK_BIT, + * VMA_ACCOUNT_BIT); + */ +#define append_vma_flags(flags, ...) __mk_vma_flags(flags, \ + COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__}) + +/* Calculates the number of set bits in the specified VMA flags. */ +static __always_inline int vma_flags_count(const vma_flags_t *flags) +{ + const unsigned long *bitmap = flags->__vma_flags; + + return bitmap_weight(bitmap, NUM_VMA_FLAG_BITS); +} + +/* + * Test whether a specific VMA flag is set, e.g.: + * + * if (vma_flags_test(flags, VMA_READ_BIT)) { ... } + */ +static __always_inline bool vma_flags_test(const vma_flags_t *flags, + vma_flag_t bit) +{ + const unsigned long *bitmap = flags->__vma_flags; + + return test_bit((__force int)bit, bitmap); +} + +/* + * Obtain a set of VMA flags which contain the overlapping flags contained + * within flags and to_and. + */ +static __always_inline vma_flags_t vma_flags_and_mask(const vma_flags_t *flags, + vma_flags_t to_and) +{ + vma_flags_t dst; + unsigned long *bitmap_dst = dst.__vma_flags; + const unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_and = to_and.__vma_flags; + + bitmap_and(bitmap_dst, bitmap, bitmap_to_and, NUM_VMA_FLAG_BITS); + return dst; +} + +/* + * Obtain a set of VMA flags which contains the specified overlapping flags, + * e.g.: + * + * vma_flags_t read_flags = vma_flags_and(&flags, VMA_READ_BIT, + * VMA_MAY_READ_BIT); + */ +#define vma_flags_and(flags, ...) \ + vma_flags_and_mask(flags, mk_vma_flags(__VA_ARGS__)) /* Test each of to_test flags in flags, non-atomically. */ -static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags, +static __always_inline bool vma_flags_test_any_mask(const vma_flags_t *flags, vma_flags_t to_test) { const unsigned long *bitmap = flags->__vma_flags; @@ -1075,10 +1172,10 @@ static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags, /* * Test whether any specified VMA flag is set, e.g.: * - * if (vma_flags_test(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... } + * if (vma_flags_test_any(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... } */ -#define vma_flags_test(flags, ...) \ - vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__)) +#define vma_flags_test_any(flags, ...) \ + vma_flags_test_any_mask(flags, mk_vma_flags(__VA_ARGS__)) /* Test that ALL of the to_test flags are set, non-atomically. */ static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags, @@ -1098,8 +1195,29 @@ static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags, #define vma_flags_test_all(flags, ...) \ vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__)) +/* + * Helper to test that a flag mask of type vma_flags_t has a SINGLE flag set + * (returning false if flagmask has no flags set). + * + * This is defined to make the semantics clearer when testing an optionally + * defined VMA flags mask, e.g.: + * + * if (vma_flags_test_single_mask(&flags, VMA_DROPPABLE)) { ... } + * + * When VMA_DROPPABLE is defined if available, or set to EMPTY_VMA_FLAGS + * otherwise. + */ +static __always_inline bool vma_flags_test_single_mask(const vma_flags_t *flags, + vma_flags_t flagmask) +{ + VM_WARN_ON_ONCE(vma_flags_count(&flagmask) > 1); + + return vma_flags_test_any_mask(flags, flagmask); +} + /* Set each of the to_set flags in flags, non-atomically. */ -static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set) +static __always_inline void vma_flags_set_mask(vma_flags_t *flags, + vma_flags_t to_set) { unsigned long *bitmap = flags->__vma_flags; const unsigned long *bitmap_to_set = to_set.__vma_flags; @@ -1116,7 +1234,8 @@ static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t t vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__)) /* Clear all of the to-clear flags in flags, non-atomically. */ -static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear) +static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, + vma_flags_t to_clear) { unsigned long *bitmap = flags->__vma_flags; const unsigned long *bitmap_to_clear = to_clear.__vma_flags; @@ -1133,13 +1252,85 @@ static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__)) /* + * Obtain a VMA flags value containing those flags that are present in flags or + * flags_other but not in both. + */ +static __always_inline vma_flags_t vma_flags_diff_pair(const vma_flags_t *flags, + const vma_flags_t *flags_other) +{ + vma_flags_t dst; + const unsigned long *bitmap_other = flags_other->__vma_flags; + const unsigned long *bitmap = flags->__vma_flags; + unsigned long *bitmap_dst = dst.__vma_flags; + + bitmap_xor(bitmap_dst, bitmap, bitmap_other, NUM_VMA_FLAG_BITS); + return dst; +} + +/* Determine if flags and flags_other have precisely the same flags set. */ +static __always_inline bool vma_flags_same_pair(const vma_flags_t *flags, + const vma_flags_t *flags_other) +{ + const unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_other = flags_other->__vma_flags; + + return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS); +} + +/* Determine if flags and flags_other have precisely the same flags set. */ +static __always_inline bool vma_flags_same_mask(const vma_flags_t *flags, + vma_flags_t flags_other) +{ + const unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_other = flags_other.__vma_flags; + + return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS); +} + +/* + * Helper macro to determine if only the specific flags are set, e.g.: + * + * if (vma_flags_same(&flags, VMA_WRITE_BIT) { ... } + */ +#define vma_flags_same(flags, ...) \ + vma_flags_same_mask(flags, mk_vma_flags(__VA_ARGS__)) + +/* + * Test whether a specific flag in the VMA is set, e.g.: + * + * if (vma_test(vma, VMA_READ_BIT)) { ... } + */ +static __always_inline bool vma_test(const struct vm_area_struct *vma, + vma_flag_t bit) +{ + return vma_flags_test(&vma->flags, bit); +} + +/* Helper to test any VMA flags in a VMA . */ +static __always_inline bool vma_test_any_mask(const struct vm_area_struct *vma, + vma_flags_t flags) +{ + return vma_flags_test_any_mask(&vma->flags, flags); +} + +/* + * Helper macro for testing whether any VMA flags are set in a VMA, + * e.g.: + * + * if (vma_test_any(vma, VMA_IO_BIT, VMA_PFNMAP_BIT, + * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... } + */ +#define vma_test_any(vma, ...) \ + vma_test_any_mask(vma, mk_vma_flags(__VA_ARGS__)) + +/* * Helper to test that ALL specified flags are set in a VMA. * * Note: appropriate locks must be held, this function does not acquire them for * you. */ -static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma, - vma_flags_t flags) +static __always_inline bool vma_test_all_mask(const struct vm_area_struct *vma, + vma_flags_t flags) { return vma_flags_test_all_mask(&vma->flags, flags); } @@ -1147,10 +1338,28 @@ static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma, /* * Helper macro for checking that ALL specified flags are set in a VMA, e.g.: * - * if (vma_test_all_flags(vma, VMA_READ_BIT, VMA_MAYREAD_BIT) { ... } + * if (vma_test_all(vma, VMA_READ_BIT, VMA_MAYREAD_BIT) { ... } + */ +#define vma_test_all(vma, ...) \ + vma_test_all_mask(vma, mk_vma_flags(__VA_ARGS__)) + +/* + * Helper to test that a flag mask of type vma_flags_t has a SINGLE flag set + * (returning false if flagmask has no flags set). + * + * This is useful when a flag needs to be either defined or not depending upon + * kernel configuration, e.g.: + * + * if (vma_test_single_mask(vma, VMA_DROPPABLE)) { ... } + * + * When VMA_DROPPABLE is defined if available, or set to EMPTY_VMA_FLAGS + * otherwise. */ -#define vma_test_all_flags(vma, ...) \ - vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) +static __always_inline bool +vma_test_single_mask(const struct vm_area_struct *vma, vma_flags_t flagmask) +{ + return vma_flags_test_single_mask(&vma->flags, flagmask); +} /* * Helper to set all VMA flags in a VMA. @@ -1158,8 +1367,8 @@ static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma, * Note: appropriate locks must be held, this function does not acquire them for * you. */ -static inline void vma_set_flags_mask(struct vm_area_struct *vma, - vma_flags_t flags) +static __always_inline void vma_set_flags_mask(struct vm_area_struct *vma, + vma_flags_t flags) { vma_flags_set_mask(&vma->flags, flags); } @@ -1176,26 +1385,69 @@ static inline void vma_set_flags_mask(struct vm_area_struct *vma, #define vma_set_flags(vma, ...) \ vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) -/* Helper to test all VMA flags in a VMA descriptor. */ -static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc, - vma_flags_t flags) +/* Helper to clear all VMA flags in a VMA. */ +static __always_inline void vma_clear_flags_mask(struct vm_area_struct *vma, + vma_flags_t flags) { - return vma_flags_test_mask(&desc->vma_flags, flags); + vma_flags_clear_mask(&vma->flags, flags); } /* - * Helper macro for testing VMA flags for an input pointer to a struct - * vm_area_desc object describing a proposed VMA, e.g.: + * Helper macro for clearing VMA flags, e.g.: + * + * vma_clear_flags(vma, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT, + * VMA_DONTDUMP_BIT); + */ +#define vma_clear_flags(vma, ...) \ + vma_clear_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) + +/* + * Test whether a specific VMA flag is set in a VMA descriptor, e.g.: + * + * if (vma_desc_test(desc, VMA_READ_BIT)) { ... } + */ +static __always_inline bool vma_desc_test(const struct vm_area_desc *desc, + vma_flag_t bit) +{ + return vma_flags_test(&desc->vma_flags, bit); +} + +/* Helper to test any VMA flags in a VMA descriptor. */ +static __always_inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc, + vma_flags_t flags) +{ + return vma_flags_test_any_mask(&desc->vma_flags, flags); +} + +/* + * Helper macro for testing whether any VMA flags are set in a VMA descriptor, + * e.g.: * - * if (vma_desc_test_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, + * if (vma_desc_test_any(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... } */ -#define vma_desc_test_flags(desc, ...) \ - vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) +#define vma_desc_test_any(desc, ...) \ + vma_desc_test_any_mask(desc, mk_vma_flags(__VA_ARGS__)) + +/* Helper to test all VMA flags in a VMA descriptor. */ +static __always_inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc, + vma_flags_t flags) +{ + return vma_flags_test_all_mask(&desc->vma_flags, flags); +} + +/* + * Helper macro for testing whether ALL VMA flags are set in a VMA descriptor, + * e.g.: + * + * if (vma_desc_test_all(desc, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... } + */ +#define vma_desc_test_all(desc, ...) \ + vma_desc_test_all_mask(desc, mk_vma_flags(__VA_ARGS__)) /* Helper to set all VMA flags in a VMA descriptor. */ -static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc, - vma_flags_t flags) +static __always_inline void vma_desc_set_flags_mask(struct vm_area_desc *desc, + vma_flags_t flags) { vma_flags_set_mask(&desc->vma_flags, flags); } @@ -1211,8 +1463,8 @@ static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc, vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) /* Helper to clear all VMA flags in a VMA descriptor. */ -static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc, - vma_flags_t flags) +static __always_inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc, + vma_flags_t flags) { vma_flags_clear_mask(&desc->vma_flags, flags); } @@ -1292,12 +1544,6 @@ static inline bool vma_is_accessible(const struct vm_area_struct *vma) return vma->vm_flags & VM_ACCESS_FLAGS; } -static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags) -{ - return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == - (VM_SHARED | VM_MAYWRITE); -} - static inline bool is_shared_maywrite(const vma_flags_t *flags) { return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT); @@ -1308,6 +1554,28 @@ static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma) return is_shared_maywrite(&vma->flags); } +/** + * vma_kernel_pagesize - Default page size granularity for this VMA. + * @vma: The user mapping. + * + * The kernel page size specifies in which granularity VMA modifications + * can be performed. Folios in this VMA will be aligned to, and at least + * the size of the number of bytes returned by this function. + * + * The default kernel page size is not affected by Transparent Huge Pages + * being in effect. + * + * Return: The default page size granularity for this VMA. + */ +static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) +{ + if (unlikely(vma->vm_ops && vma->vm_ops->pagesize)) + return vma->vm_ops->pagesize(vma); + return PAGE_SIZE; +} + +unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); + static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) { @@ -1507,7 +1775,7 @@ static inline int folio_put_testzero(struct folio *folio) */ static inline bool get_page_unless_zero(struct page *page) { - return page_ref_add_unless(page, 1, 0); + return page_ref_add_unless_zero(page, 1); } static inline struct folio *folio_get_nontail_page(struct page *page) @@ -1957,7 +2225,7 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags) static inline bool is_nommu_shared_vma_flags(const vma_flags_t *flags) { - return vma_flags_test(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT); + return vma_flags_test_any(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT); } #endif @@ -2479,36 +2747,6 @@ static inline unsigned long folio_nr_pages(const struct folio *folio) return folio_large_nr_pages(folio); } -#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS) -/* - * We don't expect any folios that exceed buddy sizes (and consequently - * memory sections). - */ -#define MAX_FOLIO_ORDER MAX_PAGE_ORDER -#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) -/* - * Only pages within a single memory section are guaranteed to be - * contiguous. By limiting folios to a single memory section, all folio - * pages are guaranteed to be contiguous. - */ -#define MAX_FOLIO_ORDER PFN_SECTION_SHIFT -#elif defined(CONFIG_HUGETLB_PAGE) -/* - * There is no real limit on the folio size. We limit them to the maximum we - * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect - * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit. - */ -#define MAX_FOLIO_ORDER get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G) -#else -/* - * Without hugetlb, gigantic folios that are bigger than a single PUD are - * currently impossible. - */ -#define MAX_FOLIO_ORDER PUD_ORDER -#endif - -#define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER) - /* * compound_nr() returns the number of pages in this potentially compound * page. compound_nr() can be called on a tail page, and is defined to @@ -2667,7 +2905,7 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio) * The caller must add any reference (e.g., from folio_try_get()) it might be * holding itself to the result. * - * Returns the expected folio refcount. + * Returns: the expected folio refcount. */ static inline int folio_expected_ref_count(const struct folio *folio) { @@ -2798,8 +3036,9 @@ extern void pagefault_out_of_memory(void); */ struct zap_details { struct folio *single_folio; /* Locked folio to be unmapped */ - bool even_cows; /* Zap COWed private pages too? */ + bool skip_cows; /* Do not zap COWed private pages */ bool reclaim_pt; /* Need reclaim page tables? */ + bool reaping; /* Reaping, do not block. */ zap_flags_t zap_flags; /* Extra flags for zapping */ }; @@ -2832,14 +3071,17 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr, pud_t pud); -void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, +void zap_special_vma_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); -void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, - unsigned long size, struct zap_details *details); -static inline void zap_vma_pages(struct vm_area_struct *vma) +void zap_vma_range(struct vm_area_struct *vma, unsigned long address, + unsigned long size); +/** + * zap_vma - zap all page table entries in a vma + * @vma: The vma to zap. + */ +static inline void zap_vma(struct vm_area_struct *vma) { - zap_page_range_single(vma, vma->vm_start, - vma->vm_end - vma->vm_start, NULL); + zap_vma_range(vma, vma->vm_start, vma->vm_end - vma->vm_start); } struct mmu_notifier_range; @@ -3514,26 +3756,21 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ -static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc) -{ - return compound_nr(ptdesc_page(ptdesc)); -} - static inline void __pagetable_ctor(struct ptdesc *ptdesc) { - pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); + struct folio *folio = ptdesc_folio(ptdesc); - __SetPageTable(ptdesc_page(ptdesc)); - mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc)); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); } static inline void pagetable_dtor(struct ptdesc *ptdesc) { - pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); + struct folio *folio = ptdesc_folio(ptdesc); ptlock_free(ptdesc); - __ClearPageTable(ptdesc_page(ptdesc)); - mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc)); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); } static inline void pagetable_dtor_free(struct ptdesc *ptdesc) @@ -3852,7 +4089,6 @@ extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern struct file *get_task_exe_file(struct task_struct *task); -extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); extern bool vma_is_special_mapping(const struct vm_area_struct *vma, @@ -3903,11 +4139,13 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif /* This takes the mm semaphore itself */ -extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); -extern int vm_munmap(unsigned long, size_t); -extern unsigned long __must_check vm_mmap(struct file *, unsigned long, - unsigned long, unsigned long, - unsigned long, unsigned long); +int __must_check vm_brk_flags(unsigned long addr, unsigned long request, bool is_exec); +int vm_munmap(unsigned long start, size_t len); +unsigned long __must_check vm_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset); +unsigned long __must_check vm_mmap_shadow_stack(unsigned long addr, + unsigned long len, unsigned long flags); struct vm_unmapped_area_info { #define VM_UNMAPPED_AREA_TOPDOWN 1 @@ -4004,6 +4242,11 @@ static inline unsigned long vma_pages(const struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +static inline unsigned long vma_last_pgoff(struct vm_area_struct *vma) +{ + return vma->vm_pgoff + vma_pages(vma) - 1; +} + static inline unsigned long vma_desc_size(const struct vm_area_desc *desc) { return desc->end - desc->start; @@ -4078,15 +4321,75 @@ static inline void mmap_action_ioremap(struct vm_area_desc *desc, * @start_pfn: The first PFN in the range to remap. */ static inline void mmap_action_ioremap_full(struct vm_area_desc *desc, - unsigned long start_pfn) + unsigned long start_pfn) { mmap_action_ioremap(desc, desc->start, start_pfn, vma_desc_size(desc)); } -void mmap_action_prepare(struct mmap_action *action, - struct vm_area_desc *desc); -int mmap_action_complete(struct mmap_action *action, - struct vm_area_struct *vma); +/** + * mmap_action_simple_ioremap - helper for mmap_prepare hook to specify that the + * physical range in [start_phys_addr, start_phys_addr + size) should be I/O + * remapped. + * @desc: The VMA descriptor for the VMA requiring remap. + * @start_phys_addr: Start of the physical memory to be mapped. + * @size: Size of the area to map. + * + * NOTE: Some drivers might want to tweak desc->page_prot for purposes of + * write-combine or similar. + */ +static inline void mmap_action_simple_ioremap(struct vm_area_desc *desc, + phys_addr_t start_phys_addr, + unsigned long size) +{ + struct mmap_action *action = &desc->action; + + action->simple_ioremap.start_phys_addr = start_phys_addr; + action->simple_ioremap.size = size; + action->type = MMAP_SIMPLE_IO_REMAP; +} + +/** + * mmap_action_map_kernel_pages - helper for mmap_prepare hook to specify that + * @num kernel pages contained in the @pages array should be mapped to userland + * starting at virtual address @start. + * @desc: The VMA descriptor for the VMA requiring kernel pags to be mapped. + * @start: The virtual address from which to map them. + * @pages: An array of struct page pointers describing the memory to map. + * @nr_pages: The number of entries in the @pages aray. + */ +static inline void mmap_action_map_kernel_pages(struct vm_area_desc *desc, + unsigned long start, struct page **pages, + unsigned long nr_pages) +{ + struct mmap_action *action = &desc->action; + + action->type = MMAP_MAP_KERNEL_PAGES; + action->map_kernel.start = start; + action->map_kernel.pages = pages; + action->map_kernel.nr_pages = nr_pages; + action->map_kernel.pgoff = desc->pgoff; +} + +/** + * mmap_action_map_kernel_pages_full - helper for mmap_prepare hook to specify that + * kernel pages contained in the @pages array should be mapped to userland + * from @desc->start to @desc->end. + * @desc: The VMA descriptor for the VMA requiring kernel pags to be mapped. + * @pages: An array of struct page pointers describing the memory to map. + * + * The caller must ensure that @pages contains sufficient entries to cover the + * entire range described by @desc. + */ +static inline void mmap_action_map_kernel_pages_full(struct vm_area_desc *desc, + struct page **pages) +{ + mmap_action_map_kernel_pages(desc, desc->start, pages, + vma_desc_pages(desc)); +} + +int mmap_action_prepare(struct vm_area_desc *desc); +int mmap_action_complete(struct vm_area_struct *vma, + struct mmap_action *action); /* Look up the first VMA which exactly match the interval vm_start ... vm_end */ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, @@ -4100,20 +4403,81 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, return vma; } +/** + * range_is_subset - Is the specified inner range a subset of the outer range? + * @outer_start: The start of the outer range. + * @outer_end: The exclusive end of the outer range. + * @inner_start: The start of the inner range. + * @inner_end: The exclusive end of the inner range. + * + * Returns: %true if [inner_start, inner_end) is a subset of [outer_start, + * outer_end), otherwise %false. + */ +static inline bool range_is_subset(unsigned long outer_start, + unsigned long outer_end, + unsigned long inner_start, + unsigned long inner_end) +{ + return outer_start <= inner_start && inner_end <= outer_end; +} + +/** + * range_in_vma - is the specified [@start, @end) range a subset of the VMA? + * @vma: The VMA against which we want to check [@start, @end). + * @start: The start of the range we wish to check. + * @end: The exclusive end of the range we wish to check. + * + * Returns: %true if [@start, @end) is a subset of [@vma->vm_start, + * @vma->vm_end), %false otherwise. + */ static inline bool range_in_vma(const struct vm_area_struct *vma, unsigned long start, unsigned long end) { - return (vma && vma->vm_start <= start && end <= vma->vm_end); + if (!vma) + return false; + + return range_is_subset(vma->vm_start, vma->vm_end, start, end); +} + +/** + * range_in_vma_desc - is the specified [@start, @end) range a subset of the VMA + * described by @desc, a VMA descriptor? + * @desc: The VMA descriptor against which we want to check [@start, @end). + * @start: The start of the range we wish to check. + * @end: The exclusive end of the range we wish to check. + * + * Returns: %true if [@start, @end) is a subset of [@desc->start, @desc->end), + * %false otherwise. + */ +static inline bool range_in_vma_desc(const struct vm_area_desc *desc, + unsigned long start, unsigned long end) +{ + if (!desc) + return false; + + return range_is_subset(desc->start, desc->end, start, end); } #ifdef CONFIG_MMU pgprot_t vm_get_page_prot(vm_flags_t vm_flags); + +static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags) +{ + const vm_flags_t vm_flags = vma_flags_to_legacy(vma_flags); + + return vm_get_page_prot(vm_flags); +} + void vma_set_page_prot(struct vm_area_struct *vma); #else static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) { return __pgprot(0); } +static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags) +{ + return __pgprot(0); +} static inline void vma_set_page_prot(struct vm_area_struct *vma) { vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); @@ -4135,6 +4499,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, struct page **pages, unsigned long *num); +int map_kernel_pages_prepare(struct vm_area_desc *desc); +int map_kernel_pages_complete(struct vm_area_struct *vma, + struct mmap_action *action); int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, @@ -4513,10 +4880,9 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); -int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node, +int vmemmap_populate_hvo(unsigned long start, unsigned long end, + unsigned int order, struct zone *zone, unsigned long headsize); -int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node, - unsigned long headsize); void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node, unsigned long headsize); void vmemmap_populate_print_last(void); @@ -4702,22 +5068,6 @@ long copy_folio_from_user(struct folio *dst_folio, const void __user *usr_src, bool allow_pagefault); -/** - * vma_is_special_huge - Are transhuge page-table entries considered special? - * @vma: Pointer to the struct vm_area_struct to consider - * - * Whether transhuge page-table entries are considered "special" following - * the definition in vm_normal_page(). - * - * Return: true if transhuge page-table entries should be considered special, - * false otherwise. - */ -static inline bool vma_is_special_huge(const struct vm_area_struct *vma) -{ - return vma_is_dax(vma) || (vma->vm_file && - (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))); -} - #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if MAX_NUMNODES > 1 @@ -4822,10 +5172,9 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); * DMA mapping IDs for page_pool * * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and - * stashes it in the upper bits of page->pp_magic. We always want to be able to - * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP - * pages can have arbitrary kernel pointers stored in the same field as pp_magic - * (since it overlaps with page->lru.next), so we must ensure that we cannot + * stashes it in the upper bits of page->pp_magic. Non-PP pages can have + * arbitrary kernel pointers stored in the same field as pp_magic (since + * it overlaps with page->lru.next), so we must ensure that we cannot * mistake a valid kernel pointer with any of the values we write into this * field. * @@ -4860,26 +5209,6 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ PP_DMA_INDEX_SHIFT) -/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is - * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for - * the head page of compound page and bit 1 for pfmemalloc page, as well as the - * bits used for the DMA index. page_is_pfmemalloc() is checked in - * __page_pool_put_page() to avoid recycling the pfmemalloc page. - */ -#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) - -#ifdef CONFIG_PAGE_POOL -static inline bool page_pool_page_is_pp(const struct page *page) -{ - return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; -} -#else -static inline bool page_pool_page_is_pp(const struct page *page) -{ - return false; -} -#endif - #define PAGE_SNAPSHOT_FAITHFUL (1 << 0) #define PAGE_SNAPSHOT_PG_BUDDY (1 << 1) #define PAGE_SNAPSHOT_PG_IDLE (1 << 2) @@ -4899,4 +5228,8 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps) void snapshot_page(struct page_snapshot *ps, const struct page *page); +void map_anon_folio_pte_nopf(struct folio *folio, pte_t *pte, + struct vm_area_struct *vma, unsigned long addr, + bool uffd_wp); + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index fa2d6ba811b5..7fc2ced00f8f 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -30,11 +30,6 @@ static inline int folio_is_file_lru(const struct folio *folio) return !folio_test_swapbacked(folio); } -static inline int page_is_file_lru(struct page *page) -{ - return folio_is_file_lru(page_folio(page)); -} - static __always_inline void __update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, long nr_pages) @@ -102,6 +97,12 @@ static __always_inline enum lru_list folio_lru_list(const struct folio *folio) #ifdef CONFIG_LRU_GEN +static inline bool lru_gen_switching(void) +{ + DECLARE_STATIC_KEY_FALSE(lru_switch); + + return static_branch_unlikely(&lru_switch); +} #ifdef CONFIG_LRU_GEN_ENABLED static inline bool lru_gen_enabled(void) { @@ -316,6 +317,11 @@ static inline bool lru_gen_enabled(void) return false; } +static inline bool lru_gen_switching(void) +{ + return false; +} + static inline bool lru_gen_in_fault(void) { return false; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3cc8ae722886..a308e2c23b82 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -126,14 +126,14 @@ struct page { atomic_long_t pp_ref_count; }; struct { /* Tail pages of compound page */ - unsigned long compound_head; /* Bit zero is set */ + unsigned long compound_info; /* Bit zero is set */ }; struct { /* ZONE_DEVICE pages */ /* - * The first word is used for compound_head or folio + * The first word is used for compound_info or folio * pgmap */ - void *_unused_pgmap_compound_head; + void *_unused_pgmap_compound_info; void *zone_device_data; /* * ZONE_DEVICE private pages are counted as being @@ -409,7 +409,7 @@ struct folio { /* private: avoid cluttering the output */ /* For the Unevictable "LRU list" slot */ struct { - /* Avoid compound_head */ + /* Avoid compound_info */ void *__filler; /* public: */ unsigned int mlock_count; @@ -510,7 +510,7 @@ struct folio { FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); FOLIO_MATCH(mapping, mapping); -FOLIO_MATCH(compound_head, lru); +FOLIO_MATCH(compound_info, lru); FOLIO_MATCH(__folio_index, index); FOLIO_MATCH(private, private); FOLIO_MATCH(_mapcount, _mapcount); @@ -529,7 +529,7 @@ FOLIO_MATCH(_last_cpupid, _last_cpupid); static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); -FOLIO_MATCH(compound_head, _head_1); +FOLIO_MATCH(compound_info, _head_1); FOLIO_MATCH(_mapcount, _mapcount_1); FOLIO_MATCH(_refcount, _refcount_1); #undef FOLIO_MATCH @@ -537,13 +537,13 @@ FOLIO_MATCH(_refcount, _refcount_1); static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); -FOLIO_MATCH(compound_head, _head_2); +FOLIO_MATCH(compound_info, _head_2); #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + 3 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_3); -FOLIO_MATCH(compound_head, _head_3); +FOLIO_MATCH(compound_info, _head_3); #undef FOLIO_MATCH /** @@ -609,8 +609,8 @@ struct ptdesc { #define TABLE_MATCH(pg, pt) \ static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) TABLE_MATCH(flags, pt_flags); -TABLE_MATCH(compound_head, pt_list); -TABLE_MATCH(compound_head, _pt_pad_1); +TABLE_MATCH(compound_info, pt_list); +TABLE_MATCH(compound_info, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); TABLE_MATCH(__folio_index, pt_index); TABLE_MATCH(rcu_head, pt_rcu_head); @@ -814,6 +814,8 @@ enum mmap_action_type { MMAP_NOTHING, /* Mapping is complete, no further action. */ MMAP_REMAP_PFN, /* Remap PFN range. */ MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */ + MMAP_SIMPLE_IO_REMAP, /* I/O remap with guardrails. */ + MMAP_MAP_KERNEL_PAGES, /* Map kernel page range from array. */ }; /* @@ -822,13 +824,22 @@ enum mmap_action_type { */ struct mmap_action { union { - /* Remap range. */ struct { unsigned long start; unsigned long start_pfn; unsigned long size; pgprot_t pgprot; } remap; + struct { + phys_addr_t start_phys_addr; + unsigned long size; + } simple_ioremap; + struct { + unsigned long start; + struct page **pages; + unsigned long nr_pages; + pgoff_t pgoff; + } map_kernel; }; enum mmap_action_type type; @@ -870,6 +881,14 @@ typedef struct { #define EMPTY_VMA_FLAGS ((vma_flags_t){ }) +/* Are no flags set in the specified VMA flags? */ +static __always_inline bool vma_flags_empty(const vma_flags_t *flags) +{ + const unsigned long *bitmap = flags->__vma_flags; + + return bitmap_empty(bitmap, NUM_VMA_FLAG_BITS); +} + /* * Describes a VMA that is about to be mmap()'ed. Drivers may choose to * manipulate mutable fields which will cause those fields to be updated in the @@ -879,8 +898,8 @@ typedef struct { */ struct vm_area_desc { /* Immutable state. */ - const struct mm_struct *const mm; - struct file *const file; /* May vary from vm_file in stacked callers. */ + struct mm_struct *mm; + struct file *file; /* May vary from vm_file in stacked callers. */ unsigned long start; unsigned long end; @@ -1056,18 +1075,31 @@ struct vm_area_struct { } __randomize_layout; /* Clears all bits in the VMA flags bitmap, non-atomically. */ -static inline void vma_flags_clear_all(vma_flags_t *flags) +static __always_inline void vma_flags_clear_all(vma_flags_t *flags) { bitmap_zero(flags->__vma_flags, NUM_VMA_FLAG_BITS); } /* + * Helper function which converts a vma_flags_t value to a legacy vm_flags_t + * value. This is only valid if the input flags value can be expressed in a + * system word. + * + * Will be removed once the conversion to VMA flags is complete. + */ +static __always_inline vm_flags_t vma_flags_to_legacy(vma_flags_t flags) +{ + return (vm_flags_t)flags.__vma_flags[0]; +} + +/* * Copy value to the first system word of VMA flags, non-atomically. * * IMPORTANT: This does not overwrite bytes past the first system word. The * caller must account for this. */ -static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value) +static __always_inline void vma_flags_overwrite_word(vma_flags_t *flags, + unsigned long value) { unsigned long *bitmap = flags->__vma_flags; @@ -1075,12 +1107,27 @@ static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long va } /* + * Helper function which converts a legacy vm_flags_t value to a vma_flags_t + * value. + * + * Will be removed once the conversion to VMA flags is complete. + */ +static __always_inline vma_flags_t legacy_to_vma_flags(vm_flags_t flags) +{ + vma_flags_t ret = EMPTY_VMA_FLAGS; + + vma_flags_overwrite_word(&ret, flags); + return ret; +} + +/* * Copy value to the first system word of VMA flags ONCE, non-atomically. * * IMPORTANT: This does not overwrite bytes past the first system word. The * caller must account for this. */ -static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value) +static __always_inline void vma_flags_overwrite_word_once(vma_flags_t *flags, + unsigned long value) { unsigned long *bitmap = flags->__vma_flags; @@ -1088,7 +1135,8 @@ static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned lo } /* Update the first system word of VMA flags setting bits, non-atomically. */ -static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value) +static __always_inline void vma_flags_set_word(vma_flags_t *flags, + unsigned long value) { unsigned long *bitmap = flags->__vma_flags; @@ -1096,7 +1144,8 @@ static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value) } /* Update the first system word of VMA flags clearing bits, non-atomically. */ -static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value) +static __always_inline void vma_flags_clear_word(vma_flags_t *flags, + unsigned long value) { unsigned long *bitmap = flags->__vma_flags; @@ -1241,7 +1290,11 @@ struct mm_struct { unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long stack_vm; /* VM_STACK */ - vm_flags_t def_flags; + union { + /* Temporary while VMA flags are being converted. */ + vm_flags_t def_flags; + vma_flags_t def_vma_flags; + }; /** * @write_protect_seq: Locked when any thread is write diff --git a/include/linux/mman.h b/include/linux/mman.h index 0ba8a7e8b90a..389521594c69 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -170,53 +170,4 @@ static inline bool arch_memory_deny_write_exec_supported(void) } #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported #endif - -/* - * Denies creating a writable executable mapping or gaining executable permissions. - * - * This denies the following: - * - * a) mmap(PROT_WRITE | PROT_EXEC) - * - * b) mmap(PROT_WRITE) - * mprotect(PROT_EXEC) - * - * c) mmap(PROT_WRITE) - * mprotect(PROT_READ) - * mprotect(PROT_EXEC) - * - * But allows the following: - * - * d) mmap(PROT_READ | PROT_EXEC) - * mmap(PROT_READ | PROT_EXEC | PROT_BTI) - * - * This is only applicable if the user has set the Memory-Deny-Write-Execute - * (MDWE) protection mask for the current process. - * - * @old specifies the VMA flags the VMA originally possessed, and @new the ones - * we propose to set. - * - * Return: false if proposed change is OK, true if not ok and should be denied. - */ -static inline bool map_deny_write_exec(unsigned long old, unsigned long new) -{ - /* If MDWE is disabled, we have nothing to deny. */ - if (!mm_flags_test(MMF_HAS_MDWE, current->mm)) - return false; - - /* If the new VMA is not executable, we have nothing to deny. */ - if (!(new & VM_EXEC)) - return false; - - /* Under MDWE we do not accept newly writably executable VMAs... */ - if (new & VM_WRITE) - return true; - - /* ...nor previously non-executable VMAs becoming executable. */ - if (!(old & VM_EXEC)) - return true; - - return false; -} - #endif /* _LINUX_MMAN_H */ diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 93eca48bc443..04b8f61ece5d 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -546,7 +546,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) __mmap_lock_trace_acquire_returned(mm, true, true); } -static inline int mmap_write_lock_killable(struct mm_struct *mm) +static inline int __must_check mmap_write_lock_killable(struct mm_struct *mm) { int ret; @@ -593,7 +593,7 @@ static inline void mmap_read_lock(struct mm_struct *mm) __mmap_lock_trace_acquire_returned(mm, false, true); } -static inline int mmap_read_lock_killable(struct mm_struct *mm) +static inline int __must_check mmap_read_lock_killable(struct mm_struct *mm) { int ret; @@ -603,7 +603,7 @@ static inline int mmap_read_lock_killable(struct mm_struct *mm) return ret; } -static inline bool mmap_read_trylock(struct mm_struct *mm) +static inline bool __must_check mmap_read_trylock(struct mm_struct *mm) { bool ret; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index e9e964c20e53..9dc4750296af 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -329,6 +329,8 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ #define MMC_QUIRK_NO_UHS_DDR50_TUNING (1<<18) /* Disable DDR50 tuning */ +#define MMC_QUIRK_BROKEN_MDT (1<<19) /* Wrong manufacturing year */ +#define MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME (1<<20) /* Secure erase/trim time is fixed regardless of size */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e0e2c265e5d1..ba84f02c2a10 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -486,14 +486,12 @@ struct mmc_host { struct mmc_ios ios; /* current io bus settings */ + bool claimed; /* host exclusively claimed */ + /* group bitfields together to minimize padding */ unsigned int use_spi_crc:1; - unsigned int claimed:1; /* host exclusively claimed */ unsigned int doing_init_tune:1; /* initial tuning in progress */ - unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ - unsigned int retune_now:1; /* do re-tuning at next req */ - unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */ unsigned int can_dma_map_merge:1; /* merging can be used */ unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */ @@ -508,6 +506,9 @@ struct mmc_host { int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ + bool can_retune; /* re-tuning can be used */ + bool retune_now; /* do re-tuning at next req */ + bool retune_paused; /* re-tuning is temporarily disabled */ int need_retune; /* re-tuning is needed */ int hold_retune; /* hold off re-tuning */ unsigned int retune_period; /* re-tuning period in secs */ diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 673cbdf43453..0685dd717e85 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -111,11 +111,15 @@ #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 #define SDIO_DEVICE_ID_MEDIATEK_MT7668 0x7668 +#define SDIO_DEVICE_ID_MEDIATEK_MT7902 0x790a #define SDIO_DEVICE_ID_MEDIATEK_MT7961 0x7961 #define SDIO_VENDOR_ID_MICROCHIP_WILC 0x0296 #define SDIO_DEVICE_ID_MICROCHIP_WILC1000 0x5347 +#define SDIO_VENDOR_ID_NXP 0x0471 +#define SDIO_DEVICE_ID_NXP_IW61X 0x0205 + #define SDIO_VENDOR_ID_REALTEK 0x024c #define SDIO_DEVICE_ID_REALTEK_RTW8723BS 0xb723 #define SDIO_DEVICE_ID_REALTEK_RTW8821BS 0xb821 diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 07a2bbaf86e9..69c304b467df 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -97,20 +97,20 @@ struct mmu_notifier_ops { * Start-end is necessary in case the secondary MMU is mapping the page * at a smaller granularity than the primary MMU. */ - int (*clear_flush_young)(struct mmu_notifier *subscription, - struct mm_struct *mm, - unsigned long start, - unsigned long end); + bool (*clear_flush_young)(struct mmu_notifier *subscription, + struct mm_struct *mm, + unsigned long start, + unsigned long end); /* * clear_young is a lightweight version of clear_flush_young. Like the * latter, it is supposed to test-and-clear the young/accessed bitflag * in the secondary pte, but it may omit flushing the secondary tlb. */ - int (*clear_young)(struct mmu_notifier *subscription, - struct mm_struct *mm, - unsigned long start, - unsigned long end); + bool (*clear_young)(struct mmu_notifier *subscription, + struct mm_struct *mm, + unsigned long start, + unsigned long end); /* * test_young is called to check the young/accessed bitflag in @@ -118,9 +118,9 @@ struct mmu_notifier_ops { * frequently used without actually clearing the flag or tearing * down the secondary mapping on the page. */ - int (*test_young)(struct mmu_notifier *subscription, - struct mm_struct *mm, - unsigned long address); + bool (*test_young)(struct mmu_notifier *subscription, + struct mm_struct *mm, + unsigned long address); /* * invalidate_range_start() and invalidate_range_end() must be @@ -234,15 +234,57 @@ struct mmu_notifier { }; /** - * struct mmu_interval_notifier_ops + * struct mmu_interval_notifier_finish - mmu_interval_notifier two-pass abstraction + * @link: Lockless list link for the notifiers pending pass list + * @notifier: The mmu_interval_notifier for which the finish pass is called. + * + * Allocate, typically using GFP_NOWAIT in the interval notifier's start pass. + * Note that with a large number of notifiers implementing two passes, + * allocation with GFP_NOWAIT will become increasingly likely to fail, so consider + * implementing a small pool instead of using kmalloc() allocations. + * + * If the implementation needs to pass data between the start and the finish passes, + * the recommended way is to embed struct mmu_interval_notifier_finish into a larger + * structure that also contains the data needed to be shared. Keep in mind that + * a notifier callback can be invoked in parallel, and each invocation needs its + * own struct mmu_interval_notifier_finish. + * + * If allocation fails, then the &mmu_interval_notifier_ops->invalidate_start op + * needs to implements the full notifier functionality. Please refer to its + * documentation. + */ +struct mmu_interval_notifier_finish { + struct llist_node link; + struct mmu_interval_notifier *notifier; +}; + +/** + * struct mmu_interval_notifier_ops - callback for range notification * @invalidate: Upon return the caller must stop using any SPTEs within this * range. This function can sleep. Return false only if sleeping * was required but mmu_notifier_range_blockable(range) is false. + * @invalidate_start: Similar to @invalidate, but intended for two-pass notifier + * callbacks where the call to @invalidate_start is the first + * pass and any struct mmu_interval_notifier_finish pointer + * returned in the @finish parameter describes the finish pass. + * If *@finish is %NULL on return, then no final pass will be + * called, and @invalidate_start needs to implement the full + * notifier, behaving like @invalidate. The value of *@finish + * is guaranteed to be %NULL at function entry. + * @invalidate_finish: Called as the second pass for any notifier that returned + * a non-NULL *@finish from @invalidate_start. The @finish + * pointer passed here is the same one returned by + * @invalidate_start. */ struct mmu_interval_notifier_ops { bool (*invalidate)(struct mmu_interval_notifier *interval_sub, const struct mmu_notifier_range *range, unsigned long cur_seq); + bool (*invalidate_start)(struct mmu_interval_notifier *interval_sub, + const struct mmu_notifier_range *range, + unsigned long cur_seq, + struct mmu_interval_notifier_finish **finish); + void (*invalidate_finish)(struct mmu_interval_notifier_finish *finish); }; struct mmu_interval_notifier { @@ -309,8 +351,8 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub); /** * mmu_interval_set_seq - Save the invalidation sequence - * @interval_sub - The subscription passed to invalidate - * @cur_seq - The cur_seq passed to the invalidate() callback + * @interval_sub: The subscription passed to invalidate + * @cur_seq: The cur_seq passed to the invalidate() callback * * This must be called unconditionally from the invalidate callback of a * struct mmu_interval_notifier_ops under the same lock that is used to call @@ -329,8 +371,8 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, /** * mmu_interval_read_retry - End a read side critical section against a VA range - * interval_sub: The subscription - * seq: The return of the paired mmu_interval_read_begin() + * @interval_sub: The subscription + * @seq: The return of the paired mmu_interval_read_begin() * * This MUST be called under a user provided lock that is also held * unconditionally by op->invalidate() when it calls mmu_interval_set_seq(). @@ -338,7 +380,7 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, * Each call should be paired with a single mmu_interval_read_begin() and * should be used to conclude the read side. * - * Returns true if an invalidation collided with this critical section, and + * Returns: true if an invalidation collided with this critical section, and * the caller should retry. */ static inline bool @@ -350,20 +392,21 @@ mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub, /** * mmu_interval_check_retry - Test if a collision has occurred - * interval_sub: The subscription - * seq: The return of the matching mmu_interval_read_begin() + * @interval_sub: The subscription + * @seq: The return of the matching mmu_interval_read_begin() * * This can be used in the critical section between mmu_interval_read_begin() - * and mmu_interval_read_retry(). A return of true indicates an invalidation - * has collided with this critical region and a future - * mmu_interval_read_retry() will return true. - * - * False is not reliable and only suggests a collision may not have - * occurred. It can be called many times and does not have to hold the user - * provided lock. + * and mmu_interval_read_retry(). * * This call can be used as part of loops and other expensive operations to * expedite a retry. + * It can be called many times and does not have to hold the user + * provided lock. + * + * Returns: true indicates an invalidation has collided with this critical + * region and a future mmu_interval_read_retry() will return true. + * False is not reliable and only suggests a collision may not have + * occurred. */ static inline bool mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub, @@ -375,14 +418,12 @@ mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub, extern void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); -extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long start, - unsigned long end); -extern int __mmu_notifier_clear_young(struct mm_struct *mm, - unsigned long start, - unsigned long end); -extern int __mmu_notifier_test_young(struct mm_struct *mm, - unsigned long address); +bool __mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long start, unsigned long end); +bool __mmu_notifier_clear_young(struct mm_struct *mm, + unsigned long start, unsigned long end); +bool __mmu_notifier_test_young(struct mm_struct *mm, + unsigned long address); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r); extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, @@ -402,30 +443,28 @@ static inline void mmu_notifier_release(struct mm_struct *mm) __mmu_notifier_release(mm); } -static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline bool mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) return __mmu_notifier_clear_flush_young(mm, start, end); - return 0; + return false; } -static inline int mmu_notifier_clear_young(struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline bool mmu_notifier_clear_young(struct mm_struct *mm, + unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) return __mmu_notifier_clear_young(mm, start, end); - return 0; + return false; } -static inline int mmu_notifier_test_young(struct mm_struct *mm, - unsigned long address) +static inline bool mmu_notifier_test_young(struct mm_struct *mm, + unsigned long address) { if (mm_has_notifiers(mm)) return __mmu_notifier_test_young(mm, address); - return 0; + return false; } static inline void @@ -515,55 +554,6 @@ static inline void mmu_notifier_range_init_owner( range->owner = owner; } -#define clear_flush_young_ptes_notify(__vma, __address, __ptep, __nr) \ -({ \ - int __young; \ - struct vm_area_struct *___vma = __vma; \ - unsigned long ___address = __address; \ - unsigned int ___nr = __nr; \ - __young = clear_flush_young_ptes(___vma, ___address, __ptep, ___nr); \ - __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address, \ - ___address + \ - ___nr * PAGE_SIZE); \ - __young; \ -}) - -#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp) \ -({ \ - int __young; \ - struct vm_area_struct *___vma = __vma; \ - unsigned long ___address = __address; \ - __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ - __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address, \ - ___address + \ - PMD_SIZE); \ - __young; \ -}) - -#define ptep_clear_young_notify(__vma, __address, __ptep) \ -({ \ - int __young; \ - struct vm_area_struct *___vma = __vma; \ - unsigned long ___address = __address; \ - __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\ - __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ - ___address + PAGE_SIZE); \ - __young; \ -}) - -#define pmdp_clear_young_notify(__vma, __address, __pmdp) \ -({ \ - int __young; \ - struct vm_area_struct *___vma = __vma; \ - unsigned long ___address = __address; \ - __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\ - __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ - ___address + PMD_SIZE); \ - __young; \ -}) - #else /* CONFIG_MMU_NOTIFIER */ struct mmu_notifier_range { @@ -600,24 +590,22 @@ static inline void mmu_notifier_release(struct mm_struct *mm) { } -static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline bool mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long start, unsigned long end) { - return 0; + return false; } -static inline int mmu_notifier_clear_young(struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline bool mmu_notifier_clear_young(struct mm_struct *mm, + unsigned long start, unsigned long end) { - return 0; + return false; } -static inline int mmu_notifier_test_young(struct mm_struct *mm, - unsigned long address) +static inline bool mmu_notifier_test_young(struct mm_struct *mm, + unsigned long address) { - return 0; + return false; } static inline void @@ -651,11 +639,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) #define mmu_notifier_range_update_to_read_only(r) false -#define clear_flush_young_ptes_notify clear_flush_young_ptes -#define pmdp_clear_flush_young_notify pmdp_clear_flush_young -#define ptep_clear_young_notify ptep_test_and_clear_young -#define pmdp_clear_young_notify pmdp_test_and_clear_young - static inline void mmu_notifier_synchronize(void) { } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3e51190a55e4..3bcdda226a91 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -23,6 +23,7 @@ #include <linux/page-flags.h> #include <linux/local_lock.h> #include <linux/zswap.h> +#include <linux/sizes.h> #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ @@ -61,6 +62,59 @@ */ #define PAGE_ALLOC_COSTLY_ORDER 3 +#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS) +/* + * We don't expect any folios that exceed buddy sizes (and consequently + * memory sections). + */ +#define MAX_FOLIO_ORDER MAX_PAGE_ORDER +#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +/* + * Only pages within a single memory section are guaranteed to be + * contiguous. By limiting folios to a single memory section, all folio + * pages are guaranteed to be contiguous. + */ +#define MAX_FOLIO_ORDER PFN_SECTION_SHIFT +#elif defined(CONFIG_HUGETLB_PAGE) +/* + * There is no real limit on the folio size. We limit them to the maximum we + * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect + * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit. + */ +#ifdef CONFIG_64BIT +#define MAX_FOLIO_ORDER (ilog2(SZ_16G) - PAGE_SHIFT) +#else +#define MAX_FOLIO_ORDER (ilog2(SZ_1G) - PAGE_SHIFT) +#endif +#else +/* + * Without hugetlb, gigantic folios that are bigger than a single PUD are + * currently impossible. + */ +#define MAX_FOLIO_ORDER (PUD_SHIFT - PAGE_SHIFT) +#endif + +#define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER) + +/* + * HugeTLB Vmemmap Optimization (HVO) requires struct pages of the head page to + * be naturally aligned with regard to the folio size. + * + * HVO which is only active if the size of struct page is a power of 2. + */ +#define MAX_FOLIO_VMEMMAP_ALIGN \ + (IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) && \ + is_power_of_2(sizeof(struct page)) ? \ + MAX_FOLIO_NR_PAGES * sizeof(struct page) : 0) + +/* + * vmemmap optimization (like HVO) is only possible for page orders that fill + * two or more pages with struct pages. + */ +#define VMEMMAP_TAIL_MIN_ORDER (ilog2(2 * PAGE_SIZE / sizeof(struct page))) +#define __NR_VMEMMAP_TAILS (MAX_FOLIO_ORDER - VMEMMAP_TAIL_MIN_ORDER + 1) +#define NR_VMEMMAP_TAILS (__NR_VMEMMAP_TAILS > 0 ? __NR_VMEMMAP_TAILS : 0) + enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, @@ -220,6 +274,7 @@ enum node_stat_item { NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ + NR_VMALLOC, NR_KERNEL_STACK_KB, /* measured in KiB */ #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) NR_KERNEL_SCS_KB, /* measured in KiB */ @@ -255,11 +310,26 @@ enum node_stat_item { PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, PGDEMOTE_PROACTIVE, + PGSTEAL_KSWAPD, + PGSTEAL_DIRECT, + PGSTEAL_KHUGEPAGED, + PGSTEAL_PROACTIVE, + PGSTEAL_ANON, + PGSTEAL_FILE, + PGSCAN_KSWAPD, + PGSCAN_DIRECT, + PGSCAN_KHUGEPAGED, + PGSCAN_PROACTIVE, + PGSCAN_ANON, + PGSCAN_FILE, + PGREFILL, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif NR_BALLOON_PAGES, NR_KERNEL_FILE_PAGES, + NR_GPU_ACTIVE, /* Pages assigned to GPU objects */ + NR_GPU_RECLAIM, /* Pages in shrinkable GPU pools */ NR_VM_NODE_STAT_ITEMS }; @@ -616,7 +686,7 @@ struct lru_gen_memcg { void lru_gen_init_pgdat(struct pglist_data *pgdat); void lru_gen_init_lruvec(struct lruvec *lruvec); -bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw); +bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw, unsigned int nr); void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); @@ -635,7 +705,8 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } -static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw, + unsigned int nr) { return false; } @@ -1057,6 +1128,9 @@ struct zone { /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP + struct page *vmemmap_tails[NR_VMEMMAP_TAILS]; +#endif } ____cacheline_internodealigned_in_smp; enum pgdat_flags { @@ -1910,15 +1984,13 @@ struct mem_section_usage { unsigned long pageblock_flags[0]; }; -void subsection_map_init(unsigned long pfn, unsigned long nr_pages); - struct page; struct page_ext; struct mem_section { /* * This is, logically, a pointer to an array of struct * pages. However, it is stored with some other magic. - * (see sparse.c::sparse_init_one_section()) + * (see sparse_init_one_section()) * * Additionally during early boot we encode node id of * the location of the section here to guide allocation. @@ -2300,11 +2372,9 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr) #endif #else -#define sparse_index_init(_sec, _nid) do {} while (0) #define sparse_vmemmap_init_nid_early(_nid) do {} while (0) #define sparse_vmemmap_init_nid_late(_nid) do {} while (0) #define pfn_in_present_section pfn_valid -#define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ /* diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 5b1725fe9707..23ff24080dfd 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -691,6 +691,7 @@ struct x86_cpu_id { __u16 feature; /* bit index */ /* Solely for kernel-internal use: DO NOT EXPORT to userspace! */ __u16 flags; + __u8 platform_mask; __u8 type; kernel_ulong_t driver_data; }; @@ -702,6 +703,7 @@ struct x86_cpu_id { #define X86_STEPPING_ANY 0 #define X86_STEP_MIN 0 #define X86_STEP_MAX 0xf +#define X86_PLATFORM_ANY 0x0 #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ #define X86_CPU_TYPE_ANY 0 diff --git a/include/linux/module.h b/include/linux/module.h index 14f391b186c6..7566815fabbe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -413,11 +413,13 @@ struct module { struct module_attribute *modinfo_attrs; const char *version; const char *srcversion; + const char *imported_namespaces; struct kobject *holders_dir; /* Exported symbols */ const struct kernel_symbol *syms; const u32 *crcs; + const u8 *flagstab; unsigned int num_syms; #ifdef CONFIG_ARCH_USES_CFI_TRAPS @@ -433,9 +435,6 @@ struct module { unsigned int num_kp; /* GPL-only exported symbols. */ - unsigned int num_gpl_syms; - const struct kernel_symbol *gpl_syms; - const u32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_MODULE_SIG diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h index 7eb4b00381ac..db335d46787f 100644 --- a/include/linux/module_signature.h +++ b/include/linux/module_signature.h @@ -10,35 +10,7 @@ #define _LINUX_MODULE_SIGNATURE_H #include <linux/types.h> - -/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */ -#define MODULE_SIG_STRING "~Module signature appended~\n" - -enum pkey_id_type { - PKEY_ID_PGP, /* OpenPGP generated key ID */ - PKEY_ID_X509, /* X.509 arbitrary subjectKeyIdentifier */ - PKEY_ID_PKCS7, /* Signature in PKCS#7 message */ -}; - -/* - * Module signature information block. - * - * The constituents of the signature section are, in order: - * - * - Signer's name - * - Key identifier - * - Signature data - * - Information block - */ -struct module_signature { - u8 algo; /* Public-key crypto algorithm [0] */ - u8 hash; /* Digest algorithm [0] */ - u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */ - u8 signer_len; /* Length of signer's name [0] */ - u8 key_id_len; /* Length of key identifier [0] */ - u8 __pad[3]; - __be32 sig_len; /* Length of signature data */ -}; +#include <uapi/linux/module_signature.h> int mod_check_sig(const struct module_signature *ms, size_t file_len, const char *name); diff --git a/include/linux/module_symbol.h b/include/linux/module_symbol.h index 77c9895b9ddb..574609aced99 100644 --- a/include/linux/module_symbol.h +++ b/include/linux/module_symbol.h @@ -2,6 +2,11 @@ #ifndef _LINUX_MODULE_SYMBOL_H #define _LINUX_MODULE_SYMBOL_H +/* Kernel symbol flags bitset. */ +enum ksym_flags { + KSYM_FLAG_GPL_ONLY = 1 << 0, +}; + /* This ignores the intensely annoying "mapping symbols" found in ELF files. */ static inline bool is_mapping_symbol(const char *str) { diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 7d22d4c4ea2e..075f28585074 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -317,8 +317,8 @@ struct kparam_array name, &__param_ops_##name, arg, perm, -1, 0) #ifdef CONFIG_SYSFS -extern void kernel_param_lock(struct module *mod); -extern void kernel_param_unlock(struct module *mod); +void kernel_param_lock(struct module *mod); +void kernel_param_unlock(struct module *mod); #else static inline void kernel_param_lock(struct module *mod) { @@ -398,7 +398,7 @@ static inline void kernel_param_unlock(struct module *mod) * Returns: true if the two parameter names are equal. * Dashes (-) are considered equal to underscores (_). */ -extern bool parameq(const char *name1, const char *name2); +bool parameq(const char *name1, const char *name2); /** * parameqn - checks if two parameter names match @@ -412,28 +412,23 @@ extern bool parameq(const char *name1, const char *name2); * are equal. * Dashes (-) are considered equal to underscores (_). */ -extern bool parameqn(const char *name1, const char *name2, size_t n); +bool parameqn(const char *name1, const char *name2, size_t n); typedef int (*parse_unknown_fn)(char *param, char *val, const char *doing, void *arg); /* Called on module insert or kernel boot */ -extern char *parse_args(const char *name, - char *args, - const struct kernel_param *params, - unsigned num, - s16 level_min, - s16 level_max, - void *arg, parse_unknown_fn unknown); +char *parse_args(const char *doing, + char *args, + const struct kernel_param *params, + unsigned int num, + s16 min_level, + s16 max_level, + void *arg, parse_unknown_fn unknown); /* Called by module remove. */ -#ifdef CONFIG_SYSFS -extern void destroy_params(const struct kernel_param *params, unsigned num); -#else -static inline void destroy_params(const struct kernel_param *params, - unsigned num) -{ -} -#endif /* !CONFIG_SYSFS */ +#ifdef CONFIG_MODULES +void module_destroy_params(const struct kernel_param *params, unsigned int num); +#endif /* All the helper functions */ /* The macros to do compile-time type checking stolen from Jakub @@ -442,78 +437,77 @@ static inline void destroy_params(const struct kernel_param *params, static inline type __always_unused *__check_##name(void) { return(p); } extern const struct kernel_param_ops param_ops_byte; -extern int param_set_byte(const char *val, const struct kernel_param *kp); -extern int param_get_byte(char *buffer, const struct kernel_param *kp); +int param_set_byte(const char *val, const struct kernel_param *kp); +int param_get_byte(char *buffer, const struct kernel_param *kp); #define param_check_byte(name, p) __param_check(name, p, unsigned char) extern const struct kernel_param_ops param_ops_short; -extern int param_set_short(const char *val, const struct kernel_param *kp); -extern int param_get_short(char *buffer, const struct kernel_param *kp); +int param_set_short(const char *val, const struct kernel_param *kp); +int param_get_short(char *buffer, const struct kernel_param *kp); #define param_check_short(name, p) __param_check(name, p, short) extern const struct kernel_param_ops param_ops_ushort; -extern int param_set_ushort(const char *val, const struct kernel_param *kp); -extern int param_get_ushort(char *buffer, const struct kernel_param *kp); +int param_set_ushort(const char *val, const struct kernel_param *kp); +int param_get_ushort(char *buffer, const struct kernel_param *kp); #define param_check_ushort(name, p) __param_check(name, p, unsigned short) extern const struct kernel_param_ops param_ops_int; -extern int param_set_int(const char *val, const struct kernel_param *kp); -extern int param_get_int(char *buffer, const struct kernel_param *kp); +int param_set_int(const char *val, const struct kernel_param *kp); +int param_get_int(char *buffer, const struct kernel_param *kp); #define param_check_int(name, p) __param_check(name, p, int) extern const struct kernel_param_ops param_ops_uint; -extern int param_set_uint(const char *val, const struct kernel_param *kp); -extern int param_get_uint(char *buffer, const struct kernel_param *kp); +int param_set_uint(const char *val, const struct kernel_param *kp); +int param_get_uint(char *buffer, const struct kernel_param *kp); int param_set_uint_minmax(const char *val, const struct kernel_param *kp, unsigned int min, unsigned int max); #define param_check_uint(name, p) __param_check(name, p, unsigned int) extern const struct kernel_param_ops param_ops_long; -extern int param_set_long(const char *val, const struct kernel_param *kp); -extern int param_get_long(char *buffer, const struct kernel_param *kp); +int param_set_long(const char *val, const struct kernel_param *kp); +int param_get_long(char *buffer, const struct kernel_param *kp); #define param_check_long(name, p) __param_check(name, p, long) extern const struct kernel_param_ops param_ops_ulong; -extern int param_set_ulong(const char *val, const struct kernel_param *kp); -extern int param_get_ulong(char *buffer, const struct kernel_param *kp); +int param_set_ulong(const char *val, const struct kernel_param *kp); +int param_get_ulong(char *buffer, const struct kernel_param *kp); #define param_check_ulong(name, p) __param_check(name, p, unsigned long) extern const struct kernel_param_ops param_ops_ullong; -extern int param_set_ullong(const char *val, const struct kernel_param *kp); -extern int param_get_ullong(char *buffer, const struct kernel_param *kp); +int param_set_ullong(const char *val, const struct kernel_param *kp); +int param_get_ullong(char *buffer, const struct kernel_param *kp); #define param_check_ullong(name, p) __param_check(name, p, unsigned long long) extern const struct kernel_param_ops param_ops_hexint; -extern int param_set_hexint(const char *val, const struct kernel_param *kp); -extern int param_get_hexint(char *buffer, const struct kernel_param *kp); +int param_set_hexint(const char *val, const struct kernel_param *kp); +int param_get_hexint(char *buffer, const struct kernel_param *kp); #define param_check_hexint(name, p) param_check_uint(name, p) extern const struct kernel_param_ops param_ops_charp; -extern int param_set_charp(const char *val, const struct kernel_param *kp); -extern int param_get_charp(char *buffer, const struct kernel_param *kp); -extern void param_free_charp(void *arg); +int param_set_charp(const char *val, const struct kernel_param *kp); +int param_get_charp(char *buffer, const struct kernel_param *kp); +void param_free_charp(void *arg); #define param_check_charp(name, p) __param_check(name, p, char *) /* We used to allow int as well as bool. We're taking that away! */ extern const struct kernel_param_ops param_ops_bool; -extern int param_set_bool(const char *val, const struct kernel_param *kp); -extern int param_get_bool(char *buffer, const struct kernel_param *kp); +int param_set_bool(const char *val, const struct kernel_param *kp); +int param_get_bool(char *buffer, const struct kernel_param *kp); #define param_check_bool(name, p) __param_check(name, p, bool) extern const struct kernel_param_ops param_ops_bool_enable_only; -extern int param_set_bool_enable_only(const char *val, - const struct kernel_param *kp); +int param_set_bool_enable_only(const char *val, const struct kernel_param *kp); /* getter is the same as for the regular bool */ #define param_check_bool_enable_only param_check_bool extern const struct kernel_param_ops param_ops_invbool; -extern int param_set_invbool(const char *val, const struct kernel_param *kp); -extern int param_get_invbool(char *buffer, const struct kernel_param *kp); +int param_set_invbool(const char *val, const struct kernel_param *kp); +int param_get_invbool(char *buffer, const struct kernel_param *kp); #define param_check_invbool(name, p) __param_check(name, p, bool) /* An int, which can only be set like a bool (though it shows as an int). */ extern const struct kernel_param_ops param_ops_bint; -extern int param_set_bint(const char *val, const struct kernel_param *kp); +int param_set_bint(const char *val, const struct kernel_param *kp); #define param_get_bint param_get_int #define param_check_bint param_check_int @@ -620,19 +614,19 @@ enum hwparam_type { extern const struct kernel_param_ops param_array_ops; extern const struct kernel_param_ops param_ops_string; -extern int param_set_copystring(const char *val, const struct kernel_param *); -extern int param_get_string(char *buffer, const struct kernel_param *kp); +int param_set_copystring(const char *val, const struct kernel_param *kp); +int param_get_string(char *buffer, const struct kernel_param *kp); /* for exporting parameters in /sys/module/.../parameters */ struct module; #if defined(CONFIG_SYSFS) && defined(CONFIG_MODULES) -extern int module_param_sysfs_setup(struct module *mod, - const struct kernel_param *kparam, - unsigned int num_params); +int module_param_sysfs_setup(struct module *mod, + const struct kernel_param *kparam, + unsigned int num_params); -extern void module_param_sysfs_remove(struct module *mod); +void module_param_sysfs_remove(struct module *mod); #else static inline int module_param_sysfs_setup(struct module *mod, const struct kernel_param *kparam, diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 1bdc39daac0a..358946990bfa 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -17,7 +17,14 @@ struct readahead_control; void mpage_readahead(struct readahead_control *, get_block_t get_block); int mpage_read_folio(struct folio *folio, get_block_t get_block); -int mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block); +int __mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block, + int (*write_folio)(struct folio *folio, + struct writeback_control *wbc)); +static inline int mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block) +{ + return __mpage_writepages(mapping, wbc, get_block, NULL); +} #endif diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 0075f6e5c3da..cf3374580f74 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -76,7 +76,7 @@ static inline int mr_call_vif_notifiers(struct net *net, struct vif_device *vif, struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, - unsigned int *ipmr_seq) + atomic_t *ipmr_seq) { struct vif_entry_notifier_info info = { .info = { @@ -89,7 +89,7 @@ static inline int mr_call_vif_notifiers(struct net *net, }; ASSERT_RTNL(); - (*ipmr_seq)++; + atomic_inc(ipmr_seq); return call_fib_notifiers(net, event_type, &info.info); } @@ -198,7 +198,7 @@ static inline int mr_call_mfc_notifiers(struct net *net, unsigned short family, enum fib_event_type event_type, struct mr_mfc *mfc, u32 tb_id, - unsigned int *ipmr_seq) + atomic_t *ipmr_seq) { struct mfc_entry_notifier_info info = { .info = { @@ -208,8 +208,7 @@ static inline int mr_call_mfc_notifiers(struct net *net, .tb_id = tb_id }; - ASSERT_RTNL(); - (*ipmr_seq)++; + atomic_inc(ipmr_seq); return call_fib_notifiers(net, event_type, &info.info); } diff --git a/include/linux/mutex.h b/include/linux/mutex.h index ecaa0440f6ec..734048c02f4f 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -79,7 +79,7 @@ do { \ #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ - , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ + , .first_waiter = NULL \ __DEBUG_MUTEX_INITIALIZER(lockname) \ __DEP_MAP_MUTEX_INITIALIZER(lockname) } @@ -87,12 +87,12 @@ do { \ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) #ifdef CONFIG_DEBUG_LOCK_ALLOC -void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_key *key); +void mutex_init_lockdep(struct mutex *lock, const char *name, struct lock_class_key *key); static inline void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { - mutex_init_lockep(lock, name, key); + mutex_init_lockdep(lock, name, key); } #else extern void mutex_init_generic(struct mutex *lock); @@ -146,7 +146,7 @@ static inline void __mutex_init(struct mutex *lock, const char *name, { mutex_rt_init_generic(lock); } -#endif /* !CONFIG_LOCKDEP */ +#endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #endif /* CONFIG_PREEMPT_RT */ #ifdef CONFIG_DEBUG_MUTEXES @@ -183,7 +183,7 @@ static inline int __must_check __devm_mutex_init(struct device *dev, struct mute */ #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass) __acquires(lock); -extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); +extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock) __acquires(lock); extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) __cond_acquires(0, lock); extern int __must_check _mutex_lock_killable(struct mutex *lock, @@ -253,6 +253,7 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) __cond_a DEFINE_LOCK_GUARD_1(mutex, struct mutex, mutex_lock(_T->lock), mutex_unlock(_T->lock)) DEFINE_LOCK_GUARD_1_COND(mutex, _try, mutex_trylock(_T->lock)) DEFINE_LOCK_GUARD_1_COND(mutex, _intr, mutex_lock_interruptible(_T->lock), _RET == 0) +DEFINE_LOCK_GUARD_1_COND(mutex, _kill, mutex_lock_killable(_T->lock), _RET == 0) DEFINE_LOCK_GUARD_1(mutex_init, struct mutex, mutex_init(_T->lock), /* */) DECLARE_LOCK_GUARD_1_ATTRS(mutex, __acquires(_T), __releases(*(struct mutex **)_T)) @@ -261,6 +262,8 @@ DECLARE_LOCK_GUARD_1_ATTRS(mutex_try, __acquires(_T), __releases(*(struct mutex #define class_mutex_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_try, _T) DECLARE_LOCK_GUARD_1_ATTRS(mutex_intr, __acquires(_T), __releases(*(struct mutex **)_T)) #define class_mutex_intr_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_intr, _T) +DECLARE_LOCK_GUARD_1_ATTRS(mutex_kill, __acquires(_T), __releases(*(struct mutex **)_T)) +#define class_mutex_kill_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_kill, _T) DECLARE_LOCK_GUARD_1_ATTRS(mutex_init, __acquires(_T), __releases(*(struct mutex **)_T)) #define class_mutex_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_init, _T) diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h index 80975935ec48..24ed599fdda8 100644 --- a/include/linux/mutex_types.h +++ b/include/linux/mutex_types.h @@ -44,7 +44,7 @@ context_lock_struct(mutex) { #ifdef CONFIG_MUTEX_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif - struct list_head wait_list; + struct mutex_waiter *first_waiter __guarded_by(&wait_lock); #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif diff --git a/include/linux/mux/consumer.h b/include/linux/mux/consumer.h index 2e25c838f831..a961861a503b 100644 --- a/include/linux/mux/consumer.h +++ b/include/linux/mux/consumer.h @@ -16,6 +16,8 @@ struct device; struct mux_control; struct mux_state; +#if IS_ENABLED(CONFIG_MULTIPLEXER) + unsigned int mux_control_states(struct mux_control *mux); int __must_check mux_control_select_delay(struct mux_control *mux, unsigned int state, @@ -54,11 +56,109 @@ int mux_control_deselect(struct mux_control *mux); int mux_state_deselect(struct mux_state *mstate); struct mux_control *mux_control_get(struct device *dev, const char *mux_name); +struct mux_control *mux_control_get_optional(struct device *dev, const char *mux_name); void mux_control_put(struct mux_control *mux); -struct mux_control *devm_mux_control_get(struct device *dev, - const char *mux_name); -struct mux_state *devm_mux_state_get(struct device *dev, - const char *mux_name); +struct mux_control *devm_mux_control_get(struct device *dev, const char *mux_name); +struct mux_state *devm_mux_state_get(struct device *dev, const char *mux_name); +struct mux_state *devm_mux_state_get_optional(struct device *dev, const char *mux_name); +struct mux_state *devm_mux_state_get_selected(struct device *dev, const char *mux_name); +struct mux_state *devm_mux_state_get_optional_selected(struct device *dev, const char *mux_name); + +#else + +static inline unsigned int mux_control_states(struct mux_control *mux) +{ + return 0; +} +static inline int __must_check mux_control_select_delay(struct mux_control *mux, + unsigned int state, unsigned int delay_us) +{ + return -EOPNOTSUPP; +} +static inline int __must_check mux_state_select_delay(struct mux_state *mstate, + unsigned int delay_us) +{ + return -EOPNOTSUPP; +} +static inline int __must_check mux_control_try_select_delay(struct mux_control *mux, + unsigned int state, + unsigned int delay_us) +{ + return -EOPNOTSUPP; +} +static inline int __must_check mux_state_try_select_delay(struct mux_state *mstate, + unsigned int delay_us) +{ + return -EOPNOTSUPP; +} + +static inline int __must_check mux_control_select(struct mux_control *mux, + unsigned int state) +{ + return -EOPNOTSUPP; +} + +static inline int __must_check mux_state_select(struct mux_state *mstate) +{ + return -EOPNOTSUPP; +} + +static inline int __must_check mux_control_try_select(struct mux_control *mux, + unsigned int state) +{ + return -EOPNOTSUPP; +} + +static inline int __must_check mux_state_try_select(struct mux_state *mstate) +{ + return -EOPNOTSUPP; +} + +static inline int mux_control_deselect(struct mux_control *mux) +{ + return -EOPNOTSUPP; +} +static inline int mux_state_deselect(struct mux_state *mstate) +{ + return -EOPNOTSUPP; +} + +static inline struct mux_control *mux_control_get(struct device *dev, const char *mux_name) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline struct mux_control *mux_control_get_optional(struct device *dev, + const char *mux_name) +{ + return NULL; +} +static inline void mux_control_put(struct mux_control *mux) {} + +static inline struct mux_control *devm_mux_control_get(struct device *dev, const char *mux_name) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline struct mux_state *devm_mux_state_get(struct device *dev, const char *mux_name) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline struct mux_state *devm_mux_state_get_optional(struct device *dev, + const char *mux_name) +{ + return NULL; +} +static inline struct mux_state *devm_mux_state_get_selected(struct device *dev, + const char *mux_name) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline struct mux_state *devm_mux_state_get_optional_selected(struct device *dev, + const char *mux_name) +{ + return NULL; +} + +#endif /* CONFIG_MULTIPLEXER */ #endif /* _LINUX_MUX_CONSUMER_H */ diff --git a/include/linux/namei.h b/include/linux/namei.h index 58600cf234bc..2ad6dd9987b9 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -54,9 +54,6 @@ extern int path_pts(struct path *path); extern int user_path_at(int, const char __user *, unsigned, struct path *); -struct dentry *lookup_one_qstr_excl(const struct qstr *name, - struct dentry *base, - unsigned int flags); extern int kern_path(const char *, unsigned, struct path *); struct dentry *kern_path_parent(const char *name, struct path *parent); @@ -168,9 +165,6 @@ extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); -extern struct dentry *lock_rename(struct dentry *, struct dentry *); -extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); -extern void unlock_rename(struct dentry *, struct dentry *); int start_renaming(struct renamedata *rd, int lookup_flags, struct qstr *old_last, struct qstr *new_last); int start_renaming_dentry(struct renamedata *rd, int lookup_flags, diff --git a/include/linux/net.h b/include/linux/net.h index f58b38ab37f8..f268f395ce47 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -23,9 +23,30 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/sockptr.h> +#include <linux/uio.h> #include <uapi/linux/net.h> +/** + * struct sockopt - socket option value container + * @iter_in: iov_iter for reading optval with the content from the caller. + * Use copy_from_iter() given this iov direction is ITER_SOURCE + * @iter_out: iov_iter for protocols to update optval data to userspace + * Use _copy_to_iter() given iov direction is ITER_DEST + * @optlen: serves as both input (buffer size) and output (returned data size). + * + * Type-safe wrapper for socket option data that works with both + * user and kernel buffers. + * + * The optlen field allows callbacks to return a specific length value + * independent of the bytes written via copy_to_iter(). + */ +typedef struct sockopt { + struct iov_iter iter_in; + struct iov_iter iter_out; + int optlen; +} sockopt_t; + struct poll_table_struct; struct pipe_inode_info; struct inode; @@ -192,6 +213,8 @@ struct proto_ops { unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); + int (*getsockopt_iter)(struct socket *sock, int level, + int optname, sockopt_t *opt); void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); @@ -223,6 +246,7 @@ struct proto_ops { int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, size_t size); int (*set_rcvlowat)(struct sock *sk, int val); + void (*set_rcvbuf)(struct sock *sk, int val); }; #define DECLARE_SOCKADDR(type, dst, src) \ @@ -304,6 +328,8 @@ do { \ #define net_get_random_once(buf, nbytes) \ get_random_once((buf), (nbytes)) +#define net_get_random_sleepable_once(buf, nbytes) \ + get_random_sleepable_once((buf), (nbytes)) /* * E.g. XFS meta- & log-data is in slab pages, or bcache meta diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4e6e00bb90a..7969fcdd5ac4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -311,7 +311,9 @@ struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len); - int (*parse)(const struct sk_buff *skb, unsigned char *haddr); + int (*parse)(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr); int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, @@ -1714,7 +1716,6 @@ struct net_device_ops { * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device - * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) * @IFF_MACSEC: device is a MACsec device @@ -1750,7 +1751,6 @@ enum netdev_priv_flags { IFF_OPENVSWITCH = 1<<20, IFF_L3MDEV_SLAVE = 1<<21, IFF_TEAM = 1<<22, - IFF_RXFH_CONFIGURED = 1<<23, IFF_PHONY_HEADROOM = 1<<24, IFF_MACSEC = 1<<25, IFF_NO_RX_HANDLER = 1<<26, @@ -2155,6 +2155,7 @@ struct net_device { unsigned long state; unsigned int flags; unsigned short hard_header_len; + enum netdev_stat_type pcpu_stat_type:8; netdev_features_t features; struct inet6_dev __rcu *ip6_ptr; __cacheline_group_end(net_device_read_txrx); @@ -2404,8 +2405,6 @@ struct net_device { void *ml_priv; enum netdev_ml_priv_type ml_priv_type; - enum netdev_stat_type pcpu_stat_type:8; - #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; #endif @@ -2562,7 +2561,14 @@ struct net_device { * Also protects some fields in: * struct napi_struct, struct netdev_queue, struct netdev_rx_queue * - * Ordering: take after rtnl_lock. + * Ordering: + * + * - take after rtnl_lock + * + * - for the case of netdev queue leasing, the netdev-scope lock is + * taken for both the virtual and the physical device; to prevent + * deadlocks, the virtual device's lock must always be acquired + * before the physical device's (see netdev_nl_queue_create_doit) */ struct mutex lock; @@ -2764,6 +2770,17 @@ static inline void *netdev_priv(const struct net_device *dev) return (void *)dev->priv; } +/** + * netdev_from_priv() - get network device from priv + * @priv: network device private data + * + * Returns: net_device to which @priv belongs + */ +static inline struct net_device *netdev_from_priv(const void *priv) +{ + return container_of(priv, struct net_device, priv); +} + /* Set the sysfs physical device reference for the network logical device * if set prior to registration will cause a symlink during initialization. */ @@ -3403,6 +3420,8 @@ static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); +bool unregister_netdevice_queued(const struct net_device *dev); + static inline void unregister_netdevice(struct net_device *dev) { unregister_netdevice_queue(dev, NULL); @@ -3446,7 +3465,7 @@ static inline int dev_parse_header(const struct sk_buff *skb, if (!dev->header_ops || !dev->header_ops->parse) return 0; - return dev->header_ops->parse(skb, haddr); + return dev->header_ops->parse(skb, dev, haddr); } static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb) @@ -3576,17 +3595,49 @@ struct page_pool_bh { }; DECLARE_PER_CPU(struct page_pool_bh, system_page_pool); +#define XMIT_RECURSION_LIMIT 8 + #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) { return this_cpu_read(softnet_data.xmit.recursion); } + +static inline bool dev_xmit_recursion(void) +{ + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > + XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + __this_cpu_inc(softnet_data.xmit.recursion); +} + +static inline void dev_xmit_recursion_dec(void) +{ + __this_cpu_dec(softnet_data.xmit.recursion); +} #else static inline int dev_recursion_level(void) { return current->net_xmit.recursion; } +static inline bool dev_xmit_recursion(void) +{ + return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + current->net_xmit.recursion++; +} + +static inline void dev_xmit_recursion_dec(void) +{ + current->net_xmit.recursion--; +} #endif void __netif_schedule(struct Qdisc *q); @@ -4711,7 +4762,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, cpu); } @@ -4729,7 +4780,7 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } @@ -4738,7 +4789,7 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq) bool ok = spin_trylock(&txq->_xmit_lock); if (likely(ok)) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } return ok; @@ -4746,14 +4797,14 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq) static inline void __netif_tx_unlock(struct netdev_queue *txq) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } @@ -4846,6 +4897,23 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_enable(); } +#ifndef CONFIG_PREEMPT_RT +static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) +{ + /* Other cpus might concurrently change txq->xmit_lock_owner + * to -1 or to their cpu id, but not to our id. + */ + return READ_ONCE(txq->xmit_lock_owner) == cpu; +} + +#else +static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) +{ + return rt_mutex_owner(&txq->_xmit_lock.lock) == current; +} + +#endif + static inline void netif_addr_lock(struct net_device *dev) { unsigned char nest_level = 0; @@ -5289,9 +5357,9 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi } int netdev_class_create_file_ns(const struct class_attribute *class_attr, - const void *ns); + const struct ns_common *ns); void netdev_class_remove_file_ns(const struct class_attribute *class_attr, - const void *ns); + const struct ns_common *ns); extern const struct kobj_ns_type_operations net_ns_type_operations; @@ -5519,10 +5587,7 @@ static inline bool netif_is_lag_port(const struct net_device *dev) return netif_is_bond_slave(dev) || netif_is_team_port(dev); } -static inline bool netif_is_rxfh_configured(const struct net_device *dev) -{ - return dev->priv_flags & IFF_RXFH_CONFIGURED; -} +bool netif_is_rxfh_configured(const struct net_device *dev); static inline bool netif_is_failover(const struct net_device *dev) { diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e9f4f845d760..b98331572ad2 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -309,7 +309,7 @@ enum { /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, - const char *name, struct ip_set **set); + const struct nlattr *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); diff --git a/include/linux/netfilter/nf_conntrack_amanda.h b/include/linux/netfilter/nf_conntrack_amanda.h index dfe89f38d1f7..1719987e8fd8 100644 --- a/include/linux/netfilter/nf_conntrack_amanda.h +++ b/include/linux/netfilter/nf_conntrack_amanda.h @@ -7,10 +7,13 @@ #include <linux/skbuff.h> #include <net/netfilter/nf_conntrack_expect.h> -extern unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +typedef unsigned int +nf_nat_amanda_hook_fn(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp); + +extern nf_nat_amanda_hook_fn __rcu *nf_nat_amanda_hook; #endif /* _NF_CONNTRACK_AMANDA_H */ diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h index f31292642035..7b62446ccec4 100644 --- a/include/linux/netfilter/nf_conntrack_ftp.h +++ b/include/linux/netfilter/nf_conntrack_ftp.h @@ -26,11 +26,14 @@ struct nf_ct_ftp_master { /* For NAT to hook in when we find a packet which describes what other * connection we should expect. */ -extern unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - enum nf_ct_ftp_type type, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +typedef unsigned int +nf_nat_ftp_hook_fn(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + enum nf_ct_ftp_type type, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp); + +extern nf_nat_ftp_hook_fn __rcu *nf_nat_ftp_hook; #endif /* _NF_CONNTRACK_FTP_H */ diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h index 4f3ca5621998..ce07250afb4e 100644 --- a/include/linux/netfilter/nf_conntrack_irc.h +++ b/include/linux/netfilter/nf_conntrack_irc.h @@ -8,11 +8,14 @@ #define IRC_PORT 6667 -extern unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +typedef unsigned int +nf_nat_irc_hook_fn(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp); + +extern nf_nat_irc_hook_fn __rcu *nf_nat_irc_hook; #endif /* _NF_CONNTRACK_IRC_H */ diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h index 99107e4f5234..bb39f04a9977 100644 --- a/include/linux/netfilter/nf_conntrack_snmp.h +++ b/include/linux/netfilter/nf_conntrack_snmp.h @@ -5,9 +5,12 @@ #include <linux/netfilter.h> #include <linux/skbuff.h> -extern int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo); +typedef int +nf_nat_snmp_hook_fn(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo); + +extern nf_nat_snmp_hook_fn __rcu *nf_nat_snmp_hook; #endif /* _NF_CONNTRACK_SNMP_H */ diff --git a/include/linux/netfilter/nf_conntrack_tftp.h b/include/linux/netfilter/nf_conntrack_tftp.h index 1490b68dd7d1..90b334bbce3c 100644 --- a/include/linux/netfilter/nf_conntrack_tftp.h +++ b/include/linux/netfilter/nf_conntrack_tftp.h @@ -19,8 +19,11 @@ struct tftphdr { #define TFTP_OPCODE_ACK 4 #define TFTP_OPCODE_ERROR 5 -extern unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_expect *exp); +typedef unsigned int +nf_nat_tftp_hook_fn(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect *exp); + +extern nf_nat_tftp_hook_fn __rcu *nf_nat_tftp_hook; #endif /* _NF_CONNTRACK_TFTP_H */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 61aa48f46dd7..5ce45b6d890f 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -34,59 +34,13 @@ struct ip6_rt_info { struct nf_queue_entry; struct nf_bridge_frag_data; -/* - * Hook functions for ipv6 to allow xt_* modules to be built-in even - * if IPv6 is a module. - */ -struct nf_ipv6_ops { -#if IS_MODULE(CONFIG_IPV6) - int (*chk_addr)(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict); - int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb); - int (*dev_get_saddr)(struct net *net, const struct net_device *dev, - const struct in6_addr *daddr, unsigned int srcprefs, - struct in6_addr *saddr); - int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, - bool strict); - u32 (*cookie_init_sequence)(const struct ipv6hdr *iph, - const struct tcphdr *th, u16 *mssp); - int (*cookie_v6_check)(const struct ipv6hdr *iph, - const struct tcphdr *th); -#endif - void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)); - int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); -#if IS_MODULE(CONFIG_IPV6) - int (*br_fragment)(struct net *net, struct sock *sk, - struct sk_buff *skb, - struct nf_bridge_frag_data *data, - int (*output)(struct net *, struct sock *sk, - const struct nf_bridge_frag_data *data, - struct sk_buff *)); -#endif -}; - #ifdef CONFIG_NETFILTER #include <net/addrconf.h> -extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; -static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) -{ - return rcu_dereference(nf_ipv6_ops); -} - static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return 1; - - return v6_ops->chk_addr(net, addr, dev, strict); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) return ipv6_chk_addr(net, addr, dev, strict); #else return 1; @@ -99,15 +53,7 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst, static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); - - if (v6ops) - return v6ops->route(net, dst, fl, strict); - - return -EHOSTUNREACH; -#endif -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) return __nf_ip6_route(net, dst, fl, strict); #else return -EHOSTUNREACH; @@ -129,14 +75,7 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)) { -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return 1; - - return v6_ops->br_fragment(net, sk, skb, data, output); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) return br_ip6_fragment(net, sk, skb, data, output); #else return 1; @@ -147,14 +86,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb) { -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->route_me_harder(net, sk, skb); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) return ip6_route_me_harder(net, sk, skb); #else return -EHOSTUNREACH; @@ -165,32 +97,18 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp) { -#if IS_ENABLED(CONFIG_SYN_COOKIES) -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (v6_ops) - return v6_ops->cookie_init_sequence(iph, th, mssp); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_SYN_COOKIES) return __cookie_v6_init_sequence(iph, th, mssp); #endif -#endif return 0; } static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) { -#if IS_ENABLED(CONFIG_SYN_COOKIES) -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (v6_ops) - return v6_ops->cookie_v6_check(iph, th); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_SYN_COOKIES) return __cookie_v6_check(iph, th); #endif -#endif return 0; } @@ -198,14 +116,6 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen); - -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); - -#else /* CONFIG_NETFILTER */ -static inline int ipv6_netfilter_init(void) { return 0; } -static inline void ipv6_netfilter_fini(void) { return; } -static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) { return NULL; } #endif /* CONFIG_NETFILTER */ #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 72ee7d210a74..ba17ac5bf356 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -140,7 +140,6 @@ struct netfs_io_stream { void (*issue_write)(struct netfs_io_subrequest *subreq); /* Collection tracking */ struct list_head subrequests; /* Contributory I/O operations */ - struct netfs_io_subrequest *front; /* Op being collected */ unsigned long long collected_to; /* Position we've collected results to */ size_t transferred; /* The amount transferred from this stream */ unsigned short error; /* Aggregate error for the stream */ diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 207156f2143c..bc1162895f35 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -21,6 +21,7 @@ void lockup_detector_soft_poweroff(void); extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; +extern int watchdog_hardlockup_miss_thresh; extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h index 0014fbc1c626..ea45c54e4435 100644 --- a/include/linux/ns/ns_common_types.h +++ b/include/linux/ns/ns_common_types.h @@ -7,6 +7,7 @@ #include <linux/rbtree.h> #include <linux/refcount.h> #include <linux/types.h> +#include <uapi/linux/sched.h> struct cgroup_namespace; struct dentry; @@ -184,15 +185,38 @@ struct ns_common { struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \ struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL)) -#define ns_common_type(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: CLONE_NEWCGROUP, \ - struct ipc_namespace *: CLONE_NEWIPC, \ - struct mnt_namespace *: CLONE_NEWNS, \ - struct net *: CLONE_NEWNET, \ - struct pid_namespace *: CLONE_NEWPID, \ - struct time_namespace *: CLONE_NEWTIME, \ - struct user_namespace *: CLONE_NEWUSER, \ - struct uts_namespace *: CLONE_NEWUTS) +/* + * FOR_EACH_NS_TYPE - Canonical list of namespace types + * + * Enumerates all (struct type, CLONE_NEW* flag) pairs. This is the + * single source of truth used to derive ns_common_type() and + * CLONE_NS_ALL. When adding a new namespace type, add a single entry + * here; all consumers update automatically. + * + * @X: Callback macro taking (struct_name, clone_flag) as arguments. + */ +#define FOR_EACH_NS_TYPE(X) \ + X(cgroup_namespace, CLONE_NEWCGROUP) \ + X(ipc_namespace, CLONE_NEWIPC) \ + X(mnt_namespace, CLONE_NEWNS) \ + X(net, CLONE_NEWNET) \ + X(pid_namespace, CLONE_NEWPID) \ + X(time_namespace, CLONE_NEWTIME) \ + X(user_namespace, CLONE_NEWUSER) \ + X(uts_namespace, CLONE_NEWUTS) + +/* Bitmask of all known CLONE_NEW* flags. */ +#define _NS_TYPE_FLAG_OR(struct_name, flag) | (flag) +#define CLONE_NS_ALL (0 FOR_EACH_NS_TYPE(_NS_TYPE_FLAG_OR)) + +/* + * ns_common_type - Map a namespace struct pointer to its CLONE_NEW* flag + * + * Uses a leading-comma pattern so the FOR_EACH_NS_TYPE expansion + * produces ", struct foo *: FLAG" entries without a trailing comma. + */ +#define _NS_TYPE_ASSOC(struct_name, flag) , struct struct_name *: (flag) + +#define ns_common_type(__ns) _Generic((__ns)FOR_EACH_NS_TYPE(_NS_TYPE_ASSOC)) #endif /* _LINUX_NS_COMMON_TYPES_H */ diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 825f5865bfc5..c8e227a3f9e2 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) +bool may_see_all_namespaces(void); + static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns) { return atomic_read(&ns->__ns_ref_active); diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 8ff9d663096b..879c3e89e026 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -256,6 +256,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @msg_clear_mask: See ntb_msg_clear_mask(). * @msg_read: See ntb_msg_read(). * @peer_msg_write: See ntb_peer_msg_write(). + * @get_dma_dev: See ntb_get_dma_dev(). */ struct ntb_dev_ops { int (*port_number)(struct ntb_dev *ntb); @@ -329,6 +330,7 @@ struct ntb_dev_ops { int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits); u32 (*msg_read)(struct ntb_dev *ntb, int *pidx, int midx); int (*peer_msg_write)(struct ntb_dev *ntb, int pidx, int midx, u32 msg); + struct device *(*get_dma_dev)(struct ntb_dev *ntb); }; static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) @@ -391,6 +393,8 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* !ops->msg_clear_mask == !ops->msg_count && */ !ops->msg_read == !ops->msg_count && !ops->peer_msg_write == !ops->msg_count && + + /* ops->get_dma_dev is optional */ 1; } @@ -1564,6 +1568,26 @@ static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx, } /** + * ntb_get_dma_dev() - get the device to use for DMA allocations/mappings + * @ntb: NTB device context. + * + * Return a struct device suitable for DMA API allocations and mappings. + * This is typically the parent of the NTB device, but may be overridden by a + * driver by implementing .get_dma_dev(). + * + * Drivers that implement .get_dma_dev() must return a non-NULL pointer. + * + * Return: device pointer to use for DMA operations. + */ +static inline struct device *ntb_get_dma_dev(struct ntb_dev *ntb) +{ + if (!ntb->ops->get_dma_dev) + return ntb->dev.parent; + + return ntb->ops->get_dma_dev(ntb); +} + +/** * ntb_peer_resource_idx() - get a resource index for a given peer idx * @ntb: NTB device context. * @pidx: Peer port index. diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index 60e069a6757f..682f81046345 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -7,11 +7,12 @@ #define _NVME_AUTH_H #include <crypto/kpp.h> +#include <crypto/sha2.h> struct nvme_dhchap_key { size_t len; u8 hash; - u8 key[]; + u8 key[] __counted_by(len); }; u32 nvme_auth_get_seqnum(void); @@ -20,32 +21,44 @@ const char *nvme_auth_dhgroup_kpp(u8 dhgroup_id); u8 nvme_auth_dhgroup_id(const char *dhgroup_name); const char *nvme_auth_hmac_name(u8 hmac_id); -const char *nvme_auth_digest_name(u8 hmac_id); size_t nvme_auth_hmac_hash_len(u8 hmac_id); u8 nvme_auth_hmac_id(const char *hmac_name); +struct nvme_auth_hmac_ctx { + u8 hmac_id; + union { + struct hmac_sha256_ctx sha256; + struct hmac_sha384_ctx sha384; + struct hmac_sha512_ctx sha512; + }; +}; +int nvme_auth_hmac_init(struct nvme_auth_hmac_ctx *hmac, u8 hmac_id, + const u8 *key, size_t key_len); +void nvme_auth_hmac_update(struct nvme_auth_hmac_ctx *hmac, const u8 *data, + size_t data_len); +void nvme_auth_hmac_final(struct nvme_auth_hmac_ctx *hmac, u8 *out); u32 nvme_auth_key_struct_size(u32 key_len); -struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, - u8 key_hash); +struct nvme_dhchap_key *nvme_auth_extract_key(const char *secret, u8 key_hash); void nvme_auth_free_key(struct nvme_dhchap_key *key); struct nvme_dhchap_key *nvme_auth_alloc_key(u32 len, u8 hash); struct nvme_dhchap_key *nvme_auth_transform_key( - struct nvme_dhchap_key *key, char *nqn); -int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key); -int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, - u8 *challenge, u8 *aug, size_t hlen); + const struct nvme_dhchap_key *key, const char *nqn); +int nvme_auth_parse_key(const char *secret, struct nvme_dhchap_key **ret_key); +int nvme_auth_augmented_challenge(u8 hmac_id, const u8 *skey, size_t skey_len, + const u8 *challenge, u8 *aug, size_t hlen); int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid); int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm, u8 *host_key, size_t host_key_len); int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, - u8 *ctrl_key, size_t ctrl_key_len, + const u8 *ctrl_key, size_t ctrl_key_len, u8 *sess_key, size_t sess_key_len); -int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len, - u8 *c1, u8 *c2, size_t hash_len, +int nvme_auth_generate_psk(u8 hmac_id, const u8 *skey, size_t skey_len, + const u8 *c1, const u8 *c2, size_t hash_len, u8 **ret_psk, size_t *ret_len); -int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len, - char *subsysnqn, char *hostnqn, u8 **ret_digest); -int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len, - u8 *psk_digest, u8 **ret_psk); +int nvme_auth_generate_digest(u8 hmac_id, const u8 *psk, size_t psk_len, + const char *subsysnqn, const char *hostnqn, + char **ret_digest); +int nvme_auth_derive_tls_psk(int hmac_id, const u8 *psk, size_t psk_len, + const char *psk_digest, u8 **ret_psk); #endif /* _NVME_AUTH_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 655d194f8e72..041f30931a90 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -513,9 +513,16 @@ struct nvme_id_ns_nvm { __u8 pic; __u8 rsvd9[3]; __le32 elbaf[64]; - __u8 rsvd268[3828]; + __le32 npdgl; + __le32 nprg; + __le32 npra; + __le32 nors; + __le32 npdal; + __u8 rsvd288[3808]; }; +static_assert(sizeof(struct nvme_id_ns_nvm) == 4096); + enum { NVME_ID_NS_NVM_STS_MASK = 0x7f, NVME_ID_NS_NVM_GUARD_SHIFT = 7, @@ -590,7 +597,11 @@ enum { enum { NVME_NS_FEAT_THIN = 1 << 0, NVME_NS_FEAT_ATOMICS = 1 << 1, - NVME_NS_FEAT_IO_OPT = 1 << 4, + NVME_NS_FEAT_OPTPERF_SHIFT = 4, + /* In NVMe version 2.0 and below, OPTPERF is only bit 4 of NSFEAT */ + NVME_NS_FEAT_OPTPERF_MASK = 0x1, + /* Since version 2.1, OPTPERF is bits 4 and 5 of NSFEAT */ + NVME_NS_FEAT_OPTPERF_MASK_2_1 = 0x3, NVME_NS_ATTR_RO = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, NVME_NS_FLBAS_LBA_UMASK = 0x60, @@ -1837,6 +1848,11 @@ enum { NVME_AUTH_HASH_INVALID = 0xff, }; +/* Maximum digest size for any NVME_AUTH_HASH_* value */ +enum { + NVME_AUTH_MAX_DIGEST_SIZE = 64, +}; + /* Defined Diffie-Hellman group identifiers for DH-HMAC-CHAP authentication */ enum { NVME_AUTH_DHGROUP_NULL = 0x00, @@ -2332,4 +2348,8 @@ enum nvme_pr_change_ptpl { #define NVME_PR_IGNORE_KEY (1 << 3) +/* Section 8.3.4.5.2 of the NVMe 2.1 */ +#define NVME_AUTH_DHCHAP_MAX_HASH_IDS 30 +#define NVME_AUTH_DHCHAP_MAX_DH_IDS 30 + #endif /* _LINUX_NVME_H */ diff --git a/include/linux/of.h b/include/linux/of.h index be6ec4916adf..959786f8f196 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -410,7 +410,7 @@ extern int of_alias_get_id(const struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); bool of_machine_compatible_match(const char *const *compats); -bool of_machine_device_match(const struct of_device_id *matches); +const struct of_device_id *of_machine_get_match(const struct of_device_id *matches); const void *of_machine_get_match_data(const struct of_device_id *matches); /** @@ -426,6 +426,9 @@ static inline bool of_machine_is_compatible(const char *compat) return of_machine_compatible_match(compats); } +int of_machine_read_compatible(const char **compatible, unsigned int index); +int of_machine_read_model(const char **model); + extern int of_add_property(struct device_node *np, struct property *prop); extern int of_remove_property(struct device_node *np, struct property *prop); extern int of_update_property(struct device_node *np, struct property *newprop); @@ -851,6 +854,17 @@ static inline int of_machine_is_compatible(const char *compat) return 0; } +static inline int of_machine_read_compatible(const char **compatible, + unsigned int index) +{ + return -ENOSYS; +} + +static inline int of_machine_read_model(const char **model) +{ + return -ENOSYS; +} + static inline int of_add_property(struct device_node *np, struct property *prop) { return 0; @@ -866,9 +880,9 @@ static inline bool of_machine_compatible_match(const char *const *compats) return false; } -static inline bool of_machine_device_match(const struct of_device_id *matches) +static inline const struct of_device_id *of_machine_get_match(const struct of_device_id *matches) { - return false; + return NULL; } static inline const void * @@ -976,6 +990,11 @@ static inline int of_numa_init(void) } #endif +static inline bool of_machine_device_match(const struct of_device_id *matches) +{ + return of_machine_get_match(matches) != NULL; +} + static inline struct device_node *of_find_matching_node( struct device_node *from, const struct of_device_id *matches) diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index fd706cdf255c..16b08234d03b 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -38,6 +38,26 @@ extern int of_dma_controller_register(struct device_node *np, void *data); extern void of_dma_controller_free(struct device_node *np); +static void __of_dma_controller_free(void *np) +{ + of_dma_controller_free(np); +} + +static inline int +devm_of_dma_controller_register(struct device *dev, struct device_node *np, + struct dma_chan *(*of_dma_xlate) + (struct of_phandle_args *, struct of_dma *), + void *data) +{ + int ret; + + ret = of_dma_controller_register(np, of_dma_xlate, data); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, __of_dma_controller_free, np); +} + extern int of_dma_router_register(struct device_node *np, void *(*of_dma_route_allocate) (struct of_phandle_args *, struct of_dma *), @@ -64,6 +84,15 @@ static inline void of_dma_controller_free(struct device_node *np) { } +static inline int +devm_of_dma_controller_register(struct device *dev, struct device_node *np, + struct dma_chan *(*of_dma_xlate) + (struct of_phandle_args *, struct of_dma *), + void *data) +{ + return -ENODEV; +} + static inline int of_dma_router_register(struct device_node *np, void *(*of_dma_route_allocate) (struct of_phandle_args *, struct of_dma *), diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h deleted file mode 100644 index d0f66a5e1b2a..000000000000 --- a/include/linux/of_gpio.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * OF helpers for the GPIO API - * - * Copyright (c) 2007-2008 MontaVista Software, Inc. - * - * Author: Anton Vorontsov <avorontsov@ru.mvista.com> - */ - -#ifndef __LINUX_OF_GPIO_H -#define __LINUX_OF_GPIO_H - -#include <linux/compiler.h> -#include <linux/gpio/driver.h> -#include <linux/gpio.h> /* FIXME: Shouldn't be here */ -#include <linux/of.h> - -struct device_node; - -#ifdef CONFIG_OF_GPIO - -extern int of_get_named_gpio(const struct device_node *np, - const char *list_name, int index); - -#else /* CONFIG_OF_GPIO */ - -#include <linux/errno.h> - -/* Drivers may not strictly depend on the GPIO support, so let them link. */ -static inline int of_get_named_gpio(const struct device_node *np, - const char *propname, int index) -{ - return -ENOSYS; -} - -#endif /* CONFIG_OF_GPIO */ - -#endif /* __LINUX_OF_GPIO_H */ diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index f573423359f4..e8b20b29fa68 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -11,7 +11,6 @@ struct resource; struct reserved_mem { const char *name; - unsigned long fdt_node; const struct reserved_mem_ops *ops; phys_addr_t base; phys_addr_t size; @@ -19,18 +18,20 @@ struct reserved_mem { }; struct reserved_mem_ops { + int (*node_validate)(unsigned long fdt_node, phys_addr_t *align); + int (*node_fixup)(unsigned long fdt_node, phys_addr_t base, + phys_addr_t size); + int (*node_init)(unsigned long fdt_node, struct reserved_mem *rmem); int (*device_init)(struct reserved_mem *rmem, struct device *dev); void (*device_release)(struct reserved_mem *rmem, struct device *dev); }; -typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); - #ifdef CONFIG_OF_RESERVED_MEM -#define RESERVEDMEM_OF_DECLARE(name, compat, init) \ - _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) +#define RESERVEDMEM_OF_DECLARE(name, compat, ops) \ + _OF_DECLARE(reservedmem, name, compat, ops, struct reserved_mem_ops *) int of_reserved_mem_device_init_by_idx(struct device *dev, struct device_node *np, int idx); @@ -48,8 +49,9 @@ int of_reserved_mem_region_count(const struct device_node *np); #else -#define RESERVEDMEM_OF_DECLARE(name, compat, init) \ - _OF_DECLARE_STUB(reservedmem, name, compat, init, reservedmem_of_init_fn) +#define RESERVEDMEM_OF_DECLARE(name, compat, ops) \ + _OF_DECLARE_STUB(reservedmem, name, compat, ops, \ + struct reserved_mem_ops *) static inline int of_reserved_mem_device_init_by_idx(struct device *dev, struct device_node *np, int idx) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index eddd987a8513..a8cb6319b4fb 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -42,7 +42,7 @@ * both the type-agnostic benefits of the macros while also being able to * enforce that the return value is, in fact, checked. */ -static inline bool __must_check __must_check_overflow(bool overflow) +static __always_inline bool __must_check __must_check_overflow(bool overflow) { return unlikely(overflow); } @@ -327,7 +327,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) * with any overflow causing the return value to be SIZE_MAX. The * lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_mul(size_t factor1, size_t factor2) +static __always_inline size_t __must_check size_mul(size_t factor1, size_t factor2) { size_t bytes; @@ -346,7 +346,7 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2) * with any overflow causing the return value to be SIZE_MAX. The * lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_add(size_t addend1, size_t addend2) +static __always_inline size_t __must_check size_add(size_t addend1, size_t addend2) { size_t bytes; @@ -367,7 +367,7 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2) * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX). * The lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) +static __always_inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) { size_t bytes; diff --git a/include/linux/padata.h b/include/linux/padata.h index 765f2778e264..b6232bea6edf 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -149,23 +149,23 @@ struct padata_mt_job { /** * struct padata_instance - The overall control structure. * - * @cpu_online_node: Linkage for CPU online callback. - * @cpu_dead_node: Linkage for CPU offline callback. + * @cpuhp_node: Linkage for CPU hotplug callbacks. * @parallel_wq: The workqueue used for parallel work. * @serial_wq: The workqueue used for serial work. * @pslist: List of padata_shell objects attached to this instance. * @cpumask: User supplied cpumasks for parallel and serial works. + * @validate_cpumask: Internal cpumask used to validate @cpumask during hotplug. * @kobj: padata instance kernel object. * @lock: padata instance lock. * @flags: padata flags. */ struct padata_instance { - struct hlist_node cpu_online_node; - struct hlist_node cpu_dead_node; + struct hlist_node cpuhp_node; struct workqueue_struct *parallel_wq; struct workqueue_struct *serial_wq; struct list_head pslist; struct padata_cpumask cpumask; + cpumask_var_t validate_cpumask; struct kobject kobj; struct mutex lock; u8 flags; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f7a0e4af0c73..0e03d816e8b9 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -198,97 +198,91 @@ enum pageflags { #ifndef __GENERATING_BOUNDS_H -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP -DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); - /* - * Return the real head page struct iff the @page is a fake head page, otherwise - * return the @page itself. See Documentation/mm/vmemmap_dedup.rst. + * For tail pages, if the size of struct page is power-of-2 ->compound_info + * encodes the mask that converts the address of the tail page address to + * the head page address. + * + * Otherwise, ->compound_info has direct pointer to head pages. */ -static __always_inline const struct page *page_fixed_fake_head(const struct page *page) +static __always_inline bool compound_info_has_mask(void) { - if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) - return page; - /* - * Only addresses aligned with PAGE_SIZE of struct page may be fake head - * struct page. The alignment check aims to avoid access the fields ( - * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly) - * cold cacheline in some cases. + * Limit mask usage to HugeTLB vmemmap optimization (HVO) where it + * makes a difference. + * + * The approach with mask would work in the wider set of conditions, + * but it requires validating that struct pages are naturally aligned + * for all orders up to the MAX_FOLIO_ORDER, which can be tricky. */ - if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && - test_bit(PG_head, &page->flags.f)) { - /* - * We can safely access the field of the @page[1] with PG_head - * because the @page is a compound page composed with at least - * two contiguous pages. - */ - unsigned long head = READ_ONCE(page[1].compound_head); - - if (likely(head & 1)) - return (const struct page *)(head - 1); - } - return page; + if (!IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP)) + return false; + + return is_power_of_2(sizeof(struct page)); } -static __always_inline bool page_count_writable(const struct page *page, int u) +static __always_inline unsigned long _compound_head(const struct page *page) { - if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) - return true; + unsigned long info = READ_ONCE(page->compound_info); + unsigned long mask; + + if (!compound_info_has_mask()) { + /* Bit 0 encodes PageTail() */ + if (info & 1) + return info - 1; + + return (unsigned long)page; + } /* - * The refcount check is ordered before the fake-head check to prevent - * the following race: - * CPU 1 (HVO) CPU 2 (speculative PFN walker) - * - * page_ref_freeze() - * synchronize_rcu() - * rcu_read_lock() - * page_is_fake_head() is false - * vmemmap_remap_pte() - * XXX: struct page[] becomes r/o + * If compound_info_has_mask() is true the rest of the info encodes + * the mask that converts the address of the tail page to the head page. * - * page_ref_unfreeze() - * page_ref_count() is not zero + * No need to clear bit 0 in the mask as 'page' always has it clear. * - * atomic_add_unless(&page->_refcount) - * XXX: try to modify r/o struct page[] - * - * The refcount check also prevents modification attempts to other (r/o) - * tail pages that are not fake heads. + * Let's do it in a branchless manner. */ - if (atomic_read_acquire(&page->_refcount) == u) - return false; - return page_fixed_fake_head(page) == page; -} -#else -static inline const struct page *page_fixed_fake_head(const struct page *page) -{ - return page; -} + /* Non-tail: -1UL, Tail: 0 */ + mask = (info & 1) - 1; -static inline bool page_count_writable(const struct page *page, int u) -{ - return true; -} -#endif + /* Non-tail: -1UL, Tail: info */ + mask |= info; -static __always_inline int page_is_fake_head(const struct page *page) -{ - return page_fixed_fake_head(page) != page; + return (unsigned long)page & mask; } -static __always_inline unsigned long _compound_head(const struct page *page) +#define compound_head(page) ((typeof(page))_compound_head(page)) + +static __always_inline void set_compound_head(struct page *tail, + const struct page *head, unsigned int order) { - unsigned long head = READ_ONCE(page->compound_head); + unsigned int shift; + unsigned long mask; + + if (!compound_info_has_mask()) { + WRITE_ONCE(tail->compound_info, (unsigned long)head | 1); + return; + } + + /* + * If the size of struct page is power-of-2, bits [shift:0] of the + * virtual address of compound head are zero. + * + * Calculate mask that can be applied to the virtual address of + * the tail page to get address of the head page. + */ + shift = order + order_base_2(sizeof(struct page)); + mask = GENMASK(BITS_PER_LONG - 1, shift); - if (unlikely(head & 1)) - return head - 1; - return (unsigned long)page_fixed_fake_head(page); + /* Bit 0 encodes PageTail() */ + WRITE_ONCE(tail->compound_info, mask | 1); } -#define compound_head(page) ((typeof(page))_compound_head(page)) +static __always_inline void clear_compound_head(struct page *page) +{ + WRITE_ONCE(page->compound_info, 0); +} /** * page_folio - Converts from page to folio. @@ -320,13 +314,13 @@ static __always_inline unsigned long _compound_head(const struct page *page) static __always_inline int PageTail(const struct page *page) { - return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); + return READ_ONCE(page->compound_info) & 1; } static __always_inline int PageCompound(const struct page *page) { return test_bit(PG_head, &page->flags.f) || - READ_ONCE(page->compound_head) & 1; + READ_ONCE(page->compound_info) & 1; } #define PAGE_POISON_PATTERN -1l @@ -348,7 +342,7 @@ static const unsigned long *const_folio_flags(const struct folio *folio, { const struct page *page = &folio->page; - VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); + VM_BUG_ON_PGFLAGS(page->compound_info & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); return &page[n].flags.f; } @@ -357,7 +351,7 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; - VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); + VM_BUG_ON_PGFLAGS(page->compound_info & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); return &page[n].flags.f; } @@ -724,6 +718,11 @@ static __always_inline bool folio_test_anon(const struct folio *folio) return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0; } +static __always_inline bool folio_test_lazyfree(const struct folio *folio) +{ + return folio_test_anon(folio) && !folio_test_swapbacked(folio); +} + static __always_inline bool PageAnonNotKsm(const struct page *page) { unsigned long flags = (unsigned long)page_folio(page)->mapping; @@ -847,7 +846,7 @@ static __always_inline bool folio_test_head(const struct folio *folio) static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); - return test_bit(PG_head, &page->flags.f) && !page_is_fake_head(page); + return test_bit(PG_head, &page->flags.f); } __SETPAGEFLAG(Head, head, PF_ANY) @@ -865,16 +864,6 @@ static inline bool folio_test_large(const struct folio *folio) return folio_test_head(folio); } -static __always_inline void set_compound_head(struct page *page, struct page *head) -{ - WRITE_ONCE(page->compound_head, (unsigned long)head + 1); -} - -static __always_inline void clear_compound_head(struct page *page) -{ - WRITE_ONCE(page->compound_head, 0); -} - #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void ClearPageCompound(struct page *page) { @@ -934,6 +923,7 @@ enum pagetype { PGTY_zsmalloc = 0xf6, PGTY_unaccepted = 0xf7, PGTY_large_kmalloc = 0xf8, + PGTY_netpp = 0xf9, PGTY_mapcount_underflow = 0xff }; @@ -1066,6 +1056,11 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc) +/* + * Marks page_pool allocated pages. + */ +PAGE_TYPE_OPS(Netpp, netpp, netpp) + /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 544150d1d5fd..94d3f0e71c06 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -228,24 +228,18 @@ static inline int folio_ref_dec_return(struct folio *folio) return page_ref_dec_return(&folio->page); } -static inline bool page_ref_add_unless(struct page *page, int nr, int u) +static inline bool page_ref_add_unless_zero(struct page *page, int nr) { - bool ret = false; - - rcu_read_lock(); - /* avoid writing to the vmemmap area being remapped */ - if (page_count_writable(page, u)) - ret = atomic_add_unless(&page->_refcount, nr, u); - rcu_read_unlock(); + bool ret = atomic_add_unless(&page->_refcount, nr, 0); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); return ret; } -static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) +static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr) { - return page_ref_add_unless(&folio->page, nr, u); + return page_ref_add_unless_zero(&folio->page, nr); } /** @@ -261,12 +255,12 @@ static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) */ static inline bool folio_try_get(struct folio *folio) { - return folio_ref_add_unless(folio, 1, 0); + return folio_ref_add_unless_zero(folio, 1); } static inline bool folio_ref_try_add(struct folio *folio, int count) { - return folio_ref_add_unless(folio, count, 0); + return folio_ref_add_unless_zero(folio, count); } static inline int page_ref_freeze(struct page *page, int count) diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h index fe648dfa3a7c..9d4ca5c218a0 100644 --- a/include/linux/page_reporting.h +++ b/include/linux/page_reporting.h @@ -7,6 +7,7 @@ /* This value should always be a power of 2, see page_reporting_cycle() */ #define PAGE_REPORTING_CAPACITY 32 +#define PAGE_REPORTING_ORDER_UNSPECIFIED -1 struct page_reporting_dev_info { /* function that alters pages to make them "reported" */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ec442af3f886..31a848485ad9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -210,7 +210,6 @@ enum mapping_flags { AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't account usage to user cgroups */ - AS_NO_DATA_INTEGRITY = 11, /* no data integrity guarantees */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, @@ -346,16 +345,6 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct addres return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static inline void mapping_set_no_data_integrity(struct address_space *mapping) -{ - set_bit(AS_NO_DATA_INTEGRITY, &mapping->flags); -} - -static inline bool mapping_no_data_integrity(const struct address_space *mapping) -{ - return test_bit(AS_NO_DATA_INTEGRITY, &mapping->flags); -} - static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) { return mapping->gfp_mask; diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 88e18615dd72..b41d7265c01b 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -148,14 +148,8 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, typedef int __bitwise folio_walk_flags_t; -/* - * Walk migration entries as well. Careful: a large folio might get split - * concurrently. - */ -#define FW_MIGRATION ((__force folio_walk_flags_t)BIT(0)) - /* Walk shared zeropages (small + huge) as well. */ -#define FW_ZEROPAGE ((__force folio_walk_flags_t)BIT(1)) +#define FW_ZEROPAGE ((__force folio_walk_flags_t)BIT(0)) enum folio_walk_level { FW_LEVEL_PTE, diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index c021c7af175f..1eca1264815b 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -191,13 +191,49 @@ struct pci_epc { * @BAR_RESIZABLE: The BAR implements the PCI-SIG Resizable BAR Capability. * NOTE: An EPC driver can currently only set a single supported * size. - * @BAR_RESERVED: The BAR should not be touched by an EPF driver. + * @BAR_RESERVED: Used for HW-backed BARs (e.g. MSI-X table, DMA regs). The BAR + * should not be disabled by an EPC driver. The BAR should not be + * reprogrammed by an EPF driver. An EPF driver is allowed to + * disable the BAR if absolutely necessary. (However, right now + * there is no EPC operation to disable a BAR that has not been + * programmed using pci_epc_set_bar().) + * @BAR_DISABLED: The BAR should be disabled by an EPC driver. The BAR will be + * unavailable to an EPF driver. */ enum pci_epc_bar_type { BAR_PROGRAMMABLE = 0, BAR_FIXED, BAR_RESIZABLE, BAR_RESERVED, + BAR_DISABLED, +}; + +/** + * enum pci_epc_bar_rsvd_region_type - type of a fixed subregion behind a BAR + * @PCI_EPC_BAR_RSVD_DMA_CTRL_MMIO: Integrated DMA controller MMIO window + * @PCI_EPC_BAR_RSVD_MSIX_TBL_RAM: MSI-X table structure + * @PCI_EPC_BAR_RSVD_MSIX_PBA_RAM: MSI-X PBA structure + * + * BARs marked BAR_RESERVED are owned by the SoC/EPC hardware and must not be + * reprogrammed by EPF drivers. Some of them still expose fixed subregions that + * EPFs may want to reference (e.g. embedded doorbell fallback). + */ +enum pci_epc_bar_rsvd_region_type { + PCI_EPC_BAR_RSVD_DMA_CTRL_MMIO = 0, + PCI_EPC_BAR_RSVD_MSIX_TBL_RAM, + PCI_EPC_BAR_RSVD_MSIX_PBA_RAM, +}; + +/** + * struct pci_epc_bar_rsvd_region - fixed subregion behind a BAR + * @type: reserved region type + * @offset: offset within the BAR aperture + * @size: size of the reserved region + */ +struct pci_epc_bar_rsvd_region { + enum pci_epc_bar_rsvd_region_type type; + resource_size_t offset; + resource_size_t size; }; /** @@ -206,18 +242,16 @@ enum pci_epc_bar_type { * @fixed_size: the fixed size, only applicable if type is BAR_FIXED_MASK. * @only_64bit: if true, an EPF driver is not allowed to choose if this BAR * should be configured as 32-bit or 64-bit, the EPF driver must - * configure this BAR as 64-bit. Additionally, the BAR succeeding - * this BAR must be set to type BAR_RESERVED. - * - * only_64bit should not be set on a BAR of type BAR_RESERVED. - * (If BARx is a 64-bit BAR that an EPF driver is not allowed to - * touch, then both BARx and BARx+1 must be set to type - * BAR_RESERVED.) + * configure this BAR as 64-bit. + * @nr_rsvd_regions: number of fixed subregions described for BAR_RESERVED + * @rsvd_regions: fixed subregions behind BAR_RESERVED */ struct pci_epc_bar_desc { enum pci_epc_bar_type type; u64 fixed_size; bool only_64bit; + u8 nr_rsvd_regions; + const struct pci_epc_bar_rsvd_region *rsvd_regions; }; /** diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index ba28140ce670..be68cd17f2f8 100644 --- a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -25,7 +25,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag); int pcie_tph_get_cpu_st(struct pci_dev *dev, enum tph_mem_type mem_type, - unsigned int cpu_uid, u16 *tag); + unsigned int cpu, u16 *tag); void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); u16 pcie_tph_get_st_table_size(struct pci_dev *pdev); @@ -36,7 +36,7 @@ static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, { return -EINVAL; } static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, enum tph_mem_type mem_type, - unsigned int cpu_uid, u16 *tag) + unsigned int cpu, u16 *tag) { return -EINVAL; } static inline void pcie_disable_tph(struct pci_dev *pdev) { } static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) diff --git a/include/linux/pci.h b/include/linux/pci.h index 1c270f1d5123..2c4454583c11 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -72,12 +72,20 @@ /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */ #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff) +/* + * PCI_SLOT_ALL_DEVICES indicates a slot that covers all devices on the bus. + * Used for PCIe hotplug where the physical slot is the entire secondary bus, + * and, if ARI Forwarding is enabled, functions may appear to be on multiple + * devices. + */ +#define PCI_SLOT_ALL_DEVICES 0xfe + /* pci_slot represents a physical slot */ struct pci_slot { struct pci_bus *bus; /* Bus this slot is on */ struct list_head list; /* Node in list of slots */ struct hotplug_slot *hotplug; /* Hotplug info (move here) */ - unsigned char number; /* PCI_SLOT(pci_dev->devfn) */ + unsigned char number; /* Device nr, or PCI_SLOT_ALL_DEVICES */ struct kobject kobj; }; @@ -518,7 +526,7 @@ struct pci_dev { unsigned int ptm_root:1; unsigned int ptm_responder:1; unsigned int ptm_requester:1; - unsigned int ptm_enabled:1; + atomic_t ptm_enable_cnt; u8 ptm_granularity; #endif #ifdef CONFIG_PCI_MSI @@ -575,12 +583,6 @@ struct pci_dev { u8 supported_speeds; /* Supported Link Speeds Vector */ phys_addr_t rom; /* Physical address if not from BAR */ size_t romlen; /* Length if not from BAR */ - /* - * Driver name to force a match. Do not set directly, because core - * frees it. Use driver_set_override() to set or clear it. - */ - const char *driver_override; - unsigned long priv_flags; /* Private flags for the PCI driver */ /* These methods index pci_reset_fn_methods[] */ @@ -1193,8 +1195,6 @@ extern const struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch-specific PCI * code, or PCI core code. */ extern struct list_head pci_root_buses; /* List of all known PCI buses */ -/* Some device drivers need know if PCI is initiated */ -int no_pci_devices(void); void pcibios_resource_survey_bus(struct pci_bus *bus); void pcibios_bus_add_device(struct pci_dev *pdev); @@ -1206,9 +1206,15 @@ int __must_check pcibios_enable_device(struct pci_dev *, int mask); char *pcibios_setup(char *str); /* Used only when drivers/pci/setup.c is used */ -resource_size_t pcibios_align_resource(void *, const struct resource *, - resource_size_t, - resource_size_t); +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + const struct resource *empty_res, + resource_size_t size, + resource_size_t align); +resource_size_t pci_align_resource(struct pci_dev *dev, + const struct resource *res, + const struct resource *empty_res, + resource_size_t size, + resource_size_t align); /* Generic PCI functions used internally */ @@ -1975,11 +1981,11 @@ struct pci_ptm_debugfs { }; #ifdef CONFIG_PCIE_PTM -int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); +int pci_enable_ptm(struct pci_dev *dev); void pci_disable_ptm(struct pci_dev *dev); bool pcie_ptm_enabled(struct pci_dev *dev); #else -static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) +static inline int pci_enable_ptm(struct pci_dev *dev) { return -EINVAL; } static inline void pci_disable_ptm(struct pci_dev *dev) { } static inline bool pcie_ptm_enabled(struct pci_dev *dev) @@ -2132,7 +2138,6 @@ static inline struct pci_dev *pci_get_base_class(unsigned int class, static inline int pci_dev_present(const struct pci_device_id *ids) { return 0; } -#define no_pci_devices() (1) #define pci_dev_put(dev) do { } while (0) static inline void pci_set_master(struct pci_dev *dev) { } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 406abf629be2..24cb42f66e4b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2586,6 +2586,8 @@ #define PCI_VENDOR_ID_AZWAVE 0x1a3b +#define PCI_VENDOR_ID_GOOGLE 0x1ae0 + #define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 #define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4 #define PCI_SUBDEVICE_ID_QEMU 0x1100 diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a50df42a893f..cdd68ed3ae1a 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -491,64 +491,63 @@ static inline pgd_t pgdp_get(pgd_t *pgdp) #endif #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pte_t *ptep) +static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { pte_t pte = ptep_get(ptep); - int r = 1; + bool young = true; + if (!pte_young(pte)) - r = 0; + young = false; else set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte)); - return r; + return young; } #endif #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) -static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp) +static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) { pmd_t pmd = *pmdp; - int r = 1; + bool young = true; + if (!pmd_young(pmd)) - r = 0; + young = false; else set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd)); - return r; + return young; } #else -static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp) +static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) { BUILD_BUG(); - return 0; + return false; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -int ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep); +bool ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); #endif #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); +bool pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); #else /* * Despite relevant to THP only, this API is called from generic rmap code * under PageTransHuge(), hence needs a dummy implementation for !THP */ -static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) +static inline bool pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) { BUILD_BUG(); - return 0; + return false; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif @@ -1086,10 +1085,10 @@ static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr, * Context: The caller holds the page table lock. The PTEs map consecutive * pages that belong to the same folio. The PTEs are all in the same PMD. */ -static inline int clear_flush_young_ptes(struct vm_area_struct *vma, +static inline bool clear_flush_young_ptes(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, unsigned int nr) { - int young = 0; + bool young = false; for (;;) { young |= ptep_clear_flush_young(vma, addr, ptep); @@ -1103,6 +1102,43 @@ static inline int clear_flush_young_ptes(struct vm_area_struct *vma, } #endif +#ifndef test_and_clear_young_ptes +/** + * test_and_clear_young_ptes - Mark PTEs that map consecutive pages of the same + * folio as old + * @vma: The virtual memory area the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear access bit. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_test_and_clear_young(). + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + * + * Returns: whether any PTE was young. + */ +static inline bool test_and_clear_young_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + bool young = false; + + for (;;) { + young |= ptep_test_and_clear_young(vma, addr, ptep); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } + + return young; +} +#endif + /* * On some architectures hardware does not set page access bit when accessing * memory page, it is responsibility of software setting this bit. It brings @@ -1917,41 +1953,56 @@ static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot) pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot); } -#ifdef CONFIG_MMU +/* + * ZERO_PAGE() is global shared page(s) that is always zero. It is used for + * zero-mapped memory areas, CoW etc. + * + * On architectures that __HAVE_COLOR_ZERO_PAGE there are several such pages + * for different ranges in the virtual address space. + * + * zero_page_pfn identifies the first (or the only) pfn for these pages. + * + * For architectures that don't __HAVE_COLOR_ZERO_PAGE the zero page lives in + * empty_zero_page in BSS. + */ +void arch_setup_zero_pages(void); + #ifdef __HAVE_COLOR_ZERO_PAGE static inline int is_zero_pfn(unsigned long pfn) { - extern unsigned long zero_pfn; - unsigned long offset_from_zero_pfn = pfn - zero_pfn; + extern unsigned long zero_page_pfn; + unsigned long offset_from_zero_pfn = pfn - zero_page_pfn; + return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); } -#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) +#define zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) #else static inline int is_zero_pfn(unsigned long pfn) { - extern unsigned long zero_pfn; - return pfn == zero_pfn; -} + extern unsigned long zero_page_pfn; -static inline unsigned long my_zero_pfn(unsigned long addr) -{ - extern unsigned long zero_pfn; - return zero_pfn; + return pfn == zero_page_pfn; } -#endif -#else -static inline int is_zero_pfn(unsigned long pfn) + +static inline unsigned long zero_pfn(unsigned long addr) { - return 0; + extern unsigned long zero_page_pfn; + + return zero_page_pfn; } -static inline unsigned long my_zero_pfn(unsigned long addr) +extern uint8_t empty_zero_page[PAGE_SIZE]; +extern struct page *__zero_page; + +static inline struct page *_zero_page(unsigned long addr) { - return 0; + return __zero_page; } -#endif /* CONFIG_MMU */ +#define ZERO_PAGE(vaddr) _zero_page(vaddr) + +#endif /* __HAVE_COLOR_ZERO_PAGE */ #ifdef CONFIG_MMU @@ -1989,7 +2040,7 @@ static inline int pud_trans_unstable(pud_t *pud) { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) - pud_t pudval = READ_ONCE(*pud); + pud_t pudval = pudp_get(pud); if (pud_none(pudval) || pud_trans_huge(pudval)) return 1; diff --git a/include/linux/phy.h b/include/linux/phy.h index 6f9979a26892..199a7aaa341b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -612,6 +612,8 @@ struct phy_oatc14_sqi_capability { * @advertising_eee: Currently advertised EEE linkmodes * @enable_tx_lpi: When True, MAC should transmit LPI to PHY * @eee_active: phylib private state, indicating that EEE has been negotiated + * @autonomous_eee_disabled: Set when autonomous EEE has been disabled, + * used to re-apply after PHY soft reset * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host @@ -739,6 +741,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_disabled_modes); bool enable_tx_lpi; bool eee_active; + bool autonomous_eee_disabled; struct eee_config eee_cfg; /* Host supported PHY interface types. Should be ignored if empty. */ @@ -1359,6 +1362,17 @@ struct phy_driver { void (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); + /** + * @disable_autonomous_eee: Disable PHY-autonomous EEE + * + * Some PHYs manage EEE autonomously, preventing the MAC from + * controlling LPI signaling. This callback disables autonomous + * EEE at the PHY. + * + * Return: 0 on success, negative errno on failure. + */ + int (*disable_autonomous_eee)(struct phy_device *dev); + /* Get and Set PHY tunables */ /** @get_tunable: Return the value of a tunable */ int (*get_tunable)(struct phy_device *dev, @@ -2152,8 +2166,6 @@ int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable, int speed); -struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, - phy_interface_t interface); struct phy_device *phy_find_next(struct mii_bus *bus, struct phy_device *pos); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); @@ -2448,9 +2460,6 @@ int __phy_hwtstamp_set(struct phy_device *phydev, struct phy_port *phy_get_sfp_port(struct phy_device *phydev); -extern const struct bus_type mdio_bus_type; -extern const struct class mdio_bus_class; - /** * phy_module_driver() - Helper macro for registering PHY drivers * @__phy_drivers: array of PHY drivers to register diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 63ce16191eb9..11b8f0b8da0c 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -35,6 +35,8 @@ int pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset); int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, unsigned long config); +int pinctrl_gpio_get_config(struct gpio_chip *gc, unsigned int offset, + unsigned long *config); struct pinctrl * __must_check pinctrl_get(struct device *dev); void pinctrl_put(struct pinctrl *p); @@ -102,6 +104,13 @@ pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset) } static inline int +pinctrl_gpio_get_config(struct gpio_chip *gc, unsigned int offset, + unsigned long *config) +{ + return 0; +} + +static inline int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, unsigned long config) { diff --git a/include/linux/platform_data/dma-mcf-edma.h b/include/linux/platform_data/dma-mcf-edma.h index d718ccfa3421..0b31af66a1ac 100644 --- a/include/linux/platform_data/dma-mcf-edma.h +++ b/include/linux/platform_data/dma-mcf-edma.h @@ -26,8 +26,9 @@ bool mcf_edma_filter_fn(struct dma_chan *chan, void *param); /** * struct mcf_edma_platform_data - platform specific data for eDMA engine * - * @ver The eDMA module version. - * @dma_channels The number of eDMA channels. + * @dma_channels: The number of eDMA channels. + * @slave_map: Slave device map + * @slavecnt: Number of entries in @slave_map */ struct mcf_edma_platform_data { int dma_channels; diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h index d4d9bf2060a6..77424bb24723 100644 --- a/include/linux/platform_data/dsa.h +++ b/include/linux/platform_data/dsa.h @@ -3,20 +3,11 @@ #define __DSA_PDATA_H struct device; -struct net_device; -#define DSA_MAX_SWITCHES 4 #define DSA_MAX_PORTS 12 -#define DSA_RTABLE_NONE -1 struct dsa_chip_data { /* - * How to access the switch configuration registers. - */ - struct device *host_dev; - int sw_addr; - - /* * Reference to network devices */ struct device *netdev[DSA_MAX_PORTS]; @@ -24,12 +15,6 @@ struct dsa_chip_data { /* set to size of eeprom if supported by the switch */ int eeprom_len; - /* Device tree node pointer for this specific switch chip - * used during switch setup in case additional properties - * and resources needs to be used - */ - struct device_node *of_node; - /* * The names of the switch's ports. Use "cpu" to * designate the switch port that the cpu is connected to, @@ -38,31 +23,6 @@ struct dsa_chip_data { * or any other string to indicate this is a physical port. */ char *port_names[DSA_MAX_PORTS]; - struct device_node *port_dn[DSA_MAX_PORTS]; - - /* - * An array of which element [a] indicates which port on this - * switch should be used to send packets to that are destined - * for switch a. Can be NULL if there is only one switch chip. - */ - s8 rtable[DSA_MAX_SWITCHES]; }; -struct dsa_platform_data { - /* - * Reference to a Linux network interface that connects - * to the root switch chip of the tree. - */ - struct device *netdev; - struct net_device *of_netdev; - - /* - * Info structs describing each of the switch chips - * connected via this network interface. - */ - int nr_chips; - struct dsa_chip_data *chip; -}; - - #endif /* __DSA_PDATA_H */ diff --git a/include/linux/platform_data/ina2xx.h b/include/linux/platform_data/ina2xx.h deleted file mode 100644 index 2aa5ee9a9050..000000000000 --- a/include/linux/platform_data/ina2xx.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Driver for Texas Instruments INA219, INA226 power monitor chips - * - * Copyright (C) 2012 Lothar Felten <lothar.felten@gmail.com> - * - * For further information, see the Documentation/hwmon/ina2xx.rst file. - */ - -/** - * struct ina2xx_platform_data - ina2xx info - * @shunt_uohms shunt resistance in microohms - */ -struct ina2xx_platform_data { - long shunt_uohms; -}; diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h deleted file mode 100644 index 13874fa6e767..000000000000 --- a/include/linux/platform_data/mdio-gpio.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * MDIO-GPIO bus platform data structure - */ - -#ifndef __LINUX_MDIO_GPIO_PDATA_H -#define __LINUX_MDIO_GPIO_PDATA_H - -struct mdio_gpio_platform_data { - u32 phy_mask; - u32 phy_ignore_ta_mask; -}; - -#endif /* __LINUX_MDIO_GPIO_PDATA_H */ diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index f6cca7a035c7..50b6be57da66 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -13,10 +13,10 @@ /** * enum mlxreg_wdt_type - type of HW watchdog * - * TYPE1 HW watchdog implementation exist in old systems. - * All new systems have TYPE2 HW watchdog. - * TYPE3 HW watchdog can exist on all systems with new CPLD. - * TYPE3 is selected by WD capability bit. + * @MLX_WDT_TYPE1: HW watchdog implementation in old systems. + * @MLX_WDT_TYPE2: All new systems have TYPE2 HW watchdog. + * @MLX_WDT_TYPE3: HW watchdog that can exist on all systems with new CPLD. + * TYPE3 is selected by WD capability bit. */ enum mlxreg_wdt_type { MLX_WDT_TYPE1, @@ -35,7 +35,7 @@ enum mlxreg_wdt_type { * @MLXREG_HOTPLUG_LC_SYNCED: entry for line card synchronization events, coming * after hardware-firmware synchronization handshake; * @MLXREG_HOTPLUG_LC_READY: entry for line card ready events, indicating line card - PHYs ready / unready state; + * PHYs ready / unready state; * @MLXREG_HOTPLUG_LC_ACTIVE: entry for line card active events, indicating firmware * availability / unavailability for the ports on line card; * @MLXREG_HOTPLUG_LC_THERMAL: entry for line card thermal shutdown events, positive @@ -123,8 +123,8 @@ struct mlxreg_hotplug_device { * @reg_pwr: attribute power register; * @reg_ena: attribute enable register; * @mode: access mode; - * @np - pointer to node platform associated with attribute; - * @hpdev - hotplug device data; + * @np: pointer to node platform associated with attribute; + * @hpdev: hotplug device data; * @notifier: pointer to event notifier block; * @health_cntr: dynamic device health indication counter; * @attached: true if device has been attached after good health indication; diff --git a/include/linux/platform_data/voltage-omap.h b/include/linux/platform_data/voltage-omap.h index 6d74e507dbd2..2b48f2b0135d 100644 --- a/include/linux/platform_data/voltage-omap.h +++ b/include/linux/platform_data/voltage-omap.h @@ -10,14 +10,14 @@ /** * struct omap_volt_data - Omap voltage specific data. - * @voltage_nominal: The possible voltage value in uV + * @volt_nominal: The possible voltage value in uV * @sr_efuse_offs: The offset of the efuse register(from system * control module base address) from where to read * the n-target value for the smartreflex module. * @sr_errminlimit: Error min limit value for smartreflex. This value * differs at differnet opp and thus is linked * with voltage. - * @vp_errorgain: Error gain value for the voltage processor. This + * @vp_errgain: Error gain value for the voltage processor. This * field also differs according to the voltage/opp. */ struct omap_volt_data { diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index b1b837583d54..dbe745dc88d5 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -26,6 +26,7 @@ #define INT3472_GPIO_TYPE_POWER_ENABLE 0x0b #define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d +#define INT3472_GPIO_TYPE_DOVDD 0x10 #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 #define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 @@ -33,8 +34,8 @@ #define INT3472_MAX_SENSOR_GPIOS 3 #define INT3472_MAX_REGULATORS 3 -/* E.g. "avdd\0" */ -#define GPIO_SUPPLY_NAME_LENGTH 5 +/* E.g. "dovdd\0" */ +#define GPIO_SUPPLY_NAME_LENGTH 6 /* 12 chars for acpi_dev_name() + "-", e.g. "ABCD1234:00-" */ #define GPIO_REGULATOR_NAME_LENGTH (12 + GPIO_SUPPLY_NAME_LENGTH) /* lower- and upper-case mapping */ diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 813da101b5bf..975400a472e3 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -31,11 +31,6 @@ struct platform_device { struct resource *resource; const struct platform_device_id *id_entry; - /* - * Driver name to force a match. Do not set directly, because core - * frees it. Use driver_set_override() to set or clear it. - */ - const char *driver_override; /* MFD cell pointer */ struct mfd_cell *mfd_cell; @@ -118,22 +113,58 @@ extern int platform_get_irq_byname_optional(struct platform_device *dev, const char *name); extern int platform_add_devices(struct platform_device **, int); +/** + * struct platform_device_info - set of parameters for creating a platform device + * @parent: parent device for the new platform device. + * @fwnode: firmware node associated with the device. + * @of_node_reused: indicates that device tree node associated with the device + * is shared with another device, typically its ancestor. Setting this to + * %true prevents the device from being matched via the OF match table, + * and stops the device core from automatically binding pinctrl + * configuration to avoid disrupting the other device. + * @name: name of the device. + * @id: instance ID of the device. Use %PLATFORM_DEVID_NONE if there is only + * one instance of the device, or %PLATFORM_DEVID_AUTO to let the + * kernel automatically assign a unique instance ID. + * @res: set of resources to attach to the device. + * @num_res: number of entries in @res. + * @data: device-specific data for this platform device. + * @size_data: size of device-specific data. + * @dma_mask: DMA mask for the device. + * @swnode: a secondary software node to be attached to the device. The node + * will be automatically registered and its lifetime tied to the platform + * device if it is not registered yet. + * @properties: a set of software properties for the device. If provided, + * a managed software node will be automatically created and + * assigned to the device. The properties array must be terminated + * with a sentinel entry. Specifying both @properties and @swnode is not + * allowed. + * + * This structure is used to hold information needed to create and register + * a platform device using platform_device_register_full(). + * + * platform_device_register_full() makes deep copies of @name, @res, @data and + * @properties, so the caller does not need to keep them after registration. + * If the registration is performed during initialization, these can be marked + * as __initconst. + */ struct platform_device_info { - struct device *parent; - struct fwnode_handle *fwnode; - bool of_node_reused; + struct device *parent; + struct fwnode_handle *fwnode; + bool of_node_reused; - const char *name; - int id; + const char *name; + int id; - const struct resource *res; - unsigned int num_res; + const struct resource *res; + unsigned int num_res; - const void *data; - size_t size_data; - u64 dma_mask; + const void *data; + size_t size_data; + u64 dma_mask; - const struct property_entry *properties; + const struct software_node *swnode; + const struct property_entry *properties; }; extern struct platform_device *platform_device_register_full( const struct platform_device_info *pdevinfo); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 93ba0143ca47..b299dc0128d6 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -49,8 +49,8 @@ struct dev_pm_domain_attach_data { const char * const *pd_names; - const u32 num_pd_names; - const u32 pd_flags; + u32 num_pd_names; + u32 pd_flags; }; struct dev_pm_domain_list { @@ -183,6 +183,7 @@ struct genpd_power_state { u64 rejected; u64 above; u64 below; + u64 usage_s2idle; struct fwnode_handle *fwnode; u64 idle_time; void *data; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 41037c513f06..64921b10ac74 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -545,22 +545,10 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). - * - * Return: - * * 1: Success. Usage counter dropped to zero, but device was already suspended. - * * 0: Success. - * * -EINVAL: Runtime PM error. - * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status - * change ongoing. - * * -EBUSY: Runtime PM child_count non-zero. - * * -EPERM: Device PM QoS resume latency 0. - * * -EINPROGRESS: Suspend already in progress. - * * -ENOSYS: CONFIG_PM not enabled. */ -static inline int pm_runtime_put(struct device *dev) +static inline void pm_runtime_put(struct device *dev) { - return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); + __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } /** diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index c417abd2ab70..d5b08313cf11 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -17,6 +17,7 @@ #define MAX17042_DEFAULT_VMAX (4500) /* LiHV cell max */ #define MAX17042_DEFAULT_TEMP_MIN (0) /* For sys without temp sensor */ #define MAX17042_DEFAULT_TEMP_MAX (700) /* 70 degrees Celcius */ +#define MAX17042_DEFAULT_TASK_PERIOD (5760) /* Consider RepCap which is less then 10 units below FullCAP full */ #define MAX17042_FULL_THRESHOLD 10 @@ -105,7 +106,7 @@ enum max17042_register { MAX17042_OCV = 0xEE, - MAX17042_OCVInternal = 0xFB, /* MAX17055 VFOCV */ + MAX17042_OCVInternal = 0xFB, /* MAX17055/77759 VFOCV */ MAX17042_VFSOC = 0xFF, }; @@ -156,7 +157,7 @@ enum max17055_register { MAX17055_AtAvCap = 0xDF, }; -/* Registers specific to max17047/50/55 */ +/* Registers specific to max17047/50/55/77759 */ enum max17047_register { MAX17047_QRTbl00 = 0x12, MAX17047_FullSOCThr = 0x13, @@ -167,12 +168,32 @@ enum max17047_register { MAX17047_QRTbl30 = 0x42, }; +enum max77759_register { + MAX77759_AvgTA0 = 0x26, + MAX77759_AtTTF = 0x33, + MAX77759_Tconvert = 0x34, + MAX77759_AvgCurrent0 = 0x3B, + MAX77759_THMHOT = 0x40, + MAX77759_CTESample = 0x41, + MAX77759_ISys = 0x43, + MAX77759_AvgVCell0 = 0x44, + MAX77759_RlxSOC = 0x47, + MAX77759_AvgISys = 0x4B, + MAX77759_QH0 = 0x4C, + MAX77759_MixAtFull = 0x4F, + MAX77759_VSys = 0xB1, + MAX77759_TAlrtTh2 = 0xB2, + MAX77759_VByp = 0xB3, + MAX77759_IIn = 0xD0, +}; + enum max170xx_chip_type { MAXIM_DEVICE_TYPE_UNKNOWN = 0, MAXIM_DEVICE_TYPE_MAX17042, MAXIM_DEVICE_TYPE_MAX17047, MAXIM_DEVICE_TYPE_MAX17050, MAXIM_DEVICE_TYPE_MAX17055, + MAXIM_DEVICE_TYPE_MAX77759, MAXIM_DEVICE_TYPE_NUM }; diff --git a/include/linux/powercap.h b/include/linux/powercap.h index 3d557bbcd2c7..603419db924c 100644 --- a/include/linux/powercap.h +++ b/include/linux/powercap.h @@ -238,7 +238,7 @@ static inline void *powercap_get_zone_data(struct powercap_zone *power_zone) * Advantage of this parameter is that client can embed * this data in its data structures and allocate in a * single call, preventing multiple allocations. -* @control_type_name: The Name of this control_type, which will be shown +* @name: The Name of this control_type, which will be shown * in the sysfs Interface. * @ops: Callbacks for control type. This parameter is optional. * @@ -277,7 +277,7 @@ int powercap_unregister_control_type(struct powercap_control_type *instance); * @name: A name for this zone. * @parent: A pointer to the parent power zone instance if any or NULL * @ops: Pointer to zone operation callback structure. -* @no_constraints: Number of constraints for this zone +* @nr_constraints: Number of constraints for this zone * @const_ops: Pointer to constraint callback structure * * Register a power zone under a given control type. A power zone must register diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index f73fbea0dbc2..2f63e9a6cc88 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -55,7 +55,7 @@ extern void ppp_input(struct ppp_channel *, struct sk_buff *); /* Called by the channel when an input error occurs, indicating that we may have missed a packet. */ -extern void ppp_input_error(struct ppp_channel *, int code); +extern void ppp_input_error(struct ppp_channel *); /* Attach a channel to a given PPP unit in specified net. */ extern int ppp_register_net_channel(struct net *, struct ppp_channel *); @@ -72,7 +72,9 @@ extern int ppp_channel_index(struct ppp_channel *); /* Get the unit number associated with a channel, or -1 if none */ extern int ppp_unit_number(struct ppp_channel *); -/* Get the device name associated with a channel, or NULL if none */ +/* Get the device name associated with a channel, or NULL if none. + * Caller must hold RCU read lock. + */ extern char *ppp_dev_name(struct ppp_channel *); /* diff --git a/include/linux/printk.h b/include/linux/printk.h index 63d516c873b4..54e3c621fec3 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -801,6 +801,19 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, } #endif +#if defined(DEBUG) +#define print_hex_dump_devel(prefix_str, prefix_type, rowsize, \ + groupsize, buf, len, ascii) \ + print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ + groupsize, buf, len, ascii) +#else +static inline void print_hex_dump_devel(const char *prefix_str, int prefix_type, + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) +{ +} +#endif + /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 69ffa4b4d1fa..d5099a2baca5 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -829,12 +829,14 @@ struct sev_data_range_list { * * @len: length of the command buffer read by the PSP * @iommu_snp_shutdown: Disable enforcement of SNP in the IOMMU + * @x86_snp_shutdown: Disable SNP on all cores * @rsvd1: reserved */ struct sev_data_snp_shutdown_ex { u32 len; u32 iommu_snp_shutdown:1; - u32 rsvd1:31; + u32 x86_snp_shutdown:1; + u32 rsvd1:30; } __packed; /** @@ -891,6 +893,7 @@ struct snp_feature_info { } __packed; /* Feature bits in ECX */ +#define SNP_X86_SHUTDOWN_SUPPORTED BIT(1) #define SNP_RAPL_DISABLE_SUPPORTED BIT(2) #define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) #define SNP_AES_256_XTS_POLICY_SUPPORTED BIT(4) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 534531807d95..d2c3629bbe45 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -48,7 +48,7 @@ struct ptr_ring { */ static inline bool __ptr_ring_full(struct ptr_ring *r) { - return r->queue[r->producer]; + return data_race(r->queue[r->producer]); } static inline bool ptr_ring_full(struct ptr_ring *r) @@ -103,7 +103,7 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { - if (unlikely(!r->size) || r->queue[r->producer]) + if (unlikely(!r->size) || data_race(r->queue[r->producer])) return -ENOSPC; /* Make sure the pointer we are storing points to a valid data. */ @@ -194,7 +194,7 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r) static inline bool __ptr_ring_empty(struct ptr_ring *r) { if (likely(r->size)) - return !r->queue[READ_ONCE(r->consumer_head)]; + return !data_race(r->queue[READ_ONCE(r->consumer_head)]); return true; } @@ -256,7 +256,7 @@ static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head) * besides the first one until we write out all entries. */ while (likely(head > r->consumer_tail)) - r->queue[--head] = NULL; + data_race(r->queue[--head] = NULL); r->consumer_tail = consumer_head; } diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index c334f82ed385..f9c0f9d7c9d9 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -44,14 +44,7 @@ int dquot_initialize(struct inode *inode); bool dquot_initialize_needed(struct inode *inode); void dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, struct kqid qid); -static inline struct dquot *dqgrab(struct dquot *dquot) -{ - /* Make sure someone else has active reference to dquot */ - WARN_ON_ONCE(!atomic_read(&dquot->dq_count)); - WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)); - atomic_inc(&dquot->dq_count); - return dquot; -} +struct dquot *dqgrab(struct dquot *dquot); static inline bool dquot_is_busy(struct dquot *dquot) { diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 51b811b62322..870558c9d36e 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -2,29 +2,6 @@ #ifndef _XOR_H #define _XOR_H -#define MAX_XOR_BLOCKS 4 +void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes); -extern void xor_blocks(unsigned int count, unsigned int bytes, - void *dest, void **srcs); - -struct xor_block_template { - struct xor_block_template *next; - const char *name; - int speed; - void (*do_2)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict); - void (*do_3)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict); - void (*do_4)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict); - void (*do_5)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict); -}; - -#endif +#endif /* _XOR_H */ diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 1d982dbdd0d0..024fc20e7762 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -6,10 +6,10 @@ #include <linux/kernel.h> #include <linux/jump_label.h> #include <linux/percpu-defs.h> +#include <linux/prandom.h> DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, randomize_kstack_offset); -DECLARE_PER_CPU(u32, kstack_offset); /* * Do not use this anywhere else in the kernel. This is used here because @@ -46,53 +46,39 @@ DECLARE_PER_CPU(u32, kstack_offset); #define KSTACK_OFFSET_MAX(x) ((x) & 0b1111111100) #endif +DECLARE_PER_CPU(struct rnd_state, kstack_rnd_state); + +static __always_inline u32 get_kstack_offset(void) +{ + struct rnd_state *state; + u32 rnd; + + state = &get_cpu_var(kstack_rnd_state); + rnd = prandom_u32_state(state); + put_cpu_var(kstack_rnd_state); + + return rnd; +} + /** - * add_random_kstack_offset - Increase stack utilization by previously - * chosen random offset + * add_random_kstack_offset - Increase stack utilization by a random offset. * - * This should be used in the syscall entry path when interrupts and - * preempt are disabled, and after user registers have been stored to - * the stack. For testing the resulting entropy, please see: - * tools/testing/selftests/lkdtm/stack-entropy.sh + * This should be used in the syscall entry path after user registers have been + * stored to the stack. Preemption may be enabled. For testing the resulting + * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh */ #define add_random_kstack_offset() do { \ if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ - u32 offset = raw_cpu_read(kstack_offset); \ + u32 offset = get_kstack_offset(); \ u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ asm volatile("" :: "r"(ptr) : "memory"); \ } \ } while (0) -/** - * choose_random_kstack_offset - Choose the random offset for the next - * add_random_kstack_offset() - * - * This should only be used during syscall exit when interrupts and - * preempt are disabled. This position in the syscall flow is done to - * frustrate attacks from userspace attempting to learn the next offset: - * - Maximize the timing uncertainty visible from userspace: if the - * offset is chosen at syscall entry, userspace has much more control - * over the timing between choosing offsets. "How long will we be in - * kernel mode?" tends to be more difficult to predict than "how long - * will we be in user mode?" - * - Reduce the lifetime of the new offset sitting in memory during - * kernel mode execution. Exposure of "thread-local" memory content - * (e.g. current, percpu, etc) tends to be easier than arbitrary - * location memory exposure. - */ -#define choose_random_kstack_offset(rand) do { \ - if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ - &randomize_kstack_offset)) { \ - u32 offset = raw_cpu_read(kstack_offset); \ - offset = ror32(offset, 5) ^ (rand); \ - raw_cpu_write(kstack_offset, offset); \ - } \ -} while (0) #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ #define add_random_kstack_offset() do { } while (0) -#define choose_random_kstack_offset(rand) do { } while (0) #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ #endif diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 4091e978aef2..48acdc3889dd 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -35,10 +35,15 @@ #define RB_CLEAR_NODE(node) \ ((node)->__rb_parent_color = (unsigned long)(node)) +#define RB_EMPTY_LINKED_NODE(lnode) RB_EMPTY_NODE(&(lnode)->node) +#define RB_CLEAR_LINKED_NODE(lnode) ({ \ + RB_CLEAR_NODE(&(lnode)->node); \ + (lnode)->prev = (lnode)->next = NULL; \ +}) extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); - +extern bool rb_erase_linked(struct rb_node_linked *, struct rb_root_linked *); /* Find logical next and previous nodes in a tree */ extern struct rb_node *rb_next(const struct rb_node *); @@ -213,15 +218,10 @@ rb_add_cached(struct rb_node *node, struct rb_root_cached *tree, return leftmost ? node : NULL; } -/** - * rb_add() - insert @node into @tree - * @node: node to insert - * @tree: tree to insert @node into - * @less: operator defining the (partial) node order - */ static __always_inline void -rb_add(struct rb_node *node, struct rb_root *tree, - bool (*less)(struct rb_node *, const struct rb_node *)) +__rb_add(struct rb_node *node, struct rb_root *tree, + bool (*less)(struct rb_node *, const struct rb_node *), + void (*linkop)(struct rb_node *, struct rb_node *, struct rb_node **)) { struct rb_node **link = &tree->rb_node; struct rb_node *parent = NULL; @@ -234,10 +234,73 @@ rb_add(struct rb_node *node, struct rb_root *tree, link = &parent->rb_right; } + linkop(node, parent, link); rb_link_node(node, parent, link); rb_insert_color(node, tree); } +#define __node_2_linked_node(_n) \ + rb_entry((_n), struct rb_node_linked, node) + +static inline void +rb_link_linked_node(struct rb_node *node, struct rb_node *parent, struct rb_node **link) +{ + if (!parent) + return; + + struct rb_node_linked *nnew = __node_2_linked_node(node); + struct rb_node_linked *npar = __node_2_linked_node(parent); + + if (link == &parent->rb_left) { + nnew->prev = npar->prev; + nnew->next = npar; + npar->prev = nnew; + if (nnew->prev) + nnew->prev->next = nnew; + } else { + nnew->next = npar->next; + nnew->prev = npar; + npar->next = nnew; + if (nnew->next) + nnew->next->prev = nnew; + } +} + +/** + * rb_add_linked() - insert @node into the leftmost linked tree @tree + * @node: node to insert + * @tree: linked tree to insert @node into + * @less: operator defining the (partial) node order + * + * Returns @true when @node is the new leftmost, @false otherwise. + */ +static __always_inline bool +rb_add_linked(struct rb_node_linked *node, struct rb_root_linked *tree, + bool (*less)(struct rb_node *, const struct rb_node *)) +{ + __rb_add(&node->node, &tree->rb_root, less, rb_link_linked_node); + if (!node->prev) + tree->rb_leftmost = node; + return !node->prev; +} + +/* Empty linkop function which is optimized away by the compiler */ +static __always_inline void +rb_link_noop(struct rb_node *n, struct rb_node *p, struct rb_node **l) { } + +/** + * rb_add() - insert @node into @tree + * @node: node to insert + * @tree: tree to insert @node into + * @less: operator defining the (partial) node order + */ +static __always_inline void +rb_add(struct rb_node *node, struct rb_root *tree, + bool (*less)(struct rb_node *, const struct rb_node *)) +{ + __rb_add(node, tree, less, rb_link_noop); +} + /** * rb_find_add_cached() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert diff --git a/include/linux/rbtree_types.h b/include/linux/rbtree_types.h index 45b6ecde3665..3c7ae53e8139 100644 --- a/include/linux/rbtree_types.h +++ b/include/linux/rbtree_types.h @@ -9,6 +9,12 @@ struct rb_node { } __attribute__((aligned(sizeof(long)))); /* The alignment might seem pointless, but allegedly CRIS needs it */ +struct rb_node_linked { + struct rb_node node; + struct rb_node_linked *prev; + struct rb_node_linked *next; +}; + struct rb_root { struct rb_node *rb_node; }; @@ -28,7 +34,17 @@ struct rb_root_cached { struct rb_node *rb_leftmost; }; +/* + * Leftmost tree with links. This would allow a trivial rb_rightmost update, + * but that has been omitted due to the lack of users. + */ +struct rb_root_linked { + struct rb_root rb_root; + struct rb_node_linked *rb_leftmost; +}; + #define RB_ROOT (struct rb_root) { NULL, } #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } +#define RB_ROOT_LINKED (struct rb_root_linked) { {NULL, }, NULL } #endif diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index 0b952d06eb0b..36363b876e53 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -8,21 +8,31 @@ #include <linux/list_bl.h> #include <linux/rcupdate.h> +/* return the first ptr or next element in an RCU protected list */ +#define hlist_bl_first_rcu(head) \ + (*((struct hlist_bl_node __rcu **)(&(head)->first))) +#define hlist_bl_next_rcu(node) \ + (*((struct hlist_bl_node __rcu **)(&(node)->next))) + static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != LIST_BL_LOCKMASK); - rcu_assign_pointer(h->first, + rcu_assign_pointer(hlist_bl_first_rcu(h), (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK)); } -static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) -{ - return (struct hlist_bl_node *) - ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); -} +#define hlist_bl_first_rcu_dereference(head) \ +({ \ + struct hlist_bl_head *__head = (head); \ + \ + (struct hlist_bl_node *) \ + ((unsigned long)rcu_dereference_check(hlist_bl_first_rcu(__head), \ + hlist_bl_is_locked(__head)) & \ + ~LIST_BL_LOCKMASK); \ +}) /** * hlist_bl_del_rcu - deletes entry from hash list without re-initialization @@ -73,7 +83,7 @@ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, { struct hlist_bl_node *first; - /* don't need hlist_bl_first_rcu because we're under lock */ + /* don't need hlist_bl_first_rcu* because we're under lock */ first = hlist_bl_first(h); n->next = first; @@ -93,9 +103,30 @@ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, * */ #define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = hlist_bl_first_rcu(head); \ + for (pos = hlist_bl_first_rcu_dereference(head); \ pos && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_bl_next_rcu(pos))) + +/** + * hlist_bl_for_each_entry_continue_rcu - continue iteration over list of given + * type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_bl_node to use as a loop cursor. + * @member: the name of the hlist_bl_node within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position which must have been in the list when the RCU read + * lock was taken. + * This would typically require either that you obtained the node from a + * previous walk of the list in the same RCU read-side critical section, or + * that you held some sort of non-RCU reference (such as a reference count) + * to keep the node alive *and* in the list. + */ +#define hlist_bl_for_each_entry_continue_rcu(tpos, pos, member) \ + for (pos = rcu_dereference_raw(hlist_bl_next_rcu(&(tpos)->member)); \ + pos && \ + ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference_raw(hlist_bl_next_rcu(pos))) #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 04f3f86a4145..bfa765132de8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -206,18 +206,6 @@ static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ /** - * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? - * - * As an accident of implementation, an RCU Tasks Trace grace period also - * acts as an RCU grace period. However, this could change at any time. - * Code relying on this accident must call this function to verify that - * this accident is still happening. - * - * You have been warned! - */ -static inline bool rcu_trace_implies_rcu_gp(void) { return true; } - -/** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * * This macro resembles cond_resched(), except that it is defined to diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 3da377ffb0c2..ba7657ced281 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -170,7 +170,7 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -static inline __must_check __signed_wrap +static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -212,7 +212,7 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) return __refcount_add_not_zero(i, r, NULL); } -static inline __must_check __signed_wrap +static inline __must_check bool __refcount_add_not_zero_limited_acquire(int i, refcount_t *r, int *oldp, int limit) { @@ -244,7 +244,7 @@ __refcount_inc_not_zero_limited_acquire(refcount_t *r, int *oldp, int limit) return __refcount_add_not_zero_limited_acquire(1, r, oldp, limit); } -static inline __must_check __signed_wrap +static inline __must_check bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp) { return __refcount_add_not_zero_limited_acquire(i, r, oldp, INT_MAX); @@ -277,7 +277,7 @@ static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t return __refcount_add_not_zero_acquire(i, r, NULL); } -static inline __signed_wrap +static inline void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); @@ -383,7 +383,7 @@ static inline void refcount_inc(refcount_t *r) __refcount_inc(r, NULL); } -static inline __must_check __signed_wrap +static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(i, &r->refs); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index caff2240bdab..df44cb30f53b 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -10,15 +10,16 @@ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com> */ -#include <linux/list.h> -#include <linux/rbtree.h> -#include <linux/ktime.h> +#include <linux/bug.h> +#include <linux/cleanup.h> #include <linux/delay.h> #include <linux/err.h> -#include <linux/bug.h> -#include <linux/lockdep.h> -#include <linux/iopoll.h> #include <linux/fwnode.h> +#include <linux/iopoll.h> +#include <linux/ktime.h> +#include <linux/list.h> +#include <linux/lockdep.h> +#include <linux/rbtree.h> struct module; struct clk; @@ -692,6 +693,10 @@ struct regmap *__regmap_init_sdw_mbq(struct device *dev, struct sdw_slave *sdw, const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__regmap_init_i3c(struct i3c_device *i3c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__regmap_init_spi_avmm(struct spi_device *spi, const struct regmap_config *config, struct lock_class_key *lock_key, @@ -999,6 +1004,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); dev, sdw, config, mbq_config) /** + * regmap_init_i3c() - Initialise register map + * + * @i3c: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_i3c(i3c, config) \ + __regmap_lockdep_wrapper(__regmap_init_i3c, #config, \ + i3c, config) + +/** * regmap_init_spi_avmm() - Initialize register map for Intel SPI Slave * to AVMM Bus Bridge * @@ -1460,6 +1478,8 @@ struct regmap_field *regmap_field_alloc(struct regmap *regmap, struct reg_field reg_field); void regmap_field_free(struct regmap_field *field); +DEFINE_FREE(regmap_field, struct regmap_field *, if (_T) regmap_field_free(_T)) + struct regmap_field *devm_regmap_field_alloc(struct device *dev, struct regmap *regmap, struct reg_field reg_field); void devm_regmap_field_free(struct device *dev, struct regmap_field *field); diff --git a/include/linux/remoteproc/mtk_scp.h b/include/linux/remoteproc/mtk_scp.h index 344ff41c22c7..4070537d6542 100644 --- a/include/linux/remoteproc/mtk_scp.h +++ b/include/linux/remoteproc/mtk_scp.h @@ -58,7 +58,7 @@ int scp_ipi_register(struct mtk_scp *scp, u32 id, scp_ipi_handler_t handler, void *priv); void scp_ipi_unregister(struct mtk_scp *scp, u32 id); -int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len, +int scp_ipi_send(struct mtk_scp *scp, u32 id, const void *buf, unsigned int len, unsigned int wait); unsigned int scp_get_vdec_hw_capa(struct mtk_scp *scp); diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index 46514cb1b9e0..52a5a4e81f18 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -3,7 +3,10 @@ #define _LINUX_RESET_CONTROLLER_H_ #include <linux/list.h> +#include <linux/mutex.h> +struct fwnode_handle; +struct fwnode_reference_args; struct reset_controller_dev; /** @@ -35,14 +38,16 @@ struct of_phandle_args; * @reset_control_head: head of internal list of requested reset controls * @dev: corresponding driver model device struct * @of_node: corresponding device tree node as phandle target - * @of_args: for reset-gpios controllers: corresponding phandle args with - * of_node and GPIO number complementing of_node; either this or - * of_node should be present * @of_reset_n_cells: number of cells in reset line specifiers * @of_xlate: translation function to translate from specifier as found in the - * device tree to id as given to the reset control ops, defaults - * to :c:func:`of_reset_simple_xlate`. + * device tree to id as given to the reset control ops + * @fwnode: firmware node associated with this device + * @fwnode_reset_n_cells: number of cells in reset line specifiers + * @fwnode_xlate: translation function to translate from firmware specifier to + * id as given to the reset control ops, defaults to + * :c:func:`fwnode_reset_simple_xlate` * @nr_resets: number of reset controls in this reset controller device + * @lock: protects the reset control list from concurrent access */ struct reset_controller_dev { const struct reset_control_ops *ops; @@ -51,11 +56,15 @@ struct reset_controller_dev { struct list_head reset_control_head; struct device *dev; struct device_node *of_node; - const struct of_phandle_args *of_args; int of_reset_n_cells; int (*of_xlate)(struct reset_controller_dev *rcdev, const struct of_phandle_args *reset_spec); + struct fwnode_handle *fwnode; + int fwnode_reset_n_cells; + int (*fwnode_xlate)(struct reset_controller_dev *rcdev, + const struct fwnode_reference_args *reset_spec); unsigned int nr_resets; + struct mutex lock; }; #if IS_ENABLED(CONFIG_RESET_CONTROLLER) diff --git a/include/linux/reset.h b/include/linux/reset.h index 44f9e3415f92..9c391cf0c822 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -5,10 +5,12 @@ #include <linux/bits.h> #include <linux/err.h> #include <linux/errno.h> +#include <linux/of.h> #include <linux/types.h> struct device; struct device_node; +struct fwnode_handle; struct reset_control; /** @@ -84,7 +86,7 @@ int reset_control_bulk_deassert(int num_rstcs, struct reset_control_bulk_data *r int reset_control_bulk_acquire(int num_rstcs, struct reset_control_bulk_data *rstcs); void reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs); -struct reset_control *__of_reset_control_get(struct device_node *node, +struct reset_control *__fwnode_reset_control_get(struct fwnode_handle *fwnode, const char *id, int index, enum reset_control_flags flags); struct reset_control *__reset_control_get(struct device *dev, const char *id, int index, enum reset_control_flags flags); @@ -103,7 +105,8 @@ int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control *devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags); -struct reset_control *of_reset_control_array_get(struct device_node *np, enum reset_control_flags); +struct reset_control *fwnode_reset_control_array_get(struct fwnode_handle *fwnode, + enum reset_control_flags); int reset_control_get_count(struct device *dev); @@ -152,8 +155,8 @@ static inline int __device_reset(struct device *dev, bool optional) return optional ? 0 : -ENOTSUPP; } -static inline struct reset_control *__of_reset_control_get( - struct device_node *node, +static inline struct reset_control *__fwnode_reset_control_get( + struct fwnode_handle *fwnode, const char *id, int index, enum reset_control_flags flags) { bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; @@ -242,7 +245,7 @@ devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags) } static inline struct reset_control * -of_reset_control_array_get(struct device_node *np, enum reset_control_flags flags) +fwnode_reset_control_array_get(struct fwnode_handle *fwnode, enum reset_control_flags flags) { bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; @@ -500,7 +503,8 @@ reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, static inline struct reset_control *of_reset_control_get_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, RESET_CONTROL_EXCLUSIVE); + return __fwnode_reset_control_get(of_fwnode_handle(node), id, 0, + RESET_CONTROL_EXCLUSIVE); } /** @@ -520,7 +524,8 @@ static inline struct reset_control *of_reset_control_get_exclusive( static inline struct reset_control *of_reset_control_get_optional_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); + return __fwnode_reset_control_get(of_fwnode_handle(node), id, 0, + RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -545,7 +550,8 @@ static inline struct reset_control *of_reset_control_get_optional_exclusive( static inline struct reset_control *of_reset_control_get_shared( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, RESET_CONTROL_SHARED); + return __fwnode_reset_control_get(of_fwnode_handle(node), id, 0, + RESET_CONTROL_SHARED); } /** @@ -562,7 +568,8 @@ static inline struct reset_control *of_reset_control_get_shared( static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, RESET_CONTROL_EXCLUSIVE); + return __fwnode_reset_control_get(of_fwnode_handle(node), NULL, index, + RESET_CONTROL_EXCLUSIVE); } /** @@ -590,7 +597,8 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index( static inline struct reset_control *of_reset_control_get_shared_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, RESET_CONTROL_SHARED); + return __fwnode_reset_control_get(of_fwnode_handle(node), NULL, index, + RESET_CONTROL_SHARED); } /** @@ -1032,30 +1040,35 @@ devm_reset_control_array_get_optional_shared(struct device *dev) static inline struct reset_control * of_reset_control_array_get_exclusive(struct device_node *node) { - return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE); + return fwnode_reset_control_array_get(of_fwnode_handle(node), + RESET_CONTROL_EXCLUSIVE); } static inline struct reset_control * of_reset_control_array_get_exclusive_released(struct device_node *node) { - return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE_RELEASED); + return fwnode_reset_control_array_get(of_fwnode_handle(node), + RESET_CONTROL_EXCLUSIVE_RELEASED); } static inline struct reset_control * of_reset_control_array_get_shared(struct device_node *node) { - return of_reset_control_array_get(node, RESET_CONTROL_SHARED); + return fwnode_reset_control_array_get(of_fwnode_handle(node), + RESET_CONTROL_SHARED); } static inline struct reset_control * of_reset_control_array_get_optional_exclusive(struct device_node *node) { - return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_EXCLUSIVE); + return fwnode_reset_control_array_get(of_fwnode_handle(node), + RESET_CONTROL_OPTIONAL_EXCLUSIVE); } static inline struct reset_control * of_reset_control_array_get_optional_shared(struct device_node *node) { - return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_SHARED); + return fwnode_reset_control_array_get(of_fwnode_handle(node), + RESET_CONTROL_OPTIONAL_SHARED); } #endif diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 133ccb39137a..0480509a6339 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -129,10 +129,10 @@ static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht, unsigned int hash; /* params must be equal to ht->p if it isn't constant. */ - if (!__builtin_constant_p(params.key_len)) + if (!__builtin_constant_p(params.key_len)) { hash = ht->p.hashfn(key, ht->key_len, hash_rnd); - else if (params.key_len) { - unsigned int key_len = params.key_len; + } else { + unsigned int key_len = params.key_len ? : ht->p.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, hash_rnd); @@ -140,13 +140,6 @@ static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht, hash = jhash(key, key_len, hash_rnd); else hash = jhash2(key, key_len / sizeof(u32), hash_rnd); - } else { - unsigned int key_len = ht->p.key_len; - - if (params.hashfn) - hash = params.hashfn(key, key_len, hash_rnd); - else - hash = jhash(key, key_len, hash_rnd); } return hash; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 876358cfe1b1..994f52b34344 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -248,6 +248,65 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); int ring_buffer_map(struct trace_buffer *buffer, int cpu, struct vm_area_struct *vma); +void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu); int ring_buffer_unmap(struct trace_buffer *buffer, int cpu); int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu); + +struct ring_buffer_desc { + int cpu; + unsigned int nr_page_va; /* excludes the meta page */ + unsigned long meta_va; + unsigned long page_va[] __counted_by(nr_page_va); +}; + +struct trace_buffer_desc { + int nr_cpus; + size_t struct_len; + char __data[]; /* list of ring_buffer_desc */ +}; + +static inline struct ring_buffer_desc *__next_ring_buffer_desc(struct ring_buffer_desc *desc) +{ + size_t len = struct_size(desc, page_va, desc->nr_page_va); + + return (struct ring_buffer_desc *)((void *)desc + len); +} + +static inline struct ring_buffer_desc *__first_ring_buffer_desc(struct trace_buffer_desc *desc) +{ + return (struct ring_buffer_desc *)(&desc->__data[0]); +} + +static inline size_t trace_buffer_desc_size(size_t buffer_size, unsigned int nr_cpus) +{ + unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1; + struct ring_buffer_desc *rbdesc; + + return size_add(offsetof(struct trace_buffer_desc, __data), + size_mul(nr_cpus, struct_size(rbdesc, page_va, nr_pages))); +} + +#define for_each_ring_buffer_desc(__pdesc, __cpu, __trace_pdesc) \ + for (__pdesc = __first_ring_buffer_desc(__trace_pdesc), __cpu = 0; \ + (__cpu) < (__trace_pdesc)->nr_cpus; \ + (__cpu)++, __pdesc = __next_ring_buffer_desc(__pdesc)) + +struct ring_buffer_remote { + struct trace_buffer_desc *desc; + int (*swap_reader_page)(unsigned int cpu, void *priv); + int (*reset)(unsigned int cpu, void *priv); + void *priv; +}; + +int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu); + +struct trace_buffer * +__ring_buffer_alloc_remote(struct ring_buffer_remote *remote, + struct lock_class_key *key); + +#define ring_buffer_alloc_remote(remote) \ +({ \ + static struct lock_class_key __key; \ + __ring_buffer_alloc_remote(remote, &__key); \ +}) #endif /* _LINUX_RING_BUFFER_H */ diff --git a/include/linux/ring_buffer_types.h b/include/linux/ring_buffer_types.h new file mode 100644 index 000000000000..54577021a49d --- /dev/null +++ b/include/linux/ring_buffer_types.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_RING_BUFFER_TYPES_H +#define _LINUX_RING_BUFFER_TYPES_H + +#include <asm/local.h> + +#define TS_SHIFT 27 +#define TS_MASK ((1ULL << TS_SHIFT) - 1) +#define TS_DELTA_TEST (~TS_MASK) + +/* + * We need to fit the time_stamp delta into 27 bits. + */ +static inline bool test_time_stamp(u64 delta) +{ + return !!(delta & TS_DELTA_TEST); +} + +#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) + +#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) +#define RB_ALIGNMENT 4U +#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) +#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ + +#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS +# define RB_FORCE_8BYTE_ALIGNMENT 0 +# define RB_ARCH_ALIGNMENT RB_ALIGNMENT +#else +# define RB_FORCE_8BYTE_ALIGNMENT 1 +# define RB_ARCH_ALIGNMENT 8U +#endif + +#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) + +struct buffer_data_page { + u64 time_stamp; /* page time stamp */ + local_t commit; /* write committed index */ + unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */ +}; +#endif diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index fb7ab9165645..83266ce14642 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -182,11 +182,11 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *, rpmsg_rx_cb_t cb, void *priv, struct rpmsg_channel_info chinfo); -int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len); -int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); +int rpmsg_send(struct rpmsg_endpoint *ept, const void *data, int len); +int rpmsg_sendto(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst); -int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len); -int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); +int rpmsg_trysend(struct rpmsg_endpoint *ept, const void *data, int len); +int rpmsg_trysendto(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst); __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, poll_table *wait); @@ -249,7 +249,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev return NULL; } -static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) +static inline int rpmsg_send(struct rpmsg_endpoint *ept, const void *data, int len) { /* This shouldn't be possible */ WARN_ON(1); @@ -257,7 +257,7 @@ static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) return -ENXIO; } -static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, +static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst) { /* This shouldn't be possible */ @@ -267,7 +267,8 @@ static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, } -static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) +static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, const void *data, + int len) { /* This shouldn't be possible */ WARN_ON(1); @@ -275,7 +276,7 @@ static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) return -ENXIO; } -static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, +static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst) { /* This shouldn't be possible */ diff --git a/include/linux/rpmsg/mtk_rpmsg.h b/include/linux/rpmsg/mtk_rpmsg.h index 363b60178040..badcbc89917f 100644 --- a/include/linux/rpmsg/mtk_rpmsg.h +++ b/include/linux/rpmsg/mtk_rpmsg.h @@ -25,7 +25,7 @@ struct mtk_rpmsg_info { ipi_handler_t handler, void *priv); void (*unregister_ipi)(struct platform_device *pdev, u32 id); int (*send_ipi)(struct platform_device *pdev, u32 id, - void *buf, unsigned int len, unsigned int wait); + const void *buf, unsigned int len, unsigned int wait); int ns_ipi_id; }; diff --git a/include/linux/rseq.h b/include/linux/rseq.h index 7a01a0760405..b9d62fc2140d 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags) t->rseq = current->rseq; } +/* + * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq + * registration. This is the active rseq area size rounded up to next + * power of 2, which guarantees that the rseq structure will always be + * aligned on the nearest power of two large enough to contain it, even + * as it grows. + */ +static inline unsigned int rseq_alloc_align(void) +{ + return 1U << get_count_order(offsetof(struct rseq, end)); +} + #else /* CONFIG_RSEQ */ static inline void rseq_handle_slowpath(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index cbc4a791618b..f11ebd34f8b9 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h @@ -40,6 +40,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_stats); #endif /* !CONFIG_RSEQ_STATS */ #ifdef CONFIG_RSEQ +#include <linux/hrtimer_rearm.h> #include <linux/jump_label.h> #include <linux/rseq.h> #include <linux/sched/signal.h> @@ -110,7 +111,7 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t) t->rseq.slice.state.granted = false; } -static __always_inline bool rseq_grant_slice_extension(bool work_pending) +static __always_inline bool __rseq_grant_slice_extension(bool work_pending) { struct task_struct *curr = current; struct rseq_slice_ctrl usr_ctrl; @@ -215,11 +216,20 @@ efault: return false; } +static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) +{ + if (unlikely(__rseq_grant_slice_extension(ti_work & mask))) { + hrtimer_rearm_deferred_tif(ti_work); + return true; + } + return false; +} + #else /* CONFIG_RSEQ_SLICE_EXTENSION */ -static inline bool rseq_slice_extension_enabled(void) { return false; } -static inline bool rseq_arm_slice_extension_timer(void) { return false; } -static inline void rseq_slice_clear_grant(struct task_struct *t) { } -static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } +static __always_inline bool rseq_slice_extension_enabled(void) { return false; } +static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; } +static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { } +static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); @@ -778,7 +788,7 @@ static inline void rseq_syscall_exit_to_user_mode(void) { } static inline void rseq_irqentry_exit_to_user_mode(void) { } static inline void rseq_exit_to_user_mode_legacy(void) { } static inline void rseq_debug_syscall_return(struct pt_regs *regs) { } -static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } +static inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } #endif /* !CONFIG_RSEQ */ #endif /* _LINUX_RSEQ_ENTRY_H */ diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h index da5fa6f40294..0b42045988db 100644 --- a/include/linux/rseq_types.h +++ b/include/linux/rseq_types.h @@ -133,10 +133,12 @@ struct rseq_data { }; * @active: MM CID is active for the task * @cid: The CID associated to the task either permanently or * borrowed from the CPU + * @node: Queued in the per MM MMCID list */ struct sched_mm_cid { unsigned int active; unsigned int cid; + struct hlist_node node; }; /** @@ -157,6 +159,7 @@ struct mm_cid_pcpu { * @work: Regular work to handle the affinity mode change case * @lock: Spinlock to protect against affinity setting which can't take @mutex * @mutex: Mutex to serialize forks and exits related to this mm + * @user_list: List of the MM CID users of a MM * @nr_cpus_allowed: The number of CPUs in the per MM allowed CPUs map. The map * is growth only. * @users: The number of tasks sharing this MM. Separate from mm::mm_users @@ -177,13 +180,14 @@ struct mm_mm_cid { raw_spinlock_t lock; struct mutex mutex; + struct hlist_head user_list; /* Low frequency modified */ unsigned int nr_cpus_allowed; unsigned int users; unsigned int pcpu_thrs; unsigned int update_deferred; -}____cacheline_aligned_in_smp; +} ____cacheline_aligned; #else /* CONFIG_SCHED_MM_CID */ struct mm_mm_cid { }; struct sched_mm_cid { }; diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index ede4c6bf6f22..78e7e588817c 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -22,8 +22,8 @@ extern int max_lock_depth; struct rt_mutex_base { raw_spinlock_t wait_lock; - struct rb_root_cached waiters; - struct task_struct *owner; + struct rb_root_cached waiters __guarded_by(&wait_lock); + struct task_struct *owner __guarded_by(&wait_lock); }; #define __RT_MUTEX_BASE_INITIALIZER(rtbasename) \ @@ -41,7 +41,7 @@ struct rt_mutex_base { */ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock) { - return READ_ONCE(lock->owner) != NULL; + return data_race(READ_ONCE(lock->owner) != NULL); } #ifdef CONFIG_RT_MUTEXES @@ -49,7 +49,7 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock) static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock) { - unsigned long owner = (unsigned long) READ_ONCE(lock->owner); + unsigned long owner = (unsigned long) data_race(READ_ONCE(lock->owner)); return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); } diff --git a/include/linux/rv.h b/include/linux/rv.h index 58774eb3aecf..541ba404926a 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -13,6 +13,7 @@ #define RV_MON_GLOBAL 0 #define RV_MON_PER_CPU 1 #define RV_MON_PER_TASK 2 +#define RV_MON_PER_OBJ 3 #ifdef CONFIG_RV #include <linux/array_size.h> @@ -81,11 +82,49 @@ struct ltl_monitor {}; #endif /* CONFIG_RV_LTL_MONITOR */ +#ifdef CONFIG_RV_HA_MONITOR +/* + * In the future, hybrid automata may rely on multiple + * environment variables, e.g. different clocks started at + * different times or running at different speed. + * For now we support only 1 variable. + */ +#define MAX_HA_ENV_LEN 1 + +/* + * Monitors can pick the preferred timer implementation: + * No timer: if monitors don't have state invariants. + * Timer wheel: lightweight invariants check but far less precise. + * Hrtimer: accurate invariants check with higher overhead. + */ +#define HA_TIMER_NONE 0 +#define HA_TIMER_WHEEL 1 +#define HA_TIMER_HRTIMER 2 + +/* + * Hybrid automaton per-object variables. + */ +struct ha_monitor { + struct da_monitor da_mon; + u64 env_store[MAX_HA_ENV_LEN]; + union { + struct hrtimer hrtimer; + struct timer_list timer; + }; +}; + +#else + +struct ha_monitor { }; + +#endif /* CONFIG_RV_HA_MONITOR */ + #define RV_PER_TASK_MONITOR_INIT (CONFIG_RV_PER_TASK_MONITORS) union rv_task_monitor { struct da_monitor da_mon; struct ltl_monitor ltl_mon; + struct ha_monitor ha_mon; }; #ifdef CONFIG_RV_REACTORS diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 3390d21c95dd..4e67cd934d8f 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -30,17 +30,27 @@ do { \ #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_read_lock(rwlock_t *lock) __acquires_shared(lock); - extern int do_raw_read_trylock(rwlock_t *lock); + extern int do_raw_read_trylock(rwlock_t *lock) __cond_acquires_shared(true, lock); extern void do_raw_read_unlock(rwlock_t *lock) __releases_shared(lock); extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock); - extern int do_raw_write_trylock(rwlock_t *lock); +extern int do_raw_write_trylock(rwlock_t *lock) __cond_acquires(true, lock); extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else # define do_raw_read_lock(rwlock) do {__acquire_shared(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) -# define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock) +static inline int do_raw_read_trylock(rwlock_t *rwlock) + __cond_acquires_shared(true, rwlock) + __no_context_analysis +{ + return arch_read_trylock(&(rwlock)->raw_lock); +} # define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release_shared(lock); } while (0) # define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0) -# define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock) +static inline int do_raw_write_trylock(rwlock_t *rwlock) + __cond_acquires(true, rwlock) + __no_context_analysis +{ + return arch_write_trylock(&(rwlock)->raw_lock); +} # define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) #endif diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index 61a852609eab..9e02a5f28cd1 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -23,7 +23,7 @@ void __lockfunc _raw_write_lock_bh(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_read_lock_irq(rwlock_t *lock) __acquires_shared(lock); void __lockfunc _raw_write_lock_irq(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock) - __acquires(lock); + __acquires_shared(lock); unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock) __acquires(lock); int __lockfunc _raw_read_trylock(rwlock_t *lock) __cond_acquires_shared(true, lock); @@ -36,7 +36,7 @@ void __lockfunc _raw_read_unlock_irq(rwlock_t *lock) __releases_shared(lock); void __lockfunc _raw_write_unlock_irq(rwlock_t *lock) __releases(lock); void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) - __releases(lock); + __releases_shared(lock); void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); @@ -116,6 +116,7 @@ _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) #endif static inline int __raw_read_trylock(rwlock_t *lock) + __cond_acquires_shared(true, lock) { preempt_disable(); if (do_raw_read_trylock(lock)) { @@ -127,6 +128,7 @@ static inline int __raw_read_trylock(rwlock_t *lock) } static inline int __raw_write_trylock(rwlock_t *lock) + __cond_acquires(true, lock) { preempt_disable(); if (do_raw_write_trylock(lock)) { diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 9bf1d93d3d7b..6a1a7bae5f81 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -57,7 +57,7 @@ context_lock_struct(rw_semaphore) { struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif raw_spinlock_t wait_lock; - struct list_head wait_list; + struct rwsem_waiter *first_waiter __guarded_by(&wait_lock); #ifdef CONFIG_DEBUG_RWSEMS void *magic; #endif @@ -106,7 +106,7 @@ static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore * .owner = ATOMIC_LONG_INIT(0), \ __RWSEM_OPT_INIT(name) \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ - .wait_list = LIST_HEAD_INIT((name).wait_list), \ + .first_waiter = NULL, \ __RWSEM_DEBUG_INIT(name) \ __RWSEM_DEP_MAP_INIT(name) } @@ -129,9 +129,9 @@ do { \ * rwsem to see if somebody from an incompatible type is wanting access to the * lock. */ -static inline int rwsem_is_contended(struct rw_semaphore *sem) +static inline bool rwsem_is_contended(struct rw_semaphore *sem) { - return !list_empty(&sem->wait_list); + return data_race(sem->first_waiter != NULL); } #if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) diff --git a/include/linux/sched.h b/include/linux/sched.h index 074ad4ef3d81..004e6d56a499 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -579,6 +579,7 @@ struct sched_entity { u64 deadline; u64 min_vruntime; u64 min_slice; + u64 max_slice; struct list_head group_node; unsigned char on_rq; @@ -948,6 +949,10 @@ struct task_struct { struct srcu_ctr __percpu *trc_reader_scp; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU + int rcu_trivial_preempt_nesting; +#endif /* #ifdef CONFIG_TRIVIAL_PREEMPT_RCU */ + struct sched_info sched_info; struct list_head tasks; @@ -1158,12 +1163,9 @@ struct task_struct { /* * executable name, excluding path. * - * - normally initialized begin_new_exec() - * - set it with set_task_comm() - * - strscpy_pad() to ensure it is always NUL-terminated and - * zero-padded - * - task_lock() to ensure the operation is atomic and the name is - * fully updated. + * - normally initialized by begin_new_exec() + * - set it with set_task_comm() to ensure it is always + * NUL-terminated and zero-padded */ char comm[TASK_COMM_LEN]; @@ -1237,6 +1239,7 @@ struct task_struct { #endif struct mutex *blocked_on; /* lock we're blocked on */ + raw_spinlock_t blocked_lock; #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER /* @@ -2178,61 +2181,85 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock); }) #ifndef CONFIG_PREEMPT_RT + +/* + * With proxy exec, if a task has been proxy-migrated, it may be a donor + * on a cpu that it can't actually run on. Thus we need a special state + * to denote that the task is being woken, but that it needs to be + * evaluated for return-migration before it is run. So if the task is + * blocked_on PROXY_WAKING, return migrate it before running it. + */ +#define PROXY_WAKING ((struct mutex *)(-1L)) + static inline struct mutex *__get_task_blocked_on(struct task_struct *p) { - struct mutex *m = p->blocked_on; - - if (m) - lockdep_assert_held_once(&m->wait_lock); - return m; + lockdep_assert_held_once(&p->blocked_lock); + return p->blocked_on == PROXY_WAKING ? NULL : p->blocked_on; } static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) { - struct mutex *blocked_on = READ_ONCE(p->blocked_on); - WARN_ON_ONCE(!m); /* The task should only be setting itself as blocked */ WARN_ON_ONCE(p != current); - /* Currently we serialize blocked_on under the mutex::wait_lock */ - lockdep_assert_held_once(&m->wait_lock); + /* Currently we serialize blocked_on under the task::blocked_lock */ + lockdep_assert_held_once(&p->blocked_lock); /* * Check ensure we don't overwrite existing mutex value * with a different mutex. Note, setting it to the same * lock repeatedly is ok. */ - WARN_ON_ONCE(blocked_on && blocked_on != m); - WRITE_ONCE(p->blocked_on, m); + WARN_ON_ONCE(p->blocked_on && p->blocked_on != m); + p->blocked_on = m; } -static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) +static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) { - guard(raw_spinlock_irqsave)(&m->wait_lock); - __set_task_blocked_on(p, m); + /* Currently we serialize blocked_on under the task::blocked_lock */ + lockdep_assert_held_once(&p->blocked_lock); + /* + * There may be cases where we re-clear already cleared + * blocked_on relationships, but make sure we are not + * clearing the relationship with a different lock. + */ + WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m && p->blocked_on != PROXY_WAKING); + p->blocked_on = NULL; } -static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) +static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) { - if (m) { - struct mutex *blocked_on = READ_ONCE(p->blocked_on); + guard(raw_spinlock_irqsave)(&p->blocked_lock); + __clear_task_blocked_on(p, m); +} - /* Currently we serialize blocked_on under the mutex::wait_lock */ - lockdep_assert_held_once(&m->wait_lock); - /* - * There may be cases where we re-clear already cleared - * blocked_on relationships, but make sure we are not - * clearing the relationship with a different lock. - */ - WARN_ON_ONCE(blocked_on && blocked_on != m); +static inline void __set_task_blocked_on_waking(struct task_struct *p, struct mutex *m) +{ + /* Currently we serialize blocked_on under the task::blocked_lock */ + lockdep_assert_held_once(&p->blocked_lock); + + if (!sched_proxy_exec()) { + __clear_task_blocked_on(p, m); + return; } - WRITE_ONCE(p->blocked_on, NULL); + + /* Don't set PROXY_WAKING if blocked_on was already cleared */ + if (!p->blocked_on) + return; + /* + * There may be cases where we set PROXY_WAKING on tasks that were + * already set to waking, but make sure we are not changing + * the relationship with a different lock. + */ + WARN_ON_ONCE(m && p->blocked_on != m && p->blocked_on != PROXY_WAKING); + p->blocked_on = PROXY_WAKING; } -static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) +static inline void set_task_blocked_on_waking(struct task_struct *p, struct mutex *m) { - guard(raw_spinlock_irqsave)(&m->wait_lock); - __clear_task_blocked_on(p, m); + guard(raw_spinlock_irqsave)(&p->blocked_lock); + __set_task_blocked_on_waking(p, m); } + #else static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) { @@ -2241,6 +2268,14 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mute static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) { } + +static inline void __set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m) +{ +} + +static inline void set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m) +{ +} #endif /* !CONFIG_PREEMPT_RT */ static __always_inline bool need_resched(void) @@ -2353,7 +2388,6 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo #ifdef CONFIG_SCHED_MM_CID void sched_mm_cid_before_execve(struct task_struct *t); void sched_mm_cid_after_execve(struct task_struct *t); -void sched_mm_cid_fork(struct task_struct *t); void sched_mm_cid_exit(struct task_struct *t); static __always_inline int task_mm_cid(struct task_struct *t) { @@ -2362,7 +2396,6 @@ static __always_inline int task_mm_cid(struct task_struct *t) #else static inline void sched_mm_cid_before_execve(struct task_struct *t) { } static inline void sched_mm_cid_after_execve(struct task_struct *t) { } -static inline void sched_mm_cid_fork(struct task_struct *t) { } static inline void sched_mm_cid_exit(struct task_struct *t) { } static __always_inline int task_mm_cid(struct task_struct *t) { diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index c40115d4e34d..1198138cb839 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -37,4 +37,31 @@ extern void dl_clear_root_domain_cpu(int cpu); extern u64 dl_cookie; extern bool dl_bw_visited(int cpu, u64 cookie); +static inline bool dl_server(struct sched_dl_entity *dl_se) +{ + return dl_se->dl_server; +} + +static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) +{ + BUG_ON(dl_server(dl_se)); + return container_of(dl_se, struct task_struct, dl); +} + +/* + * Regarding the deadline, a task with implicit deadline has a relative + * deadline == relative period. A task with constrained deadline has a + * relative deadline <= relative period. + * + * We support constrained deadline tasks. However, there are some restrictions + * applied only for tasks which do not have an implicit deadline. See + * update_dl_entity() to know more about such restrictions. + * + * The dl_is_implicit() returns true if the task has an implicit deadline. + */ +static inline bool dl_is_implicit(struct sched_dl_entity *dl_se) +{ + return dl_se->dl_deadline == dl_se->dl_period; +} + #endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index bcb962d5ee7d..1a3af2ea2a79 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -62,6 +62,16 @@ enum scx_dsq_id_flags { SCX_DSQ_LOCAL_CPU_MASK = 0xffffffffLLU, }; +struct scx_deferred_reenq_user { + struct list_head node; + u64 flags; +}; + +struct scx_dsq_pcpu { + struct scx_dispatch_q *dsq; + struct scx_deferred_reenq_user deferred_reenq_user; +}; + /* * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to @@ -78,30 +88,58 @@ struct scx_dispatch_q { u64 id; struct rhash_head hash_node; struct llist_node free_node; + struct scx_sched *sched; + struct scx_dsq_pcpu __percpu *pcpu; struct rcu_head rcu; }; -/* scx_entity.flags */ +/* sched_ext_entity.flags */ enum scx_ent_flags { SCX_TASK_QUEUED = 1 << 0, /* on ext runqueue */ + SCX_TASK_IN_CUSTODY = 1 << 1, /* in custody, needs ops.dequeue() when leaving */ SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */ SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ + SCX_TASK_SUB_INIT = 1 << 4, /* task being initialized for a sub sched */ + SCX_TASK_IMMED = 1 << 5, /* task is on local DSQ with %SCX_ENQ_IMMED */ - SCX_TASK_STATE_SHIFT = 8, /* bit 8 and 9 are used to carry scx_task_state */ + /* + * Bits 8 and 9 are used to carry task state: + * + * NONE ops.init_task() not called yet + * INIT ops.init_task() succeeded, but task can be cancelled + * READY fully initialized, but not in sched_ext + * ENABLED fully initialized and in sched_ext + */ + SCX_TASK_STATE_SHIFT = 8, /* bits 8 and 9 are used to carry task state */ SCX_TASK_STATE_BITS = 2, SCX_TASK_STATE_MASK = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT, - SCX_TASK_CURSOR = 1 << 31, /* iteration cursor, not a task */ -}; + SCX_TASK_NONE = 0 << SCX_TASK_STATE_SHIFT, + SCX_TASK_INIT = 1 << SCX_TASK_STATE_SHIFT, + SCX_TASK_READY = 2 << SCX_TASK_STATE_SHIFT, + SCX_TASK_ENABLED = 3 << SCX_TASK_STATE_SHIFT, -/* scx_entity.flags & SCX_TASK_STATE_MASK */ -enum scx_task_state { - SCX_TASK_NONE, /* ops.init_task() not called yet */ - SCX_TASK_INIT, /* ops.init_task() succeeded, but task can be cancelled */ - SCX_TASK_READY, /* fully initialized, but not in sched_ext */ - SCX_TASK_ENABLED, /* fully initialized and in sched_ext */ + /* + * Bits 12 and 13 are used to carry reenqueue reason. In addition to + * %SCX_ENQ_REENQ flag, ops.enqueue() can also test for + * %SCX_TASK_REENQ_REASON_NONE to distinguish reenqueues. + * + * NONE not being reenqueued + * KFUNC reenqueued by scx_bpf_dsq_reenq() and friends + * IMMED reenqueued due to failed ENQ_IMMED + * PREEMPTED preempted while running + */ + SCX_TASK_REENQ_REASON_SHIFT = 12, + SCX_TASK_REENQ_REASON_BITS = 2, + SCX_TASK_REENQ_REASON_MASK = ((1 << SCX_TASK_REENQ_REASON_BITS) - 1) << SCX_TASK_REENQ_REASON_SHIFT, + + SCX_TASK_REENQ_NONE = 0 << SCX_TASK_REENQ_REASON_SHIFT, + SCX_TASK_REENQ_KFUNC = 1 << SCX_TASK_REENQ_REASON_SHIFT, + SCX_TASK_REENQ_IMMED = 2 << SCX_TASK_REENQ_REASON_SHIFT, + SCX_TASK_REENQ_PREEMPTED = 3 << SCX_TASK_REENQ_REASON_SHIFT, - SCX_TASK_NR_STATES, + /* iteration cursor, not a task */ + SCX_TASK_CURSOR = 1 << 31, }; /* scx_entity.dsq_flags */ @@ -109,33 +147,6 @@ enum scx_ent_dsq_flags { SCX_TASK_DSQ_ON_PRIQ = 1 << 0, /* task is queued on the priority queue of a dsq */ }; -/* - * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from - * everywhere and the following bits track which kfunc sets are currently - * allowed for %current. This simple per-task tracking works because SCX ops - * nest in a limited way. BPF will likely implement a way to allow and disallow - * kfuncs depending on the calling context which will replace this manual - * mechanism. See scx_kf_allow(). - */ -enum scx_kf_mask { - SCX_KF_UNLOCKED = 0, /* sleepable and not rq locked */ - /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */ - SCX_KF_CPU_RELEASE = 1 << 0, /* ops.cpu_release() */ - /* - * ops.dispatch() may release rq lock temporarily and thus ENQUEUE and - * SELECT_CPU may be nested inside. ops.dequeue (in REST) may also be - * nested inside DISPATCH. - */ - SCX_KF_DISPATCH = 1 << 1, /* ops.dispatch() */ - SCX_KF_ENQUEUE = 1 << 2, /* ops.enqueue() and ops.select_cpu() */ - SCX_KF_SELECT_CPU = 1 << 3, /* ops.select_cpu() */ - SCX_KF_REST = 1 << 4, /* other rq-locked operations */ - - __SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH | - SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, - __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, -}; - enum scx_dsq_lnode_flags { SCX_DSQ_LNODE_ITER_CURSOR = 1 << 0, @@ -149,19 +160,31 @@ struct scx_dsq_list_node { u32 priv; /* can be used by iter cursor */ }; -#define INIT_DSQ_LIST_CURSOR(__node, __flags, __priv) \ +#define INIT_DSQ_LIST_CURSOR(__cursor, __dsq, __flags) \ (struct scx_dsq_list_node) { \ - .node = LIST_HEAD_INIT((__node).node), \ + .node = LIST_HEAD_INIT((__cursor).node), \ .flags = SCX_DSQ_LNODE_ITER_CURSOR | (__flags), \ - .priv = (__priv), \ + .priv = READ_ONCE((__dsq)->seq), \ } +struct scx_sched; + /* * The following is embedded in task_struct and contains all fields necessary * for a task to be scheduled by SCX. */ struct sched_ext_entity { +#ifdef CONFIG_CGROUPS + /* + * Associated scx_sched. Updated either during fork or while holding + * both p->pi_lock and rq lock. + */ + struct scx_sched __rcu *sched; +#endif struct scx_dispatch_q *dsq; + atomic_long_t ops_state; + u64 ddsp_dsq_id; + u64 ddsp_enq_flags; struct scx_dsq_list_node dsq_list; /* dispatch order */ struct rb_node dsq_priq; /* p->scx.dsq_vtime order */ u32 dsq_seq; @@ -171,9 +194,7 @@ struct sched_ext_entity { s32 sticky_cpu; s32 holding_cpu; s32 selected_cpu; - u32 kf_mask; /* see scx_kf_mask above */ struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */ - atomic_long_t ops_state; struct list_head runnable_node; /* rq->scx.runnable_list */ unsigned long runnable_at; @@ -181,8 +202,6 @@ struct sched_ext_entity { #ifdef CONFIG_SCHED_CORE u64 core_sched_at; /* see scx_prio_less() */ #endif - u64 ddsp_dsq_id; - u64 ddsp_enq_flags; /* BPF scheduler modifiable fields */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index a22248aebcf9..584ae88b435e 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -132,6 +132,7 @@ struct signal_struct { */ unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; + unsigned int autoreap:1; #ifdef CONFIG_POSIX_TIMERS @@ -739,7 +740,7 @@ static inline int thread_group_empty(struct task_struct *p) extern struct sighand_struct *lock_task_sighand(struct task_struct *task, unsigned long *flags) - __acquires(&task->sighand->siglock); + __cond_acquires(nonnull, &task->sighand->siglock); static inline void unlock_task_sighand(struct task_struct *task, unsigned long *flags) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 45c0022b91ce..36553e14866d 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -95,6 +95,7 @@ struct sched_domain { unsigned int newidle_call; unsigned int newidle_success; unsigned int newidle_ratio; + u64 newidle_stamp; u64 max_newidle_lb_cost; unsigned long last_decay_max_lb_cost; @@ -141,18 +142,30 @@ struct sched_domain { unsigned int span_weight; /* - * Span of all CPUs in this domain. + * See sched_domain_span(), on why flex arrays are broken. * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ unsigned long span[]; + */ }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) { - return to_cpumask(sd->span); + /* + * Turns out that C flexible arrays are fundamentally broken since it + * is allowed for offsetof(*sd, span) < sizeof(*sd), this means that + * structure initialzation *sd = { ... }; which writes every byte + * inside sizeof(*type), will over-write the start of the flexible + * array. + * + * Luckily, the way we allocate sched_domain is by: + * + * sizeof(*sd) + cpumask_size() + * + * this means that we have sufficient space for the whole flex array + * *outside* of sizeof(*sd). So use that, and avoid using sd->span. + */ + unsigned long *bitmap = (void *)sd + sizeof(*sd); + return to_cpumask(bitmap); } extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], @@ -171,7 +184,6 @@ typedef int (*sched_domain_flags_f)(void); struct sd_data { struct sched_domain *__percpu *sd; - struct sched_domain_shared *__percpu *sds; struct sched_group *__percpu *sg; struct sched_group_capacity *__percpu *sgc; }; diff --git a/include/linux/secure_boot.h b/include/linux/secure_boot.h new file mode 100644 index 000000000000..d17e92351567 --- /dev/null +++ b/include/linux/secure_boot.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2026 Red Hat, Inc. All Rights Reserved. + * + * Author: Coiby Xu <coxu@redhat.com> + */ + +#ifndef _LINUX_SECURE_BOOT_H +#define _LINUX_SECURE_BOOT_H + +#include <linux/types.h> + +#ifdef CONFIG_HAVE_ARCH_GET_SECUREBOOT +/* + * Returns true if the platform secure boot is enabled. + * Returns false if disabled or not supported. + */ +bool arch_get_secureboot(void); +#else +static inline bool arch_get_secureboot(void) { return false; } +#endif + +#endif /* _LINUX_SECURE_BOOT_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 83a646d72f6f..41d7367cf403 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -145,6 +145,7 @@ enum lockdown_reason { LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_DBG_WRITE_KERNEL, LOCKDOWN_RTAS_ERROR_INJECTION, + LOCKDOWN_XEN_USER_ACTIONS, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, @@ -471,11 +472,17 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_release(struct file *file); void security_file_free(struct file *file); +int security_backing_file_alloc(struct file *backing_file, + const struct file *user_file); +void security_backing_file_free(struct file *backing_file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int security_file_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg); int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags); +int security_mmap_backing_file(struct vm_area_struct *vma, + struct file *backing_file, + struct file *user_file); int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -1140,6 +1147,15 @@ static inline void security_file_release(struct file *file) static inline void security_file_free(struct file *file) { } +static inline int security_backing_file_alloc(struct file *backing_file, + const struct file *user_file) +{ + return 0; +} + +static inline void security_backing_file_free(struct file *backing_file) +{ } + static inline int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1159,6 +1175,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot, return 0; } +static inline int security_mmap_backing_file(struct vm_area_struct *vma, + struct file *backing_file, + struct file *user_file) +{ + return 0; +} + static inline int security_mmap_addr(unsigned long addr) { return cap_mmap_addr(addr); @@ -1931,6 +1954,17 @@ static inline int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk) } #endif /* CONFIG_SECURITY_NETWORK */ +#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH) + +int security_unix_find(const struct path *path, struct sock *other, int flags); + +#else /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */ +static inline int security_unix_find(const struct path *path, struct sock *other, int flags) +{ + return 0; +} +#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */ + #ifdef CONFIG_SECURITY_INFINIBAND int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey); int security_ib_endport_manage_subnet(void *sec, const char *name, u8 port_num); diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 80f33a93f944..0630430cc01a 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -53,6 +53,11 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_DISCOVERY: case IOC_OPAL_REVERT_LSP: case IOC_OPAL_SET_SID_PW: + case IOC_OPAL_REACTIVATE_LSP: + case IOC_OPAL_LR_SET_START_LEN: + case IOC_OPAL_ENABLE_DISABLE_LR: + case IOC_OPAL_GET_SUM_STATUS: + case IOC_OPAL_STACK_RESET: return true; } return false; diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 89706157e622..a4c8651ef021 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -15,7 +15,7 @@ struct semaphore { raw_spinlock_t lock; unsigned int count; - struct list_head wait_list; + struct semaphore_waiter *first_waiter; #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER unsigned long last_holder; @@ -33,7 +33,7 @@ struct semaphore { { \ .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ .count = n, \ - .wait_list = LIST_HEAD_INIT((name).wait_list) \ + .first_waiter = NULL \ __LAST_HOLDER_SEMAPHORE_INITIALIZER \ } diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 5654c58eb73c..188c0ba62d50 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -49,10 +49,7 @@ struct serdev_device { struct mutex write_lock; }; -static inline struct serdev_device *to_serdev_device(struct device *d) -{ - return container_of(d, struct serdev_device, dev); -} +#define to_serdev_device(d) container_of_const(d, struct serdev_device, dev) /** * struct serdev_device_driver - serdev slave device driver @@ -68,10 +65,7 @@ struct serdev_device_driver { void (*shutdown)(struct serdev_device *); }; -static inline struct serdev_device_driver *to_serdev_device_driver(struct device_driver *d) -{ - return container_of(d, struct serdev_device_driver, driver); -} +#define to_serdev_device_driver(d) container_of_const(d, struct serdev_device_driver, driver) enum serdev_parity { SERDEV_PARITY_NONE, @@ -112,10 +106,7 @@ struct serdev_controller { const struct serdev_controller_ops *ops; }; -static inline struct serdev_controller *to_serdev_controller(struct device *d) -{ - return container_of(d, struct serdev_controller, dev); -} +#define to_serdev_controller(d) container_of_const(d, struct serdev_controller, dev) static inline void *serdev_device_get_drvdata(const struct serdev_device *serdev) { @@ -343,4 +334,13 @@ static inline bool serdev_acpi_get_uart_resource(struct acpi_resource *ares, } #endif /* CONFIG_ACPI */ +#ifdef CONFIG_OF +struct serdev_controller *of_find_serdev_controller_by_node(struct device_node *node); +#else +static inline struct serdev_controller *of_find_serdev_controller_by_node(struct device_node *node) +{ + return NULL; +} +#endif /* CONFIG_OF */ + #endif /*_LINUX_SERDEV_H */ diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 01efdce0fda0..a95b2d143d24 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -195,6 +195,7 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl); void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, unsigned int quot); int fsl8250_handle_irq(struct uart_port *port); +void serial8250_handle_irq_locked(struct uart_port *port, unsigned int iir); int serial8250_handle_irq(struct uart_port *port, unsigned int iir); u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr); void serial8250_read_char(struct uart_8250_port *up, u16 lsr); diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a8273b32e041..f6a2d3402d76 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -48,7 +48,7 @@ struct shmem_inode_info { }; struct timespec64 i_crtime; /* file creation time */ struct shared_policy policy; /* NUMA memory alloc policy */ - struct simple_xattrs xattrs; /* list of xattrs */ + struct simple_xattrs *xattrs; /* list of xattrs */ pgoff_t fallocend; /* highest fallocate endindex */ unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */ atomic_t stop_eviction; /* hold when working on inode */ diff --git a/include/linux/simple_ring_buffer.h b/include/linux/simple_ring_buffer.h new file mode 100644 index 000000000000..21aec556293e --- /dev/null +++ b/include/linux/simple_ring_buffer.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SIMPLE_RING_BUFFER_H +#define _LINUX_SIMPLE_RING_BUFFER_H + +#include <linux/list.h> +#include <linux/ring_buffer.h> +#include <linux/ring_buffer_types.h> +#include <linux/types.h> + +/* + * Ideally those struct would stay private but the caller needs to know + * the allocation size for simple_ring_buffer_init(). + */ +struct simple_buffer_page { + struct list_head link; + struct buffer_data_page *page; + u64 entries; + u32 write; + u32 id; +}; + +struct simple_rb_per_cpu { + struct simple_buffer_page *tail_page; + struct simple_buffer_page *reader_page; + struct simple_buffer_page *head_page; + struct simple_buffer_page *bpages; + struct trace_buffer_meta *meta; + u32 nr_pages; + +#define SIMPLE_RB_UNAVAILABLE 0 +#define SIMPLE_RB_READY 1 +#define SIMPLE_RB_WRITING 2 + u32 status; + + u64 last_overrun; + u64 write_stamp; + + struct simple_rb_cbs *cbs; +}; + +int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages, + const struct ring_buffer_desc *desc); + +void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer); + +void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, + u64 timestamp); + +void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer); + +int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable); + +int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer); + +int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer); + +int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer, + struct simple_buffer_page *bpages, + const struct ring_buffer_desc *desc, + void *(*load_page)(unsigned long va), + void (*unload_page)(void *va)); + +void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer, + void (*unload_page)(void *)); +#endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index daa4e4944ce3..2bcf78a4de7b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2605,8 +2605,9 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) * * Does not take any additional reference on the fragment. */ -static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, - netmem_ref netmem, int off, int size) +static __always_inline void +__skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size) { struct page *page; @@ -2628,14 +2629,16 @@ static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, skb->pfmemalloc = true; } -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static __always_inline void +__skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, + int off, int size) { __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } -static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, - netmem_ref netmem, int off, int size) +static __always_inline void +skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size) { __skb_fill_netmem_desc(skb, i, netmem, off, size); skb_shinfo(skb)->nr_frags = i + 1; @@ -2655,8 +2658,9 @@ static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, * * Does not take any additional reference on the fragment. */ -static inline void skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static __always_inline void +skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, + int off, int size) { skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } @@ -2682,8 +2686,17 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, shinfo->nr_frags = i + 1; } -void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, - int off, int size, unsigned int truesize); +static inline void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, + netmem_ref netmem, int off, + int size, unsigned int truesize) +{ + DEBUG_NET_WARN_ON_ONCE(size > truesize); + + skb_fill_netmem_desc(skb, i, netmem, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; +} static inline void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, @@ -2819,7 +2832,7 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len) } void *skb_pull(struct sk_buff *skb, unsigned int len); -static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) +static __always_inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); @@ -2844,7 +2857,7 @@ void *skb_pull_data(struct sk_buff *skb, size_t len); void *__pskb_pull_tail(struct sk_buff *skb, int delta); -static inline enum skb_drop_reason +static __always_inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); @@ -2862,12 +2875,13 @@ pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) return SKB_NOT_DROPPED_YET; } -static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) +static __always_inline bool +pskb_may_pull(struct sk_buff *skb, unsigned int len) { return pskb_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } -static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) +static __always_inline void *pskb_pull(struct sk_buff *skb, unsigned int len) { if (!pskb_may_pull(skb, len)) return NULL; @@ -3328,7 +3342,7 @@ static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) return 0; } -static inline int pskb_trim(struct sk_buff *skb, unsigned int len) +static __always_inline int pskb_trim(struct sk_buff *skb, unsigned int len) { skb_might_realloc(skb); return (len < skb->len) ? __pskb_trim(skb, len) : 0; @@ -3371,7 +3385,7 @@ static inline int __skb_grow(struct sk_buff *skb, unsigned int len) * destructor function and make the @skb unowned. The buffer continues * to exist but is no longer charged to its former owner. */ -static inline void skb_orphan(struct sk_buff *skb) +static __always_inline void skb_orphan(struct sk_buff *skb) { if (skb->destructor) { skb->destructor(skb); @@ -3750,6 +3764,17 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) } /** + * skb_frag_phys - gets the physical address of the data in a paged fragment + * @frag: the paged fragment buffer + * + * Returns: the physical address of the data within @frag. + */ +static inline phys_addr_t skb_frag_phys(const skb_frag_t *frag) +{ + return page_to_phys(skb_frag_page(frag)) + skb_frag_off(frag); +} + +/** * skb_frag_page_copy() - sets the page in a fragment from another fragment * @fragto: skb fragment where page is set * @fragfrom: skb fragment page is copied from @@ -4035,8 +4060,8 @@ __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, * update the CHECKSUM_COMPLETE checksum, or set ip_summed to * CHECKSUM_NONE so that it can be recomputed from scratch. */ -static inline void skb_postpull_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) +static __always_inline void +skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = wsum_negate(csum_partial(start, len, @@ -4295,7 +4320,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len, return buffer; } -static inline void * __must_check +static __always_inline void * __must_check skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { return __skb_header_pointer(skb, offset, len, skb->data, @@ -4467,7 +4492,7 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); /* It is used in the ingress path to clear the delivery_time. * If needed, set the skb->tstamp to the (rcv) timestamp. */ -static inline void skb_clear_delivery_time(struct sk_buff *skb) +static __always_inline void skb_clear_delivery_time(struct sk_buff *skb) { if (skb->tstamp_type) { skb->tstamp_type = SKB_CLOCK_REALTIME; @@ -4494,7 +4519,8 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) return skb->tstamp; } -static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) +static __always_inline ktime_t +skb_tstamp_cond(const struct sk_buff *skb, bool cond) { if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; @@ -5097,6 +5123,7 @@ static inline bool skb_has_extensions(struct sk_buff *skb) return unlikely(skb->active_extensions); } #else +static inline void __skb_ext_put(struct skb_ext *ext) {} static inline void skb_ext_put(struct sk_buff *skb) {} static inline void skb_ext_reset(struct sk_buff *skb) {} static inline void skb_ext_del(struct sk_buff *skb, int unused) {} @@ -5283,7 +5310,7 @@ static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo, void __skb_warn_lro_forwarding(const struct sk_buff *skb); -static inline bool skb_warn_if_lro(const struct sk_buff *skb) +static __always_inline bool skb_warn_if_lro(const struct sk_buff *skb) { /* LRO sets gso_size but not gso_type, whereas if GSO is really * wanted then gso_type will be set. */ diff --git a/include/linux/slab.h b/include/linux/slab.h index a5a5e4108ae5..15a60b501b95 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -517,18 +517,6 @@ void kfree_sensitive(const void *objp); DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T)) -/** - * ksize - Report actual allocation size of associated object - * - * @objp: Pointer returned from a prior kmalloc()-family allocation. - * - * This should not be used for writing beyond the originally requested - * allocation size. Either use krealloc() or round up the allocation size - * with kmalloc_size_roundup() prior to allocation. If this is used to - * access beyond the originally requested allocation size, UBSAN_BOUNDS - * and/or FORTIFY_SOURCE may trip, since they only know about the - * originally allocated size via the __alloc_size attribute. - */ size_t ksize(const void *objp); #ifdef CONFIG_PRINTK diff --git a/include/linux/smp.h b/include/linux/smp.h index 1ebd88026119..6925d15ccaa7 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -73,7 +73,7 @@ static inline void on_each_cpu(smp_call_func_t func, void *info, int wait) } /** - * on_each_cpu_mask(): Run a function on processors specified by + * on_each_cpu_mask() - Run a function on processors specified by * cpumask, which may include the local processor. * @mask: The set of cpus to run on (only runs on online subset). * @func: The function to run. This must be fast and non-blocking. @@ -239,13 +239,30 @@ static inline int get_boot_cpu_id(void) #endif /* !SMP */ -/** +/* * raw_smp_processor_id() - get the current (unstable) CPU id * - * For then you know what you are doing and need an unstable + * raw_smp_processor_id() is arch-specific/arch-defined and + * may be a macro or a static inline function. + * + * For when you know what you are doing and need an unstable * CPU id. */ +/* + * Allow the architecture to differentiate between a stable and unstable read. + * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a + * regular asm read for the stable. + */ +#ifndef __smp_processor_id +#define __smp_processor_id() raw_smp_processor_id() +#endif + +#ifdef CONFIG_DEBUG_PREEMPT + extern unsigned int debug_smp_processor_id(void); +# define smp_processor_id() debug_smp_processor_id() + +#else /** * smp_processor_id() - get the current (stable) CPU id * @@ -258,23 +275,10 @@ static inline int get_boot_cpu_id(void) * - preemption is disabled; * - the task is CPU affine. * - * When CONFIG_DEBUG_PREEMPT; we verify these assumption and WARN + * When CONFIG_DEBUG_PREEMPT=y, we verify these assumptions and WARN * when smp_processor_id() is used when the CPU id is not stable. */ -/* - * Allow the architecture to differentiate between a stable and unstable read. - * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a - * regular asm read for the stable. - */ -#ifndef __smp_processor_id -#define __smp_processor_id() raw_smp_processor_id() -#endif - -#ifdef CONFIG_DEBUG_PREEMPT - extern unsigned int debug_smp_processor_id(void); -# define smp_processor_id() debug_smp_processor_id() -#else # define smp_processor_id() __smp_processor_id() #endif diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index 6e1b1202e818..58fa1df96347 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -191,7 +191,7 @@ int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt); gpr_port_t *gpr_alloc_port(gpr_device_t *gdev, struct device *dev, gpr_port_cb cb, void *priv); void gpr_free_port(gpr_port_t *port); -int gpr_send_port_pkt(gpr_port_t *port, struct gpr_pkt *pkt); -int gpr_send_pkt(gpr_device_t *gdev, struct gpr_pkt *pkt); +int gpr_send_port_pkt(gpr_port_t *port, const struct gpr_pkt *pkt); +int gpr_send_pkt(gpr_device_t *gdev, const struct gpr_pkt *pkt); #endif /* __QCOM_APR_H_ */ diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 8243ab3a12a8..227125d84318 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -91,10 +91,12 @@ * struct llcc_slice_desc - Cache slice descriptor * @slice_id: llcc slice id * @slice_size: Size allocated for the llcc slice + * @refcount: Atomic counter to track activate/deactivate calls */ struct llcc_slice_desc { u32 slice_id; size_t slice_size; + refcount_t refcount; }; /** @@ -152,11 +154,10 @@ struct llcc_edac_reg_offset { * @edac_reg_offset: Offset of the LLCC EDAC registers * @lock: mutex associated with each slice * @cfg_size: size of the config data table - * @max_slices: max slices as read from device tree * @num_banks: Number of llcc banks - * @bitmap: Bit map to track the active slice ids * @ecc_irq: interrupt for llcc cache error detection and reporting * @ecc_irq_configured: 'True' if firmware has already configured the irq propagation + * @desc: Array pointer of pre-allocated LLCC slice descriptors * @version: Indicates the LLCC version */ struct llcc_drv_data { @@ -167,12 +168,11 @@ struct llcc_drv_data { const struct llcc_edac_reg_offset *edac_reg_offset; struct mutex lock; u32 cfg_size; - u32 max_slices; u32 num_banks; - unsigned long *bitmap; int ecc_irq; bool ecc_irq_configured; u32 version; + struct llcc_slice_desc *desc; }; #if IS_ENABLED(CONFIG_QCOM_LLCC) diff --git a/include/linux/soc/qcom/pdr.h b/include/linux/soc/qcom/pdr.h index 83a8ea612e69..2b7691e47c2a 100644 --- a/include/linux/soc/qcom/pdr.h +++ b/include/linux/soc/qcom/pdr.h @@ -5,6 +5,7 @@ #include <linux/soc/qcom/qmi.h> #define SERVREG_NAME_LENGTH 64 +#define SERVREG_PFR_LENGTH 256 struct pdr_service; struct pdr_handle; diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h index 291cdc7ef49c..b9dcb437a0be 100644 --- a/include/linux/soc/qcom/qmi.h +++ b/include/linux/soc/qcom/qmi.h @@ -92,6 +92,18 @@ struct qmi_elem_info { #define QMI_ERR_INCOMPATIBLE_STATE_V01 90 #define QMI_ERR_NOT_SUPPORTED_V01 94 +/* + * Enumerate the IDs of the QMI services + */ +#define QMI_SERVICE_ID_TEST 0x0f /* 15 */ +#define QMI_SERVICE_ID_SSCTL 0x2b /* 43 */ +#define QMI_SERVICE_ID_IPA 0x31 /* 49 */ +#define QMI_SERVICE_ID_SERVREG_LOC 0x40 /* 64 */ +#define QMI_SERVICE_ID_SERVREG_NOTIF 0x42 /* 66 */ +#define QMI_SERVICE_ID_WLFW 0x45 /* 69 */ +#define QMI_SERVICE_ID_SLIMBUS 0x301 /* 769 */ +#define QMI_SERVICE_ID_USB_AUDIO_STREAM 0x41d /* 1053 */ + /** * struct qmi_response_type_v01 - common response header (decoded) * @result: result of the transaction diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h index f052e241736c..f5d0e2341261 100644 --- a/include/linux/soc/qcom/ubwc.h +++ b/include/linux/soc/qcom/ubwc.h @@ -74,4 +74,29 @@ static inline bool qcom_ubwc_get_ubwc_mode(const struct qcom_ubwc_cfg_data *cfg) return ret; } +/* + * This is the best guess, based on the MDSS driver, which worked so far. + */ +static inline bool qcom_ubwc_min_acc_length_64b(const struct qcom_ubwc_cfg_data *cfg) +{ + return cfg->ubwc_enc_version == UBWC_1_0 && + (cfg->ubwc_dec_version == UBWC_2_0 || + cfg->ubwc_dec_version == UBWC_3_0); +} + +static inline bool qcom_ubwc_macrotile_mode(const struct qcom_ubwc_cfg_data *cfg) +{ + return cfg->macrotile_mode; +} + +static inline bool qcom_ubwc_bank_spread(const struct qcom_ubwc_cfg_data *cfg) +{ + return cfg->ubwc_bank_spread; +} + +static inline u32 qcom_ubwc_swizzle(const struct qcom_ubwc_cfg_data *cfg) +{ + return cfg->ubwc_swizzle; +} + #endif /* __QCOM_UBWC_H__ */ diff --git a/include/linux/socket.h b/include/linux/socket.h index ec715ad4bf25..ec4a0a025793 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -415,7 +415,7 @@ struct __kernel_timespec; struct old_timespec32; struct scm_timestamping_internal { - struct timespec64 ts[3]; + ktime_t ts[3]; }; extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss); diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index f462717acf20..6147eb1fb210 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -532,7 +532,7 @@ struct sdw_slave_intr_status { }; /** - * sdw_reg_bank - SoundWire register banks + * enum sdw_reg_bank - SoundWire register banks * @SDW_BANK0: Soundwire register bank 0 * @SDW_BANK1: Soundwire register bank 1 */ @@ -751,7 +751,7 @@ struct sdw_port_params { * struct sdw_transport_params: Data Port Transport Parameters * * @blk_grp_ctrl_valid: Port implements block group control - * @num: Port number + * @port_num: Port number * @blk_grp_ctrl: Block group control value * @sample_interval: Sample interval * @offset1: Blockoffset of the payload data @@ -782,7 +782,7 @@ struct sdw_transport_params { /** * struct sdw_enable_ch: Enable/disable Data Port channel * - * @num: Port number + * @port_num: Port number * @ch_mask: Active channel mask * @enable: Enable (true) /disable (false) channel */ @@ -885,7 +885,7 @@ void sdw_bus_master_delete(struct sdw_bus *bus); void sdw_show_ping_status(struct sdw_bus *bus, bool sync_delay); /** - * sdw_port_config: Master or Slave Port configuration + * struct sdw_port_config: Master or Slave Port configuration * * @num: Port number * @ch_mask: channels mask for port @@ -896,7 +896,7 @@ struct sdw_port_config { }; /** - * sdw_stream_config: Master or Slave stream configuration + * struct sdw_stream_config: Master or Slave stream configuration * * @frame_rate: Audio frame rate of the stream, in Hz * @ch_count: Channel count of the stream @@ -913,7 +913,7 @@ struct sdw_stream_config { }; /** - * sdw_stream_state: Stream states + * enum sdw_stream_state: Stream states * * @SDW_STREAM_ALLOCATED: New stream allocated. * @SDW_STREAM_CONFIGURED: Stream configured @@ -934,7 +934,7 @@ enum sdw_stream_state { }; /** - * sdw_stream_params: Stream parameters + * struct sdw_stream_params: Stream parameters * * @rate: Sampling frequency, in Hz * @ch_count: Number of channels @@ -947,7 +947,7 @@ struct sdw_stream_params { }; /** - * sdw_stream_runtime: Runtime stream parameters + * struct sdw_stream_runtime: Runtime stream parameters * * @name: SoundWire stream name * @params: Stream parameters @@ -983,7 +983,7 @@ struct sdw_stream_runtime { * @defer_msg: Defer message * @params: Current bus parameters * @stream_refcount: number of streams currently using this bus - * @btp_stream_refcount: number of BTP streams currently using this bus (should + * @bpt_stream_refcount: number of BTP streams currently using this bus (should * be zero or one, multiple streams per link is not supported). * @bpt_stream: pointer stored to handle BTP streams. * @ops: Master callback ops diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index fe31773d5210..470360a2723c 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -66,8 +66,10 @@ struct sdw_amd_dai_runtime { * @status: peripheral devices status array * @num_din_ports: number of input ports * @num_dout_ports: number of output ports + * @max_ports: total number of input ports and output ports * @cols_index: Column index in frame shape * @rows_index: Rows index in frame shape + * @port_offset_map: dynamic array to map port block offset * @instance: SoundWire manager instance * @quirks: SoundWire manager quirks * @wake_en_mask: wake enable mask per SoundWire manager @@ -92,10 +94,12 @@ struct amd_sdw_manager { int num_din_ports; int num_dout_ports; + int max_ports; int cols_index; int rows_index; + int *port_offset_map; u32 instance; u32 quirks; u32 wake_en_mask; diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 5774e554c0f0..c8e207522223 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -28,6 +28,14 @@ .dtr = true, \ } +#define SPI_MEM_DTR_OP_PACKED_CMD(__opcode, __addr, __buswidth) \ + { \ + .nbytes = 2, \ + .opcode = __opcode << 8 | __addr, \ + .buswidth = __buswidth, \ + .dtr = true, \ + } + #define SPI_MEM_OP_ADDR(__nbytes, __val, __buswidth) \ { \ .nbytes = __nbytes, \ @@ -130,11 +138,13 @@ enum spi_mem_data_dir { /** * struct spi_mem_op - describes a SPI memory operation + * @cmd: the complete command * @cmd.nbytes: number of opcode bytes (only 1 or 2 are valid). The opcode is * sent MSB-first. * @cmd.buswidth: number of IO lines used to transmit the command * @cmd.opcode: operation opcode * @cmd.dtr: whether the command opcode should be sent in DTR mode or not + * @addr: the address attributes * @addr.nbytes: number of address bytes to send. Can be zero if the operation * does not need to send an address * @addr.buswidth: number of IO lines used to transmit the address cycles @@ -143,10 +153,12 @@ enum spi_mem_data_dir { * Note that only @addr.nbytes are taken into account in this * address value, so users should make sure the value fits in the * assigned number of bytes. + * @dummy: data for dummy operation * @dummy.nbytes: number of dummy bytes to send after an opcode or address. Can * be zero if the operation does not require dummy bytes * @dummy.buswidth: number of IO lanes used to transmit the dummy bytes * @dummy.dtr: whether the dummy bytes should be sent in DTR mode or not + * @data: the data attributes * @data.buswidth: number of IO lanes used to send/receive the data * @data.dtr: whether the data should be sent in DTR mode or not * @data.ecc: whether error correction is required or not @@ -273,7 +285,7 @@ struct spi_mem { }; /** - * struct spi_mem_set_drvdata() - attach driver private data to a SPI mem + * spi_mem_set_drvdata() - attach driver private data to a SPI mem * device * @mem: memory device * @data: data to attach to the memory device @@ -284,7 +296,7 @@ static inline void spi_mem_set_drvdata(struct spi_mem *mem, void *data) } /** - * struct spi_mem_get_drvdata() - get driver private data attached to a SPI mem + * spi_mem_get_drvdata() - get driver private data attached to a SPI mem * device * @mem: memory device * diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index af7cfee7b8f6..7587b1c5d7ec 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -159,10 +159,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging - * @driver_override: If the name of a driver is written to this attribute, then - * the device will bind to the named driver and only the named driver. - * Do not set directly, because core frees it; use driver_set_override() to - * set or clear it. * @pcpu_statistics: statistics for the spi_device * @word_delay: delay to be inserted between consecutive * words of a transfer @@ -224,7 +220,6 @@ struct spi_device { void *controller_state; void *controller_data; char modalias[SPI_NAME_SIZE]; - const char *driver_override; /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; @@ -387,6 +382,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) } extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select); +extern struct spi_device *devm_spi_new_ancillary_device(struct spi_device *spi, u8 chip_select); /* Use a define to avoid include chaining to get THIS_MODULE */ #define spi_register_driver(driver) \ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index e1e2f144af9b..241277cd34cf 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -178,7 +178,7 @@ do { \ #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); - extern int do_raw_spin_trylock(raw_spinlock_t *lock); + extern int do_raw_spin_trylock(raw_spinlock_t *lock) __cond_acquires(true, lock); extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock); #else static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) @@ -189,6 +189,7 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) } static inline int do_raw_spin_trylock(raw_spinlock_t *lock) + __cond_acquires(true, lock) { int ret = arch_spin_trylock(&(lock)->raw_lock); diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 1e84e71ca495..3a50976471d7 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -48,16 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) lock->slock = 1; } -/* - * Read-write spinlocks. No debug version. - */ -#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) -#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) -#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) - #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) /* for sched/core.c and kernel_lock.c: */ @@ -68,4 +58,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_spin_is_contended(lock) (((void)(lock), 0)) +/* + * Read-write spinlocks. No debug version. + */ +#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) + #endif /* __LINUX_SPINLOCK_UP_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index bb44a0bd7696..81b1938512d5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -69,8 +69,8 @@ int init_srcu_struct_fast_updown(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). #define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). // 0x4 // SRCU-lite is no longer with us. -#define SRCU_READ_FLAVOR_FAST 0x4 // srcu_read_lock_fast(). -#define SRCU_READ_FLAVOR_FAST_UPDOWN 0x8 // srcu_read_lock_fast(). +#define SRCU_READ_FLAVOR_FAST 0x4 // srcu_read_lock_fast(), also NMI-safe. +#define SRCU_READ_FLAVOR_FAST_UPDOWN 0x8 // srcu_read_lock_fast_updown(). #define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ SRCU_READ_FLAVOR_FAST | SRCU_READ_FLAVOR_FAST_UPDOWN) // All of the above. diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index dec7cbe015aa..905b629e8fa3 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -11,6 +11,7 @@ #ifndef _LINUX_SRCU_TINY_H #define _LINUX_SRCU_TINY_H +#include <linux/irq_work_types.h> #include <linux/swait.h> struct srcu_struct { @@ -24,18 +25,21 @@ struct srcu_struct { struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */ struct work_struct srcu_work; /* For driving grace periods. */ + struct irq_work srcu_irq_work; /* Defer schedule_work() to irq work. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; void srcu_drive_gp(struct work_struct *wp); +void srcu_tiny_irq_work(struct irq_work *irq_work); #define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored) \ { \ .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ .srcu_cb_tail = &name.srcu_cb_head, \ .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ + .srcu_irq_work = { .func = srcu_tiny_irq_work }, \ __SRCU_DEP_MAP_INIT(name) \ } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 958cb7ef41cb..fd1a9270cb9a 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -34,7 +34,7 @@ struct srcu_data { /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ - spinlock_t __private lock ____cacheline_internodealigned_in_smp; + raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ @@ -55,7 +55,7 @@ struct srcu_data { * Node in SRCU combining tree, similar in function to rcu_data. */ struct srcu_node { - spinlock_t __private lock; + raw_spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */ /* if greater than ->srcu_gp_seq. */ unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */ @@ -74,7 +74,7 @@ struct srcu_usage { /* First node at each level. */ int srcu_size_state; /* Small-to-big transition state. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ - spinlock_t __private lock; /* Protect counters and size state. */ + raw_spinlock_t __private lock; /* Protect counters and size state. */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ @@ -95,6 +95,7 @@ struct srcu_usage { unsigned long reschedule_jiffies; unsigned long reschedule_count; struct delayed_work work; + struct irq_work irq_work; struct srcu_struct *srcu_ssp; }; @@ -156,7 +157,7 @@ struct srcu_struct { #define __SRCU_USAGE_INIT(name) \ { \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \ .srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \ .srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \ @@ -259,7 +260,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * srcu_read_unlock_fast(). * * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because + * critical sections either because they disable interrupts, because * they are a single instruction, or because they are read-modify-write * atomic operations, depending on the whims of the architecture. * This matters because the SRCU-fast grace-period mechanism uses either diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 32352a216567..4430b967abde 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -84,61 +84,78 @@ struct stmmac_priv; /* Platfrom data for platform device structure's platform_data field */ struct stmmac_mdio_bus_data { - unsigned int phy_mask; - unsigned int pcs_mask; - unsigned int default_an_inband; + u32 phy_mask; + u32 pcs_mask; int *irqs; int probed_phy_irq; bool needs_reset; }; struct stmmac_dma_cfg { + /* pbl: programmable burst limit + * txpbl: transmit programmable burst limit + * rxpbl: receive programmable burst limit + * If txpbl or rxpbl are zero, the value of pbl will be substituted. + * Range 0 - 63. + */ int pbl; int txpbl; int rxpbl; + /* pblx8: multiplies pbl, txpbl, rxpbl by a factor of 8 for dwmac >= + * 3.50a, or a factor of 4 for previous versions. + */ bool pblx8; - int fixed_burst; - int mixed_burst; + /* fixed_burst: + * when set, AXI bursts defined by axi_blen_regval are permitted. + * AHB uses SINGLE, INCR4, INCR8 or INCR16 during burst transfers. + * when clear, AXI and AHB use SINGLE or INCR bursts. + */ + bool fixed_burst; + /* mixed_burst: + * when set and fixed_burst is clear, AHB uses INCR for bursts > 16 + * and SINGLE or INCRx for bursts <= 16. + */ + bool mixed_burst; + /* aal: address aligned bursts for AHB and AXI master interface */ bool aal; + bool dche; bool eame; + /* multi_msi_en: stmmac core internal */ bool multi_msi_en; - bool dche; + /* atds: stmmac core internal */ bool atds; }; #define AXI_BLEN 7 struct stmmac_axi { - bool axi_lpi_en; - bool axi_xit_frm; u32 axi_wr_osr_lmt; u32 axi_rd_osr_lmt; - bool axi_kbbe; u32 axi_blen_regval; + bool axi_lpi_en; + bool axi_xit_frm; bool axi_fb; - bool axi_mb; - bool axi_rb; }; struct stmmac_rxq_cfg { - u8 mode_to_use; u32 chan; + u32 prio; + u8 mode_to_use; u8 pkt_route; bool use_prio; - u32 prio; }; struct stmmac_txq_cfg { u32 weight; - bool coe_unsupported; - u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; u32 idle_slope; u32 high_credit; u32 low_credit; - bool use_prio; u32 prio; int tbs_en; + bool use_prio; + bool coe_unsupported; + u8 mode_to_use; }; struct stmmac_safety_feature_cfg { @@ -187,11 +204,13 @@ enum dwmac_core_type { #define STMMAC_FLAG_MULTI_MSI_EN BIT(7) #define STMMAC_FLAG_EXT_SNAPSHOT_EN BIT(8) #define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9) -#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10) -#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(11) -#define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP BIT(12) -#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(13) -#define STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD BIT(14) +#define STMMAC_FLAG_EEE_DISABLE BIT(10) +#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(11) +#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(12) +#define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP BIT(13) +#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(14) +#define STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD BIT(15) +#define STMMAC_FLAG_SERDES_SUPPORTS_2500M BIT(16) struct mac_device_info; @@ -225,28 +244,28 @@ struct plat_stmmacenet_data { phy_interface_t phy_interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; - struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; - int enh_desc; - int tx_coe; + bool default_an_inband; + bool enh_desc; + bool tx_coe; + bool bugged_jumbo; + bool pmt; + bool force_sf_dma_mode; + bool force_thresh_dma_mode; + bool riwt_off; int rx_coe; - int bugged_jumbo; - int pmt; - int force_sf_dma_mode; - int force_thresh_dma_mode; - int riwt_off; int max_speed; int maxmtu; int multicast_filter_bins; int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; - u32 host_dma_width; - u32 rx_queues_to_use; - u32 tx_queues_to_use; + u8 host_dma_width; + u8 rx_queues_to_use; + u8 tx_queues_to_use; u8 rx_sched_algorithm; u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; @@ -256,7 +275,8 @@ struct plat_stmmacenet_data { int (*set_phy_intf_sel)(void *priv, u8 phy_intf_sel); int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); - void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, phy_interface_t interface, + int speed, unsigned int mode); int (*fix_soc_reset)(struct stmmac_priv *priv); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); @@ -279,10 +299,41 @@ struct plat_stmmacenet_data { struct phylink_pcs *(*select_pcs)(struct stmmac_priv *priv, phy_interface_t interface); void *bsp_priv; + + /* stmmac clocks: + * stmmac_clk: CSR clock (which can be hclk_i, clk_csr_i, aclk_i, + * or clk_app_i depending on GMAC configuration). This clock + * generates the MDC clock. + * + * pclk: introduced for Imagination Technologies Pistachio board - + * see 5f9755d26fbf ("stmmac: Add an optional register interface + * clock"). This is probably used for cases where separate clocks + * are provided for the host interface and register interface. In + * this case, as the MDC clock is derived from stmmac_clk, pclk + * can only really be the "application clock" for the "host + * interface" and not the "register interface" aka CSR clock as + * it is never used when determining the divider for the MDC + * clock. + * + * clk_ptp_ref: optional PTP reference clock (clk_ptp_ref_i). When + * present, this clock increments the timestamp value. Otherwise, + * the rate of stmmac_clk will be used. + * + * clk_tx_i: MAC transmit clock, which will be 2.5MHz for 10M, + * 25MHz for 100M, or 125MHz for 1G irrespective of the interface + * mode. For the DWMAC PHY interface modes: + * + * GMII/MII PHY's transmit clock for 10M (2.5MHz) or 100M (25MHz), + * or 125MHz local clock for 1G mode + * RMII 50MHz RMII clock divided by 2 or 20. + * RGMII 125MHz local clock divided by 1, 5, or 50. + * SGMII 125MHz SerDes clock divided by 1, 5, or 50. + * TBI/RTBI 125MHz SerDes clock + */ struct clk *stmmac_clk; struct clk *pclk; struct clk *clk_ptp_ref; - struct clk *clk_tx_i; /* clk_tx_i to MAC core */ + struct clk *clk_tx_i; unsigned long clk_ptp_rate; unsigned long clk_ref_rate; struct clk_bulk_data *clks; @@ -296,7 +347,7 @@ struct plat_stmmacenet_data { int rss_en; int mac_port_sel_speed; u8 vlan_fail_q; - struct pci_dev *pdev; + bool provide_bus_info; int int_snapshot_num; int msi_mac_vec; int msi_wol_vec; @@ -306,5 +357,6 @@ struct plat_stmmacenet_data { int msi_tx_base_vec; const struct dwmac4_addrs *dwmac4_addrs; unsigned int flags; + struct stmmac_dma_cfg __dma_cfg; }; #endif diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 72820503514c..01011113d226 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -99,7 +99,7 @@ static inline void print_stop_info(const char *log_lvl, struct task_struct *task * stop_machine: freeze the machine on all CPUs and run this function * @fn: the function to run * @data: the data ptr to pass to @fn() - * @cpus: the cpus to run @fn() on (NULL = run on each online CPU) + * @cpus: the cpus to run @fn() on (NULL = one unspecified online CPU) * * Description: This causes a thread to be scheduled on every CPU, which * will run with interrupts disabled. Each CPU specified by @cpus will @@ -133,7 +133,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function * @fn: the function to run * @data: the data ptr to pass to @fn() - * @cpus: the cpus to run @fn() on (NULL = run on each online CPU) + * @cpus: the cpus to run @fn() on (NULL = one unspecified online CPU) * * Same as above. Avoids nested calls to cpus_read_lock(). * diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 4dc14c7a711b..a11acf5cd63b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -20,7 +20,7 @@ #include <linux/lwq.h> #include <linux/wait.h> #include <linux/mm.h> -#include <linux/pagevec.h> +#include <linux/folio_batch.h> #include <linux/kthread.h> /* diff --git a/include/linux/swap.h b/include/linux/swap.h index 62fc7499b408..4b1f13b5bbad 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -20,8 +20,6 @@ struct notifier_block; struct bio; -struct pagevec; - #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ @@ -208,7 +206,6 @@ enum { SWP_DISCARDABLE = (1 << 2), /* blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ - SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ SWP_BLKDEV = (1 << 6), /* its a block device */ SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */ SWP_FS_OPS = (1 << 8), /* swapfile operations go through fs */ @@ -223,16 +220,6 @@ enum { #define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10) #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX -/* Bit flag in swap_map */ -#define COUNT_CONTINUED 0x80 /* Flag swap_map continuation for full count */ - -/* Special value in first swap_map */ -#define SWAP_MAP_MAX 0x3e /* Max count */ -#define SWAP_MAP_BAD 0x3f /* Note page is bad */ - -/* Special value in each swap_map continuation */ -#define SWAP_CONT_MAX 0x7f /* Max count */ - /* * The first page in the swap file is the swap header, which is always marked * bad to prevent it from being allocated as an entry. This also prevents the @@ -264,8 +251,7 @@ struct swap_info_struct { signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ signed char type; /* strange name for an index */ - unsigned int max; /* extent of the swap_map */ - unsigned char *swap_map; /* vmalloc'ed array of usage counts */ + unsigned int max; /* size of this swap device */ unsigned long *zeromap; /* kvmalloc'ed bitmap to track zero pages */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct list_head free_clusters; /* free clusters list */ @@ -284,18 +270,14 @@ struct swap_info_struct { struct completion comp; /* seldom referenced */ spinlock_t lock; /* * protect map scan related fields like - * swap_map, inuse_pages and all cluster - * lists. other fields are only changed + * inuse_pages and all cluster lists. + * Other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If * both locks need hold, hold swap_lock * first. */ - spinlock_t cont_lock; /* - * protect swap count continuation page - * list. - */ struct work_struct discard_work; /* discard worker */ struct work_struct reclaim_work; /* reclaim worker */ struct list_head discard_clusters; /* discard clusters list */ @@ -451,7 +433,6 @@ static inline long get_nr_swap_pages(void) } extern void si_swapinfo(struct sysinfo *); -extern int add_swap_count_continuation(swp_entry_t, gfp_t); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); @@ -517,11 +498,6 @@ static inline void free_swap_cache(struct folio *folio) { } -static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) -{ - return 0; -} - static inline int swap_dup_entry_direct(swp_entry_t ent) { return 0; diff --git a/include/linux/sys_soc.h b/include/linux/sys_soc.h index d9b3cf0f410c..f19f5cec18e2 100644 --- a/include/linux/sys_soc.h +++ b/include/linux/sys_soc.h @@ -37,6 +37,16 @@ void soc_device_unregister(struct soc_device *soc_dev); */ struct device *soc_device_to_device(struct soc_device *soc); +/** + * soc_attr_read_machine - retrieve the machine model and store it in + * the soc_device_attribute structure + * @soc_dev_attr: SoC attribute structure to store the model in + * + * Returns: + * 0 on success, negative error number on failure. + */ +int soc_attr_read_machine(struct soc_device_attribute *soc_dev_attr); + #ifdef CONFIG_SOC_BUS const struct soc_device_attribute *soc_device_match( const struct soc_device_attribute *matches); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 02bd6ddb6278..f5639d5ac331 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1283,19 +1283,9 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, AT_SYMLINK_NOFOLLOW); } -int do_sys_ftruncate(unsigned int fd, loff_t length, int small); - -static inline long ksys_ftruncate(unsigned int fd, loff_t length) -{ - return do_sys_ftruncate(fd, length, 1); -} - -int do_sys_truncate(const char __user *pathname, loff_t length); - -static inline long ksys_truncate(const char __user *pathname, loff_t length) -{ - return do_sys_truncate(pathname, length); -} +#define FTRUNCATE_LFS (1u << 0) /* allow truncating > 32-bit */ +int ksys_ftruncate(unsigned int fd, loff_t length, unsigned int flags); +int ksys_truncate(const char __user *pathname, loff_t length); static inline unsigned int ksys_personality(unsigned int personality) { diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99b775f3ff46..b1a3a1e6ad09 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -396,13 +396,13 @@ struct sysfs_ops { #ifdef CONFIG_SYSFS -int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); +int __must_check sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, - const void *new_ns); + const struct ns_common *new_ns); int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns); + const struct ns_common *new_ns); int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, const char *name); void sysfs_remove_mount_point(struct kobject *parent_kobj, @@ -410,7 +410,7 @@ void sysfs_remove_mount_point(struct kobject *parent_kobj, int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns); + const struct ns_common *ns); int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute * const *attr); int __must_check sysfs_chmod_file(struct kobject *kobj, @@ -419,7 +419,7 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr); void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns); + const struct ns_common *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr); @@ -437,7 +437,7 @@ void sysfs_remove_link(struct kobject *kobj, const char *name); int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *target, const char *old_name, const char *new_name, - const void *new_ns); + const struct ns_common *new_ns); void sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name); @@ -445,15 +445,15 @@ void sysfs_delete_link(struct kobject *dir, struct kobject *targ, int __must_check sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp); int __must_check sysfs_create_groups(struct kobject *kobj, - const struct attribute_group **groups); + const struct attribute_group *const *groups); int __must_check sysfs_update_groups(struct kobject *kobj, - const struct attribute_group **groups); + const struct attribute_group *const *groups); int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_groups(struct kobject *kobj, - const struct attribute_group **groups); + const struct attribute_group *const *groups); int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group); void sysfs_remove_file_from_group(struct kobject *kobj, @@ -486,7 +486,7 @@ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid); int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_groups_change_owner(struct kobject *kobj, - const struct attribute_group **groups, + const struct attribute_group *const *groups, kuid_t kuid, kgid_t kgid); int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *groups, kuid_t kuid, @@ -502,7 +502,7 @@ ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, #else /* CONFIG_SYSFS */ -static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) +static inline int sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns) { return 0; } @@ -512,14 +512,14 @@ static inline void sysfs_remove_dir(struct kobject *kobj) } static inline int sysfs_rename_dir_ns(struct kobject *kobj, - const char *new_name, const void *new_ns) + const char *new_name, const struct ns_common *new_ns) { return 0; } static inline int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns) + const struct ns_common *new_ns) { return 0; } @@ -537,7 +537,7 @@ static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { return 0; } @@ -567,7 +567,7 @@ static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn) static inline void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { } @@ -612,7 +612,7 @@ static inline void sysfs_remove_link(struct kobject *kobj, const char *name) static inline int sysfs_rename_link_ns(struct kobject *k, struct kobject *t, const char *old_name, - const char *new_name, const void *ns) + const char *new_name, const struct ns_common *ns) { return 0; } @@ -629,13 +629,13 @@ static inline int sysfs_create_group(struct kobject *kobj, } static inline int sysfs_create_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { return 0; } static inline int sysfs_update_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { return 0; } @@ -652,7 +652,7 @@ static inline void sysfs_remove_group(struct kobject *kobj, } static inline void sysfs_remove_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { } @@ -733,7 +733,7 @@ static inline int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t k } static inline int sysfs_groups_change_owner(struct kobject *kobj, - const struct attribute_group **groups, + const struct attribute_group *const *groups, kuid_t kuid, kgid_t kgid) { return 0; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f72eef31fa23..6982f10e826b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -228,8 +228,7 @@ struct tcp_sock { u32 sacked_out; /* SACK'd packets */ u16 tcp_header_len; /* Bytes of tcp header to send */ u8 scaling_ratio; /* see tcp_win_from_space() */ - u8 chrono_type : 2, /* current chronograph type */ - repair : 1, + u8 repair : 1, tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ @@ -264,6 +263,7 @@ struct tcp_sock { * total number of data bytes sent. */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ + u8 chrono_type; /* current chronograph type */ u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ @@ -316,6 +316,9 @@ struct tcp_sock { */ u32 app_limited; /* limited until "delivered" reaches this val */ u32 rcv_wnd; /* Current receiver window */ + u32 rcv_mwnd_seq; /* Maximum window sequence number (RFC 7323, + * section 2.4, receiver requirements) + */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ /* * Options received (usually on last packet, some only on SYN packets). @@ -548,6 +551,13 @@ enum tsq_flags { TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; +/* Flags of interest for tcp_release_cb() */ +#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ + TCPF_WRITE_TIMER_DEFERRED | \ + TCPF_DELACK_TIMER_DEFERRED | \ + TCPF_MTU_REDUCED_DEFERRED | \ + TCPF_ACK_DEFERRED) + #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) /* Variant of tcp_sk() upgrading a const sock to a read/write tcp socket. diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index ee5f0bd41f43..f993d5118edd 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -50,7 +50,7 @@ enum tee_dma_heap_id { * @dev: embedded basic device structure * @cdev: embedded cdev * @num_users: number of active users of this device - * @c_no_user: completion used when unregistering the device + * @c_no_users: completion used when unregistering the device * @mutex: mutex protecting @num_users and @idr * @idr: register of user space shared memory objects allocated or * registered on this device @@ -132,6 +132,7 @@ struct tee_driver_ops { /* Size for TEE revision string buffer used by get_tee_revision(). */ #define TEE_REVISION_STR_SIZE 128 +#define TEE_DESC_PRIVILEGED 0x1 /** * struct tee_desc - Describes the TEE driver to the subsystem * @name: name of driver @@ -139,7 +140,6 @@ struct tee_driver_ops { * @owner: module providing the driver * @flags: Extra properties of driver, defined by TEE_DESC_* below */ -#define TEE_DESC_PRIVILEGED 0x1 struct tee_desc { const char *name; const struct tee_driver_ops *ops; @@ -187,7 +187,7 @@ struct tee_protmem_pool_ops { * Allocates a new struct tee_device instance. The device is * removed by tee_device_unregister(). * - * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + * @returns: a pointer to a 'struct tee_device' or an ERR_PTR on failure */ struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, struct device *dev, @@ -201,7 +201,7 @@ struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, * tee_device_unregister() need to be called to remove the @teedev if * this function fails. * - * @returns < 0 on failure + * @returns: < 0 on failure */ int tee_device_register(struct tee_device *teedev); @@ -254,14 +254,14 @@ void tee_device_set_dev_groups(struct tee_device *teedev, * tee_session_calc_client_uuid() - Calculates client UUID for session * @uuid: Resulting UUID * @connection_method: Connection method for session (TEE_IOCTL_LOGIN_*) - * @connectuon_data: Connection data for opening session + * @connection_data: Connection data for opening session * * Based on connection method calculates UUIDv5 based client UUID. * * For group based logins verifies that calling process has specified * credentials. * - * @return < 0 on failure + * @returns: < 0 on failure */ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, const u8 connection_data[TEE_IOCTL_UUID_LEN]); @@ -295,7 +295,7 @@ struct tee_shm_pool_ops { * @paddr: Physical address of start of pool * @size: Size in bytes of the pool * - * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + * @returns: pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. */ struct tee_shm_pool *tee_shm_pool_alloc_res_mem(unsigned long vaddr, phys_addr_t paddr, size_t size, @@ -318,14 +318,16 @@ static inline void tee_shm_pool_free(struct tee_shm_pool *pool) * @paddr: Physical address of start of pool * @size: Size in bytes of the pool * - * @returns pointer to a 'struct tee_protmem_pool' or an ERR_PTR on failure. + * @returns: pointer to a 'struct tee_protmem_pool' or an ERR_PTR on failure. */ struct tee_protmem_pool *tee_protmem_static_pool_alloc(phys_addr_t paddr, size_t size); /** * tee_get_drvdata() - Return driver_data pointer - * @returns the driver_data pointer supplied to tee_register(). + * @teedev: Pointer to the tee_device + * + * @returns: the driver_data pointer supplied to tee_register(). */ void *tee_get_drvdata(struct tee_device *teedev); @@ -334,7 +336,7 @@ void *tee_get_drvdata(struct tee_device *teedev); * TEE driver * @ctx: The TEE context for shared memory allocation * @size: Shared memory allocation size - * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + * @returns: a pointer to 'struct tee_shm' on success or an ERR_PTR on failure */ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size); @@ -354,7 +356,7 @@ void tee_dyn_shm_free_helper(struct tee_shm *shm, /** * tee_shm_is_dynamic() - Check if shared memory object is of the dynamic kind * @shm: Shared memory handle - * @returns true if object is dynamic shared memory + * @returns: true if object is dynamic shared memory */ static inline bool tee_shm_is_dynamic(struct tee_shm *shm) { @@ -370,7 +372,7 @@ void tee_shm_put(struct tee_shm *shm); /** * tee_shm_get_id() - Get id of a shared memory object * @shm: Shared memory handle - * @returns id + * @returns: id */ static inline int tee_shm_get_id(struct tee_shm *shm) { @@ -382,7 +384,7 @@ static inline int tee_shm_get_id(struct tee_shm *shm) * count * @ctx: Context owning the shared memory * @id: Id of shared memory object - * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + * @returns: a pointer to 'struct tee_shm' on success or an ERR_PTR on failure */ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); @@ -402,7 +404,7 @@ static inline bool tee_param_is_memref(struct tee_param *param) * teedev_open() - Open a struct tee_device * @teedev: Device to open * - * @return a pointer to struct tee_context on success or an ERR_PTR on failure. + * @returns: pointer to struct tee_context on success or an ERR_PTR on failure. */ struct tee_context *teedev_open(struct tee_device *teedev); diff --git a/include/linux/tegra-mipi-cal.h b/include/linux/tegra-mipi-cal.h new file mode 100644 index 000000000000..2a540b50f65d --- /dev/null +++ b/include/linux/tegra-mipi-cal.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TEGRA_MIPI_CAL_H_ +#define __TEGRA_MIPI_CAL_H_ + +struct tegra_mipi_device { + const struct tegra_mipi_ops *ops; + struct platform_device *pdev; + unsigned long pads; +}; + +/** + * Operations for Tegra MIPI calibration device + */ +struct tegra_mipi_ops { + /** + * @enable: + * + * Enable MIPI calibration device + */ + int (*enable)(struct tegra_mipi_device *device); + + /** + * @disable: + * + * Disable MIPI calibration device + */ + int (*disable)(struct tegra_mipi_device *device); + + /** + * @start_calibration: + * + * Start MIPI calibration + */ + int (*start_calibration)(struct tegra_mipi_device *device); + + /** + * @finish_calibration: + * + * Finish MIPI calibration + */ + int (*finish_calibration)(struct tegra_mipi_device *device); +}; + +int devm_tegra_mipi_add_provider(struct device *device, struct device_node *np, + const struct tegra_mipi_ops *ops); + +struct tegra_mipi_device *tegra_mipi_request(struct device *device, + struct device_node *np); +void tegra_mipi_free(struct tegra_mipi_device *device); + +int tegra_mipi_enable(struct tegra_mipi_device *device); +int tegra_mipi_disable(struct tegra_mipi_device *device); +int tegra_mipi_start_calibration(struct tegra_mipi_device *device); +int tegra_mipi_finish_calibration(struct tegra_mipi_device *device); + +#endif /* __TEGRA_MIPI_CAL_H_ */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 0b5ed6821080..0ddc77aeeca2 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -273,6 +273,9 @@ bool thermal_trip_is_bound_to_cdev(struct thermal_zone_device *tz, int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); + +void thermal_pm_prepare(void); +void thermal_pm_complete(void); #else static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, @@ -350,6 +353,9 @@ static inline int thermal_zone_device_enable(struct thermal_zone_device *tz) static inline int thermal_zone_device_disable(struct thermal_zone_device *tz) { return -ENODEV; } + +static inline void thermal_pm_prepare(void) {} +static inline void thermal_pm_complete(void) {} #endif /* CONFIG_THERMAL */ #endif /* __THERMAL_H__ */ diff --git a/include/linux/timb_gpio.h b/include/linux/timb_gpio.h index 3faf5a6bb13e..74f5e73bf6db 100644 --- a/include/linux/timb_gpio.h +++ b/include/linux/timb_gpio.h @@ -9,10 +9,10 @@ /** * struct timbgpio_platform_data - Platform data of the Timberdale GPIO driver - * @gpio_base The number of the first GPIO pin, set to -1 for + * @gpio_base: The number of the first GPIO pin, set to -1 for * dynamic number allocation. - * @nr_pins Number of pins that is supported by the hardware (1-32) - * @irq_base If IRQ is supported by the hardware, this is the base + * @nr_pins: Number of pins that is supported by the hardware (1-32) + * @irq_base: If IRQ is supported by the hardware, this is the base * number of IRQ:s. One IRQ per pin will be used. Set to * -1 if IRQ:s is not supported. */ diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index c514d0e5a45c..58bd9728df58 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -8,6 +8,7 @@ #include <linux/ns_common.h> #include <linux/err.h> #include <linux/time64.h> +#include <linux/cleanup.h> struct user_namespace; extern struct user_namespace init_user_ns; @@ -25,7 +26,9 @@ struct time_namespace { struct ucounts *ucounts; struct ns_common ns; struct timens_offsets offsets; +#ifdef CONFIG_TIME_NS_VDSO struct page *vvar_page; +#endif /* If set prevents changing offsets after any task joined namespace. */ bool frozen_offsets; } __randomize_layout; @@ -38,9 +41,6 @@ static inline struct time_namespace *to_time_ns(struct ns_common *ns) return container_of(ns, struct time_namespace, ns); } void __init time_ns_init(void); -extern int vdso_join_timens(struct task_struct *task, - struct time_namespace *ns); -extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { @@ -53,7 +53,6 @@ struct time_namespace *copy_time_ns(u64 flags, struct time_namespace *old_ns); void free_time_ns(struct time_namespace *ns); void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); -struct page *find_timens_vvar_page(struct vm_area_struct *vma); static inline void put_time_ns(struct time_namespace *ns) { @@ -117,17 +116,6 @@ static inline void __init time_ns_init(void) { } -static inline int vdso_join_timens(struct task_struct *task, - struct time_namespace *ns) -{ - return 0; -} - -static inline void timens_commit(struct task_struct *tsk, - struct time_namespace *ns) -{ -} - static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { return NULL; @@ -154,11 +142,6 @@ static inline void timens_on_fork(struct nsproxy *nsproxy, return; } -static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) -{ - return NULL; -} - static inline void timens_add_monotonic(struct timespec64 *ts) { } static inline void timens_add_boottime(struct timespec64 *ts) { } @@ -175,4 +158,20 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #endif +#ifdef CONFIG_TIME_NS_VDSO +extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); +struct page *find_timens_vvar_page(struct vm_area_struct *vma); +#else /* !CONFIG_TIME_NS_VDSO */ +static inline void timens_commit(struct task_struct *tsk, struct time_namespace *ns) +{ +} + +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif /* CONFIG_TIME_NS_VDSO */ + +DEFINE_FREE(time_ns, struct time_namespace *, if (_T) put_time_ns(_T)) + #endif /* _LINUX_TIMENS_H */ diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index b8ae89ea28ab..e36d11e33e0c 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -72,6 +72,10 @@ struct tk_read_base { * @id: The timekeeper ID * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds + * @cs_id: The ID of the current clocksource + * @cs_ns_to_cyc_mult: Multiplicator for nanoseconds to cycles conversion + * @cs_ns_to_cyc_shift: Shift value for nanoseconds to cycles conversion + * @cs_ns_to_cyc_maxns: Maximum nanoseconds to cyles conversion range * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events * @clock_valid: Indicator for valid clock @@ -159,6 +163,10 @@ struct timekeeper { u64 raw_sec; /* Cachline 3 and 4 (timekeeping internal variables): */ + enum clocksource_ids cs_id; + u32 cs_ns_to_cyc_mult; + u32 cs_ns_to_cyc_shift; + u64 cs_ns_to_cyc_maxns; unsigned int clock_was_set_seq; u8 cs_was_changed_seq; u8 clock_valid; diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index d306d9dd2207..7d0aaa766580 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -5,12 +5,11 @@ #include <linux/rbtree.h> #include <linux/timerqueue_types.h> -extern bool timerqueue_add(struct timerqueue_head *head, - struct timerqueue_node *node); -extern bool timerqueue_del(struct timerqueue_head *head, - struct timerqueue_node *node); -extern struct timerqueue_node *timerqueue_iterate_next( - struct timerqueue_node *node); +bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node); +bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node); +struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node); + +bool timerqueue_linked_add(struct timerqueue_linked_head *head, struct timerqueue_linked_node *node); /** * timerqueue_getnext - Returns the timer with the earliest expiration time @@ -19,8 +18,7 @@ extern struct timerqueue_node *timerqueue_iterate_next( * * Returns a pointer to the timer node that has the earliest expiration time. */ -static inline -struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) +static inline struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) { struct rb_node *leftmost = rb_first_cached(&head->rb_root); @@ -41,4 +39,46 @@ static inline void timerqueue_init_head(struct timerqueue_head *head) { head->rb_root = RB_ROOT_CACHED; } + +/* Timer queues with linked nodes */ + +static __always_inline +struct timerqueue_linked_node *timerqueue_linked_first(struct timerqueue_linked_head *head) +{ + return rb_entry_safe(head->rb_root.rb_leftmost, struct timerqueue_linked_node, node); +} + +static __always_inline +struct timerqueue_linked_node *timerqueue_linked_next(struct timerqueue_linked_node *node) +{ + return rb_entry_safe(node->node.next, struct timerqueue_linked_node, node); +} + +static __always_inline +struct timerqueue_linked_node *timerqueue_linked_prev(struct timerqueue_linked_node *node) +{ + return rb_entry_safe(node->node.prev, struct timerqueue_linked_node, node); +} + +static __always_inline +bool timerqueue_linked_del(struct timerqueue_linked_head *head, struct timerqueue_linked_node *node) +{ + return rb_erase_linked(&node->node, &head->rb_root); +} + +static __always_inline void timerqueue_linked_init(struct timerqueue_linked_node *node) +{ + RB_CLEAR_LINKED_NODE(&node->node); +} + +static __always_inline bool timerqueue_linked_node_queued(struct timerqueue_linked_node *node) +{ + return !RB_EMPTY_LINKED_NODE(&node->node); +} + +static __always_inline void timerqueue_linked_init_head(struct timerqueue_linked_head *head) +{ + head->rb_root = RB_ROOT_LINKED; +} + #endif /* _LINUX_TIMERQUEUE_H */ diff --git a/include/linux/timerqueue_types.h b/include/linux/timerqueue_types.h index dc298d0923e3..be2218b147c4 100644 --- a/include/linux/timerqueue_types.h +++ b/include/linux/timerqueue_types.h @@ -6,12 +6,21 @@ #include <linux/types.h> struct timerqueue_node { - struct rb_node node; - ktime_t expires; + struct rb_node node; + ktime_t expires; }; struct timerqueue_head { - struct rb_root_cached rb_root; + struct rb_root_cached rb_root; +}; + +struct timerqueue_linked_node { + struct rb_node_linked node; + ktime_t expires; +}; + +struct timerqueue_linked_head { + struct rb_root_linked rb_root; }; #endif /* _LINUX_TIMERQUEUE_TYPES_H */ diff --git a/include/linux/tnum.h b/include/linux/tnum.h index fa4654ffb621..ca2cfec8de08 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -131,4 +131,7 @@ static inline bool tnum_subreg_is_const(struct tnum a) return !(tnum_subreg(a)).mask; } +/* Returns the smallest member of t larger than z */ +u64 tnum_step(struct tnum t, u64 z); + #endif /* _LINUX_TNUM_H */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 37eb2f0f3dd8..40a43a4c7caf 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -22,20 +22,23 @@ union bpf_attr; const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, - const struct trace_print_flags *flag_array); + const struct trace_print_flags *flag_array, + size_t flag_array_size); const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val, - const struct trace_print_flags *symbol_array); + const struct trace_print_flags *symbol_array, + size_t symbol_array_size); #if BITS_PER_LONG == 32 const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim, unsigned long long flags, - const struct trace_print_flags_u64 *flag_array); + const struct trace_print_flags_u64 *flag_array, + size_t flag_array_size); const char *trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, - const struct trace_print_flags_u64 - *symbol_array); + const struct trace_print_flags_u64 *symbol_array, + size_t symbol_array_size); #endif struct trace_iterator; diff --git a/include/linux/trace_printk.h b/include/linux/trace_printk.h index bb5874097f24..2670ec7f4262 100644 --- a/include/linux/trace_printk.h +++ b/include/linux/trace_printk.h @@ -107,7 +107,6 @@ do { \ __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) -extern __printf(2, 3) int __trace_bprintk(unsigned long ip, const char *fmt, ...); extern __printf(2, 3) diff --git a/include/linux/trace_remote.h b/include/linux/trace_remote.h new file mode 100644 index 000000000000..fcd1d46ea466 --- /dev/null +++ b/include/linux/trace_remote.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_TRACE_REMOTE_H +#define _LINUX_TRACE_REMOTE_H + +#include <linux/dcache.h> +#include <linux/ring_buffer.h> +#include <linux/trace_remote_event.h> + +/** + * struct trace_remote_callbacks - Callbacks used by Tracefs to control the remote + * @init: Called once the remote has been registered. Allows the + * caller to extend the Tracefs remote directory + * @load_trace_buffer: Called before Tracefs accesses the trace buffer for the first + * time. Must return a &trace_buffer_desc + * (most likely filled with trace_remote_alloc_buffer()) + * @unload_trace_buffer: + * Called once Tracefs has no use for the trace buffer + * (most likely call trace_remote_free_buffer()) + * @enable_tracing: Called on Tracefs tracing_on. It is expected from the + * remote to allow writing. + * @swap_reader_page: Called when Tracefs consumes a new page from a + * ring-buffer. It is expected from the remote to isolate a + * @reset: Called on `echo 0 > trace`. It is expected from the + * remote to reset all ring-buffer pages. + * new reader-page from the @cpu ring-buffer. + * @enable_event: Called on events/event_name/enable. It is expected from + * the remote to allow the writing event @id. + */ +struct trace_remote_callbacks { + int (*init)(struct dentry *d, void *priv); + struct trace_buffer_desc *(*load_trace_buffer)(unsigned long size, void *priv); + void (*unload_trace_buffer)(struct trace_buffer_desc *desc, void *priv); + int (*enable_tracing)(bool enable, void *priv); + int (*swap_reader_page)(unsigned int cpu, void *priv); + int (*reset)(unsigned int cpu, void *priv); + int (*enable_event)(unsigned short id, bool enable, void *priv); +}; + +int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv, + struct remote_event *events, size_t nr_events); + +int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size, + const struct cpumask *cpumask); + +void trace_remote_free_buffer(struct trace_buffer_desc *desc); + +#endif diff --git a/include/linux/trace_remote_event.h b/include/linux/trace_remote_event.h new file mode 100644 index 000000000000..c8ae1e1f5e72 --- /dev/null +++ b/include/linux/trace_remote_event.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_TRACE_REMOTE_EVENTS_H +#define _LINUX_TRACE_REMOTE_EVENTS_H + +struct trace_remote; +struct trace_event_fields; +struct trace_seq; + +struct remote_event_hdr { + unsigned short id; +}; + +#define REMOTE_EVENT_NAME_MAX 30 +struct remote_event { + char name[REMOTE_EVENT_NAME_MAX]; + unsigned short id; + bool enabled; + struct trace_remote *remote; + struct trace_event_fields *fields; + char *print_fmt; + void (*print)(void *evt, struct trace_seq *seq); +}; + +#define RE_STRUCT(__args...) __args +#define re_field(__type, __field) __type __field; + +#define REMOTE_EVENT_FORMAT(__name, __struct) \ + struct remote_event_format_##__name { \ + struct remote_event_hdr hdr; \ + __struct \ + } +#endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 22ca1c8b54f3..578e520b6ee6 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return tp->ext && tp->ext->faultable; } +/* + * Run RCU callback with the appropriate grace period wait for non-faultable + * tracepoints, e.g., those used in atomic context. + */ +static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func) +{ + call_srcu(&tracepoint_srcu, rcu, func); +} +/* + * Run RCU callback with the appropriate grace period wait for faultable + * tracepoints, e.g., those used in syscall context. + */ +static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func) +{ + call_rcu_tasks_trace(rcu, func); +} #else static inline void tracepoint_synchronize_unregister(void) { } @@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return false; } +static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func) +{ } +static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func) +{ } #endif #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS @@ -294,6 +314,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ + } \ + static inline void trace_call__##name(proto) \ + { \ + __do_trace_##name(args); \ } #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ @@ -313,6 +337,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ + } \ + static inline void trace_call__##name(proto) \ + { \ + might_fault(); \ + __do_trace_##name(args); \ } /* @@ -398,6 +427,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ static inline void trace_##name(proto) \ { } \ + static inline void trace_call__##name(proto) \ + { } \ static inline int \ register_trace_##name(void (*probe)(data_proto), \ void *data) \ diff --git a/include/linux/types.h b/include/linux/types.h index 7e71d260763c..608050dbca6a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -239,7 +239,7 @@ struct ustat { * * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; - * - the structure shares storage space in struct page with @compound_head, + * - the structure shares storage space in struct page with @compound_info, * which encode PageTail() in bit 0. The guarantee is needed to avoid * false-positive PageTail(). */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 1f3804245c06..56328601218c 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -331,16 +331,21 @@ static inline size_t probe_subpage_writeable(char __user *uaddr, size_t size) #endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ -#ifndef ARCH_HAS_NOCACHE_UACCESS +#ifndef ARCH_HAS_NONTEMPORAL_UACCESS static inline __must_check unsigned long -__copy_from_user_inatomic_nocache(void *to, const void __user *from, +copy_from_user_inatomic_nontemporal(void *to, const void __user *from, unsigned long n) { + if (can_do_masked_user_access()) + from = mask_user_address(from); + else + if (!access_ok(from, n)) + return n; return __copy_from_user_inatomic(to, from, n); } -#endif /* ARCH_HAS_NOCACHE_UACCESS */ +#endif /* ARCH_HAS_NONTEMPORAL_UACCESS */ extern __must_check int check_zeroed_user(const void __user *from, size_t size); @@ -647,36 +652,22 @@ static inline void user_access_restore(unsigned long flags) { } /* Define RW variant so the below _mode macro expansion works */ #define masked_user_rw_access_begin(u) masked_user_access_begin(u) #define user_rw_access_begin(u, s) user_access_begin(u, s) -#define user_rw_access_end() user_access_end() /* Scoped user access */ -#define USER_ACCESS_GUARD(_mode) \ -static __always_inline void __user * \ -class_user_##_mode##_begin(void __user *ptr) \ -{ \ - return ptr; \ -} \ - \ -static __always_inline void \ -class_user_##_mode##_end(void __user *ptr) \ -{ \ - user_##_mode##_access_end(); \ -} \ - \ -DEFINE_CLASS(user_ ##_mode## _access, void __user *, \ - class_user_##_mode##_end(_T), \ - class_user_##_mode##_begin(ptr), void __user *ptr) \ - \ -static __always_inline class_user_##_mode##_access_t \ -class_user_##_mode##_access_ptr(void __user *scope) \ -{ \ - return scope; \ -} -USER_ACCESS_GUARD(read) -USER_ACCESS_GUARD(write) -USER_ACCESS_GUARD(rw) -#undef USER_ACCESS_GUARD +/* Cleanup wrapper functions */ +static __always_inline void __scoped_user_read_access_end(const void *p) +{ + user_read_access_end(); +}; +static __always_inline void __scoped_user_write_access_end(const void *p) +{ + user_write_access_end(); +}; +static __always_inline void __scoped_user_rw_access_end(const void *p) +{ + user_access_end(); +}; /** * __scoped_user_access_begin - Start a scoped user access @@ -750,13 +741,13 @@ USER_ACCESS_GUARD(rw) * * Don't use directly. Use scoped_masked_user_$MODE_access() instead. */ -#define __scoped_user_access(mode, uptr, size, elbl) \ -for (bool done = false; !done; done = true) \ - for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \ - !done; done = true) \ - for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true) \ - /* Force modified pointer usage within the scope */ \ - for (const typeof(uptr) uptr = _tmpptr; !done; done = true) +#define __scoped_user_access(mode, uptr, size, elbl) \ +for (bool done = false; !done; done = true) \ + for (auto _tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \ + !done; done = true) \ + /* Force modified pointer usage within the scope */ \ + for (const auto uptr __cleanup(__scoped_user_##mode##_access_end) = \ + _tmpptr; !done; done = true) /** * scoped_user_read_access_size - Start a scoped user read access with given size @@ -806,7 +797,7 @@ for (bool done = false; !done; done = true) \ /** * scoped_user_rw_access_size - Start a scoped user read/write access with given size - * @uptr Pointer to the user space address to read from and write to + * @uptr: Pointer to the user space address to read from and write to * @size: Size of the access starting from @uptr * @elbl: Error label to goto when the access region is rejected * @@ -817,7 +808,7 @@ for (bool done = false; !done; done = true) \ /** * scoped_user_rw_access - Start a scoped user read/write access - * @uptr Pointer to the user space address to read from and write to + * @uptr: Pointer to the user space address to read from and write to * @elbl: Error label to goto when the access region is rejected * * The size of the access starting from @uptr is determined via sizeof(*@uptr)). diff --git a/include/linux/udp.h b/include/linux/udp.h index 1cbf6b4d3aab..ce56ebcee5cb 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -40,8 +40,6 @@ enum { UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ - UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ - UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ }; /* per NUMA structure for lockless producer usage. */ @@ -74,11 +72,7 @@ struct udp_sock { */ __u16 len; /* total length of pending frames */ __u16 gso_size; - /* - * Fields specific to UDP-Lite. - */ - __u16 pcslen; - __u16 pcrlen; + /* * For encapsulation sockets. */ @@ -236,8 +230,6 @@ static inline void udp_allow_gso(struct sock *sk) hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node) #endif -#define IS_UDPLITE(__sk) (unlikely(__sk->sk_protocol == IPPROTO_UDPLITE)) - static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6) { #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 334641e20fb1..02eaac47ac44 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -97,7 +97,7 @@ struct uio_device { * @irq_flags: flags for request_irq() * @priv: optional private data * @handler: the device's irq handler - * @mmap: mmap operation for this uio device + * @mmap_prepare: mmap_prepare operation for this uio device * @open: open operation for this uio device * @release: release operation for this uio device * @irqcontrol: disable/enable irqs when 0/1 is written to /dev/uioX @@ -112,7 +112,7 @@ struct uio_info { unsigned long irq_flags; void *priv; irqreturn_t (*handler)(int irq, struct uio_info *dev_info); - int (*mmap)(struct uio_info *info, struct vm_area_struct *vma); + int (*mmap_prepare)(struct uio_info *info, struct vm_area_desc *desc); int (*open)(struct uio_info *info, struct inode *inode); int (*release)(struct uio_info *info, struct inode *inode); int (*irqcontrol)(struct uio_info *info, s32 irq_on); diff --git a/include/linux/units.h b/include/linux/units.h index 80d57c50b9e3..c6d78988613a 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -57,6 +57,9 @@ #define MICROWATT_PER_MILLIWATT 1000UL #define MICROWATT_PER_WATT 1000000UL +#define MICROJOULE_PER_JOULE 1000000UL +#define NANOJOULE_PER_JOULE 1000000000UL + #define BYTES_PER_KBIT (KILO / BITS_PER_BYTE) #define BYTES_PER_MBIT (MEGA / BITS_PER_BYTE) #define BYTES_PER_GBIT (GIGA / BITS_PER_BYTE) diff --git a/include/linux/usb.h b/include/linux/usb.h index fbfcc70b07fb..4aab20015851 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -21,6 +21,7 @@ #include <linux/completion.h> /* for struct completion */ #include <linux/sched.h> /* for current && schedule_timeout */ #include <linux/mutex.h> /* for struct mutex */ +#include <linux/spinlock.h> /* for spinlock_t */ #include <linux/pm_runtime.h> /* for runtime PM */ struct usb_device; @@ -636,8 +637,9 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) - * @offload_at_suspend: offload activities during suspend is enabled. + * @offload_pm_locked: prevents offload_usage changes during PM transitions. * @offload_usage: number of offload activities happening on this usb device. + * @offload_lock: protects offload_usage and offload_pm_locked * @slot_id: Slot ID assigned by xHCI * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. @@ -726,8 +728,9 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; - unsigned offload_at_suspend:1; + unsigned offload_pm_locked:1; int offload_usage; + spinlock_t offload_lock; enum usb_link_tunnel_mode tunnel_mode; struct device_link *usb4_link; @@ -849,6 +852,7 @@ static inline void usb_mark_last_busy(struct usb_device *udev) int usb_offload_get(struct usb_device *udev); int usb_offload_put(struct usb_device *udev); bool usb_offload_check(struct usb_device *udev); +void usb_offload_set_pm_locked(struct usb_device *udev, bool locked); #else static inline int usb_offload_get(struct usb_device *udev) @@ -857,6 +861,8 @@ static inline int usb_offload_put(struct usb_device *udev) { return 0; } static inline bool usb_offload_check(struct usb_device *udev) { return false; } +static inline void usb_offload_set_pm_locked(struct usb_device *udev, bool locked) +{ } #endif extern int usb_disable_lpm(struct usb_device *udev); @@ -1862,14 +1868,18 @@ void usb_free_noncoherent(struct usb_device *dev, size_t size, * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ +/* Maximum value allowed for timeout in synchronous routines below */ +#define USB_MAX_SYNCHRONOUS_TIMEOUT 60000 /* ms */ + extern int usb_control_msg(struct usb_device *dev, unsigned int pipe, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *data, __u16 size, int timeout); extern int usb_interrupt_msg(struct usb_device *usb_dev, unsigned int pipe, void *data, int len, int *actual_length, int timeout); extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, - void *data, int len, int *actual_length, - int timeout); + void *data, int len, int *actual_length, int timeout); +extern int usb_bulk_msg_killable(struct usb_device *usb_dev, unsigned int pipe, + void *data, int len, int *actual_length, int timeout); /* wrappers around usb_control_msg() for the most common standard requests */ int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 2f7bd2fdc616..b3cc7beab4a3 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -78,4 +78,7 @@ /* skip BOS descriptor request */ #define USB_QUIRK_NO_BOS BIT(17) +/* Device claims zero configurations, forcing to 1 */ +#define USB_QUIRK_FORCE_ONE_CONFIG BIT(18) + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 2ca60828f28b..1502b2a355f9 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -32,6 +32,7 @@ #define VENDOR_ID_DLINK 0x2001 #define VENDOR_ID_DELL 0x413c #define VENDOR_ID_ASUS 0x0b05 +#define VENDOR_ID_TRENDNET 0x20f4 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index b0e84896e6ac..bbf799ccf3b3 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -132,6 +132,7 @@ struct driver_info { #define FLAG_MULTI_PACKET 0x2000 #define FLAG_RX_ASSEMBLE 0x4000 /* rx packets may span >1 frames */ #define FLAG_NOARP 0x8000 /* device can't do ARP */ +#define FLAG_NOMAXMTU 0x10000 /* allow max_mtu above hard_mtu */ /* init device ... can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *); diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index ea92ac623a45..05bfebab42b6 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -138,6 +138,9 @@ #define UVC_GUID_FORMAT_M420 \ { 'M', '4', '2', '0', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_P010 \ + { 'P', '0', '1', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} #define UVC_GUID_FORMAT_H264 \ { 'H', '2', '6', '4', 0x00, 0x00, 0x10, 0x00, \ diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index fd5f42765497..d83e349900a3 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -23,6 +23,9 @@ /* The set of all possible UFFD-related VM flags. */ #define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR) +#define __VMA_UFFD_FLAGS mk_vma_flags(VMA_UFFD_MISSING_BIT, VMA_UFFD_WP_BIT, \ + VMA_UFFD_MINOR_BIT) + /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. We want diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2bfe3baa63f4..782c42d25db1 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -72,9 +72,6 @@ struct vdpa_mgmt_dev; * struct vdpa_device - representation of a vDPA device * @dev: underlying device * @vmap: the metadata passed to upper layer to be used for mapping - * @driver_override: driver name to force a match; do not set directly, - * because core frees it; use driver_set_override() to - * set or clear it. * @config: the configuration ops for this device. * @map: the map ops for this device * @cf_lock: Protects get and set access to configuration layout. @@ -90,7 +87,6 @@ struct vdpa_mgmt_dev; struct vdpa_device { struct device dev; union virtio_map vmap; - const char *driver_override; const struct vdpa_config_ops *config; const struct virtio_map_ops *map; struct rw_semaphore cf_lock; /* Protects get/set config */ diff --git a/include/linux/vdso_datastore.h b/include/linux/vdso_datastore.h index a91fa24b06e0..0b530428db71 100644 --- a/include/linux/vdso_datastore.h +++ b/include/linux/vdso_datastore.h @@ -2,9 +2,15 @@ #ifndef _LINUX_VDSO_DATASTORE_H #define _LINUX_VDSO_DATASTORE_H +#ifdef CONFIG_HAVE_GENERIC_VDSO #include <linux/mm_types.h> extern const struct vm_special_mapping vdso_vvar_mapping; struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr); +void __init vdso_setup_data_pages(void); +#else /* !CONFIG_HAVE_GENERIC_VDSO */ +static inline void vdso_setup_data_pages(void) { } +#endif /* CONFIG_HAVE_GENERIC_VDSO */ + #endif /* _LINUX_VDSO_DATASTORE_H */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e90859956514..31b826efba00 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -16,6 +16,7 @@ #include <linux/cdev.h> #include <uapi/linux/vfio.h> #include <linux/iova_bitmap.h> +#include <linux/uaccess.h> struct kvm; struct iommufd_ctx; @@ -52,6 +53,7 @@ struct vfio_device { struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; + u8 precopy_info_v2; struct kvm *kvm; /* Members below here are private, not for driver use */ @@ -72,13 +74,11 @@ struct vfio_device { u8 iommufd_attached:1; #endif u8 cdev_opened:1; -#ifdef CONFIG_DEBUG_FS /* * debug_root is a static property of the vfio_device * which must be set prior to registering the vfio_device. */ struct dentry *debug_root; -#endif }; /** @@ -284,6 +284,44 @@ static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, return 1; } +/** + * vfio_check_precopy_ioctl - Validate user input for the VFIO_MIG_GET_PRECOPY_INFO ioctl + * @vdev: The vfio device + * @cmd: Cmd from the ioctl + * @arg: Arg from the ioctl + * @info: Driver pointer to hold the userspace input to the ioctl + * + * For use in a driver's get_precopy_info. Checks that the inputs to the + * VFIO_MIG_GET_PRECOPY_INFO ioctl are correct. + + * Returns 0 on success, otherwise errno. + */ + +static inline int +vfio_check_precopy_ioctl(struct vfio_device *vdev, unsigned int cmd, + unsigned long arg, struct vfio_precopy_info *info) +{ + unsigned long minsz; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info->argsz < minsz) + return -EINVAL; + + /* keep v1 behaviour as is for compatibility reasons */ + if (vdev->precopy_info_v2) + /* flags are output, set its initial value to 0 */ + info->flags = 0; + + return 0; +} + struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, const struct vfio_device_ops *ops); #define vfio_alloc_device(dev_struct, member, dev, ops) \ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 75dabb763c65..f36d21b5bc19 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -207,6 +207,39 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type); } +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool little_endian) +{ + u16 hdr_len; + + hdr_len = skb_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len); +} + +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_tnl_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + u16 hdr_len; + + hdr_len = skb_inner_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += inner_tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(true, hdr_len); +} + static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, @@ -385,7 +418,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, bool tnl_hdr_negotiated, bool little_endian, int vlan_hlen, - bool has_data_valid) + bool has_data_valid, + bool feature_hdrlen) { struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr; unsigned int inner_nh, outer_th; @@ -394,9 +428,17 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM); - if (!tnl_gso_type) - return virtio_net_hdr_from_skb(skb, hdr, little_endian, - has_data_valid, vlan_hlen); + if (!tnl_gso_type) { + ret = virtio_net_hdr_from_skb(skb, hdr, little_endian, + has_data_valid, vlan_hlen); + if (ret) + return ret; + + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_hdrlen(skb, hdr, little_endian); + + return ret; + } /* Tunnel support not negotiated but skb ask for it. */ if (!tnl_hdr_negotiated) @@ -414,6 +456,9 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (ret) return ret; + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_tnl_hdrlen(skb, hdr); + if (skb->protocol == htons(ETH_P_IPV6)) hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; else diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 22a139f82d75..03fe95f5a020 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -38,21 +38,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, PGFAULT, PGMAJFAULT, PGLAZYFREED, - PGREFILL, PGREUSE, - PGSTEAL_KSWAPD, - PGSTEAL_DIRECT, - PGSTEAL_KHUGEPAGED, - PGSTEAL_PROACTIVE, - PGSCAN_KSWAPD, - PGSCAN_DIRECT, - PGSCAN_KHUGEPAGED, - PGSCAN_PROACTIVE, PGSCAN_DIRECT_THROTTLE, - PGSCAN_ANON, - PGSCAN_FILE, - PGSTEAL_ANON, - PGSTEAL_FILE, #ifdef CONFIG_NUMA PGSCAN_ZONE_RECLAIM_SUCCESS, PGSCAN_ZONE_RECLAIM_FAILED, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index e8e94f90d686..3b02c0c6b371 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -286,8 +286,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb); #ifdef CONFIG_MMU #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) -unsigned long vmalloc_nr_pages(void); - int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages); void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, @@ -304,7 +302,6 @@ static inline void set_vm_flush_reset_perms(void *addr) #else /* !CONFIG_MMU */ #define VMALLOC_TOTAL 0UL -static inline unsigned long vmalloc_nr_pages(void) { return 0; } static inline void set_vm_flush_reset_perms(void *addr) {} #endif /* CONFIG_MMU */ diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 9e29d79fc790..ace7379d627d 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -406,7 +406,7 @@ do { \ schedule()) /** - * wait_var_event_killable - wait for a variable to be updated and notified + * wait_var_event_interruptible - wait for a variable to be updated and notified * @var: the address of variable being waited on * @condition: the condition to wait for * @@ -492,7 +492,7 @@ do { \ * wait_var_event_mutex - wait for a variable to be updated under a mutex * @var: the address of the variable being waited on * @condition: condition to wait for - * @mutex: the mutex which protects updates to the variable + * @lock: the mutex which protects updates to the variable * * Wait for a condition which can only be reliably tested while holding * a mutex. The variables assessed in the condition will normal be diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 75cb0c7cfe57..14fb644e1701 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -18,16 +18,12 @@ * struct wmi_device - WMI device structure * @dev: Device associated with this WMI device * @setable: True for devices implementing the Set Control Method - * @driver_override: Driver name to force a match; do not set directly, - * because core frees it; use driver_set_override() to - * set or clear it. * * This represents WMI devices discovered by the WMI driver core. */ struct wmi_device { struct device dev; bool setable; - const char *driver_override; }; /** diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a4749f56398f..ab6cb70ca1a5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -131,8 +131,9 @@ struct rcu_work { enum wq_affn_scope { WQ_AFFN_DFL, /* use system default */ WQ_AFFN_CPU, /* one pod per CPU */ - WQ_AFFN_SMT, /* one pod poer SMT */ + WQ_AFFN_SMT, /* one pod per SMT */ WQ_AFFN_CACHE, /* one pod per LLC */ + WQ_AFFN_CACHE_SHARD, /* synthetic sub-LLC shards */ WQ_AFFN_NUMA, /* one pod per NUMA node */ WQ_AFFN_SYSTEM, /* one pod across the whole system */ @@ -440,6 +441,9 @@ enum wq_consts { * system_long_wq is similar to system_percpu_wq but may host long running * works. Queue flushing might take relatively long. * + * system_dfl_long_wq is similar to system_dfl_wq but it may host long running + * works. + * * system_dfl_wq is unbound workqueue. Workers are not bound to * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and @@ -468,6 +472,7 @@ extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; extern struct workqueue_struct *system_bh_wq; extern struct workqueue_struct *system_bh_highpri_wq; +extern struct workqueue_struct *system_dfl_long_wq; void workqueue_softirq_action(bool highpri); void workqueue_softirq_dead(unsigned int cpu); @@ -512,6 +517,26 @@ __printf(1, 4) struct workqueue_struct * alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...); #define alloc_workqueue(...) alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__)) +/** + * devm_alloc_workqueue - Resource-managed allocate a workqueue + * @dev: Device to allocate workqueue for + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags + * @max_active: max in-flight work items, 0 for default + * @...: args for @fmt + * + * Resource managed workqueue, see alloc_workqueue() for details. + * + * The workqueue will be automatically destroyed on driver detach. Typically + * this should be used in drivers already relying on devm interafaces. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(2, 5) struct workqueue_struct * +devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags, + int max_active, ...); + #ifdef CONFIG_LOCKDEP /** * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map @@ -568,6 +593,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, */ #define alloc_ordered_workqueue(fmt, flags, args...) \ alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) +#define devm_alloc_ordered_workqueue(dev, fmt, flags, args...) \ + devm_alloc_workqueue(dev, fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name)) @@ -712,14 +739,14 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work) } /** - * schedule_work - put work task in global workqueue + * schedule_work - put work task in per-CPU workqueue * @work: job to be done * - * Returns %false if @work was already on the kernel-global workqueue and + * Returns %false if @work was already on the system per-CPU workqueue and * %true otherwise. * - * This puts a job in the kernel-global workqueue if it was not already - * queued and leaves it in the same position on the kernel-global + * This puts a job in the system per-CPU workqueue if it was not already + * queued and leaves it in the same position on the system per-CPU * workqueue otherwise. * * Shares the same memory-ordering properties of queue_work(), cf. the @@ -783,6 +810,8 @@ extern void __warn_flushing_systemwide_wq(void) _wq == system_highpri_wq) || \ (__builtin_constant_p(_wq == system_long_wq) && \ _wq == system_long_wq) || \ + (__builtin_constant_p(_wq == system_dfl_long_wq) && \ + _wq == system_dfl_long_wq) || \ (__builtin_constant_p(_wq == system_dfl_wq) && \ _wq == system_dfl_wq) || \ (__builtin_constant_p(_wq == system_freezable_wq) && \ @@ -796,12 +825,12 @@ extern void __warn_flushing_systemwide_wq(void) }) /** - * schedule_delayed_work_on - queue work in global workqueue on CPU after delay + * schedule_delayed_work_on - queue work in per-CPU workqueue on CPU after delay * @cpu: cpu to use * @dwork: job to be done * @delay: number of jiffies to wait * - * After waiting for a given time this puts a job in the kernel-global + * After waiting for a given time this puts a job in the system per-CPU * workqueue on the specified CPU. */ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, @@ -811,11 +840,11 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, } /** - * schedule_delayed_work - put work task in global workqueue after delay + * schedule_delayed_work - put work task in per-CPU workqueue after delay * @dwork: job to be done * @delay: number of jiffies to wait or 0 for immediate execution * - * After waiting for a given time this puts a job in the kernel-global + * After waiting for a given time this puts a job in the system per-CPU * workqueue. */ static inline bool schedule_delayed_work(struct delayed_work *dwork, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index e530112c4b3a..62552a2ce5b9 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -11,7 +11,7 @@ #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> -#include <linux/pagevec.h> +#include <linux/folio_batch.h> struct bio; diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 85b1fff02fde..0c95ead5a297 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -181,7 +181,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, * data structures. */ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) - __releases(ctx) __acquires_shared(ctx) __no_context_analysis + __must_hold(ctx) { #ifdef DEBUG_WW_MUTEXES lockdep_assert_held(ctx); @@ -199,7 +199,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) * mutexes have been released with ww_mutex_unlock. */ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) - __releases_shared(ctx) __no_context_analysis + __releases(ctx) __no_context_analysis { #ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 296b5ee5c979..8b6601367eae 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/mm.h> +#include <linux/rhashtable-types.h> #include <linux/user_namespace.h> #include <uapi/linux/xattr.h> @@ -106,31 +107,65 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler) } struct simple_xattrs { - struct rb_root rb_root; - rwlock_t lock; + struct rhashtable ht; }; struct simple_xattr { - struct rb_node rb_node; + struct rhash_head hash_node; + struct rcu_head rcu; char *name; size_t size; char value[] __counted_by(size); }; -void simple_xattrs_init(struct simple_xattrs *xattrs); +#define SIMPLE_XATTR_MAX_NR 128 +#define SIMPLE_XATTR_MAX_SIZE (128 << 10) + +struct simple_xattr_limits { + atomic_t nr_xattrs; /* current user.* xattr count */ + atomic_t xattr_size; /* current total user.* value bytes */ +}; + +static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits) +{ + atomic_set(&limits->nr_xattrs, 0); + atomic_set(&limits->xattr_size, 0); +} + +int simple_xattrs_init(struct simple_xattrs *xattrs); +struct simple_xattrs *simple_xattrs_alloc(void); +struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, + const void *value, int flags); void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); void simple_xattr_free(struct simple_xattr *xattr); +void simple_xattr_free_rcu(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags); +int simple_xattr_set_limited(struct simple_xattrs *xattrs, + struct simple_xattr_limits *limits, + const char *name, const void *value, + size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); -void simple_xattr_add(struct simple_xattrs *xattrs, - struct simple_xattr *new_xattr); +int simple_xattr_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr); int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name); +DEFINE_CLASS(simple_xattr, + struct simple_xattr *, + if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T), + simple_xattr_alloc(value, size), + const void *value, size_t size) + +DEFINE_CLASS(simple_xattrs, + struct simple_xattrs *, + if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); }, + simple_xattrs_alloc(), + void) + #endif /* _LINUX_XATTR_H */ |
