Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/acpi.h | 11
-rw-r--r--  include/linux/bit_spinlock.h | 8
-rw-r--r--  include/linux/bootmem_info.h | 7
-rw-r--r--  include/linux/buffer_head.h | 2
-rw-r--r--  include/linux/bvec.h | 6
-rw-r--r--  include/linux/cgroup-defs.h | 1
-rw-r--r--  include/linux/cgroup.h | 2
-rw-r--r--  include/linux/cma.h | 9
-rw-r--r--  include/linux/compaction.h | 5
-rw-r--r--  include/linux/compiler-clang.h | 8
-rw-r--r--  include/linux/compiler-gcc.h | 8
-rw-r--r--  include/linux/compiler.h | 20
-rw-r--r--  include/linux/compiler_types.h | 2
-rw-r--r--  include/linux/context_tracking_irq.h | 8
-rw-r--r--  include/linux/coresight.h | 47
-rw-r--r--  include/linux/counter.h | 3
-rw-r--r--  include/linux/cper.h | 8
-rw-r--r--  include/linux/cpu.h | 2
-rw-r--r--  include/linux/cpuhotplug.h | 1
-rw-r--r--  include/linux/crash_reserve.h | 11
-rw-r--r--  include/linux/damon.h | 118
-rw-r--r--  include/linux/dax.h | 28
-rw-r--r--  include/linux/device.h | 2
-rw-r--r--  include/linux/device/class.h | 2
-rw-r--r--  include/linux/dma/k3-udma-glue.h | 3
-rw-r--r--  include/linux/dmaengine.h | 10
-rw-r--r--  include/linux/exportfs.h | 14
-rw-r--r--  include/linux/fb.h | 1
-rw-r--r--  include/linux/folio_queue.h | 12
-rw-r--r--  include/linux/fsl/mc.h | 2
-rw-r--r--  include/linux/hrtimer.h | 2
-rw-r--r--  include/linux/hrtimer_types.h | 4
-rw-r--r--  include/linux/huge_mm.h | 44
-rw-r--r--  include/linux/hugetlb.h | 35
-rw-r--r--  include/linux/i2c.h | 26
-rw-r--r--  include/linux/i3c/master.h | 2
-rw-r--r--  include/linux/idr.h | 11
-rw-r--r--  include/linux/iio/adc/ad_sigma_delta.h | 4
-rw-r--r--  include/linux/iio/backend.h | 19
-rw-r--r--  include/linux/iio/buffer-dmaengine.h | 7
-rw-r--r--  include/linux/iio/iio-gts-helper.h | 1
-rw-r--r--  include/linux/iio/iio.h | 41
-rw-r--r--  include/linux/iio/imu/adis.h | 34
-rw-r--r--  include/linux/interval_tree_generic.h | 8
-rw-r--r--  include/linux/io_uring/cmd.h | 1
-rw-r--r--  include/linux/iomap.h | 15
-rw-r--r--  include/linux/iommu.h | 35
-rw-r--r--  include/linux/iommufd.h | 32
-rw-r--r--  include/linux/ioport.h | 9
-rw-r--r--  include/linux/irqdomain.h | 16
-rw-r--r--  include/linux/kdb.h | 2
-rw-r--r--  include/linux/kernfs.h | 14
-rw-r--r--  include/linux/kexec.h | 9
-rw-r--r--  include/linux/kgdb.h | 11
-rw-r--r--  include/linux/linkage.h | 4
-rw-r--r--  include/linux/list_nulls.h | 1
-rw-r--r--  include/linux/mei_cl_bus.h | 5
-rw-r--r--  include/linux/memblock.h | 1
-rw-r--r--  include/linux/memcontrol.h | 32
-rw-r--r--  include/linux/memory.h | 2
-rw-r--r--  include/linux/memremap.h | 17
-rw-r--r--  include/linux/mfd/mt6397/rtc.h | 5
-rw-r--r--  include/linux/mhi.h | 18
-rw-r--r--  include/linux/migrate.h | 4
-rw-r--r--  include/linux/min_heap.h | 12
-rw-r--r--  include/linux/mm.h | 344
-rw-r--r--  include/linux/mm_types.h | 203
-rw-r--r--  include/linux/mmap_lock.h | 6
-rw-r--r--  include/linux/mmu_notifier.h | 8
-rw-r--r--  include/linux/mmzone.h | 55
-rw-r--r--  include/linux/module.h | 6
-rw-r--r--  include/linux/mutex.h | 2
-rw-r--r--  include/linux/netdevice.h | 2
-rw-r--r--  include/linux/nfs4.h | 2
-rw-r--r--  include/linux/nfs_fs_sb.h | 8
-rw-r--r--  include/linux/nfs_xdr.h | 5
-rw-r--r--  include/linux/node.h | 7
-rw-r--r--  include/linux/objtool.h | 2
-rw-r--r--  include/linux/page-flags.h | 53
-rw-r--r--  include/linux/page_counter.h | 9
-rw-r--r--  include/linux/page_ext.h | 93
-rw-r--r--  include/linux/page_ref.h | 2
-rw-r--r--  include/linux/pagemap.h | 25
-rw-r--r--  include/linux/part_stat.h | 2
-rw-r--r--  include/linux/pci-ats.h | 3
-rw-r--r--  include/linux/percpu-defs.h | 6
-rw-r--r--  include/linux/pgalloc_tag.h | 77
-rw-r--r--  include/linux/pgtable.h | 14
-rw-r--r--  include/linux/phy/phy.h | 12
-rw-r--r--  include/linux/platform_data/cros_ec_commands.h | 1
-rw-r--r--  include/linux/posix_acl.h | 11
-rw-r--r--  include/linux/pps_gen_kernel.h | 4
-rw-r--r--  include/linux/rcupdate.h | 2
-rw-r--r--  include/linux/rcuwait.h | 13
-rw-r--r--  include/linux/reboot.h | 36
-rw-r--r--  include/linux/refcount.h | 125
-rw-r--r--  include/linux/rhashtable.h | 6
-rw-r--r--  include/linux/ring_buffer.h | 8
-rw-r--r--  include/linux/rmap.h | 293
-rw-r--r--  include/linux/rtc.h | 1
-rw-r--r--  include/linux/sched.h | 4
-rw-r--r--  include/linux/sched/smt.h | 2
-rw-r--r--  include/linux/seq_buf.h | 4
-rw-r--r--  include/linux/seq_file.h | 1
-rw-r--r--  include/linux/serdev.h | 6
-rw-r--r--  include/linux/slab.h | 15
-rw-r--r--  include/linux/sort.h | 11
-rw-r--r--  include/linux/soundwire/sdw.h | 33
-rw-r--r--  include/linux/soundwire/sdw_amd.h | 2
-rw-r--r--  include/linux/soundwire/sdw_intel.h | 4
-rw-r--r--  include/linux/sprintf.h | 3
-rw-r--r--  include/linux/string.h | 4
-rw-r--r--  include/linux/sunrpc/clnt.h | 5
-rw-r--r--  include/linux/sunrpc/sched.h | 1
-rw-r--r--  include/linux/sunrpc/xprtmultipath.h | 1
-rw-r--r--  include/linux/swap.h | 41
-rw-r--r--  include/linux/swap_slots.h | 28
-rw-r--r--  include/linux/swapops.h | 27
-rw-r--r--  include/linux/thermal.h | 2
-rw-r--r--  include/linux/timer.h | 36
-rw-r--r--  include/linux/trace.h | 4
-rw-r--r--  include/linux/trace_seq.h | 8
-rw-r--r--  include/linux/tty.h | 53
-rw-r--r--  include/linux/tty_driver.h | 180
-rw-r--r--  include/linux/tty_ldisc.h | 1
-rw-r--r--  include/linux/types.h | 13
-rw-r--r--  include/linux/usb.h | 8
-rw-r--r--  include/linux/usb/musb.h | 2
-rw-r--r--  include/linux/usb/ulpi.h | 9
-rw-r--r--  include/linux/user_namespace.h | 15
-rw-r--r--  include/linux/vfio.h | 14
-rw-r--r--  include/linux/vm_event_item.h | 2
-rw-r--r--  include/linux/vmstat.h | 2
-rw-r--r--  include/linux/writeback.h | 24
-rw-r--r--  include/linux/xarray.h | 13
-rw-r--r--  include/linux/zpool.h | 47
-rw-r--r--  include/linux/zsmalloc.h | 29
-rw-r--r--  include/linux/zswap.h | 6
138 files changed, 2011 insertions, 939 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a70e62d69dc7..3f2e93ed9730 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1094,6 +1094,17 @@ static inline acpi_handle acpi_get_processor_handle(int cpu)
#endif /* !CONFIG_ACPI */
+#ifdef CONFIG_ACPI_HMAT
+int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid,
+ resource_size_t *size);
+#else
+static inline int hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid, resource_size_t *size)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
extern void arch_post_acpi_subsys_init(void);
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index bbc4730a6505..c0989b5b0407 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -13,7 +13,7 @@
* Don't use this unless you really need to: spin_lock() and spin_unlock()
* are significantly faster.
*/
-static inline void bit_spin_lock(int bitnum, unsigned long *addr)
+static __always_inline void bit_spin_lock(int bitnum, unsigned long *addr)
{
/*
* Assuming the lock is uncontended, this never enters
@@ -38,7 +38,7 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr)
/*
* Return true if it was acquired
*/
-static inline int bit_spin_trylock(int bitnum, unsigned long *addr)
+static __always_inline int bit_spin_trylock(int bitnum, unsigned long *addr)
{
preempt_disable();
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
@@ -54,7 +54,7 @@ static inline int bit_spin_trylock(int bitnum, unsigned long *addr)
/*
* bit-based spin_unlock()
*/
-static inline void bit_spin_unlock(int bitnum, unsigned long *addr)
+static __always_inline void bit_spin_unlock(int bitnum, unsigned long *addr)
{
#ifdef CONFIG_DEBUG_SPINLOCK
BUG_ON(!test_bit(bitnum, addr));
@@ -71,7 +71,7 @@ static inline void bit_spin_unlock(int bitnum, unsigned long *addr)
* non-atomic version, which can be used eg. if the bit lock itself is
* protecting the rest of the flags in the word.
*/
-static inline void __bit_spin_unlock(int bitnum, unsigned long *addr)
+static __always_inline void __bit_spin_unlock(int bitnum, unsigned long *addr)
{
#ifdef CONFIG_DEBUG_SPINLOCK
BUG_ON(!test_bit(bitnum, addr));
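For illustration, a minimal usage sketch of the bit_spinlock API touched above (not part of this patch; the function name and bit assignments are made up): one bit of a flags word serves as the lock while the remaining bits stay usable as data, which is exactly the case the header comment describes.

#include <linux/bit_spinlock.h>
#include <linux/bitops.h>

#define EXAMPLE_LOCK_BIT	0	/* hypothetical lock bit inside 'flags' */
#define EXAMPLE_DIRTY_BIT	1	/* hypothetical data bit protected by it */

static void example_mark_dirty(unsigned long *flags)
{
	/* Take the bit lock; only EXAMPLE_LOCK_BIT is used for locking. */
	bit_spin_lock(EXAMPLE_LOCK_BIT, flags);

	/* The other bits of *flags are the protected state. */
	__set_bit(EXAMPLE_DIRTY_BIT, flags);

	bit_spin_unlock(EXAMPLE_LOCK_BIT, flags);
}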
diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
index d8a8d245824a..4c506e76a808 100644
--- a/include/linux/bootmem_info.h
+++ b/include/linux/bootmem_info.h
@@ -18,6 +18,8 @@ enum bootmem_type {
#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
+void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
+ unsigned long nr_pages);
void get_page_bootmem(unsigned long info, struct page *page,
enum bootmem_type type);
@@ -58,6 +60,11 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
{
}
+static inline void register_page_bootmem_memmap(unsigned long section_nr,
+ struct page *map, unsigned long nr_pages)
+{
+}
+
static inline void put_page_bootmem(struct page *page)
{
}
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index fab70b26e131..f0a4ad7839b6 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -270,7 +270,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t,
unsigned, struct folio **, void **,
get_block_t *, loff_t *);
int generic_cont_expand_simple(struct inode *inode, loff_t size);
-void block_commit_write(struct page *page, unsigned int from, unsigned int to);
+void block_commit_write(struct folio *folio, size_t from, size_t to);
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block);
sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index ba8f52d48b94..204b22a99c4b 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -184,6 +184,12 @@ static inline void bvec_iter_advance_single(const struct bio_vec *bv,
((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len))
+#define for_each_mp_bvec(bvl, bio_vec, iter, start) \
+ for (iter = (start); \
+ (iter).bi_size && \
+ ((bvl = mp_bvec_iter_bvec((bio_vec), (iter))), 1); \
+ bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len))
+
/* for iterating one bio from start to end */
#define BVEC_ITER_ALL_INIT (struct bvec_iter) \
{ \
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 485b651869d9..5bc8f55c8cca 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -710,6 +710,7 @@ struct cgroup_subsys {
void (*css_released)(struct cgroup_subsys_state *css);
void (*css_free)(struct cgroup_subsys_state *css);
void (*css_reset)(struct cgroup_subsys_state *css);
+ void (*css_killed)(struct cgroup_subsys_state *css);
void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
int (*css_extra_stat_show)(struct seq_file *seq,
struct cgroup_subsys_state *css);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 28e999f2c642..e7da3c3b098b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -344,7 +344,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp)
*/
static inline bool css_is_dying(struct cgroup_subsys_state *css)
{
- return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
+ return css->flags & CSS_DYING;
}
static inline void cgroup_get(struct cgroup *cgrp)
diff --git a/include/linux/cma.h b/include/linux/cma.h
index d15b64f51336..62d9c1cf6326 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -40,6 +40,9 @@ static inline int __init cma_declare_contiguous(phys_addr_t base,
return cma_declare_contiguous_nid(base, size, limit, alignment,
order_per_bit, fixed, name, res_cma, NUMA_NO_NODE);
}
+extern int __init cma_declare_contiguous_multi(phys_addr_t size,
+ phys_addr_t align, unsigned int order_per_bit,
+ const char *name, struct cma **res_cma, int nid);
extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
unsigned int order_per_bit,
const char *name,
@@ -50,12 +53,14 @@ extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned
extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
+extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end);
extern void cma_reserve_pages_on_error(struct cma *cma);
#ifdef CONFIG_CMA
struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp);
bool cma_free_folio(struct cma *cma, const struct folio *folio);
+bool cma_validate_zones(struct cma *cma);
#else
static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
{
@@ -66,6 +71,10 @@ static inline bool cma_free_folio(struct cma *cma, const struct folio *folio)
{
return false;
}
+static inline bool cma_validate_zones(struct cma *cma)
+{
+ return false;
+}
#endif
#endif
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 7bf0c521db63..173d9c07a895 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -95,7 +95,7 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
struct page **page);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern bool compaction_suitable(struct zone *zone, int order,
- int highest_zoneidx);
+ unsigned long watermark, int highest_zoneidx);
extern void compaction_defer_reset(struct zone *zone, int order,
bool alloc_success);
@@ -113,7 +113,8 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
}
static inline bool compaction_suitable(struct zone *zone, int order,
- int highest_zoneidx)
+ unsigned long watermark,
+ int highest_zoneidx)
{
return false;
}
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 2e7c2c282f3a..4fc8e26914ad 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -128,3 +128,11 @@
*/
#define ASM_INPUT_G "ir"
#define ASM_INPUT_RM "r"
+
+/*
+ * Declare compiler support for __typeof_unqual__() operator.
+ *
+ * Bindgen uses LLVM even if our C compiler is GCC, so we cannot
+ * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL.
+ */
+#define CC_HAS_TYPEOF_UNQUAL (__clang_major__ >= 19)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index c9b58188ec61..32048052c64a 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -137,3 +137,11 @@
#if GCC_VERSION < 90100
#undef __alloc_size__
#endif
+
+/*
+ * Declare compiler support for __typeof_unqual__() operator.
+ *
+ * Bindgen uses LLVM even if our C compiler is GCC, so we cannot
+ * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL.
+ */
+#define CC_HAS_TYPEOF_UNQUAL (__GNUC__ >= 14)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 9fc30b6b80c9..27725f1ab5ab 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -226,6 +226,26 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
__BUILD_BUG_ON_ZERO_MSG(!__is_noncstr(p), \
"must be non-C-string (not NUL-terminated)")
+/*
+ * Use __typeof_unqual__() when available.
+ *
+ * XXX: Remove test for __CHECKER__ once
+ * sparse learns about __typeof_unqual__().
+ */
+#if CC_HAS_TYPEOF_UNQUAL && !defined(__CHECKER__)
+# define USE_TYPEOF_UNQUAL 1
+#endif
+
+/*
+ * Define TYPEOF_UNQUAL() to use __typeof_unqual__() as the typeof
+ * operator when available, returning the unqualified type of the expression.
+ */
+#if defined(USE_TYPEOF_UNQUAL)
+# define TYPEOF_UNQUAL(exp) __typeof_unqual__(exp)
+#else
+# define TYPEOF_UNQUAL(exp) __typeof__(exp)
+#endif
+
#endif /* __KERNEL__ */
#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
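A small sketch (not part of the patch; variable names are hypothetical) of what the fallback difference means in practice. With a compiler that provides __typeof_unqual__(), TYPEOF_UNQUAL() drops qualifiers such as const/volatile (and, with the compiler_types.h change below, the __percpu address space), whereas the __typeof__() fallback keeps them:

#include <linux/compiler.h>

static void typeof_unqual_example(void)
{
	const volatile int reg = 0;

	TYPEOF_UNQUAL(reg) copy = reg;	/* plain 'int' when __typeof_unqual__ exists */
	__typeof__(reg) alias = reg;	/* always 'const volatile int' */

	(void)copy;
	(void)alias;
}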
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index e09d323be845..501cffddc2f4 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -57,7 +57,7 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
# define __user BTF_TYPE_TAG(user)
# endif
# define __iomem
-# define __percpu BTF_TYPE_TAG(percpu)
+# define __percpu __percpu_qual BTF_TYPE_TAG(percpu)
# define __rcu BTF_TYPE_TAG(rcu)
# define __chk_user_ptr(x) (void)0
diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h
index c50b5670c4a5..197916ee91a4 100644
--- a/include/linux/context_tracking_irq.h
+++ b/include/linux/context_tracking_irq.h
@@ -10,12 +10,12 @@ void ct_irq_exit_irqson(void);
void ct_nmi_enter(void);
void ct_nmi_exit(void);
#else
-static inline void ct_irq_enter(void) { }
-static inline void ct_irq_exit(void) { }
+static __always_inline void ct_irq_enter(void) { }
+static __always_inline void ct_irq_exit(void) { }
static inline void ct_irq_enter_irqson(void) { }
static inline void ct_irq_exit_irqson(void) { }
-static inline void ct_nmi_enter(void) { }
-static inline void ct_nmi_exit(void) { }
+static __always_inline void ct_nmi_enter(void) { }
+static __always_inline void ct_nmi_exit(void) { }
#endif
#endif
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 17276965ff1d..d79a242b271d 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -71,7 +71,8 @@ enum coresight_dev_subtype_source {
enum coresight_dev_subtype_helper {
CORESIGHT_DEV_SUBTYPE_HELPER_CATU,
- CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI
+ CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI,
+ CORESIGHT_DEV_SUBTYPE_HELPER_CTCU,
};
/**
@@ -238,7 +239,7 @@ struct coresight_trace_id_map {
DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
atomic_t __percpu *cpu_map;
atomic_t perf_cs_etm_session_active;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
/**
@@ -301,7 +302,7 @@ struct coresight_device {
/* system configuration and feature lists */
struct list_head feature_csdev_list;
struct list_head config_csdev_list;
- spinlock_t cscfg_csdev_lock;
+ raw_spinlock_t cscfg_csdev_lock;
void *active_cscfg_ctxt;
};
@@ -329,17 +330,29 @@ static struct coresight_dev_list (var) = { \
#define to_coresight_device(d) container_of(d, struct coresight_device, dev)
+/**
+ * struct coresight_path - data needed by enable/disable path
+ * @path_list: path from source to sink.
+ * @trace_id: trace_id of the whole path.
+ */
+struct coresight_path {
+ struct list_head path_list;
+ u8 trace_id;
+};
+
enum cs_mode {
CS_MODE_DISABLED,
CS_MODE_SYSFS,
CS_MODE_PERF,
};
+#define coresight_ops(csdev) csdev->ops
#define source_ops(csdev) csdev->ops->source_ops
#define sink_ops(csdev) csdev->ops->sink_ops
#define link_ops(csdev) csdev->ops->link_ops
#define helper_ops(csdev) csdev->ops->helper_ops
#define ect_ops(csdev) csdev->ops->ect_ops
+#define panic_ops(csdev) csdev->ops->panic_ops
/**
* struct coresight_ops_sink - basic operations for a sink
@@ -389,7 +402,7 @@ struct coresight_ops_link {
struct coresight_ops_source {
int (*cpu_id)(struct coresight_device *csdev);
int (*enable)(struct coresight_device *csdev, struct perf_event *event,
- enum cs_mode mode, struct coresight_trace_id_map *id_map);
+ enum cs_mode mode, struct coresight_path *path);
void (*disable)(struct coresight_device *csdev,
struct perf_event *event);
};
@@ -409,11 +422,24 @@ struct coresight_ops_helper {
int (*disable)(struct coresight_device *csdev, void *data);
};
+
+/**
+ * struct coresight_ops_panic - Generic device ops for panic handling
+ *
+ * @sync : Sync the device register state/trace data
+ */
+struct coresight_ops_panic {
+ int (*sync)(struct coresight_device *csdev);
+};
+
struct coresight_ops {
+ int (*trace_id)(struct coresight_device *csdev, enum cs_mode mode,
+ struct coresight_device *sink);
const struct coresight_ops_sink *sink_ops;
const struct coresight_ops_link *link_ops;
const struct coresight_ops_source *source_ops;
const struct coresight_ops_helper *helper_ops;
+ const struct coresight_ops_panic *panic_ops;
};
static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa,
@@ -459,8 +485,11 @@ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev)
int ret;
pclk = clk_get(dev, "apb_pclk");
- if (IS_ERR(pclk))
- return NULL;
+ if (IS_ERR(pclk)) {
+ pclk = clk_get(dev, "apb");
+ if (IS_ERR(pclk))
+ return NULL;
+ }
ret = clk_prepare_enable(pclk);
if (ret) {
@@ -649,6 +678,10 @@ extern int coresight_enable_sysfs(struct coresight_device *csdev);
extern void coresight_disable_sysfs(struct coresight_device *csdev);
extern int coresight_timeout(struct csdev_access *csa, u32 offset,
int position, int value);
+typedef void (*coresight_timeout_cb_t) (struct csdev_access *, u32, int, int);
+extern int coresight_timeout_action(struct csdev_access *csa, u32 offset,
+ int position, int value,
+ coresight_timeout_cb_t cb);
extern int coresight_claim_device(struct coresight_device *csdev);
extern int coresight_claim_device_unlocked(struct coresight_device *csdev);
@@ -694,4 +727,6 @@ int coresight_init_driver(const char *drv, struct amba_driver *amba_drv,
void coresight_remove_driver(struct amba_driver *amba_drv,
struct platform_driver *pdev_drv);
+int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode,
+ struct coresight_device *sink);
#endif /* _LINUX_COREISGHT_H */
diff --git a/include/linux/counter.h b/include/linux/counter.h
index 426b7d58a438..f208e867dd0f 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -580,6 +580,9 @@ struct counter_array {
#define COUNTER_COMP_CEILING(_read, _write) \
COUNTER_COMP_COUNT_U64("ceiling", _read, _write)
+#define COUNTER_COMP_COMPARE(_read, _write) \
+ COUNTER_COMP_COUNT_U64("compare", _read, _write)
+
#define COUNTER_COMP_COUNT_MODE(_read, _write, _available) \
{ \
.type = COUNTER_COMP_COUNT_MODE, \
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 265b0f8fc0b3..0ed60a91eca9 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -89,6 +89,10 @@ enum {
#define CPER_NOTIFY_DMAR \
GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \
0x72, 0x2D, 0xEB, 0x41)
+/* CXL Protocol Error Section */
+#define CPER_SEC_CXL_PROT_ERR \
+ GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \
+ 0x4B, 0x77, 0x10, 0x48)
/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
/*
@@ -601,4 +605,8 @@ void cper_estatus_print(const char *pfx,
int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus);
int cper_estatus_check(const struct acpi_hest_generic_status *estatus);
+struct cxl_cper_sec_prot_err;
+void cxl_cper_print_prot_err(const char *pfx,
+ const struct cxl_cper_sec_prot_err *prot_err);
+
#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 6a0a8f1c7c90..e3049543008b 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -148,7 +148,7 @@ static inline int suspend_disable_secondary_cpus(void)
}
static inline void suspend_enable_secondary_cpus(void)
{
- return thaw_secondary_cpus();
+ thaw_secondary_cpus();
}
#else /* !CONFIG_PM_SLEEP_SMP */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 6cc5e484547c..1987400000b4 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -116,7 +116,6 @@ enum cpuhp_state {
CPUHP_NET_IUCV_PREPARE,
CPUHP_ARM_BL_PREPARE,
CPUHP_TRACE_RB_PREPARE,
- CPUHP_MM_ZS_PREPARE,
CPUHP_MM_ZSWP_POOL_PREPARE,
CPUHP_KVM_PPC_BOOK3S_PREPARE,
CPUHP_ZCOMP_PREPARE,
diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h
index 5a9df944fb80..1fe7e7d1b214 100644
--- a/include/linux/crash_reserve.h
+++ b/include/linux/crash_reserve.h
@@ -32,13 +32,12 @@ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM()
#endif
-void __init reserve_crashkernel_generic(char *cmdline,
- unsigned long long crash_size,
- unsigned long long crash_base,
- unsigned long long crash_low_size,
- bool high);
+void __init reserve_crashkernel_generic(unsigned long long crash_size,
+ unsigned long long crash_base,
+ unsigned long long crash_low_size,
+ bool high);
#else
-static inline void __init reserve_crashkernel_generic(char *cmdline,
+static inline void __init reserve_crashkernel_generic(
unsigned long long crash_size,
unsigned long long crash_base,
unsigned long long crash_low_size,
diff --git a/include/linux/damon.h b/include/linux/damon.h
index c9074d569596..47e36e6ea203 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -36,6 +36,16 @@ struct damon_addr_range {
};
/**
+ * struct damon_size_range - Represents size for filter to operate on [@min, @max].
+ * @min: Min size (inclusive).
+ * @max: Max size (inclusive).
+ */
+struct damon_size_range {
+ unsigned long min;
+ unsigned long max;
+};
+
+/**
* struct damon_region - Represents a monitoring target region.
* @ar: The address range of the region.
* @sampling_addr: Address of the sample for the next access check.
@@ -324,8 +334,11 @@ struct damos_stat {
/**
* enum damos_filter_type - Type of memory for &struct damos_filter
* @DAMOS_FILTER_TYPE_ANON: Anonymous pages.
+ * @DAMOS_FILTER_TYPE_ACTIVE: Active pages.
* @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages.
* @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages.
+ * @DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: Page is part of a hugepage.
+ * @DAMOS_FILTER_TYPE_UNMAPPED: Unmapped pages.
* @DAMOS_FILTER_TYPE_ADDR: Address range.
* @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target.
* @NR_DAMOS_FILTER_TYPES: Number of filter types.
@@ -343,8 +356,11 @@ struct damos_stat {
*/
enum damos_filter_type {
DAMOS_FILTER_TYPE_ANON,
+ DAMOS_FILTER_TYPE_ACTIVE,
DAMOS_FILTER_TYPE_MEMCG,
DAMOS_FILTER_TYPE_YOUNG,
+ DAMOS_FILTER_TYPE_HUGEPAGE_SIZE,
+ DAMOS_FILTER_TYPE_UNMAPPED,
DAMOS_FILTER_TYPE_ADDR,
DAMOS_FILTER_TYPE_TARGET,
NR_DAMOS_FILTER_TYPES,
@@ -360,6 +376,7 @@ enum damos_filter_type {
* @target_idx: Index of the &struct damon_target of
* &damon_ctx->adaptive_targets if @type is
* DAMOS_FILTER_TYPE_TARGET.
+ * @sz_range: Size range if @type is DAMOS_FILTER_TYPE_HUGEPAGE_SIZE.
* @list: List head for siblings.
*
* Before applying the &damos->action to a memory region, DAMOS checks if each
@@ -376,6 +393,7 @@ struct damos_filter {
unsigned short memcg_id;
struct damon_addr_range addr_range;
int target_idx;
+ struct damon_size_range sz_range;
};
struct list_head list;
};
@@ -432,6 +450,8 @@ struct damos_access_pattern {
* @wmarks: Watermarks for automated (in)activation of this scheme.
* @target_nid: Destination node if @action is "migrate_{hot,cold}".
* @filters: Additional set of &struct damos_filter for &action.
+ * @ops_filters: List of &struct damos_filter objects handled by the ops layer.
+ * @last_applied: Last ops-managing entity that @action was applied to.
* @stat: Statistics of this scheme.
* @list: List head for siblings.
*
@@ -454,6 +474,15 @@ struct damos_access_pattern {
* implementation could check pages of the region and skip &action to respect
* &filters
*
+ * The minimum entity that @action can be applied to depends on the underlying
+ * &struct damon_operations. Since it may not be aligned with the core layer
+ * abstraction, namely &struct damon_region, &struct damon_operations could
+ * apply @action to the same entity multiple times. Large folios underlying
+ * multiple &struct damon_region objects could be such an example. The &struct
+ * damon_operations can use @last_applied to avoid that. DAMOS core logic
+ * unsets @last_applied when each walk of the regions for applying the scheme
+ * is finished.
+ *
* After applying the &action to each region, &stat_count and &stat_sz is
* updated to reflect the number of regions and total size of regions that the
* &action is applied.
@@ -475,6 +504,9 @@ struct damos {
* layer-handled filters. If true, operations layer allows it, too.
*/
bool core_filters_allowed;
+ /* whether to reject regions not matched by core/ops filters */
+ bool core_filters_default_reject;
+ bool ops_filters_default_reject;
/* public: */
struct damos_quota quota;
struct damos_watermarks wmarks;
@@ -482,6 +514,8 @@ struct damos {
int target_nid;
};
struct list_head filters;
+ struct list_head ops_filters;
+ void *last_applied;
struct damos_stat stat;
struct list_head list;
};
@@ -510,7 +544,6 @@ enum damon_ops_id {
* @update: Update operations-related data structures.
* @prepare_access_checks: Prepare next access check of target regions.
* @check_accesses: Check the accesses to target regions.
- * @reset_aggregated: Reset aggregated accesses monitoring results.
* @get_scheme_score: Get the score of a region for a scheme.
* @apply_scheme: Apply a DAMON-based operation scheme.
* @target_valid: Determine if the target is valid.
@@ -522,8 +555,7 @@ enum damon_ops_id {
* (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting
* the monitoring, @update after each &damon_attrs.ops_update_interval, and
* @check_accesses, @target_valid and @prepare_access_checks after each
- * &damon_attrs.sample_interval. Finally, @reset_aggregated is called after
- * each &damon_attrs.aggr_interval.
+ * &damon_attrs.sample_interval.
*
* Each &struct damon_operations instance having valid @id can be registered
* via damon_register_ops() and selected by damon_select_ops() later.
@@ -538,8 +570,6 @@ enum damon_ops_id {
* last preparation and update the number of observed accesses of each region.
* It should also return max number of observed accesses that made as a result
* of its update. The value will be used for regions adjustment threshold.
- * @reset_aggregated should reset the access monitoring results that aggregated
- * by @check_accesses.
* @get_scheme_score should return the priority score of a region for a scheme
* as an integer in [0, &DAMOS_MAX_SCORE].
* @apply_scheme is called from @kdamond when a region for user provided
@@ -557,7 +587,6 @@ struct damon_operations {
void (*update)(struct damon_ctx *context);
void (*prepare_access_checks)(struct damon_ctx *context);
unsigned int (*check_accesses)(struct damon_ctx *context);
- void (*reset_aggregated)(struct damon_ctx *context);
int (*get_scheme_score)(struct damon_ctx *context,
struct damon_target *t, struct damon_region *r,
struct damos *scheme);
@@ -571,43 +600,28 @@ struct damon_operations {
/**
* struct damon_callback - Monitoring events notification callbacks.
*
- * @before_start: Called before starting the monitoring.
* @after_wmarks_check: Called after each schemes' watermarks check.
- * @after_sampling: Called after each sampling.
* @after_aggregation: Called after each aggregation.
- * @before_damos_apply: Called before applying DAMOS action.
* @before_terminate: Called before terminating the monitoring.
- * @private: User private data.
*
- * The monitoring thread (&damon_ctx.kdamond) calls @before_start and
- * @before_terminate just before starting and finishing the monitoring,
- * respectively. Therefore, those are good places for installing and cleaning
- * @private.
+ * The monitoring thread (&damon_ctx.kdamond) calls @before_terminate just
+ * before finishing the monitoring.
*
* The monitoring thread calls @after_wmarks_check after each DAMON-based
* operation schemes' watermarks check. If users need to make changes to the
* attributes of the monitoring context while it's deactivated due to the
* watermarks, this is the good place to do.
*
- * The monitoring thread calls @after_sampling and @after_aggregation for each
- * of the sampling intervals and aggregation intervals, respectively.
- * Therefore, users can safely access the monitoring results without additional
- * protection. For the reason, users are recommended to use these callback for
- * the accesses to the results.
+ * The monitoring thread calls @after_aggregation for each of the aggregation
+ * intervals. Therefore, users can safely access the monitoring results
+ * without additional protection. For this reason, users are recommended to
+ * use this callback for accesses to the results.
*
* If any callback returns non-zero, monitoring stops.
*/
struct damon_callback {
- void *private;
-
- int (*before_start)(struct damon_ctx *context);
int (*after_wmarks_check)(struct damon_ctx *context);
- int (*after_sampling)(struct damon_ctx *context);
int (*after_aggregation)(struct damon_ctx *context);
- int (*before_damos_apply)(struct damon_ctx *context,
- struct damon_target *target,
- struct damon_region *region,
- struct damos *scheme);
void (*before_terminate)(struct damon_ctx *context);
};
@@ -633,11 +647,37 @@ struct damon_call_control {
};
/**
+ * struct damon_intervals_goal - Monitoring intervals auto-tuning goal.
+ *
+ * @access_bp: Access events observation ratio to achieve in bp.
+ * @aggrs: Number of aggregations to achieve @access_bp within.
+ * @min_sample_us: Minimum resulting sampling interval in microseconds.
+ * @max_sample_us: Maximum resulting sampling interval in microseconds.
+ *
+ * DAMON automatically tunes &damon_attrs->sample_interval and
+ * &damon_attrs->aggr_interval so that the ratio in bp (1/10,000) of
+ * DAMON-observed access events to the theoretical maximum within @aggrs
+ * aggregations matches @access_bp. The logic increases
+ * &damon_attrs->aggr_interval and &damon_attrs->sample_interval in the same
+ * ratio if the current access events observation ratio is lower than the
+ * target for each @aggrs aggregations, and vice versa.
+ *
+ * If @aggrs is zero, the tuning is disabled and hence this struct is ignored.
+ */
+struct damon_intervals_goal {
+ unsigned long access_bp;
+ unsigned long aggrs;
+ unsigned long min_sample_us;
+ unsigned long max_sample_us;
+};
+
+/**
* struct damon_attrs - Monitoring attributes for accuracy/overhead control.
*
* @sample_interval: The time between access samplings.
* @aggr_interval: The time between monitor results aggregations.
* @ops_update_interval: The time between monitoring operations updates.
+ * @intervals_goal: Intervals auto-tuning goal.
* @min_nr_regions: The minimum number of adaptive monitoring
* regions.
* @max_nr_regions: The maximum number of adaptive monitoring
@@ -657,8 +697,20 @@ struct damon_attrs {
unsigned long sample_interval;
unsigned long aggr_interval;
unsigned long ops_update_interval;
+ struct damon_intervals_goal intervals_goal;
unsigned long min_nr_regions;
unsigned long max_nr_regions;
+/* private: internal use only */
+ /*
+ * @aggr_interval to @sample_interval ratio.
+ * Core-external components call damon_set_attrs() with &damon_attrs
+ * in which this field is unset. In that case, damon_set_attrs() sets
+ * this field of the resulting &damon_attrs. Core-internal components
+ * such as kdamond_tune_intervals() call damon_set_attrs() with
+ * &damon_attrs in which this field is set. In that case,
+ * damon_set_attrs() just keeps it.
+ */
+ unsigned long aggr_samples;
};
/**
@@ -707,6 +759,11 @@ struct damon_ctx {
* update
*/
unsigned long next_ops_update_sis;
+ /*
+ * number of sample intervals that should be passed before next
+ * intervals tuning
+ */
+ unsigned long next_intervals_tune_sis;
/* for waiting until the execution of the kdamond_fn is started */
struct completion kdamond_started;
/* for scheme quotas prioritization */
@@ -788,6 +845,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
#define damos_for_each_filter_safe(f, next, scheme) \
list_for_each_entry_safe(f, next, &(scheme)->filters, list)
+#define damos_for_each_ops_filter(f, scheme) \
+ list_for_each_entry(f, &(scheme)->ops_filters, list)
+
+#define damos_for_each_ops_filter_safe(f, next, scheme) \
+ list_for_each_entry_safe(f, next, &(scheme)->ops_filters, list)
+
#ifdef CONFIG_DAMON
struct damon_region *damon_new_region(unsigned long start, unsigned long end);
@@ -813,6 +876,7 @@ void damon_update_region_access_rate(struct damon_region *r, bool accessed,
struct damos_filter *damos_new_filter(enum damos_filter_type type,
bool matching, bool allow);
void damos_add_filter(struct damos *s, struct damos_filter *f);
+bool damos_filter_for_ops(enum damos_filter_type type);
void damos_destroy_filter(struct damos_filter *f);
struct damos_quota_goal *damos_new_quota_goal(
diff --git a/include/linux/dax.h b/include/linux/dax.h
index df41a0017b31..dcc9fcdf14e4 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -207,6 +207,11 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops);
+static inline bool dax_page_is_idle(struct page *page)
+{
+ return page && page_ref_count(page) == 0;
+}
+
#if IS_ENABLED(CONFIG_DAX)
int dax_read_lock(void);
void dax_read_unlock(int id);
@@ -220,6 +225,19 @@ static inline void dax_read_unlock(int id)
{
}
#endif /* CONFIG_DAX */
+
+#if !IS_ENABLED(CONFIG_FS_DAX)
+static inline int __must_check dax_break_layout(struct inode *inode,
+ loff_t start, loff_t end, void (cb)(struct inode *))
+{
+ return 0;
+}
+
+static inline void dax_break_layout_final(struct inode *inode)
+{
+}
+#endif
+
bool dax_alive(struct dax_device *dax_dev);
void *dax_get_private(struct dax_device *dax_dev);
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
@@ -241,8 +259,18 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
unsigned int order, pfn_t pfn);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+void dax_delete_mapping_range(struct address_space *mapping,
+ loff_t start, loff_t end);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
+int __must_check dax_break_layout(struct inode *inode, loff_t start,
+ loff_t end, void (cb)(struct inode *));
+static inline int __must_check dax_break_layout_inode(struct inode *inode,
+ void (cb)(struct inode *))
+{
+ return dax_break_layout(inode, 0, LLONG_MAX, cb);
+}
+void dax_break_layout_final(struct inode *inode);
int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
struct inode *dest, loff_t destoff,
loff_t len, bool *is_same,
diff --git a/include/linux/device.h b/include/linux/device.h
index e3d56cd13fea..79e49fe494b7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1162,7 +1162,7 @@ static inline void device_remove_group(struct device *dev,
{
const struct attribute_group *groups[] = { grp, NULL };
- return device_remove_groups(dev, groups);
+ device_remove_groups(dev, groups);
}
int __must_check devm_device_add_group(struct device *dev,
diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index 45ee3a634999..65880e60c720 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -193,7 +193,7 @@ static inline int __must_check class_create_file(const struct class *class,
static inline void class_remove_file(const struct class *class,
const struct class_attribute *attr)
{
- return class_remove_file_ns(class, attr, NULL);
+ class_remove_file_ns(class, attr, NULL);
}
/* Simple class attribute that is just a static string */
diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h
index 2dea217629d0..5d43881e6fb7 100644
--- a/include/linux/dma/k3-udma-glue.h
+++ b/include/linux/dma/k3-udma-glue.h
@@ -138,8 +138,7 @@ int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn,
u32 flow_num);
void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
u32 flow_num, void *data,
- void (*cleanup)(void *data, dma_addr_t desc_dma),
- bool skip_fdq);
+ void (*cleanup)(void *data, dma_addr_t desc_dma));
int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn,
u32 flow_idx);
int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn,
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 346251bf1026..bb146c5ac3e4 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -839,7 +839,6 @@ struct dma_filter {
* The function takes a buffer of size buf_len. The callback function will
* be called after period_len bytes have been transferred.
* @device_prep_interleaved_dma: Transfer expression in a generic way.
- * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
* @device_caps: May be used to override the generic DMA slave capabilities
* with per-channel specific ones
* @device_config: Pushes a new configuration to a channel, return 0 or an error
@@ -942,9 +941,6 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
struct dma_chan *chan, struct dma_interleaved_template *xt,
unsigned long flags);
- struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)(
- struct dma_chan *chan, dma_addr_t dst, u64 data,
- unsigned long flags);
void (*device_caps)(struct dma_chan *chan, struct dma_slave_caps *caps);
int (*device_config)(struct dma_chan *chan, struct dma_slave_config *config);
@@ -1639,14 +1635,14 @@ static inline struct dma_chan
{
struct dma_chan *chan;
- chan = dma_request_slave_channel(dev, name);
- if (chan)
+ chan = dma_request_chan(dev, name);
+ if (!IS_ERR(chan))
return chan;
if (!fn || !fn_param)
return NULL;
- return __dma_request_channel(&mask, fn, fn_param, NULL);
+ return dma_request_channel(mask, fn, fn_param);
}
static inline char *
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index a087606ace19..fc93f0abf513 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -279,10 +279,22 @@ struct export_operations {
atomic attribute updates
*/
#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */
-#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */
+#define EXPORT_OP_NOLOCKS (0x40) /* no file locking support */
unsigned long flags;
};
+/**
+ * exportfs_cannot_lock() - check if export lacks file locking support
+ * @export_ops: the nfs export operations to check
+ *
+ * Returns true if the export does not support file locking.
+ */
+static inline bool
+exportfs_cannot_lock(const struct export_operations *export_ops)
+{
+ return export_ops->flags & EXPORT_OP_NOLOCKS;
+}
+
extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
int *max_len, struct inode *parent,
int flags);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 5ba187e08cf7..cd653862ab99 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -225,6 +225,7 @@ struct fb_deferred_io {
int open_count; /* number of opened files; protected by fb_info lock */
struct mutex lock; /* mutex that protects the pageref list */
struct list_head pagereflist; /* list of pagerefs for touched pages */
+ struct address_space *mapping; /* page cache object for fb device */
/* callback */
struct page *(*get_page)(struct fb_info *info, unsigned long offset);
void (*deferred_io)(struct fb_info *info, struct list_head *pagelist);
diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h
index 4d3f8074c137..45ad2408a80c 100644
--- a/include/linux/folio_queue.h
+++ b/include/linux/folio_queue.h
@@ -15,6 +15,7 @@
#define _LINUX_FOLIO_QUEUE_H
#include <linux/pagevec.h>
+#include <linux/mm.h>
/*
* Segment in a queue of running buffers. Each segment can hold a number of
@@ -216,13 +217,6 @@ static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot)
clear_bit(slot, &folioq->marks3);
}
-static inline unsigned int __folio_order(struct folio *folio)
-{
- if (!folio_test_large(folio))
- return 0;
- return folio->_flags_1 & 0xff;
-}
-
/**
* folioq_append: Add a folio to a folio queue segment
* @folioq: The segment to add to
@@ -241,7 +235,7 @@ static inline unsigned int folioq_append(struct folio_queue *folioq, struct foli
unsigned int slot = folioq->vec.nr++;
folioq->vec.folios[slot] = folio;
- folioq->orders[slot] = __folio_order(folio);
+ folioq->orders[slot] = folio_order(folio);
return slot;
}
@@ -263,7 +257,7 @@ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct
unsigned int slot = folioq->vec.nr++;
folioq->vec.folios[slot] = folio;
- folioq->orders[slot] = __folio_order(folio);
+ folioq->orders[slot] = folio_order(folio);
folioq_mark(folioq, slot);
return slot;
}
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 99f30c7d6208..897d6211c163 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -417,8 +417,6 @@ int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev,
void fsl_mc_portal_free(struct fsl_mc_io *mc_io);
-int fsl_mc_portal_reset(struct fsl_mc_io *mc_io);
-
int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev,
enum fsl_mc_pool_type pool_type,
struct fsl_mc_device **new_mc_adev);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 88e078871158..1adcba3ddd76 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -231,8 +231,6 @@ static inline enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused)
/* Exported timer functions: */
/* Initialize timers: */
-extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
- enum hrtimer_mode mode);
extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
clockid_t clock_id, enum hrtimer_mode mode);
extern void hrtimer_setup_on_stack(struct hrtimer *timer,
diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h
index ad66a3081735..8fbbb6bdf7a1 100644
--- a/include/linux/hrtimer_types.h
+++ b/include/linux/hrtimer_types.h
@@ -34,12 +34,12 @@ enum hrtimer_restart {
* @is_hard: Set if hrtimer will be expired in hard interrupt context
* even on RT.
*
- * The hrtimer structure must be initialized by hrtimer_init()
+ * The hrtimer structure must be initialized by hrtimer_setup()
*/
struct hrtimer {
struct timerqueue_node node;
ktime_t _softexpires;
- enum hrtimer_restart (*function)(struct hrtimer *);
+ enum hrtimer_restart (*__private function)(struct hrtimer *);
struct hrtimer_clock_base *base;
u8 state;
u8 is_rel;
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 93e509b6c00e..e893d546a49f 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -39,6 +39,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio,
+ bool write);
+vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
+ bool write);
enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_UNSUPPORTED,
@@ -341,6 +345,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order);
int min_order_for_split(struct folio *folio);
int split_folio_to_list(struct folio *folio, struct list_head *list);
+bool uniform_split_supported(struct folio *folio, unsigned int new_order,
+ bool warns);
+bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
+ bool warns);
+int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
+ struct list_head *list);
+/*
+ * try_folio_split - try to split a @folio at @page using a non-uniform split.
+ * @folio: folio to be split
+ * @page: split to order-0 at the given page
+ * @list: store the after-split folios
+ *
+ * Try to split a @folio at @page using a non-uniform split to order-0. If a
+ * non-uniform split is not supported, fall back to a uniform split.
+ *
+ * Return: 0 if the split is successful, otherwise the split failed.
+ */
+static inline int try_folio_split(struct folio *folio, struct page *page,
+ struct list_head *list)
+{
+ int ret = min_order_for_split(folio);
+
+ if (ret < 0)
+ return ret;
+
+ if (!non_uniform_split_supported(folio, 0, false))
+ return split_huge_page_to_list_to_order(&folio->page, list,
+ ret);
+ return folio_split(folio, ret, page, list);
+}
static inline int split_huge_page(struct page *page)
{
struct folio *folio = page_folio(page);
@@ -404,7 +438,7 @@ int madvise_collapse(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start, unsigned long end);
void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, long adjust_next);
+ unsigned long end, struct vm_area_struct *next);
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);
@@ -533,6 +567,12 @@ static inline int split_folio_to_list(struct folio *folio, struct list_head *lis
return 0;
}
+static inline int try_folio_split(struct folio *folio, struct page *page,
+ struct list_head *list)
+{
+ return 0;
+}
+
static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {}
#define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0)
@@ -571,7 +611,7 @@ static inline int madvise_collapse(struct vm_area_struct *vma,
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
- long adjust_next)
+ struct vm_area_struct *next)
{
}
static inline int is_swap_pmd(pmd_t pmd)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 76a75ec03dd6..8f3ac832ee7f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -174,6 +174,9 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages[MAX_NUMNODES];
+void hugetlb_bootmem_alloc(void);
+bool hugetlb_bootmem_allocated(void);
+
/* arch callbacks */
#ifndef CONFIG_HIGHPTE
@@ -588,6 +591,7 @@ enum hugetlb_page_flags {
HPG_freed,
HPG_vmemmap_optimized,
HPG_raw_hwp_unreliable,
+ HPG_cma,
__NR_HPAGEFLAGS,
};
@@ -647,6 +651,7 @@ HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
+HPAGEFLAG(Cma, cma)
#ifdef CONFIG_HUGETLB_PAGE
@@ -675,11 +680,21 @@ struct hstate {
char name[HSTATE_NAME_LEN];
};
+struct cma;
+
struct huge_bootmem_page {
struct list_head list;
struct hstate *hstate;
+ unsigned long flags;
+ struct cma *cma;
};
+#define HUGE_BOOTMEM_HVO 0x0001
+#define HUGE_BOOTMEM_ZONES_VALID 0x0002
+#define HUGE_BOOTMEM_CMA 0x0004
+
+bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
+
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
void wait_for_freed_hugetlb_folios(void);
@@ -815,6 +830,17 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
}
#endif
+#ifndef arch_has_huge_bootmem_alloc
+/*
+ * Some architectures do their own bootmem allocation, so they can't use
+ * early CMA allocation.
+ */
+static inline bool arch_has_huge_bootmem_alloc(void)
+{
+ return false;
+}
+#endif
+
static inline struct hstate *folio_hstate(struct folio *folio)
{
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
@@ -1257,6 +1283,15 @@ static inline bool hugetlbfs_pagecache_present(
{
return false;
}
+
+static inline void hugetlb_bootmem_alloc(void)
+{
+}
+
+static inline bool hugetlb_bootmem_allocated(void)
+{
+ return false;
+}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 2b2af24d2a43..2e4903b7f7bc 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -952,6 +952,21 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg)
return (msg->addr << 1) | (msg->flags & I2C_M_RD);
}
+/*
+ * 10-bit address
+ * addr_1: 5'b11110 | addr[9:8] | (R/nW)
+ * addr_2: addr[7:0]
+ */
+static inline u8 i2c_10bit_addr_hi_from_msg(const struct i2c_msg *msg)
+{
+ return 0xf0 | ((msg->addr & GENMASK(9, 8)) >> 7) | (msg->flags & I2C_M_RD);
+}
+
+static inline u8 i2c_10bit_addr_lo_from_msg(const struct i2c_msg *msg)
+{
+ return msg->addr & GENMASK(7, 0);
+}
+
u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold);
void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred);
@@ -1029,10 +1044,6 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node
return i2c_get_adapter_by_fwnode(of_fwnode_handle(node));
}
-const struct of_device_id
-*i2c_of_match_device(const struct of_device_id *matches,
- struct i2c_client *client);
-
int of_i2c_get_board_info(struct device *dev, struct device_node *node,
struct i2c_board_info *info);
@@ -1053,13 +1064,6 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node
return NULL;
}
-static inline const struct of_device_id
-*i2c_of_match_device(const struct of_device_id *matches,
- struct i2c_client *client)
-{
- return NULL;
-}
-
static inline int of_i2c_get_board_info(struct device *dev,
struct device_node *node,
struct i2c_board_info *info)
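Worked example for the 10-bit address encoding above (illustrative only, not part of the patch): for a 10-bit read of target address 0x2a5, addr[9:8] is 0b10 and addr[7:0] is 0xa5, so the two helpers return 0xf5 and 0xa5 respectively.

#include <linux/i2c.h>

static void i2c_10bit_addr_example(void)
{
	struct i2c_msg msg = {
		.addr  = 0x2a5,			/* 10-bit target address */
		.flags = I2C_M_TEN | I2C_M_RD,	/* 10-bit read */
	};

	u8 hi = i2c_10bit_addr_hi_from_msg(&msg);	/* 0xf5: 11110 | 10 | R */
	u8 lo = i2c_10bit_addr_lo_from_msg(&msg);	/* 0xa5: addr[7:0] */

	(void)hi;
	(void)lo;
}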
diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index 12d532b012c5..c67922ece617 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -475,7 +475,7 @@ struct i3c_master_controller_ops {
int (*attach_i2c_dev)(struct i2c_dev_desc *dev);
void (*detach_i2c_dev)(struct i2c_dev_desc *dev);
int (*i2c_xfers)(struct i2c_dev_desc *dev,
- const struct i2c_msg *xfers, int nxfers);
+ struct i2c_msg *xfers, int nxfers);
int (*request_ibi)(struct i3c_dev_desc *dev,
const struct i3c_ibi_setup *req);
void (*free_ibi)(struct i3c_dev_desc *dev);
diff --git a/include/linux/idr.h b/include/linux/idr.h
index cd729be369b3..2267902d29a7 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -274,6 +274,7 @@ struct ida {
int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t);
void ida_free(struct ida *, unsigned int id);
void ida_destroy(struct ida *ida);
+int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max);
/**
* ida_alloc() - Allocate an unused ID.
@@ -345,4 +346,14 @@ static inline bool ida_is_empty(const struct ida *ida)
{
return xa_empty(&ida->xa);
}
+
+static inline bool ida_exists(struct ida *ida, unsigned int id)
+{
+ return ida_find_first_range(ida, id, id) == id;
+}
+
+static inline int ida_find_first(struct ida *ida)
+{
+ return ida_find_first_range(ida, 0, ~0);
+}
#endif /* __IDR_H__ */
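A minimal sketch (not from the patch) of how the new ida_exists()/ida_find_first() helpers compose with the existing allocation API; 'example_ida' and the ID range are made up:

#include <linux/idr.h>
#include <linux/bug.h>

static DEFINE_IDA(example_ida);

static void ida_find_example(void)
{
	int id = ida_alloc_range(&example_ida, 10, 20, GFP_KERNEL);

	if (id < 0)
		return;

	WARN_ON(!ida_exists(&example_ida, id));		/* the ID we just allocated */
	WARN_ON(ida_find_first(&example_ida) != id);	/* lowest allocated ID */

	ida_free(&example_ida, id);
}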
diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h
index 417073c52380..f242b285081b 100644
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -46,6 +46,7 @@ struct iio_dev;
* modify or drop the sample data, it, may be NULL.
* @has_registers: true if the device has writable and readable registers, false
* if there is just one read-only sample data shift register.
+ * @has_named_irqs: Set to true if there is more than one IRQ line.
* @addr_shift: Shift of the register address in the communications register.
* @read_mask: Mask for the communications register having the read bit set.
* @status_ch_mask: Mask for the channel number stored in status register.
@@ -53,7 +54,6 @@ struct iio_dev;
* be used.
* @irq_flags: flags for the interrupt used by the triggered buffer
* @num_slots: Number of sequencer slots
- * @irq_line: IRQ for reading conversions. If 0, spi->irq will be used
* @num_resetclks: Number of SPI clk cycles with MOSI=1 to reset the chip.
*/
struct ad_sigma_delta_info {
@@ -64,13 +64,13 @@ struct ad_sigma_delta_info {
int (*disable_one)(struct ad_sigma_delta *, unsigned int chan);
int (*postprocess_sample)(struct ad_sigma_delta *, unsigned int raw_sample);
bool has_registers;
+ bool has_named_irqs;
unsigned int addr_shift;
unsigned int read_mask;
unsigned int status_ch_mask;
unsigned int data_reg;
unsigned long irq_flags;
unsigned int num_slots;
- int irq_line;
unsigned int num_resetclks;
};
diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h
index 10be00f3b120..e45b7dfbec35 100644
--- a/include/linux/iio/backend.h
+++ b/include/linux/iio/backend.h
@@ -70,6 +70,12 @@ enum iio_backend_sample_trigger {
IIO_BACKEND_SAMPLE_TRIGGER_MAX
};
+enum iio_backend_interface_type {
+ IIO_BACKEND_INTERFACE_SERIAL_LVDS,
+ IIO_BACKEND_INTERFACE_SERIAL_CMOS,
+ IIO_BACKEND_INTERFACE_MAX
+};
+
/**
* struct iio_backend_ops - operations structure for an iio_backend
* @enable: Enable backend.
@@ -88,6 +94,9 @@ enum iio_backend_sample_trigger {
* @extend_chan_spec: Extend an IIO channel.
* @ext_info_set: Extended info setter.
* @ext_info_get: Extended info getter.
+ * @interface_type_get: Interface type.
+ * @data_size_set: Data size.
+ * @oversampling_ratio_set: Set Oversampling ratio.
* @read_raw: Read a channel attribute from a backend device
* @debugfs_print_chan_status: Print channel status into a buffer.
* @debugfs_reg_access: Read or write register value of backend.
@@ -128,6 +137,11 @@ struct iio_backend_ops {
const char *buf, size_t len);
int (*ext_info_get)(struct iio_backend *back, uintptr_t private,
const struct iio_chan_spec *chan, char *buf);
+ int (*interface_type_get)(struct iio_backend *back,
+ enum iio_backend_interface_type *type);
+ int (*data_size_set)(struct iio_backend *back, unsigned int size);
+ int (*oversampling_ratio_set)(struct iio_backend *back,
+ unsigned int ratio);
int (*read_raw)(struct iio_backend *back,
struct iio_chan_spec const *chan, int *val, int *val2,
long mask);
@@ -186,6 +200,11 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private,
const char *buf, size_t len);
ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private,
const struct iio_chan_spec *chan, char *buf);
+int iio_backend_interface_type_get(struct iio_backend *back,
+ enum iio_backend_interface_type *type);
+int iio_backend_data_size_set(struct iio_backend *back, unsigned int size);
+int iio_backend_oversampling_ratio_set(struct iio_backend *back,
+ unsigned int ratio);
int iio_backend_read_raw(struct iio_backend *back,
struct iio_chan_spec const *chan, int *val, int *val2,
long mask);
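A hedged sketch of a frontend driver calling the new backend helpers during setup; the 16-bit data size and oversampling ratio of 2 are illustrative assumptions, not values from this patch:

static int example_frontend_setup(struct iio_backend *back)
{
	enum iio_backend_interface_type type;
	int ret;

	ret = iio_backend_interface_type_get(back, &type);
	if (ret)
		return ret;

	/* configure the capture path according to the reported interface */
	if (type == IIO_BACKEND_INTERFACE_SERIAL_CMOS) {
		ret = iio_backend_data_size_set(back, 16);
		if (ret)
			return ret;
	}

	return iio_backend_oversampling_ratio_set(back, 2);
}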
diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h
index 81d9a19aeb91..37f27545f69f 100644
--- a/include/linux/iio/buffer-dmaengine.h
+++ b/include/linux/iio/buffer-dmaengine.h
@@ -11,8 +11,9 @@
struct iio_dev;
struct device;
+struct dma_chan;
-void iio_dmaengine_buffer_free(struct iio_buffer *buffer);
+void iio_dmaengine_buffer_teardown(struct iio_buffer *buffer);
struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev,
struct iio_dev *indio_dev,
const char *channel,
@@ -26,6 +27,10 @@ int devm_iio_dmaengine_buffer_setup_ext(struct device *dev,
struct iio_dev *indio_dev,
const char *channel,
enum iio_buffer_direction dir);
+int devm_iio_dmaengine_buffer_setup_with_handle(struct device *dev,
+ struct iio_dev *indio_dev,
+ struct dma_chan *chan,
+ enum iio_buffer_direction dir);
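A hedged sketch of the new *_with_handle variant, where the caller requests the DMA channel itself; the "rx" channel name is an assumption and channel lifetime handling is not shown:

static int example_buffer_setup(struct device *dev, struct iio_dev *indio_dev)
{
	struct dma_chan *chan;

	chan = dma_request_chan(dev, "rx");
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	/* hand the pre-requested channel to the IIO DMA buffer core */
	return devm_iio_dmaengine_buffer_setup_with_handle(dev, indio_dev,
							   chan,
							   IIO_BUFFER_DIRECTION_IN);
}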
#define devm_iio_dmaengine_buffer_setup(dev, indio_dev, channel) \
devm_iio_dmaengine_buffer_setup_ext(dev, indio_dev, channel, \
diff --git a/include/linux/iio/iio-gts-helper.h b/include/linux/iio/iio-gts-helper.h
index e5de7a124bad..66f830ab9b49 100644
--- a/include/linux/iio/iio-gts-helper.h
+++ b/include/linux/iio/iio-gts-helper.h
@@ -208,5 +208,6 @@ int iio_gts_all_avail_scales(struct iio_gts *gts, const int **vals, int *type,
int *length);
int iio_gts_avail_scales_for_time(struct iio_gts *gts, int time,
const int **vals, int *type, int *length);
+int iio_gts_get_total_gain(struct iio_gts *gts, int gain, int time);
#endif
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 56161e02f002..07a0e8132e88 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -9,7 +9,7 @@
#include <linux/device.h>
#include <linux/cdev.h>
-#include <linux/cleanup.h>
+#include <linux/compiler_types.h>
#include <linux/slab.h>
#include <linux/iio/types.h>
/* IIO TODO LIST */
@@ -663,30 +663,29 @@ int iio_device_claim_direct_mode(struct iio_dev *indio_dev);
void iio_device_release_direct_mode(struct iio_dev *indio_dev);
/*
- * This autocleanup logic is normally used via
- * iio_device_claim_direct_scoped().
+ * Helper functions that allow claim and release of direct mode
+ * in a fashion that doesn't generate many false positives from sparse.
+ * Note these must remain static inline in the header so that sparse
+ * can see the __acquire() marking. Revisit when sparse supports
+ * __cond_acquires().
*/
-DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T),
- iio_device_release_direct_mode(_T))
+static inline bool iio_device_claim_direct(struct iio_dev *indio_dev)
+{
+ int ret = iio_device_claim_direct_mode(indio_dev);
-DEFINE_GUARD_COND(iio_claim_direct, _try, ({
- struct iio_dev *dev;
- int d = iio_device_claim_direct_mode(_T);
+ if (ret)
+ return false;
- if (d < 0)
- dev = NULL;
- else
- dev = _T;
- dev;
- }))
+ __acquire(iio_dev);
-/**
- * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct.
- * @fail: What to do on failure to claim device.
- * @iio_dev: Pointer to the IIO devices structure
- */
-#define iio_device_claim_direct_scoped(fail, iio_dev) \
- scoped_cond_guard(iio_claim_direct_try, fail, iio_dev)
+ return true;
+}
+
+static inline void iio_device_release_direct(struct iio_dev *indio_dev)
+{
+ iio_device_release_direct_mode(indio_dev);
+ __release(indio_dev);
+}
int iio_device_claim_buffer_mode(struct iio_dev *indio_dev);
void iio_device_release_buffer_mode(struct iio_dev *indio_dev);
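A hedged sketch of a driver using the new claim/release helpers in a read_raw() callback; example_read_channel() is a hypothetical stand-in for the actual register access:

static int example_read_raw(struct iio_dev *indio_dev,
			    struct iio_chan_spec const *chan,
			    int *val, int *val2, long mask)
{
	int ret;

	if (!iio_device_claim_direct(indio_dev))
		return -EBUSY;

	ret = example_read_channel(indio_dev, chan, val);	/* hypothetical */
	iio_device_release_direct(indio_dev);

	return ret ? ret : IIO_VAL_INT;
}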
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 4bb98d9731de..aa160511e265 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -44,6 +44,8 @@ struct adis_timeout {
* @glob_cmd_reg: Register address of the GLOB_CMD register
* @msc_ctrl_reg: Register address of the MSC_CTRL register
* @diag_stat_reg: Register address of the DIAG_STAT register
+ * @diag_stat_size: Length (in bytes) of the DIAG_STAT register. If 0, the
+ * default length of 2 bytes is used.
* @prod_id_reg: Register address of the PROD_ID register
* @prod_id: Product ID code that should be expected when reading @prod_id_reg
* @self_test_mask: Bitmask of supported self-test operations
@@ -70,6 +72,7 @@ struct adis_data {
unsigned int glob_cmd_reg;
unsigned int msc_ctrl_reg;
unsigned int diag_stat_reg;
+ unsigned int diag_stat_size;
unsigned int prod_id_reg;
unsigned int prod_id;
@@ -95,12 +98,28 @@ struct adis_data {
};
/**
+ * struct adis_ops - Custom ops for adis devices.
+ * @write: Custom spi write implementation.
+ * @read: Custom spi read implementation.
+ * @reset: Custom sw reset implementation. The custom implementation does not
+ * need to sleep after the reset. It's done by the library already.
+ */
+struct adis_ops {
+ int (*write)(struct adis *adis, unsigned int reg, unsigned int value,
+ unsigned int size);
+ int (*read)(struct adis *adis, unsigned int reg, unsigned int *value,
+ unsigned int size);
+ int (*reset)(struct adis *adis);
+};
+
+/**
* struct adis - ADIS device instance data
* @spi: Reference to SPI device which owns this ADIS IIO device
* @trig: IIO trigger object data
* @data: ADIS chip variant specific data
* @burst_extra_len: Burst extra length. Should only be used by devices that can
* dynamically change their burst mode length.
+ * @ops: ops struct for custom read and write functions
* @state_lock: Lock used by the device to protect state
* @msg: SPI message object
* @xfer: SPI transfer objects to be used for a @msg
@@ -116,6 +135,7 @@ struct adis {
const struct adis_data *data;
unsigned int burst_extra_len;
+ const struct adis_ops *ops;
/**
* The state_lock is meant to be used during operations that require
* a sequence of SPI R/W in order to protect the SPI transfer
@@ -168,7 +188,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg,
static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg,
u8 val)
{
- return __adis_write_reg(adis, reg, val, 1);
+ return adis->ops->write(adis, reg, val, 1);
}
/**
@@ -180,7 +200,7 @@ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg,
static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg,
u16 val)
{
- return __adis_write_reg(adis, reg, val, 2);
+ return adis->ops->write(adis, reg, val, 2);
}
/**
@@ -192,7 +212,7 @@ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg,
static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg,
u32 val)
{
- return __adis_write_reg(adis, reg, val, 4);
+ return adis->ops->write(adis, reg, val, 4);
}
/**
@@ -207,7 +227,7 @@ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg,
unsigned int tmp;
int ret;
- ret = __adis_read_reg(adis, reg, &tmp, 2);
+ ret = adis->ops->read(adis, reg, &tmp, 2);
if (ret == 0)
*val = tmp;
@@ -226,7 +246,7 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg,
unsigned int tmp;
int ret;
- ret = __adis_read_reg(adis, reg, &tmp, 4);
+ ret = adis->ops->read(adis, reg, &tmp, 4);
if (ret == 0)
*val = tmp;
@@ -244,7 +264,7 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg,
unsigned int val, unsigned int size)
{
guard(mutex)(&adis->state_lock);
- return __adis_write_reg(adis, reg, val, size);
+ return adis->ops->write(adis, reg, val, size);
}
/**
@@ -258,7 +278,7 @@ static int adis_read_reg(struct adis *adis, unsigned int reg,
unsigned int *val, unsigned int size)
{
guard(mutex)(&adis->state_lock);
- return __adis_read_reg(adis, reg, val, size);
+ return adis->ops->read(adis, reg, val, size);
}
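A hedged sketch of a driver filling in the new ops table while keeping the stock SPI accessors; treating BIT(7) of GLOB_CMD as the software-reset bit is an assumption for illustration only:

static int example_adis_reset(struct adis *adis)
{
	/* assumption: BIT(7) of GLOB_CMD triggers a software reset */
	return adis->ops->write(adis, adis->data->glob_cmd_reg, BIT(7), 2);
}

static const struct adis_ops example_adis_ops = {
	.read = __adis_read_reg,
	.write = __adis_write_reg,
	.reset = example_adis_reset,
};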
/**
diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h
index aaa8a0767aa3..1b400f26f63d 100644
--- a/include/linux/interval_tree_generic.h
+++ b/include/linux/interval_tree_generic.h
@@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
if (ITSTART(node) <= last) { /* Cond1 */ \
if (start <= ITLAST(node)) /* Cond2 */ \
return node; /* node is leftmost match */ \
- if (node->ITRB.rb_right) { \
- node = rb_entry(node->ITRB.rb_right, \
- ITSTRUCT, ITRB); \
- if (start <= node->ITSUBTREE) \
- continue; \
- } \
+ node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \
+ continue; \
} \
return NULL; /* No match */ \
} \
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index e6723fa95160..0634a3de1782 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -21,7 +21,6 @@ struct io_uring_cmd {
struct io_uring_cmd_data {
void *op_data;
- struct io_uring_sqe sqes[2];
};
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 02fe001feebb..68416b135151 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -79,6 +79,11 @@ struct vm_fault;
#define IOMAP_F_ATOMIC_BIO (1U << 8)
/*
+ * Flag reserved for file system specific usage
+ */
+#define IOMAP_F_PRIVATE (1U << 12)
+
+/*
* Flags set by the core iomap code during operations:
*
* IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
@@ -88,14 +93,8 @@ struct vm_fault;
* range it covers needs to be remapped by the high level before the operation
* can proceed.
*/
-#define IOMAP_F_SIZE_CHANGED (1U << 8)
-#define IOMAP_F_STALE (1U << 9)
-
-/*
- * Flags from 0x1000 up are for file system specific usage:
- */
-#define IOMAP_F_PRIVATE (1U << 12)
-
+#define IOMAP_F_SIZE_CHANGED (1U << 14)
+#define IOMAP_F_STALE (1U << 15)
/*
* Magic value for addr:
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index cf8c16ba04a0..ccce8a751e2a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -41,6 +41,7 @@ struct iommu_dirty_ops;
struct notifier_block;
struct iommu_sva;
struct iommu_dma_cookie;
+struct iommu_dma_msi_cookie;
struct iommu_fault_param;
struct iommufd_ctx;
struct iommufd_viommu;
@@ -165,6 +166,15 @@ struct iommu_domain_geometry {
bool force_aperture; /* DMA only allowed in mappable range? */
};
+enum iommu_domain_cookie_type {
+ IOMMU_COOKIE_NONE,
+ IOMMU_COOKIE_DMA_IOVA,
+ IOMMU_COOKIE_DMA_MSI,
+ IOMMU_COOKIE_FAULT_HANDLER,
+ IOMMU_COOKIE_SVA,
+ IOMMU_COOKIE_IOMMUFD,
+};
+
/* Domain feature flags */
#define __IOMMU_DOMAIN_PAGING (1U << 0) /* Support for iommu_map/unmap */
#define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API
@@ -211,23 +221,18 @@ struct iommu_domain_geometry {
struct iommu_domain {
unsigned type;
+ enum iommu_domain_cookie_type cookie_type;
const struct iommu_domain_ops *ops;
const struct iommu_dirty_ops *dirty_ops;
const struct iommu_ops *owner; /* Whose domain_alloc we came from */
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
struct iommu_domain_geometry geometry;
- struct iommu_dma_cookie *iova_cookie;
int (*iopf_handler)(struct iopf_group *group);
-#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
- int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc,
- phys_addr_t msi_addr);
-#endif
-
- union { /* Pointer usable by owner of the domain */
- struct iommufd_hw_pagetable *iommufd_hwpt; /* iommufd */
- };
- union { /* Fault handler */
+ union { /* cookie */
+ struct iommu_dma_cookie *iova_cookie;
+ struct iommu_dma_msi_cookie *msi_cookie;
+ struct iommufd_hw_pagetable *iommufd_hwpt;
struct {
iommu_fault_handler_t handler;
void *handler_token;
@@ -244,16 +249,6 @@ struct iommu_domain {
};
};
-static inline void iommu_domain_set_sw_msi(
- struct iommu_domain *domain,
- int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc,
- phys_addr_t msi_addr))
-{
-#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
- domain->sw_msi = sw_msi;
-#endif
-}
-
static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
{
return domain->type & __IOMMU_DOMAIN_DMA_API;
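A hedged sketch of how a consumer is expected to use the tagged cookie union, checking cookie_type before dereferencing (illustrative only):

static inline struct iommu_dma_cookie *
example_iova_cookie(struct iommu_domain *domain)
{
	if (domain->cookie_type != IOMMU_COOKIE_DMA_IOVA)
		return NULL;
	return domain->iova_cookie;
}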
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 11110c749200..34b6e6ca4bfa 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -8,9 +8,11 @@
#include <linux/err.h>
#include <linux/errno.h>
+#include <linux/iommu.h>
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/xarray.h>
+#include <uapi/linux/iommufd.h>
struct device;
struct file;
@@ -34,6 +36,7 @@ enum iommufd_object_type {
IOMMUFD_OBJ_FAULT,
IOMMUFD_OBJ_VIOMMU,
IOMMUFD_OBJ_VDEVICE,
+ IOMMUFD_OBJ_VEVENTQ,
#ifdef CONFIG_IOMMUFD_TEST
IOMMUFD_OBJ_SELFTEST,
#endif
@@ -52,9 +55,11 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
struct device *dev, u32 *id);
void iommufd_device_unbind(struct iommufd_device *idev);
-int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id);
-int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id);
-void iommufd_device_detach(struct iommufd_device *idev);
+int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id);
+int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id);
+void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid);
struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev);
u32 iommufd_device_to_id(struct iommufd_device *idev);
@@ -93,6 +98,8 @@ struct iommufd_viommu {
const struct iommufd_viommu_ops *ops;
struct xarray vdevs;
+ struct list_head veventqs;
+ struct rw_semaphore veventqs_rwsem;
unsigned int type;
};
@@ -187,6 +194,11 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
enum iommufd_object_type type);
struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
unsigned long vdev_id);
+int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
+ struct device *dev, unsigned long *vdev_id);
+int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
+ enum iommu_veventq_type type, void *event_data,
+ size_t data_len);
#else /* !CONFIG_IOMMUFD_DRIVER_CORE */
static inline struct iommufd_object *
_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size,
@@ -200,6 +212,20 @@ iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id)
{
return NULL;
}
+
+static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
+ struct device *dev,
+ unsigned long *vdev_id)
+{
+ return -ENOENT;
+}
+
+static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
+ enum iommu_veventq_type type,
+ void *event_data, size_t data_len)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_IOMMUFD_DRIVER_CORE */
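A hedged sketch of a vIOMMU driver forwarding a hardware event through the new helper; the event type and payload layout are driver-specific assumptions:

static int example_forward_event(struct iommufd_viommu *viommu,
				 enum iommu_veventq_type type,
				 void *data, size_t len)
{
	/* returns -EOPNOTSUPP when IOMMUFD_DRIVER_CORE is not built in */
	return iommufd_viommu_report_event(viommu, type, data, len);
}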
/*
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5385349f0b8a..e8b2d6aa4013 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -154,15 +154,20 @@ enum {
};
/* helpers to define resources */
-#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \
+#define DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, _desc) \
(struct resource) { \
.start = (_start), \
.end = (_start) + (_size) - 1, \
.name = (_name), \
.flags = (_flags), \
- .desc = IORES_DESC_NONE, \
+ .desc = (_desc), \
}
+#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \
+ DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, IORES_DESC_NONE)
+#define DEFINE_RES(_start, _size, _flags) \
+ DEFINE_RES_NAMED(_start, _size, NULL, _flags)
+
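A hedged example of the new helpers; the addresses, sizes, names and descriptor below are made up for illustration:

static struct resource example_fw_res =
	DEFINE_RES_NAMED_DESC(0xfed40000, 0x80, "example-fw", IORESOURCE_MEM,
			      IORES_DESC_RESERVED);
static struct resource example_mem_res =
	DEFINE_RES(0x100000, 0x1000, IORESOURCE_MEM);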
#define DEFINE_RES_IO_NAMED(_start, _size, _name) \
DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO)
#define DEFINE_RES_IO(_start, _size) \
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 33ff41eef8f7..bb7111105296 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -72,7 +72,7 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
/**
* struct irq_domain_ops - Methods for irq_domain objects
- * @match: Match an interrupt controller device node to a host, returns
+ * @match: Match an interrupt controller device node to a domain, returns
* 1 on a match
* @select: Match an interrupt controller fw specification. It is more generic
* than @match as it receives a complete struct irq_fwspec. Therefore,
@@ -352,8 +352,8 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
void *host_data);
struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token);
-void irq_set_default_host(struct irq_domain *host);
-struct irq_domain *irq_get_default_host(void);
+void irq_set_default_domain(struct irq_domain *domain);
+struct irq_domain *irq_get_default_domain(void);
int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
irq_hw_number_t hwirq, int node,
const struct irq_affinity_desc *affinity);
@@ -454,7 +454,7 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
return IS_ERR(d) ? NULL : d;
}
-unsigned int irq_create_direct_mapping(struct irq_domain *host);
+unsigned int irq_create_direct_mapping(struct irq_domain *domain);
#endif
static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
@@ -507,7 +507,7 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw
return IS_ERR(d) ? NULL : d;
}
-void irq_domain_remove(struct irq_domain *host);
+void irq_domain_remove(struct irq_domain *domain);
int irq_domain_associate(struct irq_domain *domain, unsigned int irq,
irq_hw_number_t hwirq);
@@ -515,16 +515,16 @@ void irq_domain_associate_many(struct irq_domain *domain,
unsigned int irq_base,
irq_hw_number_t hwirq_base, int count);
-unsigned int irq_create_mapping_affinity(struct irq_domain *host,
+unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
irq_hw_number_t hwirq,
const struct irq_affinity_desc *affinity);
unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec);
void irq_dispose_mapping(unsigned int virq);
-static inline unsigned int irq_create_mapping(struct irq_domain *host,
+static inline unsigned int irq_create_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
- return irq_create_mapping_affinity(host, hwirq, NULL);
+ return irq_create_mapping_affinity(domain, hwirq, NULL);
}
struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 905a2e2f45f6..ecbf819deeca 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -104,7 +104,7 @@ extern int kdb_initial_cpu;
#define KDB_NOENVVALUE (-6)
#define KDB_NOTIMP (-7)
#define KDB_ENVFULL (-8)
-#define KDB_ENVBUFFULL (-9)
+#define KDB_KMALLOCFAILED (-9)
#define KDB_TOOMANYBPT (-10)
#define KDB_TOOMANYDBREGS (-11)
#define KDB_DUPBPT (-12)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 87c79d076d6d..b5a5f32fdfd1 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -147,6 +147,11 @@ enum kernfs_root_flag {
* Support user xattrs to be written to nodes rooted at this root.
*/
KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008,
+
+ /*
+ * Renames must not change the parent node.
+ */
+ KERNFS_ROOT_INVARIANT_PARENT = 0x0010,
};
/* type-specific structures for kernfs_node union members */
@@ -199,8 +204,8 @@ struct kernfs_node {
* never moved to a different parent, it is safe to access the
* parent directly.
*/
- struct kernfs_node *parent;
- const char *name;
+ struct kernfs_node __rcu *__parent;
+ const char __rcu *name;
struct rb_node rb;
@@ -395,7 +400,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
}
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
-int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn,
+int kernfs_path_from_node(struct kernfs_node *kn_to, struct kernfs_node *kn_from,
char *buf, size_t buflen);
void pr_cont_kernfs_name(struct kernfs_node *kn);
void pr_cont_kernfs_path(struct kernfs_node *kn);
@@ -416,6 +421,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
unsigned int flags, void *priv);
void kernfs_destroy_root(struct kernfs_root *root);
+unsigned int kernfs_root_flags(struct kernfs_node *kn);
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
const char *name, umode_t mode,
@@ -514,6 +520,8 @@ kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags,
{ return ERR_PTR(-ENOSYS); }
static inline void kernfs_destroy_root(struct kernfs_root *root) { }
+static inline unsigned int kernfs_root_flags(struct kernfs_node *kn)
+{ return 0; }
static inline struct kernfs_node *
kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c840431eadda..c8971861521a 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -203,6 +203,15 @@ static inline int arch_kimage_file_post_load_cleanup(struct kimage *image)
}
#endif
+#ifndef arch_check_excluded_range
+static inline int arch_check_excluded_range(struct kimage *image,
+ unsigned long start,
+ unsigned long end)
+{
+ return 0;
+}
+#endif
+
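A hedged sketch of an architecture overriding the new hook from its asm/kexec.h; the excluded window below is made up:

#define arch_check_excluded_range arch_check_excluded_range
static inline int arch_check_excluded_range(struct kimage *image,
					    unsigned long start,
					    unsigned long end)
{
	/* refuse segments overlapping a firmware-reserved window (example) */
	if (start < 0x200000 && end > 0x100000)
		return -EINVAL;
	return 0;
}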
#ifdef CONFIG_KEXEC_SIG
#ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION
int kexec_kernel_verify_pe_sig(const char *kernel, unsigned long kernel_len);
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 51ef131e66b7..5eebbe7a3545 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -257,7 +257,6 @@ extern void kgdb_arch_late(void);
* hardware breakpoints.
* @correct_hw_break: Allow an architecture to specify how to correct the
* hardware debug registers.
- * @enable_nmi: Manage NMI-triggered entry to KGDB
*/
struct kgdb_arch {
unsigned char gdb_bpt_instr[BREAK_INSTR_SIZE];
@@ -270,8 +269,6 @@ struct kgdb_arch {
void (*disable_hw_break)(struct pt_regs *regs);
void (*remove_all_hw_break)(void);
void (*correct_hw_break)(void);
-
- void (*enable_nmi)(bool on);
};
/**
@@ -306,14 +303,6 @@ extern const struct kgdb_arch arch_kgdb_ops;
extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs);
-#ifdef CONFIG_SERIAL_KGDB_NMI
-extern int kgdb_register_nmi_console(void);
-extern int kgdb_unregister_nmi_console(void);
-#else
-static inline int kgdb_register_nmi_console(void) { return 0; }
-static inline int kgdb_unregister_nmi_console(void) { return 0; }
-#endif
-
extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
extern struct kgdb_io *dbg_io_ops;
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 5c8865bb59d9..b11660b706c5 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -134,10 +134,6 @@
.size name, .-name
#endif
-/* If symbol 'name' is treated as a subroutine (gets called, and returns)
- * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of
- * static analysis tools such as stack depth analyzer.
- */
#ifndef ENDPROC
/* deprecated, use SYM_FUNC_END */
#define ENDPROC(name) \
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index fa6e8471bd22..248db9b77ee2 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -28,6 +28,7 @@ struct hlist_nulls_node {
#define NULLS_MARKER(value) (1UL | (((long)value) << 1))
#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
+#define HLIST_NULLS_HEAD_INIT(nulls) {.first = (struct hlist_nulls_node *)NULLS_MARKER(nulls)}
#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
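A hedged usage sketch of the new static initializer, the compile-time counterpart of INIT_HLIST_NULLS_HEAD(); the nulls value 1 is arbitrary here:

static struct hlist_nulls_head example_head = HLIST_NULLS_HEAD_INIT(1);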
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index b38a56a13f39..725fd7727422 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -97,8 +97,6 @@ ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf,
ssize_t mei_cldev_send_timeout(struct mei_cl_device *cldev, const u8 *buf,
size_t length, unsigned long timeout);
ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length);
-ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf,
- size_t length);
ssize_t mei_cldev_recv_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length,
unsigned long timeout);
ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf,
@@ -107,8 +105,6 @@ ssize_t mei_cldev_send_vtag_timeout(struct mei_cl_device *cldev, const u8 *buf,
size_t length, u8 vtag, unsigned long timeout);
ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length,
u8 *vtag);
-ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf,
- size_t length, u8 *vtag);
ssize_t mei_cldev_recv_vtag_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length,
u8 *vtag, unsigned long timeout);
@@ -116,7 +112,6 @@ int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb);
int mei_cldev_register_notif_cb(struct mei_cl_device *cldev,
mei_cldev_cb_t notif_cb);
-const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev);
u8 mei_cldev_ver(const struct mei_cl_device *cldev);
void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev);
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e79eb6ac516f..ef5a1ecc6e59 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -133,7 +133,6 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size);
-void memblock_free_all(void);
void memblock_free(void *ptr, size_t size);
void reset_all_zones_managed_pages(void);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6e74b8254d9b..53364526d877 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -438,9 +438,7 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
*/
static inline bool folio_memcg_charged(struct folio *folio)
{
- if (folio_memcg_kmem(folio))
- return __folio_objcg(folio) != NULL;
- return __folio_memcg(folio) != NULL;
+ return folio->memcg_data != 0;
}
/*
@@ -649,8 +647,6 @@ int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp);
int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry);
-void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
-
void __mem_cgroup_uncharge(struct folio *folio);
/**
@@ -1040,7 +1036,9 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
rcu_read_unlock();
}
-void split_page_memcg(struct page *head, int old_order, int new_order);
+void split_page_memcg(struct page *first, unsigned order);
+void folio_split_memcg_refs(struct folio *folio, unsigned old_order,
+ unsigned new_order);
static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
{
@@ -1165,10 +1163,6 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio,
return 0;
}
-static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr)
-{
-}
-
static inline void mem_cgroup_uncharge(struct folio *folio)
{
}
@@ -1465,7 +1459,12 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}
-static inline void split_page_memcg(struct page *head, int old_order, int new_order)
+static inline void split_page_memcg(struct page *first, unsigned order)
+{
+}
+
+static inline void folio_split_memcg_refs(struct folio *folio,
+ unsigned old_order, unsigned new_order)
{
}
@@ -1848,6 +1847,9 @@ static inline void mem_cgroup_exit_user_fault(void)
current->in_user_fault = 0;
}
+void memcg1_swapout(struct folio *folio, swp_entry_t entry);
+void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages);
+
#else /* CONFIG_MEMCG_V1 */
static inline
unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
@@ -1875,6 +1877,14 @@ static inline void mem_cgroup_exit_user_fault(void)
{
}
+static inline void memcg1_swapout(struct folio *folio, swp_entry_t entry)
+{
+}
+
+static inline void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages)
+{
+}
+
#endif /* CONFIG_MEMCG_V1 */
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/memory.h b/include/linux/memory.h
index c0afee5d126e..12daa6ec7d09 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -25,7 +25,7 @@
/**
* struct memory_group - a logical group of memory blocks
* @nid: The node id for all memory blocks inside the memory group.
- * @blocks: List of all memory blocks belonging to this memory group.
+ * @memory_blocks: List of all memory blocks belonging to this memory group.
* @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this
* memory group.
* @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 3f7143ade32c..4aa151914eab 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct page *page)
{
return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+ page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE;
}
static inline bool folio_is_device_private(const struct folio *folio)
@@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
{
return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
+ page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA;
}
static inline bool is_device_coherent_page(const struct page *page)
{
return is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_COHERENT;
+ page_pgmap(page)->type == MEMORY_DEVICE_COHERENT;
}
static inline bool folio_is_device_coherent(const struct folio *folio)
@@ -187,6 +187,17 @@ static inline bool folio_is_device_coherent(const struct folio *folio)
return is_device_coherent_page(&folio->page);
}
+static inline bool is_fsdax_page(const struct page *page)
+{
+ return is_zone_device_page(page) &&
+ page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX;
+}
+
+static inline bool folio_is_fsdax(const struct folio *folio)
+{
+ return is_fsdax_page(&folio->page);
+}
+
#ifdef CONFIG_ZONE_DEVICE
void zone_device_page_init(struct page *page);
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h
index 068ae1c0f0e8..27883af44f87 100644
--- a/include/linux/mfd/mt6397/rtc.h
+++ b/include/linux/mfd/mt6397/rtc.h
@@ -60,11 +60,6 @@
#define RTC_PDN2 0x002e
#define RTC_PDN2_PWRON_ALARM BIT(4)
-#define RTC_MIN_YEAR 1968
-#define RTC_BASE_YEAR 1900
-#define RTC_NUM_YEARS 128
-#define RTC_MIN_YEAR_OFFSET (RTC_MIN_YEAR - RTC_BASE_YEAR)
-
#define MTK_RTC_POLL_DELAY_US 10
#define MTK_RTC_POLL_TIMEOUT (jiffies_to_usecs(HZ))
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 059dc94d20bb..dd372b0123a6 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -721,12 +721,6 @@ enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl);
void mhi_soc_reset(struct mhi_controller *mhi_cntrl);
/**
- * mhi_device_get - Disable device low power mode
- * @mhi_dev: Device associated with the channel
- */
-void mhi_device_get(struct mhi_device *mhi_dev);
-
-/**
* mhi_device_get_sync - Disable device low power mode. Synchronously
* take the controller out of suspended state
* @mhi_dev: Device associated with the channel
@@ -777,18 +771,6 @@ int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev);
void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev);
/**
- * mhi_queue_dma - Send or receive DMA mapped buffers from client device
- * over MHI channel
- * @mhi_dev: Device associated with the channels
- * @dir: DMA direction for the channel
- * @mhi_buf: Buffer for holding the DMA mapped data
- * @len: Buffer length
- * @mflags: MHI transfer flags used for the transfer
- */
-int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir,
- struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags);
-
-/**
* mhi_queue_buf - Send or receive raw buffers from client device over MHI
* channel
* @mhi_dev: Device associated with the channels
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 80891120cca9..aaa2114498d6 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -205,8 +205,8 @@ struct migrate_vma {
unsigned long end;
/*
- * Set to the owner value also stored in page->pgmap->owner for
- * migrating out of device private memory. The flags also need to
+ * Set to the owner value also stored in page_pgmap(page)->owner
+ * for migrating out of device private memory. The flags also need to
* be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE.
* The caller should always set this field when using mmu notifier
* callbacks to avoid device MMU invalidations for device private
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 1160bed6579e..79ddc0adbf2b 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -218,7 +218,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
/* Initialize a min-heap. */
static __always_inline
-void __min_heap_init_inline(min_heap_char *heap, void *data, int size)
+void __min_heap_init_inline(min_heap_char *heap, void *data, size_t size)
{
heap->nr = 0;
heap->size = size;
@@ -254,7 +254,7 @@ bool __min_heap_full_inline(min_heap_char *heap)
/* Sift the element at pos down the heap. */
static __always_inline
-void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
+void __min_heap_sift_down_inline(min_heap_char *heap, size_t pos, size_t elem_size,
const struct min_heap_callbacks *func, void *args)
{
const unsigned long lsbit = elem_size & -elem_size;
@@ -324,7 +324,7 @@ static __always_inline
void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size,
const struct min_heap_callbacks *func, void *args)
{
- int i;
+ ssize_t i;
for (i = heap->nr / 2 - 1; i >= 0; i--)
__min_heap_sift_down_inline(heap, i, elem_size, func, args);
@@ -379,7 +379,7 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele
const struct min_heap_callbacks *func, void *args)
{
void *data = heap->data;
- int pos;
+ size_t pos;
if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap"))
return false;
@@ -428,10 +428,10 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
__min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
__minheap_obj_size(_heap), _idx, _func, _args)
-void __min_heap_init(min_heap_char *heap, void *data, int size);
+void __min_heap_init(min_heap_char *heap, void *data, size_t size);
void *__min_heap_peek(struct min_heap_char *heap);
bool __min_heap_full(min_heap_char *heap);
-void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
+void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size,
const struct min_heap_callbacks *func, void *args);
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
const struct min_heap_callbacks *func, void *args);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4a50ba8002e9..b7f13f087954 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -32,6 +32,7 @@
#include <linux/memremap.h>
#include <linux/slab.h>
#include <linux/cacheinfo.h>
+#include <linux/rcuwait.h>
struct mempolicy;
struct anon_vma;
@@ -40,20 +41,10 @@ struct user_struct;
struct pt_regs;
struct folio_batch;
+void arch_mm_preinit(void);
void mm_core_init(void);
void init_mm_internals(void);
-#ifndef CONFIG_NUMA /* Don't use mapnrs, do it properly */
-extern unsigned long max_mapnr;
-
-static inline void set_max_mapnr(unsigned long limit)
-{
- max_mapnr = limit;
-}
-#else
-static inline void set_max_mapnr(unsigned long limit) { }
-#endif
-
extern atomic_long_t _totalram_pages;
static inline unsigned long totalram_pages(void)
{
@@ -242,8 +233,6 @@ void setup_initial_init_mm(void *start_code, void *end_code,
struct vm_area_struct *vm_area_alloc(struct mm_struct *);
struct vm_area_struct *vm_area_dup(struct vm_area_struct *);
void vm_area_free(struct vm_area_struct *);
-/* Use only if VMA has no other users */
-void __vm_area_free(struct vm_area_struct *vma);
#ifndef CONFIG_MMU
extern struct rb_root nommu_region_tree;
@@ -682,13 +671,57 @@ static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_PER_VMA_LOCK
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ static struct lock_class_key lockdep_key;
+
+ lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
+#endif
+ if (reset_refcnt)
+ refcount_set(&vma->vm_refcnt, 0);
+ vma->vm_lock_seq = UINT_MAX;
+}
+
+static inline bool is_vma_writer_only(int refcnt)
+{
+ /*
+ * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
+ * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
+ * a detached vma happens only in vma_mark_detached() and is a rare
+ * case, therefore most of the time there will be no unnecessary wakeup.
+ */
+ return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
+}
+
+static inline void vma_refcount_put(struct vm_area_struct *vma)
+{
+ /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
+ struct mm_struct *mm = vma->vm_mm;
+ int oldcnt;
+
+ rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
+ if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
+
+ if (is_vma_writer_only(oldcnt - 1))
+ rcuwait_wake_up(&mm->vma_writer_wait);
+ }
+}
+
/*
* Try to read-lock a vma. The function is allowed to occasionally yield false
* locked result to avoid performance overhead, in which case we fall back to
* using mmap_lock. The function should never yield false unlocked result.
+ * False locked result is possible if mm_lock_seq overflows or if vma gets
+ * reused and attached to a different mm before we lock it.
+ * Returns the vma on success, NULL on failure to lock, and ERR_PTR(-EAGAIN)
+ * if the vma got detached.
*/
-static inline bool vma_start_read(struct vm_area_struct *vma)
+static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
+ struct vm_area_struct *vma)
{
+ int oldcnt;
+
/*
* Check before locking. A race might cause false locked result.
* We can use READ_ONCE() for the mm_lock_seq here, and don't need
@@ -696,16 +729,26 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
* we don't rely on for anything - the mm_lock_seq read against which we
* need ordering is below.
*/
- if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
- return false;
+ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
+ return NULL;
- if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
- return false;
+ /*
+ * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
+ * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
+ * Acquire fence is required here to avoid reordering against later
+ * vm_lock_seq check and checks inside lock_vma_under_rcu().
+ */
+ if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
+ VMA_REF_LIMIT))) {
+ /* return EAGAIN if vma got detached from under us */
+ return oldcnt ? NULL : ERR_PTR(-EAGAIN);
+ }
+ rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
/*
- * Overflow might produce false locked result.
+ * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
* False unlocked result is impossible because we modify and check
- * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq
+ * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
* modification invalidates all existing locks.
*
* We must use ACQUIRE semantics for the mm_lock_seq so that if we are
@@ -713,18 +756,47 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
* after it has been unlocked.
* This pairs with RELEASE semantics in vma_end_write_all().
*/
- if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
- up_read(&vma->vm_lock->lock);
- return false;
+ if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
+ vma_refcount_put(vma);
+ return NULL;
}
+
+ return vma;
+}
+
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
+{
+ int oldcnt;
+
+ mmap_assert_locked(vma->vm_mm);
+ if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
+ VMA_REF_LIMIT)))
+ return false;
+
+ rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
return true;
}
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline bool vma_start_read_locked(struct vm_area_struct *vma)
+{
+ return vma_start_read_locked_nested(vma, 0);
+}
+
static inline void vma_end_read(struct vm_area_struct *vma)
{
- rcu_read_lock(); /* keeps vma alive till the end of up_read */
- up_read(&vma->vm_lock->lock);
- rcu_read_unlock();
+ vma_refcount_put(vma);
}
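A hedged usage sketch of the refcount-based per-VMA read lock, mirroring the lock_vma_under_rcu() pattern; the RCU lookup that produces the vma and the actual fault handling are elided:

static bool example_fault_under_vma_lock(struct mm_struct *mm,
					 struct vm_area_struct *vma)
{
	vma = vma_start_read(mm, vma);
	if (IS_ERR_OR_NULL(vma))
		return false;		/* caller falls back to mmap_lock */

	/* ... handle the fault under the per-VMA read lock ... */

	vma_end_read(vma);
	return true;
}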
/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
@@ -740,6 +812,8 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_l
return (vma->vm_lock_seq == *mm_lock_seq);
}
+void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);
+
/*
* Begin writing to a VMA.
* Exclude concurrent readers under the per-VMA lock until the currently
@@ -752,15 +826,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)
if (__is_vma_write_locked(vma, &mm_lock_seq))
return;
- down_write(&vma->vm_lock->lock);
- /*
- * We should use WRITE_ONCE() here because we can have concurrent reads
- * from the early lockless pessimistic check in vma_start_read().
- * We don't really care about the correctness of that early check, but
- * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy.
- */
- WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
- up_write(&vma->vm_lock->lock);
+ __vma_start_write(vma, mm_lock_seq);
}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
@@ -772,18 +838,36 @@ static inline void vma_assert_write_locked(struct vm_area_struct *vma)
static inline void vma_assert_locked(struct vm_area_struct *vma)
{
- if (!rwsem_is_locked(&vma->vm_lock->lock))
- vma_assert_write_locked(vma);
+ unsigned int mm_lock_seq;
+
+ VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
+ !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}
-static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
+/*
+ * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
+ * assertions should be made either under mmap_write_lock or when the object
+ * has been isolated under mmap_write_lock, ensuring no competing writers.
+ */
+static inline void vma_assert_attached(struct vm_area_struct *vma)
{
- /* When detaching vma should be write-locked */
- if (detached)
- vma_assert_write_locked(vma);
- vma->detached = detached;
+ WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
+}
+
+static inline void vma_assert_detached(struct vm_area_struct *vma)
+{
+ WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
+}
+
+static inline void vma_mark_attached(struct vm_area_struct *vma)
+{
+ vma_assert_write_locked(vma);
+ vma_assert_detached(vma);
+ refcount_set_release(&vma->vm_refcnt, 1);
}
+void vma_mark_detached(struct vm_area_struct *vma);
+
static inline void release_fault_lock(struct vm_fault *vmf)
{
if (vmf->flags & FAULT_FLAG_VMA_LOCK)
@@ -805,14 +889,18 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
#else /* CONFIG_PER_VMA_LOCK */
-static inline bool vma_start_read(struct vm_area_struct *vma)
- { return false; }
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
+static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+ { return NULL; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{ mmap_assert_write_locked(vma->vm_mm); }
-static inline void vma_mark_detached(struct vm_area_struct *vma,
- bool detached) {}
+static inline void vma_assert_attached(struct vm_area_struct *vma) {}
+static inline void vma_assert_detached(struct vm_area_struct *vma) {}
+static inline void vma_mark_attached(struct vm_area_struct *vma) {}
+static inline void vma_mark_detached(struct vm_area_struct *vma) {}
static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
unsigned long address)
@@ -839,18 +927,13 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
extern const struct vm_operations_struct vma_dummy_vm_ops;
-/*
- * WARNING: vma_init does not initialize vma->vm_lock.
- * Use vm_area_alloc()/vm_area_free() if vma needs locking.
- */
static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
{
memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_ops = &vma_dummy_vm_ops;
INIT_LIST_HEAD(&vma->anon_vma_chain);
- vma_mark_detached(vma, false);
- vma_numab_state_init(vma);
+ vma_lock_init(vma, false);
}
/* Use when VMA is not part of the VMA tree and needs no locking */
@@ -1043,6 +1126,7 @@ static inline int vma_iter_bulk_store(struct vma_iterator *vmi,
if (unlikely(mas_is_err(&vmi->mas)))
return -ENOMEM;
+ vma_mark_attached(vma);
return 0;
}
@@ -1083,6 +1167,25 @@ int vma_is_stack_for_current(struct vm_area_struct *vma);
struct mmu_gather;
struct inode;
+extern void prep_compound_page(struct page *page, unsigned int order);
+
+static inline unsigned int folio_large_order(const struct folio *folio)
+{
+ return folio->_flags_1 & 0xff;
+}
+
+#ifdef NR_PAGES_IN_LARGE_FOLIO
+static inline long folio_large_nr_pages(const struct folio *folio)
+{
+ return folio->_nr_pages;
+}
+#else
+static inline long folio_large_nr_pages(const struct folio *folio)
+{
+ return 1L << folio_large_order(folio);
+}
+#endif
+
/*
* compound_order() can be called without holding a reference, which means
* that niceties like page_folio() don't work. These callers should be
@@ -1096,7 +1199,7 @@ static inline unsigned int compound_order(struct page *page)
if (!test_bit(PG_head, &folio->flags))
return 0;
- return folio->_flags_1 & 0xff;
+ return folio_large_order(folio);
}
/**
@@ -1112,7 +1215,7 @@ static inline unsigned int folio_order(const struct folio *folio)
{
if (!folio_test_large(folio))
return 0;
- return folio->_flags_1 & 0xff;
+ return folio_large_order(folio);
}
#include <linux/huge_mm.h>
@@ -1205,6 +1308,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
static inline int folio_entire_mapcount(const struct folio *folio)
{
VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
+ if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio_large_order(folio) == 1))
+ return 0;
return atomic_read(&folio->_entire_mapcount) + 1;
}
@@ -1404,25 +1509,6 @@ vm_fault_t finish_fault(struct vm_fault *vmf);
* back into memory.
*/
-#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
-DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-
-bool __put_devmap_managed_folio_refs(struct folio *folio, int refs);
-static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs)
-{
- if (!static_branch_unlikely(&devmap_managed_key))
- return false;
- if (!folio_is_zone_device(folio))
- return false;
- return __put_devmap_managed_folio_refs(folio, refs);
-}
-#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs)
-{
- return false;
-}
-#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-
/* 127: arbitrary random number, small enough to assemble well */
#define folio_ref_zero_or_close_to_overflow(folio) \
((unsigned int) folio_ref_count(folio) + 127u <= 127u)
@@ -1543,12 +1629,6 @@ static inline void put_page(struct page *page)
if (folio_test_slab(folio))
return;
- /*
- * For some devmap managed pages we need to catch refcount transition
- * from 2 to 1:
- */
- if (put_devmap_managed_folio_refs(folio, 1))
- return;
folio_put(folio);
}
@@ -1907,6 +1987,13 @@ static inline struct folio *pfn_folio(unsigned long pfn)
return page_folio(pfn_to_page(pfn));
}
+static inline bool folio_has_pincount(const struct folio *folio)
+{
+ if (IS_ENABLED(CONFIG_64BIT))
+ return folio_test_large(folio);
+ return folio_order(folio) > 1;
+}
+
/**
* folio_maybe_dma_pinned - Report if a folio may be pinned for DMA.
* @folio: The folio.
@@ -1923,7 +2010,7 @@ static inline struct folio *pfn_folio(unsigned long pfn)
* get that many refcounts, and b) all the callers of this routine are
* expected to be able to deal gracefully with a false positive.
*
- * For large folios, the result will be exactly correct. That's because
+ * For most large folios, the result will be exactly correct. That's because
* we have more tracking data available: the _pincount field is used
* instead of the GUP_PIN_COUNTING_BIAS scheme.
*
@@ -1934,7 +2021,7 @@ static inline struct folio *pfn_folio(unsigned long pfn)
*/
static inline bool folio_maybe_dma_pinned(struct folio *folio)
{
- if (folio_test_large(folio))
+ if (folio_has_pincount(folio))
return atomic_read(&folio->_pincount) > 0;
/*
@@ -2006,6 +2093,13 @@ static inline bool folio_is_longterm_pinnable(struct folio *folio)
if (folio_is_device_coherent(folio))
return false;
+ /*
+ * Filesystems can only tolerate transient delays to truncate and
+ * hole-punch operations
+ */
+ if (folio_is_fsdax(folio))
+ return false;
+
/* Otherwise, non-movable zone folios can be pinned. */
return !folio_is_zone_movable(folio);
@@ -2049,11 +2143,7 @@ static inline long folio_nr_pages(const struct folio *folio)
{
if (!folio_test_large(folio))
return 1;
-#ifdef CONFIG_64BIT
- return folio->_folio_nr_pages;
-#else
- return 1L << (folio->_flags_1 & 0xff);
-#endif
+ return folio_large_nr_pages(folio);
}
/* Only hugetlbfs can allocate folios larger than MAX_ORDER */
@@ -2068,24 +2158,20 @@ static inline long folio_nr_pages(const struct folio *folio)
* page. compound_nr() can be called on a tail page, and is defined to
* return 1 in that case.
*/
-static inline unsigned long compound_nr(struct page *page)
+static inline long compound_nr(struct page *page)
{
struct folio *folio = (struct folio *)page;
if (!test_bit(PG_head, &folio->flags))
return 1;
-#ifdef CONFIG_64BIT
- return folio->_folio_nr_pages;
-#else
- return 1L << (folio->_flags_1 & 0xff);
-#endif
+ return folio_large_nr_pages(folio);
}
/**
* thp_nr_pages - The number of regular pages in this huge page.
* @page: The head page of a huge page.
*/
-static inline int thp_nr_pages(struct page *page)
+static inline long thp_nr_pages(struct page *page)
{
return folio_nr_pages((struct folio *)page);
}
@@ -2140,23 +2226,18 @@ static inline size_t folio_size(const struct folio *folio)
}
/**
- * folio_likely_mapped_shared - Estimate if the folio is mapped into the page
- * tables of more than one MM
+ * folio_maybe_mapped_shared - Whether the folio is mapped into the page
+ * tables of more than one MM
* @folio: The folio.
*
- * This function checks if the folio is currently mapped into more than one
- * MM ("mapped shared"), or if the folio is only mapped into a single MM
- * ("mapped exclusively").
+ * This function checks if the folio may currently be mapped into more than one
+ * MM ("maybe mapped shared"), or if the folio is certainly mapped into a single
+ * MM ("mapped exclusively").
*
* For KSM folios, this function also returns "mapped shared" when a folio is
* mapped multiple times into the same MM, because the individual page mappings
* are independent.
*
- * As precise information is not easily available for all folios, this function
- * estimates the number of MMs ("sharers") that are currently mapping a folio
- * using the number of times the first page of the folio is currently mapped
- * into page tables.
- *
* For small anonymous folios and anonymous hugetlb folios, the return
* value will be exactly correct: non-KSM folios can only be mapped at most once
* into an MM, and they cannot be partially mapped. KSM folios are
@@ -2164,8 +2245,8 @@ static inline size_t folio_size(const struct folio *folio)
*
* For other folios, the result can be fuzzy:
* #. For partially-mappable large folios (THP), the return value can wrongly
- * indicate "mapped exclusively" (false negative) when the folio is
- * only partially mapped into at least one MM.
+ * indicate "mapped shared" (false positive) if a folio was mapped by
+ * more than two MMs at one point in time.
* #. For pagecache folios (including hugetlb), the return value can wrongly
* indicate "mapped shared" (false positive) when two VMAs in the same MM
* cover the same file range.
@@ -2182,7 +2263,7 @@ static inline size_t folio_size(const struct folio *folio)
*
* Return: Whether the folio is estimated to be mapped into more than one MM.
*/
-static inline bool folio_likely_mapped_shared(struct folio *folio)
+static inline bool folio_maybe_mapped_shared(struct folio *folio)
{
int mapcount = folio_mapcount(folio);
@@ -2190,16 +2271,22 @@ static inline bool folio_likely_mapped_shared(struct folio *folio)
if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio)))
return mapcount > 1;
- /* A single mapping implies "mapped exclusively". */
- if (mapcount <= 1)
- return false;
-
- /* If any page is mapped more than once we treat it "mapped shared". */
- if (folio_entire_mapcount(folio) || mapcount > folio_nr_pages(folio))
+ /*
+ * vm_insert_page() without CONFIG_TRANSPARENT_HUGEPAGE ...
+ * simply assume "mapped shared", nobody should really care
+ * about this for arbitrary kernel allocations.
+ */
+ if (!IS_ENABLED(CONFIG_MM_ID))
return true;
- /* Let's guess based on the first subpage. */
- return atomic_read(&folio->_mapcount) > 0;
+ /*
+ * A single mapping implies "mapped exclusively", even if the
+ * folio flag says something different: it's easier to handle this
+ * case here instead of on the RMAP hot path.
+ */
+ if (mapcount <= 1)
+ return false;
+ return folio_test_large_maybe_mapped_shared(folio);
}
#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
@@ -2408,11 +2495,13 @@ struct follow_pfnmap_args {
* Outputs:
*
* @pfn: the PFN of the address
+ * @addr_mask: address mask covering pfn
* @pgprot: the pgprot_t of the mapping
* @writable: whether the mapping is writable
* @special: whether the mapping is a special mapping (real PFN maps)
*/
unsigned long pfn;
+ unsigned long addr_mask;
pgprot_t pgprot;
bool writable;
bool special;
@@ -3179,7 +3268,6 @@ extern void reserve_bootmem_region(phys_addr_t start,
/* Free the reserved page into the buddy system, so it gets managed. */
void free_reserved_page(struct page *page);
-#define free_highmem_page(page) free_reserved_page(page)
static inline void mark_page_reserved(struct page *page)
{
@@ -3539,6 +3627,8 @@ int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
unsigned long num);
int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
unsigned long num);
+vm_fault_t vmf_insert_page_mkwrite(struct vm_fault *vmf, struct page *page,
+ bool write);
vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn);
vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
@@ -3817,6 +3907,7 @@ static inline void print_vma_addr(char *prefix, unsigned long rip)
#endif
void *sparse_buffer_alloc(unsigned long size);
+unsigned long section_map_size(void);
struct page * __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
@@ -3825,7 +3916,8 @@ p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
- struct vmem_altmap *altmap, struct page *reuse);
+ struct vmem_altmap *altmap, unsigned long ptpfn,
+ unsigned long flags);
void *vmemmap_alloc_block(unsigned long size, int node);
struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node,
@@ -3841,6 +3933,12 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
+int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node,
+ unsigned long headsize);
+int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node,
+ unsigned long headsize);
+void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
+ unsigned long headsize);
void vmemmap_populate_print_last(void);
#ifdef CONFIG_MEMORY_HOTPLUG
void vmemmap_free(unsigned long start, unsigned long end,
@@ -3907,9 +4005,6 @@ static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
}
#endif
-void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
- unsigned long nr_pages);
-
enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
MF_ACTION_REQUIRED = 1 << 1,
@@ -4110,6 +4205,7 @@ void vma_pgtable_walk_begin(struct vm_area_struct *vma);
void vma_pgtable_walk_end(struct vm_area_struct *vma);
int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
+int reserve_mem_release_by_name(const char *name);
#ifdef CONFIG_64BIT
int do_mseal(unsigned long start, size_t len_in, unsigned long flags);
@@ -4142,4 +4238,14 @@ int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *st
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
+
+/*
+ * mseal of userspace process's system mappings.
+ */
+#ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS
+#define VM_SEALED_SYSMAP VM_SEALED
+#else
+#define VM_SEALED_SYSMAP VM_NONE
+#endif
+
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 75e8850cec3a..56d07edd01f9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <linux/seqlock.h>
#include <linux/percpu_counter.h>
+#include <linux/types.h>
#include <asm/mmu.h>
@@ -133,8 +134,11 @@ struct page {
unsigned long compound_head; /* Bit zero is set */
};
struct { /* ZONE_DEVICE pages */
- /** @pgmap: Points to the hosting device page map. */
- struct dev_pagemap *pgmap;
+ /*
+ * The first word is used for compound_head or folio
+ * pgmap
+ */
+ void *_unused_pgmap_compound_head;
void *zone_device_data;
/*
* ZONE_DEVICE private pages are counted as being
@@ -287,6 +291,49 @@ typedef struct {
unsigned long val;
} swp_entry_t;
+#if defined(CONFIG_MEMCG) || defined(CONFIG_SLAB_OBJ_EXT)
+/* We have some extra room after the refcount in tail pages. */
+#define NR_PAGES_IN_LARGE_FOLIO
+#endif
+
+/*
+ * On 32bit, we can cut the required metadata in half, because:
+ * (a) PID_MAX_LIMIT implicitly limits the number of MMs we could ever have,
+ * so we can limit MM IDs to 15 bit (32767).
+ * (b) We don't expect folios where even a single complete PTE mapping by
+ * one MM would exceed 15 bits (order-15).
+ */
+#ifdef CONFIG_64BIT
+typedef int mm_id_mapcount_t;
+#define MM_ID_MAPCOUNT_MAX INT_MAX
+typedef unsigned int mm_id_t;
+#else /* !CONFIG_64BIT */
+typedef short mm_id_mapcount_t;
+#define MM_ID_MAPCOUNT_MAX SHRT_MAX
+typedef unsigned short mm_id_t;
+#endif /* CONFIG_64BIT */
+
+/* We implicitly use the dummy ID for init-mm etc. where we never rmap pages. */
+#define MM_ID_DUMMY 0
+#define MM_ID_MIN (MM_ID_DUMMY + 1)
+
+/*
+ * We leave the highest bit of each MM id unused, so we can store a flag
+ * in the highest bit of each folio->_mm_id[].
+ */
+#define MM_ID_BITS ((sizeof(mm_id_t) * BITS_PER_BYTE) - 1)
+#define MM_ID_MASK ((1U << MM_ID_BITS) - 1)
+#define MM_ID_MAX MM_ID_MASK
+
+/*
+ * In order to use bit_spin_lock(), which requires an unsigned long, we
+ * operate on folio->_mm_ids when working on flags.
+ */
+#define FOLIO_MM_IDS_LOCK_BITNUM MM_ID_BITS
+#define FOLIO_MM_IDS_LOCK_BIT BIT(FOLIO_MM_IDS_LOCK_BITNUM)
+#define FOLIO_MM_IDS_SHARED_BITNUM (2 * MM_ID_BITS + 1)
+#define FOLIO_MM_IDS_SHARED_BIT BIT(FOLIO_MM_IDS_SHARED_BITNUM)
+
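For orientation, the constants above work out as follows on a 64-bit build, where mm_id_t is an unsigned int; the checks below are an editorial sketch, not part of the header (on 32-bit, MM_ID_BITS is 15, the lock bit is 15 and the shared bit is 31):

    /* Illustrative compile-time checks, assuming a 64-bit build. */
    static_assert(MM_ID_BITS == 31);
    static_assert(MM_ID_MASK == 0x7fffffffU);
    static_assert(FOLIO_MM_IDS_LOCK_BITNUM == 31);
    static_assert(FOLIO_MM_IDS_SHARED_BITNUM == 63);

Both flag bits therefore land in the top bit of _mm_id[0] and _mm_id[1] respectively, which is exactly the bit left unused by MM_ID_MASK.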
/**
* struct folio - Represents a contiguous set of bytes.
* @flags: Identical to the page flags.
@@ -296,6 +343,8 @@ typedef struct {
* anonymous memory.
* @index: Offset within the file, in units of pages. For anonymous memory,
* this is the index from the beginning of the mmap.
+ * @share: number of DAX mappings that reference this folio. See
+ * dax_associate_entry.
* @private: Filesystem per-folio data (see folio_attach_private()).
* @swap: Used for swp_entry_t if folio_test_swapcache().
* @_mapcount: Do not access this member directly. Use folio_mapcount() to
@@ -303,13 +352,17 @@ typedef struct {
* @_refcount: Do not access this member directly. Use folio_ref_count()
* to find how many references there are to this folio.
* @memcg_data: Memory Control Group data.
+ * @pgmap: Metadata for ZONE_DEVICE mappings
* @virtual: Virtual address in the kernel direct map.
* @_last_cpupid: IDs of last CPU and last process that accessed the folio.
* @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
* @_large_mapcount: Do not use directly, call folio_mapcount().
* @_nr_pages_mapped: Do not use outside of rmap and debug code.
* @_pincount: Do not use directly, call folio_maybe_dma_pinned().
- * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
+ * @_nr_pages: Do not use directly, call folio_nr_pages().
+ * @_mm_id: Do not use outside of rmap code.
+ * @_mm_ids: Do not use outside of rmap code.
+ * @_mm_id_mapcount: Do not use outside of rmap code.
* @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
* @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
* @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
@@ -341,9 +394,13 @@ struct folio {
/* private: */
};
/* public: */
+ struct dev_pagemap *pgmap;
};
struct address_space *mapping;
- pgoff_t index;
+ union {
+ pgoff_t index;
+ unsigned long share;
+ };
union {
void *private;
swp_entry_t swap;
@@ -369,14 +426,30 @@ struct folio {
struct {
unsigned long _flags_1;
unsigned long _head_1;
+ union {
+ struct {
/* public: */
- atomic_t _large_mapcount;
- atomic_t _entire_mapcount;
- atomic_t _nr_pages_mapped;
- atomic_t _pincount;
+ atomic_t _large_mapcount;
+ atomic_t _nr_pages_mapped;
#ifdef CONFIG_64BIT
- unsigned int _folio_nr_pages;
-#endif
+ atomic_t _entire_mapcount;
+ atomic_t _pincount;
+#endif /* CONFIG_64BIT */
+ mm_id_mapcount_t _mm_id_mapcount[2];
+ union {
+ mm_id_t _mm_id[2];
+ unsigned long _mm_ids;
+ };
+ /* private: the union with struct page is transitional */
+ };
+ unsigned long _usable_1[4];
+ };
+ atomic_t _mapcount_1;
+ atomic_t _refcount_1;
+ /* public: */
+#ifdef NR_PAGES_IN_LARGE_FOLIO
+ unsigned int _nr_pages;
+#endif /* NR_PAGES_IN_LARGE_FOLIO */
/* private: the union with struct page is transitional */
};
struct page __page_1;
@@ -386,20 +459,27 @@ struct folio {
unsigned long _flags_2;
unsigned long _head_2;
/* public: */
- void *_hugetlb_subpool;
- void *_hugetlb_cgroup;
- void *_hugetlb_cgroup_rsvd;
- void *_hugetlb_hwpoison;
+ struct list_head _deferred_list;
+#ifndef CONFIG_64BIT
+ atomic_t _entire_mapcount;
+ atomic_t _pincount;
+#endif /* !CONFIG_64BIT */
/* private: the union with struct page is transitional */
};
+ struct page __page_2;
+ };
+ union {
struct {
- unsigned long _flags_2a;
- unsigned long _head_2a;
+ unsigned long _flags_3;
+ unsigned long _head_3;
/* public: */
- struct list_head _deferred_list;
+ void *_hugetlb_subpool;
+ void *_hugetlb_cgroup;
+ void *_hugetlb_cgroup_rsvd;
+ void *_hugetlb_hwpoison;
/* private: the union with struct page is transitional */
};
- struct page __page_2;
+ struct page __page_3;
};
};
@@ -428,14 +508,20 @@ FOLIO_MATCH(_last_cpupid, _last_cpupid);
offsetof(struct page, pg) + sizeof(struct page))
FOLIO_MATCH(flags, _flags_1);
FOLIO_MATCH(compound_head, _head_1);
+FOLIO_MATCH(_mapcount, _mapcount_1);
+FOLIO_MATCH(_refcount, _refcount_1);
#undef FOLIO_MATCH
#define FOLIO_MATCH(pg, fl) \
static_assert(offsetof(struct folio, fl) == \
offsetof(struct page, pg) + 2 * sizeof(struct page))
FOLIO_MATCH(flags, _flags_2);
FOLIO_MATCH(compound_head, _head_2);
-FOLIO_MATCH(flags, _flags_2a);
-FOLIO_MATCH(compound_head, _head_2a);
+#undef FOLIO_MATCH
+#define FOLIO_MATCH(pg, fl) \
+ static_assert(offsetof(struct folio, fl) == \
+ offsetof(struct page, pg) + 3 * sizeof(struct page))
+FOLIO_MATCH(flags, _flags_3);
+FOLIO_MATCH(compound_head, _head_3);
#undef FOLIO_MATCH
/**
@@ -578,6 +664,12 @@ static inline void *folio_get_private(struct folio *folio)
typedef unsigned long vm_flags_t;
/*
+ * freeptr_t represents a SLUB freelist pointer, which might be encoded
+ * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
+ */
+typedef struct { unsigned long v; } freeptr_t;
+
+/*
* A region containing a mapping of a non-memory backed file under NOMMU
* conditions. These are held in a global tree and are pinned by the VMAs that
* map parts of them.
@@ -633,9 +725,8 @@ static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
}
#endif
-struct vma_lock {
- struct rw_semaphore lock;
-};
+#define VMA_LOCK_OFFSET 0x40000000
+#define VMA_REF_LIMIT (VMA_LOCK_OFFSET - 1)
struct vma_numab_state {
/*
@@ -681,6 +772,9 @@ struct vma_numab_state {
*
* Only explicitly marked struct members may be accessed by RCU readers before
* getting a stable reference.
+ *
+ * WARNING: when adding new members, please update vm_area_init_from() to copy
+ * them during vm_area_struct content duplication.
*/
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
@@ -691,9 +785,7 @@ struct vm_area_struct {
unsigned long vm_start;
unsigned long vm_end;
};
-#ifdef CONFIG_PER_VMA_LOCK
- struct rcu_head vm_rcu; /* Used for deferred freeing. */
-#endif
+ freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
};
/*
@@ -714,18 +806,12 @@ struct vm_area_struct {
#ifdef CONFIG_PER_VMA_LOCK
/*
- * Flag to indicate areas detached from the mm->mm_mt tree.
- * Unstable RCU readers are allowed to read this.
- */
- bool detached;
-
- /*
* Can only be written (using WRITE_ONCE()) while holding both:
* - mmap_lock (in write mode)
- * - vm_lock->lock (in write mode)
+ * - vm_refcnt bit at VMA_LOCK_OFFSET is set
* Can be read reliably while holding one of:
* - mmap_lock (in read or write mode)
- * - vm_lock->lock (in read or write mode)
+ * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
* Can be read unreliably (using READ_ONCE()) for pessimistic bailout
* while holding nothing (except RCU to keep the VMA struct allocated).
*
@@ -734,20 +820,7 @@ struct vm_area_struct {
* slowpath.
*/
unsigned int vm_lock_seq;
- /* Unstable RCU readers are allowed to read this. */
- struct vma_lock *vm_lock;
#endif
-
- /*
- * For areas with an address space and backing store,
- * linkage into the address_space->i_mmap interval tree.
- *
- */
- struct {
- struct rb_node rb;
- unsigned long rb_subtree_last;
- } shared;
-
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
@@ -767,14 +840,6 @@ struct vm_area_struct {
struct file * vm_file; /* File we map to (can be NULL). */
void * vm_private_data; /* was vm_pte (shared mem) */
-#ifdef CONFIG_ANON_VMA_NAME
- /*
- * For private and shared anonymous mappings, a pointer to a null
- * terminated string containing the name given to the vma, or NULL if
- * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
- */
- struct anon_vma_name *anon_name;
-#endif
#ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info;
#endif
@@ -787,6 +852,30 @@ struct vm_area_struct {
#ifdef CONFIG_NUMA_BALANCING
struct vma_numab_state *numab_state; /* NUMA Balancing state */
#endif
+#ifdef CONFIG_PER_VMA_LOCK
+ /* Unstable RCU readers are allowed to read this. */
+ refcount_t vm_refcnt ____cacheline_aligned_in_smp;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map vmlock_dep_map;
+#endif
+#endif
+ /*
+ * For areas with an address space and backing store,
+ * linkage into the address_space->i_mmap interval tree.
+ *
+ */
+ struct {
+ struct rb_node rb;
+ unsigned long rb_subtree_last;
+ } shared;
+#ifdef CONFIG_ANON_VMA_NAME
+ /*
+ * For private and shared anonymous mappings, a pointer to a null
+ * terminated string containing the name given to the vma, or NULL if
+ * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
+ */
+ struct anon_vma_name *anon_name;
+#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;
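Editorial note: with the vm_lock rw_semaphore gone, per-VMA locking is encoded in vm_refcnt using the VMA_LOCK_OFFSET and VMA_REF_LIMIT constants defined earlier. A minimal reader-side sketch, with a hypothetical helper name rather than the actual mmap_lock.h code:

    /*
     * Sketch only: a reader takes a reference only while the count stays
     * below VMA_LOCK_OFFSET; once a writer has added VMA_LOCK_OFFSET,
     * any further reader increment would exceed VMA_REF_LIMIT and fail.
     */
    static bool example_vma_read_trylock(struct vm_area_struct *vma)
    {
            int old;

            return __refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt,
                                                           &old, VMA_REF_LIMIT);
    }

The not_zero part also rejects a VMA whose refcount has already dropped to zero, which is presumably how detached VMAs are represented now that the detached flag is gone.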
@@ -922,6 +1011,7 @@ struct mm_struct {
* by mmlist_lock
*/
#ifdef CONFIG_PER_VMA_LOCK
+ struct rcuwait vma_writer_wait;
/*
* This field has lock-like semantics, meaning it is sometimes
* accessed with ACQUIRE/RELEASE semantics.
@@ -1074,6 +1164,9 @@ struct mm_struct {
#endif
} lru_gen;
#endif /* CONFIG_LRU_GEN_WALKS_MMU */
+#ifdef CONFIG_MM_ID
+ mm_id_t mm_id;
+#endif /* CONFIG_MM_ID */
} __randomize_layout;
/*
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 45a21faa3ff6..4706c6769902 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -122,12 +122,6 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int
#endif /* CONFIG_PER_VMA_LOCK */
-static inline void mmap_init_lock(struct mm_struct *mm)
-{
- init_rwsem(&mm->mmap_lock);
- mm_lock_seqcount_init(mm);
-}
-
static inline void mmap_write_lock(struct mm_struct *mm)
{
__mmap_lock_trace_start_locking(mm, true);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index e2dd57ca368b..bc2402a45741 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -43,10 +43,10 @@ struct mmu_interval_notifier;
* a device driver to possibly ignore the invalidation if the
* owner field matches the driver's device private pgmap owner.
*
- * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no
- * longer have exclusive access to the page. When sent during creation of an
- * exclusive range the owner will be initialised to the value provided by the
- * caller of make_device_exclusive_range(), otherwise the owner will be NULL.
+ * @MMU_NOTIFY_EXCLUSIVE: conversion of a page table entry to device-exclusive.
+ * The owner is initialized to the value provided by the caller of
+ * make_device_exclusive(), such that this caller can filter out these
+ * events.
*/
enum mmu_notifier_event {
MMU_NOTIFY_UNMAP = 0,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e16939553930..25e80b2ca7f4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -138,6 +138,7 @@ enum numa_stat_item {
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
+ NR_FREE_PAGES_BLOCKS,
NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
NR_ZONE_ACTIVE_ANON,
@@ -220,9 +221,11 @@ enum node_stat_item {
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
+ PGDEMOTE_PROACTIVE,
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
+ NR_BALLOON_PAGES,
NR_VM_NODE_STAT_ITEMS
};
@@ -1161,6 +1164,12 @@ static inline bool is_zone_device_page(const struct page *page)
return page_zonenum(page) == ZONE_DEVICE;
}
+static inline struct dev_pagemap *page_pgmap(const struct page *page)
+{
+ VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page);
+ return page_folio(page)->pgmap;
+}
+
/*
* Consecutive zone device pages should not be merged into the same sgl
* or bvec segment with other types of pages or if they belong to different
@@ -1176,7 +1185,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
return false;
if (!is_zone_device_page(a))
return true;
- return a->pgmap == b->pgmap;
+ return page_pgmap(a) == page_pgmap(b);
}
extern void memmap_init_zone_device(struct zone *, unsigned long,
@@ -1191,6 +1200,10 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
{
return true;
}
+static inline struct dev_pagemap *page_pgmap(const struct page *page)
+{
+ return NULL;
+}
#endif
static inline bool folio_is_zone_device(const struct folio *folio)
@@ -1937,6 +1950,9 @@ enum {
#ifdef CONFIG_ZONE_DEVICE
SECTION_TAINT_ZONE_DEVICE_BIT,
#endif
+#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
+ SECTION_IS_VMEMMAP_PREINIT_BIT,
+#endif
SECTION_MAP_LAST_BIT,
};
@@ -1947,6 +1963,9 @@ enum {
#ifdef CONFIG_ZONE_DEVICE
#define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT)
#endif
+#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
+#define SECTION_IS_VMEMMAP_PREINIT BIT(SECTION_IS_VMEMMAP_PREINIT_BIT)
+#endif
#define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1))
#define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT
@@ -2001,6 +2020,30 @@ static inline int online_device_section(struct mem_section *section)
}
#endif
+#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
+static inline int preinited_vmemmap_section(struct mem_section *section)
+{
+ return (section &&
+ (section->section_mem_map & SECTION_IS_VMEMMAP_PREINIT));
+}
+
+void sparse_vmemmap_init_nid_early(int nid);
+void sparse_vmemmap_init_nid_late(int nid);
+
+#else
+static inline int preinited_vmemmap_section(struct mem_section *section)
+{
+ return 0;
+}
+static inline void sparse_vmemmap_init_nid_early(int nid)
+{
+}
+
+static inline void sparse_vmemmap_init_nid_late(int nid)
+{
+}
+#endif
+
static inline int online_section_nr(unsigned long nr)
{
return online_section(__nr_to_section(nr));
@@ -2038,6 +2081,9 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
}
#endif
+void sparse_init_early_section(int nid, struct page *map, unsigned long pnum,
+ unsigned long flags);
+
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
/**
* pfn_valid - check if there is a valid memory map entry for a PFN
@@ -2100,6 +2146,11 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
return -1;
}
+#define for_each_present_section_nr(start, section_nr) \
+ for (section_nr = next_present_section_nr(start - 1); \
+ section_nr != -1; \
+ section_nr = next_present_section_nr(section_nr))
+
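A short usage sketch for the new iterator (hypothetical caller):

    unsigned long section_nr;

    for_each_present_section_nr(0, section_nr) {
            struct mem_section *ms = __nr_to_section(section_nr);

            /* inspect or initialize the present section here */
    }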
/*
* These are _only_ used during initialisation, therefore they
* can use __initdata ... They could have names to indicate
@@ -2119,6 +2170,8 @@ void sparse_init(void);
#else
#define sparse_init() do {} while (0)
#define sparse_index_init(_sec, _nid) do {} while (0)
+#define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
+#define sparse_vmemmap_init_nid_late(_nid) do {} while (0)
#define pfn_in_present_section pfn_valid
#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
diff --git a/include/linux/module.h b/include/linux/module.h
index d9a5183a9fe7..d94b196d5a34 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -771,6 +771,8 @@ static inline bool is_livepatch_module(struct module *mod)
void set_module_sig_enforced(void);
+void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data);
+
#else /* !CONFIG_MODULES... */
static inline struct module *__module_address(unsigned long addr)
@@ -878,6 +880,10 @@ static inline bool module_is_coming(struct module *mod)
{
return false;
}
+
+static inline void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data)
+{
+}
#endif /* CONFIG_MODULES */
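Usage sketch for module_for_each_mod(); the callback and wrapper are hypothetical, and the assumption that a non-zero return stops the walk is inferred from the int return type rather than stated in this header:

    static int example_count_module(struct module *mod, void *data)
    {
            (*(int *)data)++;
            return 0;       /* assumed: returning non-zero stops the walk */
    }

    static int example_nr_modules(void)
    {
            int count = 0;

            module_for_each_mod(example_count_module, &count);
            return count;
    }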
#ifdef CONFIG_SYSFS
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 2bf91b57591b..2143d05116be 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -202,4 +202,6 @@ DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T))
DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T))
DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0)
+extern unsigned long mutex_get_owner(struct mutex *lock);
+
#endif /* __LINUX_MUTEX_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fa79145518d1..cf3b6445817b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4192,7 +4192,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags,
int netif_set_alias(struct net_device *dev, const char *alias, size_t len);
int dev_set_alias(struct net_device *, const char *, size_t);
int dev_get_alias(const struct net_device *, char *, size_t);
-int netif_change_net_namespace(struct net_device *dev, struct net *net,
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex,
struct netlink_ext_ack *extack);
int dev_change_net_namespace(struct net_device *dev, struct net *net,
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 9ac83ca88326..d8cad844870a 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -300,6 +300,7 @@ enum nfsstat4 {
/* error codes for internal client use */
#define NFS4ERR_RESET_TO_MDS 12001
#define NFS4ERR_RESET_TO_PNFS 12002
+#define NFS4ERR_FATAL_IOERROR 12003
static inline bool seqid_mutating_err(u32 err)
{
@@ -691,6 +692,7 @@ enum {
NFSPROC4_CLNT_LISTXATTRS,
NFSPROC4_CLNT_REMOVEXATTR,
NFSPROC4_CLNT_READ_PLUS,
+ NFSPROC4_CLNT_OFFLOAD_STATUS,
};
/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index f00bfcee7120..71319637a84e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -50,6 +50,7 @@ struct nfs_client {
#define NFS_CS_DS 7 /* - Server is a DS */
#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */
#define NFS_CS_PNFS 9 /* - Server used for pnfs */
+#define NFS_CS_NETUNREACH_FATAL 10 /* - ENETUNREACH errors are fatal */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@@ -167,6 +168,8 @@ struct nfs_server {
#define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000
#define NFS_MOUNT_SHUTDOWN 0x08000000
#define NFS_MOUNT_NO_ALIGNWRITE 0x10000000
+#define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000
+#define NFS_MOUNT_NETUNREACH_FATAL 0x40000000
unsigned int fattr_valid; /* Valid attributes */
unsigned int caps; /* server capabilities */
@@ -250,6 +253,10 @@ struct nfs_server {
struct list_head ss_copies;
struct list_head ss_src_copies;
+ unsigned long delegation_flags;
+#define NFS4SERV_DELEGRETURN (1)
+#define NFS4SERV_DELEGATION_EXPIRED (2)
+#define NFS4SERV_DELEGRETURN_DELAYED (3)
unsigned long delegation_gen;
unsigned long mig_gen;
unsigned long mig_status;
@@ -289,6 +296,7 @@ struct nfs_server {
#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
#define NFS_CAP_CASE_PRESERVING (1U << 7)
#define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8)
+#define NFS_CAP_OFFLOAD_STATUS (1U << 9)
#define NFS_CAP_OPEN_XOR (1U << 12)
#define NFS_CAP_DELEGTIME (1U << 13)
#define NFS_CAP_POSIX_LOCK (1U << 14)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d66c61cbbd1d..67f6632f723b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1515,8 +1515,9 @@ struct nfs42_offload_status_args {
struct nfs42_offload_status_res {
struct nfs4_sequence_res osr_seq_res;
- uint64_t osr_count;
- int osr_status;
+ u64 osr_count;
+ int complete_count;
+ u32 osr_complete;
};
struct nfs42_copy_notify_args {
diff --git a/include/linux/node.h b/include/linux/node.h
index 9a881c2208b3..2b7517892230 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -57,6 +57,11 @@ enum cache_write_policy {
NODE_CACHE_WRITE_OTHER,
};
+enum cache_mode {
+ NODE_CACHE_ADDR_MODE_RESERVED,
+ NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR,
+};
+
/**
* struct node_cache_attrs - system memory caching attributes
*
@@ -65,6 +70,7 @@ enum cache_write_policy {
* @size: Total size of cache in bytes
* @line_size: Number of bytes fetched on a cache miss
* @level: The cache hierarchy level
+ * @address_mode: The address mode
*/
struct node_cache_attrs {
enum cache_indexing indexing;
@@ -72,6 +78,7 @@ struct node_cache_attrs {
u64 size;
u16 line_size;
u8 level;
+ u16 address_mode;
};
#ifdef CONFIG_HMEM_REPORTING
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index 3ca965a2ddc8..366ad004d794 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -69,7 +69,7 @@
* In asm, there are two kinds of code: normal C-type callable functions and
* the rest. The normal callable functions can be called by other code, and
* don't do anything unusual with the stack. Such normal callable functions
- * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
+ * are annotated with SYM_FUNC_{START,END}. Most asm code falls in this
* category. In this case, no special debugging annotations are needed because
* objtool can automatically generate the ORC data for the ORC unwinder to read
* at runtime.
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index df9234e5f478..e6a21b62dcce 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -226,11 +226,48 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
}
return page;
}
+
+static __always_inline bool page_count_writable(const struct page *page, int u)
+{
+ if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+ return true;
+
+ /*
+ * The refcount check is ordered before the fake-head check to prevent
+ * the following race:
+ * CPU 1 (HVO) CPU 2 (speculative PFN walker)
+ *
+ * page_ref_freeze()
+ * synchronize_rcu()
+ * rcu_read_lock()
+ * page_is_fake_head() is false
+ * vmemmap_remap_pte()
+ * XXX: struct page[] becomes r/o
+ *
+ * page_ref_unfreeze()
+ * page_ref_count() is not zero
+ *
+ * atomic_add_unless(&page->_refcount)
+ * XXX: try to modify r/o struct page[]
+ *
+ * The refcount check also prevents modification attempts to other (r/o)
+ * tail pages that are not fake heads.
+ */
+ if (atomic_read_acquire(&page->_refcount) == u)
+ return false;
+
+ return page_fixed_fake_head(page) == page;
+}
#else
static inline const struct page *page_fixed_fake_head(const struct page *page)
{
return page;
}
+
+static inline bool page_count_writable(const struct page *page, int u)
+{
+ return true;
+}
#endif
static __always_inline int page_is_fake_head(const struct page *page)
@@ -673,12 +710,6 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
#define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
-/*
- * Different with flags above, this flag is used only for fsdax mode. It
- * indicates that this page->mapping is now under reflink case.
- */
-#define PAGE_MAPPING_DAX_SHARED ((void *)0x1)
-
static __always_inline bool folio_mapping_flags(const struct folio *folio)
{
return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0;
@@ -1106,6 +1137,12 @@ static inline bool is_page_hwpoison(const struct page *page)
return folio_test_hugetlb(folio) && PageHWPoison(&folio->page);
}
+static inline bool folio_contain_hwpoisoned_page(struct folio *folio)
+{
+ return folio_test_hwpoison(folio) ||
+ (folio_test_large(folio) && folio_test_has_hwpoisoned(folio));
+}
+
bool is_free_buddy_page(const struct page *page);
PAGEFLAG(Isolated, isolated, PF_ANY);
@@ -1193,6 +1230,10 @@ static inline int folio_has_private(const struct folio *folio)
return !!(folio->flags & PAGE_FLAGS_PRIVATE);
}
+static inline bool folio_test_large_maybe_mapped_shared(const struct folio *folio)
+{
+ return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids);
+}
#undef PF_ANY
#undef PF_HEAD
#undef PF_NO_TAIL
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 46406f3fe34d..d649b6bbbc87 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -9,10 +9,12 @@
struct page_counter {
/*
- * Make sure 'usage' does not share cacheline with any other field. The
- * memcg->memory.usage is a hot member of struct mem_cgroup.
+ * Make sure 'usage' does not share cacheline with any other field in
+ * v2. The memcg->memory.usage is a hot member of struct mem_cgroup.
*/
atomic_long_t usage;
+ unsigned long failcnt; /* v1-only field */
+
CACHELINE_PADDING(_pad1_);
/* effective memory.min and memory.min usage tracking */
@@ -28,12 +30,12 @@ struct page_counter {
unsigned long watermark;
/* Latest cg2 reset watermark */
unsigned long local_watermark;
- unsigned long failcnt;
/* Keep all the read-mostly fields in a separate cacheline. */
CACHELINE_PADDING(_pad2_);
bool protection_support;
+ bool track_failcnt;
unsigned long min;
unsigned long low;
unsigned long high;
@@ -58,6 +60,7 @@ static inline void page_counter_init(struct page_counter *counter,
counter->max = PAGE_COUNTER_MAX;
counter->parent = parent;
counter->protection_support = protection_support;
+ counter->track_failcnt = false;
}
static inline unsigned long page_counter_read(struct page_counter *counter)
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index e4b48a0dda24..76c817162d2f 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -3,6 +3,7 @@
#define __LINUX_PAGE_EXT_H
#include <linux/types.h>
+#include <linux/mmzone.h>
#include <linux/stacktrace.h>
struct pglist_data;
@@ -69,16 +70,31 @@ extern void page_ext_init(void);
static inline void page_ext_init_flatmem_late(void)
{
}
+
+static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn)
+{
+ /*
+ * page_ext is allocated per memory section. Once we cross a
+ * memory section, we have to fetch the new pointer.
+ */
+ return next_pfn % PAGES_PER_SECTION;
+}
#else
extern void page_ext_init_flatmem(void);
extern void page_ext_init_flatmem_late(void);
static inline void page_ext_init(void)
{
}
+
+static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn)
+{
+ return true;
+}
#endif
extern struct page_ext *page_ext_get(const struct page *page);
extern void page_ext_put(struct page_ext *page_ext);
+extern struct page_ext *page_ext_lookup(unsigned long pfn);
static inline void *page_ext_data(struct page_ext *page_ext,
struct page_ext_operations *ops)
@@ -93,6 +109,83 @@ static inline struct page_ext *page_ext_next(struct page_ext *curr)
return next;
}
+struct page_ext_iter {
+ unsigned long index;
+ unsigned long start_pfn;
+ struct page_ext *page_ext;
+};
+
+/**
+ * page_ext_iter_begin() - Prepare for iterating through page extensions.
+ * @iter: page extension iterator.
+ * @pfn: PFN of the page we're interested in.
+ *
+ * Must be called with RCU read lock taken.
+ *
+ * Return: NULL if no page_ext exists for this page.
+ */
+static inline struct page_ext *page_ext_iter_begin(struct page_ext_iter *iter,
+ unsigned long pfn)
+{
+ iter->index = 0;
+ iter->start_pfn = pfn;
+ iter->page_ext = page_ext_lookup(pfn);
+
+ return iter->page_ext;
+}
+
+/**
+ * page_ext_iter_next() - Get next page extension
+ * @iter: page extension iterator.
+ *
+ * Must be called with RCU read lock taken.
+ *
+ * Return: NULL if no next page_ext exists.
+ */
+static inline struct page_ext *page_ext_iter_next(struct page_ext_iter *iter)
+{
+ unsigned long pfn;
+
+ if (WARN_ON_ONCE(!iter->page_ext))
+ return NULL;
+
+ iter->index++;
+ pfn = iter->start_pfn + iter->index;
+
+ if (page_ext_iter_next_fast_possible(pfn))
+ iter->page_ext = page_ext_next(iter->page_ext);
+ else
+ iter->page_ext = page_ext_lookup(pfn);
+
+ return iter->page_ext;
+}
+
+/**
+ * page_ext_iter_get() - Get current page extension
+ * @iter: page extension iterator.
+ *
+ * Return: NULL if no page_ext exists for this iterator.
+ */
+static inline struct page_ext *page_ext_iter_get(const struct page_ext_iter *iter)
+{
+ return iter->page_ext;
+}
+
+/**
+ * for_each_page_ext(): iterate through page_ext objects.
+ * @__page: the page we're interested in
+ * @__pgcount: how many pages to iterate through
+ * @__page_ext: struct page_ext pointer where the current page_ext
+ * object is returned
+ * @__iter: struct page_ext_iter object (defined in the stack)
+ *
+ * IMPORTANT: must be called with RCU read lock taken.
+ */
+#define for_each_page_ext(__page, __pgcount, __page_ext, __iter) \
+ for (__page_ext = page_ext_iter_begin(&__iter, page_to_pfn(__page));\
+ __page_ext && __iter.index < __pgcount; \
+ __page_ext = page_ext_iter_next(&__iter))
+
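A minimal usage sketch; the caller and example_ops (a struct page_ext_operations instance) are hypothetical:

    struct page_ext_iter iter;
    struct page_ext *page_ext;

    rcu_read_lock();
    for_each_page_ext(page, 1 << order, page_ext, iter) {
            void *data = page_ext_data(page_ext, &example_ops);

            /* update the per-page metadata behind data */
    }
    rcu_read_unlock();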
#else /* !CONFIG_PAGE_EXTENSION */
struct page_ext;
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 8c236c651d1d..544150d1d5fd 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -234,7 +234,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
rcu_read_lock();
/* avoid writing to the vmemmap area being remapped */
- if (!page_is_fake_head(page) && page_ref_count(page) != u)
+ if (page_count_writable(page, u))
ret = atomic_add_unless(&page->_refcount, nr, u);
rcu_read_unlock();
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7661be85136c..26baa78f1ca7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -536,26 +536,6 @@ struct address_space *folio_mapping(struct folio *);
struct address_space *swapcache_mapping(struct folio *);
/**
- * folio_file_mapping - Find the mapping this folio belongs to.
- * @folio: The folio.
- *
- * For folios which are in the page cache, return the mapping that this
- * page belongs to. Folios in the swap cache return the mapping of the
- * swap file or swap device where the data is stored. This is different
- * from the mapping returned by folio_mapping(). The only reason to
- * use it is if, like NFS, you return 0 from ->activate_swapfile.
- *
- * Do not call this for folios which aren't in the page cache or swap cache.
- */
-static inline struct address_space *folio_file_mapping(struct folio *folio)
-{
- if (unlikely(folio_test_swapcache(folio)))
- return swapcache_mapping(folio);
-
- return folio->mapping;
-}
-
-/**
* folio_flush_mapping - Find the file mapping this folio belongs to.
* @folio: The folio.
*
@@ -575,11 +555,6 @@ static inline struct address_space *folio_flush_mapping(struct folio *folio)
return folio_mapping(folio);
}
-static inline struct address_space *page_file_mapping(struct page *page)
-{
- return folio_file_mapping(page_folio(page));
-}
-
/**
* folio_inode - Get the host inode for this folio.
* @folio: The folio.
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index ac8c44dd8237..c5e9cac0575e 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -33,7 +33,7 @@ struct disk_stats {
#define part_stat_read(part, field) \
({ \
- typeof((part)->bd_stats->field) res = 0; \
+ TYPEOF_UNQUAL((part)->bd_stats->field) res = 0; \
unsigned int _cpu; \
for_each_possible_cpu(_cpu) \
res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 0e8b74e63767..75c6c86cf09d 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -42,6 +42,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features);
void pci_disable_pasid(struct pci_dev *pdev);
int pci_pasid_features(struct pci_dev *pdev);
int pci_max_pasids(struct pci_dev *pdev);
+int pci_pasid_status(struct pci_dev *pdev);
#else /* CONFIG_PCI_PASID */
static inline int pci_enable_pasid(struct pci_dev *pdev, int features)
{ return -EINVAL; }
@@ -50,6 +51,8 @@ static inline int pci_pasid_features(struct pci_dev *pdev)
{ return -EINVAL; }
static inline int pci_max_pasids(struct pci_dev *pdev)
{ return -EINVAL; }
+static inline int pci_pasid_status(struct pci_dev *pdev)
+{ return -EINVAL; }
#endif /* CONFIG_PCI_PASID */
#endif /* LINUX_PCI_ATS_H */
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 0fcacb909778..0aeb0e276a3e 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -222,7 +222,7 @@ do { \
} while (0)
#define PERCPU_PTR(__p) \
- (typeof(*(__p)) __force __kernel *)((__force unsigned long)(__p))
+ (TYPEOF_UNQUAL(*(__p)) __force __kernel *)((__force unsigned long)(__p))
#ifdef CONFIG_SMP
@@ -318,7 +318,7 @@ static __always_inline void __this_cpu_preempt_check(const char *op) { }
#define __pcpu_size_call_return(stem, variable) \
({ \
- typeof(variable) pscr_ret__; \
+ TYPEOF_UNQUAL(variable) pscr_ret__; \
__verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: pscr_ret__ = stem##1(variable); break; \
@@ -333,7 +333,7 @@ static __always_inline void __this_cpu_preempt_check(const char *op) { }
#define __pcpu_size_call_return2(stem, variable, ...) \
({ \
- typeof(variable) pscr2_ret__; \
+ TYPEOF_UNQUAL(variable) pscr2_ret__; \
__verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 3469c4b20105..c74077977830 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -162,74 +162,32 @@ static inline void update_page_tag_ref(union pgtag_ref_handle handle, union code
}
}
-static inline void clear_page_tag_ref(struct page *page)
-{
- if (mem_alloc_profiling_enabled()) {
- union pgtag_ref_handle handle;
- union codetag_ref ref;
-
- if (get_page_tag_ref(page, &ref, &handle)) {
- set_codetag_empty(&ref);
- update_page_tag_ref(handle, &ref);
- put_page_tag_ref(handle);
- }
- }
-}
-
-static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
- unsigned int nr)
-{
- if (mem_alloc_profiling_enabled()) {
- union pgtag_ref_handle handle;
- union codetag_ref ref;
-
- if (get_page_tag_ref(page, &ref, &handle)) {
- alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
- update_page_tag_ref(handle, &ref);
- put_page_tag_ref(handle);
- }
- }
-}
+/* Should be called only if mem_alloc_profiling_enabled() */
+void __clear_page_tag_ref(struct page *page);
-static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
+static inline void clear_page_tag_ref(struct page *page)
{
- if (mem_alloc_profiling_enabled()) {
- union pgtag_ref_handle handle;
- union codetag_ref ref;
-
- if (get_page_tag_ref(page, &ref, &handle)) {
- alloc_tag_sub(&ref, PAGE_SIZE * nr);
- update_page_tag_ref(handle, &ref);
- put_page_tag_ref(handle);
- }
- }
+ if (mem_alloc_profiling_enabled())
+ __clear_page_tag_ref(page);
}
-static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
+/* Should be called only if mem_alloc_profiling_enabled() */
+static inline struct alloc_tag *__pgalloc_tag_get(struct page *page)
{
struct alloc_tag *tag = NULL;
-
- if (mem_alloc_profiling_enabled()) {
- union pgtag_ref_handle handle;
- union codetag_ref ref;
-
- if (get_page_tag_ref(page, &ref, &handle)) {
- alloc_tag_sub_check(&ref);
- if (ref.ct)
- tag = ct_to_alloc_tag(ref.ct);
- put_page_tag_ref(handle);
- }
+ union pgtag_ref_handle handle;
+ union codetag_ref ref;
+
+ if (get_page_tag_ref(page, &ref, &handle)) {
+ alloc_tag_sub_check(&ref);
+ if (ref.ct)
+ tag = ct_to_alloc_tag(ref.ct);
+ put_page_tag_ref(handle);
}
return tag;
}
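The calling convention implied by the leading comment, sketched as a hypothetical wrapper:

    static inline struct alloc_tag *example_pgalloc_tag_get(struct page *page)
    {
            if (!mem_alloc_profiling_enabled())
                    return NULL;

            return __pgalloc_tag_get(page);
    }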
-static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
-{
- if (mem_alloc_profiling_enabled() && tag)
- this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
-}
-
void pgalloc_tag_split(struct folio *folio, int old_order, int new_order);
void pgalloc_tag_swap(struct folio *new, struct folio *old);
@@ -238,11 +196,6 @@ void __init alloc_tag_sec_init(void);
#else /* CONFIG_MEM_ALLOC_PROFILING */
static inline void clear_page_tag_ref(struct page *page) {}
-static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
- unsigned int nr) {}
-static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
-static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
static inline void alloc_tag_sec_init(void) {}
static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {}
static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {}
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 4c107e17c547..e2b705c14945 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -222,10 +222,14 @@ static inline int pmd_dirty(pmd_t pmd)
* hazard could result in the direct mode hypervisor case, since the actual
* write to the page tables may not yet have taken place, so reads through
* a raw PTE pointer after it has been modified are not guaranteed to be
- * up to date. This mode can only be entered and left under the protection of
- * the page table locks for all page tables which may be modified. In the UP
- * case, this is required so that preemption is disabled, and in the SMP case,
- * it must synchronize the delayed page table writes properly on other CPUs.
+ * up to date.
+ *
+ * In the general case, no lock is guaranteed to be held between entry and exit
+ * of the lazy mode. So the implementation must assume preemption may be enabled
+ * and cpu migration is possible; it must take steps to be robust against this.
+ * (In practice, for user PTE updates, the appropriate page table lock(s) are
+ * held, but for kernel PTE updates, no lock is held). Nesting is not permitted
+ * and the mode cannot be used in interrupt context.
*/
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode() do {} while (0)
@@ -287,7 +291,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
{
page_table_check_ptes_set(mm, ptep, pte, nr);
- arch_enter_lazy_mmu_mode();
for (;;) {
set_pte(ptep, pte);
if (--nr == 0)
@@ -295,7 +298,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
ptep++;
pte = pte_next_pfn(pte);
}
- arch_leave_lazy_mmu_mode();
}
#endif
#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
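With the enter/leave calls removed from set_ptes(), a caller that wants the batching benefit would presumably bracket its updates itself, respecting the updated contract (no nesting, not from interrupt context); a minimal sketch with hypothetical variables:

    arch_enter_lazy_mmu_mode();
    set_ptes(mm, addr, ptep, pte, nr);      /* plus any other batched PTE updates */
    arch_leave_lazy_mmu_mode();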
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index 03cd5bae92d3..e63e6e70e860 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -227,8 +227,6 @@ int phy_pm_runtime_get(struct phy *phy);
int phy_pm_runtime_get_sync(struct phy *phy);
int phy_pm_runtime_put(struct phy *phy);
int phy_pm_runtime_put_sync(struct phy *phy);
-void phy_pm_runtime_allow(struct phy *phy);
-void phy_pm_runtime_forbid(struct phy *phy);
int phy_init(struct phy *phy);
int phy_exit(struct phy *phy);
int phy_power_on(struct phy *phy);
@@ -321,16 +319,6 @@ static inline int phy_pm_runtime_put_sync(struct phy *phy)
return -ENOSYS;
}
-static inline void phy_pm_runtime_allow(struct phy *phy)
-{
- return;
-}
-
-static inline void phy_pm_runtime_forbid(struct phy *phy)
-{
- return;
-}
-
static inline int phy_init(struct phy *phy)
{
if (!phy)
diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index ecf290a0c98f..1f4e4f2b89bb 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -5046,6 +5046,7 @@ struct ec_response_pd_status {
#define PD_EVENT_DATA_SWAP BIT(3)
#define PD_EVENT_TYPEC BIT(4)
#define PD_EVENT_PPM BIT(5)
+#define PD_EVENT_INIT BIT(6)
struct ec_response_host_event_status {
uint32_t status; /* PD MCU host event status */
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index e2d47eb1a7f3..62d497763e25 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -27,11 +27,16 @@ struct posix_acl_entry {
};
struct posix_acl {
- refcount_t a_refcount;
- unsigned int a_count;
- struct rcu_head a_rcu;
+ /* New members MUST be added within the struct_group() macro below. */
+ struct_group_tagged(posix_acl_hdr, hdr,
+ refcount_t a_refcount;
+ unsigned int a_count;
+ struct rcu_head a_rcu;
+ );
struct posix_acl_entry a_entries[] __counted_by(a_count);
};
+static_assert(offsetof(struct posix_acl, a_entries) == sizeof(struct posix_acl_hdr),
+ "struct member likely outside of struct_group_tagged()");
#define FOREACH_ACL_ENTRY(pa, acl, pe) \
for(pa=(acl)->a_entries, pe=pa+(acl)->a_count; pa<pe; pa++)
diff --git a/include/linux/pps_gen_kernel.h b/include/linux/pps_gen_kernel.h
index 022ea0ac4440..6214c8aa2e02 100644
--- a/include/linux/pps_gen_kernel.h
+++ b/include/linux/pps_gen_kernel.h
@@ -43,7 +43,7 @@ struct pps_gen_source_info {
/* The main struct */
struct pps_gen_device {
- struct pps_gen_source_info info; /* PPS generator info */
+ const struct pps_gen_source_info *info; /* PPS generator info */
bool enabled; /* PPS generator status */
unsigned int event;
@@ -70,7 +70,7 @@ extern const struct attribute_group *pps_gen_groups[];
*/
extern struct pps_gen_device *pps_gen_register_source(
- struct pps_gen_source_info *info);
+ const struct pps_gen_source_info *info);
extern void pps_gen_unregister_source(struct pps_gen_device *pps_gen);
extern void pps_gen_event(struct pps_gen_device *pps_gen,
unsigned int event, void *data);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f8159f8a7d73..120536f4c6eb 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -132,7 +132,7 @@ static inline void rcu_sysrq_end(void) { }
#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
void rcu_irq_work_resched(void);
#else
-static inline void rcu_irq_work_resched(void) { }
+static __always_inline void rcu_irq_work_resched(void) { }
#endif
#ifdef CONFIG_RCU_NOCB_CPU
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 27343424225c..9ad134a04b41 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -4,18 +4,7 @@
#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
-
-/*
- * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner.
- *
- * The only time @task is non-nil is when a user is blocked (or
- * checking if it needs to) on a condition, and reset as soon as we
- * know that the condition has succeeded and are awoken.
- */
-struct rcuwait {
- struct task_struct __rcu *task;
-};
+#include <linux/types.h>
#define __RCUWAIT_INITIALIZER(name) \
{ .task = NULL, }
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index abcdde4df697..aa08c3bbbf59 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -177,16 +177,38 @@ void ctrl_alt_del(void);
extern void orderly_poweroff(bool force);
extern void orderly_reboot(void);
-void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown);
-static inline void hw_protection_reboot(const char *reason, int ms_until_forced)
-{
- __hw_protection_shutdown(reason, ms_until_forced, false);
-}
+/**
+ * enum hw_protection_action - Hardware protection action
+ *
+ * @HWPROT_ACT_DEFAULT:
+ * The default action should be taken. This is HWPROT_ACT_SHUTDOWN
+ * by default, but can be overridden.
+ * @HWPROT_ACT_SHUTDOWN:
+ * The system should be shut down (powered off) for HW protection.
+ * @HWPROT_ACT_REBOOT:
+ * The system should be rebooted for HW protection.
+ */
+enum hw_protection_action { HWPROT_ACT_DEFAULT, HWPROT_ACT_SHUTDOWN, HWPROT_ACT_REBOOT };
-static inline void hw_protection_shutdown(const char *reason, int ms_until_forced)
+void __hw_protection_trigger(const char *reason, int ms_until_forced,
+ enum hw_protection_action action);
+
+/**
+ * hw_protection_trigger - Trigger default emergency system hardware protection action
+ *
+ * @reason: Reason of emergency shutdown or reboot to be printed.
+ * @ms_until_forced: Time to wait for orderly shutdown or reboot before
+ * triggering it. Negative value disables the forced
+ * shutdown or reboot.
+ *
+ * Initiate an emergency system shutdown or reboot in order to protect
+ * hardware from further damage. The exact action taken is controllable at
+ * runtime and defaults to shutdown.
+ */
+static inline void hw_protection_trigger(const char *reason, int ms_until_forced)
{
- __hw_protection_shutdown(reason, ms_until_forced, true);
+ __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_DEFAULT);
}
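A typical call site, sketched with hypothetical driver values:

    if (temp_mdegc >= critical_mdegc)
            hw_protection_trigger("critical over-temperature", 100);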
/*
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 35f039ecb272..80dc023ac2bf 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -87,6 +87,15 @@
* The decrements dec_and_test() and sub_and_test() also provide acquire
* ordering on success.
*
+ * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() provide
+ * acquire and release ordering for cases when the memory occupied by the
+ * object might be reused to store another object. This is important for the
+ * cases where secondary validation is required to detect such reuse, e.g.
+ * SLAB_TYPESAFE_BY_RCU. The secondary validation checks have to happen after
+ * the refcount is taken, hence acquire order is necessary. Similarly, when the
+ * object is initialized, all stores to its attributes should be visible before
+ * the refcount is set, otherwise a stale attribute value might be used by
+ * another task which succeeds in taking a refcount to the new object.
*/
#ifndef _LINUX_REFCOUNT_H
@@ -126,6 +135,31 @@ static inline void refcount_set(refcount_t *r, int n)
}
/**
+ * refcount_set_release - set a refcount's value with release ordering
+ * @r: the refcount
+ * @n: value to which the refcount will be set
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides release memory ordering which will order previous memory operations
+ * against this store. This ensures all updates to this object are visible
+ * once the refcount is set and stale values from the object previously
+ * occupying this memory are overwritten with new ones.
+ *
+ * This function should be called only after new object is fully initialized.
+ * After this call the object should be considered visible to other tasks even
+ * if it was not yet added into an object collection normally used to discover
+ * it. This is because other tasks might have discovered the object previously
+ * occupying the same memory and after memory reuse they can succeed in taking
+ * refcount to the new object and start using it.
+ */
+static inline void refcount_set_release(refcount_t *r, int n)
+{
+ atomic_set_release(&r->refs, n);
+}
+
+/**
* refcount_read - get a refcount's value
* @r: the refcount
*
@@ -178,6 +212,71 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
return __refcount_add_not_zero(i, r, NULL);
}
+static inline __must_check __signed_wrap
+bool __refcount_add_not_zero_limited_acquire(int i, refcount_t *r, int *oldp,
+ int limit)
+{
+ int old = refcount_read(r);
+
+ do {
+ if (!old)
+ break;
+
+ if (i > limit - old) {
+ if (oldp)
+ *oldp = old;
+ return false;
+ }
+ } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i));
+
+ if (oldp)
+ *oldp = old;
+
+ if (unlikely(old < 0 || old + i < 0))
+ refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);
+
+ return old;
+}
+
+static inline __must_check bool
+__refcount_inc_not_zero_limited_acquire(refcount_t *r, int *oldp, int limit)
+{
+ return __refcount_add_not_zero_limited_acquire(1, r, oldp, limit);
+}
+
+static inline __must_check __signed_wrap
+bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp)
+{
+ return __refcount_add_not_zero_limited_acquire(i, r, oldp, INT_MAX);
+}
+
+/**
+ * refcount_add_not_zero_acquire - add a value to a refcount with acquire ordering unless it is 0
+ *
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides acquire memory ordering on success, it is assumed the caller has
+ * guaranteed the object memory to be stable (RCU, etc.). It does provide a
+ * control dependency and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_inc_not_zero_acquire() should instead be used to increment a
+ * reference count.
+ *
+ * Return: false if the passed refcount is 0, true otherwise
+ */
+static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t *r)
+{
+ return __refcount_add_not_zero_acquire(i, r, NULL);
+}
+
static inline __signed_wrap
void __refcount_add(int i, refcount_t *r, int *oldp)
{
@@ -236,6 +335,32 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
return __refcount_inc_not_zero(r, NULL);
}
+static inline __must_check bool __refcount_inc_not_zero_acquire(refcount_t *r, int *oldp)
+{
+ return __refcount_add_not_zero_acquire(1, r, oldp);
+}
+
+/**
+ * refcount_inc_not_zero_acquire - increment a refcount with acquire ordering unless it is 0
+ * @r: the refcount to increment
+ *
+ * Similar to refcount_inc_not_zero(), but provides acquire memory ordering on
+ * success.
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides acquire memory ordering on success, it is assumed the caller has
+ * guaranteed the object memory to be stable (RCU, etc.). It does provide a
+ * control dependency and thereby orders future stores. See the comment on top.
+ *
+ * Return: true if the increment was successful, false otherwise
+ */
+static inline __must_check bool refcount_inc_not_zero_acquire(refcount_t *r)
+{
+ return __refcount_inc_not_zero_acquire(r, NULL);
+}
+
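To illustrate the SLAB_TYPESAFE_BY_RCU pattern these acquire/release helpers target, here is a hedged sketch; the object type, lookup key and example_obj_put() are hypothetical:

    struct example_obj {
            refcount_t ref;
            u64 key;
    };

    /* Writer: publish the refcount only after every field is initialized. */
    static void example_obj_init(struct example_obj *obj, u64 key)
    {
            obj->key = key;
            refcount_set_release(&obj->ref, 1);
    }

    /* Reader: the memory may have been reused, so re-validate after acquire. */
    static struct example_obj *example_obj_tryget(struct example_obj *obj, u64 key)
    {
            if (!refcount_inc_not_zero_acquire(&obj->ref))
                    return NULL;
            if (obj->key != key) {
                    example_obj_put(obj);   /* hypothetical release helper */
                    return NULL;
            }
            return obj;
    }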
static inline void __refcount_inc(refcount_t *r, int *oldp)
{
__refcount_add(1, r, oldp);
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 8463a128e2f4..6c85b28ea30b 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1259,7 +1259,7 @@ static inline int rhashtable_replace_fast(
static inline void rhltable_walk_enter(struct rhltable *hlt,
struct rhashtable_iter *iter)
{
- return rhashtable_walk_enter(&hlt->ht, iter);
+ rhashtable_walk_enter(&hlt->ht, iter);
}
/**
@@ -1275,12 +1275,12 @@ static inline void rhltable_free_and_destroy(struct rhltable *hlt,
void *arg),
void *arg)
{
- return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
+ rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
}
static inline void rhltable_destroy(struct rhltable *hlt)
{
- return rhltable_free_and_destroy(hlt, NULL, NULL);
+ rhltable_free_and_destroy(hlt, NULL, NULL);
}
#endif /* _LINUX_RHASHTABLE_H */
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 17fbb7855295..56e27263acf8 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -92,10 +92,10 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags,
int order, unsigned long start,
unsigned long range_size,
+ unsigned long scratch_size,
struct lock_class_key *key);
-bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text,
- long *data);
+void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size);
/*
* Because the ring buffer is generic, if other users of the ring buffer get
@@ -113,11 +113,11 @@ bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text,
* traced by ftrace, it can produce lockdep warnings. We need to keep each
* ring buffer's lock class separate.
*/
-#define ring_buffer_alloc_range(size, flags, order, start, range_size) \
+#define ring_buffer_alloc_range(size, flags, order, start, range_size, s_size) \
({ \
static struct lock_class_key __key; \
__ring_buffer_alloc_range((size), (flags), (order), (start), \
- (range_size), &__key); \
+ (range_size), (s_size), &__key); \
})
typedef bool (*ring_buffer_cond_fn)(void *data);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 683a04088f3f..6b82b618846e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -13,6 +13,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/memremap.h>
+#include <linux/bit_spinlock.h>
/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -173,6 +174,214 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,
struct anon_vma *folio_get_anon_vma(const struct folio *folio);
+#ifdef CONFIG_MM_ID
+static __always_inline void folio_lock_large_mapcount(struct folio *folio)
+{
+ bit_spin_lock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids);
+}
+
+static __always_inline void folio_unlock_large_mapcount(struct folio *folio)
+{
+ __bit_spin_unlock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids);
+}
+
+static inline unsigned int folio_mm_id(const struct folio *folio, int idx)
+{
+ VM_WARN_ON_ONCE(idx != 0 && idx != 1);
+ return folio->_mm_id[idx] & MM_ID_MASK;
+}
+
+static inline void folio_set_mm_id(struct folio *folio, int idx, mm_id_t id)
+{
+ VM_WARN_ON_ONCE(idx != 0 && idx != 1);
+ folio->_mm_id[idx] &= ~MM_ID_MASK;
+ folio->_mm_id[idx] |= id;
+}
+
+static inline void __folio_large_mapcount_sanity_checks(const struct folio *folio,
+ int diff, mm_id_t mm_id)
+{
+ VM_WARN_ON_ONCE(!folio_test_large(folio) || folio_test_hugetlb(folio));
+ VM_WARN_ON_ONCE(diff <= 0);
+ VM_WARN_ON_ONCE(mm_id < MM_ID_MIN || mm_id > MM_ID_MAX);
+
+ /*
+ * Make sure we can detect at least one complete PTE mapping of the
+ * folio in a single MM as "exclusively mapped". This is primarily
+ * a check on 32bit, where we currently reduce the size of the per-MM
+ * mapcount to a short.
+ */
+ VM_WARN_ON_ONCE(diff > folio_large_nr_pages(folio));
+ VM_WARN_ON_ONCE(folio_large_nr_pages(folio) - 1 > MM_ID_MAPCOUNT_MAX);
+
+ VM_WARN_ON_ONCE(folio_mm_id(folio, 0) == MM_ID_DUMMY &&
+ folio->_mm_id_mapcount[0] != -1);
+ VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY &&
+ folio->_mm_id_mapcount[0] < 0);
+ VM_WARN_ON_ONCE(folio_mm_id(folio, 1) == MM_ID_DUMMY &&
+ folio->_mm_id_mapcount[1] != -1);
+ VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY &&
+ folio->_mm_id_mapcount[1] < 0);
+ VM_WARN_ON_ONCE(!folio_mapped(folio) &&
+ folio_test_large_maybe_mapped_shared(folio));
+}
+
+static __always_inline void folio_set_large_mapcount(struct folio *folio,
+ int mapcount, struct vm_area_struct *vma)
+{
+ __folio_large_mapcount_sanity_checks(folio, mapcount, vma->vm_mm->mm_id);
+
+ VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY);
+ VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY);
+
+ /* Note: mapcounts start at -1. */
+ atomic_set(&folio->_large_mapcount, mapcount - 1);
+ folio->_mm_id_mapcount[0] = mapcount - 1;
+ folio_set_mm_id(folio, 0, vma->vm_mm->mm_id);
+}
+
+static __always_inline int folio_add_return_large_mapcount(struct folio *folio,
+ int diff, struct vm_area_struct *vma)
+{
+ const mm_id_t mm_id = vma->vm_mm->mm_id;
+ int new_mapcount_val;
+
+ folio_lock_large_mapcount(folio);
+ __folio_large_mapcount_sanity_checks(folio, diff, mm_id);
+
+ new_mapcount_val = atomic_read(&folio->_large_mapcount) + diff;
+ atomic_set(&folio->_large_mapcount, new_mapcount_val);
+
+ /*
+ * If a folio is mapped more than once into an MM on 32bit, we
+ * can in theory overflow the per-MM mapcount (although only for
+ * fairly large folios), turning it negative. In that case, just
+ * free up the slot and mark the folio "mapped shared", otherwise
+ * we might be in trouble when unmapping pages later.
+ */
+ if (folio_mm_id(folio, 0) == mm_id) {
+ folio->_mm_id_mapcount[0] += diff;
+ if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[0] < 0)) {
+ folio->_mm_id_mapcount[0] = -1;
+ folio_set_mm_id(folio, 0, MM_ID_DUMMY);
+ folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
+ }
+ } else if (folio_mm_id(folio, 1) == mm_id) {
+ folio->_mm_id_mapcount[1] += diff;
+ if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[1] < 0)) {
+ folio->_mm_id_mapcount[1] = -1;
+ folio_set_mm_id(folio, 1, MM_ID_DUMMY);
+ folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
+ }
+ } else if (folio_mm_id(folio, 0) == MM_ID_DUMMY) {
+ folio_set_mm_id(folio, 0, mm_id);
+ folio->_mm_id_mapcount[0] = diff - 1;
+ /* We might have other mappings already. */
+ if (new_mapcount_val != diff - 1)
+ folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
+ } else if (folio_mm_id(folio, 1) == MM_ID_DUMMY) {
+ folio_set_mm_id(folio, 1, mm_id);
+ folio->_mm_id_mapcount[1] = diff - 1;
+ /* Slot 0 certainly has mappings as well. */
+ folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
+ }
+ folio_unlock_large_mapcount(folio);
+ return new_mapcount_val + 1;
+}
+#define folio_add_large_mapcount folio_add_return_large_mapcount
+
+static __always_inline int folio_sub_return_large_mapcount(struct folio *folio,
+ int diff, struct vm_area_struct *vma)
+{
+ const mm_id_t mm_id = vma->vm_mm->mm_id;
+ int new_mapcount_val;
+
+ folio_lock_large_mapcount(folio);
+ __folio_large_mapcount_sanity_checks(folio, diff, mm_id);
+
+ new_mapcount_val = atomic_read(&folio->_large_mapcount) - diff;
+ atomic_set(&folio->_large_mapcount, new_mapcount_val);
+
+ /*
+ * There are valid corner cases where we might underflow a per-MM
+ * mapcount (some mappings added when no slot was free, some mappings
+ * added once a slot was free), so we always set it to -1 once we go
+ * negative.
+ */
+ if (folio_mm_id(folio, 0) == mm_id) {
+ folio->_mm_id_mapcount[0] -= diff;
+ if (folio->_mm_id_mapcount[0] >= 0)
+ goto out;
+ folio->_mm_id_mapcount[0] = -1;
+ folio_set_mm_id(folio, 0, MM_ID_DUMMY);
+ } else if (folio_mm_id(folio, 1) == mm_id) {
+ folio->_mm_id_mapcount[1] -= diff;
+ if (folio->_mm_id_mapcount[1] >= 0)
+ goto out;
+ folio->_mm_id_mapcount[1] = -1;
+ folio_set_mm_id(folio, 1, MM_ID_DUMMY);
+ }
+
+ /*
+ * If one MM slot owns all mappings, the folio is mapped exclusively.
+ * Note that if the folio is now unmapped (new_mapcount_val == -1), both
+ * slots must be free (mapcount == -1), and we'll also mark it as
+ * exclusive.
+ */
+ if (folio->_mm_id_mapcount[0] == new_mapcount_val ||
+ folio->_mm_id_mapcount[1] == new_mapcount_val)
+ folio->_mm_ids &= ~FOLIO_MM_IDS_SHARED_BIT;
+out:
+ folio_unlock_large_mapcount(folio);
+ return new_mapcount_val + 1;
+}
+#define folio_sub_large_mapcount folio_sub_return_large_mapcount
+#else /* !CONFIG_MM_ID */
+/*
+ * See __folio_rmap_sanity_checks(), we might map large folios even without
+ * CONFIG_TRANSPARENT_HUGEPAGE. We'll keep that working for now.
+ */
+static inline void folio_set_large_mapcount(struct folio *folio, int mapcount,
+ struct vm_area_struct *vma)
+{
+ /* Note: mapcounts start at -1. */
+ atomic_set(&folio->_large_mapcount, mapcount - 1);
+}
+
+static inline void folio_add_large_mapcount(struct folio *folio,
+ int diff, struct vm_area_struct *vma)
+{
+ atomic_add(diff, &folio->_large_mapcount);
+}
+
+static inline int folio_add_return_large_mapcount(struct folio *folio,
+ int diff, struct vm_area_struct *vma)
+{
+ BUILD_BUG();
+}
+
+static inline void folio_sub_large_mapcount(struct folio *folio,
+ int diff, struct vm_area_struct *vma)
+{
+ atomic_sub(diff, &folio->_large_mapcount);
+}
+
+static inline int folio_sub_return_large_mapcount(struct folio *folio,
+ int diff, struct vm_area_struct *vma)
+{
+ BUILD_BUG();
+}
+#endif /* CONFIG_MM_ID */
+
+#define folio_inc_large_mapcount(folio, vma) \
+ folio_add_large_mapcount(folio, 1, vma)
+#define folio_inc_return_large_mapcount(folio, vma) \
+ folio_add_return_large_mapcount(folio, 1, vma)
+#define folio_dec_large_mapcount(folio, vma) \
+ folio_sub_large_mapcount(folio, 1, vma)
+#define folio_dec_return_large_mapcount(folio, vma) \
+ folio_sub_return_large_mapcount(folio, 1, vma)
+
/* RMAP flags, currently only relevant for some anon rmap operations. */
typedef int __bitwise rmap_t;
@@ -192,6 +401,7 @@ typedef int __bitwise rmap_t;
enum rmap_level {
RMAP_LEVEL_PTE = 0,
RMAP_LEVEL_PMD,
+ RMAP_LEVEL_PUD,
};
static inline void __folio_rmap_sanity_checks(const struct folio *folio,
@@ -228,6 +438,14 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio,
VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
break;
+ case RMAP_LEVEL_PUD:
+ /*
+ * Assume that we are creating a single "entire" mapping of the
+ * folio.
+ */
+ VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio);
+ VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio);
+ break;
default:
VM_WARN_ON_ONCE(true);
}
@@ -251,12 +469,16 @@ void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
folio_add_file_rmap_ptes(folio, page, 1, vma)
void folio_add_file_rmap_pmd(struct folio *, struct page *,
struct vm_area_struct *);
+void folio_add_file_rmap_pud(struct folio *, struct page *,
+ struct vm_area_struct *);
void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages,
struct vm_area_struct *);
#define folio_remove_rmap_pte(folio, page, vma) \
folio_remove_rmap_ptes(folio, page, 1, vma)
void folio_remove_rmap_pmd(struct folio *, struct page *,
struct vm_area_struct *);
+void folio_remove_rmap_pud(struct folio *, struct page *,
+ struct vm_area_struct *);
void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address, rmap_t flags);
@@ -322,7 +544,8 @@ static inline void hugetlb_remove_rmap(struct folio *folio)
}
static __always_inline void __folio_dup_file_rmap(struct folio *folio,
- struct page *page, int nr_pages, enum rmap_level level)
+ struct page *page, int nr_pages, struct vm_area_struct *dst_vma,
+ enum rmap_level level)
{
const int orig_nr_pages = nr_pages;
@@ -335,14 +558,17 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
break;
}
- do {
- atomic_inc(&page->_mapcount);
- } while (page++, --nr_pages > 0);
- atomic_add(orig_nr_pages, &folio->_large_mapcount);
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
+ do {
+ atomic_inc(&page->_mapcount);
+ } while (page++, --nr_pages > 0);
+ }
+ folio_add_large_mapcount(folio, orig_nr_pages, dst_vma);
break;
case RMAP_LEVEL_PMD:
+ case RMAP_LEVEL_PUD:
atomic_inc(&folio->_entire_mapcount);
- atomic_inc(&folio->_large_mapcount);
+ folio_inc_large_mapcount(folio, dst_vma);
break;
}
}
@@ -352,45 +578,47 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
* @folio: The folio to duplicate the mappings of
* @page: The first page to duplicate the mappings of
* @nr_pages: The number of pages of which the mapping will be duplicated
+ * @dst_vma: The destination vm area
*
* The page range of the folio is defined by [page, page + nr_pages)
*
* The caller needs to hold the page table lock.
*/
static inline void folio_dup_file_rmap_ptes(struct folio *folio,
- struct page *page, int nr_pages)
+ struct page *page, int nr_pages, struct vm_area_struct *dst_vma)
{
- __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE);
+ __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, RMAP_LEVEL_PTE);
}
static __always_inline void folio_dup_file_rmap_pte(struct folio *folio,
- struct page *page)
+ struct page *page, struct vm_area_struct *dst_vma)
{
- __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE);
+ __folio_dup_file_rmap(folio, page, 1, dst_vma, RMAP_LEVEL_PTE);
}
/**
* folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio
* @folio: The folio to duplicate the mapping of
* @page: The first page to duplicate the mapping of
+ * @dst_vma: The destination vm area
*
* The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
*
* The caller needs to hold the page table lock.
*/
static inline void folio_dup_file_rmap_pmd(struct folio *folio,
- struct page *page)
+ struct page *page, struct vm_area_struct *dst_vma)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE);
+ __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, RMAP_LEVEL_PTE);
#else
WARN_ON_ONCE(true);
#endif
}
static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
- struct page *page, int nr_pages, struct vm_area_struct *src_vma,
- enum rmap_level level)
+ struct page *page, int nr_pages, struct vm_area_struct *dst_vma,
+ struct vm_area_struct *src_vma, enum rmap_level level)
{
const int orig_nr_pages = nr_pages;
bool maybe_pinned;
@@ -432,18 +660,20 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
do {
if (PageAnonExclusive(page))
ClearPageAnonExclusive(page);
- atomic_inc(&page->_mapcount);
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
+ atomic_inc(&page->_mapcount);
} while (page++, --nr_pages > 0);
- atomic_add(orig_nr_pages, &folio->_large_mapcount);
+ folio_add_large_mapcount(folio, orig_nr_pages, dst_vma);
break;
case RMAP_LEVEL_PMD:
+ case RMAP_LEVEL_PUD:
if (PageAnonExclusive(page)) {
if (unlikely(maybe_pinned))
return -EBUSY;
ClearPageAnonExclusive(page);
}
atomic_inc(&folio->_entire_mapcount);
- atomic_inc(&folio->_large_mapcount);
+ folio_inc_large_mapcount(folio, dst_vma);
break;
}
return 0;
@@ -455,6 +685,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
* @folio: The folio to duplicate the mappings of
* @page: The first page to duplicate the mappings of
* @nr_pages: The number of pages of which the mapping will be duplicated
+ * @dst_vma: The destination vm area
* @src_vma: The vm area from which the mappings are duplicated
*
* The page range of the folio is defined by [page, page + nr_pages)
@@ -473,16 +704,18 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
* Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise.
*/
static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio,
- struct page *page, int nr_pages, struct vm_area_struct *src_vma)
+ struct page *page, int nr_pages, struct vm_area_struct *dst_vma,
+ struct vm_area_struct *src_vma)
{
- return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma,
- RMAP_LEVEL_PTE);
+ return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma,
+ src_vma, RMAP_LEVEL_PTE);
}
static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio,
- struct page *page, struct vm_area_struct *src_vma)
+ struct page *page, struct vm_area_struct *dst_vma,
+ struct vm_area_struct *src_vma)
{
- return __folio_try_dup_anon_rmap(folio, page, 1, src_vma,
+ return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma,
RMAP_LEVEL_PTE);
}
@@ -491,6 +724,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio,
* of a folio
* @folio: The folio to duplicate the mapping of
* @page: The first page to duplicate the mapping of
+ * @dst_vma: The destination vm area
* @src_vma: The vm area from which the mapping is duplicated
*
* The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
@@ -509,11 +743,12 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio,
* Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise.
*/
static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio,
- struct page *page, struct vm_area_struct *src_vma)
+ struct page *page, struct vm_area_struct *dst_vma,
+ struct vm_area_struct *src_vma)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma,
- RMAP_LEVEL_PMD);
+ return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma,
+ src_vma, RMAP_LEVEL_PMD);
#else
WARN_ON_ONCE(true);
return -EBUSY;
@@ -663,9 +898,8 @@ int folio_referenced(struct folio *, int is_locked,
void try_to_migrate(struct folio *folio, enum ttu_flags flags);
void try_to_unmap(struct folio *, enum ttu_flags flags);
-int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
- unsigned long end, struct page **pages,
- void *arg);
+struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr,
+ void *owner, struct folio **foliop);
/* Avoid racy checks */
#define PVMW_SYNC (1 << 0)
@@ -739,6 +973,9 @@ unsigned long page_address_in_vma(const struct folio *folio,
*/
int folio_mkclean(struct folio *);
+int mapping_wrprotect_range(struct address_space *mapping, pgoff_t pgoff,
+ unsigned long pfn, unsigned long nr_pages);
+
int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
struct vm_area_struct *vma);
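To make the two-slot per-MM tracking above easier to follow, here is a minimal userspace model of the add path. It is illustrative only: no locking, no 32-bit overflow handling, and all names and values are hypothetical rather than the kernel's own.

	/* Build with: cc -o mm_id_model mm_id_model.c */
	#include <stdio.h>
	#include <stdbool.h>

	#define MM_ID_DUMMY 0	/* slot is free */

	struct folio_model {
		int large_mapcount;	/* total mappings - 1, like _large_mapcount */
		int mm_id[2];		/* owning MM ids, MM_ID_DUMMY when free */
		int mm_id_mapcount[2];	/* per-MM mappings - 1 */
		bool shared;		/* models FOLIO_MM_IDS_SHARED_BIT */
	};

	/* Mirrors the slot handling in folio_add_return_large_mapcount(). */
	static void model_add(struct folio_model *f, int mm_id, int diff)
	{
		int new_val = f->large_mapcount + diff;

		f->large_mapcount = new_val;
		if (f->mm_id[0] == mm_id) {
			f->mm_id_mapcount[0] += diff;
		} else if (f->mm_id[1] == mm_id) {
			f->mm_id_mapcount[1] += diff;
		} else if (f->mm_id[0] == MM_ID_DUMMY) {
			f->mm_id[0] = mm_id;
			f->mm_id_mapcount[0] = diff - 1;
			/* Other MMs already map the folio. */
			if (new_val != diff - 1)
				f->shared = true;
		} else if (f->mm_id[1] == MM_ID_DUMMY) {
			f->mm_id[1] = mm_id;
			f->mm_id_mapcount[1] = diff - 1;
			f->shared = true;	/* slot 0 certainly has mappings too */
		}
		/* Both slots busy with other MMs: shared was already set earlier. */
	}

	int main(void)
	{
		struct folio_model f = {
			.large_mapcount = -1,	/* mapcounts start at -1 */
			.mm_id = { MM_ID_DUMMY, MM_ID_DUMMY },
			.mm_id_mapcount = { -1, -1 },
		};

		model_add(&f, 101, 16);	/* MM 101 maps all 16 pages */
		printf("one MM:  shared=%d\n", f.shared);	/* 0: exclusive */
		model_add(&f, 202, 16);	/* fork-style duplication into MM 202 */
		printf("two MMs: shared=%d\n", f.shared);	/* 1: maybe shared */
		return 0;
	}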
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 3f4d315aaec9..95da051fb155 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -170,6 +170,7 @@ struct rtc_device {
/* useful timestamps */
#define RTC_TIMESTAMP_BEGIN_0000 -62167219200ULL /* 0000-01-01 00:00:00 */
#define RTC_TIMESTAMP_BEGIN_1900 -2208988800LL /* 1900-01-01 00:00:00 */
+#define RTC_TIMESTAMP_EPOCH_GPS 315964800LL /* 1980-01-06 00:00:00 */
#define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */
#define RTC_TIMESTAMP_END_2063 2966371199LL /* 2063-12-31 23:59:59 */
#define RTC_TIMESTAMP_END_2079 3471292799LL /* 2079-12-31 23:59:59 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 56ddeb37b5cd..f96ac1982893 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1239,6 +1239,10 @@ struct task_struct {
struct mutex_waiter *blocked_on;
#endif
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+ struct mutex *blocker_mutex;
+#endif
+
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
int non_block_count;
#endif
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index fb1e295e7e63..166b19af956f 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -12,7 +12,7 @@ static __always_inline bool sched_smt_active(void)
return static_branch_likely(&sched_smt_present);
}
#else
-static inline bool sched_smt_active(void) { return false; }
+static __always_inline bool sched_smt_active(void) { return false; }
#endif
void arch_smt_update(void);
diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h
index fe41da005970..52791e070506 100644
--- a/include/linux/seq_buf.h
+++ b/include/linux/seq_buf.h
@@ -167,8 +167,8 @@ extern int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str,
const void *buf, size_t len, bool ascii);
#ifdef CONFIG_BINARY_PRINTF
-extern int
-seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary);
+__printf(2, 0)
+int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary);
#endif
void seq_buf_do_printk(struct seq_buf *s, const char *lvl);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 2fb266ea69fa..d6ebf0596510 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -181,6 +181,7 @@ int seq_open_private(struct file *, const struct seq_operations *, int);
int seq_release_private(struct inode *, struct file *);
#ifdef CONFIG_BINARY_PRINTF
+__printf(2, 0)
void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary);
#endif
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index ff78efc1f60d..34562eb99931 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -84,7 +84,6 @@ enum serdev_parity {
struct serdev_controller_ops {
ssize_t (*write_buf)(struct serdev_controller *, const u8 *, size_t);
void (*write_flush)(struct serdev_controller *);
- int (*write_room)(struct serdev_controller *);
int (*open)(struct serdev_controller *);
void (*close)(struct serdev_controller *);
void (*set_flow_control)(struct serdev_controller *, bool);
@@ -212,7 +211,6 @@ int serdev_device_break_ctl(struct serdev_device *serdev, int break_state);
void serdev_device_write_wakeup(struct serdev_device *);
ssize_t serdev_device_write(struct serdev_device *, const u8 *, size_t, long);
void serdev_device_write_flush(struct serdev_device *);
-int serdev_device_write_room(struct serdev_device *);
/*
* serdev device driver functions
@@ -273,10 +271,6 @@ static inline ssize_t serdev_device_write(struct serdev_device *sdev,
return -ENODEV;
}
static inline void serdev_device_write_flush(struct serdev_device *sdev) {}
-static inline int serdev_device_write_room(struct serdev_device *sdev)
-{
- return 0;
-}
#define serdev_device_driver_register(x)
#define serdev_device_driver_unregister(x)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 98e07e9e9e58..d5a8ab98035c 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -137,6 +137,15 @@ enum _slab_flag_bits {
* rcu_read_lock before reading the address, then rcu_read_unlock after
* taking the spinlock within the structure expected at that address.
*
+ * Note that the object identity check has to be done *after* acquiring a
+ * reference, therefore the user has to ensure proper ordering for loads.
+ * Similarly, when initializing objects allocated with SLAB_TYPESAFE_BY_RCU,
+ * the newly allocated object has to be fully initialized *before* its
+ * refcount gets initialized and proper ordering for stores is required.
+ * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() are
+ * designed with the proper fences required for reference counting objects
+ * allocated with SLAB_TYPESAFE_BY_RCU.
+ *
* Note that it is not possible to acquire a lock within a structure
* allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
* as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages
@@ -236,12 +245,6 @@ enum _slab_flag_bits {
#endif
/*
- * freeptr_t represents a SLUB freelist pointer, which might be encoded
- * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
- */
-typedef struct { unsigned long v; } freeptr_t;
-
-/*
* ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
*
* Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault.
diff --git a/include/linux/sort.h b/include/linux/sort.h
index e163287ac6c1..8e5603b10941 100644
--- a/include/linux/sort.h
+++ b/include/linux/sort.h
@@ -13,4 +13,15 @@ void sort(void *base, size_t num, size_t size,
cmp_func_t cmp_func,
swap_func_t swap_func);
+/* Versions that periodically call cond_resched(): */
+
+void sort_r_nonatomic(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv);
+
+void sort_nonatomic(void *base, size_t num, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func);
+
#endif
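A hedged usage sketch of the new non-atomic variant; the table array and its length are hypothetical. It behaves like sort(), but it may call cond_resched() while sorting, so it must only be used from contexts that are allowed to sleep:

	static int cmp_u32(const void *a, const void *b)
	{
		const u32 x = *(const u32 *)a, y = *(const u32 *)b;

		return x < y ? -1 : (x > y ? 1 : 0);
	}

	/* Large table sorted from process context: preemption points allowed. */
	sort_nonatomic(table, nents, sizeof(*table), cmp_u32, NULL);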
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 2d6c30317792..2362f621d94c 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -150,12 +150,14 @@ enum sdw_dpn_pkg_mode {
*
* @SDW_STREAM_PCM: PCM data stream
* @SDW_STREAM_PDM: PDM data stream
+ * @SDW_STREAM_BPT: BPT data stream
*
* spec doesn't define this, but is used in implementation
*/
enum sdw_stream_type {
SDW_STREAM_PCM = 0,
SDW_STREAM_PDM = 1,
+ SDW_STREAM_BPT = 2,
};
/**
@@ -822,6 +824,15 @@ struct sdw_defer {
struct completion complete;
};
+/*
+ * Add a practical limit to BPT transfer sizes. BPT is typically used
+ * to transfer firmware, and larger firmware transfers will increase
+ * the cold latency beyond typical OS or user requirements.
+ */
+#define SDW_BPT_MSG_MAX_BYTES (1024 * 1024)
+
+struct sdw_bpt_msg;
+
/**
* struct sdw_master_ops - Master driver ops
* @read_prop: Read Master properties
@@ -837,6 +848,10 @@ struct sdw_defer {
* @get_device_num: Callback for vendor-specific device_number allocation
* @put_device_num: Callback for vendor-specific device_number release
* @new_peripheral_assigned: Callback to handle enumeration of new peripheral.
+ * @bpt_send_async: reserve resources for BPT stream and send message
+ * using BPT protocol
+ * @bpt_wait: wait for message completion using BPT protocol
+ * and release resources
*/
struct sdw_master_ops {
int (*read_prop)(struct sdw_bus *bus);
@@ -853,6 +868,9 @@ struct sdw_master_ops {
void (*new_peripheral_assigned)(struct sdw_bus *bus,
struct sdw_slave *slave,
int dev_num);
+ int (*bpt_send_async)(struct sdw_bus *bus, struct sdw_slave *slave,
+ struct sdw_bpt_msg *msg);
+ int (*bpt_wait)(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg);
};
int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
@@ -879,7 +897,7 @@ struct sdw_port_config {
* @ch_count: Channel count of the stream
* @bps: Number of bits per audio sample
* @direction: Data direction
- * @type: Stream type PCM or PDM
+ * @type: Stream type PCM, PDM or BPT
*/
struct sdw_stream_config {
unsigned int frame_rate;
@@ -929,7 +947,7 @@ struct sdw_stream_params {
* @name: SoundWire stream name
* @params: Stream parameters
* @state: Current state of the stream
- * @type: Stream type PCM or PDM
+ * @type: Stream type PCM, PDM or BPT
* @m_rt_count: Count of Master runtime(s) in this stream
* @master_list: List of Master runtime(s) in this stream.
* master_list can contain only one m_rt per Master instance
@@ -959,6 +977,9 @@ struct sdw_stream_runtime {
* @defer_msg: Defer message
* @params: Current bus parameters
* @stream_refcount: number of streams currently using this bus
+ * @bpt_stream_refcount: number of BPT streams currently using this bus (should
+ * be zero or one; multiple BPT streams per link are not supported).
+ * @bpt_stream: pointer stored to handle BPT streams.
* @ops: Master callback ops
* @port_ops: Master port callback ops
* @prop: Master properties
@@ -996,6 +1017,8 @@ struct sdw_bus {
struct sdw_defer defer_msg;
struct sdw_bus_params params;
int stream_refcount;
+ int bpt_stream_refcount;
+ struct sdw_stream_runtime *bpt_stream;
const struct sdw_master_ops *ops;
const struct sdw_master_port_ops *port_ops;
struct sdw_master_prop prop;
@@ -1017,7 +1040,7 @@ struct sdw_bus {
unsigned int lane_used_bandwidth[SDW_MAX_LANES];
};
-struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name);
+struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name, enum sdw_stream_type type);
void sdw_release_stream(struct sdw_stream_runtime *stream);
int sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream);
@@ -1043,6 +1066,10 @@ int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id);
void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id);
bool is_clock_scaling_supported_by_slave(struct sdw_slave *slave);
+int sdw_bpt_send_async(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg);
+int sdw_bpt_wait(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg);
+int sdw_bpt_send_sync(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg);
+
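A hedged sketch of how the split helpers compose; this mirrors what sdw_bpt_send_sync() presumably does, but the exact locking and the layout of struct sdw_bpt_msg are not visible in this header:

	static int example_bpt_transfer(struct sdw_bus *bus, struct sdw_slave *slave,
					struct sdw_bpt_msg *msg)
	{
		int ret;

		/* Reserves the single BPT stream on this link and starts the transfer. */
		ret = sdw_bpt_send_async(bus, slave, msg);
		if (ret)
			return ret;

		/* Waits for completion and releases the reserved stream. */
		return sdw_bpt_wait(bus, slave, msg);
	}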
#if IS_ENABLED(CONFIG_SOUNDWIRE)
int sdw_stream_add_slave(struct sdw_slave *slave,
diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h
index 799f8578137b..6b839987f14c 100644
--- a/include/linux/soundwire/sdw_amd.h
+++ b/include/linux/soundwire/sdw_amd.h
@@ -28,6 +28,8 @@
#define ACP_SDW1 1
#define AMD_SDW_MAX_MANAGER_COUNT 2
#define ACP63_PCI_REV_ID 0x63
+#define ACP70_PCI_REV_ID 0x70
+#define ACP71_PCI_REV_ID 0x71
struct acp_sdw_pdata {
u16 instance;
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 580086417e4b..493d9de4e472 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -436,6 +436,10 @@ struct sdw_intel_hw_ops {
bool (*sync_check_cmdsync_unlocked)(struct sdw_intel *sdw);
void (*program_sdi)(struct sdw_intel *sdw, int dev_num);
+
+ int (*bpt_send_async)(struct sdw_intel *sdw, struct sdw_slave *slave,
+ struct sdw_bpt_msg *msg);
+ int (*bpt_wait)(struct sdw_intel *sdw, struct sdw_slave *slave, struct sdw_bpt_msg *msg);
};
extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops;
diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h
index 33dcbec71925..51cab2def9ec 100644
--- a/include/linux/sprintf.h
+++ b/include/linux/sprintf.h
@@ -24,4 +24,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list);
extern bool no_hash_pointers;
int no_hash_pointers_enable(char *str);
+/* Used for Rust formatting ('%pA') */
+char *rust_fmt_argument(char *buf, char *end, const void *ptr);
+
#endif /* _LINUX_KERNEL_SPRINTF_H */
diff --git a/include/linux/string.h b/include/linux/string.h
index 0403a4ca4c11..01621ad0f598 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -336,8 +336,8 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s);
#define sysfs_match_string(_a, _s) __sysfs_match_string(_a, ARRAY_SIZE(_a), _s)
#ifdef CONFIG_BINARY_PRINTF
-int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
-int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
+__printf(3, 0) int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
+__printf(3, 0) int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
#endif
extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index fec976e58174..f8b406b0a1af 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -64,7 +64,9 @@ struct rpc_clnt {
cl_noretranstimeo: 1,/* No retransmit timeouts */
cl_autobind : 1,/* use getport() */
cl_chatty : 1,/* be verbose */
- cl_shutdown : 1;/* rpc immediate -EIO */
+ cl_shutdown : 1,/* rpc immediate -EIO */
+ cl_netunreach_fatal : 1;
+ /* Treat ENETUNREACH errors as fatal */
struct xprtsec_parms cl_xprtsec; /* transport security policy */
struct rpc_rtt * cl_rtt; /* RTO estimator data */
@@ -175,6 +177,7 @@ struct rpc_add_xprt_test {
#define RPC_CLNT_CREATE_SOFTERR (1UL << 10)
#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
#define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
+#define RPC_CLNT_CREATE_NETUNREACH_FATAL (1UL << 13)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index eac57914dcf3..ccba79ebf893 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -134,6 +134,7 @@ struct rpc_task_setup {
#define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */
#define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */
#define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */
+#define RPC_TASK_NETUNREACH_FATAL 0x0040 /* ENETUNREACH is fatal */
#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */
#define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */
#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */
diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h
index e411368cdacf..e4db5022fe92 100644
--- a/include/linux/sunrpc/xprtmultipath.h
+++ b/include/linux/sunrpc/xprtmultipath.h
@@ -56,6 +56,7 @@ extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
struct rpc_xprt *xprt);
extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
struct rpc_xprt *xprt, bool offline);
+extern struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps);
extern void xprt_iter_init(struct rpc_xprt_iter *xpi,
struct rpc_xprt_switch *xps);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a98c757400fe..db46b25a65ae 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -24,7 +24,6 @@ struct pagevec;
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff
-#define SWAP_FLAG_PRIO_SHIFT 0
#define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */
#define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */
#define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */
@@ -74,14 +73,13 @@ static inline int current_is_kswapd(void)
* to a special SWP_DEVICE_{READ|WRITE} entry.
*
* When a page is mapped by the device for exclusive access we set the CPU page
- * table entries to special SWP_DEVICE_EXCLUSIVE_* entries.
+ * table entries to a special SWP_DEVICE_EXCLUSIVE entry.
*/
#ifdef CONFIG_DEVICE_PRIVATE
-#define SWP_DEVICE_NUM 4
+#define SWP_DEVICE_NUM 3
#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
-#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
-#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
+#define SWP_DEVICE_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
#else
#define SWP_DEVICE_NUM 0
#endif
@@ -286,12 +284,10 @@ enum swap_cluster_flags {
#endif
/*
- * We assign a cluster to each CPU, so each CPU can allocate swap entry from
- * its own cluster and swapout sequentially. The purpose is to optimize swapout
- * throughput.
+ * We keep using the same cluster for rotational devices so I/O will be
+ * sequential. The purpose is to optimize swap throughput on these devices.
*/
-struct percpu_cluster {
- local_lock_t lock; /* Protect the percpu_cluster above */
+struct swap_sequential_cluster {
unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */
};
@@ -317,8 +313,7 @@ struct swap_info_struct {
atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS];
unsigned int pages; /* total of usable pages of swap */
atomic_long_t inuse_pages; /* number of those currently in use */
- struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
- struct percpu_cluster *global_cluster; /* Use one global cluster for rotating device */
+ struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */
spinlock_t global_cluster_lock; /* Serialize usage of global cluster */
struct rb_root swap_extent_root;/* root of the swap extent rbtree */
struct block_device *bdev; /* swap device or bdev of swap file */
@@ -461,7 +456,6 @@ void free_pages_and_swap_cache(struct encoded_page **, int);
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
extern atomic_t nr_rotate_swap;
-extern bool has_usable_swap(void);
/* Swap 50% full? Release swapcache more aggressively.. */
static inline bool vm_swap_full(void)
@@ -475,24 +469,22 @@ static inline long get_nr_swap_pages(void)
}
extern void si_swapinfo(struct sysinfo *);
-swp_entry_t folio_alloc_swap(struct folio *folio);
+int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask);
bool folio_free_swap(struct folio *folio);
void put_swap_folio(struct folio *folio, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t, int);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t entry, int nr);
extern void swap_free_nr(swp_entry_t entry, int nr_pages);
-extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
int swap_type_of(dev_t device, sector_t offset);
int find_first_swap(dev_t *device);
extern unsigned int count_swap_pages(int, int);
extern sector_t swapdev_block(int, pgoff_t);
extern int __swap_count(swp_entry_t entry);
-extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry);
+extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
struct swap_info_struct *swp_swap_info(swp_entry_t entry);
struct backing_dev_info;
@@ -575,9 +567,9 @@ static inline int __swap_count(swp_entry_t entry)
return 0;
}
-static inline int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
+static inline bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
{
- return 0;
+ return false;
}
static inline int swp_swapcount(swp_entry_t entry)
@@ -585,11 +577,9 @@ static inline int swp_swapcount(swp_entry_t entry)
return 0;
}
-static inline swp_entry_t folio_alloc_swap(struct folio *folio)
+static inline int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask)
{
- swp_entry_t entry;
- entry.val = 0;
- return entry;
+ return -EINVAL;
}
static inline bool folio_free_swap(struct folio *folio)
@@ -650,7 +640,6 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
#endif
#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
-void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry);
static inline int mem_cgroup_try_charge_swap(struct folio *folio,
swp_entry_t entry)
@@ -671,10 +660,6 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
extern bool mem_cgroup_swap_full(struct folio *folio);
#else
-static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
-{
-}
-
static inline int mem_cgroup_try_charge_swap(struct folio *folio,
swp_entry_t entry)
{
diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h
deleted file mode 100644
index 840aec3523b2..000000000000
--- a/include/linux/swap_slots.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_SWAP_SLOTS_H
-#define _LINUX_SWAP_SLOTS_H
-
-#include <linux/swap.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-
-#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH
-#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE)
-#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE)
-
-struct swap_slots_cache {
- bool lock_initialized;
- struct mutex alloc_lock; /* protects slots, nr, cur */
- swp_entry_t *slots;
- int nr;
- int cur;
- int n_ret;
-};
-
-void disable_swap_slots_cache_lock(void);
-void reenable_swap_slots_cache_unlock(void);
-void enable_swap_slots_cache(void);
-
-extern bool swap_slot_cache_enabled;
-
-#endif /* _LINUX_SWAP_SLOTS_H */
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 96f26e29fefe..64ea151a7ae3 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -186,26 +186,16 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry)
return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
}
-static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset)
+static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset)
{
- return swp_entry(SWP_DEVICE_EXCLUSIVE_READ, offset);
-}
-
-static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset)
-{
- return swp_entry(SWP_DEVICE_EXCLUSIVE_WRITE, offset);
+ return swp_entry(SWP_DEVICE_EXCLUSIVE, offset);
}
static inline bool is_device_exclusive_entry(swp_entry_t entry)
{
- return swp_type(entry) == SWP_DEVICE_EXCLUSIVE_READ ||
- swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE;
+ return swp_type(entry) == SWP_DEVICE_EXCLUSIVE;
}
-static inline bool is_writable_device_exclusive_entry(swp_entry_t entry)
-{
- return unlikely(swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE);
-}
#else /* CONFIG_DEVICE_PRIVATE */
static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
{
@@ -227,12 +217,7 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry)
return false;
}
-static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset)
-{
- return swp_entry(0, 0);
-}
-
-static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset)
+static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset)
{
return swp_entry(0, 0);
}
@@ -242,10 +227,6 @@ static inline bool is_device_exclusive_entry(swp_entry_t entry)
return false;
}
-static inline bool is_writable_device_exclusive_entry(swp_entry_t entry)
-{
- return false;
-}
#endif /* CONFIG_DEVICE_PRIVATE */
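A hedged sketch of the consolidated entry type; it assumes, as the callers typically do, that the offset encodes the pfn, which is not visible from this header alone:

	swp_entry_t entry;

	/* One entry type now covers device-exclusive access; writability is
	 * decided when the entry is restored rather than encoded in the type. */
	entry = make_device_exclusive_entry(page_to_pfn(page));
	if (is_device_exclusive_entry(entry))
		pte = swp_entry_to_pte(entry);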
#ifdef CONFIG_MIGRATION
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 69f9bedd0ee8..0b5ed6821080 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -86,8 +86,6 @@ struct thermal_trip {
#define THERMAL_TRIP_PRIV_TO_INT(_val_) (uintptr_t)(_val_)
#define THERMAL_INT_TO_TRIP_PRIV(_val_) (void *)(uintptr_t)(_val_)
-struct thermal_zone_device;
-
struct cooling_spec {
unsigned long upper; /* Highest cooling state */
unsigned long lower; /* Lowest cooling state */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index e67ecd1cbc97..10596d7c3a34 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -30,7 +30,7 @@
*
* @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and
* it's safe to wait for the completion of the running instance from
- * IRQ handlers, for example, by calling del_timer_sync().
+ * IRQ handlers, for example, by calling timer_delete_sync().
*
* Note: The irq disabled callback execution is a special case for
* workqueue locking issues. It's not meant for executing random crap
@@ -168,40 +168,6 @@ extern int timer_delete(struct timer_list *timer);
extern int timer_shutdown_sync(struct timer_list *timer);
extern int timer_shutdown(struct timer_list *timer);
-/**
- * del_timer_sync - Delete a pending timer and wait for a running callback
- * @timer: The timer to be deleted
- *
- * See timer_delete_sync() for detailed explanation.
- *
- * Do not use in new code. Use timer_delete_sync() instead.
- *
- * Returns:
- * * %0 - The timer was not pending
- * * %1 - The timer was pending and deactivated
- */
-static inline int del_timer_sync(struct timer_list *timer)
-{
- return timer_delete_sync(timer);
-}
-
-/**
- * del_timer - Delete a pending timer
- * @timer: The timer to be deleted
- *
- * See timer_delete() for detailed explanation.
- *
- * Do not use in new code. Use timer_delete() instead.
- *
- * Returns:
- * * %0 - The timer was not pending
- * * %1 - The timer was pending and deactivated
- */
-static inline int del_timer(struct timer_list *timer)
-{
- return timer_delete(timer);
-}
-
extern void init_timers(void);
struct hrtimer;
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
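With the deprecated wrappers gone, remaining callers have to use the canonical names directly; a hedged migration sketch (the timer fields are hypothetical examples):

	/* before (wrappers removed above): */
	del_timer_sync(&adapter->watchdog_timer);
	del_timer(&adapter->phy_timer);

	/* after: same return values (0 = was not pending, 1 = deactivated) */
	timer_delete_sync(&adapter->watchdog_timer);
	timer_delete(&adapter->phy_timer);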
diff --git a/include/linux/trace.h b/include/linux/trace.h
index fdcd76b7be83..7eaad857dee0 100644
--- a/include/linux/trace.h
+++ b/include/linux/trace.h
@@ -72,8 +72,8 @@ static inline int unregister_ftrace_export(struct trace_export *export)
static inline void trace_printk_init_buffers(void)
{
}
-static inline int trace_array_printk(struct trace_array *tr, unsigned long ip,
- const char *fmt, ...)
+static inline __printf(3, 4)
+int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...)
{
return 0;
}
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 1ef95c0287f0..a93ed5ac3226 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -88,8 +88,8 @@ extern __printf(2, 3)
void trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
extern __printf(2, 0)
void trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args);
-extern void
-trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
+extern __printf(2, 0)
+void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
extern int trace_print_seq(struct seq_file *m, struct trace_seq *s);
extern int trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
int cnt);
@@ -113,8 +113,8 @@ static inline __printf(2, 3)
void trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
{
}
-static inline void
-trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+static inline __printf(2, 0)
+void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
{
}
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 2372f9357240..0a46e4054dec 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -239,7 +239,6 @@ struct tty_struct {
struct list_head tty_files;
-#define N_TTY_BUF_SIZE 4096
struct work_struct SAK_work;
} __randomize_layout;
@@ -251,7 +250,7 @@ struct tty_file_private {
};
/**
- * DOC: TTY Struct Flags
+ * enum tty_struct_flags - TTY Struct Flags
*
* These bits are used in the :c:member:`tty_struct.flags` field.
*
@@ -260,62 +259,64 @@ struct tty_file_private {
* tty->write. Thus, you must use the inline functions set_bit() and
* clear_bit() to make things atomic.
*
- * TTY_THROTTLED
+ * @TTY_THROTTLED:
* Driver input is throttled. The ldisc should call
* :c:member:`tty_driver.unthrottle()` in order to resume reception when
* it is ready to process more data (at threshold min).
*
- * TTY_IO_ERROR
+ * @TTY_IO_ERROR:
* If set, causes all subsequent userspace read/write calls on the tty to
* fail, returning -%EIO. (May be no ldisc too.)
*
- * TTY_OTHER_CLOSED
+ * @TTY_OTHER_CLOSED:
* Device is a pty and the other side has closed.
*
- * TTY_EXCLUSIVE
+ * @TTY_EXCLUSIVE:
* Exclusive open mode (a single opener).
*
- * TTY_DO_WRITE_WAKEUP
+ * @TTY_DO_WRITE_WAKEUP:
* If set, causes the driver to call the
* :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume
* transmission when it can accept more data to transmit.
*
- * TTY_LDISC_OPEN
+ * @TTY_LDISC_OPEN:
* Indicates that a line discipline is open. For debugging purposes only.
*
- * TTY_PTY_LOCK
+ * @TTY_PTY_LOCK:
* A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic.
*
- * TTY_NO_WRITE_SPLIT
+ * @TTY_NO_WRITE_SPLIT:
* Prevent driver from splitting up writes into smaller chunks (preserve
* write boundaries to driver).
*
- * TTY_HUPPED
+ * @TTY_HUPPED:
* The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`.
*
- * TTY_HUPPING
+ * @TTY_HUPPING:
* The TTY is in the process of hanging up to abort potential readers.
*
- * TTY_LDISC_CHANGING
+ * @TTY_LDISC_CHANGING:
* Line discipline for this TTY is being changed. I/O should not block
* when this is set. Use tty_io_nonblock() to check.
*
- * TTY_LDISC_HALTED
+ * @TTY_LDISC_HALTED:
* Line discipline for this TTY was stopped. No work should be queued to
* this ldisc.
*/
-#define TTY_THROTTLED 0
-#define TTY_IO_ERROR 1
-#define TTY_OTHER_CLOSED 2
-#define TTY_EXCLUSIVE 3
-#define TTY_DO_WRITE_WAKEUP 5
-#define TTY_LDISC_OPEN 11
-#define TTY_PTY_LOCK 16
-#define TTY_NO_WRITE_SPLIT 17
-#define TTY_HUPPED 18
-#define TTY_HUPPING 19
-#define TTY_LDISC_CHANGING 20
-#define TTY_LDISC_HALTED 22
+enum tty_struct_flags {
+ TTY_THROTTLED,
+ TTY_IO_ERROR,
+ TTY_OTHER_CLOSED,
+ TTY_EXCLUSIVE,
+ TTY_DO_WRITE_WAKEUP,
+ TTY_LDISC_OPEN,
+ TTY_PTY_LOCK,
+ TTY_NO_WRITE_SPLIT,
+ TTY_HUPPED,
+ TTY_HUPPING,
+ TTY_LDISC_CHANGING,
+ TTY_LDISC_HALTED,
+};
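Because the enum values are bit numbers rather than masks, they are still manipulated with the atomic bitops named in the comment above; a short hedged sketch (the surrounding ldisc code is assumed):

	if (test_bit(TTY_IO_ERROR, &tty->flags))
		return -EIO;	/* tty already dead, fail the I/O */

	set_bit(TTY_THROTTLED, &tty->flags);	/* pause input until we catch up */
	/* ... drain buffers ... */
	clear_bit(TTY_THROTTLED, &tty->flags);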
static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file)
{
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index d4cdc089f6c3..188ee9b768eb 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -17,6 +17,92 @@ struct serial_icounter_struct;
struct serial_struct;
/**
+ * enum tty_driver_flag -- TTY Driver Flags
+ *
+ * These are flags passed to tty_alloc_driver().
+ *
+ * @TTY_DRIVER_INSTALLED:
+ * Whether this driver was successfully installed. This is a tty internal
+ * flag. Do not touch.
+ *
+ * @TTY_DRIVER_RESET_TERMIOS:
+ * Requests the tty layer to reset the termios setting when the last
+ * process has closed the device. Used for PTYs, in particular.
+ *
+ * @TTY_DRIVER_REAL_RAW:
+ * Indicates that the driver will guarantee not to set any special
+ * character handling flags if this is set for the tty:
+ *
+ * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)``
+ *
+ * That is, if there is no reason for the driver to
+ * send notifications of parity and break characters up to the line
+ * driver, it won't do so. This allows the line driver to optimize for
+ * this case if this flag is set. (Note that there is also a promise, if
+ * the above case is true, not to signal overruns, either.)
+ *
+ * @TTY_DRIVER_DYNAMIC_DEV:
+ * The individual tty devices need to be registered with a call to
+ * tty_register_device() when the device is found in the system and
+ * unregistered with a call to tty_unregister_device() so the devices will
+ * be show up properly in sysfs. If not set, all &tty_driver.num entries
+ * will be created by the tty core in sysfs when tty_register_driver() is
+ * called. This is to be used by drivers that have tty devices that can
+ * appear and disappear while the main tty driver is registered with the
+ * tty core.
+ *
+ * @TTY_DRIVER_DEVPTS_MEM:
+ * Don't use the standard arrays (&tty_driver.ttys and
+ * &tty_driver.termios), instead use dynamic memory keyed through the
+ * devpts filesystem. This is only applicable to the PTY driver.
+ *
+ * @TTY_DRIVER_HARDWARE_BREAK:
+ * Hardware handles break signals. Pass the requested timeout to the
+ * &tty_operations.break_ctl instead of using a simple on/off interface.
+ *
+ * @TTY_DRIVER_DYNAMIC_ALLOC:
+ * Do not allocate structures which are needed per line for this driver
+ * (&tty_driver.ports) as it would waste memory. The driver will take
+ * care. This is only applicable to the PTY driver.
+ *
+ * @TTY_DRIVER_UNNUMBERED_NODE:
+ * Do not create numbered ``/dev`` nodes. For example, create
+ * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a
+ * driver for a single tty device is being allocated.
+ */
+enum tty_driver_flag {
+ TTY_DRIVER_INSTALLED = BIT(0),
+ TTY_DRIVER_RESET_TERMIOS = BIT(1),
+ TTY_DRIVER_REAL_RAW = BIT(2),
+ TTY_DRIVER_DYNAMIC_DEV = BIT(3),
+ TTY_DRIVER_DEVPTS_MEM = BIT(4),
+ TTY_DRIVER_HARDWARE_BREAK = BIT(5),
+ TTY_DRIVER_DYNAMIC_ALLOC = BIT(6),
+ TTY_DRIVER_UNNUMBERED_NODE = BIT(7),
+};
+
+enum tty_driver_type {
+ TTY_DRIVER_TYPE_SYSTEM,
+ TTY_DRIVER_TYPE_CONSOLE,
+ TTY_DRIVER_TYPE_SERIAL,
+ TTY_DRIVER_TYPE_PTY,
+ TTY_DRIVER_TYPE_SCC,
+ TTY_DRIVER_TYPE_SYSCONS,
+};
+
+enum tty_driver_subtype {
+ SYSTEM_TYPE_TTY = 1,
+ SYSTEM_TYPE_CONSOLE,
+ SYSTEM_TYPE_SYSCONS,
+ SYSTEM_TYPE_SYSPTMX,
+
+ PTY_TYPE_MASTER = 1,
+ PTY_TYPE_SLAVE,
+
+ SERIAL_TYPE_NORMAL = 1,
+};
+
+/**
* struct tty_operations -- interface between driver and tty
*
* @lookup: ``struct tty_struct *()(struct tty_driver *self, struct file *,
@@ -414,8 +500,8 @@ struct tty_operations {
* @major: major /dev device number (zero for autoassignment)
* @minor_start: the first minor /dev device number
* @num: number of devices allocated
- * @type: type of tty driver (%TTY_DRIVER_TYPE_)
- * @subtype: subtype of tty driver (%SYSTEM_TYPE_, %PTY_TYPE_, %SERIAL_TYPE_)
+ * @type: type of tty driver (enum tty_driver_type)
+ * @subtype: subtype of tty driver (enum tty_driver_subtype)
* @init_termios: termios to set to each tty initially (e.g. %tty_std_termios)
* @flags: tty driver flags (%TTY_DRIVER_)
* @proc_entry: proc fs entry, used internally
@@ -447,8 +533,8 @@ struct tty_driver {
int major;
int minor_start;
unsigned int num;
- short type;
- short subtype;
+ enum tty_driver_type type;
+ enum tty_driver_subtype subtype;
struct ktermios init_termios;
unsigned long flags;
struct proc_dir_entry *proc_entry;
@@ -478,7 +564,13 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line);
void tty_driver_kref_put(struct tty_driver *driver);
-/* Use TTY_DRIVER_* flags below */
+/**
+ * tty_alloc_driver - allocate tty driver
+ * @lines: count of lines this driver can handle at most
+ * @flags: a combination of enum tty_driver_flag bits, stored in driver->flags
+ *
+ * Returns: struct tty_driver or a PTR-encoded error (use IS_ERR() and friends).
+ */
#define tty_alloc_driver(lines, flags) \
__tty_alloc_driver(lines, THIS_MODULE, flags)
@@ -494,84 +586,6 @@ static inline void tty_set_operations(struct tty_driver *driver,
driver->ops = op;
}
-/**
- * DOC: TTY Driver Flags
- *
- * TTY_DRIVER_RESET_TERMIOS
- * Requests the tty layer to reset the termios setting when the last
- * process has closed the device. Used for PTYs, in particular.
- *
- * TTY_DRIVER_REAL_RAW
- * Indicates that the driver will guarantee not to set any special
- * character handling flags if this is set for the tty:
- *
- * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)``
- *
- * That is, if there is no reason for the driver to
- * send notifications of parity and break characters up to the line
- * driver, it won't do so. This allows the line driver to optimize for
- * this case if this flag is set. (Note that there is also a promise, if
- * the above case is true, not to signal overruns, either.)
- *
- * TTY_DRIVER_DYNAMIC_DEV
- * The individual tty devices need to be registered with a call to
- * tty_register_device() when the device is found in the system and
- * unregistered with a call to tty_unregister_device() so the devices will
- * be show up properly in sysfs. If not set, all &tty_driver.num entries
- * will be created by the tty core in sysfs when tty_register_driver() is
- * called. This is to be used by drivers that have tty devices that can
- * appear and disappear while the main tty driver is registered with the
- * tty core.
- *
- * TTY_DRIVER_DEVPTS_MEM
- * Don't use the standard arrays (&tty_driver.ttys and
- * &tty_driver.termios), instead use dynamic memory keyed through the
- * devpts filesystem. This is only applicable to the PTY driver.
- *
- * TTY_DRIVER_HARDWARE_BREAK
- * Hardware handles break signals. Pass the requested timeout to the
- * &tty_operations.break_ctl instead of using a simple on/off interface.
- *
- * TTY_DRIVER_DYNAMIC_ALLOC
- * Do not allocate structures which are needed per line for this driver
- * (&tty_driver.ports) as it would waste memory. The driver will take
- * care. This is only applicable to the PTY driver.
- *
- * TTY_DRIVER_UNNUMBERED_NODE
- * Do not create numbered ``/dev`` nodes. For example, create
- * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a
- * driver for a single tty device is being allocated.
- */
-#define TTY_DRIVER_INSTALLED 0x0001
-#define TTY_DRIVER_RESET_TERMIOS 0x0002
-#define TTY_DRIVER_REAL_RAW 0x0004
-#define TTY_DRIVER_DYNAMIC_DEV 0x0008
-#define TTY_DRIVER_DEVPTS_MEM 0x0010
-#define TTY_DRIVER_HARDWARE_BREAK 0x0020
-#define TTY_DRIVER_DYNAMIC_ALLOC 0x0040
-#define TTY_DRIVER_UNNUMBERED_NODE 0x0080
-
-/* tty driver types */
-#define TTY_DRIVER_TYPE_SYSTEM 0x0001
-#define TTY_DRIVER_TYPE_CONSOLE 0x0002
-#define TTY_DRIVER_TYPE_SERIAL 0x0003
-#define TTY_DRIVER_TYPE_PTY 0x0004
-#define TTY_DRIVER_TYPE_SCC 0x0005 /* scc driver */
-#define TTY_DRIVER_TYPE_SYSCONS 0x0006
-
-/* system subtypes (magic, used by tty_io.c) */
-#define SYSTEM_TYPE_TTY 0x0001
-#define SYSTEM_TYPE_CONSOLE 0x0002
-#define SYSTEM_TYPE_SYSCONS 0x0003
-#define SYSTEM_TYPE_SYSPTMX 0x0004
-
-/* pty subtypes (magic, used by tty_io.c) */
-#define PTY_TYPE_MASTER 0x0001
-#define PTY_TYPE_SLAVE 0x0002
-
-/* serial subtype definitions */
-#define SERIAL_TYPE_NORMAL 1
-
int tty_register_driver(struct tty_driver *driver);
void tty_unregister_driver(struct tty_driver *driver);
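A hedged allocation sketch using the new enum-typed fields; the "foo" names and foo_ops are hypothetical, and error handling is abbreviated:

	struct tty_driver *drv;

	drv = tty_alloc_driver(4, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV);
	if (IS_ERR(drv))
		return PTR_ERR(drv);

	drv->driver_name = "foo_serial";
	drv->name = "ttyFOO";
	drv->type = TTY_DRIVER_TYPE_SERIAL;	/* enum tty_driver_type */
	drv->subtype = SERIAL_TYPE_NORMAL;	/* enum tty_driver_subtype */
	drv->init_termios = tty_std_termios;
	tty_set_operations(drv, &foo_ops);

	if (tty_register_driver(drv)) {
		tty_driver_kref_put(drv);
		return -ENODEV;
	}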
struct device *tty_register_device(struct tty_driver *driver, unsigned index,
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index af01e89074b2..c5cccc3fc1e8 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -39,7 +39,6 @@ do { \
int ldsem_down_read(struct ld_semaphore *sem, long timeout);
int ldsem_down_read_trylock(struct ld_semaphore *sem);
int ldsem_down_write(struct ld_semaphore *sem, long timeout);
-int ldsem_down_write_trylock(struct ld_semaphore *sem);
void ldsem_up_read(struct ld_semaphore *sem);
void ldsem_up_write(struct ld_semaphore *sem);
diff --git a/include/linux/types.h b/include/linux/types.h
index 1c509ce8f7f6..49b79c8bb1a9 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -92,6 +92,7 @@ typedef unsigned char unchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
+typedef unsigned long long ullong;
#ifndef __BIT_TYPES_DEFINED__
#define __BIT_TYPES_DEFINED__
@@ -248,5 +249,17 @@ typedef void (*swap_func_t)(void *a, void *b, int size);
typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv);
typedef int (*cmp_func_t)(const void *a, const void *b);
+/*
+ * rcuwait provides a way of blocking and waking up a single
+ * task in an rcu-safe manner.
+ *
+ * The only time @task is non-NULL is when a user is blocked (or is
+ * checking whether it needs to block) on a condition; it is reset as
+ * soon as the condition is satisfied and the task is woken.
+ */
+struct rcuwait {
+ struct task_struct __rcu *task;
+};
+
#endif /* __ASSEMBLY__ */
#endif /* _LINUX_TYPES_H */
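A hedged sketch of the usual rcuwait pairing; the helpers live in <linux/rcuwait.h>, which is not part of this diff, and the done flag is a hypothetical condition:

	static struct rcuwait exit_wait;
	static bool done;

	/* waiter side */
	rcuwait_init(&exit_wait);
	rcuwait_wait_event(&exit_wait, READ_ONCE(done), TASK_UNINTERRUPTIBLE);

	/* waker side: safe under RCU even if the waiter is concurrently exiting */
	WRITE_ONCE(done, true);
	rcuwait_wake_up(&exit_wait);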
diff --git a/include/linux/usb.h b/include/linux/usb.h
index cfa8005e24f9..b46738701f8d 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -51,6 +51,7 @@ struct ep_device;
* @desc: descriptor for this endpoint, wMaxPacketSize in native byteorder
* @ss_ep_comp: SuperSpeed companion descriptor for this endpoint
* @ssp_isoc_ep_comp: SuperSpeedPlus isoc companion descriptor for this endpoint
+ * @eusb2_isoc_ep_comp: eUSB2 isoc companion descriptor for this endpoint
* @urb_list: urbs queued to this endpoint; maintained by usbcore
* @hcpriv: for use by HCD; typically holds hardware dma queue head (QH)
* with one or more transfer descriptors (TDs) per urb
@@ -64,9 +65,10 @@ struct ep_device;
* descriptor within an active interface in a given USB configuration.
*/
struct usb_host_endpoint {
- struct usb_endpoint_descriptor desc;
- struct usb_ss_ep_comp_descriptor ss_ep_comp;
- struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp;
+ struct usb_endpoint_descriptor desc;
+ struct usb_ss_ep_comp_descriptor ss_ep_comp;
+ struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp;
+ struct usb_eusb2_isoc_ep_comp_descriptor eusb2_isoc_ep_comp;
struct list_head urb_list;
void *hcpriv;
struct ep_device *ep_dev; /* For sysfs info */
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 3963e55e88a3..fbdef950f06c 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -61,7 +61,7 @@ struct musb_hdrc_eps_bits {
};
struct musb_hdrc_config {
- struct musb_fifo_cfg *fifo_cfg; /* board fifo configuration */
+ const struct musb_fifo_cfg *fifo_cfg; /* board fifo configuration */
unsigned fifo_cfg_size; /* size of the fifo configuration */
/* MUSB configuration-specific details */
diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h
index 5050f502c1ed..4b651065738a 100644
--- a/include/linux/usb/ulpi.h
+++ b/include/linux/usb/ulpi.h
@@ -49,19 +49,10 @@
/*-------------------------------------------------------------------------*/
#if IS_ENABLED(CONFIG_USB_ULPI)
-struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops,
- unsigned int flags);
-
struct usb_phy *devm_otg_ulpi_create(struct device *dev,
struct usb_phy_io_ops *ops,
unsigned int flags);
#else
-static inline struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops,
- unsigned int flags)
-{
- return NULL;
-}
-
static inline struct usb_phy *devm_otg_ulpi_create(struct device *dev,
struct usb_phy_io_ops *ops,
unsigned int flags)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 7183e5aca282..a0bb6d012137 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -5,8 +5,10 @@
#include <linux/kref.h>
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
+#include <linux/rculist_nulls.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
+#include <linux/rcuref.h>
#include <linux/rwsem.h>
#include <linux/sysctl.h>
#include <linux/err.h>
@@ -115,10 +117,11 @@ struct user_namespace {
} __randomize_layout;
struct ucounts {
- struct hlist_node node;
+ struct hlist_nulls_node node;
struct user_namespace *ns;
kuid_t uid;
- atomic_t count;
+ struct rcu_head rcu;
+ rcuref_t count;
atomic_long_t ucount[UCOUNT_COUNTS];
atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS];
};
@@ -131,9 +134,15 @@ void retire_userns_sysctls(struct user_namespace *ns);
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
-struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
void put_ucounts(struct ucounts *ucounts);
+static inline struct ucounts * __must_check get_ucounts(struct ucounts *ucounts)
+{
+ if (rcuref_get(&ucounts->count))
+ return ucounts;
+ return NULL;
+}
+
static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type)
{
return atomic_long_read(&ucounts->rlimit[type]);
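
For context on the ucounts hunk: get_ucounts() now fails by returning NULL once
the rcuref count has gone dead, so callers must check the result. A hedged
sketch of the resulting reference pattern -- the helper name and error code are
illustrative, not part of the patch.

static int use_ucounts(struct ucounts *orig)    /* hypothetical helper */
{
        struct ucounts *uc;

        uc = get_ucounts(orig);         /* rcuref_get(); NULL if already dead */
        if (!uc)
                return -EAGAIN;

        /* ... consult uc->ns, uc->uid, the ucount/rlimit arrays ... */

        put_ucounts(uc);                /* drop the reference taken above */
        return 0;
}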
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 000a6cab2d31..707b00772ce1 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -67,6 +67,7 @@ struct vfio_device {
struct inode *inode;
#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_device *iommufd_device;
+ struct ida pasids;
u8 iommufd_attached:1;
#endif
u8 cdev_opened:1;
@@ -91,6 +92,8 @@ struct vfio_device {
* bound iommufd. Undo in unbind_iommufd if @detach_ioas is not
* called.
* @detach_ioas: Opposite of attach_ioas
+ * @pasid_attach_ioas: The pasid variation of attach_ioas
+ * @pasid_detach_ioas: Opposite of pasid_attach_ioas
* @open_device: Called when the first file descriptor is opened for this device
* @close_device: Opposite of open_device
* @read: Perform read(2) on device file descriptor
@@ -115,6 +118,9 @@ struct vfio_device_ops {
void (*unbind_iommufd)(struct vfio_device *vdev);
int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
void (*detach_ioas)(struct vfio_device *vdev);
+ int (*pasid_attach_ioas)(struct vfio_device *vdev, u32 pasid,
+ u32 *pt_id);
+ void (*pasid_detach_ioas)(struct vfio_device *vdev, u32 pasid);
int (*open_device)(struct vfio_device *vdev);
void (*close_device)(struct vfio_device *vdev);
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
@@ -139,6 +145,10 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev,
void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev);
+int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev,
+ u32 pasid, u32 *pt_id);
+void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev,
+ u32 pasid);
int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
struct iommufd_ctx *ictx, u32 *out_device_id);
void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
@@ -166,6 +176,10 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx)
((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
#define vfio_iommufd_physical_detach_ioas \
((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_physical_pasid_attach_ioas \
+ ((int (*)(struct vfio_device *vdev, u32 pasid, u32 *pt_id)) NULL)
+#define vfio_iommufd_physical_pasid_detach_ioas \
+ ((void (*)(struct vfio_device *vdev, u32 pasid)) NULL)
#define vfio_iommufd_emulated_bind \
((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
u32 *out_device_id)) NULL)
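
To show how the new PASID hooks are consumed, here is a sketch of a physical
VFIO driver's ops table wiring in the helpers declared above; the table name
and the omitted handlers are placeholders, not part of the patch.

static const struct vfio_device_ops my_vfio_ops = {     /* illustrative */
        .name                   = "my-vfio-dev",
        .bind_iommufd           = vfio_iommufd_physical_bind,
        .unbind_iommufd         = vfio_iommufd_physical_unbind,
        .attach_ioas            = vfio_iommufd_physical_attach_ioas,
        .detach_ioas            = vfio_iommufd_physical_detach_ioas,
        .pasid_attach_ioas      = vfio_iommufd_physical_pasid_attach_ioas,
        .pasid_detach_ioas      = vfio_iommufd_physical_pasid_detach_ioas,
        /* .open_device, .close_device, .read, .write, ... as before */
};

When CONFIG_IOMMUFD is disabled, the #define fallbacks above resolve these
helpers to typed NULLs, so the initializer compiles either way.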
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 5a37cb2b6f93..9e15a088ba38 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -41,9 +41,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGSTEAL_KSWAPD,
PGSTEAL_DIRECT,
PGSTEAL_KHUGEPAGED,
+ PGSTEAL_PROACTIVE,
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
PGSCAN_KHUGEPAGED,
+ PGSCAN_PROACTIVE,
PGSCAN_DIRECT_THROTTLE,
PGSCAN_ANON,
PGSCAN_FILE,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 4751e3ecc467..b2ccb6845595 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -504,7 +504,7 @@ static inline const char *node_stat_name(enum node_stat_item item)
static inline const char *lru_list_name(enum lru_list lru)
{
- return node_stat_name(NR_LRU_BASE + (enum node_stat_item)lru) + 3; // skip "nr_"
+ return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
}
#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index caf4f0b12235..eda4b62511f7 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -313,6 +313,30 @@ static inline void cgroup_writeback_umount(struct super_block *sb)
/*
* mm/page-writeback.c
*/
+/* consolidated parameters for balance_dirty_pages() and its subroutines */
+struct dirty_throttle_control {
+#ifdef CONFIG_CGROUP_WRITEBACK
+ struct wb_domain *dom;
+ struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */
+#endif
+ struct bdi_writeback *wb;
+ struct fprop_local_percpu *wb_completions;
+
+ unsigned long avail; /* dirtyable */
+ unsigned long dirty; /* file_dirty + write + nfs */
+ unsigned long thresh; /* dirty threshold */
+ unsigned long bg_thresh; /* dirty background threshold */
+ unsigned long limit; /* hard dirty limit */
+
+ unsigned long wb_dirty; /* per-wb counterparts */
+ unsigned long wb_thresh;
+ unsigned long wb_bg_thresh;
+
+ unsigned long pos_ratio;
+ bool freerun;
+ bool dirty_exceeded;
+};
+
void laptop_io_completion(struct backing_dev_info *info);
void laptop_sync_completion(void);
void laptop_mode_timer_fn(struct timer_list *t);
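
The dirty_throttle_control structure was previously private to
mm/page-writeback.c; exposing it in writeback.h presumably lets other code
(tracepoints, for instance) inspect the consolidated throttling state. A hedged
illustration of reading it -- the helper below is hypothetical:

static void dump_dtc(const struct dirty_throttle_control *dtc)
{
        pr_debug("dirty=%lu thresh=%lu bg_thresh=%lu limit=%lu wb_dirty=%lu freerun=%d\n",
                 dtc->dirty, dtc->thresh, dtc->bg_thresh, dtc->limit,
                 dtc->wb_dirty, dtc->freerun);
}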
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 0b618ec04115..78eede109b1a 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1555,6 +1555,8 @@ int xa_get_order(struct xarray *, unsigned long index);
int xas_get_order(struct xa_state *xas);
void xas_split(struct xa_state *, void *entry, unsigned int order);
void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
+void xas_try_split(struct xa_state *xas, void *entry, unsigned int order);
+unsigned int xas_try_split_min_order(unsigned int order);
#else
static inline int xa_get_order(struct xarray *xa, unsigned long index)
{
@@ -1576,6 +1578,17 @@ static inline void xas_split_alloc(struct xa_state *xas, void *entry,
unsigned int order, gfp_t gfp)
{
}
+
+static inline void xas_try_split(struct xa_state *xas, void *entry,
+ unsigned int order)
+{
+}
+
+static inline unsigned int xas_try_split_min_order(unsigned int order)
+{
+ return 0;
+}
+
#endif
/**
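
The two xas_try_split() helpers above support stepping a multi-index entry down
to a lower order without preallocating every level up front. A hedged sketch of
the intended loop shape, under the assumption that xas_try_split_min_order()
reports the lowest order reachable from the current order in a single call; the
function and its locking are illustrative only:

static void split_down(struct xarray *xa, unsigned long index, void *entry,
                       unsigned int cur_order, unsigned int new_order)
{
        XA_STATE_ORDER(xas, xa, index, cur_order);

        xas_lock(&xas);
        while (cur_order > new_order && !xas_error(&xas)) {
                unsigned int next = xas_try_split_min_order(cur_order);

                if (next < new_order)
                        next = new_order;
                xas_set_order(&xas, index, next);
                /* Split the order-cur_order entry into order-next entries. */
                xas_try_split(&xas, entry, cur_order);
                cur_order = next;
        }
        xas_unlock(&xas);
}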
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index a67d62b79698..52f30e526607 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -4,9 +4,8 @@
*
* Copyright (C) 2014 Dan Streetman
*
- * This is a common frontend for the zbud and zsmalloc memory
- * storage pool implementations. Typically, this is used to
- * store compressed memory.
+ * This is a common frontend for the zswap compressed memory storage
+ * implementations.
*/
#ifndef _ZPOOL_H_
@@ -14,25 +13,6 @@
struct zpool;
-/*
- * Control how a handle is mapped. It will be ignored if the
- * implementation does not support it. Its use is optional.
- * Note that this does not refer to memory protection, it
- * refers to how the memory will be copied in/out if copying
- * is necessary during mapping; read-write is the safest as
- * it copies the existing memory in on map, and copies the
- * changed memory back out on unmap. Write-only does not copy
- * in the memory and should only be used for initialization.
- * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
- */
-enum zpool_mapmode {
- ZPOOL_MM_RW, /* normal read-write mapping */
- ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
- ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
-
- ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
-};
-
bool zpool_has_pool(char *type);
struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp);
@@ -41,17 +21,19 @@ const char *zpool_get_type(struct zpool *pool);
void zpool_destroy_pool(struct zpool *pool);
-bool zpool_malloc_support_movable(struct zpool *pool);
-
int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
unsigned long *handle);
void zpool_free(struct zpool *pool, unsigned long handle);
-void *zpool_map_handle(struct zpool *pool, unsigned long handle,
- enum zpool_mapmode mm);
+void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle,
+ void *local_copy);
+
+void zpool_obj_read_end(struct zpool *zpool, unsigned long handle,
+ void *handle_mem);
-void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
+void zpool_obj_write(struct zpool *zpool, unsigned long handle,
+ void *handle_mem, size_t mem_len);
u64 zpool_get_total_pages(struct zpool *pool);
@@ -81,15 +63,16 @@ struct zpool_driver {
void *(*create)(const char *name, gfp_t gfp);
void (*destroy)(void *pool);
- bool malloc_support_movable;
int (*malloc)(void *pool, size_t size, gfp_t gfp,
unsigned long *handle);
void (*free)(void *pool, unsigned long handle);
- bool sleep_mapped;
- void *(*map)(void *pool, unsigned long handle,
- enum zpool_mapmode mm);
- void (*unmap)(void *pool, unsigned long handle);
+ void *(*obj_read_begin)(void *pool, unsigned long handle,
+ void *local_copy);
+ void (*obj_read_end)(void *pool, unsigned long handle,
+ void *handle_mem);
+ void (*obj_write)(void *pool, unsigned long handle,
+ void *handle_mem, size_t mem_len);
u64 (*total_pages)(void *pool);
};
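
The map/unmap pair is gone; readers now bracket access with
zpool_obj_read_begin()/zpool_obj_read_end(), and writers copy in with
zpool_obj_write(). A hedged read-side sketch, assuming the backend may either
hand back a direct pointer or fill the caller-supplied local_copy buffer:

static void read_compressed(struct zpool *zpool, unsigned long handle,
                            void *dst, size_t obj_len, void *local_copy)
{
        void *src;

        src = zpool_obj_read_begin(zpool, handle, local_copy);
        memcpy(dst, src, obj_len);      /* or feed src straight to a decompressor */
        zpool_obj_read_end(zpool, handle, src);
}

The write side is a single call: zpool_obj_write(zpool, handle, buf, len)
copies len bytes into the object behind the handle.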
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index a48cd0ffe57d..c26baf9fb331 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -16,23 +16,6 @@
#include <linux/types.h>
-/*
- * zsmalloc mapping modes
- *
- * NOTE: These only make a difference when a mapped object spans pages.
- */
-enum zs_mapmode {
- ZS_MM_RW, /* normal read-write mapping */
- ZS_MM_RO, /* read-only (no copy-out at unmap time) */
- ZS_MM_WO /* write-only (no copy-in at map time) */
- /*
- * NOTE: ZS_MM_WO should only be used for initializing new
- * (uninitialized) allocations. Partial writes to already
- * initialized allocations should use ZS_MM_RW to preserve the
- * existing data.
- */
-};
-
struct zs_pool_stats {
/* How many pages were migrated (freed) */
atomic_long_t pages_compacted;
@@ -48,14 +31,18 @@ void zs_free(struct zs_pool *pool, unsigned long obj);
size_t zs_huge_class_size(struct zs_pool *pool);
-void *zs_map_object(struct zs_pool *pool, unsigned long handle,
- enum zs_mapmode mm);
-void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
-
unsigned long zs_get_total_pages(struct zs_pool *pool);
unsigned long zs_compact(struct zs_pool *pool);
unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
+
+void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
+ void *local_copy);
+void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
+ void *handle_mem);
+void zs_obj_write(struct zs_pool *pool, unsigned long handle,
+ void *handle_mem, size_t mem_len);
+
#endif
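
zsmalloc mirrors the zpool change: zs_map_object()/zs_unmap_object() are
replaced by copy-oriented accessors. A minimal hedged sketch of a write followed
by a read-back; handle allocation, sizing, and error handling are elided, and
the scratch buffer is assumed to be at least as large as the object:

static void zs_roundtrip(struct zs_pool *pool, unsigned long handle,
                         void *src, void *dst, size_t len, void *scratch)
{
        void *mem;

        zs_obj_write(pool, handle, src, len);           /* copy len bytes in */

        mem = zs_obj_read_begin(pool, handle, scratch); /* direct ptr or scratch */
        memcpy(dst, mem, len);
        zs_obj_read_end(pool, handle, mem);
}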
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index d961ead91bf1..30c193a1207e 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -26,7 +26,7 @@ struct zswap_lruvec_state {
unsigned long zswap_total_pages(void);
bool zswap_store(struct folio *folio);
-bool zswap_load(struct folio *folio);
+int zswap_load(struct folio *folio);
void zswap_invalidate(swp_entry_t swp);
int zswap_swapon(int type, unsigned long nr_pages);
void zswap_swapoff(int type);
@@ -44,9 +44,9 @@ static inline bool zswap_store(struct folio *folio)
return false;
}
-static inline bool zswap_load(struct folio *folio)
+static inline int zswap_load(struct folio *folio)
{
- return false;
+ return -ENOENT;
}
static inline void zswap_invalidate(swp_entry_t swp) {}