summary | refs | log | tree | commit | diff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/acpi.h 1
-rw-r--r-- include/linux/align.h 10
-rw-r--r-- include/linux/arm-smccc.h 15
-rw-r--r-- include/linux/binfmts.h 2
-rw-r--r-- include/linux/bitmap.h 8
-rw-r--r-- include/linux/bits.h 2
-rw-r--r-- include/linux/blkdev.h 8
-rw-r--r-- include/linux/cache.h 9
-rw-r--r-- include/linux/cfi.h 2
-rw-r--r-- include/linux/cgroup-defs.h 5
-rw-r--r-- include/linux/cgroup.h 3
-rw-r--r-- include/linux/cleanup.h 39
-rw-r--r-- include/linux/compiler.h 28
-rw-r--r-- include/linux/compiler_types.h 23
-rw-r--r-- include/linux/cpufreq.h 26
-rw-r--r-- include/linux/cpumask.h 71
-rw-r--r-- include/linux/cpuset.h 11
-rw-r--r-- include/linux/dcache.h 39
-rw-r--r-- include/linux/device.h 9
-rw-r--r-- include/linux/dma-direct.h 13
-rw-r--r-- include/linux/edac.h 215
-rw-r--r-- include/linux/energy_model.h 22
-rw-r--r-- include/linux/eventpoll.h 4
-rw-r--r-- include/linux/execmem.h 31
-rw-r--r-- include/linux/fanotify.h 12
-rw-r--r-- include/linux/file_ref.h 48
-rw-r--r-- include/linux/fs.h 62
-rw-r--r-- include/linux/fs_context.h 2
-rw-r--r-- include/linux/fscrypt.h 12
-rw-r--r-- include/linux/fsnotify.h 20
-rw-r--r-- include/linux/fsnotify_backend.h 42
-rw-r--r-- include/linux/hrtimer.h 8
-rw-r--r-- include/linux/hyperv.h 57
-rw-r--r-- include/linux/idr.h 17
-rw-r--r-- include/linux/interrupt.h 16
-rw-r--r-- include/linux/iomap.h 116
-rw-r--r-- include/linux/irq.h 7
-rw-r--r-- include/linux/irqchip/irq-davinci-cp-intc.h 25
-rw-r--r-- include/linux/irqdomain.h 139
-rw-r--r-- include/linux/kexec.h 2
-rw-r--r-- include/linux/key.h 1
-rw-r--r-- include/linux/kstrtox.h 1
-rw-r--r-- include/linux/kvm_host.h 2
-rw-r--r-- include/linux/lsm_audit.h 2
-rw-r--r-- include/linux/lsm_hook_defs.h 3
-rw-r--r-- include/linux/mem_encrypt.h 23
-rw-r--r-- include/linux/misc_cgroup.h 6
-rw-r--r-- include/linux/mm.h 2
-rw-r--r-- include/linux/mnt_idmapping.h 5
-rw-r--r-- include/linux/mod_devicetable.h 2
-rw-r--r-- include/linux/module.h 16
-rw-r--r-- include/linux/moduleloader.h 4
-rw-r--r-- include/linux/msi.h 26
-rw-r--r-- include/linux/namei.h 45
-rw-r--r-- include/linux/nfs_xdr.h 2
-rw-r--r-- include/linux/nmi.h 4
-rw-r--r-- include/linux/nodemask.h 8
-rw-r--r-- include/linux/nodemask_types.h 11
-rw-r--r-- include/linux/numa.h 17
-rw-r--r-- include/linux/objpool.h 7
-rw-r--r-- include/linux/objtool.h 4
-rw-r--r-- include/linux/page-flags.h 18
-rw-r--r-- include/linux/pagemap.h 48
-rw-r--r-- include/linux/percpu-defs.h 17
-rw-r--r-- include/linux/percpu-rwsem.h 8
-rw-r--r-- include/linux/perf/arm_pmu.h 17
-rw-r--r-- include/linux/perf_event.h 102
-rw-r--r-- include/linux/pid.h 7
-rw-r--r-- include/linux/pidfs.h 1
-rw-r--r-- include/linux/pipe_fs_i.h 2
-rw-r--r-- include/linux/platform_profile.h 2
-rw-r--r-- include/linux/pm.h 9
-rw-r--r-- include/linux/pm_clock.h 5
-rw-r--r-- include/linux/pm_runtime.h 33
-rw-r--r-- include/linux/pm_wakeup.h 6
-rw-r--r-- include/linux/pnp.h 2
-rw-r--r-- include/linux/posix-timers.h 30
-rw-r--r-- include/linux/preempt.h 3
-rw-r--r-- include/linux/printk.h 6
-rw-r--r-- include/linux/rcupdate.h 58
-rw-r--r-- include/linux/rcupdate_wait.h 3
-rw-r--r-- include/linux/rcutiny.h 36
-rw-r--r-- include/linux/rcutree.h 5
-rw-r--r-- include/linux/resctrl.h 212
-rw-r--r-- include/linux/resctrl_types.h 54
-rw-r--r-- include/linux/sched.h 7
-rw-r--r-- include/linux/sched/deadline.h 4
-rw-r--r-- include/linux/sched/debug.h 2
-rw-r--r-- include/linux/sched/ext.h 1
-rw-r--r-- include/linux/sched/idle.h 23
-rw-r--r-- include/linux/sched/mm.h 7
-rw-r--r-- include/linux/sched/signal.h 3
-rw-r--r-- include/linux/sched/topology.h 14
-rw-r--r-- include/linux/seccomp.h 12
-rw-r--r-- include/linux/security.h 10
-rw-r--r-- include/linux/sizes.h 8
-rw-r--r-- include/linux/slab.h 16
-rw-r--r-- include/linux/srcu.h 102
-rw-r--r-- include/linux/srcutiny.h 29
-rw-r--r-- include/linux/srcutree.h 98
-rw-r--r-- include/linux/string.h 16
-rw-r--r-- include/linux/string_choices.h 24
-rw-r--r-- include/linux/syscalls.h 8
-rw-r--r-- include/linux/sysv_fs.h 214
-rw-r--r-- include/linux/thread_info.h 48
-rw-r--r-- include/linux/time_namespace.h 2
-rw-r--r-- include/linux/topology.h 53
-rw-r--r-- include/linux/torture.h 1
-rw-r--r-- include/linux/uaccess.h 2
-rw-r--r-- include/linux/ucopysize.h 63
-rw-r--r-- include/linux/uidgid.h 6
-rw-r--r-- include/linux/uio.h 2
-rw-r--r-- include/linux/uprobes.h 3
-rw-r--r-- include/linux/vdso_datastore.h 10
-rw-r--r-- include/linux/vfsdebug.h 45
-rw-r--r-- include/linux/vm_event_item.h 2
-rw-r--r-- include/linux/vmcore_info.h 3
-rw-r--r-- include/linux/wait.h 3
118 files changed, 1896 insertions, 1001 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4e495b29c640..a70e62d69dc7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -330,7 +330,6 @@ static inline bool acpi_sci_irq_valid(void)
}
extern int sbf_port;
-extern unsigned long acpi_realmode_flags;
int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity);
int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
diff --git a/include/linux/align.h b/include/linux/align.h
index 2b4acec7b95a..55debf105a5d 100644
--- a/include/linux/align.h
+++ b/include/linux/align.h
@@ -2,14 +2,6 @@
#ifndef _LINUX_ALIGN_H
#define _LINUX_ALIGN_H
-#include <linux/const.h>
-
-/* @a is a power of 2 value */
-#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
-#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a))
-#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask))
-#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
-#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a)))
-#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+#include <vdso/align.h>
#endif /* _LINUX_ALIGN_H */
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 67f6fdf2e7cd..f19be5754090 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -179,6 +179,9 @@
#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_62 62
#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_63 63
/* End of pKVM hypercall range */
+#define ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_VER 64
+#define ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_CPUS 65
+
#define ARM_SMCCC_KVM_FUNC_FEATURES_2 127
#define ARM_SMCCC_KVM_NUM_FUNCS 128
@@ -225,6 +228,18 @@
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_MMIO_GUARD)
+#define ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_VER_FUNC_ID \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_VENDOR_HYP, \
+ ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_VER)
+
+#define ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_CPUS_FUNC_ID \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_VENDOR_HYP, \
+ ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_CPUS)
+
/* ptp_kvm counter type ID */
#define KVM_PTP_VIRT_COUNTER 0
#define KVM_PTP_PHYS_COUNTER 1
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 60d674af3080..1625c8529e70 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -64,7 +64,7 @@ struct linux_binprm {
const char *fdpath; /* generated filename for execveat */
unsigned interp_flags;
int execfd; /* File descriptor of the executable */
- unsigned long loader, exec;
+ unsigned long exec;
struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 2026953e2c4e..595217b7a6e7 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -560,9 +560,9 @@ void bitmap_replace(unsigned long *dst,
* ...0..11...0..10
* dst: 0000001100000010
*
- * A relationship exists between bitmap_scatter() and bitmap_gather().
+ * A relationship exists between bitmap_scatter() and bitmap_gather(). See
+ * bitmap_gather() for the bitmap gather detailed operations. TL;DR:
* bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation.
- * See bitmap_scatter() for details related to this relationship.
*/
static __always_inline
void bitmap_scatter(unsigned long *dst, const unsigned long *src,
@@ -608,7 +608,9 @@ void bitmap_scatter(unsigned long *dst, const unsigned long *src,
* dst: 0000000000011010
*
* A relationship exists between bitmap_gather() and bitmap_scatter(). See
- * bitmap_scatter() for the bitmap scatter detailed operations.
+ * bitmap_scatter() for the bitmap scatter detailed operations. TL;DR:
+ * bitmap_scatter() can be seen as the 'reverse' bitmap_gather() operation.
+ *
* Suppose scattered computed using bitmap_scatter(scattered, src, mask, n).
* The operation bitmap_gather(result, scattered, mask, n) leads to a result
* equal or equivalent to src.
diff --git a/include/linux/bits.h b/include/linux/bits.h
index 61a75d3f294b..14fd0ca9a6cd 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -40,7 +40,7 @@
* Missing asm support
*
* __GENMASK_U128() depends on _BIT128() which would not work
- * in the asm code, as it shifts an 'unsigned __init128' data
+ * in the asm code, as it shifts an 'unsigned __int128' data
* type instead of direct representation of 128 bit constants
* such as long and unsigned long. The fundamental problem is
* that a 128 bit constant will get silently truncated by the
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d37751789bf5..1c0cf6af392c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -268,10 +268,16 @@ static inline dev_t disk_devt(struct gendisk *disk)
return MKDEV(disk->major, disk->first_minor);
}
+/*
+ * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER);
+ * however, we constrain this to what we can validate and test.
+ */
+#define BLK_MAX_BLOCK_SIZE SZ_64K
+
/* blk_validate_limits() validates bsize, so drivers don't usually need to */
static inline int blk_validate_block_size(unsigned long bsize)
{
- if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+ if (bsize < 512 || bsize > BLK_MAX_BLOCK_SIZE || !is_power_of_2(bsize))
return -EINVAL;
return 0;
diff --git a/include/linux/cache.h b/include/linux/cache.h
index ca2a05682a54..e69768f50d53 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -3,16 +3,13 @@
#define __LINUX_CACHE_H
#include <uapi/linux/kernel.h>
+#include <vdso/cache.h>
#include <asm/cache.h>
#ifndef L1_CACHE_ALIGN
#define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES)
#endif
-#ifndef SMP_CACHE_BYTES
-#define SMP_CACHE_BYTES L1_CACHE_BYTES
-#endif
-
/**
* SMP_CACHE_ALIGN - align a value to the L2 cacheline size
* @x: value to align
@@ -63,10 +60,6 @@
#define __ro_after_init __section(".data..ro_after_init")
#endif
-#ifndef ____cacheline_aligned
-#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#endif
-
#ifndef ____cacheline_aligned_in_smp
#ifdef CONFIG_SMP
#define ____cacheline_aligned_in_smp ____cacheline_aligned
diff --git a/include/linux/cfi.h b/include/linux/cfi.h
index f0df518e11dd..1db17ecbb86c 100644
--- a/include/linux/cfi.h
+++ b/include/linux/cfi.h
@@ -11,6 +11,8 @@
#include <linux/module.h>
#include <asm/cfi.h>
+extern bool cfi_warn;
+
#ifndef cfi_get_offset
static inline int cfi_get_offset(void)
{
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 17960a1e858d..485b651869d9 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -619,9 +619,8 @@ struct cgroup_root {
*/
struct cftype {
/*
- * By convention, the name should begin with the name of the
- * subsystem, followed by a period. Zero length string indicates
- * end of cftype array.
+ * Name of the subsystem is prepended in cgroup_file_name().
+ * Zero length string indicates end of cftype array.
*/
char name[MAX_CFTYPE_NAME];
unsigned long private;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f8ef47f8a634..28e999f2c642 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -113,6 +113,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);
void cgroup_file_show(struct cgroup_file *cfile, bool show);
@@ -689,8 +690,6 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
*/
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
void cgroup_rstat_flush(struct cgroup *cgrp);
-void cgroup_rstat_flush_hold(struct cgroup *cgrp);
-void cgroup_rstat_flush_release(struct cgroup *cgrp);
/*
* Basic resource stats.
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index ee2614adb785..2b32a5759b22 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -216,6 +216,23 @@ const volatile void * __must_check_fn(const volatile void *val)
#define return_ptr(p) return no_free_ptr(p)
+/*
+ * Only for situations where an allocation is handed in to another function
+ * and consumed by that function on success.
+ *
+ * struct foo *f __free(kfree) = kzalloc(sizeof(*f), GFP_KERNEL);
+ *
+ * setup(f);
+ * if (some_condition)
+ * return -EINVAL;
+ * ....
+ * ret = bar(f);
+ * if (!ret)
+ * retain_ptr(f);
+ * return ret;
+ */
+#define retain_ptr(p) \
+ __get_and_null(p, NULL)
/*
* DEFINE_CLASS(name, type, exit, init, init_args...):
@@ -291,11 +308,21 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \
static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
-#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
+#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \
+ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
+ { return (void *)(__force unsigned long)*(_exp); }
+
+#define DEFINE_CLASS_IS_GUARD(_name) \
__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
+ __DEFINE_GUARD_LOCK_PTR(_name, _T)
+
+#define DEFINE_CLASS_IS_COND_GUARD(_name) \
+ __DEFINE_CLASS_IS_CONDITIONAL(_name, true); \
+ __DEFINE_GUARD_LOCK_PTR(_name, _T)
+
+#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
- static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
- { return (void *)(__force unsigned long)*_T; }
+ DEFINE_CLASS_IS_GUARD(_name)
#define DEFINE_GUARD_COND(_name, _ext, _condlock) \
__DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \
@@ -375,11 +402,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \
if (_T->lock) { _unlock; } \
} \
\
-static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \
-{ \
- return (void *)(__force unsigned long)_T->lock; \
-}
-
+__DEFINE_GUARD_LOCK_PTR(_name, &_T->lock)
#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \
static inline class_##_name##_t class_##_name##_constructor(_type *l) \
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 155385754824..9fc30b6b80c9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -206,12 +206,38 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
#define __must_be_byte_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_byte_array(a), \
"must be byte array")
+/*
+ * If the "nonstring" attribute isn't available, we have to return true
+ * so the __must_*() checks pass when "nonstring" isn't supported.
+ */
+#if __has_attribute(__nonstring__) && defined(__annotated)
+#define __is_cstr(a) (!__annotated(a, nonstring))
+#define __is_noncstr(a) (__annotated(a, nonstring))
+#else
+#define __is_cstr(a) (true)
+#define __is_noncstr(a) (true)
+#endif
+
/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */
#define __must_be_cstr(p) \
- __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)")
+ __BUILD_BUG_ON_ZERO_MSG(!__is_cstr(p), \
+ "must be C-string (NUL-terminated)")
+#define __must_be_noncstr(p) \
+ __BUILD_BUG_ON_ZERO_MSG(!__is_noncstr(p), \
+ "must be non-C-string (not NUL-terminated)")
#endif /* __KERNEL__ */
+#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+/*
+ * Force a reference to the external symbol so the compiler generates
+ * __kcfi_typeid.
+ */
+#define KCFI_REFERENCE(sym) __ADDRESSABLE(sym)
+#else
+#define KCFI_REFERENCE(sym)
+#endif
+
/**
* offset_to_ptr - convert a relative memory offset to an absolute pointer
* @off: the address of the 32-bit offset value
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 981cc3d7e3aa..e09d323be845 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -349,6 +349,18 @@ struct ftrace_likely_data {
#endif
/*
+ * Optional: only supported since gcc >= 15
+ * Optional: not supported by Clang
+ *
+ * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117178
+ */
+#ifdef CONFIG_CC_HAS_MULTIDIMENSIONAL_NONSTRING
+# define __nonstring_array __attribute__((__nonstring__))
+#else
+# define __nonstring_array
+#endif
+
+/*
* Apply __counted_by() when the Endianness matches to increase test coverage.
*/
#ifdef __LITTLE_ENDIAN
@@ -360,7 +372,7 @@ struct ftrace_likely_data {
#endif
/* Do not trap wrapping arithmetic within an annotated function. */
-#ifdef CONFIG_UBSAN_SIGNED_WRAP
+#ifdef CONFIG_UBSAN_INTEGER_WRAP
# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow")))
#else
# define __signed_wrap
@@ -446,11 +458,14 @@ struct ftrace_likely_data {
#define __member_size(p) __builtin_object_size(p, 1)
#endif
-/* Determine if an attribute has been applied to a variable. */
+/*
+ * Determine if an attribute has been applied to a variable.
+ * Users of __annotated must first check that it is defined,
+ * or negative tests may fail when annotation cannot be checked. For
+ * example, see the definition of __is_cstr().
+ */
#if __has_builtin(__builtin_has_attribute)
#define __annotated(var, attr) __builtin_has_attribute(var, attr)
-#else
-#define __annotated(var, attr) (false)
#endif
/*
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 7fe0981a7e46..400fee6427a5 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -144,6 +144,9 @@ struct cpufreq_policy {
/* Per policy boost enabled flag. */
bool boost_enabled;
+ /* Per policy boost supported flag. */
+ bool boost_supported;
+
/* Cached frequency lookup from cpufreq_driver_resolve_freq. */
unsigned int cached_target_freq;
unsigned int cached_resolved_idx;
@@ -210,6 +213,9 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { }
#endif
+/* Scope based cleanup macro for cpufreq_policy kobject reference counting */
+DEFINE_FREE(put_cpufreq_policy, struct cpufreq_policy *, if (_T) cpufreq_cpu_put(_T))
+
static inline bool policy_is_inactive(struct cpufreq_policy *policy)
{
return cpumask_empty(policy->cpus);
@@ -778,10 +784,8 @@ int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy,
ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf);
#ifdef CONFIG_CPU_FREQ
-int cpufreq_boost_trigger_state(int state);
bool cpufreq_boost_enabled(void);
-int cpufreq_enable_boost_support(void);
-bool policy_has_boost_freq(struct cpufreq_policy *policy);
+int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state);
/* Find lowest freq at or above target in a table in ascending order */
static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy,
@@ -1150,23 +1154,14 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
return 0;
}
#else
-static inline int cpufreq_boost_trigger_state(int state)
-{
- return 0;
-}
static inline bool cpufreq_boost_enabled(void)
{
return false;
}
-static inline int cpufreq_enable_boost_support(void)
+static inline int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state)
{
- return -EINVAL;
-}
-
-static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
-{
- return false;
+ return -EOPNOTSUPP;
}
static inline int
@@ -1184,7 +1179,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
}
#endif
-extern unsigned int arch_freq_get_on_cpu(int cpu);
+extern int arch_freq_get_on_cpu(int cpu);
#ifndef arch_set_freq_scale
static __always_inline
@@ -1198,7 +1193,6 @@ void arch_set_freq_scale(const struct cpumask *cpus,
/* the following are really really optional */
extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
-extern struct freq_attr *cpufreq_generic_attr[];
int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy);
unsigned int cpufreq_generic_get(unsigned int cpu);
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 36a890d0dd57..f9a868384083 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -81,7 +81,7 @@ static __always_inline void set_nr_cpu_ids(unsigned int nr)
*
* cpu_possible_mask- has bit 'cpu' set iff cpu is populatable
* cpu_present_mask - has bit 'cpu' set iff cpu is populated
- * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online
+ * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online
* cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler
* cpu_active_mask - has bit 'cpu' set iff cpu available to migration
*
@@ -285,35 +285,52 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
}
/**
- * for_each_cpu - iterate over every cpu in a mask
- * @cpu: the (optionally unsigned) integer iterator
- * @mask: the cpumask pointer
+ * cpumask_next_and_wrap - get the next cpu in *src1p & *src2p, starting from
+ * @n+1. If nothing found, wrap around and start from
+ * the beginning
+ * @n: the cpu prior to the place to search (i.e. search starts from @n+1)
+ * @src1p: the first cpumask pointer
+ * @src2p: the second cpumask pointer
*
- * After the loop, cpu is >= nr_cpu_ids.
+ * Return: next set bit, wrapped if needed, or >= nr_cpu_ids if @src1p & @src2p is empty.
*/
-#define for_each_cpu(cpu, mask) \
- for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
-
-#if NR_CPUS == 1
static __always_inline
-unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
+unsigned int cpumask_next_and_wrap(int n, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
- cpumask_check(start);
+ /* -1 is a legal arg here. */
if (n != -1)
cpumask_check(n);
+ return find_next_and_bit_wrap(cpumask_bits(src1p), cpumask_bits(src2p),
+ small_cpumask_bits, n + 1);
+}
- /*
- * Return the first available CPU when wrapping, or when starting before cpu0,
- * since there is only one valid option.
- */
- if (wrap && n >= 0)
- return nr_cpumask_bits;
-
- return cpumask_first(mask);
+/**
+ * cpumask_next_wrap - get the next cpu in *src, starting from @n+1. If nothing
+ * found, wrap around and start from the beginning
+ * @n: the cpu prior to the place to search (i.e. search starts from @n+1)
+ * @src: cpumask pointer
+ *
+ * Return: next set bit, wrapped if needed, or >= nr_cpu_ids if @src is empty.
+ */
+static __always_inline
+unsigned int cpumask_next_wrap(int n, const struct cpumask *src)
+{
+ /* -1 is a legal arg here. */
+ if (n != -1)
+ cpumask_check(n);
+ return find_next_bit_wrap(cpumask_bits(src), small_cpumask_bits, n + 1);
}
-#else
-unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
-#endif
+
+/**
+ * for_each_cpu - iterate over every cpu in a mask
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu(cpu, mask) \
+ for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
/**
* for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
@@ -1033,11 +1050,21 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
#define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++)
#define for_each_online_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++)
#define for_each_present_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+
+#define for_each_possible_cpu_wrap(cpu, start) \
+ for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
+#define for_each_online_cpu_wrap(cpu, start) \
+ for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
#else
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
#define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask)
#define for_each_enabled_cpu(cpu) for_each_cpu((cpu), cpu_enabled_mask)
#define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask)
+
+#define for_each_possible_cpu_wrap(cpu, start) \
+ for_each_cpu_wrap((cpu), cpu_possible_mask, (start))
+#define for_each_online_cpu_wrap(cpu, start) \
+ for_each_cpu_wrap((cpu), cpu_online_mask, (start))
#endif
/* Wrappers for arch boot code to manipulate normally-constant masks */
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 835e7b793f6a..5466c96a33db 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -125,9 +125,11 @@ static inline int cpuset_do_page_mem_spread(void)
extern bool current_cpuset_is_being_rebound(void);
+extern void dl_rebuild_rd_accounting(void);
extern void rebuild_sched_domains(void);
extern void cpuset_print_current_mems_allowed(void);
+extern void cpuset_reset_sched_domains(void);
/*
* read_mems_allowed_begin is required when making decisions involving
@@ -259,11 +261,20 @@ static inline bool current_cpuset_is_being_rebound(void)
return false;
}
+static inline void dl_rebuild_rd_accounting(void)
+{
+}
+
static inline void rebuild_sched_domains(void)
{
partition_sched_domains(1, NULL, NULL);
}
+static inline void cpuset_reset_sched_domains(void)
+{
+ partition_sched_domains(1, NULL, NULL);
+}
+
static inline void cpuset_print_current_mems_allowed(void)
{
}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4afb60365675..45bff10d3773 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -203,34 +203,34 @@ struct dentry_operations {
#define DCACHE_NFSFS_RENAMED BIT(12)
/* this dentry has been "silly renamed" and has to be deleted on the last
* dput() */
-#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(14)
+#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(13)
/* Parent inode is watched by some fsnotify listener */
-#define DCACHE_DENTRY_KILLED BIT(15)
+#define DCACHE_DENTRY_KILLED BIT(14)
-#define DCACHE_MOUNTED BIT(16) /* is a mountpoint */
-#define DCACHE_NEED_AUTOMOUNT BIT(17) /* handle automount on this dir */
-#define DCACHE_MANAGE_TRANSIT BIT(18) /* manage transit from this dirent */
+#define DCACHE_MOUNTED BIT(15) /* is a mountpoint */
+#define DCACHE_NEED_AUTOMOUNT BIT(16) /* handle automount on this dir */
+#define DCACHE_MANAGE_TRANSIT BIT(17) /* manage transit from this dirent */
#define DCACHE_MANAGED_DENTRY \
(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
-#define DCACHE_LRU_LIST BIT(19)
+#define DCACHE_LRU_LIST BIT(18)
-#define DCACHE_ENTRY_TYPE (7 << 20) /* bits 20..22 are for storing type: */
-#define DCACHE_MISS_TYPE (0 << 20) /* Negative dentry */
-#define DCACHE_WHITEOUT_TYPE (1 << 20) /* Whiteout dentry (stop pathwalk) */
-#define DCACHE_DIRECTORY_TYPE (2 << 20) /* Normal directory */
-#define DCACHE_AUTODIR_TYPE (3 << 20) /* Lookupless directory (presumed automount) */
-#define DCACHE_REGULAR_TYPE (4 << 20) /* Regular file type */
-#define DCACHE_SPECIAL_TYPE (5 << 20) /* Other file type */
-#define DCACHE_SYMLINK_TYPE (6 << 20) /* Symlink */
+#define DCACHE_ENTRY_TYPE (7 << 19) /* bits 19..21 are for storing type: */
+#define DCACHE_MISS_TYPE (0 << 19) /* Negative dentry */
+#define DCACHE_WHITEOUT_TYPE (1 << 19) /* Whiteout dentry (stop pathwalk) */
+#define DCACHE_DIRECTORY_TYPE (2 << 19) /* Normal directory */
+#define DCACHE_AUTODIR_TYPE (3 << 19) /* Lookupless directory (presumed automount) */
+#define DCACHE_REGULAR_TYPE (4 << 19) /* Regular file type */
+#define DCACHE_SPECIAL_TYPE (5 << 19) /* Other file type */
+#define DCACHE_SYMLINK_TYPE (6 << 19) /* Symlink */
-#define DCACHE_NOKEY_NAME BIT(25) /* Encrypted name encoded without key */
-#define DCACHE_OP_REAL BIT(26)
+#define DCACHE_NOKEY_NAME BIT(22) /* Encrypted name encoded without key */
+#define DCACHE_OP_REAL BIT(23)
-#define DCACHE_PAR_LOOKUP BIT(28) /* being looked up (with parent locked shared) */
-#define DCACHE_DENTRY_CURSOR BIT(29)
-#define DCACHE_NORCU BIT(30) /* No RCU delay for freeing */
+#define DCACHE_PAR_LOOKUP BIT(24) /* being looked up (with parent locked shared) */
+#define DCACHE_DENTRY_CURSOR BIT(25)
+#define DCACHE_NORCU BIT(26) /* No RCU delay for freeing */
extern seqlock_t rename_lock;
@@ -253,7 +253,6 @@ extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
const struct qstr *name);
-extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
extern struct dentry *d_find_any_alias(struct inode *inode);
extern struct dentry * d_obtain_alias(struct inode *);
extern struct dentry * d_obtain_root(struct inode *);
diff --git a/include/linux/device.h b/include/linux/device.h
index 80a5b3268986..615282365052 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1025,6 +1025,15 @@ static inline bool dev_pm_test_driver_flags(struct device *dev, u32 flags)
return !!(dev->power.driver_flags & flags);
}
+static inline bool dev_pm_smart_suspend(struct device *dev)
+{
+#ifdef CONFIG_PM_SLEEP
+ return dev->power.smart_suspend;
+#else
+ return false;
+#endif
+}
+
static inline void device_lock(struct device *dev)
{
mutex_lock(&dev->mutex);
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index d7e30d4f7503..f3bc0bcd7098 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -78,14 +78,18 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map)
#define phys_to_dma_unencrypted phys_to_dma
#endif
#else
-static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
- phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
if (dev->dma_range_map)
return translate_phys_to_dma(dev, paddr);
return paddr;
}
+static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
+ phys_addr_t paddr)
+{
+ return dma_addr_unencrypted(__phys_to_dma(dev, paddr));
+}
/*
* If memory encryption is supported, phys_to_dma will set the memory encryption
* bit in the DMA address, and dma_to_phys will clear it.
@@ -94,19 +98,20 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
*/
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- return __sme_set(phys_to_dma_unencrypted(dev, paddr));
+ return dma_addr_encrypted(__phys_to_dma(dev, paddr));
}
static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
{
phys_addr_t paddr;
+ dma_addr = dma_addr_canonical(dma_addr);
if (dev->dma_range_map)
paddr = translate_dma_to_phys(dev, dma_addr);
else
paddr = dma_addr;
- return __sme_clr(paddr);
+ return paddr;
}
#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
diff --git a/include/linux/edac.h b/include/linux/edac.h
index b4ee8961e623..451f9c152c99 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -661,4 +661,219 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci,
return mci->dimms[index];
}
+
+#define EDAC_FEAT_NAME_LEN 128
+
+/* RAS feature type */
+enum edac_dev_feat {
+ RAS_FEAT_SCRUB,
+ RAS_FEAT_ECS,
+ RAS_FEAT_MEM_REPAIR,
+ RAS_FEAT_MAX
+};
+
+/**
+ * struct edac_scrub_ops - scrub device operations (all elements optional)
+ * @read_addr: read base address of scrubbing range.
+ * @read_size: read size of the scrubbing range.
+ * @write_addr: set base address of the scrubbing range.
+ * @write_size: set size of the scrubbing range.
+ * @get_enabled_bg: check if currently performing background scrub.
+ * @set_enabled_bg: start or stop a bg-scrub.
+ * @get_min_cycle: get minimum supported scrub cycle duration in seconds.
+ * @get_max_cycle: get maximum supported scrub cycle duration in seconds.
+ * @get_cycle_duration: get current scrub cycle duration in seconds.
+ * @set_cycle_duration: set current scrub cycle duration in seconds.
+ */
+struct edac_scrub_ops {
+ int (*read_addr)(struct device *dev, void *drv_data, u64 *base);
+ int (*read_size)(struct device *dev, void *drv_data, u64 *size);
+ int (*write_addr)(struct device *dev, void *drv_data, u64 base);
+ int (*write_size)(struct device *dev, void *drv_data, u64 size);
+ int (*get_enabled_bg)(struct device *dev, void *drv_data, bool *enable);
+ int (*set_enabled_bg)(struct device *dev, void *drv_data, bool enable);
+ int (*get_min_cycle)(struct device *dev, void *drv_data, u32 *min);
+ int (*get_max_cycle)(struct device *dev, void *drv_data, u32 *max);
+ int (*get_cycle_duration)(struct device *dev, void *drv_data, u32 *cycle);
+ int (*set_cycle_duration)(struct device *dev, void *drv_data, u32 cycle);
+};
+
+#if IS_ENABLED(CONFIG_EDAC_SCRUB)
+int edac_scrub_get_desc(struct device *scrub_dev,
+ const struct attribute_group **attr_groups,
+ u8 instance);
+#else
+static inline int edac_scrub_get_desc(struct device *scrub_dev,
+ const struct attribute_group **attr_groups,
+ u8 instance)
+{ return -EOPNOTSUPP; }
+#endif /* CONFIG_EDAC_SCRUB */
+
+/**
+ * struct edac_ecs_ops - ECS device operations (all elements optional)
+ * @get_log_entry_type: read the log entry type value.
+ * @set_log_entry_type: set the log entry type value.
+ * @get_mode: read the mode value.
+ * @set_mode: set the mode value.
+ * @reset: reset the ECS counter.
+ * @get_threshold: read the threshold count per gigabit of memory cells.
+ * @set_threshold: set the threshold count per gigabit of memory cells.
+ */
+struct edac_ecs_ops {
+ int (*get_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 *val);
+ int (*set_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 val);
+ int (*get_mode)(struct device *dev, void *drv_data, int fru_id, u32 *val);
+ int (*set_mode)(struct device *dev, void *drv_data, int fru_id, u32 val);
+ int (*reset)(struct device *dev, void *drv_data, int fru_id, u32 val);
+ int (*get_threshold)(struct device *dev, void *drv_data, int fru_id, u32 *threshold);
+ int (*set_threshold)(struct device *dev, void *drv_data, int fru_id, u32 threshold);
+};
+
+struct edac_ecs_ex_info {
+ u16 num_media_frus;
+};
+
+#if IS_ENABLED(CONFIG_EDAC_ECS)
+int edac_ecs_get_desc(struct device *ecs_dev,
+ const struct attribute_group **attr_groups,
+ u16 num_media_frus);
+#else
+static inline int edac_ecs_get_desc(struct device *ecs_dev,
+ const struct attribute_group **attr_groups,
+ u16 num_media_frus)
+{ return -EOPNOTSUPP; }
+#endif /* CONFIG_EDAC_ECS */
+
+enum edac_mem_repair_type {
+ EDAC_REPAIR_MAX
+};
+
+enum edac_mem_repair_cmd {
+ EDAC_DO_MEM_REPAIR = 1,
+};
+
+/**
+ * struct edac_mem_repair_ops - memory repair operations
+ * (all elements are optional except do_repair, set_hpa/set_dpa)
+ * @get_repair_type: get the memory repair type, listed in
+ * enum edac_mem_repair_type.
+ * @get_persist_mode: get the current persist mode.
+ * false - Soft repair type (temporary repair).
+ * true - Hard memory repair type (permanent repair).
+ * @set_persist_mode: set the persist mode of the memory repair instance.
+ * @get_repair_safe_when_in_use: get whether memory media is accessible and
+ * data is retained during repair operation.
+ * @get_hpa: get current host physical address (HPA) of memory to repair.
+ * @set_hpa: set host physical address (HPA) of memory to repair.
+ * @get_min_hpa: get the minimum supported host physical address (HPA).
+ * @get_max_hpa: get the maximum supported host physical address (HPA).
+ * @get_dpa: get current device physical address (DPA) of memory to repair.
+ * @set_dpa: set device physical address (DPA) of memory to repair.
+ * In some states of system configuration (e.g. before address decoders
+ * have been configured), memory devices (e.g. CXL) may not have an active
+ * mapping in the host physical address map. As such, the memory
+ * to repair must be identified by a device specific physical addressing
+ * scheme using a device physical address (DPA). The DPA and other control
+ * attributes to use for the repair operations will be presented in related
+ * error records.
+ * @get_min_dpa: get the minimum supported device physical address (DPA).
+ * @get_max_dpa: get the maximum supported device physical address (DPA).
+ * @get_nibble_mask: get current nibble mask of memory to repair.
+ * @set_nibble_mask: set nibble mask of memory to repair.
+ * @get_bank_group: get current bank group of memory to repair.
+ * @set_bank_group: set bank group of memory to repair.
+ * @get_bank: get current bank of memory to repair.
+ * @set_bank: set bank of memory to repair.
+ * @get_rank: get current rank of memory to repair.
+ * @set_rank: set rank of memory to repair.
+ * @get_row: get current row of memory to repair.
+ * @set_row: set row of memory to repair.
+ * @get_column: get current column of memory to repair.
+ * @set_column: set column of memory to repair.
+ * @get_channel: get current channel of memory to repair.
+ * @set_channel: set channel of memory to repair.
+ * @get_sub_channel: get current subchannel of memory to repair.
+ * @set_sub_channel: set subchannel of memory to repair.
+ * @do_repair: Issue memory repair operation for the HPA/DPA and
+ * other control attributes set for the memory to repair.
+ *
+ * All elements are optional except do_repair and at least one of set_hpa/set_dpa.
+ */
+struct edac_mem_repair_ops {
+ int (*get_repair_type)(struct device *dev, void *drv_data, const char **type);
+ int (*get_persist_mode)(struct device *dev, void *drv_data, bool *persist);
+ int (*set_persist_mode)(struct device *dev, void *drv_data, bool persist);
+ int (*get_repair_safe_when_in_use)(struct device *dev, void *drv_data, bool *safe);
+ int (*get_hpa)(struct device *dev, void *drv_data, u64 *hpa);
+ int (*set_hpa)(struct device *dev, void *drv_data, u64 hpa);
+ int (*get_min_hpa)(struct device *dev, void *drv_data, u64 *hpa);
+ int (*get_max_hpa)(struct device *dev, void *drv_data, u64 *hpa);
+ int (*get_dpa)(struct device *dev, void *drv_data, u64 *dpa);
+ int (*set_dpa)(struct device *dev, void *drv_data, u64 dpa);
+ int (*get_min_dpa)(struct device *dev, void *drv_data, u64 *dpa);
+ int (*get_max_dpa)(struct device *dev, void *drv_data, u64 *dpa);
+ int (*get_nibble_mask)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_nibble_mask)(struct device *dev, void *drv_data, u32 val);
+ int (*get_bank_group)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_bank_group)(struct device *dev, void *drv_data, u32 val);
+ int (*get_bank)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_bank)(struct device *dev, void *drv_data, u32 val);
+ int (*get_rank)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_rank)(struct device *dev, void *drv_data, u32 val);
+ int (*get_row)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_row)(struct device *dev, void *drv_data, u32 val);
+ int (*get_column)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_column)(struct device *dev, void *drv_data, u32 val);
+ int (*get_channel)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_channel)(struct device *dev, void *drv_data, u32 val);
+ int (*get_sub_channel)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_sub_channel)(struct device *dev, void *drv_data, u32 val);
+ int (*do_repair)(struct device *dev, void *drv_data, u32 val);
+};
+
+#if IS_ENABLED(CONFIG_EDAC_MEM_REPAIR)
+int edac_mem_repair_get_desc(struct device *dev,
+ const struct attribute_group **attr_groups,
+ u8 instance);
+#else
+static inline int edac_mem_repair_get_desc(struct device *dev,
+ const struct attribute_group **attr_groups,
+ u8 instance)
+{ return -EOPNOTSUPP; }
+#endif /* CONFIG_EDAC_MEM_REPAIR */
+
+/* EDAC device feature information structure */
+struct edac_dev_data {
+ union {
+ const struct edac_scrub_ops *scrub_ops;
+ const struct edac_ecs_ops *ecs_ops;
+ const struct edac_mem_repair_ops *mem_repair_ops;
+ };
+ u8 instance;
+ void *private;
+};
+
+struct edac_dev_feat_ctx {
+ struct device dev;
+ void *private;
+ struct edac_dev_data *scrub;
+ struct edac_dev_data ecs;
+ struct edac_dev_data *mem_repair;
+};
+
+struct edac_dev_feature {
+ enum edac_dev_feat ft_type;
+ u8 instance;
+ union {
+ const struct edac_scrub_ops *scrub_ops;
+ const struct edac_ecs_ops *ecs_ops;
+ const struct edac_mem_repair_ops *mem_repair_ops;
+ };
+ void *ctx;
+ struct edac_ecs_ex_info ecs_info;
+};
+
+int edac_dev_register(struct device *parent, char *dev_name,
+ void *parent_pvt_data, int num_features,
+ const struct edac_dev_feature *ras_features);
#endif /* _LINUX_EDAC_H_ */
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 78318d49276d..d8eabbf86a5b 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -167,13 +167,13 @@ struct em_data_callback {
struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
int em_dev_update_perf_domain(struct device *dev,
- struct em_perf_table __rcu *new_table);
+ struct em_perf_table *new_table);
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
- struct em_data_callback *cb, cpumask_t *span,
- bool microwatts);
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts);
void em_dev_unregister_perf_domain(struct device *dev);
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd);
-void em_table_free(struct em_perf_table __rcu *table);
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd);
+void em_table_free(struct em_perf_table *table);
int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
int nr_states);
int em_dev_update_chip_binning(struct device *dev);
@@ -240,9 +240,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
struct em_perf_state *ps;
int i;
-#ifdef CONFIG_SCHED_DEBUG
WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
-#endif
if (!sum_util)
return 0;
@@ -346,8 +344,8 @@ struct em_data_callback {};
static inline
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
- struct em_data_callback *cb, cpumask_t *span,
- bool microwatts)
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts)
{
return -EINVAL;
}
@@ -373,14 +371,14 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
return 0;
}
static inline
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
{
return NULL;
}
-static inline void em_table_free(struct em_perf_table __rcu *table) {}
+static inline void em_table_free(struct em_perf_table *table) {}
static inline
int em_dev_update_perf_domain(struct device *dev,
- struct em_perf_table __rcu *new_table)
+ struct em_perf_table *new_table)
{
return -EINVAL;
}
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 0c0d00fcd131..ccb478eb174b 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -25,6 +25,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t
/* Used to release the epoll bits inside the "struct file" */
void eventpoll_release_file(struct file *file);
+/* Copy ready events to userspace */
+int epoll_sendevents(struct file *file, struct epoll_event __user *events,
+ int maxevents);
+
/*
* This is called from inside fs/file_table.c:__fput() to unlink files
* from the eventpoll interface. We need to have this facility to cleanup
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 64130ae19690..65655a5d1be2 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -65,6 +65,37 @@ enum execmem_range_flags {
* Architectures that use EXECMEM_ROX_CACHE must implement this.
*/
void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable);
+
+/**
+ * execmem_make_temp_rw - temporarily remap region with read-write
+ * permissions
+ * @ptr: address of the region to remap
+ * @size: size of the region to remap
+ *
+ * Remaps a part of the cached large page in the ROX cache in the range
+ * [@ptr, @ptr + @size) as writable and not executable. The caller must
+ * have exclusive ownership of this range and ensure nothing will try to
+ * execute code in this range.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int execmem_make_temp_rw(void *ptr, size_t size);
+
+/**
+ * execmem_restore_rox - restore read-only-execute permissions
+ * @ptr: address of the region to remap
+ * @size: size of the region to remap
+ *
+ * Restores read-only-execute permissions on a range [@ptr, @ptr + @size)
+ * after it was temporarily remapped as writable. Relies on architecture
+ * implementation of set_memory_rox() to restore mapping using large pages.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int execmem_restore_rox(void *ptr, size_t size);
+#else
+static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
+static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
#endif
/**
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 78f660ebc318..3c817dc6292e 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -25,7 +25,7 @@
#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET)
-#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
+#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT)
/*
* fanotify_init() flags that require CAP_SYS_ADMIN.
@@ -38,7 +38,8 @@
FAN_REPORT_PIDFD | \
FAN_REPORT_FD_ERROR | \
FAN_UNLIMITED_QUEUE | \
- FAN_UNLIMITED_MARKS)
+ FAN_UNLIMITED_MARKS | \
+ FAN_REPORT_MNT)
/*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@@ -58,7 +59,7 @@
#define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV)
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
- FAN_MARK_FILESYSTEM)
+ FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS)
#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
FAN_MARK_FLUSH)
@@ -109,10 +110,13 @@
/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
+#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH)
+
/* Events that user can request to be notified on */
#define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
FANOTIFY_INODE_EVENTS | \
- FANOTIFY_ERROR_EVENTS)
+ FANOTIFY_ERROR_EVENTS | \
+ FANOTIFY_MOUNT_EVENTS)
/* Extra flags that may be reported with event or control handling of events */
#define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR)
diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h
index 9b3a8d9b17ab..7db62fbc0500 100644
--- a/include/linux/file_ref.h
+++ b/include/linux/file_ref.h
@@ -61,6 +61,7 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
atomic_long_set(&ref->refcnt, cnt - 1);
}
+bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt);
bool __file_ref_put(file_ref_t *ref, unsigned long cnt);
/**
@@ -161,6 +162,39 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
}
/**
+ * file_ref_put_close - drop a reference expecting it would transition to FILE_REF_NOREF
+ * @ref: Pointer to the reference count
+ *
+ * Semantically it is equivalent to calling file_ref_put(), but it trades lower
+ * performance in face of other CPUs also modifying the refcount for higher
+ * performance when this happens to be the last reference.
+ *
+ * For the last reference file_ref_put() issues 2 atomics. One to drop the
+ * reference and another to transition it to FILE_REF_DEAD. This routine does
+ * the work in one step, but in order to do it has to pre-read the variable which
+ * decreases scalability.
+ *
+ * Use with close() et al, stick to file_ref_put() by default.
+ */
+static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref)
+{
+ long old, new;
+
+ old = atomic_long_read(&ref->refcnt);
+ do {
+ if (unlikely(old < 0))
+ return __file_ref_put_badval(ref, old);
+
+ if (old == FILE_REF_ONEREF)
+ new = FILE_REF_DEAD;
+ else
+ new = old - 1;
+ } while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new));
+
+ return new == FILE_REF_DEAD;
+}
+
+/**
* file_ref_read - Read the number of file references
* @ref: Pointer to the reference count
*
@@ -174,4 +208,18 @@ static inline unsigned long file_ref_read(file_ref_t *ref)
return c >= FILE_REF_RELEASED ? 0 : c + 1;
}
+/**
+ * __file_ref_read_raw - Return the value stored in ref->refcnt
+ * @ref: Pointer to the reference count
+ *
+ * Return: The raw value found in the counter
+ *
+ * A hack for file_needs_f_pos_lock(), you probably want to use
+ * file_ref_read() instead.
+ */
+static inline unsigned long __file_ref_read_raw(file_ref_t *ref)
+{
+ return atomic_long_read(&ref->refcnt);
+}
+
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2788df98080f..1a0e23a5d02d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
+#include <linux/vfsdebug.h>
#include <linux/linkage.h>
#include <linux/wait_bit.h>
#include <linux/kdev_t.h>
@@ -790,19 +791,8 @@ struct inode {
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
{
- int testlen;
-
- /*
- * TODO: patch it into a debug-only check if relevant macros show up.
- * In the meantime, since we are suffering strlen even on production kernels
- * to find the right length, do a fixup if the wrong value got passed.
- */
- testlen = strlen(link);
- if (testlen != linklen) {
- WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)",
- link, linklen, testlen);
- linklen = testlen;
- }
+ VFS_WARN_ON_INODE(strlen(link) != linklen, inode);
+ VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode);
inode->i_link = link;
inode->i_linklen = linklen;
inode->i_opflags |= IOP_CACHED_LINK;
@@ -1067,7 +1057,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
/**
* struct file - Represents a file
- * @f_ref: reference count
* @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
* @f_mode: FMODE_* flags often used in hotpaths
* @f_op: file operations
@@ -1077,12 +1066,12 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_flags: file flags
* @f_iocb_flags: iocb flags
* @f_cred: stashed credentials of creator/opener
+ * @f_owner: file owner
* @f_path: path of the file
* @f_pos_lock: lock protecting file position
* @f_pipe: specific to pipes
* @f_pos: file position
* @f_security: LSM security context of this file
- * @f_owner: file owner
* @f_wb_err: writeback error
* @f_sb_err: per sb writeback errors
* @f_ep: link of all epoll hooks for this file
@@ -1090,9 +1079,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_llist: work queue entrypoint
* @f_ra: file's readahead state
* @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
+ * @f_ref: reference count
*/
struct file {
- file_ref_t f_ref;
spinlock_t f_lock;
fmode_t f_mode;
const struct file_operations *f_op;
@@ -1102,6 +1091,7 @@ struct file {
unsigned int f_flags;
unsigned int f_iocb_flags;
const struct cred *f_cred;
+ struct fown_struct *f_owner;
/* --- cacheline 1 boundary (64 bytes) --- */
struct path f_path;
union {
@@ -1115,7 +1105,6 @@ struct file {
void *f_security;
#endif
/* --- cacheline 2 boundary (128 bytes) --- */
- struct fown_struct *f_owner;
errseq_t f_wb_err;
errseq_t f_sb_err;
#ifdef CONFIG_EPOLL
@@ -1127,6 +1116,7 @@ struct file {
struct file_ra_state f_ra;
freeptr_t f_freeptr;
};
+ file_ref_t f_ref;
/* --- cacheline 3 boundary (192 bytes) --- */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
@@ -1981,8 +1971,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap,
*/
int vfs_create(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, bool);
-int vfs_mkdir(struct mnt_idmap *, struct inode *,
- struct dentry *, umode_t);
+struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
+ struct dentry *, umode_t);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, dev_t);
int vfs_symlink(struct mnt_idmap *, struct inode *,
@@ -2039,7 +2029,7 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group);
int vfs_fchmod(struct file *file, umode_t mode);
int vfs_utimes(const struct path *path, struct timespec64 *times);
-extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
@@ -2211,8 +2201,8 @@ struct inode_operations {
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,
const char *);
- int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,
- umode_t);
+ struct dentry *(*mkdir) (struct mnt_idmap *, struct inode *,
+ struct dentry *, umode_t);
int (*rmdir) (struct inode *,struct dentry *);
int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,
umode_t,dev_t);
@@ -2616,6 +2606,7 @@ struct file_system_type {
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
#define FS_MGTIME 64 /* FS uses multigrain timestamps */
+#define FS_LBS 128 /* FS supports LBS */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2653,9 +2644,6 @@ static inline bool is_mgtime(const struct inode *inode)
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
-extern struct dentry *mount_single(struct file_system_type *fs_type,
- int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_nodev(struct file_system_type *fs_type,
int flags, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2794,13 +2782,13 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
return mnt_idmap(mnt) != &nop_mnt_idmap;
}
-extern long vfs_truncate(const struct path *, loff_t);
+int vfs_truncate(const struct path *, loff_t);
int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
unsigned int time_attrs, struct file *filp);
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
-extern long do_sys_open(int dfd, const char __user *filename, int flags,
- umode_t mode);
+int do_sys_open(int dfd, const char __user *filename, int flags,
+ umode_t mode);
extern struct file *file_open_name(struct filename *, int, umode_t);
extern struct file *filp_open(const char *, int, umode_t);
extern struct file *file_open_root(const struct path *,
@@ -2851,7 +2839,10 @@ extern int filp_close(struct file *, fl_owner_t id);
extern struct filename *getname_flags(const char __user *, int);
extern struct filename *getname_uflags(const char __user *, int);
-extern struct filename *getname(const char __user *);
+static inline struct filename *getname(const char __user *name)
+{
+ return getname_flags(name, 0);
+}
extern struct filename *getname_kernel(const char *);
extern struct filename *__getname_maybe_null(const char __user *);
static inline struct filename *getname_maybe_null(const char __user *name, int flags)
@@ -2864,6 +2855,13 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f
return __getname_maybe_null(name);
}
extern void putname(struct filename *name);
+DEFINE_FREE(putname, struct filename *, if (!IS_ERR_OR_NULL(_T)) putname(_T))
+
+static inline struct filename *refname(struct filename *name)
+{
+ atomic_inc(&name->refcnt);
+ return name;
+}
extern int finish_open(struct file *file, struct dentry *dentry,
int (*open)(struct inode *, struct file *));
@@ -3297,7 +3295,11 @@ static inline void __iget(struct inode *inode)
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void __destroy_inode(struct inode *);
-extern struct inode *new_inode_pseudo(struct super_block *sb);
+struct inode *alloc_inode(struct super_block *sb);
+static inline struct inode *new_inode_pseudo(struct super_block *sb)
+{
+ return alloc_inode(sb);
+}
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 4b4bfef6f053..a19e4bd32e4d 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -144,8 +144,6 @@ extern void put_fs_context(struct fs_context *fc);
extern int vfs_parse_fs_param_source(struct fs_context *fc,
struct fs_parameter *param);
extern void fc_drop_locked(struct fs_context *fc);
-int reconfigure_single(struct super_block *s,
- int flags, void *data);
extern int get_tree_nodev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 18855cb44b1c..56fad33043d5 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -310,10 +310,8 @@ static inline void fscrypt_prepare_dentry(struct dentry *dentry,
/* crypto.c */
void fscrypt_enqueue_decrypt_work(struct work_struct *);
-struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
- unsigned int len,
- unsigned int offs,
- gfp_t gfp_flags);
+struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio,
+ size_t len, size_t offs, gfp_t gfp_flags);
int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page,
unsigned int len, unsigned int offs,
u64 lblk_num, gfp_t gfp_flags);
@@ -480,10 +478,8 @@ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work)
{
}
-static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
- unsigned int len,
- unsigned int offs,
- gfp_t gfp_flags)
+static inline struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio,
+ size_t len, size_t offs, gfp_t gfp_flags)
{
return ERR_PTR(-EOPNOTSUPP);
}
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 83d3ac97f826..454d8e466958 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -320,6 +320,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt)
__fsnotify_vfsmount_delete(mnt);
}
+static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{
+ __fsnotify_mntns_delete(mntns);
+}
+
/*
* fsnotify_inoderemove - an inode is going away
*/
@@ -528,4 +533,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
NULL, NULL, NULL, 0);
}
+static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_ATTACH, ns, mnt);
+}
+
+static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_DETACH, ns, mnt);
+}
+
+static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_MOVE, ns, mnt);
+}
+
#endif /* _LINUX_FS_NOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0d24a21a8e60..6cd8d1d28b8b 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -59,6 +59,10 @@
#define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */
+#define FS_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FS_MNT_DETACH 0x02000000 /* Mount was detached */
+#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH)
+
/*
* Set on inode mark that cares about things that happen to its children.
* Always set for dnotify and inotify.
@@ -80,6 +84,9 @@
*/
#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
+/* Mount namespace events */
+#define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH)
+
/* Content events can be used to inspect file content */
#define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \
FS_ACCESS_PERM)
@@ -108,6 +115,7 @@
/* Events that can be reported to backends */
#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
+ FSNOTIFY_MNT_EVENTS | \
FS_EVENTS_POSS_ON_CHILD | \
FS_DELETE_SELF | FS_MOVE_SELF | \
FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
@@ -298,6 +306,7 @@ enum fsnotify_data_type {
FSNOTIFY_EVENT_PATH,
FSNOTIFY_EVENT_INODE,
FSNOTIFY_EVENT_DENTRY,
+ FSNOTIFY_EVENT_MNT,
FSNOTIFY_EVENT_ERROR,
};
@@ -318,6 +327,11 @@ static inline const struct path *file_range_path(const struct file_range *range)
return range->path;
}
+struct fsnotify_mnt {
+ const struct mnt_namespace *ns;
+ u64 mnt_id;
+};
+
static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
{
switch (data_type) {
@@ -383,6 +397,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
}
}
+static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data,
+ int data_type)
+{
+ switch (data_type) {
+ case FSNOTIFY_EVENT_MNT:
+ return data;
+ default:
+ return NULL;
+ }
+}
+
+static inline u64 fsnotify_data_mnt_id(const void *data, int data_type)
+{
+ const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
+
+ return mnt_data ? mnt_data->mnt_id : 0;
+}
+
static inline struct fs_error_report *fsnotify_data_error_report(
const void *data,
int data_type)
@@ -420,6 +452,7 @@ enum fsnotify_iter_type {
FSNOTIFY_ITER_TYPE_SB,
FSNOTIFY_ITER_TYPE_PARENT,
FSNOTIFY_ITER_TYPE_INODE2,
+ FSNOTIFY_ITER_TYPE_MNTNS,
FSNOTIFY_ITER_TYPE_COUNT
};
@@ -429,6 +462,7 @@ enum fsnotify_obj_type {
FSNOTIFY_OBJ_TYPE_INODE,
FSNOTIFY_OBJ_TYPE_VFSMOUNT,
FSNOTIFY_OBJ_TYPE_SB,
+ FSNOTIFY_OBJ_TYPE_MNTNS,
FSNOTIFY_OBJ_TYPE_COUNT,
FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
};
@@ -613,8 +647,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data
extern void __fsnotify_inode_delete(struct inode *inode);
extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
extern void fsnotify_sb_delete(struct super_block *sb);
+extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns);
extern void fsnotify_sb_free(struct super_block *sb);
extern u32 fsnotify_get_cookie(void);
+extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt);
static inline __u32 fsnotify_parent_needed_mask(__u32 mask)
{
@@ -928,6 +964,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
static inline void fsnotify_sb_delete(struct super_block *sb)
{}
+static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{}
+
static inline void fsnotify_sb_free(struct super_block *sb)
{}
@@ -942,6 +981,9 @@ static inline u32 fsnotify_get_cookie(void)
static inline void fsnotify_unmount_inodes(struct super_block *sb)
{}
+static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
+{}
+
#endif /* CONFIG_FSNOTIFY */
#endif /* __KERNEL __ */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f7bfdcf0dda3..88e078871158 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -223,6 +223,11 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
}
#endif
+static inline enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused)
+{
+ return HRTIMER_NORESTART;
+}
+
/* Exported timer functions: */
/* Initialize timers: */
@@ -333,6 +338,7 @@ static inline int hrtimer_callback_running(struct hrtimer *timer)
static inline void hrtimer_update_function(struct hrtimer *timer,
enum hrtimer_restart (*function)(struct hrtimer *))
{
+#ifdef CONFIG_PROVE_LOCKING
guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock);
if (WARN_ON_ONCE(hrtimer_is_queued(timer)))
@@ -340,7 +346,7 @@ static inline void hrtimer_update_function(struct hrtimer *timer,
if (WARN_ON_ONCE(!function))
return;
-
+#endif
timer->function = function;
}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 4179add2864b..675959fb97ba 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -371,19 +371,6 @@ struct vmtransfer_page_packet_header {
struct vmtransfer_page_range ranges[];
} __packed;
-struct vmgpadl_packet_header {
- struct vmpacket_descriptor d;
- u32 gpadl;
- u32 reserved;
-} __packed;
-
-struct vmadd_remove_transfer_page_set {
- struct vmpacket_descriptor d;
- u32 gpadl;
- u16 xfer_pageset_id;
- u16 reserved;
-} __packed;
-
/*
* This structure defines a range in guest physical space that can be made to
* look virtually contiguous.
@@ -395,30 +382,6 @@ struct gpa_range {
};
/*
- * This is the format for an Establish Gpadl packet, which contains a handle by
- * which this GPADL will be known and a set of GPA ranges associated with it.
- * This can be converted to a MDL by the guest OS. If there are multiple GPA
- * ranges, then the resulting MDL will be "chained," representing multiple VA
- * ranges.
- */
-struct vmestablish_gpadl {
- struct vmpacket_descriptor d;
- u32 gpadl;
- u32 range_cnt;
- struct gpa_range range[1];
-} __packed;
-
-/*
- * This is the format for a Teardown Gpadl packet, which indicates that the
- * GPADL handle in the Establish Gpadl packet will never be referenced again.
- */
-struct vmteardown_gpadl {
- struct vmpacket_descriptor d;
- u32 gpadl;
- u32 reserved; /* for alignment to a 8-byte boundary */
-} __packed;
-
-/*
* This is the format for a GPA-Direct packet, which contains a set of GPA
* ranges, in addition to commands and/or data.
*/
@@ -429,25 +392,6 @@ struct vmdata_gpa_direct {
struct gpa_range range[1];
} __packed;
-/* This is the format for a Additional Data Packet. */
-struct vmadditional_data {
- struct vmpacket_descriptor d;
- u64 total_bytes;
- u32 offset;
- u32 byte_cnt;
- unsigned char data[1];
-} __packed;
-
-union vmpacket_largest_possible_header {
- struct vmpacket_descriptor simple_hdr;
- struct vmtransfer_page_packet_header xfer_page_hdr;
- struct vmgpadl_packet_header gpadl_hdr;
- struct vmadd_remove_transfer_page_set add_rm_xfer_page_hdr;
- struct vmestablish_gpadl establish_gpadl_hdr;
- struct vmteardown_gpadl teardown_gpadl_hdr;
- struct vmdata_gpa_direct data_gpa_direct_hdr;
-};
-
#define VMPACKET_DATA_START_ADDRESS(__packet) \
(void *)(((unsigned char *)__packet) + \
((struct vmpacket_descriptor)__packet)->offset8 * 8)
@@ -1661,6 +1605,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
const guid_t *shv_host_servie_id);
int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp);
void vmbus_set_event(struct vmbus_channel *channel);
+int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu);
/* Get the start of the ring buffer. */
static inline void *
diff --git a/include/linux/idr.h b/include/linux/idr.h
index da5f5fa4a3a6..cd729be369b3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
#include <linux/radix-tree.h>
#include <linux/gfp.h>
#include <linux/percpu.h>
+#include <linux/cleanup.h>
struct idr {
struct radix_tree_root idr_rt;
@@ -124,6 +125,22 @@ void *idr_get_next_ul(struct idr *, unsigned long *nextid);
void *idr_replace(struct idr *, void *, unsigned long id);
void idr_destroy(struct idr *);
+struct __class_idr {
+ struct idr *idr;
+ int id;
+};
+
+#define idr_null ((struct __class_idr){ NULL, -1 })
+#define take_idr_id(id) __get_and_null(id, idr_null)
+
+DEFINE_CLASS(idr_alloc, struct __class_idr,
+ if (_T.id >= 0) idr_remove(_T.idr, _T.id),
+ ((struct __class_idr){
+ .idr = idr,
+ .id = idr_alloc(idr, ptr, start, end, gfp),
+ }),
+ struct idr *idr, void *ptr, int start, int end, gfp_t gfp);
+
/**
* idr_init_base() - Initialise an IDR.
* @idr: IDR handle.
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 8cd9327e4e78..c782a74d2a30 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -448,7 +448,7 @@ irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
static inline void disable_irq_nosync_lockdep(unsigned int irq)
{
disable_irq_nosync(irq);
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT)
local_irq_disable();
#endif
}
@@ -456,22 +456,14 @@ static inline void disable_irq_nosync_lockdep(unsigned int irq)
static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags)
{
disable_irq_nosync(irq);
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT)
local_irq_save(*flags);
#endif
}
-static inline void disable_irq_lockdep(unsigned int irq)
-{
- disable_irq(irq);
-#ifdef CONFIG_LOCKDEP
- local_irq_disable();
-#endif
-}
-
static inline void enable_irq_lockdep(unsigned int irq)
{
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT)
local_irq_enable();
#endif
enable_irq(irq);
@@ -479,7 +471,7 @@ static inline void enable_irq_lockdep(unsigned int irq)
static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags)
{
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT)
local_irq_restore(*flags);
#endif
enable_irq(irq);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 75bf54e76f3b..02fe001feebb 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -56,6 +56,13 @@ struct vm_fault;
*
* IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
* never be merged with the mapping before it.
+ *
+ * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block
+ * assigned to it yet and the file system will do that in the bio submission
+ * handler, splitting the I/O as needed.
+ *
+ * IOMAP_F_ATOMIC_BIO indicates that (write) I/O will be issued as an atomic
+ * bio, i.e. set REQ_ATOMIC.
*/
#define IOMAP_F_NEW (1U << 0)
#define IOMAP_F_DIRTY (1U << 1)
@@ -68,6 +75,8 @@ struct vm_fault;
#endif /* CONFIG_BUFFER_HEAD */
#define IOMAP_F_XATTR (1U << 5)
#define IOMAP_F_BOUNDARY (1U << 6)
+#define IOMAP_F_ANON_WRITE (1U << 7)
+#define IOMAP_F_ATOMIC_BIO (1U << 8)
/*
* Flags set by the core iomap code during operations:
@@ -111,6 +120,8 @@ struct iomap {
static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
{
+ if (iomap->flags & IOMAP_F_ANON_WRITE)
+ return U64_MAX; /* invalid */
return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
}
@@ -182,7 +193,8 @@ struct iomap_folio_ops {
#else
#define IOMAP_DAX 0
#endif /* CONFIG_FS_DAX */
-#define IOMAP_ATOMIC (1 << 9)
+#define IOMAP_ATOMIC (1 << 9) /* torn-write protection */
+#define IOMAP_DONTCACHE (1 << 10)
struct iomap_ops {
/*
@@ -211,8 +223,10 @@ struct iomap_ops {
* calls to iomap_iter(). Treat as read-only in the body.
* @len: The remaining length of the file segment we're operating on.
* It is updated at the same time as @pos.
- * @processed: The number of bytes processed by the body in the most recent
- * iteration, or a negative errno. 0 causes the iteration to stop.
+ * @iter_start_pos: The original start pos for the current iomap. Used for
+ * incremental iter advance.
+ * @status: Status of the most recent iteration. Zero on success or a negative
+ * errno on error.
* @flags: Zero or more of the iomap_begin flags above.
* @iomap: Map describing the I/O iteration
* @srcmap: Source map for COW operations
@@ -221,7 +235,8 @@ struct iomap_iter {
struct inode *inode;
loff_t pos;
u64 len;
- s64 processed;
+ loff_t iter_start_pos;
+ int status;
unsigned flags;
struct iomap iomap;
struct iomap srcmap;
@@ -229,20 +244,46 @@ struct iomap_iter {
};
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
+int iomap_iter_advance(struct iomap_iter *iter, u64 *count);
/**
- * iomap_length - length of the current iomap iteration
+ * iomap_length_trim - trimmed length of the current iomap iteration
* @iter: iteration structure
+ * @pos: File position to trim from.
+ * @len: Length of the mapping to trim to.
*
- * Returns the length that the operation applies to for the current iteration.
+ * Returns a trimmed length that the operation applies to for the current
+ * iteration.
*/
-static inline u64 iomap_length(const struct iomap_iter *iter)
+static inline u64 iomap_length_trim(const struct iomap_iter *iter, loff_t pos,
+ u64 len)
{
u64 end = iter->iomap.offset + iter->iomap.length;
if (iter->srcmap.type != IOMAP_HOLE)
end = min(end, iter->srcmap.offset + iter->srcmap.length);
- return min(iter->len, end - iter->pos);
+ return min(len, end - pos);
+}
+
+/**
+ * iomap_length - length of the current iomap iteration
+ * @iter: iteration structure
+ *
+ * Returns the length that the operation applies to for the current iteration.
+ */
+static inline u64 iomap_length(const struct iomap_iter *iter)
+{
+ return iomap_length_trim(iter, iter->pos, iter->len);
+}
+
+/**
+ * iomap_iter_advance_full - advance by the full length of current map
+ */
+static inline int iomap_iter_advance_full(struct iomap_iter *iter)
+{
+ u64 length = iomap_length(iter);
+
+ return iomap_iter_advance(iter, &length);
}
/**
@@ -306,12 +347,11 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
- bool *did_zero, const struct iomap_ops *ops);
+ bool *did_zero, const struct iomap_ops *ops, void *private);
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
- const struct iomap_ops *ops);
-vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
- const struct iomap_ops *ops);
-
+ const struct iomap_ops *ops, void *private);
+vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
+ void *private);
typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length,
struct iomap *iomap);
void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
@@ -328,16 +368,42 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
const struct iomap_ops *ops);
/*
+ * Flags for iomap_ioend->io_flags.
+ */
+/* shared COW extent */
+#define IOMAP_IOEND_SHARED (1U << 0)
+/* unwritten extent */
+#define IOMAP_IOEND_UNWRITTEN (1U << 1)
+/* don't merge into previous ioend */
+#define IOMAP_IOEND_BOUNDARY (1U << 2)
+/* is direct I/O */
+#define IOMAP_IOEND_DIRECT (1U << 3)
+
+/*
+ * Flags that if set on either ioend prevent the merge of two ioends.
+ * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way)
+ */
+#define IOMAP_IOEND_NOMERGE_FLAGS \
+ (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
+
+/*
* Structure for writeback I/O completions.
+ *
+ * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io
+ * (for direct I/O) can split a bio generated by iomap. In that case the parent
+ * ioend it was split from is recorded in ioend->io_parent.
*/
struct iomap_ioend {
struct list_head io_list; /* next ioend in chain */
- u16 io_type;
- u16 io_flags; /* IOMAP_F_* */
+ u16 io_flags; /* IOMAP_IOEND_* */
struct inode *io_inode; /* file being written to */
- size_t io_size; /* size of data within eof */
+ size_t io_size; /* size of the extent */
+ atomic_t io_remaining; /* completion defer count */
+ int io_error; /* stashed away status */
+ struct iomap_ioend *io_parent; /* parent for completions */
loff_t io_offset; /* offset in the file */
sector_t io_sector; /* start sector of ioend */
+ void *io_private; /* file system private data */
struct bio io_bio; /* MUST BE LAST! */
};
@@ -362,12 +428,14 @@ struct iomap_writeback_ops {
loff_t offset, unsigned len);
/*
- * Optional, allows the file systems to perform actions just before
- * submitting the bio and/or override the bio end_io handler for complex
- * operations like copy on write extent manipulation or unwritten extent
- * conversions.
+ * Optional, allows the file systems to hook into bio submission,
+ * including overriding the bi_end_io handler.
+ *
+ * Returns 0 if the bio was successfully submitted, or a negative
+ * error code if status was non-zero or another error happened and
+ * the bio could not be submitted.
*/
- int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
+ int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
/*
* Optional, allows the file system to discard state on a page where
@@ -383,6 +451,10 @@ struct iomap_writepage_ctx {
u32 nr_folios; /* folios added to the ioend */
};
+struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio,
+ loff_t file_offset, u16 ioend_flags);
+struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
+ unsigned int max_len, bool is_append);
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
struct list_head *more_ioends);
@@ -454,4 +526,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
# define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO)
#endif /* CONFIG_SWAP */
+extern struct bio_set iomap_ioend_bioset;
+
#endif /* LINUX_IOMAP_H */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8daa17f0107a..dd5df1e2d032 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
* @ipi_send_mask: send an IPI to destination cpus in cpumask
* @irq_nmi_setup: function called from core code before enabling an NMI
* @irq_nmi_teardown: function called from core code after disabling an NMI
+ * @irq_force_complete_move: optional function to force complete pending irq move
* @flags: chip specific flags
*/
struct irq_chip {
@@ -537,6 +538,8 @@ struct irq_chip {
int (*irq_nmi_setup)(struct irq_data *data);
void (*irq_nmi_teardown)(struct irq_data *data);
+ void (*irq_force_complete_move)(struct irq_data *data);
+
unsigned long flags;
};
@@ -612,6 +615,7 @@ extern int irq_affinity_online_cpu(unsigned int cpu);
#endif
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
+bool irq_can_move_in_process_context(struct irq_data *data);
void __irq_move_irq(struct irq_data *data);
static inline void irq_move_irq(struct irq_data *data)
{
@@ -619,11 +623,10 @@ static inline void irq_move_irq(struct irq_data *data)
__irq_move_irq(data);
}
void irq_move_masked_irq(struct irq_data *data);
-void irq_force_complete_move(struct irq_desc *desc);
#else
+static inline bool irq_can_move_in_process_context(struct irq_data *data) { return true; }
static inline void irq_move_irq(struct irq_data *data) { }
static inline void irq_move_masked_irq(struct irq_data *data) { }
-static inline void irq_force_complete_move(struct irq_desc *desc) { }
#endif
extern int no_irq_affinity;
diff --git a/include/linux/irqchip/irq-davinci-cp-intc.h b/include/linux/irqchip/irq-davinci-cp-intc.h
deleted file mode 100644
index 8d71ed5b5a61..000000000000
--- a/include/linux/irqchip/irq-davinci-cp-intc.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2019 Texas Instruments
- */
-
-#ifndef _LINUX_IRQ_DAVINCI_CP_INTC_
-#define _LINUX_IRQ_DAVINCI_CP_INTC_
-
-#include <linux/ioport.h>
-
-/**
- * struct davinci_cp_intc_config - configuration data for davinci-cp-intc
- * driver.
- *
- * @reg: register range to map
- * @num_irqs: number of HW interrupts supported by the controller
- */
-struct davinci_cp_intc_config {
- struct resource reg;
- unsigned int num_irqs;
-};
-
-int davinci_cp_intc_init(const struct davinci_cp_intc_config *config);
-
-#endif /* _LINUX_IRQ_DAVINCI_CP_INTC_ */
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index e432b6a12a32..5126482515cb 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -281,6 +281,8 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
+DEFINE_FREE(irq_domain_free_fwnode, struct fwnode_handle *, if (_T) irq_domain_free_fwnode(_T))
+
struct irq_domain_chip_generic_info;
/**
@@ -350,13 +352,13 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
irq_hw_number_t first_hwirq,
const struct irq_domain_ops *ops,
void *host_data);
-extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
- enum irq_domain_bus_token bus_token);
-extern void irq_set_default_host(struct irq_domain *host);
-extern struct irq_domain *irq_get_default_host(void);
-extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
- irq_hw_number_t hwirq, int node,
- const struct irq_affinity_desc *affinity);
+struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token);
+void irq_set_default_host(struct irq_domain *host);
+struct irq_domain *irq_get_default_host(void);
+int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
+ irq_hw_number_t hwirq, int node,
+ const struct irq_affinity_desc *affinity);
static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
{
@@ -370,8 +372,8 @@ static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode)
return fwnode && fwnode->ops == &irqchip_fwnode_ops;
}
-extern void irq_domain_update_bus_token(struct irq_domain *domain,
- enum irq_domain_bus_token bus_token);
+void irq_domain_update_bus_token(struct irq_domain *domain,
+ enum irq_domain_bus_token bus_token);
static inline
struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
@@ -454,7 +456,7 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
return IS_ERR(d) ? NULL : d;
}
-extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
+unsigned int irq_create_direct_mapping(struct irq_domain *host);
#endif
static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
@@ -507,19 +509,19 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw
return IS_ERR(d) ? NULL : d;
}
-extern void irq_domain_remove(struct irq_domain *host);
+void irq_domain_remove(struct irq_domain *host);
-extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq,
- irq_hw_number_t hwirq);
-extern void irq_domain_associate_many(struct irq_domain *domain,
- unsigned int irq_base,
- irq_hw_number_t hwirq_base, int count);
+int irq_domain_associate(struct irq_domain *domain, unsigned int irq,
+ irq_hw_number_t hwirq);
+void irq_domain_associate_many(struct irq_domain *domain,
+ unsigned int irq_base,
+ irq_hw_number_t hwirq_base, int count);
-extern unsigned int irq_create_mapping_affinity(struct irq_domain *host,
- irq_hw_number_t hwirq,
- const struct irq_affinity_desc *affinity);
-extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec);
-extern void irq_dispose_mapping(unsigned int virq);
+unsigned int irq_create_mapping_affinity(struct irq_domain *host,
+ irq_hw_number_t hwirq,
+ const struct irq_affinity_desc *affinity);
+unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec);
+void irq_dispose_mapping(unsigned int virq);
static inline unsigned int irq_create_mapping(struct irq_domain *host,
irq_hw_number_t hwirq)
@@ -527,9 +529,9 @@ static inline unsigned int irq_create_mapping(struct irq_domain *host,
return irq_create_mapping_affinity(host, hwirq, NULL);
}
-extern struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
- irq_hw_number_t hwirq,
- unsigned int *irq);
+struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
+ irq_hw_number_t hwirq,
+ unsigned int *irq);
static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
@@ -587,19 +589,21 @@ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest);
int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
/* V2 interfaces to support hierarchy IRQ domains. */
-extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
- unsigned int virq);
-extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq,
- const struct irq_chip *chip,
- void *chip_data, irq_flow_handler_t handler,
- void *handler_data, const char *handler_name);
-extern void irq_domain_reset_irq_data(struct irq_data *irq_data);
+struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
+ unsigned int virq);
+void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq,
+ const struct irq_chip *chip,
+ void *chip_data, irq_flow_handler_t handler,
+ void *handler_data, const char *handler_name);
+void irq_domain_reset_irq_data(struct irq_data *irq_data);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
-extern struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
- unsigned int flags, unsigned int size,
- struct fwnode_handle *fwnode,
- const struct irq_domain_ops *ops, void *host_data);
+struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
+ unsigned int flags,
+ unsigned int size,
+ struct fwnode_handle *fwnode,
+ const struct irq_domain_ops *ops,
+ void *host_data);
static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent,
unsigned int flags,
@@ -613,13 +617,13 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par
ops, host_data);
}
-extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc,
- const struct irq_affinity_desc *affinity);
-extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs);
-extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early);
-extern void irq_domain_deactivate_irq(struct irq_data *irq_data);
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc,
+ const struct irq_affinity_desc *affinity);
+void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs);
+int irq_domain_activate_irq(struct irq_data *irq_data, bool early);
+void irq_domain_deactivate_irq(struct irq_data *irq_data);
static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
unsigned int nr_irqs, int node, void *arg)
@@ -628,32 +632,29 @@ static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
NULL);
}
-extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
- unsigned int irq_base,
- unsigned int nr_irqs, void *arg);
-extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
- unsigned int virq,
- irq_hw_number_t hwirq,
- const struct irq_chip *chip,
- void *chip_data);
-extern void irq_domain_free_irqs_common(struct irq_domain *domain,
- unsigned int virq,
- unsigned int nr_irqs);
-extern void irq_domain_free_irqs_top(struct irq_domain *domain,
- unsigned int virq, unsigned int nr_irqs);
-
-extern int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg);
-extern int irq_domain_pop_irq(struct irq_domain *domain, int virq);
-
-extern int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
- unsigned int irq_base,
- unsigned int nr_irqs, void *arg);
-
-extern void irq_domain_free_irqs_parent(struct irq_domain *domain,
- unsigned int irq_base,
- unsigned int nr_irqs);
-
-extern int irq_domain_disconnect_hierarchy(struct irq_domain *domain,
+int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
+ unsigned int virq,
+ irq_hw_number_t hwirq,
+ const struct irq_chip *chip,
+ void *chip_data);
+void irq_domain_free_irqs_common(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs);
+void irq_domain_free_irqs_top(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs);
+
+int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg);
+int irq_domain_pop_irq(struct irq_domain *domain, int virq);
+
+int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
+ unsigned int irq_base,
+ unsigned int nr_irqs, void *arg);
+
+void irq_domain_free_irqs_parent(struct irq_domain *domain,
+ unsigned int irq_base,
+ unsigned int nr_irqs);
+
+int irq_domain_disconnect_hierarchy(struct irq_domain *domain,
unsigned int virq);
static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index f0e9f8eda7a3..c840431eadda 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -68,8 +68,6 @@ extern note_buf_t __percpu *crash_notes;
#define KEXEC_CRASH_MEM_ALIGN PAGE_SIZE
#endif
-#define KEXEC_CORE_NOTE_NAME CRASH_CORE_NOTE_NAME
-
/*
* This structure is used to hold the arguments that are used when loading
* kernel binaries.
diff --git a/include/linux/key.h b/include/linux/key.h
index 074dca3222b9..ba05de8579ec 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -236,6 +236,7 @@ struct key {
#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */
#define KEY_FLAG_KEEP 8 /* set if key should not be removed */
#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */
+#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */
/* the key type and key description string
* - the desc is used to match a key against search criteria
diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h
index 7fcf29a4e0de..6ea897222af1 100644
--- a/include/linux/kstrtox.h
+++ b/include/linux/kstrtox.h
@@ -143,6 +143,7 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t
*/
extern unsigned long simple_strtoul(const char *,char **,unsigned int);
+extern unsigned long simple_strntoul(const char *,char **,unsigned int,size_t);
extern long simple_strtol(const char *,char **,unsigned int);
extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
extern long long simple_strtoll(const char *,char **,unsigned int);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f34f4cfaa513..5438a1b446a6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -267,6 +267,7 @@ struct kvm_gfn_range {
union kvm_mmu_notifier_arg arg;
enum kvm_gfn_range_filter attr_filter;
bool may_block;
+ bool lockless;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -1746,7 +1747,6 @@ static inline void kvm_unregister_perf_callbacks(void) {}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
void kvm_arch_destroy_vm(struct kvm *kvm);
-void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index e13d2f947b51..7283bc4cf413 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -5,7 +5,7 @@
*
* Author : Etienne BASSET <etienne.basset@ensta.org>
*
- * All credits to : Stephen Smalley, <sds@tycho.nsa.gov>
+ * All credits to : Stephen Smalley
* All BUGS to : Etienne BASSET <etienne.basset@ensta.org>
*/
#ifndef _LSM_COMMON_LOGGING_
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index e2f1ce37c41e..2bf909fa3394 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -445,7 +445,7 @@ LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap)
LSM_HOOK(int, 0, locked_down, enum lockdown_reason what)
#ifdef CONFIG_PERF_EVENTS
-LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type)
+LSM_HOOK(int, 0, perf_event_open, int type)
LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event)
LSM_HOOK(int, 0, perf_event_read, struct perf_event *event)
LSM_HOOK(int, 0, perf_event_write, struct perf_event *event)
@@ -455,6 +455,7 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event)
LSM_HOOK(int, 0, uring_override_creds, const struct cred *new)
LSM_HOOK(int, 0, uring_sqpoll, void)
LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd)
+LSM_HOOK(int, 0, uring_allowed, void)
#endif /* CONFIG_IO_URING */
LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void)
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index ae4526389261..07584c5e36fb 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -26,11 +26,34 @@
*/
#define __sme_set(x) ((x) | sme_me_mask)
#define __sme_clr(x) ((x) & ~sme_me_mask)
+
+#define dma_addr_encrypted(x) __sme_set(x)
+#define dma_addr_canonical(x) __sme_clr(x)
+
#else
#define __sme_set(x) (x)
#define __sme_clr(x) (x)
#endif
+/*
+ * dma_addr_encrypted() and dma_addr_unencrypted() are for converting a given DMA
+ * address to the respective type of addressing.
+ *
+ * dma_addr_canonical() is used to reverse any conversions for encrypted/decrypted
+ * back to the canonical address.
+ */
+#ifndef dma_addr_encrypted
+#define dma_addr_encrypted(x) (x)
+#endif
+
+#ifndef dma_addr_unencrypted
+#define dma_addr_unencrypted(x) (x)
+#endif
+
+#ifndef dma_addr_canonical
+#define dma_addr_canonical(x) (x)
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* __MEM_ENCRYPT_H__ */
diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index 49eef10c8e59..4bf261d41a6d 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -60,7 +60,6 @@ struct misc_cg {
struct misc_res res[MISC_CG_RES_TYPES];
};
-u64 misc_cg_res_total_usage(enum misc_res_type type);
int misc_cg_set_capacity(enum misc_res_type type, u64 capacity);
int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount);
void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount);
@@ -104,11 +103,6 @@ static inline void put_misc_cg(struct misc_cg *cg)
#else /* !CONFIG_CGROUP_MISC */
-static inline u64 misc_cg_res_total_usage(enum misc_res_type type)
-{
- return 0;
-}
-
static inline int misc_cg_set_capacity(enum misc_res_type type, u64 capacity)
{
return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1f80baddacc5..2edb8d14d165 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2555,7 +2555,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
struct task_struct *task, bool bypass_rlim);
struct kvec;
-struct page *get_dump_page(unsigned long addr);
+struct page *get_dump_page(unsigned long addr, int *locked);
bool folio_mark_dirty(struct folio *folio);
bool folio_mark_dirty_lock(struct folio *folio);
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index b1b219bc3422..e71a6070a8f8 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t));
static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val));
static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val));
+static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap)
+{
+ return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap;
+}
+
#ifdef CONFIG_MULTIUSER
static inline uid_t __vfsuid_val(vfsuid_t uid)
{
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index d67614f7b7f1..bd7e60c0b72f 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -692,6 +692,7 @@ struct x86_cpu_id {
__u16 feature; /* bit index */
/* Solely for kernel-internal use: DO NOT EXPORT to userspace! */
__u16 flags;
+ __u8 type;
kernel_ulong_t driver_data;
};
@@ -703,6 +704,7 @@ struct x86_cpu_id {
#define X86_STEP_MIN 0
#define X86_STEP_MAX 0xf
#define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */
+#define X86_CPU_TYPE_ANY 0
/*
* Generic table type for matching CPU features.
diff --git a/include/linux/module.h b/include/linux/module.h
index 30e5b19bafa9..9937e71a3b5b 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -370,7 +370,6 @@ enum mod_mem_type {
struct module_memory {
void *base;
- void *rw_copy;
bool is_rox;
unsigned int size;
@@ -772,16 +771,6 @@ static inline bool is_livepatch_module(struct module *mod)
void set_module_sig_enforced(void);
-void *__module_writable_address(struct module *mod, void *loc);
-
-static inline void *module_writable_address(struct module *mod, void *loc)
-{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod ||
- mod->state != MODULE_STATE_UNFORMED)
- return loc;
- return __module_writable_address(mod, loc);
-}
-
#else /* !CONFIG_MODULES... */
static inline struct module *__module_address(unsigned long addr)
@@ -889,11 +878,6 @@ static inline bool module_is_coming(struct module *mod)
{
return false;
}
-
-static inline void *module_writable_address(struct module *mod, void *loc)
-{
- return loc;
-}
#endif /* CONFIG_MODULES */
#ifdef CONFIG_SYSFS
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index 1f5507ba5a12..e395461d59e5 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *mod);
-int module_post_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *mod);
-
#ifdef CONFIG_MODULES
void flush_module_init_free_work(void);
#else
diff --git a/include/linux/msi.h b/include/linux/msi.h
index b10093c4d00e..6d7d1947f928 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -73,7 +73,6 @@ struct msi_msg {
};
};
-extern int pci_msi_ignore_mask;
/* Helper functions */
struct msi_desc;
struct pci_dev;
@@ -81,7 +80,6 @@ struct device_attribute;
struct irq_domain;
struct irq_affinity_desc;
-void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
#ifdef CONFIG_GENERIC_MSI_IRQ
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
#else
@@ -225,8 +223,11 @@ struct msi_dev_domain {
int msi_setup_device_data(struct device *dev);
-void msi_lock_descs(struct device *dev);
-void msi_unlock_descs(struct device *dev);
+void __msi_lock_descs(struct device *dev);
+void __msi_unlock_descs(struct device *dev);
+
+DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, __msi_lock_descs(_T->lock),
+ __msi_unlock_descs(_T->lock));
struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
enum msi_desc_filter filter);
@@ -556,6 +557,16 @@ enum {
MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20),
/* PCI MSIs cannot be steered separately to CPU cores */
MSI_FLAG_NO_AFFINITY = (1 << 21),
+ /* Inhibit usage of entry masking */
+ MSI_FLAG_NO_MASK = (1 << 22),
+};
+
+/*
+ * Flags for msi_parent_ops::chip_flags
+ */
+enum {
+ MSI_CHIP_FLAG_SET_EOI = (1 << 0),
+ MSI_CHIP_FLAG_SET_ACK = (1 << 1),
};
/**
@@ -563,6 +574,8 @@ enum {
*
* @supported_flags: Required: The supported MSI flags of the parent domain
* @required_flags: Optional: The required MSI flags of the parent MSI domain
+ * @chip_flags: Optional: Select MSI chip callbacks to update with defaults
+ * in msi_lib_init_dev_msi_info().
* @bus_select_token: Optional: The bus token of the real parent domain for
* irq_domain::select()
* @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for
@@ -575,6 +588,7 @@ enum {
struct msi_parent_ops {
u32 supported_flags;
u32 required_flags;
+ u32 chip_flags;
u32 bus_select_token;
u32 bus_select_mask;
const char *prefix;
@@ -603,8 +617,6 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid);
bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
enum irq_domain_bus_token bus_token);
-int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
- unsigned int first, unsigned int last);
int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
unsigned int first, unsigned int last);
int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs);
@@ -613,8 +625,6 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u
const struct irq_affinity_desc *affdesc,
union msi_instance_cookie *cookie);
-void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
- unsigned int first, unsigned int last);
void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
unsigned int first, unsigned int last);
void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 8ec8fed3bce8..e3042176cdf4 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -18,35 +18,36 @@ enum { MAX_NESTED_LINKS = 8 };
enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
/* pathwalk mode */
-#define LOOKUP_FOLLOW 0x0001 /* follow links at the end */
-#define LOOKUP_DIRECTORY 0x0002 /* require a directory */
-#define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */
-#define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */
-#define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */
-#define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */
-
-#define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */
-#define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */
+#define LOOKUP_FOLLOW BIT(0) /* follow links at the end */
+#define LOOKUP_DIRECTORY BIT(1) /* require a directory */
+#define LOOKUP_AUTOMOUNT BIT(2) /* force terminal automount */
+#define LOOKUP_EMPTY BIT(3) /* accept empty path [user_... only] */
+#define LOOKUP_LINKAT_EMPTY BIT(4) /* Linkat request with empty path. */
+#define LOOKUP_DOWN BIT(5) /* follow mounts in the starting point */
+#define LOOKUP_MOUNTPOINT BIT(6) /* follow mounts in the end */
+#define LOOKUP_REVAL BIT(7) /* tell ->d_revalidate() to trust no cache */
+#define LOOKUP_RCU BIT(8) /* RCU pathwalk mode; semi-internal */
+#define LOOKUP_CACHED BIT(9) /* Only do cached lookup */
+#define LOOKUP_PARENT BIT(10) /* Looking up final parent in path */
+/* 5 spare bits for pathwalk */
/* These tell filesystem methods that we are dealing with the final component... */
-#define LOOKUP_OPEN 0x0100 /* ... in open */
-#define LOOKUP_CREATE 0x0200 /* ... in object creation */
-#define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */
-#define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */
+#define LOOKUP_OPEN BIT(16) /* ... in open */
+#define LOOKUP_CREATE BIT(17) /* ... in object creation */
+#define LOOKUP_EXCL BIT(18) /* ... in exclusive creation (target must not exist) */
+#define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */
-/* internal use only */
-#define LOOKUP_PARENT 0x0010
+/* 4 spare bits for intent */
/* Scoping flags for lookup. */
-#define LOOKUP_NO_SYMLINKS 0x010000 /* No symlink crossing. */
-#define LOOKUP_NO_MAGICLINKS 0x020000 /* No nd_jump_link() crossing. */
-#define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */
-#define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */
-#define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */
-#define LOOKUP_CACHED 0x200000 /* Only do cached lookup */
-#define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */
+#define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */
+#define LOOKUP_NO_MAGICLINKS BIT(25) /* No nd_jump_link() crossing. */
+#define LOOKUP_NO_XDEV BIT(26) /* No mountpoint crossing. */
+#define LOOKUP_BENEATH BIT(27) /* No escaping from starting point. */
+#define LOOKUP_IN_ROOT BIT(28) /* Treat dirfd as fs root. */
/* LOOKUP_* flags which do scope-related checks based on the dirfd. */
#define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT)
+/* 3 spare bits for scoping */
extern int path_pts(struct path *path);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9155a6ffc370..d66c61cbbd1d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1802,7 +1802,7 @@ struct nfs_rpc_ops {
int (*link) (struct inode *, struct inode *, const struct qstr *);
int (*symlink) (struct inode *, struct dentry *, struct folio *,
unsigned int, struct iattr *);
- int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
+ struct dentry *(*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
int (*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *);
int (*mknod) (struct inode *, struct dentry *, struct iattr *,
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a8dfb38c9bb6..e78fa535f61d 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -17,7 +17,6 @@
void lockup_detector_init(void);
void lockup_detector_retry_init(void);
void lockup_detector_soft_poweroff(void);
-void lockup_detector_cleanup(void);
extern int watchdog_user_enabled;
extern int watchdog_thresh;
@@ -37,7 +36,6 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
static inline void lockup_detector_init(void) { }
static inline void lockup_detector_retry_init(void) { }
static inline void lockup_detector_soft_poweroff(void) { }
-static inline void lockup_detector_cleanup(void) { }
#endif /* !CONFIG_LOCKUP_DETECTOR */
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
@@ -104,12 +102,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
extern void hardlockup_detector_perf_stop(void);
extern void hardlockup_detector_perf_restart(void);
-extern void hardlockup_detector_perf_cleanup(void);
extern void hardlockup_config_perf_event(const char *str);
#else
static inline void hardlockup_detector_perf_stop(void) { }
static inline void hardlockup_detector_perf_restart(void) { }
-static inline void hardlockup_detector_perf_cleanup(void) { }
static inline void hardlockup_config_perf_event(const char *str) { }
#endif
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 9fd7a0ce9c1a..f0ac0633366b 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -94,7 +94,6 @@
#include <linux/bitmap.h>
#include <linux/minmax.h>
#include <linux/nodemask_types.h>
-#include <linux/numa.h>
#include <linux/random.h>
extern nodemask_t _unused_nodemask_arg_;
@@ -191,6 +190,13 @@ static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *s
bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
}
+#define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES)
+static __always_inline void __nodes_copy(nodemask_t *dstp,
+ const nodemask_t *srcp, unsigned int nbits)
+{
+ bitmap_copy(dstp->bits, srcp->bits, nbits);
+}
+
#define nodes_complement(dst, src) \
__nodes_complement(&(dst), &(src), MAX_NUMNODES)
static __always_inline void __nodes_complement(nodemask_t *dstp,
diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h
index 6b28d97ea6ed..f850a48742f1 100644
--- a/include/linux/nodemask_types.h
+++ b/include/linux/nodemask_types.h
@@ -3,7 +3,16 @@
#define __LINUX_NODEMASK_TYPES_H
#include <linux/bitops.h>
-#include <linux/numa.h>
+
+#ifdef CONFIG_NODES_SHIFT
+#define NODES_SHIFT CONFIG_NODES_SHIFT
+#else
+#define NODES_SHIFT 0
+#endif
+
+#define MAX_NUMNODES (1 << NODES_SHIFT)
+
+#define NUMA_NO_NODE (-1)
typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
diff --git a/include/linux/numa.h b/include/linux/numa.h
index 3567e40329eb..e6baaf6051bc 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -3,16 +3,8 @@
#define _LINUX_NUMA_H
#include <linux/init.h>
#include <linux/types.h>
+#include <linux/nodemask.h>
-#ifdef CONFIG_NODES_SHIFT
-#define NODES_SHIFT CONFIG_NODES_SHIFT
-#else
-#define NODES_SHIFT 0
-#endif
-
-#define MAX_NUMNODES (1 << NODES_SHIFT)
-
-#define NUMA_NO_NODE (-1)
#define NUMA_NO_MEMBLK (-1)
static inline bool numa_valid_node(int nid)
@@ -39,6 +31,8 @@ void __init alloc_offline_node_data(int nid);
/* Generic implementation available */
int numa_nearest_node(int node, unsigned int state);
+int nearest_node_nodemask(int node, nodemask_t *mask);
+
#ifndef memory_add_physaddr_to_nid
int memory_add_physaddr_to_nid(u64 start);
#endif
@@ -55,6 +49,11 @@ static inline int numa_nearest_node(int node, unsigned int state)
return NUMA_NO_NODE;
}
+static inline int nearest_node_nodemask(int node, nodemask_t *mask)
+{
+ return NUMA_NO_NODE;
+}
+
static inline int memory_add_physaddr_to_nid(u64 start)
{
return 0;
diff --git a/include/linux/objpool.h b/include/linux/objpool.h
index cb1758eaa2d3..b713a1fe7521 100644
--- a/include/linux/objpool.h
+++ b/include/linux/objpool.h
@@ -170,17 +170,16 @@ static inline void *objpool_pop(struct objpool_head *pool)
{
void *obj = NULL;
unsigned long flags;
- int i, cpu;
+ int start, cpu;
/* disable local irq to avoid preemption & interruption */
raw_local_irq_save(flags);
- cpu = raw_smp_processor_id();
- for (i = 0; i < pool->nr_possible_cpus; i++) {
+ start = raw_smp_processor_id();
+ for_each_possible_cpu_wrap(cpu, start) {
obj = __objpool_try_get_slot(pool, cpu);
if (obj)
break;
- cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1);
}
raw_local_irq_restore(flags);
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index c722a921165b..3ca965a2ddc8 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -128,7 +128,7 @@
#define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t"
#define STACK_FRAME_NON_STANDARD(func)
#define STACK_FRAME_NON_STANDARD_FP(func)
-#define __ASM_ANNOTATE(label, type)
+#define __ASM_ANNOTATE(label, type) ""
#define ASM_ANNOTATE(type)
#else
.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0
@@ -147,6 +147,8 @@
* these relocations will never be used for indirect calls.
*/
#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR)
+#define ANNOTATE_NOENDBR_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOENDBR))
+
/*
* This should be used immediately before an indirect jump/call. It tells
* objtool the subsequent indirect jump/call is vouched safe for retpoline
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 36d283552f80..df9234e5f478 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -925,14 +925,15 @@ FOLIO_FLAG_FALSE(has_hwpoisoned)
enum pagetype {
/* 0x00-0x7f are positive numbers, ie mapcount */
/* Reserve 0x80-0xef for mapcount overflow. */
- PGTY_buddy = 0xf0,
- PGTY_offline = 0xf1,
- PGTY_table = 0xf2,
- PGTY_guard = 0xf3,
- PGTY_hugetlb = 0xf4,
- PGTY_slab = 0xf5,
- PGTY_zsmalloc = 0xf6,
- PGTY_unaccepted = 0xf7,
+ PGTY_buddy = 0xf0,
+ PGTY_offline = 0xf1,
+ PGTY_table = 0xf2,
+ PGTY_guard = 0xf3,
+ PGTY_hugetlb = 0xf4,
+ PGTY_slab = 0xf5,
+ PGTY_zsmalloc = 0xf6,
+ PGTY_unaccepted = 0xf7,
+ PGTY_large_kmalloc = 0xf8,
PGTY_mapcount_underflow = 0xff
};
@@ -1075,6 +1076,7 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
* Serialized with zone lock.
*/
PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted)
+FOLIO_TYPE_OPS(large_kmalloc, large_kmalloc)
/**
* PageHuge - Determine if the page belongs to hugetlbfs
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 47bfc6b1b632..d2432cb02fa0 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1044,21 +1044,23 @@ static inline pgoff_t page_pgoff(const struct folio *folio,
return folio->index + folio_page_idx(folio, page);
}
-/*
- * Return byte-offset into filesystem object for page.
+/**
+ * folio_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
*/
-static inline loff_t page_offset(struct page *page)
+static inline loff_t folio_pos(const struct folio *folio)
{
- return ((loff_t)page->index) << PAGE_SHIFT;
+ return ((loff_t)folio->index) * PAGE_SIZE;
}
-/**
- * folio_pos - Returns the byte position of this folio in its file.
- * @folio: The folio.
+/*
+ * Return byte-offset into filesystem object for page.
*/
-static inline loff_t folio_pos(struct folio *folio)
+static inline loff_t page_offset(struct page *page)
{
- return page_offset(&folio->page);
+ struct folio *folio = page_folio(page);
+
+ return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE;
}
/*
@@ -1603,34 +1605,6 @@ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
}
/**
- * page_mkwrite_check_truncate - check if page was truncated
- * @page: the page to check
- * @inode: the inode to check the page against
- *
- * Returns the number of bytes in the page up to EOF,
- * or -EFAULT if the page was truncated.
- */
-static inline int page_mkwrite_check_truncate(struct page *page,
- struct inode *inode)
-{
- loff_t size = i_size_read(inode);
- pgoff_t index = size >> PAGE_SHIFT;
- int offset = offset_in_page(size);
-
- if (page->mapping != inode->i_mapping)
- return -EFAULT;
-
- /* page is wholly inside EOF */
- if (page->index < index)
- return PAGE_SIZE;
- /* page is wholly past EOF */
- if (page->index > index || !offset)
- return -EFAULT;
- /* page is partially inside EOF */
- return offset;
-}
-
-/**
* i_blocks_per_folio - How many blocks fit in this folio.
* @inode: The inode which contains the blocks.
* @folio: The folio.
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 5b520fe86b60..0fcacb909778 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -26,13 +26,11 @@
#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned"
#define PER_CPU_ALIGNED_SECTION "..shared_aligned"
#endif
-#define PER_CPU_FIRST_SECTION "..first"
#else
#define PER_CPU_SHARED_ALIGNED_SECTION ""
#define PER_CPU_ALIGNED_SECTION "..shared_aligned"
-#define PER_CPU_FIRST_SECTION ""
#endif
@@ -115,14 +113,17 @@
DEFINE_PER_CPU_SECTION(type, name, "")
/*
- * Declaration/definition used for per-CPU variables that must come first in
- * the set of variables.
+ * Declaration/definition used for per-CPU variables that are frequently
+ * accessed and should be in a single cacheline.
+ *
+ * For use only by architecture and core code. Only use scalar or pointer
+ * types to maximize density.
*/
-#define DECLARE_PER_CPU_FIRST(type, name) \
- DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
+#define DECLARE_PER_CPU_CACHE_HOT(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, "..hot.." #name)
-#define DEFINE_PER_CPU_FIRST(type, name) \
- DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
+#define DEFINE_PER_CPU_CACHE_HOT(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "..hot.." #name)
/*
* Declaration/definition used for per-CPU variables that must be cacheline
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c012df33a9f0..af7d75ede619 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -8,6 +8,7 @@
#include <linux/wait.h>
#include <linux/rcu_sync.h>
#include <linux/lockdep.h>
+#include <linux/cleanup.h>
struct percpu_rw_semaphore {
struct rcu_sync rss;
@@ -125,6 +126,13 @@ extern bool percpu_is_read_locked(struct percpu_rw_semaphore *);
extern void percpu_down_write(struct percpu_rw_semaphore *);
extern void percpu_up_write(struct percpu_rw_semaphore *);
+DEFINE_GUARD(percpu_read, struct percpu_rw_semaphore *,
+ percpu_down_read(_T), percpu_up_read(_T))
+DEFINE_GUARD_COND(percpu_read, _try, percpu_down_read_trylock(_T))
+
+DEFINE_GUARD(percpu_write, struct percpu_rw_semaphore *,
+ percpu_down_write(_T), percpu_up_write(_T))
+
static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem)
{
return atomic_read(&sem->block);
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 4b5b83677e3f..6dc5e0cd76ca 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -84,7 +84,6 @@ struct arm_pmu {
struct pmu pmu;
cpumask_t supported_cpus;
char *name;
- int pmuver;
irqreturn_t (*handle_irq)(struct arm_pmu *pmu);
void (*enable)(struct perf_event *event);
void (*disable)(struct perf_event *event);
@@ -100,20 +99,26 @@ struct arm_pmu {
void (*stop)(struct arm_pmu *);
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
+ /*
+ * Called by KVM to map the PMUv3 event space onto non-PMUv3 hardware.
+ */
+ int (*map_pmuv3_event)(unsigned int eventsel);
DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS);
bool secure_access; /* 32-bit ARM only */
-#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
- DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
-#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000
- DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
struct platform_device *plat_device;
struct pmu_hw_events __percpu *hw_events;
struct hlist_node node;
struct notifier_block cpu_pm_nb;
/* the attr_groups array must be NULL-terminated */
const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
- /* store the PMMIR_EL1 to expose slots */
+
+ /* PMUv3 only */
+ int pmuver;
u64 reg_pmmir;
+#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
+ DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000
+ DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
/* Only to be used by ACPI probing code */
unsigned long acpi_cpuid;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8333f132f4a9..5a9bf15d4461 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -343,8 +343,7 @@ struct pmu {
*/
unsigned int scope;
- int __percpu *pmu_disable_count;
- struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
+ struct perf_cpu_pmu_context * __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
@@ -495,7 +494,7 @@ struct pmu {
* context-switches callback
*/
void (*sched_task) (struct perf_event_pmu_context *pmu_ctx,
- bool sched_in);
+ struct task_struct *task, bool sched_in);
/*
* Kmem cache of PMU specific data
@@ -503,16 +502,6 @@ struct pmu {
struct kmem_cache *task_ctx_cache;
/*
- * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
- * can be synchronized using this function. See Intel LBR callstack support
- * implementation and Perf core context switch handling callbacks for usage
- * examples.
- */
- void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc);
- /* optional */
-
- /*
* Set up pmu-private data structures for an AUX area
*/
void *(*setup_aux) (struct perf_event *event, void **pages,
@@ -673,13 +662,16 @@ struct swevent_hlist {
struct rcu_head rcu_head;
};
-#define PERF_ATTACH_CONTEXT 0x01
-#define PERF_ATTACH_GROUP 0x02
-#define PERF_ATTACH_TASK 0x04
-#define PERF_ATTACH_TASK_DATA 0x08
-#define PERF_ATTACH_ITRACE 0x10
-#define PERF_ATTACH_SCHED_CB 0x20
-#define PERF_ATTACH_CHILD 0x40
+#define PERF_ATTACH_CONTEXT 0x0001
+#define PERF_ATTACH_GROUP 0x0002
+#define PERF_ATTACH_TASK 0x0004
+#define PERF_ATTACH_TASK_DATA 0x0008
+#define PERF_ATTACH_GLOBAL_DATA 0x0010
+#define PERF_ATTACH_SCHED_CB 0x0020
+#define PERF_ATTACH_CHILD 0x0040
+#define PERF_ATTACH_EXCLUSIVE 0x0080
+#define PERF_ATTACH_CALLCHAIN 0x0100
+#define PERF_ATTACH_ITRACE 0x0200
struct bpf_prog;
struct perf_cgroup;
@@ -921,7 +913,7 @@ struct perf_event_pmu_context {
struct list_head pinned_active;
struct list_head flexible_active;
- /* Used to avoid freeing per-cpu perf_event_pmu_context */
+ /* Used to identify the per-cpu perf_event_pmu_context */
unsigned int embedded : 1;
unsigned int nr_events;
@@ -931,7 +923,6 @@ struct perf_event_pmu_context {
atomic_t refcount; /* event <-> epc */
struct rcu_head rcu_head;
- void *task_ctx_data; /* pmu specific data */
/*
* Set when one or more (plausibly active) event can't be scheduled
* due to pmu overcommit or pmu constraints, except tolerant to
@@ -979,7 +970,6 @@ struct perf_event_context {
int nr_user;
int is_active;
- int nr_task_data;
int nr_stat;
int nr_freq;
int rotate_disable;
@@ -1020,6 +1010,41 @@ struct perf_event_context {
local_t nr_no_switch_fast;
};
+/**
+ * struct perf_ctx_data - PMU specific data for a task
+ * @rcu_head: To avoid a race when freeing the PMU specific data
+ * @refcount: To track users
+ * @global: To track system-wide users
+ * @ctx_cache: Kmem cache of PMU specific data
+ * @data: PMU specific data
+ *
+ * Currently, the struct is only used in Intel LBR call stack mode to
+ * save/restore the call stack of a task on context switches.
+ *
+ * The rcu_head is used to prevent a race when freeing the data.
+ * The data is only allocated when Intel LBR call stack mode is enabled.
+ * The data will be freed when the mode is disabled.
+ * The content of the data will only be accessed in context switch, which
+ * should be protected by rcu_read_lock().
+ *
+ * Because of the alignment requirement of Intel Arch LBR, the Kmem cache
+ * is used to allocate the PMU specific data. The ctx_cache is to track
+ * the Kmem cache.
+ *
+ * Careful: Struct perf_ctx_data is added as a pointer in struct task_struct.
+ * When system-wide Intel LBR call stack mode is enabled, a buffer with
+ * constant size will be allocated for each task.
+ * Also, system memory consumption can further grow when the size of
+ * struct perf_ctx_data increases.
+ */
+struct perf_ctx_data {
+ struct rcu_head rcu_head;
+ refcount_t refcount;
+ int global;
+ struct kmem_cache *ctx_cache;
+ void *data;
+};
+
struct perf_cpu_pmu_context {
struct perf_event_pmu_context epc;
struct perf_event_pmu_context *task_epc;
@@ -1029,6 +1054,7 @@ struct perf_cpu_pmu_context {
int active_oncpu;
int exclusive;
+ int pmu_disable_count;
raw_spinlock_t hrtimer_lock;
struct hrtimer hrtimer;
@@ -1062,7 +1088,13 @@ struct perf_output_handle {
struct perf_buffer *rb;
unsigned long wakeup;
unsigned long size;
- u64 aux_flags;
+ union {
+ u64 flags; /* perf_output*() */
+ u64 aux_flags; /* perf_aux_output*() */
+ struct {
+ u64 skip_read : 1;
+ };
+ };
union {
void *addr;
unsigned long head;
@@ -1339,6 +1371,9 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
if (branch_sample_hw_index(event))
size += sizeof(u64);
+
+ brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr);
+
size += brs->nr * sizeof(struct perf_branch_entry);
/*
@@ -1646,19 +1681,10 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64
}
extern int sysctl_perf_event_paranoid;
-extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;
-extern int sysctl_perf_cpu_time_max_percent;
extern void perf_sample_event_took(u64 sample_len_ns);
-int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-int perf_event_max_stack_handler(const struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-
/* Access to perf_event_open(2) syscall. */
#define PERF_SECURITY_OPEN 0
@@ -1672,22 +1698,22 @@ static inline int perf_is_paranoid(void)
return sysctl_perf_event_paranoid > -1;
}
-int perf_allow_kernel(struct perf_event_attr *attr);
+int perf_allow_kernel(void);
-static inline int perf_allow_cpu(struct perf_event_attr *attr)
+static inline int perf_allow_cpu(void)
{
if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
return -EACCES;
- return security_perf_event_open(attr, PERF_SECURITY_CPU);
+ return security_perf_event_open(PERF_SECURITY_CPU);
}
-static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
+static inline int perf_allow_tracepoint(void)
{
if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
return -EPERM;
- return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
+ return security_perf_event_open(PERF_SECURITY_TRACEPOINT);
}
extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 98837a1ff0f3..311ecebd7d56 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -101,9 +101,9 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
* these helpers must be called with the tasklist_lock write-held.
*/
extern void attach_pid(struct task_struct *task, enum pid_type);
-extern void detach_pid(struct task_struct *task, enum pid_type);
-extern void change_pid(struct task_struct *task, enum pid_type,
- struct pid *pid);
+void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type);
+void change_pid(struct pid **pids, struct task_struct *task, enum pid_type,
+ struct pid *pid);
extern void exchange_tids(struct task_struct *task, struct task_struct *old);
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type);
@@ -129,6 +129,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
size_t set_tid_size);
extern void free_pid(struct pid *pid);
+void free_pids(struct pid **pids);
extern void disable_pid_allocation(struct pid_namespace *ns);
/*
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 7c830d0dec9a..05e6f8f4a026 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -6,6 +6,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
void pidfs_add_pid(struct pid *pid);
void pidfs_remove_pid(struct pid *pid);
+void pidfs_exit(struct task_struct *tsk);
extern const struct dentry_operations pidfs_dentry_operations;
#endif /* _LINUX_PID_FS_H */
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index b698758000f8..9d42d473d201 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -108,7 +108,7 @@ struct pipe_inode_info {
#ifdef CONFIG_WATCH_QUEUE
bool note_loss;
#endif
- struct page *tmp_page;
+ struct page *tmp_page[2];
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
struct pipe_buffer *bufs;
diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h
index 8c9df7dadd5d..a299225ab92e 100644
--- a/include/linux/platform_profile.h
+++ b/include/linux/platform_profile.h
@@ -50,7 +50,7 @@ struct platform_profile_ops {
struct device *platform_profile_register(struct device *dev, const char *name,
void *drvdata,
const struct platform_profile_ops *ops);
-int platform_profile_remove(struct device *dev);
+void platform_profile_remove(struct device *dev);
struct device *devm_platform_profile_register(struct device *dev, const char *name,
void *drvdata,
const struct platform_profile_ops *ops);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 78855d794342..f0bd8fbae4f2 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -597,6 +597,7 @@ enum rpm_status {
RPM_RESUMING,
RPM_SUSPENDED,
RPM_SUSPENDING,
+ RPM_BLOCKED,
};
/*
@@ -678,9 +679,9 @@ struct dev_pm_info {
bool wakeup_path:1;
bool syscore:1;
bool no_pm_callbacks:1; /* Owned by the PM core */
- bool async_in_progress:1; /* Owned by the PM core */
+ bool work_in_progress:1; /* Owned by the PM core */
+ bool smart_suspend:1; /* Owned by the PM core */
bool must_resume:1; /* Owned by the PM core */
- bool set_active:1; /* Owned by the PM core */
bool may_skip_resume:1; /* Set by subsystems */
#else
bool should_wakeup:1;
@@ -838,10 +839,8 @@ extern int pm_generic_resume_early(struct device *dev);
extern int pm_generic_resume_noirq(struct device *dev);
extern int pm_generic_resume(struct device *dev);
extern int pm_generic_freeze_noirq(struct device *dev);
-extern int pm_generic_freeze_late(struct device *dev);
extern int pm_generic_freeze(struct device *dev);
extern int pm_generic_thaw_noirq(struct device *dev);
-extern int pm_generic_thaw_early(struct device *dev);
extern int pm_generic_thaw(struct device *dev);
extern int pm_generic_restore_noirq(struct device *dev);
extern int pm_generic_restore_early(struct device *dev);
@@ -883,10 +882,8 @@ static inline void dpm_for_each_dev(void *data, void (*fn)(struct device *, void
#define pm_generic_resume_noirq NULL
#define pm_generic_resume NULL
#define pm_generic_freeze_noirq NULL
-#define pm_generic_freeze_late NULL
#define pm_generic_freeze NULL
#define pm_generic_thaw_noirq NULL
-#define pm_generic_thaw_early NULL
#define pm_generic_thaw NULL
#define pm_generic_restore_noirq NULL
#define pm_generic_restore_early NULL
diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index 68669ce18720..c3b46fa358d3 100644
--- a/include/linux/pm_clock.h
+++ b/include/linux/pm_clock.h
@@ -41,9 +41,7 @@ extern int pm_clk_create(struct device *dev);
extern void pm_clk_destroy(struct device *dev);
extern int pm_clk_add(struct device *dev, const char *con_id);
extern int pm_clk_add_clk(struct device *dev, struct clk *clk);
-extern int of_pm_clk_add_clk(struct device *dev, const char *name);
extern int of_pm_clk_add_clks(struct device *dev);
-extern void pm_clk_remove(struct device *dev, const char *con_id);
extern void pm_clk_remove_clk(struct device *dev, struct clk *clk);
extern int pm_clk_suspend(struct device *dev);
extern int pm_clk_resume(struct device *dev);
@@ -76,9 +74,6 @@ static inline int of_pm_clk_add_clks(struct device *dev)
{
return -EINVAL;
}
-static inline void pm_clk_remove(struct device *dev, const char *con_id)
-{
-}
#define pm_clk_suspend NULL
#define pm_clk_resume NULL
static inline void pm_clk_remove_clk(struct device *dev, struct clk *clk)
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d39dc863f612..7fb5a459847e 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -66,6 +66,7 @@ static inline bool queue_pm_work(struct work_struct *work)
extern int pm_generic_runtime_suspend(struct device *dev);
extern int pm_generic_runtime_resume(struct device *dev);
+extern bool pm_runtime_need_not_resume(struct device *dev);
extern int pm_runtime_force_suspend(struct device *dev);
extern int pm_runtime_force_resume(struct device *dev);
@@ -77,6 +78,8 @@ extern int pm_runtime_get_if_in_use(struct device *dev);
extern int pm_schedule_suspend(struct device *dev, unsigned int delay);
extern int __pm_runtime_set_status(struct device *dev, unsigned int status);
extern int pm_runtime_barrier(struct device *dev);
+extern bool pm_runtime_block_if_disabled(struct device *dev);
+extern void pm_runtime_unblock(struct device *dev);
extern void pm_runtime_enable(struct device *dev);
extern void __pm_runtime_disable(struct device *dev, bool check_resume);
extern void pm_runtime_allow(struct device *dev);
@@ -197,6 +200,17 @@ static inline bool pm_runtime_enabled(struct device *dev)
}
/**
+ * pm_runtime_blocked - Check if runtime PM enabling is blocked.
+ * @dev: Target device.
+ *
+ * Do not call this function outside system suspend/resume code paths.
+ */
+static inline bool pm_runtime_blocked(struct device *dev)
+{
+ return dev->power.last_status == RPM_BLOCKED;
+}
+
+/**
* pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present.
* @dev: Target device.
*
@@ -241,6 +255,7 @@ static inline bool queue_pm_work(struct work_struct *work) { return false; }
static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; }
static inline int pm_generic_runtime_resume(struct device *dev) { return 0; }
+static inline bool pm_runtime_need_not_resume(struct device *dev) {return true; }
static inline int pm_runtime_force_suspend(struct device *dev) { return 0; }
static inline int pm_runtime_force_resume(struct device *dev) { return 0; }
@@ -271,8 +286,11 @@ static inline int pm_runtime_get_if_active(struct device *dev)
static inline int __pm_runtime_set_status(struct device *dev,
unsigned int status) { return 0; }
static inline int pm_runtime_barrier(struct device *dev) { return 0; }
+static inline bool pm_runtime_block_if_disabled(struct device *dev) { return true; }
+static inline void pm_runtime_unblock(struct device *dev) {}
static inline void pm_runtime_enable(struct device *dev) {}
static inline void __pm_runtime_disable(struct device *dev, bool c) {}
+static inline bool pm_runtime_blocked(struct device *dev) { return true; }
static inline void pm_runtime_allow(struct device *dev) {}
static inline void pm_runtime_forbid(struct device *dev) {}
@@ -556,11 +574,18 @@ static inline int pm_runtime_set_suspended(struct device *dev)
* pm_runtime_disable - Disable runtime PM for a device.
* @dev: Target device.
*
- * Prevent the runtime PM framework from working with @dev (by incrementing its
- * "blocking" counter).
+ * Prevent the runtime PM framework from working with @dev by incrementing its
+ * "disable" counter.
+ *
+ * If the counter is zero when this function runs and there is a pending runtime
+ * resume request for @dev, it will be resumed. If the counter is still zero at
+ * that point, all of the pending runtime PM requests for @dev will be canceled
+ * and all runtime PM operations in progress involving it will be waited for to
+ * complete.
*
- * For each invocation of this function for @dev there must be a matching
- * pm_runtime_enable() call in order for runtime PM to be enabled for it.
+ * For each invocation of this function for @dev, there must be a matching
+ * pm_runtime_enable() call, so that runtime PM is eventually enabled for it
+ * again.
*/
static inline void pm_runtime_disable(struct device *dev)
{
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index d501c09c60cd..51e0e8dd5f9e 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -205,17 +205,17 @@ static inline void device_set_awake_path(struct device *dev)
static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
{
- return pm_wakeup_ws_event(ws, msec, false);
+ pm_wakeup_ws_event(ws, msec, false);
}
static inline void pm_wakeup_event(struct device *dev, unsigned int msec)
{
- return pm_wakeup_dev_event(dev, msec, false);
+ pm_wakeup_dev_event(dev, msec, false);
}
static inline void pm_wakeup_hard_event(struct device *dev)
{
- return pm_wakeup_dev_event(dev, 0, true);
+ pm_wakeup_dev_event(dev, 0, true);
}
/**
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index b7a7158aaf65..23fe3eaf242d 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -290,7 +290,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data)
}
struct pnp_fixup {
- char id[7];
+ char id[8];
void (*quirk_function) (struct pnp_dev *dev); /* fixup function */
};
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index f11f10c97bd9..dd48c64b605e 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -114,6 +114,7 @@ bool posixtimer_init_sigqueue(struct sigqueue *q);
void posixtimer_send_sigqueue(struct k_itimer *tmr);
bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq);
void posixtimer_free_timer(struct k_itimer *timer);
+long posixtimer_create_prctl(unsigned long ctrl);
/* Init task static initializer */
#define INIT_CPU_TIMERBASE(b) { \
@@ -140,6 +141,7 @@ static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info,
struct sigqueue *timer_sigq) { return false; }
static inline void posixtimer_free_timer(struct k_itimer *timer) { }
+static inline long posixtimer_create_prctl(unsigned long ctrl) { return -EINVAL; }
#endif
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
@@ -177,23 +179,26 @@ static inline void posix_cputimers_init_work(void) { }
* @rcu: RCU head for freeing the timer.
*/
struct k_itimer {
- struct hlist_node list;
- struct hlist_node ignored_list;
+ /* 1st cacheline contains read-mostly fields */
struct hlist_node t_hash;
- spinlock_t it_lock;
- const struct k_clock *kclock;
- clockid_t it_clock;
+ struct hlist_node list;
timer_t it_id;
+ clockid_t it_clock;
+ int it_sigev_notify;
+ enum pid_type it_pid_type;
+ struct signal_struct *it_signal;
+ const struct k_clock *kclock;
+
+ /* 2nd cacheline and above contain fields which are modified regularly */
+ spinlock_t it_lock;
int it_status;
bool it_sig_periodic;
s64 it_overrun;
s64 it_overrun_last;
unsigned int it_signal_seq;
unsigned int it_sigqueue_seq;
- int it_sigev_notify;
- enum pid_type it_pid_type;
ktime_t it_interval;
- struct signal_struct *it_signal;
+ struct hlist_node ignored_list;
union {
struct pid *it_pid;
struct task_struct *it_process;
@@ -210,7 +215,7 @@ struct k_itimer {
} alarm;
} it;
struct rcu_head rcu;
-};
+} ____cacheline_aligned_in_smp;
void run_posix_cpu_timers(void);
void posix_cpu_timers_exit(struct task_struct *task);
@@ -240,6 +245,13 @@ static inline void posixtimer_sigqueue_putref(struct sigqueue *q)
posixtimer_putref(tmr);
}
+
+static inline bool posixtimer_valid(const struct k_itimer *timer)
+{
+ unsigned long val = (unsigned long)timer->it_signal;
+
+ return !(val & 0x1UL);
+}
#else /* CONFIG_POSIX_TIMERS */
static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index ca86235ac15c..b0af8d4ef6e6 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -319,6 +319,7 @@ do { \
#ifdef CONFIG_PREEMPT_NOTIFIERS
struct preempt_notifier;
+struct task_struct;
/**
* preempt_ops - notifiers called when a task is preempted and rescheduled
@@ -515,6 +516,8 @@ static inline bool preempt_model_rt(void)
return IS_ENABLED(CONFIG_PREEMPT_RT);
}
+extern const char *preempt_model_str(void);
+
/*
* Does the preemption model allow non-cooperative preemption?
*
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 4217a9f412b2..5b462029d03c 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -207,6 +207,7 @@ void printk_legacy_allow_panic_sync(void);
extern bool nbcon_device_try_acquire(struct console *con);
extern void nbcon_device_release(struct console *con);
void nbcon_atomic_flush_unsafe(void);
+bool pr_flush(int timeout_ms, bool reset_on_progress);
#else
static inline __printf(1, 0)
int vprintk(const char *s, va_list args)
@@ -315,6 +316,11 @@ static inline void nbcon_atomic_flush_unsafe(void)
{
}
+static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+ return true;
+}
+
#endif
bool this_cpu_in_panic(void);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 48e5c03df1dd..f8159f8a7d73 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -95,9 +95,9 @@ static inline void __rcu_read_lock(void)
static inline void __rcu_read_unlock(void)
{
- preempt_enable();
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
rcu_read_unlock_strict();
+ preempt_enable();
}
static inline int rcu_preempt_depth(void)
@@ -121,12 +121,6 @@ void rcu_init(void);
extern int rcu_scheduler_active;
void rcu_sched_clock_irq(int user);
-#ifdef CONFIG_TASKS_RCU_GENERIC
-void rcu_init_tasks_generic(void);
-#else
-static inline void rcu_init_tasks_generic(void) { }
-#endif
-
#ifdef CONFIG_RCU_STALL_COMMON
void rcu_sysrq_start(void);
void rcu_sysrq_end(void);
@@ -806,11 +800,9 @@ do { \
* sections, invocation of the corresponding RCU callback is deferred
* until after the all the other CPUs exit their critical sections.
*
- * In v5.0 and later kernels, synchronize_rcu() and call_rcu() also
- * wait for regions of code with preemption disabled, including regions of
- * code with interrupts or softirqs disabled. In pre-v5.0 kernels, which
- * define synchronize_sched(), only code enclosed within rcu_read_lock()
- * and rcu_read_unlock() are guaranteed to be waited for.
+ * Both synchronize_rcu() and call_rcu() also wait for regions of code
+ * with preemption disabled, including regions of code with interrupts or
+ * softirqs disabled.
*
* Note, however, that RCU callbacks are permitted to run concurrently
* with new RCU read-side critical sections. One way that this can happen
@@ -865,11 +857,10 @@ static __always_inline void rcu_read_lock(void)
* rcu_read_unlock() - marks the end of an RCU read-side critical section.
*
* In almost all situations, rcu_read_unlock() is immune from deadlock.
- * In recent kernels that have consolidated synchronize_sched() and
- * synchronize_rcu_bh() into synchronize_rcu(), this deadlock immunity
- * also extends to the scheduler's runqueue and priority-inheritance
- * spinlocks, courtesy of the quiescent-state deferral that is carried
- * out when rcu_read_unlock() is invoked with interrupts disabled.
+ * This deadlock immunity also extends to the scheduler's runqueue
+ * and priority-inheritance spinlocks, courtesy of the quiescent-state
+ * deferral that is carried out when rcu_read_unlock() is invoked with
+ * interrupts disabled.
*
* See rcu_read_lock() for more information.
*/
@@ -1025,12 +1016,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define RCU_POINTER_INITIALIZER(p, v) \
.p = RCU_INITIALIZER(v)
-/*
- * Does the specified offset indicate that the corresponding rcu_head
- * structure can be handled by kvfree_rcu()?
- */
-#define __is_kvfree_rcu_offset(offset) ((offset) < 4096)
-
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree for double-argument invocations.
@@ -1041,11 +1026,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* when they are used in a kernel module, that module must invoke the
* high-latency rcu_barrier() function at module-unload time.
*
- * The kfree_rcu() function handles this issue. Rather than encoding a
- * function address in the embedded rcu_head structure, kfree_rcu() instead
- * encodes the offset of the rcu_head structure within the base structure.
- * Because the functions are not allowed in the low-order 4096 bytes of
- * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
+ * The kfree_rcu() function handles this issue. In order to have a universal
+ * callback function handling different offsets of rcu_head, the callback needs
+ * to determine the starting address of the freed object, which can be a large
+ * kmalloc or vmalloc allocation. To allow simply aligning the pointer down to
+ * page boundary for those, only offsets up to 4095 bytes can be accommodated.
* If the offset is larger than 4095 bytes, a compile-time error will
* be generated in kvfree_rcu_arg_2(). If this error is triggered, you can
* either fall back to use of call_rcu() or rearrange the structure to
@@ -1082,14 +1067,23 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
+/*
+ * In mm/slab_common.c, no suitable header to include here.
+ */
+void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+
+/*
+ * The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the
+ * comment of kfree_rcu() for details.
+ */
#define kvfree_rcu_arg_2(ptr, rhf) \
do { \
typeof (ptr) ___p = (ptr); \
\
- if (___p) { \
- BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \
- kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
- } \
+ if (___p) { \
+ BUILD_BUG_ON(offsetof(typeof(*(ptr)), rhf) >= 4096); \
+ kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
+ } \
} while (0)
#define kvfree_rcu_arg_1(ptr) \
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index f9bed3d3f78d..4c92d4291cce 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -16,6 +16,9 @@
struct rcu_synchronize {
struct rcu_head head;
struct completion completion;
+
+ /* This is for debugging. */
+ struct rcu_gp_oldstate oldstate;
};
void wakeme_after_rcu(struct rcu_head *head);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index fe42315f667f..f519cd680228 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -90,41 +90,6 @@ static inline void synchronize_rcu_expedited(void)
synchronize_rcu();
}
-/*
- * Add one more declaration of kvfree() here. It is
- * not so straight forward to just include <linux/mm.h>
- * where it is defined due to getting many compile
- * errors caused by that include.
- */
-extern void kvfree(const void *addr);
-
-static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
-{
- if (head) {
- call_rcu(head, (rcu_callback_t) ((void *) head - ptr));
- return;
- }
-
- // kvfree_rcu(one_arg) call.
- might_sleep();
- synchronize_rcu();
- kvfree(ptr);
-}
-
-static inline void kvfree_rcu_barrier(void)
-{
- rcu_barrier();
-}
-
-#ifdef CONFIG_KASAN_GENERIC
-void kvfree_call_rcu(struct rcu_head *head, void *ptr);
-#else
-static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr)
-{
- __kvfree_call_rcu(head, ptr);
-}
-#endif
-
void rcu_qs(void);
static inline void rcu_softirq_qs(void)
@@ -164,7 +129,6 @@ static inline void rcu_end_inkernel_boot(void) { }
static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
static inline void rcu_momentary_eqs(void) { }
-static inline void kfree_rcu_scheduler_running(void) { }
/* Avoid RCU read-side critical sections leaking across. */
static inline void rcu_all_qs(void) { barrier(); }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 27d86d912781..9d2d7bd251d4 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -34,12 +34,9 @@ static inline void rcu_virt_note_context_switch(void)
}
void synchronize_rcu_expedited(void);
-void kvfree_call_rcu(struct rcu_head *head, void *ptr);
-void kvfree_rcu_barrier(void);
void rcu_barrier(void);
void rcu_momentary_eqs(void);
-void kfree_rcu_scheduler_running(void);
struct rcu_gp_oldstate {
unsigned long rgos_norm;
@@ -103,7 +100,7 @@ extern int rcu_scheduler_active;
void rcu_end_inkernel_boot(void);
bool rcu_inkernel_boot_has_ended(void);
bool rcu_is_watching(void);
-#ifndef CONFIG_PREEMPTION
+#ifndef CONFIG_PREEMPT_RCU
void rcu_all_qs(void);
#endif
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index d94abba1c716..880351ca3dfc 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/pid.h>
+#include <linux/resctrl_types.h>
/* CLOSID, RMID value used by the default control group */
#define RESCTRL_RESERVED_CLOSID 0
@@ -25,6 +26,24 @@ int proc_resctrl_show(struct seq_file *m,
/* max value for struct rdt_domain's mbps_val */
#define MBA_MAX_MBPS U32_MAX
+/* Walk all possible resources, with variants for only controls or monitors. */
+#define for_each_rdt_resource(_r) \
+ for ((_r) = resctrl_arch_get_resource(0); \
+ (_r) && (_r)->rid < RDT_NUM_RESOURCES; \
+ (_r) = resctrl_arch_get_resource((_r)->rid + 1))
+
+#define for_each_capable_rdt_resource(r) \
+ for_each_rdt_resource((r)) \
+ if ((r)->alloc_capable || (r)->mon_capable)
+
+#define for_each_alloc_capable_rdt_resource(r) \
+ for_each_rdt_resource((r)) \
+ if ((r)->alloc_capable)
+
+#define for_each_mon_capable_rdt_resource(r) \
+ for_each_rdt_resource((r)) \
+ if ((r)->mon_capable)
+
/**
* enum resctrl_conf_type - The type of configuration.
* @CDP_NONE: No prioritisation, both code and data are controlled or monitored.
@@ -40,13 +59,42 @@ enum resctrl_conf_type {
#define CDP_NUM_TYPES (CDP_DATA + 1)
/*
- * Event IDs, the values match those used to program IA32_QM_EVTSEL before
- * reading IA32_QM_CTR on RDT systems.
+ * struct pseudo_lock_region - pseudo-lock region information
+ * @s: Resctrl schema for the resource to which this
+ * pseudo-locked region belongs
+ * @closid: The closid that this pseudo-locked region uses
+ * @d: RDT domain to which this pseudo-locked region
+ * belongs
+ * @cbm: bitmask of the pseudo-locked region
+ * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread
+ * completion
+ * @thread_done: variable used by waitqueue to test if pseudo-locking
+ * thread completed
+ * @cpu: core associated with the cache on which the setup code
+ * will be run
+ * @line_size: size of the cache lines
+ * @size: size of pseudo-locked region in bytes
+ * @kmem: the kernel memory associated with pseudo-locked region
+ * @minor: minor number of character device associated with this
+ * region
+ * @debugfs_dir: pointer to this region's directory in the debugfs
+ * filesystem
+ * @pm_reqs: Power management QoS requests related to this region
*/
-enum resctrl_event_id {
- QOS_L3_OCCUP_EVENT_ID = 0x01,
- QOS_L3_MBM_TOTAL_EVENT_ID = 0x02,
- QOS_L3_MBM_LOCAL_EVENT_ID = 0x03,
+struct pseudo_lock_region {
+ struct resctrl_schema *s;
+ u32 closid;
+ struct rdt_ctrl_domain *d;
+ u32 cbm;
+ wait_queue_head_t lock_thread_wq;
+ int thread_done;
+ int cpu;
+ unsigned int line_size;
+ unsigned int size;
+ void *kmem;
+ unsigned int minor;
+ struct dentry *debugfs_dir;
+ struct list_head pm_reqs;
};
/**
@@ -155,6 +203,7 @@ enum membw_throttle_mode {
/**
* struct resctrl_membw - Memory bandwidth allocation related data
* @min_bw: Minimum memory bandwidth percentage user can request
+ * @max_bw: Maximum memory bandwidth value, used as the reset value
* @bw_gran: Granularity at which the memory bandwidth is allocated
* @delay_linear: True if memory B/W delay is in linear scale
* @arch_needs_linear: True if we can't configure non-linear resources
@@ -165,6 +214,7 @@ enum membw_throttle_mode {
*/
struct resctrl_membw {
u32 min_bw;
+ u32 max_bw;
u32 bw_gran;
u32 delay_linear;
bool arch_needs_linear;
@@ -173,7 +223,6 @@ struct resctrl_membw {
u32 *mb_map;
};
-struct rdt_parse_data;
struct resctrl_schema;
enum resctrl_scope {
@@ -183,6 +232,16 @@ enum resctrl_scope {
};
/**
+ * enum resctrl_schema_fmt - The format user-space provides for a schema.
+ * @RESCTRL_SCHEMA_BITMAP: The schema is a bitmap in hex.
+ * @RESCTRL_SCHEMA_RANGE: The schema is a decimal number.
+ */
+enum resctrl_schema_fmt {
+ RESCTRL_SCHEMA_BITMAP,
+ RESCTRL_SCHEMA_RANGE,
+};
+
+/**
* struct rdt_resource - attributes of a resctrl resource
* @rid: The index of the resource
* @alloc_capable: Is allocation available on this machine
@@ -195,12 +254,10 @@ enum resctrl_scope {
* @ctrl_domains: RCU list of all control domains for this resource
* @mon_domains: RCU list of all monitor domains for this resource
* @name: Name to use in "schemata" file.
- * @data_width: Character width of data when displaying
- * @default_ctrl: Specifies default cache cbm or memory B/W percent.
- * @format_str: Per resource format string to show domain value
- * @parse_ctrlval: Per resource function pointer to parse control values
+ * @schema_fmt: Which format string and parser is used for this schema.
* @evt_list: List of monitoring events
- * @fflags: flags to choose base and info files
+ * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
+ * monitoring events can be configured.
* @cdp_capable: Is the CDP feature available on this resource
*/
struct rdt_resource {
@@ -215,22 +272,25 @@ struct rdt_resource {
struct list_head ctrl_domains;
struct list_head mon_domains;
char *name;
- int data_width;
- u32 default_ctrl;
- const char *format_str;
- int (*parse_ctrlval)(struct rdt_parse_data *data,
- struct resctrl_schema *s,
- struct rdt_ctrl_domain *d);
+ enum resctrl_schema_fmt schema_fmt;
struct list_head evt_list;
- unsigned long fflags;
+ unsigned int mbm_cfg_mask;
bool cdp_capable;
};
+/*
+ * Get the resource that exists at this level. If the level is not supported
+ * a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES
+ * will return NULL.
+ */
+struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l);
+
/**
* struct resctrl_schema - configuration abilities of a resource presented to
* user-space
* @list: Member of resctrl_schema_all.
* @name: The name to use in the "schemata" file.
+ * @fmt_str: Format string to show domain value.
* @conf_type: Whether this schema is specific to code/data.
* @res: The resource structure exported by the architecture to describe
* the hardware that is configured by this schema.
@@ -241,16 +301,104 @@ struct rdt_resource {
struct resctrl_schema {
struct list_head list;
char name[8];
+ const char *fmt_str;
enum resctrl_conf_type conf_type;
struct rdt_resource *res;
u32 num_closid;
};
+struct resctrl_cpu_defaults {
+ u32 closid;
+ u32 rmid;
+};
+
+struct resctrl_mon_config_info {
+ struct rdt_resource *r;
+ struct rdt_mon_domain *d;
+ u32 evtid;
+ u32 mon_config;
+};
+
+/**
+ * resctrl_arch_sync_cpu_closid_rmid() - Refresh this CPU's CLOSID and RMID.
+ * Call via IPI.
+ * @info: If non-NULL, a pointer to a struct resctrl_cpu_defaults
+ * specifying the new CLOSID and RMID for tasks in the default
+ * resctrl ctrl and mon group when running on this CPU. If NULL,
+ * this CPU is not re-assigned to a different default group.
+ *
+ * Propagates reassignment of CPUs and/or tasks to different resctrl groups
+ * when requested by the resctrl core code.
+ *
+ * This function records the per-cpu defaults specified by @info (if any),
+ * and then reconfigures the CPU's hardware CLOSID and RMID for subsequent
+ * execution based on @current, in the same way as during a task switch.
+ */
+void resctrl_arch_sync_cpu_closid_rmid(void *info);
+
+/**
+ * resctrl_get_default_ctrl() - Return the default control value for this
+ * resource.
+ * @r: The resource whose default control type is queried.
+ */
+static inline u32 resctrl_get_default_ctrl(struct rdt_resource *r)
+{
+ switch (r->schema_fmt) {
+ case RESCTRL_SCHEMA_BITMAP:
+ return BIT_MASK(r->cache.cbm_len) - 1;
+ case RESCTRL_SCHEMA_RANGE:
+ return r->membw.max_bw;
+ }
+
+ return WARN_ON_ONCE(1);
+}
+
/* The number of closid supported by this resource regardless of CDP */
u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
u32 resctrl_arch_system_num_rmid_idx(void);
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
+__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
+
+/**
+ * resctrl_arch_mon_event_config_write() - Write the config for an event.
+ * @config_info: struct resctrl_mon_config_info describing the resource, domain
+ * and event.
+ *
+ * Reads resource, domain and eventid from @config_info and writes the
+ * event config_info->mon_config into hardware.
+ *
+ * Called via IPI to reach a CPU that is a member of the specified domain.
+ */
+void resctrl_arch_mon_event_config_write(void *config_info);
+
+/**
+ * resctrl_arch_mon_event_config_read() - Read the config for an event.
+ * @config_info: struct resctrl_mon_config_info describing the resource, domain
+ * and event.
+ *
+ * Reads resource, domain and eventid from @config_info and reads the
+ * hardware config value into config_info->mon_config.
+ *
+ * Called via IPI to reach a CPU that is a member of the specified domain.
+ */
+void resctrl_arch_mon_event_config_read(void *config_info);
+
+/* For use by arch code to remap resctrl's smaller CDP CLOSID range */
+static inline u32 resctrl_get_config_index(u32 closid,
+ enum resctrl_conf_type type)
+{
+ switch (type) {
+ default:
+ case CDP_NONE:
+ return closid;
+ case CDP_CODE:
+ return closid * 2 + 1;
+ case CDP_DATA:
+ return closid * 2;
+ }
+}
+
/*
* Update the ctrl_val and apply this config right now.
* Must be called on one of the domain's CPUs.
@@ -314,6 +462,20 @@ static inline void resctrl_arch_rmid_read_context_check(void)
}
/**
+ * resctrl_find_domain() - Search for a domain id in a resource domain list.
+ * @h: The domain list to search.
+ * @id: The domain id to search for.
+ * @pos: A pointer to position in the list id should be inserted.
+ *
+ * Search the domain list to find the domain id. If the domain id is
+ * found, return the domain. NULL otherwise. If the domain id is not
+ * found (and NULL returned) then the first domain with id bigger than
+ * the input id can be returned to the caller via @pos.
+ */
+struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
+ struct list_head **pos);
+
+/**
* resctrl_arch_reset_rmid() - Reset any private state associated with rmid
* and eventid.
* @r: The domain's resource.
@@ -340,7 +502,19 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
*/
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d);
+/**
+ * resctrl_arch_reset_all_ctrls() - Reset the control for each CLOSID to its
+ * default.
+ * @r: The resctrl resource to reset.
+ *
+ * This can be called from any CPU.
+ */
+void resctrl_arch_reset_all_ctrls(struct rdt_resource *r);
+
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
+int __init resctrl_init(void);
+void __exit resctrl_exit(void);
+
#endif /* _RESCTRL_H */
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
new file mode 100644
index 000000000000..f26450b3326b
--- /dev/null
+++ b/include/linux/resctrl_types.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025 Arm Ltd.
+ * Based on arch/x86/kernel/cpu/resctrl/internal.h
+ */
+
+#ifndef __LINUX_RESCTRL_TYPES_H
+#define __LINUX_RESCTRL_TYPES_H
+
+/* Reads to Local DRAM Memory */
+#define READS_TO_LOCAL_MEM BIT(0)
+
+/* Reads to Remote DRAM Memory */
+#define READS_TO_REMOTE_MEM BIT(1)
+
+/* Non-Temporal Writes to Local Memory */
+#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2)
+
+/* Non-Temporal Writes to Remote Memory */
+#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3)
+
+/* Reads to Local Memory the system identifies as "Slow Memory" */
+#define READS_TO_LOCAL_S_MEM BIT(4)
+
+/* Reads to Remote Memory the system identifies as "Slow Memory" */
+#define READS_TO_REMOTE_S_MEM BIT(5)
+
+/* Dirty Victims to All Types of Memory */
+#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6)
+
+/* Max event bits supported */
+#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
+
+enum resctrl_res_level {
+ RDT_RESOURCE_L3,
+ RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,
+ RDT_RESOURCE_SMBA,
+
+ /* Must be the last */
+ RDT_NUM_RESOURCES,
+};
+
+/*
+ * Event IDs, the values match those used to program IA32_QM_EVTSEL before
+ * reading IA32_QM_CTR on RDT systems.
+ */
+enum resctrl_event_id {
+ QOS_L3_OCCUP_EVENT_ID = 0x01,
+ QOS_L3_MBM_TOTAL_EVENT_ID = 0x02,
+ QOS_L3_MBM_LOCAL_EVENT_ID = 0x03,
+};
+
+#endif /* __LINUX_RESCTRL_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c15365a30c0..6e5c38718ff5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -65,6 +65,7 @@ struct mempolicy;
struct nameidata;
struct nsproxy;
struct perf_event_context;
+struct perf_ctx_data;
struct pid_namespace;
struct pipe_inode_info;
struct rcu_node;
@@ -382,6 +383,11 @@ enum uclamp_id {
#ifdef CONFIG_SMP
extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;
+extern void sched_domains_mutex_lock(void);
+extern void sched_domains_mutex_unlock(void);
+#else
+static inline void sched_domains_mutex_lock(void) { }
+static inline void sched_domains_mutex_unlock(void) { }
#endif
struct sched_param {
@@ -1311,6 +1317,7 @@ struct task_struct {
struct perf_event_context *perf_event_ctxp;
struct mutex perf_event_mutex;
struct list_head perf_event_list;
+ struct perf_ctx_data __rcu *perf_ctx_data;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
unsigned long preempt_disable_ip;
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 3a912ab42bb5..f9aabbc9d22e 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -34,7 +34,11 @@ static inline bool dl_time_before(u64 a, u64 b)
struct root_domain;
extern void dl_add_task_root_domain(struct task_struct *p);
extern void dl_clear_root_domain(struct root_domain *rd);
+extern void dl_clear_root_domain_cpu(int cpu);
#endif /* CONFIG_SMP */
+extern u64 dl_cookie;
+extern bool dl_bw_visited(int cpu, u64 cookie);
+
#endif /* _LINUX_SCHED_DEADLINE_H */
diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index b5035afa2396..35ed4577a6cc 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -35,12 +35,10 @@ extern void show_stack(struct task_struct *task, unsigned long *sp,
extern void sched_show_task(struct task_struct *p);
-#ifdef CONFIG_SCHED_DEBUG
struct seq_file;
extern void proc_sched_show_task(struct task_struct *p,
struct pid_namespace *ns, struct seq_file *m);
extern void proc_sched_set_task(struct task_struct *p);
-#endif
/* Attach to any functions which should be ignored in wchan output. */
#define __sched __section(".sched.text")
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 1d70a9867fb1..f7545430a548 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -146,6 +146,7 @@ struct sched_ext_entity {
u32 weight;
s32 sticky_cpu;
s32 holding_cpu;
+ s32 selected_cpu;
u32 kf_mask; /* see scx_kf_mask above */
struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */
atomic_long_t ops_state;
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index e670ac282333..439f6029d3b9 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -79,6 +79,21 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
return unlikely(tif_need_resched());
}
+static __always_inline void current_clr_polling(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
+ * Once the bit is cleared, we'll get IPIs with every new
+ * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
+ * fold.
+ */
+ smp_mb__after_atomic(); /* paired with resched_curr() */
+
+ preempt_fold_need_resched();
+}
+
#else
static inline void __current_set_polling(void) { }
static inline void __current_clr_polling(void) { }
@@ -91,21 +106,15 @@ static inline bool __must_check current_clr_polling_and_test(void)
{
return unlikely(tif_need_resched());
}
-#endif
static __always_inline void current_clr_polling(void)
{
__current_clr_polling();
- /*
- * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
- * Once the bit is cleared, we'll get IPIs with every new
- * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
- * fold.
- */
smp_mb(); /* paired with resched_curr() */
preempt_fold_need_resched();
}
+#endif
#endif /* _LINUX_SCHED_IDLE_H */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 928a626725e6..b13474825130 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -531,6 +531,13 @@ enum {
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
+ /*
+ * The atomic_read() below prevents CSE. The following should
+ * help the compiler generate more efficient code on architectures
+ * where sync_core_before_usermode() is a no-op.
+ */
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE))
+ return;
if (current->mm != mm)
return;
if (likely(!(atomic_read(&mm->membarrier_state) &
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d5d03d919df8..1ef1edbaaf79 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -136,7 +136,8 @@ struct signal_struct {
#ifdef CONFIG_POSIX_TIMERS
/* POSIX.1b Interval Timers */
- unsigned int next_posix_timer_id;
+ unsigned int timer_create_restore_ids:1;
+ atomic_t next_posix_timer_id;
struct hlist_head posix_timers;
struct hlist_head ignored_posix_timers;
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 7f3dbafe1817..7b4301b7235f 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -25,16 +25,12 @@ enum {
};
#undef SD_FLAG
-#ifdef CONFIG_SCHED_DEBUG
-
struct sd_flag_debug {
unsigned int meta_flags;
char *name;
};
extern const struct sd_flag_debug sd_flag_debug[];
-#endif
-
#ifdef CONFIG_SCHED_SMT
static inline int cpu_smt_flags(void)
{
@@ -166,10 +162,6 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
return to_cpumask(sd->span);
}
-extern void partition_sched_domains_locked(int ndoms_new,
- cpumask_var_t doms_new[],
- struct sched_domain_attr *dattr_new);
-
extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
struct sched_domain_attr *dattr_new);
@@ -211,12 +203,6 @@ extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
struct sched_domain_attr;
static inline void
-partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
- struct sched_domain_attr *dattr_new)
-{
-}
-
-static inline void
partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
struct sched_domain_attr *dattr_new)
{
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index e45531455d3b..9b959972bf4a 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -22,21 +22,17 @@
#include <linux/atomic.h>
#include <asm/seccomp.h>
+extern int __secure_computing(void);
+
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
-extern int __secure_computing(const struct seccomp_data *sd);
static inline int secure_computing(void)
{
if (unlikely(test_syscall_work(SECCOMP)))
- return __secure_computing(NULL);
+ return __secure_computing();
return 0;
}
#else
extern void secure_computing_strict(int this_syscall);
-static inline int __secure_computing(const struct seccomp_data *sd)
-{
- secure_computing_strict(sd->nr);
- return 0;
-}
#endif
extern long prctl_get_seccomp(void);
@@ -58,7 +54,7 @@ static inline int secure_computing(void) { return 0; }
#else
static inline void secure_computing_strict(int this_syscall) { return; }
#endif
-static inline int __secure_computing(const struct seccomp_data *sd) { return 0; }
+static inline int __secure_computing(void) { return 0; }
static inline long prctl_get_seccomp(void)
{
diff --git a/include/linux/security.h b/include/linux/security.h
index 980b6c207cad..1545d515a66b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2324,14 +2324,13 @@ struct perf_event_attr;
struct perf_event;
#ifdef CONFIG_SECURITY
-extern int security_perf_event_open(struct perf_event_attr *attr, int type);
+extern int security_perf_event_open(int type);
extern int security_perf_event_alloc(struct perf_event *event);
extern void security_perf_event_free(struct perf_event *event);
extern int security_perf_event_read(struct perf_event *event);
extern int security_perf_event_write(struct perf_event *event);
#else
-static inline int security_perf_event_open(struct perf_event_attr *attr,
- int type)
+static inline int security_perf_event_open(int type)
{
return 0;
}
@@ -2362,6 +2361,7 @@ static inline int security_perf_event_write(struct perf_event *event)
extern int security_uring_override_creds(const struct cred *new);
extern int security_uring_sqpoll(void);
extern int security_uring_cmd(struct io_uring_cmd *ioucmd);
+extern int security_uring_allowed(void);
#else
static inline int security_uring_override_creds(const struct cred *new)
{
@@ -2375,6 +2375,10 @@ static inline int security_uring_cmd(struct io_uring_cmd *ioucmd)
{
return 0;
}
+static inline int security_uring_allowed(void)
+{
+ return 0;
+}
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_IO_URING */
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index c3a00b967d18..49039494076f 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -23,17 +23,25 @@
#define SZ_4K 0x00001000
#define SZ_8K 0x00002000
#define SZ_16K 0x00004000
+#define SZ_24K 0x00006000
#define SZ_32K 0x00008000
#define SZ_64K 0x00010000
#define SZ_128K 0x00020000
+#define SZ_192K 0x00030000
#define SZ_256K 0x00040000
+#define SZ_384K 0x00060000
#define SZ_512K 0x00080000
#define SZ_1M 0x00100000
#define SZ_2M 0x00200000
+#define SZ_3M 0x00300000
#define SZ_4M 0x00400000
+#define SZ_6M 0x00600000
#define SZ_8M 0x00800000
+#define SZ_12M 0x00c00000
#define SZ_16M 0x01000000
+#define SZ_18M 0x01200000
+#define SZ_24M 0x01800000
#define SZ_32M 0x02000000
#define SZ_64M 0x04000000
#define SZ_128M 0x08000000
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 09eedaecf120..98e07e9e9e58 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/overflow.h>
#include <linux/types.h>
+#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>
#include <linux/cleanup.h>
@@ -941,8 +942,6 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
- if (__builtin_constant_p(n) && __builtin_constant_p(size))
- return kmalloc_noprof(bytes, flags);
return kmalloc_noprof(bytes, flags);
}
#define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__))
@@ -1082,6 +1081,19 @@ extern void kvfree_sensitive(const void *addr, size_t len);
unsigned int kmem_cache_size(struct kmem_cache *s);
+#ifndef CONFIG_KVFREE_RCU_BATCHED
+static inline void kvfree_rcu_barrier(void)
+{
+ rcu_barrier();
+}
+
+static inline void kfree_rcu_scheduler_running(void) { }
+#else
+void kvfree_rcu_barrier(void);
+
+void kfree_rcu_scheduler_running(void);
+#endif
+
/**
* kmalloc_size_roundup - Report allocation bucket size for the given size
*
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index d7ba46e74f58..900b0d5c05f5 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -47,7 +47,13 @@ int init_srcu_struct(struct srcu_struct *ssp);
#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock().
#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe().
#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
-#define SRCU_READ_FLAVOR_ALL 0x7 // All of the above.
+#define SRCU_READ_FLAVOR_FAST 0x8 // srcu_read_lock_fast().
+#define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \
+ SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // All of the above.
+#define SRCU_READ_FLAVOR_SLOWGP (SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST)
+ // Flavors requiring synchronize_rcu()
+ // instead of smp_mb().
+void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
#ifdef CONFIG_TINY_SRCU
#include <linux/srcutiny.h>
@@ -60,15 +66,6 @@ int init_srcu_struct(struct srcu_struct *ssp);
void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
void (*func)(struct rcu_head *head));
void cleanup_srcu_struct(struct srcu_struct *ssp);
-int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
-void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
-#ifdef CONFIG_TINY_SRCU
-#define __srcu_read_lock_lite __srcu_read_lock
-#define __srcu_read_unlock_lite __srcu_read_unlock
-#else // #ifdef CONFIG_TINY_SRCU
-int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp);
-void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp);
-#endif // #else // #ifdef CONFIG_TINY_SRCU
void synchronize_srcu(struct srcu_struct *ssp);
#define SRCU_GET_STATE_COMPLETED 0x1
@@ -258,6 +255,51 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
}
/**
+ * srcu_read_lock_fast - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but for a light-weight
+ * smp_mb()-free reader. See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_fast() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after. Note that grace-period auto-expediting is disabled for _fast
+ * srcu_struct structures because auto-expedited grace periods invoke
+ * synchronize_rcu_expedited(), IPIs and all.
+ *
+ * Note that srcu_read_lock_fast() can be invoked only from those contexts
+ * where RCU is watching, that is, from contexts where it would be legal
+ * to invoke rcu_read_lock(). Otherwise, lockdep will complain.
+ */
+static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *ssp) __acquires(ssp)
+{
+ struct srcu_ctr __percpu *retval;
+
+ srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST);
+ retval = __srcu_read_lock_fast(ssp);
+ rcu_try_lock_acquire(&ssp->dep_map);
+ return retval;
+}
+
+/**
+ * srcu_down_read_fast - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter a semaphore-like SRCU read-side critical section, but for
+ * a light-weight smp_mb()-free reader. See srcu_read_lock_fast() and
+ * srcu_down_read() for more information.
+ *
+ * The same srcu_struct may be used concurrently by srcu_down_read_fast()
+ * and srcu_read_lock_fast().
+ */
+static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp)
+{
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi());
+ srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST);
+ return __srcu_read_lock_fast(ssp);
+}
+
+/**
* srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
*
@@ -278,7 +320,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
- srcu_check_read_flavor_lite(ssp);
+ srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_LITE);
retval = __srcu_read_lock_lite(ssp);
rcu_try_lock_acquire(&ssp->dep_map);
return retval;
@@ -335,7 +377,8 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
* srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler.
*
* Calls to srcu_down_read() may be nested, similar to the manner in
- * which calls to down_read() may be nested.
+ * which calls to down_read() may be nested. The same srcu_struct may be
+ * used concurrently by srcu_down_read() and srcu_read_lock().
*/
static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
{
@@ -361,9 +404,40 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
}
/**
+ * srcu_read_unlock_fast - unregister an old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @scp: return value from corresponding srcu_read_lock_fast().
+ *
+ * Exit a light-weight SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
+ __releases(ssp)
+{
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST);
+ srcu_lock_release(&ssp->dep_map);
+ __srcu_read_unlock_fast(ssp, scp);
+}
+
+/**
+ * srcu_up_read_fast - unregister an old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @scp: return value from corresponding srcu_down_read_fast().
+ *
+ * Exit an SRCU read-side critical section, but not necessarily from
+ * the same context as the matching srcu_down_read_fast().
+ */
+static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
+ __releases(ssp)
+{
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi());
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST);
+ __srcu_read_unlock_fast(ssp, scp);
+}
+
+/**
* srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
- * @idx: return value from corresponding srcu_read_lock().
+ * @idx: return value from corresponding srcu_read_lock_lite().
*
* Exit a light-weight SRCU read-side critical section.
*/
@@ -379,7 +453,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
/**
* srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
- * @idx: return value from corresponding srcu_read_lock().
+ * @idx: return value from corresponding srcu_read_lock_nmisafe().
*
* Exit an SRCU read-side critical section, but in an NMI-safe manner.
*/
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 1321da803274..380260317d98 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -64,13 +64,38 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
{
int idx;
- preempt_disable(); // Needed for PREEMPT_AUTO
+ preempt_disable(); // Needed for PREEMPT_LAZY
idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1);
preempt_enable();
return idx;
}
+struct srcu_ctr;
+
+static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
+{
+ return (int)(intptr_t)(struct srcu_ctr __force __kernel *)scpp;
+}
+
+static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
+{
+ return (struct srcu_ctr __percpu *)(intptr_t)idx;
+}
+
+static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp)
+{
+ return __srcu_ctr_to_ptr(ssp, __srcu_read_lock(ssp));
+}
+
+static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
+{
+ __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp));
+}
+
+#define __srcu_read_lock_lite __srcu_read_lock
+#define __srcu_read_unlock_lite __srcu_read_unlock
+
static inline void synchronize_srcu_expedited(struct srcu_struct *ssp)
{
synchronize_srcu(ssp);
@@ -82,7 +107,7 @@ static inline void srcu_barrier(struct srcu_struct *ssp)
}
#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
-#define srcu_check_read_flavor_lite(ssp) do { } while (0)
+#define srcu_check_read_flavor_force(ssp, read_flavor) do { } while (0)
/* Defined here to avoid size increase for non-torture kernels. */
static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index b17814c9d1c7..8bed7e6cc4c1 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -17,14 +17,19 @@
struct srcu_node;
struct srcu_struct;
+/* One element of the srcu_data srcu_ctrs array. */
+struct srcu_ctr {
+ atomic_long_t srcu_locks; /* Locks per CPU. */
+ atomic_long_t srcu_unlocks; /* Unlocks per CPU. */
+};
+
/*
* Per-CPU structure feeding into leaf srcu_node, similar in function
* to rcu_node.
*/
struct srcu_data {
/* Read-side state. */
- atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */
- atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */
+ struct srcu_ctr srcu_ctrs[2]; /* Locks and unlocks per CPU. */
int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */
/* Values: SRCU_READ_FLAVOR_.* */
@@ -95,7 +100,7 @@ struct srcu_usage {
* Per-SRCU-domain structure, similar in function to rcu_state.
*/
struct srcu_struct {
- unsigned int srcu_idx; /* Current rdr array element. */
+ struct srcu_ctr __percpu *srcu_ctrp;
struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */
struct lockdep_map dep_map;
struct srcu_usage *srcu_sup; /* Update-side data. */
@@ -162,6 +167,7 @@ struct srcu_struct {
#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name) \
{ \
.sda = &pcpu_name, \
+ .srcu_ctrp = &pcpu_name.srcu_ctrs[0], \
__SRCU_STRUCT_INIT_COMMON(name, usage_name) \
}
@@ -201,10 +207,77 @@ struct srcu_struct {
#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
+int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void synchronize_srcu_expedited(struct srcu_struct *ssp);
void srcu_barrier(struct srcu_struct *ssp);
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
+// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that
+// element's index.
+static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
+{
+ return scpp - &ssp->sda->srcu_ctrs[0];
+}
+
+// Converts an integer to a per-CPU pointer to the corresponding
+// ->srcu_ctrs[] array element.
+static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
+{
+ return &ssp->sda->srcu_ctrs[idx];
+}
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct. Returns a pointer that must be passed to the matching
+ * srcu_read_unlock_fast().
+ *
+ * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
+ * critical sections either because they disable interrupts, because they
+ * are a single instruction, or because they are a read-modify-write atomic
+ * operation, depending on the whims of the architecture.
+ *
+ * This means that __srcu_read_lock_fast() is not all that fast
+ * on architectures that support NMIs but do not supply NMI-safe
+ * implementations of this_cpu_inc().
+ */
+static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp)
+{
+ struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
+
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast().");
+ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
+ this_cpu_inc(scp->srcu_locks.counter); /* Y */
+ else
+ atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */
+ barrier(); /* Avoid leaking the critical section. */
+ return scp;
+}
+
+/*
+ * Removes the count for the old reader from the appropriate
+ * per-CPU element of the srcu_struct. Note that this may well be a
+ * different CPU than that which was incremented by the corresponding
+ * srcu_read_lock_fast(), but it must be within the same task.
+ *
+ * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
+ * critical sections either because they disable interrupts, because they
+ * are a single instruction, or because they are a read-modify-write atomic
+ * operation, depending on the whims of the architecture.
+ *
+ * This means that __srcu_read_unlock_fast() is not all that fast
+ * on architectures that support NMIs but do not supply NMI-safe
+ * implementations of this_cpu_inc().
+ */
+static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
+{
+ barrier(); /* Avoid leaking the critical section. */
+ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
+ this_cpu_inc(scp->srcu_unlocks.counter); /* Z */
+ else
+ atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast().");
+}
+
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Returns an index that must be passed to the matching
@@ -217,13 +290,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
*/
static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
{
- int idx;
+ struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
- idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */
+ this_cpu_inc(scp->srcu_locks.counter); /* Y */
barrier(); /* Avoid leaking the critical section. */
- return idx;
+ return __srcu_ptr_to_ctr(ssp, scp);
}
/*
@@ -240,22 +312,24 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
{
barrier(); /* Avoid leaking the critical section. */
- this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */
+ this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter); /* Z */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
}
void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
-// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels.
-static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
+// Record reader usage even for CONFIG_PROVE_RCU=n kernels. This is
+// needed only for flavors that require grace-period smp_mb() calls to be
+// promoted to synchronize_rcu().
+static inline void srcu_check_read_flavor_force(struct srcu_struct *ssp, int read_flavor)
{
struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
- if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
+ if (likely(READ_ONCE(sdp->srcu_reader_flavor) & read_flavor))
return;
// Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered.
- __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ __srcu_check_read_flavor(ssp, read_flavor);
}
// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels.
diff --git a/include/linux/string.h b/include/linux/string.h
index f8e21e80942f..0403a4ca4c11 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -415,8 +415,10 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
*/
#define strtomem_pad(dest, src, pad) do { \
const size_t _dest_len = __must_be_byte_array(dest) + \
+ __must_be_noncstr(dest) + \
ARRAY_SIZE(dest); \
- const size_t _src_len = __builtin_object_size(src, 1); \
+ const size_t _src_len = __must_be_cstr(src) + \
+ __builtin_object_size(src, 1); \
\
BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
_dest_len == (size_t)-1); \
@@ -439,8 +441,10 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
*/
#define strtomem(dest, src) do { \
const size_t _dest_len = __must_be_byte_array(dest) + \
+ __must_be_noncstr(dest) + \
ARRAY_SIZE(dest); \
- const size_t _src_len = __builtin_object_size(src, 1); \
+ const size_t _src_len = __must_be_cstr(src) + \
+ __builtin_object_size(src, 1); \
\
BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
_dest_len == (size_t)-1); \
@@ -459,8 +463,10 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
*/
#define memtostr(dest, src) do { \
const size_t _dest_len = __must_be_byte_array(dest) + \
+ __must_be_cstr(dest) + \
ARRAY_SIZE(dest); \
- const size_t _src_len = __builtin_object_size(src, 1); \
+ const size_t _src_len = __must_be_noncstr(src) + \
+ __builtin_object_size(src, 1); \
const size_t _src_chars = strnlen(src, _src_len); \
const size_t _copy_len = min(_dest_len - 1, _src_chars); \
\
@@ -485,8 +491,10 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
*/
#define memtostr_pad(dest, src) do { \
const size_t _dest_len = __must_be_byte_array(dest) + \
+ __must_be_cstr(dest) + \
ARRAY_SIZE(dest); \
- const size_t _src_len = __builtin_object_size(src, 1); \
+ const size_t _src_len = __must_be_noncstr(src) + \
+ __builtin_object_size(src, 1); \
const size_t _src_chars = strnlen(src, _src_len); \
const size_t _copy_len = min(_dest_len - 1, _src_chars); \
\
diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h
index 120ca0f28e95..f3ba4f52ff26 100644
--- a/include/linux/string_choices.h
+++ b/include/linux/string_choices.h
@@ -41,23 +41,23 @@ static inline const char *str_high_low(bool v)
}
#define str_low_high(v) str_high_low(!(v))
-static inline const char *str_read_write(bool v)
-{
- return v ? "read" : "write";
-}
-#define str_write_read(v) str_read_write(!(v))
-
static inline const char *str_on_off(bool v)
{
return v ? "on" : "off";
}
#define str_off_on(v) str_on_off(!(v))
-static inline const char *str_yes_no(bool v)
+static inline const char *str_read_write(bool v)
{
- return v ? "yes" : "no";
+ return v ? "read" : "write";
}
-#define str_no_yes(v) str_yes_no(!(v))
+#define str_write_read(v) str_read_write(!(v))
+
+static inline const char *str_true_false(bool v)
+{
+ return v ? "true" : "false";
+}
+#define str_false_true(v) str_true_false(!(v))
static inline const char *str_up_down(bool v)
{
@@ -65,11 +65,11 @@ static inline const char *str_up_down(bool v)
}
#define str_down_up(v) str_up_down(!(v))
-static inline const char *str_true_false(bool v)
+static inline const char *str_yes_no(bool v)
{
- return v ? "true" : "false";
+ return v ? "yes" : "no";
}
-#define str_false_true(v) str_true_false(!(v))
+#define str_no_yes(v) str_yes_no(!(v))
/**
* str_plural - Return the simple pluralization based on English counts
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c6333204d451..e5603cc91963 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -951,6 +951,10 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
int flags, uint32_t sig);
asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags);
+asmlinkage long sys_open_tree_attr(int dfd, const char __user *path,
+ unsigned flags,
+ struct mount_attr __user *uattr,
+ size_t usize);
asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path,
int to_dfd, const char __user *to_path,
unsigned int ms_flags);
@@ -1266,14 +1270,14 @@ static inline long ksys_lchown(const char __user *filename, uid_t user,
AT_SYMLINK_NOFOLLOW);
}
-extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
+int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
static inline long ksys_ftruncate(unsigned int fd, loff_t length)
{
return do_sys_ftruncate(fd, length, 1);
}
-extern long do_sys_truncate(const char __user *pathname, loff_t length);
+int do_sys_truncate(const char __user *pathname, loff_t length);
static inline long ksys_truncate(const char __user *pathname, loff_t length)
{
diff --git a/include/linux/sysv_fs.h b/include/linux/sysv_fs.h
deleted file mode 100644
index 5cf77dbb8d86..000000000000
--- a/include/linux/sysv_fs.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_SYSV_FS_H
-#define _LINUX_SYSV_FS_H
-
-#define __packed2__ __attribute__((packed, aligned(2)))
-
-
-#ifndef __KERNEL__
-/* Plain-integer stand-ins for the on-disk types when not built as kernel code. */
-typedef u16 __fs16;
-typedef u32 __fs32;
-#endif
-
-/* inode numbers are 16 bit */
-typedef __fs16 sysv_ino_t;
-
-/* Block numbers are 24 bit, sometimes stored in 32 bit.
- On Coherent FS, they are always stored in PDP-11 manner: the least
- significant 16 bits come last. */
-typedef __fs32 sysv_zone_t;
-
-/* 0 is non-existent */
-#define SYSV_BADBL_INO 1 /* inode of bad blocks file */
-#define SYSV_ROOT_INO 2 /* inode of root directory */
-
-
-/* Xenix super-block data on disk */
-#define XENIX_NICINOD 100 /* number of inode cache entries */
-#define XENIX_NICFREE 100 /* number of free block list chunk entries */
-struct xenix_super_block {
- __fs16 s_isize; /* index of first data zone */
- __fs32 s_fsize __packed2__; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= XENIX_NICFREE */
- sysv_zone_t s_free[XENIX_NICFREE]; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= XENIX_NICINOD */
- sysv_ino_t s_inode[XENIX_NICINOD]; /* some free inodes */
- /* locks, not used by Linux: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time __packed2__; /* time of last super block update */
- __fs32 s_tfree __packed2__; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- __fs16 s_dinfo[4]; /* device information ?? */
- char s_fname[6]; /* file system volume name */
- char s_fpack[6]; /* file system pack name */
- char s_clean; /* set to 0x46 when filesystem is properly unmounted */
- char s_fill[371];
- s32 s_magic; /* version of file system */
- __fs32 s_type; /* type of file system: 1 for 512 byte blocks
- 2 for 1024 byte blocks
- 3 for 2048 byte blocks */
-
-};
-
-/*
- * SystemV FS comes in two variants:
- * sysv2: System V Release 2 (e.g. Microport), structure elements aligned(2).
- * sysv4: System V Release 4 (e.g. Consensys), structure elements aligned(4).
- */
-#define SYSV_NICINOD 100 /* number of inode cache entries */
-#define SYSV_NICFREE 50 /* number of free block list chunk entries */
-
-/* SystemV4 super-block data on disk */
-struct sysv4_super_block {
- __fs16 s_isize; /* index of first data zone */
- u16 s_pad0;
- __fs32 s_fsize; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= SYSV_NICFREE */
- u16 s_pad1;
- sysv_zone_t s_free[SYSV_NICFREE]; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= SYSV_NICINOD */
- u16 s_pad2;
- sysv_ino_t s_inode[SYSV_NICINOD]; /* some free inodes */
- /* locks, not used by Linux: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time; /* time of last super block update */
- __fs16 s_dinfo[4]; /* device information ?? */
- __fs32 s_tfree; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- u16 s_pad3;
- char s_fname[6]; /* file system volume name */
- char s_fpack[6]; /* file system pack name */
- s32 s_fill[12];
- __fs32 s_state; /* file system state: 0x7c269d38-s_time means clean */
- s32 s_magic; /* version of file system */
- __fs32 s_type; /* type of file system: 1 for 512 byte blocks
- 2 for 1024 byte blocks */
-};
-
-/* SystemV2 super-block data on disk */
-struct sysv2_super_block {
- __fs16 s_isize; /* index of first data zone */
- __fs32 s_fsize __packed2__; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= SYSV_NICFREE */
- sysv_zone_t s_free[SYSV_NICFREE]; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= SYSV_NICINOD */
- sysv_ino_t s_inode[SYSV_NICINOD]; /* some free inodes */
- /* locks, not used by Linux: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time __packed2__; /* time of last super block update */
- __fs16 s_dinfo[4]; /* device information ?? */
- __fs32 s_tfree __packed2__; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- char s_fname[6]; /* file system volume name */
- char s_fpack[6]; /* file system pack name */
- s32 s_fill[14];
- __fs32 s_state; /* file system state: 0xcb096f43 means clean */
- s32 s_magic; /* version of file system */
- __fs32 s_type; /* type of file system: 1 for 512 byte blocks
- 2 for 1024 byte blocks */
-};
-
-/* V7 super-block data on disk */
-#define V7_NICINOD 100 /* number of inode cache entries */
-#define V7_NICFREE 50 /* number of free block list chunk entries */
-struct v7_super_block {
- __fs16 s_isize; /* index of first data zone */
- __fs32 s_fsize __packed2__; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= V7_NICFREE */
- sysv_zone_t s_free[V7_NICFREE]; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= V7_NICINOD */
- sysv_ino_t s_inode[V7_NICINOD]; /* some free inodes */
- /* locks, not used by Linux or V7: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time __packed2__; /* time of last super block update */
- /* the following fields are not maintained by V7: */
- __fs32 s_tfree __packed2__; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- __fs16 s_m; /* interleave factor */
- __fs16 s_n; /* interleave factor */
- char s_fname[6]; /* file system name */
- char s_fpack[6]; /* file system pack name */
-};
-/* Constants to aid sanity checking */
-/* This is not a hard limit, nor is it enforced by the v7 kernel. It's actually
- * just the limit used by Seventh Edition's ls, though it is high enough to
- * assume that no reasonable file system would have that many entries in the
- * root directory. Thus, if we see anything higher, we probably just got the
- * endianness wrong. */
-#define V7_NFILES 1024
-/* The disk addresses are three-byte (despite direct block addresses being
- * aligned word-wise in inode). If the most significant byte is non-zero,
- * something is most likely wrong (not a filesystem, bad bytesex). */
-#define V7_MAXSIZE 0x00ffffff
-
-/* Coherent super-block data on disk */
-#define COH_NICINOD 100 /* number of inode cache entries */
-#define COH_NICFREE 64 /* number of free block list chunk entries */
-struct coh_super_block {
- __fs16 s_isize; /* index of first data zone */
- __fs32 s_fsize __packed2__; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= COH_NICFREE */
- sysv_zone_t s_free[COH_NICFREE] __packed2__; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= COH_NICINOD */
- sysv_ino_t s_inode[COH_NICINOD]; /* some free inodes */
- /* locks, not used by Linux: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time __packed2__; /* time of last super block update */
- __fs32 s_tfree __packed2__; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- __fs16 s_interleave_m; /* interleave factor */
- __fs16 s_interleave_n;
- char s_fname[6]; /* file system volume name */
- char s_fpack[6]; /* file system pack name */
- __fs32 s_unique; /* zero, not used */
-};
-
-/* SystemV/Coherent inode data on disk */
-struct sysv_inode {
- __fs16 i_mode;
- __fs16 i_nlink;
- __fs16 i_uid;
- __fs16 i_gid;
- __fs32 i_size;
- u8 i_data[3*(10+1+1+1)];
- u8 i_gen;
- __fs32 i_atime; /* time of last access */
- __fs32 i_mtime; /* time of last modification */
- __fs32 i_ctime; /* time of creation */
-};
-
-/* SystemV/Coherent directory entry on disk */
-#define SYSV_NAMELEN 14 /* max size of name in struct sysv_dir_entry */
-struct sysv_dir_entry {
- sysv_ino_t inode;
- char name[SYSV_NAMELEN]; /* up to 14 characters, the rest are zeroes */
-};
-
-#define SYSV_DIRSIZE sizeof(struct sysv_dir_entry) /* size of every directory entry */
-
-#endif /* _LINUX_SYSV_FS_H */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index cf2446c9c30d..dd925d84fa46 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -217,54 +217,6 @@ static inline int arch_within_stack_frames(const void * const stack,
}
#endif
-#ifdef CONFIG_HARDENED_USERCOPY
-extern void __check_object_size(const void *ptr, unsigned long n,
- bool to_user);
-
-static __always_inline void check_object_size(const void *ptr, unsigned long n,
- bool to_user)
-{
- if (!__builtin_constant_p(n))
- __check_object_size(ptr, n, to_user);
-}
-#else
-static inline void check_object_size(const void *ptr, unsigned long n,
- bool to_user)
-{ }
-#endif /* CONFIG_HARDENED_USERCOPY */
-
-extern void __compiletime_error("copy source size is too small")
-__bad_copy_from(void);
-extern void __compiletime_error("copy destination size is too small")
-__bad_copy_to(void);
-
-void __copy_overflow(int size, unsigned long count);
-
-static inline void copy_overflow(int size, unsigned long count)
-{
- if (IS_ENABLED(CONFIG_BUG))
- __copy_overflow(size, count);
-}
-
-static __always_inline __must_check bool
-check_copy_size(const void *addr, size_t bytes, bool is_source)
-{
- int sz = __builtin_object_size(addr, 0);
- if (unlikely(sz >= 0 && sz < bytes)) {
- if (!__builtin_constant_p(bytes))
- copy_overflow(sz, bytes);
- else if (is_source)
- __bad_copy_from();
- else
- __bad_copy_to();
- return false;
- }
- if (WARN_ON_ONCE(bytes > INT_MAX))
- return false;
- check_object_size(addr, bytes, is_source);
- return true;
-}
-
#ifndef arch_setup_new_exec
static inline void arch_setup_new_exec(void) { }
#endif
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 876e31b4461d..0b8b32bf0655 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -165,6 +165,4 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
}
#endif
-struct vdso_data *arch_get_vdso_data(void *vvar_page);
-
#endif /* _LINUX_TIMENS_H */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 52f5850730b3..24e715f0f6d2 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -240,6 +240,29 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
}
#endif
+#ifndef topology_is_primary_thread /* default below is skipped when this name is already a macro */
+
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ /*
+ * When disabling SMT, the primary thread of the SMT will remain
+ * enabled/active. Architectures that have a special primary thread
+ * (e.g. x86) need to override this function. Otherwise the first
+ * thread in the SMT can be made the primary thread.
+ *
+ * The sibling cpumask of an offline CPU always contains the CPU
+ * itself on architectures using the implementation of
+ * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology.
+ * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for
+ * building their topology have to check whether to use this default
+ * implementation or to override it.
+ */
+ return cpu == cpumask_first(topology_sibling_cpumask(cpu)); /* true only for the first CPU in @cpu's sibling mask */
+}
+#define topology_is_primary_thread topology_is_primary_thread /* self-named macro; presumably so later #ifndef checks see the default as defined */
+
+#endif /* topology_is_primary_thread */
+
static inline const struct cpumask *cpu_cpu_mask(int cpu)
{
return cpumask_of_node(cpu_to_node(cpu));
@@ -262,6 +285,36 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops)
#endif /* CONFIG_NUMA */
/**
+ * for_each_node_numadist() - iterate over nodes in increasing distance
+ * order, starting from a given node
+ * @node: the iteration variable and the starting node.
+ * @unvisited: a nodemask to keep track of the unvisited nodes.
+ *
+ * This macro iterates over NUMA node IDs in increasing distance from the
+ * starting @node and yields MAX_NUMNODES when all the nodes have been
+ * visited.
+ *
+ * Note that by the time the loop completes, the @unvisited nodemask will
+ * be fully cleared, unless the loop exits early.
+ *
+ * The difference between for_each_node() and for_each_node_numadist() is
+ * that the former iterates over nodes in numerical order, whereas the
+ * latter iterates over nodes in increasing order of distance.
+ *
+ * The complexity of this iterator is O(N^2), where N represents the
+ * number of nodes, as each iteration involves scanning all nodes to
+ * find the one with the shortest distance.
+ *
+ * Requires rcu_lock to be held.
+ */
+#define for_each_node_numadist(node, unvisited) \
+ for (int __start = (node), /* remember the starting node for every later lookup */ \
+ (node) = nearest_node_nodemask((__start), &(unvisited)); /* NOTE(review): second declarator of the int declaration; appears to declare a loop-scoped variable named by @node - confirm intended */ \
+ (node) < MAX_NUMNODES; /* loop ends once the lookup result is no longer below MAX_NUMNODES */ \
+ node_clear((node), (unvisited)), /* drop the node just visited from @unvisited */ \
+ (node) = nearest_node_nodemask((__start), &(unvisited))) /* next pick is again computed relative to __start */
+
+/**
* for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance
* from a given node.
* @mask: the iteration variable.
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 0134e7221cae..1b59056c3b18 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -104,6 +104,7 @@ int torture_stutter_init(int s, int sgap);
/* Initialization and cleanup. */
bool torture_init_begin(char *ttype, int v);
void torture_init_end(void);
+unsigned long get_torture_init_jiffies(void);
bool torture_cleanup_begin(void);
void torture_cleanup_end(void);
bool torture_must_stop(void);
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index e9c702c1908d..7c06f4795670 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -7,7 +7,7 @@
#include <linux/minmax.h>
#include <linux/nospec.h>
#include <linux/sched.h>
-#include <linux/thread_info.h>
+#include <linux/ucopysize.h>
#include <asm/uaccess.h>
diff --git a/include/linux/ucopysize.h b/include/linux/ucopysize.h
new file mode 100644
index 000000000000..41c2d9720466
--- /dev/null
+++ b/include/linux/ucopysize.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Perform sanity checking for object sizes for uaccess.h and uio.h. */
+#ifndef __LINUX_UCOPYSIZE_H__
+#define __LINUX_UCOPYSIZE_H__
+
+#include <linux/bug.h>
+
+#ifdef CONFIG_HARDENED_USERCOPY
+#include <linux/jump_label.h>
+extern void __check_object_size(const void *ptr, unsigned long n,
+ bool to_user);
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_HARDENED_USERCOPY_DEFAULT_ON,
+ validate_usercopy_range);
+
+static __always_inline void check_object_size(const void *ptr, unsigned long n,
+ bool to_user)
+{
+ if (!__builtin_constant_p(n) && /* only sizes that are not compile-time constants are checked */
+ static_branch_maybe(CONFIG_HARDENED_USERCOPY_DEFAULT_ON,
+ &validate_usercopy_range)) { /* and only while the validate_usercopy_range static branch evaluates true */
+ __check_object_size(ptr, n, to_user); /* out-of-line check, declared extern above */
+ }
+}
+#else
+static inline void check_object_size(const void *ptr, unsigned long n,
+ bool to_user)
+{ } /* empty body: this variant performs no check and ignores all three arguments */
+#endif /* CONFIG_HARDENED_USERCOPY */
+
+extern void __compiletime_error("copy source size is too small")
+__bad_copy_from(void);
+extern void __compiletime_error("copy destination size is too small")
+__bad_copy_to(void);
+
+void __copy_overflow(int size, unsigned long count);
+
+static inline void copy_overflow(int size, unsigned long count)
+{
+ if (IS_ENABLED(CONFIG_BUG)) /* forward to __copy_overflow() only when CONFIG_BUG is enabled */
+ __copy_overflow(size, count);
+}
+
+static __always_inline __must_check bool
+check_copy_size(const void *addr, size_t bytes, bool is_source)
+{
+ int sz = __builtin_object_size(addr, 0); /* per GCC docs an unknown size is (size_t)-1, i.e. negative once stored in an int */
+ if (unlikely(sz >= 0 && sz < bytes)) { /* object size is known and smaller than the requested copy */
+ if (!__builtin_constant_p(bytes))
+ copy_overflow(sz, bytes); /* non-constant length: report through copy_overflow() */
+ else if (is_source)
+ __bad_copy_from(); /* constant length: callee is declared with __compiletime_error */
+ else
+ __bad_copy_to(); /* same, for the destination side */
+ return false; /* the copy is refused in all three cases */
+ }
+ if (WARN_ON_ONCE(bytes > INT_MAX)) /* lengths above INT_MAX are refused, with a one-time warning */
+ return false;
+ check_object_size(addr, bytes, is_source); /* is_source is passed in the to_user parameter position */
+ return true; /* no check objected */
+}
+
+#endif /* __LINUX_UCOPYSIZE_H__ */
diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index f85ec5613721..2dc767e08f54 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -132,6 +132,7 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
u32 map_id_down(struct uid_gid_map *map, u32 id);
u32 map_id_up(struct uid_gid_map *map, u32 id);
+u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count);
#else
@@ -186,6 +187,11 @@ static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
return id;
}
+static inline u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count)
+{
+ return id; /* inline stub: @id comes back unchanged; @map and @count are not used */
+}
+
static inline u32 map_id_up(struct uid_gid_map *map, u32 id)
{
return id;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 8ada84e85447..49ece9e1888f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -6,8 +6,8 @@
#define __LINUX_UIO_H
#include <linux/kernel.h>
-#include <linux/thread_info.h>
#include <linux/mm_types.h>
+#include <linux/ucopysize.h>
#include <uapi/linux/uio.h>
struct page;
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index b1df7d792fa1..2e46b69ff0a6 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -39,6 +39,8 @@ struct page;
#define MAX_URETPROBE_DEPTH 64
+#define UPROBE_NO_TRAMPOLINE_VADDR (~0UL)
+
struct uprobe_consumer {
/*
* handler() can return UPROBE_HANDLER_REMOVE to signal the need to
@@ -143,6 +145,7 @@ struct uprobe_task {
struct uprobe *active_uprobe;
unsigned long xol_vaddr;
+ bool signal_denied;
struct arch_uprobe *auprobe;
};
diff --git a/include/linux/vdso_datastore.h b/include/linux/vdso_datastore.h
new file mode 100644
index 000000000000..a91fa24b06e0
--- /dev/null
+++ b/include/linux/vdso_datastore.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VDSO_DATASTORE_H
+#define _LINUX_VDSO_DATASTORE_H
+
+#include <linux/mm_types.h>
+
+extern const struct vm_special_mapping vdso_vvar_mapping;
+struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr);
+
+#endif /* _LINUX_VDSO_DATASTORE_H */
diff --git a/include/linux/vfsdebug.h b/include/linux/vfsdebug.h
new file mode 100644
index 000000000000..9cf22d3eb9dd
--- /dev/null
+++ b/include/linux/vfsdebug.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_VFS_DEBUG_H
+#define LINUX_VFS_DEBUG_H 1
+
+#include <linux/bug.h>
+
+struct inode;
+
+#ifdef CONFIG_DEBUG_VFS
+void dump_inode(struct inode *inode, const char *reason);
+
+#define VFS_BUG_ON(cond) BUG_ON(cond)
+#define VFS_WARN_ON(cond) (void)WARN_ON(cond)
+#define VFS_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
+#define VFS_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
+#define VFS_WARN(cond, format...) (void)WARN(cond, format)
+
+#define VFS_BUG_ON_INODE(cond, inode) ({ \
+ if (unlikely(!!(cond))) { /* @cond is evaluated once, here */ \
+ dump_inode(inode, "VFS_BUG_ON_INODE(" #cond")"); /* dump @inode first; the reason string embeds the stringified @cond */\
+ BUG_ON(1); /* then trigger BUG_ON unconditionally */ \
+ } \
+})
+
+#define VFS_WARN_ON_INODE(cond, inode) ({ \
+ int __ret_warn = !!(cond); /* evaluate @cond once and normalise it to 0/1 */ \
+ \
+ if (unlikely(__ret_warn)) { \
+ dump_inode(inode, "VFS_WARN_ON_INODE(" #cond")"); /* dump @inode with the stringified @cond as the reason */\
+ WARN_ON(1); /* then emit the warning unconditionally */ \
+ } \
+ unlikely(__ret_warn); /* value of the statement expression: non-zero when @cond was true */ \
+})
+#else
+#define VFS_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
+#define VFS_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
+#define VFS_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VFS_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+#define VFS_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
+
+#define VFS_BUG_ON_INODE(cond, inode) VFS_BUG_ON(cond)
+#define VFS_WARN_ON_INODE(cond, inode) BUILD_BUG_ON_INVALID(cond)
+#endif /* CONFIG_DEBUG_VFS */
+
+#endif
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index f70d0958095c..5a37cb2b6f93 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -151,6 +151,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_X86
DIRECT_MAP_LEVEL2_SPLIT,
DIRECT_MAP_LEVEL3_SPLIT,
+ DIRECT_MAP_LEVEL2_COLLAPSE,
+ DIRECT_MAP_LEVEL3_COLLAPSE,
#endif
#ifdef CONFIG_PER_VMA_LOCK_STATS
VMA_LOCK_SUCCESS,
diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h
index e1dec1a6a749..37e003ae5262 100644
--- a/include/linux/vmcore_info.h
+++ b/include/linux/vmcore_info.h
@@ -6,9 +6,8 @@
#include <linux/elfcore.h>
#include <linux/elf.h>
-#define CRASH_CORE_NOTE_NAME "CORE"
#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
-#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4)
+#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(NN_PRSTATUS), 4)
#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4)
/*
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6d90ad974408..3503fe822e38 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -316,6 +316,9 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
} \
\
cmd; \
+ \
+ if (condition) \
+ break; \
} \
finish_wait(&wq_head, &__wq_entry); \
__out: __ret; \