160 files changed, 2824 insertions, 2569 deletions
diff --git a/Documentation/arch/loongarch/introduction.rst b/Documentation/arch/loongarch/introduction.rst
index 8c568cfc2107..5e6db78abeaf 100644
--- a/Documentation/arch/loongarch/introduction.rst
+++ b/Documentation/arch/loongarch/introduction.rst
@@ -375,9 +375,9 @@ Developer web site of Loongson and LoongArch (Software and Documentation):
 
 Documentation of LoongArch ISA:
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (in Chinese)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (in Chinese)
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (in English)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (in English)
 
 Documentation of LoongArch ELF psABI:
 
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
index f5d2f2414de8..22cc7a040dae 100644
--- a/Documentation/arch/x86/boot.rst
+++ b/Documentation/arch/x86/boot.rst
@@ -77,7 +77,7 @@ Protocol 2.14	BURNT BY INCORRECT COMMIT
 Protocol 2.15	(Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
 =============	============================================================
 
-.. note::
+  .. note::
      The protocol version number should be changed only if the setup header
      is changed. There is no need to update the version number if boot_params
      or kernel_info are changed. Additionally, it is recommended to use
diff --git a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
index 462ead5a1cec..2cf3d016db42 100644
--- a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
+++ b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
@@ -36,6 +36,7 @@ properties:
           - qcom,sm8350-ufshc
           - qcom,sm8450-ufshc
           - qcom,sm8550-ufshc
+          - qcom,sm8650-ufshc
       - const: qcom,ufshc
       - const: jedec,ufs-2.0
 
@@ -122,6 +123,7 @@ allOf:
               - qcom,sm8350-ufshc
               - qcom,sm8450-ufshc
               - qcom,sm8550-ufshc
+              - qcom,sm8650-ufshc
     then:
       properties:
         clocks:
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index 57c6ae23b3fc..cc4626d6ee4f 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -91,6 +91,10 @@ compatibility checking tool (fsck.erofs), and a debugging tool (dump.erofs):
 
 - git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
 
+For more information, please also refer to the documentation site:
+
+- https://erofs.docs.kernel.org
+
 Bugs and patches are welcome, please kindly help us and send to the following
 linux-erofs mailing list:
 
diff --git a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst
index 59d6bf33050c..bf463c5a4c51 100644
--- a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst
+++ b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst
@@ -338,9 +338,9 @@ Loongson与LoongArch的开发者网站（软件与文档资源）：
 
 LoongArch指令集架构的文档：
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf （中文版）
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf （中文版）
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf （英文版）
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf （英文版）
 
 LoongArch的ELF psABI文档：
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 6cc0708f77b4..509281e9e169 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7855,6 +7855,7 @@ R:	Yue Hu <huyue2@coolpad.com>
 R:	Jeffle Xu <jefflexu@linux.alibaba.com>
 L:	linux-erofs@lists.ozlabs.org
 S:	Maintained
+W:	https://erofs.docs.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
 F:	Documentation/ABI/testing/sysfs-fs-erofs
 F:	Documentation/filesystems/erofs.rst
@@ -11024,7 +11025,6 @@ F:	drivers/net/wireless/intel/iwlwifi/
 
 INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER
 M:	Jithu Joseph <jithu.joseph@intel.com>
-R:	Maurice Ma <maurice.ma@intel.com>
 S:	Maintained
 W:	https://slimbootloader.github.io/security/firmware-update.html
 F:	drivers/platform/x86/intel/wmi/sbl-fw-update.c
@@ -13778,7 +13778,6 @@ F:	drivers/net/ethernet/mellanox/mlxfw/
 MELLANOX HARDWARE PLATFORM SUPPORT
 M:	Hans de Goede <hdegoede@redhat.com>
 M:	Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:	Mark Gross <markgross@kernel.org>
 M:	Vadim Pasternak <vadimp@nvidia.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Supported
@@ -14387,7 +14386,6 @@ F:	drivers/platform/surface/surface_gpe.c
 MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT
 M:	Hans de Goede <hdegoede@redhat.com>
 M:	Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:	Mark Gross <markgross@kernel.org>
 M:	Maximilian Luz <luzmaximilian@gmail.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
@@ -23667,7 +23665,6 @@ F:	drivers/platform/x86/x86-android-tablets/
 X86 PLATFORM DRIVERS
 M:	Hans de Goede <hdegoede@redhat.com>
 M:	Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:	Mark Gross <markgross@kernel.org>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 Q:	https://patchwork.kernel.org/project/platform-driver-x86/list/
@@ -23705,6 +23702,20 @@ F:	arch/x86/kernel/dumpstack.c
 F:	arch/x86/kernel/stacktrace.c
 F:	arch/x86/kernel/unwind_*.c
 
+X86 TRUST DOMAIN EXTENSIONS (TDX)
+M:	Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+R:	Dave Hansen <dave.hansen@linux.intel.com>
+L:	x86@kernel.org
+L:	linux-coco@lists.linux.dev
+S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/tdx
+F:	arch/x86/boot/compressed/tdx*
+F:	arch/x86/coco/tdx/
+F:	arch/x86/include/asm/shared/tdx.h
+F:	arch/x86/include/asm/tdx.h
+F:	arch/x86/virt/vmx/tdx/
+F:	drivers/virt/coco/tdx-guest
+
 X86 VDSO
 M:	Andy Lutomirski <luto@kernel.org>
 L:	linux-kernel@vger.kernel.org
@@ -23885,8 +23896,7 @@ T:	git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
 P:	Documentation/filesystems/xfs-maintainer-entry-profile.rst
 F:	Documentation/ABI/testing/sysfs-fs-xfs
 F:	Documentation/admin-guide/xfs.rst
-F:	Documentation/filesystems/xfs-delayed-logging-design.rst
-F:	Documentation/filesystems/xfs-self-describing-metadata.rst
+F:	Documentation/filesystems/xfs-*
 F:	fs/xfs/
 F:	include/uapi/linux/dqblk_xfs.h
 F:	include/uapi/linux/fsmap.h
diff --git a/Makefile b/Makefile
index ede0bd241056..724c79bebe72 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 9eeb0c05f3f4..204b94b2e6aa 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -68,6 +68,7 @@ LDFLAGS_vmlinux			+= -static -n -nostdlib
 ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
 cflags-y			+= $(call cc-option,-mexplicit-relocs)
 KBUILD_CFLAGS_KERNEL		+= $(call cc-option,-mdirect-extern-access)
+KBUILD_CFLAGS_KERNEL		+= $(call cc-option,-fdirect-access-external-data)
 KBUILD_AFLAGS_MODULE		+= $(call cc-option,-fno-direct-access-external-data)
 KBUILD_CFLAGS_MODULE		+= $(call cc-option,-fno-direct-access-external-data)
 KBUILD_AFLAGS_MODULE		+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
@@ -142,6 +143,8 @@ vdso-install-y += arch/loongarch/vdso/vdso.so.dbg
 
 all:	$(notdir $(KBUILD_IMAGE))
 
+vmlinuz.efi: vmlinux.efi
+
 vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@
 
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index c9544f358c33..655db7d7a427 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -609,8 +609,7 @@
 	lu32i.d	\reg, 0
 	lu52i.d	\reg, \reg, 0
 	.pushsection ".la_abs", "aw", %progbits
-	768:
-	.dword	768b-766b
+	.dword	766b
 	.dword	\sym
 	.popsection
 #endif
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index ed5da02b1cf6..9b36ac003f89 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr,		\
 	switch (size) {							\
 	case 4:								\
 		__asm__ __volatile__(					\
-		"am"#asm_op".w"	" %[ret], %[val], %[ptr]	\n"		\
+		"am"#asm_op".w"	" %[ret], %[val], %[ptr]	\n"	\
 		: [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr)		\
 		: [val] "r" (val));					\
 		break;							\
 	case 8:								\
 		__asm__ __volatile__(					\
-		"am"#asm_op".d" " %[ret], %[val], %[ptr]	\n"		\
+		"am"#asm_op".d" " %[ret], %[val], %[ptr]	\n"	\
 		: [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr)		\
 		: [val] "r" (val));					\
 		break;							\
@@ -63,7 +63,7 @@ PERCPU_OP(and, and, &)
 PERCPU_OP(or, or, |)
 #undef PERCPU_OP
 
-static __always_inline unsigned long __percpu_read(void *ptr, int size)
+static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size)
 {
 	unsigned long ret;
 
@@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size)
 	return ret;
 }
 
-static __always_inline void __percpu_write(void *ptr, unsigned long val, int size)
+static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size)
 {
 	switch (size) {
 	case 1:
@@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz
 	}
 }
 
-static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
-						   int size)
+static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
 {
 	switch (size) {
 	case 1:
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index a0bc159ce8bd..ee52fb1e9963 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len
 #ifdef CONFIG_RELOCATABLE
 
 struct rela_la_abs {
-	long offset;
+	long pc;
 	long symvalue;
 };
 
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index 6c3eff9af9fb..1acfa704c8d0 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset)
 	for (p = begin; (void *)p < end; p++) {
 		long v = p->symvalue;
 		uint32_t lu12iw, ori, lu32id, lu52id;
-		union loongarch_instruction *insn = (void *)p - p->offset;
+		union loongarch_instruction *insn = (void *)p->pc;
 
 		lu12iw = (v >> 12) & 0xfffff;
 		ori    = v & 0xfff;
@@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void)
 	return hash;
 }
 
+static int __init nokaslr(char *p)
+{
+	pr_info("KASLR is disabled.\n");
+
+	return 0; /* Print a notice and silence the boot warning */
+}
+early_param("nokaslr", nokaslr);
+
 static inline __init bool kaslr_disabled(void)
 {
 	char *str;
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index 3064af94db9c..e7015f7b70e3 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -58,14 +58,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt)
 	return 0;
 }
 
-static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
+static int constant_set_state_periodic(struct clock_event_device *evt)
 {
+	unsigned long period;
 	unsigned long timer_config;
 
 	raw_spin_lock(&state_lock);
 
-	timer_config = csr_read64(LOONGARCH_CSR_TCFG);
-	timer_config &= ~CSR_TCFG_EN;
+	period = const_clock_freq / HZ;
+	timer_config = period & CSR_TCFG_VAL;
+	timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
 	csr_write64(timer_config, LOONGARCH_CSR_TCFG);
 
 	raw_spin_unlock(&state_lock);
@@ -73,16 +75,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
 	return 0;
 }
 
-static int constant_set_state_periodic(struct clock_event_device *evt)
+static int constant_set_state_shutdown(struct clock_event_device *evt)
 {
-	unsigned long period;
 	unsigned long timer_config;
 
 	raw_spin_lock(&state_lock);
 
-	period = const_clock_freq / HZ;
-	timer_config = period & CSR_TCFG_VAL;
-	timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
+	timer_config = csr_read64(LOONGARCH_CSR_TCFG);
+	timer_config &= ~CSR_TCFG_EN;
 	csr_write64(timer_config, LOONGARCH_CSR_TCFG);
 
 	raw_spin_unlock(&state_lock);
@@ -90,11 +90,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt)
 	return 0;
 }
 
-static int constant_set_state_shutdown(struct clock_event_device *evt)
-{
-	return 0;
-}
-
 static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt)
 {
 	unsigned long timer_config;
@@ -161,7 +156,7 @@ int constant_clockevent_init(void)
 	cd->rating = 320;
 	cd->cpumask = cpumask_of(cpu);
 	cd->set_state_oneshot = constant_set_state_oneshot;
-	cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped;
+	cd->set_state_oneshot_stopped = constant_set_state_shutdown;
 	cd->set_state_periodic = constant_set_state_periodic;
 	cd->set_state_shutdown = constant_set_state_shutdown;
 	cd->set_next_event = constant_timer_next_event;
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 71d0539e2d0b..2aae72e63871 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr)
 {
 	return pfn_to_page(virt_to_pfn(kaddr));
 }
-EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+EXPORT_SYMBOL(dmw_virt_to_page);
 
 struct page *tlb_virt_to_page(unsigned long kaddr)
 {
 	return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
 }
-EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+EXPORT_SYMBOL(tlb_virt_to_page);
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index fd69dfa0cdab..a7c9c0e69e5a 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -140,11 +140,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
 	default 8
 
 config ARCH_MMAP_RND_BITS_MAX
-	default 24 if 64BIT
-	default 17
+	default 18 if 64BIT
+	default 13
 
 config ARCH_MMAP_RND_COMPAT_BITS_MAX
-	default 17
+	default 13
 
 # unless you want to implement ACPI on PA-RISC ... ;-)
 config PM
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 140eaa97bf21..2d73d3c3cd37 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -349,15 +349,7 @@ struct pt_regs;	/* forward declaration... */
 
 #define ELF_HWCAP	0
 
-/* Masks for stack and mmap randomization */
-#define BRK_RND_MASK	(is_32bit_task() ? 0x07ffUL : 0x3ffffUL)
-#define MMAP_RND_MASK	(is_32bit_task() ? 0x1fffUL : 0x3ffffUL)
-#define STACK_RND_MASK	MMAP_RND_MASK
-
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *);
-#define arch_randomize_brk arch_randomize_brk
-
+#define STACK_RND_MASK	0x7ff	/* 8MB of VA */
 
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index c05d121cf5d0..982aca20f56f 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -47,6 +47,8 @@
 
 #ifndef __ASSEMBLY__
 
+struct rlimit;
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack);
 unsigned long calc_max_stack_size(unsigned long stack_max);
 
 /*
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 29e2750f86a4..e95a977ba5f3 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -383,7 +383,7 @@ show_cpuinfo (struct seq_file *m, void *v)
 	char cpu_name[60], *p;
 
 	/* strip PA path from CPU name to not confuse lscpu */
-	strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
+	strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
 	p = strrchr(cpu_name, '[');
 	if (p)
 		*(--p) = 0;
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index ab896eff7a1d..98af719d5f85 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max)
  * indicating that "current" should be used instead of a passed-in
  * value from the exec bprm as done with arch_pick_mmap_layout().
  */
-static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
 {
 	unsigned long stack_base;
 
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 21556ad87f4b..8f3a4d16bb79 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -15,6 +15,7 @@
 #include <linux/io.h>
 #include <asm/apic.h>
 #include <asm/desc.h>
+#include <asm/e820/api.h>
 #include <asm/sev.h>
 #include <asm/ibt.h>
 #include <asm/hypervisor.h>
@@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu)
 
 static int __init hv_pci_init(void)
 {
-	int gen2vm = efi_enabled(EFI_BOOT);
+	bool gen2vm = efi_enabled(EFI_BOOT);
 
 	/*
-	 * For Generation-2 VM, we exit from pci_arch_init() by returning 0.
-	 * The purpose is to suppress the harmless warning:
+	 * A Generation-2 VM doesn't support legacy PCI/PCIe, so both
+	 * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() ->
+	 * pcibios_init() doesn't call pcibios_resource_survey() ->
+	 * e820__reserve_resources_late(); as a result, any emulated persistent
+	 * memory of E820_TYPE_PRAM (12) via the kernel parameter
+	 * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be
+	 * detected by register_e820_pmem(). Fix this by directly calling
+	 * e820__reserve_resources_late() here: e820__reserve_resources_late()
+	 * depends on e820__reserve_resources(), which has been called earlier
+	 * from setup_arch(). Note: e820__reserve_resources_late() also adds
+	 * any memory of E820_TYPE_PMEM (7) into iomem_resource, and
+	 * acpi_nfit_register_region() -> acpi_nfit_insert_resource() ->
+	 * region_intersects() returns REGION_INTERSECTS, so the memory of
+	 * E820_TYPE_PMEM won't get added twice.
+	 *
+	 * We return 0 here so that pci_arch_init() won't print the warning:
 	 * "PCI: Fatal: No config space access function found"
 	 */
-	if (gen2vm)
+	if (gen2vm) {
+		e820__reserve_resources_late();
 		return 0;
+	}
 
 	/* For Generation-1 VM, we'll proceed in pci_arch_init().  */
 	return 1;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d0918a75cb00..1a0dd80d81ac 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -63,6 +63,7 @@ int acpi_fix_pin2_polarity __initdata;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+static bool has_lapic_cpus __initdata;
 static bool acpi_support_online_capable;
 #endif
 
@@ -233,6 +234,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
 		return 0;
 
 	/*
+	 * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure
+	 * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID
+	 * in x2APIC must be equal or greater than 0xff.
+	 */
+	if (has_lapic_cpus && apic_id < 0xff)
+		return 0;
+
+	/*
 	 * We need to register disabled CPU as well to permit
 	 * counting disabled CPUs. This allows us to size
 	 * cpus_possible_map more accurately, to permit
@@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 
 static int __init acpi_parse_madt_lapic_entries(void)
 {
-	int count;
-	int x2count = 0;
-	int ret;
-	struct acpi_subtable_proc madt_proc[2];
+	int count, x2count = 0;
 
 	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
@@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void)
 				      acpi_parse_sapic, MAX_LOCAL_APIC);
 
 	if (!count) {
-		memset(madt_proc, 0, sizeof(madt_proc));
-		madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
-		madt_proc[0].handler = acpi_parse_lapic;
-		madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
-		madt_proc[1].handler = acpi_parse_x2apic;
-		ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
-				sizeof(struct acpi_table_madt),
-				madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
-		if (ret < 0) {
-			pr_err("Error parsing LAPIC/X2APIC entries\n");
-			return ret;
-		}
-
-		count = madt_proc[0].count;
-		x2count = madt_proc[1].count;
+		count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
+					acpi_parse_lapic, MAX_LOCAL_APIC);
+		has_lapic_cpus = count > 0;
+		x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
+					acpi_parse_x2apic, MAX_LOCAL_APIC);
 	}
 	if (!count && !x2count) {
 		pr_err("No LAPIC entries present\n");
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e6bba12c759c..01fa06dd06b6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -262,11 +262,14 @@ static uint32_t  __init ms_hyperv_platform(void)
 static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
 {
 	static atomic_t nmi_cpu = ATOMIC_INIT(-1);
+	unsigned int old_cpu, this_cpu;
 
 	if (!unknown_nmi_panic)
 		return NMI_DONE;
 
-	if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
+	old_cpu = -1;
+	this_cpu = raw_smp_processor_id();
+	if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu))
 		return NMI_HANDLED;
 
 	return NMI_DONE;
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index cacf2ede6217..23d8aaf8d9fd 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 	frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
 	uc_flags = frame_uc_flags(regs);
 
-	if (setup_signal_shadow_stack(ksig))
-		return -EFAULT;
-
 	if (!user_access_begin(frame, sizeof(*frame)))
 		return -EFAULT;
 
@@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 			return -EFAULT;
 	}
 
+	if (setup_signal_shadow_stack(ksig))
+		return -EFAULT;
+
 	/* Set up registers for signal handler */
 	regs->di = ksig->sig;
 	/* In case the signal handler was declared without prototypes */
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e..900c1be1fee1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	};
 	struct request *rq;
 
-	if (unlikely(bio_queue_enter(bio)))
-		return NULL;
-
 	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-		goto queue_exit;
+		return NULL;
 
 	rq_qos_throttle(q, bio);
 
@@ -2878,35 +2875,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
-queue_exit:
-	blk_queue_exit(q);
 	return NULL;
 }
 
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
-		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
+/* return true if this @rq can be used for @bio */
+static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+		struct bio *bio)
 {
-	struct request *rq;
-	enum hctx_type type, hctx_type;
+	enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
+	enum hctx_type hctx_type = rq->mq_hctx->type;
 
-	if (!plug)
-		return NULL;
-	rq = rq_list_peek(&plug->cached_rq);
-	if (!rq || rq->q != q)
-		return NULL;
+	WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
 
-	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
-		*bio = NULL;
-		return NULL;
-	}
-
-	type = blk_mq_get_hctx_type((*bio)->bi_opf);
-	hctx_type = rq->mq_hctx->type;
 	if (type != hctx_type &&
 	    !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
-		return NULL;
-	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
-		return NULL;
+		return false;
+	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+		return false;
 
 	/*
 	 * If any qos ->throttle() end up blocking, we will have flushed the
@@ -2914,12 +2899,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	 * before we throttle.
 	 */
 	plug->cached_rq = rq_list_next(rq);
-	rq_qos_throttle(q, *bio);
+	rq_qos_throttle(rq->q, bio);
 
 	blk_mq_rq_time_init(rq, 0);
-	rq->cmd_flags = (*bio)->bi_opf;
+	rq->cmd_flags = bio->bi_opf;
 	INIT_LIST_HEAD(&rq->queuelist);
-	return rq;
+	return true;
 }
 
 static void bio_set_ioprio(struct bio *bio)
@@ -2949,7 +2934,7 @@ void blk_mq_submit_bio(struct bio *bio)
 	struct blk_plug *plug = blk_mq_plug(bio);
 	const int is_sync = op_is_sync(bio->bi_opf);
 	struct blk_mq_hw_ctx *hctx;
-	struct request *rq;
+	struct request *rq = NULL;
 	unsigned int nr_segs = 1;
 	blk_status_t ret;
 
@@ -2960,20 +2945,36 @@ void blk_mq_submit_bio(struct bio *bio)
 			return;
 	}
 
-	if (!bio_integrity_prep(bio))
-		return;
-
 	bio_set_ioprio(bio);
 
-	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
-	if (!rq) {
-		if (!bio)
+	if (plug) {
+		rq = rq_list_peek(&plug->cached_rq);
+		if (rq && rq->q != q)
+			rq = NULL;
+	}
+	if (rq) {
+		if (!bio_integrity_prep(bio))
 			return;
-		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
-		if (unlikely(!rq))
+		if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
 			return;
+		if (blk_mq_can_use_cached_rq(rq, plug, bio))
+			goto done;
+		percpu_ref_get(&q->q_usage_counter);
+	} else {
+		if (unlikely(bio_queue_enter(bio)))
+			return;
+		if (!bio_integrity_prep(bio))
+			goto fail;
+	}
+
+	rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+	if (unlikely(!rq)) {
+fail:
+		blk_queue_exit(q);
+		return;
 	}
 
+done:
 	trace_block_getrq(bio);
 
 	rq_qos_track(q, rq, bio);
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 0ace218783c8..e9b16cbc26f4 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -250,9 +250,6 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
 {
 	int ret;
 
-	ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n",
-		 atomic_read(&vdev->drm.dev->power.usage_count));
-
 	ret = pm_runtime_get_if_active(vdev->drm.dev, false);
 	drm_WARN_ON(&vdev->drm, ret < 0);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index afec09930efa..9d92ca157677 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -248,6 +248,7 @@ extern int amdgpu_umsch_mm;
 extern int amdgpu_seamless;
 
 extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
 
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index df3ecfa9e13f..e50be6500030 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -207,7 +207,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 	}
 
 	for (i = 0; i < p->nchunks; i++) {
-		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
 		struct drm_amdgpu_cs_chunk user_chunk;
 		uint32_t __user *cdata;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3095a3a864af..8f24cabe2155 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -207,6 +207,7 @@ int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
 int amdgpu_umsch_mm;
 int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -961,6 +962,15 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
 MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
 module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
 
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture.  This provides an aperture in the GPU's internal
+ * address space for direct access to system memory.  Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 32b701cc0376..a21045d018f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1473,6 +1473,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
 				topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
 								topology->nodes[i].num_links : node_num_links;
 			}
+			/* popluate the connected port num info if supported and available */
+			if (ta_port_num_support && topology->nodes[i].num_links) {
+				memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+				       sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+			}
 
 			/* reflect the topology information for bi-directionality */
 			if (requires_reflection && topology->nodes[i].num_hops)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 5d36ad3f48c7..c4d9cbde55b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -150,6 +150,7 @@ struct psp_xgmi_node_info {
 	uint8_t					is_sharing_enabled;
 	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
 	uint8_t					num_links;
+	struct xgmi_connected_port_num		port_num[TA_XGMI__MAX_PORT_NUM];
 };
 
 struct psp_xgmi_topology_info {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 84e5987b14e0..a3dc68e98910 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1188,7 +1188,7 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
 			}
 
 			if (block_obj->hw_ops->query_ras_error_count)
-				block_obj->hw_ops->query_ras_error_count(adev, &err_data);
+				block_obj->hw_ops->query_ras_error_count(adev, err_data);
 
 			if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
 			    (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 65949cc7abb9..07d930339b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -398,6 +398,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
  * amdgpu_uvd_entity_init - init entity
  *
  * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
  *
  * Initialize the entity used for handle management in the kernel driver.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 0954447f689d..59acf424a078 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -230,6 +230,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
  * amdgpu_vce_entity_init - init entity
  *
  * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
  *
  * Initialize the entity used for handle management in the kernel driver.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 0ec7b061d7c2..a5a05c16c10d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -675,7 +675,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
 	amdgpu_gmc_set_agp_default(adev, mc);
 	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
 	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
-	if (!amdgpu_sriov_vf(adev))
+	if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
 		amdgpu_gmc_agp_location(adev, mc);
 
 	/* base offset of vram pages */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 6dce9b29f675..23d7b548d13f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -640,8 +640,9 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
 	amdgpu_gmc_set_agp_default(adev, mc);
 	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
 	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
-	if (!amdgpu_sriov_vf(adev) ||
-	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)))
+	if (!amdgpu_sriov_vf(adev) &&
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) &&
+	    (amdgpu_agp == 1))
 		amdgpu_gmc_agp_location(adev, mc);
 
 	/* base offset of vram pages */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index bde25eb4ed8e..2ac5820e9c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1630,7 +1630,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 	} else {
 		amdgpu_gmc_vram_location(adev, mc, base);
 		amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
-		if (!amdgpu_sriov_vf(adev))
+		if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
 			amdgpu_gmc_agp_location(adev, mc);
 	}
 	/* base offset of vram pages */
@@ -2170,8 +2170,6 @@ static int gmc_v9_0_sw_fini(void *handle)
 
 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
 		amdgpu_gmc_sysfs_fini(adev);
-	adev->gmc.num_mem_partitions = 0;
-	kfree(adev->gmc.mem_partitions);
 
 	amdgpu_gmc_ras_fini(adev);
 	amdgpu_gem_force_release(adev);
@@ -2185,6 +2183,9 @@ static int gmc_v9_0_sw_fini(void *handle)
 	amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
 	amdgpu_bo_fini(adev);
 
+	adev->gmc.num_mem_partitions = 0;
+	kfree(adev->gmc.mem_partitions);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index ea142611be1c..9b0146732e13 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -130,6 +130,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
 	uint64_t value;
 	int i;
 
+	if (amdgpu_sriov_vf(adev))
+		return;
+
 	inst_mask = adev->aid_mask;
 	for_each_inst(i, inst_mask) {
 		/* Program the AGP BAR */
@@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
 		WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
 			     adev->gmc.agp_end >> 24);
 
-		if (amdgpu_sriov_vf(adev))
-			return;
-
 		/* Program the system aperture low logical page number. */
 		WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 			min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6f99f6754c11..ee97814ebd99 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2079,7 +2079,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
 	struct dmub_srv_create_params create_params;
 	struct dmub_srv_region_params region_params;
 	struct dmub_srv_region_info region_info;
-	struct dmub_srv_fb_params fb_params;
+	struct dmub_srv_memory_params memory_params;
 	struct dmub_srv_fb_info *fb_info;
 	struct dmub_srv *dmub_srv;
 	const struct dmcub_firmware_header_v1_0 *hdr;
@@ -2182,6 +2182,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
 		adev->dm.dmub_fw->data +
 		le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
 		PSP_HEADER_BYTES;
+	region_params.is_mailbox_in_inbox = false;
 
 	status = dmub_srv_calc_region_info(dmub_srv, &region_params,
 					   &region_info);
@@ -2205,10 +2206,10 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
 		return r;
 
 	/* Rebase the regions on the framebuffer address. */
-	memset(&fb_params, 0, sizeof(fb_params));
-	fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr;
-	fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr;
-	fb_params.region_info = &region_info;
+	memset(&memory_params, 0, sizeof(memory_params));
+	memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr;
+	memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr;
+	memory_params.region_info = &region_info;
 
 	adev->dm.dmub_fb_info =
 		kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL);
@@ -2220,7 +2221,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
 		return -ENOMEM;
 	}
 
-	status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info);
+	status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info);
 	if (status != DMUB_STATUS_OK) {
 		DRM_ERROR("Error calculating DMUB FB info: %d\n", status);
 		return -EINVAL;
@@ -7481,6 +7482,9 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
 	int i;
 	int result = -EIO;
 
+	if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported)
+		return result;
+
 	cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL);
 
 	if (!cmd.payloads)
@@ -9603,14 +9607,14 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 	struct drm_plane *other;
 	struct drm_plane_state *old_other_state, *new_other_state;
 	struct drm_crtc_state *new_crtc_state;
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
 	int i;
 
 	/*
-	 * TODO: Remove this hack once the checks below are sufficient
-	 * enough to determine when we need to reset all the planes on
-	 * the stream.
+	 * TODO: Remove this hack for all asics once it proves that the
+	 * fast updates works fine on DCN3.2+.
 	 */
-	if (state->allow_modeset)
+	if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset)
 		return true;
 
 	/* Exit early if we know that we're adding or removing the plane. */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index ed784cf27d39..c7a29bb737e2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -536,11 +536,8 @@ bool dm_helpers_dp_read_dpcd(
 
 	struct amdgpu_dm_connector *aconnector = link->priv;
 
-	if (!aconnector) {
-		drm_dbg_dp(aconnector->base.dev,
-			   "Failed to find connector for link!\n");
+	if (!aconnector)
 		return false;
-	}
 
 	return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
 				size) == size;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index d3b13d362eda..11da0eebee6c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1604,31 +1604,31 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
 	unsigned int max_compressed_bw_in_kbps = 0;
 	struct dc_dsc_bw_range bw_range = {0};
-	struct drm_dp_mst_topology_mgr *mst_mgr;
+	uint16_t full_pbn = aconnector->mst_output_port->full_pbn;
 
 	/*
-	 * check if the mode could be supported if DSC pass-through is supported
-	 * AND check if there enough bandwidth available to support the mode
-	 * with DSC enabled.
+	 * Consider the case with the depth of the mst topology tree is equal or less than 2
+	 * A. When dsc bitstream can be transmitted along the entire path
+	 *    1. dsc is possible between source and branch/leaf device (common dsc params is possible), AND
+	 *    2. dsc passthrough supported at MST branch, or
+	 *    3. dsc decoding supported at leaf MST device
+	 *    Use maximum dsc compression as bw constraint
+	 * B. When dsc bitstream cannot be transmitted along the entire path
+	 *    Use native bw as bw constraint
 	 */
 	if (is_dsc_common_config_possible(stream, &bw_range) &&
-	    aconnector->mst_output_port->passthrough_aux) {
-		mst_mgr = aconnector->mst_output_port->mgr;
-		mutex_lock(&mst_mgr->lock);
-
+	   (aconnector->mst_output_port->passthrough_aux ||
+	    aconnector->dsc_aux == &aconnector->mst_output_port->aux)) {
 		cur_link_settings = stream->link->verified_link_cap;
 
 		upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
-							       &cur_link_settings
-							       );
-		down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
+							       &cur_link_settings);
+		down_link_bw_in_kbps = kbps_from_pbn(full_pbn);
 
 		/* pick the bottleneck */
 		end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
 					    down_link_bw_in_kbps);
 
-		mutex_unlock(&mst_mgr->lock);
-
 		/*
 		 * use the maximum dsc compression bandwidth as the required
 		 * bandwidth for the mode
@@ -1643,8 +1643,7 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 		/* check if mode could be supported within full_pbn */
 		bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
 		pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
-
-		if (pbn > aconnector->mst_output_port->full_pbn)
+		if (pbn > full_pbn)
 			return DC_FAIL_BANDWIDTH_VALIDATE;
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 0fa4fcd00de2..507a7cf56711 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -820,22 +820,22 @@ static void dcn35_set_idle_state(struct clk_mgr *clk_mgr_base, bool allow_idle)
 
 	if (dc->config.disable_ips == DMUB_IPS_ENABLE ||
 		dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) {
-		val |= DMUB_IPS1_ALLOW_MASK;
-		val |= DMUB_IPS2_ALLOW_MASK;
-	} else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
 		val = val & ~DMUB_IPS1_ALLOW_MASK;
 		val = val & ~DMUB_IPS2_ALLOW_MASK;
-	} else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
-		val |= DMUB_IPS1_ALLOW_MASK;
-		val = val & ~DMUB_IPS2_ALLOW_MASK;
-	} else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+	} else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
 		val |= DMUB_IPS1_ALLOW_MASK;
 		val |= DMUB_IPS2_ALLOW_MASK;
+	} else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
+		val = val & ~DMUB_IPS1_ALLOW_MASK;
+		val |= DMUB_IPS2_ALLOW_MASK;
+	} else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+		val = val & ~DMUB_IPS1_ALLOW_MASK;
+		val = val & ~DMUB_IPS2_ALLOW_MASK;
 	}
 
 	if (!allow_idle) {
-		val = val & ~DMUB_IPS1_ALLOW_MASK;
-		val = val & ~DMUB_IPS2_ALLOW_MASK;
+		val |= DMUB_IPS1_ALLOW_MASK;
+		val |= DMUB_IPS2_ALLOW_MASK;
 	}
 
 	dcn35_smu_write_ips_scratch(clk_mgr, val);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 7b9bf5cb4529..76b47f178127 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3178,7 +3178,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
 			struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(&context->res_ctx,
 					context->streams[i]);
 
-			if (otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
+			if (otg_master && otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
 				resource_build_test_pattern_params(&context->res_ctx, otg_master);
 		}
 	}
@@ -4934,8 +4934,8 @@ bool dc_dmub_is_ips_idle_state(struct dc *dc)
 	if (dc->hwss.get_idle_state)
 		idle_state = dc->hwss.get_idle_state(dc);
 
-	if ((idle_state & DMUB_IPS1_ALLOW_MASK) ||
-		(idle_state & DMUB_IPS2_ALLOW_MASK))
+	if (!(idle_state & DMUB_IPS1_ALLOW_MASK) ||
+		!(idle_state & DMUB_IPS2_ALLOW_MASK))
 		return true;
 
 	return false;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 1d48278cba96..a1f1d1003992 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -5190,6 +5190,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
 	sec_next = sec_pipe->next_odm_pipe;
 	sec_prev = sec_pipe->prev_odm_pipe;
 
+	if (pri_pipe == NULL)
+		return false;
+
 	*sec_pipe = *pri_pipe;
 
 	sec_pipe->top_pipe = sec_top;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index e4c007203318..0e07699c1e83 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1202,11 +1202,11 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 		allow_state = dc->hwss.get_idle_state(dc);
 		dc->hwss.set_idle_state(dc, false);
 
-		if (allow_state & DMUB_IPS2_ALLOW_MASK) {
+		if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) {
 			// Wait for evaluation time
 			udelay(dc->debug.ips2_eval_delay_us);
 			commit_state = dc->hwss.get_idle_state(dc);
-			if (commit_state & DMUB_IPS2_COMMIT_MASK) {
+			if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) {
 				// Tell PMFW to exit low power state
 				dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
 
@@ -1216,7 +1216,7 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
 				for (i = 0; i < max_num_polls; ++i) {
 					commit_state = dc->hwss.get_idle_state(dc);
-					if (!(commit_state & DMUB_IPS2_COMMIT_MASK))
+					if (commit_state & DMUB_IPS2_COMMIT_MASK)
 						break;
 
 					udelay(1);
@@ -1235,10 +1235,10 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 		}
 
 		dc_dmub_srv_notify_idle(dc, false);
-		if (allow_state & DMUB_IPS1_ALLOW_MASK) {
+		if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) {
 			for (i = 0; i < max_num_polls; ++i) {
 				commit_state = dc->hwss.get_idle_state(dc);
-				if (!(commit_state & DMUB_IPS1_COMMIT_MASK))
+				if (commit_state & DMUB_IPS1_COMMIT_MASK)
 					break;
 
 				udelay(1);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index cea666ea66c6..fcb825e4f1bb 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -177,6 +177,7 @@ struct dc_panel_patch {
 	unsigned int disable_fams;
 	unsigned int skip_avmute;
 	unsigned int mst_start_top_delay;
+	unsigned int remove_sink_ext_caps;
 };
 
 struct dc_edid_caps {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c
index 001f9eb66920..62a8f0b56006 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c
@@ -261,12 +261,6 @@ static void enc35_stream_encoder_enable(
 			/* invalid mode ! */
 			ASSERT_CRITICAL(false);
 		}
-
-		REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
-		REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
-	} else {
-		REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
-		REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
 	}
 }
 
@@ -436,6 +430,8 @@ static void enc35_disable_fifo(struct stream_encoder *enc)
 	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
 	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
+	REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+	REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
 }
 
 static void enc35_enable_fifo(struct stream_encoder *enc)
@@ -443,6 +439,8 @@ static void enc35_enable_fifo(struct stream_encoder *enc)
 	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
 	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+	REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+	REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
 
 	enc35_reset_fifo(enc, true);
 	enc35_reset_fifo(enc, false);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index d6f0f857c05a..f2fe523f914f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -1088,6 +1088,9 @@ static bool detect_link_and_local_sink(struct dc_link *link,
 		if (sink->edid_caps.panel_patch.skip_scdc_overwrite)
 			link->ctx->dc->debug.hdmi20_disable = true;
 
+		if (sink->edid_caps.panel_patch.remove_sink_ext_caps)
+			link->dpcd_sink_ext_caps.raw = 0;
+
 		if (dc_is_hdmi_signal(link->connector_signal))
 			read_scdc_caps(link->ddc, link->local_sink);
 
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index 9665ada0f894..df63aa8f01e9 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -195,6 +195,7 @@ struct dmub_srv_region_params {
 	uint32_t vbios_size;
 	const uint8_t *fw_inst_const;
 	const uint8_t *fw_bss_data;
+	bool is_mailbox_in_inbox;
 };
 
 /**
@@ -214,20 +215,25 @@ struct dmub_srv_region_params {
  */
 struct dmub_srv_region_info {
 	uint32_t fb_size;
+	uint32_t inbox_size;
 	uint8_t num_regions;
 	struct dmub_region regions[DMUB_WINDOW_TOTAL];
 };
 
 /**
- * struct dmub_srv_fb_params - parameters used for driver fb setup
+ * struct dmub_srv_memory_params - parameters used for driver fb setup
  * @region_info: region info calculated by dmub service
- * @cpu_addr: base cpu address for the framebuffer
- * @gpu_addr: base gpu virtual address for the framebuffer
+ * @cpu_fb_addr: base cpu address for the framebuffer
+ * @cpu_inbox_addr: base cpu address for the gart
+ * @gpu_fb_addr: base gpu virtual address for the framebuffer
+ * @gpu_inbox_addr: base gpu virtual address for the gart
  */
-struct dmub_srv_fb_params {
+struct dmub_srv_memory_params {
 	const struct dmub_srv_region_info *region_info;
-	void *cpu_addr;
-	uint64_t gpu_addr;
+	void *cpu_fb_addr;
+	void *cpu_inbox_addr;
+	uint64_t gpu_fb_addr;
+	uint64_t gpu_inbox_addr;
 };
 
 /**
@@ -563,8 +569,8 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
  *   DMUB_STATUS_OK - success
  *   DMUB_STATUS_INVALID - unspecified error
  */
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
-				       const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+				       const struct dmub_srv_memory_params *params,
 				       struct dmub_srv_fb_info *out);
 
 /**
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index e43e8d4bfe37..22fc4ba96def 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -434,7 +434,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
 	uint32_t fw_state_size = DMUB_FW_STATE_SIZE;
 	uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE;
 	uint32_t scratch_mem_size = DMUB_SCRATCH_MEM_SIZE;
-
+	uint32_t previous_top = 0;
 	if (!dmub->sw_init)
 		return DMUB_STATUS_INVALID;
 
@@ -459,8 +459,15 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
 	bios->base = dmub_align(stack->top, 256);
 	bios->top = bios->base + params->vbios_size;
 
-	mail->base = dmub_align(bios->top, 256);
-	mail->top = mail->base + DMUB_MAILBOX_SIZE;
+	if (params->is_mailbox_in_inbox) {
+		mail->base = 0;
+		mail->top = mail->base + DMUB_MAILBOX_SIZE;
+		previous_top = bios->top;
+	} else {
+		mail->base = dmub_align(bios->top, 256);
+		mail->top = mail->base + DMUB_MAILBOX_SIZE;
+		previous_top = mail->top;
+	}
 
 	fw_info = dmub_get_fw_meta_info(params);
 
@@ -479,7 +486,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
 			dmub->fw_version = fw_info->fw_version;
 	}
 
-	trace_buff->base = dmub_align(mail->top, 256);
+	trace_buff->base = dmub_align(previous_top, 256);
 	trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64);
 
 	fw_state->base = dmub_align(trace_buff->top, 256);
@@ -490,11 +497,14 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
 
 	out->fb_size = dmub_align(scratch_mem->top, 4096);
 
+	if (params->is_mailbox_in_inbox)
+		out->inbox_size = dmub_align(mail->top, 4096);
+
 	return DMUB_STATUS_OK;
 }
 
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
-				       const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+				       const struct dmub_srv_memory_params *params,
 				       struct dmub_srv_fb_info *out)
 {
 	uint8_t *cpu_base;
@@ -509,8 +519,8 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
 	if (params->region_info->num_regions != DMUB_NUM_WINDOWS)
 		return DMUB_STATUS_INVALID;
 
-	cpu_base = (uint8_t *)params->cpu_addr;
-	gpu_base = params->gpu_addr;
+	cpu_base = (uint8_t *)params->cpu_fb_addr;
+	gpu_base = params->gpu_fb_addr;
 
 	for (i = 0; i < DMUB_NUM_WINDOWS; ++i) {
 		const struct dmub_region *reg =
@@ -518,6 +528,12 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
 
 		out->fb[i].cpu_addr = cpu_base + reg->base;
 		out->fb[i].gpu_addr = gpu_base + reg->base;
+
+		if (i == DMUB_WINDOW_4_MAILBOX && params->cpu_inbox_addr != 0) {
+			out->fb[i].cpu_addr = (uint8_t *)params->cpu_inbox_addr + reg->base;
+			out->fb[i].gpu_addr = params->gpu_inbox_addr + reg->base;
+		}
+
 		out->fb[i].size = reg->top - reg->base;
 	}
 
@@ -707,9 +723,16 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub)
 		return DMUB_STATUS_INVALID;
 
 	if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) {
-		dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
-		dmub->inbox1_rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub);
-		dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+		uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+		uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub);
+
+		if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) {
+			return DMUB_STATUS_HW_FAILURE;
+		} else {
+			dmub->inbox1_rb.rptr = rptr;
+			dmub->inbox1_rb.wrpt = wptr;
+			dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+		}
 	}
 
 	return DMUB_STATUS_OK;
@@ -743,6 +766,11 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
 	if (!dmub->hw_init)
 		return DMUB_STATUS_INVALID;
 
+	if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
+	    dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
+		return DMUB_STATUS_HW_FAILURE;
+	}
+
 	if (dmub_rb_push_front(&dmub->inbox1_rb, cmd))
 		return DMUB_STATUS_OK;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index dab35d878a90..fef2d290f3f2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x8
+#define SMU_METRICS_TABLE_VERSION 0x9
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -211,6 +211,14 @@ typedef struct __attribute__((packed, aligned(4))) {
   //XGMI Data tranfser size
   uint64_t XgmiReadDataSizeAcc[8];//in KByte
   uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+
+  //PCIE BW Data and error count
+  uint32_t PcieBandwidth[4];
+  uint32_t PCIeL0ToRecoveryCountAcc;      // The Pcie counter itself is accumulated
+  uint32_t PCIenReplayAAcc;               // The Pcie counter itself is accumulated
+  uint32_t PCIenReplayARolloverCountAcc;  // The Pcie counter itself is accumulated
+  uint32_t PCIeNAKSentCountAcc;           // The Pcie counter itself is accumulated
+  uint32_t PCIeNAKReceivedCountAcc;       // The Pcie counter itself is accumulated
 } MetricsTable_t;
 
 #define SMU_VF_METRICS_TABLE_VERSION 0x3
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 891605d4975f..0e5a77c3c2e2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1454,7 +1454,7 @@ static int smu_v13_0_6_register_irq_handler(struct smu_context *smu)
 
 static int smu_v13_0_6_notify_unload(struct smu_context *smu)
 {
-	if (smu->smc_fw_version <= 0x553500)
+	if (amdgpu_in_reset(smu->adev))
 		return 0;
 
 	dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
@@ -2095,6 +2095,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 			smu_v13_0_6_get_current_pcie_link_speed(smu);
 		gpu_metrics->pcie_bandwidth_acc =
 				SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+		gpu_metrics->pcie_bandwidth_inst =
+				SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+		gpu_metrics->pcie_l0_to_recov_count_acc =
+				metrics->PCIeL0ToRecoveryCountAcc;
+		gpu_metrics->pcie_replay_count_acc =
+				metrics->PCIenReplayAAcc;
+		gpu_metrics->pcie_replay_rover_count_acc =
+				metrics->PCIenReplayARolloverCountAcc;
 	}
 
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index d5c15292ae93..3d92f66e550c 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -336,6 +336,12 @@ static const struct dmi_system_id orientation_data[] = {
 		  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"),
 		},
 		.driver_data = (void *)&lcd1200x1920_rightside_up,
+	}, {	/* Lenovo Legion Go 8APU1 */
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Legion Go 8APU1"),
+		},
+		.driver_data = (void *)&lcd1600x2560_leftside_up,
 	}, {	/* Lenovo Yoga Book X90F / X90L */
 		.matches = {
 		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
index 82b267c11147..460459af272d 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
@@ -14,7 +14,7 @@ struct nvkm_event {
 	int index_nr;
 
 	spinlock_t refs_lock;
-	spinlock_t list_lock;
+	rwlock_t list_lock;
 	int *refs;
 
 	struct list_head ntfy;
@@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
 		int types_nr, int index_nr, struct nvkm_event *event)
 {
 	spin_lock_init(&event->refs_lock);
-	spin_lock_init(&event->list_lock);
+	rwlock_init(&event->list_lock);
 	return __nvkm_event_init(func, subdev, types_nr, index_nr, event);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index d8c92521226d..f28f9a857458 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -726,6 +726,11 @@ nouveau_display_create(struct drm_device *dev)
 
 	if (nouveau_modeset != 2) {
 		ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp);
+		/* no display hw */
+		if (ret == -ENODEV) {
+			ret = 0;
+			goto disp_create_err;
+		}
 
 		if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) {
 			nouveau_display_create_properties(dev);
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c
index a6c877135598..61fed7792e41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/event.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c
@@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy)
 static void
 nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy)
 {
-	spin_lock_irq(&ntfy->event->list_lock);
+	write_lock_irq(&ntfy->event->list_lock);
 	list_del_init(&ntfy->head);
-	spin_unlock_irq(&ntfy->event->list_lock);
+	write_unlock_irq(&ntfy->event->list_lock);
 }
 
 static void
 nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy)
 {
-	spin_lock_irq(&ntfy->event->list_lock);
+	write_lock_irq(&ntfy->event->list_lock);
 	list_add_tail(&ntfy->head, &ntfy->event->ntfy);
-	spin_unlock_irq(&ntfy->event->list_lock);
+	write_unlock_irq(&ntfy->event->list_lock);
 }
 
 static void
@@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
 		return;
 
 	nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id);
-	spin_lock_irqsave(&event->list_lock, flags);
+	read_lock_irqsave(&event->list_lock, flags);
 
 	list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
 		if (ntfy->id == id && ntfy->bits & bits) {
@@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
 		}
 	}
 
-	spin_unlock_irqrestore(&event->list_lock, flags);
+	read_unlock_irqrestore(&event->list_lock, flags);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index e31f9641114b..dc44f5c7833f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -689,8 +689,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc)
 	struct nvfw_gsp_rpc *rpc;
 
 	rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64)));
-	if (!rpc)
-		return NULL;
+	if (IS_ERR(rpc))
+		return ERR_CAST(rpc);
 
 	rpc->header_version = 0x03000000;
 	rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V';
@@ -1159,7 +1159,7 @@ static void
 r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
 						 MUX_METHOD_DATA_ELEMENT *part)
 {
-	acpi_handle iter = NULL, handle_mux;
+	acpi_handle iter = NULL, handle_mux = NULL;
 	acpi_status status;
 	unsigned long long value;
 
diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
index affcfb243f0f..35f762872b8a 100644
--- a/drivers/i2c/busses/i2c-designware-common.c
+++ b/drivers/i2c/busses/i2c-designware-common.c
@@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val)
 {
 	struct dw_i2c_dev *dev = context;
 
-	*val = readl_relaxed(dev->base + reg);
+	*val = readl(dev->base + reg);
 
 	return 0;
 }
@@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val)
 {
 	struct dw_i2c_dev *dev = context;
 
-	writel_relaxed(val, dev->base + reg);
+	writel(val, dev->base + reg);
 
 	return 0;
 }
@@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val)
 {
 	struct dw_i2c_dev *dev = context;
 
-	*val = swab32(readl_relaxed(dev->base + reg));
+	*val = swab32(readl(dev->base + reg));
 
 	return 0;
 }
@@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val)
 {
 	struct dw_i2c_dev *dev = context;
 
-	writel_relaxed(swab32(val), dev->base + reg);
+	writel(swab32(val), dev->base + reg);
 
 	return 0;
 }
@@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val)
 {
 	struct dw_i2c_dev *dev = context;
 
-	*val = readw_relaxed(dev->base + reg) |
-		(readw_relaxed(dev->base + reg + 2) << 16);
+	*val = readw(dev->base + reg) |
+		(readw(dev->base + reg + 2) << 16);
 
 	return 0;
 }
@@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val)
 {
 	struct dw_i2c_dev *dev = context;
 
-	writew_relaxed(val, dev->base + reg);
-	writew_relaxed(val >> 16, dev->base + reg + 2);
+	writew(val, dev->base + reg);
+	writew(val >> 16, dev->base + reg + 2);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 041a76f71a49..e106af83cef4 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -771,8 +771,8 @@ static int ocores_i2c_resume(struct device *dev)
 	return ocores_init(dev, i2c);
 }
 
-static DEFINE_SIMPLE_DEV_PM_OPS(ocores_i2c_pm,
-				ocores_i2c_suspend, ocores_i2c_resume);
+static DEFINE_NOIRQ_DEV_PM_OPS(ocores_i2c_pm,
+			       ocores_i2c_suspend, ocores_i2c_resume);
 
 static struct platform_driver ocores_i2c_driver = {
 	.probe   = ocores_i2c_probe,
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 1d7648242749..76f79b68cef8 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -265,6 +265,9 @@ struct pxa_i2c {
 	u32			hs_mask;
 
 	struct i2c_bus_recovery_info recovery;
+	struct pinctrl		*pinctrl;
+	struct pinctrl_state	*pinctrl_default;
+	struct pinctrl_state	*pinctrl_recovery;
 };
 
 #define _IBMR(i2c)	((i2c)->reg_ibmr)
@@ -1299,12 +1302,13 @@ static void i2c_pxa_prepare_recovery(struct i2c_adapter *adap)
 	 */
 	gpiod_set_value(i2c->recovery.scl_gpiod, ibmr & IBMR_SCLS);
 	gpiod_set_value(i2c->recovery.sda_gpiod, ibmr & IBMR_SDAS);
+
+	WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery));
 }
 
 static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
 {
 	struct pxa_i2c *i2c = adap->algo_data;
-	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
 	u32 isr;
 
 	/*
@@ -1318,7 +1322,7 @@ static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
 		i2c_pxa_do_reset(i2c);
 	}
 
-	WARN_ON(pinctrl_select_state(bri->pinctrl, bri->pins_default));
+	WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default));
 
 	dev_dbg(&i2c->adap.dev, "recovery: IBMR 0x%08x ISR 0x%08x\n",
 	        readl(_IBMR(i2c)), readl(_ISR(i2c)));
@@ -1340,20 +1344,76 @@ static int i2c_pxa_init_recovery(struct pxa_i2c *i2c)
 	if (IS_ENABLED(CONFIG_I2C_PXA_SLAVE))
 		return 0;
 
-	bri->pinctrl = devm_pinctrl_get(dev);
-	if (PTR_ERR(bri->pinctrl) == -ENODEV) {
-		bri->pinctrl = NULL;
+	i2c->pinctrl = devm_pinctrl_get(dev);
+	if (PTR_ERR(i2c->pinctrl) == -ENODEV)
+		i2c->pinctrl = NULL;
+	if (IS_ERR(i2c->pinctrl))
+		return PTR_ERR(i2c->pinctrl);
+
+	if (!i2c->pinctrl)
+		return 0;
+
+	i2c->pinctrl_default = pinctrl_lookup_state(i2c->pinctrl,
+						    PINCTRL_STATE_DEFAULT);
+	i2c->pinctrl_recovery = pinctrl_lookup_state(i2c->pinctrl, "recovery");
+
+	if (IS_ERR(i2c->pinctrl_default) || IS_ERR(i2c->pinctrl_recovery)) {
+		dev_info(dev, "missing pinmux recovery information: %ld %ld\n",
+			 PTR_ERR(i2c->pinctrl_default),
+			 PTR_ERR(i2c->pinctrl_recovery));
+		return 0;
+	}
+
+	/*
+	 * Claiming GPIOs can influence the pinmux state, and may glitch the
+	 * I2C bus. Do this carefully.
+	 */
+	bri->scl_gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN);
+	if (bri->scl_gpiod == ERR_PTR(-EPROBE_DEFER))
+		return -EPROBE_DEFER;
+	if (IS_ERR(bri->scl_gpiod)) {
+		dev_info(dev, "missing scl gpio recovery information: %pe\n",
+			 bri->scl_gpiod);
+		return 0;
+	}
+
+	/*
+	 * We have SCL. Pull SCL low and wait a bit so that SDA glitches
+	 * have no effect.
+	 */
+	gpiod_direction_output(bri->scl_gpiod, 0);
+	udelay(10);
+	bri->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_OUT_HIGH_OPEN_DRAIN);
+
+	/* Wait a bit in case of a SDA glitch, and then release SCL. */
+	udelay(10);
+	gpiod_direction_output(bri->scl_gpiod, 1);
+
+	if (bri->sda_gpiod == ERR_PTR(-EPROBE_DEFER))
+		return -EPROBE_DEFER;
+
+	if (IS_ERR(bri->sda_gpiod)) {
+		dev_info(dev, "missing sda gpio recovery information: %pe\n",
+			 bri->sda_gpiod);
 		return 0;
 	}
-	if (IS_ERR(bri->pinctrl))
-		return PTR_ERR(bri->pinctrl);
 
 	bri->prepare_recovery = i2c_pxa_prepare_recovery;
 	bri->unprepare_recovery = i2c_pxa_unprepare_recovery;
+	bri->recover_bus = i2c_generic_scl_recovery;
 
 	i2c->adap.bus_recovery_info = bri;
 
-	return 0;
+	/*
+	 * Claiming GPIOs can change the pinmux state, which confuses the
+	 * pinctrl since pinctrl's idea of the current setting is unaffected
+	 * by the pinmux change caused by claiming the GPIO. Work around that
+	 * by switching pinctrl to the GPIO state here. We do it this way to
+	 * avoid glitching the I2C bus.
+	 */
+	pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery);
+
+	return pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default);
 }
 
 static int i2c_pxa_probe(struct platform_device *dev)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index a8c89df1a997..9a7a74239eab 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2379,12 +2379,12 @@ retry_baser:
 		break;
 	}
 
+	if (!shr)
+		gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
+
 	its_write_baser(its, baser, val);
 	tmp = baser->val;
 
-	if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE)
-		tmp &= ~GITS_BASER_SHAREABILITY_MASK;
-
 	if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
 		/*
 		 * Shareability didn't stick. Just use
@@ -2394,10 +2394,9 @@ retry_baser:
 		 * non-cacheable as well.
 		 */
 		shr = tmp & GITS_BASER_SHAREABILITY_MASK;
-		if (!shr) {
+		if (!shr)
 			cache = GITS_BASER_nC;
-			gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
-		}
+
 		goto retry_baser;
 	}
 
@@ -2609,6 +2608,11 @@ static int its_alloc_tables(struct its_node *its)
 		/* erratum 24313: ignore memory access type */
 		cache = GITS_BASER_nCnB;
 
+	if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) {
+		cache = GITS_BASER_nC;
+		shr = 0;
+	}
+
 	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
 		struct its_baser *baser = its->tables + i;
 		u64 val = its_read_baser(its, baser);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 62eb27639c9b..f03d7dba270c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -254,7 +254,7 @@ enum evict_result {
 
 typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
 
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
 {
 	unsigned long tested = 0;
 	struct list_head *h = lru->cursor;
@@ -295,7 +295,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
 
 		h = h->next;
 
-		cond_resched();
+		if (!no_sleep)
+			cond_resched();
 	}
 
 	return NULL;
@@ -382,7 +383,10 @@ struct dm_buffer {
  */
 
 struct buffer_tree {
-	struct rw_semaphore lock;
+	union {
+		struct rw_semaphore lock;
+		rwlock_t spinlock;
+	} u;
 	struct rb_root root;
 } ____cacheline_aligned_in_smp;
 
@@ -393,9 +397,12 @@ struct dm_buffer_cache {
 	 * on the locks.
 	 */
 	unsigned int num_locks;
+	bool no_sleep;
 	struct buffer_tree trees[];
 };
 
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
 static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 {
 	return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 
 static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-	down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+		read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+	else
+		down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-	up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+		read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+	else
+		up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-	down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+		write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+	else
+		down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-	up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+		write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+	else
+		up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 /*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
 
 static void __lh_lock(struct lock_history *lh, unsigned int index)
 {
-	if (lh->write)
-		down_write(&lh->cache->trees[index].lock);
-	else
-		down_read(&lh->cache->trees[index].lock);
+	if (lh->write) {
+		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+			write_lock_bh(&lh->cache->trees[index].u.spinlock);
+		else
+			down_write(&lh->cache->trees[index].u.lock);
+	} else {
+		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+			read_lock_bh(&lh->cache->trees[index].u.spinlock);
+		else
+			down_read(&lh->cache->trees[index].u.lock);
+	}
 }
 
 static void __lh_unlock(struct lock_history *lh, unsigned int index)
 {
-	if (lh->write)
-		up_write(&lh->cache->trees[index].lock);
-	else
-		up_read(&lh->cache->trees[index].lock);
+	if (lh->write) {
+		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+			write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+		else
+			up_write(&lh->cache->trees[index].u.lock);
+	} else {
+		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+			read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+		else
+			up_read(&lh->cache->trees[index].u.lock);
+	}
 }
 
 /*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
 	return le_to_buffer(le);
 }
 
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
 {
 	unsigned int i;
 
 	bc->num_locks = num_locks;
+	bc->no_sleep = no_sleep;
 
 	for (i = 0; i < bc->num_locks; i++) {
-		init_rwsem(&bc->trees[i].lock);
+		if (no_sleep)
+			rwlock_init(&bc->trees[i].u.spinlock);
+		else
+			init_rwsem(&bc->trees[i].u.lock);
 		bc->trees[i].root = RB_ROOT;
 	}
 
@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
 	struct lru_entry *le;
 	struct dm_buffer *b;
 
-	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
 	if (!le)
 		return NULL;
 
@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
 	struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
 
 	while (true) {
-		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
 		if (!le)
 			break;
 
@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
 {
 	unsigned int i;
 
+	BUG_ON(bc->no_sleep);
 	for (i = 0; i < bc->num_locks; i++) {
-		down_write(&bc->trees[i].lock);
+		down_write(&bc->trees[i].u.lock);
 		__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
-		up_write(&bc->trees[i].lock);
+		up_write(&bc->trees[i].u.lock);
 	}
 }
 
@@ -979,8 +1017,6 @@ struct dm_bufio_client {
 	struct dm_buffer_cache cache; /* must be last member */
 };
 
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
 /*----------------------------------------------------------------*/
 
 #define dm_bufio_in_request()	(!!current->bio_list)
@@ -1871,7 +1907,8 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
 	if (need_submit)
 		submit_io(b, REQ_OP_READ, read_endio);
 
-	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+	if (nf != NF_GET)	/* we already tested this condition above */
+		wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
 	if (b->read_error) {
 		int error = blk_status_to_errno(b->read_error);
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 		r = -ENOMEM;
 		goto bad_client;
 	}
-	cache_init(&c->cache, num_locks);
+	cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
 
 	c->bdev = bdev;
 	c->block_size = block_size;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 6de107aff331..2ae8560b6a14 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
 	unsigned int remaining_size;
-	unsigned int order = MAX_ORDER - 1;
+	unsigned int order = MAX_ORDER;
 
 retry:
 	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index efd510984e25..5eabdb06c649 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -33,7 +33,7 @@ struct delay_c {
 	struct work_struct flush_expired_bios;
 	struct list_head delayed_bios;
 	struct task_struct *worker;
-	atomic_t may_delay;
+	bool may_delay;
 
 	struct delay_class read;
 	struct delay_class write;
@@ -73,39 +73,6 @@ static inline bool delay_is_fast(struct delay_c *dc)
 	return !!dc->worker;
 }
 
-static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
-{
-	struct dm_delay_info *delayed, *next;
-
-	mutex_lock(&delayed_bios_lock);
-	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
-		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
-			struct bio *bio = dm_bio_from_per_bio_data(delayed,
-						sizeof(struct dm_delay_info));
-			list_del(&delayed->list);
-			dm_submit_bio_remap(bio, NULL);
-			delayed->class->ops--;
-		}
-	}
-	mutex_unlock(&delayed_bios_lock);
-}
-
-static int flush_worker_fn(void *data)
-{
-	struct delay_c *dc = data;
-
-	while (1) {
-		flush_delayed_bios_fast(dc, false);
-		if (unlikely(list_empty(&dc->delayed_bios))) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			schedule();
-		} else
-			cond_resched();
-	}
-
-	return 0;
-}
-
 static void flush_bios(struct bio *bio)
 {
 	struct bio *n;
@@ -118,36 +85,61 @@ static void flush_bios(struct bio *bio)
 	}
 }
 
-static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
+static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
 {
 	struct dm_delay_info *delayed, *next;
+	struct bio_list flush_bio_list;
 	unsigned long next_expires = 0;
-	unsigned long start_timer = 0;
-	struct bio_list flush_bios = { };
+	bool start_timer = false;
+	bio_list_init(&flush_bio_list);
 
 	mutex_lock(&delayed_bios_lock);
 	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+		cond_resched();
 		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
 			struct bio *bio = dm_bio_from_per_bio_data(delayed,
 						sizeof(struct dm_delay_info));
 			list_del(&delayed->list);
-			bio_list_add(&flush_bios, bio);
+			bio_list_add(&flush_bio_list, bio);
 			delayed->class->ops--;
 			continue;
 		}
 
-		if (!start_timer) {
-			start_timer = 1;
-			next_expires = delayed->expires;
-		} else
-			next_expires = min(next_expires, delayed->expires);
+		if (!delay_is_fast(dc)) {
+			if (!start_timer) {
+				start_timer = true;
+				next_expires = delayed->expires;
+			} else {
+				next_expires = min(next_expires, delayed->expires);
+			}
+		}
 	}
 	mutex_unlock(&delayed_bios_lock);
 
 	if (start_timer)
 		queue_timeout(dc, next_expires);
 
-	return bio_list_get(&flush_bios);
+	flush_bios(bio_list_get(&flush_bio_list));
+}
+
+static int flush_worker_fn(void *data)
+{
+	struct delay_c *dc = data;
+
+	while (!kthread_should_stop()) {
+		flush_delayed_bios(dc, false);
+		mutex_lock(&delayed_bios_lock);
+		if (unlikely(list_empty(&dc->delayed_bios))) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			mutex_unlock(&delayed_bios_lock);
+			schedule();
+		} else {
+			mutex_unlock(&delayed_bios_lock);
+			cond_resched();
+		}
+	}
+
+	return 0;
 }
 
 static void flush_expired_bios(struct work_struct *work)
@@ -155,10 +147,7 @@ static void flush_expired_bios(struct work_struct *work)
 	struct delay_c *dc;
 
 	dc = container_of(work, struct delay_c, flush_expired_bios);
-	if (delay_is_fast(dc))
-		flush_delayed_bios_fast(dc, false);
-	else
-		flush_bios(flush_delayed_bios(dc, false));
+	flush_delayed_bios(dc, false);
 }
 
 static void delay_dtr(struct dm_target *ti)
@@ -177,8 +166,7 @@ static void delay_dtr(struct dm_target *ti)
 	if (dc->worker)
 		kthread_stop(dc->worker);
 
-	if (!delay_is_fast(dc))
-		mutex_destroy(&dc->timer_lock);
+	mutex_destroy(&dc->timer_lock);
 
 	kfree(dc);
 }
@@ -236,7 +224,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	ti->private = dc;
 	INIT_LIST_HEAD(&dc->delayed_bios);
-	atomic_set(&dc->may_delay, 1);
+	mutex_init(&dc->timer_lock);
+	dc->may_delay = true;
 	dc->argc = argc;
 
 	ret = delay_class_ctr(ti, &dc->read, argv);
@@ -282,12 +271,12 @@ out:
 					    "dm-delay-flush-worker");
 		if (IS_ERR(dc->worker)) {
 			ret = PTR_ERR(dc->worker);
+			dc->worker = NULL;
 			goto bad;
 		}
 	} else {
 		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
 		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
-		mutex_init(&dc->timer_lock);
 		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
 		if (!dc->kdelayd_wq) {
 			ret = -EINVAL;
@@ -312,7 +301,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
 	struct dm_delay_info *delayed;
 	unsigned long expires = 0;
 
-	if (!c->delay || !atomic_read(&dc->may_delay))
+	if (!c->delay)
 		return DM_MAPIO_REMAPPED;
 
 	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -321,6 +310,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
 	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
 
 	mutex_lock(&delayed_bios_lock);
+	if (unlikely(!dc->may_delay)) {
+		mutex_unlock(&delayed_bios_lock);
+		return DM_MAPIO_REMAPPED;
+	}
 	c->ops++;
 	list_add_tail(&delayed->list, &dc->delayed_bios);
 	mutex_unlock(&delayed_bios_lock);
@@ -337,21 +330,20 @@ static void delay_presuspend(struct dm_target *ti)
 {
 	struct delay_c *dc = ti->private;
 
-	atomic_set(&dc->may_delay, 0);
+	mutex_lock(&delayed_bios_lock);
+	dc->may_delay = false;
+	mutex_unlock(&delayed_bios_lock);
 
-	if (delay_is_fast(dc))
-		flush_delayed_bios_fast(dc, true);
-	else {
+	if (!delay_is_fast(dc))
 		del_timer_sync(&dc->delay_timer);
-		flush_bios(flush_delayed_bios(dc, true));
-	}
+	flush_delayed_bios(dc, true);
 }
 
 static void delay_resume(struct dm_target *ti)
 {
 	struct delay_c *dc = ti->private;
 
-	atomic_set(&dc->may_delay, 1);
+	dc->may_delay = true;
 }
 
 static int delay_map(struct dm_target *ti, struct bio *bio)
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 3ef9f018da60..2099c755119e 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -185,7 +185,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
 {
 	if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
 				 data, 1 << v->data_dev_block_bits,
-				 verity_io_real_digest(v, io))))
+				 verity_io_real_digest(v, io), true)))
 		return 0;
 
 	return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -386,7 +386,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
 	/* Always re-validate the corrected block against the expected hash */
 	r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
 			1 << v->data_dev_block_bits,
-			verity_io_real_digest(v, io));
+			verity_io_real_digest(v, io), true);
 	if (unlikely(r < 0))
 		return r;
 
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 26adcfea0302..e115fcfe723c 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -135,20 +135,21 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
  * Wrapper for crypto_ahash_init, which handles verity salting.
  */
 static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
-				struct crypto_wait *wait)
+				struct crypto_wait *wait, bool may_sleep)
 {
 	int r;
 
 	ahash_request_set_tfm(req, v->tfm);
-	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
-					CRYPTO_TFM_REQ_MAY_BACKLOG,
-					crypto_req_done, (void *)wait);
+	ahash_request_set_callback(req,
+		may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
+		crypto_req_done, (void *)wait);
 	crypto_init_wait(wait);
 
 	r = crypto_wait_req(crypto_ahash_init(req), wait);
 
 	if (unlikely(r < 0)) {
-		DMERR("crypto_ahash_init failed: %d", r);
+		if (r != -ENOMEM)
+			DMERR("crypto_ahash_init failed: %d", r);
 		return r;
 	}
 
@@ -179,12 +180,12 @@ out:
 }
 
 int verity_hash(struct dm_verity *v, struct ahash_request *req,
-		const u8 *data, size_t len, u8 *digest)
+		const u8 *data, size_t len, u8 *digest, bool may_sleep)
 {
 	int r;
 	struct crypto_wait wait;
 
-	r = verity_hash_init(v, req, &wait);
+	r = verity_hash_init(v, req, &wait, may_sleep);
 	if (unlikely(r < 0))
 		goto out;
 
@@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
 
 		r = verity_hash(v, verity_io_hash_req(v, io),
 				data, 1 << v->hash_dev_block_bits,
-				verity_io_real_digest(v, io));
+				verity_io_real_digest(v, io), !io->in_tasklet);
 		if (unlikely(r < 0))
 			goto release_ret_r;
 
@@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io)
 			continue;
 		}
 
-		r = verity_hash_init(v, req, &wait);
+		r = verity_hash_init(v, req, &wait, !io->in_tasklet);
 		if (unlikely(r < 0))
 			return r;
 
@@ -652,7 +653,7 @@ static void verity_tasklet(unsigned long data)
 
 	io->in_tasklet = true;
 	err = verity_verify_io(io);
-	if (err == -EAGAIN) {
+	if (err == -EAGAIN || err == -ENOMEM) {
 		/* fallback to retrying with work-queue */
 		INIT_WORK(&io->work, verity_work);
 		queue_work(io->v->verify_wq, &io->work);
@@ -1033,7 +1034,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
 		goto out;
 
 	r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
-			v->zero_digest);
+			v->zero_digest, true);
 
 out:
 	kfree(req);
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 2f555b420367..f96f4e281ee4 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -128,7 +128,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
 					      u8 *data, size_t len));
 
 extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
-		       const u8 *data, size_t len, u8 *digest);
+		       const u8 *data, size_t len, u8 *digest, bool may_sleep);
 
 extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
 				 sector_t block, u8 *digest, bool *is_zero);
diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c
index 539d8920c202..bb0d92461b08 100644
--- a/drivers/parisc/power.c
+++ b/drivers/parisc/power.c
@@ -176,7 +176,7 @@ static struct notifier_block parisc_panic_block = {
 static int qemu_power_off(struct sys_off_data *data)
 {
 	/* this turns the system off via SeaBIOS */
-	*(int *)data->cb_data = 0;
+	gsc_writel(0, (unsigned long) data->cb_data);
 	pdc_soft_power_button(1);
 	return NOTIFY_DONE;
 }
diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
index cd6ac04c1468..c3104714b480 100644
--- a/drivers/platform/x86/amd/pmc/pmc.c
+++ b/drivers/platform/x86/amd/pmc/pmc.c
@@ -964,33 +964,6 @@ static const struct pci_device_id pmc_pci_ids[] = {
 	{ }
 };
 
-static int amd_pmc_get_dram_size(struct amd_pmc_dev *dev)
-{
-	int ret;
-
-	switch (dev->cpu_id) {
-	case AMD_CPU_ID_YC:
-		if (!(dev->major > 90 || (dev->major == 90 && dev->minor > 39))) {
-			ret = -EINVAL;
-			goto err_dram_size;
-		}
-		break;
-	default:
-		ret = -EINVAL;
-		goto err_dram_size;
-	}
-
-	ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true);
-	if (ret || !dev->dram_size)
-		goto err_dram_size;
-
-	return 0;
-
-err_dram_size:
-	dev_err(dev->dev, "DRAM size command not supported for this platform\n");
-	return ret;
-}
-
 static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
 {
 	u32 phys_addr_low, phys_addr_hi;
@@ -1009,8 +982,8 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
 		return -EIO;
 
 	/* Get DRAM size */
-	ret = amd_pmc_get_dram_size(dev);
-	if (ret)
+	ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true);
+	if (ret || !dev->dram_size)
 		dev->dram_size = S2D_TELEMETRY_DRAMBYTES_MAX;
 
 	/* Get STB DRAM address */
diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
index 5798b49ddaba..8c9f4f3227fc 100644
--- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
+++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
@@ -588,17 +588,14 @@ static void release_attributes_data(void)
 static int hp_add_other_attributes(int attr_type)
 {
 	struct kobject *attr_name_kobj;
-	union acpi_object *obj = NULL;
 	int ret;
 	char *attr_name;
 
-	mutex_lock(&bioscfg_drv.mutex);
-
 	attr_name_kobj = kzalloc(sizeof(*attr_name_kobj), GFP_KERNEL);
-	if (!attr_name_kobj) {
-		ret = -ENOMEM;
-		goto err_other_attr_init;
-	}
+	if (!attr_name_kobj)
+		return -ENOMEM;
+
+	mutex_lock(&bioscfg_drv.mutex);
 
 	/* Check if attribute type is supported */
 	switch (attr_type) {
@@ -615,14 +612,14 @@ static int hp_add_other_attributes(int attr_type)
 	default:
 		pr_err("Error: Unknown attr_type: %d\n", attr_type);
 		ret = -EINVAL;
-		goto err_other_attr_init;
+		kfree(attr_name_kobj);
+		goto unlock_drv_mutex;
 	}
 
 	ret = kobject_init_and_add(attr_name_kobj, &attr_name_ktype,
 				   NULL, "%s", attr_name);
 	if (ret) {
 		pr_err("Error encountered [%d]\n", ret);
-		kobject_put(attr_name_kobj);
 		goto err_other_attr_init;
 	}
 
@@ -630,27 +627,26 @@ static int hp_add_other_attributes(int attr_type)
 	switch (attr_type) {
 	case HPWMI_SECURE_PLATFORM_TYPE:
 		ret = hp_populate_secure_platform_data(attr_name_kobj);
-		if (ret)
-			goto err_other_attr_init;
 		break;
 
 	case HPWMI_SURE_START_TYPE:
 		ret = hp_populate_sure_start_data(attr_name_kobj);
-		if (ret)
-			goto err_other_attr_init;
 		break;
 
 	default:
 		ret = -EINVAL;
-		goto err_other_attr_init;
 	}
 
+	if (ret)
+		goto err_other_attr_init;
+
 	mutex_unlock(&bioscfg_drv.mutex);
 	return 0;
 
 err_other_attr_init:
+	kobject_put(attr_name_kobj);
+unlock_drv_mutex:
 	mutex_unlock(&bioscfg_drv.mutex);
-	kfree(obj);
 	return ret;
 }
 
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index ac037540acfc..88eefccb6ed2 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -1425,18 +1425,17 @@ static int ideapad_kbd_bl_init(struct ideapad_private *priv)
 	if (WARN_ON(priv->kbd_bl.initialized))
 		return -EEXIST;
 
-	brightness = ideapad_kbd_bl_brightness_get(priv);
-	if (brightness < 0)
-		return brightness;
-
-	priv->kbd_bl.last_brightness = brightness;
-
 	if (ideapad_kbd_bl_check_tristate(priv->kbd_bl.type)) {
 		priv->kbd_bl.led.max_brightness = 2;
 	} else {
 		priv->kbd_bl.led.max_brightness = 1;
 	}
 
+	brightness = ideapad_kbd_bl_brightness_get(priv);
+	if (brightness < 0)
+		return brightness;
+
+	priv->kbd_bl.last_brightness = brightness;
 	priv->kbd_bl.led.name                    = "platform::" LED_FUNCTION_KBD_BACKLIGHT;
 	priv->kbd_bl.led.brightness_get          = ideapad_kbd_bl_led_cdev_brightness_get;
 	priv->kbd_bl.led.brightness_set_blocking = ideapad_kbd_bl_led_cdev_brightness_set;
diff --git a/drivers/platform/x86/intel/telemetry/core.c b/drivers/platform/x86/intel/telemetry/core.c
index fdf55b5d6948..e4be40f73eeb 100644
--- a/drivers/platform/x86/intel/telemetry/core.c
+++ b/drivers/platform/x86/intel/telemetry/core.c
@@ -102,7 +102,7 @@ static const struct telemetry_core_ops telm_defpltops = {
 /**
  * telemetry_update_events() - Update telemetry Configuration
  * @pss_evtconfig: PSS related config. No change if num_evts = 0.
- * @pss_evtconfig: IOSS related config. No change if num_evts = 0.
+ * @ioss_evtconfig: IOSS related config. No change if num_evts = 0.
  *
  * This API updates the IOSS & PSS Telemetry configuration. Old config
  * is overwritten. Call telemetry_reset_events when logging is over
@@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(telemetry_reset_events);
 /**
  * telemetry_get_eventconfig() - Returns the pss and ioss events enabled
  * @pss_evtconfig: Pointer to PSS related configuration.
- * @pss_evtconfig: Pointer to IOSS related configuration.
+ * @ioss_evtconfig: Pointer to IOSS related configuration.
  * @pss_len:	   Number of u32 elements allocated for pss_evtconfig array
  * @ioss_len:	   Number of u32 elements allocated for ioss_evtconfig array
  *
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 32d1e73e46ee..03348f605c2e 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1837,8 +1837,16 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
 		}
 
 		spin_lock_irqsave(qp->qp_lock_ptr, *flags);
-		if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
-			sp->done(sp, res);
+		switch (sp->type) {
+		case SRB_SCSI_CMD:
+			if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
+				sp->done(sp, res);
+			break;
+		default:
+			if (ret_cmd)
+				sp->done(sp, res);
+			break;
+		}
 	} else {
 		sp->done(sp, res);
 	}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 67922e2c4c19..6d8218a44122 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1019,7 +1019,7 @@ static ssize_t sdebug_error_write(struct file *file, const char __user *ubuf,
 	struct sdebug_err_inject *inject;
 	struct scsi_device *sdev = (struct scsi_device *)file->f_inode->i_private;
 
-	buf = kmalloc(count, GFP_KERNEL);
+	buf = kzalloc(count + 1, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
@@ -1132,7 +1132,6 @@ static const struct file_operations sdebug_target_reset_fail_fops = {
 static int sdebug_target_alloc(struct scsi_target *starget)
 {
 	struct sdebug_target_info *targetip;
-	struct dentry *dentry;
 
 	targetip = kzalloc(sizeof(struct sdebug_target_info), GFP_KERNEL);
 	if (!targetip)
@@ -1140,15 +1139,9 @@ static int sdebug_target_alloc(struct scsi_target *starget)
 
 	targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev),
 				sdebug_debugfs_root);
-	if (IS_ERR_OR_NULL(targetip->debugfs_entry))
-		pr_info("%s: failed to create debugfs directory for target %s\n",
-			__func__, dev_name(&starget->dev));
 
 	debugfs_create_file("fail_reset", 0600, targetip->debugfs_entry, starget,
 				&sdebug_target_reset_fail_fops);
-	if (IS_ERR_OR_NULL(dentry))
-		pr_info("%s: failed to create fail_reset file for target %s\n",
-			__func__, dev_name(&starget->dev));
 
 	starget->hostdata = targetip;
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 530918cbfce2..fa00dd503cbf 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1643,24 +1643,21 @@ out:
 	return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
-static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
+static int sd_sync_cache(struct scsi_disk *sdkp)
 {
 	int retries, res;
 	struct scsi_device *sdp = sdkp->device;
 	const int timeout = sdp->request_queue->rq_timeout
 		* SD_FLUSH_TIMEOUT_MULTIPLIER;
-	struct scsi_sense_hdr my_sshdr;
+	struct scsi_sense_hdr sshdr;
 	const struct scsi_exec_args exec_args = {
 		.req_flags = BLK_MQ_REQ_PM,
-		/* caller might not be interested in sense, but we need it */
-		.sshdr = sshdr ? : &my_sshdr,
+		.sshdr = &sshdr,
 	};
 
 	if (!scsi_device_online(sdp))
 		return -ENODEV;
 
-	sshdr = exec_args.sshdr;
-
 	for (retries = 3; retries > 0; --retries) {
 		unsigned char cmd[16] = { 0 };
 
@@ -1685,15 +1682,23 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
 			return res;
 
 		if (scsi_status_is_check_condition(res) &&
-		    scsi_sense_valid(sshdr)) {
-			sd_print_sense_hdr(sdkp, sshdr);
+		    scsi_sense_valid(&sshdr)) {
+			sd_print_sense_hdr(sdkp, &sshdr);
 
 			/* we need to evaluate the error return  */
-			if (sshdr->asc == 0x3a ||	/* medium not present */
-			    sshdr->asc == 0x20 ||	/* invalid command */
-			    (sshdr->asc == 0x74 && sshdr->ascq == 0x71))	/* drive is password locked */
+			if (sshdr.asc == 0x3a ||	/* medium not present */
+			    sshdr.asc == 0x20 ||	/* invalid command */
+			    (sshdr.asc == 0x74 && sshdr.ascq == 0x71))	/* drive is password locked */
 				/* this is no error here */
 				return 0;
+			/*
+			 * This drive doesn't support sync and there's not much
+			 * we can do because this is called during shutdown
+			 * or suspend so just return success so those operations
+			 * can proceed.
+			 */
+			if (sshdr.sense_key == ILLEGAL_REQUEST)
+				return 0;
 		}
 
 		switch (host_byte(res)) {
@@ -3853,7 +3858,7 @@ static void sd_shutdown(struct device *dev)
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		sd_sync_cache(sdkp, NULL);
+		sd_sync_cache(sdkp);
 	}
 
 	if ((system_state != SYSTEM_RESTART &&
@@ -3874,7 +3879,6 @@ static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime)
 static int sd_suspend_common(struct device *dev, bool runtime)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
-	struct scsi_sense_hdr sshdr;
 	int ret = 0;
 
 	if (!sdkp)	/* E.g.: runtime suspend following sd_remove() */
@@ -3883,24 +3887,13 @@ static int sd_suspend_common(struct device *dev, bool runtime)
 	if (sdkp->WCE && sdkp->media_present) {
 		if (!sdkp->device->silence_suspend)
 			sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		ret = sd_sync_cache(sdkp, &sshdr);
-
-		if (ret) {
-			/* ignore OFFLINE device */
-			if (ret == -ENODEV)
-				return 0;
-
-			if (!scsi_sense_valid(&sshdr) ||
-			    sshdr.sense_key != ILLEGAL_REQUEST)
-				return ret;
+		ret = sd_sync_cache(sdkp);
+		/* ignore OFFLINE device */
+		if (ret == -ENODEV)
+			return 0;
 
-			/*
-			 * sshdr.sense_key == ILLEGAL_REQUEST means this drive
-			 * doesn't support sync. There's not much to do and
-			 * suspend shouldn't fail.
-			 */
-			ret = 0;
-		}
+		if (ret)
+			return ret;
 	}
 
 	if (sd_do_start_stop(sdkp->device, runtime)) {
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 2ba8ec254dce..0787456c2b89 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -436,7 +436,7 @@ int ufshcd_mcq_init(struct ufs_hba *hba)
 
 	for (i = 0; i < hba->nr_hw_queues; i++) {
 		hwq = &hba->uhq[i];
-		hwq->max_entries = hba->nutrs;
+		hwq->max_entries = hba->nutrs + 1;
 		spin_lock_init(&hwq->sq_lock);
 		spin_lock_init(&hwq->cq_lock);
 		mutex_init(&hwq->sq_mutex);
@@ -630,6 +630,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
 	int tag = scsi_cmd_to_rq(cmd)->tag;
 	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
 	struct ufs_hw_queue *hwq;
+	unsigned long flags;
 	int err = FAILED;
 
 	if (!ufshcd_cmd_inflight(lrbp->cmd)) {
@@ -670,8 +671,10 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
 	}
 
 	err = SUCCESS;
+	spin_lock_irqsave(&hwq->cq_lock, flags);
 	if (ufshcd_cmd_inflight(lrbp->cmd))
 		ufshcd_release_scsi_cmd(hba, lrbp);
+	spin_unlock_irqrestore(&hwq->cq_lock, flags);
 
 out:
 	return err;
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index ef02c9bb0354..23c0834a97a4 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -313,17 +313,17 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
 				  bp.level - 1,
 				  0);
 	b = bch2_btree_iter_peek_node(iter);
-	if (IS_ERR(b))
+	if (IS_ERR_OR_NULL(b))
 		goto err;
 
 	BUG_ON(b->c.level != bp.level - 1);
 
-	if (b && extent_matches_bp(c, bp.btree_id, bp.level,
-				   bkey_i_to_s_c(&b->key),
-				   bucket, bp))
+	if (extent_matches_bp(c, bp.btree_id, bp.level,
+			      bkey_i_to_s_c(&b->key),
+			      bucket, bp))
 		return b;
 
-	if (b && btree_node_will_make_reachable(b)) {
+	if (btree_node_will_make_reachable(b)) {
 		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
 	} else {
 		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 9cb8684959ee..403aa3389fcc 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -617,7 +617,7 @@ struct journal_seq_blacklist_table {
 		u64		start;
 		u64		end;
 		bool		dirty;
-	}			entries[0];
+	}			entries[];
 };
 
 struct journal_keys {
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index c2adf3fbb0b3..6fa90bcd7016 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -3087,8 +3087,6 @@ void bch2_trans_put(struct btree_trans *trans)
 		srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
 	}
 
-	bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
 	kfree(trans->extra_journal_entries.data);
 
 	if (trans->fs_usage_deltas) {
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 9b78f78a75b5..37fbf22de8fc 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -89,10 +89,13 @@ static void bkey_cached_free(struct btree_key_cache *bc,
 	ck->btree_trans_barrier_seq =
 		start_poll_synchronize_srcu(&c->btree_trans_barrier);
 
-	if (ck->c.lock.readers)
+	if (ck->c.lock.readers) {
 		list_move_tail(&ck->list, &bc->freed_pcpu);
-	else
+		bc->nr_freed_pcpu++;
+	} else {
 		list_move_tail(&ck->list, &bc->freed_nonpcpu);
+		bc->nr_freed_nonpcpu++;
+	}
 	atomic_long_inc(&bc->nr_freed);
 
 	kfree(ck->k);
@@ -109,6 +112,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
 {
 	struct bkey_cached *pos;
 
+	bc->nr_freed_nonpcpu++;
+
 	list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
 		if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
 				 pos->btree_trans_barrier_seq)) {
@@ -158,6 +163,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 #else
 		mutex_lock(&bc->lock);
 		list_move_tail(&ck->list, &bc->freed_nonpcpu);
+		bc->nr_freed_nonpcpu++;
 		mutex_unlock(&bc->lock);
 #endif
 	} else {
@@ -217,6 +223,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 			       f->nr < ARRAY_SIZE(f->objs) / 2) {
 				ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
 				list_del_init(&ck->list);
+				bc->nr_freed_nonpcpu--;
 				f->objs[f->nr++] = ck;
 			}
 
@@ -229,6 +236,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 		if (!list_empty(&bc->freed_nonpcpu)) {
 			ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
 			list_del_init(&ck->list);
+			bc->nr_freed_nonpcpu--;
 		}
 		mutex_unlock(&bc->lock);
 #endif
@@ -664,7 +672,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 		goto out;
 
 	bch2_journal_pin_drop(j, &ck->journal);
-	bch2_journal_preres_put(j, &ck->res);
 
 	BUG_ON(!btree_node_locked(c_iter.path, 0));
 
@@ -762,18 +769,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 
 	BUG_ON(insert->k.u64s > ck->u64s);
 
-	if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-		int difference;
-
-		BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
-
-		difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
-		if (difference > 0) {
-			trans->journal_preres.u64s	-= difference;
-			ck->res.u64s			+= difference;
-		}
-	}
-
 	bkey_copy(ck->k, insert);
 	ck->valid = true;
 
@@ -850,6 +845,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 	 * Newest freed entries are at the end of the list - once we hit one
 	 * that's too new to be freed, we can bail out:
 	 */
+	scanned += bc->nr_freed_nonpcpu;
+
 	list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
 		if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
 						 ck->btree_trans_barrier_seq))
@@ -859,13 +856,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
-		scanned++;
 		freed++;
+		bc->nr_freed_nonpcpu--;
 	}
 
 	if (scanned >= nr)
 		goto out;
 
+	scanned += bc->nr_freed_pcpu;
+
 	list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
 		if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
 						 ck->btree_trans_barrier_seq))
@@ -875,8 +874,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
-		scanned++;
 		freed++;
+		bc->nr_freed_pcpu--;
 	}
 
 	if (scanned >= nr)
@@ -982,6 +981,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	}
 #endif
 
+	BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+	BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
 	list_splice(&bc->freed_pcpu,	&items);
 	list_splice(&bc->freed_nonpcpu,	&items);
 
@@ -991,7 +993,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 		cond_resched();
 
 		bch2_journal_pin_drop(&c->journal, &ck->journal);
-		bch2_journal_preres_put(&c->journal, &ck->res);
 
 		list_del(&ck->list);
 		kfree(ck->k);
diff --git a/fs/bcachefs/btree_key_cache_types.h b/fs/bcachefs/btree_key_cache_types.h
new file mode 100644
index 000000000000..290e4e57df5b
--- /dev/null
+++ b/fs/bcachefs/btree_key_cache_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+
+struct btree_key_cache_freelist {
+	struct bkey_cached	*objs[16];
+	unsigned		nr;
+};
+
+struct btree_key_cache {
+	struct mutex		lock;
+	struct rhashtable	table;
+	bool			table_init_done;
+
+	struct list_head	freed_pcpu;
+	size_t			nr_freed_pcpu;
+	struct list_head	freed_nonpcpu;
+	size_t			nr_freed_nonpcpu;
+
+	struct shrinker		*shrink;
+	unsigned		shrink_iter;
+	struct btree_key_cache_freelist __percpu *pcpu_freed;
+
+	atomic_long_t		nr_freed;
+	atomic_long_t		nr_keys;
+	atomic_long_t		nr_dirty;
+};
+
+struct bkey_cached_key {
+	u32			btree_id;
+	struct bpos		pos;
+} __packed __aligned(4);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index decad7b66c59..12907beda98c 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -78,6 +78,53 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
 		bch2_btree_init_next(trans, b);
 }
 
+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+	while (--i >= trans->updates) {
+		if (same_leaf_as_prev(trans, i))
+			continue;
+
+		bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
+	}
+
+	trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
+	return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
+}
+
+static inline int bch2_trans_lock_write(struct btree_trans *trans)
+{
+	struct btree_insert_entry *i;
+
+	EBUG_ON(trans->write_locked);
+
+	trans_for_each_update(trans, i) {
+		if (same_leaf_as_prev(trans, i))
+			continue;
+
+		if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
+			return trans_lock_write_fail(trans, i);
+
+		if (!i->cached)
+			bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
+	}
+
+	trans->write_locked = true;
+	return 0;
+}
+
+static inline void bch2_trans_unlock_write(struct btree_trans *trans)
+{
+	if (likely(trans->write_locked)) {
+		struct btree_insert_entry *i;
+
+		trans_for_each_update(trans, i)
+			if (!same_leaf_as_prev(trans, i))
+				bch2_btree_node_unlock_write_inlined(trans, i->path,
+								     insert_l(i)->b);
+		trans->write_locked = false;
+	}
+}
+
 /* Inserting into a given leaf node (last stage of insert): */
 
 /* Handle overwrites and do insert, for non extents: */
@@ -276,17 +323,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
 		bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
 }
 
-static noinline int
-bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
-				   unsigned long trace_ip)
-{
-	return drop_locks_do(trans,
-		bch2_journal_preres_get(&trans->c->journal,
-			&trans->journal_preres,
-			trans->journal_preres_u64s,
-			(flags & BCH_WATERMARK_MASK)));
-}
-
 static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
 						      unsigned flags)
 {
@@ -321,6 +357,45 @@ static inline int btree_key_can_insert(struct btree_trans *trans,
 	return 0;
 }
 
+noinline static int
+btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
+				     struct btree_path *path, unsigned new_u64s)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_insert_entry *i;
+	struct bkey_cached *ck = (void *) path->l[0].b;
+	struct bkey_i *new_k;
+	int ret;
+
+	bch2_trans_unlock_write(trans);
+	bch2_trans_unlock(trans);
+
+	new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+	if (!new_k) {
+		bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+			bch2_btree_id_str(path->btree_id), new_u64s);
+		return -BCH_ERR_ENOMEM_btree_key_cache_insert;
+	}
+
+	ret =   bch2_trans_relock(trans) ?:
+		bch2_trans_lock_write(trans);
+	if (unlikely(ret)) {
+		kfree(new_k);
+		return ret;
+	}
+
+	memcpy(new_k, ck->k, ck->u64s * sizeof(u64));
+
+	trans_for_each_update(trans, i)
+		if (i->old_v == &ck->k->v)
+			i->old_v = &new_k->v;
+
+	kfree(ck->k);
+	ck->u64s	= new_u64s;
+	ck->k		= new_k;
+	return 0;
+}
+
 static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
 				       struct btree_path *path, unsigned u64s)
 {
@@ -347,12 +422,9 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
 		return 0;
 
 	new_u64s	= roundup_pow_of_two(u64s);
-	new_k		= krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
-	if (!new_k) {
-		bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
-			bch2_btree_id_str(path->btree_id), new_u64s);
-		return -BCH_ERR_ENOMEM_btree_key_cache_insert;
-	}
+	new_k		= krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
+	if (unlikely(!new_k))
+		return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
 
 	trans_for_each_update(trans, i)
 		if (i->old_v == &ck->k->v)
@@ -732,37 +804,6 @@ revert_fs_usage:
 	return ret;
 }
 
-static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
-{
-	while (--i >= trans->updates) {
-		if (same_leaf_as_prev(trans, i))
-			continue;
-
-		bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
-	}
-
-	trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
-	return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
-}
-
-static inline int trans_lock_write(struct btree_trans *trans)
-{
-	struct btree_insert_entry *i;
-
-	trans_for_each_update(trans, i) {
-		if (same_leaf_as_prev(trans, i))
-			continue;
-
-		if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
-			return trans_lock_write_fail(trans, i);
-
-		if (!i->cached)
-			bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
-	}
-
-	return 0;
-}
-
 static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
 {
 	struct btree_insert_entry *i;
@@ -830,15 +871,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
 		}
 	}
 
-	ret = bch2_journal_preres_get(&c->journal,
-			&trans->journal_preres, trans->journal_preres_u64s,
-			(flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
-	if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
-		ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
-	if (unlikely(ret))
-		return ret;
-
-	ret = trans_lock_write(trans);
+	ret = bch2_trans_lock_write(trans);
 	if (unlikely(ret))
 		return ret;
 
@@ -847,10 +880,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
 	if (!ret && unlikely(trans->journal_replay_not_finished))
 		bch2_drop_overwrites_from_journal(trans);
 
-	trans_for_each_update(trans, i)
-		if (!same_leaf_as_prev(trans, i))
-			bch2_btree_node_unlock_write_inlined(trans, i->path,
-							insert_l(i)->b);
+	bch2_trans_unlock_write(trans);
 
 	if (!ret && trans->journal_pin)
 		bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
@@ -1003,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i = NULL;
 	struct btree_write_buffered_key *wb;
-	unsigned u64s;
 	int ret = 0;
 
 	if (!trans->nr_updates &&
@@ -1063,13 +1092,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 
 	EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
 
-	memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
 	trans->journal_u64s		= trans->extra_journal_entries.nr;
-	trans->journal_preres_u64s	= 0;
-
 	trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
-
 	if (trans->journal_transaction_names)
 		trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
 
@@ -1085,16 +1109,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 		if (i->key_cache_already_flushed)
 			continue;
 
-		/* we're going to journal the key being updated: */
-		u64s = jset_u64s(i->k->k.u64s);
-		if (i->cached &&
-		    likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
-			trans->journal_preres_u64s += u64s;
-
 		if (i->flags & BTREE_UPDATE_NOJOURNAL)
 			continue;
 
-		trans->journal_u64s += u64s;
+		/* we're going to journal the key being updated: */
+		trans->journal_u64s += jset_u64s(i->k->k.u64s);
 
 		/* and we're also going to log the overwrite: */
 		if (trans->journal_transaction_names)
@@ -1126,8 +1145,6 @@ retry:
 
 	trace_and_count(c, transaction_commit, trans, _RET_IP_);
 out:
-	bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
 	if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
 		bch2_write_ref_put(c, BCH_WRITE_REF_trans);
 out_reset:
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 941841a0c5bf..60453ba86c4b 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -5,7 +5,7 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 
-//#include "bkey_methods.h"
+#include "btree_key_cache_types.h"
 #include "buckets_types.h"
 #include "darray.h"
 #include "errcode.h"
@@ -312,31 +312,6 @@ struct btree_iter {
 #endif
 };
 
-struct btree_key_cache_freelist {
-	struct bkey_cached	*objs[16];
-	unsigned		nr;
-};
-
-struct btree_key_cache {
-	struct mutex		lock;
-	struct rhashtable	table;
-	bool			table_init_done;
-	struct list_head	freed_pcpu;
-	struct list_head	freed_nonpcpu;
-	struct shrinker		*shrink;
-	unsigned		shrink_iter;
-	struct btree_key_cache_freelist __percpu *pcpu_freed;
-
-	atomic_long_t		nr_freed;
-	atomic_long_t		nr_keys;
-	atomic_long_t		nr_dirty;
-};
-
-struct bkey_cached_key {
-	u32			btree_id;
-	struct bpos		pos;
-} __packed __aligned(4);
-
 #define BKEY_CACHED_ACCESSED		0
 #define BKEY_CACHED_DIRTY		1
 
@@ -352,7 +327,6 @@ struct bkey_cached {
 	struct rhash_head	hash;
 	struct list_head	list;
 
-	struct journal_preres	res;
 	struct journal_entry_pin journal;
 	u64			seq;
 
@@ -389,11 +363,7 @@ struct btree_insert_entry {
 	unsigned long		ip_allocated;
 };
 
-#ifndef CONFIG_LOCKDEP
 #define BTREE_ITER_MAX		64
-#else
-#define BTREE_ITER_MAX		32
-#endif
 
 struct btree_trans_commit_hook;
 typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@@ -434,6 +404,7 @@ struct btree_trans {
 	bool			journal_transaction_names:1;
 	bool			journal_replay_not_finished:1;
 	bool			notrace_relock_fail:1;
+	bool			write_locked:1;
 	enum bch_errcode	restarted:16;
 	u32			restart_count;
 	unsigned long		last_begin_ip;
@@ -465,11 +436,9 @@ struct btree_trans {
 	struct journal_entry_pin *journal_pin;
 
 	struct journal_res	journal_res;
-	struct journal_preres	journal_preres;
 	u64			*journal_seq;
 	struct disk_reservation *disk_res;
 	unsigned		journal_u64s;
-	unsigned		journal_preres_u64s;
 	struct replicas_delta_list *fs_usage_deltas;
 };
 
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 39c2db68123b..76f27bc9fa24 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -513,8 +513,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
 		up_read(&c->gc_lock);
 	as->took_gc_lock = false;
 
-	bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
 	bch2_journal_pin_drop(&c->journal, &as->journal);
 	bch2_journal_pin_flush(&c->journal, &as->journal);
 	bch2_disk_reservation_put(c, &as->disk_res);
@@ -734,8 +732,6 @@ err:
 
 	bch2_journal_pin_drop(&c->journal, &as->journal);
 
-	bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
 	mutex_lock(&c->btree_interior_update_lock);
 	for (i = 0; i < as->nr_new_nodes; i++) {
 		b = as->new_nodes[i];
@@ -1047,7 +1043,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 	unsigned nr_nodes[2] = { 0, 0 };
 	unsigned update_level = level;
 	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-	unsigned journal_flags = 0;
 	int ret = 0;
 	u32 restart_count = trans->restart_count;
 
@@ -1061,10 +1056,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 	flags &= ~BCH_WATERMARK_MASK;
 	flags |= watermark;
 
-	if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
-		journal_flags |= JOURNAL_RES_GET_NONBLOCK;
-	journal_flags |= watermark;
-
 	while (1) {
 		nr_nodes[!!update_level] += 1 + split;
 		update_level++;
@@ -1129,27 +1120,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 	if (ret)
 		goto err;
 
-	ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
-				      BTREE_UPDATE_JOURNAL_RES,
-				      journal_flags|JOURNAL_RES_GET_NONBLOCK);
-	if (ret) {
-		if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
-			ret = -BCH_ERR_journal_reclaim_would_deadlock;
-			goto err;
-		}
-
-		ret = drop_locks_do(trans,
-			bch2_journal_preres_get(&c->journal, &as->journal_preres,
-					      BTREE_UPDATE_JOURNAL_RES,
-					      journal_flags));
-		if (ret == -BCH_ERR_journal_preres_get_blocked) {
-			trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
-			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
-		}
-		if (ret)
-			goto err;
-	}
-
 	ret = bch2_disk_reservation_get(c, &as->disk_res,
 			(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
 			c->opts.metadata_replicas,
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 4df21512d640..031076e75fa1 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -55,7 +55,6 @@ struct btree_update {
 	unsigned			update_level;
 
 	struct disk_reservation		disk_res;
-	struct journal_preres		journal_preres;
 
 	/*
 	 * BTREE_INTERIOR_UPDATING_NODE:
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 0771a6d880bf..5ed66202c226 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -239,6 +239,34 @@ restart_drop_extra_replicas:
 
 		next_pos = insert->k.p;
 
+		/*
+		 * Check for nonce offset inconsistency:
+		 * This is debug code - we've been seeing this bug rarely, and
+		 * it's been hard to reproduce, so this should give us some more
+		 * information when it does occur:
+		 */
+		struct printbuf err = PRINTBUF;
+		int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
+		printbuf_exit(&err);
+
+		if (invalid) {
+			struct printbuf buf = PRINTBUF;
+
+			prt_str(&buf, "about to insert invalid key in data update path");
+			prt_str(&buf, "\nold: ");
+			bch2_bkey_val_to_text(&buf, c, old);
+			prt_str(&buf, "\nk:   ");
+			bch2_bkey_val_to_text(&buf, c, k);
+			prt_str(&buf, "\nnew: ");
+			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+			bch2_print_string_as_lines(KERN_ERR, buf.buf);
+			printbuf_exit(&buf);
+
+			bch2_fatal_error(c);
+			goto out;
+		}
+
 		ret =   bch2_insert_snapshot_whiteouts(trans, m->btree_id,
 						k.k->p, bkey_start_pos(&insert->k)) ?:
 			bch2_insert_snapshot_whiteouts(trans, m->btree_id,
diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c
index d613695abf9f..4d0cb0ccff32 100644
--- a/fs/bcachefs/disk_groups.c
+++ b/fs/bcachefs/disk_groups.c
@@ -555,6 +555,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
 	case TARGET_DEV: {
 		struct bch_dev *ca;
 
+		out->atomic++;
 		rcu_read_lock();
 		ca = t.dev < c->sb.nr_devices
 			? rcu_dereference(c->devs[t.dev])
@@ -570,6 +571,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
 		}
 
 		rcu_read_unlock();
+		out->atomic--;
 		break;
 	}
 	case TARGET_GROUP:
@@ -580,7 +582,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
 	}
 }
 
-void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
+static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
 {
 	struct target t = target_decode(v);
 
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 875f7c5a6fca..2a77de18c004 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1373,6 +1373,15 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
 			h->nr_active_devs++;
 
 	rcu_read_unlock();
+
+	/*
+	 * If we only have redundancy + 1 devices, we're better off with just
+	 * replication:
+	 */
+	if (h->nr_active_devs < h->redundancy + 2)
+		bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
+			h->nr_active_devs, h->redundancy + 2);
+
 	list_add(&h->list, &c->ec_stripe_head_list);
 	return h;
 }
@@ -1424,6 +1433,11 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
 
 	h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
 found:
+	if (!IS_ERR_OR_NULL(h) &&
+	    h->nr_active_devs < h->redundancy + 2) {
+		mutex_unlock(&h->lock);
+		h = NULL;
+	}
 	mutex_unlock(&c->ec_stripe_head_lock);
 	return h;
 }
@@ -1681,8 +1695,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
 	int ret;
 
 	h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
-	if (!h)
-		bch_err(c, "no stripe head");
 	if (IS_ERR_OR_NULL(h))
 		return h;
 
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index 8bd9bcdd27f7..ff664fd0d8ef 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -13,7 +13,7 @@
 
 int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
 				     loff_t start, u64 end,
-				     int fgp_flags, gfp_t gfp,
+				     fgf_t fgp_flags, gfp_t gfp,
 				     folios *fs)
 {
 	struct folio *f;
diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h
index a2222ad586e9..27f712ae37a6 100644
--- a/fs/bcachefs/fs-io-pagecache.h
+++ b/fs/bcachefs/fs-io-pagecache.h
@@ -7,7 +7,7 @@
 typedef DARRAY(struct folio *) folios;
 
 int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
-				     u64, int, gfp_t, folios *);
+				     u64, fgf_t, gfp_t, folios *);
 int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
 
 /*
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 166d8d8abe68..8ef817304e4a 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1922,10 +1922,7 @@ out:
 	return dget(sb->s_root);
 
 err_put_super:
-	sb->s_fs_info = NULL;
-	c->vfs_sb = NULL;
 	deactivate_locked_super(sb);
-	bch2_fs_stop(c);
 	return ERR_PTR(bch2_err_class(ret));
 }
 
@@ -1933,11 +1930,8 @@ static void bch2_kill_sb(struct super_block *sb)
 {
 	struct bch_fs *c = sb->s_fs_info;
 
-	if (c)
-		c->vfs_sb = NULL;
 	generic_shutdown_super(sb);
-	if (c)
-		bch2_fs_free(c);
+	bch2_fs_free(c);
 }
 
 static struct file_system_type bcache_fs_type = {
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 9f3e9bd3d767..e0c5cd119acc 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -2220,7 +2220,7 @@ static int nlink_cmp(const void *_l, const void *_r)
 	const struct nlink *l = _l;
 	const struct nlink *r = _r;
 
-	return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
+	return cmp_int(l->inum, r->inum);
 }
 
 static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index def77f2d8802..c7849b0753e7 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1134,7 +1134,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
 		 * unlinked inodes in the snapshot leaves:
 		 */
 		*need_another_pass = true;
-		return 0;
+		goto out;
 	}
 
 	ret = 1;
@@ -1169,8 +1169,10 @@ again:
 	 */
 	for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
 			   BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-		ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p,
-								     &need_another_pass));
+		ret = commit_do(trans, NULL, NULL,
+				BTREE_INSERT_NOFAIL|
+				BTREE_INSERT_LAZY_RW,
+			may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
 		if (ret < 0)
 			break;
 
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index f02b3f7d26a0..d704a8f829c8 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -795,7 +795,7 @@ static int bch2_write_decrypt(struct bch_write_op *op)
 	 * checksum:
 	 */
 	csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
-	if (bch2_crc_cmp(op->crc.csum, csum))
+	if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
 		return -EIO;
 
 	ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 5b5d69f2316b..23a9b7845d11 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -526,36 +526,6 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 	return ret;
 }
 
-/* journal_preres: */
-
-static bool journal_preres_available(struct journal *j,
-				     struct journal_preres *res,
-				     unsigned new_u64s,
-				     unsigned flags)
-{
-	bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
-
-	if (!ret && mutex_trylock(&j->reclaim_lock)) {
-		bch2_journal_reclaim(j);
-		mutex_unlock(&j->reclaim_lock);
-	}
-
-	return ret;
-}
-
-int __bch2_journal_preres_get(struct journal *j,
-			      struct journal_preres *res,
-			      unsigned new_u64s,
-			      unsigned flags)
-{
-	int ret;
-
-	closure_wait_event(&j->preres_wait,
-		   (ret = bch2_journal_error(j)) ||
-		   journal_preres_available(j, res, new_u64s, flags));
-	return ret;
-}
-
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *j,
@@ -1306,7 +1276,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 	prt_printf(out, "last_seq:\t\t%llu\n",		journal_last_seq(j));
 	prt_printf(out, "last_seq_ondisk:\t%llu\n",		j->last_seq_ondisk);
 	prt_printf(out, "flushed_seq_ondisk:\t%llu\n",	j->flushed_seq_ondisk);
-	prt_printf(out, "prereserved:\t\t%u/%u\n",		j->prereserved.reserved, j->prereserved.remaining);
 	prt_printf(out, "watermark:\t\t%s\n",		bch2_watermarks[j->watermark]);
 	prt_printf(out, "each entry reserved:\t%u\n",	j->entry_u64s_reserved);
 	prt_printf(out, "nr flush writes:\t%llu\n",		j->nr_flush_writes);
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 011711e99c8d..c85d01cf4948 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -395,104 +395,6 @@ out:
 	return 0;
 }
 
-/* journal_preres: */
-
-static inline void journal_set_watermark(struct journal *j)
-{
-	union journal_preres_state s = READ_ONCE(j->prereserved);
-	unsigned watermark = BCH_WATERMARK_stripe;
-
-	if (fifo_free(&j->pin) < j->pin.size / 4)
-		watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
-	if (fifo_free(&j->pin) < j->pin.size / 8)
-		watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
-	if (s.reserved > s.remaining)
-		watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
-	if (!s.remaining)
-		watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
-	if (watermark == j->watermark)
-		return;
-
-	swap(watermark, j->watermark);
-	if (watermark > j->watermark)
-		journal_wake(j);
-}
-
-static inline void bch2_journal_preres_put(struct journal *j,
-					   struct journal_preres *res)
-{
-	union journal_preres_state s = { .reserved = res->u64s };
-
-	if (!res->u64s)
-		return;
-
-	s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
-	res->u64s = 0;
-
-	if (unlikely(s.waiting)) {
-		clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
-			  (unsigned long *) &j->prereserved.v);
-		closure_wake_up(&j->preres_wait);
-	}
-
-	if (s.reserved <= s.remaining && j->watermark)
-		journal_set_watermark(j);
-}
-
-int __bch2_journal_preres_get(struct journal *,
-			struct journal_preres *, unsigned, unsigned);
-
-static inline int bch2_journal_preres_get_fast(struct journal *j,
-					       struct journal_preres *res,
-					       unsigned new_u64s,
-					       unsigned flags,
-					       bool set_waiting)
-{
-	int d = new_u64s - res->u64s;
-	union journal_preres_state old, new;
-	u64 v = atomic64_read(&j->prereserved.counter);
-	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-	int ret;
-
-	do {
-		old.v = new.v = v;
-		ret = 0;
-
-		if (watermark == BCH_WATERMARK_reclaim ||
-		    new.reserved + d < new.remaining) {
-			new.reserved += d;
-			ret = 1;
-		} else if (set_waiting && !new.waiting)
-			new.waiting = true;
-		else
-			return 0;
-	} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-				       old.v, new.v)) != old.v);
-
-	if (ret)
-		res->u64s += d;
-	return ret;
-}
-
-static inline int bch2_journal_preres_get(struct journal *j,
-					  struct journal_preres *res,
-					  unsigned new_u64s,
-					  unsigned flags)
-{
-	if (new_u64s <= res->u64s)
-		return 0;
-
-	if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
-		return 0;
-
-	if (flags & JOURNAL_RES_GET_NONBLOCK)
-		return -BCH_ERR_journal_preres_get_blocked;
-
-	return __bch2_journal_preres_get(j, res, new_u64s, flags);
-}
-
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *,
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index f4bc2cdbfdd7..786a09285509 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1079,6 +1079,12 @@ found:
 
 	if (ja->bucket_seq[ja->cur_idx] &&
 	    ja->sectors_free == ca->mi.bucket_size) {
+#if 0
+		/*
+		 * Debug code for ZNS support, where we (probably) want to be
+		 * correlated where we stopped in the journal to the zone write
+		 * points:
+		 */
 		bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
 		bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
 		for (i = 0; i < 3; i++) {
@@ -1086,6 +1092,7 @@ found:
 
 			bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
 		}
+#endif
 		ja->sectors_free = 0;
 	}
 
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 9a584aaaa2eb..e63c6eda86af 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -50,16 +50,21 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
 	return available;
 }
 
-static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+static inline void journal_set_watermark(struct journal *j, bool low_on_space)
 {
-	union journal_preres_state old, new;
-	u64 v = atomic64_read(&j->prereserved.counter);
+	unsigned watermark = BCH_WATERMARK_stripe;
 
-	do {
-		old.v = new.v = v;
-		new.remaining = u64s_remaining;
-	} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-				       old.v, new.v)) != old.v);
+	if (low_on_space)
+		watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+	if (fifo_free(&j->pin) < j->pin.size / 4)
+		watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+
+	if (watermark == j->watermark)
+		return;
+
+	swap(watermark, j->watermark);
+	if (watermark > j->watermark)
+		journal_wake(j);
 }
 
 static struct journal_space
@@ -162,7 +167,6 @@ void bch2_journal_space_available(struct journal *j)
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
 	unsigned clean, clean_ondisk, total;
-	s64 u64s_remaining = 0;
 	unsigned max_entry_size	 = min(j->buf[0].buf_size >> 9,
 				       j->buf[1].buf_size >> 9);
 	unsigned i, nr_online = 0, nr_devs_want;
@@ -222,16 +226,10 @@ void bch2_journal_space_available(struct journal *j)
 	else
 		clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 
-	u64s_remaining  = (u64) clean << 6;
-	u64s_remaining -= (u64) total << 3;
-	u64s_remaining = max(0LL, u64s_remaining);
-	u64s_remaining /= 4;
-	u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
+	journal_set_watermark(j, clean * 4 <= total);
 out:
 	j->cur_entry_sectors	= !ret ? j->space[journal_space_discarded].next_entry : 0;
 	j->cur_entry_error	= ret;
-	journal_set_remaining(j, u64s_remaining);
-	journal_set_watermark(j);
 
 	if (!ret)
 		journal_wake(j);
@@ -555,11 +553,6 @@ static u64 journal_seq_to_flush(struct journal *j)
 		/* Try to keep the journal at most half full: */
 		nr_buckets = ja->nr / 2;
 
-		/* And include pre-reservations: */
-		nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
-					   (ca->mi.bucket_size << 6) -
-					   journal_entry_overhead(j));
-
 		nr_buckets = min(nr_buckets, ja->nr);
 
 		bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
@@ -638,10 +631,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
 			       msecs_to_jiffies(c->opts.journal_reclaim_delay)))
 			min_nr = 1;
 
-		if (j->prereserved.reserved * 4 > j->prereserved.remaining)
-			min_nr = 1;
-
-		if (fifo_free(&j->pin) <= 32)
+		if (j->watermark != BCH_WATERMARK_stripe)
 			min_nr = 1;
 
 		if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
@@ -652,8 +642,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
 		trace_and_count(c, journal_reclaim_start, c,
 				direct, kicked,
 				min_nr, min_key_cache,
-				j->prereserved.reserved,
-				j->prereserved.remaining,
 				atomic_read(&c->btree_cache.dirty),
 				c->btree_cache.used,
 				atomic_long_read(&c->btree_key_cache.nr_dirty),
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 42504e16acb6..a756b69582e3 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -76,14 +76,6 @@ struct journal_res {
 	u64			seq;
 };
 
-/*
- * For reserving space in the journal prior to getting a reservation on a
- * particular journal entry:
- */
-struct journal_preres {
-	unsigned		u64s;
-};
-
 union journal_res_state {
 	struct {
 		atomic64_t	counter;
@@ -104,22 +96,6 @@ union journal_res_state {
 	};
 };
 
-union journal_preres_state {
-	struct {
-		atomic64_t	counter;
-	};
-
-	struct {
-		u64		v;
-	};
-
-	struct {
-		u64		waiting:1,
-				reserved:31,
-				remaining:32;
-	};
-};
-
 /* bytes: */
 #define JOURNAL_ENTRY_SIZE_MIN		(64U << 10) /* 64k */
 #define JOURNAL_ENTRY_SIZE_MAX		(4U  << 20) /* 4M */
@@ -180,8 +156,6 @@ struct journal {
 	union journal_res_state reservations;
 	enum bch_watermark	watermark;
 
-	union journal_preres_state prereserved;
-
 	} __aligned(SMP_CACHE_BYTES);
 
 	unsigned long		flags;
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index b775cf0fb7cb..97790445e67a 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -163,8 +163,11 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
 		this_cpu_sub(*lock->readers, !ret);
 		preempt_enable();
 
-		if (!ret && (old & SIX_LOCK_WAITING_write))
-			ret = -1 - SIX_LOCK_write;
+		if (!ret) {
+			smp_mb();
+			if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
+				ret = -1 - SIX_LOCK_write;
+		}
 	} else if (type == SIX_LOCK_write && lock->readers) {
 		if (try) {
 			atomic_add(SIX_LOCK_HELD_write, &lock->state);
diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h
index 86833445af20..2d2e66a4e468 100644
--- a/fs/bcachefs/subvolume_types.h
+++ b/fs/bcachefs/subvolume_types.h
@@ -20,7 +20,7 @@ struct snapshot_t {
 };
 
 struct snapshot_table {
-	struct snapshot_t	s[0];
+	DECLARE_FLEX_ARRAY(struct snapshot_t, s);
 };
 
 typedef struct {
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 893304a1f06e..7857671159b4 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -196,10 +196,9 @@ DEFINE_EVENT(bio, journal_write,
 TRACE_EVENT(journal_reclaim_start,
 	TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
 		 u64 min_nr, u64 min_key_cache,
-		 u64 prereserved, u64 prereserved_total,
 		 u64 btree_cache_dirty, u64 btree_cache_total,
 		 u64 btree_key_cache_dirty, u64 btree_key_cache_total),
-	TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
+	TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
 		btree_cache_dirty, btree_cache_total,
 		btree_key_cache_dirty, btree_key_cache_total),
 
@@ -209,8 +208,6 @@ TRACE_EVENT(journal_reclaim_start,
 		__field(bool,		kicked			)
 		__field(u64,		min_nr			)
 		__field(u64,		min_key_cache		)
-		__field(u64,		prereserved		)
-		__field(u64,		prereserved_total	)
 		__field(u64,		btree_cache_dirty	)
 		__field(u64,		btree_cache_total	)
 		__field(u64,		btree_key_cache_dirty	)
@@ -223,22 +220,18 @@ TRACE_EVENT(journal_reclaim_start,
 		__entry->kicked			= kicked;
 		__entry->min_nr			= min_nr;
 		__entry->min_key_cache		= min_key_cache;
-		__entry->prereserved		= prereserved;
-		__entry->prereserved_total	= prereserved_total;
 		__entry->btree_cache_dirty	= btree_cache_dirty;
 		__entry->btree_cache_total	= btree_cache_total;
 		__entry->btree_key_cache_dirty	= btree_key_cache_dirty;
 		__entry->btree_key_cache_total	= btree_key_cache_total;
 	),
 
-	TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
+	TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->direct,
 		  __entry->kicked,
 		  __entry->min_nr,
 		  __entry->min_key_cache,
-		  __entry->prereserved,
-		  __entry->prereserved_total,
 		  __entry->btree_cache_dirty,
 		  __entry->btree_cache_total,
 		  __entry->btree_key_cache_dirty,
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index a39ff0c296ec..79d982674c18 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -552,6 +552,14 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
 		s.v = v + 1;
 		s.defined = true;
 	} else {
+		/*
+		 * Check if this option was set on the parent - if so, switched
+		 * back to inheriting from the parent:
+		 *
+		 * rename() also has to deal with keeping inherited options up
+		 * to date - see bch2_reinherit_attrs()
+		 */
+		spin_lock(&dentry->d_lock);
 		if (!IS_ROOT(dentry)) {
 			struct bch_inode_info *dir =
 				to_bch_ei(d_inode(dentry->d_parent));
@@ -560,6 +568,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
 		} else {
 			s.v = 0;
 		}
+		spin_unlock(&dentry->d_lock);
 
 		s.defined = false;
 	}
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index e540648dedc2..1d318f85232d 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -21,7 +21,7 @@ config EROFS_FS
 	  performance under extremely memory pressure without extra cost.
 
 	  See the documentation at <file:Documentation/filesystems/erofs.rst>
-	  for more details.
+	  and the web pages at <https://erofs.docs.kernel.org> for more details.
 
 	  If unsure, say N.
 
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 029c761670bf..c98aeda8abb2 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -220,7 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
 			up_read(&devs->rwsem);
 			return 0;
 		}
-		map->m_bdev = dif->bdev_handle->bdev;
+		map->m_bdev = dif->bdev_handle ? dif->bdev_handle->bdev : NULL;
 		map->m_daxdev = dif->dax_dev;
 		map->m_dax_part_off = dif->dax_part_off;
 		map->m_fscache = dif->fscache;
@@ -238,7 +238,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
 			if (map->m_pa >= startoff &&
 			    map->m_pa < startoff + length) {
 				map->m_pa -= startoff;
-				map->m_bdev = dif->bdev_handle->bdev;
+				map->m_bdev = dif->bdev_handle ?
+					      dif->bdev_handle->bdev : NULL;
 				map->m_daxdev = dif->dax_dev;
 				map->m_dax_part_off = dif->dax_part_off;
 				map->m_fscache = dif->fscache;
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index b8ad05b4509d..14a79d3226ab 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -15,11 +15,11 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 	struct erofs_sb_info *sbi = EROFS_SB(sb);
 	struct erofs_inode *vi = EROFS_I(inode);
 	const erofs_off_t inode_loc = erofs_iloc(inode);
-
 	erofs_blk_t blkaddr, nblks = 0;
 	void *kaddr;
 	struct erofs_inode_compact *dic;
 	struct erofs_inode_extended *die, *copied = NULL;
+	union erofs_inode_i_u iu;
 	unsigned int ifmt;
 	int err;
 
@@ -35,9 +35,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 
 	dic = kaddr + *ofs;
 	ifmt = le16_to_cpu(dic->i_format);
-
 	if (ifmt & ~EROFS_I_ALL) {
-		erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu",
+		erofs_err(sb, "unsupported i_format %u of nid %llu",
 			  ifmt, vi->nid);
 		err = -EOPNOTSUPP;
 		goto err_out;
@@ -45,7 +44,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 
 	vi->datalayout = erofs_inode_datalayout(ifmt);
 	if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) {
-		erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu",
+		erofs_err(sb, "unsupported datalayout %u of nid %llu",
 			  vi->datalayout, vi->nid);
 		err = -EOPNOTSUPP;
 		goto err_out;
@@ -82,40 +81,15 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 		vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
 
 		inode->i_mode = le16_to_cpu(die->i_mode);
-		switch (inode->i_mode & S_IFMT) {
-		case S_IFREG:
-		case S_IFDIR:
-		case S_IFLNK:
-			vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr);
-			break;
-		case S_IFCHR:
-		case S_IFBLK:
-			inode->i_rdev =
-				new_decode_dev(le32_to_cpu(die->i_u.rdev));
-			break;
-		case S_IFIFO:
-		case S_IFSOCK:
-			inode->i_rdev = 0;
-			break;
-		default:
-			goto bogusimode;
-		}
+		iu = die->i_u;
 		i_uid_write(inode, le32_to_cpu(die->i_uid));
 		i_gid_write(inode, le32_to_cpu(die->i_gid));
 		set_nlink(inode, le32_to_cpu(die->i_nlink));
-
-		/* extended inode has its own timestamp */
+		/* each extended inode has its own timestamp */
 		inode_set_ctime(inode, le64_to_cpu(die->i_mtime),
 				le32_to_cpu(die->i_mtime_nsec));
 
 		inode->i_size = le64_to_cpu(die->i_size);
-
-		/* total blocks for compressed files */
-		if (erofs_inode_is_data_compressed(vi->datalayout))
-			nblks = le32_to_cpu(die->i_u.compressed_blocks);
-		else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
-			/* fill chunked inode summary info */
-			vi->chunkformat = le16_to_cpu(die->i_u.c.format);
 		kfree(copied);
 		copied = NULL;
 		break;
@@ -125,49 +99,51 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 		vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
 
 		inode->i_mode = le16_to_cpu(dic->i_mode);
-		switch (inode->i_mode & S_IFMT) {
-		case S_IFREG:
-		case S_IFDIR:
-		case S_IFLNK:
-			vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr);
-			break;
-		case S_IFCHR:
-		case S_IFBLK:
-			inode->i_rdev =
-				new_decode_dev(le32_to_cpu(dic->i_u.rdev));
-			break;
-		case S_IFIFO:
-		case S_IFSOCK:
-			inode->i_rdev = 0;
-			break;
-		default:
-			goto bogusimode;
-		}
+		iu = dic->i_u;
 		i_uid_write(inode, le16_to_cpu(dic->i_uid));
 		i_gid_write(inode, le16_to_cpu(dic->i_gid));
 		set_nlink(inode, le16_to_cpu(dic->i_nlink));
-
 		/* use build time for compact inodes */
 		inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec);
 
 		inode->i_size = le32_to_cpu(dic->i_size);
-		if (erofs_inode_is_data_compressed(vi->datalayout))
-			nblks = le32_to_cpu(dic->i_u.compressed_blocks);
-		else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
-			vi->chunkformat = le16_to_cpu(dic->i_u.c.format);
 		break;
 	default:
-		erofs_err(inode->i_sb,
-			  "unsupported on-disk inode version %u of nid %llu",
+		erofs_err(sb, "unsupported on-disk inode version %u of nid %llu",
 			  erofs_inode_version(ifmt), vi->nid);
 		err = -EOPNOTSUPP;
 		goto err_out;
 	}
 
-	if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+	case S_IFDIR:
+	case S_IFLNK:
+		vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr);
+		break;
+	case S_IFCHR:
+	case S_IFBLK:
+		inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev));
+		break;
+	case S_IFIFO:
+	case S_IFSOCK:
+		inode->i_rdev = 0;
+		break;
+	default:
+		erofs_err(sb, "bogus i_mode (%o) @ nid %llu", inode->i_mode,
+			  vi->nid);
+		err = -EFSCORRUPTED;
+		goto err_out;
+	}
+
+	/* total blocks for compressed files */
+	if (erofs_inode_is_data_compressed(vi->datalayout)) {
+		nblks = le32_to_cpu(iu.compressed_blocks);
+	} else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+		/* fill chunked inode summary info */
+		vi->chunkformat = le16_to_cpu(iu.c.format);
 		if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
-			erofs_err(inode->i_sb,
-				  "unsupported chunk format %x of nid %llu",
+			erofs_err(sb, "unsupported chunk format %x of nid %llu",
 				  vi->chunkformat, vi->nid);
 			err = -EOPNOTSUPP;
 			goto err_out;
@@ -191,10 +167,6 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 		inode->i_blocks = nblks << (sb->s_blocksize_bits - 9);
 	return kaddr;
 
-bogusimode:
-	erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu",
-		  inode->i_mode, vi->nid);
-	err = -EFSCORRUPTED;
 err_out:
 	DBG_BUGON(1);
 	kfree(copied);
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 929248c6ca84..4cbe0434cbb8 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -84,8 +84,8 @@ int	nfsd_net_reply_cache_init(struct nfsd_net *nn);
 void	nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
 int	nfsd_reply_cache_init(struct nfsd_net *);
 void	nfsd_reply_cache_shutdown(struct nfsd_net *);
-int	nfsd_cache_lookup(struct svc_rqst *rqstp,
-			  struct nfsd_cacherep **cacherep);
+int	nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+			  unsigned int len, struct nfsd_cacherep **cacherep);
 void	nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
 			  int cachetype, __be32 *statp);
 int	nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4045c852a450..40415929e2ae 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2804,7 +2804,7 @@ static int client_opens_release(struct inode *inode, struct file *file)
 
 	/* XXX: alternatively, we could get/drop in seq start/stop */
 	drop_client(clp);
-	return 0;
+	return seq_release(inode, file);
 }
 
 static const struct file_operations client_states_fops = {
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index fd56a52aa5fb..d3273a396659 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -369,33 +369,52 @@ nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
 	return freed;
 }
 
-/*
- * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
+/**
+ * nfsd_cache_csum - Checksum incoming NFS Call arguments
+ * @buf: buffer containing a whole RPC Call message
+ * @start: starting byte of the NFS Call header
+ * @remaining: size of the NFS Call header, in bytes
+ *
+ * Compute a weak checksum of the leading bytes of an NFS procedure
+ * call header to help verify that a retransmitted Call matches an
+ * entry in the duplicate reply cache.
+ *
+ * To avoid assumptions about how the RPC message is laid out in
+ * @buf and what else it might contain (eg, a GSS MIC suffix), the
+ * caller passes us the exact location and length of the NFS Call
+ * header.
+ *
+ * Returns a 32-bit checksum value, as defined in RFC 793.
  */
-static __wsum
-nfsd_cache_csum(struct svc_rqst *rqstp)
+static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start,
+			      unsigned int remaining)
 {
+	unsigned int base, len;
+	struct xdr_buf subbuf;
+	__wsum csum = 0;
+	void *p;
 	int idx;
-	unsigned int base;
-	__wsum csum;
-	struct xdr_buf *buf = &rqstp->rq_arg;
-	const unsigned char *p = buf->head[0].iov_base;
-	size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
-				RC_CSUMLEN);
-	size_t len = min(buf->head[0].iov_len, csum_len);
+
+	if (remaining > RC_CSUMLEN)
+		remaining = RC_CSUMLEN;
+	if (xdr_buf_subsegment(buf, &subbuf, start, remaining))
+		return csum;
 
 	/* rq_arg.head first */
-	csum = csum_partial(p, len, 0);
-	csum_len -= len;
+	if (subbuf.head[0].iov_len) {
+		len = min_t(unsigned int, subbuf.head[0].iov_len, remaining);
+		csum = csum_partial(subbuf.head[0].iov_base, len, csum);
+		remaining -= len;
+	}
 
 	/* Continue into page array */
-	idx = buf->page_base / PAGE_SIZE;
-	base = buf->page_base & ~PAGE_MASK;
-	while (csum_len) {
-		p = page_address(buf->pages[idx]) + base;
-		len = min_t(size_t, PAGE_SIZE - base, csum_len);
+	idx = subbuf.page_base / PAGE_SIZE;
+	base = subbuf.page_base & ~PAGE_MASK;
+	while (remaining) {
+		p = page_address(subbuf.pages[idx]) + base;
+		len = min_t(unsigned int, PAGE_SIZE - base, remaining);
 		csum = csum_partial(p, len, csum);
-		csum_len -= len;
+		remaining -= len;
 		base = 0;
 		++idx;
 	}
@@ -466,6 +485,8 @@ out:
 /**
  * nfsd_cache_lookup - Find an entry in the duplicate reply cache
  * @rqstp: Incoming Call to find
+ * @start: starting byte in @rqstp->rq_arg of the NFS Call header
+ * @len: size of the NFS Call header, in bytes
  * @cacherep: OUT: DRC entry for this request
  *
  * Try to find an entry matching the current call in the cache. When none
@@ -479,7 +500,8 @@ out:
  *   %RC_REPLY: Reply from cache
  *   %RC_DROPIT: Do not process the request further
  */
-int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+		      unsigned int len, struct nfsd_cacherep **cacherep)
 {
 	struct nfsd_net		*nn;
 	struct nfsd_cacherep	*rp, *found;
@@ -495,7 +517,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
 		goto out;
 	}
 
-	csum = nfsd_cache_csum(rqstp);
+	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
 
 	/*
 	 * Since the common case is a cache miss followed by an insert,
@@ -641,24 +663,17 @@ void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
 	return;
 }
 
-/*
- * Copy cached reply to current reply buffer. Should always fit.
- * FIXME as reply is in a page, we should just attach the page, and
- * keep a refcount....
- */
 static int
 nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
 {
-	struct kvec	*vec = &rqstp->rq_res.head[0];
-
-	if (vec->iov_len + data->iov_len > PAGE_SIZE) {
-		printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
-				data->iov_len);
-		return 0;
-	}
-	memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
-	vec->iov_len += data->iov_len;
-	return 1;
+	__be32 *p;
+
+	p = xdr_reserve_space(&rqstp->rq_res_stream, data->iov_len);
+	if (unlikely(!p))
+		return false;
+	memcpy(p, data->iov_base, data->iov_len);
+	xdr_commit_encode(&rqstp->rq_res_stream);
+	return true;
 }
 
 /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d6122bb2d167..fe61d9bbcc1f 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -981,6 +981,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
 	const struct svc_procedure *proc = rqstp->rq_procinfo;
 	__be32 *statp = rqstp->rq_accept_statp;
 	struct nfsd_cacherep *rp;
+	unsigned int start, len;
+	__be32 *nfs_reply;
 
 	/*
 	 * Give the xdr decoder a chance to change this if it wants
@@ -988,6 +990,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
 	 */
 	rqstp->rq_cachetype = proc->pc_cachetype;
 
+	/*
+	 * ->pc_decode advances the argument stream past the NFS
+	 * Call header, so grab the header's starting location and
+	 * size now for the call to nfsd_cache_lookup().
+	 */
+	start = xdr_stream_pos(&rqstp->rq_arg_stream);
+	len = xdr_stream_remaining(&rqstp->rq_arg_stream);
 	if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
 		goto out_decode_err;
 
@@ -1001,7 +1010,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
 	smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
 
 	rp = NULL;
-	switch (nfsd_cache_lookup(rqstp, &rp)) {
+	switch (nfsd_cache_lookup(rqstp, start, len, &rp)) {
 	case RC_DOIT:
 		break;
 	case RC_REPLY:
@@ -1010,6 +1019,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
 		goto out_dropit;
 	}
 
+	nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
 	*statp = proc->pc_func(rqstp);
 	if (test_bit(RQ_DROPME, &rqstp->rq_flags))
 		goto out_update_drop;
@@ -1023,7 +1033,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
 	 */
 	smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
 
-	nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
+	nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply);
 out_cached_reply:
 	return 1;
 
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index ddab9ea267d1..3fe2dde1598f 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -430,7 +430,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 	struct ovl_fs_context *ctx = fc->fs_private;
 	struct ovl_fs_context_layer *l;
 	char *dup = NULL, *iter;
-	ssize_t nr_lower = 0, nr = 0, nr_data = 0;
+	ssize_t nr_lower, nr;
 	bool data_layer = false;
 
 	/*
@@ -482,6 +482,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 	iter = dup;
 	l = ctx->lower;
 	for (nr = 0; nr < nr_lower; nr++, l++) {
+		ctx->nr++;
 		memset(l, 0, sizeof(*l));
 
 		err = ovl_mount_dir(iter, &l->path);
@@ -498,10 +499,10 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 			goto out_put;
 
 		if (data_layer)
-			nr_data++;
+			ctx->nr_data++;
 
 		/* Calling strchr() again would overrun. */
-		if ((nr + 1) == nr_lower)
+		if (ctx->nr == nr_lower)
 			break;
 
 		err = -EINVAL;
@@ -511,7 +512,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 			 * This is a regular layer so we require that
 			 * there are no data layers.
 			 */
-			if ((ctx->nr_data + nr_data) > 0) {
+			if (ctx->nr_data > 0) {
 				pr_err("regular lower layers cannot follow data lower layers");
 				goto out_put;
 			}
@@ -524,8 +525,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 		data_layer = true;
 		iter++;
 	}
-	ctx->nr = nr_lower;
-	ctx->nr_data += nr_data;
 	kfree(dup);
 	return 0;
 
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 50a201e9cd39..c3f020ca13a8 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -978,7 +978,7 @@ int ovl_set_protattr(struct inode *inode, struct dentry *upper,
 	return 0;
 }
 
-/**
+/*
  * Caller must hold a reference to inode to prevent it from being freed while
  * it is marked inuse.
  */
diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c
index 6f3285f1dfee..af7849e5974f 100644
--- a/fs/smb/client/cifs_spnego.c
+++ b/fs/smb/client/cifs_spnego.c
@@ -64,8 +64,8 @@ struct key_type cifs_spnego_key_type = {
  * strlen(";sec=ntlmsspi") */
 #define MAX_MECH_STR_LEN	13
 
-/* strlen of "host=" */
-#define HOST_KEY_LEN		5
+/* strlen of ";host=" */
+#define HOST_KEY_LEN		6
 
 /* strlen of ";ip4=" or ";ip6=" */
 #define IP_KEY_LEN		5
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 57c2a7df3457..f896f60c924b 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2065,6 +2065,12 @@ void __cifs_put_smb_ses(struct cifs_ses *ses)
 		ses->chans[i].server = NULL;
 	}
 
+	/* we now account for primary channel in iface->refcount */
+	if (ses->chans[0].iface) {
+		kref_put(&ses->chans[0].iface->refcount, release_iface);
+		ses->chans[0].server = NULL;
+	}
+
 	sesInfoFree(ses);
 	cifs_put_tcp_session(server, 0);
 }
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 0bb2ac929061..8b2d7c1ca428 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -322,28 +322,32 @@ cifs_disable_secondary_channels(struct cifs_ses *ses)
 		iface = ses->chans[i].iface;
 		server = ses->chans[i].server;
 
+		/*
+		 * remove these references first, since we need to unlock
+		 * the chan_lock here, since iface_lock is a higher lock
+		 */
+		ses->chans[i].iface = NULL;
+		ses->chans[i].server = NULL;
+		spin_unlock(&ses->chan_lock);
+
 		if (iface) {
 			spin_lock(&ses->iface_lock);
 			kref_put(&iface->refcount, release_iface);
-			ses->chans[i].iface = NULL;
 			iface->num_channels--;
 			if (iface->weight_fulfilled)
 				iface->weight_fulfilled--;
 			spin_unlock(&ses->iface_lock);
 		}
 
-		spin_unlock(&ses->chan_lock);
-		if (server && !server->terminate) {
-			server->terminate = true;
-			cifs_signal_cifsd_for_reconnect(server, false);
-		}
-		spin_lock(&ses->chan_lock);
-
 		if (server) {
-			ses->chans[i].server = NULL;
+			if (!server->terminate) {
+				server->terminate = true;
+				cifs_signal_cifsd_for_reconnect(server, false);
+			}
 			cifs_put_tcp_session(server, false);
 		}
 
+		spin_lock(&ses->chan_lock);
 	}
 
 done:
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 84ea67301303..5a3ca62d2f07 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -458,6 +458,8 @@ generate_smb3signingkey(struct cifs_ses *ses,
 				  ptriplet->encryption.context,
 				  ses->smb3encryptionkey,
 				  SMB3_ENC_DEC_KEY_SIZE);
+		if (rc)
+			return rc;
 		rc = generate_key(ses, ptriplet->decryption.label,
 				  ptriplet->decryption.context,
 				  ses->smb3decryptionkey,
@@ -466,9 +468,6 @@ generate_smb3signingkey(struct cifs_ses *ses,
 			return rc;
 	}
 
-	if (rc)
-		return rc;
-
 #ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
 	cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__);
 	/*
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index ed0bc8cbc703..567fb37274d3 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS
 	bool "XFS online metadata check usage data collection"
 	default y
 	depends on XFS_ONLINE_SCRUB
-	select XFS_DEBUG
+	select DEBUG_FS
 	help
 	  If you say Y here, the kernel will gather usage data about
 	  the online metadata check subsystem.  This includes the number
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3069194527dd..100ab5931b31 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist(
 
 	ASSERT(mp->m_alloc_maxlevels > 0);
 
+	/*
+	 * For a btree shorter than the maximum height, the worst case is that
+	 * every level gets split and a new level is added, then while inserting
+	 * another entry to refill the AGFL, every level under the old root gets
+	 * split again. This is:
+	 *
+	 *   (full height split reservation) + (AGFL refill split height)
+	 * = (current height + 1) + (current height - 1)
+	 * = (new height) + (new height - 2)
+	 * = 2 * new height - 2
+	 *
+	 * For a btree of maximum height, the worst case is that every level
+	 * under the root gets split, then while inserting another entry to
+	 * refill the AGFL, every level under the root gets split again. This is
+	 * also:
+	 *
+	 *   2 * (current height - 1)
+	 * = 2 * (new height - 1)
+	 * = 2 * new height - 2
+	 */
+
 	/* space needed by-bno freespace btree */
 	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
-				       mp->m_alloc_maxlevels);
+				       mp->m_alloc_maxlevels) * 2 - 2;
 	/* space needed by-size freespace btree */
 	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
-				       mp->m_alloc_maxlevels);
+				       mp->m_alloc_maxlevels) * 2 - 2;
 	/* space needed reverse mapping used space btree */
 	if (xfs_has_rmapbt(mp))
 		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
-						mp->m_rmap_maxlevels);
+						mp->m_rmap_maxlevels) * 2 - 2;
 
 	return min_free;
 }
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index bcfb6a4203cd..f71679ce23b9 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -245,21 +245,18 @@ xfs_defer_create_intents(
 	return ret;
 }
 
-/* Abort all the intents that were committed. */
 STATIC void
-xfs_defer_trans_abort(
-	struct xfs_trans		*tp,
-	struct list_head		*dop_pending)
+xfs_defer_pending_abort(
+	struct xfs_mount		*mp,
+	struct list_head		*dop_list)
 {
 	struct xfs_defer_pending	*dfp;
 	const struct xfs_defer_op_type	*ops;
 
-	trace_xfs_defer_trans_abort(tp, _RET_IP_);
-
 	/* Abort intent items that don't have a done item. */
-	list_for_each_entry(dfp, dop_pending, dfp_list) {
+	list_for_each_entry(dfp, dop_list, dfp_list) {
 		ops = defer_op_types[dfp->dfp_type];
-		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
+		trace_xfs_defer_pending_abort(mp, dfp);
 		if (dfp->dfp_intent && !dfp->dfp_done) {
 			ops->abort_intent(dfp->dfp_intent);
 			dfp->dfp_intent = NULL;
@@ -267,6 +264,16 @@ xfs_defer_trans_abort(
 	}
 }
 
+/* Abort all the intents that were committed. */
+STATIC void
+xfs_defer_trans_abort(
+	struct xfs_trans		*tp,
+	struct list_head		*dop_pending)
+{
+	trace_xfs_defer_trans_abort(tp, _RET_IP_);
+	xfs_defer_pending_abort(tp->t_mountp, dop_pending);
+}
+
 /*
  * Capture resources that the caller said not to release ("held") when the
  * transaction commits.  Caller is responsible for zero-initializing @dres.
@@ -756,12 +763,13 @@ xfs_defer_ops_capture(
 
 /* Release all resources that we used to capture deferred ops. */
 void
-xfs_defer_ops_capture_free(
+xfs_defer_ops_capture_abort(
 	struct xfs_mount		*mp,
 	struct xfs_defer_capture	*dfc)
 {
 	unsigned short			i;
 
+	xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
 	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
 
 	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
@@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit(
 	/* Commit the transaction and add the capture structure to the list. */
 	error = xfs_trans_commit(tp);
 	if (error) {
-		xfs_defer_ops_capture_free(mp, dfc);
+		xfs_defer_ops_capture_abort(mp, dfc);
 		return error;
 	}
 
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 114a3a4930a3..8788ad5f6a73 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
 		struct list_head *capture_list);
 void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
 		struct xfs_defer_resources *dres);
-void xfs_defer_ops_capture_free(struct xfs_mount *mp,
+void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
 		struct xfs_defer_capture *d);
 void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
 
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 543f3748c2a3..137a65bda95d 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -510,6 +510,9 @@ xfs_dinode_verify(
 	if (mode && nextents + naextents > nblocks)
 		return __this_address;
 
+	if (nextents + naextents == 0 && nblocks != 0)
+		return __this_address;
+
 	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
 		return __this_address;
 
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 0e5dba2343ea..144198a6b270 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2(
 	struct xfs_log_dinode		*ldip;
 	uint				isize;
 	int				need_free = 0;
+	xfs_failaddr_t			fa;
 
 	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 		in_f = item->ri_buf[0].i_addr;
@@ -369,24 +370,26 @@ xlog_recover_inode_commit_pass2(
 	 * superblock flag to determine whether we need to look at di_flushiter
 	 * to skip replay when the on disk inode is newer than the log one
 	 */
-	if (!xfs_has_v3inodes(mp) &&
-	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
-		/*
-		 * Deal with the wrap case, DI_MAX_FLUSH is less
-		 * than smaller numbers
-		 */
-		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
-		    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
-			/* do nothing */
-		} else {
-			trace_xfs_log_recover_inode_skip(log, in_f);
-			error = 0;
-			goto out_release;
+	if (!xfs_has_v3inodes(mp)) {
+		if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+			/*
+			 * Deal with the wrap case, DI_MAX_FLUSH is less
+			 * than smaller numbers
+			 */
+			if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+			    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+				/* do nothing */
+			} else {
+				trace_xfs_log_recover_inode_skip(log, in_f);
+				error = 0;
+				goto out_release;
+			}
 		}
+
+		/* Take the opportunity to reset the flush iteration count */
+		ldip->di_flushiter = 0;
 	}
 
-	/* Take the opportunity to reset the flush iteration count */
-	ldip->di_flushiter = 0;
 
 	if (unlikely(S_ISREG(ldip->di_mode))) {
 		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -528,8 +531,19 @@ out_owner_change:
 	    (dip->di_mode != 0))
 		error = xfs_recover_inode_owner_change(mp, dip, in_f,
 						       buffer_list);
-	/* re-generate the checksum. */
+	/* re-generate the checksum and validate the recovered inode. */
 	xfs_dinode_calc_crc(log->l_mp, dip);
+	fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
+	if (fa) {
+		XFS_CORRUPTION_ERROR(
+			"Bad dinode after recovery",
+				XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
+		xfs_alert(mp,
+			"Metadata corruption detected at %pS, inode 0x%llx",
+			fa, in_f->ilf_ino);
+		error = -EFSCORRUPTED;
+		goto out_release;
+	}
 
 	ASSERT(bp->b_mount == mp);
 	bp->b_flags |= _XBF_LOGRECOVERY;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 51c100c86177..ee206facf0dc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1893,9 +1893,7 @@ xlog_write_iclog(
 		 * the buffer manually, the code needs to be kept in sync
 		 * with the I/O completion path.
 		 */
-		xlog_state_done_syncing(iclog);
-		up(&iclog->ic_sema);
-		return;
+		goto sync;
 	}
 
 	/*
@@ -1925,20 +1923,17 @@ xlog_write_iclog(
 		 * avoid shutdown re-entering this path and erroring out again.
 		 */
 		if (log->l_targ != log->l_mp->m_ddev_targp &&
-		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
-			xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-			return;
-		}
+		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
+			goto shutdown;
 	}
 	if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
 		iclog->ic_bio.bi_opf |= REQ_FUA;
 
 	iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
-	if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
-		xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-		return;
-	}
+	if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
+		goto shutdown;
+
 	if (is_vmalloc_addr(iclog->ic_data))
 		flush_kernel_vmap_range(iclog->ic_data, count);
 
@@ -1959,6 +1954,12 @@ xlog_write_iclog(
 	}
 
 	submit_bio(&iclog->ic_bio);
+	return;
+shutdown:
+	xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+sync:
+	xlog_state_done_syncing(iclog);
+	up(&iclog->ic_sema);
 }
 
 /*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 13b94d2e605b..a1e18b24971a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2511,7 +2511,7 @@ xlog_abort_defer_ops(
 
 	list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
 		list_del_init(&dfc->dfc_list);
-		xfs_defer_ops_capture_free(mp, dfc);
+		xfs_defer_ops_capture_abort(mp, dfc);
 	}
 }
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 658edee8381d..e5b62dc28466 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent(
 		}
 	}
 	del = got;
+	xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
 
 	/* Grab the corresponding mapping in the data fork. */
 	nmaps = 1;
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 995513fa2690..0655aa5b57b2 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -70,7 +70,7 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
  */
 static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
 {
-	return !atomic_read(&lock.val);
+	return !lock.val.counter;
 }
 
 /**
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d305db70674b..efc0c0b07efb 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -195,6 +195,7 @@ enum cpuhp_state {
 	CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
 	CPUHP_AP_ARM64_ISNDEP_STARTING,
 	CPUHP_AP_SMPCFD_DYING,
+	CPUHP_AP_HRTIMERS_DYING,
 	CPUHP_AP_X86_TBOOT_DYING,
 	CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
 	CPUHP_AP_ONLINE,
diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h
index 45fca09b2319..69501e0ec239 100644
--- a/include/linux/export-internal.h
+++ b/include/linux/export-internal.h
@@ -50,9 +50,7 @@
 	    "	.previous"						"\n"	\
 	)
 
-#ifdef CONFIG_IA64
-#define KSYM_FUNC(name)		@fptr(name)
-#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
 #define KSYM_FUNC(name)		P%name
 #else
 #define KSYM_FUNC(name)		name
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 0ee140176f10..f2044d5a652b 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);
 
 int hrtimers_prepare_cpu(unsigned int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-int hrtimers_dead_cpu(unsigned int cpu);
+int hrtimers_cpu_dying(unsigned int cpu);
 #else
-#define hrtimers_dead_cpu	NULL
+#define hrtimers_cpu_dying	NULL
 #endif
 
 #endif
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index afb028c54f33..5547ba68e6e4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -843,11 +843,11 @@ struct perf_event {
 };
 
 /*
- *           ,-----------------------[1:n]----------------------.
- *           V                                                  V
- * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
- *           ^                      ^     |                     |
- *           `--------[1:n]---------'     `-[n:1]-> pmu <-[1:n]-'
+ *           ,-----------------------[1:n]------------------------.
+ *           V                                                    V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
+ *                                        |                       |
+ *                                        `--[n:1]-> pmu <-[1:n]--'
  *
  *
  * struct perf_event_pmu_context  lifetime is refcount based and RCU freed
@@ -865,6 +865,9 @@ struct perf_event {
  * ctx->mutex pinning the configuration. Since we hold a reference on
  * group_leader (through the filedesc) it can't go away, therefore it's
  * associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ *
+ * perf_event holds a refcount on perf_event_context
+ * perf_event holds a refcount on perf_event_pmu_context
  */
 struct perf_event_pmu_context {
 	struct pmu			*pmu;
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index f04a43044d91..976e9500f651 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -145,13 +145,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
 	if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
 		struct io_sq_data *sq = ctx->sq_data;
 
-		if (mutex_trylock(&sq->lock)) {
-			if (sq->thread) {
-				sq_pid = task_pid_nr(sq->thread);
-				sq_cpu = task_cpu(sq->thread);
-			}
-			mutex_unlock(&sq->lock);
-		}
+		sq_pid = sq->task_pid;
+		sq_cpu = sq->sq_cpu;
 	}
 
 	seq_printf(m, "SqThread:\t%d\n", sq_pid);
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index bd6c2c7959a5..65b5dbe3c850 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -214,6 +214,7 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
 			did_sig = get_signal(&ksig);
 		cond_resched();
 		mutex_lock(&sqd->lock);
+		sqd->sq_cpu = raw_smp_processor_id();
 	}
 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
@@ -229,10 +230,15 @@ static int io_sq_thread(void *data)
 	snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
 	set_task_comm(current, buf);
 
-	if (sqd->sq_cpu != -1)
+	/* reset to our pid after we've set task_comm, for fdinfo */
+	sqd->task_pid = current->pid;
+
+	if (sqd->sq_cpu != -1) {
 		set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
-	else
+	} else {
 		set_cpus_allowed_ptr(current, cpu_online_mask);
+		sqd->sq_cpu = raw_smp_processor_id();
+	}
 
 	mutex_lock(&sqd->lock);
 	while (1) {
@@ -261,6 +267,7 @@ static int io_sq_thread(void *data)
 				mutex_unlock(&sqd->lock);
 				cond_resched();
 				mutex_lock(&sqd->lock);
+				sqd->sq_cpu = raw_smp_processor_id();
 			}
 			continue;
 		}
@@ -294,6 +301,7 @@ static int io_sq_thread(void *data)
 				mutex_unlock(&sqd->lock);
 				schedule();
 				mutex_lock(&sqd->lock);
+				sqd->sq_cpu = raw_smp_processor_id();
 			}
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 				atomic_andnot(IORING_SQ_NEED_WAKEUP,
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 91e82e34b51e..7a98cd176a12 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -531,7 +531,7 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
 	if (tsk != current)
 		return 0;
 
-	if (WARN_ON_ONCE(!current->mm))
+	if (!current->mm)
 		return 0;
 	exe_file = get_mm_exe_file(current->mm);
 	if (!exe_file)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1d5b9de3b1b9..4b9ff41ca603 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3885,14 +3885,6 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
 	return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
 }
 
-static int cgroup_pressure_open(struct kernfs_open_file *of)
-{
-	if (of->file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
-		return -EPERM;
-
-	return 0;
-}
-
 static void cgroup_pressure_release(struct kernfs_open_file *of)
 {
 	struct cgroup_file_ctx *ctx = of->priv;
@@ -5299,7 +5291,6 @@ static struct cftype cgroup_psi_files[] = {
 	{
 		.name = "io.pressure",
 		.file_offset = offsetof(struct cgroup, psi_files[PSI_IO]),
-		.open = cgroup_pressure_open,
 		.seq_show = cgroup_io_pressure_show,
 		.write = cgroup_io_pressure_write,
 		.poll = cgroup_pressure_poll,
@@ -5308,7 +5299,6 @@ static struct cftype cgroup_psi_files[] = {
 	{
 		.name = "memory.pressure",
 		.file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]),
-		.open = cgroup_pressure_open,
 		.seq_show = cgroup_memory_pressure_show,
 		.write = cgroup_memory_pressure_write,
 		.poll = cgroup_pressure_poll,
@@ -5317,7 +5307,6 @@ static struct cftype cgroup_psi_files[] = {
 	{
 		.name = "cpu.pressure",
 		.file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]),
-		.open = cgroup_pressure_open,
 		.seq_show = cgroup_cpu_pressure_show,
 		.write = cgroup_cpu_pressure_write,
 		.poll = cgroup_pressure_poll,
@@ -5327,7 +5316,6 @@ static struct cftype cgroup_psi_files[] = {
 	{
 		.name = "irq.pressure",
 		.file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]),
-		.open = cgroup_pressure_open,
 		.seq_show = cgroup_irq_pressure_show,
 		.write = cgroup_irq_pressure_write,
 		.poll = cgroup_pressure_poll,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9e4c6780adde..a86972a91991 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2113,7 +2113,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 	[CPUHP_HRTIMERS_PREPARE] = {
 		.name			= "hrtimers:prepare",
 		.startup.single		= hrtimers_prepare_cpu,
-		.teardown.single	= hrtimers_dead_cpu,
+		.teardown.single	= NULL,
 	},
 	[CPUHP_SMPCFD_PREPARE] = {
 		.name			= "smpcfd:prepare",
@@ -2205,6 +2205,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= NULL,
 		.teardown.single	= smpcfd_dying_cpu,
 	},
+	[CPUHP_AP_HRTIMERS_DYING] = {
+		.name			= "hrtimers:dying",
+		.startup.single		= NULL,
+		.teardown.single	= hrtimers_cpu_dying,
+	},
+
 	/* Entry state on starting. Interrupts enabled from here on. Transient
 	 * state for synchronsization */
 	[CPUHP_AP_ONLINE] = {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 683dc086ef10..b704d83a28b2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4828,6 +4828,11 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 	void *task_ctx_data = NULL;
 
 	if (!ctx->task) {
+		/*
+		 * perf_pmu_migrate_context() / __perf_pmu_install_event()
+		 * relies on the fact that find_get_pmu_context() cannot fail
+		 * for CPU contexts.
+		 */
 		struct perf_cpu_pmu_context *cpc;
 
 		cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
@@ -12889,6 +12894,9 @@ static void __perf_pmu_install_event(struct pmu *pmu,
 				     int cpu, struct perf_event *event)
 {
 	struct perf_event_pmu_context *epc;
+	struct perf_event_context *old_ctx = event->ctx;
+
+	get_ctx(ctx); /* normally find_get_context() */
 
 	event->cpu = cpu;
 	epc = find_get_pmu_context(pmu, ctx, event);
@@ -12897,6 +12905,11 @@ static void __perf_pmu_install_event(struct pmu *pmu,
 	if (event->state >= PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_INACTIVE;
 	perf_install_in_context(ctx, event, cpu);
+
+	/*
+	 * Now that event->ctx is updated and visible, put the old ctx.
+	 */
+	put_ctx(old_ctx);
 }
 
 static void __perf_pmu_install(struct perf_event_context *ctx,
@@ -12935,6 +12948,10 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 	struct perf_event_context *src_ctx, *dst_ctx;
 	LIST_HEAD(events);
 
+	/*
+	 * Since per-cpu context is persistent, no need to grab an extra
+	 * reference.
+	 */
 	src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx;
 	dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx;
 
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 52695c59d041..dad981a865b8 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -700,7 +700,8 @@ retry:
 	owner = uval & FUTEX_TID_MASK;
 
 	if (pending_op && !pi && !owner) {
-		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+		futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+			   FUTEX_BITSET_MATCH_ANY);
 		return 0;
 	}
 
@@ -752,8 +753,10 @@ retry:
 	 * Wake robust non-PI futexes here. The wakeup of
 	 * PI futexes happens in exit_pi_state():
 	 */
-	if (!pi && (uval & FUTEX_WAITERS))
-		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+	if (!pi && (uval & FUTEX_WAITERS)) {
+		futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+			   FUTEX_BITSET_MATCH_ANY);
+	}
 
 	return 0;
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2048138ce54b..d7a3c63a2171 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3666,41 +3666,140 @@ static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 #endif
 
+static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
+			   unsigned long weight)
+{
+	unsigned long old_weight = se->load.weight;
+	u64 avruntime = avg_vruntime(cfs_rq);
+	s64 vlag, vslice;
+
+	/*
+	 * VRUNTIME
+	 * ========
+	 *
+	 * COROLLARY #1: The virtual runtime of the entity needs to be
+	 * adjusted if re-weight at !0-lag point.
+	 *
+	 * Proof: For contradiction assume this is not true, so we can
+	 * re-weight without changing vruntime at !0-lag point.
+	 *
+	 *             Weight	VRuntime   Avg-VRuntime
+	 *     before    w          v            V
+	 *      after    w'         v'           V'
+	 *
+	 * Since lag needs to be preserved through re-weight:
+	 *
+	 *	lag = (V - v)*w = (V'- v')*w', where v = v'
+	 *	==>	V' = (V - v)*w/w' + v		(1)
+	 *
+	 * Let W be the total weight of the entities before reweight,
+	 * since V' is the new weighted average of entities:
+	 *
+	 *	V' = (WV + w'v - wv) / (W + w' - w)	(2)
+	 *
+	 * by using (1) & (2) we obtain:
+	 *
+	 *	(WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
+	 *	==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
+	 *	==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
+	 *	==>	(V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
+	 *
+	 * Since we are doing at !0-lag point which means V != v, we
+	 * can simplify (3):
+	 *
+	 *	==>	W / (W + w' - w) = w / w'
+	 *	==>	Ww' = Ww + ww' - ww
+	 *	==>	W * (w' - w) = w * (w' - w)
+	 *	==>	W = w	(re-weight indicates w' != w)
+	 *
+	 * So the cfs_rq contains only one entity, hence vruntime of
+	 * the entity @v should always equal to the cfs_rq's weighted
+	 * average vruntime @V, which means we will always re-weight
+	 * at 0-lag point, thus breach assumption. Proof completed.
+	 *
+	 *
+	 * COROLLARY #2: Re-weight does NOT affect weighted average
+	 * vruntime of all the entities.
+	 *
+	 * Proof: According to corollary #1, Eq. (1) should be:
+	 *
+	 *	(V - v)*w = (V' - v')*w'
+	 *	==>    v' = V' - (V - v)*w/w'		(4)
+	 *
+	 * According to the weighted average formula, we have:
+	 *
+	 *	V' = (WV - wv + w'v') / (W - w + w')
+	 *	   = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
+	 *	   = (WV - wv + w'V' - Vw + wv) / (W - w + w')
+	 *	   = (WV + w'V' - Vw) / (W - w + w')
+	 *
+	 *	==>  V'*(W - w + w') = WV + w'V' - Vw
+	 *	==>	V' * (W - w) = (W - w) * V	(5)
+	 *
+	 * If the entity is the only one in the cfs_rq, then reweight
+	 * always occurs at 0-lag point, so V won't change. Or else
+	 * there are other entities, hence W != w, then Eq. (5) turns
+	 * into V' = V. So V won't change in either case, proof done.
+	 *
+	 *
+	 * So according to corollary #1 & #2, the effect of re-weight
+	 * on vruntime should be:
+	 *
+	 *	v' = V' - (V - v) * w / w'		(4)
+	 *	   = V  - (V - v) * w / w'
+	 *	   = V  - vl * w / w'
+	 *	   = V  - vl'
+	 */
+	if (avruntime != se->vruntime) {
+		vlag = (s64)(avruntime - se->vruntime);
+		vlag = div_s64(vlag * old_weight, weight);
+		se->vruntime = avruntime - vlag;
+	}
+
+	/*
+	 * DEADLINE
+	 * ========
+	 *
+	 * When the weight changes, the virtual time slope changes and
+	 * we should adjust the relative virtual deadline accordingly.
+	 *
+	 *	d' = v' + (d - v)*w/w'
+	 *	   = V' - (V - v)*w/w' + (d - v)*w/w'
+	 *	   = V  - (V - v)*w/w' + (d - v)*w/w'
+	 *	   = V  + (d - V)*w/w'
+	 */
+	vslice = (s64)(se->deadline - avruntime);
+	vslice = div_s64(vslice * old_weight, weight);
+	se->deadline = avruntime + vslice;
+}
+
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			    unsigned long weight)
 {
-	unsigned long old_weight = se->load.weight;
+	bool curr = cfs_rq->curr == se;
 
 	if (se->on_rq) {
 		/* commit outstanding execution time */
-		if (cfs_rq->curr == se)
+		if (curr)
 			update_curr(cfs_rq);
 		else
-			avg_vruntime_sub(cfs_rq, se);
+			__dequeue_entity(cfs_rq, se);
 		update_load_sub(&cfs_rq->load, se->load.weight);
 	}
 	dequeue_load_avg(cfs_rq, se);
 
-	update_load_set(&se->load, weight);
-
 	if (!se->on_rq) {
 		/*
 		 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
 		 * we need to scale se->vlag when w_i changes.
 		 */
-		se->vlag = div_s64(se->vlag * old_weight, weight);
+		se->vlag = div_s64(se->vlag * se->load.weight, weight);
 	} else {
-		s64 deadline = se->deadline - se->vruntime;
-		/*
-		 * When the weight changes, the virtual time slope changes and
-		 * we should adjust the relative virtual deadline accordingly.
-		 */
-		deadline = div_s64(deadline * old_weight, weight);
-		se->deadline = se->vruntime + deadline;
-		if (se != cfs_rq->curr)
-			min_deadline_cb_propagate(&se->run_node, NULL);
+		reweight_eevdf(cfs_rq, se, weight);
 	}
 
+	update_load_set(&se->load, weight);
+
 #ifdef CONFIG_SMP
 	do {
 		u32 divider = get_pelt_divider(&se->avg);
@@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	enqueue_load_avg(cfs_rq, se);
 	if (se->on_rq) {
 		update_load_add(&cfs_rq->load, se->load.weight);
-		if (cfs_rq->curr != se)
-			avg_vruntime_add(cfs_rq, se);
+		if (!curr) {
+			/*
+			 * The entity's vruntime has been adjusted, so let's check
+			 * whether the rq-wide min_vruntime needs updated too. Since
+			 * the calculations above require stable min_vruntime rather
+			 * than up-to-date one, we do the update at the end of the
+			 * reweight process.
+			 */
+			__enqueue_entity(cfs_rq, se);
+			update_min_vruntime(cfs_rq);
+		}
 	}
 }
 
@@ -3857,14 +3965,11 @@ static void update_cfs_group(struct sched_entity *se)
 
 #ifndef CONFIG_SMP
 	shares = READ_ONCE(gcfs_rq->tg->shares);
-
-	if (likely(se->load.weight == shares))
-		return;
 #else
-	shares   = calc_group_shares(gcfs_rq);
+	shares = calc_group_shares(gcfs_rq);
 #endif
-
-	reweight_entity(cfs_rq_of(se), se, shares);
+	if (unlikely(se->load.weight != shares))
+		reweight_entity(cfs_rq_of(se), se, shares);
 }
 
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -11079,12 +11184,16 @@ static int should_we_balance(struct lb_env *env)
 			continue;
 		}
 
-		/* Are we the first idle CPU? */
+		/*
+		 * Are we the first idle core in a non-SMT domain or higher,
+		 * or the first idle CPU in a SMT domain?
+		 */
 		return cpu == env->dst_cpu;
 	}
 
-	if (idle_smt == env->dst_cpu)
-		return true;
+	/* Are we the first idle CPU with busy siblings? */
+	if (idle_smt != -1)
+		return idle_smt == env->dst_cpu;
 
 	/* Are we the first CPU of this group ? */
 	return group_balance_cpu(sg) == env->dst_cpu;
diff --git a/kernel/sys.c b/kernel/sys.c
index 420d9cb9cc8e..e219fcfa112d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2394,6 +2394,10 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
 	if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
 		return -EINVAL;
 
+	/* PARISC cannot allow mdwe as it needs writable stacks */
+	if (IS_ENABLED(CONFIG_PARISC))
+		return -EINVAL;
+
 	current_bits = get_current_mdwe();
 	if (current_bits && current_bits != bits)
 		return -EPERM; /* Cannot unset the flags */
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 238262e4aba7..760793998cdd 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2219,29 +2219,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 	}
 }
 
-int hrtimers_dead_cpu(unsigned int scpu)
+int hrtimers_cpu_dying(unsigned int dying_cpu)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
-	int i;
+	int i, ncpu = cpumask_first(cpu_active_mask);
 
-	BUG_ON(cpu_online(scpu));
-	tick_cancel_sched_timer(scpu);
+	tick_cancel_sched_timer(dying_cpu);
+
+	old_base = this_cpu_ptr(&hrtimer_bases);
+	new_base = &per_cpu(hrtimer_bases, ncpu);
 
-	/*
-	 * this BH disable ensures that raise_softirq_irqoff() does
-	 * not wakeup ksoftirqd (and acquire the pi-lock) while
-	 * holding the cpu_base lock
-	 */
-	local_bh_disable();
-	local_irq_disable();
-	old_base = &per_cpu(hrtimer_bases, scpu);
-	new_base = this_cpu_ptr(&hrtimer_bases);
 	/*
 	 * The caller is globally serialized and nobody else
 	 * takes two locks at once, deadlock is not possible.
 	 */
-	raw_spin_lock(&new_base->lock);
-	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock(&old_base->lock);
+	raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		migrate_hrtimer_list(&old_base->clock_base[i],
@@ -2252,15 +2245,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	 * The migration might have changed the first expiring softirq
 	 * timer on this CPU. Update it.
 	 */
-	hrtimer_update_softirq_timer(new_base, false);
+	__hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+	/* Tell the other CPU to retrigger the next event */
+	smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
 
-	raw_spin_unlock(&old_base->lock);
 	raw_spin_unlock(&new_base->lock);
+	raw_spin_unlock(&old_base->lock);
 
-	/* Check, if we got expired work to do */
-	__hrtimer_peek_ahead_timers();
-	local_irq_enable();
-	local_bh_enable();
 	return 0;
 }
 
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 630077d95dc6..6262d55904e7 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -924,7 +924,7 @@ static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
 		matched = true;
 		break;
 	default:
-		break;
+		return false;
 	}
 
 	return matched == filter->matching;
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 45bd0fd4a8b1..be667236b8e6 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -162,6 +162,9 @@ damon_sysfs_scheme_regions_alloc(void)
 	struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
 			GFP_KERNEL);
 
+	if (!regions)
+		return NULL;
+
 	regions->kobj = (struct kobject){};
 	INIT_LIST_HEAD(&regions->regions_list);
 	regions->nr_regions = 0;
@@ -1823,6 +1826,8 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
 		return 0;
 
 	region = damon_sysfs_scheme_region_alloc(r);
+	if (!region)
+		return 0;
 	list_add_tail(&region->list, &sysfs_regions->regions_list);
 	sysfs_regions->nr_regions++;
 	if (kobject_init_and_add(&region->kobj,
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index e27846708b5a..7472404456aa 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1172,7 +1172,7 @@ static int damon_sysfs_update_target(struct damon_target *target,
 		struct damon_ctx *ctx,
 		struct damon_sysfs_target *sys_target)
 {
-	int err;
+	int err = 0;
 
 	if (damon_target_has_pid(ctx)) {
 		err = damon_sysfs_update_target_pid(target, sys_target->pid);
@@ -1203,8 +1203,10 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
 
 	damon_for_each_target_safe(t, next, ctx) {
 		if (i < sysfs_targets->nr) {
-			damon_sysfs_update_target(t, ctx,
+			err = damon_sysfs_update_target(t, ctx,
 					sysfs_targets->targets_arr[i]);
+			if (err)
+				return err;
 		} else {
 			if (damon_target_has_pid(ctx))
 				put_pid(t->pid);
diff --git a/mm/filemap.c b/mm/filemap.c
index 9710f43a89ac..32eedf3afd45 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3443,7 +3443,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 		 * handled in the specific fault path, and it'll prohibit the
 		 * fault-around logic.
 		 */
-		if (!pte_none(vmf->pte[count]))
+		if (!pte_none(ptep_get(&vmf->pte[count])))
 			goto skip;
 
 		count++;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f31f02472396..4f542444a91f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2769,13 +2769,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			int nr = folio_nr_pages(folio);
 
 			xas_split(&xas, folio, folio_order(folio));
-			if (folio_test_swapbacked(folio)) {
-				__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS,
-							-nr);
-			} else {
-				__lruvec_stat_mod_folio(folio, NR_FILE_THPS,
-							-nr);
-				filemap_nr_thps_dec(mapping);
+			if (folio_test_pmd_mappable(folio)) {
+				if (folio_test_swapbacked(folio)) {
+					__lruvec_stat_mod_folio(folio,
+							NR_SHMEM_THPS, -nr);
+				} else {
+					__lruvec_stat_mod_folio(folio,
+							NR_FILE_THPS, -nr);
+					filemap_nr_thps_dec(mapping);
+				}
 			}
 		}
 
diff --git a/mm/ksm.c b/mm/ksm.c
index 7efcc68ccc6e..6a831009b4cb 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -468,7 +468,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
 			page = pfn_swap_entry_to_page(entry);
 	}
 	/* return 1 if the page is an normal ksm page or KSM-placed zero page */
-	ret = (page && PageKsm(page)) || is_ksm_zero_pte(*pte);
+	ret = (page && PageKsm(page)) || is_ksm_zero_pte(ptent);
 	pte_unmap_unlock(pte, ptl);
 	return ret;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 774bd6e21e27..1c1061df9cd1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2936,7 +2936,8 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
  * Moreover, it should not come from DMA buffer and is not readily
  * reclaimable. So those GFP bits should be masked off.
  */
-#define OBJCGS_CLEAR_MASK	(__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+#define OBJCGS_CLEAR_MASK	(__GFP_DMA | __GFP_RECLAIMABLE | \
+				 __GFP_ACCOUNT | __GFP_NOFAIL)
 
 /*
  * mod_objcg_mlstate() may be called with irq enabled, so
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 96d9eae5c7cc..0b6ca553bebe 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -312,7 +312,7 @@ static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
 
 	ret = -EEXIST;
 	/* Refuse to overwrite any PTE, even a PTE marker (e.g. UFFD WP). */
-	if (!pte_none(*dst_pte))
+	if (!pte_none(ptep_get(dst_pte)))
 		goto out_unlock;
 
 	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
diff --git a/mm/util.c b/mm/util.c
index aa01f6ea5a75..744b4d7e3fae 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -414,6 +414,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack)
 
 static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
+#ifdef CONFIG_STACK_GROWSUP
+	/*
+	 * For an upwards growing stack the calculation is much simpler.
+	 * Memory for the maximum stack size is reserved at the top of the
+	 * task. mmap_base starts directly below the stack and grows
+	 * downwards.
+	 */
+	return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd);
+#else
 	unsigned long gap = rlim_stack->rlim_cur;
 	unsigned long pad = stack_guard_gap;
 
@@ -431,6 +440,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 		gap = MAX_GAP;
 
 	return PAGE_ALIGN(STACK_TOP - gap - rnd);
+#endif
 }
 
 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 68d0134bdbf9..1a965fe68e01 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -487,14 +487,14 @@ UIMAGE_OPTS-y ?=
 UIMAGE_TYPE ?= kernel
 UIMAGE_LOADADDR ?= arch_must_set_this
 UIMAGE_ENTRYADDR ?= $(UIMAGE_LOADADDR)
-UIMAGE_NAME ?= 'Linux-$(KERNELRELEASE)'
+UIMAGE_NAME ?= Linux-$(KERNELRELEASE)
 
 quiet_cmd_uimage = UIMAGE  $@
       cmd_uimage = $(BASH) $(MKIMAGE) -A $(UIMAGE_ARCH) -O linux \
 			-C $(UIMAGE_COMPRESSION) $(UIMAGE_OPTS-y) \
 			-T $(UIMAGE_TYPE) \
 			-a $(UIMAGE_LOADADDR) -e $(UIMAGE_ENTRYADDR) \
-			-n $(UIMAGE_NAME) -d $< $@
+			-n '$(UIMAGE_NAME)' -d $< $@
 
 # XZ
 # ---------------------------------------------------------------------------
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index 0572330bf8a7..a76925b46ce6 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base)
 static void sym_validate_range(struct symbol *sym)
 {
 	struct property *prop;
+	struct symbol *range_sym;
 	int base;
 	long long val, val2;
-	char str[64];
 
 	switch (sym->type) {
 	case S_INT:
@@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym)
 	if (!prop)
 		return;
 	val = strtoll(sym->curr.val, NULL, base);
-	val2 = sym_get_range_val(prop->expr->left.sym, base);
+	range_sym = prop->expr->left.sym;
+	val2 = sym_get_range_val(range_sym, base);
 	if (val >= val2) {
-		val2 = sym_get_range_val(prop->expr->right.sym, base);
+		range_sym = prop->expr->right.sym;
+		val2 = sym_get_range_val(range_sym, base);
 		if (val <= val2)
 			return;
 	}
-	if (sym->type == S_INT)
-		sprintf(str, "%lld", val2);
-	else
-		sprintf(str, "0x%llx", val2);
-	sym->curr.val = xstrdup(str);
+	sym->curr.val = range_sym->curr.val;
 }
 
 static void sym_set_changed(struct symbol *sym)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 973b5e5ae2dd..cb6406f485a9 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1383,13 +1383,15 @@ static void section_rela(struct module *mod, struct elf_info *elf,
 	const Elf_Rela *rela;
 
 	for (rela = start; rela < stop; rela++) {
+		Elf_Sym *tsym;
 		Elf_Addr taddr, r_offset;
 		unsigned int r_type, r_sym;
 
 		r_offset = TO_NATIVE(rela->r_offset);
 		get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym);
 
-		taddr = TO_NATIVE(rela->r_addend);
+		tsym = elf->symtab_start + r_sym;
+		taddr = tsym->st_value + TO_NATIVE(rela->r_addend);
 
 		switch (elf->hdr->e_machine) {
 		case EM_RISCV:
@@ -1404,7 +1406,7 @@ static void section_rela(struct module *mod, struct elf_info *elf,
 			break;
 		}
 
-		check_section_mismatch(mod, elf, elf->symtab_start + r_sym,
+		check_section_mismatch(mod, elf, tsym,
 				       fsecndx, fromsec, r_offset, taddr);
 	}
 }
diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c
index 757a4d193e0f..a9ef6d86de83 100644
--- a/sound/pci/hda/cs35l56_hda_i2c.c
+++ b/sound/pci/hda/cs35l56_hda_i2c.c
@@ -21,6 +21,10 @@ static int cs35l56_hda_i2c_probe(struct i2c_client *clt)
 		return -ENOMEM;
 
 	cs35l56->base.dev = &clt->dev;
+
+#ifdef CS35L56_WAKE_HOLD_TIME_US
+	cs35l56->base.can_hibernate = true;
+#endif
 	cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c);
 	if (IS_ERR(cs35l56->base.regmap)) {
 		ret = PTR_ERR(cs35l56->base.regmap);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 03264915c618..db90feb49c16 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2135,6 +2135,9 @@ static int azx_probe(struct pci_dev *pci,
 	if (chip->driver_caps & AZX_DCAPS_I915_COMPONENT) {
 		err = snd_hdac_i915_init(azx_bus(chip));
 		if (err < 0) {
+			if (err == -EPROBE_DEFER)
+				goto out_free;
+
 			/* if the controller is bound only with HDMI/DP
 			 * (for HSW and BDW), we need to abort the probe;
 			 * for other chips, still continue probing as other
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 669ae3d6e447..5618b1d9bfd1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9832,6 +9832,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED),
+	SND_PCI_QUIRK(0x103c, 0x890e, "HP 255 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
 	SND_PCI_QUIRK(0x103c, 0x8919, "HP Pavilion Aero Laptop 13-be0xxx", ALC287_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x896d, "HP ZBook Firefly 16 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9867,6 +9868,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
 	SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9900,12 +9902,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK),
 	SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK),
 	SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
@@ -9944,13 +9950,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
 	SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
+	SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2),
 	SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
 	SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
 	SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
 	SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
 	SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
 	SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
 	SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+	SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
+	SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
 	SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
 	SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
 	SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
@@ -10821,22 +10831,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x12, 0x90a60130},
 		{0x17, 0x90170110},
 		{0x21, 0x03211020}),
-	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
-		{0x14, 0x90170110},
-		{0x21, 0x04211020}),
-	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
-		{0x14, 0x90170110},
-		{0x21, 0x04211030}),
-	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-		ALC295_STANDARD_PINS,
-		{0x17, 0x21014020},
-		{0x18, 0x21a19030}),
-	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-		ALC295_STANDARD_PINS,
-		{0x17, 0x21014040},
-		{0x18, 0x21a19050}),
-	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-		ALC295_STANDARD_PINS),
 	SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC298_STANDARD_PINS,
 		{0x17, 0x90170110}),
@@ -10880,6 +10874,9 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
 	SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
 		{0x19, 0x40000000},
 		{0x1b, 0x40000000}),
+	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+		{0x19, 0x40000000},
+		{0x1b, 0x40000000}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		{0x19, 0x40000000},
 		{0x1a, 0x40000000}),
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 264eeb9c46a9..318e2dad27e0 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1421,7 +1421,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
 	if (error)
 		goto setval_error;
 
-	if (new_val->addr_family == ADDR_FAMILY_IPV6) {
+	if (new_val->addr_family & ADDR_FAMILY_IPV6) {
 		error = fprintf(nmfile, "\n[ipv6]\n");
 		if (error < 0)
 			goto setval_error;
@@ -1455,14 +1455,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
 	if (error < 0)
 		goto setval_error;
 
-	error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
-	if (error < 0)
-		goto setval_error;
-
-	error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
-	if (error < 0)
-		goto setval_error;
+	/* we do not want ipv4 addresses in ipv6 section and vice versa */
+	if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
+		error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+		if (error < 0)
+			goto setval_error;
+	}
 
+	if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
+		error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+		if (error < 0)
+			goto setval_error;
+	}
 	fclose(nmfile);
 	fclose(ifcfg_file);
 
diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh
index ae5a7a8249a2..440a91b35823 100755
--- a/tools/hv/hv_set_ifconfig.sh
+++ b/tools/hv/hv_set_ifconfig.sh
@@ -53,7 +53,7 @@
 #                       or "manual" if no boot-time protocol should be used)
 #
 # address1=ipaddr1/plen
-# address=ipaddr2/plen
+# address2=ipaddr2/plen
 #
 # gateway=gateway1;gateway2
 #
@@ -61,7 +61,7 @@
 #
 # [ipv6]
 # address1=ipaddr1/plen
-# address2=ipaddr1/plen
+# address2=ipaddr2/plen
 #
 # gateway=gateway1;gateway2
 #
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 9a10512e3407..7a334377f92b 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -211,9 +211,6 @@ int *fd_instr_count_percpu;
 struct timeval interval_tv = { 5, 0 };
 struct timespec interval_ts = { 5, 0 };
 
-/* Save original CPU model */
-unsigned int model_orig;
-
 unsigned int num_iterations;
 unsigned int header_iterations;
 unsigned int debug;
@@ -224,24 +221,16 @@ unsigned int rapl_joules;
 unsigned int summary_only;
 unsigned int list_header_only;
 unsigned int dump_only;
-unsigned int do_snb_cstates;
-unsigned int do_knl_cstates;
-unsigned int do_slm_cstates;
-unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
 unsigned int has_epb;
 unsigned int has_turbo;
 unsigned int is_hybrid;
-unsigned int do_irtl_snb;
-unsigned int do_irtl_hsw;
 unsigned int units = 1000000;	/* MHz etc */
 unsigned int genuine_intel;
 unsigned int authentic_amd;
 unsigned int hygon_genuine;
 unsigned int max_level, max_extended_level;
 unsigned int has_invariant_tsc;
-unsigned int do_nhm_platform_info;
-unsigned int no_MSR_MISC_PWR_MGMT;
 unsigned int aperf_mperf_multiplier = 1;
 double bclk;
 double base_hz;
@@ -250,7 +239,6 @@ double tsc_tweak = 1.0;
 unsigned int show_pkg_only;
 unsigned int show_core_only;
 char *output_buffer, *outp;
-unsigned int do_rapl;
 unsigned int do_dts;
 unsigned int do_ptm;
 unsigned int do_ipc;
@@ -261,65 +249,686 @@ unsigned int gfx_cur_mhz;
 unsigned int gfx_act_mhz;
 unsigned int tj_max;
 unsigned int tj_max_override;
-int tcc_offset_bits;
 double rapl_power_units, rapl_time_units;
 double rapl_dram_energy_units, rapl_energy_units;
 double rapl_joule_counter_range;
-unsigned int do_core_perf_limit_reasons;
-unsigned int has_automatic_cstate_conversion;
-unsigned int dis_cstate_prewake;
-unsigned int do_gfx_perf_limit_reasons;
-unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
 int base_cpu;
-double discover_bclk(unsigned int family, unsigned int model);
 unsigned int has_hwp;		/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
 			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
 unsigned int has_hwp_notify;	/* IA32_HWP_INTERRUPT */
 unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
-unsigned int has_misc_feature_control;
 unsigned int first_counter_read = 1;
 int ignore_stdin;
 
-#define RAPL_PKG		(1 << 0)
-					/* 0x610 MSR_PKG_POWER_LIMIT */
-					/* 0x611 MSR_PKG_ENERGY_STATUS */
-#define RAPL_PKG_PERF_STATUS	(1 << 1)
-					/* 0x613 MSR_PKG_PERF_STATUS */
-#define RAPL_PKG_POWER_INFO	(1 << 2)
-					/* 0x614 MSR_PKG_POWER_INFO */
-
-#define RAPL_DRAM		(1 << 3)
-					/* 0x618 MSR_DRAM_POWER_LIMIT */
-					/* 0x619 MSR_DRAM_ENERGY_STATUS */
-#define RAPL_DRAM_PERF_STATUS	(1 << 4)
-					/* 0x61b MSR_DRAM_PERF_STATUS */
-#define RAPL_DRAM_POWER_INFO	(1 << 5)
-					/* 0x61c MSR_DRAM_POWER_INFO */
-
-#define RAPL_CORES_POWER_LIMIT	(1 << 6)
-					/* 0x638 MSR_PP0_POWER_LIMIT */
-#define RAPL_CORE_POLICY	(1 << 7)
-					/* 0x63a MSR_PP0_POLICY */
-
-#define RAPL_GFX		(1 << 8)
-					/* 0x640 MSR_PP1_POWER_LIMIT */
-					/* 0x641 MSR_PP1_ENERGY_STATUS */
-					/* 0x642 MSR_PP1_POLICY */
-
-#define RAPL_CORES_ENERGY_STATUS	(1 << 9)
-					/* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_PER_CORE_ENERGY	(1 << 10)
-					/* Indicates cores energy collection is per-core,
-					 * not per-package. */
-#define RAPL_AMD_F17H		(1 << 11)
-					/* 0xc0010299 MSR_RAPL_PWR_UNIT */
-					/* 0xc001029a MSR_CORE_ENERGY_STAT */
-					/* 0xc001029b MSR_PKG_ENERGY_STAT */
-#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
+int get_msr(int cpu, off_t offset, unsigned long long *msr);
+
+/* Model specific support Start */
+
+/* List of features that may diverge among different platforms */
+struct platform_features {
+	bool has_msr_misc_feature_control;	/* MSR_MISC_FEATURE_CONTROL */
+	bool has_msr_misc_pwr_mgmt;	/* MSR_MISC_PWR_MGMT */
+	bool has_nhm_msrs;	/* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
+	bool has_config_tdp;	/* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
+	int bclk_freq;		/* CPU base clock */
+	int crystal_freq;	/* Crystal clock to use when not available from CPUID.15 */
+	int supported_cstates;	/* Core cstates and Package cstates supported */
+	int cst_limit;		/* MSR_PKG_CST_CONFIG_CONTROL */
+	bool has_cst_auto_convension;	/* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
+	bool has_irtl_msrs;	/* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
+	bool has_msr_core_c1_res;	/* MSR_CORE_C1_RES */
+	bool has_msr_module_c6_res_ms;	/* MSR_MODULE_C6_RES_MS */
+	bool has_msr_c6_demotion_policy_config;	/* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
+	bool has_msr_atom_pkg_c6_residency;	/* MSR_ATOM_PKG_C6_RESIDENCY */
+	bool has_msr_knl_core_c6_residency;	/* MSR_KNL_CORE_C6_RESIDENCY */
+	bool has_ext_cst_msrs;	/* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
+	bool has_cst_prewake_bit;	/* Cstate prewake bit in MSR_IA32_POWER_CTL */
+	int trl_msrs;		/* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
+	int plr_msrs;		/* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
+	int rapl_msrs;		/* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
+	bool has_per_core_rapl;	/* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
+	bool has_rapl_divisor;	/* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
+	bool has_fixed_rapl_unit;	/* Fixed Energy Unit used for DRAM RAPL Domain */
+	int rapl_quirk_tdp;	/* Hardcoded TDP value when cannot be retrieved from hardware */
+	int tcc_offset_bits;	/* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
+	bool enable_tsc_tweak;	/* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
+	bool need_perf_multiplier;	/* mperf/aperf multiplier */
+};
+
+struct platform_data {
+	unsigned int model;
+	const struct platform_features *features;
+};
+
+/* For BCLK */
+enum bclk_freq {
+	BCLK_100MHZ = 1,
+	BCLK_133MHZ,
+	BCLK_SLV,
+};
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
+
+double slm_bclk(void)
+{
+	unsigned long long msr = 3;
+	unsigned int i;
+	double freq;
+
+	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
+		fprintf(outf, "SLM BCLK: unknown\n");
+
+	i = msr & 0xf;
+	if (i >= SLM_BCLK_FREQS) {
+		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
+		i = 3;
+	}
+	freq = slm_freq_table[i];
+
+	if (!quiet)
+		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
+
+	return freq;
+}
+
+/* For Package cstate limit */
+enum package_cstate_limit {
+	CST_LIMIT_NHM = 1,
+	CST_LIMIT_SNB,
+	CST_LIMIT_HSW,
+	CST_LIMIT_SKX,
+	CST_LIMIT_ICX,
+	CST_LIMIT_SLV,
+	CST_LIMIT_AMT,
+	CST_LIMIT_KNL,
+	CST_LIMIT_GMT,
+};
+
+/* For Turbo Ratio Limit MSRs */
+enum turbo_ratio_limit_msrs {
+	TRL_BASE = BIT(0),
+	TRL_LIMIT1 = BIT(1),
+	TRL_LIMIT2 = BIT(2),
+	TRL_ATOM = BIT(3),
+	TRL_KNL = BIT(4),
+	TRL_CORECOUNT = BIT(5),
+};
+
+/* For Perf Limit Reason MSRs */
+enum perf_limit_reason_msrs {
+	PLR_CORE = BIT(0),
+	PLR_GFX = BIT(1),
+	PLR_RING = BIT(2),
+};
+
+/* For RAPL MSRs */
+enum rapl_msrs {
+	RAPL_PKG_POWER_LIMIT = BIT(0),	/* 0x610 MSR_PKG_POWER_LIMIT */
+	RAPL_PKG_ENERGY_STATUS = BIT(1),	/* 0x611 MSR_PKG_ENERGY_STATUS */
+	RAPL_PKG_PERF_STATUS = BIT(2),	/* 0x613 MSR_PKG_PERF_STATUS */
+	RAPL_PKG_POWER_INFO = BIT(3),	/* 0x614 MSR_PKG_POWER_INFO */
+	RAPL_DRAM_POWER_LIMIT = BIT(4),	/* 0x618 MSR_DRAM_POWER_LIMIT */
+	RAPL_DRAM_ENERGY_STATUS = BIT(5),	/* 0x619 MSR_DRAM_ENERGY_STATUS */
+	RAPL_DRAM_PERF_STATUS = BIT(6),	/* 0x61b MSR_DRAM_PERF_STATUS */
+	RAPL_DRAM_POWER_INFO = BIT(7),	/* 0x61c MSR_DRAM_POWER_INFO */
+	RAPL_CORE_POWER_LIMIT = BIT(8),	/* 0x638 MSR_PP0_POWER_LIMIT */
+	RAPL_CORE_ENERGY_STATUS = BIT(9),	/* 0x639 MSR_PP0_ENERGY_STATUS */
+	RAPL_CORE_POLICY = BIT(10),	/* 0x63a MSR_PP0_POLICY */
+	RAPL_GFX_POWER_LIMIT = BIT(11),	/* 0x640 MSR_PP1_POWER_LIMIT */
+	RAPL_GFX_ENERGY_STATUS = BIT(12),	/* 0x641 MSR_PP1_ENERGY_STATUS */
+	RAPL_GFX_POLICY = BIT(13),	/* 0x642 MSR_PP1_POLICY */
+	RAPL_AMD_PWR_UNIT = BIT(14),	/* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
+	RAPL_AMD_CORE_ENERGY_STAT = BIT(15),	/* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
+	RAPL_AMD_PKG_ENERGY_STAT = BIT(16),	/* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
+};
+
+#define RAPL_PKG	(RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
+#define RAPL_DRAM	(RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
+#define RAPL_CORE	(RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
+#define RAPL_GFX	(RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
+
+#define RAPL_PKG_ALL	(RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
+#define RAPL_DRAM_ALL	(RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
+#define RAPL_CORE_ALL	(RAPL_CORE | RAPL_CORE_POLICY)
+#define RAPL_GFX_ALL	(RAPL_GFX | RAPL_GFX_POLIGY)
+
+#define RAPL_AMD_F17H	(RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
+
+/* For Cstates */
+enum cstates {
+	CC1 = BIT(0),
+	CC3 = BIT(1),
+	CC6 = BIT(2),
+	CC7 = BIT(3),
+	PC2 = BIT(4),
+	PC3 = BIT(5),
+	PC6 = BIT(6),
+	PC7 = BIT(7),
+	PC8 = BIT(8),
+	PC9 = BIT(9),
+	PC10 = BIT(10),
+};
+
+static const struct platform_features nhm_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_133MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+	.cst_limit = CST_LIMIT_NHM,
+	.trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features nhx_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_133MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+	.cst_limit = CST_LIMIT_NHM,
+};
+
+static const struct platform_features snb_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+	.cst_limit = CST_LIMIT_SNB,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features snx_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+	.cst_limit = CST_LIMIT_SNB,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features ivb_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+	.cst_limit = CST_LIMIT_SNB,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features ivx_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+	.cst_limit = CST_LIMIT_SNB,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE | TRL_LIMIT1,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features hsw_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hsx_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
+	.plr_msrs = PLR_CORE | PLR_RING,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+	.has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features hswl_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hswg_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdw_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdwg_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdx_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.has_cst_auto_convension = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+	.has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features skl_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.crystal_freq = 24000000,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.has_ext_cst_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.tcc_offset_bits = 6,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+	.enable_tsc_tweak = 1,
+};
+
+static const struct platform_features cnl_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.has_msr_core_c1_res = 1,
+	.has_ext_cst_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.tcc_offset_bits = 6,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+	.enable_tsc_tweak = 1,
+};
+
+static const struct platform_features adl_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
+	.cst_limit = CST_LIMIT_HSW,
+	.has_irtl_msrs = 1,
+	.has_msr_core_c1_res = 1,
+	.has_ext_cst_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.tcc_offset_bits = 6,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+	.enable_tsc_tweak = 1,
+};
+
+static const struct platform_features skx_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6 | PC2 | PC6,
+	.cst_limit = CST_LIMIT_SKX,
+	.has_irtl_msrs = 1,
+	.has_cst_auto_convension = 1,
+	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+	.has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features icx_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6 | PC2 | PC6,
+	.cst_limit = CST_LIMIT_ICX,
+	.has_irtl_msrs = 1,
+	.has_cst_prewake_bit = 1,
+	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+	.has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features spr_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6 | PC2 | PC6,
+	.cst_limit = CST_LIMIT_SKX,
+	.has_msr_core_c1_res = 1,
+	.has_irtl_msrs = 1,
+	.has_cst_prewake_bit = 1,
+	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features srf_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6 | PC2 | PC6,
+	.cst_limit = CST_LIMIT_SKX,
+	.has_msr_core_c1_res = 1,
+	.has_msr_module_c6_res_ms = 1,
+	.has_irtl_msrs = 1,
+	.has_cst_prewake_bit = 1,
+	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features grr_features = {
+	.has_msr_misc_feature_control = 1,
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6,
+	.cst_limit = CST_LIMIT_SKX,
+	.has_msr_core_c1_res = 1,
+	.has_msr_module_c6_res_ms = 1,
+	.has_irtl_msrs = 1,
+	.has_cst_prewake_bit = 1,
+	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features slv_features = {
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_SLV,
+	.supported_cstates = CC1 | CC6 | PC6,
+	.cst_limit = CST_LIMIT_SLV,
+	.has_msr_core_c1_res = 1,
+	.has_msr_module_c6_res_ms = 1,
+	.has_msr_c6_demotion_policy_config = 1,
+	.has_msr_atom_pkg_c6_residency = 1,
+	.trl_msrs = TRL_ATOM,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE,
+	.has_rapl_divisor = 1,
+	.rapl_quirk_tdp = 30,
+};
+
+static const struct platform_features slvd_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_SLV,
+	.supported_cstates = CC1 | CC6 | PC3 | PC6,
+	.cst_limit = CST_LIMIT_SLV,
+	.has_msr_atom_pkg_c6_residency = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG | RAPL_CORE,
+	.rapl_quirk_tdp = 30,
+};
+
+static const struct platform_features amt_features = {
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_133MHZ,
+	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+	.cst_limit = CST_LIMIT_AMT,
+	.trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features gmt_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.crystal_freq = 19200000,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+	.cst_limit = CST_LIMIT_GMT,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
+	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features gmtd_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.crystal_freq = 25000000,
+	.supported_cstates = CC1 | CC6 | PC2 | PC6,
+	.cst_limit = CST_LIMIT_GMT,
+	.has_irtl_msrs = 1,
+	.has_msr_core_c1_res = 1,
+	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
+};
+
+static const struct platform_features gmtp_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.crystal_freq = 19200000,
+	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+	.cst_limit = CST_LIMIT_GMT,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features tmt_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+	.cst_limit = CST_LIMIT_GMT,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+	.enable_tsc_tweak = 1,
+};
+
+static const struct platform_features tmtd_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6,
+	.cst_limit = CST_LIMIT_GMT,
+	.has_irtl_msrs = 1,
+	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
+	.rapl_msrs = RAPL_PKG_ALL,
+};
+
+static const struct platform_features knl_features = {
+	.has_msr_misc_pwr_mgmt = 1,
+	.has_nhm_msrs = 1,
+	.has_config_tdp = 1,
+	.bclk_freq = BCLK_100MHZ,
+	.supported_cstates = CC1 | CC6 | PC3 | PC6,
+	.cst_limit = CST_LIMIT_KNL,
+	.has_msr_knl_core_c6_residency = 1,
+	.trl_msrs = TRL_KNL,
+	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+	.has_fixed_rapl_unit = 1,
+	.need_perf_multiplier = 1,
+};
+
+static const struct platform_features default_features = {
+};
+
+static const struct platform_features amd_features_with_rapl = {
+	.rapl_msrs = RAPL_AMD_F17H,
+	.has_per_core_rapl = 1,
+	.rapl_quirk_tdp = 280,	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
+};
+
+static const struct platform_data turbostat_pdata[] = {
+	{ INTEL_FAM6_NEHALEM, &nhm_features },
+	{ INTEL_FAM6_NEHALEM_G, &nhm_features },
+	{ INTEL_FAM6_NEHALEM_EP, &nhm_features },
+	{ INTEL_FAM6_NEHALEM_EX, &nhx_features },
+	{ INTEL_FAM6_WESTMERE, &nhm_features },
+	{ INTEL_FAM6_WESTMERE_EP, &nhm_features },
+	{ INTEL_FAM6_WESTMERE_EX, &nhx_features },
+	{ INTEL_FAM6_SANDYBRIDGE, &snb_features },
+	{ INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
+	{ INTEL_FAM6_IVYBRIDGE, &ivb_features },
+	{ INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
+	{ INTEL_FAM6_HASWELL, &hsw_features },
+	{ INTEL_FAM6_HASWELL_X, &hsx_features },
+	{ INTEL_FAM6_HASWELL_L, &hswl_features },
+	{ INTEL_FAM6_HASWELL_G, &hswg_features },
+	{ INTEL_FAM6_BROADWELL, &bdw_features },
+	{ INTEL_FAM6_BROADWELL_G, &bdwg_features },
+	{ INTEL_FAM6_BROADWELL_X, &bdx_features },
+	{ INTEL_FAM6_BROADWELL_D, &bdx_features },
+	{ INTEL_FAM6_SKYLAKE_L, &skl_features },
+	{ INTEL_FAM6_SKYLAKE, &skl_features },
+	{ INTEL_FAM6_SKYLAKE_X, &skx_features },
+	{ INTEL_FAM6_KABYLAKE_L, &skl_features },
+	{ INTEL_FAM6_KABYLAKE, &skl_features },
+	{ INTEL_FAM6_COMETLAKE, &skl_features },
+	{ INTEL_FAM6_COMETLAKE_L, &skl_features },
+	{ INTEL_FAM6_CANNONLAKE_L, &cnl_features },
+	{ INTEL_FAM6_ICELAKE_X, &icx_features },
+	{ INTEL_FAM6_ICELAKE_D, &icx_features },
+	{ INTEL_FAM6_ICELAKE_L, &cnl_features },
+	{ INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
+	{ INTEL_FAM6_ROCKETLAKE, &cnl_features },
+	{ INTEL_FAM6_TIGERLAKE_L, &cnl_features },
+	{ INTEL_FAM6_TIGERLAKE, &cnl_features },
+	{ INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
+	{ INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
+	{ INTEL_FAM6_GRANITERAPIDS_X, &spr_features },
+	{ INTEL_FAM6_LAKEFIELD, &cnl_features },
+	{ INTEL_FAM6_ALDERLAKE, &adl_features },
+	{ INTEL_FAM6_ALDERLAKE_L, &adl_features },
+	{ INTEL_FAM6_RAPTORLAKE, &adl_features },
+	{ INTEL_FAM6_RAPTORLAKE_P, &adl_features },
+	{ INTEL_FAM6_RAPTORLAKE_S, &adl_features },
+	{ INTEL_FAM6_METEORLAKE, &cnl_features },
+	{ INTEL_FAM6_METEORLAKE_L, &cnl_features },
+	{ INTEL_FAM6_ARROWLAKE, &cnl_features },
+	{ INTEL_FAM6_LUNARLAKE_M, &cnl_features },
+	{ INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
+	{ INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
+	{ INTEL_FAM6_ATOM_AIRMONT, &amt_features },
+	{ INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
+	{ INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
+	{ INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
+	{ INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
+	{ INTEL_FAM6_ATOM_TREMONT, &tmt_features },
+	{ INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
+	{ INTEL_FAM6_ATOM_GRACEMONT, &adl_features },
+	{ INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features },
+	{ INTEL_FAM6_ATOM_CRESTMONT, &grr_features },
+	{ INTEL_FAM6_XEON_PHI_KNL, &knl_features },
+	{ INTEL_FAM6_XEON_PHI_KNM, &knl_features },
+	/*
+	 * Missing support for
+	 * INTEL_FAM6_ICELAKE
+	 * INTEL_FAM6_ATOM_SILVERMONT_MID
+	 * INTEL_FAM6_ATOM_AIRMONT_MID
+	 * INTEL_FAM6_ATOM_AIRMONT_NP
+	 */
+	{ 0, NULL },
+};
+
+static const struct platform_features *platform;
+
+void probe_platform_features(unsigned int family, unsigned int model)
+{
+	int i;
+
+	platform = &default_features;
+
+	if (authentic_amd || hygon_genuine) {
+		if (max_extended_level >= 0x80000007) {
+			unsigned int eax, ebx, ecx, edx;
+
+			__cpuid(0x80000007, eax, ebx, ecx, edx);
+			/* RAPL (Fam 17h+) */
+			if ((edx & (1 << 14)) && family >= 0x17)
+				platform = &amd_features_with_rapl;
+		}
+		return;
+	}
+
+	if (!genuine_intel || family != 6)
+		return;
+
+	for (i = 0; turbostat_pdata[i].features; i++) {
+		if (turbostat_pdata[i].model == model) {
+			platform = turbostat_pdata[i].features;
+			return;
+		}
+	}
+}
+
+/* Model specific support End */
+
 #define	TJMAX_DEFAULT	100
 
 /* MSRs that are not yet in the kernel-provided header. */
@@ -333,8 +942,8 @@ int backwards_count;
 char *progname;
 
 #define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
-cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
+cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
 #define MAX_ADDED_COUNTERS 8
 #define MAX_ADDED_THREAD_COUNTERS 24
 #define BITMASK_SIZE 32
@@ -355,12 +964,11 @@ struct thread_data {
 	unsigned int x2apic_id;
 	unsigned int flags;
 	bool is_atom;
-#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
-#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
 	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
 } *thread_even, *thread_odd;
 
 struct core_data {
+	int base_cpu;
 	unsigned long long c3;
 	unsigned long long c6;
 	unsigned long long c7;
@@ -373,6 +981,7 @@ struct core_data {
 } *core_even, *core_odd;
 
 struct pkg_data {
+	int base_cpu;
 	unsigned long long pc2;
 	unsigned long long pc3;
 	unsigned long long pc6;
@@ -456,7 +1065,7 @@ off_t idx_to_offset(int idx)
 
 	switch (idx) {
 	case IDX_PKG_ENERGY:
-		if (do_rapl & RAPL_AMD_F17H)
+		if (platform->rapl_msrs & RAPL_AMD_F17H)
 			offset = MSR_PKG_ENERGY_STAT;
 		else
 			offset = MSR_PKG_ENERGY_STATUS;
@@ -516,17 +1125,17 @@ int idx_valid(int idx)
 {
 	switch (idx) {
 	case IDX_PKG_ENERGY:
-		return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
+		return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
 	case IDX_DRAM_ENERGY:
-		return do_rapl & RAPL_DRAM;
+		return platform->rapl_msrs & RAPL_DRAM;
 	case IDX_PP0_ENERGY:
-		return do_rapl & RAPL_CORES_ENERGY_STATUS;
+		return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
 	case IDX_PP1_ENERGY:
-		return do_rapl & RAPL_GFX;
+		return platform->rapl_msrs & RAPL_GFX;
 	case IDX_PKG_PERF:
-		return do_rapl & RAPL_PKG_PERF_STATUS;
+		return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
 	case IDX_DRAM_PERF:
-		return do_rapl & RAPL_DRAM_PERF_STATUS;
+		return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
 	default:
 		return 0;
 	}
@@ -563,6 +1172,9 @@ struct topo_params {
 	int num_die;
 	int num_cpus;
 	int num_cores;
+	int allowed_packages;
+	int allowed_cpus;
+	int allowed_cores;
 	int max_cpu_num;
 	int max_node_num;
 	int nodes_per_pkg;
@@ -575,7 +1187,7 @@ struct timeval tv_even, tv_odd, tv_delta;
 int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
 int *irqs_per_cpu;		/* indexed by cpu_num */
 
-void setup_all_buffers(void);
+void setup_all_buffers(bool startup);
 
 char *sys_lpi_file;
 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
@@ -586,6 +1198,11 @@ int cpu_is_not_present(int cpu)
 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
 }
 
+int cpu_is_not_allowed(int cpu)
+{
+	return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
+}
+
 /*
  * run func(thread, core, package) in topology order
  * skip non-present cpus
@@ -603,10 +1220,9 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
 					struct thread_data *t;
 					struct core_data *c;
 					struct pkg_data *p;
-
 					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
 
-					if (cpu_is_not_present(t->cpu_id))
+					if (cpu_is_not_allowed(t->cpu_id))
 						continue;
 
 					c = GET_CORE(core_base, core_no, node_no, pkg_no);
@@ -622,6 +1238,25 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
 	return 0;
 }
 
+int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	UNUSED(p);
+
+	return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
+}
+
+int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	UNUSED(c);
+
+	return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
+}
+
+int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p);
+}
+
 int cpu_migrate(int cpu)
 {
 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
@@ -904,11 +1539,11 @@ void print_header(char *delim)
 	if (DO_BIC(BIC_CORE_THROT_CNT))
 		outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
 
-	if (do_rapl && !rapl_joules) {
-		if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+	if (platform->rapl_msrs && !rapl_joules) {
+		if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
 			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
-	} else if (do_rapl && rapl_joules) {
-		if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+	} else if (platform->rapl_msrs && rapl_joules) {
+		if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
 			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
 	}
 
@@ -966,10 +1601,10 @@ void print_header(char *delim)
 	if (DO_BIC(BIC_SYS_LPI))
 		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
 
-	if (do_rapl && !rapl_joules) {
+	if (platform->rapl_msrs && !rapl_joules) {
 		if (DO_BIC(BIC_PkgWatt))
 			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
-		if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+		if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
 			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
 		if (DO_BIC(BIC_GFXWatt))
 			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
@@ -979,10 +1614,10 @@ void print_header(char *delim)
 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
 		if (DO_BIC(BIC_RAM__))
 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
-	} else if (do_rapl && rapl_joules) {
+	} else if (platform->rapl_msrs && rapl_joules) {
 		if (DO_BIC(BIC_Pkg_J))
 			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
-		if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+		if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
 			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
 		if (DO_BIC(BIC_GFX_J))
 			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
@@ -1106,11 +1741,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	int printed = 0;
 
 	/* if showing only 1st thread in core and this isn't one, bail out */
-	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+	if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
 		return 0;
 
 	/* if showing only 1st thread in pkg and this isn't one, bail out */
-	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
 		return 0;
 
 	/*if not summary line and --cpu is used */
@@ -1244,7 +1879,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
 
 	/* print per-core data only for 1st thread in core */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+	if (!is_cpu_first_thread_in_core(t, c, p))
 		goto done;
 
 	if (DO_BIC(BIC_CPU_c3))
@@ -1284,14 +1919,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	fmt8 = "%s%.2f";
 
-	if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+	if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
 		outp +=
 		    sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
-	if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+	if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
 
 	/* print per-package data only for 1st core in package */
-	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (!is_cpu_first_core_in_package(t, c, p))
 		goto done;
 
 	/* PkgTmp */
@@ -1352,7 +1987,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 		outp +=
 		    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
 
-	if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+	if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
 		outp +=
 		    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
 	if (DO_BIC(BIC_GFXWatt))
@@ -1364,7 +1999,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			    p->energy_dram * rapl_dram_energy_units / interval_float);
 	if (DO_BIC(BIC_Pkg_J))
 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
-	if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+	if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
 	if (DO_BIC(BIC_GFX_J))
 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
@@ -1527,7 +2162,7 @@ void delta_core(struct core_data *new, struct core_data *old)
 
 int soft_c1_residency_display(int bic)
 {
-	if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
+	if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
 		return 0;
 
 	return DO_BIC_READ(bic);
@@ -1567,7 +2202,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
 
 	old->c1 = new->c1 - old->c1;
 
-	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+	    || soft_c1_residency_display(BIC_Avg_MHz)) {
 		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
 			old->aperf = new->aperf - old->aperf;
 			old->mperf = new->mperf - old->mperf;
@@ -1576,7 +2212,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
 		}
 	}
 
-	if (use_c1_residency_msr) {
+	if (platform->has_msr_core_c1_res) {
 		/*
 		 * Some models have a dedicated C1 residency MSR,
 		 * which should be more accurate than the derivation below.
@@ -1626,7 +2262,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
 	int retval = 0;
 
 	/* calculate core delta only for 1st thread in core */
-	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
+	if (is_cpu_first_thread_in_core(t, c, p))
 		delta_core(c, c2);
 
 	/* always calculate thread delta */
@@ -1635,7 +2271,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
 		return retval;
 
 	/* calculate package delta only for 1st core in package */
-	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
+	if (is_cpu_first_core_in_package(t, c, p))
 		retval = delta_package(p, p2);
 
 	return retval;
@@ -1663,9 +2299,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	t->irq_count = 0;
 	t->smi_count = 0;
 
-	/* tells format_counters to dump all fields from this set */
-	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
-
 	c->c3 = 0;
 	c->c6 = 0;
 	c->c7 = 0;
@@ -1749,7 +2382,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	}
 
 	/* sum per-core values only for 1st thread in core */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+	if (!is_cpu_first_thread_in_core(t, c, p))
 		return 0;
 
 	average.cores.c3 += c->c3;
@@ -1769,7 +2402,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	}
 
 	/* sum per-pkg values only for 1st core in pkg */
-	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (!is_cpu_first_core_in_package(t, c, p))
 		return 0;
 
 	if (DO_BIC(BIC_Totl_c0))
@@ -1834,40 +2467,40 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
 	/* Use the global time delta for the average. */
 	average.threads.tv_delta = tv_delta;
 
-	average.threads.tsc /= topo.num_cpus;
-	average.threads.aperf /= topo.num_cpus;
-	average.threads.mperf /= topo.num_cpus;
-	average.threads.instr_count /= topo.num_cpus;
-	average.threads.c1 /= topo.num_cpus;
+	average.threads.tsc /= topo.allowed_cpus;
+	average.threads.aperf /= topo.allowed_cpus;
+	average.threads.mperf /= topo.allowed_cpus;
+	average.threads.instr_count /= topo.allowed_cpus;
+	average.threads.c1 /= topo.allowed_cpus;
 
 	if (average.threads.irq_count > 9999999)
 		sums_need_wide_columns = 1;
 
-	average.cores.c3 /= topo.num_cores;
-	average.cores.c6 /= topo.num_cores;
-	average.cores.c7 /= topo.num_cores;
-	average.cores.mc6_us /= topo.num_cores;
+	average.cores.c3 /= topo.allowed_cores;
+	average.cores.c6 /= topo.allowed_cores;
+	average.cores.c7 /= topo.allowed_cores;
+	average.cores.mc6_us /= topo.allowed_cores;
 
 	if (DO_BIC(BIC_Totl_c0))
-		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+		average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
 	if (DO_BIC(BIC_Any_c0))
-		average.packages.pkg_any_core_c0 /= topo.num_packages;
+		average.packages.pkg_any_core_c0 /= topo.allowed_packages;
 	if (DO_BIC(BIC_GFX_c0))
-		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+		average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
 	if (DO_BIC(BIC_CPUGFX))
-		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+		average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;
 
-	average.packages.pc2 /= topo.num_packages;
+	average.packages.pc2 /= topo.allowed_packages;
 	if (DO_BIC(BIC_Pkgpc3))
-		average.packages.pc3 /= topo.num_packages;
+		average.packages.pc3 /= topo.allowed_packages;
 	if (DO_BIC(BIC_Pkgpc6))
-		average.packages.pc6 /= topo.num_packages;
+		average.packages.pc6 /= topo.allowed_packages;
 	if (DO_BIC(BIC_Pkgpc7))
-		average.packages.pc7 /= topo.num_packages;
+		average.packages.pc7 /= topo.allowed_packages;
 
-	average.packages.pc8 /= topo.num_packages;
-	average.packages.pc9 /= topo.num_packages;
-	average.packages.pc10 /= topo.num_packages;
+	average.packages.pc8 /= topo.allowed_packages;
+	average.packages.pc9 /= topo.allowed_packages;
+	average.packages.pc10 /= topo.allowed_packages;
 
 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
 		if (mp->format == FORMAT_RAW)
@@ -1877,7 +2510,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
 				sums_need_wide_columns = 1;
 			continue;
 		}
-		average.threads.counter[i] /= topo.num_cpus;
+		average.threads.counter[i] /= topo.allowed_cpus;
 	}
 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
 		if (mp->format == FORMAT_RAW)
@@ -1886,7 +2519,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
 			if (average.cores.counter[i] > 9999999)
 				sums_need_wide_columns = 1;
 		}
-		average.cores.counter[i] /= topo.num_cores;
+		average.cores.counter[i] /= topo.allowed_cores;
 	}
 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
 		if (mp->format == FORMAT_RAW)
@@ -1895,7 +2528,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
 			if (average.packages.counter[i] > 9999999)
 				sums_need_wide_columns = 1;
 		}
-		average.packages.counter[i] /= topo.num_packages;
+		average.packages.counter[i] /= topo.allowed_packages;
 	}
 }
 
@@ -2092,7 +2725,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 retry:
 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
 
-	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+	    || soft_c1_residency_display(BIC_Avg_MHz)) {
 		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
 
 		/*
@@ -2158,7 +2792,7 @@ retry:
 			return -5;
 		t->smi_count = msr & 0xFFFFFFFF;
 	}
-	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
+	if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
 		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
 			return -6;
 	}
@@ -2169,7 +2803,7 @@ retry:
 	}
 
 	/* collect core counters only for 1st thread in core */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+	if (!is_cpu_first_thread_in_core(t, c, p))
 		goto done;
 
 	if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
@@ -2177,10 +2811,10 @@ retry:
 			return -6;
 	}
 
-	if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
+	if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
 			return -7;
-	} else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
+	} else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
 		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
 			return -7;
 	}
@@ -2212,7 +2846,7 @@ retry:
 	if (DO_BIC(BIC_CORE_THROT_CNT))
 		get_core_throt_cnt(cpu, &c->core_throt_cnt);
 
-	if (do_rapl & RAPL_AMD_F17H) {
+	if (platform->rapl_msrs & RAPL_AMD_F17H) {
 		if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
 			return -14;
 		c->core_energy = msr & 0xFFFFFFFF;
@@ -2224,7 +2858,7 @@ retry:
 	}
 
 	/* collect package counters only for 1st core in package */
-	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (!is_cpu_first_core_in_package(t, c, p))
 		goto done;
 
 	if (DO_BIC(BIC_Totl_c0)) {
@@ -2247,7 +2881,7 @@ retry:
 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
 			return -9;
 	if (DO_BIC(BIC_Pkgpc6)) {
-		if (do_slm_cstates) {
+		if (platform->has_msr_atom_pkg_c6_residency) {
 			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
 				return -10;
 		} else {
@@ -2277,37 +2911,37 @@ retry:
 	if (DO_BIC(BIC_SYS_LPI))
 		p->sys_lpi = cpuidle_cur_sys_lpi_us;
 
-	if (do_rapl & RAPL_PKG) {
+	if (platform->rapl_msrs & RAPL_PKG) {
 		if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
 			return -13;
 		p->energy_pkg = msr;
 	}
-	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
+	if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
 		if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
 			return -14;
 		p->energy_cores = msr;
 	}
-	if (do_rapl & RAPL_DRAM) {
+	if (platform->rapl_msrs & RAPL_DRAM) {
 		if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
 			return -15;
 		p->energy_dram = msr;
 	}
-	if (do_rapl & RAPL_GFX) {
+	if (platform->rapl_msrs & RAPL_GFX) {
 		if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
 			return -16;
 		p->energy_gfx = msr;
 	}
-	if (do_rapl & RAPL_PKG_PERF_STATUS) {
+	if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
 		if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
 			return -16;
 		p->rapl_pkg_perf_status = msr;
 	}
-	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
+	if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
 		if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
 			return -16;
 		p->rapl_dram_perf_status = msr;
 	}
-	if (do_rapl & RAPL_AMD_F17H) {
+	if (platform->rapl_msrs & RAPL_AMD_F17H) {
 		if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
 			return -13;
 		p->energy_pkg = msr;
@@ -2414,18 +3048,58 @@ int icx_pkg_cstate_limits[16] =
 	PCLRSV, PCLRSV
 };
 
-static void calculate_tsc_tweak()
+void probe_cst_limit(void)
 {
-	tsc_tweak = base_hz / tsc_hz;
-}
+	unsigned long long msr;
+	int *pkg_cstate_limits;
+
+	if (!platform->has_nhm_msrs)
+		return;
+
+	switch (platform->cst_limit) {
+	case CST_LIMIT_NHM:
+		pkg_cstate_limits = nhm_pkg_cstate_limits;
+		break;
+	case CST_LIMIT_SNB:
+		pkg_cstate_limits = snb_pkg_cstate_limits;
+		break;
+	case CST_LIMIT_HSW:
+		pkg_cstate_limits = hsw_pkg_cstate_limits;
+		break;
+	case CST_LIMIT_SKX:
+		pkg_cstate_limits = skx_pkg_cstate_limits;
+		break;
+	case CST_LIMIT_ICX:
+		pkg_cstate_limits = icx_pkg_cstate_limits;
+		break;
+	case CST_LIMIT_SLV:
+		pkg_cstate_limits = slv_pkg_cstate_limits;
+		break;
+	case CST_LIMIT_AMT:
+		pkg_cstate_limits = amt_pkg_cstate_limits;
+		break;
+	case CST_LIMIT_KNL:
+		pkg_cstate_limits = phi_pkg_cstate_limits;
+		break;
+	case CST_LIMIT_GMT:
+		pkg_cstate_limits = glm_pkg_cstate_limits;
+		break;
+	default:
+		return;
+	}
 
-void prewake_cstate_probe(unsigned int family, unsigned int model);
+	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
+	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+}
 
-static void dump_nhm_platform_info(void)
+static void dump_platform_info(void)
 {
 	unsigned long long msr;
 	unsigned int ratio;
 
+	if (!platform->has_nhm_msrs)
+		return;
+
 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
 
 	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
@@ -2435,19 +3109,27 @@ static void dump_nhm_platform_info(void)
 
 	ratio = (msr >> 8) & 0xFF;
 	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
+}
+
+static void dump_power_ctl(void)
+{
+	unsigned long long msr;
+
+	if (!platform->has_nhm_msrs)
+		return;
 
 	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
 	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
 		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
 
 	/* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
-	if (dis_cstate_prewake)
+	if (platform->has_cst_prewake_bit)
 		fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
 
 	return;
 }
 
-static void dump_hsw_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit2(void)
 {
 	unsigned long long msr;
 	unsigned int ratio;
@@ -2466,7 +3148,7 @@ static void dump_hsw_turbo_ratio_limits(void)
 	return;
 }
 
-static void dump_ivt_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit1(void)
 {
 	unsigned long long msr;
 	unsigned int ratio;
@@ -2509,29 +3191,7 @@ static void dump_ivt_turbo_ratio_limits(void)
 	return;
 }
 
-int has_turbo_ratio_group_limits(int family, int model)
-{
-
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_ATOM_GOLDMONT:
-	case INTEL_FAM6_SKYLAKE_X:
-	case INTEL_FAM6_ICELAKE_X:
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:
-	case INTEL_FAM6_ATOM_GOLDMONT_D:
-	case INTEL_FAM6_ATOM_TREMONT_D:
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
+static void dump_turbo_ratio_limits(int trl_msr_offset)
 {
 	unsigned long long msr, core_counts;
 	int shift;
@@ -2540,7 +3200,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
 	fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
 		base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
 
-	if (has_turbo_ratio_group_limits(family, model)) {
+	if (platform->trl_msrs & TRL_CORECOUNT) {
 		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
 		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
 	} else {
@@ -2657,10 +3317,13 @@ static void dump_knl_turbo_ratio_limits(void)
 				ratio[i], bclk, ratio[i] * bclk, cores[i]);
 }
 
-static void dump_nhm_cst_cfg(void)
+static void dump_cst_cfg(void)
 {
 	unsigned long long msr;
 
+	if (!platform->has_nhm_msrs)
+		return;
+
 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
 
 	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
@@ -2673,7 +3336,7 @@ static void dump_nhm_cst_cfg(void)
 		(msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
 
 #define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
-	if (has_automatic_cstate_conversion) {
+	if (platform->has_cst_auto_convension) {
 		fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
 	}
 
@@ -2730,39 +3393,50 @@ void print_irtl(void)
 {
 	unsigned long long msr;
 
-	get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
-	fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
-	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
-
-	get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
-	fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
-	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	if (!platform->has_irtl_msrs)
+		return;
 
-	get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
-	fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
-	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	if (platform->supported_cstates & PC3) {
+		get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
+		fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
+		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	}
 
-	if (!do_irtl_hsw)
-		return;
+	if (platform->supported_cstates & PC6) {
+		get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
+		fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
+		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	}
 
-	get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
-	fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
-	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	if (platform->supported_cstates & PC7) {
+		get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
+		fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
+		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	}
 
-	get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
-	fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
-	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	if (platform->supported_cstates & PC8) {
+		get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
+		fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
+		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	}
 
-	get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
-	fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
-	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	if (platform->supported_cstates & PC9) {
+		get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
+		fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
+		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	}
 
+	if (platform->supported_cstates & PC10) {
+		get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
+		fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
+		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+	}
 }
 
 void free_fd_percpu(void)
@@ -2785,6 +3459,14 @@ void free_all_buffers(void)
 	cpu_present_set = NULL;
 	cpu_present_setsize = 0;
 
+	CPU_FREE(cpu_effective_set);
+	cpu_effective_set = NULL;
+	cpu_effective_setsize = 0;
+
+	CPU_FREE(cpu_allowed_set);
+	cpu_allowed_set = NULL;
+	cpu_allowed_setsize = 0;
+
 	CPU_FREE(cpu_affinity_set);
 	cpu_affinity_set = NULL;
 	cpu_affinity_setsize = 0;
@@ -2927,6 +3609,59 @@ int get_physical_node_id(struct cpu_topology *thiscpu)
 	return -1;
 }
 
+static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
+{
+	unsigned int start, end;
+	char *next = cpu_str;
+
+	while (next && *next) {
+
+		if (*next == '-')	/* no negative cpu numbers */
+			return 1;
+
+		start = strtoul(next, &next, 10);
+
+		if (start >= CPU_SUBSET_MAXCPUS)
+			return 1;
+		CPU_SET_S(start, cpu_set_size, cpu_set);
+
+		if (*next == '\0' || *next == '\n')
+			break;
+
+		if (*next == ',') {
+			next += 1;
+			continue;
+		}
+
+		if (*next == '-') {
+			next += 1;	/* start range */
+		} else if (*next == '.') {
+			next += 1;
+			if (*next == '.')
+				next += 1;	/* start range */
+			else
+				return 1;
+		}
+
+		end = strtoul(next, &next, 10);
+		if (end <= start)
+			return 1;
+
+		while (++start <= end) {
+			if (start >= CPU_SUBSET_MAXCPUS)
+				return 1;
+			CPU_SET_S(start, cpu_set_size, cpu_set);
+		}
+
+		if (*next == ',')
+			next += 1;
+		else if (*next != '\0' && *next != '\n')
+			return 1;
+	}
+
+	return 0;
+}
+
 int get_thread_siblings(struct cpu_topology *thiscpu)
 {
 	char path[80], character;
@@ -2998,7 +3733,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
 
 					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
 
-					if (cpu_is_not_present(t->cpu_id))
+					if (cpu_is_not_allowed(t->cpu_id))
 						continue;
 
 					t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
@@ -3050,11 +3785,51 @@ int for_all_proc_cpus(int (func) (int))
 	return 0;
 }
 
+#define PATH_EFFECTIVE_CPUS	"/sys/fs/cgroup/cpuset.cpus.effective"
+
+static char cpu_effective_str[1024];
+
+static int update_effective_str(bool startup)
+{
+	FILE *fp;
+	char *pos;
+	char buf[1024];
+	int ret;
+
+	if (cpu_effective_str[0] == '\0' && !startup)
+		return 0;
+
+	fp = fopen(PATH_EFFECTIVE_CPUS, "r");
+	if (!fp)
+		return 0;
+
+	pos = fgets(buf, 1024, fp);
+	if (!pos)
+		err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);
+
+	fclose(fp);
+
+	ret = strncmp(cpu_effective_str, buf, 1024);
+	if (!ret)
+		return 0;
+
+	strncpy(cpu_effective_str, buf, 1024);
+	return 1;
+}
+
+static void update_effective_set(bool startup)
+{
+	update_effective_str(startup);
+
+	if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
+		err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
+}
+
 void re_initialize(void)
 {
 	free_all_buffers();
-	setup_all_buffers();
-	fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+	setup_all_buffers(false);
+	fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
 }
 
 void set_max_cpu_num(void)
@@ -3191,8 +3966,8 @@ int snapshot_gfx_rc6_ms(void)
 /*
  * snapshot_gfx_mhz()
  *
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
  *
  * return 1 if config change requires a restart, else return 0
  */
@@ -3201,9 +3976,11 @@ int snapshot_gfx_mhz(void)
 	static FILE *fp;
 	int retval;
 
-	if (fp == NULL)
-		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
-	else {
+	if (fp == NULL) {
+		fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
+		if (!fp)
+			fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
+	} else {
 		rewind(fp);
 		fflush(fp);
 	}
@@ -3218,8 +3995,8 @@ int snapshot_gfx_mhz(void)
 /*
  * snapshot_gfx_cur_mhz()
  *
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
  *
  * return 1 if config change requires a restart, else return 0
  */
@@ -3228,9 +4005,11 @@ int snapshot_gfx_act_mhz(void)
 	static FILE *fp;
 	int retval;
 
-	if (fp == NULL)
-		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
-	else {
+	if (fp == NULL) {
+		fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
+		if (!fp)
+			fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
+	} else {
 		rewind(fp);
 		fflush(fp);
 	}
@@ -3562,6 +4341,10 @@ restart:
 			re_initialize();
 			goto restart;
 		}
+		if (update_effective_str(false)) {
+			re_initialize();
+			goto restart;
+		}
 		do_sleep();
 		if (snapshot_proc_sysfs_files())
 			goto restart;
@@ -3674,395 +4457,31 @@ void check_permissions(void)
 		exit(-6);
 }
 
-/*
- * NHM adds support for additional MSRs:
- *
- * MSR_SMI_COUNT                   0x00000034
- *
- * MSR_PLATFORM_INFO               0x000000ce
- * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
- *
- * MSR_MISC_PWR_MGMT               0x000001aa
- *
- * MSR_PKG_C3_RESIDENCY            0x000003f8
- * MSR_PKG_C6_RESIDENCY            0x000003f9
- * MSR_CORE_C3_RESIDENCY           0x000003fc
- * MSR_CORE_C6_RESIDENCY           0x000003fd
- *
- * Side effect:
- * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
- * sets has_misc_feature_control
- */
-int probe_nhm_msrs(unsigned int family, unsigned int model)
+void probe_bclk(void)
 {
 	unsigned long long msr;
 	unsigned int base_ratio;
-	int *pkg_cstate_limits;
 
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	bclk = discover_bclk(family, model);
+	if (!platform->has_nhm_msrs)
+		return;
 
-	switch (model) {
-	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
-	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
-		pkg_cstate_limits = nhm_pkg_cstate_limits;
-		break;
-	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
-	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
-	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
-	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
-		pkg_cstate_limits = snb_pkg_cstate_limits;
-		has_misc_feature_control = 1;
-		break;
-	case INTEL_FAM6_HASWELL:	/* HSW */
-	case INTEL_FAM6_HASWELL_G:	/* HSW */
-	case INTEL_FAM6_HASWELL_X:	/* HSX */
-	case INTEL_FAM6_HASWELL_L:	/* HSW */
-	case INTEL_FAM6_BROADWELL:	/* BDW */
-	case INTEL_FAM6_BROADWELL_G:	/* BDW */
-	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
-	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
-		pkg_cstate_limits = hsw_pkg_cstate_limits;
-		has_misc_feature_control = 1;
-		break;
-	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:	/* SPR */
-		pkg_cstate_limits = skx_pkg_cstate_limits;
-		has_misc_feature_control = 1;
-		break;
-	case INTEL_FAM6_ICELAKE_X:	/* ICX */
-		pkg_cstate_limits = icx_pkg_cstate_limits;
-		has_misc_feature_control = 1;
-		break;
-	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
-		no_MSR_MISC_PWR_MGMT = 1;
-		/* FALLTHRU */
-	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
-		pkg_cstate_limits = slv_pkg_cstate_limits;
-		break;
-	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
-		pkg_cstate_limits = amt_pkg_cstate_limits;
-		no_MSR_MISC_PWR_MGMT = 1;
-		break;
-	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
-		pkg_cstate_limits = phi_pkg_cstate_limits;
-		break;
-	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
-	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
-	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
-		pkg_cstate_limits = glm_pkg_cstate_limits;
-		break;
-	default:
-		return 0;
-	}
-	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
-	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+	if (platform->bclk_freq == BCLK_100MHZ)
+		bclk = 100.00;
+	else if (platform->bclk_freq == BCLK_133MHZ)
+		bclk = 133.33;
+	else if (platform->bclk_freq == BCLK_SLV)
+		bclk = slm_bclk();
+	else
+		return;
 
 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
 	base_ratio = (msr >> 8) & 0xFF;
 
 	base_hz = base_ratio * bclk * 1000000;
 	has_base_hz = 1;
-	return 1;
-}
-
-/*
- * SLV client has support for unique MSRs:
- *
- * MSR_CC6_DEMOTION_POLICY_CONFIG
- * MSR_MC6_DEMOTION_POLICY_CONFIG
- */
-
-int has_slv_msrs(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT:
-	case INTEL_FAM6_ATOM_SILVERMONT_MID:
-	case INTEL_FAM6_ATOM_AIRMONT_MID:
-		return 1;
-	}
-	return 0;
-}
-
-int is_dnv(unsigned int family, unsigned int model)
-{
-
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_ATOM_GOLDMONT_D:
-		return 1;
-	}
-	return 0;
-}
-
-int is_bdx(unsigned int family, unsigned int model)
-{
-
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_BROADWELL_X:
-		return 1;
-	}
-	return 0;
-}
-
-int is_skx(unsigned int family, unsigned int model)
-{
-
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_SKYLAKE_X:
-		return 1;
-	}
-	return 0;
-}
-
-int is_icx(unsigned int family, unsigned int model)
-{
-
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_ICELAKE_X:
-		return 1;
-	}
-	return 0;
-}
-
-int is_spr(unsigned int family, unsigned int model)
-{
-
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:
-		return 1;
-	}
-	return 0;
-}
-
-int is_ehl(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_ATOM_TREMONT:
-		return 1;
-	}
-	return 0;
-}
-
-int is_jvl(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_ATOM_TREMONT_D:
-		return 1;
-	}
-	return 0;
-}
-
-int has_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-	if (has_slv_msrs(family, model))
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-		/* Nehalem compatible, but do not include turbo-ratio limit support */
-	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
-	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
-		return 0;
-	default:
-		return 1;
-	}
-}
-
-int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-	if (has_slv_msrs(family, model))
-		return 1;
-
-	return 0;
-}
-
-int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
 
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
-	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_ATOM_GOLDMONT:
-	case INTEL_FAM6_SKYLAKE_X:
-	case INTEL_FAM6_ICELAKE_X:
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-int has_config_tdp(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
-	case INTEL_FAM6_HASWELL:	/* HSW */
-	case INTEL_FAM6_HASWELL_X:	/* HSX */
-	case INTEL_FAM6_HASWELL_L:	/* HSW */
-	case INTEL_FAM6_HASWELL_G:	/* HSW */
-	case INTEL_FAM6_BROADWELL:	/* BDW */
-	case INTEL_FAM6_BROADWELL_G:	/* BDW */
-	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
-	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
-	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
-	case INTEL_FAM6_ICELAKE_X:	/* ICX */
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:	/* SPR */
-	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-/*
- * tcc_offset_bits:
- * 0: Tcc Offset not supported (Default)
- * 6: Bit 29:24 of MSR_PLATFORM_INFO
- * 4: Bit 27:24 of MSR_PLATFORM_INFO
- */
-void check_tcc_offset(int model)
-{
-	unsigned long long msr;
-
-	if (!genuine_intel)
-		return;
-
-	switch (model) {
-	case INTEL_FAM6_SKYLAKE_L:
-	case INTEL_FAM6_SKYLAKE:
-	case INTEL_FAM6_KABYLAKE_L:
-	case INTEL_FAM6_KABYLAKE:
-	case INTEL_FAM6_ICELAKE_L:
-	case INTEL_FAM6_ICELAKE:
-	case INTEL_FAM6_TIGERLAKE_L:
-	case INTEL_FAM6_TIGERLAKE:
-	case INTEL_FAM6_COMETLAKE:
-		if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
-			msr = (msr >> 30) & 1;
-			if (msr)
-				tcc_offset_bits = 6;
-		}
-		return;
-	default:
-		return;
-	}
+	if (platform->enable_tsc_tweak)
+		tsc_tweak = base_hz / tsc_hz;
 }
 
 static void remove_underbar(char *s)
@@ -4078,44 +4497,37 @@ static void remove_underbar(char *s)
 	*to = 0;
 }
 
-static void dump_turbo_ratio_info(unsigned int family, unsigned int model)
+static void dump_turbo_ratio_info(void)
 {
 	if (!has_turbo)
 		return;
 
-	if (has_hsw_turbo_ratio_limit(family, model))
-		dump_hsw_turbo_ratio_limits();
+	if (!platform->has_nhm_msrs)
+		return;
+
+	if (platform->trl_msrs & TRL_LIMIT2)
+		dump_turbo_ratio_limit2();
 
-	if (has_ivt_turbo_ratio_limit(family, model))
-		dump_ivt_turbo_ratio_limits();
+	if (platform->trl_msrs & TRL_LIMIT1)
+		dump_turbo_ratio_limit1();
 
-	if (has_turbo_ratio_limit(family, model)) {
-		dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
+	if (platform->trl_msrs & TRL_BASE) {
+		dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);
 
 		if (is_hybrid)
-			dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
+			dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
 	}
 
-	if (has_atom_turbo_ratio_limit(family, model))
+	if (platform->trl_msrs & TRL_ATOM)
 		dump_atom_turbo_ratio_limits();
 
-	if (has_knl_turbo_ratio_limit(family, model))
+	if (platform->trl_msrs & TRL_KNL)
 		dump_knl_turbo_ratio_limits();
 
-	if (has_config_tdp(family, model))
+	if (platform->has_config_tdp)
 		dump_config_tdp();
 }
 
-static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
-{
-	if (!do_nhm_platform_info)
-		return;
-
-	dump_nhm_platform_info();
-	dump_turbo_ratio_info(family, model);
-	dump_nhm_cst_cfg();
-}
-
 static int read_sysfs_int(char *path)
 {
 	FILE *input;
@@ -4152,7 +4564,7 @@ static void dump_sysfs_file(char *path)
 	fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
 }
 
-static void intel_uncore_frequency_probe(void)
+static void probe_intel_uncore_frequency(void)
 {
 	int i, j;
 	char path[128];
@@ -4163,6 +4575,10 @@ static void intel_uncore_frequency_probe(void)
 	if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
 		return;
 
+	/* Cluster level sysfs not supported yet. */
+	if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
+		return;
+
 	if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
 		BIC_PRESENT(BIC_UNCORE_MHZ);
 
@@ -4194,6 +4610,20 @@ static void intel_uncore_frequency_probe(void)
 	}
 }
 
+static void probe_graphics(void)
+{
+	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
+		BIC_PRESENT(BIC_GFX_rc6);
+
+	if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
+	    !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+		BIC_PRESENT(BIC_GFXMHz);
+
+	if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
+	    !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+		BIC_PRESENT(BIC_GFXACTMHz);
+}
+
 static void dump_sysfs_cstate_config(void)
 {
 	char path[64];
@@ -4310,7 +4740,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	cpu = t->cpu_id;
 
 	/* EPB is per-package */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (!is_cpu_first_thread_in_package(t, c, p))
 		return 0;
 
 	if (cpu_migrate(cpu)) {
@@ -4359,7 +4789,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	cpu = t->cpu_id;
 
 	/* MSR_HWP_CAPABILITIES is per-package */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (!is_cpu_first_thread_in_package(t, c, p))
 		return 0;
 
 	if (cpu_migrate(cpu)) {
@@ -4442,7 +4872,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 	cpu = t->cpu_id;
 
 	/* per-package */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (!is_cpu_first_thread_in_package(t, c, p))
 		return 0;
 
 	if (cpu_migrate(cpu)) {
@@ -4450,7 +4880,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 		return -1;
 	}
 
-	if (do_core_perf_limit_reasons) {
+	if (platform->plr_msrs & PLR_CORE) {
 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
 		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
@@ -4483,7 +4913,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 			(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
 
 	}
-	if (do_gfx_perf_limit_reasons) {
+	if (platform->plr_msrs & PLR_GFX) {
 		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
 		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
@@ -4503,7 +4933,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 			(msr & 1 << 25) ? "GFXPwr, " : "",
 			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
 	}
-	if (do_ring_perf_limit_reasons) {
+	if (platform->plr_msrs & PLR_RING) {
 		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
 		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
 		fprintf(outf, " (Active: %s%s%s%s%s%s)",
@@ -4525,208 +4955,74 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
 #define	RAPL_TIME_GRANULARITY	0x3F	/* 6 bit time granularity */
 
-double get_tdp_intel(unsigned int model)
+double get_quirk_tdp(void)
 {
-	unsigned long long msr;
+	if (platform->rapl_quirk_tdp)
+		return platform->rapl_quirk_tdp;
 
-	if (do_rapl & RAPL_PKG_POWER_INFO)
-		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
-			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
-
-	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT:
-	case INTEL_FAM6_ATOM_SILVERMONT_D:
-		return 30.0;
-	default:
-		return 135.0;
-	}
+	return 135.0;
 }
 
-double get_tdp_amd(unsigned int family)
+double get_tdp_intel(void)
 {
-	UNUSED(family);
+	unsigned long long msr;
 
-	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
-	return 280.0;
+	if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
+		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
+			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+	return get_quirk_tdp();
 }
 
-/*
- * rapl_dram_energy_units_probe()
- * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
- */
-static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+double get_tdp_amd(void)
 {
-	/* only called for genuine_intel, family 6 */
-
-	switch (model) {
-	case INTEL_FAM6_HASWELL_X:	/* HSX */
-	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
-	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
-	case INTEL_FAM6_ICELAKE_X:	/* ICX */
-		return (rapl_dram_energy_units = 15.3 / 1000000);
-	default:
-		return (rapl_energy_units);
-	}
+	return get_quirk_tdp();
 }
 
-void rapl_probe_intel(unsigned int family, unsigned int model)
+void rapl_probe_intel(void)
 {
 	unsigned long long msr;
 	unsigned int time_unit;
 	double tdp;
 
-	if (family != 6)
-		return;
-
-	switch (model) {
-	case INTEL_FAM6_SANDYBRIDGE:
-	case INTEL_FAM6_IVYBRIDGE:
-	case INTEL_FAM6_HASWELL:	/* HSW */
-	case INTEL_FAM6_HASWELL_L:	/* HSW */
-	case INTEL_FAM6_HASWELL_G:	/* HSW */
-	case INTEL_FAM6_BROADWELL:	/* BDW */
-	case INTEL_FAM6_BROADWELL_G:	/* BDW */
-		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
-		if (rapl_joules) {
-			BIC_PRESENT(BIC_Pkg_J);
-			BIC_PRESENT(BIC_Cor_J);
-			BIC_PRESENT(BIC_GFX_J);
-		} else {
-			BIC_PRESENT(BIC_PkgWatt);
-			BIC_PRESENT(BIC_CorWatt);
-			BIC_PRESENT(BIC_GFXWatt);
-		}
-		break;
-	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
-		if (rapl_joules)
-			BIC_PRESENT(BIC_Pkg_J);
-		else
-			BIC_PRESENT(BIC_PkgWatt);
-		break;
-	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
-		do_rapl =
-		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
-		    | RAPL_GFX | RAPL_PKG_POWER_INFO;
-		if (rapl_joules) {
-			BIC_PRESENT(BIC_Pkg_J);
-			BIC_PRESENT(BIC_Cor_J);
-			BIC_PRESENT(BIC_RAM_J);
-			BIC_PRESENT(BIC_GFX_J);
-		} else {
-			BIC_PRESENT(BIC_PkgWatt);
-			BIC_PRESENT(BIC_CorWatt);
-			BIC_PRESENT(BIC_RAMWatt);
-			BIC_PRESENT(BIC_GFXWatt);
-		}
-		break;
-	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
-		do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
-		BIC_PRESENT(BIC_PKG__);
-		if (rapl_joules)
-			BIC_PRESENT(BIC_Pkg_J);
-		else
-			BIC_PRESENT(BIC_PkgWatt);
-		break;
-	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
-	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
-		do_rapl =
-		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
-		    | RAPL_GFX | RAPL_PKG_POWER_INFO;
-		BIC_PRESENT(BIC_PKG__);
-		BIC_PRESENT(BIC_RAM__);
-		if (rapl_joules) {
+	if (rapl_joules) {
+		if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
 			BIC_PRESENT(BIC_Pkg_J);
+		if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
 			BIC_PRESENT(BIC_Cor_J);
+		if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
 			BIC_PRESENT(BIC_RAM_J);
+		if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
 			BIC_PRESENT(BIC_GFX_J);
-		} else {
+	} else {
+		if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
 			BIC_PRESENT(BIC_PkgWatt);
+		if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
 			BIC_PRESENT(BIC_CorWatt);
+		if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
 			BIC_PRESENT(BIC_RAMWatt);
+		if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
 			BIC_PRESENT(BIC_GFXWatt);
-		}
-		break;
-	case INTEL_FAM6_HASWELL_X:	/* HSX */
-	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
-	case INTEL_FAM6_ICELAKE_X:	/* ICX */
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:	/* SPR */
-	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
-		do_rapl =
-		    RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
-		    RAPL_PKG_POWER_INFO;
-		BIC_PRESENT(BIC_PKG__);
-		BIC_PRESENT(BIC_RAM__);
-		if (rapl_joules) {
-			BIC_PRESENT(BIC_Pkg_J);
-			BIC_PRESENT(BIC_RAM_J);
-		} else {
-			BIC_PRESENT(BIC_PkgWatt);
-			BIC_PRESENT(BIC_RAMWatt);
-		}
-		break;
-	case INTEL_FAM6_SANDYBRIDGE_X:
-	case INTEL_FAM6_IVYBRIDGE_X:
-		do_rapl =
-		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
-		    RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
-		BIC_PRESENT(BIC_PKG__);
-		BIC_PRESENT(BIC_RAM__);
-		if (rapl_joules) {
-			BIC_PRESENT(BIC_Pkg_J);
-			BIC_PRESENT(BIC_Cor_J);
-			BIC_PRESENT(BIC_RAM_J);
-		} else {
-			BIC_PRESENT(BIC_PkgWatt);
-			BIC_PRESENT(BIC_CorWatt);
-			BIC_PRESENT(BIC_RAMWatt);
-		}
-		break;
-	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
-	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
-		do_rapl = RAPL_PKG | RAPL_CORES;
-		if (rapl_joules) {
-			BIC_PRESENT(BIC_Pkg_J);
-			BIC_PRESENT(BIC_Cor_J);
-		} else {
-			BIC_PRESENT(BIC_PkgWatt);
-			BIC_PRESENT(BIC_CorWatt);
-		}
-		break;
-	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
-		do_rapl =
-		    RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
-		    RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
+	}
+
+	if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
 		BIC_PRESENT(BIC_PKG__);
+	if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
 		BIC_PRESENT(BIC_RAM__);
-		if (rapl_joules) {
-			BIC_PRESENT(BIC_Pkg_J);
-			BIC_PRESENT(BIC_Cor_J);
-			BIC_PRESENT(BIC_RAM_J);
-		} else {
-			BIC_PRESENT(BIC_PkgWatt);
-			BIC_PRESENT(BIC_CorWatt);
-			BIC_PRESENT(BIC_RAMWatt);
-		}
-		break;
-	default:
-		return;
-	}
 
 	/* units on package 0, verify later other packages match */
 	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
 		return;
 
 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
-	if (model == INTEL_FAM6_ATOM_SILVERMONT)
+	if (platform->has_rapl_divisor)
 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
 	else
 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
 
-	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+	if (platform->has_fixed_rapl_unit)
+		rapl_dram_energy_units = (15.3 / 1000000);
+	else
+		rapl_dram_energy_units = rapl_energy_units;
 
 	time_unit = msr >> 16 & 0xF;
 	if (time_unit == 0)
@@ -4734,32 +5030,18 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
 
 	rapl_time_units = 1.0 / (1 << (time_unit));
 
-	tdp = get_tdp_intel(model);
+	tdp = get_tdp_intel();
 
 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
 	if (!quiet)
 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 }
 
-void rapl_probe_amd(unsigned int family, unsigned int model)
+void rapl_probe_amd(void)
 {
 	unsigned long long msr;
-	unsigned int eax, ebx, ecx, edx;
-	unsigned int has_rapl = 0;
 	double tdp;
 
-	UNUSED(model);
-
-	if (max_extended_level >= 0x80000007) {
-		__cpuid(0x80000007, eax, ebx, ecx, edx);
-		/* RAPL (Fam 17h+) */
-		has_rapl = edx & (1 << 14);
-	}
-
-	if (!has_rapl || family < 0x17)
-		return;
-
-	do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
 	if (rapl_joules) {
 		BIC_PRESENT(BIC_Pkg_J);
 		BIC_PRESENT(BIC_Cor_J);
@@ -4775,128 +5057,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
 	rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
 	rapl_power_units = ldexp(1.0, -(msr & 0xf));
 
-	tdp = get_tdp_amd(family);
+	tdp = get_tdp_amd();
 
 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
 	if (!quiet)
 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 }
 
-/*
- * rapl_probe()
- *
- * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
- */
-void rapl_probe(unsigned int family, unsigned int model)
-{
-	if (genuine_intel)
-		rapl_probe_intel(family, model);
-	if (authentic_amd || hygon_genuine)
-		rapl_probe_amd(family, model);
-}
-
-void perf_limit_reasons_probe(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return;
-
-	if (family != 6)
-		return;
-
-	switch (model) {
-	case INTEL_FAM6_HASWELL:	/* HSW */
-	case INTEL_FAM6_HASWELL_L:	/* HSW */
-	case INTEL_FAM6_HASWELL_G:	/* HSW */
-		do_gfx_perf_limit_reasons = 1;
-		/* FALLTHRU */
-	case INTEL_FAM6_HASWELL_X:	/* HSX */
-		do_core_perf_limit_reasons = 1;
-		do_ring_perf_limit_reasons = 1;
-	default:
-		return;
-	}
-}
-
-void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
-{
-	if (family != 6)
-		return;
-
-	switch (model) {
-	case INTEL_FAM6_BROADWELL_X:
-	case INTEL_FAM6_SKYLAKE_X:
-		has_automatic_cstate_conversion = 1;
-	}
-}
-
-void prewake_cstate_probe(unsigned int family, unsigned int model)
-{
-	if (is_icx(family, model) || is_spr(family, model))
-		dis_cstate_prewake = 1;
-}
-
-int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
-	unsigned long long msr;
-	unsigned int dts, dts2;
-	int cpu;
-
-	UNUSED(c);
-	UNUSED(p);
-
-	if (!(do_dts || do_ptm))
-		return 0;
-
-	cpu = t->cpu_id;
-
-	/* DTS is per-core, no need to print for each thread */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
-		return 0;
-
-	if (cpu_migrate(cpu)) {
-		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
-		return -1;
-	}
-
-	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
-		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
-			return 0;
-
-		dts = (msr >> 16) & 0x7F;
-		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
-
-		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
-			return 0;
-
-		dts = (msr >> 16) & 0x7F;
-		dts2 = (msr >> 8) & 0x7F;
-		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
-			cpu, msr, tj_max - dts, tj_max - dts2);
-	}
-
-	if (do_dts && debug) {
-		unsigned int resolution;
-
-		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
-			return 0;
-
-		dts = (msr >> 16) & 0x7F;
-		resolution = (msr >> 27) & 0xF;
-		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
-			cpu, msr, tj_max - dts, resolution);
-
-		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
-			return 0;
-
-		dts = (msr >> 16) & 0x7F;
-		dts2 = (msr >> 8) & 0x7F;
-		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
-			cpu, msr, tj_max - dts, tj_max - dts2);
-	}
-
-	return 0;
-}
-
 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
 {
 	fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
@@ -4918,11 +5085,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	UNUSED(c);
 	UNUSED(p);
 
-	if (!do_rapl)
+	if (!platform->rapl_msrs)
 		return 0;
 
 	/* RAPL counters are per package, so print only for 1st thread/package */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (!is_cpu_first_thread_in_package(t, c, p))
 		return 0;
 
 	cpu = t->cpu_id;
@@ -4931,7 +5098,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return -1;
 	}
 
-	if (do_rapl & RAPL_AMD_F17H) {
+	if (platform->rapl_msrs & RAPL_AMD_F17H) {
 		msr_name = "MSR_RAPL_PWR_UNIT";
 		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
 			return -1;
@@ -4944,7 +5111,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
 		rapl_power_units, rapl_energy_units, rapl_time_units);
 
-	if (do_rapl & RAPL_PKG_POWER_INFO) {
+	if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {
 
 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
 			return -5;
@@ -4957,7 +5124,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
 
 	}
-	if (do_rapl & RAPL_PKG) {
+	if (platform->rapl_msrs & RAPL_PKG) {
 
 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
 			return -9;
@@ -4981,7 +5148,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
 	}
 
-	if (do_rapl & RAPL_DRAM_POWER_INFO) {
+	if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
 			return -6;
 
@@ -4992,7 +5159,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
 	}
-	if (do_rapl & RAPL_DRAM) {
+	if (platform->rapl_msrs & RAPL_DRAM) {
 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
 			return -9;
 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -5000,20 +5167,20 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 		print_power_limit_msr(cpu, msr, "DRAM Limit");
 	}
-	if (do_rapl & RAPL_CORE_POLICY) {
+	if (platform->rapl_msrs & RAPL_CORE_POLICY) {
 		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
 			return -7;
 
 		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
 	}
-	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
+	if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
 		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
 			return -9;
 		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
 			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
 		print_power_limit_msr(cpu, msr, "Cores Limit");
 	}
-	if (do_rapl & RAPL_GFX) {
+	if (platform->rapl_msrs & RAPL_GFX) {
 		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
 			return -8;
 
@@ -5029,217 +5196,24 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 }
 
 /*
- * SNB adds support for additional MSRs:
+ * probe_rapl()
  *
- * MSR_PKG_C7_RESIDENCY            0x000003fa
- * MSR_CORE_C7_RESIDENCY           0x000003fe
- * MSR_PKG_C2_RESIDENCY            0x0000060d
+ * sets rapl_power_units, rapl_energy_units, rapl_time_units
  */
-
-int has_snb_msrs(unsigned int family, unsigned int model)
+void probe_rapl(void)
 {
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_SANDYBRIDGE:
-	case INTEL_FAM6_SANDYBRIDGE_X:
-	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
-	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
-	case INTEL_FAM6_HASWELL:	/* HSW */
-	case INTEL_FAM6_HASWELL_X:	/* HSW */
-	case INTEL_FAM6_HASWELL_L:	/* HSW */
-	case INTEL_FAM6_HASWELL_G:	/* HSW */
-	case INTEL_FAM6_BROADWELL:	/* BDW */
-	case INTEL_FAM6_BROADWELL_G:	/* BDW */
-	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
-	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
-	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
-	case INTEL_FAM6_ICELAKE_X:	/* ICX */
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:	/* SPR */
-	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
-	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
-	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * HSW ULT added support for C8/C9/C10 MSRs:
- *
- * MSR_PKG_C8_RESIDENCY		0x00000630
- * MSR_PKG_C9_RESIDENCY		0x00000631
- * MSR_PKG_C10_RESIDENCY	0x00000632
- *
- * MSR_PKGC8_IRTL		0x00000633
- * MSR_PKGC9_IRTL		0x00000634
- * MSR_PKGC10_IRTL		0x00000635
- *
- */
-int has_c8910_msrs(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_HASWELL_L:	/* HSW */
-	case INTEL_FAM6_BROADWELL:	/* BDW */
-	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
-	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
-	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * SKL adds support for additional MSRS:
- *
- * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
- * MSR_PKG_ANY_CORE_C0_RES         0x00000659
- * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
- * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
- */
-int has_skl_msrs(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
-	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
-		return 1;
-	}
-	return 0;
-}
-
-int is_slm(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
-	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
-		return 1;
-	}
-	return 0;
-}
-
-int is_knl(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
-		return 1;
-	}
-	return 0;
-}
-
-int is_cnl(unsigned int family, unsigned int model)
-{
-	if (!genuine_intel)
-		return 0;
-
-	if (family != 6)
-		return 0;
-
-	switch (model) {
-	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
-		return 1;
-	}
-
-	return 0;
-}
-
-unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
-{
-	if (is_knl(family, model))
-		return 1024;
-	return 1;
-}
-
-#define SLM_BCLK_FREQS 5
-double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
-
-double slm_bclk(void)
-{
-	unsigned long long msr = 3;
-	unsigned int i;
-	double freq;
-
-	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
-		fprintf(outf, "SLM BCLK: unknown\n");
-
-	i = msr & 0xf;
-	if (i >= SLM_BCLK_FREQS) {
-		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
-		i = 3;
-	}
-	freq = slm_freq_table[i];
-
-	if (!quiet)
-		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
-
-	return freq;
-}
-
-double discover_bclk(unsigned int family, unsigned int model)
-{
-	if (has_snb_msrs(family, model) || is_knl(family, model))
-		return 100.00;
-	else if (is_slm(family, model))
-		return slm_bclk();
-	else
-		return 133.33;
-}
-
-int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	UNUSED(c);
-	UNUSED(p);
-
-	if (!genuine_intel)
-		return 0;
+	if (!platform->rapl_msrs)
+		return;
 
-	if (cpu_migrate(t->cpu_id)) {
-		fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
-		return -1;
-	}
+	if (genuine_intel)
+		rapl_probe_intel();
+	if (authentic_amd || hygon_genuine)
+		rapl_probe_amd();
 
-	if (max_level < 0x1a)
-		return 0;
+	if (quiet)
+		return;
 
-	__cpuid(0x1a, eax, ebx, ecx, edx);
-	eax = (eax >> 24) & 0xFF;
-	if (eax == 0x20)
-		t->is_atom = true;
-	return 0;
+	for_all_cpus(print_rapl, ODD_COUNTERS);
 }
 
 /*
@@ -5268,7 +5242,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
 		return 0;
 
 	/* this is a per-package concept */
-	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+	if (!is_cpu_first_thread_in_package(t, c, p))
 		return 0;
 
 	cpu = t->cpu_id;
@@ -5284,7 +5258,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
 	}
 
 	/* Temperature Target MSR is Nehalem and newer only */
-	if (!do_nhm_platform_info)
+	if (!platform->has_nhm_msrs)
 		goto guess;
 
 	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5293,20 +5267,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
 	tcc_default = (msr >> 16) & 0xFF;
 
 	if (!quiet) {
-		switch (tcc_offset_bits) {
-		case 4:
-			tcc_offset = (msr >> 24) & 0xF;
-			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
-				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
-			break;
-		case 6:
-			tcc_offset = (msr >> 24) & 0x3F;
+		int bits = platform->tcc_offset_bits;
+		unsigned long long enabled = 0;
+
+		if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
+			enabled = (enabled >> 30) & 1;
+
+		if (bits && enabled) {
+			tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
 			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
 				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
-			break;
-		default:
+		} else {
 			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
-			break;
 		}
 	}
 
@@ -5324,6 +5296,108 @@ guess:
 	return 0;
 }
 
+int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	unsigned long long msr;
+	unsigned int dts, dts2;
+	int cpu;
+
+	UNUSED(c);
+	UNUSED(p);
+
+	if (!(do_dts || do_ptm))
+		return 0;
+
+	cpu = t->cpu_id;
+
+	/* DTS is per-core, no need to print for each thread */
+	if (!is_cpu_first_thread_in_core(t, c, p))
+		return 0;
+
+	if (cpu_migrate(cpu)) {
+		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
+		return -1;
+	}
+
+	if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
+		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+			return 0;
+
+		dts = (msr >> 16) & 0x7F;
+		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
+
+		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
+			return 0;
+
+		dts = (msr >> 16) & 0x7F;
+		dts2 = (msr >> 8) & 0x7F;
+		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+			cpu, msr, tj_max - dts, tj_max - dts2);
+	}
+
+	if (do_dts && debug) {
+		unsigned int resolution;
+
+		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+			return 0;
+
+		dts = (msr >> 16) & 0x7F;
+		resolution = (msr >> 27) & 0xF;
+		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+			cpu, msr, tj_max - dts, resolution);
+
+		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
+			return 0;
+
+		dts = (msr >> 16) & 0x7F;
+		dts2 = (msr >> 8) & 0x7F;
+		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+			cpu, msr, tj_max - dts, tj_max - dts2);
+	}
+
+	return 0;
+}
+
+void probe_thermal(void)
+{
+	if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
+		BIC_PRESENT(BIC_CORE_THROT_CNT);
+	else
+		BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+
+	for_all_cpus(set_temperature_target, ODD_COUNTERS);
+
+	if (quiet)
+		return;
+
+	for_all_cpus(print_thermal, ODD_COUNTERS);
+}
+
+int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	UNUSED(c);
+	UNUSED(p);
+
+	if (!genuine_intel)
+		return 0;
+
+	if (cpu_migrate(t->cpu_id)) {
+		fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
+		return -1;
+	}
+
+	if (max_level < 0x1a)
+		return 0;
+
+	__cpuid(0x1a, eax, ebx, ecx, edx);
+	eax = (eax >> 24) & 0xFF;
+	if (eax == 0x20)
+		t->is_atom = true;
+	return 0;
+}
+
 void decode_feature_control_msr(void)
 {
 	unsigned long long msr;
@@ -5354,7 +5428,7 @@ void decode_misc_feature_control(void)
 {
 	unsigned long long msr;
 
-	if (!has_misc_feature_control)
+	if (!platform->has_msr_misc_feature_control)
 		return;
 
 	if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
@@ -5375,10 +5449,7 @@ void decode_misc_pwr_mgmt_msr(void)
 {
 	unsigned long long msr;
 
-	if (!do_nhm_platform_info)
-		return;
-
-	if (no_MSR_MISC_PWR_MGMT)
+	if (!platform->has_msr_misc_pwr_mgmt)
 		return;
 
 	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
@@ -5397,6 +5468,9 @@ void decode_c6_demotion_policy_msr(void)
 {
 	unsigned long long msr;
 
+	if (!platform->has_msr_c6_demotion_policy_config)
+		return;
+
 	if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
 		fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
@@ -5406,67 +5480,6 @@ void decode_c6_demotion_policy_msr(void)
 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
 }
 
-/*
- * When models are the same, for the purpose of turbostat, reuse
- */
-unsigned int intel_model_duplicates(unsigned int model)
-{
-
-	switch (model) {
-	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
-	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
-	case 0x1F:		/* Core i7 and i5 Processor - Nehalem */
-	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
-	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
-		return INTEL_FAM6_NEHALEM;
-
-	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
-	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
-		return INTEL_FAM6_NEHALEM_EX;
-
-	case INTEL_FAM6_XEON_PHI_KNM:
-		return INTEL_FAM6_XEON_PHI_KNL;
-
-	case INTEL_FAM6_BROADWELL_X:
-	case INTEL_FAM6_BROADWELL_D:	/* BDX-DE */
-		return INTEL_FAM6_BROADWELL_X;
-
-	case INTEL_FAM6_SKYLAKE_L:
-	case INTEL_FAM6_SKYLAKE:
-	case INTEL_FAM6_KABYLAKE_L:
-	case INTEL_FAM6_KABYLAKE:
-	case INTEL_FAM6_COMETLAKE_L:
-	case INTEL_FAM6_COMETLAKE:
-		return INTEL_FAM6_SKYLAKE_L;
-
-	case INTEL_FAM6_ICELAKE_L:
-	case INTEL_FAM6_ICELAKE_NNPI:
-	case INTEL_FAM6_TIGERLAKE_L:
-	case INTEL_FAM6_TIGERLAKE:
-	case INTEL_FAM6_ROCKETLAKE:
-	case INTEL_FAM6_LAKEFIELD:
-	case INTEL_FAM6_ALDERLAKE:
-	case INTEL_FAM6_ALDERLAKE_L:
-	case INTEL_FAM6_ATOM_GRACEMONT:
-	case INTEL_FAM6_RAPTORLAKE:
-	case INTEL_FAM6_RAPTORLAKE_P:
-	case INTEL_FAM6_RAPTORLAKE_S:
-	case INTEL_FAM6_METEORLAKE:
-	case INTEL_FAM6_METEORLAKE_L:
-		return INTEL_FAM6_CANNONLAKE_L;
-
-	case INTEL_FAM6_ATOM_TREMONT_L:
-		return INTEL_FAM6_ATOM_TREMONT;
-
-	case INTEL_FAM6_ICELAKE_D:
-		return INTEL_FAM6_ICELAKE_X;
-
-	case INTEL_FAM6_EMERALDRAPIDS_X:
-		return INTEL_FAM6_SAPPHIRERAPIDS_X;
-	}
-	return model;
-}
-
 void print_dev_latency(void)
 {
 	char *path = "/dev/cpu_dma_latency";
@@ -5510,6 +5523,101 @@ void linux_perf_init(void)
 	BIC_PRESENT(BIC_IPC);
 }
 
+void probe_cstates(void)
+{
+	probe_cst_limit();
+
+	if (platform->supported_cstates & CC1)
+		BIC_PRESENT(BIC_CPU_c1);
+
+	if (platform->supported_cstates & CC3)
+		BIC_PRESENT(BIC_CPU_c3);
+
+	if (platform->supported_cstates & CC6)
+		BIC_PRESENT(BIC_CPU_c6);
+
+	if (platform->supported_cstates & CC7)
+		BIC_PRESENT(BIC_CPU_c7);
+
+	if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2))
+		BIC_PRESENT(BIC_Pkgpc2);
+
+	if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3))
+		BIC_PRESENT(BIC_Pkgpc3);
+
+	if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6))
+		BIC_PRESENT(BIC_Pkgpc6);
+
+	if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7))
+		BIC_PRESENT(BIC_Pkgpc7);
+
+	if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8))
+		BIC_PRESENT(BIC_Pkgpc8);
+
+	if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9))
+		BIC_PRESENT(BIC_Pkgpc9);
+
+	if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10))
+		BIC_PRESENT(BIC_Pkgpc10);
+
+	if (platform->has_msr_module_c6_res_ms)
+		BIC_PRESENT(BIC_Mod_c6);
+
+	if (platform->has_ext_cst_msrs) {
+		BIC_PRESENT(BIC_Totl_c0);
+		BIC_PRESENT(BIC_Any_c0);
+		BIC_PRESENT(BIC_GFX_c0);
+		BIC_PRESENT(BIC_CPUGFX);
+	}
+
+	if (quiet)
+		return;
+
+	dump_power_ctl();
+	dump_cst_cfg();
+	decode_c6_demotion_policy_msr();
+	print_dev_latency();
+	dump_sysfs_cstate_config();
+	print_irtl();
+}
+
+void probe_lpi(void)
+{
+	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
+		BIC_PRESENT(BIC_CPU_LPI);
+	else
+		BIC_NOT_PRESENT(BIC_CPU_LPI);
+
+	if (!access(sys_lpi_file_sysfs, R_OK)) {
+		sys_lpi_file = sys_lpi_file_sysfs;
+		BIC_PRESENT(BIC_SYS_LPI);
+	} else if (!access(sys_lpi_file_debugfs, R_OK)) {
+		sys_lpi_file = sys_lpi_file_debugfs;
+		BIC_PRESENT(BIC_SYS_LPI);
+	} else {
+		sys_lpi_file_sysfs = NULL;
+		BIC_NOT_PRESENT(BIC_SYS_LPI);
+	}
+
+}
+
+void probe_pstates(void)
+{
+	probe_bclk();
+
+	if (quiet)
+		return;
+
+	dump_platform_info();
+	dump_turbo_ratio_info();
+	dump_sysfs_pstate_config();
+	decode_misc_pwr_mgmt_msr();
+
+	for_all_cpus(print_hwp, ODD_COUNTERS);
+	for_all_cpus(print_epb, ODD_COUNTERS);
+	for_all_cpus(print_perf_limit, ODD_COUNTERS);
+}
+
 void process_cpuid()
 {
 	unsigned int eax, ebx, ecx, edx;
@@ -5569,10 +5677,8 @@ void process_cpuid()
 			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
 			edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
 	}
-	if (genuine_intel) {
-		model_orig = model;
-		model = intel_model_duplicates(model);
-	}
+
+	probe_platform_features(family, model);
 
 	if (!(edx_flags & (1 << 5)))
 		errx(1, "CPUID: no MSR");
@@ -5656,26 +5762,12 @@ void process_cpuid()
 		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
 
 		if (ebx_tsc != 0) {
-
 			if (!quiet && (ebx != 0))
 				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
 					eax_crystal, ebx_tsc, crystal_hz);
 
 			if (crystal_hz == 0)
-				switch (model) {
-				case INTEL_FAM6_SKYLAKE_L:	/* SKL */
-					crystal_hz = 24000000;	/* 24.0 MHz */
-					break;
-				case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
-					crystal_hz = 25000000;	/* 25.0 MHz */
-					break;
-				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-				case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-					crystal_hz = 19200000;	/* 19.2 MHz */
-					break;
-				default:
-					crystal_hz = 0;
-				}
+				crystal_hz = platform->crystal_freq;
 
 			if (crystal_hz) {
 				tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
@@ -5700,147 +5792,33 @@ void process_cpuid()
 	}
 
 	if (has_aperf)
-		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
+		aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
 
 	BIC_PRESENT(BIC_IRQ);
 	BIC_PRESENT(BIC_TSC_MHz);
+}
 
-	if (probe_nhm_msrs(family, model)) {
-		do_nhm_platform_info = 1;
-		BIC_PRESENT(BIC_CPU_c1);
-		BIC_PRESENT(BIC_CPU_c3);
-		BIC_PRESENT(BIC_CPU_c6);
-		BIC_PRESENT(BIC_SMI);
-	}
-	do_snb_cstates = has_snb_msrs(family, model);
-
-	if (do_snb_cstates)
-		BIC_PRESENT(BIC_CPU_c7);
-
-	do_irtl_snb = has_snb_msrs(family, model);
-	if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
-		BIC_PRESENT(BIC_Pkgpc2);
-	if (pkg_cstate_limit >= PCL__3)
-		BIC_PRESENT(BIC_Pkgpc3);
-	if (pkg_cstate_limit >= PCL__6)
-		BIC_PRESENT(BIC_Pkgpc6);
-	if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
-		BIC_PRESENT(BIC_Pkgpc7);
-	if (has_slv_msrs(family, model)) {
-		BIC_NOT_PRESENT(BIC_Pkgpc2);
-		BIC_NOT_PRESENT(BIC_Pkgpc3);
-		BIC_PRESENT(BIC_Pkgpc6);
-		BIC_NOT_PRESENT(BIC_Pkgpc7);
-		BIC_PRESENT(BIC_Mod_c6);
-		use_c1_residency_msr = 1;
-	}
-	if (is_jvl(family, model)) {
-		BIC_NOT_PRESENT(BIC_CPU_c3);
-		BIC_NOT_PRESENT(BIC_CPU_c7);
-		BIC_NOT_PRESENT(BIC_Pkgpc2);
-		BIC_NOT_PRESENT(BIC_Pkgpc3);
-		BIC_NOT_PRESENT(BIC_Pkgpc6);
-		BIC_NOT_PRESENT(BIC_Pkgpc7);
-	}
-	if (is_dnv(family, model)) {
-		BIC_PRESENT(BIC_CPU_c1);
-		BIC_NOT_PRESENT(BIC_CPU_c3);
-		BIC_NOT_PRESENT(BIC_Pkgpc3);
-		BIC_NOT_PRESENT(BIC_CPU_c7);
-		BIC_NOT_PRESENT(BIC_Pkgpc7);
-		use_c1_residency_msr = 1;
-	}
-	if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
-		BIC_NOT_PRESENT(BIC_CPU_c3);
-		BIC_NOT_PRESENT(BIC_Pkgpc3);
-		BIC_NOT_PRESENT(BIC_CPU_c7);
-		BIC_NOT_PRESENT(BIC_Pkgpc7);
-	}
-	if (is_bdx(family, model)) {
-		BIC_NOT_PRESENT(BIC_CPU_c7);
-		BIC_NOT_PRESENT(BIC_Pkgpc7);
-	}
-	if (has_c8910_msrs(family, model)) {
-		if (pkg_cstate_limit >= PCL__8)
-			BIC_PRESENT(BIC_Pkgpc8);
-		if (pkg_cstate_limit >= PCL__9)
-			BIC_PRESENT(BIC_Pkgpc9);
-		if (pkg_cstate_limit >= PCL_10)
-			BIC_PRESENT(BIC_Pkgpc10);
-	}
-	do_irtl_hsw = has_c8910_msrs(family, model);
-	if (has_skl_msrs(family, model)) {
-		BIC_PRESENT(BIC_Totl_c0);
-		BIC_PRESENT(BIC_Any_c0);
-		BIC_PRESENT(BIC_GFX_c0);
-		BIC_PRESENT(BIC_CPUGFX);
-	}
-	do_slm_cstates = is_slm(family, model);
-	do_knl_cstates = is_knl(family, model);
-
-	if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
-		BIC_NOT_PRESENT(BIC_CPU_c3);
-
-	if (!quiet)
-		decode_misc_pwr_mgmt_msr();
-
-	if (!quiet && has_slv_msrs(family, model))
-		decode_c6_demotion_policy_msr();
-
-	rapl_probe(family, model);
-	perf_limit_reasons_probe(family, model);
-	automatic_cstate_conversion_probe(family, model);
-
-	check_tcc_offset(model_orig);
-
-	if (!quiet)
-		dump_cstate_pstate_config_info(family, model);
-	intel_uncore_frequency_probe();
-
-	if (!quiet)
-		print_dev_latency();
-	if (!quiet)
-		dump_sysfs_cstate_config();
-	if (!quiet)
-		dump_sysfs_pstate_config();
+void probe_pm_features(void)
+{
+	probe_pstates();
 
-	if (has_skl_msrs(family, model) || is_ehl(family, model))
-		calculate_tsc_tweak();
+	probe_cstates();
 
-	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
-		BIC_PRESENT(BIC_GFX_rc6);
+	probe_lpi();
 
-	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
-		BIC_PRESENT(BIC_GFXMHz);
+	probe_intel_uncore_frequency();
 
-	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
-		BIC_PRESENT(BIC_GFXACTMHz);
+	probe_graphics();
 
-	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
-		BIC_PRESENT(BIC_CPU_LPI);
-	else
-		BIC_NOT_PRESENT(BIC_CPU_LPI);
+	probe_rapl();
 
-	if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
-		BIC_PRESENT(BIC_CORE_THROT_CNT);
-	else
-		BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+	probe_thermal();
 
-	if (!access(sys_lpi_file_sysfs, R_OK)) {
-		sys_lpi_file = sys_lpi_file_sysfs;
-		BIC_PRESENT(BIC_SYS_LPI);
-	} else if (!access(sys_lpi_file_debugfs, R_OK)) {
-		sys_lpi_file = sys_lpi_file_debugfs;
-		BIC_PRESENT(BIC_SYS_LPI);
-	} else {
-		sys_lpi_file_sysfs = NULL;
-		BIC_NOT_PRESENT(BIC_SYS_LPI);
-	}
+	if (platform->has_nhm_msrs)
+		BIC_PRESENT(BIC_SMI);
 
 	if (!quiet)
 		decode_misc_feature_control();
-
-	return;
 }
 
 /*
@@ -5855,7 +5833,7 @@ int dir_filter(const struct dirent *dirp)
 		return 0;
 }
 
-void topology_probe()
+void topology_probe(bool startup)
 {
 	int i;
 	int max_core_id = 0;
@@ -5888,14 +5866,62 @@ void topology_probe()
 	for_all_proc_cpus(mark_cpu_present);
 
 	/*
-	 * Validate that all cpus in cpu_subset are also in cpu_present_set
+	 * Allocate and initialize cpu_effective_set
+	 */
+	cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
+	if (cpu_effective_set == NULL)
+		err(3, "CPU_ALLOC");
+	cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+	CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
+	update_effective_set(startup);
+
+	/*
+	 * Allocate and initialize cpu_allowed_set
+	 */
+	cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
+	if (cpu_allowed_set == NULL)
+		err(3, "CPU_ALLOC");
+	cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+	CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);
+
+	/*
+	 * Validate and update cpu_allowed_set.
+	 *
+	 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
+	 * Give a warning when cpus in cpu_subset become unavailable at runtime.
+	 * Give a warning when cpus are not effective because of cgroup setting.
+	 *
+	 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
 	 */
 	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
-		if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
-			if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
-				err(1, "cpu%d not present", i);
+		if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
+			continue;
+
+		if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
+			if (cpu_subset) {
+				/* cpus in cpu_subset must be in cpu_present_set during startup */
+				if (startup)
+					err(1, "cpu%d not present", i);
+				else
+					fprintf(stderr, "cpu%d not present\n", i);
+			}
+			continue;
+		}
+
+		if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
+			if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
+				fprintf(stderr, "cpu%d not effective\n", i);
+				continue;
+			}
+		}
+
+		CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
 	}
 
+	if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
+		err(-ENODEV, "No valid cpus found");
+	sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);
+
 	/*
 	 * Allocate and initialize cpu_affinity_set
 	 */
@@ -6009,15 +6035,19 @@ void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_
 	if (*c == NULL)
 		goto error;
 
-	for (i = 0; i < num_cores; i++)
+	for (i = 0; i < num_cores; i++) {
 		(*c)[i].core_id = -1;
+		(*c)[i].base_cpu = -1;
+	}
 
 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
 	if (*p == NULL)
 		goto error;
 
-	for (i = 0; i < topo.num_packages; i++)
+	for (i = 0; i < topo.num_packages; i++) {
 		(*p)[i].package_id = i;
+		(*p)[i].base_cpu = -1;
+	}
 
 	return;
 error:
@@ -6050,10 +6080,11 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
 	p = GET_PKG(pkg_base, pkg_id);
 
 	t->cpu_id = cpu_id;
-	if (thread_id == 0) {
-		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
-		if (cpu_is_first_core_in_package(cpu_id))
-			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
+	if (!cpu_is_not_allowed(cpu_id)) {
+		if (c->base_cpu < 0)
+			c->base_cpu = t->cpu_id;
+		if (p->base_cpu < 0)
+			p->base_cpu = t->cpu_id;
 	}
 
 	c->core_id = core_id;
@@ -6093,59 +6124,64 @@ void allocate_irq_buffers(void)
 		err(-1, "calloc %d", topo.max_cpu_num + 1);
 }
 
-void setup_all_buffers(void)
+int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	topo.allowed_cpus++;
+	if ((int)t->cpu_id == c->base_cpu)
+		topo.allowed_cores++;
+	if ((int)t->cpu_id == p->base_cpu)
+		topo.allowed_packages++;
+
+	return 0;
+}
+
+void topology_update(void)
 {
-	topology_probe();
+	topo.allowed_cpus = 0;
+	topo.allowed_cores = 0;
+	topo.allowed_packages = 0;
+	for_all_cpus(update_topo, ODD_COUNTERS);
+}
+void setup_all_buffers(bool startup)
+{
+	topology_probe(startup);
 	allocate_irq_buffers();
 	allocate_fd_percpu();
 	allocate_counters(&thread_even, &core_even, &package_even);
 	allocate_counters(&thread_odd, &core_odd, &package_odd);
 	allocate_output_buffer();
 	for_all_proc_cpus(initialize_counters);
+	topology_update();
 }
 
 void set_base_cpu(void)
 {
-	base_cpu = sched_getcpu();
-	if (base_cpu < 0)
-		err(-ENODEV, "No valid cpus found");
+	int i;
 
-	if (debug > 1)
-		fprintf(outf, "base_cpu = %d\n", base_cpu);
+	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+		if (cpu_is_not_allowed(i))
+			continue;
+		base_cpu = i;
+		if (debug > 1)
+			fprintf(outf, "base_cpu = %d\n", base_cpu);
+		return;
+	}
+	err(-ENODEV, "No valid cpus found");
 }
 
 void turbostat_init()
 {
-	setup_all_buffers();
+	setup_all_buffers(true);
 	set_base_cpu();
 	check_dev_msr();
 	check_permissions();
 	process_cpuid();
+	probe_pm_features();
 	linux_perf_init();
 
-	if (!quiet)
-		for_all_cpus(print_hwp, ODD_COUNTERS);
-
-	if (!quiet)
-		for_all_cpus(print_epb, ODD_COUNTERS);
-
-	if (!quiet)
-		for_all_cpus(print_perf_limit, ODD_COUNTERS);
-
-	if (!quiet)
-		for_all_cpus(print_rapl, ODD_COUNTERS);
-
-	for_all_cpus(set_temperature_target, ODD_COUNTERS);
-
 	for_all_cpus(get_cpu_type, ODD_COUNTERS);
 	for_all_cpus(get_cpu_type, EVEN_COUNTERS);
 
-	if (!quiet)
-		for_all_cpus(print_thermal, ODD_COUNTERS);
-
-	if (!quiet && do_irtl_snb)
-		print_irtl();
-
 	if (DO_BIC(BIC_IPC))
 		(void)get_instr_count_fd(base_cpu);
 }
@@ -6160,8 +6196,6 @@ int fork_it(char **argv)
 	first_counter_read = 0;
 	if (status)
 		exit(status);
-	/* clear affinity side-effect of get_counters() */
-	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
 	gettimeofday(&tv_even, (struct timezone *)NULL);
 
 	child_pid = fork();
@@ -6225,7 +6259,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-	fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n");
+	fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048
@@ -6508,9 +6542,6 @@ void probe_sysfs(void)
  */
 void parse_cpu_command(char *optarg)
 {
-	unsigned int start, end;
-	char *next;
-
 	if (!strcmp(optarg, "core")) {
 		if (cpu_subset)
 			goto error;
@@ -6533,52 +6564,8 @@ void parse_cpu_command(char *optarg)
 
 	CPU_ZERO_S(cpu_subset_size, cpu_subset);
 
-	next = optarg;
-
-	while (next && *next) {
-
-		if (*next == '-')	/* no negative cpu numbers */
-			goto error;
-
-		start = strtoul(next, &next, 10);
-
-		if (start >= CPU_SUBSET_MAXCPUS)
-			goto error;
-		CPU_SET_S(start, cpu_subset_size, cpu_subset);
-
-		if (*next == '\0')
-			break;
-
-		if (*next == ',') {
-			next += 1;
-			continue;
-		}
-
-		if (*next == '-') {
-			next += 1;	/* start range */
-		} else if (*next == '.') {
-			next += 1;
-			if (*next == '.')
-				next += 1;	/* start range */
-			else
-				goto error;
-		}
-
-		end = strtoul(next, &next, 10);
-		if (end <= start)
-			goto error;
-
-		while (++start <= end) {
-			if (start >= CPU_SUBSET_MAXCPUS)
-				goto error;
-			CPU_SET_S(start, cpu_subset_size, cpu_subset);
-		}
-
-		if (*next == ',')
-			next += 1;
-		else if (*next != '\0')
-			goto error;
-	}
+	if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
+		goto error;
 
 	return;
 
@@ -6719,6 +6706,19 @@ void cmdline(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
+	int fd, ret;
+
+	fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
+	if (fd < 0)
+		goto skip_cgroup_setting;
+
+	ret = write(fd, "0\n", 2);
+	if (ret == -1)
+		perror("Can't update cgroup\n");
+
+	close(fd);
+
+skip_cgroup_setting:
 	outf = stderr;
 	cmdline(argc, argv);
 
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index cc920c79ff1c..4ff10ea61461 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -45,3 +45,4 @@ mdwe_test
 gup_longterm
 mkdirty
 va_high_addr_switch
+hugetlb_fault_after_madv
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index 0161fb49fc6e..befab43719ba 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -94,19 +94,19 @@ int init_uffd(void)
 
 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
 	if (uffd == -1)
-		ksft_exit_fail_msg("uffd syscall failed\n");
+		return uffd;
 
 	uffdio_api.api = UFFD_API;
 	uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
 			      UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
 	if (ioctl(uffd, UFFDIO_API, &uffdio_api))
-		ksft_exit_fail_msg("UFFDIO_API\n");
+		return -1;
 
 	if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
 	    !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
 	    !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
 	    !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
-		ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api);
+		return -1;
 
 	return 0;
 }
@@ -1151,7 +1151,7 @@ int sanity_tests(void)
 	/* 9. Memory mapped file */
 	fd = open(__FILE__, O_RDONLY);
 	if (fd < 0)
-		ksft_exit_fail_msg("%s Memory mapped file\n");
+		ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
 
 	ret = stat(__FILE__, &sbuf);
 	if (ret < 0)
@@ -1159,7 +1159,7 @@ int sanity_tests(void)
 
 	fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	if (fmem == MAP_FAILED)
-		ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+		ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
 	tmp_buf = malloc(sbuf.st_size);
 	memcpy(tmp_buf, fmem, sbuf.st_size);
@@ -1189,7 +1189,7 @@ int sanity_tests(void)
 
 	fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 	if (fmem == MAP_FAILED)
-		ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+		ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
 	wp_init(fmem, buf_size);
 	wp_addr_range(fmem, buf_size);
@@ -1479,6 +1479,10 @@ int main(void)
 	struct stat sbuf;
 
 	ksft_print_header();
+
+	if (init_uffd())
+		return ksft_exit_pass();
+
 	ksft_set_plan(115);
 
 	page_size = getpagesize();
@@ -1488,9 +1492,6 @@ int main(void)
 	if (pagemap_fd < 0)
 		return -EINVAL;
 
-	if (init_uffd())
-		ksft_exit_fail_msg("uffd init failed\n");
-
 	/* 1. Sanity testing */
 	sanity_tests_sd();
 
@@ -1595,7 +1596,7 @@ int main(void)
 
 	fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 	if (fmem == MAP_FAILED)
-		ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+		ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
 	wp_init(fmem, sbuf.st_size);
 	wp_addr_range(fmem, sbuf.st_size);
@@ -1623,7 +1624,7 @@ int main(void)
 
 	fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 	if (fmem == MAP_FAILED)
-		ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+		ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
 	wp_init(fmem, buf_size);
 	wp_addr_range(fmem, buf_size);
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index cc16f6ca8533..00757445278e 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -223,9 +223,12 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap
 CATEGORY="hugetlb" run_test ./hugepage-vmemmap
 CATEGORY="hugetlb" run_test ./hugetlb-madvise
 
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
 # For this test, we need one and just one huge page
 echo 1 > /proc/sys/vm/nr_hugepages
 CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+# Restore the previous number of huge pages, since further tests rely on it
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
 
 if test_selected "hugetlb"; then
 	echo "NOTE: These hugetlb tests provide minimal coverage.  Use"