summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arch/loongarch/introduction.rst4
-rw-r--r--Documentation/arch/x86/boot.rst2
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,ufs.yaml2
-rw-r--r--Documentation/filesystems/erofs.rst4
-rw-r--r--Documentation/translations/zh_CN/arch/loongarch/introduction.rst4
-rw-r--r--MAINTAINERS22
-rw-r--r--Makefile2
-rw-r--r--arch/loongarch/Makefile3
-rw-r--r--arch/loongarch/include/asm/asmmacro.h3
-rw-r--r--arch/loongarch/include/asm/percpu.h11
-rw-r--r--arch/loongarch/include/asm/setup.h2
-rw-r--r--arch/loongarch/kernel/relocate.c10
-rw-r--r--arch/loongarch/kernel/time.c23
-rw-r--r--arch/loongarch/mm/pgtable.c4
-rw-r--r--arch/parisc/Kconfig6
-rw-r--r--arch/parisc/include/asm/elf.h10
-rw-r--r--arch/parisc/include/asm/processor.h2
-rw-r--r--arch/parisc/kernel/processor.c2
-rw-r--r--arch/parisc/kernel/sys_parisc.c2
-rw-r--r--arch/x86/hyperv/hv_init.c25
-rw-r--r--arch/x86/kernel/acpi/boot.c34
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c5
-rw-r--r--arch/x86/kernel/signal_64.c6
-rw-r--r--block/blk-mq.c75
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c6
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c24
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c5
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c29
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.c3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h22
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c50
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h10
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c10
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/event.h4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/event.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c6
-rw-r--r--drivers/i2c/busses/i2c-designware-common.c16
-rw-r--r--drivers/i2c/busses/i2c-ocores.c4
-rw-r--r--drivers/i2c/busses/i2c-pxa.c76
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c16
-rw-r--r--drivers/md/dm-bufio.c87
-rw-r--r--drivers/md/dm-crypt.c2
-rw-r--r--drivers/md/dm-delay.c112
-rw-r--r--drivers/md/dm-verity-fec.c4
-rw-r--r--drivers/md/dm-verity-target.c23
-rw-r--r--drivers/md/dm-verity.h2
-rw-r--r--drivers/parisc/power.c2
-rw-r--r--drivers/platform/x86/amd/pmc/pmc.c31
-rw-r--r--drivers/platform/x86/hp/hp-bioscfg/bioscfg.c26
-rw-r--r--drivers/platform/x86/ideapad-laptop.c11
-rw-r--r--drivers/platform/x86/intel/telemetry/core.c4
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c12
-rw-r--r--drivers/scsi/scsi_debug.c9
-rw-r--r--drivers/scsi/sd.c53
-rw-r--r--drivers/ufs/core/ufs-mcq.c5
-rw-r--r--fs/bcachefs/backpointers.c10
-rw-r--r--fs/bcachefs/bcachefs.h2
-rw-r--r--fs/bcachefs/btree_iter.c2
-rw-r--r--fs/bcachefs/btree_key_cache.c37
-rw-r--r--fs/bcachefs/btree_key_cache_types.h34
-rw-r--r--fs/bcachefs/btree_trans_commit.c169
-rw-r--r--fs/bcachefs/btree_types.h35
-rw-r--r--fs/bcachefs/btree_update_interior.c30
-rw-r--r--fs/bcachefs/btree_update_interior.h1
-rw-r--r--fs/bcachefs/data_update.c28
-rw-r--r--fs/bcachefs/disk_groups.c4
-rw-r--r--fs/bcachefs/ec.c16
-rw-r--r--fs/bcachefs/fs-io-pagecache.c2
-rw-r--r--fs/bcachefs/fs-io-pagecache.h2
-rw-r--r--fs/bcachefs/fs.c8
-rw-r--r--fs/bcachefs/fsck.c2
-rw-r--r--fs/bcachefs/inode.c8
-rw-r--r--fs/bcachefs/io_write.c2
-rw-r--r--fs/bcachefs/journal.c31
-rw-r--r--fs/bcachefs/journal.h98
-rw-r--r--fs/bcachefs/journal_io.c7
-rw-r--r--fs/bcachefs/journal_reclaim.c42
-rw-r--r--fs/bcachefs/journal_types.h26
-rw-r--r--fs/bcachefs/six.c7
-rw-r--r--fs/bcachefs/subvolume_types.h2
-rw-r--r--fs/bcachefs/trace.h11
-rw-r--r--fs/bcachefs/xattr.c9
-rw-r--r--fs/erofs/Kconfig2
-rw-r--r--fs/erofs/data.c5
-rw-r--r--fs/erofs/inode.c98
-rw-r--r--fs/nfsd/cache.h4
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfscache.c87
-rw-r--r--fs/nfsd/nfssvc.c14
-rw-r--r--fs/overlayfs/params.c11
-rw-r--r--fs/overlayfs/util.c2
-rw-r--r--fs/smb/client/cifs_spnego.c4
-rw-r--r--fs/smb/client/connect.c6
-rw-r--r--fs/smb/client/sess.c22
-rw-r--r--fs/smb/client/smb2transport.c5
-rw-r--r--fs/xfs/Kconfig2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c27
-rw-r--r--fs/xfs/libxfs/xfs_defer.c28
-rw-r--r--fs/xfs/libxfs/xfs_defer.h2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c3
-rw-r--r--fs/xfs/xfs_inode_item_recover.c46
-rw-r--r--fs/xfs/xfs_log.c23
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_reflink.c1
-rw-r--r--include/asm-generic/qspinlock.h2
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/export-internal.h4
-rw-r--r--include/linux/hrtimer.h4
-rw-r--r--include/linux/perf_event.h13
-rw-r--r--io_uring/fdinfo.c9
-rw-r--r--io_uring/sqpoll.c12
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/cgroup/cgroup.c12
-rw-r--r--kernel/cpu.c8
-rw-r--r--kernel/events/core.c17
-rw-r--r--kernel/futex/core.c9
-rw-r--r--kernel/sched/fair.c161
-rw-r--r--kernel/sys.c4
-rw-r--r--kernel/time/hrtimer.c33
-rw-r--r--mm/damon/core.c2
-rw-r--r--mm/damon/sysfs-schemes.c5
-rw-r--r--mm/damon/sysfs.c6
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/huge_memory.c16
-rw-r--r--mm/ksm.c2
-rw-r--r--mm/memcontrol.c3
-rw-r--r--mm/userfaultfd.c2
-rw-r--r--mm/util.c10
-rw-r--r--scripts/Makefile.lib4
-rw-r--r--scripts/kconfig/symbol.c14
-rw-r--r--scripts/mod/modpost.c6
-rw-r--r--sound/pci/hda/cs35l56_hda_i2c.c4
-rw-r--r--sound/pci/hda/hda_intel.c3
-rw-r--r--sound/pci/hda/patch_realtek.c29
-rw-r--r--tools/hv/hv_kvp_daemon.c20
-rwxr-xr-xtools/hv/hv_set_ifconfig.sh4
-rw-r--r--tools/power/x86/turbostat/turbostat.c2884
-rw-r--r--tools/testing/selftests/mm/.gitignore1
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c23
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh3
160 files changed, 2824 insertions, 2569 deletions
diff --git a/Documentation/arch/loongarch/introduction.rst b/Documentation/arch/loongarch/introduction.rst
index 8c568cfc2107..5e6db78abeaf 100644
--- a/Documentation/arch/loongarch/introduction.rst
+++ b/Documentation/arch/loongarch/introduction.rst
@@ -375,9 +375,9 @@ Developer web site of Loongson and LoongArch (Software and Documentation):
Documentation of LoongArch ISA:
- https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (in Chinese)
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (in Chinese)
- https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (in English)
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (in English)
Documentation of LoongArch ELF psABI:
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
index f5d2f2414de8..22cc7a040dae 100644
--- a/Documentation/arch/x86/boot.rst
+++ b/Documentation/arch/x86/boot.rst
@@ -77,7 +77,7 @@ Protocol 2.14 BURNT BY INCORRECT COMMIT
Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
============= ============================================================
-.. note::
+ .. note::
The protocol version number should be changed only if the setup header
is changed. There is no need to update the version number if boot_params
or kernel_info are changed. Additionally, it is recommended to use
diff --git a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
index 462ead5a1cec..2cf3d016db42 100644
--- a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
+++ b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
@@ -36,6 +36,7 @@ properties:
- qcom,sm8350-ufshc
- qcom,sm8450-ufshc
- qcom,sm8550-ufshc
+ - qcom,sm8650-ufshc
- const: qcom,ufshc
- const: jedec,ufs-2.0
@@ -122,6 +123,7 @@ allOf:
- qcom,sm8350-ufshc
- qcom,sm8450-ufshc
- qcom,sm8550-ufshc
+ - qcom,sm8650-ufshc
then:
properties:
clocks:
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index 57c6ae23b3fc..cc4626d6ee4f 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -91,6 +91,10 @@ compatibility checking tool (fsck.erofs), and a debugging tool (dump.erofs):
- git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+For more information, please also refer to the documentation site:
+
+- https://erofs.docs.kernel.org
+
Bugs and patches are welcome, please kindly help us and send to the following
linux-erofs mailing list:
diff --git a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst
index 59d6bf33050c..bf463c5a4c51 100644
--- a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst
+++ b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst
@@ -338,9 +338,9 @@ Loongson与LoongArch的开发者网站(软件与文档资源):
LoongArch指令集架构的文档:
- https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (中文版)
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (中文版)
- https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (英文版)
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (英文版)
LoongArch的ELF psABI文档:
diff --git a/MAINTAINERS b/MAINTAINERS
index 6cc0708f77b4..509281e9e169 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7855,6 +7855,7 @@ R: Yue Hu <huyue2@coolpad.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
+W: https://erofs.docs.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
F: Documentation/ABI/testing/sysfs-fs-erofs
F: Documentation/filesystems/erofs.rst
@@ -11024,7 +11025,6 @@ F: drivers/net/wireless/intel/iwlwifi/
INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER
M: Jithu Joseph <jithu.joseph@intel.com>
-R: Maurice Ma <maurice.ma@intel.com>
S: Maintained
W: https://slimbootloader.github.io/security/firmware-update.html
F: drivers/platform/x86/intel/wmi/sbl-fw-update.c
@@ -13778,7 +13778,6 @@ F: drivers/net/ethernet/mellanox/mlxfw/
MELLANOX HARDWARE PLATFORM SUPPORT
M: Hans de Goede <hdegoede@redhat.com>
M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M: Mark Gross <markgross@kernel.org>
M: Vadim Pasternak <vadimp@nvidia.com>
L: platform-driver-x86@vger.kernel.org
S: Supported
@@ -14387,7 +14386,6 @@ F: drivers/platform/surface/surface_gpe.c
MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT
M: Hans de Goede <hdegoede@redhat.com>
M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M: Mark Gross <markgross@kernel.org>
M: Maximilian Luz <luzmaximilian@gmail.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
@@ -23667,7 +23665,6 @@ F: drivers/platform/x86/x86-android-tablets/
X86 PLATFORM DRIVERS
M: Hans de Goede <hdegoede@redhat.com>
M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M: Mark Gross <markgross@kernel.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
Q: https://patchwork.kernel.org/project/platform-driver-x86/list/
@@ -23705,6 +23702,20 @@ F: arch/x86/kernel/dumpstack.c
F: arch/x86/kernel/stacktrace.c
F: arch/x86/kernel/unwind_*.c
+X86 TRUST DOMAIN EXTENSIONS (TDX)
+M: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+R: Dave Hansen <dave.hansen@linux.intel.com>
+L: x86@kernel.org
+L: linux-coco@lists.linux.dev
+S: Supported
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/tdx
+F: arch/x86/boot/compressed/tdx*
+F: arch/x86/coco/tdx/
+F: arch/x86/include/asm/shared/tdx.h
+F: arch/x86/include/asm/tdx.h
+F: arch/x86/virt/vmx/tdx/
+F: drivers/virt/coco/tdx-guest
+
X86 VDSO
M: Andy Lutomirski <luto@kernel.org>
L: linux-kernel@vger.kernel.org
@@ -23885,8 +23896,7 @@ T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
P: Documentation/filesystems/xfs-maintainer-entry-profile.rst
F: Documentation/ABI/testing/sysfs-fs-xfs
F: Documentation/admin-guide/xfs.rst
-F: Documentation/filesystems/xfs-delayed-logging-design.rst
-F: Documentation/filesystems/xfs-self-describing-metadata.rst
+F: Documentation/filesystems/xfs-*
F: fs/xfs/
F: include/uapi/linux/dqblk_xfs.h
F: include/uapi/linux/fsmap.h
diff --git a/Makefile b/Makefile
index ede0bd241056..724c79bebe72 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 7
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 9eeb0c05f3f4..204b94b2e6aa 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -68,6 +68,7 @@ LDFLAGS_vmlinux += -static -n -nostdlib
ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
cflags-y += $(call cc-option,-mexplicit-relocs)
KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access)
+KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data)
KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data)
KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data)
KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
@@ -142,6 +143,8 @@ vdso-install-y += arch/loongarch/vdso/vdso.so.dbg
all: $(notdir $(KBUILD_IMAGE))
+vmlinuz.efi: vmlinux.efi
+
vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index c9544f358c33..655db7d7a427 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -609,8 +609,7 @@
lu32i.d \reg, 0
lu52i.d \reg, \reg, 0
.pushsection ".la_abs", "aw", %progbits
- 768:
- .dword 768b-766b
+ .dword 766b
.dword \sym
.popsection
#endif
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index ed5da02b1cf6..9b36ac003f89 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr, \
switch (size) { \
case 4: \
__asm__ __volatile__( \
- "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \
+ "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \
: [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr) \
: [val] "r" (val)); \
break; \
case 8: \
__asm__ __volatile__( \
- "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \
+ "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \
: [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr) \
: [val] "r" (val)); \
break; \
@@ -63,7 +63,7 @@ PERCPU_OP(and, and, &)
PERCPU_OP(or, or, |)
#undef PERCPU_OP
-static __always_inline unsigned long __percpu_read(void *ptr, int size)
+static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size)
{
unsigned long ret;
@@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size)
return ret;
}
-static __always_inline void __percpu_write(void *ptr, unsigned long val, int size)
+static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size)
{
switch (size) {
case 1:
@@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz
}
}
-static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
- int size)
+static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
{
switch (size) {
case 1:
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index a0bc159ce8bd..ee52fb1e9963 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len
#ifdef CONFIG_RELOCATABLE
struct rela_la_abs {
- long offset;
+ long pc;
long symvalue;
};
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index 6c3eff9af9fb..1acfa704c8d0 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset)
for (p = begin; (void *)p < end; p++) {
long v = p->symvalue;
uint32_t lu12iw, ori, lu32id, lu52id;
- union loongarch_instruction *insn = (void *)p - p->offset;
+ union loongarch_instruction *insn = (void *)p->pc;
lu12iw = (v >> 12) & 0xfffff;
ori = v & 0xfff;
@@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void)
return hash;
}
+static int __init nokaslr(char *p)
+{
+ pr_info("KASLR is disabled.\n");
+
+ return 0; /* Print a notice and silence the boot warning */
+}
+early_param("nokaslr", nokaslr);
+
static inline __init bool kaslr_disabled(void)
{
char *str;
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index 3064af94db9c..e7015f7b70e3 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -58,14 +58,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt)
return 0;
}
-static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
+static int constant_set_state_periodic(struct clock_event_device *evt)
{
+ unsigned long period;
unsigned long timer_config;
raw_spin_lock(&state_lock);
- timer_config = csr_read64(LOONGARCH_CSR_TCFG);
- timer_config &= ~CSR_TCFG_EN;
+ period = const_clock_freq / HZ;
+ timer_config = period & CSR_TCFG_VAL;
+ timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
csr_write64(timer_config, LOONGARCH_CSR_TCFG);
raw_spin_unlock(&state_lock);
@@ -73,16 +75,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
return 0;
}
-static int constant_set_state_periodic(struct clock_event_device *evt)
+static int constant_set_state_shutdown(struct clock_event_device *evt)
{
- unsigned long period;
unsigned long timer_config;
raw_spin_lock(&state_lock);
- period = const_clock_freq / HZ;
- timer_config = period & CSR_TCFG_VAL;
- timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
+ timer_config = csr_read64(LOONGARCH_CSR_TCFG);
+ timer_config &= ~CSR_TCFG_EN;
csr_write64(timer_config, LOONGARCH_CSR_TCFG);
raw_spin_unlock(&state_lock);
@@ -90,11 +90,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt)
return 0;
}
-static int constant_set_state_shutdown(struct clock_event_device *evt)
-{
- return 0;
-}
-
static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt)
{
unsigned long timer_config;
@@ -161,7 +156,7 @@ int constant_clockevent_init(void)
cd->rating = 320;
cd->cpumask = cpumask_of(cpu);
cd->set_state_oneshot = constant_set_state_oneshot;
- cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped;
+ cd->set_state_oneshot_stopped = constant_set_state_shutdown;
cd->set_state_periodic = constant_set_state_periodic;
cd->set_state_shutdown = constant_set_state_shutdown;
cd->set_next_event = constant_timer_next_event;
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 71d0539e2d0b..2aae72e63871 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr)
{
return pfn_to_page(virt_to_pfn(kaddr));
}
-EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+EXPORT_SYMBOL(dmw_virt_to_page);
struct page *tlb_virt_to_page(unsigned long kaddr)
{
return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
}
-EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+EXPORT_SYMBOL(tlb_virt_to_page);
pgd_t *pgd_alloc(struct mm_struct *mm)
{
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index fd69dfa0cdab..a7c9c0e69e5a 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -140,11 +140,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
default 8
config ARCH_MMAP_RND_BITS_MAX
- default 24 if 64BIT
- default 17
+ default 18 if 64BIT
+ default 13
config ARCH_MMAP_RND_COMPAT_BITS_MAX
- default 17
+ default 13
# unless you want to implement ACPI on PA-RISC ... ;-)
config PM
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 140eaa97bf21..2d73d3c3cd37 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -349,15 +349,7 @@ struct pt_regs; /* forward declaration... */
#define ELF_HWCAP 0
-/* Masks for stack and mmap randomization */
-#define BRK_RND_MASK (is_32bit_task() ? 0x07ffUL : 0x3ffffUL)
-#define MMAP_RND_MASK (is_32bit_task() ? 0x1fffUL : 0x3ffffUL)
-#define STACK_RND_MASK MMAP_RND_MASK
-
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *);
-#define arch_randomize_brk arch_randomize_brk
-
+#define STACK_RND_MASK 0x7ff /* 8MB of VA */
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
struct linux_binprm;
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index c05d121cf5d0..982aca20f56f 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -47,6 +47,8 @@
#ifndef __ASSEMBLY__
+struct rlimit;
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack);
unsigned long calc_max_stack_size(unsigned long stack_max);
/*
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 29e2750f86a4..e95a977ba5f3 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -383,7 +383,7 @@ show_cpuinfo (struct seq_file *m, void *v)
char cpu_name[60], *p;
/* strip PA path from CPU name to not confuse lscpu */
- strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
+ strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
p = strrchr(cpu_name, '[');
if (p)
*(--p) = 0;
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index ab896eff7a1d..98af719d5f85 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max)
* indicating that "current" should be used instead of a passed-in
* value from the exec bprm as done with arch_pick_mmap_layout().
*/
-static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
{
unsigned long stack_base;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 21556ad87f4b..8f3a4d16bb79 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -15,6 +15,7 @@
#include <linux/io.h>
#include <asm/apic.h>
#include <asm/desc.h>
+#include <asm/e820/api.h>
#include <asm/sev.h>
#include <asm/ibt.h>
#include <asm/hypervisor.h>
@@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu)
static int __init hv_pci_init(void)
{
- int gen2vm = efi_enabled(EFI_BOOT);
+ bool gen2vm = efi_enabled(EFI_BOOT);
/*
- * For Generation-2 VM, we exit from pci_arch_init() by returning 0.
- * The purpose is to suppress the harmless warning:
+ * A Generation-2 VM doesn't support legacy PCI/PCIe, so both
+ * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() ->
+ * pcibios_init() doesn't call pcibios_resource_survey() ->
+ * e820__reserve_resources_late(); as a result, any emulated persistent
+ * memory of E820_TYPE_PRAM (12) via the kernel parameter
+ * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be
+ * detected by register_e820_pmem(). Fix this by directly calling
+ * e820__reserve_resources_late() here: e820__reserve_resources_late()
+ * depends on e820__reserve_resources(), which has been called earlier
+ * from setup_arch(). Note: e820__reserve_resources_late() also adds
+ * any memory of E820_TYPE_PMEM (7) into iomem_resource, and
+ * acpi_nfit_register_region() -> acpi_nfit_insert_resource() ->
+ * region_intersects() returns REGION_INTERSECTS, so the memory of
+ * E820_TYPE_PMEM won't get added twice.
+ *
+ * We return 0 here so that pci_arch_init() won't print the warning:
* "PCI: Fatal: No config space access function found"
*/
- if (gen2vm)
+ if (gen2vm) {
+ e820__reserve_resources_late();
return 0;
+ }
/* For Generation-1 VM, we'll proceed in pci_arch_init(). */
return 1;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d0918a75cb00..1a0dd80d81ac 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -63,6 +63,7 @@ int acpi_fix_pin2_polarity __initdata;
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+static bool has_lapic_cpus __initdata;
static bool acpi_support_online_capable;
#endif
@@ -233,6 +234,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
return 0;
/*
+ * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure
+ * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID
+ * in x2APIC must be equal or greater than 0xff.
+ */
+ if (has_lapic_cpus && apic_id < 0xff)
+ return 0;
+
+ /*
* We need to register disabled CPU as well to permit
* counting disabled CPUs. This allows us to size
* cpus_possible_map more accurately, to permit
@@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
static int __init acpi_parse_madt_lapic_entries(void)
{
- int count;
- int x2count = 0;
- int ret;
- struct acpi_subtable_proc madt_proc[2];
+ int count, x2count = 0;
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
@@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void)
acpi_parse_sapic, MAX_LOCAL_APIC);
if (!count) {
- memset(madt_proc, 0, sizeof(madt_proc));
- madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
- madt_proc[0].handler = acpi_parse_lapic;
- madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
- madt_proc[1].handler = acpi_parse_x2apic;
- ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
- sizeof(struct acpi_table_madt),
- madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
- if (ret < 0) {
- pr_err("Error parsing LAPIC/X2APIC entries\n");
- return ret;
- }
-
- count = madt_proc[0].count;
- x2count = madt_proc[1].count;
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
+ acpi_parse_lapic, MAX_LOCAL_APIC);
+ has_lapic_cpus = count > 0;
+ x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
+ acpi_parse_x2apic, MAX_LOCAL_APIC);
}
if (!count && !x2count) {
pr_err("No LAPIC entries present\n");
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e6bba12c759c..01fa06dd06b6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -262,11 +262,14 @@ static uint32_t __init ms_hyperv_platform(void)
static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
{
static atomic_t nmi_cpu = ATOMIC_INIT(-1);
+ unsigned int old_cpu, this_cpu;
if (!unknown_nmi_panic)
return NMI_DONE;
- if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
+ old_cpu = -1;
+ this_cpu = raw_smp_processor_id();
+ if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu))
return NMI_HANDLED;
return NMI_DONE;
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index cacf2ede6217..23d8aaf8d9fd 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
uc_flags = frame_uc_flags(regs);
- if (setup_signal_shadow_stack(ksig))
- return -EFAULT;
-
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
@@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
return -EFAULT;
}
+ if (setup_signal_shadow_stack(ksig))
+ return -EFAULT;
+
/* Set up registers for signal handler */
regs->di = ksig->sig;
/* In case the signal handler was declared without prototypes */
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e..900c1be1fee1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
};
struct request *rq;
- if (unlikely(bio_queue_enter(bio)))
- return NULL;
-
if (blk_mq_attempt_bio_merge(q, bio, nsegs))
- goto queue_exit;
+ return NULL;
rq_qos_throttle(q, bio);
@@ -2878,35 +2875,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
rq_qos_cleanup(q, bio);
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
-queue_exit:
- blk_queue_exit(q);
return NULL;
}
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
- struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
+/* return true if this @rq can be used for @bio */
+static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+ struct bio *bio)
{
- struct request *rq;
- enum hctx_type type, hctx_type;
+ enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
+ enum hctx_type hctx_type = rq->mq_hctx->type;
- if (!plug)
- return NULL;
- rq = rq_list_peek(&plug->cached_rq);
- if (!rq || rq->q != q)
- return NULL;
+ WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
- if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
- *bio = NULL;
- return NULL;
- }
-
- type = blk_mq_get_hctx_type((*bio)->bi_opf);
- hctx_type = rq->mq_hctx->type;
if (type != hctx_type &&
!(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
- return NULL;
- if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
- return NULL;
+ return false;
+ if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+ return false;
/*
* If any qos ->throttle() end up blocking, we will have flushed the
@@ -2914,12 +2899,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
* before we throttle.
*/
plug->cached_rq = rq_list_next(rq);
- rq_qos_throttle(q, *bio);
+ rq_qos_throttle(rq->q, bio);
blk_mq_rq_time_init(rq, 0);
- rq->cmd_flags = (*bio)->bi_opf;
+ rq->cmd_flags = bio->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
- return rq;
+ return true;
}
static void bio_set_ioprio(struct bio *bio)
@@ -2949,7 +2934,7 @@ void blk_mq_submit_bio(struct bio *bio)
struct blk_plug *plug = blk_mq_plug(bio);
const int is_sync = op_is_sync(bio->bi_opf);
struct blk_mq_hw_ctx *hctx;
- struct request *rq;
+ struct request *rq = NULL;
unsigned int nr_segs = 1;
blk_status_t ret;
@@ -2960,20 +2945,36 @@ void blk_mq_submit_bio(struct bio *bio)
return;
}
- if (!bio_integrity_prep(bio))
- return;
-
bio_set_ioprio(bio);
- rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
- if (!rq) {
- if (!bio)
+ if (plug) {
+ rq = rq_list_peek(&plug->cached_rq);
+ if (rq && rq->q != q)
+ rq = NULL;
+ }
+ if (rq) {
+ if (!bio_integrity_prep(bio))
return;
- rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
- if (unlikely(!rq))
+ if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
return;
+ if (blk_mq_can_use_cached_rq(rq, plug, bio))
+ goto done;
+ percpu_ref_get(&q->q_usage_counter);
+ } else {
+ if (unlikely(bio_queue_enter(bio)))
+ return;
+ if (!bio_integrity_prep(bio))
+ goto fail;
+ }
+
+ rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+ if (unlikely(!rq)) {
+fail:
+ blk_queue_exit(q);
+ return;
}
+done:
trace_block_getrq(bio);
rq_qos_track(q, rq, bio);
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 0ace218783c8..e9b16cbc26f4 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -250,9 +250,6 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
{
int ret;
- ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n",
- atomic_read(&vdev->drm.dev->power.usage_count));
-
ret = pm_runtime_get_if_active(vdev->drm.dev, false);
drm_WARN_ON(&vdev->drm, ret < 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index afec09930efa..9d92ca157677 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -248,6 +248,7 @@ extern int amdgpu_umsch_mm;
extern int amdgpu_seamless;
extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index df3ecfa9e13f..e50be6500030 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -207,7 +207,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
for (i = 0; i < p->nchunks; i++) {
- struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
struct drm_amdgpu_cs_chunk user_chunk;
uint32_t __user *cdata;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3095a3a864af..8f24cabe2155 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -207,6 +207,7 @@ int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
int amdgpu_umsch_mm;
int amdgpu_seamless = -1; /* auto */
uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@@ -961,6 +962,15 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture. This provides an aperture in the GPU's internal
+ * address space for direct access to system memory. Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 32b701cc0376..a21045d018f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1473,6 +1473,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
topology->nodes[i].num_links : node_num_links;
}
+ /* popluate the connected port num info if supported and available */
+ if (ta_port_num_support && topology->nodes[i].num_links) {
+ memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+ sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+ }
/* reflect the topology information for bi-directionality */
if (requires_reflection && topology->nodes[i].num_hops)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 5d36ad3f48c7..c4d9cbde55b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -150,6 +150,7 @@ struct psp_xgmi_node_info {
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
};
struct psp_xgmi_topology_info {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 84e5987b14e0..a3dc68e98910 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1188,7 +1188,7 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
}
if (block_obj->hw_ops->query_ras_error_count)
- block_obj->hw_ops->query_ras_error_count(adev, &err_data);
+ block_obj->hw_ops->query_ras_error_count(adev, err_data);
if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
(info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 65949cc7abb9..07d930339b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -398,6 +398,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
* amdgpu_uvd_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
* Initialize the entity used for handle management in the kernel driver.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 0954447f689d..59acf424a078 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -230,6 +230,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
* amdgpu_vce_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
* Initialize the entity used for handle management in the kernel driver.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 0ec7b061d7c2..a5a05c16c10d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -675,7 +675,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 6dce9b29f675..23d7b548d13f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -640,8 +640,9 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
- if (!amdgpu_sriov_vf(adev) ||
- (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)))
+ if (!amdgpu_sriov_vf(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) &&
+ (amdgpu_agp == 1))
amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index bde25eb4ed8e..2ac5820e9c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1630,7 +1630,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
} else {
amdgpu_gmc_vram_location(adev, mc, base);
amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
amdgpu_gmc_agp_location(adev, mc);
}
/* base offset of vram pages */
@@ -2170,8 +2170,6 @@ static int gmc_v9_0_sw_fini(void *handle)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
amdgpu_gmc_sysfs_fini(adev);
- adev->gmc.num_mem_partitions = 0;
- kfree(adev->gmc.mem_partitions);
amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
@@ -2185,6 +2183,9 @@ static int gmc_v9_0_sw_fini(void *handle)
amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
amdgpu_bo_fini(adev);
+ adev->gmc.num_mem_partitions = 0;
+ kfree(adev->gmc.mem_partitions);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index ea142611be1c..9b0146732e13 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -130,6 +130,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
uint64_t value;
int i;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
inst_mask = adev->aid_mask;
for_each_inst(i, inst_mask) {
/* Program the AGP BAR */
@@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
adev->gmc.agp_end >> 24);
- if (amdgpu_sriov_vf(adev))
- return;
-
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6f99f6754c11..ee97814ebd99 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2079,7 +2079,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
struct dmub_srv_create_params create_params;
struct dmub_srv_region_params region_params;
struct dmub_srv_region_info region_info;
- struct dmub_srv_fb_params fb_params;
+ struct dmub_srv_memory_params memory_params;
struct dmub_srv_fb_info *fb_info;
struct dmub_srv *dmub_srv;
const struct dmcub_firmware_header_v1_0 *hdr;
@@ -2182,6 +2182,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
adev->dm.dmub_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
PSP_HEADER_BYTES;
+ region_params.is_mailbox_in_inbox = false;
status = dmub_srv_calc_region_info(dmub_srv, &region_params,
&region_info);
@@ -2205,10 +2206,10 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
return r;
/* Rebase the regions on the framebuffer address. */
- memset(&fb_params, 0, sizeof(fb_params));
- fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr;
- fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr;
- fb_params.region_info = &region_info;
+ memset(&memory_params, 0, sizeof(memory_params));
+ memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr;
+ memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr;
+ memory_params.region_info = &region_info;
adev->dm.dmub_fb_info =
kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL);
@@ -2220,7 +2221,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
return -ENOMEM;
}
- status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info);
+ status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info);
if (status != DMUB_STATUS_OK) {
DRM_ERROR("Error calculating DMUB FB info: %d\n", status);
return -EINVAL;
@@ -7481,6 +7482,9 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
int i;
int result = -EIO;
+ if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported)
+ return result;
+
cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL);
if (!cmd.payloads)
@@ -9603,14 +9607,14 @@ static bool should_reset_plane(struct drm_atomic_state *state,
struct drm_plane *other;
struct drm_plane_state *old_other_state, *new_other_state;
struct drm_crtc_state *new_crtc_state;
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
int i;
/*
- * TODO: Remove this hack once the checks below are sufficient
- * enough to determine when we need to reset all the planes on
- * the stream.
+ * TODO: Remove this hack for all asics once it proves that the
+ * fast updates works fine on DCN3.2+.
*/
- if (state->allow_modeset)
+ if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset)
return true;
/* Exit early if we know that we're adding or removing the plane. */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index ed784cf27d39..c7a29bb737e2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -536,11 +536,8 @@ bool dm_helpers_dp_read_dpcd(
struct amdgpu_dm_connector *aconnector = link->priv;
- if (!aconnector) {
- drm_dbg_dp(aconnector->base.dev,
- "Failed to find connector for link!\n");
+ if (!aconnector)
return false;
- }
return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
size) == size;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index d3b13d362eda..11da0eebee6c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1604,31 +1604,31 @@ enum dc_status dm_dp_mst_is_port_support_mode(
unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
unsigned int max_compressed_bw_in_kbps = 0;
struct dc_dsc_bw_range bw_range = {0};
- struct drm_dp_mst_topology_mgr *mst_mgr;
+ uint16_t full_pbn = aconnector->mst_output_port->full_pbn;
/*
- * check if the mode could be supported if DSC pass-through is supported
- * AND check if there enough bandwidth available to support the mode
- * with DSC enabled.
+ * Consider the case with the depth of the mst topology tree is equal or less than 2
+ * A. When dsc bitstream can be transmitted along the entire path
+ * 1. dsc is possible between source and branch/leaf device (common dsc params is possible), AND
+ * 2. dsc passthrough supported at MST branch, or
+ * 3. dsc decoding supported at leaf MST device
+ * Use maximum dsc compression as bw constraint
+ * B. When dsc bitstream cannot be transmitted along the entire path
+ * Use native bw as bw constraint
*/
if (is_dsc_common_config_possible(stream, &bw_range) &&
- aconnector->mst_output_port->passthrough_aux) {
- mst_mgr = aconnector->mst_output_port->mgr;
- mutex_lock(&mst_mgr->lock);
-
+ (aconnector->mst_output_port->passthrough_aux ||
+ aconnector->dsc_aux == &aconnector->mst_output_port->aux)) {
cur_link_settings = stream->link->verified_link_cap;
upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
- &cur_link_settings
- );
- down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
+ &cur_link_settings);
+ down_link_bw_in_kbps = kbps_from_pbn(full_pbn);
/* pick the bottleneck */
end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
down_link_bw_in_kbps);
- mutex_unlock(&mst_mgr->lock);
-
/*
* use the maximum dsc compression bandwidth as the required
* bandwidth for the mode
@@ -1643,8 +1643,7 @@ enum dc_status dm_dp_mst_is_port_support_mode(
/* check if mode could be supported within full_pbn */
bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
-
- if (pbn > aconnector->mst_output_port->full_pbn)
+ if (pbn > full_pbn)
return DC_FAIL_BANDWIDTH_VALIDATE;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 0fa4fcd00de2..507a7cf56711 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -820,22 +820,22 @@ static void dcn35_set_idle_state(struct clk_mgr *clk_mgr_base, bool allow_idle)
if (dc->config.disable_ips == DMUB_IPS_ENABLE ||
dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) {
- val |= DMUB_IPS1_ALLOW_MASK;
- val |= DMUB_IPS2_ALLOW_MASK;
- } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
val = val & ~DMUB_IPS1_ALLOW_MASK;
val = val & ~DMUB_IPS2_ALLOW_MASK;
- } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
- val |= DMUB_IPS1_ALLOW_MASK;
- val = val & ~DMUB_IPS2_ALLOW_MASK;
- } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
val |= DMUB_IPS1_ALLOW_MASK;
val |= DMUB_IPS2_ALLOW_MASK;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
+ val = val & ~DMUB_IPS1_ALLOW_MASK;
+ val |= DMUB_IPS2_ALLOW_MASK;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+ val = val & ~DMUB_IPS1_ALLOW_MASK;
+ val = val & ~DMUB_IPS2_ALLOW_MASK;
}
if (!allow_idle) {
- val = val & ~DMUB_IPS1_ALLOW_MASK;
- val = val & ~DMUB_IPS2_ALLOW_MASK;
+ val |= DMUB_IPS1_ALLOW_MASK;
+ val |= DMUB_IPS2_ALLOW_MASK;
}
dcn35_smu_write_ips_scratch(clk_mgr, val);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 7b9bf5cb4529..76b47f178127 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3178,7 +3178,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(&context->res_ctx,
context->streams[i]);
- if (otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
+ if (otg_master && otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
resource_build_test_pattern_params(&context->res_ctx, otg_master);
}
}
@@ -4934,8 +4934,8 @@ bool dc_dmub_is_ips_idle_state(struct dc *dc)
if (dc->hwss.get_idle_state)
idle_state = dc->hwss.get_idle_state(dc);
- if ((idle_state & DMUB_IPS1_ALLOW_MASK) ||
- (idle_state & DMUB_IPS2_ALLOW_MASK))
+ if (!(idle_state & DMUB_IPS1_ALLOW_MASK) ||
+ !(idle_state & DMUB_IPS2_ALLOW_MASK))
return true;
return false;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 1d48278cba96..a1f1d1003992 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -5190,6 +5190,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
sec_next = sec_pipe->next_odm_pipe;
sec_prev = sec_pipe->prev_odm_pipe;
+ if (pri_pipe == NULL)
+ return false;
+
*sec_pipe = *pri_pipe;
sec_pipe->top_pipe = sec_top;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index e4c007203318..0e07699c1e83 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1202,11 +1202,11 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
allow_state = dc->hwss.get_idle_state(dc);
dc->hwss.set_idle_state(dc, false);
- if (allow_state & DMUB_IPS2_ALLOW_MASK) {
+ if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) {
// Wait for evaluation time
udelay(dc->debug.ips2_eval_delay_us);
commit_state = dc->hwss.get_idle_state(dc);
- if (commit_state & DMUB_IPS2_COMMIT_MASK) {
+ if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) {
// Tell PMFW to exit low power state
dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
@@ -1216,7 +1216,7 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
for (i = 0; i < max_num_polls; ++i) {
commit_state = dc->hwss.get_idle_state(dc);
- if (!(commit_state & DMUB_IPS2_COMMIT_MASK))
+ if (commit_state & DMUB_IPS2_COMMIT_MASK)
break;
udelay(1);
@@ -1235,10 +1235,10 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
}
dc_dmub_srv_notify_idle(dc, false);
- if (allow_state & DMUB_IPS1_ALLOW_MASK) {
+ if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) {
for (i = 0; i < max_num_polls; ++i) {
commit_state = dc->hwss.get_idle_state(dc);
- if (!(commit_state & DMUB_IPS1_COMMIT_MASK))
+ if (commit_state & DMUB_IPS1_COMMIT_MASK)
break;
udelay(1);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index cea666ea66c6..fcb825e4f1bb 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -177,6 +177,7 @@ struct dc_panel_patch {
unsigned int disable_fams;
unsigned int skip_avmute;
unsigned int mst_start_top_delay;
+ unsigned int remove_sink_ext_caps;
};
struct dc_edid_caps {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c
index 001f9eb66920..62a8f0b56006 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c
@@ -261,12 +261,6 @@ static void enc35_stream_encoder_enable(
/* invalid mode ! */
ASSERT_CRITICAL(false);
}
-
- REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
- REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
- } else {
- REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
- REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
}
}
@@ -436,6 +430,8 @@ static void enc35_disable_fifo(struct stream_encoder *enc)
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
}
static void enc35_enable_fifo(struct stream_encoder *enc)
@@ -443,6 +439,8 @@ static void enc35_enable_fifo(struct stream_encoder *enc)
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
enc35_reset_fifo(enc, true);
enc35_reset_fifo(enc, false);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index d6f0f857c05a..f2fe523f914f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -1088,6 +1088,9 @@ static bool detect_link_and_local_sink(struct dc_link *link,
if (sink->edid_caps.panel_patch.skip_scdc_overwrite)
link->ctx->dc->debug.hdmi20_disable = true;
+ if (sink->edid_caps.panel_patch.remove_sink_ext_caps)
+ link->dpcd_sink_ext_caps.raw = 0;
+
if (dc_is_hdmi_signal(link->connector_signal))
read_scdc_caps(link->ddc, link->local_sink);
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index 9665ada0f894..df63aa8f01e9 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -195,6 +195,7 @@ struct dmub_srv_region_params {
uint32_t vbios_size;
const uint8_t *fw_inst_const;
const uint8_t *fw_bss_data;
+ bool is_mailbox_in_inbox;
};
/**
@@ -214,20 +215,25 @@ struct dmub_srv_region_params {
*/
struct dmub_srv_region_info {
uint32_t fb_size;
+ uint32_t inbox_size;
uint8_t num_regions;
struct dmub_region regions[DMUB_WINDOW_TOTAL];
};
/**
- * struct dmub_srv_fb_params - parameters used for driver fb setup
+ * struct dmub_srv_memory_params - parameters used for driver fb setup
* @region_info: region info calculated by dmub service
- * @cpu_addr: base cpu address for the framebuffer
- * @gpu_addr: base gpu virtual address for the framebuffer
+ * @cpu_fb_addr: base cpu address for the framebuffer
+ * @cpu_inbox_addr: base cpu address for the gart
+ * @gpu_fb_addr: base gpu virtual address for the framebuffer
+ * @gpu_inbox_addr: base gpu virtual address for the gart
*/
-struct dmub_srv_fb_params {
+struct dmub_srv_memory_params {
const struct dmub_srv_region_info *region_info;
- void *cpu_addr;
- uint64_t gpu_addr;
+ void *cpu_fb_addr;
+ void *cpu_inbox_addr;
+ uint64_t gpu_fb_addr;
+ uint64_t gpu_inbox_addr;
};
/**
@@ -563,8 +569,8 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
* DMUB_STATUS_OK - success
* DMUB_STATUS_INVALID - unspecified error
*/
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
- const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+ const struct dmub_srv_memory_params *params,
struct dmub_srv_fb_info *out);
/**
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index e43e8d4bfe37..22fc4ba96def 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -434,7 +434,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
uint32_t fw_state_size = DMUB_FW_STATE_SIZE;
uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE;
uint32_t scratch_mem_size = DMUB_SCRATCH_MEM_SIZE;
-
+ uint32_t previous_top = 0;
if (!dmub->sw_init)
return DMUB_STATUS_INVALID;
@@ -459,8 +459,15 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
bios->base = dmub_align(stack->top, 256);
bios->top = bios->base + params->vbios_size;
- mail->base = dmub_align(bios->top, 256);
- mail->top = mail->base + DMUB_MAILBOX_SIZE;
+ if (params->is_mailbox_in_inbox) {
+ mail->base = 0;
+ mail->top = mail->base + DMUB_MAILBOX_SIZE;
+ previous_top = bios->top;
+ } else {
+ mail->base = dmub_align(bios->top, 256);
+ mail->top = mail->base + DMUB_MAILBOX_SIZE;
+ previous_top = mail->top;
+ }
fw_info = dmub_get_fw_meta_info(params);
@@ -479,7 +486,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
dmub->fw_version = fw_info->fw_version;
}
- trace_buff->base = dmub_align(mail->top, 256);
+ trace_buff->base = dmub_align(previous_top, 256);
trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64);
fw_state->base = dmub_align(trace_buff->top, 256);
@@ -490,11 +497,14 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
out->fb_size = dmub_align(scratch_mem->top, 4096);
+ if (params->is_mailbox_in_inbox)
+ out->inbox_size = dmub_align(mail->top, 4096);
+
return DMUB_STATUS_OK;
}
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
- const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+ const struct dmub_srv_memory_params *params,
struct dmub_srv_fb_info *out)
{
uint8_t *cpu_base;
@@ -509,8 +519,8 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
if (params->region_info->num_regions != DMUB_NUM_WINDOWS)
return DMUB_STATUS_INVALID;
- cpu_base = (uint8_t *)params->cpu_addr;
- gpu_base = params->gpu_addr;
+ cpu_base = (uint8_t *)params->cpu_fb_addr;
+ gpu_base = params->gpu_fb_addr;
for (i = 0; i < DMUB_NUM_WINDOWS; ++i) {
const struct dmub_region *reg =
@@ -518,6 +528,12 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
out->fb[i].cpu_addr = cpu_base + reg->base;
out->fb[i].gpu_addr = gpu_base + reg->base;
+
+ if (i == DMUB_WINDOW_4_MAILBOX && params->cpu_inbox_addr != 0) {
+ out->fb[i].cpu_addr = (uint8_t *)params->cpu_inbox_addr + reg->base;
+ out->fb[i].gpu_addr = params->gpu_inbox_addr + reg->base;
+ }
+
out->fb[i].size = reg->top - reg->base;
}
@@ -707,9 +723,16 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub)
return DMUB_STATUS_INVALID;
if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) {
- dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
- dmub->inbox1_rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub);
- dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+ uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+ uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub);
+
+ if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) {
+ return DMUB_STATUS_HW_FAILURE;
+ } else {
+ dmub->inbox1_rb.rptr = rptr;
+ dmub->inbox1_rb.wrpt = wptr;
+ dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+ }
}
return DMUB_STATUS_OK;
@@ -743,6 +766,11 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
if (!dmub->hw_init)
return DMUB_STATUS_INVALID;
+ if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
+ dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
+ return DMUB_STATUS_HW_FAILURE;
+ }
+
if (dmub_rb_push_front(&dmub->inbox1_rb, cmd))
return DMUB_STATUS_OK;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index dab35d878a90..fef2d290f3f2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -123,7 +123,7 @@ typedef enum {
VOLTAGE_GUARDBAND_COUNT
} GFX_GUARDBAND_e;
-#define SMU_METRICS_TABLE_VERSION 0x8
+#define SMU_METRICS_TABLE_VERSION 0x9
typedef struct __attribute__((packed, aligned(4))) {
uint32_t AccumulationCounter;
@@ -211,6 +211,14 @@ typedef struct __attribute__((packed, aligned(4))) {
//XGMI Data tranfser size
uint64_t XgmiReadDataSizeAcc[8];//in KByte
uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+
+ //PCIE BW Data and error count
+ uint32_t PcieBandwidth[4];
+ uint32_t PCIeL0ToRecoveryCountAcc; // The Pcie counter itself is accumulated
+ uint32_t PCIenReplayAAcc; // The Pcie counter itself is accumulated
+ uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated
+ uint32_t PCIeNAKSentCountAcc; // The Pcie counter itself is accumulated
+ uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated
} MetricsTable_t;
#define SMU_VF_METRICS_TABLE_VERSION 0x3
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 891605d4975f..0e5a77c3c2e2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1454,7 +1454,7 @@ static int smu_v13_0_6_register_irq_handler(struct smu_context *smu)
static int smu_v13_0_6_notify_unload(struct smu_context *smu)
{
- if (smu->smc_fw_version <= 0x553500)
+ if (amdgpu_in_reset(smu->adev))
return 0;
dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
@@ -2095,6 +2095,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
smu_v13_0_6_get_current_pcie_link_speed(smu);
gpu_metrics->pcie_bandwidth_acc =
SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+ gpu_metrics->pcie_bandwidth_inst =
+ SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+ gpu_metrics->pcie_l0_to_recov_count_acc =
+ metrics->PCIeL0ToRecoveryCountAcc;
+ gpu_metrics->pcie_replay_count_acc =
+ metrics->PCIenReplayAAcc;
+ gpu_metrics->pcie_replay_rover_count_acc =
+ metrics->PCIenReplayARolloverCountAcc;
}
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index d5c15292ae93..3d92f66e550c 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -336,6 +336,12 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"),
},
.driver_data = (void *)&lcd1200x1920_rightside_up,
+ }, { /* Lenovo Legion Go 8APU1 */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Legion Go 8APU1"),
+ },
+ .driver_data = (void *)&lcd1600x2560_leftside_up,
}, { /* Lenovo Yoga Book X90F / X90L */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
index 82b267c11147..460459af272d 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
@@ -14,7 +14,7 @@ struct nvkm_event {
int index_nr;
spinlock_t refs_lock;
- spinlock_t list_lock;
+ rwlock_t list_lock;
int *refs;
struct list_head ntfy;
@@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
int types_nr, int index_nr, struct nvkm_event *event)
{
spin_lock_init(&event->refs_lock);
- spin_lock_init(&event->list_lock);
+ rwlock_init(&event->list_lock);
return __nvkm_event_init(func, subdev, types_nr, index_nr, event);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index d8c92521226d..f28f9a857458 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -726,6 +726,11 @@ nouveau_display_create(struct drm_device *dev)
if (nouveau_modeset != 2) {
ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp);
+ /* no display hw */
+ if (ret == -ENODEV) {
+ ret = 0;
+ goto disp_create_err;
+ }
if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) {
nouveau_display_create_properties(dev);
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c
index a6c877135598..61fed7792e41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/event.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c
@@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy)
static void
nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy)
{
- spin_lock_irq(&ntfy->event->list_lock);
+ write_lock_irq(&ntfy->event->list_lock);
list_del_init(&ntfy->head);
- spin_unlock_irq(&ntfy->event->list_lock);
+ write_unlock_irq(&ntfy->event->list_lock);
}
static void
nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy)
{
- spin_lock_irq(&ntfy->event->list_lock);
+ write_lock_irq(&ntfy->event->list_lock);
list_add_tail(&ntfy->head, &ntfy->event->ntfy);
- spin_unlock_irq(&ntfy->event->list_lock);
+ write_unlock_irq(&ntfy->event->list_lock);
}
static void
@@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
return;
nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id);
- spin_lock_irqsave(&event->list_lock, flags);
+ read_lock_irqsave(&event->list_lock, flags);
list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
if (ntfy->id == id && ntfy->bits & bits) {
@@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
}
}
- spin_unlock_irqrestore(&event->list_lock, flags);
+ read_unlock_irqrestore(&event->list_lock, flags);
}
void
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index e31f9641114b..dc44f5c7833f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -689,8 +689,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc)
struct nvfw_gsp_rpc *rpc;
rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64)));
- if (!rpc)
- return NULL;
+ if (IS_ERR(rpc))
+ return ERR_CAST(rpc);
rpc->header_version = 0x03000000;
rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V';
@@ -1159,7 +1159,7 @@ static void
r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
MUX_METHOD_DATA_ELEMENT *part)
{
- acpi_handle iter = NULL, handle_mux;
+ acpi_handle iter = NULL, handle_mux = NULL;
acpi_status status;
unsigned long long value;
diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
index affcfb243f0f..35f762872b8a 100644
--- a/drivers/i2c/busses/i2c-designware-common.c
+++ b/drivers/i2c/busses/i2c-designware-common.c
@@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val)
{
struct dw_i2c_dev *dev = context;
- *val = readl_relaxed(dev->base + reg);
+ *val = readl(dev->base + reg);
return 0;
}
@@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val)
{
struct dw_i2c_dev *dev = context;
- writel_relaxed(val, dev->base + reg);
+ writel(val, dev->base + reg);
return 0;
}
@@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val)
{
struct dw_i2c_dev *dev = context;
- *val = swab32(readl_relaxed(dev->base + reg));
+ *val = swab32(readl(dev->base + reg));
return 0;
}
@@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val)
{
struct dw_i2c_dev *dev = context;
- writel_relaxed(swab32(val), dev->base + reg);
+ writel(swab32(val), dev->base + reg);
return 0;
}
@@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val)
{
struct dw_i2c_dev *dev = context;
- *val = readw_relaxed(dev->base + reg) |
- (readw_relaxed(dev->base + reg + 2) << 16);
+ *val = readw(dev->base + reg) |
+ (readw(dev->base + reg + 2) << 16);
return 0;
}
@@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val)
{
struct dw_i2c_dev *dev = context;
- writew_relaxed(val, dev->base + reg);
- writew_relaxed(val >> 16, dev->base + reg + 2);
+ writew(val, dev->base + reg);
+ writew(val >> 16, dev->base + reg + 2);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 041a76f71a49..e106af83cef4 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -771,8 +771,8 @@ static int ocores_i2c_resume(struct device *dev)
return ocores_init(dev, i2c);
}
-static DEFINE_SIMPLE_DEV_PM_OPS(ocores_i2c_pm,
- ocores_i2c_suspend, ocores_i2c_resume);
+static DEFINE_NOIRQ_DEV_PM_OPS(ocores_i2c_pm,
+ ocores_i2c_suspend, ocores_i2c_resume);
static struct platform_driver ocores_i2c_driver = {
.probe = ocores_i2c_probe,
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 1d7648242749..76f79b68cef8 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -265,6 +265,9 @@ struct pxa_i2c {
u32 hs_mask;
struct i2c_bus_recovery_info recovery;
+ struct pinctrl *pinctrl;
+ struct pinctrl_state *pinctrl_default;
+ struct pinctrl_state *pinctrl_recovery;
};
#define _IBMR(i2c) ((i2c)->reg_ibmr)
@@ -1299,12 +1302,13 @@ static void i2c_pxa_prepare_recovery(struct i2c_adapter *adap)
*/
gpiod_set_value(i2c->recovery.scl_gpiod, ibmr & IBMR_SCLS);
gpiod_set_value(i2c->recovery.sda_gpiod, ibmr & IBMR_SDAS);
+
+ WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery));
}
static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
{
struct pxa_i2c *i2c = adap->algo_data;
- struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
u32 isr;
/*
@@ -1318,7 +1322,7 @@ static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
i2c_pxa_do_reset(i2c);
}
- WARN_ON(pinctrl_select_state(bri->pinctrl, bri->pins_default));
+ WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default));
dev_dbg(&i2c->adap.dev, "recovery: IBMR 0x%08x ISR 0x%08x\n",
readl(_IBMR(i2c)), readl(_ISR(i2c)));
@@ -1340,20 +1344,76 @@ static int i2c_pxa_init_recovery(struct pxa_i2c *i2c)
if (IS_ENABLED(CONFIG_I2C_PXA_SLAVE))
return 0;
- bri->pinctrl = devm_pinctrl_get(dev);
- if (PTR_ERR(bri->pinctrl) == -ENODEV) {
- bri->pinctrl = NULL;
+ i2c->pinctrl = devm_pinctrl_get(dev);
+ if (PTR_ERR(i2c->pinctrl) == -ENODEV)
+ i2c->pinctrl = NULL;
+ if (IS_ERR(i2c->pinctrl))
+ return PTR_ERR(i2c->pinctrl);
+
+ if (!i2c->pinctrl)
+ return 0;
+
+ i2c->pinctrl_default = pinctrl_lookup_state(i2c->pinctrl,
+ PINCTRL_STATE_DEFAULT);
+ i2c->pinctrl_recovery = pinctrl_lookup_state(i2c->pinctrl, "recovery");
+
+ if (IS_ERR(i2c->pinctrl_default) || IS_ERR(i2c->pinctrl_recovery)) {
+ dev_info(dev, "missing pinmux recovery information: %ld %ld\n",
+ PTR_ERR(i2c->pinctrl_default),
+ PTR_ERR(i2c->pinctrl_recovery));
+ return 0;
+ }
+
+ /*
+ * Claiming GPIOs can influence the pinmux state, and may glitch the
+ * I2C bus. Do this carefully.
+ */
+ bri->scl_gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN);
+ if (bri->scl_gpiod == ERR_PTR(-EPROBE_DEFER))
+ return -EPROBE_DEFER;
+ if (IS_ERR(bri->scl_gpiod)) {
+ dev_info(dev, "missing scl gpio recovery information: %pe\n",
+ bri->scl_gpiod);
+ return 0;
+ }
+
+ /*
+ * We have SCL. Pull SCL low and wait a bit so that SDA glitches
+ * have no effect.
+ */
+ gpiod_direction_output(bri->scl_gpiod, 0);
+ udelay(10);
+ bri->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_OUT_HIGH_OPEN_DRAIN);
+
+ /* Wait a bit in case of a SDA glitch, and then release SCL. */
+ udelay(10);
+ gpiod_direction_output(bri->scl_gpiod, 1);
+
+ if (bri->sda_gpiod == ERR_PTR(-EPROBE_DEFER))
+ return -EPROBE_DEFER;
+
+ if (IS_ERR(bri->sda_gpiod)) {
+ dev_info(dev, "missing sda gpio recovery information: %pe\n",
+ bri->sda_gpiod);
return 0;
}
- if (IS_ERR(bri->pinctrl))
- return PTR_ERR(bri->pinctrl);
bri->prepare_recovery = i2c_pxa_prepare_recovery;
bri->unprepare_recovery = i2c_pxa_unprepare_recovery;
+ bri->recover_bus = i2c_generic_scl_recovery;
i2c->adap.bus_recovery_info = bri;
- return 0;
+ /*
+ * Claiming GPIOs can change the pinmux state, which confuses the
+ * pinctrl since pinctrl's idea of the current setting is unaffected
+ * by the pinmux change caused by claiming the GPIO. Work around that
+ * by switching pinctrl to the GPIO state here. We do it this way to
+ * avoid glitching the I2C bus.
+ */
+ pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery);
+
+ return pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default);
}
static int i2c_pxa_probe(struct platform_device *dev)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index a8c89df1a997..9a7a74239eab 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2379,12 +2379,12 @@ retry_baser:
break;
}
+ if (!shr)
+ gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
+
its_write_baser(its, baser, val);
tmp = baser->val;
- if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE)
- tmp &= ~GITS_BASER_SHAREABILITY_MASK;
-
if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
/*
* Shareability didn't stick. Just use
@@ -2394,10 +2394,9 @@ retry_baser:
* non-cacheable as well.
*/
shr = tmp & GITS_BASER_SHAREABILITY_MASK;
- if (!shr) {
+ if (!shr)
cache = GITS_BASER_nC;
- gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
- }
+
goto retry_baser;
}
@@ -2609,6 +2608,11 @@ static int its_alloc_tables(struct its_node *its)
/* erratum 24313: ignore memory access type */
cache = GITS_BASER_nCnB;
+ if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) {
+ cache = GITS_BASER_nC;
+ shr = 0;
+ }
+
for (i = 0; i < GITS_BASER_NR_REGS; i++) {
struct its_baser *baser = its->tables + i;
u64 val = its_read_baser(its, baser);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 62eb27639c9b..f03d7dba270c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -254,7 +254,7 @@ enum evict_result {
typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
{
unsigned long tested = 0;
struct list_head *h = lru->cursor;
@@ -295,7 +295,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
h = h->next;
- cond_resched();
+ if (!no_sleep)
+ cond_resched();
}
return NULL;
@@ -382,7 +383,10 @@ struct dm_buffer {
*/
struct buffer_tree {
- struct rw_semaphore lock;
+ union {
+ struct rw_semaphore lock;
+ rwlock_t spinlock;
+ } u;
struct rb_root root;
} ____cacheline_aligned_in_smp;
@@ -393,9 +397,12 @@ struct dm_buffer_cache {
* on the locks.
*/
unsigned int num_locks;
+ bool no_sleep;
struct buffer_tree trees[];
};
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
{
return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
{
- down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
{
- up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
{
- down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
{
- up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
/*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
static void __lh_lock(struct lock_history *lh, unsigned int index)
{
- if (lh->write)
- down_write(&lh->cache->trees[index].lock);
- else
- down_read(&lh->cache->trees[index].lock);
+ if (lh->write) {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ write_lock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ down_write(&lh->cache->trees[index].u.lock);
+ } else {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ read_lock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ down_read(&lh->cache->trees[index].u.lock);
+ }
}
static void __lh_unlock(struct lock_history *lh, unsigned int index)
{
- if (lh->write)
- up_write(&lh->cache->trees[index].lock);
- else
- up_read(&lh->cache->trees[index].lock);
+ if (lh->write) {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ up_write(&lh->cache->trees[index].u.lock);
+ } else {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ up_read(&lh->cache->trees[index].u.lock);
+ }
}
/*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
return le_to_buffer(le);
}
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
{
unsigned int i;
bc->num_locks = num_locks;
+ bc->no_sleep = no_sleep;
for (i = 0; i < bc->num_locks; i++) {
- init_rwsem(&bc->trees[i].lock);
+ if (no_sleep)
+ rwlock_init(&bc->trees[i].u.spinlock);
+ else
+ init_rwsem(&bc->trees[i].u.lock);
bc->trees[i].root = RB_ROOT;
}
@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
struct lru_entry *le;
struct dm_buffer *b;
- le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+ le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
return NULL;
@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
while (true) {
- le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+ le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
break;
@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
{
unsigned int i;
+ BUG_ON(bc->no_sleep);
for (i = 0; i < bc->num_locks; i++) {
- down_write(&bc->trees[i].lock);
+ down_write(&bc->trees[i].u.lock);
__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
- up_write(&bc->trees[i].lock);
+ up_write(&bc->trees[i].u.lock);
}
}
@@ -979,8 +1017,6 @@ struct dm_bufio_client {
struct dm_buffer_cache cache; /* must be last member */
};
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
/*----------------------------------------------------------------*/
#define dm_bufio_in_request() (!!current->bio_list)
@@ -1871,7 +1907,8 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
if (need_submit)
submit_io(b, REQ_OP_READ, read_endio);
- wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+ if (nf != NF_GET) /* we already tested this condition above */
+ wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
int error = blk_status_to_errno(b->read_error);
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
r = -ENOMEM;
goto bad_client;
}
- cache_init(&c->cache, num_locks);
+ cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
c->bdev = bdev;
c->block_size = block_size;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 6de107aff331..2ae8560b6a14 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
unsigned int remaining_size;
- unsigned int order = MAX_ORDER - 1;
+ unsigned int order = MAX_ORDER;
retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index efd510984e25..5eabdb06c649 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -33,7 +33,7 @@ struct delay_c {
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
struct task_struct *worker;
- atomic_t may_delay;
+ bool may_delay;
struct delay_class read;
struct delay_class write;
@@ -73,39 +73,6 @@ static inline bool delay_is_fast(struct delay_c *dc)
return !!dc->worker;
}
-static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
-{
- struct dm_delay_info *delayed, *next;
-
- mutex_lock(&delayed_bios_lock);
- list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
- if (flush_all || time_after_eq(jiffies, delayed->expires)) {
- struct bio *bio = dm_bio_from_per_bio_data(delayed,
- sizeof(struct dm_delay_info));
- list_del(&delayed->list);
- dm_submit_bio_remap(bio, NULL);
- delayed->class->ops--;
- }
- }
- mutex_unlock(&delayed_bios_lock);
-}
-
-static int flush_worker_fn(void *data)
-{
- struct delay_c *dc = data;
-
- while (1) {
- flush_delayed_bios_fast(dc, false);
- if (unlikely(list_empty(&dc->delayed_bios))) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- } else
- cond_resched();
- }
-
- return 0;
-}
-
static void flush_bios(struct bio *bio)
{
struct bio *n;
@@ -118,36 +85,61 @@ static void flush_bios(struct bio *bio)
}
}
-static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
+static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
+ struct bio_list flush_bio_list;
unsigned long next_expires = 0;
- unsigned long start_timer = 0;
- struct bio_list flush_bios = { };
+ bool start_timer = false;
+ bio_list_init(&flush_bio_list);
mutex_lock(&delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+ cond_resched();
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
sizeof(struct dm_delay_info));
list_del(&delayed->list);
- bio_list_add(&flush_bios, bio);
+ bio_list_add(&flush_bio_list, bio);
delayed->class->ops--;
continue;
}
- if (!start_timer) {
- start_timer = 1;
- next_expires = delayed->expires;
- } else
- next_expires = min(next_expires, delayed->expires);
+ if (!delay_is_fast(dc)) {
+ if (!start_timer) {
+ start_timer = true;
+ next_expires = delayed->expires;
+ } else {
+ next_expires = min(next_expires, delayed->expires);
+ }
+ }
}
mutex_unlock(&delayed_bios_lock);
if (start_timer)
queue_timeout(dc, next_expires);
- return bio_list_get(&flush_bios);
+ flush_bios(bio_list_get(&flush_bio_list));
+}
+
+static int flush_worker_fn(void *data)
+{
+ struct delay_c *dc = data;
+
+ while (!kthread_should_stop()) {
+ flush_delayed_bios(dc, false);
+ mutex_lock(&delayed_bios_lock);
+ if (unlikely(list_empty(&dc->delayed_bios))) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ mutex_unlock(&delayed_bios_lock);
+ schedule();
+ } else {
+ mutex_unlock(&delayed_bios_lock);
+ cond_resched();
+ }
+ }
+
+ return 0;
}
static void flush_expired_bios(struct work_struct *work)
@@ -155,10 +147,7 @@ static void flush_expired_bios(struct work_struct *work)
struct delay_c *dc;
dc = container_of(work, struct delay_c, flush_expired_bios);
- if (delay_is_fast(dc))
- flush_delayed_bios_fast(dc, false);
- else
- flush_bios(flush_delayed_bios(dc, false));
+ flush_delayed_bios(dc, false);
}
static void delay_dtr(struct dm_target *ti)
@@ -177,8 +166,7 @@ static void delay_dtr(struct dm_target *ti)
if (dc->worker)
kthread_stop(dc->worker);
- if (!delay_is_fast(dc))
- mutex_destroy(&dc->timer_lock);
+ mutex_destroy(&dc->timer_lock);
kfree(dc);
}
@@ -236,7 +224,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = dc;
INIT_LIST_HEAD(&dc->delayed_bios);
- atomic_set(&dc->may_delay, 1);
+ mutex_init(&dc->timer_lock);
+ dc->may_delay = true;
dc->argc = argc;
ret = delay_class_ctr(ti, &dc->read, argv);
@@ -282,12 +271,12 @@ out:
"dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
+ dc->worker = NULL;
goto bad;
}
} else {
timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
- mutex_init(&dc->timer_lock);
dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
if (!dc->kdelayd_wq) {
ret = -EINVAL;
@@ -312,7 +301,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
struct dm_delay_info *delayed;
unsigned long expires = 0;
- if (!c->delay || !atomic_read(&dc->may_delay))
+ if (!c->delay)
return DM_MAPIO_REMAPPED;
delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -321,6 +310,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
mutex_lock(&delayed_bios_lock);
+ if (unlikely(!dc->may_delay)) {
+ mutex_unlock(&delayed_bios_lock);
+ return DM_MAPIO_REMAPPED;
+ }
c->ops++;
list_add_tail(&delayed->list, &dc->delayed_bios);
mutex_unlock(&delayed_bios_lock);
@@ -337,21 +330,20 @@ static void delay_presuspend(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- atomic_set(&dc->may_delay, 0);
+ mutex_lock(&delayed_bios_lock);
+ dc->may_delay = false;
+ mutex_unlock(&delayed_bios_lock);
- if (delay_is_fast(dc))
- flush_delayed_bios_fast(dc, true);
- else {
+ if (!delay_is_fast(dc))
del_timer_sync(&dc->delay_timer);
- flush_bios(flush_delayed_bios(dc, true));
- }
+ flush_delayed_bios(dc, true);
}
static void delay_resume(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- atomic_set(&dc->may_delay, 1);
+ dc->may_delay = true;
}
static int delay_map(struct dm_target *ti, struct bio *bio)
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 3ef9f018da60..2099c755119e 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -185,7 +185,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
{
if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io))))
+ verity_io_real_digest(v, io), true)))
return 0;
return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -386,7 +386,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
/* Always re-validate the corrected block against the expected hash */
r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io));
+ verity_io_real_digest(v, io), true);
if (unlikely(r < 0))
return r;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 26adcfea0302..e115fcfe723c 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -135,20 +135,21 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
* Wrapper for crypto_ahash_init, which handles verity salting.
*/
static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
- struct crypto_wait *wait)
+ struct crypto_wait *wait, bool may_sleep)
{
int r;
ahash_request_set_tfm(req, v->tfm);
- ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
- CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, (void *)wait);
+ ahash_request_set_callback(req,
+ may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
+ crypto_req_done, (void *)wait);
crypto_init_wait(wait);
r = crypto_wait_req(crypto_ahash_init(req), wait);
if (unlikely(r < 0)) {
- DMERR("crypto_ahash_init failed: %d", r);
+ if (r != -ENOMEM)
+ DMERR("crypto_ahash_init failed: %d", r);
return r;
}
@@ -179,12 +180,12 @@ out:
}
int verity_hash(struct dm_verity *v, struct ahash_request *req,
- const u8 *data, size_t len, u8 *digest)
+ const u8 *data, size_t len, u8 *digest, bool may_sleep)
{
int r;
struct crypto_wait wait;
- r = verity_hash_init(v, req, &wait);
+ r = verity_hash_init(v, req, &wait, may_sleep);
if (unlikely(r < 0))
goto out;
@@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
r = verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->hash_dev_block_bits,
- verity_io_real_digest(v, io));
+ verity_io_real_digest(v, io), !io->in_tasklet);
if (unlikely(r < 0))
goto release_ret_r;
@@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io)
continue;
}
- r = verity_hash_init(v, req, &wait);
+ r = verity_hash_init(v, req, &wait, !io->in_tasklet);
if (unlikely(r < 0))
return r;
@@ -652,7 +653,7 @@ static void verity_tasklet(unsigned long data)
io->in_tasklet = true;
err = verity_verify_io(io);
- if (err == -EAGAIN) {
+ if (err == -EAGAIN || err == -ENOMEM) {
/* fallback to retrying with work-queue */
INIT_WORK(&io->work, verity_work);
queue_work(io->v->verify_wq, &io->work);
@@ -1033,7 +1034,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
goto out;
r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
- v->zero_digest);
+ v->zero_digest, true);
out:
kfree(req);
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 2f555b420367..f96f4e281ee4 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -128,7 +128,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
u8 *data, size_t len));
extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
- const u8 *data, size_t len, u8 *digest);
+ const u8 *data, size_t len, u8 *digest, bool may_sleep);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest, bool *is_zero);
diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c
index 539d8920c202..bb0d92461b08 100644
--- a/drivers/parisc/power.c
+++ b/drivers/parisc/power.c
@@ -176,7 +176,7 @@ static struct notifier_block parisc_panic_block = {
static int qemu_power_off(struct sys_off_data *data)
{
/* this turns the system off via SeaBIOS */
- *(int *)data->cb_data = 0;
+ gsc_writel(0, (unsigned long) data->cb_data);
pdc_soft_power_button(1);
return NOTIFY_DONE;
}
diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
index cd6ac04c1468..c3104714b480 100644
--- a/drivers/platform/x86/amd/pmc/pmc.c
+++ b/drivers/platform/x86/amd/pmc/pmc.c
@@ -964,33 +964,6 @@ static const struct pci_device_id pmc_pci_ids[] = {
{ }
};
-static int amd_pmc_get_dram_size(struct amd_pmc_dev *dev)
-{
- int ret;
-
- switch (dev->cpu_id) {
- case AMD_CPU_ID_YC:
- if (!(dev->major > 90 || (dev->major == 90 && dev->minor > 39))) {
- ret = -EINVAL;
- goto err_dram_size;
- }
- break;
- default:
- ret = -EINVAL;
- goto err_dram_size;
- }
-
- ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true);
- if (ret || !dev->dram_size)
- goto err_dram_size;
-
- return 0;
-
-err_dram_size:
- dev_err(dev->dev, "DRAM size command not supported for this platform\n");
- return ret;
-}
-
static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
{
u32 phys_addr_low, phys_addr_hi;
@@ -1009,8 +982,8 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
return -EIO;
/* Get DRAM size */
- ret = amd_pmc_get_dram_size(dev);
- if (ret)
+ ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true);
+ if (ret || !dev->dram_size)
dev->dram_size = S2D_TELEMETRY_DRAMBYTES_MAX;
/* Get STB DRAM address */
diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
index 5798b49ddaba..8c9f4f3227fc 100644
--- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
+++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
@@ -588,17 +588,14 @@ static void release_attributes_data(void)
static int hp_add_other_attributes(int attr_type)
{
struct kobject *attr_name_kobj;
- union acpi_object *obj = NULL;
int ret;
char *attr_name;
- mutex_lock(&bioscfg_drv.mutex);
-
attr_name_kobj = kzalloc(sizeof(*attr_name_kobj), GFP_KERNEL);
- if (!attr_name_kobj) {
- ret = -ENOMEM;
- goto err_other_attr_init;
- }
+ if (!attr_name_kobj)
+ return -ENOMEM;
+
+ mutex_lock(&bioscfg_drv.mutex);
/* Check if attribute type is supported */
switch (attr_type) {
@@ -615,14 +612,14 @@ static int hp_add_other_attributes(int attr_type)
default:
pr_err("Error: Unknown attr_type: %d\n", attr_type);
ret = -EINVAL;
- goto err_other_attr_init;
+ kfree(attr_name_kobj);
+ goto unlock_drv_mutex;
}
ret = kobject_init_and_add(attr_name_kobj, &attr_name_ktype,
NULL, "%s", attr_name);
if (ret) {
pr_err("Error encountered [%d]\n", ret);
- kobject_put(attr_name_kobj);
goto err_other_attr_init;
}
@@ -630,27 +627,26 @@ static int hp_add_other_attributes(int attr_type)
switch (attr_type) {
case HPWMI_SECURE_PLATFORM_TYPE:
ret = hp_populate_secure_platform_data(attr_name_kobj);
- if (ret)
- goto err_other_attr_init;
break;
case HPWMI_SURE_START_TYPE:
ret = hp_populate_sure_start_data(attr_name_kobj);
- if (ret)
- goto err_other_attr_init;
break;
default:
ret = -EINVAL;
- goto err_other_attr_init;
}
+ if (ret)
+ goto err_other_attr_init;
+
mutex_unlock(&bioscfg_drv.mutex);
return 0;
err_other_attr_init:
+ kobject_put(attr_name_kobj);
+unlock_drv_mutex:
mutex_unlock(&bioscfg_drv.mutex);
- kfree(obj);
return ret;
}
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index ac037540acfc..88eefccb6ed2 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -1425,18 +1425,17 @@ static int ideapad_kbd_bl_init(struct ideapad_private *priv)
if (WARN_ON(priv->kbd_bl.initialized))
return -EEXIST;
- brightness = ideapad_kbd_bl_brightness_get(priv);
- if (brightness < 0)
- return brightness;
-
- priv->kbd_bl.last_brightness = brightness;
-
if (ideapad_kbd_bl_check_tristate(priv->kbd_bl.type)) {
priv->kbd_bl.led.max_brightness = 2;
} else {
priv->kbd_bl.led.max_brightness = 1;
}
+ brightness = ideapad_kbd_bl_brightness_get(priv);
+ if (brightness < 0)
+ return brightness;
+
+ priv->kbd_bl.last_brightness = brightness;
priv->kbd_bl.led.name = "platform::" LED_FUNCTION_KBD_BACKLIGHT;
priv->kbd_bl.led.brightness_get = ideapad_kbd_bl_led_cdev_brightness_get;
priv->kbd_bl.led.brightness_set_blocking = ideapad_kbd_bl_led_cdev_brightness_set;
diff --git a/drivers/platform/x86/intel/telemetry/core.c b/drivers/platform/x86/intel/telemetry/core.c
index fdf55b5d6948..e4be40f73eeb 100644
--- a/drivers/platform/x86/intel/telemetry/core.c
+++ b/drivers/platform/x86/intel/telemetry/core.c
@@ -102,7 +102,7 @@ static const struct telemetry_core_ops telm_defpltops = {
/**
* telemetry_update_events() - Update telemetry Configuration
* @pss_evtconfig: PSS related config. No change if num_evts = 0.
- * @pss_evtconfig: IOSS related config. No change if num_evts = 0.
+ * @ioss_evtconfig: IOSS related config. No change if num_evts = 0.
*
* This API updates the IOSS & PSS Telemetry configuration. Old config
* is overwritten. Call telemetry_reset_events when logging is over
@@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(telemetry_reset_events);
/**
* telemetry_get_eventconfig() - Returns the pss and ioss events enabled
* @pss_evtconfig: Pointer to PSS related configuration.
- * @pss_evtconfig: Pointer to IOSS related configuration.
+ * @ioss_evtconfig: Pointer to IOSS related configuration.
* @pss_len: Number of u32 elements allocated for pss_evtconfig array
* @ioss_len: Number of u32 elements allocated for ioss_evtconfig array
*
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 32d1e73e46ee..03348f605c2e 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1837,8 +1837,16 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
}
spin_lock_irqsave(qp->qp_lock_ptr, *flags);
- if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
- sp->done(sp, res);
+ switch (sp->type) {
+ case SRB_SCSI_CMD:
+ if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
+ sp->done(sp, res);
+ break;
+ default:
+ if (ret_cmd)
+ sp->done(sp, res);
+ break;
+ }
} else {
sp->done(sp, res);
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 67922e2c4c19..6d8218a44122 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1019,7 +1019,7 @@ static ssize_t sdebug_error_write(struct file *file, const char __user *ubuf,
struct sdebug_err_inject *inject;
struct scsi_device *sdev = (struct scsi_device *)file->f_inode->i_private;
- buf = kmalloc(count, GFP_KERNEL);
+ buf = kzalloc(count + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -1132,7 +1132,6 @@ static const struct file_operations sdebug_target_reset_fail_fops = {
static int sdebug_target_alloc(struct scsi_target *starget)
{
struct sdebug_target_info *targetip;
- struct dentry *dentry;
targetip = kzalloc(sizeof(struct sdebug_target_info), GFP_KERNEL);
if (!targetip)
@@ -1140,15 +1139,9 @@ static int sdebug_target_alloc(struct scsi_target *starget)
targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev),
sdebug_debugfs_root);
- if (IS_ERR_OR_NULL(targetip->debugfs_entry))
- pr_info("%s: failed to create debugfs directory for target %s\n",
- __func__, dev_name(&starget->dev));
debugfs_create_file("fail_reset", 0600, targetip->debugfs_entry, starget,
&sdebug_target_reset_fail_fops);
- if (IS_ERR_OR_NULL(dentry))
- pr_info("%s: failed to create fail_reset file for target %s\n",
- __func__, dev_name(&starget->dev));
starget->hostdata = targetip;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 530918cbfce2..fa00dd503cbf 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1643,24 +1643,21 @@ out:
return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
}
-static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
+static int sd_sync_cache(struct scsi_disk *sdkp)
{
int retries, res;
struct scsi_device *sdp = sdkp->device;
const int timeout = sdp->request_queue->rq_timeout
* SD_FLUSH_TIMEOUT_MULTIPLIER;
- struct scsi_sense_hdr my_sshdr;
+ struct scsi_sense_hdr sshdr;
const struct scsi_exec_args exec_args = {
.req_flags = BLK_MQ_REQ_PM,
- /* caller might not be interested in sense, but we need it */
- .sshdr = sshdr ? : &my_sshdr,
+ .sshdr = &sshdr,
};
if (!scsi_device_online(sdp))
return -ENODEV;
- sshdr = exec_args.sshdr;
-
for (retries = 3; retries > 0; --retries) {
unsigned char cmd[16] = { 0 };
@@ -1685,15 +1682,23 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
return res;
if (scsi_status_is_check_condition(res) &&
- scsi_sense_valid(sshdr)) {
- sd_print_sense_hdr(sdkp, sshdr);
+ scsi_sense_valid(&sshdr)) {
+ sd_print_sense_hdr(sdkp, &sshdr);
/* we need to evaluate the error return */
- if (sshdr->asc == 0x3a || /* medium not present */
- sshdr->asc == 0x20 || /* invalid command */
- (sshdr->asc == 0x74 && sshdr->ascq == 0x71)) /* drive is password locked */
+ if (sshdr.asc == 0x3a || /* medium not present */
+ sshdr.asc == 0x20 || /* invalid command */
+ (sshdr.asc == 0x74 && sshdr.ascq == 0x71)) /* drive is password locked */
/* this is no error here */
return 0;
+ /*
+ * This drive doesn't support sync and there's not much
+ * we can do because this is called during shutdown
+ * or suspend so just return success so those operations
+ * can proceed.
+ */
+ if (sshdr.sense_key == ILLEGAL_REQUEST)
+ return 0;
}
switch (host_byte(res)) {
@@ -3853,7 +3858,7 @@ static void sd_shutdown(struct device *dev)
if (sdkp->WCE && sdkp->media_present) {
sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
- sd_sync_cache(sdkp, NULL);
+ sd_sync_cache(sdkp);
}
if ((system_state != SYSTEM_RESTART &&
@@ -3874,7 +3879,6 @@ static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime)
static int sd_suspend_common(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
- struct scsi_sense_hdr sshdr;
int ret = 0;
if (!sdkp) /* E.g.: runtime suspend following sd_remove() */
@@ -3883,24 +3887,13 @@ static int sd_suspend_common(struct device *dev, bool runtime)
if (sdkp->WCE && sdkp->media_present) {
if (!sdkp->device->silence_suspend)
sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
- ret = sd_sync_cache(sdkp, &sshdr);
-
- if (ret) {
- /* ignore OFFLINE device */
- if (ret == -ENODEV)
- return 0;
-
- if (!scsi_sense_valid(&sshdr) ||
- sshdr.sense_key != ILLEGAL_REQUEST)
- return ret;
+ ret = sd_sync_cache(sdkp);
+ /* ignore OFFLINE device */
+ if (ret == -ENODEV)
+ return 0;
- /*
- * sshdr.sense_key == ILLEGAL_REQUEST means this drive
- * doesn't support sync. There's not much to do and
- * suspend shouldn't fail.
- */
- ret = 0;
- }
+ if (ret)
+ return ret;
}
if (sd_do_start_stop(sdkp->device, runtime)) {
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 2ba8ec254dce..0787456c2b89 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -436,7 +436,7 @@ int ufshcd_mcq_init(struct ufs_hba *hba)
for (i = 0; i < hba->nr_hw_queues; i++) {
hwq = &hba->uhq[i];
- hwq->max_entries = hba->nutrs;
+ hwq->max_entries = hba->nutrs + 1;
spin_lock_init(&hwq->sq_lock);
spin_lock_init(&hwq->cq_lock);
mutex_init(&hwq->sq_mutex);
@@ -630,6 +630,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
int tag = scsi_cmd_to_rq(cmd)->tag;
struct ufshcd_lrb *lrbp = &hba->lrb[tag];
struct ufs_hw_queue *hwq;
+ unsigned long flags;
int err = FAILED;
if (!ufshcd_cmd_inflight(lrbp->cmd)) {
@@ -670,8 +671,10 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
}
err = SUCCESS;
+ spin_lock_irqsave(&hwq->cq_lock, flags);
if (ufshcd_cmd_inflight(lrbp->cmd))
ufshcd_release_scsi_cmd(hba, lrbp);
+ spin_unlock_irqrestore(&hwq->cq_lock, flags);
out:
return err;
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index ef02c9bb0354..23c0834a97a4 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -313,17 +313,17 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
bp.level - 1,
0);
b = bch2_btree_iter_peek_node(iter);
- if (IS_ERR(b))
+ if (IS_ERR_OR_NULL(b))
goto err;
BUG_ON(b->c.level != bp.level - 1);
- if (b && extent_matches_bp(c, bp.btree_id, bp.level,
- bkey_i_to_s_c(&b->key),
- bucket, bp))
+ if (extent_matches_bp(c, bp.btree_id, bp.level,
+ bkey_i_to_s_c(&b->key),
+ bucket, bp))
return b;
- if (b && btree_node_will_make_reachable(b)) {
+ if (btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else {
backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 9cb8684959ee..403aa3389fcc 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -617,7 +617,7 @@ struct journal_seq_blacklist_table {
u64 start;
u64 end;
bool dirty;
- } entries[0];
+ } entries[];
};
struct journal_keys {
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index c2adf3fbb0b3..6fa90bcd7016 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -3087,8 +3087,6 @@ void bch2_trans_put(struct btree_trans *trans)
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
}
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
kfree(trans->extra_journal_entries.data);
if (trans->fs_usage_deltas) {
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 9b78f78a75b5..37fbf22de8fc 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -89,10 +89,13 @@ static void bkey_cached_free(struct btree_key_cache *bc,
ck->btree_trans_barrier_seq =
start_poll_synchronize_srcu(&c->btree_trans_barrier);
- if (ck->c.lock.readers)
+ if (ck->c.lock.readers) {
list_move_tail(&ck->list, &bc->freed_pcpu);
- else
+ bc->nr_freed_pcpu++;
+ } else {
list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ bc->nr_freed_nonpcpu++;
+ }
atomic_long_inc(&bc->nr_freed);
kfree(ck->k);
@@ -109,6 +112,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
{
struct bkey_cached *pos;
+ bc->nr_freed_nonpcpu++;
+
list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
pos->btree_trans_barrier_seq)) {
@@ -158,6 +163,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
#else
mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ bc->nr_freed_nonpcpu++;
mutex_unlock(&bc->lock);
#endif
} else {
@@ -217,6 +223,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
f->nr < ARRAY_SIZE(f->objs) / 2) {
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_nonpcpu--;
f->objs[f->nr++] = ck;
}
@@ -229,6 +236,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
if (!list_empty(&bc->freed_nonpcpu)) {
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_nonpcpu--;
}
mutex_unlock(&bc->lock);
#endif
@@ -664,7 +672,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
goto out;
bch2_journal_pin_drop(j, &ck->journal);
- bch2_journal_preres_put(j, &ck->res);
BUG_ON(!btree_node_locked(c_iter.path, 0));
@@ -762,18 +769,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
BUG_ON(insert->k.u64s > ck->u64s);
- if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
- int difference;
-
- BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
-
- difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
- if (difference > 0) {
- trans->journal_preres.u64s -= difference;
- ck->res.u64s += difference;
- }
- }
-
bkey_copy(ck->k, insert);
ck->valid = true;
@@ -850,6 +845,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
* Newest freed entries are at the end of the list - once we hit one
* that's too new to be freed, we can bail out:
*/
+ scanned += bc->nr_freed_nonpcpu;
+
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@@ -859,13 +856,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed);
- scanned++;
freed++;
+ bc->nr_freed_nonpcpu--;
}
if (scanned >= nr)
goto out;
+ scanned += bc->nr_freed_pcpu;
+
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@@ -875,8 +874,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed);
- scanned++;
freed++;
+ bc->nr_freed_pcpu--;
}
if (scanned >= nr)
@@ -982,6 +981,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
}
#endif
+ BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+ BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
list_splice(&bc->freed_pcpu, &items);
list_splice(&bc->freed_nonpcpu, &items);
@@ -991,7 +993,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
cond_resched();
bch2_journal_pin_drop(&c->journal, &ck->journal);
- bch2_journal_preres_put(&c->journal, &ck->res);
list_del(&ck->list);
kfree(ck->k);
diff --git a/fs/bcachefs/btree_key_cache_types.h b/fs/bcachefs/btree_key_cache_types.h
new file mode 100644
index 000000000000..290e4e57df5b
--- /dev/null
+++ b/fs/bcachefs/btree_key_cache_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+
+struct btree_key_cache_freelist {
+ struct bkey_cached *objs[16];
+ unsigned nr;
+};
+
+struct btree_key_cache {
+ struct mutex lock;
+ struct rhashtable table;
+ bool table_init_done;
+
+ struct list_head freed_pcpu;
+ size_t nr_freed_pcpu;
+ struct list_head freed_nonpcpu;
+ size_t nr_freed_nonpcpu;
+
+ struct shrinker *shrink;
+ unsigned shrink_iter;
+ struct btree_key_cache_freelist __percpu *pcpu_freed;
+
+ atomic_long_t nr_freed;
+ atomic_long_t nr_keys;
+ atomic_long_t nr_dirty;
+};
+
+struct bkey_cached_key {
+ u32 btree_id;
+ struct bpos pos;
+} __packed __aligned(4);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index decad7b66c59..12907beda98c 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -78,6 +78,53 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
bch2_btree_init_next(trans, b);
}
+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+ while (--i >= trans->updates) {
+ if (same_leaf_as_prev(trans, i))
+ continue;
+
+ bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
+ }
+
+ trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
+}
+
+static inline int bch2_trans_lock_write(struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ EBUG_ON(trans->write_locked);
+
+ trans_for_each_update(trans, i) {
+ if (same_leaf_as_prev(trans, i))
+ continue;
+
+ if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
+ return trans_lock_write_fail(trans, i);
+
+ if (!i->cached)
+ bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
+ }
+
+ trans->write_locked = true;
+ return 0;
+}
+
+static inline void bch2_trans_unlock_write(struct btree_trans *trans)
+{
+ if (likely(trans->write_locked)) {
+ struct btree_insert_entry *i;
+
+ trans_for_each_update(trans, i)
+ if (!same_leaf_as_prev(trans, i))
+ bch2_btree_node_unlock_write_inlined(trans, i->path,
+ insert_l(i)->b);
+ trans->write_locked = false;
+ }
+}
+
/* Inserting into a given leaf node (last stage of insert): */
/* Handle overwrites and do insert, for non extents: */
@@ -276,17 +323,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
}
-static noinline int
-bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
- unsigned long trace_ip)
-{
- return drop_locks_do(trans,
- bch2_journal_preres_get(&trans->c->journal,
- &trans->journal_preres,
- trans->journal_preres_u64s,
- (flags & BCH_WATERMARK_MASK)));
-}
-
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
@@ -321,6 +357,45 @@ static inline int btree_key_can_insert(struct btree_trans *trans,
return 0;
}
+noinline static int
+btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
+ struct btree_path *path, unsigned new_u64s)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ struct bkey_cached *ck = (void *) path->l[0].b;
+ struct bkey_i *new_k;
+ int ret;
+
+ bch2_trans_unlock_write(trans);
+ bch2_trans_unlock(trans);
+
+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+ if (!new_k) {
+ bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+ bch2_btree_id_str(path->btree_id), new_u64s);
+ return -BCH_ERR_ENOMEM_btree_key_cache_insert;
+ }
+
+ ret = bch2_trans_relock(trans) ?:
+ bch2_trans_lock_write(trans);
+ if (unlikely(ret)) {
+ kfree(new_k);
+ return ret;
+ }
+
+ memcpy(new_k, ck->k, ck->u64s * sizeof(u64));
+
+ trans_for_each_update(trans, i)
+ if (i->old_v == &ck->k->v)
+ i->old_v = &new_k->v;
+
+ kfree(ck->k);
+ ck->u64s = new_u64s;
+ ck->k = new_k;
+ return 0;
+}
+
static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
struct btree_path *path, unsigned u64s)
{
@@ -347,12 +422,9 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
return 0;
new_u64s = roundup_pow_of_two(u64s);
- new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
- if (!new_k) {
- bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
- bch2_btree_id_str(path->btree_id), new_u64s);
- return -BCH_ERR_ENOMEM_btree_key_cache_insert;
- }
+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
+ if (unlikely(!new_k))
+ return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
trans_for_each_update(trans, i)
if (i->old_v == &ck->k->v)
@@ -732,37 +804,6 @@ revert_fs_usage:
return ret;
}
-static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
-{
- while (--i >= trans->updates) {
- if (same_leaf_as_prev(trans, i))
- continue;
-
- bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
- }
-
- trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
- return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
-}
-
-static inline int trans_lock_write(struct btree_trans *trans)
-{
- struct btree_insert_entry *i;
-
- trans_for_each_update(trans, i) {
- if (same_leaf_as_prev(trans, i))
- continue;
-
- if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
- return trans_lock_write_fail(trans, i);
-
- if (!i->cached)
- bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
- }
-
- return 0;
-}
-
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
{
struct btree_insert_entry *i;
@@ -830,15 +871,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
}
}
- ret = bch2_journal_preres_get(&c->journal,
- &trans->journal_preres, trans->journal_preres_u64s,
- (flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
- if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
- ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
- if (unlikely(ret))
- return ret;
-
- ret = trans_lock_write(trans);
+ ret = bch2_trans_lock_write(trans);
if (unlikely(ret))
return ret;
@@ -847,10 +880,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
if (!ret && unlikely(trans->journal_replay_not_finished))
bch2_drop_overwrites_from_journal(trans);
- trans_for_each_update(trans, i)
- if (!same_leaf_as_prev(trans, i))
- bch2_btree_node_unlock_write_inlined(trans, i->path,
- insert_l(i)->b);
+ bch2_trans_unlock_write(trans);
if (!ret && trans->journal_pin)
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
@@ -1003,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
struct bch_fs *c = trans->c;
struct btree_insert_entry *i = NULL;
struct btree_write_buffered_key *wb;
- unsigned u64s;
int ret = 0;
if (!trans->nr_updates &&
@@ -1063,13 +1092,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
- memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
trans->journal_u64s = trans->extra_journal_entries.nr;
- trans->journal_preres_u64s = 0;
-
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
-
if (trans->journal_transaction_names)
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
@@ -1085,16 +1109,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (i->key_cache_already_flushed)
continue;
- /* we're going to journal the key being updated: */
- u64s = jset_u64s(i->k->k.u64s);
- if (i->cached &&
- likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
- trans->journal_preres_u64s += u64s;
-
if (i->flags & BTREE_UPDATE_NOJOURNAL)
continue;
- trans->journal_u64s += u64s;
+ /* we're going to journal the key being updated: */
+ trans->journal_u64s += jset_u64s(i->k->k.u64s);
/* and we're also going to log the overwrite: */
if (trans->journal_transaction_names)
@@ -1126,8 +1145,6 @@ retry:
trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset:
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 941841a0c5bf..60453ba86c4b 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -5,7 +5,7 @@
#include <linux/list.h>
#include <linux/rhashtable.h>
-//#include "bkey_methods.h"
+#include "btree_key_cache_types.h"
#include "buckets_types.h"
#include "darray.h"
#include "errcode.h"
@@ -312,31 +312,6 @@ struct btree_iter {
#endif
};
-struct btree_key_cache_freelist {
- struct bkey_cached *objs[16];
- unsigned nr;
-};
-
-struct btree_key_cache {
- struct mutex lock;
- struct rhashtable table;
- bool table_init_done;
- struct list_head freed_pcpu;
- struct list_head freed_nonpcpu;
- struct shrinker *shrink;
- unsigned shrink_iter;
- struct btree_key_cache_freelist __percpu *pcpu_freed;
-
- atomic_long_t nr_freed;
- atomic_long_t nr_keys;
- atomic_long_t nr_dirty;
-};
-
-struct bkey_cached_key {
- u32 btree_id;
- struct bpos pos;
-} __packed __aligned(4);
-
#define BKEY_CACHED_ACCESSED 0
#define BKEY_CACHED_DIRTY 1
@@ -352,7 +327,6 @@ struct bkey_cached {
struct rhash_head hash;
struct list_head list;
- struct journal_preres res;
struct journal_entry_pin journal;
u64 seq;
@@ -389,11 +363,7 @@ struct btree_insert_entry {
unsigned long ip_allocated;
};
-#ifndef CONFIG_LOCKDEP
#define BTREE_ITER_MAX 64
-#else
-#define BTREE_ITER_MAX 32
-#endif
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@@ -434,6 +404,7 @@ struct btree_trans {
bool journal_transaction_names:1;
bool journal_replay_not_finished:1;
bool notrace_relock_fail:1;
+ bool write_locked:1;
enum bch_errcode restarted:16;
u32 restart_count;
unsigned long last_begin_ip;
@@ -465,11 +436,9 @@ struct btree_trans {
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;
- struct journal_preres journal_preres;
u64 *journal_seq;
struct disk_reservation *disk_res;
unsigned journal_u64s;
- unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;
};
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 39c2db68123b..76f27bc9fa24 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -513,8 +513,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
up_read(&c->gc_lock);
as->took_gc_lock = false;
- bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
bch2_disk_reservation_put(c, &as->disk_res);
@@ -734,8 +732,6 @@ err:
bch2_journal_pin_drop(&c->journal, &as->journal);
- bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
mutex_lock(&c->btree_interior_update_lock);
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
@@ -1047,7 +1043,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned nr_nodes[2] = { 0, 0 };
unsigned update_level = level;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- unsigned journal_flags = 0;
int ret = 0;
u32 restart_count = trans->restart_count;
@@ -1061,10 +1056,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
- if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
- journal_flags |= JOURNAL_RES_GET_NONBLOCK;
- journal_flags |= watermark;
-
while (1) {
nr_nodes[!!update_level] += 1 + split;
update_level++;
@@ -1129,27 +1120,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
- ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
- BTREE_UPDATE_JOURNAL_RES,
- journal_flags|JOURNAL_RES_GET_NONBLOCK);
- if (ret) {
- if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
- goto err;
- }
-
- ret = drop_locks_do(trans,
- bch2_journal_preres_get(&c->journal, &as->journal_preres,
- BTREE_UPDATE_JOURNAL_RES,
- journal_flags));
- if (ret == -BCH_ERR_journal_preres_get_blocked) {
- trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
- }
- if (ret)
- goto err;
- }
-
ret = bch2_disk_reservation_get(c, &as->disk_res,
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
c->opts.metadata_replicas,
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 4df21512d640..031076e75fa1 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -55,7 +55,6 @@ struct btree_update {
unsigned update_level;
struct disk_reservation disk_res;
- struct journal_preres journal_preres;
/*
* BTREE_INTERIOR_UPDATING_NODE:
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 0771a6d880bf..5ed66202c226 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -239,6 +239,34 @@ restart_drop_extra_replicas:
next_pos = insert->k.p;
+ /*
+ * Check for nonce offset inconsistency:
+ * This is debug code - we've been seeing this bug rarely, and
+ * it's been hard to reproduce, so this should give us some more
+ * information when it does occur:
+ */
+ struct printbuf err = PRINTBUF;
+ int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
+ printbuf_exit(&err);
+
+ if (invalid) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "about to insert invalid key in data update path");
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+
+ bch2_fatal_error(c);
+ goto out;
+ }
+
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c
index d613695abf9f..4d0cb0ccff32 100644
--- a/fs/bcachefs/disk_groups.c
+++ b/fs/bcachefs/disk_groups.c
@@ -555,6 +555,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
case TARGET_DEV: {
struct bch_dev *ca;
+ out->atomic++;
rcu_read_lock();
ca = t.dev < c->sb.nr_devices
? rcu_dereference(c->devs[t.dev])
@@ -570,6 +571,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
}
rcu_read_unlock();
+ out->atomic--;
break;
}
case TARGET_GROUP:
@@ -580,7 +582,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
}
}
-void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
+static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
{
struct target t = target_decode(v);
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 875f7c5a6fca..2a77de18c004 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1373,6 +1373,15 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
h->nr_active_devs++;
rcu_read_unlock();
+
+ /*
+ * If we only have redundancy + 1 devices, we're better off with just
+ * replication:
+ */
+ if (h->nr_active_devs < h->redundancy + 2)
+ bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
+ h->nr_active_devs, h->redundancy + 2);
+
list_add(&h->list, &c->ec_stripe_head_list);
return h;
}
@@ -1424,6 +1433,11 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
found:
+ if (!IS_ERR_OR_NULL(h) &&
+ h->nr_active_devs < h->redundancy + 2) {
+ mutex_unlock(&h->lock);
+ h = NULL;
+ }
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}
@@ -1681,8 +1695,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
int ret;
h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
- if (!h)
- bch_err(c, "no stripe head");
if (IS_ERR_OR_NULL(h))
return h;
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index 8bd9bcdd27f7..ff664fd0d8ef 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -13,7 +13,7 @@
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
loff_t start, u64 end,
- int fgp_flags, gfp_t gfp,
+ fgf_t fgp_flags, gfp_t gfp,
folios *fs)
{
struct folio *f;
diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h
index a2222ad586e9..27f712ae37a6 100644
--- a/fs/bcachefs/fs-io-pagecache.h
+++ b/fs/bcachefs/fs-io-pagecache.h
@@ -7,7 +7,7 @@
typedef DARRAY(struct folio *) folios;
int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
- u64, int, gfp_t, folios *);
+ u64, fgf_t, gfp_t, folios *);
int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
/*
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 166d8d8abe68..8ef817304e4a 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1922,10 +1922,7 @@ out:
return dget(sb->s_root);
err_put_super:
- sb->s_fs_info = NULL;
- c->vfs_sb = NULL;
deactivate_locked_super(sb);
- bch2_fs_stop(c);
return ERR_PTR(bch2_err_class(ret));
}
@@ -1933,11 +1930,8 @@ static void bch2_kill_sb(struct super_block *sb)
{
struct bch_fs *c = sb->s_fs_info;
- if (c)
- c->vfs_sb = NULL;
generic_shutdown_super(sb);
- if (c)
- bch2_fs_free(c);
+ bch2_fs_free(c);
}
static struct file_system_type bcache_fs_type = {
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 9f3e9bd3d767..e0c5cd119acc 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -2220,7 +2220,7 @@ static int nlink_cmp(const void *_l, const void *_r)
const struct nlink *l = _l;
const struct nlink *r = _r;
- return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
+ return cmp_int(l->inum, r->inum);
}
static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index def77f2d8802..c7849b0753e7 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1134,7 +1134,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
* unlinked inodes in the snapshot leaves:
*/
*need_another_pass = true;
- return 0;
+ goto out;
}
ret = 1;
@@ -1169,8 +1169,10 @@ again:
*/
for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p,
- &need_another_pass));
+ ret = commit_do(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW,
+ may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
if (ret < 0)
break;
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index f02b3f7d26a0..d704a8f829c8 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -795,7 +795,7 @@ static int bch2_write_decrypt(struct bch_write_op *op)
* checksum:
*/
csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
- if (bch2_crc_cmp(op->crc.csum, csum))
+ if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
return -EIO;
ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 5b5d69f2316b..23a9b7845d11 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -526,36 +526,6 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
return ret;
}
-/* journal_preres: */
-
-static bool journal_preres_available(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
-
- if (!ret && mutex_trylock(&j->reclaim_lock)) {
- bch2_journal_reclaim(j);
- mutex_unlock(&j->reclaim_lock);
- }
-
- return ret;
-}
-
-int __bch2_journal_preres_get(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- int ret;
-
- closure_wait_event(&j->preres_wait,
- (ret = bch2_journal_error(j)) ||
- journal_preres_available(j, res, new_u64s, flags));
- return ret;
-}
-
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *j,
@@ -1306,7 +1276,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
- prt_printf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]);
prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 011711e99c8d..c85d01cf4948 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -395,104 +395,6 @@ out:
return 0;
}
-/* journal_preres: */
-
-static inline void journal_set_watermark(struct journal *j)
-{
- union journal_preres_state s = READ_ONCE(j->prereserved);
- unsigned watermark = BCH_WATERMARK_stripe;
-
- if (fifo_free(&j->pin) < j->pin.size / 4)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
- if (fifo_free(&j->pin) < j->pin.size / 8)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
- if (s.reserved > s.remaining)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
- if (!s.remaining)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
- if (watermark == j->watermark)
- return;
-
- swap(watermark, j->watermark);
- if (watermark > j->watermark)
- journal_wake(j);
-}
-
-static inline void bch2_journal_preres_put(struct journal *j,
- struct journal_preres *res)
-{
- union journal_preres_state s = { .reserved = res->u64s };
-
- if (!res->u64s)
- return;
-
- s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
- res->u64s = 0;
-
- if (unlikely(s.waiting)) {
- clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
- (unsigned long *) &j->prereserved.v);
- closure_wake_up(&j->preres_wait);
- }
-
- if (s.reserved <= s.remaining && j->watermark)
- journal_set_watermark(j);
-}
-
-int __bch2_journal_preres_get(struct journal *,
- struct journal_preres *, unsigned, unsigned);
-
-static inline int bch2_journal_preres_get_fast(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags,
- bool set_waiting)
-{
- int d = new_u64s - res->u64s;
- union journal_preres_state old, new;
- u64 v = atomic64_read(&j->prereserved.counter);
- enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- int ret;
-
- do {
- old.v = new.v = v;
- ret = 0;
-
- if (watermark == BCH_WATERMARK_reclaim ||
- new.reserved + d < new.remaining) {
- new.reserved += d;
- ret = 1;
- } else if (set_waiting && !new.waiting)
- new.waiting = true;
- else
- return 0;
- } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
- old.v, new.v)) != old.v);
-
- if (ret)
- res->u64s += d;
- return ret;
-}
-
-static inline int bch2_journal_preres_get(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- if (new_u64s <= res->u64s)
- return 0;
-
- if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
- return 0;
-
- if (flags & JOURNAL_RES_GET_NONBLOCK)
- return -BCH_ERR_journal_preres_get_blocked;
-
- return __bch2_journal_preres_get(j, res, new_u64s, flags);
-}
-
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *,
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index f4bc2cdbfdd7..786a09285509 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1079,6 +1079,12 @@ found:
if (ja->bucket_seq[ja->cur_idx] &&
ja->sectors_free == ca->mi.bucket_size) {
+#if 0
+ /*
+ * Debug code for ZNS support, where we (probably) want to be
+ * correlated where we stopped in the journal to the zone write
+ * points:
+ */
bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
for (i = 0; i < 3; i++) {
@@ -1086,6 +1092,7 @@ found:
bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
}
+#endif
ja->sectors_free = 0;
}
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 9a584aaaa2eb..e63c6eda86af 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -50,16 +50,21 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
return available;
}
-static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+static inline void journal_set_watermark(struct journal *j, bool low_on_space)
{
- union journal_preres_state old, new;
- u64 v = atomic64_read(&j->prereserved.counter);
+ unsigned watermark = BCH_WATERMARK_stripe;
- do {
- old.v = new.v = v;
- new.remaining = u64s_remaining;
- } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
- old.v, new.v)) != old.v);
+ if (low_on_space)
+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+ if (fifo_free(&j->pin) < j->pin.size / 4)
+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+
+ if (watermark == j->watermark)
+ return;
+
+ swap(watermark, j->watermark);
+ if (watermark > j->watermark)
+ journal_wake(j);
}
static struct journal_space
@@ -162,7 +167,6 @@ void bch2_journal_space_available(struct journal *j)
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
unsigned clean, clean_ondisk, total;
- s64 u64s_remaining = 0;
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
j->buf[1].buf_size >> 9);
unsigned i, nr_online = 0, nr_devs_want;
@@ -222,16 +226,10 @@ void bch2_journal_space_available(struct journal *j)
else
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
- u64s_remaining = (u64) clean << 6;
- u64s_remaining -= (u64) total << 3;
- u64s_remaining = max(0LL, u64s_remaining);
- u64s_remaining /= 4;
- u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
+ journal_set_watermark(j, clean * 4 <= total);
out:
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
j->cur_entry_error = ret;
- journal_set_remaining(j, u64s_remaining);
- journal_set_watermark(j);
if (!ret)
journal_wake(j);
@@ -555,11 +553,6 @@ static u64 journal_seq_to_flush(struct journal *j)
/* Try to keep the journal at most half full: */
nr_buckets = ja->nr / 2;
- /* And include pre-reservations: */
- nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
- (ca->mi.bucket_size << 6) -
- journal_entry_overhead(j));
-
nr_buckets = min(nr_buckets, ja->nr);
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
@@ -638,10 +631,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
msecs_to_jiffies(c->opts.journal_reclaim_delay)))
min_nr = 1;
- if (j->prereserved.reserved * 4 > j->prereserved.remaining)
- min_nr = 1;
-
- if (fifo_free(&j->pin) <= 32)
+ if (j->watermark != BCH_WATERMARK_stripe)
min_nr = 1;
if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
@@ -652,8 +642,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
trace_and_count(c, journal_reclaim_start, c,
direct, kicked,
min_nr, min_key_cache,
- j->prereserved.reserved,
- j->prereserved.remaining,
atomic_read(&c->btree_cache.dirty),
c->btree_cache.used,
atomic_long_read(&c->btree_key_cache.nr_dirty),
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 42504e16acb6..a756b69582e3 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -76,14 +76,6 @@ struct journal_res {
u64 seq;
};
-/*
- * For reserving space in the journal prior to getting a reservation on a
- * particular journal entry:
- */
-struct journal_preres {
- unsigned u64s;
-};
-
union journal_res_state {
struct {
atomic64_t counter;
@@ -104,22 +96,6 @@ union journal_res_state {
};
};
-union journal_preres_state {
- struct {
- atomic64_t counter;
- };
-
- struct {
- u64 v;
- };
-
- struct {
- u64 waiting:1,
- reserved:31,
- remaining:32;
- };
-};
-
/* bytes: */
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
@@ -180,8 +156,6 @@ struct journal {
union journal_res_state reservations;
enum bch_watermark watermark;
- union journal_preres_state prereserved;
-
} __aligned(SMP_CACHE_BYTES);
unsigned long flags;
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index b775cf0fb7cb..97790445e67a 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -163,8 +163,11 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
this_cpu_sub(*lock->readers, !ret);
preempt_enable();
- if (!ret && (old & SIX_LOCK_WAITING_write))
- ret = -1 - SIX_LOCK_write;
+ if (!ret) {
+ smp_mb();
+ if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
+ ret = -1 - SIX_LOCK_write;
+ }
} else if (type == SIX_LOCK_write && lock->readers) {
if (try) {
atomic_add(SIX_LOCK_HELD_write, &lock->state);
diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h
index 86833445af20..2d2e66a4e468 100644
--- a/fs/bcachefs/subvolume_types.h
+++ b/fs/bcachefs/subvolume_types.h
@@ -20,7 +20,7 @@ struct snapshot_t {
};
struct snapshot_table {
- struct snapshot_t s[0];
+ DECLARE_FLEX_ARRAY(struct snapshot_t, s);
};
typedef struct {
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 893304a1f06e..7857671159b4 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -196,10 +196,9 @@ DEFINE_EVENT(bio, journal_write,
TRACE_EVENT(journal_reclaim_start,
TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
u64 min_nr, u64 min_key_cache,
- u64 prereserved, u64 prereserved_total,
u64 btree_cache_dirty, u64 btree_cache_total,
u64 btree_key_cache_dirty, u64 btree_key_cache_total),
- TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
+ TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
btree_cache_dirty, btree_cache_total,
btree_key_cache_dirty, btree_key_cache_total),
@@ -209,8 +208,6 @@ TRACE_EVENT(journal_reclaim_start,
__field(bool, kicked )
__field(u64, min_nr )
__field(u64, min_key_cache )
- __field(u64, prereserved )
- __field(u64, prereserved_total )
__field(u64, btree_cache_dirty )
__field(u64, btree_cache_total )
__field(u64, btree_key_cache_dirty )
@@ -223,22 +220,18 @@ TRACE_EVENT(journal_reclaim_start,
__entry->kicked = kicked;
__entry->min_nr = min_nr;
__entry->min_key_cache = min_key_cache;
- __entry->prereserved = prereserved;
- __entry->prereserved_total = prereserved_total;
__entry->btree_cache_dirty = btree_cache_dirty;
__entry->btree_cache_total = btree_cache_total;
__entry->btree_key_cache_dirty = btree_key_cache_dirty;
__entry->btree_key_cache_total = btree_key_cache_total;
),
- TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
+ TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->direct,
__entry->kicked,
__entry->min_nr,
__entry->min_key_cache,
- __entry->prereserved,
- __entry->prereserved_total,
__entry->btree_cache_dirty,
__entry->btree_cache_total,
__entry->btree_key_cache_dirty,
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index a39ff0c296ec..79d982674c18 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -552,6 +552,14 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
s.v = v + 1;
s.defined = true;
} else {
+ /*
+ * Check if this option was set on the parent - if so, switched
+ * back to inheriting from the parent:
+ *
+ * rename() also has to deal with keeping inherited options up
+ * to date - see bch2_reinherit_attrs()
+ */
+ spin_lock(&dentry->d_lock);
if (!IS_ROOT(dentry)) {
struct bch_inode_info *dir =
to_bch_ei(d_inode(dentry->d_parent));
@@ -560,6 +568,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
} else {
s.v = 0;
}
+ spin_unlock(&dentry->d_lock);
s.defined = false;
}
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index e540648dedc2..1d318f85232d 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -21,7 +21,7 @@ config EROFS_FS
performance under extremely memory pressure without extra cost.
See the documentation at <file:Documentation/filesystems/erofs.rst>
- for more details.
+ and the web pages at <https://erofs.docs.kernel.org> for more details.
If unsure, say N.
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 029c761670bf..c98aeda8abb2 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -220,7 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
up_read(&devs->rwsem);
return 0;
}
- map->m_bdev = dif->bdev_handle->bdev;
+ map->m_bdev = dif->bdev_handle ? dif->bdev_handle->bdev : NULL;
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
@@ -238,7 +238,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
map->m_pa -= startoff;
- map->m_bdev = dif->bdev_handle->bdev;
+ map->m_bdev = dif->bdev_handle ?
+ dif->bdev_handle->bdev : NULL;
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index b8ad05b4509d..14a79d3226ab 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -15,11 +15,11 @@ static void *erofs_read_inode(struct erofs_buf *buf,
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_inode *vi = EROFS_I(inode);
const erofs_off_t inode_loc = erofs_iloc(inode);
-
erofs_blk_t blkaddr, nblks = 0;
void *kaddr;
struct erofs_inode_compact *dic;
struct erofs_inode_extended *die, *copied = NULL;
+ union erofs_inode_i_u iu;
unsigned int ifmt;
int err;
@@ -35,9 +35,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
dic = kaddr + *ofs;
ifmt = le16_to_cpu(dic->i_format);
-
if (ifmt & ~EROFS_I_ALL) {
- erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu",
+ erofs_err(sb, "unsupported i_format %u of nid %llu",
ifmt, vi->nid);
err = -EOPNOTSUPP;
goto err_out;
@@ -45,7 +44,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
vi->datalayout = erofs_inode_datalayout(ifmt);
if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) {
- erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu",
+ erofs_err(sb, "unsupported datalayout %u of nid %llu",
vi->datalayout, vi->nid);
err = -EOPNOTSUPP;
goto err_out;
@@ -82,40 +81,15 @@ static void *erofs_read_inode(struct erofs_buf *buf,
vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
inode->i_mode = le16_to_cpu(die->i_mode);
- switch (inode->i_mode & S_IFMT) {
- case S_IFREG:
- case S_IFDIR:
- case S_IFLNK:
- vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr);
- break;
- case S_IFCHR:
- case S_IFBLK:
- inode->i_rdev =
- new_decode_dev(le32_to_cpu(die->i_u.rdev));
- break;
- case S_IFIFO:
- case S_IFSOCK:
- inode->i_rdev = 0;
- break;
- default:
- goto bogusimode;
- }
+ iu = die->i_u;
i_uid_write(inode, le32_to_cpu(die->i_uid));
i_gid_write(inode, le32_to_cpu(die->i_gid));
set_nlink(inode, le32_to_cpu(die->i_nlink));
-
- /* extended inode has its own timestamp */
+ /* each extended inode has its own timestamp */
inode_set_ctime(inode, le64_to_cpu(die->i_mtime),
le32_to_cpu(die->i_mtime_nsec));
inode->i_size = le64_to_cpu(die->i_size);
-
- /* total blocks for compressed files */
- if (erofs_inode_is_data_compressed(vi->datalayout))
- nblks = le32_to_cpu(die->i_u.compressed_blocks);
- else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
- /* fill chunked inode summary info */
- vi->chunkformat = le16_to_cpu(die->i_u.c.format);
kfree(copied);
copied = NULL;
break;
@@ -125,49 +99,51 @@ static void *erofs_read_inode(struct erofs_buf *buf,
vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
inode->i_mode = le16_to_cpu(dic->i_mode);
- switch (inode->i_mode & S_IFMT) {
- case S_IFREG:
- case S_IFDIR:
- case S_IFLNK:
- vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr);
- break;
- case S_IFCHR:
- case S_IFBLK:
- inode->i_rdev =
- new_decode_dev(le32_to_cpu(dic->i_u.rdev));
- break;
- case S_IFIFO:
- case S_IFSOCK:
- inode->i_rdev = 0;
- break;
- default:
- goto bogusimode;
- }
+ iu = dic->i_u;
i_uid_write(inode, le16_to_cpu(dic->i_uid));
i_gid_write(inode, le16_to_cpu(dic->i_gid));
set_nlink(inode, le16_to_cpu(dic->i_nlink));
-
/* use build time for compact inodes */
inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec);
inode->i_size = le32_to_cpu(dic->i_size);
- if (erofs_inode_is_data_compressed(vi->datalayout))
- nblks = le32_to_cpu(dic->i_u.compressed_blocks);
- else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
- vi->chunkformat = le16_to_cpu(dic->i_u.c.format);
break;
default:
- erofs_err(inode->i_sb,
- "unsupported on-disk inode version %u of nid %llu",
+ erofs_err(sb, "unsupported on-disk inode version %u of nid %llu",
erofs_inode_version(ifmt), vi->nid);
err = -EOPNOTSUPP;
goto err_out;
}
- if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFREG:
+ case S_IFDIR:
+ case S_IFLNK:
+ vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr);
+ break;
+ case S_IFCHR:
+ case S_IFBLK:
+ inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev));
+ break;
+ case S_IFIFO:
+ case S_IFSOCK:
+ inode->i_rdev = 0;
+ break;
+ default:
+ erofs_err(sb, "bogus i_mode (%o) @ nid %llu", inode->i_mode,
+ vi->nid);
+ err = -EFSCORRUPTED;
+ goto err_out;
+ }
+
+ /* total blocks for compressed files */
+ if (erofs_inode_is_data_compressed(vi->datalayout)) {
+ nblks = le32_to_cpu(iu.compressed_blocks);
+ } else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+ /* fill chunked inode summary info */
+ vi->chunkformat = le16_to_cpu(iu.c.format);
if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
- erofs_err(inode->i_sb,
- "unsupported chunk format %x of nid %llu",
+ erofs_err(sb, "unsupported chunk format %x of nid %llu",
vi->chunkformat, vi->nid);
err = -EOPNOTSUPP;
goto err_out;
@@ -191,10 +167,6 @@ static void *erofs_read_inode(struct erofs_buf *buf,
inode->i_blocks = nblks << (sb->s_blocksize_bits - 9);
return kaddr;
-bogusimode:
- erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu",
- inode->i_mode, vi->nid);
- err = -EFSCORRUPTED;
err_out:
DBG_BUGON(1);
kfree(copied);
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 929248c6ca84..4cbe0434cbb8 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -84,8 +84,8 @@ int nfsd_net_reply_cache_init(struct nfsd_net *nn);
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
-int nfsd_cache_lookup(struct svc_rqst *rqstp,
- struct nfsd_cacherep **cacherep);
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ unsigned int len, struct nfsd_cacherep **cacherep);
void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
int cachetype, __be32 *statp);
int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4045c852a450..40415929e2ae 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2804,7 +2804,7 @@ static int client_opens_release(struct inode *inode, struct file *file)
/* XXX: alternatively, we could get/drop in seq start/stop */
drop_client(clp);
- return 0;
+ return seq_release(inode, file);
}
static const struct file_operations client_states_fops = {
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index fd56a52aa5fb..d3273a396659 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -369,33 +369,52 @@ nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
return freed;
}
-/*
- * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
+/**
+ * nfsd_cache_csum - Checksum incoming NFS Call arguments
+ * @buf: buffer containing a whole RPC Call message
+ * @start: starting byte of the NFS Call header
+ * @remaining: size of the NFS Call header, in bytes
+ *
+ * Compute a weak checksum of the leading bytes of an NFS procedure
+ * call header to help verify that a retransmitted Call matches an
+ * entry in the duplicate reply cache.
+ *
+ * To avoid assumptions about how the RPC message is laid out in
+ * @buf and what else it might contain (eg, a GSS MIC suffix), the
+ * caller passes us the exact location and length of the NFS Call
+ * header.
+ *
+ * Returns a 32-bit checksum value, as defined in RFC 793.
*/
-static __wsum
-nfsd_cache_csum(struct svc_rqst *rqstp)
+static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start,
+ unsigned int remaining)
{
+ unsigned int base, len;
+ struct xdr_buf subbuf;
+ __wsum csum = 0;
+ void *p;
int idx;
- unsigned int base;
- __wsum csum;
- struct xdr_buf *buf = &rqstp->rq_arg;
- const unsigned char *p = buf->head[0].iov_base;
- size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
- RC_CSUMLEN);
- size_t len = min(buf->head[0].iov_len, csum_len);
+
+ if (remaining > RC_CSUMLEN)
+ remaining = RC_CSUMLEN;
+ if (xdr_buf_subsegment(buf, &subbuf, start, remaining))
+ return csum;
/* rq_arg.head first */
- csum = csum_partial(p, len, 0);
- csum_len -= len;
+ if (subbuf.head[0].iov_len) {
+ len = min_t(unsigned int, subbuf.head[0].iov_len, remaining);
+ csum = csum_partial(subbuf.head[0].iov_base, len, csum);
+ remaining -= len;
+ }
/* Continue into page array */
- idx = buf->page_base / PAGE_SIZE;
- base = buf->page_base & ~PAGE_MASK;
- while (csum_len) {
- p = page_address(buf->pages[idx]) + base;
- len = min_t(size_t, PAGE_SIZE - base, csum_len);
+ idx = subbuf.page_base / PAGE_SIZE;
+ base = subbuf.page_base & ~PAGE_MASK;
+ while (remaining) {
+ p = page_address(subbuf.pages[idx]) + base;
+ len = min_t(unsigned int, PAGE_SIZE - base, remaining);
csum = csum_partial(p, len, csum);
- csum_len -= len;
+ remaining -= len;
base = 0;
++idx;
}
@@ -466,6 +485,8 @@ out:
/**
* nfsd_cache_lookup - Find an entry in the duplicate reply cache
* @rqstp: Incoming Call to find
+ * @start: starting byte in @rqstp->rq_arg of the NFS Call header
+ * @len: size of the NFS Call header, in bytes
* @cacherep: OUT: DRC entry for this request
*
* Try to find an entry matching the current call in the cache. When none
@@ -479,7 +500,8 @@ out:
* %RC_REPLY: Reply from cache
* %RC_DROPIT: Do not process the request further
*/
-int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ unsigned int len, struct nfsd_cacherep **cacherep)
{
struct nfsd_net *nn;
struct nfsd_cacherep *rp, *found;
@@ -495,7 +517,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
goto out;
}
- csum = nfsd_cache_csum(rqstp);
+ csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
/*
* Since the common case is a cache miss followed by an insert,
@@ -641,24 +663,17 @@ void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
return;
}
-/*
- * Copy cached reply to current reply buffer. Should always fit.
- * FIXME as reply is in a page, we should just attach the page, and
- * keep a refcount....
- */
static int
nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
{
- struct kvec *vec = &rqstp->rq_res.head[0];
-
- if (vec->iov_len + data->iov_len > PAGE_SIZE) {
- printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
- data->iov_len);
- return 0;
- }
- memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
- vec->iov_len += data->iov_len;
- return 1;
+ __be32 *p;
+
+ p = xdr_reserve_space(&rqstp->rq_res_stream, data->iov_len);
+ if (unlikely(!p))
+ return false;
+ memcpy(p, data->iov_base, data->iov_len);
+ xdr_commit_encode(&rqstp->rq_res_stream);
+ return true;
}
/*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d6122bb2d167..fe61d9bbcc1f 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -981,6 +981,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
const struct svc_procedure *proc = rqstp->rq_procinfo;
__be32 *statp = rqstp->rq_accept_statp;
struct nfsd_cacherep *rp;
+ unsigned int start, len;
+ __be32 *nfs_reply;
/*
* Give the xdr decoder a chance to change this if it wants
@@ -988,6 +990,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
*/
rqstp->rq_cachetype = proc->pc_cachetype;
+ /*
+ * ->pc_decode advances the argument stream past the NFS
+ * Call header, so grab the header's starting location and
+ * size now for the call to nfsd_cache_lookup().
+ */
+ start = xdr_stream_pos(&rqstp->rq_arg_stream);
+ len = xdr_stream_remaining(&rqstp->rq_arg_stream);
if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
goto out_decode_err;
@@ -1001,7 +1010,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
rp = NULL;
- switch (nfsd_cache_lookup(rqstp, &rp)) {
+ switch (nfsd_cache_lookup(rqstp, start, len, &rp)) {
case RC_DOIT:
break;
case RC_REPLY:
@@ -1010,6 +1019,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
goto out_dropit;
}
+ nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
*statp = proc->pc_func(rqstp);
if (test_bit(RQ_DROPME, &rqstp->rq_flags))
goto out_update_drop;
@@ -1023,7 +1033,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
*/
smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
- nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
+ nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply);
out_cached_reply:
return 1;
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index ddab9ea267d1..3fe2dde1598f 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -430,7 +430,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
struct ovl_fs_context *ctx = fc->fs_private;
struct ovl_fs_context_layer *l;
char *dup = NULL, *iter;
- ssize_t nr_lower = 0, nr = 0, nr_data = 0;
+ ssize_t nr_lower, nr;
bool data_layer = false;
/*
@@ -482,6 +482,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
iter = dup;
l = ctx->lower;
for (nr = 0; nr < nr_lower; nr++, l++) {
+ ctx->nr++;
memset(l, 0, sizeof(*l));
err = ovl_mount_dir(iter, &l->path);
@@ -498,10 +499,10 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
goto out_put;
if (data_layer)
- nr_data++;
+ ctx->nr_data++;
/* Calling strchr() again would overrun. */
- if ((nr + 1) == nr_lower)
+ if (ctx->nr == nr_lower)
break;
err = -EINVAL;
@@ -511,7 +512,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
* This is a regular layer so we require that
* there are no data layers.
*/
- if ((ctx->nr_data + nr_data) > 0) {
+ if (ctx->nr_data > 0) {
pr_err("regular lower layers cannot follow data lower layers");
goto out_put;
}
@@ -524,8 +525,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
data_layer = true;
iter++;
}
- ctx->nr = nr_lower;
- ctx->nr_data += nr_data;
kfree(dup);
return 0;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 50a201e9cd39..c3f020ca13a8 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -978,7 +978,7 @@ int ovl_set_protattr(struct inode *inode, struct dentry *upper,
return 0;
}
-/**
+/*
* Caller must hold a reference to inode to prevent it from being freed while
* it is marked inuse.
*/
diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c
index 6f3285f1dfee..af7849e5974f 100644
--- a/fs/smb/client/cifs_spnego.c
+++ b/fs/smb/client/cifs_spnego.c
@@ -64,8 +64,8 @@ struct key_type cifs_spnego_key_type = {
* strlen(";sec=ntlmsspi") */
#define MAX_MECH_STR_LEN 13
-/* strlen of "host=" */
-#define HOST_KEY_LEN 5
+/* strlen of ";host=" */
+#define HOST_KEY_LEN 6
/* strlen of ";ip4=" or ";ip6=" */
#define IP_KEY_LEN 5
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 57c2a7df3457..f896f60c924b 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2065,6 +2065,12 @@ void __cifs_put_smb_ses(struct cifs_ses *ses)
ses->chans[i].server = NULL;
}
+ /* we now account for primary channel in iface->refcount */
+ if (ses->chans[0].iface) {
+ kref_put(&ses->chans[0].iface->refcount, release_iface);
+ ses->chans[0].server = NULL;
+ }
+
sesInfoFree(ses);
cifs_put_tcp_session(server, 0);
}
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 0bb2ac929061..8b2d7c1ca428 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -322,28 +322,32 @@ cifs_disable_secondary_channels(struct cifs_ses *ses)
iface = ses->chans[i].iface;
server = ses->chans[i].server;
+ /*
+ * remove these references first, since we need to unlock
+ * the chan_lock here, since iface_lock is a higher lock
+ */
+ ses->chans[i].iface = NULL;
+ ses->chans[i].server = NULL;
+ spin_unlock(&ses->chan_lock);
+
if (iface) {
spin_lock(&ses->iface_lock);
kref_put(&iface->refcount, release_iface);
- ses->chans[i].iface = NULL;
iface->num_channels--;
if (iface->weight_fulfilled)
iface->weight_fulfilled--;
spin_unlock(&ses->iface_lock);
}
- spin_unlock(&ses->chan_lock);
- if (server && !server->terminate) {
- server->terminate = true;
- cifs_signal_cifsd_for_reconnect(server, false);
- }
- spin_lock(&ses->chan_lock);
-
if (server) {
- ses->chans[i].server = NULL;
+ if (!server->terminate) {
+ server->terminate = true;
+ cifs_signal_cifsd_for_reconnect(server, false);
+ }
cifs_put_tcp_session(server, false);
}
+ spin_lock(&ses->chan_lock);
}
done:
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 84ea67301303..5a3ca62d2f07 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -458,6 +458,8 @@ generate_smb3signingkey(struct cifs_ses *ses,
ptriplet->encryption.context,
ses->smb3encryptionkey,
SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
rc = generate_key(ses, ptriplet->decryption.label,
ptriplet->decryption.context,
ses->smb3decryptionkey,
@@ -466,9 +468,6 @@ generate_smb3signingkey(struct cifs_ses *ses,
return rc;
}
- if (rc)
- return rc;
-
#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__);
/*
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index ed0bc8cbc703..567fb37274d3 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS
bool "XFS online metadata check usage data collection"
default y
depends on XFS_ONLINE_SCRUB
- select XFS_DEBUG
+ select DEBUG_FS
help
If you say Y here, the kernel will gather usage data about
the online metadata check subsystem. This includes the number
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3069194527dd..100ab5931b31 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist(
ASSERT(mp->m_alloc_maxlevels > 0);
+ /*
+ * For a btree shorter than the maximum height, the worst case is that
+ * every level gets split and a new level is added, then while inserting
+ * another entry to refill the AGFL, every level under the old root gets
+ * split again. This is:
+ *
+ * (full height split reservation) + (AGFL refill split height)
+ * = (current height + 1) + (current height - 1)
+ * = (new height) + (new height - 2)
+ * = 2 * new height - 2
+ *
+ * For a btree of maximum height, the worst case is that every level
+ * under the root gets split, then while inserting another entry to
+ * refill the AGFL, every level under the root gets split again. This is
+ * also:
+ *
+ * 2 * (current height - 1)
+ * = 2 * (new height - 1)
+ * = 2 * new height - 2
+ */
+
/* space needed by-bno freespace btree */
min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
- mp->m_alloc_maxlevels);
+ mp->m_alloc_maxlevels) * 2 - 2;
/* space needed by-size freespace btree */
min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
- mp->m_alloc_maxlevels);
+ mp->m_alloc_maxlevels) * 2 - 2;
/* space needed reverse mapping used space btree */
if (xfs_has_rmapbt(mp))
min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
- mp->m_rmap_maxlevels);
+ mp->m_rmap_maxlevels) * 2 - 2;
return min_free;
}
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index bcfb6a4203cd..f71679ce23b9 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -245,21 +245,18 @@ xfs_defer_create_intents(
return ret;
}
-/* Abort all the intents that were committed. */
STATIC void
-xfs_defer_trans_abort(
- struct xfs_trans *tp,
- struct list_head *dop_pending)
+xfs_defer_pending_abort(
+ struct xfs_mount *mp,
+ struct list_head *dop_list)
{
struct xfs_defer_pending *dfp;
const struct xfs_defer_op_type *ops;
- trace_xfs_defer_trans_abort(tp, _RET_IP_);
-
/* Abort intent items that don't have a done item. */
- list_for_each_entry(dfp, dop_pending, dfp_list) {
+ list_for_each_entry(dfp, dop_list, dfp_list) {
ops = defer_op_types[dfp->dfp_type];
- trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
+ trace_xfs_defer_pending_abort(mp, dfp);
if (dfp->dfp_intent && !dfp->dfp_done) {
ops->abort_intent(dfp->dfp_intent);
dfp->dfp_intent = NULL;
@@ -267,6 +264,16 @@ xfs_defer_trans_abort(
}
}
+/* Abort all the intents that were committed. */
+STATIC void
+xfs_defer_trans_abort(
+ struct xfs_trans *tp,
+ struct list_head *dop_pending)
+{
+ trace_xfs_defer_trans_abort(tp, _RET_IP_);
+ xfs_defer_pending_abort(tp->t_mountp, dop_pending);
+}
+
/*
* Capture resources that the caller said not to release ("held") when the
* transaction commits. Caller is responsible for zero-initializing @dres.
@@ -756,12 +763,13 @@ xfs_defer_ops_capture(
/* Release all resources that we used to capture deferred ops. */
void
-xfs_defer_ops_capture_free(
+xfs_defer_ops_capture_abort(
struct xfs_mount *mp,
struct xfs_defer_capture *dfc)
{
unsigned short i;
+ xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
@@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit(
/* Commit the transaction and add the capture structure to the list. */
error = xfs_trans_commit(tp);
if (error) {
- xfs_defer_ops_capture_free(mp, dfc);
+ xfs_defer_ops_capture_abort(mp, dfc);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 114a3a4930a3..8788ad5f6a73 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
struct list_head *capture_list);
void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
struct xfs_defer_resources *dres);
-void xfs_defer_ops_capture_free(struct xfs_mount *mp,
+void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
struct xfs_defer_capture *d);
void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 543f3748c2a3..137a65bda95d 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -510,6 +510,9 @@ xfs_dinode_verify(
if (mode && nextents + naextents > nblocks)
return __this_address;
+ if (nextents + naextents == 0 && nblocks != 0)
+ return __this_address;
+
if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
return __this_address;
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 0e5dba2343ea..144198a6b270 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2(
struct xfs_log_dinode *ldip;
uint isize;
int need_free = 0;
+ xfs_failaddr_t fa;
if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
in_f = item->ri_buf[0].i_addr;
@@ -369,24 +370,26 @@ xlog_recover_inode_commit_pass2(
* superblock flag to determine whether we need to look at di_flushiter
* to skip replay when the on disk inode is newer than the log one
*/
- if (!xfs_has_v3inodes(mp) &&
- ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
- /*
- * Deal with the wrap case, DI_MAX_FLUSH is less
- * than smaller numbers
- */
- if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
- ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
- /* do nothing */
- } else {
- trace_xfs_log_recover_inode_skip(log, in_f);
- error = 0;
- goto out_release;
+ if (!xfs_has_v3inodes(mp)) {
+ if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+ /*
+ * Deal with the wrap case, DI_MAX_FLUSH is less
+ * than smaller numbers
+ */
+ if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+ ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+ /* do nothing */
+ } else {
+ trace_xfs_log_recover_inode_skip(log, in_f);
+ error = 0;
+ goto out_release;
+ }
}
+
+ /* Take the opportunity to reset the flush iteration count */
+ ldip->di_flushiter = 0;
}
- /* Take the opportunity to reset the flush iteration count */
- ldip->di_flushiter = 0;
if (unlikely(S_ISREG(ldip->di_mode))) {
if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -528,8 +531,19 @@ out_owner_change:
(dip->di_mode != 0))
error = xfs_recover_inode_owner_change(mp, dip, in_f,
buffer_list);
- /* re-generate the checksum. */
+ /* re-generate the checksum and validate the recovered inode. */
xfs_dinode_calc_crc(log->l_mp, dip);
+ fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
+ if (fa) {
+ XFS_CORRUPTION_ERROR(
+ "Bad dinode after recovery",
+ XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
+ xfs_alert(mp,
+ "Metadata corruption detected at %pS, inode 0x%llx",
+ fa, in_f->ilf_ino);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
ASSERT(bp->b_mount == mp);
bp->b_flags |= _XBF_LOGRECOVERY;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 51c100c86177..ee206facf0dc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1893,9 +1893,7 @@ xlog_write_iclog(
* the buffer manually, the code needs to be kept in sync
* with the I/O completion path.
*/
- xlog_state_done_syncing(iclog);
- up(&iclog->ic_sema);
- return;
+ goto sync;
}
/*
@@ -1925,20 +1923,17 @@ xlog_write_iclog(
* avoid shutdown re-entering this path and erroring out again.
*/
if (log->l_targ != log->l_mp->m_ddev_targp &&
- blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
- xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
- return;
- }
+ blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
+ goto shutdown;
}
if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
iclog->ic_bio.bi_opf |= REQ_FUA;
iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
- if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
- xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
- return;
- }
+ if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
+ goto shutdown;
+
if (is_vmalloc_addr(iclog->ic_data))
flush_kernel_vmap_range(iclog->ic_data, count);
@@ -1959,6 +1954,12 @@ xlog_write_iclog(
}
submit_bio(&iclog->ic_bio);
+ return;
+shutdown:
+ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+sync:
+ xlog_state_done_syncing(iclog);
+ up(&iclog->ic_sema);
}
/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 13b94d2e605b..a1e18b24971a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2511,7 +2511,7 @@ xlog_abort_defer_ops(
list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
list_del_init(&dfc->dfc_list);
- xfs_defer_ops_capture_free(mp, dfc);
+ xfs_defer_ops_capture_abort(mp, dfc);
}
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 658edee8381d..e5b62dc28466 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent(
}
}
del = got;
+ xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
/* Grab the corresponding mapping in the data fork. */
nmaps = 1;
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 995513fa2690..0655aa5b57b2 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -70,7 +70,7 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
*/
static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
{
- return !atomic_read(&lock.val);
+ return !lock.val.counter;
}
/**
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d305db70674b..efc0c0b07efb 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -195,6 +195,7 @@ enum cpuhp_state {
CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
+ CPUHP_AP_HRTIMERS_DYING,
CPUHP_AP_X86_TBOOT_DYING,
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h
index 45fca09b2319..69501e0ec239 100644
--- a/include/linux/export-internal.h
+++ b/include/linux/export-internal.h
@@ -50,9 +50,7 @@
" .previous" "\n" \
)
-#ifdef CONFIG_IA64
-#define KSYM_FUNC(name) @fptr(name)
-#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
#define KSYM_FUNC(name) P%name
#else
#define KSYM_FUNC(name) name
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 0ee140176f10..f2044d5a652b 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);
int hrtimers_prepare_cpu(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
-int hrtimers_dead_cpu(unsigned int cpu);
+int hrtimers_cpu_dying(unsigned int cpu);
#else
-#define hrtimers_dead_cpu NULL
+#define hrtimers_cpu_dying NULL
#endif
#endif
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index afb028c54f33..5547ba68e6e4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -843,11 +843,11 @@ struct perf_event {
};
/*
- * ,-----------------------[1:n]----------------------.
- * V V
- * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
- * ^ ^ | |
- * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-'
+ * ,-----------------------[1:n]------------------------.
+ * V V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
+ * | |
+ * `--[n:1]-> pmu <-[1:n]--'
*
*
* struct perf_event_pmu_context lifetime is refcount based and RCU freed
@@ -865,6 +865,9 @@ struct perf_event {
* ctx->mutex pinning the configuration. Since we hold a reference on
* group_leader (through the filedesc) it can't go away, therefore it's
* associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ *
+ * perf_event holds a refcount on perf_event_context
+ * perf_event holds a refcount on perf_event_pmu_context
*/
struct perf_event_pmu_context {
struct pmu *pmu;
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index f04a43044d91..976e9500f651 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -145,13 +145,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
struct io_sq_data *sq = ctx->sq_data;
- if (mutex_trylock(&sq->lock)) {
- if (sq->thread) {
- sq_pid = task_pid_nr(sq->thread);
- sq_cpu = task_cpu(sq->thread);
- }
- mutex_unlock(&sq->lock);
- }
+ sq_pid = sq->task_pid;
+ sq_cpu = sq->sq_cpu;
}
seq_printf(m, "SqThread:\t%d\n", sq_pid);
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index bd6c2c7959a5..65b5dbe3c850 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -214,6 +214,7 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
did_sig = get_signal(&ksig);
cond_resched();
mutex_lock(&sqd->lock);
+ sqd->sq_cpu = raw_smp_processor_id();
}
return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
}
@@ -229,10 +230,15 @@ static int io_sq_thread(void *data)
snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
set_task_comm(current, buf);
- if (sqd->sq_cpu != -1)
+ /* reset to our pid after we've set task_comm, for fdinfo */
+ sqd->task_pid = current->pid;
+
+ if (sqd->sq_cpu != -1) {
set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
- else
+ } else {
set_cpus_allowed_ptr(current, cpu_online_mask);
+ sqd->sq_cpu = raw_smp_processor_id();
+ }
mutex_lock(&sqd->lock);
while (1) {
@@ -261,6 +267,7 @@ static int io_sq_thread(void *data)
mutex_unlock(&sqd->lock);
cond_resched();
mutex_lock(&sqd->lock);
+ sqd->sq_cpu = raw_smp_processor_id();
}
continue;
}
@@ -294,6 +301,7 @@ static int io_sq_thread(void *data)
mutex_unlock(&sqd->lock);
schedule();
mutex_lock(&sqd->lock);
+ sqd->sq_cpu = raw_smp_processor_id();
}
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
atomic_andnot(IORING_SQ_NEED_WAKEUP,
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 91e82e34b51e..7a98cd176a12 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -531,7 +531,7 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
if (tsk != current)
return 0;
- if (WARN_ON_ONCE(!current->mm))
+ if (!current->mm)
return 0;
exe_file = get_mm_exe_file(current->mm);
if (!exe_file)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1d5b9de3b1b9..4b9ff41ca603 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3885,14 +3885,6 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
}
-static int cgroup_pressure_open(struct kernfs_open_file *of)
-{
- if (of->file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
- return -EPERM;
-
- return 0;
-}
-
static void cgroup_pressure_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;
@@ -5299,7 +5291,6 @@ static struct cftype cgroup_psi_files[] = {
{
.name = "io.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_IO]),
- .open = cgroup_pressure_open,
.seq_show = cgroup_io_pressure_show,
.write = cgroup_io_pressure_write,
.poll = cgroup_pressure_poll,
@@ -5308,7 +5299,6 @@ static struct cftype cgroup_psi_files[] = {
{
.name = "memory.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]),
- .open = cgroup_pressure_open,
.seq_show = cgroup_memory_pressure_show,
.write = cgroup_memory_pressure_write,
.poll = cgroup_pressure_poll,
@@ -5317,7 +5307,6 @@ static struct cftype cgroup_psi_files[] = {
{
.name = "cpu.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]),
- .open = cgroup_pressure_open,
.seq_show = cgroup_cpu_pressure_show,
.write = cgroup_cpu_pressure_write,
.poll = cgroup_pressure_poll,
@@ -5327,7 +5316,6 @@ static struct cftype cgroup_psi_files[] = {
{
.name = "irq.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]),
- .open = cgroup_pressure_open,
.seq_show = cgroup_irq_pressure_show,
.write = cgroup_irq_pressure_write,
.poll = cgroup_pressure_poll,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9e4c6780adde..a86972a91991 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2113,7 +2113,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
[CPUHP_HRTIMERS_PREPARE] = {
.name = "hrtimers:prepare",
.startup.single = hrtimers_prepare_cpu,
- .teardown.single = hrtimers_dead_cpu,
+ .teardown.single = NULL,
},
[CPUHP_SMPCFD_PREPARE] = {
.name = "smpcfd:prepare",
@@ -2205,6 +2205,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = NULL,
.teardown.single = smpcfd_dying_cpu,
},
+ [CPUHP_AP_HRTIMERS_DYING] = {
+ .name = "hrtimers:dying",
+ .startup.single = NULL,
+ .teardown.single = hrtimers_cpu_dying,
+ },
+
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 683dc086ef10..b704d83a28b2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4828,6 +4828,11 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
void *task_ctx_data = NULL;
if (!ctx->task) {
+ /*
+ * perf_pmu_migrate_context() / __perf_pmu_install_event()
+ * relies on the fact that find_get_pmu_context() cannot fail
+ * for CPU contexts.
+ */
struct perf_cpu_pmu_context *cpc;
cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
@@ -12889,6 +12894,9 @@ static void __perf_pmu_install_event(struct pmu *pmu,
int cpu, struct perf_event *event)
{
struct perf_event_pmu_context *epc;
+ struct perf_event_context *old_ctx = event->ctx;
+
+ get_ctx(ctx); /* normally find_get_context() */
event->cpu = cpu;
epc = find_get_pmu_context(pmu, ctx, event);
@@ -12897,6 +12905,11 @@ static void __perf_pmu_install_event(struct pmu *pmu,
if (event->state >= PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_INACTIVE;
perf_install_in_context(ctx, event, cpu);
+
+ /*
+ * Now that event->ctx is updated and visible, put the old ctx.
+ */
+ put_ctx(old_ctx);
}
static void __perf_pmu_install(struct perf_event_context *ctx,
@@ -12935,6 +12948,10 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
struct perf_event_context *src_ctx, *dst_ctx;
LIST_HEAD(events);
+ /*
+ * Since per-cpu context is persistent, no need to grab an extra
+ * reference.
+ */
src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx;
dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx;
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 52695c59d041..dad981a865b8 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -700,7 +700,8 @@ retry:
owner = uval & FUTEX_TID_MASK;
if (pending_op && !pi && !owner) {
- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+ futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+ FUTEX_BITSET_MATCH_ANY);
return 0;
}
@@ -752,8 +753,10 @@ retry:
* Wake robust non-PI futexes here. The wakeup of
* PI futexes happens in exit_pi_state():
*/
- if (!pi && (uval & FUTEX_WAITERS))
- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+ if (!pi && (uval & FUTEX_WAITERS)) {
+ futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+ FUTEX_BITSET_MATCH_ANY);
+ }
return 0;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2048138ce54b..d7a3c63a2171 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3666,41 +3666,140 @@ static inline void
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
#endif
+static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
+ unsigned long weight)
+{
+ unsigned long old_weight = se->load.weight;
+ u64 avruntime = avg_vruntime(cfs_rq);
+ s64 vlag, vslice;
+
+ /*
+ * VRUNTIME
+ * ========
+ *
+ * COROLLARY #1: The virtual runtime of the entity needs to be
+ * adjusted if re-weight at !0-lag point.
+ *
+ * Proof: For contradiction assume this is not true, so we can
+ * re-weight without changing vruntime at !0-lag point.
+ *
+ * Weight VRuntime Avg-VRuntime
+ * before w v V
+ * after w' v' V'
+ *
+ * Since lag needs to be preserved through re-weight:
+ *
+ * lag = (V - v)*w = (V'- v')*w', where v = v'
+ * ==> V' = (V - v)*w/w' + v (1)
+ *
+ * Let W be the total weight of the entities before reweight,
+ * since V' is the new weighted average of entities:
+ *
+ * V' = (WV + w'v - wv) / (W + w' - w) (2)
+ *
+ * by using (1) & (2) we obtain:
+ *
+ * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
+ * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
+ * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
+ * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
+ *
+ * Since we are doing at !0-lag point which means V != v, we
+ * can simplify (3):
+ *
+ * ==> W / (W + w' - w) = w / w'
+ * ==> Ww' = Ww + ww' - ww
+ * ==> W * (w' - w) = w * (w' - w)
+ * ==> W = w (re-weight indicates w' != w)
+ *
+ * So the cfs_rq contains only one entity, hence vruntime of
+ * the entity @v should always equal to the cfs_rq's weighted
+ * average vruntime @V, which means we will always re-weight
+ * at 0-lag point, thus breach assumption. Proof completed.
+ *
+ *
+ * COROLLARY #2: Re-weight does NOT affect weighted average
+ * vruntime of all the entities.
+ *
+ * Proof: According to corollary #1, Eq. (1) should be:
+ *
+ * (V - v)*w = (V' - v')*w'
+ * ==> v' = V' - (V - v)*w/w' (4)
+ *
+ * According to the weighted average formula, we have:
+ *
+ * V' = (WV - wv + w'v') / (W - w + w')
+ * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
+ * = (WV - wv + w'V' - Vw + wv) / (W - w + w')
+ * = (WV + w'V' - Vw) / (W - w + w')
+ *
+ * ==> V'*(W - w + w') = WV + w'V' - Vw
+ * ==> V' * (W - w) = (W - w) * V (5)
+ *
+ * If the entity is the only one in the cfs_rq, then reweight
+ * always occurs at 0-lag point, so V won't change. Or else
+ * there are other entities, hence W != w, then Eq. (5) turns
+ * into V' = V. So V won't change in either case, proof done.
+ *
+ *
+ * So according to corollary #1 & #2, the effect of re-weight
+ * on vruntime should be:
+ *
+ * v' = V' - (V - v) * w / w' (4)
+ * = V - (V - v) * w / w'
+ * = V - vl * w / w'
+ * = V - vl'
+ */
+ if (avruntime != se->vruntime) {
+ vlag = (s64)(avruntime - se->vruntime);
+ vlag = div_s64(vlag * old_weight, weight);
+ se->vruntime = avruntime - vlag;
+ }
+
+ /*
+ * DEADLINE
+ * ========
+ *
+ * When the weight changes, the virtual time slope changes and
+ * we should adjust the relative virtual deadline accordingly.
+ *
+ * d' = v' + (d - v)*w/w'
+ * = V' - (V - v)*w/w' + (d - v)*w/w'
+ * = V - (V - v)*w/w' + (d - v)*w/w'
+ * = V + (d - V)*w/w'
+ */
+ vslice = (s64)(se->deadline - avruntime);
+ vslice = div_s64(vslice * old_weight, weight);
+ se->deadline = avruntime + vslice;
+}
+
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
- unsigned long old_weight = se->load.weight;
+ bool curr = cfs_rq->curr == se;
if (se->on_rq) {
/* commit outstanding execution time */
- if (cfs_rq->curr == se)
+ if (curr)
update_curr(cfs_rq);
else
- avg_vruntime_sub(cfs_rq, se);
+ __dequeue_entity(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);
- update_load_set(&se->load, weight);
-
if (!se->on_rq) {
/*
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
* we need to scale se->vlag when w_i changes.
*/
- se->vlag = div_s64(se->vlag * old_weight, weight);
+ se->vlag = div_s64(se->vlag * se->load.weight, weight);
} else {
- s64 deadline = se->deadline - se->vruntime;
- /*
- * When the weight changes, the virtual time slope changes and
- * we should adjust the relative virtual deadline accordingly.
- */
- deadline = div_s64(deadline * old_weight, weight);
- se->deadline = se->vruntime + deadline;
- if (se != cfs_rq->curr)
- min_deadline_cb_propagate(&se->run_node, NULL);
+ reweight_eevdf(cfs_rq, se, weight);
}
+ update_load_set(&se->load, weight);
+
#ifdef CONFIG_SMP
do {
u32 divider = get_pelt_divider(&se->avg);
@@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
enqueue_load_avg(cfs_rq, se);
if (se->on_rq) {
update_load_add(&cfs_rq->load, se->load.weight);
- if (cfs_rq->curr != se)
- avg_vruntime_add(cfs_rq, se);
+ if (!curr) {
+ /*
+ * The entity's vruntime has been adjusted, so let's check
+ * whether the rq-wide min_vruntime needs updated too. Since
+ * the calculations above require stable min_vruntime rather
+ * than up-to-date one, we do the update at the end of the
+ * reweight process.
+ */
+ __enqueue_entity(cfs_rq, se);
+ update_min_vruntime(cfs_rq);
+ }
}
}
@@ -3857,14 +3965,11 @@ static void update_cfs_group(struct sched_entity *se)
#ifndef CONFIG_SMP
shares = READ_ONCE(gcfs_rq->tg->shares);
-
- if (likely(se->load.weight == shares))
- return;
#else
- shares = calc_group_shares(gcfs_rq);
+ shares = calc_group_shares(gcfs_rq);
#endif
-
- reweight_entity(cfs_rq_of(se), se, shares);
+ if (unlikely(se->load.weight != shares))
+ reweight_entity(cfs_rq_of(se), se, shares);
}
#else /* CONFIG_FAIR_GROUP_SCHED */
@@ -11079,12 +11184,16 @@ static int should_we_balance(struct lb_env *env)
continue;
}
- /* Are we the first idle CPU? */
+ /*
+ * Are we the first idle core in a non-SMT domain or higher,
+ * or the first idle CPU in a SMT domain?
+ */
return cpu == env->dst_cpu;
}
- if (idle_smt == env->dst_cpu)
- return true;
+ /* Are we the first idle CPU with busy siblings? */
+ if (idle_smt != -1)
+ return idle_smt == env->dst_cpu;
/* Are we the first CPU of this group ? */
return group_balance_cpu(sg) == env->dst_cpu;
diff --git a/kernel/sys.c b/kernel/sys.c
index 420d9cb9cc8e..e219fcfa112d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2394,6 +2394,10 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
return -EINVAL;
+ /* PARISC cannot allow mdwe as it needs writable stacks */
+ if (IS_ENABLED(CONFIG_PARISC))
+ return -EINVAL;
+
current_bits = get_current_mdwe();
if (current_bits && current_bits != bits)
return -EPERM; /* Cannot unset the flags */
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 238262e4aba7..760793998cdd 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2219,29 +2219,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
}
}
-int hrtimers_dead_cpu(unsigned int scpu)
+int hrtimers_cpu_dying(unsigned int dying_cpu)
{
struct hrtimer_cpu_base *old_base, *new_base;
- int i;
+ int i, ncpu = cpumask_first(cpu_active_mask);
- BUG_ON(cpu_online(scpu));
- tick_cancel_sched_timer(scpu);
+ tick_cancel_sched_timer(dying_cpu);
+
+ old_base = this_cpu_ptr(&hrtimer_bases);
+ new_base = &per_cpu(hrtimer_bases, ncpu);
- /*
- * this BH disable ensures that raise_softirq_irqoff() does
- * not wakeup ksoftirqd (and acquire the pi-lock) while
- * holding the cpu_base lock
- */
- local_bh_disable();
- local_irq_disable();
- old_base = &per_cpu(hrtimer_bases, scpu);
- new_base = this_cpu_ptr(&hrtimer_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
- raw_spin_lock(&new_base->lock);
- raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&old_base->lock);
+ raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
migrate_hrtimer_list(&old_base->clock_base[i],
@@ -2252,15 +2245,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
* The migration might have changed the first expiring softirq
* timer on this CPU. Update it.
*/
- hrtimer_update_softirq_timer(new_base, false);
+ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+ /* Tell the other CPU to retrigger the next event */
+ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
- raw_spin_unlock(&old_base->lock);
raw_spin_unlock(&new_base->lock);
+ raw_spin_unlock(&old_base->lock);
- /* Check, if we got expired work to do */
- __hrtimer_peek_ahead_timers();
- local_irq_enable();
- local_bh_enable();
return 0;
}
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 630077d95dc6..6262d55904e7 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -924,7 +924,7 @@ static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
matched = true;
break;
default:
- break;
+ return false;
}
return matched == filter->matching;
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 45bd0fd4a8b1..be667236b8e6 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -162,6 +162,9 @@ damon_sysfs_scheme_regions_alloc(void)
struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
GFP_KERNEL);
+ if (!regions)
+ return NULL;
+
regions->kobj = (struct kobject){};
INIT_LIST_HEAD(&regions->regions_list);
regions->nr_regions = 0;
@@ -1823,6 +1826,8 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
return 0;
region = damon_sysfs_scheme_region_alloc(r);
+ if (!region)
+ return 0;
list_add_tail(&region->list, &sysfs_regions->regions_list);
sysfs_regions->nr_regions++;
if (kobject_init_and_add(&region->kobj,
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index e27846708b5a..7472404456aa 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1172,7 +1172,7 @@ static int damon_sysfs_update_target(struct damon_target *target,
struct damon_ctx *ctx,
struct damon_sysfs_target *sys_target)
{
- int err;
+ int err = 0;
if (damon_target_has_pid(ctx)) {
err = damon_sysfs_update_target_pid(target, sys_target->pid);
@@ -1203,8 +1203,10 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
damon_for_each_target_safe(t, next, ctx) {
if (i < sysfs_targets->nr) {
- damon_sysfs_update_target(t, ctx,
+ err = damon_sysfs_update_target(t, ctx,
sysfs_targets->targets_arr[i]);
+ if (err)
+ return err;
} else {
if (damon_target_has_pid(ctx))
put_pid(t->pid);
diff --git a/mm/filemap.c b/mm/filemap.c
index 9710f43a89ac..32eedf3afd45 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3443,7 +3443,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
* handled in the specific fault path, and it'll prohibit the
* fault-around logic.
*/
- if (!pte_none(vmf->pte[count]))
+ if (!pte_none(ptep_get(&vmf->pte[count])))
goto skip;
count++;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f31f02472396..4f542444a91f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2769,13 +2769,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
int nr = folio_nr_pages(folio);
xas_split(&xas, folio, folio_order(folio));
- if (folio_test_swapbacked(folio)) {
- __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS,
- -nr);
- } else {
- __lruvec_stat_mod_folio(folio, NR_FILE_THPS,
- -nr);
- filemap_nr_thps_dec(mapping);
+ if (folio_test_pmd_mappable(folio)) {
+ if (folio_test_swapbacked(folio)) {
+ __lruvec_stat_mod_folio(folio,
+ NR_SHMEM_THPS, -nr);
+ } else {
+ __lruvec_stat_mod_folio(folio,
+ NR_FILE_THPS, -nr);
+ filemap_nr_thps_dec(mapping);
+ }
}
}
diff --git a/mm/ksm.c b/mm/ksm.c
index 7efcc68ccc6e..6a831009b4cb 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -468,7 +468,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
page = pfn_swap_entry_to_page(entry);
}
/* return 1 if the page is an normal ksm page or KSM-placed zero page */
- ret = (page && PageKsm(page)) || is_ksm_zero_pte(*pte);
+ ret = (page && PageKsm(page)) || is_ksm_zero_pte(ptent);
pte_unmap_unlock(pte, ptl);
return ret;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 774bd6e21e27..1c1061df9cd1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2936,7 +2936,8 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
* Moreover, it should not come from DMA buffer and is not readily
* reclaimable. So those GFP bits should be masked off.
*/
-#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
+ __GFP_ACCOUNT | __GFP_NOFAIL)
/*
* mod_objcg_mlstate() may be called with irq enabled, so
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 96d9eae5c7cc..0b6ca553bebe 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -312,7 +312,7 @@ static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
ret = -EEXIST;
/* Refuse to overwrite any PTE, even a PTE marker (e.g. UFFD WP). */
- if (!pte_none(*dst_pte))
+ if (!pte_none(ptep_get(dst_pte)))
goto out_unlock;
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
diff --git a/mm/util.c b/mm/util.c
index aa01f6ea5a75..744b4d7e3fae 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -414,6 +414,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack)
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
+#ifdef CONFIG_STACK_GROWSUP
+ /*
+ * For an upwards growing stack the calculation is much simpler.
+ * Memory for the maximum stack size is reserved at the top of the
+ * task. mmap_base starts directly below the stack and grows
+ * downwards.
+ */
+ return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd);
+#else
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_guard_gap;
@@ -431,6 +440,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
gap = MAX_GAP;
return PAGE_ALIGN(STACK_TOP - gap - rnd);
+#endif
}
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 68d0134bdbf9..1a965fe68e01 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -487,14 +487,14 @@ UIMAGE_OPTS-y ?=
UIMAGE_TYPE ?= kernel
UIMAGE_LOADADDR ?= arch_must_set_this
UIMAGE_ENTRYADDR ?= $(UIMAGE_LOADADDR)
-UIMAGE_NAME ?= 'Linux-$(KERNELRELEASE)'
+UIMAGE_NAME ?= Linux-$(KERNELRELEASE)
quiet_cmd_uimage = UIMAGE $@
cmd_uimage = $(BASH) $(MKIMAGE) -A $(UIMAGE_ARCH) -O linux \
-C $(UIMAGE_COMPRESSION) $(UIMAGE_OPTS-y) \
-T $(UIMAGE_TYPE) \
-a $(UIMAGE_LOADADDR) -e $(UIMAGE_ENTRYADDR) \
- -n $(UIMAGE_NAME) -d $< $@
+ -n '$(UIMAGE_NAME)' -d $< $@
# XZ
# ---------------------------------------------------------------------------
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index 0572330bf8a7..a76925b46ce6 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base)
static void sym_validate_range(struct symbol *sym)
{
struct property *prop;
+ struct symbol *range_sym;
int base;
long long val, val2;
- char str[64];
switch (sym->type) {
case S_INT:
@@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym)
if (!prop)
return;
val = strtoll(sym->curr.val, NULL, base);
- val2 = sym_get_range_val(prop->expr->left.sym, base);
+ range_sym = prop->expr->left.sym;
+ val2 = sym_get_range_val(range_sym, base);
if (val >= val2) {
- val2 = sym_get_range_val(prop->expr->right.sym, base);
+ range_sym = prop->expr->right.sym;
+ val2 = sym_get_range_val(range_sym, base);
if (val <= val2)
return;
}
- if (sym->type == S_INT)
- sprintf(str, "%lld", val2);
- else
- sprintf(str, "0x%llx", val2);
- sym->curr.val = xstrdup(str);
+ sym->curr.val = range_sym->curr.val;
}
static void sym_set_changed(struct symbol *sym)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 973b5e5ae2dd..cb6406f485a9 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1383,13 +1383,15 @@ static void section_rela(struct module *mod, struct elf_info *elf,
const Elf_Rela *rela;
for (rela = start; rela < stop; rela++) {
+ Elf_Sym *tsym;
Elf_Addr taddr, r_offset;
unsigned int r_type, r_sym;
r_offset = TO_NATIVE(rela->r_offset);
get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym);
- taddr = TO_NATIVE(rela->r_addend);
+ tsym = elf->symtab_start + r_sym;
+ taddr = tsym->st_value + TO_NATIVE(rela->r_addend);
switch (elf->hdr->e_machine) {
case EM_RISCV:
@@ -1404,7 +1406,7 @@ static void section_rela(struct module *mod, struct elf_info *elf,
break;
}
- check_section_mismatch(mod, elf, elf->symtab_start + r_sym,
+ check_section_mismatch(mod, elf, tsym,
fsecndx, fromsec, r_offset, taddr);
}
}
diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c
index 757a4d193e0f..a9ef6d86de83 100644
--- a/sound/pci/hda/cs35l56_hda_i2c.c
+++ b/sound/pci/hda/cs35l56_hda_i2c.c
@@ -21,6 +21,10 @@ static int cs35l56_hda_i2c_probe(struct i2c_client *clt)
return -ENOMEM;
cs35l56->base.dev = &clt->dev;
+
+#ifdef CS35L56_WAKE_HOLD_TIME_US
+ cs35l56->base.can_hibernate = true;
+#endif
cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c);
if (IS_ERR(cs35l56->base.regmap)) {
ret = PTR_ERR(cs35l56->base.regmap);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 03264915c618..db90feb49c16 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2135,6 +2135,9 @@ static int azx_probe(struct pci_dev *pci,
if (chip->driver_caps & AZX_DCAPS_I915_COMPONENT) {
err = snd_hdac_i915_init(azx_bus(chip));
if (err < 0) {
+ if (err == -EPROBE_DEFER)
+ goto out_free;
+
/* if the controller is bound only with HDMI/DP
* (for HSW and BDW), we need to abort the probe;
* for other chips, still continue probing as other
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 669ae3d6e447..5618b1d9bfd1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9832,6 +9832,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x890e, "HP 255 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x8919, "HP Pavilion Aero Laptop 13-be0xxx", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x896d, "HP ZBook Firefly 16 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9867,6 +9868,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9900,12 +9902,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK),
SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK),
SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
@@ -9944,13 +9950,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
+ SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
@@ -10821,22 +10831,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x12, 0x90a60130},
{0x17, 0x90170110},
{0x21, 0x03211020}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
- {0x14, 0x90170110},
- {0x21, 0x04211020}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
- {0x14, 0x90170110},
- {0x21, 0x04211030}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
- ALC295_STANDARD_PINS,
- {0x17, 0x21014020},
- {0x18, 0x21a19030}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
- ALC295_STANDARD_PINS,
- {0x17, 0x21014040},
- {0x18, 0x21a19050}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
- ALC295_STANDARD_PINS),
SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC298_STANDARD_PINS,
{0x17, 0x90170110}),
@@ -10880,6 +10874,9 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
{0x19, 0x40000000},
{0x1b, 0x40000000}),
+ SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+ {0x19, 0x40000000},
+ {0x1b, 0x40000000}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
{0x19, 0x40000000},
{0x1a, 0x40000000}),
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 264eeb9c46a9..318e2dad27e0 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1421,7 +1421,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
if (error)
goto setval_error;
- if (new_val->addr_family == ADDR_FAMILY_IPV6) {
+ if (new_val->addr_family & ADDR_FAMILY_IPV6) {
error = fprintf(nmfile, "\n[ipv6]\n");
if (error < 0)
goto setval_error;
@@ -1455,14 +1455,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
if (error < 0)
goto setval_error;
- error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
- if (error < 0)
- goto setval_error;
-
- error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
- if (error < 0)
- goto setval_error;
+ /* we do not want ipv4 addresses in ipv6 section and vice versa */
+ if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
+ error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+ if (error < 0)
+ goto setval_error;
+ }
+ if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
+ error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+ if (error < 0)
+ goto setval_error;
+ }
fclose(nmfile);
fclose(ifcfg_file);
diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh
index ae5a7a8249a2..440a91b35823 100755
--- a/tools/hv/hv_set_ifconfig.sh
+++ b/tools/hv/hv_set_ifconfig.sh
@@ -53,7 +53,7 @@
# or "manual" if no boot-time protocol should be used)
#
# address1=ipaddr1/plen
-# address=ipaddr2/plen
+# address2=ipaddr2/plen
#
# gateway=gateway1;gateway2
#
@@ -61,7 +61,7 @@
#
# [ipv6]
# address1=ipaddr1/plen
-# address2=ipaddr1/plen
+# address2=ipaddr2/plen
#
# gateway=gateway1;gateway2
#
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 9a10512e3407..7a334377f92b 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -211,9 +211,6 @@ int *fd_instr_count_percpu;
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
-/* Save original CPU model */
-unsigned int model_orig;
-
unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
@@ -224,24 +221,16 @@ unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
-unsigned int do_snb_cstates;
-unsigned int do_knl_cstates;
-unsigned int do_slm_cstates;
-unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
-unsigned int do_irtl_snb;
-unsigned int do_irtl_hsw;
unsigned int units = 1000000; /* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
-unsigned int do_nhm_platform_info;
-unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
@@ -250,7 +239,6 @@ double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
-unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
@@ -261,65 +249,686 @@ unsigned int gfx_cur_mhz;
unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
-int tcc_offset_bits;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
-unsigned int do_core_perf_limit_reasons;
-unsigned int has_automatic_cstate_conversion;
-unsigned int dis_cstate_prewake;
-unsigned int do_gfx_perf_limit_reasons;
-unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
-double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
-unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
int ignore_stdin;
-#define RAPL_PKG (1 << 0)
- /* 0x610 MSR_PKG_POWER_LIMIT */
- /* 0x611 MSR_PKG_ENERGY_STATUS */
-#define RAPL_PKG_PERF_STATUS (1 << 1)
- /* 0x613 MSR_PKG_PERF_STATUS */
-#define RAPL_PKG_POWER_INFO (1 << 2)
- /* 0x614 MSR_PKG_POWER_INFO */
-
-#define RAPL_DRAM (1 << 3)
- /* 0x618 MSR_DRAM_POWER_LIMIT */
- /* 0x619 MSR_DRAM_ENERGY_STATUS */
-#define RAPL_DRAM_PERF_STATUS (1 << 4)
- /* 0x61b MSR_DRAM_PERF_STATUS */
-#define RAPL_DRAM_POWER_INFO (1 << 5)
- /* 0x61c MSR_DRAM_POWER_INFO */
-
-#define RAPL_CORES_POWER_LIMIT (1 << 6)
- /* 0x638 MSR_PP0_POWER_LIMIT */
-#define RAPL_CORE_POLICY (1 << 7)
- /* 0x63a MSR_PP0_POLICY */
-
-#define RAPL_GFX (1 << 8)
- /* 0x640 MSR_PP1_POWER_LIMIT */
- /* 0x641 MSR_PP1_ENERGY_STATUS */
- /* 0x642 MSR_PP1_POLICY */
-
-#define RAPL_CORES_ENERGY_STATUS (1 << 9)
- /* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_PER_CORE_ENERGY (1 << 10)
- /* Indicates cores energy collection is per-core,
- * not per-package. */
-#define RAPL_AMD_F17H (1 << 11)
- /* 0xc0010299 MSR_RAPL_PWR_UNIT */
- /* 0xc001029a MSR_CORE_ENERGY_STAT */
- /* 0xc001029b MSR_PKG_ENERGY_STAT */
-#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
+int get_msr(int cpu, off_t offset, unsigned long long *msr);
+
+/* Model specific support Start */
+
+/* List of features that may diverge among different platforms */
+struct platform_features {
+ bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */
+ bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */
+ bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
+ bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
+ int bclk_freq; /* CPU base clock */
+ int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */
+ int supported_cstates; /* Core cstates and Package cstates supported */
+ int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */
+ bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
+ bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
+ bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */
+ bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */
+ bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
+ bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */
+ bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */
+ bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
+ bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */
+ int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
+ int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
+ int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
+ bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
+ bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
+ bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */
+ int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */
+ int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
+ bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
+ bool need_perf_multiplier; /* mperf/aperf multiplier */
+};
+
+struct platform_data {
+ unsigned int model;
+ const struct platform_features *features;
+};
+
+/* For BCLK */
+enum bclk_freq {
+ BCLK_100MHZ = 1,
+ BCLK_133MHZ,
+ BCLK_SLV,
+};
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
+
+double slm_bclk(void)
+{
+ unsigned long long msr = 3;
+ unsigned int i;
+ double freq;
+
+ if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
+ fprintf(outf, "SLM BCLK: unknown\n");
+
+ i = msr & 0xf;
+ if (i >= SLM_BCLK_FREQS) {
+ fprintf(outf, "SLM BCLK[%d] invalid\n", i);
+ i = 3;
+ }
+ freq = slm_freq_table[i];
+
+ if (!quiet)
+ fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
+
+ return freq;
+}
+
+/* For Package cstate limit */
+enum package_cstate_limit {
+ CST_LIMIT_NHM = 1,
+ CST_LIMIT_SNB,
+ CST_LIMIT_HSW,
+ CST_LIMIT_SKX,
+ CST_LIMIT_ICX,
+ CST_LIMIT_SLV,
+ CST_LIMIT_AMT,
+ CST_LIMIT_KNL,
+ CST_LIMIT_GMT,
+};
+
+/* For Turbo Ratio Limit MSRs */
+enum turbo_ratio_limit_msrs {
+ TRL_BASE = BIT(0),
+ TRL_LIMIT1 = BIT(1),
+ TRL_LIMIT2 = BIT(2),
+ TRL_ATOM = BIT(3),
+ TRL_KNL = BIT(4),
+ TRL_CORECOUNT = BIT(5),
+};
+
+/* For Perf Limit Reason MSRs */
+enum perf_limit_reason_msrs {
+ PLR_CORE = BIT(0),
+ PLR_GFX = BIT(1),
+ PLR_RING = BIT(2),
+};
+
+/* For RAPL MSRs */
+enum rapl_msrs {
+ RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */
+ RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */
+ RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */
+ RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */
+ RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */
+ RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */
+ RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */
+ RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */
+ RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */
+ RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */
+ RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */
+ RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */
+ RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */
+ RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */
+ RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
+ RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
+ RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
+};
+
+#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
+#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
+#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
+#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
+
+#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
+#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
+#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
+#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY)
+
+#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
+
+/* For Cstates */
+enum cstates {
+ CC1 = BIT(0),
+ CC3 = BIT(1),
+ CC6 = BIT(2),
+ CC7 = BIT(3),
+ PC2 = BIT(4),
+ PC3 = BIT(5),
+ PC6 = BIT(6),
+ PC7 = BIT(7),
+ PC8 = BIT(8),
+ PC9 = BIT(9),
+ PC10 = BIT(10),
+};
+
+static const struct platform_features nhm_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_133MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_NHM,
+ .trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features nhx_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_133MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_NHM,
+};
+
+static const struct platform_features snb_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_SNB,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features snx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_SNB,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features ivb_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_SNB,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features ivx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_SNB,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE | TRL_LIMIT1,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features hsw_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hsx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
+ .plr_msrs = PLR_CORE | PLR_RING,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features hswl_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hswg_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdw_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdwg_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .has_cst_auto_convension = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features skl_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .crystal_freq = 24000000,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .has_ext_cst_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .tcc_offset_bits = 6,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features cnl_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .has_msr_core_c1_res = 1,
+ .has_ext_cst_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .tcc_offset_bits = 6,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features adl_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .has_msr_core_c1_res = 1,
+ .has_ext_cst_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .tcc_offset_bits = 6,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features skx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_SKX,
+ .has_irtl_msrs = 1,
+ .has_cst_auto_convension = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features icx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_ICX,
+ .has_irtl_msrs = 1,
+ .has_cst_prewake_bit = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features spr_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_SKX,
+ .has_msr_core_c1_res = 1,
+ .has_irtl_msrs = 1,
+ .has_cst_prewake_bit = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features srf_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_SKX,
+ .has_msr_core_c1_res = 1,
+ .has_msr_module_c6_res_ms = 1,
+ .has_irtl_msrs = 1,
+ .has_cst_prewake_bit = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features grr_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6,
+ .cst_limit = CST_LIMIT_SKX,
+ .has_msr_core_c1_res = 1,
+ .has_msr_module_c6_res_ms = 1,
+ .has_irtl_msrs = 1,
+ .has_cst_prewake_bit = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features slv_features = {
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_SLV,
+ .supported_cstates = CC1 | CC6 | PC6,
+ .cst_limit = CST_LIMIT_SLV,
+ .has_msr_core_c1_res = 1,
+ .has_msr_module_c6_res_ms = 1,
+ .has_msr_c6_demotion_policy_config = 1,
+ .has_msr_atom_pkg_c6_residency = 1,
+ .trl_msrs = TRL_ATOM,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE,
+ .has_rapl_divisor = 1,
+ .rapl_quirk_tdp = 30,
+};
+
+static const struct platform_features slvd_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_SLV,
+ .supported_cstates = CC1 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_SLV,
+ .has_msr_atom_pkg_c6_residency = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE,
+ .rapl_quirk_tdp = 30,
+};
+
+static const struct platform_features amt_features = {
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_133MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_AMT,
+ .trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features gmt_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .crystal_freq = 19200000,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features gmtd_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .crystal_freq = 25000000,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .has_msr_core_c1_res = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
+};
+
+static const struct platform_features gmtp_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .crystal_freq = 19200000,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features tmt_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features tmtd_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL,
+};
+
+static const struct platform_features knl_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_KNL,
+ .has_msr_knl_core_c6_residency = 1,
+ .trl_msrs = TRL_KNL,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+ .need_perf_multiplier = 1,
+};
+
+static const struct platform_features default_features = {
+};
+
+static const struct platform_features amd_features_with_rapl = {
+ .rapl_msrs = RAPL_AMD_F17H,
+ .has_per_core_rapl = 1,
+ .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
+};
+
+static const struct platform_data turbostat_pdata[] = {
+ { INTEL_FAM6_NEHALEM, &nhm_features },
+ { INTEL_FAM6_NEHALEM_G, &nhm_features },
+ { INTEL_FAM6_NEHALEM_EP, &nhm_features },
+ { INTEL_FAM6_NEHALEM_EX, &nhx_features },
+ { INTEL_FAM6_WESTMERE, &nhm_features },
+ { INTEL_FAM6_WESTMERE_EP, &nhm_features },
+ { INTEL_FAM6_WESTMERE_EX, &nhx_features },
+ { INTEL_FAM6_SANDYBRIDGE, &snb_features },
+ { INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
+ { INTEL_FAM6_IVYBRIDGE, &ivb_features },
+ { INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
+ { INTEL_FAM6_HASWELL, &hsw_features },
+ { INTEL_FAM6_HASWELL_X, &hsx_features },
+ { INTEL_FAM6_HASWELL_L, &hswl_features },
+ { INTEL_FAM6_HASWELL_G, &hswg_features },
+ { INTEL_FAM6_BROADWELL, &bdw_features },
+ { INTEL_FAM6_BROADWELL_G, &bdwg_features },
+ { INTEL_FAM6_BROADWELL_X, &bdx_features },
+ { INTEL_FAM6_BROADWELL_D, &bdx_features },
+ { INTEL_FAM6_SKYLAKE_L, &skl_features },
+ { INTEL_FAM6_SKYLAKE, &skl_features },
+ { INTEL_FAM6_SKYLAKE_X, &skx_features },
+ { INTEL_FAM6_KABYLAKE_L, &skl_features },
+ { INTEL_FAM6_KABYLAKE, &skl_features },
+ { INTEL_FAM6_COMETLAKE, &skl_features },
+ { INTEL_FAM6_COMETLAKE_L, &skl_features },
+ { INTEL_FAM6_CANNONLAKE_L, &cnl_features },
+ { INTEL_FAM6_ICELAKE_X, &icx_features },
+ { INTEL_FAM6_ICELAKE_D, &icx_features },
+ { INTEL_FAM6_ICELAKE_L, &cnl_features },
+ { INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
+ { INTEL_FAM6_ROCKETLAKE, &cnl_features },
+ { INTEL_FAM6_TIGERLAKE_L, &cnl_features },
+ { INTEL_FAM6_TIGERLAKE, &cnl_features },
+ { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
+ { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
+ { INTEL_FAM6_GRANITERAPIDS_X, &spr_features },
+ { INTEL_FAM6_LAKEFIELD, &cnl_features },
+ { INTEL_FAM6_ALDERLAKE, &adl_features },
+ { INTEL_FAM6_ALDERLAKE_L, &adl_features },
+ { INTEL_FAM6_RAPTORLAKE, &adl_features },
+ { INTEL_FAM6_RAPTORLAKE_P, &adl_features },
+ { INTEL_FAM6_RAPTORLAKE_S, &adl_features },
+ { INTEL_FAM6_METEORLAKE, &cnl_features },
+ { INTEL_FAM6_METEORLAKE_L, &cnl_features },
+ { INTEL_FAM6_ARROWLAKE, &cnl_features },
+ { INTEL_FAM6_LUNARLAKE_M, &cnl_features },
+ { INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
+ { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
+ { INTEL_FAM6_ATOM_AIRMONT, &amt_features },
+ { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
+ { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
+ { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
+ { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
+ { INTEL_FAM6_ATOM_TREMONT, &tmt_features },
+ { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
+ { INTEL_FAM6_ATOM_GRACEMONT, &adl_features },
+ { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features },
+ { INTEL_FAM6_ATOM_CRESTMONT, &grr_features },
+ { INTEL_FAM6_XEON_PHI_KNL, &knl_features },
+ { INTEL_FAM6_XEON_PHI_KNM, &knl_features },
+ /*
+ * Missing support for
+ * INTEL_FAM6_ICELAKE
+ * INTEL_FAM6_ATOM_SILVERMONT_MID
+ * INTEL_FAM6_ATOM_AIRMONT_MID
+ * INTEL_FAM6_ATOM_AIRMONT_NP
+ */
+ { 0, NULL },
+};
+
+static const struct platform_features *platform;
+
+void probe_platform_features(unsigned int family, unsigned int model)
+{
+ int i;
+
+ platform = &default_features;
+
+ if (authentic_amd || hygon_genuine) {
+ if (max_extended_level >= 0x80000007) {
+ unsigned int eax, ebx, ecx, edx;
+
+ __cpuid(0x80000007, eax, ebx, ecx, edx);
+ /* RAPL (Fam 17h+) */
+ if ((edx & (1 << 14)) && family >= 0x17)
+ platform = &amd_features_with_rapl;
+ }
+ return;
+ }
+
+ if (!genuine_intel || family != 6)
+ return;
+
+ for (i = 0; turbostat_pdata[i].features; i++) {
+ if (turbostat_pdata[i].model == model) {
+ platform = turbostat_pdata[i].features;
+ return;
+ }
+ }
+}
+
+/* Model specific support End */
+
#define TJMAX_DEFAULT 100
/* MSRs that are not yet in the kernel-provided header. */
@@ -333,8 +942,8 @@ int backwards_count;
char *progname;
#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
-cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
+cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
@@ -355,12 +964,11 @@ struct thread_data {
unsigned int x2apic_id;
unsigned int flags;
bool is_atom;
-#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
-#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;
struct core_data {
+ int base_cpu;
unsigned long long c3;
unsigned long long c6;
unsigned long long c7;
@@ -373,6 +981,7 @@ struct core_data {
} *core_even, *core_odd;
struct pkg_data {
+ int base_cpu;
unsigned long long pc2;
unsigned long long pc3;
unsigned long long pc6;
@@ -456,7 +1065,7 @@ off_t idx_to_offset(int idx)
switch (idx) {
case IDX_PKG_ENERGY:
- if (do_rapl & RAPL_AMD_F17H)
+ if (platform->rapl_msrs & RAPL_AMD_F17H)
offset = MSR_PKG_ENERGY_STAT;
else
offset = MSR_PKG_ENERGY_STATUS;
@@ -516,17 +1125,17 @@ int idx_valid(int idx)
{
switch (idx) {
case IDX_PKG_ENERGY:
- return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
+ return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
case IDX_DRAM_ENERGY:
- return do_rapl & RAPL_DRAM;
+ return platform->rapl_msrs & RAPL_DRAM;
case IDX_PP0_ENERGY:
- return do_rapl & RAPL_CORES_ENERGY_STATUS;
+ return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
case IDX_PP1_ENERGY:
- return do_rapl & RAPL_GFX;
+ return platform->rapl_msrs & RAPL_GFX;
case IDX_PKG_PERF:
- return do_rapl & RAPL_PKG_PERF_STATUS;
+ return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
case IDX_DRAM_PERF:
- return do_rapl & RAPL_DRAM_PERF_STATUS;
+ return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
default:
return 0;
}
@@ -563,6 +1172,9 @@ struct topo_params {
int num_die;
int num_cpus;
int num_cores;
+ int allowed_packages;
+ int allowed_cpus;
+ int allowed_cores;
int max_cpu_num;
int max_node_num;
int nodes_per_pkg;
@@ -575,7 +1187,7 @@ struct timeval tv_even, tv_odd, tv_delta;
int *irq_column_2_cpu; /* /proc/interrupts column numbers */
int *irqs_per_cpu; /* indexed by cpu_num */
-void setup_all_buffers(void);
+void setup_all_buffers(bool startup);
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
@@ -586,6 +1198,11 @@ int cpu_is_not_present(int cpu)
return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
}
+int cpu_is_not_allowed(int cpu)
+{
+ return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
+}
+
/*
* run func(thread, core, package) in topology order
* skip non-present cpus
@@ -603,10 +1220,9 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
struct thread_data *t;
struct core_data *c;
struct pkg_data *p;
-
t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
- if (cpu_is_not_present(t->cpu_id))
+ if (cpu_is_not_allowed(t->cpu_id))
continue;
c = GET_CORE(core_base, core_no, node_no, pkg_no);
@@ -622,6 +1238,25 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
return 0;
}
+int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ UNUSED(p);
+
+ return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
+}
+
+int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ UNUSED(c);
+
+ return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
+}
+
+int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p);
+}
+
int cpu_migrate(int cpu)
{
CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
@@ -904,11 +1539,11 @@ void print_header(char *delim)
if (DO_BIC(BIC_CORE_THROT_CNT))
outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
- if (do_rapl && !rapl_joules) {
- if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ if (platform->rapl_msrs && !rapl_joules) {
+ if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
- } else if (do_rapl && rapl_joules) {
- if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ } else if (platform->rapl_msrs && rapl_joules) {
+ if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
}
@@ -966,10 +1601,10 @@ void print_header(char *delim)
if (DO_BIC(BIC_SYS_LPI))
outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
- if (do_rapl && !rapl_joules) {
+ if (platform->rapl_msrs && !rapl_joules) {
if (DO_BIC(BIC_PkgWatt))
outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
- if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
if (DO_BIC(BIC_GFXWatt))
outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
@@ -979,10 +1614,10 @@ void print_header(char *delim)
outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
if (DO_BIC(BIC_RAM__))
outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
- } else if (do_rapl && rapl_joules) {
+ } else if (platform->rapl_msrs && rapl_joules) {
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
- if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
if (DO_BIC(BIC_GFX_J))
outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
@@ -1106,11 +1741,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
int printed = 0;
/* if showing only 1st thread in core and this isn't one, bail out */
- if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
return 0;
/* if showing only 1st thread in pkg and this isn't one, bail out */
- if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
return 0;
/*if not summary line and --cpu is used */
@@ -1244,7 +1879,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
/* print per-core data only for 1st thread in core */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
if (DO_BIC(BIC_CPU_c3))
@@ -1284,14 +1919,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
fmt8 = "%s%.2f";
- if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
- if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
/* print per-package data only for 1st core in package */
- if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_core_in_package(t, c, p))
goto done;
/* PkgTmp */
@@ -1352,7 +1987,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
- if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
if (DO_BIC(BIC_GFXWatt))
@@ -1364,7 +1999,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->energy_dram * rapl_dram_energy_units / interval_float);
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
- if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
if (DO_BIC(BIC_GFX_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
@@ -1527,7 +2162,7 @@ void delta_core(struct core_data *new, struct core_data *old)
int soft_c1_residency_display(int bic)
{
- if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
+ if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
return 0;
return DO_BIC_READ(bic);
@@ -1567,7 +2202,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
old->c1 = new->c1 - old->c1;
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+ || soft_c1_residency_display(BIC_Avg_MHz)) {
if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
old->aperf = new->aperf - old->aperf;
old->mperf = new->mperf - old->mperf;
@@ -1576,7 +2212,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
}
}
- if (use_c1_residency_msr) {
+ if (platform->has_msr_core_c1_res) {
/*
* Some models have a dedicated C1 residency MSR,
* which should be more accurate than the derivation below.
@@ -1626,7 +2262,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
int retval = 0;
/* calculate core delta only for 1st thread in core */
- if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
+ if (is_cpu_first_thread_in_core(t, c, p))
delta_core(c, c2);
/* always calculate thread delta */
@@ -1635,7 +2271,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
return retval;
/* calculate package delta only for 1st core in package */
- if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
+ if (is_cpu_first_core_in_package(t, c, p))
retval = delta_package(p, p2);
return retval;
@@ -1663,9 +2299,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->irq_count = 0;
t->smi_count = 0;
- /* tells format_counters to dump all fields from this set */
- t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
-
c->c3 = 0;
c->c6 = 0;
c->c7 = 0;
@@ -1749,7 +2382,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
}
/* sum per-core values only for 1st thread in core */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ if (!is_cpu_first_thread_in_core(t, c, p))
return 0;
average.cores.c3 += c->c3;
@@ -1769,7 +2402,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
}
/* sum per-pkg values only for 1st core in pkg */
- if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_core_in_package(t, c, p))
return 0;
if (DO_BIC(BIC_Totl_c0))
@@ -1834,40 +2467,40 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
/* Use the global time delta for the average. */
average.threads.tv_delta = tv_delta;
- average.threads.tsc /= topo.num_cpus;
- average.threads.aperf /= topo.num_cpus;
- average.threads.mperf /= topo.num_cpus;
- average.threads.instr_count /= topo.num_cpus;
- average.threads.c1 /= topo.num_cpus;
+ average.threads.tsc /= topo.allowed_cpus;
+ average.threads.aperf /= topo.allowed_cpus;
+ average.threads.mperf /= topo.allowed_cpus;
+ average.threads.instr_count /= topo.allowed_cpus;
+ average.threads.c1 /= topo.allowed_cpus;
if (average.threads.irq_count > 9999999)
sums_need_wide_columns = 1;
- average.cores.c3 /= topo.num_cores;
- average.cores.c6 /= topo.num_cores;
- average.cores.c7 /= topo.num_cores;
- average.cores.mc6_us /= topo.num_cores;
+ average.cores.c3 /= topo.allowed_cores;
+ average.cores.c6 /= topo.allowed_cores;
+ average.cores.c7 /= topo.allowed_cores;
+ average.cores.mc6_us /= topo.allowed_cores;
if (DO_BIC(BIC_Totl_c0))
- average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+ average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_Any_c0))
- average.packages.pkg_any_core_c0 /= topo.num_packages;
+ average.packages.pkg_any_core_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_GFX_c0))
- average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+ average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_CPUGFX))
- average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+ average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;
- average.packages.pc2 /= topo.num_packages;
+ average.packages.pc2 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc3))
- average.packages.pc3 /= topo.num_packages;
+ average.packages.pc3 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc6))
- average.packages.pc6 /= topo.num_packages;
+ average.packages.pc6 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc7))
- average.packages.pc7 /= topo.num_packages;
+ average.packages.pc7 /= topo.allowed_packages;
- average.packages.pc8 /= topo.num_packages;
- average.packages.pc9 /= topo.num_packages;
- average.packages.pc10 /= topo.num_packages;
+ average.packages.pc8 /= topo.allowed_packages;
+ average.packages.pc9 /= topo.allowed_packages;
+ average.packages.pc10 /= topo.allowed_packages;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -1877,7 +2510,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
sums_need_wide_columns = 1;
continue;
}
- average.threads.counter[i] /= topo.num_cpus;
+ average.threads.counter[i] /= topo.allowed_cpus;
}
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -1886,7 +2519,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
if (average.cores.counter[i] > 9999999)
sums_need_wide_columns = 1;
}
- average.cores.counter[i] /= topo.num_cores;
+ average.cores.counter[i] /= topo.allowed_cores;
}
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -1895,7 +2528,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
if (average.packages.counter[i] > 9999999)
sums_need_wide_columns = 1;
}
- average.packages.counter[i] /= topo.num_packages;
+ average.packages.counter[i] /= topo.allowed_packages;
}
}
@@ -2092,7 +2725,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+ || soft_c1_residency_display(BIC_Avg_MHz)) {
unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
/*
@@ -2158,7 +2792,7 @@ retry:
return -5;
t->smi_count = msr & 0xFFFFFFFF;
}
- if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
+ if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
return -6;
}
@@ -2169,7 +2803,7 @@ retry:
}
/* collect core counters only for 1st thread in core */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
@@ -2177,10 +2811,10 @@ retry:
return -6;
}
- if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
+ if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
- } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
+ } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
return -7;
}
@@ -2212,7 +2846,7 @@ retry:
if (DO_BIC(BIC_CORE_THROT_CNT))
get_core_throt_cnt(cpu, &c->core_throt_cnt);
- if (do_rapl & RAPL_AMD_F17H) {
+ if (platform->rapl_msrs & RAPL_AMD_F17H) {
if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
return -14;
c->core_energy = msr & 0xFFFFFFFF;
@@ -2224,7 +2858,7 @@ retry:
}
/* collect package counters only for 1st core in package */
- if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_core_in_package(t, c, p))
goto done;
if (DO_BIC(BIC_Totl_c0)) {
@@ -2247,7 +2881,7 @@ retry:
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
return -9;
if (DO_BIC(BIC_Pkgpc6)) {
- if (do_slm_cstates) {
+ if (platform->has_msr_atom_pkg_c6_residency) {
if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
return -10;
} else {
@@ -2277,37 +2911,37 @@ retry:
if (DO_BIC(BIC_SYS_LPI))
p->sys_lpi = cpuidle_cur_sys_lpi_us;
- if (do_rapl & RAPL_PKG) {
+ if (platform->rapl_msrs & RAPL_PKG) {
if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
return -13;
p->energy_pkg = msr;
}
- if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
+ if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
return -14;
p->energy_cores = msr;
}
- if (do_rapl & RAPL_DRAM) {
+ if (platform->rapl_msrs & RAPL_DRAM) {
if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
return -15;
p->energy_dram = msr;
}
- if (do_rapl & RAPL_GFX) {
+ if (platform->rapl_msrs & RAPL_GFX) {
if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
return -16;
p->energy_gfx = msr;
}
- if (do_rapl & RAPL_PKG_PERF_STATUS) {
+ if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
return -16;
p->rapl_pkg_perf_status = msr;
}
- if (do_rapl & RAPL_DRAM_PERF_STATUS) {
+ if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
return -16;
p->rapl_dram_perf_status = msr;
}
- if (do_rapl & RAPL_AMD_F17H) {
+ if (platform->rapl_msrs & RAPL_AMD_F17H) {
if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
return -13;
p->energy_pkg = msr;
@@ -2414,18 +3048,58 @@ int icx_pkg_cstate_limits[16] =
PCLRSV, PCLRSV
};
-static void calculate_tsc_tweak()
+void probe_cst_limit(void)
{
- tsc_tweak = base_hz / tsc_hz;
-}
+ unsigned long long msr;
+ int *pkg_cstate_limits;
+
+ if (!platform->has_nhm_msrs)
+ return;
+
+ switch (platform->cst_limit) {
+ case CST_LIMIT_NHM:
+ pkg_cstate_limits = nhm_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_SNB:
+ pkg_cstate_limits = snb_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_HSW:
+ pkg_cstate_limits = hsw_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_SKX:
+ pkg_cstate_limits = skx_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_ICX:
+ pkg_cstate_limits = icx_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_SLV:
+ pkg_cstate_limits = slv_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_AMT:
+ pkg_cstate_limits = amt_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_KNL:
+ pkg_cstate_limits = phi_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_GMT:
+ pkg_cstate_limits = glm_pkg_cstate_limits;
+ break;
+ default:
+ return;
+ }
-void prewake_cstate_probe(unsigned int family, unsigned int model);
+ get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
+ pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+}
-static void dump_nhm_platform_info(void)
+static void dump_platform_info(void)
{
unsigned long long msr;
unsigned int ratio;
+ if (!platform->has_nhm_msrs)
+ return;
+
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
@@ -2435,19 +3109,27 @@ static void dump_nhm_platform_info(void)
ratio = (msr >> 8) & 0xFF;
fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
+}
+
+static void dump_power_ctl(void)
+{
+ unsigned long long msr;
+
+ if (!platform->has_nhm_msrs)
+ return;
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
/* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
- if (dis_cstate_prewake)
+ if (platform->has_cst_prewake_bit)
fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
return;
}
-static void dump_hsw_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit2(void)
{
unsigned long long msr;
unsigned int ratio;
@@ -2466,7 +3148,7 @@ static void dump_hsw_turbo_ratio_limits(void)
return;
}
-static void dump_ivt_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit1(void)
{
unsigned long long msr;
unsigned int ratio;
@@ -2509,29 +3191,7 @@ static void dump_ivt_turbo_ratio_limits(void)
return;
}
-int has_turbo_ratio_group_limits(int family, int model)
-{
-
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_ATOM_GOLDMONT_D:
- case INTEL_FAM6_ATOM_TREMONT_D:
- return 1;
- default:
- return 0;
- }
-}
-
-static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
+static void dump_turbo_ratio_limits(int trl_msr_offset)
{
unsigned long long msr, core_counts;
int shift;
@@ -2540,7 +3200,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
- if (has_turbo_ratio_group_limits(family, model)) {
+ if (platform->trl_msrs & TRL_CORECOUNT) {
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
} else {
@@ -2657,10 +3317,13 @@ static void dump_knl_turbo_ratio_limits(void)
ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
-static void dump_nhm_cst_cfg(void)
+static void dump_cst_cfg(void)
{
unsigned long long msr;
+ if (!platform->has_nhm_msrs)
+ return;
+
get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
@@ -2673,7 +3336,7 @@ static void dump_nhm_cst_cfg(void)
(msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
- if (has_automatic_cstate_conversion) {
+ if (platform->has_cst_auto_convension) {
fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
}
@@ -2730,39 +3393,50 @@ void print_irtl(void)
{
unsigned long long msr;
- get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
-
- get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (!platform->has_irtl_msrs)
+ return;
- get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (platform->supported_cstates & PC3) {
+ get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
- if (!do_irtl_hsw)
- return;
+ if (platform->supported_cstates & PC6) {
+ get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
- get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (platform->supported_cstates & PC7) {
+ get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
- get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (platform->supported_cstates & PC8) {
+ get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
- get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (platform->supported_cstates & PC9) {
+ get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
+ if (platform->supported_cstates & PC10) {
+ get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
}
void free_fd_percpu(void)
@@ -2785,6 +3459,14 @@ void free_all_buffers(void)
cpu_present_set = NULL;
cpu_present_setsize = 0;
+ CPU_FREE(cpu_effective_set);
+ cpu_effective_set = NULL;
+ cpu_effective_setsize = 0;
+
+ CPU_FREE(cpu_allowed_set);
+ cpu_allowed_set = NULL;
+ cpu_allowed_setsize = 0;
+
CPU_FREE(cpu_affinity_set);
cpu_affinity_set = NULL;
cpu_affinity_setsize = 0;
@@ -2927,6 +3609,59 @@ int get_physical_node_id(struct cpu_topology *thiscpu)
return -1;
}
+static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
+{
+ unsigned int start, end;
+ char *next = cpu_str;
+
+ while (next && *next) {
+
+ if (*next == '-') /* no negative cpu numbers */
+ return 1;
+
+ start = strtoul(next, &next, 10);
+
+ if (start >= CPU_SUBSET_MAXCPUS)
+ return 1;
+ CPU_SET_S(start, cpu_set_size, cpu_set);
+
+ if (*next == '\0' || *next == '\n')
+ break;
+
+ if (*next == ',') {
+ next += 1;
+ continue;
+ }
+
+ if (*next == '-') {
+ next += 1; /* start range */
+ } else if (*next == '.') {
+ next += 1;
+ if (*next == '.')
+ next += 1; /* start range */
+ else
+ return 1;
+ }
+
+ end = strtoul(next, &next, 10);
+ if (end <= start)
+ return 1;
+
+ while (++start <= end) {
+ if (start >= CPU_SUBSET_MAXCPUS)
+ return 1;
+ CPU_SET_S(start, cpu_set_size, cpu_set);
+ }
+
+ if (*next == ',')
+ next += 1;
+ else if (*next != '\0' && *next != '\n')
+ return 1;
+ }
+
+ return 0;
+}
+
int get_thread_siblings(struct cpu_topology *thiscpu)
{
char path[80], character;
@@ -2998,7 +3733,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
- if (cpu_is_not_present(t->cpu_id))
+ if (cpu_is_not_allowed(t->cpu_id))
continue;
t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
@@ -3050,11 +3785,51 @@ int for_all_proc_cpus(int (func) (int))
return 0;
}
+#define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective"
+
+static char cpu_effective_str[1024];
+
+static int update_effective_str(bool startup)
+{
+ FILE *fp;
+ char *pos;
+ char buf[1024];
+ int ret;
+
+ if (cpu_effective_str[0] == '\0' && !startup)
+ return 0;
+
+ fp = fopen(PATH_EFFECTIVE_CPUS, "r");
+ if (!fp)
+ return 0;
+
+ pos = fgets(buf, 1024, fp);
+ if (!pos)
+ err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);
+
+ fclose(fp);
+
+ ret = strncmp(cpu_effective_str, buf, 1024);
+ if (!ret)
+ return 0;
+
+ strncpy(cpu_effective_str, buf, 1024);
+ return 1;
+}
+
+static void update_effective_set(bool startup)
+{
+ update_effective_str(startup);
+
+ if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
+ err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
+}
+
void re_initialize(void)
{
free_all_buffers();
- setup_all_buffers();
- fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+ setup_all_buffers(false);
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
}
void set_max_cpu_num(void)
@@ -3191,8 +3966,8 @@ int snapshot_gfx_rc6_ms(void)
/*
* snapshot_gfx_mhz()
*
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
*
* return 1 if config change requires a restart, else return 0
*/
@@ -3201,9 +3976,11 @@ int snapshot_gfx_mhz(void)
static FILE *fp;
int retval;
- if (fp == NULL)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
- else {
+ if (fp == NULL) {
+ fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
+ if (!fp)
+ fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
+ } else {
rewind(fp);
fflush(fp);
}
@@ -3218,8 +3995,8 @@ int snapshot_gfx_mhz(void)
/*
* snapshot_gfx_cur_mhz()
*
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
*
* return 1 if config change requires a restart, else return 0
*/
@@ -3228,9 +4005,11 @@ int snapshot_gfx_act_mhz(void)
static FILE *fp;
int retval;
- if (fp == NULL)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
- else {
+ if (fp == NULL) {
+ fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
+ if (!fp)
+ fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
+ } else {
rewind(fp);
fflush(fp);
}
@@ -3562,6 +4341,10 @@ restart:
re_initialize();
goto restart;
}
+ if (update_effective_str(false)) {
+ re_initialize();
+ goto restart;
+ }
do_sleep();
if (snapshot_proc_sysfs_files())
goto restart;
@@ -3674,395 +4457,31 @@ void check_permissions(void)
exit(-6);
}
-/*
- * NHM adds support for additional MSRs:
- *
- * MSR_SMI_COUNT 0x00000034
- *
- * MSR_PLATFORM_INFO 0x000000ce
- * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
- *
- * MSR_MISC_PWR_MGMT 0x000001aa
- *
- * MSR_PKG_C3_RESIDENCY 0x000003f8
- * MSR_PKG_C6_RESIDENCY 0x000003f9
- * MSR_CORE_C3_RESIDENCY 0x000003fc
- * MSR_CORE_C6_RESIDENCY 0x000003fd
- *
- * Side effect:
- * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
- * sets has_misc_feature_control
- */
-int probe_nhm_msrs(unsigned int family, unsigned int model)
+void probe_bclk(void)
{
unsigned long long msr;
unsigned int base_ratio;
- int *pkg_cstate_limits;
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- bclk = discover_bclk(family, model);
+ if (!platform->has_nhm_msrs)
+ return;
- switch (model) {
- case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
- case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
- pkg_cstate_limits = nhm_pkg_cstate_limits;
- break;
- case INTEL_FAM6_SANDYBRIDGE: /* SNB */
- case INTEL_FAM6_SANDYBRIDGE_X: /* SNB Xeon */
- case INTEL_FAM6_IVYBRIDGE: /* IVB */
- case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
- pkg_cstate_limits = snb_pkg_cstate_limits;
- has_misc_feature_control = 1;
- break;
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_HASWELL_X: /* HSX */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- pkg_cstate_limits = hsw_pkg_cstate_limits;
- has_misc_feature_control = 1;
- break;
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
- pkg_cstate_limits = skx_pkg_cstate_limits;
- has_misc_feature_control = 1;
- break;
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- pkg_cstate_limits = icx_pkg_cstate_limits;
- has_misc_feature_control = 1;
- break;
- case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
- no_MSR_MISC_PWR_MGMT = 1;
- /* FALLTHRU */
- case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
- pkg_cstate_limits = slv_pkg_cstate_limits;
- break;
- case INTEL_FAM6_ATOM_AIRMONT: /* AMT */
- pkg_cstate_limits = amt_pkg_cstate_limits;
- no_MSR_MISC_PWR_MGMT = 1;
- break;
- case INTEL_FAM6_XEON_PHI_KNL: /* PHI */
- pkg_cstate_limits = phi_pkg_cstate_limits;
- break;
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
- pkg_cstate_limits = glm_pkg_cstate_limits;
- break;
- default:
- return 0;
- }
- get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
- pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+ if (platform->bclk_freq == BCLK_100MHZ)
+ bclk = 100.00;
+ else if (platform->bclk_freq == BCLK_133MHZ)
+ bclk = 133.33;
+ else if (platform->bclk_freq == BCLK_SLV)
+ bclk = slm_bclk();
+ else
+ return;
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
base_ratio = (msr >> 8) & 0xFF;
base_hz = base_ratio * bclk * 1000000;
has_base_hz = 1;
- return 1;
-}
-
-/*
- * SLV client has support for unique MSRs:
- *
- * MSR_CC6_DEMOTION_POLICY_CONFIG
- * MSR_MC6_DEMOTION_POLICY_CONFIG
- */
-
-int has_slv_msrs(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_MID:
- case INTEL_FAM6_ATOM_AIRMONT_MID:
- return 1;
- }
- return 0;
-}
-
-int is_dnv(unsigned int family, unsigned int model)
-{
-
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_GOLDMONT_D:
- return 1;
- }
- return 0;
-}
-
-int is_bdx(unsigned int family, unsigned int model)
-{
-
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_BROADWELL_X:
- return 1;
- }
- return 0;
-}
-
-int is_skx(unsigned int family, unsigned int model)
-{
-
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_SKYLAKE_X:
- return 1;
- }
- return 0;
-}
-
-int is_icx(unsigned int family, unsigned int model)
-{
-
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ICELAKE_X:
- return 1;
- }
- return 0;
-}
-
-int is_spr(unsigned int family, unsigned int model)
-{
-
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- return 1;
- }
- return 0;
-}
-
-int is_ehl(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_TREMONT:
- return 1;
- }
- return 0;
-}
-
-int is_jvl(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_TREMONT_D:
- return 1;
- }
- return 0;
-}
-
-int has_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (has_slv_msrs(family, model))
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- /* Nehalem compatible, but do not include turbo-ratio limit support */
- case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
- case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */
- return 0;
- default:
- return 1;
- }
-}
-
-int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (has_slv_msrs(family, model))
- return 1;
-
- return 0;
-}
-
-int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
- case INTEL_FAM6_HASWELL_X: /* HSW Xeon */
- return 1;
- default:
- return 0;
- }
-}
-
-int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_HASWELL_X: /* HSW Xeon */
- return 1;
- default:
- return 0;
- }
-}
-
-int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
- return 1;
- default:
- return 0;
- }
-}
-
-int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- return 1;
- default:
- return 0;
- }
-}
-
-int has_config_tdp(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_IVYBRIDGE: /* IVB */
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_X: /* HSX */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
- case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
- return 1;
- default:
- return 0;
- }
-}
-
-/*
- * tcc_offset_bits:
- * 0: Tcc Offset not supported (Default)
- * 6: Bit 29:24 of MSR_PLATFORM_INFO
- * 4: Bit 27:24 of MSR_PLATFORM_INFO
- */
-void check_tcc_offset(int model)
-{
- unsigned long long msr;
-
- if (!genuine_intel)
- return;
-
- switch (model) {
- case INTEL_FAM6_SKYLAKE_L:
- case INTEL_FAM6_SKYLAKE:
- case INTEL_FAM6_KABYLAKE_L:
- case INTEL_FAM6_KABYLAKE:
- case INTEL_FAM6_ICELAKE_L:
- case INTEL_FAM6_ICELAKE:
- case INTEL_FAM6_TIGERLAKE_L:
- case INTEL_FAM6_TIGERLAKE:
- case INTEL_FAM6_COMETLAKE:
- if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
- msr = (msr >> 30) & 1;
- if (msr)
- tcc_offset_bits = 6;
- }
- return;
- default:
- return;
- }
+ if (platform->enable_tsc_tweak)
+ tsc_tweak = base_hz / tsc_hz;
}
static void remove_underbar(char *s)
@@ -4078,44 +4497,37 @@ static void remove_underbar(char *s)
*to = 0;
}
-static void dump_turbo_ratio_info(unsigned int family, unsigned int model)
+static void dump_turbo_ratio_info(void)
{
if (!has_turbo)
return;
- if (has_hsw_turbo_ratio_limit(family, model))
- dump_hsw_turbo_ratio_limits();
+ if (!platform->has_nhm_msrs)
+ return;
+
+ if (platform->trl_msrs & TRL_LIMIT2)
+ dump_turbo_ratio_limit2();
- if (has_ivt_turbo_ratio_limit(family, model))
- dump_ivt_turbo_ratio_limits();
+ if (platform->trl_msrs & TRL_LIMIT1)
+ dump_turbo_ratio_limit1();
- if (has_turbo_ratio_limit(family, model)) {
- dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
+ if (platform->trl_msrs & TRL_BASE) {
+ dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);
if (is_hybrid)
- dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
+ dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
}
- if (has_atom_turbo_ratio_limit(family, model))
+ if (platform->trl_msrs & TRL_ATOM)
dump_atom_turbo_ratio_limits();
- if (has_knl_turbo_ratio_limit(family, model))
+ if (platform->trl_msrs & TRL_KNL)
dump_knl_turbo_ratio_limits();
- if (has_config_tdp(family, model))
+ if (platform->has_config_tdp)
dump_config_tdp();
}
-static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
-{
- if (!do_nhm_platform_info)
- return;
-
- dump_nhm_platform_info();
- dump_turbo_ratio_info(family, model);
- dump_nhm_cst_cfg();
-}
-
static int read_sysfs_int(char *path)
{
FILE *input;
@@ -4152,7 +4564,7 @@ static void dump_sysfs_file(char *path)
fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}
-static void intel_uncore_frequency_probe(void)
+static void probe_intel_uncore_frequency(void)
{
int i, j;
char path[128];
@@ -4163,6 +4575,10 @@ static void intel_uncore_frequency_probe(void)
if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
return;
+ /* Cluster level sysfs not supported yet. */
+ if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
+ return;
+
if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
BIC_PRESENT(BIC_UNCORE_MHZ);
@@ -4194,6 +4610,20 @@ static void intel_uncore_frequency_probe(void)
}
}
+static void probe_graphics(void)
+{
+ if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
+ BIC_PRESENT(BIC_GFX_rc6);
+
+ if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
+ !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ BIC_PRESENT(BIC_GFXMHz);
+
+ if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
+ !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ BIC_PRESENT(BIC_GFXACTMHz);
+}
+
static void dump_sysfs_cstate_config(void)
{
char path[64];
@@ -4310,7 +4740,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu = t->cpu_id;
/* EPB is per-package */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
if (cpu_migrate(cpu)) {
@@ -4359,7 +4789,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu = t->cpu_id;
/* MSR_HWP_CAPABILITIES is per-package */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
if (cpu_migrate(cpu)) {
@@ -4442,7 +4872,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
cpu = t->cpu_id;
/* per-package */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
if (cpu_migrate(cpu)) {
@@ -4450,7 +4880,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
return -1;
}
- if (do_core_perf_limit_reasons) {
+ if (platform->plr_msrs & PLR_CORE) {
get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
@@ -4483,7 +4913,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
}
- if (do_gfx_perf_limit_reasons) {
+ if (platform->plr_msrs & PLR_GFX) {
get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
@@ -4503,7 +4933,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
(msr & 1 << 25) ? "GFXPwr, " : "",
(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
}
- if (do_ring_perf_limit_reasons) {
+ if (platform->plr_msrs & PLR_RING) {
get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(outf, " (Active: %s%s%s%s%s%s)",
@@ -4525,208 +4955,74 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
-double get_tdp_intel(unsigned int model)
+double get_quirk_tdp(void)
{
- unsigned long long msr;
+ if (platform->rapl_quirk_tdp)
+ return platform->rapl_quirk_tdp;
- if (do_rapl & RAPL_PKG_POWER_INFO)
- if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
- return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
-
- switch (model) {
- case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_D:
- return 30.0;
- default:
- return 135.0;
- }
+ return 135.0;
}
-double get_tdp_amd(unsigned int family)
+double get_tdp_intel(void)
{
- UNUSED(family);
+ unsigned long long msr;
- /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
- return 280.0;
+ if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
+ if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
+ return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+ return get_quirk_tdp();
}
-/*
- * rapl_dram_energy_units_probe()
- * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
- */
-static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+double get_tdp_amd(void)
{
- /* only called for genuine_intel, family 6 */
-
- switch (model) {
- case INTEL_FAM6_HASWELL_X: /* HSX */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- return (rapl_dram_energy_units = 15.3 / 1000000);
- default:
- return (rapl_energy_units);
- }
+ return get_quirk_tdp();
}
-void rapl_probe_intel(unsigned int family, unsigned int model)
+void rapl_probe_intel(void)
{
unsigned long long msr;
unsigned int time_unit;
double tdp;
- if (family != 6)
- return;
-
- switch (model) {
- case INTEL_FAM6_SANDYBRIDGE:
- case INTEL_FAM6_IVYBRIDGE:
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- BIC_PRESENT(BIC_GFX_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- BIC_PRESENT(BIC_GFXWatt);
- }
- break;
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
- if (rapl_joules)
- BIC_PRESENT(BIC_Pkg_J);
- else
- BIC_PRESENT(BIC_PkgWatt);
- break;
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- do_rapl =
- RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
- | RAPL_GFX | RAPL_PKG_POWER_INFO;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- BIC_PRESENT(BIC_RAM_J);
- BIC_PRESENT(BIC_GFX_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- BIC_PRESENT(BIC_RAMWatt);
- BIC_PRESENT(BIC_GFXWatt);
- }
- break;
- case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
- do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
- BIC_PRESENT(BIC_PKG__);
- if (rapl_joules)
- BIC_PRESENT(BIC_Pkg_J);
- else
- BIC_PRESENT(BIC_PkgWatt);
- break;
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- do_rapl =
- RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
- | RAPL_GFX | RAPL_PKG_POWER_INFO;
- BIC_PRESENT(BIC_PKG__);
- BIC_PRESENT(BIC_RAM__);
- if (rapl_joules) {
+ if (rapl_joules) {
+ if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
BIC_PRESENT(BIC_Pkg_J);
+ if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
BIC_PRESENT(BIC_Cor_J);
+ if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
BIC_PRESENT(BIC_RAM_J);
+ if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
BIC_PRESENT(BIC_GFX_J);
- } else {
+ } else {
+ if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
BIC_PRESENT(BIC_PkgWatt);
+ if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
BIC_PRESENT(BIC_CorWatt);
+ if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
BIC_PRESENT(BIC_RAMWatt);
+ if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
BIC_PRESENT(BIC_GFXWatt);
- }
- break;
- case INTEL_FAM6_HASWELL_X: /* HSX */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
- case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
- do_rapl =
- RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
- RAPL_PKG_POWER_INFO;
- BIC_PRESENT(BIC_PKG__);
- BIC_PRESENT(BIC_RAM__);
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_RAM_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_RAMWatt);
- }
- break;
- case INTEL_FAM6_SANDYBRIDGE_X:
- case INTEL_FAM6_IVYBRIDGE_X:
- do_rapl =
- RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
- RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
- BIC_PRESENT(BIC_PKG__);
- BIC_PRESENT(BIC_RAM__);
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- BIC_PRESENT(BIC_RAM_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- BIC_PRESENT(BIC_RAMWatt);
- }
- break;
- case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
- case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
- do_rapl = RAPL_PKG | RAPL_CORES;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- }
- break;
- case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- do_rapl =
- RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
- RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
+ }
+
+ if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
BIC_PRESENT(BIC_PKG__);
+ if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
BIC_PRESENT(BIC_RAM__);
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- BIC_PRESENT(BIC_RAM_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- BIC_PRESENT(BIC_RAMWatt);
- }
- break;
- default:
- return;
- }
/* units on package 0, verify later other packages match */
if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
return;
rapl_power_units = 1.0 / (1 << (msr & 0xF));
- if (model == INTEL_FAM6_ATOM_SILVERMONT)
+ if (platform->has_rapl_divisor)
rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
else
rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
- rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+ if (platform->has_fixed_rapl_unit)
+ rapl_dram_energy_units = (15.3 / 1000000);
+ else
+ rapl_dram_energy_units = rapl_energy_units;
time_unit = msr >> 16 & 0xF;
if (time_unit == 0)
@@ -4734,32 +5030,18 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
rapl_time_units = 1.0 / (1 << (time_unit));
- tdp = get_tdp_intel(model);
+ tdp = get_tdp_intel();
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (!quiet)
fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}
-void rapl_probe_amd(unsigned int family, unsigned int model)
+void rapl_probe_amd(void)
{
unsigned long long msr;
- unsigned int eax, ebx, ecx, edx;
- unsigned int has_rapl = 0;
double tdp;
- UNUSED(model);
-
- if (max_extended_level >= 0x80000007) {
- __cpuid(0x80000007, eax, ebx, ecx, edx);
- /* RAPL (Fam 17h+) */
- has_rapl = edx & (1 << 14);
- }
-
- if (!has_rapl || family < 0x17)
- return;
-
- do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
if (rapl_joules) {
BIC_PRESENT(BIC_Pkg_J);
BIC_PRESENT(BIC_Cor_J);
@@ -4775,128 +5057,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
rapl_power_units = ldexp(1.0, -(msr & 0xf));
- tdp = get_tdp_amd(family);
+ tdp = get_tdp_amd();
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (!quiet)
fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}
-/*
- * rapl_probe()
- *
- * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
- */
-void rapl_probe(unsigned int family, unsigned int model)
-{
- if (genuine_intel)
- rapl_probe_intel(family, model);
- if (authentic_amd || hygon_genuine)
- rapl_probe_amd(family, model);
-}
-
-void perf_limit_reasons_probe(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return;
-
- if (family != 6)
- return;
-
- switch (model) {
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- do_gfx_perf_limit_reasons = 1;
- /* FALLTHRU */
- case INTEL_FAM6_HASWELL_X: /* HSX */
- do_core_perf_limit_reasons = 1;
- do_ring_perf_limit_reasons = 1;
- default:
- return;
- }
-}
-
-void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
-{
- if (family != 6)
- return;
-
- switch (model) {
- case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_SKYLAKE_X:
- has_automatic_cstate_conversion = 1;
- }
-}
-
-void prewake_cstate_probe(unsigned int family, unsigned int model)
-{
- if (is_icx(family, model) || is_spr(family, model))
- dis_cstate_prewake = 1;
-}
-
-int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
- unsigned long long msr;
- unsigned int dts, dts2;
- int cpu;
-
- UNUSED(c);
- UNUSED(p);
-
- if (!(do_dts || do_ptm))
- return 0;
-
- cpu = t->cpu_id;
-
- /* DTS is per-core, no need to print for each thread */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
- return 0;
-
- if (cpu_migrate(cpu)) {
- fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
- return -1;
- }
-
- if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
- if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
- return 0;
-
- dts = (msr >> 16) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
-
- if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
- return 0;
-
- dts = (msr >> 16) & 0x7F;
- dts2 = (msr >> 8) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
- cpu, msr, tj_max - dts, tj_max - dts2);
- }
-
- if (do_dts && debug) {
- unsigned int resolution;
-
- if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
- return 0;
-
- dts = (msr >> 16) & 0x7F;
- resolution = (msr >> 27) & 0xF;
- fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
- cpu, msr, tj_max - dts, resolution);
-
- if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
- return 0;
-
- dts = (msr >> 16) & 0x7F;
- dts2 = (msr >> 8) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
- cpu, msr, tj_max - dts, tj_max - dts2);
- }
-
- return 0;
-}
-
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
@@ -4918,11 +5085,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
UNUSED(c);
UNUSED(p);
- if (!do_rapl)
+ if (!platform->rapl_msrs)
return 0;
/* RAPL counters are per package, so print only for 1st thread/package */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
cpu = t->cpu_id;
@@ -4931,7 +5098,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -1;
}
- if (do_rapl & RAPL_AMD_F17H) {
+ if (platform->rapl_msrs & RAPL_AMD_F17H) {
msr_name = "MSR_RAPL_PWR_UNIT";
if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
return -1;
@@ -4944,7 +5111,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
rapl_power_units, rapl_energy_units, rapl_time_units);
- if (do_rapl & RAPL_PKG_POWER_INFO) {
+ if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
return -5;
@@ -4957,7 +5124,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
}
- if (do_rapl & RAPL_PKG) {
+ if (platform->rapl_msrs & RAPL_PKG) {
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
return -9;
@@ -4981,7 +5148,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
}
- if (do_rapl & RAPL_DRAM_POWER_INFO) {
+ if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
return -6;
@@ -4992,7 +5159,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
}
- if (do_rapl & RAPL_DRAM) {
+ if (platform->rapl_msrs & RAPL_DRAM) {
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9;
fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -5000,20 +5167,20 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
- if (do_rapl & RAPL_CORE_POLICY) {
+ if (platform->rapl_msrs & RAPL_CORE_POLICY) {
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7;
fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
}
- if (do_rapl & RAPL_CORES_POWER_LIMIT) {
+ if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "Cores Limit");
}
- if (do_rapl & RAPL_GFX) {
+ if (platform->rapl_msrs & RAPL_GFX) {
if (get_msr(cpu, MSR_PP1_POLICY, &msr))
return -8;
@@ -5029,217 +5196,24 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
}
/*
- * SNB adds support for additional MSRs:
+ * probe_rapl()
*
- * MSR_PKG_C7_RESIDENCY 0x000003fa
- * MSR_CORE_C7_RESIDENCY 0x000003fe
- * MSR_PKG_C2_RESIDENCY 0x0000060d
+ * sets rapl_power_units, rapl_energy_units, rapl_time_units
*/
-
-int has_snb_msrs(unsigned int family, unsigned int model)
+void probe_rapl(void)
{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_SANDYBRIDGE:
- case INTEL_FAM6_SANDYBRIDGE_X:
- case INTEL_FAM6_IVYBRIDGE: /* IVB */
- case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_X: /* HSW */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
- return 1;
- }
- return 0;
-}
-
-/*
- * HSW ULT added support for C8/C9/C10 MSRs:
- *
- * MSR_PKG_C8_RESIDENCY 0x00000630
- * MSR_PKG_C9_RESIDENCY 0x00000631
- * MSR_PKG_C10_RESIDENCY 0x00000632
- *
- * MSR_PKGC8_IRTL 0x00000633
- * MSR_PKGC9_IRTL 0x00000634
- * MSR_PKGC10_IRTL 0x00000635
- *
- */
-int has_c8910_msrs(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- return 1;
- }
- return 0;
-}
-
-/*
- * SKL adds support for additional MSRS:
- *
- * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
- * MSR_PKG_ANY_CORE_C0_RES 0x00000659
- * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
- * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
- */
-int has_skl_msrs(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- return 1;
- }
- return 0;
-}
-
-int is_slm(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
- case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
- return 1;
- }
- return 0;
-}
-
-int is_knl(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
- return 1;
- }
- return 0;
-}
-
-int is_cnl(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- return 1;
- }
-
- return 0;
-}
-
-unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
-{
- if (is_knl(family, model))
- return 1024;
- return 1;
-}
-
-#define SLM_BCLK_FREQS 5
-double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
-
-double slm_bclk(void)
-{
- unsigned long long msr = 3;
- unsigned int i;
- double freq;
-
- if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
- fprintf(outf, "SLM BCLK: unknown\n");
-
- i = msr & 0xf;
- if (i >= SLM_BCLK_FREQS) {
- fprintf(outf, "SLM BCLK[%d] invalid\n", i);
- i = 3;
- }
- freq = slm_freq_table[i];
-
- if (!quiet)
- fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
-
- return freq;
-}
-
-double discover_bclk(unsigned int family, unsigned int model)
-{
- if (has_snb_msrs(family, model) || is_knl(family, model))
- return 100.00;
- else if (is_slm(family, model))
- return slm_bclk();
- else
- return 133.33;
-}
-
-int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
- unsigned int eax, ebx, ecx, edx;
-
- UNUSED(c);
- UNUSED(p);
-
- if (!genuine_intel)
- return 0;
+ if (!platform->rapl_msrs)
+ return;
- if (cpu_migrate(t->cpu_id)) {
- fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
- return -1;
- }
+ if (genuine_intel)
+ rapl_probe_intel();
+ if (authentic_amd || hygon_genuine)
+ rapl_probe_amd();
- if (max_level < 0x1a)
- return 0;
+ if (quiet)
+ return;
- __cpuid(0x1a, eax, ebx, ecx, edx);
- eax = (eax >> 24) & 0xFF;
- if (eax == 0x20)
- t->is_atom = true;
- return 0;
+ for_all_cpus(print_rapl, ODD_COUNTERS);
}
/*
@@ -5268,7 +5242,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
return 0;
/* this is a per-package concept */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
cpu = t->cpu_id;
@@ -5284,7 +5258,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
}
/* Temperature Target MSR is Nehalem and newer only */
- if (!do_nhm_platform_info)
+ if (!platform->has_nhm_msrs)
goto guess;
if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5293,20 +5267,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
tcc_default = (msr >> 16) & 0xFF;
if (!quiet) {
- switch (tcc_offset_bits) {
- case 4:
- tcc_offset = (msr >> 24) & 0xF;
- fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
- cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
- break;
- case 6:
- tcc_offset = (msr >> 24) & 0x3F;
+ int bits = platform->tcc_offset_bits;
+ unsigned long long enabled = 0;
+
+ if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
+ enabled = (enabled >> 30) & 1;
+
+ if (bits && enabled) {
+ tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
- break;
- default:
+ } else {
fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
- break;
}
}
@@ -5324,6 +5296,108 @@ guess:
return 0;
}
+int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ unsigned int dts, dts2;
+ int cpu;
+
+ UNUSED(c);
+ UNUSED(p);
+
+ if (!(do_dts || do_ptm))
+ return 0;
+
+ cpu = t->cpu_id;
+
+ /* DTS is per-core, no need to print for each thread */
+ if (!is_cpu_first_thread_in_core(t, c, p))
+ return 0;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
+ if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
+
+ if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ dts2 = (msr >> 8) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+ cpu, msr, tj_max - dts, tj_max - dts2);
+ }
+
+ if (do_dts && debug) {
+ unsigned int resolution;
+
+ if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ resolution = (msr >> 27) & 0xF;
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+ cpu, msr, tj_max - dts, resolution);
+
+ if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ dts2 = (msr >> 8) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+ cpu, msr, tj_max - dts, tj_max - dts2);
+ }
+
+ return 0;
+}
+
+void probe_thermal(void)
+{
+ if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
+ BIC_PRESENT(BIC_CORE_THROT_CNT);
+ else
+ BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+
+ for_all_cpus(set_temperature_target, ODD_COUNTERS);
+
+ if (quiet)
+ return;
+
+ for_all_cpus(print_thermal, ODD_COUNTERS);
+}
+
+int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ UNUSED(c);
+ UNUSED(p);
+
+ if (!genuine_intel)
+ return 0;
+
+ if (cpu_migrate(t->cpu_id)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
+ return -1;
+ }
+
+ if (max_level < 0x1a)
+ return 0;
+
+ __cpuid(0x1a, eax, ebx, ecx, edx);
+ eax = (eax >> 24) & 0xFF;
+ if (eax == 0x20)
+ t->is_atom = true;
+ return 0;
+}
+
void decode_feature_control_msr(void)
{
unsigned long long msr;
@@ -5354,7 +5428,7 @@ void decode_misc_feature_control(void)
{
unsigned long long msr;
- if (!has_misc_feature_control)
+ if (!platform->has_msr_misc_feature_control)
return;
if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
@@ -5375,10 +5449,7 @@ void decode_misc_pwr_mgmt_msr(void)
{
unsigned long long msr;
- if (!do_nhm_platform_info)
- return;
-
- if (no_MSR_MISC_PWR_MGMT)
+ if (!platform->has_msr_misc_pwr_mgmt)
return;
if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
@@ -5397,6 +5468,9 @@ void decode_c6_demotion_policy_msr(void)
{
unsigned long long msr;
+ if (!platform->has_msr_c6_demotion_policy_config)
+ return;
+
if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
@@ -5406,67 +5480,6 @@ void decode_c6_demotion_policy_msr(void)
base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
}
-/*
- * When models are the same, for the purpose of turbostat, reuse
- */
-unsigned int intel_model_duplicates(unsigned int model)
-{
-
- switch (model) {
- case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
- case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
- case 0x1F: /* Core i7 and i5 Processor - Nehalem */
- case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */
- case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */
- return INTEL_FAM6_NEHALEM;
-
- case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
- case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */
- return INTEL_FAM6_NEHALEM_EX;
-
- case INTEL_FAM6_XEON_PHI_KNM:
- return INTEL_FAM6_XEON_PHI_KNL;
-
- case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_BROADWELL_D: /* BDX-DE */
- return INTEL_FAM6_BROADWELL_X;
-
- case INTEL_FAM6_SKYLAKE_L:
- case INTEL_FAM6_SKYLAKE:
- case INTEL_FAM6_KABYLAKE_L:
- case INTEL_FAM6_KABYLAKE:
- case INTEL_FAM6_COMETLAKE_L:
- case INTEL_FAM6_COMETLAKE:
- return INTEL_FAM6_SKYLAKE_L;
-
- case INTEL_FAM6_ICELAKE_L:
- case INTEL_FAM6_ICELAKE_NNPI:
- case INTEL_FAM6_TIGERLAKE_L:
- case INTEL_FAM6_TIGERLAKE:
- case INTEL_FAM6_ROCKETLAKE:
- case INTEL_FAM6_LAKEFIELD:
- case INTEL_FAM6_ALDERLAKE:
- case INTEL_FAM6_ALDERLAKE_L:
- case INTEL_FAM6_ATOM_GRACEMONT:
- case INTEL_FAM6_RAPTORLAKE:
- case INTEL_FAM6_RAPTORLAKE_P:
- case INTEL_FAM6_RAPTORLAKE_S:
- case INTEL_FAM6_METEORLAKE:
- case INTEL_FAM6_METEORLAKE_L:
- return INTEL_FAM6_CANNONLAKE_L;
-
- case INTEL_FAM6_ATOM_TREMONT_L:
- return INTEL_FAM6_ATOM_TREMONT;
-
- case INTEL_FAM6_ICELAKE_D:
- return INTEL_FAM6_ICELAKE_X;
-
- case INTEL_FAM6_EMERALDRAPIDS_X:
- return INTEL_FAM6_SAPPHIRERAPIDS_X;
- }
- return model;
-}
-
void print_dev_latency(void)
{
char *path = "/dev/cpu_dma_latency";
@@ -5510,6 +5523,101 @@ void linux_perf_init(void)
BIC_PRESENT(BIC_IPC);
}
+void probe_cstates(void)
+{
+ probe_cst_limit();
+
+ if (platform->supported_cstates & CC1)
+ BIC_PRESENT(BIC_CPU_c1);
+
+ if (platform->supported_cstates & CC3)
+ BIC_PRESENT(BIC_CPU_c3);
+
+ if (platform->supported_cstates & CC6)
+ BIC_PRESENT(BIC_CPU_c6);
+
+ if (platform->supported_cstates & CC7)
+ BIC_PRESENT(BIC_CPU_c7);
+
+ if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2))
+ BIC_PRESENT(BIC_Pkgpc2);
+
+ if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3))
+ BIC_PRESENT(BIC_Pkgpc3);
+
+ if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6))
+ BIC_PRESENT(BIC_Pkgpc6);
+
+ if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7))
+ BIC_PRESENT(BIC_Pkgpc7);
+
+ if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8))
+ BIC_PRESENT(BIC_Pkgpc8);
+
+ if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9))
+ BIC_PRESENT(BIC_Pkgpc9);
+
+ if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10))
+ BIC_PRESENT(BIC_Pkgpc10);
+
+ if (platform->has_msr_module_c6_res_ms)
+ BIC_PRESENT(BIC_Mod_c6);
+
+ if (platform->has_ext_cst_msrs) {
+ BIC_PRESENT(BIC_Totl_c0);
+ BIC_PRESENT(BIC_Any_c0);
+ BIC_PRESENT(BIC_GFX_c0);
+ BIC_PRESENT(BIC_CPUGFX);
+ }
+
+ if (quiet)
+ return;
+
+ dump_power_ctl();
+ dump_cst_cfg();
+ decode_c6_demotion_policy_msr();
+ print_dev_latency();
+ dump_sysfs_cstate_config();
+ print_irtl();
+}
+
+void probe_lpi(void)
+{
+ if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
+ BIC_PRESENT(BIC_CPU_LPI);
+ else
+ BIC_NOT_PRESENT(BIC_CPU_LPI);
+
+ if (!access(sys_lpi_file_sysfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_sysfs;
+ BIC_PRESENT(BIC_SYS_LPI);
+ } else if (!access(sys_lpi_file_debugfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_debugfs;
+ BIC_PRESENT(BIC_SYS_LPI);
+ } else {
+ sys_lpi_file_sysfs = NULL;
+ BIC_NOT_PRESENT(BIC_SYS_LPI);
+ }
+
+}
+
+void probe_pstates(void)
+{
+ probe_bclk();
+
+ if (quiet)
+ return;
+
+ dump_platform_info();
+ dump_turbo_ratio_info();
+ dump_sysfs_pstate_config();
+ decode_misc_pwr_mgmt_msr();
+
+ for_all_cpus(print_hwp, ODD_COUNTERS);
+ for_all_cpus(print_epb, ODD_COUNTERS);
+ for_all_cpus(print_perf_limit, ODD_COUNTERS);
+}
+
void process_cpuid()
{
unsigned int eax, ebx, ecx, edx;
@@ -5569,10 +5677,8 @@ void process_cpuid()
edx_flags & (1 << 22) ? "ACPI-TM" : "-",
edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
}
- if (genuine_intel) {
- model_orig = model;
- model = intel_model_duplicates(model);
- }
+
+ probe_platform_features(family, model);
if (!(edx_flags & (1 << 5)))
errx(1, "CPUID: no MSR");
@@ -5656,26 +5762,12 @@ void process_cpuid()
__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
if (ebx_tsc != 0) {
-
if (!quiet && (ebx != 0))
fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
eax_crystal, ebx_tsc, crystal_hz);
if (crystal_hz == 0)
- switch (model) {
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- crystal_hz = 24000000; /* 24.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- crystal_hz = 25000000; /* 25.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- crystal_hz = 19200000; /* 19.2 MHz */
- break;
- default:
- crystal_hz = 0;
- }
+ crystal_hz = platform->crystal_freq;
if (crystal_hz) {
tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
@@ -5700,147 +5792,33 @@ void process_cpuid()
}
if (has_aperf)
- aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
+ aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
BIC_PRESENT(BIC_IRQ);
BIC_PRESENT(BIC_TSC_MHz);
+}
- if (probe_nhm_msrs(family, model)) {
- do_nhm_platform_info = 1;
- BIC_PRESENT(BIC_CPU_c1);
- BIC_PRESENT(BIC_CPU_c3);
- BIC_PRESENT(BIC_CPU_c6);
- BIC_PRESENT(BIC_SMI);
- }
- do_snb_cstates = has_snb_msrs(family, model);
-
- if (do_snb_cstates)
- BIC_PRESENT(BIC_CPU_c7);
-
- do_irtl_snb = has_snb_msrs(family, model);
- if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
- BIC_PRESENT(BIC_Pkgpc2);
- if (pkg_cstate_limit >= PCL__3)
- BIC_PRESENT(BIC_Pkgpc3);
- if (pkg_cstate_limit >= PCL__6)
- BIC_PRESENT(BIC_Pkgpc6);
- if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
- BIC_PRESENT(BIC_Pkgpc7);
- if (has_slv_msrs(family, model)) {
- BIC_NOT_PRESENT(BIC_Pkgpc2);
- BIC_NOT_PRESENT(BIC_Pkgpc3);
- BIC_PRESENT(BIC_Pkgpc6);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- BIC_PRESENT(BIC_Mod_c6);
- use_c1_residency_msr = 1;
- }
- if (is_jvl(family, model)) {
- BIC_NOT_PRESENT(BIC_CPU_c3);
- BIC_NOT_PRESENT(BIC_CPU_c7);
- BIC_NOT_PRESENT(BIC_Pkgpc2);
- BIC_NOT_PRESENT(BIC_Pkgpc3);
- BIC_NOT_PRESENT(BIC_Pkgpc6);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- }
- if (is_dnv(family, model)) {
- BIC_PRESENT(BIC_CPU_c1);
- BIC_NOT_PRESENT(BIC_CPU_c3);
- BIC_NOT_PRESENT(BIC_Pkgpc3);
- BIC_NOT_PRESENT(BIC_CPU_c7);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- use_c1_residency_msr = 1;
- }
- if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
- BIC_NOT_PRESENT(BIC_CPU_c3);
- BIC_NOT_PRESENT(BIC_Pkgpc3);
- BIC_NOT_PRESENT(BIC_CPU_c7);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- }
- if (is_bdx(family, model)) {
- BIC_NOT_PRESENT(BIC_CPU_c7);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- }
- if (has_c8910_msrs(family, model)) {
- if (pkg_cstate_limit >= PCL__8)
- BIC_PRESENT(BIC_Pkgpc8);
- if (pkg_cstate_limit >= PCL__9)
- BIC_PRESENT(BIC_Pkgpc9);
- if (pkg_cstate_limit >= PCL_10)
- BIC_PRESENT(BIC_Pkgpc10);
- }
- do_irtl_hsw = has_c8910_msrs(family, model);
- if (has_skl_msrs(family, model)) {
- BIC_PRESENT(BIC_Totl_c0);
- BIC_PRESENT(BIC_Any_c0);
- BIC_PRESENT(BIC_GFX_c0);
- BIC_PRESENT(BIC_CPUGFX);
- }
- do_slm_cstates = is_slm(family, model);
- do_knl_cstates = is_knl(family, model);
-
- if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
- BIC_NOT_PRESENT(BIC_CPU_c3);
-
- if (!quiet)
- decode_misc_pwr_mgmt_msr();
-
- if (!quiet && has_slv_msrs(family, model))
- decode_c6_demotion_policy_msr();
-
- rapl_probe(family, model);
- perf_limit_reasons_probe(family, model);
- automatic_cstate_conversion_probe(family, model);
-
- check_tcc_offset(model_orig);
-
- if (!quiet)
- dump_cstate_pstate_config_info(family, model);
- intel_uncore_frequency_probe();
-
- if (!quiet)
- print_dev_latency();
- if (!quiet)
- dump_sysfs_cstate_config();
- if (!quiet)
- dump_sysfs_pstate_config();
+void probe_pm_features(void)
+{
+ probe_pstates();
- if (has_skl_msrs(family, model) || is_ehl(family, model))
- calculate_tsc_tweak();
+ probe_cstates();
- if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
- BIC_PRESENT(BIC_GFX_rc6);
+ probe_lpi();
- if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXMHz);
+ probe_intel_uncore_frequency();
- if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXACTMHz);
+ probe_graphics();
- if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
- BIC_PRESENT(BIC_CPU_LPI);
- else
- BIC_NOT_PRESENT(BIC_CPU_LPI);
+ probe_rapl();
- if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
- BIC_PRESENT(BIC_CORE_THROT_CNT);
- else
- BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+ probe_thermal();
- if (!access(sys_lpi_file_sysfs, R_OK)) {
- sys_lpi_file = sys_lpi_file_sysfs;
- BIC_PRESENT(BIC_SYS_LPI);
- } else if (!access(sys_lpi_file_debugfs, R_OK)) {
- sys_lpi_file = sys_lpi_file_debugfs;
- BIC_PRESENT(BIC_SYS_LPI);
- } else {
- sys_lpi_file_sysfs = NULL;
- BIC_NOT_PRESENT(BIC_SYS_LPI);
- }
+ if (platform->has_nhm_msrs)
+ BIC_PRESENT(BIC_SMI);
if (!quiet)
decode_misc_feature_control();
-
- return;
}
/*
@@ -5855,7 +5833,7 @@ int dir_filter(const struct dirent *dirp)
return 0;
}
-void topology_probe()
+void topology_probe(bool startup)
{
int i;
int max_core_id = 0;
@@ -5888,14 +5866,62 @@ void topology_probe()
for_all_proc_cpus(mark_cpu_present);
/*
- * Validate that all cpus in cpu_subset are also in cpu_present_set
+ * Allocate and initialize cpu_effective_set
+ */
+ cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
+ if (cpu_effective_set == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+ CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
+ update_effective_set(startup);
+
+ /*
+ * Allocate and initialize cpu_allowed_set
+ */
+ cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
+ if (cpu_allowed_set == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+ CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);
+
+ /*
+ * Validate and update cpu_allowed_set.
+ *
+ * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
+ * Give a warning when cpus in cpu_subset become unavailable at runtime.
+ * Give a warning when cpus are not effective because of cgroup setting.
+ *
+ * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
*/
for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
- if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
- if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
- err(1, "cpu%d not present", i);
+ if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
+ continue;
+
+ if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
+ if (cpu_subset) {
+ /* cpus in cpu_subset must be in cpu_present_set during startup */
+ if (startup)
+ err(1, "cpu%d not present", i);
+ else
+ fprintf(stderr, "cpu%d not present\n", i);
+ }
+ continue;
+ }
+
+ if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
+ if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
+ fprintf(stderr, "cpu%d not effective\n", i);
+ continue;
+ }
+ }
+
+ CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
}
+ if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
+ err(-ENODEV, "No valid cpus found");
+ sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);
+
/*
* Allocate and initialize cpu_affinity_set
*/
@@ -6009,15 +6035,19 @@ void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_
if (*c == NULL)
goto error;
- for (i = 0; i < num_cores; i++)
+ for (i = 0; i < num_cores; i++) {
(*c)[i].core_id = -1;
+ (*c)[i].base_cpu = -1;
+ }
*p = calloc(topo.num_packages, sizeof(struct pkg_data));
if (*p == NULL)
goto error;
- for (i = 0; i < topo.num_packages; i++)
+ for (i = 0; i < topo.num_packages; i++) {
(*p)[i].package_id = i;
+ (*p)[i].base_cpu = -1;
+ }
return;
error:
@@ -6050,10 +6080,11 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
p = GET_PKG(pkg_base, pkg_id);
t->cpu_id = cpu_id;
- if (thread_id == 0) {
- t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
- if (cpu_is_first_core_in_package(cpu_id))
- t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
+ if (!cpu_is_not_allowed(cpu_id)) {
+ if (c->base_cpu < 0)
+ c->base_cpu = t->cpu_id;
+ if (p->base_cpu < 0)
+ p->base_cpu = t->cpu_id;
}
c->core_id = core_id;
@@ -6093,59 +6124,64 @@ void allocate_irq_buffers(void)
err(-1, "calloc %d", topo.max_cpu_num + 1);
}
-void setup_all_buffers(void)
+int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ topo.allowed_cpus++;
+ if ((int)t->cpu_id == c->base_cpu)
+ topo.allowed_cores++;
+ if ((int)t->cpu_id == p->base_cpu)
+ topo.allowed_packages++;
+
+ return 0;
+}
+
+void topology_update(void)
{
- topology_probe();
+ topo.allowed_cpus = 0;
+ topo.allowed_cores = 0;
+ topo.allowed_packages = 0;
+ for_all_cpus(update_topo, ODD_COUNTERS);
+}
+void setup_all_buffers(bool startup)
+{
+ topology_probe(startup);
allocate_irq_buffers();
allocate_fd_percpu();
allocate_counters(&thread_even, &core_even, &package_even);
allocate_counters(&thread_odd, &core_odd, &package_odd);
allocate_output_buffer();
for_all_proc_cpus(initialize_counters);
+ topology_update();
}
void set_base_cpu(void)
{
- base_cpu = sched_getcpu();
- if (base_cpu < 0)
- err(-ENODEV, "No valid cpus found");
+ int i;
- if (debug > 1)
- fprintf(outf, "base_cpu = %d\n", base_cpu);
+ for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (cpu_is_not_allowed(i))
+ continue;
+ base_cpu = i;
+ if (debug > 1)
+ fprintf(outf, "base_cpu = %d\n", base_cpu);
+ return;
+ }
+ err(-ENODEV, "No valid cpus found");
}
void turbostat_init()
{
- setup_all_buffers();
+ setup_all_buffers(true);
set_base_cpu();
check_dev_msr();
check_permissions();
process_cpuid();
+ probe_pm_features();
linux_perf_init();
- if (!quiet)
- for_all_cpus(print_hwp, ODD_COUNTERS);
-
- if (!quiet)
- for_all_cpus(print_epb, ODD_COUNTERS);
-
- if (!quiet)
- for_all_cpus(print_perf_limit, ODD_COUNTERS);
-
- if (!quiet)
- for_all_cpus(print_rapl, ODD_COUNTERS);
-
- for_all_cpus(set_temperature_target, ODD_COUNTERS);
-
for_all_cpus(get_cpu_type, ODD_COUNTERS);
for_all_cpus(get_cpu_type, EVEN_COUNTERS);
- if (!quiet)
- for_all_cpus(print_thermal, ODD_COUNTERS);
-
- if (!quiet && do_irtl_snb)
- print_irtl();
-
if (DO_BIC(BIC_IPC))
(void)get_instr_count_fd(base_cpu);
}
@@ -6160,8 +6196,6 @@ int fork_it(char **argv)
first_counter_read = 0;
if (status)
exit(status);
- /* clear affinity side-effect of get_counters() */
- sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
gettimeofday(&tv_even, (struct timezone *)NULL);
child_pid = fork();
@@ -6225,7 +6259,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -6508,9 +6542,6 @@ void probe_sysfs(void)
*/
void parse_cpu_command(char *optarg)
{
- unsigned int start, end;
- char *next;
-
if (!strcmp(optarg, "core")) {
if (cpu_subset)
goto error;
@@ -6533,52 +6564,8 @@ void parse_cpu_command(char *optarg)
CPU_ZERO_S(cpu_subset_size, cpu_subset);
- next = optarg;
-
- while (next && *next) {
-
- if (*next == '-') /* no negative cpu numbers */
- goto error;
-
- start = strtoul(next, &next, 10);
-
- if (start >= CPU_SUBSET_MAXCPUS)
- goto error;
- CPU_SET_S(start, cpu_subset_size, cpu_subset);
-
- if (*next == '\0')
- break;
-
- if (*next == ',') {
- next += 1;
- continue;
- }
-
- if (*next == '-') {
- next += 1; /* start range */
- } else if (*next == '.') {
- next += 1;
- if (*next == '.')
- next += 1; /* start range */
- else
- goto error;
- }
-
- end = strtoul(next, &next, 10);
- if (end <= start)
- goto error;
-
- while (++start <= end) {
- if (start >= CPU_SUBSET_MAXCPUS)
- goto error;
- CPU_SET_S(start, cpu_subset_size, cpu_subset);
- }
-
- if (*next == ',')
- next += 1;
- else if (*next != '\0')
- goto error;
- }
+ if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
+ goto error;
return;
@@ -6719,6 +6706,19 @@ void cmdline(int argc, char **argv)
int main(int argc, char **argv)
{
+ int fd, ret;
+
+ fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
+ if (fd < 0)
+ goto skip_cgroup_setting;
+
+ ret = write(fd, "0\n", 2);
+ if (ret == -1)
+ perror("Can't update cgroup\n");
+
+ close(fd);
+
+skip_cgroup_setting:
outf = stderr;
cmdline(argc, argv);
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index cc920c79ff1c..4ff10ea61461 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -45,3 +45,4 @@ mdwe_test
gup_longterm
mkdirty
va_high_addr_switch
+hugetlb_fault_after_madv
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index 0161fb49fc6e..befab43719ba 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -94,19 +94,19 @@ int init_uffd(void)
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
if (uffd == -1)
- ksft_exit_fail_msg("uffd syscall failed\n");
+ return uffd;
uffdio_api.api = UFFD_API;
uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
if (ioctl(uffd, UFFDIO_API, &uffdio_api))
- ksft_exit_fail_msg("UFFDIO_API\n");
+ return -1;
if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
!(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
!(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
!(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
- ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api);
+ return -1;
return 0;
}
@@ -1151,7 +1151,7 @@ int sanity_tests(void)
/* 9. Memory mapped file */
fd = open(__FILE__, O_RDONLY);
if (fd < 0)
- ksft_exit_fail_msg("%s Memory mapped file\n");
+ ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
ret = stat(__FILE__, &sbuf);
if (ret < 0)
@@ -1159,7 +1159,7 @@ int sanity_tests(void)
fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
- ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
tmp_buf = malloc(sbuf.st_size);
memcpy(tmp_buf, fmem, sbuf.st_size);
@@ -1189,7 +1189,7 @@ int sanity_tests(void)
fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
- ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
wp_init(fmem, buf_size);
wp_addr_range(fmem, buf_size);
@@ -1479,6 +1479,10 @@ int main(void)
struct stat sbuf;
ksft_print_header();
+
+ if (init_uffd())
+ return ksft_exit_pass();
+
ksft_set_plan(115);
page_size = getpagesize();
@@ -1488,9 +1492,6 @@ int main(void)
if (pagemap_fd < 0)
return -EINVAL;
- if (init_uffd())
- ksft_exit_fail_msg("uffd init failed\n");
-
/* 1. Sanity testing */
sanity_tests_sd();
@@ -1595,7 +1596,7 @@ int main(void)
fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
- ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
wp_init(fmem, sbuf.st_size);
wp_addr_range(fmem, sbuf.st_size);
@@ -1623,7 +1624,7 @@ int main(void)
fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
- ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
wp_init(fmem, buf_size);
wp_addr_range(fmem, buf_size);
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index cc16f6ca8533..00757445278e 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -223,9 +223,12 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap
CATEGORY="hugetlb" run_test ./hugepage-vmemmap
CATEGORY="hugetlb" run_test ./hugetlb-madvise
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
# For this test, we need one and just one huge page
echo 1 > /proc/sys/vm/nr_hugepages
CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+# Restore the previous number of huge pages, since further tests rely on it
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
if test_selected "hugetlb"; then
echo "NOTE: These hugetlb tests provide minimal coverage. Use"