summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig15
-rw-r--r--arch/x86/Kconfig.cpu44
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/Makefile_32.cpu2
-rw-r--r--arch/x86/boot/compressed/Makefile7
-rw-r--r--arch/x86/boot/compressed/sev.c15
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S2
-rw-r--r--arch/x86/boot/startup/sev-shared.c2
-rw-r--r--arch/x86/coco/sev/core.c1
-rw-r--r--arch/x86/coco/sev/noinstr.c6
-rw-r--r--arch/x86/coco/tdx/debug.c26
-rw-r--r--arch/x86/coco/tdx/tdx.c8
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/Kconfig37
-rw-r--r--arch/x86/crypto/Makefile9
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c1
-rw-r--r--arch/x86/crypto/des3_ede-asm_64.S831
-rw-r--r--arch/x86/crypto/des3_ede_glue.c391
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S133
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c163
-rw-r--r--arch/x86/crypto/sm3-avx-asm_64.S517
-rw-r--r--arch/x86/crypto/sm3_avx_glue.c100
-rw-r--r--arch/x86/entry/entry_fred.c14
-rw-r--r--arch/x86/entry/syscall_32.c4
-rw-r--r--arch/x86/entry/syscall_64.c2
-rw-r--r--arch/x86/entry/vdso/common/vclock_gettime.c2
-rw-r--r--arch/x86/entry/vdso/vdso32/Makefile4
-rw-r--r--arch/x86/entry/vdso/vdso32/fake_32bit_build.h1
-rw-r--r--arch/x86/entry/vdso/vdso32/sigreturn.S30
-rw-r--r--arch/x86/entry/vdso/vma.c4
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c91
-rw-r--r--arch/x86/events/amd/ibs.c270
-rw-r--r--arch/x86/events/core.c7
-rw-r--r--arch/x86/events/intel/core.c59
-rw-r--r--arch/x86/events/intel/ds.c11
-rw-r--r--arch/x86/events/intel/p4.c6
-rw-r--r--arch/x86/events/intel/pt.c1
-rw-r--r--arch/x86/events/intel/uncore.c1
-rw-r--r--arch/x86/events/intel/uncore_discovery.c17
-rw-r--r--arch/x86/events/intel/uncore_snbep.c61
-rw-r--r--arch/x86/events/msr.c82
-rw-r--r--arch/x86/events/perf_event_flags.h1
-rw-r--r--arch/x86/hyperv/hv_crash.c100
-rw-r--r--arch/x86/include/asm/amd/ibs.h6
-rw-r--r--arch/x86/include/asm/apicdef.h7
-rw-r--r--arch/x86/include/asm/clock_inlined.h22
-rw-r--r--arch/x86/include/asm/cpu.h1
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/elf.h9
-rw-r--r--arch/x86/include/asm/entry-common.h12
-rw-r--r--arch/x86/include/asm/floppy.h27
-rw-r--r--arch/x86/include/asm/fpu/xcr.h2
-rw-r--r--arch/x86/include/asm/fsgsbase.h4
-rw-r--r--arch/x86/include/asm/io.h19
-rw-r--r--arch/x86/include/asm/irqflags.h6
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h10
-rw-r--r--arch/x86/include/asm/kvm-x86-pmu-ops.h8
-rw-r--r--arch/x86/include/asm/kvm_host.h55
-rw-r--r--arch/x86/include/asm/local.h2
-rw-r--r--arch/x86/include/asm/mce.h62
-rw-r--r--arch/x86/include/asm/microcode.h2
-rw-r--r--arch/x86/include/asm/mmu_context.h3
-rw-r--r--arch/x86/include/asm/msr-index.h15
-rw-r--r--arch/x86/include/asm/numa.h6
-rw-r--r--arch/x86/include/asm/orc_types.h9
-rw-r--r--arch/x86/include/asm/page_types.h2
-rw-r--r--arch/x86/include/asm/paravirt-base.h6
-rw-r--r--arch/x86/include/asm/paravirt.h11
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/perf_event.h55
-rw-r--r--arch/x86/include/asm/pgtable.h28
-rw-r--r--arch/x86/include/asm/pgtable_64.h2
-rw-r--r--arch/x86/include/asm/pkeys.h3
-rw-r--r--arch/x86/include/asm/processor.h5
-rw-r--r--arch/x86/include/asm/reboot.h11
-rw-r--r--arch/x86/include/asm/segment.h59
-rw-r--r--arch/x86/include/asm/sev.h4
-rw-r--r--arch/x86/include/asm/shared/tdx.h50
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/svm.h20
-rw-r--r--arch/x86/include/asm/tdx.h4
-rw-r--r--arch/x86/include/asm/tdx_global_metadata.h7
-rw-r--r--arch/x86/include/asm/time.h1
-rw-r--r--arch/x86/include/asm/tlbflush.h26
-rw-r--r--arch/x86/include/asm/topology.h6
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/uaccess_32.h8
-rw-r--r--arch/x86/include/asm/uaccess_64.h16
-rw-r--r--arch/x86/include/asm/vdso.h1
-rw-r--r--arch/x86/include/asm/vermagic.h6
-rw-r--r--arch/x86/include/asm/virt.h26
-rw-r--r--arch/x86/include/asm/vmx.h11
-rw-r--r--arch/x86/include/asm/vsyscall.h13
-rw-r--r--arch/x86/include/asm/xor.h502
-rw-r--r--arch/x86/include/asm/xor_32.h573
-rw-r--r--arch/x86/include/asm/xor_64.h28
-rw-r--r--arch/x86/include/asm/xor_avx.h178
-rw-r--r--arch/x86/include/uapi/asm/kvm.h13
-rw-r--r--arch/x86/kernel/Makefile14
-rw-r--r--arch/x86/kernel/acpi/boot.c20
-rw-r--r--arch/x86/kernel/apic/apic.c59
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c18
-rw-r--r--arch/x86/kernel/cpu/amd.c52
-rw-r--r--arch/x86/kernel/cpu/bus_lock.c55
-rw-r--r--arch/x86/kernel/cpu/common.c75
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/match.c3
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c167
-rw-r--r--arch/x86/kernel/cpu/microcode/intel-ucode-defs.h398
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c54
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c7
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c7
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c36
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c2
-rw-r--r--arch/x86/kernel/cpu/topology.c21
-rw-r--r--arch/x86/kernel/cpu/topology_common.c8
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/crash.c3
-rw-r--r--arch/x86/kernel/fpu/xstate.c3
-rw-r--r--arch/x86/kernel/fpu/xstate.h8
-rw-r--r--arch/x86/kernel/fred.c3
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/head_64.S35
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c6
-rw-r--r--arch/x86/kernel/kvm.c8
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c8
-rw-r--r--arch/x86/kernel/reboot.c68
-rw-r--r--arch/x86/kernel/rtc.c21
-rw-r--r--arch/x86/kernel/shstk.c15
-rw-r--r--arch/x86/kernel/smp.c5
-rw-r--r--arch/x86/kernel/smpboot.c201
-rw-r--r--arch/x86/kernel/sys_ia32.c3
-rw-r--r--arch/x86/kernel/tls.c4
-rw-r--r--arch/x86/kernel/traps.c12
-rw-r--r--arch/x86/kernel/tsc.c61
-rw-r--r--arch/x86/kernel/umip.c3
-rw-r--r--arch/x86/kernel/unwind_orc.c32
-rw-r--r--arch/x86/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86/kvm/cpuid.c11
-rw-r--r--arch/x86/kvm/emulate.c26
-rw-r--r--arch/x86/kvm/hyperv.c9
-rw-r--r--arch/x86/kvm/hyperv.h8
-rw-r--r--arch/x86/kvm/ioapic.c3
-rw-r--r--arch/x86/kvm/kvm_emulate.h2
-rw-r--r--arch/x86/kvm/lapic.c13
-rw-r--r--arch/x86/kvm/mmu/mmu.c31
-rw-r--r--arch/x86/kvm/mmu/spte.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c2
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm/avic.c26
-rw-r--r--arch/x86/kvm/svm/hyperv.h9
-rw-r--r--arch/x86/kvm/svm/nested.c611
-rw-r--r--arch/x86/kvm/svm/sev.c400
-rw-r--r--arch/x86/kvm/svm/svm.c677
-rw-r--r--arch/x86/kvm/svm/svm.h129
-rw-r--r--arch/x86/kvm/svm/vmenter.S10
-rw-r--r--arch/x86/kvm/vmx/capabilities.h2
-rw-r--r--arch/x86/kvm/vmx/main.c19
-rw-r--r--arch/x86/kvm/vmx/nested.c111
-rw-r--r--arch/x86/kvm/vmx/nested.h1
-rw-r--r--arch/x86/kvm/vmx/tdx.c226
-rw-r--r--arch/x86/kvm/vmx/tdx.h8
-rw-r--r--arch/x86/kvm/vmx/tdx_arch.h6
-rw-r--r--arch/x86/kvm/vmx/vmcs.h11
-rw-r--r--arch/x86/kvm/vmx/vmcs_shadow_fields.h5
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/vmx/vmx.c168
-rw-r--r--arch/x86/kvm/vmx/vmx.h3
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h5
-rw-r--r--arch/x86/kvm/x86.c469
-rw-r--r--arch/x86/kvm/x86.h42
-rw-r--r--arch/x86/lib/copy_user_uncached_64.S6
-rw-r--r--arch/x86/lib/usercopy_32.c9
-rw-r--r--arch/x86/lib/usercopy_64.c12
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/numa.c8
-rw-r--r--arch/x86/mm/pat/set_memory.c37
-rw-r--r--arch/x86/mm/pgtable.c28
-rw-r--r--arch/x86/mm/pkeys.c3
-rw-r--r--arch/x86/mm/srat.c2
-rw-r--r--arch/x86/mm/tlb.c21
-rw-r--r--arch/x86/pci/i386.c5
-rw-r--r--arch/x86/platform/efi/efi.c2
-rw-r--r--arch/x86/platform/efi/efi_64.c35
-rw-r--r--arch/x86/platform/efi/quirks.c55
-rw-r--r--arch/x86/platform/geode/geode-common.c24
-rw-r--r--arch/x86/platform/pvh/enlighten.c7
-rw-r--r--arch/x86/power/hibernate_64.c2
-rw-r--r--arch/x86/tools/vdso2c.c1
-rw-r--r--arch/x86/um/asm/vm-flags.h4
-rw-r--r--arch/x86/virt/Makefile2
-rw-r--r--arch/x86/virt/hw.c360
-rw-r--r--arch/x86/virt/svm/sev.c163
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.c320
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.h8
-rw-r--r--arch/x86/virt/vmx/tdx/tdx_global_metadata.c32
-rw-r--r--arch/x86/xen/enlighten_hvm.c5
-rw-r--r--arch/x86/xen/enlighten_pv.c8
-rw-r--r--arch/x86/xen/mmu_pv.c9
205 files changed, 4372 insertions, 6419 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e2df1b147184..99bb5217649a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,7 +69,6 @@ config X86
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64
- select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if (PGTABLE_LEVELS > 2) && (X86_64 || X86_PAE)
select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
@@ -141,6 +140,7 @@ config X86
select ARCH_USE_SYM_ANNOTATIONS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
+ select ARCH_WANTS_CLOCKSOURCE_READ_INLINE if X86_64
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_GENERAL_HUGETLB
@@ -163,6 +163,7 @@ config X86
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
select GENERIC_CLOCKEVENTS_BROADCAST_IDLE if GENERIC_CLOCKEVENTS_BROADCAST
+ select GENERIC_CLOCKEVENTS_COUPLED_INLINE if X86_64
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
@@ -446,11 +447,6 @@ config SMP
uniprocessor machines. On a uniprocessor machine, the kernel
will run faster if you say N here.
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
People using multiprocessor machines who say Y here should also say
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
Management" code will be disabled if you say Y here.
@@ -557,7 +553,7 @@ config X86_FRED
bool "Flexible Return and Event Delivery"
depends on X86_64
help
- When enabled, try to use Flexible Return and Event Delivery
+ When enabled, use Flexible Return and Event Delivery
instead of the legacy SYSCALL/SYSENTER/IDT architecture for
ring transitions and exception/interrupt handling if the
system supports it.
@@ -2771,11 +2767,6 @@ menuconfig APM
manpage ("man 8 hdparm") for that), and it doesn't turn off
VESA-compliant "green" monitors.
- This driver does not support the TI 4000M TravelMate and the ACER
- 486/DX4/75 because they don't have compliant BIOSes. Many "green"
- desktop machines also don't have compliant BIOSes, and this driver
- may cause those machines to panic during the boot phase.
-
Generally, if you don't have a battery in your machine, there isn't
much point in using this driver and you should say N. If you get
random kernel OOPSes or reboots that don't seem to be related to
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f928cf6e3252..d7ba9219cb47 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -8,19 +8,18 @@ choice
This is the processor type of your CPU. This information is
used for optimizing purposes. In order to compile a kernel
that can run on all supported x86 CPU types (albeit not
- optimally fast), you can specify "486" here.
+ optimally fast), you can specify "586" here.
- Note that the 386 is no longer supported, this includes
+ Note that the 386 and 486 are no longer supported; this includes
AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI 486DLC/DLC2,
- UMC 486SX-S and the NexGen Nx586.
+ UMC 486SX-S, the NexGen Nx586, AMD ELAN and all 486-based
+ CPUs.
The kernel will not necessarily run on earlier architectures than
the one you have chosen, e.g. a Pentium optimized kernel will run on
a PPro, but not necessarily on a i486.
Here are the settings recommended for greatest speed:
- - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
- SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
- "586" for generic Pentium CPUs lacking the TSC
(time stamp counter) register.
- "Pentium-Classic" for the Intel Pentium.
@@ -46,20 +45,6 @@ choice
See each option's help text for additional details. If you don't know
what to do, choose "Pentium-Pro".
-config M486SX
- bool "486SX"
- depends on X86_32
- help
- Select this for an 486-class CPU without an FPU such as
- AMD/Cyrix/IBM/Intel SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5S.
-
-config M486
- bool "486DX"
- depends on X86_32
- help
- Select this for an 486-class CPU such as AMD/Cyrix/IBM/Intel
- 486DX/DX2/DX4 and UMC U5D.
-
config M586
bool "586/K5/5x86/6x86/6x86MX"
depends on X86_32
@@ -188,14 +173,6 @@ config MWINCHIP3D
stores for this CPU, which can increase performance of some
operations.
-config MELAN
- bool "AMD Elan"
- depends on X86_32
- help
- Select this for an AMD Elan processor.
-
- Do not use this option for K6/Athlon/Opteron processors!
-
config MGEODEGX1
bool "GeodeGX1"
depends on X86_32
@@ -292,12 +269,12 @@ config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4
default "6" if MK7 || MPENTIUMM || MATOM || MVIAC7 || X86_GENERIC || X86_64
- default "4" if MELAN || M486SX || M486 || MGEODEGX1
+ default "4" if MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
config X86_F00F_BUG
def_bool y
- depends on M586MMX || M586TSC || M586 || M486SX || M486
+ depends on M586MMX || M586TSC || M586
config X86_INVD_BUG
def_bool y
@@ -305,7 +282,7 @@ config X86_INVD_BUG
config X86_ALIGNMENT_16
def_bool y
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1
+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK6 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
config X86_INTEL_USERCOPY
def_bool y
@@ -337,12 +314,11 @@ config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MK7)
- default "5" if X86_32 && X86_CX8
- default "4"
+ default "5"
config X86_DEBUGCTLMSR
def_bool y
- depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486SX || M486) && !UML
+ depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586) && !UML
config IA32_FEAT_CTL
def_bool y
@@ -378,7 +354,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
- depends on M486SX || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
+ depends on M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
help
This enables detection, tunings and quirks for Cyrix processors
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5f881460a8b5..46fec0b08487 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -48,7 +48,7 @@ endif
# How to compile the 16-bit code. Note we always compile for -march=i386;
# that way we can complain to the user if the CPU is insufficient.
-REALMODE_CFLAGS := -std=gnu11 -fms-extensions -m16 -g -Os \
+REALMODE_CFLAGS := $(CC_FLAGS_DIALECT) -m16 -g -Os \
-DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
@@ -59,10 +59,6 @@ REALMODE_CFLAGS += -fno-stack-protector
REALMODE_CFLAGS += -Wno-address-of-packed-member
REALMODE_CFLAGS += $(cc_stack_align4)
REALMODE_CFLAGS += $(CLANG_FLAGS)
-ifdef CONFIG_CC_IS_CLANG
-REALMODE_CFLAGS += -Wno-gnu
-REALMODE_CFLAGS += -Wno-microsoft-anon-tag
-endif
export REALMODE_CFLAGS
# BITS is used as extension for files which are available in a 32 bit
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index a3dda95e47f4..7c9898c15376 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -10,8 +10,6 @@ else
align := -falign-functions=0 -falign-jumps=0 -falign-loops=0
endif
-cflags-$(CONFIG_M486SX) += -march=i486
-cflags-$(CONFIG_M486) += -march=i486
cflags-$(CONFIG_M586) += -march=i586
cflags-$(CONFIG_M586TSC) += -march=i586
cflags-$(CONFIG_M586MMX) += -march=pentium-mmx
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 68f9d7a1683b..07e0e64b9a98 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -25,7 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
# avoid errors with '-march=i386', and future flags may depend on the target to
# be valid.
KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS)
-KBUILD_CFLAGS += -std=gnu11 -fms-extensions
+KBUILD_CFLAGS += $(CC_FLAGS_DIALECT)
KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
KBUILD_CFLAGS += -Wundef
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
@@ -36,10 +36,6 @@ KBUILD_CFLAGS += -mno-mmx -mno-sse
KBUILD_CFLAGS += -ffreestanding -fshort-wchar
KBUILD_CFLAGS += -fno-stack-protector
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
-ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Wno-gnu
-KBUILD_CFLAGS += -Wno-microsoft-anon-tag
-endif
KBUILD_CFLAGS += -Wno-pointer-sign
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += -D__DISABLE_EXPORTS
@@ -113,6 +109,7 @@ vmlinux-objs-$(CONFIG_EFI_SBAT) += $(obj)/sbat.o
ifdef CONFIG_EFI_SBAT
$(obj)/sbat.o: $(CONFIG_EFI_SBAT_FILE)
+AFLAGS_sbat.o += -I $(srctree)
endif
$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index c8c1464b3a56..c6512f2ea31e 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -28,17 +28,17 @@
#include "sev.h"
static struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
-struct ghcb *boot_ghcb;
+struct ghcb *boot_ghcb __section(".data");
#undef __init
#define __init
#define __BOOT_COMPRESSED
-u8 snp_vmpl;
-u16 ghcb_version;
+u8 snp_vmpl __section(".data");
+u16 ghcb_version __section(".data");
-u64 boot_svsm_caa_pa;
+u64 boot_svsm_caa_pa __section(".data");
/* Include code for early handlers */
#include "../../boot/startup/sev-shared.c"
@@ -188,6 +188,7 @@ bool sev_es_check_ghcb_fault(unsigned long address)
MSR_AMD64_SNP_RESERVED_BIT13 | \
MSR_AMD64_SNP_RESERVED_BIT15 | \
MSR_AMD64_SNP_SECURE_AVIC | \
+ MSR_AMD64_SNP_RESERVED_BITS19_22 | \
MSR_AMD64_SNP_RESERVED_MASK)
#ifdef CONFIG_AMD_SECURE_AVIC
@@ -197,11 +198,11 @@ bool sev_es_check_ghcb_fault(unsigned long address)
#endif
/*
- * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented
+ * SNP_FEATURES_IMPL is the mask of SNP features that are implemented
* by the guest kernel. As and when a new feature is implemented in the
* guest kernel, a corresponding bit should be added to the mask.
*/
-#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \
+#define SNP_FEATURES_IMPL (MSR_AMD64_SNP_DEBUG_SWAP | \
MSR_AMD64_SNP_SECURE_TSC | \
SNP_FEATURE_SECURE_AVIC)
@@ -210,7 +211,7 @@ u64 snp_get_unsupported_features(u64 status)
if (!(status & MSR_AMD64_SEV_SNP_ENABLED))
return 0;
- return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+ return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_IMPL;
}
void snp_check_features(void)
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 587ce3e7c504..e0b152715d9c 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -88,7 +88,7 @@ SECTIONS
/DISCARD/ : {
*(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
*(.hash) *(.gnu.hash)
- *(.note.*)
+ *(.note.*) *(.modinfo)
}
.got.plt (INFO) : {
diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c
index a0fa8bb2b945..d9ac3a929d33 100644
--- a/arch/x86/boot/startup/sev-shared.c
+++ b/arch/x86/boot/startup/sev-shared.c
@@ -31,7 +31,7 @@ static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;
-bool sev_snp_needs_sfw;
+bool sev_snp_needs_sfw __section(".data");
void __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 907981b94c40..7ed3da998489 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -89,6 +89,7 @@ static const char * const sev_status_feat_names[] = {
[MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
[MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
[MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC",
+ [MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT] = "IBPBOnEntry",
};
/*
diff --git a/arch/x86/coco/sev/noinstr.c b/arch/x86/coco/sev/noinstr.c
index 9d94aca4a698..5afd663a1c21 100644
--- a/arch/x86/coco/sev/noinstr.c
+++ b/arch/x86/coco/sev/noinstr.c
@@ -121,6 +121,9 @@ noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
WARN_ON(!irqs_disabled());
+ if (!sev_cfg.ghcbs_initialized)
+ return boot_ghcb;
+
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
@@ -164,6 +167,9 @@ noinstr void __sev_put_ghcb(struct ghcb_state *state)
WARN_ON(!irqs_disabled());
+ if (!sev_cfg.ghcbs_initialized)
+ return;
+
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
diff --git a/arch/x86/coco/tdx/debug.c b/arch/x86/coco/tdx/debug.c
index cef847c8bb67..8e477db4ce0a 100644
--- a/arch/x86/coco/tdx/debug.c
+++ b/arch/x86/coco/tdx/debug.c
@@ -7,21 +7,21 @@
#include <linux/printk.h>
#include <asm/tdx.h>
-#define DEF_TDX_ATTR_NAME(_name) [TDX_ATTR_##_name##_BIT] = __stringify(_name)
+#define DEF_TDX_TD_ATTR_NAME(_name) [TDX_TD_ATTR_##_name##_BIT] = __stringify(_name)
static __initdata const char *tdx_attributes[] = {
- DEF_TDX_ATTR_NAME(DEBUG),
- DEF_TDX_ATTR_NAME(HGS_PLUS_PROF),
- DEF_TDX_ATTR_NAME(PERF_PROF),
- DEF_TDX_ATTR_NAME(PMT_PROF),
- DEF_TDX_ATTR_NAME(ICSSD),
- DEF_TDX_ATTR_NAME(LASS),
- DEF_TDX_ATTR_NAME(SEPT_VE_DISABLE),
- DEF_TDX_ATTR_NAME(MIGRTABLE),
- DEF_TDX_ATTR_NAME(PKS),
- DEF_TDX_ATTR_NAME(KL),
- DEF_TDX_ATTR_NAME(TPA),
- DEF_TDX_ATTR_NAME(PERFMON),
+ DEF_TDX_TD_ATTR_NAME(DEBUG),
+ DEF_TDX_TD_ATTR_NAME(HGS_PLUS_PROF),
+ DEF_TDX_TD_ATTR_NAME(PERF_PROF),
+ DEF_TDX_TD_ATTR_NAME(PMT_PROF),
+ DEF_TDX_TD_ATTR_NAME(ICSSD),
+ DEF_TDX_TD_ATTR_NAME(LASS),
+ DEF_TDX_TD_ATTR_NAME(SEPT_VE_DISABLE),
+ DEF_TDX_TD_ATTR_NAME(MIGRATABLE),
+ DEF_TDX_TD_ATTR_NAME(PKS),
+ DEF_TDX_TD_ATTR_NAME(KL),
+ DEF_TDX_TD_ATTR_NAME(TPA),
+ DEF_TDX_TD_ATTR_NAME(PERFMON),
};
#define DEF_TD_CTLS_NAME(_name) [TD_CTLS_##_name##_BIT] = __stringify(_name)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 7b2833705d47..186915a17c50 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -238,14 +238,14 @@ static void __noreturn tdx_panic(const char *msg)
*
* TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
* controls if the guest will receive such #VE with TD attribute
- * TDX_ATTR_SEPT_VE_DISABLE.
+ * TDX_TD_ATTR_SEPT_VE_DISABLE.
*
* Newer TDX modules allow the guest to control if it wants to receive SEPT
* violation #VEs.
*
* Check if the feature is available and disable SEPT #VE if possible.
*
- * If the TD is allowed to disable/enable SEPT #VEs, the TDX_ATTR_SEPT_VE_DISABLE
+ * If the TD is allowed to disable/enable SEPT #VEs, the TDX_TD_ATTR_SEPT_VE_DISABLE
* attribute is no longer reliable. It reflects the initial state of the
* control for the TD, but it will not be updated if someone (e.g. bootloader)
* changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
@@ -254,14 +254,14 @@ static void __noreturn tdx_panic(const char *msg)
static void disable_sept_ve(u64 td_attr)
{
const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
- bool debug = td_attr & TDX_ATTR_DEBUG;
+ bool debug = td_attr & TDX_TD_ATTR_DEBUG;
u64 config, controls;
/* Is this TD allowed to disable SEPT #VE */
tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
/* No SEPT #VE controls for the guest: check the attribute */
- if (td_attr & TDX_ATTR_SEPT_VE_DISABLE)
+ if (td_attr & TDX_TD_ATTR_SEPT_VE_DISABLE)
return;
/* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 7d7310cdf8b0..269f7d808be4 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -230,6 +230,7 @@ CONFIG_EEEPC_LAPTOP=y
CONFIG_AMD_IOMMU=y
CONFIG_INTEL_IOMMU=y
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
+CONFIG_IRQ_REMAP=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 7fb2319a0916..f65d7b83702f 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -99,20 +99,6 @@ config CRYPTO_CAST6_AVX_X86_64
Processes eight blocks in parallel.
-config CRYPTO_DES3_EDE_X86_64
- tristate "Ciphers: Triple DES EDE with modes: ECB, CBC"
- depends on 64BIT
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_DES
- imply CRYPTO_CTR
- help
- Block cipher: Triple DES EDE (FIPS 46-3) cipher algorithm
- Length-preserving ciphers: Triple DES EDE with ECB and CBC modes
-
- Architecture: x86_64
-
- Processes one or three blocks in parallel.
-
config CRYPTO_SERPENT_SSE2_X86_64
tristate "Ciphers: Serpent with modes: ECB, CBC (SSE2)"
depends on 64BIT
@@ -331,27 +317,4 @@ config CRYPTO_AEGIS128_AESNI_SSE2
- AES-NI (AES New Instructions)
- SSE4.1 (Streaming SIMD Extensions 4.1)
-config CRYPTO_SM3_AVX_X86_64
- tristate "Hash functions: SM3 (AVX)"
- depends on 64BIT
- select CRYPTO_HASH
- select CRYPTO_LIB_SM3
- help
- SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3
-
- Architecture: x86_64 using:
- - AVX (Advanced Vector Extensions)
-
- If unsure, say N.
-
-config CRYPTO_GHASH_CLMUL_NI_INTEL
- tristate "Hash functions: GHASH (CLMUL-NI)"
- depends on 64BIT
- select CRYPTO_CRYPTD
- help
- GCM GHASH hash function (NIST SP800-38D)
-
- Architecture: x86_64 using:
- - CLMUL-NI (carry-less multiplication new instructions)
-
endmenu
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b21ad0978c52..e04ff8718d6b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -20,9 +20,6 @@ serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
-obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
-des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
-
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o
@@ -50,12 +47,6 @@ aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \
aes-gcm-vaes-avx512.o \
aes-xts-avx-x86_64.o
-obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
-ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o
-sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o
-
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e6c38d1d8a92..f522fff9231e 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -25,6 +25,7 @@
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
+#include <crypto/gf128mul.h>
#include <crypto/xts.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
deleted file mode 100644
index cf21b998e77c..000000000000
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ /dev/null
@@ -1,831 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
- *
- * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- */
-
-#include <linux/linkage.h>
-
-.file "des3_ede-asm_64.S"
-.text
-
-#define s1 .L_s1
-#define s2 ((s1) + (64*8))
-#define s3 ((s2) + (64*8))
-#define s4 ((s3) + (64*8))
-#define s5 ((s4) + (64*8))
-#define s6 ((s5) + (64*8))
-#define s7 ((s6) + (64*8))
-#define s8 ((s7) + (64*8))
-
-/* register macros */
-#define CTX %rdi
-
-#define RL0 %r8
-#define RL1 %r9
-#define RL2 %r10
-
-#define RL0d %r8d
-#define RL1d %r9d
-#define RL2d %r10d
-
-#define RR0 %r11
-#define RR1 %r12
-#define RR2 %r13
-
-#define RR0d %r11d
-#define RR1d %r12d
-#define RR2d %r13d
-
-#define RW0 %rax
-#define RW1 %rbx
-#define RW2 %rcx
-
-#define RW0d %eax
-#define RW1d %ebx
-#define RW2d %ecx
-
-#define RW0bl %al
-#define RW1bl %bl
-#define RW2bl %cl
-
-#define RW0bh %ah
-#define RW1bh %bh
-#define RW2bh %ch
-
-#define RT0 %r15
-#define RT1 %rsi
-#define RT2 %r14
-#define RT3 %rdx
-
-#define RT0d %r15d
-#define RT1d %esi
-#define RT2d %r14d
-#define RT3d %edx
-
-/***********************************************************************
- * 1-way 3DES
- ***********************************************************************/
-#define do_permutation(a, b, offset, mask) \
- movl a, RT0d; \
- shrl $(offset), RT0d; \
- xorl b, RT0d; \
- andl $(mask), RT0d; \
- xorl RT0d, b; \
- shll $(offset), RT0d; \
- xorl RT0d, a;
-
-#define expand_to_64bits(val, mask) \
- movl val##d, RT0d; \
- rorl $4, RT0d; \
- shlq $32, RT0; \
- orq RT0, val; \
- andq mask, val;
-
-#define compress_to_64bits(val) \
- movq val, RT0; \
- shrq $32, RT0; \
- roll $4, RT0d; \
- orl RT0d, val##d;
-
-#define initial_permutation(left, right) \
- do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
- do_permutation(left##d, right##d, 16, 0x0000ffff); \
- do_permutation(right##d, left##d, 2, 0x33333333); \
- do_permutation(right##d, left##d, 8, 0x00ff00ff); \
- movabs $0x3f3f3f3f3f3f3f3f, RT3; \
- movl left##d, RW0d; \
- roll $1, right##d; \
- xorl right##d, RW0d; \
- andl $0xaaaaaaaa, RW0d; \
- xorl RW0d, left##d; \
- xorl RW0d, right##d; \
- roll $1, left##d; \
- expand_to_64bits(right, RT3); \
- expand_to_64bits(left, RT3);
-
-#define final_permutation(left, right) \
- compress_to_64bits(right); \
- compress_to_64bits(left); \
- movl right##d, RW0d; \
- rorl $1, left##d; \
- xorl left##d, RW0d; \
- andl $0xaaaaaaaa, RW0d; \
- xorl RW0d, right##d; \
- xorl RW0d, left##d; \
- rorl $1, right##d; \
- do_permutation(right##d, left##d, 8, 0x00ff00ff); \
- do_permutation(right##d, left##d, 2, 0x33333333); \
- do_permutation(left##d, right##d, 16, 0x0000ffff); \
- do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
-
-#define round1(n, from, to, load_next_key) \
- xorq from, RW0; \
- \
- movzbl RW0bl, RT0d; \
- movzbl RW0bh, RT1d; \
- shrq $16, RW0; \
- movzbl RW0bl, RT2d; \
- movzbl RW0bh, RT3d; \
- shrq $16, RW0; \
- leaq s8(%rip), RW1; \
- movq (RW1, RT0, 8), RT0; \
- leaq s6(%rip), RW1; \
- xorq (RW1, RT1, 8), to; \
- movzbl RW0bl, RL1d; \
- movzbl RW0bh, RT1d; \
- shrl $16, RW0d; \
- leaq s4(%rip), RW1; \
- xorq (RW1, RT2, 8), RT0; \
- leaq s2(%rip), RW1; \
- xorq (RW1, RT3, 8), to; \
- movzbl RW0bl, RT2d; \
- movzbl RW0bh, RT3d; \
- leaq s7(%rip), RW1; \
- xorq (RW1, RL1, 8), RT0; \
- leaq s5(%rip), RW1; \
- xorq (RW1, RT1, 8), to; \
- leaq s3(%rip), RW1; \
- xorq (RW1, RT2, 8), RT0; \
- load_next_key(n, RW0); \
- xorq RT0, to; \
- leaq s1(%rip), RW1; \
- xorq (RW1, RT3, 8), to; \
-
-#define load_next_key(n, RWx) \
- movq (((n) + 1) * 8)(CTX), RWx;
-
-#define dummy2(a, b) /*_*/
-
-#define read_block(io, left, right) \
- movl (io), left##d; \
- movl 4(io), right##d; \
- bswapl left##d; \
- bswapl right##d;
-
-#define write_block(io, left, right) \
- bswapl left##d; \
- bswapl right##d; \
- movl left##d, (io); \
- movl right##d, 4(io);
-
-SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
- /* input:
- * %rdi: round keys, CTX
- * %rsi: dst
- * %rdx: src
- */
- pushq %rbx;
- pushq %r12;
- pushq %r13;
- pushq %r14;
- pushq %r15;
-
- pushq %rsi; /* dst */
-
- read_block(%rdx, RL0, RR0);
- initial_permutation(RL0, RR0);
-
- movq (CTX), RW0;
-
- round1(0, RR0, RL0, load_next_key);
- round1(1, RL0, RR0, load_next_key);
- round1(2, RR0, RL0, load_next_key);
- round1(3, RL0, RR0, load_next_key);
- round1(4, RR0, RL0, load_next_key);
- round1(5, RL0, RR0, load_next_key);
- round1(6, RR0, RL0, load_next_key);
- round1(7, RL0, RR0, load_next_key);
- round1(8, RR0, RL0, load_next_key);
- round1(9, RL0, RR0, load_next_key);
- round1(10, RR0, RL0, load_next_key);
- round1(11, RL0, RR0, load_next_key);
- round1(12, RR0, RL0, load_next_key);
- round1(13, RL0, RR0, load_next_key);
- round1(14, RR0, RL0, load_next_key);
- round1(15, RL0, RR0, load_next_key);
-
- round1(16+0, RL0, RR0, load_next_key);
- round1(16+1, RR0, RL0, load_next_key);
- round1(16+2, RL0, RR0, load_next_key);
- round1(16+3, RR0, RL0, load_next_key);
- round1(16+4, RL0, RR0, load_next_key);
- round1(16+5, RR0, RL0, load_next_key);
- round1(16+6, RL0, RR0, load_next_key);
- round1(16+7, RR0, RL0, load_next_key);
- round1(16+8, RL0, RR0, load_next_key);
- round1(16+9, RR0, RL0, load_next_key);
- round1(16+10, RL0, RR0, load_next_key);
- round1(16+11, RR0, RL0, load_next_key);
- round1(16+12, RL0, RR0, load_next_key);
- round1(16+13, RR0, RL0, load_next_key);
- round1(16+14, RL0, RR0, load_next_key);
- round1(16+15, RR0, RL0, load_next_key);
-
- round1(32+0, RR0, RL0, load_next_key);
- round1(32+1, RL0, RR0, load_next_key);
- round1(32+2, RR0, RL0, load_next_key);
- round1(32+3, RL0, RR0, load_next_key);
- round1(32+4, RR0, RL0, load_next_key);
- round1(32+5, RL0, RR0, load_next_key);
- round1(32+6, RR0, RL0, load_next_key);
- round1(32+7, RL0, RR0, load_next_key);
- round1(32+8, RR0, RL0, load_next_key);
- round1(32+9, RL0, RR0, load_next_key);
- round1(32+10, RR0, RL0, load_next_key);
- round1(32+11, RL0, RR0, load_next_key);
- round1(32+12, RR0, RL0, load_next_key);
- round1(32+13, RL0, RR0, load_next_key);
- round1(32+14, RR0, RL0, load_next_key);
- round1(32+15, RL0, RR0, dummy2);
-
- final_permutation(RR0, RL0);
-
- popq %rsi /* dst */
- write_block(%rsi, RR0, RL0);
-
- popq %r15;
- popq %r14;
- popq %r13;
- popq %r12;
- popq %rbx;
-
- RET;
-SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
-
-/***********************************************************************
- * 3-way 3DES
- ***********************************************************************/
-#define expand_to_64bits(val, mask) \
- movl val##d, RT0d; \
- rorl $4, RT0d; \
- shlq $32, RT0; \
- orq RT0, val; \
- andq mask, val;
-
-#define compress_to_64bits(val) \
- movq val, RT0; \
- shrq $32, RT0; \
- roll $4, RT0d; \
- orl RT0d, val##d;
-
-#define initial_permutation3(left, right) \
- do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
- do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
- do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
- do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
- do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
- do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
- \
- do_permutation(right##0d, left##0d, 2, 0x33333333); \
- do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
- do_permutation(right##1d, left##1d, 2, 0x33333333); \
- do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
- do_permutation(right##2d, left##2d, 2, 0x33333333); \
- do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
- \
- movabs $0x3f3f3f3f3f3f3f3f, RT3; \
- \
- movl left##0d, RW0d; \
- roll $1, right##0d; \
- xorl right##0d, RW0d; \
- andl $0xaaaaaaaa, RW0d; \
- xorl RW0d, left##0d; \
- xorl RW0d, right##0d; \
- roll $1, left##0d; \
- expand_to_64bits(right##0, RT3); \
- expand_to_64bits(left##0, RT3); \
- movl left##1d, RW1d; \
- roll $1, right##1d; \
- xorl right##1d, RW1d; \
- andl $0xaaaaaaaa, RW1d; \
- xorl RW1d, left##1d; \
- xorl RW1d, right##1d; \
- roll $1, left##1d; \
- expand_to_64bits(right##1, RT3); \
- expand_to_64bits(left##1, RT3); \
- movl left##2d, RW2d; \
- roll $1, right##2d; \
- xorl right##2d, RW2d; \
- andl $0xaaaaaaaa, RW2d; \
- xorl RW2d, left##2d; \
- xorl RW2d, right##2d; \
- roll $1, left##2d; \
- expand_to_64bits(right##2, RT3); \
- expand_to_64bits(left##2, RT3);
-
-#define final_permutation3(left, right) \
- compress_to_64bits(right##0); \
- compress_to_64bits(left##0); \
- movl right##0d, RW0d; \
- rorl $1, left##0d; \
- xorl left##0d, RW0d; \
- andl $0xaaaaaaaa, RW0d; \
- xorl RW0d, right##0d; \
- xorl RW0d, left##0d; \
- rorl $1, right##0d; \
- compress_to_64bits(right##1); \
- compress_to_64bits(left##1); \
- movl right##1d, RW1d; \
- rorl $1, left##1d; \
- xorl left##1d, RW1d; \
- andl $0xaaaaaaaa, RW1d; \
- xorl RW1d, right##1d; \
- xorl RW1d, left##1d; \
- rorl $1, right##1d; \
- compress_to_64bits(right##2); \
- compress_to_64bits(left##2); \
- movl right##2d, RW2d; \
- rorl $1, left##2d; \
- xorl left##2d, RW2d; \
- andl $0xaaaaaaaa, RW2d; \
- xorl RW2d, right##2d; \
- xorl RW2d, left##2d; \
- rorl $1, right##2d; \
- \
- do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
- do_permutation(right##0d, left##0d, 2, 0x33333333); \
- do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
- do_permutation(right##1d, left##1d, 2, 0x33333333); \
- do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
- do_permutation(right##2d, left##2d, 2, 0x33333333); \
- \
- do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
- do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
- do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
- do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
- do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
- do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);
-
-#define round3(n, from, to, load_next_key, do_movq) \
- xorq from##0, RW0; \
- movzbl RW0bl, RT3d; \
- movzbl RW0bh, RT1d; \
- shrq $16, RW0; \
- leaq s8(%rip), RT2; \
- xorq (RT2, RT3, 8), to##0; \
- leaq s6(%rip), RT2; \
- xorq (RT2, RT1, 8), to##0; \
- movzbl RW0bl, RT3d; \
- movzbl RW0bh, RT1d; \
- shrq $16, RW0; \
- leaq s4(%rip), RT2; \
- xorq (RT2, RT3, 8), to##0; \
- leaq s2(%rip), RT2; \
- xorq (RT2, RT1, 8), to##0; \
- movzbl RW0bl, RT3d; \
- movzbl RW0bh, RT1d; \
- shrl $16, RW0d; \
- leaq s7(%rip), RT2; \
- xorq (RT2, RT3, 8), to##0; \
- leaq s5(%rip), RT2; \
- xorq (RT2, RT1, 8), to##0; \
- movzbl RW0bl, RT3d; \
- movzbl RW0bh, RT1d; \
- load_next_key(n, RW0); \
- leaq s3(%rip), RT2; \
- xorq (RT2, RT3, 8), to##0; \
- leaq s1(%rip), RT2; \
- xorq (RT2, RT1, 8), to##0; \
- xorq from##1, RW1; \
- movzbl RW1bl, RT3d; \
- movzbl RW1bh, RT1d; \
- shrq $16, RW1; \
- leaq s8(%rip), RT2; \
- xorq (RT2, RT3, 8), to##1; \
- leaq s6(%rip), RT2; \
- xorq (RT2, RT1, 8), to##1; \
- movzbl RW1bl, RT3d; \
- movzbl RW1bh, RT1d; \
- shrq $16, RW1; \
- leaq s4(%rip), RT2; \
- xorq (RT2, RT3, 8), to##1; \
- leaq s2(%rip), RT2; \
- xorq (RT2, RT1, 8), to##1; \
- movzbl RW1bl, RT3d; \
- movzbl RW1bh, RT1d; \
- shrl $16, RW1d; \
- leaq s7(%rip), RT2; \
- xorq (RT2, RT3, 8), to##1; \
- leaq s5(%rip), RT2; \
- xorq (RT2, RT1, 8), to##1; \
- movzbl RW1bl, RT3d; \
- movzbl RW1bh, RT1d; \
- do_movq(RW0, RW1); \
- leaq s3(%rip), RT2; \
- xorq (RT2, RT3, 8), to##1; \
- leaq s1(%rip), RT2; \
- xorq (RT2, RT1, 8), to##1; \
- xorq from##2, RW2; \
- movzbl RW2bl, RT3d; \
- movzbl RW2bh, RT1d; \
- shrq $16, RW2; \
- leaq s8(%rip), RT2; \
- xorq (RT2, RT3, 8), to##2; \
- leaq s6(%rip), RT2; \
- xorq (RT2, RT1, 8), to##2; \
- movzbl RW2bl, RT3d; \
- movzbl RW2bh, RT1d; \
- shrq $16, RW2; \
- leaq s4(%rip), RT2; \
- xorq (RT2, RT3, 8), to##2; \
- leaq s2(%rip), RT2; \
- xorq (RT2, RT1, 8), to##2; \
- movzbl RW2bl, RT3d; \
- movzbl RW2bh, RT1d; \
- shrl $16, RW2d; \
- leaq s7(%rip), RT2; \
- xorq (RT2, RT3, 8), to##2; \
- leaq s5(%rip), RT2; \
- xorq (RT2, RT1, 8), to##2; \
- movzbl RW2bl, RT3d; \
- movzbl RW2bh, RT1d; \
- do_movq(RW0, RW2); \
- leaq s3(%rip), RT2; \
- xorq (RT2, RT3, 8), to##2; \
- leaq s1(%rip), RT2; \
- xorq (RT2, RT1, 8), to##2;
-
-#define __movq(src, dst) \
- movq src, dst;
-
-SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
- /* input:
- * %rdi: ctx, round keys
- * %rsi: dst (3 blocks)
- * %rdx: src (3 blocks)
- */
-
- pushq %rbx;
- pushq %r12;
- pushq %r13;
- pushq %r14;
- pushq %r15;
-
- pushq %rsi /* dst */
-
- /* load input */
- movl 0 * 4(%rdx), RL0d;
- movl 1 * 4(%rdx), RR0d;
- movl 2 * 4(%rdx), RL1d;
- movl 3 * 4(%rdx), RR1d;
- movl 4 * 4(%rdx), RL2d;
- movl 5 * 4(%rdx), RR2d;
-
- bswapl RL0d;
- bswapl RR0d;
- bswapl RL1d;
- bswapl RR1d;
- bswapl RL2d;
- bswapl RR2d;
-
- initial_permutation3(RL, RR);
-
- movq 0(CTX), RW0;
- movq RW0, RW1;
- movq RW0, RW2;
-
- round3(0, RR, RL, load_next_key, __movq);
- round3(1, RL, RR, load_next_key, __movq);
- round3(2, RR, RL, load_next_key, __movq);
- round3(3, RL, RR, load_next_key, __movq);
- round3(4, RR, RL, load_next_key, __movq);
- round3(5, RL, RR, load_next_key, __movq);
- round3(6, RR, RL, load_next_key, __movq);
- round3(7, RL, RR, load_next_key, __movq);
- round3(8, RR, RL, load_next_key, __movq);
- round3(9, RL, RR, load_next_key, __movq);
- round3(10, RR, RL, load_next_key, __movq);
- round3(11, RL, RR, load_next_key, __movq);
- round3(12, RR, RL, load_next_key, __movq);
- round3(13, RL, RR, load_next_key, __movq);
- round3(14, RR, RL, load_next_key, __movq);
- round3(15, RL, RR, load_next_key, __movq);
-
- round3(16+0, RL, RR, load_next_key, __movq);
- round3(16+1, RR, RL, load_next_key, __movq);
- round3(16+2, RL, RR, load_next_key, __movq);
- round3(16+3, RR, RL, load_next_key, __movq);
- round3(16+4, RL, RR, load_next_key, __movq);
- round3(16+5, RR, RL, load_next_key, __movq);
- round3(16+6, RL, RR, load_next_key, __movq);
- round3(16+7, RR, RL, load_next_key, __movq);
- round3(16+8, RL, RR, load_next_key, __movq);
- round3(16+9, RR, RL, load_next_key, __movq);
- round3(16+10, RL, RR, load_next_key, __movq);
- round3(16+11, RR, RL, load_next_key, __movq);
- round3(16+12, RL, RR, load_next_key, __movq);
- round3(16+13, RR, RL, load_next_key, __movq);
- round3(16+14, RL, RR, load_next_key, __movq);
- round3(16+15, RR, RL, load_next_key, __movq);
-
- round3(32+0, RR, RL, load_next_key, __movq);
- round3(32+1, RL, RR, load_next_key, __movq);
- round3(32+2, RR, RL, load_next_key, __movq);
- round3(32+3, RL, RR, load_next_key, __movq);
- round3(32+4, RR, RL, load_next_key, __movq);
- round3(32+5, RL, RR, load_next_key, __movq);
- round3(32+6, RR, RL, load_next_key, __movq);
- round3(32+7, RL, RR, load_next_key, __movq);
- round3(32+8, RR, RL, load_next_key, __movq);
- round3(32+9, RL, RR, load_next_key, __movq);
- round3(32+10, RR, RL, load_next_key, __movq);
- round3(32+11, RL, RR, load_next_key, __movq);
- round3(32+12, RR, RL, load_next_key, __movq);
- round3(32+13, RL, RR, load_next_key, __movq);
- round3(32+14, RR, RL, load_next_key, __movq);
- round3(32+15, RL, RR, dummy2, dummy2);
-
- final_permutation3(RR, RL);
-
- bswapl RR0d;
- bswapl RL0d;
- bswapl RR1d;
- bswapl RL1d;
- bswapl RR2d;
- bswapl RL2d;
-
- popq %rsi /* dst */
- movl RR0d, 0 * 4(%rsi);
- movl RL0d, 1 * 4(%rsi);
- movl RR1d, 2 * 4(%rsi);
- movl RL1d, 3 * 4(%rsi);
- movl RR2d, 4 * 4(%rsi);
- movl RL2d, 5 * 4(%rsi);
-
- popq %r15;
- popq %r14;
- popq %r13;
- popq %r12;
- popq %rbx;
-
- RET;
-SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
-
-.section .rodata, "a", @progbits
-.align 16
-.L_s1:
- .quad 0x0010100001010400, 0x0000000000000000
- .quad 0x0000100000010000, 0x0010100001010404
- .quad 0x0010100001010004, 0x0000100000010404
- .quad 0x0000000000000004, 0x0000100000010000
- .quad 0x0000000000000400, 0x0010100001010400
- .quad 0x0010100001010404, 0x0000000000000400
- .quad 0x0010000001000404, 0x0010100001010004
- .quad 0x0010000001000000, 0x0000000000000004
- .quad 0x0000000000000404, 0x0010000001000400
- .quad 0x0010000001000400, 0x0000100000010400
- .quad 0x0000100000010400, 0x0010100001010000
- .quad 0x0010100001010000, 0x0010000001000404
- .quad 0x0000100000010004, 0x0010000001000004
- .quad 0x0010000001000004, 0x0000100000010004
- .quad 0x0000000000000000, 0x0000000000000404
- .quad 0x0000100000010404, 0x0010000001000000
- .quad 0x0000100000010000, 0x0010100001010404
- .quad 0x0000000000000004, 0x0010100001010000
- .quad 0x0010100001010400, 0x0010000001000000
- .quad 0x0010000001000000, 0x0000000000000400
- .quad 0x0010100001010004, 0x0000100000010000
- .quad 0x0000100000010400, 0x0010000001000004
- .quad 0x0000000000000400, 0x0000000000000004
- .quad 0x0010000001000404, 0x0000100000010404
- .quad 0x0010100001010404, 0x0000100000010004
- .quad 0x0010100001010000, 0x0010000001000404
- .quad 0x0010000001000004, 0x0000000000000404
- .quad 0x0000100000010404, 0x0010100001010400
- .quad 0x0000000000000404, 0x0010000001000400
- .quad 0x0010000001000400, 0x0000000000000000
- .quad 0x0000100000010004, 0x0000100000010400
- .quad 0x0000000000000000, 0x0010100001010004
-.L_s2:
- .quad 0x0801080200100020, 0x0800080000000000
- .quad 0x0000080000000000, 0x0001080200100020
- .quad 0x0001000000100000, 0x0000000200000020
- .quad 0x0801000200100020, 0x0800080200000020
- .quad 0x0800000200000020, 0x0801080200100020
- .quad 0x0801080000100000, 0x0800000000000000
- .quad 0x0800080000000000, 0x0001000000100000
- .quad 0x0000000200000020, 0x0801000200100020
- .quad 0x0001080000100000, 0x0001000200100020
- .quad 0x0800080200000020, 0x0000000000000000
- .quad 0x0800000000000000, 0x0000080000000000
- .quad 0x0001080200100020, 0x0801000000100000
- .quad 0x0001000200100020, 0x0800000200000020
- .quad 0x0000000000000000, 0x0001080000100000
- .quad 0x0000080200000020, 0x0801080000100000
- .quad 0x0801000000100000, 0x0000080200000020
- .quad 0x0000000000000000, 0x0001080200100020
- .quad 0x0801000200100020, 0x0001000000100000
- .quad 0x0800080200000020, 0x0801000000100000
- .quad 0x0801080000100000, 0x0000080000000000
- .quad 0x0801000000100000, 0x0800080000000000
- .quad 0x0000000200000020, 0x0801080200100020
- .quad 0x0001080200100020, 0x0000000200000020
- .quad 0x0000080000000000, 0x0800000000000000
- .quad 0x0000080200000020, 0x0801080000100000
- .quad 0x0001000000100000, 0x0800000200000020
- .quad 0x0001000200100020, 0x0800080200000020
- .quad 0x0800000200000020, 0x0001000200100020
- .quad 0x0001080000100000, 0x0000000000000000
- .quad 0x0800080000000000, 0x0000080200000020
- .quad 0x0800000000000000, 0x0801000200100020
- .quad 0x0801080200100020, 0x0001080000100000
-.L_s3:
- .quad 0x0000002000000208, 0x0000202008020200
- .quad 0x0000000000000000, 0x0000200008020008
- .quad 0x0000002008000200, 0x0000000000000000
- .quad 0x0000202000020208, 0x0000002008000200
- .quad 0x0000200000020008, 0x0000000008000008
- .quad 0x0000000008000008, 0x0000200000020000
- .quad 0x0000202008020208, 0x0000200000020008
- .quad 0x0000200008020000, 0x0000002000000208
- .quad 0x0000000008000000, 0x0000000000000008
- .quad 0x0000202008020200, 0x0000002000000200
- .quad 0x0000202000020200, 0x0000200008020000
- .quad 0x0000200008020008, 0x0000202000020208
- .quad 0x0000002008000208, 0x0000202000020200
- .quad 0x0000200000020000, 0x0000002008000208
- .quad 0x0000000000000008, 0x0000202008020208
- .quad 0x0000002000000200, 0x0000000008000000
- .quad 0x0000202008020200, 0x0000000008000000
- .quad 0x0000200000020008, 0x0000002000000208
- .quad 0x0000200000020000, 0x0000202008020200
- .quad 0x0000002008000200, 0x0000000000000000
- .quad 0x0000002000000200, 0x0000200000020008
- .quad 0x0000202008020208, 0x0000002008000200
- .quad 0x0000000008000008, 0x0000002000000200
- .quad 0x0000000000000000, 0x0000200008020008
- .quad 0x0000002008000208, 0x0000200000020000
- .quad 0x0000000008000000, 0x0000202008020208
- .quad 0x0000000000000008, 0x0000202000020208
- .quad 0x0000202000020200, 0x0000000008000008
- .quad 0x0000200008020000, 0x0000002008000208
- .quad 0x0000002000000208, 0x0000200008020000
- .quad 0x0000202000020208, 0x0000000000000008
- .quad 0x0000200008020008, 0x0000202000020200
-.L_s4:
- .quad 0x1008020000002001, 0x1000020800002001
- .quad 0x1000020800002001, 0x0000000800000000
- .quad 0x0008020800002000, 0x1008000800000001
- .quad 0x1008000000000001, 0x1000020000002001
- .quad 0x0000000000000000, 0x0008020000002000
- .quad 0x0008020000002000, 0x1008020800002001
- .quad 0x1000000800000001, 0x0000000000000000
- .quad 0x0008000800000000, 0x1008000000000001
- .quad 0x1000000000000001, 0x0000020000002000
- .quad 0x0008000000000000, 0x1008020000002001
- .quad 0x0000000800000000, 0x0008000000000000
- .quad 0x1000020000002001, 0x0000020800002000
- .quad 0x1008000800000001, 0x1000000000000001
- .quad 0x0000020800002000, 0x0008000800000000
- .quad 0x0000020000002000, 0x0008020800002000
- .quad 0x1008020800002001, 0x1000000800000001
- .quad 0x0008000800000000, 0x1008000000000001
- .quad 0x0008020000002000, 0x1008020800002001
- .quad 0x1000000800000001, 0x0000000000000000
- .quad 0x0000000000000000, 0x0008020000002000
- .quad 0x0000020800002000, 0x0008000800000000
- .quad 0x1008000800000001, 0x1000000000000001
- .quad 0x1008020000002001, 0x1000020800002001
- .quad 0x1000020800002001, 0x0000000800000000
- .quad 0x1008020800002001, 0x1000000800000001
- .quad 0x1000000000000001, 0x0000020000002000
- .quad 0x1008000000000001, 0x1000020000002001
- .quad 0x0008020800002000, 0x1008000800000001
- .quad 0x1000020000002001, 0x0000020800002000
- .quad 0x0008000000000000, 0x1008020000002001
- .quad 0x0000000800000000, 0x0008000000000000
- .quad 0x0000020000002000, 0x0008020800002000
-.L_s5:
- .quad 0x0000001000000100, 0x0020001002080100
- .quad 0x0020000002080000, 0x0420001002000100
- .quad 0x0000000000080000, 0x0000001000000100
- .quad 0x0400000000000000, 0x0020000002080000
- .quad 0x0400001000080100, 0x0000000000080000
- .quad 0x0020001002000100, 0x0400001000080100
- .quad 0x0420001002000100, 0x0420000002080000
- .quad 0x0000001000080100, 0x0400000000000000
- .quad 0x0020000002000000, 0x0400000000080000
- .quad 0x0400000000080000, 0x0000000000000000
- .quad 0x0400001000000100, 0x0420001002080100
- .quad 0x0420001002080100, 0x0020001002000100
- .quad 0x0420000002080000, 0x0400001000000100
- .quad 0x0000000000000000, 0x0420000002000000
- .quad 0x0020001002080100, 0x0020000002000000
- .quad 0x0420000002000000, 0x0000001000080100
- .quad 0x0000000000080000, 0x0420001002000100
- .quad 0x0000001000000100, 0x0020000002000000
- .quad 0x0400000000000000, 0x0020000002080000
- .quad 0x0420001002000100, 0x0400001000080100
- .quad 0x0020001002000100, 0x0400000000000000
- .quad 0x0420000002080000, 0x0020001002080100
- .quad 0x0400001000080100, 0x0000001000000100
- .quad 0x0020000002000000, 0x0420000002080000
- .quad 0x0420001002080100, 0x0000001000080100
- .quad 0x0420000002000000, 0x0420001002080100
- .quad 0x0020000002080000, 0x0000000000000000
- .quad 0x0400000000080000, 0x0420000002000000
- .quad 0x0000001000080100, 0x0020001002000100
- .quad 0x0400001000000100, 0x0000000000080000
- .quad 0x0000000000000000, 0x0400000000080000
- .quad 0x0020001002080100, 0x0400001000000100
-.L_s6:
- .quad 0x0200000120000010, 0x0204000020000000
- .quad 0x0000040000000000, 0x0204040120000010
- .quad 0x0204000020000000, 0x0000000100000010
- .quad 0x0204040120000010, 0x0004000000000000
- .quad 0x0200040020000000, 0x0004040100000010
- .quad 0x0004000000000000, 0x0200000120000010
- .quad 0x0004000100000010, 0x0200040020000000
- .quad 0x0200000020000000, 0x0000040100000010
- .quad 0x0000000000000000, 0x0004000100000010
- .quad 0x0200040120000010, 0x0000040000000000
- .quad 0x0004040000000000, 0x0200040120000010
- .quad 0x0000000100000010, 0x0204000120000010
- .quad 0x0204000120000010, 0x0000000000000000
- .quad 0x0004040100000010, 0x0204040020000000
- .quad 0x0000040100000010, 0x0004040000000000
- .quad 0x0204040020000000, 0x0200000020000000
- .quad 0x0200040020000000, 0x0000000100000010
- .quad 0x0204000120000010, 0x0004040000000000
- .quad 0x0204040120000010, 0x0004000000000000
- .quad 0x0000040100000010, 0x0200000120000010
- .quad 0x0004000000000000, 0x0200040020000000
- .quad 0x0200000020000000, 0x0000040100000010
- .quad 0x0200000120000010, 0x0204040120000010
- .quad 0x0004040000000000, 0x0204000020000000
- .quad 0x0004040100000010, 0x0204040020000000
- .quad 0x0000000000000000, 0x0204000120000010
- .quad 0x0000000100000010, 0x0000040000000000
- .quad 0x0204000020000000, 0x0004040100000010
- .quad 0x0000040000000000, 0x0004000100000010
- .quad 0x0200040120000010, 0x0000000000000000
- .quad 0x0204040020000000, 0x0200000020000000
- .quad 0x0004000100000010, 0x0200040120000010
-.L_s7:
- .quad 0x0002000000200000, 0x2002000004200002
- .quad 0x2000000004000802, 0x0000000000000000
- .quad 0x0000000000000800, 0x2000000004000802
- .quad 0x2002000000200802, 0x0002000004200800
- .quad 0x2002000004200802, 0x0002000000200000
- .quad 0x0000000000000000, 0x2000000004000002
- .quad 0x2000000000000002, 0x0000000004000000
- .quad 0x2002000004200002, 0x2000000000000802
- .quad 0x0000000004000800, 0x2002000000200802
- .quad 0x2002000000200002, 0x0000000004000800
- .quad 0x2000000004000002, 0x0002000004200000
- .quad 0x0002000004200800, 0x2002000000200002
- .quad 0x0002000004200000, 0x0000000000000800
- .quad 0x2000000000000802, 0x2002000004200802
- .quad 0x0002000000200800, 0x2000000000000002
- .quad 0x0000000004000000, 0x0002000000200800
- .quad 0x0000000004000000, 0x0002000000200800
- .quad 0x0002000000200000, 0x2000000004000802
- .quad 0x2000000004000802, 0x2002000004200002
- .quad 0x2002000004200002, 0x2000000000000002
- .quad 0x2002000000200002, 0x0000000004000000
- .quad 0x0000000004000800, 0x0002000000200000
- .quad 0x0002000004200800, 0x2000000000000802
- .quad 0x2002000000200802, 0x0002000004200800
- .quad 0x2000000000000802, 0x2000000004000002
- .quad 0x2002000004200802, 0x0002000004200000
- .quad 0x0002000000200800, 0x0000000000000000
- .quad 0x2000000000000002, 0x2002000004200802
- .quad 0x0000000000000000, 0x2002000000200802
- .quad 0x0002000004200000, 0x0000000000000800
- .quad 0x2000000004000002, 0x0000000004000800
- .quad 0x0000000000000800, 0x2002000000200002
-.L_s8:
- .quad 0x0100010410001000, 0x0000010000001000
- .quad 0x0000000000040000, 0x0100010410041000
- .quad 0x0100000010000000, 0x0100010410001000
- .quad 0x0000000400000000, 0x0100000010000000
- .quad 0x0000000400040000, 0x0100000010040000
- .quad 0x0100010410041000, 0x0000010000041000
- .quad 0x0100010010041000, 0x0000010400041000
- .quad 0x0000010000001000, 0x0000000400000000
- .quad 0x0100000010040000, 0x0100000410000000
- .quad 0x0100010010001000, 0x0000010400001000
- .quad 0x0000010000041000, 0x0000000400040000
- .quad 0x0100000410040000, 0x0100010010041000
- .quad 0x0000010400001000, 0x0000000000000000
- .quad 0x0000000000000000, 0x0100000410040000
- .quad 0x0100000410000000, 0x0100010010001000
- .quad 0x0000010400041000, 0x0000000000040000
- .quad 0x0000010400041000, 0x0000000000040000
- .quad 0x0100010010041000, 0x0000010000001000
- .quad 0x0000000400000000, 0x0100000410040000
- .quad 0x0000010000001000, 0x0000010400041000
- .quad 0x0100010010001000, 0x0000000400000000
- .quad 0x0100000410000000, 0x0100000010040000
- .quad 0x0100000410040000, 0x0100000010000000
- .quad 0x0000000000040000, 0x0100010410001000
- .quad 0x0000000000000000, 0x0100010410041000
- .quad 0x0000000400040000, 0x0100000410000000
- .quad 0x0100000010040000, 0x0100010010001000
- .quad 0x0100010410001000, 0x0000000000000000
- .quad 0x0100010410041000, 0x0000010000041000
- .quad 0x0000010000041000, 0x0000010400001000
- .quad 0x0000010400001000, 0x0000000400040000
- .quad 0x0100000010000000, 0x0100010010041000
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
deleted file mode 100644
index 34600f90d8a6..000000000000
--- a/arch/x86/crypto/des3_ede_glue.c
+++ /dev/null
@@ -1,391 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Glue Code for assembler optimized version of 3DES
- *
- * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <crypto/algapi.h>
-#include <crypto/des.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-struct des3_ede_x86_ctx {
- struct des3_ede_ctx enc;
- struct des3_ede_ctx dec;
-};
-
-/* regular block cipher functions */
-asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst,
- const u8 *src);
-
-/* 3-way parallel cipher functions */
-asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst,
- const u8 *src);
-
-static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- u32 *enc_ctx = ctx->enc.expkey;
-
- des3_ede_x86_64_crypt_blk(enc_ctx, dst, src);
-}
-
-static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- u32 *dec_ctx = ctx->dec.expkey;
-
- des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
-}
-
-static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- u32 *dec_ctx = ctx->dec.expkey;
-
- des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src);
-}
-
-static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static int ecb_crypt(struct skcipher_request *req, const u32 *expkey)
-{
- const unsigned int bsize = DES3_EDE_BLOCK_SIZE;
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes)) {
- const u8 *wsrc = walk.src.virt.addr;
- u8 *wdst = walk.dst.virt.addr;
-
- /* Process three block batch */
- if (nbytes >= bsize * 3) {
- do {
- des3_ede_x86_64_crypt_blk_3way(expkey, wdst,
- wsrc);
-
- wsrc += bsize * 3;
- wdst += bsize * 3;
- nbytes -= bsize * 3;
- } while (nbytes >= bsize * 3);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int ecb_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return ecb_crypt(req, ctx->enc.expkey);
-}
-
-static int ecb_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return ecb_crypt(req, ctx->dec.expkey);
-}
-
-static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx,
- struct skcipher_walk *walk)
-{
- unsigned int bsize = DES3_EDE_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 *iv = (u64 *)walk->iv;
-
- do {
- *dst = *src ^ *iv;
- des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- *(u64 *)walk->iv = *iv;
- return nbytes;
-}
-
-static int cbc_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while (walk.nbytes) {
- nbytes = __cbc_encrypt(ctx, &walk);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx,
- struct skcipher_walk *walk)
-{
- unsigned int bsize = DES3_EDE_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 ivs[3 - 1];
- u64 last_iv;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process four block batch */
- if (nbytes >= bsize * 3) {
- do {
- nbytes -= bsize * 3 - bsize;
- src -= 3 - 1;
- dst -= 3 - 1;
-
- ivs[0] = src[0];
- ivs[1] = src[1];
-
- des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
-
- dst[1] ^= ivs[0];
- dst[2] ^= ivs[1];
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * 3);
- }
-
- /* Handle leftovers */
- for (;;) {
- des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- *dst ^= *(u64 *)walk->iv;
- *(u64 *)walk->iv = last_iv;
-
- return nbytes;
-}
-
-static int cbc_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while (walk.nbytes) {
- nbytes = __cbc_decrypt(ctx, &walk);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 i, j, tmp;
- int err;
-
- err = des3_ede_expand_key(&ctx->enc, key, keylen);
- if (err == -ENOKEY) {
- if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
- err = -EINVAL;
- else
- err = 0;
- }
-
- if (err) {
- memset(ctx, 0, sizeof(*ctx));
- return err;
- }
-
- /* Fix encryption context for this implementation and form decryption
- * context. */
- j = DES3_EDE_EXPKEY_WORDS - 2;
- for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) {
- tmp = ror32(ctx->enc.expkey[i + 1], 4);
- ctx->enc.expkey[i + 1] = tmp;
-
- ctx->dec.expkey[j + 0] = ctx->enc.expkey[i + 0];
- ctx->dec.expkey[j + 1] = tmp;
- }
-
- return 0;
-}
-
-static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm,
- const u8 *key,
- unsigned int keylen)
-{
- return des3_ede_x86_setkey(&tfm->base, key, keylen);
-}
-
-static struct crypto_alg des3_ede_cipher = {
- .cra_name = "des3_ede",
- .cra_driver_name = "des3_ede-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = DES3_EDE_KEY_SIZE,
- .cia_max_keysize = DES3_EDE_KEY_SIZE,
- .cia_setkey = des3_ede_x86_setkey,
- .cia_encrypt = des3_ede_x86_encrypt,
- .cia_decrypt = des3_ede_x86_decrypt,
- }
- }
-};
-
-static struct skcipher_alg des3_ede_skciphers[] = {
- {
- .base.cra_name = "ecb(des3_ede)",
- .base.cra_driver_name = "ecb-des3_ede-asm",
- .base.cra_priority = 300,
- .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .setkey = des3_ede_x86_setkey_skcipher,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- }, {
- .base.cra_name = "cbc(des3_ede)",
- .base.cra_driver_name = "cbc-des3_ede-asm",
- .base.cra_priority = 300,
- .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .setkey = des3_ede_x86_setkey_skcipher,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- }
-};
-
-static bool is_blacklisted_cpu(void)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return false;
-
- if (boot_cpu_data.x86 == 0x0f) {
- /*
- * On Pentium 4, des3_ede-x86_64 is slower than generic C
- * implementation because use of 64bit rotates (which are really
- * slow on P4). Therefore blacklist P4s.
- */
- return true;
- }
-
- return false;
-}
-
-static int force;
-module_param(force, int, 0);
-MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
-
-static int __init des3_ede_x86_init(void)
-{
- int err;
-
- if (!force && is_blacklisted_cpu()) {
- pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
- return -ENODEV;
- }
-
- err = crypto_register_alg(&des3_ede_cipher);
- if (err)
- return err;
-
- err = crypto_register_skciphers(des3_ede_skciphers,
- ARRAY_SIZE(des3_ede_skciphers));
- if (err)
- crypto_unregister_alg(&des3_ede_cipher);
-
- return err;
-}
-
-static void __exit des3_ede_x86_fini(void)
-{
- crypto_unregister_alg(&des3_ede_cipher);
- crypto_unregister_skciphers(des3_ede_skciphers,
- ARRAY_SIZE(des3_ede_skciphers));
-}
-
-module_init(des3_ede_x86_init);
-module_exit(des3_ede_x86_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized");
-MODULE_ALIAS_CRYPTO("des3_ede");
-MODULE_ALIAS_CRYPTO("des3_ede-asm");
-MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
deleted file mode 100644
index c4fbaa82ed7a..000000000000
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ /dev/null
@@ -1,133 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
- * instructions. This file contains accelerated part of ghash
- * implementation. More information about PCLMULQDQ can be found at:
- *
- * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
- *
- * Copyright (c) 2009 Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- * Vinodh Gopal
- * Erdinc Ozturk
- * Deniz Karakoyunlu
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
-.align 16
-.Lbswap_mask:
- .octa 0x000102030405060708090a0b0c0d0e0f
-
-#define DATA %xmm0
-#define SHASH %xmm1
-#define T1 %xmm2
-#define T2 %xmm3
-#define T3 %xmm4
-#define BSWAP %xmm5
-#define IN1 %xmm6
-
-.text
-
-/*
- * __clmul_gf128mul_ble: internal ABI
- * input:
- * DATA: operand1
- * SHASH: operand2, hash_key << 1 mod poly
- * output:
- * DATA: operand1 * operand2 mod poly
- * changed:
- * T1
- * T2
- * T3
- */
-SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
- movaps DATA, T1
- pshufd $0b01001110, DATA, T2
- pshufd $0b01001110, SHASH, T3
- pxor DATA, T2
- pxor SHASH, T3
-
- pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0
- pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1
- pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0)
- pxor DATA, T2
- pxor T1, T2 # T2 = a0 * b1 + a1 * b0
-
- movaps T2, T3
- pslldq $8, T3
- psrldq $8, T2
- pxor T3, DATA
- pxor T2, T1 # <T1:DATA> is result of
- # carry-less multiplication
-
- # first phase of the reduction
- movaps DATA, T3
- psllq $1, T3
- pxor DATA, T3
- psllq $5, T3
- pxor DATA, T3
- psllq $57, T3
- movaps T3, T2
- pslldq $8, T2
- psrldq $8, T3
- pxor T2, DATA
- pxor T3, T1
-
- # second phase of the reduction
- movaps DATA, T2
- psrlq $5, T2
- pxor DATA, T2
- psrlq $1, T2
- pxor DATA, T2
- psrlq $1, T2
- pxor T2, T1
- pxor T1, DATA
- RET
-SYM_FUNC_END(__clmul_gf128mul_ble)
-
-/* void clmul_ghash_mul(char *dst, const le128 *shash) */
-SYM_FUNC_START(clmul_ghash_mul)
- FRAME_BEGIN
- movups (%rdi), DATA
- movups (%rsi), SHASH
- movaps .Lbswap_mask(%rip), BSWAP
- pshufb BSWAP, DATA
- call __clmul_gf128mul_ble
- pshufb BSWAP, DATA
- movups DATA, (%rdi)
- FRAME_END
- RET
-SYM_FUNC_END(clmul_ghash_mul)
-
-/*
- * int clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- * const le128 *shash);
- */
-SYM_FUNC_START(clmul_ghash_update)
- FRAME_BEGIN
- cmp $16, %rdx
- jb .Lupdate_just_ret # check length
- movaps .Lbswap_mask(%rip), BSWAP
- movups (%rdi), DATA
- movups (%rcx), SHASH
- pshufb BSWAP, DATA
-.align 4
-.Lupdate_loop:
- movups (%rsi), IN1
- pshufb BSWAP, IN1
- pxor IN1, DATA
- call __clmul_gf128mul_ble
- sub $16, %rdx
- add $16, %rsi
- cmp $16, %rdx
- jge .Lupdate_loop
- pshufb BSWAP, DATA
- movups DATA, (%rdi)
-.Lupdate_just_ret:
- mov %rdx, %rax
- FRAME_END
- RET
-SYM_FUNC_END(clmul_ghash_update)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
deleted file mode 100644
index aea5d4d06be7..000000000000
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ /dev/null
@@ -1,163 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
- * instructions. This file contains glue code.
- *
- * Copyright (c) 2009 Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- */
-
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ghash.h>
-#include <crypto/internal/hash.h>
-#include <crypto/utils.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/unaligned.h>
-
-asmlinkage void clmul_ghash_mul(char *dst, const le128 *shash);
-
-asmlinkage int clmul_ghash_update(char *dst, const char *src,
- unsigned int srclen, const le128 *shash);
-
-struct x86_ghash_ctx {
- le128 shash;
-};
-
-static int ghash_init(struct shash_desc *desc)
-{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- memset(dctx, 0, sizeof(*dctx));
-
- return 0;
-}
-
-static int ghash_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen)
-{
- struct x86_ghash_ctx *ctx = crypto_shash_ctx(tfm);
- u64 a, b;
-
- if (keylen != GHASH_BLOCK_SIZE)
- return -EINVAL;
-
- /*
- * GHASH maps bits to polynomial coefficients backwards, which makes it
- * hard to implement. But it can be shown that the GHASH multiplication
- *
- * D * K (mod x^128 + x^7 + x^2 + x + 1)
- *
- * (where D is a data block and K is the key) is equivalent to:
- *
- * bitreflect(D) * bitreflect(K) * x^(-127)
- * (mod x^128 + x^127 + x^126 + x^121 + 1)
- *
- * So, the code below precomputes:
- *
- * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
- *
- * ... but in Montgomery form (so that Montgomery multiplication can be
- * used), i.e. with an extra x^128 factor, which means actually:
- *
- * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
- *
- * The within-a-byte part of bitreflect() cancels out GHASH's built-in
- * reflection, and thus bitreflect() is actually a byteswap.
- */
- a = get_unaligned_be64(key);
- b = get_unaligned_be64(key + 8);
- ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
- ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));
- if (a >> 63)
- ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56);
- return 0;
-}
-
-static int ghash_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct x86_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- u8 *dst = dctx->buffer;
- int remain;
-
- kernel_fpu_begin();
- remain = clmul_ghash_update(dst, src, srclen, &ctx->shash);
- kernel_fpu_end();
- return remain;
-}
-
-static void ghash_flush(struct x86_ghash_ctx *ctx, struct ghash_desc_ctx *dctx,
- const u8 *src, unsigned int len)
-{
- u8 *dst = dctx->buffer;
-
- kernel_fpu_begin();
- if (len) {
- crypto_xor(dst, src, len);
- clmul_ghash_mul(dst, &ctx->shash);
- }
- kernel_fpu_end();
-}
-
-static int ghash_finup(struct shash_desc *desc, const u8 *src,
- unsigned int len, u8 *dst)
-{
- struct x86_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- u8 *buf = dctx->buffer;
-
- ghash_flush(ctx, dctx, src, len);
- memcpy(dst, buf, GHASH_BLOCK_SIZE);
-
- return 0;
-}
-
-static struct shash_alg ghash_alg = {
- .digestsize = GHASH_DIGEST_SIZE,
- .init = ghash_init,
- .update = ghash_update,
- .finup = ghash_finup,
- .setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
- .base = {
- .cra_name = "ghash",
- .cra_driver_name = "ghash-pclmulqdqni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct x86_ghash_ctx),
- .cra_module = THIS_MODULE,
- },
-};
-
-static const struct x86_cpu_id pcmul_cpu_id[] = {
- X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
-
-static int __init ghash_pclmulqdqni_mod_init(void)
-{
- if (!x86_match_cpu(pcmul_cpu_id))
- return -ENODEV;
-
- return crypto_register_shash(&ghash_alg);
-}
-
-static void __exit ghash_pclmulqdqni_mod_exit(void)
-{
- crypto_unregister_shash(&ghash_alg);
-}
-
-module_init(ghash_pclmulqdqni_mod_init);
-module_exit(ghash_pclmulqdqni_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
-MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S
deleted file mode 100644
index 503bab450a91..000000000000
--- a/arch/x86/crypto/sm3-avx-asm_64.S
+++ /dev/null
@@ -1,517 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * SM3 AVX accelerated transform.
- * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
- *
- * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
- */
-
-/* Based on SM3 AES/BMI2 accelerated work by libgcrypt at:
- * https://gnupg.org/software/libgcrypt/index.html
- */
-
-#include <linux/linkage.h>
-#include <linux/cfi_types.h>
-#include <asm/frame.h>
-
-/* Context structure */
-
-#define state_h0 0
-#define state_h1 4
-#define state_h2 8
-#define state_h3 12
-#define state_h4 16
-#define state_h5 20
-#define state_h6 24
-#define state_h7 28
-
-/* Constants */
-
-/* Round constant macros */
-
-#define K0 2043430169 /* 0x79cc4519 */
-#define K1 -208106958 /* 0xf3988a32 */
-#define K2 -416213915 /* 0xe7311465 */
-#define K3 -832427829 /* 0xce6228cb */
-#define K4 -1664855657 /* 0x9cc45197 */
-#define K5 965255983 /* 0x3988a32f */
-#define K6 1930511966 /* 0x7311465e */
-#define K7 -433943364 /* 0xe6228cbc */
-#define K8 -867886727 /* 0xcc451979 */
-#define K9 -1735773453 /* 0x988a32f3 */
-#define K10 823420391 /* 0x311465e7 */
-#define K11 1646840782 /* 0x6228cbce */
-#define K12 -1001285732 /* 0xc451979c */
-#define K13 -2002571463 /* 0x88a32f39 */
-#define K14 289824371 /* 0x11465e73 */
-#define K15 579648742 /* 0x228cbce6 */
-#define K16 -1651869049 /* 0x9d8a7a87 */
-#define K17 991229199 /* 0x3b14f50f */
-#define K18 1982458398 /* 0x7629ea1e */
-#define K19 -330050500 /* 0xec53d43c */
-#define K20 -660100999 /* 0xd8a7a879 */
-#define K21 -1320201997 /* 0xb14f50f3 */
-#define K22 1654563303 /* 0x629ea1e7 */
-#define K23 -985840690 /* 0xc53d43ce */
-#define K24 -1971681379 /* 0x8a7a879d */
-#define K25 351604539 /* 0x14f50f3b */
-#define K26 703209078 /* 0x29ea1e76 */
-#define K27 1406418156 /* 0x53d43cec */
-#define K28 -1482130984 /* 0xa7a879d8 */
-#define K29 1330705329 /* 0x4f50f3b1 */
-#define K30 -1633556638 /* 0x9ea1e762 */
-#define K31 1027854021 /* 0x3d43cec5 */
-#define K32 2055708042 /* 0x7a879d8a */
-#define K33 -183551212 /* 0xf50f3b14 */
-#define K34 -367102423 /* 0xea1e7629 */
-#define K35 -734204845 /* 0xd43cec53 */
-#define K36 -1468409689 /* 0xa879d8a7 */
-#define K37 1358147919 /* 0x50f3b14f */
-#define K38 -1578671458 /* 0xa1e7629e */
-#define K39 1137624381 /* 0x43cec53d */
-#define K40 -2019718534 /* 0x879d8a7a */
-#define K41 255530229 /* 0x0f3b14f5 */
-#define K42 511060458 /* 0x1e7629ea */
-#define K43 1022120916 /* 0x3cec53d4 */
-#define K44 2044241832 /* 0x79d8a7a8 */
-#define K45 -206483632 /* 0xf3b14f50 */
-#define K46 -412967263 /* 0xe7629ea1 */
-#define K47 -825934525 /* 0xcec53d43 */
-#define K48 -1651869049 /* 0x9d8a7a87 */
-#define K49 991229199 /* 0x3b14f50f */
-#define K50 1982458398 /* 0x7629ea1e */
-#define K51 -330050500 /* 0xec53d43c */
-#define K52 -660100999 /* 0xd8a7a879 */
-#define K53 -1320201997 /* 0xb14f50f3 */
-#define K54 1654563303 /* 0x629ea1e7 */
-#define K55 -985840690 /* 0xc53d43ce */
-#define K56 -1971681379 /* 0x8a7a879d */
-#define K57 351604539 /* 0x14f50f3b */
-#define K58 703209078 /* 0x29ea1e76 */
-#define K59 1406418156 /* 0x53d43cec */
-#define K60 -1482130984 /* 0xa7a879d8 */
-#define K61 1330705329 /* 0x4f50f3b1 */
-#define K62 -1633556638 /* 0x9ea1e762 */
-#define K63 1027854021 /* 0x3d43cec5 */
-
-/* Register macros */
-
-#define RSTATE %rdi
-#define RDATA %rsi
-#define RNBLKS %rdx
-
-#define t0 %eax
-#define t1 %ebx
-#define t2 %ecx
-
-#define a %r8d
-#define b %r9d
-#define c %r10d
-#define d %r11d
-#define e %r12d
-#define f %r13d
-#define g %r14d
-#define h %r15d
-
-#define W0 %xmm0
-#define W1 %xmm1
-#define W2 %xmm2
-#define W3 %xmm3
-#define W4 %xmm4
-#define W5 %xmm5
-
-#define XTMP0 %xmm6
-#define XTMP1 %xmm7
-#define XTMP2 %xmm8
-#define XTMP3 %xmm9
-#define XTMP4 %xmm10
-#define XTMP5 %xmm11
-#define XTMP6 %xmm12
-
-#define BSWAP_REG %xmm15
-
-/* Stack structure */
-
-#define STACK_W_SIZE (32 * 2 * 3)
-#define STACK_REG_SAVE_SIZE (64)
-
-#define STACK_W (0)
-#define STACK_REG_SAVE (STACK_W + STACK_W_SIZE)
-#define STACK_SIZE (STACK_REG_SAVE + STACK_REG_SAVE_SIZE)
-
-/* Instruction helpers. */
-
-#define roll2(v, reg) \
- roll $(v), reg;
-
-#define roll3mov(v, src, dst) \
- movl src, dst; \
- roll $(v), dst;
-
-#define roll3(v, src, dst) \
- rorxl $(32-(v)), src, dst;
-
-#define addl2(a, out) \
- leal (a, out), out;
-
-/* Round function macros. */
-
-#define GG1(x, y, z, o, t) \
- movl x, o; \
- xorl y, o; \
- xorl z, o;
-
-#define FF1(x, y, z, o, t) GG1(x, y, z, o, t)
-
-#define GG2(x, y, z, o, t) \
- andnl z, x, o; \
- movl y, t; \
- andl x, t; \
- addl2(t, o);
-
-#define FF2(x, y, z, o, t) \
- movl y, o; \
- xorl x, o; \
- movl y, t; \
- andl x, t; \
- andl z, o; \
- xorl t, o;
-
-#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype) \
- /* rol(a, 12) => t0 */ \
- roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 */ \
- /* rol (t0 + e + t), 7) => t1 */ \
- leal K##round(t0, e, 1), t1; \
- roll2(7, t1); \
- /* h + w1 => h */ \
- addl wtype##_W1_ADDR(round, widx), h; \
- /* h + t1 => h */ \
- addl2(t1, h); \
- /* t1 ^ t0 => t0 */ \
- xorl t1, t0; \
- /* w1w2 + d => d */ \
- addl wtype##_W1W2_ADDR(round, widx), d; \
- /* FF##i(a,b,c) => t1 */ \
- FF##i(a, b, c, t1, t2); \
- /* d + t1 => d */ \
- addl2(t1, d); \
- /* GG#i(e,f,g) => t2 */ \
- GG##i(e, f, g, t2, t1); \
- /* h + t2 => h */ \
- addl2(t2, h); \
- /* rol (f, 19) => f */ \
- roll2(19, f); \
- /* d + t0 => d */ \
- addl2(t0, d); \
- /* rol (b, 9) => b */ \
- roll2(9, b); \
- /* P0(h) => h */ \
- roll3(9, h, t2); \
- roll3(17, h, t1); \
- xorl t2, h; \
- xorl t1, h;
-
-#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \
- R(1, a, b, c, d, e, f, g, h, round, widx, wtype)
-
-#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \
- R(2, a, b, c, d, e, f, g, h, round, widx, wtype)
-
-/* Input expansion macros. */
-
-/* Byte-swapped input address. */
-#define IW_W_ADDR(round, widx, offs) \
- (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp)
-
-/* Expanded input address. */
-#define XW_W_ADDR(round, widx, offs) \
- (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))(%rsp)
-
-/* Rounds 1-12, byte-swapped input block addresses. */
-#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 0)
-#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32)
-
-/* Rounds 1-12, expanded input block addresses. */
-#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0)
-#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32)
-
-/* Input block loading. */
-#define LOAD_W_XMM_1() \
- vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */ \
- vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */ \
- vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */ \
- vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */ \
- vpshufb BSWAP_REG, XTMP0, XTMP0; \
- vpshufb BSWAP_REG, XTMP1, XTMP1; \
- vpshufb BSWAP_REG, XTMP2, XTMP2; \
- vpshufb BSWAP_REG, XTMP3, XTMP3; \
- vpxor XTMP0, XTMP1, XTMP4; \
- vpxor XTMP1, XTMP2, XTMP5; \
- vpxor XTMP2, XTMP3, XTMP6; \
- leaq 64(RDATA), RDATA; \
- vmovdqa XTMP0, IW_W1_ADDR(0, 0); \
- vmovdqa XTMP4, IW_W1W2_ADDR(0, 0); \
- vmovdqa XTMP1, IW_W1_ADDR(4, 0); \
- vmovdqa XTMP5, IW_W1W2_ADDR(4, 0);
-
-#define LOAD_W_XMM_2() \
- vmovdqa XTMP2, IW_W1_ADDR(8, 0); \
- vmovdqa XTMP6, IW_W1W2_ADDR(8, 0);
-
-#define LOAD_W_XMM_3() \
- vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */ \
- vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */ \
- vmovdqa XTMP1, W2; /* W2: xx, w6, w5, w4 */ \
- vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */ \
- vpalignr $8, XTMP2, XTMP3, W4; /* W4: xx, w12, w11, w10 */ \
- vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */
-
-/* Message scheduling. Note: 3 words per XMM register. */
-#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5) \
- /* Load (w[i - 16]) => XTMP0 */ \
- vpshufd $0b10111111, w0, XTMP0; \
- vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */ \
- /* Load (w[i - 13]) => XTMP1 */ \
- vpshufd $0b10111111, w1, XTMP1; \
- vpalignr $12, XTMP1, w2, XTMP1; \
- /* w[i - 9] == w3 */ \
- /* XMM3 ^ XTMP0 => XTMP0 */ \
- vpxor w3, XTMP0, XTMP0;
-
-#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5) \
- /* w[i - 3] == w5 */ \
- /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \
- vpslld $15, w5, XTMP2; \
- vpsrld $(32-15), w5, XTMP3; \
- vpxor XTMP2, XTMP3, XTMP3; \
- vpxor XTMP3, XTMP0, XTMP0; \
- /* rol(XTMP1, 7) => XTMP1 */ \
- vpslld $7, XTMP1, XTMP5; \
- vpsrld $(32-7), XTMP1, XTMP1; \
- vpxor XTMP5, XTMP1, XTMP1; \
- /* XMM4 ^ XTMP1 => XTMP1 */ \
- vpxor w4, XTMP1, XTMP1; \
- /* w[i - 6] == XMM4 */ \
- /* P1(XTMP0) ^ XTMP1 => XMM0 */ \
- vpslld $15, XTMP0, XTMP5; \
- vpsrld $(32-15), XTMP0, XTMP6; \
- vpslld $23, XTMP0, XTMP2; \
- vpsrld $(32-23), XTMP0, XTMP3; \
- vpxor XTMP0, XTMP1, XTMP1; \
- vpxor XTMP6, XTMP5, XTMP5; \
- vpxor XTMP3, XTMP2, XTMP2; \
- vpxor XTMP2, XTMP5, XTMP5; \
- vpxor XTMP5, XTMP1, w0;
-
-#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5) \
- /* W1 in XMM12 */ \
- vpshufd $0b10111111, w4, XTMP4; \
- vpalignr $12, XTMP4, w5, XTMP4; \
- vmovdqa XTMP4, XW_W1_ADDR((round), 0); \
- /* W1 ^ W2 => XTMP1 */ \
- vpxor w0, XTMP4, XTMP1; \
- vmovdqa XTMP1, XW_W1W2_ADDR((round), 0);
-
-
-.section .rodata.cst16, "aM", @progbits, 16
-.align 16
-
-.Lbe32mask:
- .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
-
-.text
-
-/*
- * Transform nblocks*64 bytes (nblocks*16 32-bit words) at DATA.
- *
- * void sm3_transform_avx(struct sm3_state *state,
- * const u8 *data, int nblocks);
- */
-SYM_TYPED_FUNC_START(sm3_transform_avx)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: data (64*nblks bytes)
- * %rdx: nblocks
- */
- vzeroupper;
-
- pushq %rbp;
- movq %rsp, %rbp;
-
- movq %rdx, RNBLKS;
-
- subq $STACK_SIZE, %rsp;
- andq $(~63), %rsp;
-
- movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp);
- movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp);
- movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp);
- movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp);
- movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp);
-
- vmovdqa .Lbe32mask (%rip), BSWAP_REG;
-
- /* Get the values of the chaining variables. */
- movl state_h0(RSTATE), a;
- movl state_h1(RSTATE), b;
- movl state_h2(RSTATE), c;
- movl state_h3(RSTATE), d;
- movl state_h4(RSTATE), e;
- movl state_h5(RSTATE), f;
- movl state_h6(RSTATE), g;
- movl state_h7(RSTATE), h;
-
-.align 16
-.Loop:
- /* Load data part1. */
- LOAD_W_XMM_1();
-
- leaq -1(RNBLKS), RNBLKS;
-
- /* Transform 0-3 + Load data part2. */
- R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2();
- R1(d, a, b, c, h, e, f, g, 1, 1, IW);
- R1(c, d, a, b, g, h, e, f, 2, 2, IW);
- R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3();
-
- /* Transform 4-7 + Precalc 12-14. */
- R1(a, b, c, d, e, f, g, h, 4, 0, IW);
- R1(d, a, b, c, h, e, f, g, 5, 1, IW);
- R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5);
- R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5);
-
- /* Transform 8-11 + Precalc 12-17. */
- R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5);
- R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0);
- R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0);
- R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0);
-
- /* Transform 12-14 + Precalc 18-20 */
- R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1);
- R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1);
- R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1);
-
- /* Transform 15-17 + Precalc 21-23 */
- R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2);
- R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2);
- R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2);
-
- /* Transform 18-20 + Precalc 24-26 */
- R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3);
- R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3);
- R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3);
-
- /* Transform 21-23 + Precalc 27-29 */
- R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4);
- R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4);
- R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4);
-
- /* Transform 24-26 + Precalc 30-32 */
- R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5);
- R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5);
- R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5);
-
- /* Transform 27-29 + Precalc 33-35 */
- R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0);
- R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0);
- R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, W4, W5, W0);
-
- /* Transform 30-32 + Precalc 36-38 */
- R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1);
- R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1);
- R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1);
-
- /* Transform 33-35 + Precalc 39-41 */
- R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2);
- R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2);
- R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2);
-
- /* Transform 36-38 + Precalc 42-44 */
- R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3);
- R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3);
- R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3);
-
- /* Transform 39-41 + Precalc 45-47 */
- R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4);
- R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4);
- R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4);
-
- /* Transform 42-44 + Precalc 48-50 */
- R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5);
- R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5);
- R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5);
-
- /* Transform 45-47 + Precalc 51-53 */
- R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0);
- R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0);
- R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0);
-
- /* Transform 48-50 + Precalc 54-56 */
- R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1);
- R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1);
- R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1);
-
- /* Transform 51-53 + Precalc 57-59 */
- R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2);
- R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2);
- R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2);
-
- /* Transform 54-56 + Precalc 60-62 */
- R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3);
- R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3);
- R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3);
-
- /* Transform 57-59 + Precalc 63 */
- R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4);
- R2(c, d, a, b, g, h, e, f, 58, 1, XW);
- R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4);
-
- /* Transform 60-62 + Precalc 63 */
- R2(a, b, c, d, e, f, g, h, 60, 0, XW);
- R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4);
- R2(c, d, a, b, g, h, e, f, 62, 2, XW);
-
- /* Transform 63 */
- R2(b, c, d, a, f, g, h, e, 63, 0, XW);
-
- /* Update the chaining variables. */
- xorl state_h0(RSTATE), a;
- xorl state_h1(RSTATE), b;
- xorl state_h2(RSTATE), c;
- xorl state_h3(RSTATE), d;
- movl a, state_h0(RSTATE);
- movl b, state_h1(RSTATE);
- movl c, state_h2(RSTATE);
- movl d, state_h3(RSTATE);
- xorl state_h4(RSTATE), e;
- xorl state_h5(RSTATE), f;
- xorl state_h6(RSTATE), g;
- xorl state_h7(RSTATE), h;
- movl e, state_h4(RSTATE);
- movl f, state_h5(RSTATE);
- movl g, state_h6(RSTATE);
- movl h, state_h7(RSTATE);
-
- cmpq $0, RNBLKS;
- jne .Loop;
-
- vzeroall;
-
- movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx;
- movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15;
- movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14;
- movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13;
- movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12;
-
- vmovdqa %xmm0, IW_W1_ADDR(0, 0);
- vmovdqa %xmm0, IW_W1W2_ADDR(0, 0);
- vmovdqa %xmm0, IW_W1_ADDR(4, 0);
- vmovdqa %xmm0, IW_W1W2_ADDR(4, 0);
- vmovdqa %xmm0, IW_W1_ADDR(8, 0);
- vmovdqa %xmm0, IW_W1W2_ADDR(8, 0);
-
- movq %rbp, %rsp;
- popq %rbp;
- RET;
-SYM_FUNC_END(sm3_transform_avx)
diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c
deleted file mode 100644
index 6e8c42b9dc8e..000000000000
--- a/arch/x86/crypto/sm3_avx_glue.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * SM3 Secure Hash Algorithm, AVX assembler accelerated.
- * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
- *
- * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/sm3.h>
-#include <crypto/sm3_base.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-asmlinkage void sm3_transform_avx(struct sm3_state *state,
- const u8 *data, int nblocks);
-
-static int sm3_avx_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- int remain;
-
- /*
- * Make sure struct sm3_state begins directly with the SM3
- * 256-bit internal state, as this is what the asm functions expect.
- */
- BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);
-
- kernel_fpu_begin();
- remain = sm3_base_do_update_blocks(desc, data, len, sm3_transform_avx);
- kernel_fpu_end();
- return remain;
-}
-
-static int sm3_avx_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- kernel_fpu_begin();
- sm3_base_do_finup(desc, data, len, sm3_transform_avx);
- kernel_fpu_end();
- return sm3_base_finish(desc, out);
-}
-
-static struct shash_alg sm3_avx_alg = {
- .digestsize = SM3_DIGEST_SIZE,
- .init = sm3_base_init,
- .update = sm3_avx_update,
- .finup = sm3_avx_finup,
- .descsize = SM3_STATE_SIZE,
- .base = {
- .cra_name = "sm3",
- .cra_driver_name = "sm3-avx",
- .cra_priority = 300,
- .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
- CRYPTO_AHASH_ALG_FINUP_MAX,
- .cra_blocksize = SM3_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init sm3_avx_mod_init(void)
-{
- const char *feature_name;
-
- if (!boot_cpu_has(X86_FEATURE_AVX)) {
- pr_info("AVX instruction are not detected.\n");
- return -ENODEV;
- }
-
- if (!boot_cpu_has(X86_FEATURE_BMI2)) {
- pr_info("BMI2 instruction are not detected.\n");
- return -ENODEV;
- }
-
- if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
- &feature_name)) {
- pr_info("CPU feature '%s' is not supported.\n", feature_name);
- return -ENODEV;
- }
-
- return crypto_register_shash(&sm3_avx_alg);
-}
-
-static void __exit sm3_avx_mod_exit(void)
-{
- crypto_unregister_shash(&sm3_avx_alg);
-}
-
-module_init(sm3_avx_mod_init);
-module_exit(sm3_avx_mod_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
-MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated");
-MODULE_ALIAS_CRYPTO("sm3");
-MODULE_ALIAS_CRYPTO("sm3-avx");
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index 88c757ac8ccd..fbe2d10dd737 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -177,6 +177,16 @@ static noinstr void fred_extint(struct pt_regs *regs)
}
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+noinstr void exc_vmm_communication(struct pt_regs *regs, unsigned long error_code)
+{
+ if (user_mode(regs))
+ return user_exc_vmm_communication(regs, error_code);
+ else
+ return kernel_exc_vmm_communication(regs, error_code);
+}
+#endif
+
static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
{
/* Optimize for #PF. That's the only exception which matters performance wise */
@@ -207,6 +217,10 @@ static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
#ifdef CONFIG_X86_CET
case X86_TRAP_CP: return exc_control_protection(regs, error_code);
#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ case X86_TRAP_VC: return exc_vmm_communication(regs, error_code);
+#endif
+
default: return fred_bad_type(regs, error_code);
}
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8e829575e12f..31b9492fe851 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -247,7 +247,6 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
int nr = syscall_32_enter(regs);
- add_random_kstack_offset();
/*
* Subtlety here: if ptrace pokes something larger than 2^31-1 into
* orig_ax, the int return value truncates it. This matches
@@ -256,6 +255,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
nr = syscall_enter_from_user_mode(regs, nr);
instrumentation_begin();
+ add_random_kstack_offset();
do_syscall_32_irqs_on(regs, nr);
instrumentation_end();
@@ -268,7 +268,6 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
int nr = syscall_32_enter(regs);
int res;
- add_random_kstack_offset();
/*
* This cannot use syscall_enter_from_user_mode() as it has to
* fetch EBP before invoking any of the syscall entry work
@@ -277,6 +276,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
enter_from_user_mode(regs);
instrumentation_begin();
+ add_random_kstack_offset();
local_irq_enable();
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index b6e68ea98b83..71f032504e73 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -86,10 +86,10 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
/* Returns true to return using SYSRET, or false to use IRET */
__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
- add_random_kstack_offset();
nr = syscall_enter_from_user_mode(regs, nr);
instrumentation_begin();
+ add_random_kstack_offset();
if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
/* Invalid system call, but still a system call. */
diff --git a/arch/x86/entry/vdso/common/vclock_gettime.c b/arch/x86/entry/vdso/common/vclock_gettime.c
index 027b7e88d753..57066f346b3f 100644
--- a/arch/x86/entry/vdso/common/vclock_gettime.c
+++ b/arch/x86/entry/vdso/common/vclock_gettime.c
@@ -13,7 +13,7 @@
#include <linux/types.h>
#include <vdso/gettime.h>
-#include "../../../../lib/vdso/gettimeofday.c"
+#include "lib/vdso/gettimeofday.c"
int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
diff --git a/arch/x86/entry/vdso/vdso32/Makefile b/arch/x86/entry/vdso/vdso32/Makefile
index add6afb484ba..ded4fc6a48cd 100644
--- a/arch/x86/entry/vdso/vdso32/Makefile
+++ b/arch/x86/entry/vdso/vdso32/Makefile
@@ -15,6 +15,10 @@ flags-y := -DBUILD_VDSO32 -m32 -mregparm=0
flags-$(CONFIG_X86_64) += -include $(src)/fake_32bit_build.h
flags-remove-y := -m64
+# Checker flags
+CHECKFLAGS := $(subst -m64,-m32,$(CHECKFLAGS))
+CHECKFLAGS := $(subst -D__x86_64__,-D__i386__,$(CHECKFLAGS))
+
# The location of this include matters!
include $(src)/../common/Makefile.include
diff --git a/arch/x86/entry/vdso/vdso32/fake_32bit_build.h b/arch/x86/entry/vdso/vdso32/fake_32bit_build.h
index db1b15f686e3..bc3e549795c3 100644
--- a/arch/x86/entry/vdso/vdso32/fake_32bit_build.h
+++ b/arch/x86/entry/vdso/vdso32/fake_32bit_build.h
@@ -11,6 +11,7 @@
#undef CONFIG_PGTABLE_LEVELS
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
+#undef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
#undef CONFIG_NR_CPUS
#undef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S
index b433353bc8e3..b33fcc501ba3 100644
--- a/arch/x86/entry/vdso/vdso32/sigreturn.S
+++ b/arch/x86/entry/vdso/vdso32/sigreturn.S
@@ -35,9 +35,38 @@
#endif
.endm
+/*
+ * WARNING:
+ *
+ * A bug in the libgcc unwinder as of at least gcc 15.2 (2026) means that
+ * the unwinder fails to recognize the signal frame flag.
+ *
+ * There is a hacky legacy fallback path in libgcc which ends up
+ * getting invoked instead. It happens to work as long as BOTH of the
+ * following conditions are true:
+ *
+ * 1. There is at least one byte before each of the sigreturn
+ * functions which falls outside any function. This is enforced by
+ * an explicit nop instruction before the ALIGN.
+ * 2. The code sequences from the entry point up to and including
+ * the int $0x80 below need to match EXACTLY. Do not change them
+ * in any way. The exact byte sequences are:
+ *
+ * __kernel_sigreturn:
+ * 0: 58 pop %eax
+ * 1: b8 77 00 00 00 mov $0x77,%eax
+ * 6: cd 80 int $0x80
+ *
+ * __kernel_rt_sigreturn:
+ * 0: b8 ad 00 00 00 mov $0xad,%eax
+ * 5: cd 80 int $0x80
+ *
+ * For details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124050
+ */
.text
.globl __kernel_sigreturn
.type __kernel_sigreturn,@function
+ nop /* libgcc hack: see comment above */
ALIGN
__kernel_sigreturn:
STARTPROC_SIGNAL_FRAME IA32_SIGFRAME_sigcontext
@@ -52,6 +81,7 @@ SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL)
.globl __kernel_rt_sigreturn
.type __kernel_rt_sigreturn,@function
+ nop /* libgcc hack: see comment above */
ALIGN
__kernel_rt_sigreturn:
STARTPROC_SIGNAL_FRAME IA32_RT_SIGFRAME_sigcontext
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index e7fd7517370f..a6bfcc8243cd 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -88,7 +88,6 @@ static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
switch (vmf->pgoff) {
-#ifdef CONFIG_PARAVIRT_CLOCK
case VDSO_PAGE_PVCLOCK_OFFSET:
{
struct pvclock_vsyscall_time_info *pvti =
@@ -100,8 +99,6 @@ static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
pgprot_decrypted(vma->vm_page_prot));
break;
}
-#endif /* CONFIG_PARAVIRT_CLOCK */
-#ifdef CONFIG_HYPERV_TIMER
case VDSO_PAGE_HVCLOCK_OFFSET:
{
unsigned long pfn = hv_get_tsc_pfn();
@@ -109,7 +106,6 @@ static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
return vmf_insert_pfn(vma, vmf->address, pfn);
break;
}
-#endif /* CONFIG_HYPERV_TIMER */
}
return VM_FAULT_SIGBUS;
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 4bd1e271bb22..ea36de9fa864 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -23,7 +23,7 @@
* soon be no new userspace code that will ever use a vsyscall.
*
* The code in this file emulates vsyscalls when notified of a page
- * fault to a vsyscall address.
+ * fault or a general protection fault to a vsyscall address.
*/
#include <linux/kernel.h>
@@ -62,6 +62,11 @@ static int __init vsyscall_setup(char *str)
else
return -EINVAL;
+ if (cpu_feature_enabled(X86_FEATURE_LASS) && vsyscall_mode == EMULATE) {
+ setup_clear_cpu_cap(X86_FEATURE_LASS);
+ pr_warn_once("x86/cpu: Disabling LASS due to vsyscall=emulate\n");
+ }
+
return 0;
}
@@ -111,48 +116,17 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
}
}
-bool emulate_vsyscall(unsigned long error_code,
- struct pt_regs *regs, unsigned long address)
+static bool __emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
unsigned long caller;
int vsyscall_nr, syscall_nr, tmp;
long ret;
unsigned long orig_dx;
- /* Write faults or kernel-privilege faults never get fixed up. */
- if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
+ /* Confirm that the fault happened in 64-bit user mode */
+ if (!user_64bit_mode(regs))
return false;
- /*
- * Assume that faults at regs->ip are because of an
- * instruction fetch. Return early and avoid
- * emulation for faults during data accesses:
- */
- if (address != regs->ip) {
- /* Failed vsyscall read */
- if (vsyscall_mode == EMULATE)
- return false;
-
- /*
- * User code tried and failed to read the vsyscall page.
- */
- warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
- return false;
- }
-
- /*
- * X86_PF_INSTR is only set when NX is supported. When
- * available, use it to double-check that the emulation code
- * is only being used for instruction fetches:
- */
- if (cpu_feature_enabled(X86_FEATURE_NX))
- WARN_ON_ONCE(!(error_code & X86_PF_INSTR));
-
- /*
- * No point in checking CS -- the only way to get here is a user mode
- * trap to a high address, which means that we're in 64-bit user code.
- */
-
if (vsyscall_mode == NONE) {
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall attempted with vsyscall=none");
@@ -280,6 +254,53 @@ sigsegv:
return true;
}
+bool emulate_vsyscall_pf(unsigned long error_code, struct pt_regs *regs,
+ unsigned long address)
+{
+ /* Write faults or kernel-privilege faults never get fixed up. */
+ if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
+ return false;
+
+ /*
+ * Assume that faults at regs->ip are because of an instruction
+ * fetch. Return early and avoid emulation for faults during
+ * data accesses:
+ */
+ if (address != regs->ip) {
+ /* Failed vsyscall read */
+ if (vsyscall_mode == EMULATE)
+ return false;
+
+ /* User code tried and failed to read the vsyscall page. */
+ warn_bad_vsyscall(KERN_INFO, regs,
+ "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
+ return false;
+ }
+
+ /*
+ * X86_PF_INSTR is only set when NX is supported. When
+ * available, use it to double-check that the emulation code
+ * is only being used for instruction fetches:
+ */
+ if (cpu_feature_enabled(X86_FEATURE_NX))
+ WARN_ON_ONCE(!(error_code & X86_PF_INSTR));
+
+ return __emulate_vsyscall(regs, address);
+}
+
+bool emulate_vsyscall_gp(struct pt_regs *regs)
+{
+ /* Without LASS, vsyscall accesses are expected to generate a #PF */
+ if (!cpu_feature_enabled(X86_FEATURE_LASS))
+ return false;
+
+ /* Emulate only if the RIP points to the vsyscall address */
+ if (!is_vsyscall_vaddr(regs->ip))
+ return false;
+
+ return __emulate_vsyscall(regs, regs->ip);
+}
+
/*
* A pseudo VMA to allow ptrace access for the vsyscall page. This only
* covers the 64bit vsyscall page now. 32bit has a real VMA now and does
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index aca89f23d2e0..e0bd5051db2a 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -32,6 +32,13 @@ static u32 ibs_caps;
/* attr.config2 */
#define IBS_SW_FILTER_MASK 1
+/* attr.config1 */
+#define IBS_OP_CONFIG1_LDLAT_MASK (0xFFFULL << 0)
+#define IBS_OP_CONFIG1_STRMST_MASK (1ULL << 12)
+#define IBS_OP_CONFIG1_STRMST_SHIFT (12)
+
+#define IBS_FETCH_CONFIG1_FETCHLAT_MASK (0x7FFULL << 0)
+
/*
* IBS states:
*
@@ -83,9 +90,11 @@ struct cpu_perf_ibs {
struct perf_ibs {
struct pmu pmu;
unsigned int msr;
+ unsigned int msr2;
u64 config_mask;
u64 cnt_mask;
u64 enable_mask;
+ u64 disable_mask;
u64 valid_mask;
u16 min_period;
u64 max_period;
@@ -274,7 +283,23 @@ static bool perf_ibs_ldlat_event(struct perf_ibs *perf_ibs,
{
return perf_ibs == &perf_ibs_op &&
(ibs_caps & IBS_CAPS_OPLDLAT) &&
- (event->attr.config1 & 0xFFF);
+ (event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK);
+}
+
+static bool perf_ibs_fetch_lat_event(struct perf_ibs *perf_ibs,
+ struct perf_event *event)
+{
+ return perf_ibs == &perf_ibs_fetch &&
+ (ibs_caps & IBS_CAPS_FETCHLAT) &&
+ (event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK);
+}
+
+static bool perf_ibs_strmst_event(struct perf_ibs *perf_ibs,
+ struct perf_event *event)
+{
+ return perf_ibs == &perf_ibs_op &&
+ (ibs_caps & IBS_CAPS_STRMST_RMTSOCKET) &&
+ (event->attr.config1 & IBS_OP_CONFIG1_STRMST_MASK);
}
static int perf_ibs_init(struct perf_event *event)
@@ -289,6 +314,8 @@ static int perf_ibs_init(struct perf_event *event)
return -ENOENT;
config = event->attr.config;
+ hwc->extra_reg.config = 0;
+ hwc->extra_reg.reg = 0;
if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
@@ -304,15 +331,44 @@ static int perf_ibs_init(struct perf_event *event)
event->attr.exclude_idle)
return -EINVAL;
- if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
- (event->attr.exclude_kernel || event->attr.exclude_user ||
- event->attr.exclude_hv))
- return -EINVAL;
-
ret = validate_group(event);
if (ret)
return ret;
+ if (perf_allow_kernel())
+ hwc->flags |= PERF_X86_EVENT_UNPRIVILEGED;
+
+ if (ibs_caps & IBS_CAPS_DIS) {
+ hwc->extra_reg.config &= ~perf_ibs->disable_mask;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+
+ if (ibs_caps & IBS_CAPS_BIT63_FILTER) {
+ if (perf_ibs == &perf_ibs_fetch) {
+ if (event->attr.exclude_kernel) {
+ hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_1;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ if (event->attr.exclude_user) {
+ hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_0;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ } else {
+ if (event->attr.exclude_kernel) {
+ hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_1;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ if (event->attr.exclude_user) {
+ hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_0;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ }
+ } else if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
+ (event->attr.exclude_kernel || event->attr.exclude_user ||
+ event->attr.exclude_hv)) {
+ return -EINVAL;
+ }
+
if (hwc->sample_period) {
if (config & perf_ibs->cnt_mask)
/* raw max_cnt may not be set */
@@ -349,14 +405,37 @@ static int perf_ibs_init(struct perf_event *event)
}
if (perf_ibs_ldlat_event(perf_ibs, event)) {
- u64 ldlat = event->attr.config1 & 0xFFF;
+ u64 ldlat = event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK;
if (ldlat < 128 || ldlat > 2048)
return -EINVAL;
ldlat >>= 7;
- config |= (ldlat - 1) << 59;
- config |= IBS_OP_L3MISSONLY | IBS_OP_LDLAT_EN;
+ config |= (ldlat - 1) << IBS_OP_LDLAT_THRSH_SHIFT;
+
+ config |= IBS_OP_LDLAT_EN;
+ if (cpu_feature_enabled(X86_FEATURE_ZEN5))
+ config |= IBS_OP_L3MISSONLY;
+ }
+
+ if (perf_ibs_fetch_lat_event(perf_ibs, event)) {
+ u64 fetchlat = event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK;
+
+ if (fetchlat < 128 || fetchlat > 1920)
+ return -EINVAL;
+ fetchlat >>= 7;
+
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ hwc->extra_reg.config |= fetchlat << IBS_FETCH_2_FETCHLAT_FILTER_SHIFT;
+ }
+
+ if (perf_ibs_strmst_event(perf_ibs, event)) {
+ u64 strmst = event->attr.config1 & IBS_OP_CONFIG1_STRMST_MASK;
+
+ strmst >>= IBS_OP_CONFIG1_STRMST_SHIFT;
+
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ hwc->extra_reg.config |= strmst << IBS_OP_2_STRM_ST_FILTER_SHIFT;
}
/*
@@ -439,6 +518,9 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask);
wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask);
+
+ if (hwc->extra_reg.reg)
+ wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config);
}
/*
@@ -451,6 +533,11 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
+ if (ibs_caps & IBS_CAPS_DIS) {
+ wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask);
+ return;
+ }
+
config &= ~perf_ibs->cnt_mask;
if (boot_cpu_data.x86 == 0x10)
wrmsrq(hwc->config_base, config);
@@ -488,6 +575,14 @@ static void perf_ibs_start(struct perf_event *event, int flags)
config |= period >> 4;
/*
+ * Reset the IBS_{FETCH|OP}_CTL MSR before updating pcpu->state.
+ * Doing so prevents a race condition in which an NMI from another
+ * source might accidentally activate the event before we enable
+ * it ourselves.
+ */
+ perf_ibs_disable_event(perf_ibs, hwc, 0);
+
+ /*
* Set STARTED before enabling the hardware, such that a subsequent NMI
* must observe it.
*/
@@ -631,6 +726,11 @@ PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11");
PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1");
PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1");
+PMU_EVENT_ATTR_STRING(fetchlat, ibs_fetch_lat_format, "config1:0-10");
+PMU_EVENT_ATTR_STRING(fetchlat, ibs_fetch_lat_cap, "1");
+PMU_EVENT_ATTR_STRING(strmst, ibs_op_strmst_format, "config1:12");
+PMU_EVENT_ATTR_STRING(strmst, ibs_op_strmst_cap, "1");
+PMU_EVENT_ATTR_STRING(rmtsocket, ibs_op_rmtsocket_cap, "1");
static umode_t
zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
@@ -639,6 +739,24 @@ zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int
}
static umode_t
+ibs_fetch_lat_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_FETCHLAT ? attr->mode : 0;
+}
+
+static umode_t
+ibs_op_strmst_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_STRMST_RMTSOCKET ? attr->mode : 0;
+}
+
+static umode_t
+ibs_op_rmtsocket_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_STRMST_RMTSOCKET ? attr->mode : 0;
+}
+
+static umode_t
ibs_op_ldlat_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
return ibs_caps & IBS_CAPS_OPLDLAT ? attr->mode : 0;
@@ -666,6 +784,16 @@ static struct attribute *zen4_ibs_extensions_attrs[] = {
NULL,
};
+static struct attribute *ibs_fetch_lat_format_attrs[] = {
+ &ibs_fetch_lat_format.attr.attr,
+ NULL,
+};
+
+static struct attribute *ibs_fetch_lat_cap_attrs[] = {
+ &ibs_fetch_lat_cap.attr.attr,
+ NULL,
+};
+
static struct attribute *ibs_op_ldlat_cap_attrs[] = {
&ibs_op_ldlat_cap.attr.attr,
NULL,
@@ -676,6 +804,16 @@ static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = {
NULL,
};
+static struct attribute *ibs_op_strmst_cap_attrs[] = {
+ &ibs_op_strmst_cap.attr.attr,
+ NULL,
+};
+
+static struct attribute *ibs_op_rmtsocket_cap_attrs[] = {
+ &ibs_op_rmtsocket_cap.attr.attr,
+ NULL,
+};
+
static struct attribute_group group_fetch_formats = {
.name = "format",
.attrs = fetch_attrs,
@@ -693,6 +831,18 @@ static struct attribute_group group_zen4_ibs_extensions = {
.is_visible = zen4_ibs_extensions_is_visible,
};
+static struct attribute_group group_ibs_fetch_lat_cap = {
+ .name = "caps",
+ .attrs = ibs_fetch_lat_cap_attrs,
+ .is_visible = ibs_fetch_lat_is_visible,
+};
+
+static struct attribute_group group_ibs_fetch_lat_format = {
+ .name = "format",
+ .attrs = ibs_fetch_lat_format_attrs,
+ .is_visible = ibs_fetch_lat_is_visible,
+};
+
static struct attribute_group group_ibs_op_ldlat_cap = {
.name = "caps",
.attrs = ibs_op_ldlat_cap_attrs,
@@ -705,6 +855,18 @@ static struct attribute_group group_ibs_op_dtlb_pgsize_cap = {
.is_visible = ibs_op_dtlb_pgsize_is_visible,
};
+static struct attribute_group group_ibs_op_strmst_cap = {
+ .name = "caps",
+ .attrs = ibs_op_strmst_cap_attrs,
+ .is_visible = ibs_op_strmst_is_visible,
+};
+
+static struct attribute_group group_ibs_op_rmtsocket_cap = {
+ .name = "caps",
+ .attrs = ibs_op_rmtsocket_cap_attrs,
+ .is_visible = ibs_op_rmtsocket_is_visible,
+};
+
static const struct attribute_group *fetch_attr_groups[] = {
&group_fetch_formats,
&empty_caps_group,
@@ -714,6 +876,8 @@ static const struct attribute_group *fetch_attr_groups[] = {
static const struct attribute_group *fetch_attr_update[] = {
&group_fetch_l3missonly,
&group_zen4_ibs_extensions,
+ &group_ibs_fetch_lat_cap,
+ &group_ibs_fetch_lat_format,
NULL,
};
@@ -748,6 +912,11 @@ static struct attribute *ibs_op_ldlat_format_attrs[] = {
NULL,
};
+static struct attribute *ibs_op_strmst_format_attrs[] = {
+ &ibs_op_strmst_format.attr.attr,
+ NULL,
+};
+
static struct attribute_group group_cnt_ctl = {
.name = "format",
.attrs = cnt_ctl_attrs,
@@ -772,6 +941,12 @@ static struct attribute_group group_ibs_op_ldlat_format = {
.is_visible = ibs_op_ldlat_is_visible,
};
+static struct attribute_group group_ibs_op_strmst_format = {
+ .name = "format",
+ .attrs = ibs_op_strmst_format_attrs,
+ .is_visible = ibs_op_strmst_is_visible,
+};
+
static const struct attribute_group *op_attr_update[] = {
&group_cnt_ctl,
&group_op_l3missonly,
@@ -779,6 +954,9 @@ static const struct attribute_group *op_attr_update[] = {
&group_ibs_op_ldlat_cap,
&group_ibs_op_ldlat_format,
&group_ibs_op_dtlb_pgsize_cap,
+ &group_ibs_op_strmst_cap,
+ &group_ibs_op_strmst_format,
+ &group_ibs_op_rmtsocket_cap,
NULL,
};
@@ -795,6 +973,7 @@ static struct perf_ibs perf_ibs_fetch = {
.check_period = perf_ibs_check_period,
},
.msr = MSR_AMD64_IBSFETCHCTL,
+ .msr2 = MSR_AMD64_IBSFETCHCTL2,
.config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN,
.cnt_mask = IBS_FETCH_MAX_CNT,
.enable_mask = IBS_FETCH_ENABLE,
@@ -820,6 +999,7 @@ static struct perf_ibs perf_ibs_op = {
.check_period = perf_ibs_check_period,
},
.msr = MSR_AMD64_IBSOPCTL,
+ .msr2 = MSR_AMD64_IBSOPCTL2,
.config_mask = IBS_OP_MAX_CNT,
.cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
IBS_OP_CUR_CNT_RAND,
@@ -1155,7 +1335,8 @@ static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs,
{
if (event->attr.sample_type & PERF_SAMPLE_RAW ||
perf_ibs_is_mem_sample_type(perf_ibs, event) ||
- perf_ibs_ldlat_event(perf_ibs, event))
+ perf_ibs_ldlat_event(perf_ibs, event) ||
+ perf_ibs_fetch_lat_event(perf_ibs, event))
return perf_ibs->offset_max;
else if (check_rip)
return 3;
@@ -1190,7 +1371,7 @@ static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
op_data.op_brn_ret && kernel_ip(br_target));
}
-static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
+static bool perf_ibs_discard_sample(struct perf_ibs *perf_ibs, struct perf_event *event,
struct pt_regs *regs, struct perf_ibs_data *ibs_data,
int br_target_idx)
{
@@ -1214,12 +1395,10 @@ static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs,
struct perf_ibs_data *ibs_data)
{
if (perf_ibs == &perf_ibs_op) {
- ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18);
ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0;
return;
}
- ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52);
ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0;
}
@@ -1293,8 +1472,20 @@ fail:
* within [128, 2048] range.
*/
if (!op_data3.ld_op || !op_data3.dc_miss ||
- op_data3.dc_miss_lat <= (event->attr.config1 & 0xFFF))
+ op_data3.dc_miss_lat <= (event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK)) {
+ throttle = perf_event_account_interrupt(event);
goto out;
+ }
+ }
+
+ if (perf_ibs_fetch_lat_event(perf_ibs, event)) {
+ union ibs_fetch_ctl fetch_ctl;
+
+ fetch_ctl.val = ibs_data.regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)];
+ if (fetch_ctl.fetch_lat < (event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK)) {
+ throttle = perf_event_account_interrupt(event);
+ goto out;
+ }
}
/*
@@ -1326,15 +1517,18 @@ fail:
regs.flags &= ~PERF_EFLAGS_EXACT;
} else {
/* Workaround for erratum #1197 */
- if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1]))
+ if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) {
+ throttle = perf_event_account_interrupt(event);
goto out;
+ }
set_linear_ip(&regs, ibs_data.regs[1]);
regs.flags |= PERF_EFLAGS_EXACT;
}
- if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
- perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
+ if (((ibs_caps & IBS_CAPS_BIT63_FILTER) ||
+ (event->attr.config2 & IBS_SW_FILTER_MASK)) &&
+ perf_ibs_discard_sample(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
throttle = perf_event_account_interrupt(event);
goto out;
}
@@ -1344,7 +1538,7 @@ fail:
* unprivileged users.
*/
if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
- perf_allow_kernel()) {
+ (hwc->flags & PERF_X86_EVENT_UNPRIVILEGED)) {
perf_ibs_phyaddr_clear(perf_ibs, &ibs_data);
}
@@ -1375,6 +1569,9 @@ fail:
out:
if (!throttle) {
+ if (ibs_caps & IBS_CAPS_DIS)
+ wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask);
+
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
@@ -1446,6 +1643,9 @@ static __init int perf_ibs_fetch_init(void)
if (ibs_caps & IBS_CAPS_ZEN4)
perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;
+ if (ibs_caps & IBS_CAPS_DIS)
+ perf_ibs_fetch.disable_mask = IBS_FETCH_2_DIS;
+
perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
perf_ibs_fetch.pmu.attr_update = fetch_attr_update;
@@ -1467,6 +1667,9 @@ static __init int perf_ibs_op_init(void)
if (ibs_caps & IBS_CAPS_ZEN4)
perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
+ if (ibs_caps & IBS_CAPS_DIS)
+ perf_ibs_op.disable_mask = IBS_OP_2_DIS;
+
perf_ibs_op.pmu.attr_groups = op_attr_groups;
perf_ibs_op.pmu.attr_update = op_attr_update;
@@ -1545,7 +1748,7 @@ EXPORT_SYMBOL(get_ibs_caps);
static inline int get_eilvt(int offset)
{
- return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
+ return !setup_APIC_eilvt(offset, 0, APIC_DELIVERY_MODE_NMI, 1);
}
static inline int put_eilvt(int offset)
@@ -1694,7 +1897,7 @@ static void setup_APIC_ibs(void)
if (offset < 0)
goto failed;
- if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
+ if (!setup_APIC_eilvt(offset, 0, APIC_DELIVERY_MODE_NMI, 0))
return;
failed:
pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
@@ -1707,12 +1910,29 @@ static void clear_APIC_ibs(void)
offset = get_ibs_lvt_offset();
if (offset >= 0)
- setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
+ setup_APIC_eilvt(offset, 0, APIC_DELIVERY_MODE_FIXED, 1);
}
static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
setup_APIC_ibs();
+
+ if (ibs_caps & IBS_CAPS_DIS) {
+ /*
+ * IBS enable sequence:
+ * CTL[En] = 1;
+ * CTL2[Dis] = 0;
+ *
+ * IBS disable sequence:
+ * CTL2[Dis] = 1;
+ *
+ * Set CTL2[Dis] when the CPU comes up. This is needed to make
+ * the enable sequence effective.
+ */
+ wrmsrq(MSR_AMD64_IBSFETCHCTL2, IBS_FETCH_2_DIS);
+ wrmsrq(MSR_AMD64_IBSOPCTL2, IBS_OP_2_DIS);
+ }
+
return 0;
}
@@ -1771,6 +1991,14 @@ static __init int amd_ibs_init(void)
perf_ibs_pm_init();
+#ifdef CONFIG_X86_32
+ /*
+ * IBS_CAPS_BIT63_FILTER is used for exclude_kernel/user filtering,
+ * which obviously won't work for a 32-bit kernel.
+ */
+ caps &= ~IBS_CAPS_BIT63_FILTER;
+#endif
+
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 03ce1bc7ef2e..810ab21ffd99 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1372,14 +1372,17 @@ static void x86_pmu_enable(struct pmu *pmu)
else if (i < n_running)
continue;
- if (hwc->state & PERF_HES_ARCH)
+ cpuc->events[hwc->idx] = event;
+
+ if (hwc->state & PERF_HES_ARCH) {
+ static_call(x86_pmu_set_period)(event);
continue;
+ }
/*
* if cpuc->enabled = 0, then no wrmsr as
* per x86_pmu_enable_event()
*/
- cpuc->events[hwc->idx] = event;
x86_pmu_start(event, PERF_EF_RELOAD);
}
cpuc->n_added = 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index cf3a4fe06ff2..d9488ade0f8e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4628,6 +4628,19 @@ static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event,
event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64);
}
+static inline int intel_set_branch_counter_constr(struct perf_event *event,
+ int *num)
+{
+ if (branch_sample_call_stack(event))
+ return -EINVAL;
+ if (branch_sample_counters(event)) {
+ (*num)++;
+ event->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
+
+ return 0;
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -4698,21 +4711,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
* group, which requires the extra space to store the counters.
*/
leader = event->group_leader;
- if (branch_sample_call_stack(leader))
+ if (intel_set_branch_counter_constr(leader, &num))
return -EINVAL;
- if (branch_sample_counters(leader)) {
- num++;
- leader->hw.dyn_constraint &= x86_pmu.lbr_counters;
- }
leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
for_each_sibling_event(sibling, leader) {
- if (branch_sample_call_stack(sibling))
+ if (intel_set_branch_counter_constr(sibling, &num))
+ return -EINVAL;
+ }
+
+ /* event isn't installed as a sibling yet. */
+ if (event != leader) {
+ if (intel_set_branch_counter_constr(event, &num))
return -EINVAL;
- if (branch_sample_counters(sibling)) {
- num++;
- sibling->hw.dyn_constraint &= x86_pmu.lbr_counters;
- }
}
if (num > fls(x86_pmu.lbr_counters))
@@ -4844,8 +4855,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask);
if (leader->nr_siblings) {
- for_each_sibling_event(sibling, leader)
- intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask);
+ for_each_sibling_event(sibling, leader) {
+ if (is_x86_event(sibling))
+ intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask);
+ }
}
if (leader != event)
@@ -5770,7 +5783,7 @@ static void __intel_pmu_check_dyn_constr(struct event_constraint *constr,
}
if (check_fail) {
- pr_info("The two events 0x%llx and 0x%llx may not be "
+ pr_warn("The two events 0x%llx and 0x%llx may not be "
"fully scheduled under some circumstances as "
"%s.\n",
c1->code, c2->code, dyn_constr_type_name[type]);
@@ -5783,6 +5796,7 @@ static void intel_pmu_check_dyn_constr(struct pmu *pmu,
struct event_constraint *constr,
u64 cntr_mask)
{
+ u64 gp_mask = GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
enum dyn_constr_type i;
u64 mask;
@@ -5797,20 +5811,25 @@ static void intel_pmu_check_dyn_constr(struct pmu *pmu,
mask = x86_pmu.lbr_counters;
break;
case DYN_CONSTR_ACR_CNTR:
- mask = hybrid(pmu, acr_cntr_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+ mask = hybrid(pmu, acr_cntr_mask64) & gp_mask;
break;
case DYN_CONSTR_ACR_CAUSE:
- if (hybrid(pmu, acr_cntr_mask64) == hybrid(pmu, acr_cause_mask64))
+ if (hybrid(pmu, acr_cntr_mask64) ==
+ hybrid(pmu, acr_cause_mask64))
continue;
- mask = hybrid(pmu, acr_cause_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+ mask = hybrid(pmu, acr_cause_mask64) & gp_mask;
break;
case DYN_CONSTR_PEBS:
- if (x86_pmu.arch_pebs)
- mask = hybrid(pmu, arch_pebs_cap).counters;
+ if (x86_pmu.arch_pebs) {
+ mask = hybrid(pmu, arch_pebs_cap).counters &
+ gp_mask;
+ }
break;
case DYN_CONSTR_PDIST:
- if (x86_pmu.arch_pebs)
- mask = hybrid(pmu, arch_pebs_cap).pdists;
+ if (x86_pmu.arch_pebs) {
+ mask = hybrid(pmu, arch_pebs_cap).pdists &
+ gp_mask;
+ }
break;
default:
pr_warn("Unsupported dynamic constraint type %d\n", i);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 5027afc97b65..7f0d515c07c5 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -345,12 +345,12 @@ static u64 parse_omr_data_source(u8 dse)
if (omr.omr_remote)
val |= REM;
- val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
-
if (omr.omr_source == 0x2) {
- u8 snoop = omr.omr_snoop | omr.omr_promoted;
+ u8 snoop = omr.omr_snoop | (omr.omr_promoted << 1);
- if (snoop == 0x0)
+ if (omr.omr_hitm)
+ val |= P(SNOOP, HITM);
+ else if (snoop == 0x0)
val |= P(SNOOP, NA);
else if (snoop == 0x1)
val |= P(SNOOP, MISS);
@@ -359,7 +359,10 @@ static u64 parse_omr_data_source(u8 dse)
else if (snoop == 0x3)
val |= P(SNOOP, NONE);
} else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) {
+ val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0;
+ } else {
+ val |= P(SNOOP, NONE);
}
return val;
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index e5fd7367e45d..02bfdb77158b 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -1367,14 +1367,14 @@ static __initconst const struct x86_pmu p4_pmu = {
__init int p4_pmu_init(void)
{
- unsigned int low, high;
+ unsigned int misc;
int i, reg;
/* If we get stripped -- indexing fails */
BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
- rdmsr(MSR_IA32_MISC_ENABLE, low, high);
- if (!(low & (1 << 7))) {
+ rdmsrq(MSR_IA32_MISC_ENABLE, misc);
+ if (!(misc & MSR_IA32_MISC_ENABLE_EMON)) {
pr_cont("unsupported Netburst CPU model %d ",
boot_cpu_data.x86_model);
return -ENODEV;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 44524a387c58..b5726b50e77d 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1591,7 +1591,6 @@ void intel_pt_handle_vmx(int on)
local_irq_restore(flags);
}
-EXPORT_SYMBOL_FOR_KVM(intel_pt_handle_vmx);
/*
* PMU callbacks
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 786bd51a0d89..e9cc1ba921c5 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -67,6 +67,7 @@ int uncore_die_to_segment(int die)
return bus ? pci_domain_nr(bus) : -EINVAL;
}
+/* Note: This API can only be used when NUMA information is available. */
int uncore_device_to_die(struct pci_dev *dev)
{
int node = pcibus_to_node(dev->bus);
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 12e259a988dd..583cbd06b9b8 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -264,6 +264,7 @@ static int __parse_discovery_table(struct uncore_discovery_domain *domain,
struct uncore_unit_discovery unit;
void __iomem *io_addr;
unsigned long size;
+ int ret = 0;
int i;
size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE;
@@ -273,21 +274,23 @@ static int __parse_discovery_table(struct uncore_discovery_domain *domain,
/* Read Global Discovery State */
memcpy_fromio(&global, io_addr, sizeof(struct uncore_global_discovery));
+ iounmap(io_addr);
+
if (uncore_discovery_invalid_unit(global)) {
pr_info("Invalid Global Discovery State: 0x%llx 0x%llx 0x%llx\n",
global.table1, global.ctl, global.table3);
- iounmap(io_addr);
return -EINVAL;
}
- iounmap(io_addr);
size = (1 + global.max_units) * global.stride * 8;
io_addr = ioremap(addr, size);
if (!io_addr)
return -ENOMEM;
- if (domain->global_init && domain->global_init(global.ctl))
- return -ENODEV;
+ if (domain->global_init && domain->global_init(global.ctl)) {
+ ret = -ENODEV;
+ goto out;
+ }
/* Parsing Unit Discovery State */
for (i = 0; i < global.max_units; i++) {
@@ -307,8 +310,10 @@ static int __parse_discovery_table(struct uncore_discovery_domain *domain,
}
*parsed = true;
+
+out:
iounmap(io_addr);
- return 0;
+ return ret;
}
static int parse_discovery_table(struct uncore_discovery_domain *domain,
@@ -366,7 +371,7 @@ static bool uncore_discovery_pci(struct uncore_discovery_domain *domain)
(val & UNCORE_DISCOVERY_DVSEC2_BIR_MASK) * UNCORE_DISCOVERY_BIR_STEP;
die = get_device_die_id(dev);
- if (die < 0)
+ if ((die < 0) || (die >= uncore_max_dies()))
continue;
parse_discovery_table(domain, dev, die, bar_offset, &parsed);
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 0a1d08136cc1..215d33e260ed 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1459,13 +1459,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
}
map->pbus_to_dieid[bus] = die_id = uncore_device_to_die(ubox_dev);
-
raw_spin_unlock(&pci2phy_map_lock);
-
- if (WARN_ON_ONCE(die_id == -1)) {
- err = -EINVAL;
- break;
- }
}
}
@@ -6420,7 +6414,7 @@ static void spr_update_device_location(int type_id)
while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) {
- die = uncore_device_to_die(dev);
+ die = uncore_pcibus_to_dieid(dev->bus);
if (die < 0)
continue;
@@ -6444,6 +6438,11 @@ static void spr_update_device_location(int type_id)
int spr_uncore_pci_init(void)
{
+ int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true);
+
+ if (ret)
+ return ret;
+
/*
* The discovery table of UPI on some SPR variant is broken,
* which impacts the detection of both UPI and M3UPI uncore PMON.
@@ -6935,34 +6934,34 @@ static struct freerunning_counters dmr_iio_freerunning[] = {
static struct uncore_event_desc dmr_uncore_iio_freerunning_events[] = {
/* ITC Free Running Data BW counter for inbound traffic */
- INTEL_UNCORE_FR_EVENT_DESC(inb_data_port0, 0x10, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(inb_data_port1, 0x11, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(inb_data_port2, 0x12, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(inb_data_port3, 0x13, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(inb_data_port4, 0x14, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(inb_data_port5, 0x15, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(inb_data_port6, 0x16, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(inb_data_port7, 0x17, "3.814697266e-6"),
+ INTEL_UNCORE_FR_EVENT_DESC(inb_data_port0, 0x10, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(inb_data_port1, 0x11, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(inb_data_port2, 0x12, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(inb_data_port3, 0x13, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(inb_data_port4, 0x14, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(inb_data_port5, 0x15, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(inb_data_port6, 0x16, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(inb_data_port7, 0x17, 3.814697266e-6),
/* ITC Free Running BW IN counters */
- INTEL_UNCORE_FR_EVENT_DESC(bw_in_port0, 0x20, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_in_port1, 0x21, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_in_port2, 0x22, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_in_port3, 0x23, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_in_port4, 0x24, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_in_port5, 0x25, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_in_port6, 0x26, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_in_port7, 0x27, "3.814697266e-6"),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_in_port0, 0x20, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_in_port1, 0x21, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_in_port2, 0x22, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_in_port3, 0x23, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_in_port4, 0x24, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_in_port5, 0x25, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_in_port6, 0x26, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_in_port7, 0x27, 3.814697266e-6),
/* ITC Free Running BW OUT counters */
- INTEL_UNCORE_FR_EVENT_DESC(bw_out_port0, 0x30, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_out_port1, 0x31, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_out_port2, 0x32, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_out_port3, 0x33, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_out_port4, 0x34, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_out_port5, 0x35, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_out_port6, 0x36, "3.814697266e-6"),
- INTEL_UNCORE_FR_EVENT_DESC(bw_out_port7, 0x37, "3.814697266e-6"),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_out_port0, 0x30, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_out_port1, 0x31, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_out_port2, 0x32, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_out_port3, 0x33, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_out_port4, 0x34, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_out_port5, 0x35, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_out_port6, 0x36, 3.814697266e-6),
+ INTEL_UNCORE_FR_EVENT_DESC(bw_out_port7, 0x37, 3.814697266e-6),
/* Free Running Clock Counter */
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x40"),
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 8052596b8503..76d6418c5055 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -2,7 +2,6 @@
#include <linux/perf_event.h>
#include <linux/sysfs.h>
#include <linux/nospec.h>
-#include <asm/cpu_device_id.h>
#include <asm/msr.h>
#include "probe.h"
@@ -41,86 +40,11 @@ static bool test_therm_status(int idx, void *data)
static bool test_intel(int idx, void *data)
{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 != 6)
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return false;
- switch (boot_cpu_data.x86_vfm) {
- case INTEL_NEHALEM:
- case INTEL_NEHALEM_G:
- case INTEL_NEHALEM_EP:
- case INTEL_NEHALEM_EX:
-
- case INTEL_WESTMERE:
- case INTEL_WESTMERE_EP:
- case INTEL_WESTMERE_EX:
-
- case INTEL_SANDYBRIDGE:
- case INTEL_SANDYBRIDGE_X:
-
- case INTEL_IVYBRIDGE:
- case INTEL_IVYBRIDGE_X:
-
- case INTEL_HASWELL:
- case INTEL_HASWELL_X:
- case INTEL_HASWELL_L:
- case INTEL_HASWELL_G:
-
- case INTEL_BROADWELL:
- case INTEL_BROADWELL_D:
- case INTEL_BROADWELL_G:
- case INTEL_BROADWELL_X:
- case INTEL_SAPPHIRERAPIDS_X:
- case INTEL_EMERALDRAPIDS_X:
- case INTEL_GRANITERAPIDS_X:
- case INTEL_GRANITERAPIDS_D:
-
- case INTEL_ATOM_SILVERMONT:
- case INTEL_ATOM_SILVERMONT_D:
- case INTEL_ATOM_AIRMONT:
- case INTEL_ATOM_AIRMONT_NP:
-
- case INTEL_ATOM_GOLDMONT:
- case INTEL_ATOM_GOLDMONT_D:
- case INTEL_ATOM_GOLDMONT_PLUS:
- case INTEL_ATOM_TREMONT_D:
- case INTEL_ATOM_TREMONT:
- case INTEL_ATOM_TREMONT_L:
-
- case INTEL_XEON_PHI_KNL:
- case INTEL_XEON_PHI_KNM:
- if (idx == PERF_MSR_SMI)
- return true;
- break;
-
- case INTEL_SKYLAKE_L:
- case INTEL_SKYLAKE:
- case INTEL_SKYLAKE_X:
- case INTEL_KABYLAKE_L:
- case INTEL_KABYLAKE:
- case INTEL_COMETLAKE_L:
- case INTEL_COMETLAKE:
- case INTEL_ICELAKE_L:
- case INTEL_ICELAKE:
- case INTEL_ICELAKE_X:
- case INTEL_ICELAKE_D:
- case INTEL_TIGERLAKE_L:
- case INTEL_TIGERLAKE:
- case INTEL_ROCKETLAKE:
- case INTEL_ALDERLAKE:
- case INTEL_ALDERLAKE_L:
- case INTEL_ATOM_GRACEMONT:
- case INTEL_RAPTORLAKE:
- case INTEL_RAPTORLAKE_P:
- case INTEL_RAPTORLAKE_S:
- case INTEL_METEORLAKE:
- case INTEL_METEORLAKE_L:
- if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
- return true;
- break;
- }
-
- return false;
+ /* Rely on perf_msr_probe() to check the availability */
+ return true;
}
PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" );
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 70078334e4a3..47f84ee8f540 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -23,3 +23,4 @@ PERF_ARCH(PEBS_LAT_HYBRID, 0x0020000) /* ld and st lat for hybrid */
PERF_ARCH(NEEDS_BRANCH_STACK, 0x0040000) /* require branch stack setup */
PERF_ARCH(BRANCH_COUNTERS, 0x0080000) /* logs the counters in the extra space of each branch */
PERF_ARCH(ACR, 0x0100000) /* Auto counter reload */
+PERF_ARCH(UNPRIVILEGED, 0x0200000) /* Unprivileged event (wrt perf_allow_kernel()) */
diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
index 92da1b4f2e73..5ffcc23255de 100644
--- a/arch/x86/hyperv/hv_crash.c
+++ b/arch/x86/hyperv/hv_crash.c
@@ -107,14 +107,12 @@ static void __noreturn hv_panic_timeout_reboot(void)
cpu_relax();
}
-/* This cannot be inlined as it needs stack */
-static noinline __noclone void hv_crash_restore_tss(void)
+static void hv_crash_restore_tss(void)
{
load_TR_desc();
}
-/* This cannot be inlined as it needs stack */
-static noinline void hv_crash_clear_kernpt(void)
+static void hv_crash_clear_kernpt(void)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -125,6 +123,25 @@ static noinline void hv_crash_clear_kernpt(void)
native_p4d_clear(p4d);
}
+
+/*
+ * Tail of the crash path. hv_crash_c_entry() pushes the saved CS and the
+ * address of this function and executes lretq, so control arrives here by
+ * far return rather than by a normal call.
+ *
+ * Reloads the task register (hv_crash_restore_tss() -> load_TR_desc()),
+ * runs hv_crash_clear_kernpt(), then attempts the crash kexec. Marked
+ * __noreturn: if __crash_kexec() comes back, hv_panic_timeout_reboot()
+ * takes over.
+ */
+static void __noreturn hv_crash_handle(void)
+{
+ hv_crash_restore_tss();
+ hv_crash_clear_kernpt();
+
+ /* we are now fully in devirtualized normal kernel mode */
+ __crash_kexec(NULL);
+
+ hv_panic_timeout_reboot();
+}
+
+/*
+ * __naked functions do not permit function calls, not even to __always_inline
+ * functions that only contain asm() blocks themselves. So use a macro instead.
+ *
+ * @msr is passed in ECX. @val is widened to u64 and split into EAX (low 32
+ * bits) and EDX (high 32 bits). The arguments are parenthesized so that any
+ * expression can be passed safely; note that @val is evaluated twice, so it
+ * must not have side effects.
+ */
+#define hv_wrmsr(msr, val) \
+ asm volatile("wrmsr" :: "c"(msr), "a"((u32)(val)), \
+ "d"((u32)((u64)(val) >> 32)) : "memory")
+
/*
* This is the C entry point from the asm glue code after the disable hypercall.
* We enter here in IA32-e long mode, ie, full 64bit mode running on kernel
@@ -133,51 +150,38 @@ static noinline void hv_crash_clear_kernpt(void)
* available. We restore kernel GDT, and rest of the context, and continue
* to kexec.
*/
-static asmlinkage void __noreturn hv_crash_c_entry(void)
+static void __naked hv_crash_c_entry(void)
{
- struct hv_crash_ctxt *ctxt = &hv_crash_ctxt;
-
/* first thing, restore kernel gdt */
- native_load_gdt(&ctxt->gdtr);
+ asm volatile("lgdt %0" : : "m" (hv_crash_ctxt.gdtr));
- asm volatile("movw %%ax, %%ss" : : "a"(ctxt->ss));
- asm volatile("movq %0, %%rsp" : : "m"(ctxt->rsp));
+ asm volatile("movw %0, %%ss\n\t"
+ "movq %1, %%rsp"
+ :: "m"(hv_crash_ctxt.ss), "m"(hv_crash_ctxt.rsp));
- asm volatile("movw %%ax, %%ds" : : "a"(ctxt->ds));
- asm volatile("movw %%ax, %%es" : : "a"(ctxt->es));
- asm volatile("movw %%ax, %%fs" : : "a"(ctxt->fs));
- asm volatile("movw %%ax, %%gs" : : "a"(ctxt->gs));
+ asm volatile("movw %0, %%ds" : : "m"(hv_crash_ctxt.ds));
+ asm volatile("movw %0, %%es" : : "m"(hv_crash_ctxt.es));
+ asm volatile("movw %0, %%fs" : : "m"(hv_crash_ctxt.fs));
+ asm volatile("movw %0, %%gs" : : "m"(hv_crash_ctxt.gs));
- native_wrmsrq(MSR_IA32_CR_PAT, ctxt->pat);
- asm volatile("movq %0, %%cr0" : : "r"(ctxt->cr0));
+ hv_wrmsr(MSR_IA32_CR_PAT, hv_crash_ctxt.pat);
+ asm volatile("movq %0, %%cr0" : : "r"(hv_crash_ctxt.cr0));
- asm volatile("movq %0, %%cr8" : : "r"(ctxt->cr8));
- asm volatile("movq %0, %%cr4" : : "r"(ctxt->cr4));
- asm volatile("movq %0, %%cr2" : : "r"(ctxt->cr4));
+ asm volatile("movq %0, %%cr8" : : "r"(hv_crash_ctxt.cr8));
+ asm volatile("movq %0, %%cr4" : : "r"(hv_crash_ctxt.cr4));
+ asm volatile("movq %0, %%cr2" : : "r"(hv_crash_ctxt.cr2));
- native_load_idt(&ctxt->idtr);
- native_wrmsrq(MSR_GS_BASE, ctxt->gsbase);
- native_wrmsrq(MSR_EFER, ctxt->efer);
+ asm volatile("lidt %0" : : "m" (hv_crash_ctxt.idtr));
+ hv_wrmsr(MSR_GS_BASE, hv_crash_ctxt.gsbase);
+ hv_wrmsr(MSR_EFER, hv_crash_ctxt.efer);
/* restore the original kernel CS now via far return */
- asm volatile("movzwq %0, %%rax\n\t"
- "pushq %%rax\n\t"
- "pushq $1f\n\t"
- "lretq\n\t"
- "1:nop\n\t" : : "m"(ctxt->cs) : "rax");
-
- /* We are in asmlinkage without stack frame, hence make C function
- * calls which will buy stack frames.
- */
- hv_crash_restore_tss();
- hv_crash_clear_kernpt();
-
- /* we are now fully in devirtualized normal kernel mode */
- __crash_kexec(NULL);
-
- hv_panic_timeout_reboot();
+ asm volatile("pushq %q0\n\t"
+ "pushq %q1\n\t"
+ "lretq"
+ :: "r"(hv_crash_ctxt.cs), "r"(hv_crash_handle));
}
-/* Tell gcc we are using lretq long jump in the above function intentionally */
+/* Tell objtool we are using lretq long jump in the above function intentionally */
STACK_FRAME_NON_STANDARD(hv_crash_c_entry);
static void hv_mark_tss_not_busy(void)
@@ -195,20 +199,20 @@ static void hv_hvcrash_ctxt_save(void)
{
struct hv_crash_ctxt *ctxt = &hv_crash_ctxt;
- asm volatile("movq %%rsp,%0" : "=m"(ctxt->rsp));
+ ctxt->rsp = current_stack_pointer;
ctxt->cr0 = native_read_cr0();
ctxt->cr4 = native_read_cr4();
- asm volatile("movq %%cr2, %0" : "=a"(ctxt->cr2));
- asm volatile("movq %%cr8, %0" : "=a"(ctxt->cr8));
+ asm volatile("movq %%cr2, %0" : "=r"(ctxt->cr2));
+ asm volatile("movq %%cr8, %0" : "=r"(ctxt->cr8));
- asm volatile("movl %%cs, %%eax" : "=a"(ctxt->cs));
- asm volatile("movl %%ss, %%eax" : "=a"(ctxt->ss));
- asm volatile("movl %%ds, %%eax" : "=a"(ctxt->ds));
- asm volatile("movl %%es, %%eax" : "=a"(ctxt->es));
- asm volatile("movl %%fs, %%eax" : "=a"(ctxt->fs));
- asm volatile("movl %%gs, %%eax" : "=a"(ctxt->gs));
+ asm volatile("movw %%cs, %0" : "=m"(ctxt->cs));
+ asm volatile("movw %%ss, %0" : "=m"(ctxt->ss));
+ asm volatile("movw %%ds, %0" : "=m"(ctxt->ds));
+ asm volatile("movw %%es, %0" : "=m"(ctxt->es));
+ asm volatile("movw %%fs, %0" : "=m"(ctxt->fs));
+ asm volatile("movw %%gs, %0" : "=m"(ctxt->gs));
native_store_gdt(&ctxt->gdtr);
store_idt(&ctxt->idtr);
diff --git a/arch/x86/include/asm/amd/ibs.h b/arch/x86/include/asm/amd/ibs.h
index fcc8a5abe54e..68e24a1736d0 100644
--- a/arch/x86/include/asm/amd/ibs.h
+++ b/arch/x86/include/asm/amd/ibs.h
@@ -77,7 +77,7 @@ union ibs_op_data {
__u64 val;
struct {
__u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */
- tag_to_ret_ctr:16, /* 15-31: op tag to retire count */
+ tag_to_ret_ctr:16, /* 16-31: op tag to retire count */
reserved1:2, /* 32-33: reserved */
op_return:1, /* 34: return op */
op_brn_taken:1, /* 35: taken branch op */
@@ -99,7 +99,9 @@ union ibs_op_data2 {
rmt_node:1, /* 4: destination node */
cache_hit_st:1, /* 5: cache hit state */
data_src_hi:2, /* 6-7: data source high */
- reserved1:56; /* 8-63: reserved */
+ strm_st:1, /* 8: streaming store */
+ rmt_socket:1, /* 9: remote socket */
+ reserved1:54; /* 10-63: reserved */
};
};
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index be39a543fbe5..bc125c4429dc 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -138,15 +138,8 @@
#define APIC_SEOI 0x420
#define APIC_IER 0x480
#define APIC_EILVTn(n) (0x500 + 0x10 * n)
-#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
#define APIC_EILVT_NR_MAX APIC_EILVT_NR_AMD_10H
-#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF)
-#define APIC_EILVT_MSG_FIX 0x0
-#define APIC_EILVT_MSG_SMI 0x2
-#define APIC_EILVT_MSG_NMI 0x4
-#define APIC_EILVT_MSG_EXT 0x7
-#define APIC_EILVT_MASKED (1 << 16)
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
#define APIC_BASE_MSR 0x800
diff --git a/arch/x86/include/asm/clock_inlined.h b/arch/x86/include/asm/clock_inlined.h
new file mode 100644
index 000000000000..b2dee8db2fb9
--- /dev/null
+++ b/arch/x86/include/asm/clock_inlined.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CLOCK_INLINED_H
+#define _ASM_X86_CLOCK_INLINED_H
+
+#include <asm/tsc.h>
+
+struct clocksource;
+
+/* Returns the value of rdtsc_ordered(); @cs is not used. */
+static __always_inline u64 arch_inlined_clocksource_read(struct clocksource *cs)
+{
+ return (u64)rdtsc_ordered();
+}
+
+struct clock_event_device;
+
+/*
+ * Writes @cycles to MSR_IA32_TSC_DEADLINE; @evt is not used.
+ * NOTE(review): presumably @cycles is an absolute TSC deadline - confirm
+ * against the caller.
+ */
+static __always_inline void
+arch_inlined_clockevent_set_next_coupled(u64 cycles, struct clock_event_device *evt)
+{
+ native_wrmsrq(MSR_IA32_TSC_DEADLINE, cycles);
+}
+
+#endif
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index ad235dda1ded..57a0786dfd75 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -11,7 +11,6 @@
#ifndef CONFIG_SMP
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
-#define cpu_acpi_id(cpu) 0
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dbe104df339b..1d506e5d6f46 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -415,7 +415,7 @@
*/
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
-
+#define X86_FEATURE_CPPC_PERF_PRIO (17*32+ 2) /* CPPC Floor Perf support */
#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
@@ -473,6 +473,7 @@
#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
+#define X86_FEATURE_AVX512_BMM (20*32+23) /* AVX512 Bit Matrix Multiply instructions */
#define X86_FEATURE_ERAPS (20*32+24) /* Enhanced Return Address Predictor Security */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ee382b56dd7b..dc8fe1361c18 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -138,7 +138,7 @@ extern void __init efi_apply_memmap_quirks(void);
extern int __init efi_reuse_config(u64 tables, int nr_tables);
extern void efi_delete_dummy_variable(void);
extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
-extern void efi_free_boot_services(void);
+extern void efi_unmap_boot_services(void);
void arch_efi_call_virt_setup(void);
void arch_efi_call_virt_teardown(void);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 2ba5f166e58f..c7f98977663c 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -187,7 +187,6 @@ void set_personality_ia32(bool);
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
- unsigned v; \
(pr_reg)[0] = (regs)->r15; \
(pr_reg)[1] = (regs)->r14; \
(pr_reg)[2] = (regs)->r13; \
@@ -211,10 +210,10 @@ do { \
(pr_reg)[20] = (regs)->ss; \
(pr_reg)[21] = x86_fsbase_read_cpu(); \
(pr_reg)[22] = x86_gsbase_read_cpu_inactive(); \
- asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \
- asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \
- asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \
- asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \
+ savesegment(ds, (pr_reg)[23]); \
+ savesegment(es, (pr_reg)[24]); \
+ savesegment(fs, (pr_reg)[25]); \
+ savesegment(gs, (pr_reg)[26]); \
} while (0);
/* I'm not sure if we can use '-' here */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce3eb6d5fdf9..7535131c711b 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -82,18 +82,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED);
#endif
- /*
- * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
- * bits. The actual entropy will be further reduced by the compiler
- * when applying stack alignment constraints (see cc_stack_align4/8 in
- * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
- * low bits from any entropy chosen here.
- *
- * Therefore, final stack offset entropy will be 7 (x86_64) or
- * 8 (ia32) bits.
- */
- choose_random_kstack_offset(rdtsc());
-
/* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
this_cpu_read(x86_ibpb_exit_to_user)) {
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index e7a244051c62..8d1e86687b98 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -29,9 +29,6 @@
#define CSW fd_routine[can_use_virtual_dma & 1]
-#define fd_inb(base, reg) inb_p((base) + (reg))
-#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
-
#define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy")
#define fd_free_dma() CSW._free_dma(FLOPPY_DMA)
#define fd_enable_irq() enable_irq(FLOPPY_IRQ)
@@ -49,6 +46,26 @@ static char *virtual_dma_addr;
static int virtual_dma_mode;
static int doing_pdma;
+/*
+ * Read one byte from I/O port @base + @reg using inb_p(), then issue three
+ * additional native_io_delay() calls before returning the byte.
+ */
+static inline u8 fd_inb(u16 base, u16 reg)
+{
+ u8 ret = inb_p(base + reg);
+
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
+
+ return ret;
+}
+
+/*
+ * Write @value to I/O port @base + @reg using outb_p(), then issue three
+ * additional native_io_delay() calls.
+ */
+static inline void fd_outb(u8 value, u16 base, u16 reg)
+{
+ outb_p(value, base + reg);
+
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
+}
+
static irqreturn_t floppy_hardint(int irq, void *dev_id)
{
unsigned char st;
@@ -79,9 +96,9 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode)
- outb_p(*lptr, virtual_dma_port + FD_DATA);
+ fd_outb(*lptr, virtual_dma_port, FD_DATA);
else
- *lptr = inb_p(virtual_dma_port + FD_DATA);
+ *lptr = fd_inb(virtual_dma_port, FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h
index 9a710c060445..698457f16d5d 100644
--- a/arch/x86/include/asm/fpu/xcr.h
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -23,7 +23,7 @@ static inline void xsetbv(u32 index, u64 value)
/*
* Return a mask of xfeatures which are currently being tracked
- * by the processor as being in the initial configuration.
+ * by the processor as being not in the initial configuration.
*
* Callers should check X86_FEATURE_XGETBV1.
*/
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index ab2547f97c2c..70ff4ef457b1 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -25,7 +25,7 @@ static __always_inline unsigned long rdfsbase(void)
{
unsigned long fsbase;
- asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
+ asm volatile("rdfsbase %0" : "=r" (fsbase));
return fsbase;
}
@@ -34,7 +34,7 @@ static __always_inline unsigned long rdgsbase(void)
{
unsigned long gsbase;
- asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
+ asm volatile("rdgsbase %0" : "=r" (gsbase));
return gsbase;
}
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index ca309a3227c7..7f4847b2b904 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -218,9 +218,8 @@ static inline void __iowrite32_copy(void __iomem *to, const void *from,
size_t count)
{
asm volatile("rep movsl"
- : "=&c"(count), "=&D"(to), "=&S"(from)
- : "0"(count), "1"(to), "2"(from)
- : "memory");
+ : "+D"(to), "+S"(from), "+c"(count)
+ : : "memory");
}
#define __iowrite32_copy __iowrite32_copy
#endif
@@ -243,21 +242,19 @@ extern int io_delay_type;
extern void io_delay_init(void);
#if defined(CONFIG_PARAVIRT)
-#include <asm/paravirt.h>
+#include <asm/paravirt-base.h>
#else
+#define call_io_delay() true
+#endif
static inline void slow_down_io(void)
{
+ if (!call_io_delay())
+ return;
+
native_io_delay();
-#ifdef REALLY_SLOW_IO
- native_io_delay();
- native_io_delay();
- native_io_delay();
-#endif
}
-#endif
-
#define BUILDIO(bwl, type) \
static inline void out##bwl##_p(type value, u16 port) \
{ \
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 462754b0bf8a..6f25de05ed58 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -96,11 +96,11 @@ static __always_inline void halt(void)
native_halt();
}
#endif /* __ASSEMBLER__ */
+#else
+#include <asm/paravirt.h>
#endif /* CONFIG_PARAVIRT */
-#ifdef CONFIG_PARAVIRT_XXL
-#include <asm/paravirt.h>
-#else
+#ifndef CONFIG_PARAVIRT_XXL
#ifndef __ASSEMBLER__
#include <linux/types.h>
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index de709fb5bd76..3776cf5382a2 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -1,8 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_OPTIONAL)
-BUILD_BUG_ON(1)
-#endif
-
+#if !defined(KVM_X86_OP) || \
+ !defined(KVM_X86_OP_OPTIONAL) || \
+ !defined(KVM_X86_OP_OPTIONAL_RET0)
+#error Missing one or more KVM_X86_OP #defines
+#else
/*
* KVM_X86_OP() and KVM_X86_OP_OPTIONAL() are used to help generate
* both DECLARE/DEFINE_STATIC_CALL() invocations and
@@ -148,6 +149,7 @@ KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level)
KVM_X86_OP_OPTIONAL(gmem_invalidate)
+#endif
#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
index f0aa6996811f..d5452b3433b7 100644
--- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL)
-BUILD_BUG_ON(1)
-#endif
+#if !defined(KVM_X86_PMU_OP) || \
+ !defined(KVM_X86_PMU_OP_OPTIONAL)
+#error Missing one or more KVM_X86_PMU_OP #defines
+#else
/*
* KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate
@@ -26,6 +27,7 @@ KVM_X86_PMU_OP_OPTIONAL(cleanup)
KVM_X86_PMU_OP_OPTIONAL(write_global_ctrl)
KVM_X86_PMU_OP(mediated_load)
KVM_X86_PMU_OP(mediated_put)
+#endif
#undef KVM_X86_PMU_OP
#undef KVM_X86_PMU_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ff07c45e3c73..c470e40a00aa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -40,7 +40,8 @@
#include <asm/irq_remapping.h>
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
-#include <asm/reboot.h>
+#include <asm/virt.h>
+
#include <hyperv/hvhdk.h>
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
@@ -1098,6 +1099,21 @@ struct kvm_vcpu_arch {
*/
bool pdptrs_from_userspace;
+ /*
+ * Set if an emulated nested VM-Enter to L2 is pending completion. KVM
+ * must not synthesize a VM-Exit to L1 before entering L2, as VM-Exits
+ * can only occur at instruction boundaries. The only exception is
+ * VMX's "notify" exits, which exist in large part to break the CPU out
+ * of infinite ucode loops, but can corrupt vCPU state in the process!
+ *
+ * For all intents and purposes, this is a boolean, but it's tracked as
+ * a u8 so that KVM can detect when userspace may have stuffed vCPU
+ * state and generated an architecturally-impossible VM-Exit.
+ */
+#define KVM_NESTED_RUN_PENDING 1
+#define KVM_NESTED_RUN_PENDING_UNTRUSTED 2
+ u8 nested_run_pending;
+
#if IS_ENABLED(CONFIG_HYPERV)
hpa_t hv_root_tdp;
#endif
@@ -1261,7 +1277,7 @@ struct kvm_x86_pmu_event_filter {
__u32 nr_excludes;
__u64 *includes;
__u64 *excludes;
- __u64 events[];
+ __u64 events[] __counted_by(nevents);
};
enum kvm_apicv_inhibit {
@@ -1433,6 +1449,7 @@ struct kvm_arch {
struct kvm_pit *vpit;
#endif
atomic_t vapics_in_nmi_mode;
+
struct mutex apic_map_lock;
struct kvm_apic_map __rcu *apic_map;
atomic_t apic_map_dirty;
@@ -1440,8 +1457,22 @@ struct kvm_arch {
bool apic_access_memslot_enabled;
bool apic_access_memslot_inhibited;
- /* Protects apicv_inhibit_reasons */
+ /*
+ * Force apicv_update_lock and apicv_nr_irq_window_req to reside in a
+ * dedicated cacheline. They are write-mostly, whereas most everything
+ * else in kvm_arch is read-mostly. Note that apicv_inhibit_reasons is
+ * read-mostly: toggling VM-wide inhibits is rare; _checking_ for
+ * inhibits is common.
+ */
+ ____cacheline_aligned
+ /*
+ * Protects apicv_inhibit_reasons and apicv_nr_irq_window_req (with an
+ * asterisk, see kvm_inc_or_dec_irq_window_inhibit() for details).
+ */
struct rw_semaphore apicv_update_lock;
+ atomic_t apicv_nr_irq_window_req;
+ ____cacheline_aligned
+
unsigned long apicv_inhibit_reasons;
gpa_t wall_clock;
@@ -2097,9 +2128,6 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
- const void *val, int bytes);
-
extern bool tdp_enabled;
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -2316,6 +2344,18 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
}
+void kvm_inc_or_dec_irq_window_inhibit(struct kvm *kvm, bool inc);
+
+/* Wrapper: calls kvm_inc_or_dec_irq_window_inhibit() with inc == true. */
+static inline void kvm_inc_apicv_irq_window_req(struct kvm *kvm)
+{
+ kvm_inc_or_dec_irq_window_inhibit(kvm, true);
+}
+
+/* Wrapper: calls kvm_inc_or_dec_irq_window_inhibit() with inc == false. */
+static inline void kvm_dec_apicv_irq_window_req(struct kvm *kvm)
+{
+ kvm_inc_or_dec_irq_window_inhibit(kvm, false);
+}
+
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
@@ -2485,7 +2525,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
KVM_X86_QUIRK_SLOT_ZAP_ALL | \
KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \
- KVM_X86_QUIRK_IGNORE_GUEST_PAT)
+ KVM_X86_QUIRK_IGNORE_GUEST_PAT | \
+ KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM)
#define KVM_X86_CONDITIONAL_QUIRKS \
(KVM_X86_QUIRK_CD_NW_CLEARED | \
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 59aa966dc212..4957018fef3e 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -106,7 +106,7 @@ static inline bool local_add_negative(long i, local_t *l)
static inline long local_add_return(long i, local_t *l)
{
long __i = i;
- asm volatile(_ASM_XADD "%0, %1;"
+ asm volatile(_ASM_XADD "%0, %1"
: "+r" (i), "+m" (l->a.counter)
: : "memory");
return i + __i;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2d98886de09a..0175d39a5856 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -343,44 +343,60 @@ extern void apei_mce_report_mem_error(int corrected,
*/
#ifdef CONFIG_X86_MCE_AMD
-/* These may be used by multiple smca_hwid_mcatypes */
+/*
+ * These may be used by multiple smca_hwid_mcatypes.
+ *
+ * Keep in alphanumeric order, numerals before letters.
+ * Exception: Keep "V2, etc." with their originals.
+ */
enum smca_bank_types {
- SMCA_LS = 0, /* Load Store */
- SMCA_LS_V2,
- SMCA_IF, /* Instruction Fetch */
- SMCA_L2_CACHE, /* L2 Cache */
+ SMCA_CS, /* Coherent Station */
+ SMCA_CS_V2,
+ SMCA_DACC_BE, /* Data Acceleration Back-end */
+ SMCA_DACC_FE, /* Data Acceleration Front-end */
SMCA_DE, /* Decoder Unit */
- SMCA_RESERVED, /* Reserved */
+ SMCA_EDDR5CMN, /* eDDR5 CMN */
SMCA_EX, /* Execution Unit */
SMCA_FP, /* Floating Point */
+ SMCA_GMI_PCS, /* GMI PCS Unit */
+ SMCA_GMI_PHY, /* GMI PHY Unit */
+ SMCA_IF, /* Instruction Fetch */
+ SMCA_L2_CACHE, /* L2 Cache */
SMCA_L3_CACHE, /* L3 Cache */
- SMCA_CS, /* Coherent Slave */
- SMCA_CS_V2,
- SMCA_PIE, /* Power, Interrupts, etc. */
- SMCA_UMC, /* Unified Memory Controller */
- SMCA_UMC_V2,
+ SMCA_LS, /* Load Store */
+ SMCA_LS_V2,
SMCA_MA_LLC, /* Memory Attached Last Level Cache */
- SMCA_PB, /* Parameter Block */
- SMCA_PSP, /* Platform Security Processor */
- SMCA_PSP_V2,
- SMCA_SMU, /* System Management Unit */
- SMCA_SMU_V2,
SMCA_MP5, /* Microprocessor 5 Unit */
+ SMCA_MPART, /* AMD Root of Trust Microprocessor */
+ SMCA_MPASP, /* AMD Secure Processor */
+ SMCA_MPASP_V2,
+ SMCA_MPDACC, /* MP for Data Acceleration */
SMCA_MPDMA, /* MPDMA Unit */
+ SMCA_MPM, /* Microprocessor Manageability Core */
+ SMCA_MPRAS, /* MP for RAS */
+ SMCA_NBIF, /* NBIF Unit */
SMCA_NBIO, /* Northbridge IO Unit */
+ SMCA_PB, /* Parameter Block */
SMCA_PCIE, /* PCI Express Unit */
SMCA_PCIE_V2,
- SMCA_XGMI_PCS, /* xGMI PCS Unit */
- SMCA_NBIF, /* NBIF Unit */
- SMCA_SHUB, /* System HUB Unit */
+ SMCA_PCIE_PL, /* PCIe Link */
+ SMCA_PIE, /* Power, Interrupts, etc. */
+ SMCA_PSP, /* Platform Security Processor */
+ SMCA_PSP_V2,
+ SMCA_RESERVED, /* Reserved */
SMCA_SATA, /* SATA Unit */
+ SMCA_SHUB, /* System HUB Unit */
+ SMCA_SMU, /* System Management Unit */
+ SMCA_SMU_V2,
+ SMCA_SSBDCI, /* Die to Die Interconnect */
+ SMCA_UMC, /* Unified Memory Controller */
+ SMCA_UMC_V2,
SMCA_USB, /* USB Unit */
- SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */
SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */
- SMCA_GMI_PCS, /* GMI PCS Unit */
- SMCA_XGMI_PHY, /* xGMI PHY Unit */
+ SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */
SMCA_WAFL_PHY, /* WAFL PHY Unit */
- SMCA_GMI_PHY, /* GMI PHY Unit */
+ SMCA_XGMI_PCS, /* xGMI PCS Unit */
+ SMCA_XGMI_PHY, /* xGMI PHY Unit */
N_SMCA_BANK_TYPES
};
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 8b41f26f003b..3c317d155771 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -61,6 +61,8 @@ static inline int intel_microcode_get_datasize(struct microcode_header_intel *hd
return hdr->datasize ? : DEFAULT_UCODE_DATASIZE;
}
+extern u32 intel_get_platform_id(void);
+
static inline u32 intel_get_microcode_revision(void)
{
u32 rev, dummy;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 1acafb1c6a93..ef5b507de34e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -136,9 +136,6 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm)
}
#endif
-#define enter_lazy_tlb enter_lazy_tlb
-extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
-
extern void mm_init_global_asid(struct mm_struct *mm);
extern void mm_free_global_asid(struct mm_struct *mm);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index da5275d8eda6..a14a0f43e04a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -674,6 +674,9 @@
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_TW_CFG 0xc0011023
+#define MSR_AMD64_FP_CFG 0xc0011028
+#define MSR_AMD64_FP_CFG_ZEN1_DENORM_FIX_BIT 9
+
#define MSR_AMD64_DE_CFG 0xc0011029
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
@@ -698,6 +701,8 @@
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
+#define MSR_AMD64_IBSOPCTL2 0xc001103e
+#define MSR_AMD64_IBSFETCHCTL2 0xc001103f
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
@@ -740,7 +745,10 @@
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
-#define MSR_AMD64_SNP_RESV_BIT 19
+#define MSR_AMD64_SNP_RESERVED_BITS19_22 GENMASK_ULL(22, 19)
+#define MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT 23
+#define MSR_AMD64_SNP_IBPB_ON_ENTRY BIT_ULL(MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 24
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
#define MSR_AMD64_SAVIC_EN_BIT 0
@@ -762,12 +770,14 @@
#define MSR_AMD_CPPC_CAP2 0xc00102b2
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
+#define MSR_AMD_CPPC_REQ2 0xc00102b5
/* Masks for use with MSR_AMD_CPPC_CAP1 */
#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0)
#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8)
#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16)
#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24)
+#define AMD_CPPC_FLOOR_PERF_CNT_MASK GENMASK_ULL(39, 32)
/* Masks for use with MSR_AMD_CPPC_REQ */
#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
@@ -775,6 +785,9 @@
#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
+/* Masks for use with MSR_AMD_CPPC_REQ2 */
+#define AMD_CPPC_FLOOR_PERF_MASK GENMASK(7, 0)
+
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 53ba39ce010c..a9063f332fa6 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -22,6 +22,7 @@ extern int numa_off;
*/
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
extern nodemask_t numa_nodes_parsed __initdata;
+extern nodemask_t numa_phys_nodes_parsed __initdata;
static inline void set_apicid_to_node(int apicid, s16 node)
{
@@ -48,6 +49,7 @@ extern void __init init_cpu_to_node(void);
extern void numa_add_cpu(unsigned int cpu);
extern void numa_remove_cpu(unsigned int cpu);
extern void init_gi_nodes(void);
+extern int num_phys_nodes(void);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
@@ -55,6 +57,10 @@ static inline void init_cpu_to_node(void) { }
static inline void numa_add_cpu(unsigned int cpu) { }
static inline void numa_remove_cpu(unsigned int cpu) { }
static inline void init_gi_nodes(void) { }
+static inline int num_phys_nodes(void)
+{
+ return 1;
+}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index e0125afa53fb..5837c2bb277f 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -28,15 +28,16 @@
* and GCC realigned stacks.
*/
#define ORC_REG_UNDEFINED 0
-#define ORC_REG_PREV_SP 1
+#define ORC_REG_AX 1
#define ORC_REG_DX 2
-#define ORC_REG_DI 3
+#define ORC_REG_SP 3
#define ORC_REG_BP 4
-#define ORC_REG_SP 5
+#define ORC_REG_DI 5
#define ORC_REG_R10 6
#define ORC_REG_R13 7
-#define ORC_REG_BP_INDIRECT 8
+#define ORC_REG_PREV_SP 8
#define ORC_REG_SP_INDIRECT 9
+#define ORC_REG_BP_INDIRECT 10
#define ORC_REG_MAX 15
#define ORC_TYPE_UNDEFINED 0
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 018a8d906ca3..3e0801a0f782 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -26,7 +26,7 @@
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
+#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_TSK_EXEC
/* Physical address where kernel should be loaded. */
#define LOAD_PHYSICAL_ADDR __ALIGN_KERNEL_MASK(CONFIG_PHYSICAL_START, CONFIG_PHYSICAL_ALIGN - 1)
diff --git a/arch/x86/include/asm/paravirt-base.h b/arch/x86/include/asm/paravirt-base.h
index 982a0b93bc76..3b9e7772d196 100644
--- a/arch/x86/include/asm/paravirt-base.h
+++ b/arch/x86/include/asm/paravirt-base.h
@@ -15,6 +15,8 @@ struct pv_info {
#ifdef CONFIG_PARAVIRT_XXL
u16 extra_user_64bit_cs; /* __USER_CS if none */
#endif
+ bool io_delay;
+
const char *name;
};
@@ -26,6 +28,10 @@ u64 _paravirt_ident_64(u64);
#endif
#define paravirt_nop ((void *)nop_func)
+#ifdef CONFIG_PARAVIRT
+#define call_io_delay() pv_info.io_delay
+#endif
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void paravirt_set_cap(void);
#else
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index fcf8ab50948a..cdfe4007443e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -19,17 +19,6 @@
#include <linux/cpumask.h>
#include <asm/frame.h>
-/* The paravirtualized I/O functions */
-static inline void slow_down_io(void)
-{
- PVOP_VCALL0(pv_ops, cpu.io_delay);
-#ifdef REALLY_SLOW_IO
- PVOP_VCALL0(pv_ops, cpu.io_delay);
- PVOP_VCALL0(pv_ops, cpu.io_delay);
- PVOP_VCALL0(pv_ops, cpu.io_delay);
-#endif
-}
-
void native_flush_tlb_local(void);
void native_flush_tlb_global(void);
void native_flush_tlb_one_user(unsigned long addr);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9bcf6bce88f6..4f5ae0068aab 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -30,8 +30,6 @@ struct pv_lazy_ops {
struct pv_cpu_ops {
/* hooks for various privileged instructions */
- void (*io_delay)(void);
-
#ifdef CONFIG_PARAVIRT_XXL
unsigned long (*get_debugreg)(int regno);
void (*set_debugreg)(int regno, unsigned long value);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index ff5acb8b199b..752cb319d5ea 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -643,6 +643,10 @@ struct arch_pebs_cntr_header {
#define IBS_CAPS_OPDATA4 (1U<<10)
#define IBS_CAPS_ZEN4 (1U<<11)
#define IBS_CAPS_OPLDLAT (1U<<12)
+#define IBS_CAPS_DIS (1U<<13)
+#define IBS_CAPS_FETCHLAT (1U<<14)
+#define IBS_CAPS_BIT63_FILTER (1U<<15)
+#define IBS_CAPS_STRMST_RMTSOCKET (1U<<16)
#define IBS_CAPS_OPDTLBPGSIZE (1U<<19)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
@@ -657,31 +661,44 @@ struct arch_pebs_cntr_header {
#define IBSCTL_LVT_OFFSET_MASK 0x0F
/* IBS fetch bits/masks */
-#define IBS_FETCH_L3MISSONLY (1ULL<<59)
-#define IBS_FETCH_RAND_EN (1ULL<<57)
-#define IBS_FETCH_VAL (1ULL<<49)
-#define IBS_FETCH_ENABLE (1ULL<<48)
-#define IBS_FETCH_CNT 0xFFFF0000ULL
-#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
+#define IBS_FETCH_L3MISSONLY (1ULL << 59)
+#define IBS_FETCH_RAND_EN (1ULL << 57)
+#define IBS_FETCH_VAL (1ULL << 49)
+#define IBS_FETCH_ENABLE (1ULL << 48)
+#define IBS_FETCH_CNT 0xFFFF0000ULL
+#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
+
+#define IBS_FETCH_2_DIS (1ULL << 0)
+#define IBS_FETCH_2_FETCHLAT_FILTER (0xFULL << 1)
+#define IBS_FETCH_2_FETCHLAT_FILTER_SHIFT (1)
+#define IBS_FETCH_2_EXCL_RIP_63_EQ_1 (1ULL << 5)
+#define IBS_FETCH_2_EXCL_RIP_63_EQ_0 (1ULL << 6)
/*
* IBS op bits/masks
* The lower 7 bits of the current count are random bits
* preloaded by hardware and ignored in software
*/
-#define IBS_OP_LDLAT_EN (1ULL<<63)
-#define IBS_OP_LDLAT_THRSH (0xFULL<<59)
-#define IBS_OP_CUR_CNT (0xFFF80ULL<<32)
-#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32)
-#define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL<<52)
-#define IBS_OP_CNT_CTL (1ULL<<19)
-#define IBS_OP_VAL (1ULL<<18)
-#define IBS_OP_ENABLE (1ULL<<17)
-#define IBS_OP_L3MISSONLY (1ULL<<16)
-#define IBS_OP_MAX_CNT 0x0000FFFFULL
-#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
-#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
-#define IBS_RIP_INVALID (1ULL<<38)
+#define IBS_OP_LDLAT_EN (1ULL << 63)
+#define IBS_OP_LDLAT_THRSH (0xFULL << 59)
+#define IBS_OP_LDLAT_THRSH_SHIFT (59)
+#define IBS_OP_CUR_CNT (0xFFF80ULL << 32)
+#define IBS_OP_CUR_CNT_RAND (0x0007FULL << 32)
+#define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL << 52)
+#define IBS_OP_CNT_CTL (1ULL << 19)
+#define IBS_OP_VAL (1ULL << 18)
+#define IBS_OP_ENABLE (1ULL << 17)
+#define IBS_OP_L3MISSONLY (1ULL << 16)
+#define IBS_OP_MAX_CNT 0x0000FFFFULL
+#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL << 20) /* separate upper 7 bits */
+#define IBS_RIP_INVALID (1ULL << 38)
+
+#define IBS_OP_2_DIS (1ULL << 0)
+#define IBS_OP_2_EXCL_RIP_63_EQ_0 (1ULL << 1)
+#define IBS_OP_2_EXCL_RIP_63_EQ_1 (1ULL << 2)
+#define IBS_OP_2_STRM_ST_FILTER (1ULL << 3)
+#define IBS_OP_2_STRM_ST_FILTER_SHIFT (3)
#ifdef CONFIG_X86_LOCAL_APIC
extern u32 get_ibs_caps(void);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1662c5a8f445..13e3e9a054cb 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -47,14 +47,6 @@ void ptdump_walk_user_pgd_level_checkwx(void);
#define debug_checkwx_user() do { } while (0)
#endif
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
- __visible;
-#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
-
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
@@ -1240,12 +1232,12 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma,
pte_t entry, int dirty);
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep);
+bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-extern int ptep_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep);
+bool ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep);
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
@@ -1303,14 +1295,14 @@ extern int pudp_set_access_flags(struct vm_area_struct *vma,
pud_t entry, int dirty);
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long addr, pmd_t *pmdp);
-extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long addr, pud_t *pudp);
+bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp);
+bool pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp);
#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
+bool pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index f06e5d6a2747..ce45882ccd07 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -19,10 +19,8 @@
extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
extern pud_t level3_kernel_pgt[512];
-extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
-extern pmd_t level2_ident_pgt[512];
extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
extern pgd_t init_top_pgt[];
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 2e6c04d8a45b..06ed2cd2592e 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -9,8 +9,7 @@
*/
#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1)
-extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
+extern int arch_set_user_pkey_access(int pkey, unsigned long init_val);
static inline bool arch_pkeys_enabled(void)
{
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a24c7805acdb..10b5355b323e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -140,6 +140,11 @@ struct cpuinfo_x86 {
__u32 x86_vfm;
};
__u8 x86_stepping;
+ union {
+ // MSR_IA32_PLATFORM_ID[52-50]
+ __u8 intel_platform_id;
+ __u8 amd_unused;
+ };
#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined (in pages): */
int x86_tlbsize;
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index ecd58ea9a837..a671a1145906 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,17 +25,6 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
-typedef void (cpu_emergency_virt_cb)(void);
-#if IS_ENABLED(CONFIG_KVM_X86)
-void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
-void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
-void cpu_emergency_disable_virtualization(void);
-#else
-static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {}
-static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {}
-static inline void cpu_emergency_disable_virtualization(void) {}
-#endif /* CONFIG_KVM_X86 */
-
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
void run_crash_ipi_callback(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 9f5be2bbd291..dbd90fede5e7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -302,19 +302,17 @@ extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_I
* failure to fully clear the cached descriptor is only observable for
* FS and GS.
*/
-#define __loadsegment_simple(seg, value) \
-do { \
- unsigned short __val = (value); \
- \
- asm volatile(" \n" \
- "1: movl %k0,%%" #seg " \n" \
+#define LOAD_SEGMENT(seg) \
+static inline void __loadsegment_##seg(u16 value) \
+{ \
+ asm volatile("1: movl %k0,%%" #seg "\n" \
_ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k0)\
- : "+r" (__val) : : "memory"); \
-} while (0)
+ : "+r" (value) : : "memory"); \
+}
-#define __loadsegment_ss(value) __loadsegment_simple(ss, (value))
-#define __loadsegment_ds(value) __loadsegment_simple(ds, (value))
-#define __loadsegment_es(value) __loadsegment_simple(es, (value))
+LOAD_SEGMENT(ss)
+LOAD_SEGMENT(ds)
+LOAD_SEGMENT(es)
#ifdef CONFIG_X86_32
@@ -322,33 +320,48 @@ do { \
* On 32-bit systems, the hidden parts of FS and GS are unobservable if
* the selector is NULL, so there's no funny business here.
*/
-#define __loadsegment_fs(value) __loadsegment_simple(fs, (value))
-#define __loadsegment_gs(value) __loadsegment_simple(gs, (value))
+LOAD_SEGMENT(fs)
+LOAD_SEGMENT(gs)
#else
-static inline void __loadsegment_fs(unsigned short value)
+static inline void __loadsegment_fs(u16 value)
{
- asm volatile(" \n"
- "1: movw %0, %%fs \n"
- "2: \n"
-
+ asm volatile("1: movw %0, %%fs\n"
+ "2:\n"
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS)
-
- : : "rm" (value) : "memory");
+ : : ASM_INPUT_RM (value) : "memory");
}
/* __loadsegment_gs is intentionally undefined. Use load_gs_index instead. */
#endif
-#define loadsegment(seg, value) __loadsegment_ ## seg (value)
+#undef LOAD_SEGMENT
+
+#define loadsegment(seg, val) __loadsegment_##seg(val)
/*
* Save a segment register away:
*/
-#define savesegment(seg, value) \
- asm("movl %%" #seg ",%k0" : "=r" (value) : : "memory")
+#define SAVE_SEGMENT(seg) \
+static inline unsigned long __savesegment_##seg(void) \
+{ \
+ unsigned long v; \
+ asm volatile("movl %%" #seg ",%k0" : "=r" (v)); \
+ return v; \
+}
+
+SAVE_SEGMENT(cs)
+SAVE_SEGMENT(ss)
+SAVE_SEGMENT(ds)
+SAVE_SEGMENT(es)
+SAVE_SEGMENT(fs)
+SAVE_SEGMENT(gs)
+
+#undef SAVE_SEGMENT
+
+#define savesegment(seg, var) ((var) = __savesegment_##seg())
#endif /* !__ASSEMBLER__ */
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 0e6c0940100f..09e605c85de4 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -661,6 +661,8 @@ static inline void snp_leak_pages(u64 pfn, unsigned int pages)
{
__snp_leak_pages(pfn, pages, true);
}
+void snp_prepare(void);
+void snp_shutdown(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_rmptable_init(void) { return -ENOSYS; }
@@ -677,6 +679,8 @@ static inline void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp)
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
static inline void snp_fixup_e820_tables(void) {}
+static inline void snp_prepare(void) {}
+static inline void snp_shutdown(void) {}
#endif
#endif
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 8bc074c8d7c6..049638e3da74 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -20,31 +20,31 @@
#define TDG_VM_RD 7
#define TDG_VM_WR 8
-/* TDX attributes */
-#define TDX_ATTR_DEBUG_BIT 0
-#define TDX_ATTR_DEBUG BIT_ULL(TDX_ATTR_DEBUG_BIT)
-#define TDX_ATTR_HGS_PLUS_PROF_BIT 4
-#define TDX_ATTR_HGS_PLUS_PROF BIT_ULL(TDX_ATTR_HGS_PLUS_PROF_BIT)
-#define TDX_ATTR_PERF_PROF_BIT 5
-#define TDX_ATTR_PERF_PROF BIT_ULL(TDX_ATTR_PERF_PROF_BIT)
-#define TDX_ATTR_PMT_PROF_BIT 6
-#define TDX_ATTR_PMT_PROF BIT_ULL(TDX_ATTR_PMT_PROF_BIT)
-#define TDX_ATTR_ICSSD_BIT 16
-#define TDX_ATTR_ICSSD BIT_ULL(TDX_ATTR_ICSSD_BIT)
-#define TDX_ATTR_LASS_BIT 27
-#define TDX_ATTR_LASS BIT_ULL(TDX_ATTR_LASS_BIT)
-#define TDX_ATTR_SEPT_VE_DISABLE_BIT 28
-#define TDX_ATTR_SEPT_VE_DISABLE BIT_ULL(TDX_ATTR_SEPT_VE_DISABLE_BIT)
-#define TDX_ATTR_MIGRTABLE_BIT 29
-#define TDX_ATTR_MIGRTABLE BIT_ULL(TDX_ATTR_MIGRTABLE_BIT)
-#define TDX_ATTR_PKS_BIT 30
-#define TDX_ATTR_PKS BIT_ULL(TDX_ATTR_PKS_BIT)
-#define TDX_ATTR_KL_BIT 31
-#define TDX_ATTR_KL BIT_ULL(TDX_ATTR_KL_BIT)
-#define TDX_ATTR_TPA_BIT 62
-#define TDX_ATTR_TPA BIT_ULL(TDX_ATTR_TPA_BIT)
-#define TDX_ATTR_PERFMON_BIT 63
-#define TDX_ATTR_PERFMON BIT_ULL(TDX_ATTR_PERFMON_BIT)
+/* TDX TD attributes */
+#define TDX_TD_ATTR_DEBUG_BIT 0
+#define TDX_TD_ATTR_DEBUG BIT_ULL(TDX_TD_ATTR_DEBUG_BIT)
+#define TDX_TD_ATTR_HGS_PLUS_PROF_BIT 4
+#define TDX_TD_ATTR_HGS_PLUS_PROF BIT_ULL(TDX_TD_ATTR_HGS_PLUS_PROF_BIT)
+#define TDX_TD_ATTR_PERF_PROF_BIT 5
+#define TDX_TD_ATTR_PERF_PROF BIT_ULL(TDX_TD_ATTR_PERF_PROF_BIT)
+#define TDX_TD_ATTR_PMT_PROF_BIT 6
+#define TDX_TD_ATTR_PMT_PROF BIT_ULL(TDX_TD_ATTR_PMT_PROF_BIT)
+#define TDX_TD_ATTR_ICSSD_BIT 16
+#define TDX_TD_ATTR_ICSSD BIT_ULL(TDX_TD_ATTR_ICSSD_BIT)
+#define TDX_TD_ATTR_LASS_BIT 27
+#define TDX_TD_ATTR_LASS BIT_ULL(TDX_TD_ATTR_LASS_BIT)
+#define TDX_TD_ATTR_SEPT_VE_DISABLE_BIT 28
+#define TDX_TD_ATTR_SEPT_VE_DISABLE BIT_ULL(TDX_TD_ATTR_SEPT_VE_DISABLE_BIT)
+#define TDX_TD_ATTR_MIGRATABLE_BIT 29
+#define TDX_TD_ATTR_MIGRATABLE BIT_ULL(TDX_TD_ATTR_MIGRATABLE_BIT)
+#define TDX_TD_ATTR_PKS_BIT 30
+#define TDX_TD_ATTR_PKS BIT_ULL(TDX_TD_ATTR_PKS_BIT)
+#define TDX_TD_ATTR_KL_BIT 31
+#define TDX_TD_ATTR_KL BIT_ULL(TDX_TD_ATTR_KL_BIT)
+#define TDX_TD_ATTR_TPA_BIT 62
+#define TDX_TD_ATTR_TPA BIT_ULL(TDX_TD_ATTR_TPA_BIT)
+#define TDX_TD_ATTR_PERFMON_BIT 63
+#define TDX_TD_ATTR_PERFMON BIT_ULL(TDX_TD_ATTR_PERFMON_BIT)
/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
#define TDCS_CONFIG_FLAGS 0x1110000300000016
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 84951572ab81..05d1d479b4cf 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -130,7 +130,6 @@ __visible void smp_call_function_interrupt(struct pt_regs *regs);
__visible void smp_call_function_single_interrupt(struct pt_regs *r);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
-#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu)
/*
* This function is needed by all SMP systems. It must _always_ be valid
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index edde36097ddc..bcfeb5e7c0ed 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -142,13 +142,13 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u64 exit_info_2;
u32 exit_int_info;
u32 exit_int_info_err;
- u64 nested_ctl;
+ u64 misc_ctl;
u64 avic_vapic_bar;
u64 ghcb_gpa;
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
- u64 virt_ext;
+ u64 misc_ctl2;
u32 clean;
u32 reserved_5;
u64 next_rip;
@@ -182,6 +182,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define TLB_CONTROL_FLUSH_ASID 3
#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+#define TLB_CONTROL_MASK GENMASK(2, 0)
+
#define ERAP_CONTROL_ALLOW_LARGER_RAP BIT(0)
#define ERAP_CONTROL_CLEAR_RAP BIT(1)
@@ -222,8 +224,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define X2APIC_MODE_SHIFT 30
#define X2APIC_MODE_MASK (1 << X2APIC_MODE_SHIFT)
-#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
-#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+#define SVM_INT_VECTOR_MASK GENMASK(7, 0)
#define SVM_INTERRUPT_SHADOW_MASK BIT_ULL(0)
#define SVM_GUEST_INTERRUPT_MASK BIT_ULL(1)
@@ -239,10 +240,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
-#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
-#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2)
+#define SVM_MISC_ENABLE_NP BIT(0)
+#define SVM_MISC_ENABLE_SEV BIT(1)
+#define SVM_MISC_ENABLE_SEV_ES BIT(2)
+#define SVM_MISC2_ENABLE_V_LBR BIT_ULL(0)
+#define SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE BIT_ULL(1)
#define SVM_TSC_RATIO_RSVD 0xffffff0000000000ULL
#define SVM_TSC_RATIO_MIN 0x0000000000000001ULL
@@ -636,6 +639,9 @@ static inline void __unused_size_checks(void)
#define SVM_EVTINJ_VALID (1 << 31)
#define SVM_EVTINJ_VALID_ERR (1 << 11)
+/* Every bit not covered by the SVM_EVTINJ vector, type, valid-err and valid definitions. */
+#define SVM_EVTINJ_RESERVED_BITS (~(SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | \
+ SVM_EVTINJ_VALID_ERR | SVM_EVTINJ_VALID))
+
#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 6b338d7f01b7..a149740b24e8 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -145,8 +145,6 @@ static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args))
#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args))
#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args))
-int tdx_cpu_enable(void);
-int tdx_enable(void);
const char *tdx_dump_mce_info(struct mce *m);
const struct tdx_sys_info *tdx_get_sysinfo(void);
@@ -223,8 +221,6 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
#else
static inline void tdx_init(void) { }
-static inline int tdx_cpu_enable(void) { return -ENODEV; }
-static inline int tdx_enable(void) { return -ENODEV; }
static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
diff --git a/arch/x86/include/asm/tdx_global_metadata.h b/arch/x86/include/asm/tdx_global_metadata.h
index 060a2ad744bf..40689c8dc67e 100644
--- a/arch/x86/include/asm/tdx_global_metadata.h
+++ b/arch/x86/include/asm/tdx_global_metadata.h
@@ -5,6 +5,12 @@
#include <linux/types.h>
+struct tdx_sys_info_version {
+ u16 minor_version;
+ u16 major_version;
+ u16 update_version;
+};
+
struct tdx_sys_info_features {
u64 tdx_features0;
};
@@ -35,6 +41,7 @@ struct tdx_sys_info_td_conf {
};
struct tdx_sys_info {
+ struct tdx_sys_info_version version;
struct tdx_sys_info_features features;
struct tdx_sys_info_tdmr tdmr;
struct tdx_sys_info_td_ctrl td_ctrl;
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index f360104ed172..459780c3ed1f 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -7,7 +7,6 @@
extern void hpet_time_init(void);
extern bool pit_timer_init(void);
-extern bool tsc_clocksource_watchdog_disabled(void);
extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 5a3cdc439e38..0545fe75c3fa 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -172,6 +172,28 @@ struct tlb_state_shared {
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
+/*
+ * Please ignore the name of this function. It should be called
+ * switch_to_kernel_thread().
+ *
+ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
+ * kernel thread or other context without an mm. Acceptable implementations
+ * include doing nothing whatsoever, switching to init_mm, or various clever
+ * lazy tricks to try to minimize TLB flushes.
+ *
+ * The scheduler reserves the right to call enter_lazy_tlb() several times
+ * in a row. It will notify us that we're going back to a real mm by
+ * calling switch_mm_irqs_off().
+ */
+#define enter_lazy_tlb enter_lazy_tlb
+static __always_inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
+ return;
+
+ this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
+}
+
bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay
@@ -480,6 +502,10 @@ static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
}
#endif
+#else /* !MODULE */
+#define enter_lazy_tlb enter_lazy_tlb
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ __compiletime_error("enter_lazy_tlb() should not be used in modules");
#endif /* !MODULE */
static inline void __native_tlb_flush_global(unsigned long cr4)
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 1fadf0cf520c..0ba9bdb99871 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -155,6 +155,7 @@ extern unsigned int __max_logical_packages;
extern unsigned int __max_threads_per_core;
extern unsigned int __num_threads_per_package;
extern unsigned int __num_cores_per_package;
+extern unsigned int __num_nodes_per_package;
const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c);
enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c);
@@ -179,6 +180,11 @@ static inline unsigned int topology_num_threads_per_package(void)
return __num_threads_per_package;
}
+static inline unsigned int topology_num_nodes_per_package(void)
+{
+ return __num_nodes_per_package;
+}
+
#ifdef CONFIG_X86_LOCAL_APIC
int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level);
#else
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 367297b188c3..3a0dd3c2b233 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -507,7 +507,7 @@ extern struct movsl_mask {
} ____cacheline_aligned_in_smp movsl_mask;
#endif
-#define ARCH_HAS_NOCACHE_UACCESS 1
+#define ARCH_HAS_NONTEMPORAL_UACCESS 1
/*
* The "unsafe" user accesses aren't really "unsafe", but the naming
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 40379a1adbb8..fff19e73ccb3 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -26,13 +26,7 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
return __copy_user_ll(to, (__force const void *)from, n);
}
-static __always_inline unsigned long
-__copy_from_user_inatomic_nocache(void *to, const void __user *from,
- unsigned long n)
-{
- return __copy_from_user_ll_nocache_nozero(to, from, n);
-}
-
+unsigned long __must_check copy_from_user_inatomic_nontemporal(void *, const void __user *, unsigned long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 915124011c27..20de34cc9aa6 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -147,26 +147,28 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
return copy_user_generic((__force void *)dst, src, size);
}
-extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size);
-extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
+#define copy_to_nontemporal copy_to_nontemporal
+extern size_t copy_to_nontemporal(void *dst, const void *src, size_t size);
+extern size_t copy_user_flushcache(void *dst, const void __user *src, size_t size);
static inline int
-__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
+copy_from_user_inatomic_nontemporal(void *dst, const void __user *src,
unsigned size)
{
long ret;
kasan_check_write(dst, size);
+ src = mask_user_address(src);
stac();
- ret = __copy_user_nocache(dst, src, size);
+ ret = copy_to_nontemporal(dst, (__force const void *)src, size);
clac();
return ret;
}
-static inline int
-__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
+static inline size_t
+copy_from_user_flushcache(void *dst, const void __user *src, size_t size)
{
kasan_check_write(dst, size);
- return __copy_user_flushcache(dst, src, size);
+ return copy_user_flushcache(dst, src, size);
}
/*
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index e8afbe9faa5b..f2d49212ae90 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -18,7 +18,6 @@ struct vdso_image {
unsigned long extable_base, extable_len;
const void *extable;
- long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
long sym___kernel_vsyscall;
diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h
index 5d471253c755..eda233a90ea8 100644
--- a/arch/x86/include/asm/vermagic.h
+++ b/arch/x86/include/asm/vermagic.h
@@ -5,10 +5,6 @@
#ifdef CONFIG_X86_64
/* X86_64 does not define MODULE_PROC_FAMILY */
-#elif defined CONFIG_M486SX
-#define MODULE_PROC_FAMILY "486SX "
-#elif defined CONFIG_M486
-#define MODULE_PROC_FAMILY "486 "
#elif defined CONFIG_M586
#define MODULE_PROC_FAMILY "586 "
#elif defined CONFIG_M586TSC
@@ -31,8 +27,6 @@
#define MODULE_PROC_FAMILY "K6 "
#elif defined CONFIG_MK7
#define MODULE_PROC_FAMILY "K7 "
-#elif defined CONFIG_MELAN
-#define MODULE_PROC_FAMILY "ELAN "
#elif defined CONFIG_MCRUSOE
#define MODULE_PROC_FAMILY "CRUSOE "
#elif defined CONFIG_MEFFICEON
diff --git a/arch/x86/include/asm/virt.h b/arch/x86/include/asm/virt.h
new file mode 100644
index 000000000000..1558a0673d06
--- /dev/null
+++ b/arch/x86/include/asm/virt.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_X86_VIRT_H
+#define _ASM_X86_VIRT_H
+
+#include <asm/reboot.h>
+
+typedef void (cpu_emergency_virt_cb)(void);
+
+#if IS_ENABLED(CONFIG_KVM_X86)
+extern bool virt_rebooting;
+
+void __init x86_virt_init(void);
+
+int x86_virt_get_ref(int feat);
+void x86_virt_put_ref(int feat);
+
+int x86_virt_emergency_disable_virtualization_cpu(void);
+
+void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback);
+void x86_virt_unregister_emergency_callback(cpu_emergency_virt_cb *callback);
+#else
+static __always_inline void x86_virt_init(void) {}
+static inline int x86_virt_emergency_disable_virtualization_cpu(void) { return -ENOENT; }
+#endif
+
+#endif /* _ASM_X86_VIRT_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index b92ff87e3560..37080382df54 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -20,6 +20,17 @@
#include <asm/trapnr.h>
#include <asm/vmxfeatures.h>
+struct vmcs_hdr {
+ u32 revision_id:31;
+ u32 shadow_vmcs:1;
+};
+
+struct vmcs {
+ struct vmcs_hdr hdr;
+ u32 abort;
+ char data[];
+};
+
#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f)
/*
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 472f0263dbc6..538053b1656a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -14,12 +14,17 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
* Called on instruction fetch fault in vsyscall page.
* Returns true if handled.
*/
-extern bool emulate_vsyscall(unsigned long error_code,
- struct pt_regs *regs, unsigned long address);
+bool emulate_vsyscall_pf(unsigned long error_code, struct pt_regs *regs, unsigned long address);
+bool emulate_vsyscall_gp(struct pt_regs *regs);
#else
static inline void map_vsyscall(void) {}
-static inline bool emulate_vsyscall(unsigned long error_code,
- struct pt_regs *regs, unsigned long address)
+static inline bool emulate_vsyscall_pf(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
+{
+ return false;
+}
+
+static inline bool emulate_vsyscall_gp(struct pt_regs *regs)
{
return false;
}
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
deleted file mode 100644
index 7b0307acc410..000000000000
--- a/arch/x86/include/asm/xor.h
+++ /dev/null
@@ -1,502 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_X86_XOR_H
-#define _ASM_X86_XOR_H
-
-/*
- * Optimized RAID-5 checksumming functions for SSE.
- */
-
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-/*
- * Based on
- * High-speed RAID5 checksumming functions utilizing SSE instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-/*
- * x86-64 changes / gcc fixes from Andi Kleen.
- * Copyright 2002 Andi Kleen, SuSE Labs.
- *
- * This hasn't been optimized for the hammer yet, but there are likely
- * no advantages to be gotten from x86-64 here anyways.
- */
-
-#include <asm/fpu/api.h>
-
-#ifdef CONFIG_X86_32
-/* reduce register pressure */
-# define XOR_CONSTANT_CONSTRAINT "i"
-#else
-# define XOR_CONSTANT_CONSTRAINT "re"
-#endif
-
-#define OFFS(x) "16*("#x")"
-#define PF_OFFS(x) "256+16*("#x")"
-#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
-#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
-#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
-#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
-#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
-#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
-#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
-#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
-#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
-#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
-#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
-#define NOP(x)
-
-#define BLK64(pf, op, i) \
- pf(i) \
- op(i, 0) \
- op(i + 1, 1) \
- op(i + 2, 2) \
- op(i + 3, 3)
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- PF1(i) \
- PF1(i + 2) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " add %[inc], %[p1] ;\n"
- " add %[inc], %[p2] ;\n"
- " dec %[cnt] ;\n"
- " jnz 1b ;\n"
- : [cnt] "+r" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2)
- : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- BLK64(PF0, LD, i) \
- BLK64(PF1, XO1, i) \
- BLK64(NOP, ST, i) \
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " add %[inc], %[p1] ;\n"
- " add %[inc], %[p2] ;\n"
- " dec %[cnt] ;\n"
- " jnz 1b ;\n"
- : [cnt] "+r" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2)
- : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " add %[inc], %[p1] ;\n"
- " add %[inc], %[p2] ;\n"
- " add %[inc], %[p3] ;\n"
- " dec %[cnt] ;\n"
- " jnz 1b ;\n"
- : [cnt] "+r" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
- : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- BLK64(PF0, LD, i) \
- BLK64(PF1, XO1, i) \
- BLK64(PF2, XO2, i) \
- BLK64(NOP, ST, i) \
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " add %[inc], %[p1] ;\n"
- " add %[inc], %[p2] ;\n"
- " add %[inc], %[p3] ;\n"
- " dec %[cnt] ;\n"
- " jnz 1b ;\n"
- : [cnt] "+r" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
- : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " add %[inc], %[p1] ;\n"
- " add %[inc], %[p2] ;\n"
- " add %[inc], %[p3] ;\n"
- " add %[inc], %[p4] ;\n"
- " dec %[cnt] ;\n"
- " jnz 1b ;\n"
- : [cnt] "+r" (lines), [p1] "+r" (p1),
- [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
- : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- BLK64(PF0, LD, i) \
- BLK64(PF1, XO1, i) \
- BLK64(PF2, XO2, i) \
- BLK64(PF3, XO3, i) \
- BLK64(NOP, ST, i) \
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " add %[inc], %[p1] ;\n"
- " add %[inc], %[p2] ;\n"
- " add %[inc], %[p3] ;\n"
- " add %[inc], %[p4] ;\n"
- " dec %[cnt] ;\n"
- " jnz 1b ;\n"
- : [cnt] "+r" (lines), [p1] "+r" (p1),
- [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
- : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- PF4(i) \
- PF4(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- XO4(i, 0) \
- XO4(i + 1, 1) \
- XO4(i + 2, 2) \
- XO4(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " add %[inc], %[p1] ;\n"
- " add %[inc], %[p2] ;\n"
- " add %[inc], %[p3] ;\n"
- " add %[inc], %[p4] ;\n"
- " add %[inc], %[p5] ;\n"
- " dec %[cnt] ;\n"
- " jnz 1b ;\n"
- : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
- [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
- : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- BLK64(PF0, LD, i) \
- BLK64(PF1, XO1, i) \
- BLK64(PF2, XO2, i) \
- BLK64(PF3, XO3, i) \
- BLK64(PF4, XO4, i) \
- BLK64(NOP, ST, i) \
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " add %[inc], %[p1] ;\n"
- " add %[inc], %[p2] ;\n"
- " add %[inc], %[p3] ;\n"
- " add %[inc], %[p4] ;\n"
- " add %[inc], %[p5] ;\n"
- " dec %[cnt] ;\n"
- " jnz 1b ;\n"
- : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
- [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
- : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static struct xor_block_template xor_block_sse_pf64 = {
- .name = "prefetch64-sse",
- .do_2 = xor_sse_2_pf64,
- .do_3 = xor_sse_3_pf64,
- .do_4 = xor_sse_4_pf64,
- .do_5 = xor_sse_5_pf64,
-};
-
-#undef LD
-#undef XO1
-#undef XO2
-#undef XO3
-#undef XO4
-#undef ST
-#undef NOP
-#undef BLK64
-#undef BLOCK
-
-#undef XOR_CONSTANT_CONSTRAINT
-
-#ifdef CONFIG_X86_32
-# include <asm/xor_32.h>
-#else
-# include <asm/xor_64.h>
-#endif
-
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- AVX_SELECT(FASTEST)
-
-#endif /* _ASM_X86_XOR_H */
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
deleted file mode 100644
index 7a6b9474591e..000000000000
--- a/arch/x86/include/asm/xor_32.h
+++ /dev/null
@@ -1,573 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_X86_XOR_32_H
-#define _ASM_X86_XOR_32_H
-
-/*
- * Optimized RAID-5 checksumming functions for MMX.
- */
-
-/*
- * High-speed RAID5 checksumming functions utilizing MMX instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-#define LD(x, y) " movq 8*("#x")(%1), %%mm"#y" ;\n"
-#define ST(x, y) " movq %%mm"#y", 8*("#x")(%1) ;\n"
-#define XO1(x, y) " pxor 8*("#x")(%2), %%mm"#y" ;\n"
-#define XO2(x, y) " pxor 8*("#x")(%3), %%mm"#y" ;\n"
-#define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n"
-#define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n"
-
-#include <asm/fpu/api.h>
-
-static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2)
-{
- unsigned long lines = bytes >> 7;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- XO1(i, 0) \
- ST(i, 0) \
- XO1(i+1, 1) \
- ST(i+1, 1) \
- XO1(i + 2, 2) \
- ST(i + 2, 2) \
- XO1(i + 3, 3) \
- ST(i + 3, 3)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- unsigned long lines = bytes >> 7;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i, 0) \
- ST(i, 0) \
- XO2(i + 1, 1) \
- ST(i + 1, 1) \
- XO2(i + 2, 2) \
- ST(i + 2, 2) \
- XO2(i + 3, 3) \
- ST(i + 3, 3)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- unsigned long lines = bytes >> 7;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i, 0) \
- ST(i, 0) \
- XO3(i + 1, 1) \
- ST(i + 1, 1) \
- XO3(i + 2, 2) \
- ST(i + 2, 2) \
- XO3(i + 3, 3) \
- ST(i + 3, 3)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " addl $128, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-
-static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5)
-{
- unsigned long lines = bytes >> 7;
-
- kernel_fpu_begin();
-
- /* Make sure GCC forgets anything it knows about p4 or p5,
- such that it won't pass to the asm volatile below a
- register that is shared with any other variable. That's
- because we modify p4 and p5 there, but we can't mark them
- as read/write, otherwise we'd overflow the 10-asm-operands
- limit of GCC < 3.1. */
- asm("" : "+r" (p4), "+r" (p5));
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- XO4(i, 0) \
- ST(i, 0) \
- XO4(i + 1, 1) \
- ST(i + 1, 1) \
- XO4(i + 2, 2) \
- ST(i + 2, 2) \
- XO4(i + 3, 3) \
- ST(i + 3, 3)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " addl $128, %4 ;\n"
- " addl $128, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- : "r" (p4), "r" (p5)
- : "memory");
-
- /* p4 and p5 were modified, and now the variables are dead.
- Clobber them just to be sure nobody does something stupid
- like assuming they have some legal value. */
- asm("" : "=r" (p4), "=r" (p5));
-
- kernel_fpu_end();
-}
-
-#undef LD
-#undef XO1
-#undef XO2
-#undef XO3
-#undef XO4
-#undef ST
-#undef BLOCK
-
-static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2)
-{
- unsigned long lines = bytes >> 6;
-
- kernel_fpu_begin();
-
- asm volatile(
- " .align 32 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- unsigned long lines = bytes >> 6;
-
- kernel_fpu_begin();
-
- asm volatile(
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- :
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- unsigned long lines = bytes >> 6;
-
- kernel_fpu_begin();
-
- asm volatile(
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor (%4), %%mm0 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " pxor 8(%4), %%mm1 ;\n"
- " movq %%mm0, (%1) ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " pxor 16(%4), %%mm2 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 24(%4), %%mm3 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " pxor 32(%4), %%mm4 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " pxor 40(%4), %%mm5 ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%4), %%mm6 ;\n"
- " pxor 56(%4), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " addl $64, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5)
-{
- unsigned long lines = bytes >> 6;
-
- kernel_fpu_begin();
-
- /* Make sure GCC forgets anything it knows about p4 or p5,
- such that it won't pass to the asm volatile below a
- register that is shared with any other variable. That's
- because we modify p4 and p5 there, but we can't mark them
- as read/write, otherwise we'd overflow the 10-asm-operands
- limit of GCC < 3.1. */
- asm("" : "+r" (p4), "+r" (p5));
-
- asm volatile(
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " pxor (%4), %%mm0 ;\n"
- " pxor 8(%4), %%mm1 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " pxor (%5), %%mm0 ;\n"
- " pxor 8(%5), %%mm1 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 16(%4), %%mm2 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " pxor 16(%5), %%mm2 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 24(%4), %%mm3 ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 24(%5), %%mm3 ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%4), %%mm4 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " pxor 32(%5), %%mm4 ;\n"
- " pxor 40(%4), %%mm5 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " pxor 40(%5), %%mm5 ;\n"
- " pxor 48(%4), %%mm6 ;\n"
- " pxor 56(%4), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%5), %%mm6 ;\n"
- " pxor 56(%5), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " addl $64, %4 ;\n"
- " addl $64, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- : "r" (p4), "r" (p5)
- : "memory");
-
- /* p4 and p5 were modified, and now the variables are dead.
- Clobber them just to be sure nobody does something stupid
- like assuming they have some legal value. */
- asm("" : "=r" (p4), "=r" (p5));
-
- kernel_fpu_end();
-}
-
-static struct xor_block_template xor_block_pII_mmx = {
- .name = "pII_mmx",
- .do_2 = xor_pII_mmx_2,
- .do_3 = xor_pII_mmx_3,
- .do_4 = xor_pII_mmx_4,
- .do_5 = xor_pII_mmx_5,
-};
-
-static struct xor_block_template xor_block_p5_mmx = {
- .name = "p5_mmx",
- .do_2 = xor_p5_mmx_2,
- .do_3 = xor_p5_mmx_3,
- .do_4 = xor_p5_mmx_4,
- .do_5 = xor_p5_mmx_5,
-};
-
-static struct xor_block_template xor_block_pIII_sse = {
- .name = "pIII_sse",
- .do_2 = xor_sse_2,
- .do_3 = xor_sse_3,
- .do_4 = xor_sse_4,
- .do_5 = xor_sse_5,
-};
-
-/* Also try the AVX routines */
-#include <asm/xor_avx.h>
-
-/* Also try the generic routines. */
-#include <asm-generic/xor.h>
-
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES \
-do { \
- AVX_XOR_SPEED; \
- if (boot_cpu_has(X86_FEATURE_XMM)) { \
- xor_speed(&xor_block_pIII_sse); \
- xor_speed(&xor_block_sse_pf64); \
- } else if (boot_cpu_has(X86_FEATURE_MMX)) { \
- xor_speed(&xor_block_pII_mmx); \
- xor_speed(&xor_block_p5_mmx); \
- } else { \
- xor_speed(&xor_block_8regs); \
- xor_speed(&xor_block_8regs_p); \
- xor_speed(&xor_block_32regs); \
- xor_speed(&xor_block_32regs_p); \
- } \
-} while (0)
-
-#endif /* _ASM_X86_XOR_32_H */
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
deleted file mode 100644
index 0307e4ec5044..000000000000
--- a/arch/x86/include/asm/xor_64.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_XOR_64_H
-#define _ASM_X86_XOR_64_H
-
-static struct xor_block_template xor_block_sse = {
- .name = "generic_sse",
- .do_2 = xor_sse_2,
- .do_3 = xor_sse_3,
- .do_4 = xor_sse_4,
- .do_5 = xor_sse_5,
-};
-
-
-/* Also try the AVX routines */
-#include <asm/xor_avx.h>
-
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES \
-do { \
- AVX_XOR_SPEED; \
- xor_speed(&xor_block_sse_pf64); \
- xor_speed(&xor_block_sse); \
-} while (0)
-
-#endif /* _ASM_X86_XOR_64_H */
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
deleted file mode 100644
index 7f81dd5897f4..000000000000
--- a/arch/x86/include/asm/xor_avx.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _ASM_X86_XOR_AVX_H
-#define _ASM_X86_XOR_AVX_H
-
-/*
- * Optimized RAID-5 checksumming functions for AVX
- *
- * Copyright (C) 2012 Intel Corporation
- * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
- *
- * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
- */
-
-#include <linux/compiler.h>
-#include <asm/fpu/api.h>
-
-#define BLOCK4(i) \
- BLOCK(32 * i, 0) \
- BLOCK(32 * (i + 1), 1) \
- BLOCK(32 * (i + 2), 2) \
- BLOCK(32 * (i + 3), 3)
-
-#define BLOCK16() \
- BLOCK4(0) \
- BLOCK4(4) \
- BLOCK4(8) \
- BLOCK4(12)
-
-static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
- const unsigned long * __restrict p1)
-{
- unsigned long lines = bytes >> 9;
-
- kernel_fpu_begin();
-
- while (lines--) {
-#undef BLOCK
-#define BLOCK(i, reg) \
-do { \
- asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p0[i / sizeof(*p0)])); \
- asm volatile("vmovdqa %%ymm" #reg ", %0" : \
- "=m" (p0[i / sizeof(*p0)])); \
-} while (0);
-
- BLOCK16()
-
- p0 = (unsigned long *)((uintptr_t)p0 + 512);
- p1 = (unsigned long *)((uintptr_t)p1 + 512);
- }
-
- kernel_fpu_end();
-}
-
-static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
- const unsigned long * __restrict p1,
- const unsigned long * __restrict p2)
-{
- unsigned long lines = bytes >> 9;
-
- kernel_fpu_begin();
-
- while (lines--) {
-#undef BLOCK
-#define BLOCK(i, reg) \
-do { \
- asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p1[i / sizeof(*p1)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p0[i / sizeof(*p0)])); \
- asm volatile("vmovdqa %%ymm" #reg ", %0" : \
- "=m" (p0[i / sizeof(*p0)])); \
-} while (0);
-
- BLOCK16()
-
- p0 = (unsigned long *)((uintptr_t)p0 + 512);
- p1 = (unsigned long *)((uintptr_t)p1 + 512);
- p2 = (unsigned long *)((uintptr_t)p2 + 512);
- }
-
- kernel_fpu_end();
-}
-
-static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
- const unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- unsigned long lines = bytes >> 9;
-
- kernel_fpu_begin();
-
- while (lines--) {
-#undef BLOCK
-#define BLOCK(i, reg) \
-do { \
- asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p2[i / sizeof(*p2)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p1[i / sizeof(*p1)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p0[i / sizeof(*p0)])); \
- asm volatile("vmovdqa %%ymm" #reg ", %0" : \
- "=m" (p0[i / sizeof(*p0)])); \
-} while (0);
-
- BLOCK16();
-
- p0 = (unsigned long *)((uintptr_t)p0 + 512);
- p1 = (unsigned long *)((uintptr_t)p1 + 512);
- p2 = (unsigned long *)((uintptr_t)p2 + 512);
- p3 = (unsigned long *)((uintptr_t)p3 + 512);
- }
-
- kernel_fpu_end();
-}
-
-static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
- const unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- unsigned long lines = bytes >> 9;
-
- kernel_fpu_begin();
-
- while (lines--) {
-#undef BLOCK
-#define BLOCK(i, reg) \
-do { \
- asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p3[i / sizeof(*p3)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p2[i / sizeof(*p2)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p1[i / sizeof(*p1)])); \
- asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
- "m" (p0[i / sizeof(*p0)])); \
- asm volatile("vmovdqa %%ymm" #reg ", %0" : \
- "=m" (p0[i / sizeof(*p0)])); \
-} while (0);
-
- BLOCK16()
-
- p0 = (unsigned long *)((uintptr_t)p0 + 512);
- p1 = (unsigned long *)((uintptr_t)p1 + 512);
- p2 = (unsigned long *)((uintptr_t)p2 + 512);
- p3 = (unsigned long *)((uintptr_t)p3 + 512);
- p4 = (unsigned long *)((uintptr_t)p4 + 512);
- }
-
- kernel_fpu_end();
-}
-
-static struct xor_block_template xor_block_avx = {
- .name = "avx",
- .do_2 = xor_avx_2,
- .do_3 = xor_avx_3,
- .do_4 = xor_avx_4,
- .do_5 = xor_avx_5,
-};
-
-#define AVX_XOR_SPEED \
-do { \
- if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
- xor_speed(&xor_block_avx); \
-} while (0)
-
-#define AVX_SELECT(FASTEST) \
- (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
-
-#endif
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 846a63215ce1..5f2b30d0405c 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -197,13 +197,13 @@ struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
- struct kvm_msr_entry entries[];
+ __DECLARE_FLEX_ARRAY(struct kvm_msr_entry, entries);
};
/* for KVM_GET_MSR_INDEX_LIST */
struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
- __u32 indices[];
+ __DECLARE_FLEX_ARRAY(__u32, indices);
};
/* Maximum size of any access bitmap in bytes */
@@ -245,7 +245,7 @@ struct kvm_cpuid_entry {
struct kvm_cpuid {
__u32 nent;
__u32 padding;
- struct kvm_cpuid_entry entries[];
+ __DECLARE_FLEX_ARRAY(struct kvm_cpuid_entry, entries);
};
struct kvm_cpuid_entry2 {
@@ -267,7 +267,7 @@ struct kvm_cpuid_entry2 {
struct kvm_cpuid2 {
__u32 nent;
__u32 padding;
- struct kvm_cpuid_entry2 entries[];
+ __DECLARE_FLEX_ARRAY(struct kvm_cpuid_entry2, entries);
};
/* for KVM_GET_PIT and KVM_SET_PIT */
@@ -398,7 +398,7 @@ struct kvm_xsave {
* the contents of CPUID leaf 0xD on the host.
*/
__u32 region[1024];
- __u32 extra[];
+ __DECLARE_FLEX_ARRAY(__u32, extra);
};
#define KVM_MAX_XCRS 16
@@ -476,6 +476,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9)
+#define KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM (1 << 10)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
@@ -565,7 +566,7 @@ struct kvm_pmu_event_filter {
__u32 fixed_counter_bitmap;
__u32 flags;
__u32 pad[4];
- __u64 events[];
+ __DECLARE_FLEX_ARRAY(__u64, events);
};
#define KVM_PMU_EVENT_ALLOW 0
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e9aeeeafad17..47a32f583930 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -44,6 +44,20 @@ KCOV_INSTRUMENT_unwind_orc.o := n
KCOV_INSTRUMENT_unwind_frame.o := n
KCOV_INSTRUMENT_unwind_guess.o := n
+# Disable KCOV to prevent crashes during kexec: load_segments() invalidates
+# the GS base, which KCOV relies on for per-CPU data.
+#
+# As KCOV and KEXEC compatibility should be preserved (e.g. syzkaller is
+# using it to collect crash dumps during kernel fuzzing), disabling
+# KCOV for KEXEC kernels is not an option. Selectively disabling KCOV
+# instrumentation for individual affected functions can be fragile, while
+# adding more checks to KCOV would slow it down.
+#
+# As a compromise solution, disable KCOV instrumentation for the whole
+# source code file. If its coverage is ever needed, other approaches
+# should be considered.
+KCOV_INSTRUMENT_machine_kexec_64.o := n
+
CFLAGS_head32.o := -fno-stack-protector
CFLAGS_head64.o := -fno-stack-protector
CFLAGS_irq.o := -I $(src)/../include/asm/trace
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a3f2fb1fea1b..ceba24f65ae3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1848,3 +1848,23 @@ void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys, acpi_size size) =
x86_acpi_os_ioremap;
EXPORT_SYMBOL_GPL(acpi_os_ioremap);
#endif
+
+int acpi_get_cpu_uid(unsigned int cpu, u32 *uid)
+{
+ u32 acpi_id;
+
+ if (cpu >= nr_cpu_ids)
+ return -EINVAL;
+
+#ifdef CONFIG_SMP
+ acpi_id = per_cpu(x86_cpu_to_acpiid, cpu);
+ if (acpi_id == CPU_ACPIID_INVALID)
+ return -ENODEV;
+#else
+ acpi_id = 0;
+#endif
+
+ *uid = acpi_id;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_get_cpu_uid);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d93f87f29d03..639904911444 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -332,7 +332,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
* Since the offsets must be consistent for all cores, we keep track
* of the LVT offsets in software and reserve the offset for the same
* vector also to be used on other cores. An offset is freed by
- * setting the entry to APIC_EILVT_MASKED.
+ * setting the entry to APIC_LVT_MASKED.
*
* If the BIOS is right, there should be no conflicts. Otherwise a
* "[Firmware Bug]: ..." error message is generated. However, if
@@ -344,9 +344,9 @@ static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
{
- return (old & APIC_EILVT_MASKED)
- || (new == APIC_EILVT_MASKED)
- || ((new & ~APIC_EILVT_MASKED) == old);
+ return (old & APIC_LVT_MASKED)
+ || (new == APIC_LVT_MASKED)
+ || ((new & ~APIC_LVT_MASKED) == old);
}
static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
@@ -358,13 +358,13 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
rsvd = atomic_read(&eilvt_offsets[offset]);
do {
- vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */
+ vector = rsvd & ~APIC_LVT_MASKED; /* 0: unassigned */
if (vector && !eilvt_entry_is_changeable(vector, new))
/* may not change if vectors are different */
return rsvd;
} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
- rsvd = new & ~APIC_EILVT_MASKED;
+ rsvd = new & ~APIC_LVT_MASKED;
if (rsvd && rsvd != vector)
pr_info("LVT offset %d assigned for vector 0x%02x\n",
offset, rsvd);
@@ -412,23 +412,21 @@ EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
/*
* Program the next event, relative to now
*/
-static int lapic_next_event(unsigned long delta,
- struct clock_event_device *evt)
+static int lapic_next_event(unsigned long delta, struct clock_event_device *evt)
{
apic_write(APIC_TMICT, delta);
return 0;
}
-static int lapic_next_deadline(unsigned long delta,
- struct clock_event_device *evt)
+static int lapic_next_deadline(unsigned long delta, struct clock_event_device *evt)
{
- u64 tsc;
-
- /* This MSR is special and need a special fence: */
- weak_wrmsr_fence();
+ /*
+ * There is no weak_wrmsr_fence() required here as all of this is purely
+ * CPU local. Avoid the [ml]fence overhead.
+ */
+ u64 tsc = rdtsc();
- tsc = rdtsc();
- wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+ native_wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
}
@@ -452,7 +450,7 @@ static int lapic_timer_shutdown(struct clock_event_device *evt)
* the timer _and_ zero the counter registers:
*/
if (v & APIC_LVT_TIMER_TSCDEADLINE)
- wrmsrq(MSR_IA32_TSC_DEADLINE, 0);
+ native_wrmsrq(MSR_IA32_TSC_DEADLINE, 0);
else
apic_write(APIC_TMICT, 0);
@@ -549,6 +547,11 @@ static __init bool apic_validate_deadline_timer(void)
if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
return false;
+
+ /* XEN_PV does not support it, but be paranoid about it */
+ if (boot_cpu_has(X86_FEATURE_XENPV))
+ goto clear;
+
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return true;
@@ -561,9 +564,11 @@ static __init bool apic_validate_deadline_timer(void)
if (boot_cpu_data.microcode >= rev)
return true;
- setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
"please update microcode to version: 0x%x (or later)\n", rev);
+
+clear:
+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return false;
}
@@ -586,14 +591,14 @@ static void setup_APIC_timer(void)
if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
levt->name = "lapic-deadline";
- levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_DUMMY);
+ levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_DUMMY);
+ levt->features |= CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED;
+ levt->cs_id = CSID_X86_TSC;
levt->set_next_event = lapic_next_deadline;
- clockevents_config_and_register(levt,
- tsc_khz * (1000 / TSC_DIVISOR),
- 0xF, ~0UL);
- } else
+ clockevents_config_and_register(levt, tsc_khz * (1000 / TSC_DIVISOR), 0xF, ~0UL);
+ } else {
clockevents_register_device(levt);
+ }
apic_update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, true);
}
@@ -1894,6 +1899,7 @@ void __init check_x2apic(void)
static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
+static inline void __x2apic_disable(void) { }
#endif /* !CONFIG_X86_X2APIC */
void __init enable_IR_x2apic(void)
@@ -2456,6 +2462,11 @@ static void lapic_resume(void *data)
if (x2apic_mode) {
__x2apic_enable();
} else {
+ if (x2apic_enabled()) {
+ pr_warn_once("x2apic: re-enabled by firmware during resume. Disabling\n");
+ __x2apic_disable();
+ }
+
/*
* Make sure the APICBASE points to the right address
*
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 15209f220e1f..42568ceec481 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1708,8 +1708,22 @@ static void __init uv_system_init_hub(void)
struct uv_hub_info_s *new_hub;
/* Allocate & fill new per hub info list */
- new_hub = (bid == 0) ? &uv_hub_info_node0
- : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid));
+ if (bid == 0) {
+ new_hub = &uv_hub_info_node0;
+ } else {
+ int nid;
+
+ /*
+ * Deconfigured sockets are mapped to SOCK_EMPTY. Use
+ * NUMA_NO_NODE to allocate on a valid node.
+ */
+ nid = uv_blade_to_node(bid);
+ if (nid == SOCK_EMPTY)
+ nid = NUMA_NO_NODE;
+
+ new_hub = kzalloc_node(bytes, GFP_KERNEL, nid);
+ }
+
if (WARN_ON_ONCE(!new_hub)) {
/* do not kfree() bid 0, which is statically allocated */
while (--bid > 0)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 09de584e4c8f..2d9ae6ab1701 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/export.h>
#include <linux/bitops.h>
+#include <linux/dmi.h>
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/kvm_types.h>
@@ -943,6 +944,9 @@ static void init_amd_zen1(struct cpuinfo_x86 *c)
msr_clear_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
clear_cpu_cap(c, X86_FEATURE_IRPERF);
}
+
+ pr_notice_once("AMD Zen1 FPDSS bug detected, enabling mitigation.\n");
+ msr_set_bit(MSR_AMD64_FP_CFG, MSR_AMD64_FP_CFG_ZEN1_DENORM_FIX_BIT);
}
static const struct x86_cpu_id amd_zenbleed_microcode[] = {
@@ -1380,3 +1384,51 @@ static __init int print_s5_reset_status_mmio(void)
return 0;
}
late_initcall(print_s5_reset_status_mmio);
+
+static void __init dmi_scan_additional(const struct dmi_header *d, void *p)
+{
+ struct dmi_a_info *info = (struct dmi_a_info *)d;
+ void *next, *end;
+
+ if (!IS_ENABLED(CONFIG_DMI))
+ return;
+
+ if (info->header.type != DMI_ENTRY_ADDITIONAL ||
+ info->header.length < DMI_A_INFO_MIN_SIZE ||
+ info->count < 1)
+ return;
+
+ next = (void *)(info + 1);
+ end = (void *)info + info->header.length;
+
+ do {
+ struct dmi_a_info_entry *entry;
+ const char *string_ptr;
+
+ entry = (struct dmi_a_info_entry *)next;
+
+ /*
+ * Not much can be done to validate data. At least the entry
+ * length shouldn't be 0.
+ */
+ if (!entry->length)
+ return;
+
+ string_ptr = dmi_string_nosave(&info->header, entry->str_num);
+
+ /* Sample string: AGESA!V9 StrixKrackanPI-FP8 1.1.0.0c */
+ if (!strncmp(string_ptr, "AGESA", 5)) {
+ pr_info("AGESA: %s\n", string_ptr);
+ break;
+ }
+
+ next += entry->length;
+ } while (end - next >= DMI_A_INFO_ENT_MIN_SIZE);
+}
+
+static __init int print_dmi_agesa(void)
+{
+ dmi_walk(dmi_scan_additional, NULL);
+ return 0;
+}
+late_initcall(print_dmi_agesa);
diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c
index fb166662bc0d..bba28607a59a 100644
--- a/arch/x86/kernel/cpu/bus_lock.c
+++ b/arch/x86/kernel/cpu/bus_lock.c
@@ -132,6 +132,12 @@ static void __init sld_state_setup(void)
sld_state = state;
}
+static __init int setup_split_lock_detect(char *arg)
+{
+ return 1;
+}
+__setup("split_lock_detect=", setup_split_lock_detect);
+
static void __init __split_lock_setup(void)
{
if (!split_lock_verify_msr(false)) {
@@ -391,34 +397,35 @@ supported:
static void sld_state_show(void)
{
- if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
- !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+ const char *action = "warning";
+
+ if ((!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
+ !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) ||
+ (sld_state == sld_off))
return;
- switch (sld_state) {
- case sld_off:
- pr_info("disabled\n");
- break;
- case sld_warn:
- if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
- pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
- if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
- "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
- pr_warn("No splitlock CPU offline handler\n");
- } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
- pr_info("#DB: warning on user-space bus_locks\n");
- }
- break;
- case sld_fatal:
- if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
- pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
- else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- pr_info("#DB: sending SIGBUS on user-space bus_locks\n");
- break;
- case sld_ratelimit:
+ if (sld_state == sld_ratelimit) {
if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
- break;
+ return;
+ } else if (sld_state == sld_fatal) {
+ action = "sending SIGBUS";
+ }
+
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
+ pr_info("#AC: crashing the kernel on kernel split_locks and %s on user-space split_locks\n", action);
+
+ /*
+ * This is handling the case where a CPU goes offline at the
+ * moment where split lock detection is disabled in the warn
+ * setting, see split_lock_warn(). It doesn't have any effect
+ * in the fatal case.
+ */
+ if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+ pr_warn("No splitlock CPU offline handler\n");
+
+ } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
+ pr_info("#DB: %s on user-space bus_locks\n", action);
}
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1c3261cae40c..a4268c47f2bc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -71,6 +71,7 @@
#include <asm/traps.h>
#include <asm/sev.h>
#include <asm/tdx.h>
+#include <asm/virt.h>
#include <asm/posted_intr.h>
#include <asm/runtime-const.h>
@@ -95,6 +96,9 @@ EXPORT_SYMBOL(__max_dies_per_package);
unsigned int __max_logical_packages __ro_after_init = 1;
EXPORT_SYMBOL(__max_logical_packages);
+unsigned int __num_nodes_per_package __ro_after_init = 1;
+EXPORT_SYMBOL(__num_nodes_per_package);
+
unsigned int __num_cores_per_package __ro_after_init = 1;
EXPORT_SYMBOL(__num_cores_per_package);
@@ -406,31 +410,46 @@ out:
cr4_clear_bits(X86_CR4_UMIP);
}
-static __always_inline void setup_lass(struct cpuinfo_x86 *c)
+static int enable_lass(unsigned int cpu)
+{
+ cr4_set_bits(X86_CR4_LASS);
+
+ return 0;
+}
+
+/*
+ * Finalize features that need to be enabled just before entering
+ * userspace. Note that this only runs on a single CPU. Use appropriate
+ * callbacks if all the CPUs need to reflect the same change.
+ */
+static int cpu_finalize_pre_userspace(void)
{
if (!cpu_feature_enabled(X86_FEATURE_LASS))
- return;
+ return 0;
- /*
- * Legacy vsyscall page access causes a #GP when LASS is active.
- * Disable LASS because the #GP handler doesn't support vsyscall
- * emulation.
- *
- * Also disable LASS when running under EFI, as some runtime and
- * boot services rely on 1:1 mappings in the lower half.
- */
- if (IS_ENABLED(CONFIG_X86_VSYSCALL_EMULATION) ||
- IS_ENABLED(CONFIG_EFI)) {
- setup_clear_cpu_cap(X86_FEATURE_LASS);
- return;
- }
+ /* Runs on all online CPUs and future CPUs that come online. */
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/lass:enable", enable_lass, NULL);
- cr4_set_bits(X86_CR4_LASS);
+ return 0;
}
+late_initcall(cpu_finalize_pre_userspace);
/* These bits should not change their value after CPU init is finished. */
static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
- X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED;
+ X86_CR4_FSGSBASE | X86_CR4_CET;
+
+/*
+ * The CR pinning protects against ROP on the 'mov %reg, %CRn' instruction(s).
+ * Since you can ROP directly to these instructions (barring shadow stack),
+ * any protection must follow immediately and unconditionally after that.
+ *
+ * Specifically, the CR[04] write functions below will have the value
+ * validation controlled by the @cr_pinning static_branch which is
+ * __ro_after_init, just like the cr4_pinned_bits value.
+ *
+ * Once set, an attacker will have to defeat page-tables to get around these
+ * restrictions. Which is a much bigger ask than 'simple' ROP.
+ */
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -1743,7 +1762,7 @@ static void __init cpu_parse_early_param(void)
/* Minimize the gap between FRED is available and available but disabled. */
arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
- if (arglen != 2 || strncmp(arg, "on", 2))
+ if (arglen == 3 && !strncmp(arg, "off", 3))
setup_clear_cpu_cap(X86_FEATURE_FRED);
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
@@ -2045,13 +2064,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smep(c);
setup_smap(c);
setup_umip(c);
- setup_lass(c);
-
- /* Enable FSGSBASE instructions if available. */
- if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
- cr4_set_bits(X86_CR4_FSGSBASE);
- elf_hwcap2 |= HWCAP2_FSGSBASE;
- }
/*
* The vendor-specific functions might have changed features.
@@ -2151,6 +2163,7 @@ static __init void identify_boot_cpu(void)
cpu_detect_tlb(&boot_cpu_data);
setup_cr_pinning();
+ x86_virt_init();
tsx_init();
tdx_init();
lkgs_init();
@@ -2413,6 +2426,18 @@ void cpu_init_exception_handling(bool boot_cpu)
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
+ /*
+ * On CPUs with FSGSBASE support, paranoid_entry() uses
+ * ALTERNATIVE-patched RDGSBASE/WRGSBASE instructions. Secondary CPUs
+ * boot after alternatives are patched globally, so early exceptions
+ * execute patched code that depends on FSGSBASE. Enable the feature
+ * before any exceptions occur.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_FSGSBASE)) {
+ cr4_set_bits(X86_CR4_FSGSBASE);
+ elf_hwcap2 |= HWCAP2_FSGSBASE;
+ }
+
if (cpu_feature_enabled(X86_FEATURE_FRED)) {
/* The boot CPU has enabled FRED during early boot */
if (!boot_cpu)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 646ff33c4651..f28c0efb7c8f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -205,6 +205,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
c->microcode = intel_get_microcode_revision();
+ c->intel_platform_id = intel_get_platform_id();
/* Now if any of them are set, check the blacklist and clear the lot */
if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 6af1e8baeb0f..4604802692da 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -76,6 +76,9 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
if (m->steppings != X86_STEPPING_ANY &&
!(BIT(c->x86_stepping) & m->steppings))
continue;
+ if (m->platform_mask != X86_PLATFORM_ANY &&
+ !(BIT(c->intel_platform_id) & m->platform_mask))
+ continue;
if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
continue;
if (!x86_match_vendor_cpu_type(c, m))
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index da13c1e37f87..6605a0224659 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -95,39 +95,49 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
static const char * const smca_names[] = {
- [SMCA_LS ... SMCA_LS_V2] = "load_store",
- [SMCA_IF] = "insn_fetch",
- [SMCA_L2_CACHE] = "l2_cache",
+ [SMCA_CS ... SMCA_CS_V2] = "coherent_station",
+ [SMCA_DACC_BE] = "dacc_be",
+ [SMCA_DACC_FE] = "dacc_fe",
[SMCA_DE] = "decode_unit",
- [SMCA_RESERVED] = "reserved",
+ [SMCA_EDDR5CMN] = "eddr5_cmn",
[SMCA_EX] = "execution_unit",
[SMCA_FP] = "floating_point",
+ [SMCA_GMI_PCS] = "gmi_pcs",
+ [SMCA_GMI_PHY] = "gmi_phy",
+ [SMCA_IF] = "insn_fetch",
+ [SMCA_L2_CACHE] = "l2_cache",
[SMCA_L3_CACHE] = "l3_cache",
- [SMCA_CS ... SMCA_CS_V2] = "coherent_slave",
- [SMCA_PIE] = "pie",
-
- /* UMC v2 is separate because both of them can exist in a single system. */
- [SMCA_UMC] = "umc",
- [SMCA_UMC_V2] = "umc_v2",
+ [SMCA_LS ... SMCA_LS_V2] = "load_store",
[SMCA_MA_LLC] = "ma_llc",
- [SMCA_PB] = "param_block",
- [SMCA_PSP ... SMCA_PSP_V2] = "psp",
- [SMCA_SMU ... SMCA_SMU_V2] = "smu",
[SMCA_MP5] = "mp5",
+ [SMCA_MPART] = "mpart",
+ [SMCA_MPASP ... SMCA_MPASP_V2] = "mpasp",
+ [SMCA_MPDACC] = "mpdacc",
[SMCA_MPDMA] = "mpdma",
+ [SMCA_MPM] = "mpm",
+ [SMCA_MPRAS] = "mpras",
+ [SMCA_NBIF] = "nbif",
[SMCA_NBIO] = "nbio",
+ [SMCA_PB] = "param_block",
[SMCA_PCIE ... SMCA_PCIE_V2] = "pcie",
- [SMCA_XGMI_PCS] = "xgmi_pcs",
- [SMCA_NBIF] = "nbif",
- [SMCA_SHUB] = "shub",
+ [SMCA_PCIE_PL] = "pcie_pl",
+ [SMCA_PIE] = "pie",
+ [SMCA_PSP ... SMCA_PSP_V2] = "psp",
+ [SMCA_RESERVED] = "reserved",
[SMCA_SATA] = "sata",
+ [SMCA_SHUB] = "shub",
+ [SMCA_SMU ... SMCA_SMU_V2] = "smu",
+ [SMCA_SSBDCI] = "ssbdci",
+
+ /* UMC v2 is separate because both of them can exist in a single system. */
+ [SMCA_UMC] = "umc",
+ [SMCA_UMC_V2] = "umc_v2",
[SMCA_USB] = "usb",
- [SMCA_USR_DP] = "usr_dp",
[SMCA_USR_CP] = "usr_cp",
- [SMCA_GMI_PCS] = "gmi_pcs",
- [SMCA_XGMI_PHY] = "xgmi_phy",
+ [SMCA_USR_DP] = "usr_dp",
[SMCA_WAFL_PHY] = "wafl_phy",
- [SMCA_GMI_PHY] = "gmi_phy",
+ [SMCA_XGMI_PCS] = "xgmi_pcs",
+ [SMCA_XGMI_PHY] = "xgmi_phy",
};
static const char *smca_get_name(enum smca_bank_types t)
@@ -153,68 +163,60 @@ enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
}
EXPORT_SYMBOL_GPL(smca_get_bank_type);
+/*
+ * Format:
+ * { bank_type, hwid_mcatype }
+ *
+ * alphanumerically sorted by bank type.
+ */
static const struct smca_hwid smca_hwid_mcatypes[] = {
- /* { bank_type, hwid_mcatype } */
-
- /* Reserved type */
- { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
-
- /* ZN Core (HWID=0xB0) MCA types */
- { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
- { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
- { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
- { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
+ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
+ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
+ { SMCA_DACC_BE, HWID_MCATYPE(0x164, 0x0) },
+ { SMCA_DACC_FE, HWID_MCATYPE(0x157, 0x0) },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
- /* HWID 0xB0 MCATYPE 0x4 is Reserved */
+ { SMCA_EDDR5CMN, HWID_MCATYPE(0x1E0, 0x0) },
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
+ { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
+ { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
+ { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
+ { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },
-
- /* Data Fabric MCA types */
- { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
- { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
- { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
+ { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
{ SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) },
-
- /* Unified Memory Controller MCA type */
- { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
- { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) },
-
- /* Parameter Block MCA type */
- { SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
-
- /* Platform Security Processor MCA type */
- { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
- { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
-
- /* System Management Unit MCA type */
- { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
- { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
-
- /* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
-
- /* MPDMA MCA type */
+ { SMCA_MPART, HWID_MCATYPE(0xFF, 0x2) },
+ { SMCA_MPASP, HWID_MCATYPE(0xFD, 0x0) },
+ { SMCA_MPASP_V2, HWID_MCATYPE(0xFD, 0x1) },
+ { SMCA_MPDACC, HWID_MCATYPE(0xBE, 0x0) },
{ SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) },
-
- /* Northbridge IO Unit MCA type */
+ { SMCA_MPM, HWID_MCATYPE(0xF9, 0x0) },
+ { SMCA_MPRAS, HWID_MCATYPE(0x12, 0x0) },
+ { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
-
- /* PCI Express Unit MCA type */
+ { SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
{ SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) },
-
- { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
- { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
- { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
+ { SMCA_PCIE_PL, HWID_MCATYPE(0x1E1, 0x0) },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
+ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
+ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
{ SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
+ { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
+ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
+ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
+ { SMCA_SSBDCI, HWID_MCATYPE(0x5C, 0x0) },
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
+ { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) },
{ SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
- { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) },
{ SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) },
- { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
- { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
+ { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) },
{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
- { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
+ { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
+ { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
};
/*
@@ -521,7 +523,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
static int setup_APIC_mce_threshold(int reserved, int new)
{
if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
- APIC_EILVT_MSG_FIX, 0))
+ APIC_DELIVERY_MODE_FIXED, 0))
return new;
return reserved;
@@ -604,6 +606,14 @@ bool amd_filter_mce(struct mce *m)
enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
struct cpuinfo_x86 *c = &boot_cpu_data;
+ /* Bogus hw errors on Cezanne A0. */
+ if (c->x86 == 0x19 &&
+ c->x86_model == 0x50 &&
+ c->x86_stepping == 0x0) {
+ if (!(m->status & MCI_STATUS_EN))
+ return true;
+ }
+
/* See Family 17h Models 10h-2Fh Erratum #1114. */
if (c->x86 == 0x17 &&
c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
@@ -704,11 +714,11 @@ static void smca_enable_interrupt_vectors(void)
return;
offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12;
- if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0))
+ if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_DELIVERY_MODE_FIXED, 0))
data->thr_intr_en = 1;
offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4;
- if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0))
+ if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_DELIVERY_MODE_FIXED, 0))
data->dfr_intr_en = 1;
}
@@ -875,13 +885,18 @@ void amd_clear_bank(struct mce *m)
{
amd_reset_thr_limit(m->bank);
- /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */
- if (m->status & MCI_STATUS_DEFERRED)
- mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
+ if (mce_flags.smca) {
+ /*
+ * Clear MCA_DESTAT for all deferred errors even those
+ * logged in MCA_STATUS.
+ */
+ if (m->status & MCI_STATUS_DEFERRED)
+ mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
- /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
- if (m->kflags & MCE_CHECK_DFR_REGS)
- return;
+ /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
+ if (m->kflags & MCE_CHECK_DFR_REGS)
+ return;
+ }
mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
}
diff --git a/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h b/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h
index 2d48e6593540..72c8809f88e5 100644
--- a/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h
+++ b/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h
@@ -1,160 +1,238 @@
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x03, .steppings = 0x0004, .driver_data = 0x2 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0001, .driver_data = 0x45 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0002, .driver_data = 0x40 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0004, .driver_data = 0x2c },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0008, .driver_data = 0x10 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0001, .driver_data = 0xa },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0020, .driver_data = 0x3 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0400, .driver_data = 0xd },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x2000, .driver_data = 0x7 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0002, .driver_data = 0x14 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0004, .driver_data = 0x38 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0008, .driver_data = 0x2e },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0002, .driver_data = 0x11 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0008, .driver_data = 0x8 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0040, .driver_data = 0xc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0400, .driver_data = 0x5 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x09, .steppings = 0x0020, .driver_data = 0x47 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0a, .steppings = 0x0001, .driver_data = 0x3 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0a, .steppings = 0x0002, .driver_data = 0x1 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0002, .driver_data = 0x1d },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0010, .driver_data = 0x2 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0d, .steppings = 0x0040, .driver_data = 0x18 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x0100, .driver_data = 0x39 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x1000, .driver_data = 0x59 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0004, .driver_data = 0x5d },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0040, .driver_data = 0xd2 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0080, .driver_data = 0x6b },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0400, .driver_data = 0x95 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .driver_data = 0xbc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x2000, .driver_data = 0xa4 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x16, .steppings = 0x0002, .driver_data = 0x44 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0040, .driver_data = 0x60f },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0080, .driver_data = 0x70a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0400, .driver_data = 0xa0b },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1a, .steppings = 0x0010, .driver_data = 0x12 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1a, .steppings = 0x0020, .driver_data = 0x1d },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0004, .driver_data = 0x219 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0400, .driver_data = 0x107 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1d, .steppings = 0x0002, .driver_data = 0x29 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1e, .steppings = 0x0020, .driver_data = 0xa },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x25, .steppings = 0x0004, .driver_data = 0x11 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x25, .steppings = 0x0020, .driver_data = 0x7 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x26, .steppings = 0x0002, .driver_data = 0x105 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2a, .steppings = 0x0080, .driver_data = 0x2f },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2c, .steppings = 0x0004, .driver_data = 0x1f },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2d, .steppings = 0x0040, .driver_data = 0x621 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2d, .steppings = 0x0080, .driver_data = 0x71a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2e, .steppings = 0x0040, .driver_data = 0xd },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2f, .steppings = 0x0004, .driver_data = 0x3b },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0100, .driver_data = 0x838 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0200, .driver_data = 0x90d },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3a, .steppings = 0x0200, .driver_data = 0x21 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3c, .steppings = 0x0008, .driver_data = 0x28 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3d, .steppings = 0x0010, .driver_data = 0x2f },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0010, .driver_data = 0x42e },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0040, .driver_data = 0x600 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0080, .driver_data = 0x715 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3f, .steppings = 0x0004, .driver_data = 0x49 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3f, .steppings = 0x0010, .driver_data = 0x1a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x45, .steppings = 0x0002, .driver_data = 0x26 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x46, .steppings = 0x0002, .driver_data = 0x1c },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x47, .steppings = 0x0002, .driver_data = 0x22 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4c, .steppings = 0x0008, .driver_data = 0x368 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4c, .steppings = 0x0010, .driver_data = 0x411 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4d, .steppings = 0x0100, .driver_data = 0x12d },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4e, .steppings = 0x0008, .driver_data = 0xf0 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0008, .driver_data = 0x1000191 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0010, .driver_data = 0x2007006 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0020, .driver_data = 0x3000010 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0080, .driver_data = 0x5003901 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0800, .driver_data = 0x7002b01 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0004, .driver_data = 0x1c },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0008, .driver_data = 0x700001c },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0010, .driver_data = 0xf00001a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0020, .driver_data = 0xe000015 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0004, .driver_data = 0x14 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0200, .driver_data = 0x48 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0400, .driver_data = 0x28 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5e, .steppings = 0x0008, .driver_data = 0xf0 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5f, .steppings = 0x0002, .driver_data = 0x3e },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x66, .steppings = 0x0008, .driver_data = 0x2a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0020, .driver_data = 0xc0002f0 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0040, .driver_data = 0xd000404 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6c, .steppings = 0x0002, .driver_data = 0x10002d0 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0002, .driver_data = 0x42 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0100, .driver_data = 0x26 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7e, .steppings = 0x0020, .driver_data = 0xca },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8a, .steppings = 0x0002, .driver_data = 0x33 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0002, .driver_data = 0xbc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0004, .driver_data = 0x3c },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8d, .steppings = 0x0002, .driver_data = 0x56 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0200, .driver_data = 0xf6 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0400, .driver_data = 0xf6 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0800, .driver_data = 0xf6 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x1000, .driver_data = 0x100 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0010, .driver_data = 0x2c0003f7 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0020, .driver_data = 0x2c0003f7 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0040, .driver_data = 0x2c0003f7 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0080, .driver_data = 0x2b000639 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0100, .driver_data = 0x2c0003f7 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x96, .steppings = 0x0002, .driver_data = 0x1a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0004, .driver_data = 0x3a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0020, .driver_data = 0x3a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0008, .driver_data = 0x437 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0010, .driver_data = 0x437 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9c, .steppings = 0x0001, .driver_data = 0x24000026 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0200, .driver_data = 0xf8 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0400, .driver_data = 0xfa },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0800, .driver_data = 0xf6 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x1000, .driver_data = 0xf8 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x2000, .driver_data = 0x104 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0004, .driver_data = 0x100 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0008, .driver_data = 0x100 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0020, .driver_data = 0x100 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0001, .driver_data = 0x102 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0002, .driver_data = 0x100 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa7, .steppings = 0x0002, .driver_data = 0x64 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaa, .steppings = 0x0010, .driver_data = 0x24 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xad, .steppings = 0x0002, .driver_data = 0xa0000d1 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaf, .steppings = 0x0008, .driver_data = 0x3000341 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb5, .steppings = 0x0001, .driver_data = 0xa },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0002, .driver_data = 0x12f },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0010, .driver_data = 0x12f },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0004, .driver_data = 0x4128 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0008, .driver_data = 0x4128 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0100, .driver_data = 0x4128 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbd, .steppings = 0x0002, .driver_data = 0x11f },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbe, .steppings = 0x0001, .driver_data = 0x1d },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0004, .driver_data = 0x3a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0020, .driver_data = 0x3a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0040, .driver_data = 0x3a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0080, .driver_data = 0x3a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc5, .steppings = 0x0004, .driver_data = 0x118 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc6, .steppings = 0x0004, .driver_data = 0x118 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc6, .steppings = 0x0010, .driver_data = 0x118 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xca, .steppings = 0x0004, .driver_data = 0x118 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0002, .driver_data = 0x210002a9 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0004, .driver_data = 0x210002a9 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0080, .driver_data = 0x12 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0400, .driver_data = 0x15 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x01, .steppings = 0x0004, .driver_data = 0x2e },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0010, .driver_data = 0x21 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0020, .driver_data = 0x2c },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0040, .driver_data = 0x10 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0080, .driver_data = 0x39 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0200, .driver_data = 0x2f },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0004, .driver_data = 0xa },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0008, .driver_data = 0xc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0010, .driver_data = 0x17 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0002, .driver_data = 0x17 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0008, .driver_data = 0x5 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0010, .driver_data = 0x6 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0080, .driver_data = 0x3 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0100, .driver_data = 0xe },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0200, .driver_data = 0x3 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0400, .driver_data = 0x4 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0004, .driver_data = 0xf },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0010, .driver_data = 0x4 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0020, .driver_data = 0x8 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0100, .driver_data = 0x9 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x03, .steppings = 0x0004, .platform_mask = 0x00, .driver_data = 0x2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0001, .platform_mask = 0x01, .driver_data = 0x40 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0001, .platform_mask = 0x02, .driver_data = 0x41 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0001, .platform_mask = 0x08, .driver_data = 0x45 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0002, .platform_mask = 0x01, .driver_data = 0x40 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0004, .platform_mask = 0x01, .driver_data = 0x2a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0004, .platform_mask = 0x02, .driver_data = 0x2c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0004, .platform_mask = 0x04, .driver_data = 0x2b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0008, .platform_mask = 0x01, .driver_data = 0x10 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0008, .platform_mask = 0x02, .driver_data = 0xc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0008, .platform_mask = 0x04, .driver_data = 0xb },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0008, .platform_mask = 0x08, .driver_data = 0xd },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0001, .platform_mask = 0x01, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0020, .platform_mask = 0x10, .driver_data = 0x3 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0400, .platform_mask = 0x02, .driver_data = 0xc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0400, .platform_mask = 0x08, .driver_data = 0xd },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0400, .platform_mask = 0x20, .driver_data = 0xb },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x2000, .platform_mask = 0x02, .driver_data = 0x5 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x2000, .platform_mask = 0x08, .driver_data = 0x6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x2000, .platform_mask = 0x20, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0002, .platform_mask = 0x04, .driver_data = 0x14 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0004, .platform_mask = 0x04, .driver_data = 0x38 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0008, .platform_mask = 0x04, .driver_data = 0x2e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0002, .platform_mask = 0x01, .driver_data = 0xd },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0002, .platform_mask = 0x04, .driver_data = 0x10 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0002, .platform_mask = 0x08, .driver_data = 0xf },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0002, .platform_mask = 0x10, .driver_data = 0x11 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0002, .platform_mask = 0x20, .driver_data = 0xe },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0008, .platform_mask = 0x08, .driver_data = 0x8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0008, .platform_mask = 0x20, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0040, .platform_mask = 0x01, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0040, .platform_mask = 0x02, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0040, .platform_mask = 0x04, .driver_data = 0x2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0040, .platform_mask = 0x10, .driver_data = 0x8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0040, .platform_mask = 0x80, .driver_data = 0xc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0400, .platform_mask = 0x10, .driver_data = 0x1 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0400, .platform_mask = 0x20, .driver_data = 0x4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0400, .platform_mask = 0x80, .driver_data = 0x5 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x09, .steppings = 0x0020, .platform_mask = 0x10, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x09, .steppings = 0x0020, .platform_mask = 0x20, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x09, .steppings = 0x0020, .platform_mask = 0x80, .driver_data = 0x47 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0a, .steppings = 0x0001, .platform_mask = 0x04, .driver_data = 0x3 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0a, .steppings = 0x0002, .platform_mask = 0x04, .driver_data = 0x1 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0002, .platform_mask = 0x10, .driver_data = 0x1c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0002, .platform_mask = 0x20, .driver_data = 0x1d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0010, .platform_mask = 0x10, .driver_data = 0x1 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0010, .platform_mask = 0x20, .driver_data = 0x2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0d, .steppings = 0x0040, .platform_mask = 0x20, .driver_data = 0x18 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x0100, .platform_mask = 0x20, .driver_data = 0x39 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x1000, .platform_mask = 0x20, .driver_data = 0x54 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x1000, .platform_mask = 0x80, .driver_data = 0x59 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0004, .platform_mask = 0x01, .driver_data = 0x5d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0004, .platform_mask = 0x20, .driver_data = 0x5c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0040, .platform_mask = 0x01, .driver_data = 0xd0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0040, .platform_mask = 0x04, .driver_data = 0xd2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0040, .platform_mask = 0x20, .driver_data = 0xd1 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0080, .platform_mask = 0x10, .driver_data = 0x6a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0080, .platform_mask = 0x40, .driver_data = 0x6b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0400, .platform_mask = 0x80, .driver_data = 0x95 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .platform_mask = 0x01, .driver_data = 0xba },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .platform_mask = 0x04, .driver_data = 0xbc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .platform_mask = 0x08, .driver_data = 0xbb },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .platform_mask = 0x10, .driver_data = 0xba },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .platform_mask = 0x20, .driver_data = 0xba },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .platform_mask = 0x40, .driver_data = 0xbc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .platform_mask = 0x80, .driver_data = 0xba },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x2000, .platform_mask = 0x01, .driver_data = 0xa4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x2000, .platform_mask = 0x20, .driver_data = 0xa4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x2000, .platform_mask = 0x80, .driver_data = 0xa4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x16, .steppings = 0x0002, .platform_mask = 0x01, .driver_data = 0x43 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x16, .steppings = 0x0002, .platform_mask = 0x02, .driver_data = 0x42 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x16, .steppings = 0x0002, .platform_mask = 0x80, .driver_data = 0x44 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0040, .platform_mask = 0x01, .driver_data = 0x60f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0040, .platform_mask = 0x04, .driver_data = 0x60f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0040, .platform_mask = 0x10, .driver_data = 0x60f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0040, .platform_mask = 0x40, .driver_data = 0x60f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0040, .platform_mask = 0x80, .driver_data = 0x60f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0080, .platform_mask = 0x10, .driver_data = 0x70a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0400, .platform_mask = 0x11, .driver_data = 0xa0b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0400, .platform_mask = 0x44, .driver_data = 0xa0b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0400, .platform_mask = 0xa0, .driver_data = 0xa0b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1a, .steppings = 0x0010, .platform_mask = 0x03, .driver_data = 0x12 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1a, .steppings = 0x0020, .platform_mask = 0x03, .driver_data = 0x1d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0004, .platform_mask = 0x01, .driver_data = 0x217 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0004, .platform_mask = 0x04, .driver_data = 0x218 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0004, .platform_mask = 0x08, .driver_data = 0x219 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0400, .platform_mask = 0x01, .driver_data = 0x107 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0400, .platform_mask = 0x04, .driver_data = 0x107 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0400, .platform_mask = 0x08, .driver_data = 0x107 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0400, .platform_mask = 0x10, .driver_data = 0x107 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1d, .steppings = 0x0002, .platform_mask = 0x08, .driver_data = 0x29 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1e, .steppings = 0x0020, .platform_mask = 0x13, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x25, .steppings = 0x0004, .platform_mask = 0x12, .driver_data = 0x11 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x25, .steppings = 0x0020, .platform_mask = 0x92, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x26, .steppings = 0x0002, .platform_mask = 0x01, .driver_data = 0x104 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x26, .steppings = 0x0002, .platform_mask = 0x02, .driver_data = 0x105 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2a, .steppings = 0x0080, .platform_mask = 0x12, .driver_data = 0x2f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2c, .steppings = 0x0004, .platform_mask = 0x03, .driver_data = 0x1f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2d, .steppings = 0x0040, .platform_mask = 0x6d, .driver_data = 0x621 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2d, .steppings = 0x0080, .platform_mask = 0x6d, .driver_data = 0x71a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2e, .steppings = 0x0040, .platform_mask = 0x04, .driver_data = 0xd },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2f, .steppings = 0x0004, .platform_mask = 0x05, .driver_data = 0x3b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0100, .platform_mask = 0x02, .driver_data = 0x838 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0100, .platform_mask = 0x0c, .driver_data = 0x838 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0200, .platform_mask = 0x0f, .driver_data = 0x90d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3a, .steppings = 0x0200, .platform_mask = 0x12, .driver_data = 0x21 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3c, .steppings = 0x0008, .platform_mask = 0x32, .driver_data = 0x28 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3d, .steppings = 0x0010, .platform_mask = 0xc0, .driver_data = 0x2f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0010, .platform_mask = 0xed, .driver_data = 0x42e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0040, .platform_mask = 0xed, .driver_data = 0x600 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0080, .platform_mask = 0xed, .driver_data = 0x715 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3f, .steppings = 0x0004, .platform_mask = 0x6f, .driver_data = 0x49 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3f, .steppings = 0x0010, .platform_mask = 0x80, .driver_data = 0x1a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x45, .steppings = 0x0002, .platform_mask = 0x72, .driver_data = 0x26 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x46, .steppings = 0x0002, .platform_mask = 0x32, .driver_data = 0x1c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x47, .steppings = 0x0002, .platform_mask = 0x22, .driver_data = 0x22 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4c, .steppings = 0x0008, .platform_mask = 0x01, .driver_data = 0x368 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4c, .steppings = 0x0010, .platform_mask = 0x01, .driver_data = 0x411 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4d, .steppings = 0x0100, .platform_mask = 0x01, .driver_data = 0x12d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4e, .steppings = 0x0008, .platform_mask = 0xc0, .driver_data = 0xf0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0008, .platform_mask = 0x97, .driver_data = 0x1000191 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0010, .platform_mask = 0xb7, .driver_data = 0x2007006 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0020, .platform_mask = 0xb7, .driver_data = 0x3000010 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0080, .platform_mask = 0xbf, .driver_data = 0x5003901 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0800, .platform_mask = 0xbf, .driver_data = 0x7002b01 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0004, .platform_mask = 0x10, .driver_data = 0x1c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0008, .platform_mask = 0x10, .driver_data = 0x700001c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0010, .platform_mask = 0x10, .driver_data = 0xf00001a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0020, .platform_mask = 0x10, .driver_data = 0xe000015 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0004, .platform_mask = 0x01, .driver_data = 0x14 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0200, .platform_mask = 0x03, .driver_data = 0x48 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0400, .platform_mask = 0x03, .driver_data = 0x28 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5e, .steppings = 0x0008, .platform_mask = 0x36, .driver_data = 0xf0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5f, .steppings = 0x0002, .platform_mask = 0x01, .driver_data = 0x3e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x66, .steppings = 0x0008, .platform_mask = 0x80, .driver_data = 0x2a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0020, .platform_mask = 0x87, .driver_data = 0xc0002f0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0040, .platform_mask = 0x87, .driver_data = 0xd000404 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6c, .steppings = 0x0002, .platform_mask = 0x10, .driver_data = 0x10002d0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0002, .platform_mask = 0x01, .driver_data = 0x42 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0100, .platform_mask = 0x01, .driver_data = 0x26 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7e, .steppings = 0x0020, .platform_mask = 0x80, .driver_data = 0xca },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8a, .steppings = 0x0002, .platform_mask = 0x10, .driver_data = 0x33 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0002, .platform_mask = 0x80, .driver_data = 0xbc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0004, .platform_mask = 0xc2, .driver_data = 0x3c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8d, .steppings = 0x0002, .platform_mask = 0xc2, .driver_data = 0x56 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0200, .platform_mask = 0x10, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0200, .platform_mask = 0xc0, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0400, .platform_mask = 0xc0, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0800, .platform_mask = 0xd0, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x1000, .platform_mask = 0x94, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0010, .platform_mask = 0x10, .driver_data = 0x2c0003f7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0010, .platform_mask = 0x87, .driver_data = 0x2b000639 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0020, .platform_mask = 0x10, .driver_data = 0x2c0003f7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0020, .platform_mask = 0x87, .driver_data = 0x2b000639 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0040, .platform_mask = 0x10, .driver_data = 0x2c0003f7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0040, .platform_mask = 0x87, .driver_data = 0x2b000639 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0080, .platform_mask = 0x87, .driver_data = 0x2b000639 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0100, .platform_mask = 0x10, .driver_data = 0x2c0003f7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0100, .platform_mask = 0x87, .driver_data = 0x2b000639 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x96, .steppings = 0x0002, .platform_mask = 0x01, .driver_data = 0x1a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0004, .platform_mask = 0x07, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0020, .platform_mask = 0x07, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0008, .platform_mask = 0x80, .driver_data = 0x437 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0010, .platform_mask = 0x40, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0010, .platform_mask = 0x80, .driver_data = 0x437 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9c, .steppings = 0x0001, .platform_mask = 0x01, .driver_data = 0x24000026 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0200, .platform_mask = 0x2a, .driver_data = 0xf8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0400, .platform_mask = 0x22, .driver_data = 0xfa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0800, .platform_mask = 0x02, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x1000, .platform_mask = 0x22, .driver_data = 0xf8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x2000, .platform_mask = 0x22, .driver_data = 0x104 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0004, .platform_mask = 0x20, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0008, .platform_mask = 0x22, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0020, .platform_mask = 0x22, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0001, .platform_mask = 0x80, .driver_data = 0x102 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0002, .platform_mask = 0x80, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa7, .steppings = 0x0002, .platform_mask = 0x02, .driver_data = 0x64 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaa, .steppings = 0x0010, .platform_mask = 0xe6, .driver_data = 0x24 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xad, .steppings = 0x0002, .platform_mask = 0x20, .driver_data = 0xa0000d1 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xad, .steppings = 0x0002, .platform_mask = 0x95, .driver_data = 0x10003a2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaf, .steppings = 0x0008, .platform_mask = 0x01, .driver_data = 0x3000341 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb5, .steppings = 0x0001, .platform_mask = 0x80, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0002, .platform_mask = 0x32, .driver_data = 0x12f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0010, .platform_mask = 0x32, .driver_data = 0x12f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0004, .platform_mask = 0xe0, .driver_data = 0x4128 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0008, .platform_mask = 0xe0, .driver_data = 0x4128 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0100, .platform_mask = 0xe0, .driver_data = 0x4128 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbd, .steppings = 0x0002, .platform_mask = 0x80, .driver_data = 0x11f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbe, .steppings = 0x0001, .platform_mask = 0x19, .driver_data = 0x1d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0004, .platform_mask = 0x07, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0020, .platform_mask = 0x07, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0040, .platform_mask = 0x07, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0080, .platform_mask = 0x07, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc5, .steppings = 0x0004, .platform_mask = 0x82, .driver_data = 0x118 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc6, .steppings = 0x0004, .platform_mask = 0x82, .driver_data = 0x118 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc6, .steppings = 0x0010, .platform_mask = 0x82, .driver_data = 0x118 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xca, .steppings = 0x0004, .platform_mask = 0x82, .driver_data = 0x118 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0002, .platform_mask = 0x87, .driver_data = 0x210002a9 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0004, .platform_mask = 0x87, .driver_data = 0x210002a9 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0080, .platform_mask = 0x01, .driver_data = 0x12 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0080, .platform_mask = 0x02, .driver_data = 0x8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0400, .platform_mask = 0x01, .driver_data = 0x13 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0400, .platform_mask = 0x02, .driver_data = 0x15 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0400, .platform_mask = 0x04, .driver_data = 0x14 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x01, .steppings = 0x0004, .platform_mask = 0x04, .driver_data = 0x2e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0010, .platform_mask = 0x02, .driver_data = 0x1f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0010, .platform_mask = 0x04, .driver_data = 0x1e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0010, .platform_mask = 0x10, .driver_data = 0x21 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0020, .platform_mask = 0x01, .driver_data = 0x29 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0020, .platform_mask = 0x02, .driver_data = 0x2a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0020, .platform_mask = 0x04, .driver_data = 0x2b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0020, .platform_mask = 0x10, .driver_data = 0x2c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0040, .platform_mask = 0x02, .driver_data = 0x10 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0080, .platform_mask = 0x02, .driver_data = 0x38 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0080, .platform_mask = 0x04, .driver_data = 0x37 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0080, .platform_mask = 0x08, .driver_data = 0x39 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0200, .platform_mask = 0x02, .driver_data = 0x2d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0200, .platform_mask = 0x04, .driver_data = 0x2e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0200, .platform_mask = 0x08, .driver_data = 0x2f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0004, .platform_mask = 0x0d, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0008, .platform_mask = 0x0d, .driver_data = 0xc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0010, .platform_mask = 0x1d, .driver_data = 0x17 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0002, .platform_mask = 0x02, .driver_data = 0x16 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0002, .platform_mask = 0xbd, .driver_data = 0x17 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0008, .platform_mask = 0x9d, .driver_data = 0x5 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0010, .platform_mask = 0x9d, .driver_data = 0x6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0080, .platform_mask = 0x9d, .driver_data = 0x3 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0100, .platform_mask = 0x01, .driver_data = 0xc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0100, .platform_mask = 0x02, .driver_data = 0xe },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0100, .platform_mask = 0x5f, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0200, .platform_mask = 0xbd, .driver_data = 0x3 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0400, .platform_mask = 0x5c, .driver_data = 0x4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0400, .platform_mask = 0x5d, .driver_data = 0x2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0004, .platform_mask = 0x04, .driver_data = 0xf },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0010, .platform_mask = 0x01, .driver_data = 0x2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0010, .platform_mask = 0x34, .driver_data = 0x4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0020, .platform_mask = 0x01, .driver_data = 0x8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0100, .platform_mask = 0x22, .driver_data = 0x9 },
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8744f3adc2a0..37ac4afe0972 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -120,19 +120,44 @@ static inline unsigned int exttable_size(struct extended_sigtable *et)
return et->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE;
}
+
+/*
+ * Use CPUID to generate a "vfm" value. Useful before cpuinfo_x86
+ * structures are populated.
+ */
+static u32 intel_cpuid_vfm(void)
+{
+ u32 eax = cpuid_eax(1);
+ u32 fam = x86_family(eax);
+ u32 model = x86_model(eax);
+
+ return IFM(fam, model);
+}
+
+u32 intel_get_platform_id(void)
+{
+ unsigned int val[2];
+
+ /*
+ * This can be called early. Use CPUID directly instead of
+ * relying on cpuinfo_x86 which may not be fully initialized.
+ * The PII does not have MSR_IA32_PLATFORM_ID. Everything
+ * before _it_ has no microcode (for Linux at least).
+ */
+ if (intel_cpuid_vfm() <= INTEL_PENTIUM_II_KLAMATH)
+ return 0;
+
+ /* get processor flags from MSR 0x17 */
+ native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+
+ return (val[1] >> 18) & 7;
+}
+
void intel_collect_cpu_info(struct cpu_signature *sig)
{
sig->sig = cpuid_eax(1);
- sig->pf = 0;
sig->rev = intel_get_microcode_revision();
-
- if (IFM(x86_family(sig->sig), x86_model(sig->sig)) >= INTEL_PENTIUM_III_DESCHUTES) {
- unsigned int val[2];
-
- /* get processor flags from MSR 0x17 */
- native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
- sig->pf = 1 << ((val[1] >> 18) & 7);
- }
+ sig->pf = 1 << intel_get_platform_id();
}
EXPORT_SYMBOL_GPL(intel_collect_cpu_info);
@@ -142,8 +167,15 @@ static inline bool cpu_signatures_match(struct cpu_signature *s1, unsigned int s
if (s1->sig != sig2)
return false;
- /* Processor flags are either both 0 or they intersect. */
- return ((!s1->pf && !pf2) || (s1->pf & pf2));
+ /*
+ * Consider an empty mask to match everything. This
+ * should only occur for one CPU model, the PII.
+ */
+ if (!pf2)
+ return true;
+
+ /* Is the CPU's platform ID in the signature mask? */
+ return s1->pf & pf2;
}
bool intel_find_matching_signature(void *mc, struct cpu_signature *sig)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 89a2eb8a0722..a7dfc29d3470 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -161,6 +161,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
if (vmbus_handler)
vmbus_handler();
+ add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR);
+
if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
apic_eoi();
@@ -496,8 +498,9 @@ static void hv_reserve_irq_vectors(void)
test_and_set_bit(HYPERV_DBG_FASTFAIL_VECTOR, system_vectors))
BUG();
- pr_info("Hyper-V: reserve vectors: %d %d %d\n", HYPERV_DBG_ASSERT_VECTOR,
- HYPERV_DBG_SERVICE_VECTOR, HYPERV_DBG_FASTFAIL_VECTOR);
+ pr_info("Hyper-V: reserve vectors: 0x%x 0x%x 0x%x\n",
+ HYPERV_DBG_ASSERT_VECTOR, HYPERV_DBG_SERVICE_VECTOR,
+ HYPERV_DBG_FASTFAIL_VECTOR);
}
static void __init ms_hyperv_init_platform(void)
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 763534d77f59..e3eee9ae4141 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -437,9 +437,10 @@ static unsigned long nr_mtrr_spare_reg __initdata =
static int __init parse_mtrr_spare_reg(char *arg)
{
- if (arg)
- nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
- return 0;
+ if (!arg)
+ return -EINVAL;
+
+ return kstrtoul(arg, 0, &nr_mtrr_spare_reg);
}
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index e6a154240b8d..9bd87bae4983 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -364,7 +364,7 @@ void arch_mon_domain_online(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
msr_clear_bit(MSR_RMID_SNC_CONFIG, 0);
}
-/* CPU models that support MSR_RMID_SNC_CONFIG */
+/* CPU models that support SNC and MSR_RMID_SNC_CONFIG */
static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
@@ -375,40 +375,14 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
{}
};
-/*
- * There isn't a simple hardware bit that indicates whether a CPU is running
- * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the
- * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
- * the same NUMA node as CPU0.
- * It is not possible to accurately determine SNC state if the system is
- * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
- * to L3 caches. It will be OK if system is booted with hyperthreading
- * disabled (since this doesn't affect the ratio).
- */
static __init int snc_get_config(void)
{
- struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE);
- const cpumask_t *node0_cpumask;
- int cpus_per_node, cpus_per_l3;
- int ret;
-
- if (!x86_match_cpu(snc_cpu_ids) || !ci)
- return 1;
+ int ret = topology_num_nodes_per_package();
- cpus_read_lock();
- if (num_online_cpus() != num_present_cpus())
- pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
- cpus_read_unlock();
-
- node0_cpumask = cpumask_of_node(cpu_to_node(0));
-
- cpus_per_node = cpumask_weight(node0_cpumask);
- cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map);
-
- if (!cpus_per_node || !cpus_per_l3)
+ if (ret > 1 && !x86_match_cpu(snc_cpu_ids)) {
+ pr_warn("CoD enabled system? Resctrl not supported\n");
return 1;
-
- ret = cpus_per_l3 / cpus_per_node;
+ }
/* sanity check: Only valid results are 1, 2, 3, 4, 6 */
switch (ret) {
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 42c7eac0c387..837d6a4b0c28 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -52,6 +52,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
+ { X86_FEATURE_CPPC_PERF_PRIO, CPUID_EDX, 16, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_X2AVIC_EXT, CPUID_ECX, 6, 0x8000000a, 0 },
{ X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 },
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index ac60ebde5d9b..3f0222d10f6e 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -1220,7 +1220,7 @@ void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
ret = sgx_encl_find(encl_mm->mm, addr, &vma);
if (!ret && encl == vma->vm_private_data)
- zap_vma_ptes(vma, addr, PAGE_SIZE);
+ zap_special_vma_range(vma, addr, PAGE_SIZE);
mmap_read_unlock(encl_mm->mm);
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 23190a786d31..4913b64ec592 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -31,6 +31,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/smp.h>
+#include <asm/numa.h>
#include "cpu.h"
@@ -203,15 +204,11 @@ fwbug:
static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
unsigned long *map)
{
- unsigned int id, end, cnt = 0;
+ unsigned int end;
/* Calculate the exclusive end */
end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);
-
- /* Unfortunately there is no bitmap_weight_range() */
- for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
- cnt++;
- return cnt;
+ return bitmap_weight_from(map, lvlid, end);
}
static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
@@ -492,11 +489,19 @@ void __init topology_init_possible_cpus(void)
set_nr_cpu_ids(allowed);
cnta = domain_weight(TOPO_PKG_DOMAIN);
- cntb = domain_weight(TOPO_DIE_DOMAIN);
__max_logical_packages = cnta;
+
+ pr_info("Max. logical packages: %3u\n", __max_logical_packages);
+
+ cntb = num_phys_nodes();
+ __num_nodes_per_package = DIV_ROUND_UP(cntb, cnta);
+
+ pr_info("Max. logical nodes: %3u\n", cntb);
+ pr_info("Num. nodes per package:%3u\n", __num_nodes_per_package);
+
+ cntb = domain_weight(TOPO_DIE_DOMAIN);
__max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));
- pr_info("Max. logical packages: %3u\n", cnta);
pr_info("Max. logical dies: %3u\n", cntb);
pr_info("Max. dies per package: %3u\n", __max_dies_per_package);
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index 71625795d711..d0d79d5b8eb9 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -157,8 +157,8 @@ static void parse_topology(struct topo_scan *tscan, bool early)
switch (c->x86_vendor) {
case X86_VENDOR_AMD:
- if (IS_ENABLED(CONFIG_CPU_SUP_AMD))
- cpu_parse_topology_amd(tscan);
+ case X86_VENDOR_HYGON:
+ cpu_parse_topology_amd(tscan);
break;
case X86_VENDOR_CENTAUR:
case X86_VENDOR_ZHAOXIN:
@@ -170,10 +170,6 @@ static void parse_topology(struct topo_scan *tscan, bool early)
if (c->cpuid_level >= 0x1a)
c->topo.cpu_type = cpuid_eax(0x1a);
break;
- case X86_VENDOR_HYGON:
- if (IS_ENABLED(CONFIG_CPU_SUP_HYGON))
- cpu_parse_topology_amd(tscan);
- break;
}
}
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index a3e6936839b1..eee0d1a48802 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -339,7 +339,7 @@ arch_initcall(activate_jump_labels);
static void __init vmware_paravirt_ops_setup(void)
{
pv_info.name = "VMware hypervisor";
- pv_ops.cpu.io_delay = paravirt_nop;
+ pv_info.io_delay = false;
if (vmware_tsc_khz == 0)
return;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 335fd2ee9766..cd796818d94d 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -42,6 +42,7 @@
#include <asm/crash.h>
#include <asm/cmdline.h>
#include <asm/sev.h>
+#include <asm/virt.h>
/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
@@ -111,7 +112,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
- cpu_emergency_disable_virtualization();
+ x86_virt_emergency_disable_virtualization_cpu();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 76153dfb58c9..a7b6524a9dea 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1080,8 +1080,7 @@ void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_
* This will go out and modify PKRU register to set the access
* rights for @pkey to @init_val.
*/
-int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val)
+int arch_set_user_pkey_access(int pkey, unsigned long init_val)
{
u32 old_pkru, new_pkru_bits = 0;
int pkey_shift;
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 52ce19289989..38a2862f09d3 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -267,16 +267,16 @@ static inline void os_xrstor_supervisor(struct fpstate *fpstate)
*/
static inline u64 xfeatures_need_sigframe_write(void)
{
- u64 xfeaures_to_write;
+ u64 xfeatures_to_write;
/* In-use features must be written: */
- xfeaures_to_write = xfeatures_in_use();
+ xfeatures_to_write = xfeatures_in_use();
/* Also write all non-optimizable sigframe features: */
- xfeaures_to_write |= XFEATURE_MASK_USER_SUPPORTED &
+ xfeatures_to_write |= XFEATURE_MASK_USER_SUPPORTED &
~XFEATURE_MASK_SIGFRAME_INITOPT;
- return xfeaures_to_write;
+ return xfeatures_to_write;
}
/*
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
index e736b19e18de..117aa06d25ca 100644
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -27,9 +27,6 @@ EXPORT_PER_CPU_SYMBOL(fred_rsp0);
void cpu_init_fred_exceptions(void)
{
- /* When FRED is enabled by default, remove this log message */
- pr_info("Initialize FRED on CPU%d\n", smp_processor_id());
-
/*
* If a kernel event is delivered before a CPU goes to user level for
* the first time, its SS is NULL thus NULL is pushed into the SS field
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 80ef5d386b03..5171cb746444 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -441,10 +441,6 @@ initial_pg_fixmap:
swapper_pg_dir:
.fill 1024,4,0
.fill PTI_USER_PGD_FILL,4,0
-.globl empty_zero_page
-empty_zero_page:
- .fill 4096,1,0
-EXPORT_SYMBOL(empty_zero_page)
/*
* This starts the data section.
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 21816b48537c..7ed5520dd52e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -616,38 +616,10 @@ SYM_DATA(early_recursion_flag, .long 0)
.data
-#if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH)
-SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
- .org init_top_pgt + L4_PAGE_OFFSET*8, 0
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
- .org init_top_pgt + L4_START_KERNEL*8, 0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
- .fill PTI_USER_PGD_FILL,8,0
-SYM_DATA_END(init_top_pgt)
-
-SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt)
- .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
- .fill 511, 8, 0
-SYM_DATA_END(level3_ident_pgt)
-SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt)
- /*
- * Since I easily can, map the first 1G.
- * Don't set NX because code runs from these pages.
- *
- * Note: This sets _PAGE_GLOBAL despite whether
- * the CPU supports it or it is enabled. But,
- * the CPU should ignore the bit.
- */
- PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
-SYM_DATA_END(level2_ident_pgt)
-#else
SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
.fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
SYM_DATA_END(init_top_pgt)
-#endif
SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt)
.fill 511,8,0
@@ -712,10 +684,3 @@ SYM_PIC_ALIAS(phys_base);
EXPORT_SYMBOL(phys_base)
#include "../xen/xen-head.S"
-
- __PAGE_ALIGNED_BSS
-SYM_DATA_START_PAGE_ALIGNED(empty_zero_page)
- .skip PAGE_SIZE
-SYM_DATA_END(empty_zero_page)
-EXPORT_SYMBOL(empty_zero_page)
-
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 610590e83445..8dc7b710e125 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -854,7 +854,7 @@ static struct clocksource clocksource_hpet = {
.rating = 250,
.read = read_hpet,
.mask = HPET_MASK,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_CALIBRATED,
.resume = hpet_resume_counter,
};
@@ -1082,8 +1082,6 @@ int __init hpet_enable(void)
if (!hpet_counting())
goto out_nohpet;
- if (tsc_clocksource_watchdog_disabled())
- clocksource_hpet.flags |= CLOCK_SOURCE_MUST_VERIFY;
clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
if (id & HPET_ID_LEGSUP) {
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 5630c7dca1f3..7e980ea49d8d 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -525,12 +525,8 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
if (ret)
return ERR_PTR(ret);
ret = crash_load_dm_crypt_keys(image);
- if (ret == -ENOENT) {
- kexec_dprintk("No dm crypt key to load\n");
- } else if (ret) {
- pr_err("Failed to load dm crypt keys\n");
+ if (ret)
return ERR_PTR(ret);
- }
if (image->dm_crypt_keys_addr &&
cmdline_len + MAX_ELFCOREHDR_STR_LEN + MAX_DMCRYPTKEYS_STR_LEN >
header->cmdline_size) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 3bc062363814..29226d112029 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -75,12 +75,6 @@ DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visi
static int has_steal_clock = 0;
static int has_guest_poll = 0;
-/*
- * No need for any "IO delay" on KVM
- */
-static void kvm_io_delay(void)
-{
-}
#define KVM_TASK_SLEEP_HASHBITS 8
#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
@@ -327,7 +321,7 @@ static void __init paravirt_ops_setup(void)
pv_info.name = "KVM";
if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
- pv_ops.cpu.io_delay = kvm_io_delay;
+ pv_info.io_delay = false;
#ifdef CONFIG_X86_IO_APIC
no_timer_check = 1;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a6ed52cae003..792fa96b3233 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -94,6 +94,7 @@ struct pv_info pv_info = {
#ifdef CONFIG_PARAVIRT_XXL
.extra_user_64bit_cs = __USER_CS,
#endif
+ .io_delay = true,
};
/* 64-bit pagetable entries */
@@ -101,8 +102,6 @@ struct pv_info pv_info = {
struct paravirt_patch_template pv_ops = {
/* Cpu ops. */
- .cpu.io_delay = native_io_delay,
-
#ifdef CONFIG_PARAVIRT_XXL
.cpu.cpuid = native_cpuid,
.cpu.get_debugreg = pv_native_get_debugreg,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3ef15c2f152f..168dabf9853f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -61,7 +61,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
- unsigned short gs;
+ unsigned int gs;
savesegment(gs, gs);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 08e72f429870..b85e715ebb30 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -104,10 +104,10 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
return;
}
- asm("movl %%ds,%0" : "=r" (ds));
- asm("movl %%es,%0" : "=r" (es));
- asm("movl %%fs,%0" : "=r" (fsindex));
- asm("movl %%gs,%0" : "=r" (gsindex));
+ savesegment(ds, ds);
+ savesegment(es, es);
+ savesegment(fs, fsindex);
+ savesegment(gs, gsindex);
rdmsrq(MSR_FS_BASE, fs);
rdmsrq(MSR_GS_BASE, gs);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 6032fa9ec753..0fed6d0d7e32 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -27,6 +27,7 @@
#include <asm/cpu.h>
#include <asm/nmi.h>
#include <asm/smp.h>
+#include <asm/virt.h>
#include <linux/ctype.h>
#include <linux/mc146818rtc.h>
@@ -532,51 +533,6 @@ static inline void kb_wait(void)
static inline void nmi_shootdown_cpus_on_restart(void);
#if IS_ENABLED(CONFIG_KVM_X86)
-/* RCU-protected callback to disable virtualization prior to reboot. */
-static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
-
-void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
-{
- if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
- return;
-
- rcu_assign_pointer(cpu_emergency_virt_callback, callback);
-}
-EXPORT_SYMBOL_FOR_KVM(cpu_emergency_register_virt_callback);
-
-void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
-{
- if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback))
- return;
-
- rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
- synchronize_rcu();
-}
-EXPORT_SYMBOL_FOR_KVM(cpu_emergency_unregister_virt_callback);
-
-/*
- * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
- * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
- * GIF=0, i.e. if the crash occurred between CLGI and STGI.
- */
-void cpu_emergency_disable_virtualization(void)
-{
- cpu_emergency_virt_cb *callback;
-
- /*
- * IRQs must be disabled as KVM enables virtualization in hardware via
- * function call IPIs, i.e. IRQs need to be disabled to guarantee
- * virtualization stays disabled.
- */
- lockdep_assert_irqs_disabled();
-
- rcu_read_lock();
- callback = rcu_dereference(cpu_emergency_virt_callback);
- if (callback)
- callback();
- rcu_read_unlock();
-}
-
static void emergency_reboot_disable_virtualization(void)
{
local_irq_disable();
@@ -588,16 +544,11 @@ static void emergency_reboot_disable_virtualization(void)
* We can't take any locks and we may be on an inconsistent state, so
* use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
*
- * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
- * other CPUs may have virtualization enabled.
+ * Safely force _this_ CPU out of VMX/SVM operation, and if necessary,
+ * blast NMIs to force other CPUs out of VMX/SVM as well.
*/
- if (rcu_access_pointer(cpu_emergency_virt_callback)) {
- /* Safely force _this_ CPU out of VMX/SVM operation. */
- cpu_emergency_disable_virtualization();
-
- /* Disable VMX/SVM and halt on other CPUs. */
+ if (!x86_virt_emergency_disable_virtualization_cpu())
nmi_shootdown_cpus_on_restart();
- }
}
#else
static void emergency_reboot_disable_virtualization(void) { }
@@ -773,12 +724,15 @@ static void __machine_emergency_restart(int emergency)
machine_ops.emergency_restart();
}
-static void native_machine_restart(char *__unused)
+static void native_machine_restart(char *command)
{
pr_notice("machine restart\n");
if (!reboot_force)
machine_shutdown();
+
+ do_kernel_restart(command);
+
__machine_emergency_restart(0);
}
@@ -875,10 +829,10 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
shootdown_callback(cpu, regs);
/*
- * Prepare the CPU for reboot _after_ invoking the callback so that the
- * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ * Disable virtualization, as both VMX and SVM can block INIT and thus
+ * prevent AP bringup, e.g. in a kdump kernel or in firmware.
*/
- cpu_emergency_disable_virtualization();
+ x86_virt_emergency_disable_virtualization_cpu();
atomic_dec(&waiting_for_crash_ipi);
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 51a849a79c98..314b062a15de 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -2,10 +2,10 @@
/*
* RTC related functions
*/
+#include <linux/acpi.h>
#include <linux/platform_device.h>
#include <linux/mc146818rtc.h>
#include <linux/export.h>
-#include <linux/pnp.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
@@ -133,25 +133,14 @@ static struct platform_device rtc_device = {
static __init int add_rtc_cmos(void)
{
-#ifdef CONFIG_PNP
- static const char * const ids[] __initconst =
- { "PNP0b00", "PNP0b01", "PNP0b02", };
- struct pnp_dev *dev;
- int i;
-
- pnp_for_each_dev(dev) {
- for (i = 0; i < ARRAY_SIZE(ids); i++) {
- if (compare_pnp_id(dev->id, ids[i]) != 0)
- return 0;
- }
- }
-#endif
+ if (cmos_rtc_platform_device_present)
+ return 0;
+
if (!x86_platform.legacy.rtc)
return -ENODEV;
platform_device_register(&rtc_device);
- dev_info(&rtc_device.dev,
- "registered platform RTC device (no PNP device found)\n");
+ dev_info(&rtc_device.dev, "registered fallback platform RTC device\n");
return 0;
}
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index 978232b6d48d..0962ae4c3017 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -100,17 +100,9 @@ static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
static unsigned long alloc_shstk(unsigned long addr, unsigned long size,
unsigned long token_offset, bool set_res_tok)
{
- int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
- struct mm_struct *mm = current->mm;
- unsigned long mapped_addr, unused;
+ unsigned long mapped_addr;
- if (addr)
- flags |= MAP_FIXED_NOREPLACE;
-
- mmap_write_lock(mm);
- mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
- VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
- mmap_write_unlock(mm);
+ mapped_addr = vm_mmap_shadow_stack(addr, size, MAP_ABOVE4G);
if (!set_res_tok || IS_ERR_VALUE(mapped_addr))
goto out;
@@ -351,7 +343,8 @@ static int shstk_pop_sigframe(unsigned long *ssp)
need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;
if (need_to_check_vma)
- mmap_read_lock_killable(current->mm);
+ if (mmap_read_lock_killable(current->mm))
+ return -EINTR;
err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
if (unlikely(err))
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index b014e6d229f9..cbf95fe2b207 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -35,6 +35,7 @@
#include <asm/trace/irq_vectors.h>
#include <asm/kexec.h>
#include <asm/reboot.h>
+#include <asm/virt.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@@ -124,7 +125,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
return NMI_HANDLED;
- cpu_emergency_disable_virtualization();
+ x86_virt_emergency_disable_virtualization_cpu();
stop_this_cpu(NULL);
return NMI_HANDLED;
@@ -136,7 +137,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
apic_eoi();
- cpu_emergency_disable_virtualization();
+ x86_virt_emergency_disable_virtualization_cpu();
stop_this_cpu(NULL);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5cd6950ab672..294a8ea60298 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -468,13 +468,6 @@ static int x86_cluster_flags(void)
}
#endif
-/*
- * Set if a package/die has multiple NUMA nodes inside.
- * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
- * Sub-NUMA Clustering have this.
- */
-static bool x86_has_numa_in_package;
-
static struct sched_domain_topology_level x86_topology[] = {
SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
#ifdef CONFIG_SCHED_CLUSTER
@@ -496,7 +489,7 @@ static void __init build_sched_topology(void)
* PKG domain since the NUMA domains will auto-magically create the
* right spanning domains based on the SLIT.
*/
- if (x86_has_numa_in_package) {
+ if (topology_num_nodes_per_package() > 1) {
unsigned int pkgdom = ARRAY_SIZE(x86_topology) - 2;
memset(&x86_topology[pkgdom], 0, sizeof(x86_topology[pkgdom]));
@@ -513,33 +506,149 @@ static void __init build_sched_topology(void)
}
#ifdef CONFIG_NUMA
-static int sched_avg_remote_distance;
-static int avg_remote_numa_distance(void)
+/*
+ * Test if the on-trace cluster at (N,N) is symmetric.
+ * Uses upper triangle iteration to avoid obvious duplicates.
+ */
+static bool slit_cluster_symmetric(int N)
{
- int i, j;
- int distance, nr_remote, total_distance;
-
- if (sched_avg_remote_distance > 0)
- return sched_avg_remote_distance;
-
- nr_remote = 0;
- total_distance = 0;
- for_each_node_state(i, N_CPU) {
- for_each_node_state(j, N_CPU) {
- distance = node_distance(i, j);
-
- if (distance >= REMOTE_DISTANCE) {
- nr_remote++;
- total_distance += distance;
- }
+ int u = topology_num_nodes_per_package();
+
+ for (int k = 0; k < u; k++) {
+ for (int l = k; l < u; l++) {
+ if (node_distance(N + k, N + l) !=
+ node_distance(N + l, N + k))
+ return false;
}
}
- if (nr_remote)
- sched_avg_remote_distance = total_distance / nr_remote;
- else
- sched_avg_remote_distance = REMOTE_DISTANCE;
- return sched_avg_remote_distance;
+ return true;
+}
+
+/*
+ * Return the package-id of the cluster, or ~0 if indeterminate.
+ * Each node in the on-trace cluster should have the same package-id.
+ */
+static u32 slit_cluster_package(int N)
+{
+ int u = topology_num_nodes_per_package();
+ u32 pkg_id = ~0;
+
+ for (int n = 0; n < u; n++) {
+ const struct cpumask *cpus = cpumask_of_node(N + n);
+ int cpu;
+
+ for_each_cpu(cpu, cpus) {
+ u32 id = topology_logical_package_id(cpu);
+
+ if (pkg_id == ~0)
+ pkg_id = id;
+ if (pkg_id != id)
+ return ~0;
+ }
+ }
+
+ return pkg_id;
+}
+
+/*
+ * Validate the SLIT table is of the form expected for SNC, specifically:
+ *
+ * - each on-trace cluster should be symmetric,
+ * - each on-trace cluster should have a unique package-id.
+ *
+ * If you NUMA_EMU on top of SNC, you get to keep the pieces.
+ */
+static bool slit_validate(void)
+{
+ int u = topology_num_nodes_per_package();
+ u32 pkg_id, prev_pkg_id = ~0;
+
+ for (int pkg = 0; pkg < topology_max_packages(); pkg++) {
+ int n = pkg * u;
+
+ /*
+ * Ensure the on-trace cluster is symmetric and each cluster
+ * has a different package id.
+ */
+ if (!slit_cluster_symmetric(n))
+ return false;
+ pkg_id = slit_cluster_package(n);
+ if (pkg_id == ~0)
+ return false;
+ if (pkg && pkg_id == prev_pkg_id)
+ return false;
+
+ prev_pkg_id = pkg_id;
+ }
+
+ return true;
+}
+
+/*
+ * Compute a sanitized SLIT table for SNC; notably SNC-3 can end up with
+ * asymmetric off-trace clusters, reflecting physical asymmetries. However
+ * this leads to 'unfortunate' sched_domain configurations.
+ *
+ * For example dual socket GNR with SNC-3:
+ *
+ * node distances:
+ * node 0 1 2 3 4 5
+ * 0: 10 15 17 21 28 26
+ * 1: 15 10 15 23 26 23
+ * 2: 17 15 10 26 23 21
+ * 3: 21 28 26 10 15 17
+ * 4: 23 26 23 15 10 15
+ * 5: 26 23 21 17 15 10
+ *
+ * Fix things up by averaging out the off-trace clusters; resulting in:
+ *
+ * node 0 1 2 3 4 5
+ * 0: 10 15 17 24 24 24
+ * 1: 15 10 15 24 24 24
+ * 2: 17 15 10 24 24 24
+ * 3: 24 24 24 10 15 17
+ * 4: 24 24 24 15 10 15
+ * 5: 24 24 24 17 15 10
+ */
+static int slit_cluster_distance(int i, int j)
+{
+ static int slit_valid = -1;
+ int u = topology_num_nodes_per_package();
+ long d = 0;
+ int x, y;
+
+ if (slit_valid < 0) {
+ slit_valid = slit_validate();
+ if (!slit_valid)
+ pr_err(FW_BUG "SLIT table doesn't have the expected form for SNC -- fixup disabled!\n");
+ else
+ pr_info("Fixing up SNC SLIT table.\n");
+ }
+
+ /*
+ * Is this a unit cluster on the trace?
+ */
+ if ((i / u) == (j / u) || !slit_valid)
+ return node_distance(i, j);
+
+ /*
+ * Off-trace cluster.
+ *
+ * Notably average out the symmetric pair of off-trace clusters to
+ * ensure the resulting SLIT table is symmetric.
+ */
+ x = i - (i % u);
+ y = j - (j % u);
+
+ for (i = x; i < x + u; i++) {
+ for (j = y; j < y + u; j++) {
+ d += node_distance(i, j);
+ d += node_distance(j, i);
+ }
+ }
+
+ return d / (2*u*u);
}
int arch_sched_node_distance(int from, int to)
@@ -549,34 +658,14 @@ int arch_sched_node_distance(int from, int to)
switch (boot_cpu_data.x86_vfm) {
case INTEL_GRANITERAPIDS_X:
case INTEL_ATOM_DARKMONT_X:
-
- if (!x86_has_numa_in_package || topology_max_packages() == 1 ||
- d < REMOTE_DISTANCE)
+ if (topology_max_packages() == 1 ||
+ topology_num_nodes_per_package() < 3)
return d;
/*
- * With SNC enabled, there could be too many levels of remote
- * NUMA node distances, creating NUMA domain levels
- * including local nodes and partial remote nodes.
- *
- * Trim finer distance tuning for NUMA nodes in remote package
- * for the purpose of building sched domains. Group NUMA nodes
- * in the remote package in the same sched group.
- * Simplify NUMA domains and avoid extra NUMA levels including
- * different remote NUMA nodes and local nodes.
- *
- * GNR and CWF don't expect systems with more than 2 packages
- * and more than 2 hops between packages. Single average remote
- * distance won't be appropriate if there are more than 2
- * packages as average distance to different remote packages
- * could be different.
+ * Handle SNC-3 asymmetries.
*/
- WARN_ONCE(topology_max_packages() > 2,
- "sched: Expect only up to 2 packages for GNR or CWF, "
- "but saw %d packages when building sched domains.",
- topology_max_packages());
-
- d = avg_remote_numa_distance();
+ return slit_cluster_distance(from, to);
}
return d;
}
@@ -606,7 +695,7 @@ void set_cpu_sibling_map(int cpu)
o = &cpu_data(i);
if (match_pkg(c, o) && !topology_same_node(c, o))
- x86_has_numa_in_package = true;
+ WARN_ON_ONCE(topology_num_nodes_per_package() == 1);
if ((i == cpu) || (has_smt && match_smt(c, o)))
link_mask(topology_sibling_cpumask, cpu, i);
diff --git a/arch/x86/kernel/sys_ia32.c b/arch/x86/kernel/sys_ia32.c
index 6cf65397d225..610a1c2f4519 100644
--- a/arch/x86/kernel/sys_ia32.c
+++ b/arch/x86/kernel/sys_ia32.c
@@ -61,7 +61,8 @@ SYSCALL_DEFINE3(ia32_truncate64, const char __user *, filename,
SYSCALL_DEFINE3(ia32_ftruncate64, unsigned int, fd,
unsigned long, offset_low, unsigned long, offset_high)
{
- return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
+ return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low,
+ FTRUNCATE_LFS);
}
/* warning: next two assume little endian */
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 3ffbab0081f4..86b4186a0d4f 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -117,7 +117,7 @@ int do_set_thread_area(struct task_struct *p, int idx,
int can_allocate)
{
struct user_desc info;
- unsigned short __maybe_unused sel, modified_sel;
+ unsigned short modified_sel;
if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT;
@@ -153,6 +153,8 @@ int do_set_thread_area(struct task_struct *p, int idx,
modified_sel = (idx << 3) | 3;
if (p == current) {
+ unsigned short sel;
+
#ifdef CONFIG_X86_64
savesegment(ds, sel);
if (sel == modified_sel)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4dbff8ef9b1c..0ca3912ecb7f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -70,6 +70,7 @@
#include <asm/tdx.h>
#include <asm/cfi.h>
#include <asm/msr.h>
+#include <asm/vsyscall.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -921,11 +922,6 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
cond_local_irq_enable(regs);
- if (static_cpu_has(X86_FEATURE_UMIP)) {
- if (user_mode(regs) && fixup_umip_exception(regs))
- goto exit;
- }
-
if (v8086_mode(regs)) {
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
@@ -940,6 +936,12 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0))
goto exit;
+ if (fixup_umip_exception(regs))
+ goto exit;
+
+ if (emulate_vsyscall_gp(regs))
+ goto exit;
+
gp_user_force_sig_segv(regs, X86_TRAP_GP, error_code, desc);
goto exit;
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index d9aa694e43f3..c5110eb554bc 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -322,12 +322,16 @@ int __init notsc_setup(char *str)
return 1;
}
#endif
-
__setup("notsc", notsc_setup);
+enum {
+ TSC_WATCHDOG_AUTO,
+ TSC_WATCHDOG_OFF,
+ TSC_WATCHDOG_ON,
+};
+
static int no_sched_irq_time;
-static int no_tsc_watchdog;
-static int tsc_as_watchdog;
+static int tsc_watchdog;
static int __init tsc_setup(char *str)
{
@@ -337,25 +341,14 @@ static int __init tsc_setup(char *str)
no_sched_irq_time = 1;
if (!strcmp(str, "unstable"))
mark_tsc_unstable("boot parameter");
- if (!strcmp(str, "nowatchdog")) {
- no_tsc_watchdog = 1;
- if (tsc_as_watchdog)
- pr_alert("%s: Overriding earlier tsc=watchdog with tsc=nowatchdog\n",
- __func__);
- tsc_as_watchdog = 0;
- }
+ if (!strcmp(str, "nowatchdog"))
+ tsc_watchdog = TSC_WATCHDOG_OFF;
if (!strcmp(str, "recalibrate"))
tsc_force_recalibrate = 1;
- if (!strcmp(str, "watchdog")) {
- if (no_tsc_watchdog)
- pr_alert("%s: tsc=watchdog overridden by earlier tsc=nowatchdog\n",
- __func__);
- else
- tsc_as_watchdog = 1;
- }
+ if (!strcmp(str, "watchdog"))
+ tsc_watchdog = TSC_WATCHDOG_ON;
return 1;
}
-
__setup("tsc=", tsc_setup);
#define MAX_RETRIES 5
@@ -1175,7 +1168,6 @@ static int tsc_cs_enable(struct clocksource *cs)
static struct clocksource clocksource_tsc_early = {
.name = "tsc-early",
.rating = 299,
- .uncertainty_margin = 32 * NSEC_PER_MSEC,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
@@ -1200,9 +1192,9 @@ static struct clocksource clocksource_tsc = {
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
- CLOCK_SOURCE_VALID_FOR_HRES |
+ CLOCK_SOURCE_CAN_INLINE_READ |
CLOCK_SOURCE_MUST_VERIFY |
- CLOCK_SOURCE_VERIFY_PERCPU,
+ CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT,
.id = CSID_X86_TSC,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
@@ -1230,16 +1222,12 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
static void __init tsc_disable_clocksource_watchdog(void)
{
+ if (tsc_watchdog == TSC_WATCHDOG_ON)
+ return;
clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
}
-bool tsc_clocksource_watchdog_disabled(void)
-{
- return !(clocksource_tsc.flags & CLOCK_SOURCE_MUST_VERIFY) &&
- tsc_as_watchdog && !no_tsc_watchdog;
-}
-
static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
@@ -1394,6 +1382,8 @@ restart:
(unsigned long)tsc_khz / 1000,
(unsigned long)tsc_khz % 1000);
+ clocksource_tsc.flags |= CLOCK_SOURCE_CALIBRATED;
+
/* Inform the TSC deadline clockevent devices about the recalibration */
lapic_update_tsc_freq();
@@ -1409,6 +1399,15 @@ out:
have_art = true;
clocksource_tsc.base = &art_base_clk;
}
+
+ /*
+ * Transfer the valid for high resolution flag if it was set on the
+ * early TSC already. That guarantees that there is no intermediate
+ * clocksource selected once the early TSC is unregistered.
+ */
+ if (clocksource_tsc_early.flags & CLOCK_SOURCE_VALID_FOR_HRES)
+ clocksource_tsc.flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+
clocksource_register_khz(&clocksource_tsc, tsc_khz);
unreg:
clocksource_unregister(&clocksource_tsc_early);
@@ -1460,12 +1459,10 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
if (early) {
cpu_khz = x86_platform.calibrate_cpu();
- if (tsc_early_khz) {
+ if (tsc_early_khz)
tsc_khz = tsc_early_khz;
- } else {
+ else
tsc_khz = x86_platform.calibrate_tsc();
- clocksource_tsc.freq_khz = tsc_khz;
- }
} else {
/* We should not be here with non-native cpu calibration */
WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
@@ -1569,7 +1566,7 @@ void __init tsc_init(void)
return;
}
- if (tsc_clocksource_reliable || no_tsc_watchdog)
+ if (tsc_clocksource_reliable || tsc_watchdog == TSC_WATCHDOG_OFF)
tsc_disable_clocksource_watchdog();
clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index d432f3824f0c..3ce99cbcf187 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -354,6 +354,9 @@ bool fixup_umip_exception(struct pt_regs *regs)
void __user *uaddr;
struct insn insn;
+ if (!cpu_feature_enabled(X86_FEATURE_UMIP))
+ return false;
+
if (!regs)
return false;
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index f610fde2d5c4..6407bc9256bf 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -546,17 +546,23 @@ bool unwind_next_frame(struct unwind_state *state)
indirect = true;
break;
- case ORC_REG_R10:
- if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
- orc_warn_current("missing R10 value at %pB\n",
+ /*
+ * Any of the below registers may temporarily hold the stack pointer,
+ * typically during a DRAP stack realignment sequence or some other
+ * stack swizzle.
+ */
+
+ case ORC_REG_AX:
+ if (!get_reg(state, offsetof(struct pt_regs, ax), &sp)) {
+ orc_warn_current("missing AX value at %pB\n",
(void *)state->ip);
goto err;
}
break;
- case ORC_REG_R13:
- if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
- orc_warn_current("missing R13 value at %pB\n",
+ case ORC_REG_DX:
+ if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
+ orc_warn_current("missing DX value at %pB\n",
(void *)state->ip);
goto err;
}
@@ -570,9 +576,17 @@ bool unwind_next_frame(struct unwind_state *state)
}
break;
- case ORC_REG_DX:
- if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
- orc_warn_current("missing DX value at %pB\n",
+ case ORC_REG_R10:
+ if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
+ orc_warn_current("missing R10 value at %pB\n",
+ (void *)state->ip);
+ goto err;
+ }
+ break;
+
+ case ORC_REG_R13:
+ if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
+ orc_warn_current("missing R13 value at %pB\n",
(void *)state->ip);
goto err;
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3a24a3fc55f5..4711a35e706c 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -427,6 +427,7 @@ SECTIONS
.llvm_bb_addr_map : { *(.llvm_bb_addr_map) }
#endif
+ MODINFO
ELF_DETAILS
DISCARDS
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d2486506a808..e69156b54cff 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -776,7 +776,10 @@ do { \
#define SYNTHESIZED_F(name) \
({ \
kvm_cpu_cap_synthesized |= feature_bit(name); \
- F(name); \
+ \
+ BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \
+ if (boot_cpu_has(X86_FEATURE_##name)) \
+ F(name); \
})
/*
@@ -1243,11 +1246,12 @@ void kvm_initialize_cpu_caps(void)
F(NULL_SEL_CLR_BASE),
/* UpperAddressIgnore */
F(AUTOIBRS),
- F(PREFETCHI),
EMULATED_F(NO_SMM_CTL_MSR),
/* PrefetchCtlMsr */
/* GpOnUserCpuid */
/* EPSF */
+ F(PREFETCHI),
+ F(AVX512_BMM),
F(ERAPS),
SYNTHESIZED_F(SBPB),
SYNTHESIZED_F(IBPB_BRTYPE),
@@ -2157,7 +2161,8 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
u32 eax, ebx, ecx, edx;
- if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
+ if (!is_smm(vcpu) && cpuid_fault_enabled(vcpu) &&
+ !kvm_require_cpl(vcpu, 0))
return 1;
eax = kvm_rax_read(vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8e292e9a24d..c8c6cc0406d6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1297,12 +1297,25 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
int rc;
struct read_cache *mc = &ctxt->mem_read;
+ /*
+ * If the read gets a cache hit, simply copy the value from the cache.
+ * A "hit" here means that there is unused data in the cache, i.e. when
+ * re-emulating an instruction to complete a userspace exit, KVM relies
+ * on "no decode" to ensure the instruction is re-emulated in the same
+ * sequence, so that multiple reads are fulfilled in the correct order.
+ */
if (mc->pos < mc->end)
goto read_cached;
if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
return X86EMUL_UNHANDLEABLE;
+ /*
+ * Route all reads to the cache. This allows @dest to be an on-stack
+ * variable without triggering use-after-free if KVM needs to exit to
+ * userspace to handle an MMIO read (the MMIO fragment will point at
+ * the current location in the cache).
+ */
rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
&ctxt->exception);
if (rc != X86EMUL_CONTINUE)
@@ -3583,10 +3596,10 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
u64 msr = 0;
ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
- if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
- ctxt->ops->cpl(ctxt)) {
+ if (!ctxt->ops->is_smm(ctxt) &&
+ (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT) &&
+ ctxt->ops->cpl(ctxt))
return emulate_gp(ctxt, 0);
- }
eax = reg_read(ctxt, VCPU_REGS_RAX);
ecx = reg_read(ctxt, VCPU_REGS_RCX);
@@ -3708,7 +3721,7 @@ static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
*/
static int em_fxsave(struct x86_emulate_ctxt *ctxt)
{
- struct fxregs_state fx_state;
+ struct fxregs_state fx_state = {};
int rc;
rc = check_fxsr(ctxt);
@@ -3738,7 +3751,7 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
static noinline int fxregs_fixup(struct fxregs_state *fx_state,
const size_t used_size)
{
- struct fxregs_state fx_tmp;
+ struct fxregs_state fx_tmp = {};
int rc;
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
@@ -3874,8 +3887,7 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
{
u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
- /* Valid physical address? */
- if (rax & 0xffff000000000000ULL)
+ if (!ctxt->ops->page_address_valid(ctxt, rax))
return emulate_gp(ctxt, 0);
return check_svme(ctxt);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 30202942289a..9b140bbdc1d8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1981,16 +1981,17 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
goto out_flush_all;
- if (is_noncanonical_invlpg_address(entries[i], vcpu))
- continue;
-
/*
* Lower 12 bits of 'address' encode the number of additional
* pages to flush.
*/
gva = entries[i] & PAGE_MASK;
- for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++)
+ for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++) {
+ if (is_noncanonical_invlpg_address(gva + j * PAGE_SIZE, vcpu))
+ continue;
+
kvm_x86_call(flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE);
+ }
++vcpu->stat.tlb_flush;
}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 6ce160ffa678..6301f79fcbae 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -305,14 +305,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
{
return false;
}
-static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
-{
- return false;
-}
-static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu)
-{
- return false;
-}
static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
{
return 0;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index bb257793b6cb..eed96ff6e722 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -321,7 +321,8 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
idx = srcu_read_lock(&kvm->irq_srcu);
gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
if (gsi != -1)
- hlist_for_each_entry_rcu(kimn, &ioapic->mask_notifier_list, link)
+ hlist_for_each_entry_srcu(kimn, &ioapic->mask_notifier_list, link,
+ srcu_read_lock_held(&kvm->irq_srcu))
if (kimn->irq == gsi)
kimn->func(kimn, mask);
srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index fb3dab4b5a53..0abff36d0994 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -245,6 +245,8 @@ struct x86_emulate_ops {
bool (*is_canonical_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
unsigned int flags);
+
+ bool (*page_address_valid)(struct x86_emulate_ctxt *ctxt, gpa_t gpa);
};
/* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9381c58d4c85..e3ec4d8607c1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -840,16 +840,16 @@ static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
{
int i, count = 0;
struct kvm_vcpu *vcpu;
+ size_t map_index;
if (min > map->max_apic_id)
return 0;
- min = array_index_nospec(min, map->max_apic_id + 1);
-
for_each_set_bit(i, ipi_bitmap,
- min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
- if (map->phys_map[min + i]) {
- vcpu = map->phys_map[min + i]->vcpu;
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ map_index = array_index_nospec(min + i, map->max_apic_id + 1);
+ if (map->phys_map[map_index]) {
+ vcpu = map->phys_map[map_index]->vcpu;
count += kvm_apic_set_irq(vcpu, irq, NULL);
}
}
@@ -2657,6 +2657,9 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
+ return;
+
/*
* ICR is a single 64-bit register when x2APIC is enabled, all others
* registers hold 32-bit values. For legacy xAPIC, ICR writes need to
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b922a8b00057..24fbc9ea502a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2941,6 +2941,15 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
return -EPERM;
/*
+ * Only 4KiB mappings can become unsync, and KVM disallows hugepages
+ * when accounting 4KiB shadow pages. Upper-level gPTEs are always
+ * write-protected (see above), thus if the gfn can be mapped with a
+ * hugepage and isn't write-tracked, it can't have a shadow page.
+ */
+ if (!lpage_info_slot(gfn, slot, PG_LEVEL_2M)->disallow_lpage)
+ return 0;
+
+ /*
* The page is not write-tracked, mark existing shadow pages unsync
* unless KVM is synchronizing an unsync SP. In that case, KVM must
* complete emulation of the guest TLB flush before allowing shadow
@@ -3044,12 +3053,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
bool prefetch = !fault || fault->prefetch;
bool write_fault = fault && fault->write;
- if (unlikely(is_noslot_pfn(pfn))) {
- vcpu->stat.pf_mmio_spte_created++;
- mark_mmio_spte(vcpu, sptep, gfn, pte_access);
- return RET_PF_EMULATE;
- }
-
if (is_shadow_present_pte(*sptep)) {
if (prefetch && is_last_spte(*sptep, level) &&
pfn == spte_to_pfn(*sptep))
@@ -3066,13 +3069,22 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
child = spte_to_child_sp(pte);
drop_parent_pte(vcpu->kvm, child, sptep);
flush = true;
- } else if (WARN_ON_ONCE(pfn != spte_to_pfn(*sptep))) {
+ } else if (pfn != spte_to_pfn(*sptep)) {
+ WARN_ON_ONCE(vcpu->arch.mmu->root_role.direct);
drop_spte(vcpu->kvm, sptep);
flush = true;
} else
was_rmapped = 1;
}
+ if (unlikely(is_noslot_pfn(pfn))) {
+ vcpu->stat.pf_mmio_spte_created++;
+ mark_mmio_spte(vcpu, sptep, gfn, pte_access);
+ if (flush)
+ kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
+ return RET_PF_EMULATE;
+ }
+
wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch,
false, host_writable, &spte);
@@ -7487,9 +7499,14 @@ static void kvm_wake_nx_recovery_thread(struct kvm *kvm)
static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
{
+ int val = *(int *)kp->arg;
+
if (nx_hugepage_mitigation_hard_disabled)
return sysfs_emit(buffer, "never\n");
+ if (val == -1)
+ return sysfs_emit(buffer, "auto\n");
+
return param_get_bool(buffer, kp);
}
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 91ce29fd6f1b..8c0ffa2cded6 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -248,7 +248,7 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
static inline hpa_t kvm_mmu_get_dummy_root(void)
{
- return my_zero_pfn(0) << PAGE_SHIFT;
+ return zero_pfn(0) << PAGE_SHIFT;
}
static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 9c26038f6b77..7b1102d26f9c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1507,7 +1507,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(void)
if (!sp)
return NULL;
- sp->spt = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+ sp->spt = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!sp->spt) {
kmem_cache_free(mmu_page_header_cache, sp);
return NULL;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index bd6b785cf261..e218352e3423 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -1256,7 +1256,7 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
r = -EFAULT;
if (copy_from_user(filter->events, user_filter->events,
- sizeof(filter->events[0]) * filter->nevents))
+ flex_array_size(filter, events, filter->nevents)))
goto cleanup;
r = prepare_filter_lists(filter);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index f92214b1a938..adf211860949 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -19,6 +19,7 @@
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>
#include <linux/kvm_irqfd.h>
+#include <linux/sysfs.h>
#include <asm/irq_remapping.h>
#include <asm/msr.h>
@@ -76,23 +77,33 @@ static int avic_param_set(const char *val, const struct kernel_param *kp)
return param_set_bint(val, kp);
}
+static int avic_param_get(char *buffer, const struct kernel_param *kp)
+{
+ int val = *(int *)kp->arg;
+
+ if (val == AVIC_AUTO_MODE)
+ return sysfs_emit(buffer, "N\n");
+
+ return param_get_bool(buffer, kp);
+}
+
static const struct kernel_param_ops avic_ops = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = avic_param_set,
- .get = param_get_bool,
+ .get = avic_param_get,
};
/*
* Enable / disable AVIC. In "auto" mode (default behavior), AVIC is enabled
* for Zen4+ CPUs with x2AVIC (and all other criteria for enablement are met).
*/
-static int avic = AVIC_AUTO_MODE;
+static int __ro_after_init avic = AVIC_AUTO_MODE;
module_param_cb(avic, &avic_ops, &avic, 0444);
__MODULE_PARM_TYPE(avic, "bool");
module_param(enable_ipiv, bool, 0444);
-static bool force_avic;
+static bool __ro_after_init force_avic;
module_param_unsafe(force_avic, bool, 0444);
/* Note:
@@ -189,12 +200,12 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
-
vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu);
-
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
+ svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
+
/*
* Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
* accesses, while interrupt injection to a running vCPU can be
@@ -226,6 +237,9 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm)
vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
+ if (!is_sev_es_guest(&svm->vcpu))
+ svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
+
/*
* If running nested and the guest uses its own MSR bitmap, there
* is no need to update L0's msr bitmap
@@ -368,7 +382,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
vmcb->control.avic_physical_id = __sme_set(__pa(kvm_svm->avic_physical_id_table));
vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE;
- if (kvm_apicv_activated(svm->vcpu.kvm))
+ if (kvm_vcpu_apicv_active(&svm->vcpu))
avic_activate_vmcb(svm);
else
avic_deactivate_vmcb(svm);
diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h
index d3f8bfc05832..f70d076911a6 100644
--- a/arch/x86/kvm/svm/hyperv.h
+++ b/arch/x86/kvm/svm/hyperv.h
@@ -41,10 +41,17 @@ static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
return hv_vcpu->vp_assist_page.nested_control.features.directhypercall;
}
+static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+{
+ return guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
+ nested_svm_l2_tlb_flush_enabled(vcpu) &&
+ kvm_hv_is_tlb_flush_hcall(vcpu);
+}
+
void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
#else /* CONFIG_KVM_HYPERV */
static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
-static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
+static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
{
return false;
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 53ab6ce3cc26..961804df5f45 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -116,31 +116,28 @@ static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
if (!nested_npt_enabled(svm))
return true;
- if (!(svm->nested.ctl.virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK))
+ if (!(svm->nested.ctl.misc_ctl2 & SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE))
return true;
return false;
}
-void recalc_intercepts(struct vcpu_svm *svm)
+void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm)
{
- struct vmcb_control_area *c, *h;
- struct vmcb_ctrl_area_cached *g;
+ struct vmcb_ctrl_area_cached *vmcb12_ctrl = &svm->nested.ctl;
+ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
unsigned int i;
- vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-
- if (!is_guest_mode(&svm->vcpu))
+ if (WARN_ON_ONCE(svm->vmcb != vmcb02))
return;
- c = &svm->vmcb->control;
- h = &svm->vmcb01.ptr->control;
- g = &svm->nested.ctl;
+ vmcb_mark_dirty(vmcb02, VMCB_INTERCEPTS);
for (i = 0; i < MAX_INTERCEPT; i++)
- c->intercepts[i] = h->intercepts[i];
+ vmcb02->control.intercepts[i] = vmcb01->control.intercepts[i];
- if (g->int_ctl & V_INTR_MASKING_MASK) {
+ if (vmcb12_ctrl->int_ctl & V_INTR_MASKING_MASK) {
/*
* If L2 is active and V_INTR_MASKING is enabled in vmcb12,
* disable intercept of CR8 writes as L2's CR8 does not affect
@@ -151,24 +148,17 @@ void recalc_intercepts(struct vcpu_svm *svm)
* the effective RFLAGS.IF for L1 interrupts will never be set
* while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
*/
- vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
- if (!(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF))
- vmcb_clr_intercept(c, INTERCEPT_VINTR);
+ vmcb_clr_intercept(&vmcb02->control, INTERCEPT_CR8_WRITE);
+ if (!(vmcb01->save.rflags & X86_EFLAGS_IF))
+ vmcb_clr_intercept(&vmcb02->control, INTERCEPT_VINTR);
}
- /*
- * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
- * flush feature is enabled.
- */
- if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
- vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
-
for (i = 0; i < MAX_INTERCEPT; i++)
- c->intercepts[i] |= g->intercepts[i];
+ vmcb02->control.intercepts[i] |= vmcb12_ctrl->intercepts[i];
/* If SMI is not intercepted, ignore guest SMI intercept as well */
if (!intercept_smi)
- vmcb_clr_intercept(c, INTERCEPT_SMI);
+ vmcb_clr_intercept(&vmcb02->control, INTERCEPT_SMI);
if (nested_vmcb_needs_vls_intercept(svm)) {
/*
@@ -176,10 +166,10 @@ void recalc_intercepts(struct vcpu_svm *svm)
* we must intercept these instructions to correctly
* emulate them in case L1 doesn't intercept them.
*/
- vmcb_set_intercept(c, INTERCEPT_VMLOAD);
- vmcb_set_intercept(c, INTERCEPT_VMSAVE);
+ vmcb_set_intercept(&vmcb02->control, INTERCEPT_VMLOAD);
+ vmcb_set_intercept(&vmcb02->control, INTERCEPT_VMSAVE);
} else {
- WARN_ON(!(c->virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK));
+ WARN_ON_ONCE(!(vmcb02->control.misc_ctl2 & SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE));
}
}
@@ -339,8 +329,56 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}
-static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- struct vmcb_ctrl_area_cached *control)
+static bool nested_svm_event_inj_valid_exept(struct kvm_vcpu *vcpu, u8 vector)
+{
+ /*
+ * Vectors that do not correspond to a defined exception are invalid
+ * (including #NMI and reserved vectors). In a best effort to define
+ * valid exceptions based on the virtual CPU, make all exceptions always
+ * valid except those obviously tied to a CPU feature.
+ */
+ switch (vector) {
+ case DE_VECTOR: case DB_VECTOR: case BP_VECTOR: case OF_VECTOR:
+ case BR_VECTOR: case UD_VECTOR: case NM_VECTOR: case DF_VECTOR:
+ case TS_VECTOR: case NP_VECTOR: case SS_VECTOR: case GP_VECTOR:
+ case PF_VECTOR: case MF_VECTOR: case AC_VECTOR: case MC_VECTOR:
+ case XM_VECTOR: case HV_VECTOR: case SX_VECTOR:
+ return true;
+ case CP_VECTOR:
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+ case VC_VECTOR:
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_SEV_ES);
+ }
+ return false;
+}
+
+/*
+ * According to the APM, VMRUN exits with SVM_EXIT_ERR if SVM_EVTINJ_VALID is
+ * set and:
+ * - The type of event_inj is not one of the defined values.
+ * - The type is SVM_EVTINJ_TYPE_EXEPT, but the vector is not a valid exception.
+ */
+static bool nested_svm_check_event_inj(struct kvm_vcpu *vcpu, u32 event_inj)
+{
+ u32 type = event_inj & SVM_EVTINJ_TYPE_MASK;
+ u8 vector = event_inj & SVM_EVTINJ_VEC_MASK;
+
+ if (!(event_inj & SVM_EVTINJ_VALID))
+ return true;
+
+ if (type != SVM_EVTINJ_TYPE_INTR && type != SVM_EVTINJ_TYPE_NMI &&
+ type != SVM_EVTINJ_TYPE_EXEPT && type != SVM_EVTINJ_TYPE_SOFT)
+ return false;
+
+ if (type == SVM_EVTINJ_TYPE_EXEPT &&
+ !nested_svm_event_inj_valid_exept(vcpu, vector))
+ return false;
+
+ return true;
+}
+
+static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+ struct vmcb_ctrl_area_cached *control)
{
if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
return false;
@@ -348,7 +386,8 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
if (CC(control->asid == 0))
return false;
- if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
+ if (CC((control->misc_ctl & SVM_MISC_ENABLE_NP) &&
+ !kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
return false;
if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
@@ -363,12 +402,15 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
return false;
}
+ if (CC(!nested_svm_check_event_inj(vcpu, control->event_inj)))
+ return false;
+
return true;
}
/* Common checks that apply to both L1 and L2 state. */
-static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
- struct vmcb_save_area_cached *save)
+static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu,
+ struct vmcb_save_area_cached *save)
{
if (CC(!(save->efer & EFER_SVME)))
return false;
@@ -390,6 +432,10 @@ static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
CC(!(save->cr0 & X86_CR0_PE)) ||
CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
return false;
+
+ if (CC((save->cs.attrib & SVM_SELECTOR_L_MASK) &&
+ (save->cs.attrib & SVM_SELECTOR_DB_MASK)))
+ return false;
}
/* Note, SVM doesn't have any additional restrictions on CR4. */
@@ -402,20 +448,15 @@ static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
return true;
}
-static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
+int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb_save_area_cached *save = &svm->nested.save;
-
- return __nested_vmcb_check_save(vcpu, save);
-}
-static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
+ if (!nested_vmcb_check_save(vcpu, &svm->nested.save) ||
+ !nested_vmcb_check_controls(vcpu, &svm->nested.ctl))
+ return -EINVAL;
- return __nested_vmcb_check_controls(vcpu, ctl);
+ return 0;
}
/*
@@ -447,37 +488,39 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
nested_svm_sanitize_intercept(vcpu, to, SKINIT);
nested_svm_sanitize_intercept(vcpu, to, RDPRU);
- to->iopm_base_pa = from->iopm_base_pa;
- to->msrpm_base_pa = from->msrpm_base_pa;
+ /* Always clear SVM_MISC_ENABLE_NP if the guest cannot use NPTs */
+ to->misc_ctl = from->misc_ctl;
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NPT))
+ to->misc_ctl &= ~SVM_MISC_ENABLE_NP;
+
+ to->iopm_base_pa = from->iopm_base_pa & PAGE_MASK;
+ to->msrpm_base_pa = from->msrpm_base_pa & PAGE_MASK;
to->tsc_offset = from->tsc_offset;
- to->tlb_ctl = from->tlb_ctl;
+ to->tlb_ctl = from->tlb_ctl & TLB_CONTROL_MASK;
to->erap_ctl = from->erap_ctl;
to->int_ctl = from->int_ctl;
- to->int_vector = from->int_vector;
- to->int_state = from->int_state;
+ to->int_vector = from->int_vector & SVM_INT_VECTOR_MASK;
+ to->int_state = from->int_state & SVM_INTERRUPT_SHADOW_MASK;
to->exit_code = from->exit_code;
to->exit_info_1 = from->exit_info_1;
to->exit_info_2 = from->exit_info_2;
to->exit_int_info = from->exit_int_info;
to->exit_int_info_err = from->exit_int_info_err;
- to->nested_ctl = from->nested_ctl;
- to->event_inj = from->event_inj;
+ to->event_inj = from->event_inj & ~SVM_EVTINJ_RESERVED_BITS;
to->event_inj_err = from->event_inj_err;
to->next_rip = from->next_rip;
to->nested_cr3 = from->nested_cr3;
- to->virt_ext = from->virt_ext;
+ to->misc_ctl2 = from->misc_ctl2;
to->pause_filter_count = from->pause_filter_count;
to->pause_filter_thresh = from->pause_filter_thresh;
- /* Copy asid here because nested_vmcb_check_controls will check it. */
+ /* Copy asid here because nested_vmcb_check_controls() will check it */
to->asid = from->asid;
- to->msrpm_base_pa &= ~0x0fffULL;
- to->iopm_base_pa &= ~0x0fffULL;
+ to->clean = from->clean;
#ifdef CONFIG_KVM_HYPERV
/* Hyper-V extensions (Enlightened VMCB) */
if (kvm_hv_hypercall_enabled(vcpu)) {
- to->clean = from->clean;
memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
sizeof(to->hv_enlightenments));
}
@@ -493,17 +536,34 @@ void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
struct vmcb_save_area *from)
{
- /*
- * Copy only fields that are validated, as we need them
- * to avoid TOC/TOU races.
- */
+ to->es = from->es;
+ to->cs = from->cs;
+ to->ss = from->ss;
+ to->ds = from->ds;
+ to->gdtr = from->gdtr;
+ to->idtr = from->idtr;
+
+ to->cpl = from->cpl;
+
to->efer = from->efer;
- to->cr0 = from->cr0;
- to->cr3 = from->cr3;
to->cr4 = from->cr4;
-
- to->dr6 = from->dr6;
+ to->cr3 = from->cr3;
+ to->cr0 = from->cr0;
to->dr7 = from->dr7;
+ to->dr6 = from->dr6;
+
+ to->rflags = from->rflags;
+ to->rip = from->rip;
+ to->rsp = from->rsp;
+
+ to->s_cet = from->s_cet;
+ to->ssp = from->ssp;
+ to->isst_addr = from->isst_addr;
+
+ to->rax = from->rax;
+ to->cr2 = from->cr2;
+
+ svm_copy_lbrs(to, from);
}
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
@@ -521,6 +581,7 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
u32 mask;
svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;
+ svm->nested.ctl.int_state = svm->vmcb->control.int_state;
/* Only a few fields of int_ctl are written by the processor. */
mask = V_IRQ_MASK | V_TPR_MASK;
@@ -533,7 +594,7 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
* int_ctl (because it was never recognized while L2 was running).
*/
if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
- !test_bit(INTERCEPT_VINTR, (unsigned long *)svm->nested.ctl.intercepts))
+ !vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_VINTR))
mask &= ~V_IRQ_MASK;
if (nested_vgif_enabled(svm))
@@ -639,8 +700,16 @@ void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
}
-static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+static bool nested_vmcb12_has_lbrv(struct kvm_vcpu *vcpu)
{
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+ (to_svm(vcpu)->nested.ctl.misc_ctl2 & SVM_MISC2_ENABLE_V_LBR);
+}
+
+static void nested_vmcb02_prepare_save(struct vcpu_svm *svm)
+{
+ struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
+ struct vmcb_save_area_cached *save = &svm->nested.save;
bool new_vmcb12 = false;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
@@ -656,64 +725,64 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
svm->nested.force_msr_bitmap_recalc = true;
}
- if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
- vmcb02->save.es = vmcb12->save.es;
- vmcb02->save.cs = vmcb12->save.cs;
- vmcb02->save.ss = vmcb12->save.ss;
- vmcb02->save.ds = vmcb12->save.ds;
- vmcb02->save.cpl = vmcb12->save.cpl;
+ if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_SEG))) {
+ vmcb02->save.es = save->es;
+ vmcb02->save.cs = save->cs;
+ vmcb02->save.ss = save->ss;
+ vmcb02->save.ds = save->ds;
+ vmcb02->save.cpl = save->cpl;
vmcb_mark_dirty(vmcb02, VMCB_SEG);
}
- if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
- vmcb02->save.gdtr = vmcb12->save.gdtr;
- vmcb02->save.idtr = vmcb12->save.idtr;
+ if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_DT))) {
+ vmcb02->save.gdtr = save->gdtr;
+ vmcb02->save.idtr = save->idtr;
vmcb_mark_dirty(vmcb02, VMCB_DT);
}
if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
- (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_CET)))) {
- vmcb02->save.s_cet = vmcb12->save.s_cet;
- vmcb02->save.isst_addr = vmcb12->save.isst_addr;
- vmcb02->save.ssp = vmcb12->save.ssp;
+ (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_CET)))) {
+ vmcb02->save.s_cet = save->s_cet;
+ vmcb02->save.isst_addr = save->isst_addr;
+ vmcb02->save.ssp = save->ssp;
vmcb_mark_dirty(vmcb02, VMCB_CET);
}
- kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
+ kvm_set_rflags(vcpu, save->rflags | X86_EFLAGS_FIXED);
svm_set_efer(vcpu, svm->nested.save.efer);
svm_set_cr0(vcpu, svm->nested.save.cr0);
svm_set_cr4(vcpu, svm->nested.save.cr4);
- svm->vcpu.arch.cr2 = vmcb12->save.cr2;
+ svm->vcpu.arch.cr2 = save->cr2;
- kvm_rax_write(vcpu, vmcb12->save.rax);
- kvm_rsp_write(vcpu, vmcb12->save.rsp);
- kvm_rip_write(vcpu, vmcb12->save.rip);
+ kvm_rax_write(vcpu, save->rax);
+ kvm_rsp_write(vcpu, save->rsp);
+ kvm_rip_write(vcpu, save->rip);
/* In case we don't even reach vcpu_run, the fields are not updated */
- vmcb02->save.rax = vmcb12->save.rax;
- vmcb02->save.rsp = vmcb12->save.rsp;
- vmcb02->save.rip = vmcb12->save.rip;
+ vmcb02->save.rax = save->rax;
+ vmcb02->save.rsp = save->rsp;
+ vmcb02->save.rip = save->rip;
- if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
+ if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_DR))) {
vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
vmcb_mark_dirty(vmcb02, VMCB_DR);
}
- if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ if (nested_vmcb12_has_lbrv(vcpu)) {
/*
* Reserved bits of DEBUGCTL are ignored. Be consistent with
* svm_set_msr's definition of reserved bits.
*/
- svm_copy_lbrs(vmcb02, vmcb12);
+ svm_copy_lbrs(&vmcb02->save, save);
vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
} else {
- svm_copy_lbrs(vmcb02, vmcb01);
+ svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
}
+ vmcb_mark_dirty(vmcb02, VMCB_LBR);
svm_update_lbrv(&svm->vcpu);
}
@@ -741,18 +810,16 @@ static bool is_evtinj_nmi(u32 evtinj)
return type == SVM_EVTINJ_TYPE_NMI;
}
-static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
- unsigned long vmcb12_rip,
- unsigned long vmcb12_csbase)
+static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
- struct kvm_vcpu *vcpu = &svm->vcpu;
- struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct vmcb_ctrl_area_cached *vmcb12_ctrl = &svm->nested.ctl;
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
- u32 pause_count12;
- u32 pause_thresh12;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ u32 pause_count12, pause_thresh12;
nested_svm_transition_tlb_flush(vcpu);
@@ -765,7 +832,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
*/
if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) &&
- (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
+ (vmcb12_ctrl->int_ctl & V_GIF_ENABLE_MASK))
int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
else
int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
@@ -781,8 +848,16 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
V_NMI_BLOCKING_MASK);
}
- /* Copied from vmcb01. msrpm_base can be overwritten later. */
- vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
+ /*
+ * Copied from vmcb01. msrpm_base can be overwritten later.
+ *
+ * SVM_MISC_ENABLE_NP in vmcb12 is only used for consistency checks. If
+ * L1 enables NPTs, KVM shadows L1's NPTs and uses those to run L2. If
+ * L1 disables NPT, KVM runs L2 with the same NPTs used to run L1. For
+ * the latter, L1 runs L2 with shadow page tables that translate L2 GVAs
+ * to L1 GPAs, so the same NPTs can be used for L1 and L2.
+ */
+ vmcb02->control.misc_ctl = vmcb01->control.misc_ctl & SVM_MISC_ENABLE_NP;
vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
vmcb_mark_dirty(vmcb02, VMCB_PERM_MAP);
@@ -809,7 +884,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* L1 re-enters L2, the same instruction will trigger a VM-Exit and the
* entire cycle start over.
*/
- if (vmcb02->save.rip && (svm->nested.ctl.bus_lock_rip == vmcb02->save.rip))
+ if (vmcb02->save.rip && (svm->nested.last_bus_lock_rip == vmcb02->save.rip))
vmcb02->control.bus_lock_counter = 1;
else
vmcb02->control.bus_lock_counter = 0;
@@ -823,10 +898,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
if (nested_npt_enabled(svm))
nested_svm_init_mmu_context(vcpu);
- vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
- vcpu->arch.l1_tsc_offset,
- svm->nested.ctl.tsc_offset,
- svm->tsc_ratio_msr);
+ vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(vcpu->arch.l1_tsc_offset,
+ vmcb12_ctrl->tsc_offset,
+ svm->tsc_ratio_msr);
vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
@@ -835,49 +909,49 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
nested_svm_update_tsc_ratio_msr(vcpu);
vmcb02->control.int_ctl =
- (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
+ (vmcb12_ctrl->int_ctl & int_ctl_vmcb12_bits) |
(vmcb01->control.int_ctl & int_ctl_vmcb01_bits);
- vmcb02->control.int_vector = svm->nested.ctl.int_vector;
- vmcb02->control.int_state = svm->nested.ctl.int_state;
- vmcb02->control.event_inj = svm->nested.ctl.event_inj;
- vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err;
+ vmcb02->control.int_vector = vmcb12_ctrl->int_vector;
+ vmcb02->control.int_state = vmcb12_ctrl->int_state;
+ vmcb02->control.event_inj = vmcb12_ctrl->event_inj;
+ vmcb02->control.event_inj_err = vmcb12_ctrl->event_inj_err;
/*
- * next_rip is consumed on VMRUN as the return address pushed on the
- * stack for injected soft exceptions/interrupts. If nrips is exposed
- * to L1, take it verbatim from vmcb12. If nrips is supported in
- * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
- * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
- * prior to injecting the event).
+ * If nrips is exposed to L1, take NextRIP as-is. Otherwise, L1
+ * advances L2's RIP before VMRUN instead of using NextRIP. KVM will
+ * stuff the current RIP as vmcb02's NextRIP before L2 is run. After
+ * the first run of L2 (e.g. after save+restore), NextRIP is updated by
+ * the CPU and/or KVM and should be used regardless of L1's support.
*/
- if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
- vmcb02->control.next_rip = svm->nested.ctl.next_rip;
- else if (boot_cpu_has(X86_FEATURE_NRIPS))
- vmcb02->control.next_rip = vmcb12_rip;
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
+ !vcpu->arch.nested_run_pending)
+ vmcb02->control.next_rip = vmcb12_ctrl->next_rip;
svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
+
+ /*
+ * soft_int_csbase, soft_int_old_rip, and soft_int_next_rip (if L1
+ * doesn't have NRIPS) are initialized later, before the vCPU is run.
+ */
if (is_evtinj_soft(vmcb02->control.event_inj)) {
svm->soft_int_injected = true;
- svm->soft_int_csbase = vmcb12_csbase;
- svm->soft_int_old_rip = vmcb12_rip;
- if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
- svm->soft_int_next_rip = svm->nested.ctl.next_rip;
- else
- svm->soft_int_next_rip = vmcb12_rip;
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
+ !vcpu->arch.nested_run_pending)
+ svm->soft_int_next_rip = vmcb12_ctrl->next_rip;
}
- /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */
+ /* SVM_MISC2_ENABLE_V_LBR is controlled by svm_update_lbrv() */
if (!nested_vmcb_needs_vls_intercept(svm))
- vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ vmcb02->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER))
- pause_count12 = svm->nested.ctl.pause_filter_count;
+ pause_count12 = vmcb12_ctrl->pause_filter_count;
else
pause_count12 = 0;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD))
- pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
+ pause_thresh12 = vmcb12_ctrl->pause_filter_thresh;
else
pause_thresh12 = 0;
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
@@ -891,7 +965,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
/* ... but ensure filtering is disabled if so requested. */
- if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
+ if (vmcb12_is_intercept(vmcb12_ctrl, INTERCEPT_PAUSE)) {
if (!pause_count12)
vmcb02->control.pause_filter_count = 0;
if (!pause_thresh12)
@@ -908,7 +982,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* L2 is the "guest").
*/
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
- vmcb02->control.erap_ctl = (svm->nested.ctl.erap_ctl &
+ vmcb02->control.erap_ctl = (vmcb12_ctrl->erap_ctl &
ERAP_CONTROL_ALLOW_LARGER_RAP) |
ERAP_CONTROL_CLEAR_RAP;
@@ -916,7 +990,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* Merge guest and host intercepts - must be called with vcpu in
* guest-mode to take effect.
*/
- recalc_intercepts(svm);
+ nested_vmcb02_recalc_intercepts(svm);
}
static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
@@ -931,28 +1005,29 @@ static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to
to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
}
-int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
- struct vmcb *vmcb12, bool from_vmrun)
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, bool from_vmrun)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
+ struct vmcb_save_area_cached *save = &svm->nested.save;
int ret;
trace_kvm_nested_vmenter(svm->vmcb->save.rip,
vmcb12_gpa,
- vmcb12->save.rip,
- vmcb12->control.int_ctl,
- vmcb12->control.event_inj,
- vmcb12->control.nested_ctl,
- vmcb12->control.nested_cr3,
- vmcb12->save.cr3,
+ save->rip,
+ control->int_ctl,
+ control->event_inj,
+ control->misc_ctl,
+ control->nested_cr3,
+ save->cr3,
KVM_ISA_SVM);
- trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
- vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
- vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
- vmcb12->control.intercepts[INTERCEPT_WORD3],
- vmcb12->control.intercepts[INTERCEPT_WORD4],
- vmcb12->control.intercepts[INTERCEPT_WORD5]);
+ trace_kvm_nested_intercepts(control->intercepts[INTERCEPT_CR] & 0xffff,
+ control->intercepts[INTERCEPT_CR] >> 16,
+ control->intercepts[INTERCEPT_EXCEPTION],
+ control->intercepts[INTERCEPT_WORD3],
+ control->intercepts[INTERCEPT_WORD4],
+ control->intercepts[INTERCEPT_WORD5]);
svm->nested.vmcb12_gpa = vmcb12_gpa;
@@ -962,8 +1037,8 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
svm_switch_vmcb(svm, &svm->nested.vmcb02);
- nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
- nested_vmcb02_prepare_save(svm, vmcb12);
+ nested_vmcb02_prepare_control(svm);
+ nested_vmcb02_prepare_save(svm);
ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
nested_npt_enabled(svm), from_vmrun);
@@ -983,12 +1058,38 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
return 0;
}
+static int nested_svm_copy_vmcb12_to_cache(struct kvm_vcpu *vcpu, u64 vmcb12_gpa)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_host_map map;
+ struct vmcb *vmcb12;
+ int r = 0;
+
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map))
+ return -EFAULT;
+
+ vmcb12 = map.hva;
+ nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
+ nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
+
+ if (nested_svm_check_cached_vmcb12(vcpu) < 0) {
+ vmcb12->control.exit_code = SVM_EXIT_ERR;
+ vmcb12->control.exit_info_1 = 0;
+ vmcb12->control.exit_info_2 = 0;
+ vmcb12->control.event_inj = 0;
+ vmcb12->control.event_inj_err = 0;
+ svm_set_gif(svm, false);
+ r = -EINVAL;
+ }
+
+ kvm_vcpu_unmap(vcpu, &map);
+ return r;
+}
+
int nested_svm_vmrun(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
- struct vmcb *vmcb12;
- struct kvm_host_map map;
u64 vmcb12_gpa;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
@@ -1009,33 +1110,27 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
return ret;
}
- vmcb12_gpa = svm->vmcb->save.rax;
- ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
- if (ret == -EINVAL) {
+ if (WARN_ON_ONCE(!svm->nested.initialized))
+ return -EINVAL;
+
+ vmcb12_gpa = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ if (!page_address_valid(vcpu, vmcb12_gpa)) {
kvm_inject_gp(vcpu, 0);
return 1;
- } else if (ret) {
- return kvm_skip_emulated_instruction(vcpu);
}
- ret = kvm_skip_emulated_instruction(vcpu);
-
- vmcb12 = map.hva;
-
- if (WARN_ON_ONCE(!svm->nested.initialized))
- return -EINVAL;
-
- nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
- nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
+ ret = nested_svm_copy_vmcb12_to_cache(vcpu, vmcb12_gpa);
+ if (ret) {
+ if (ret == -EFAULT)
+ return kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
- if (!nested_vmcb_check_save(vcpu) ||
- !nested_vmcb_check_controls(vcpu)) {
- vmcb12->control.exit_code = SVM_EXIT_ERR;
- vmcb12->control.exit_info_1 = 0;
- vmcb12->control.exit_info_2 = 0;
- goto out;
+ /* Advance RIP past VMRUN as part of the nested #VMEXIT. */
+ return kvm_skip_emulated_instruction(vcpu);
}
+ /* At this point, VMRUN is guaranteed to not fault; advance RIP. */
+ ret = kvm_skip_emulated_instruction(vcpu);
+
/*
* Since vmcb01 is not in use, we can use it to store some of the L1
* state.
@@ -1049,27 +1144,20 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
if (!npt_enabled)
vmcb01->save.cr3 = kvm_read_cr3(vcpu);
- svm->nested.nested_run_pending = 1;
-
- if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
- goto out_exit_err;
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;
- if (nested_svm_merge_msrpm(vcpu))
- goto out;
-
-out_exit_err:
- svm->nested.nested_run_pending = 0;
- svm->nmi_l1_to_l2 = false;
- svm->soft_int_injected = false;
-
- svm->vmcb->control.exit_code = SVM_EXIT_ERR;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
+ if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true) ||
+ !nested_svm_merge_msrpm(vcpu)) {
+ vcpu->arch.nested_run_pending = 0;
+ svm->nmi_l1_to_l2 = false;
+ svm->soft_int_injected = false;
- nested_svm_vmexit(svm);
+ svm->vmcb->control.exit_code = SVM_EXIT_ERR;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
-out:
- kvm_vcpu_unmap(vcpu, &map);
+ nested_svm_vmexit(svm);
+ }
return ret;
}
@@ -1099,6 +1187,11 @@ void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
to_save->isst_addr = from_save->isst_addr;
to_save->ssp = from_save->ssp;
}
+
+ if (kvm_cpu_cap_has(X86_FEATURE_LBRV)) {
+ svm_copy_lbrs(to_save, from_save);
+ to_save->dbgctl &= ~DEBUGCTL_RESERVED_BITS;
+ }
}
void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
@@ -1117,36 +1210,20 @@ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}
-int nested_svm_vmexit(struct vcpu_svm *svm)
+static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
- struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
- struct vmcb *vmcb12;
struct kvm_host_map map;
+ struct vmcb *vmcb12;
int rc;
rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
- if (rc) {
- if (rc == -EINVAL)
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
+ if (rc)
+ return rc;
vmcb12 = map.hva;
- /* Exit Guest-Mode */
- leave_guest_mode(vcpu);
- svm->nested.vmcb12_gpa = 0;
- WARN_ON_ONCE(svm->nested.nested_run_pending);
-
- kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
-
- /* in case we halted in L2 */
- kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
-
- /* Give the current vmcb to the guest */
-
vmcb12->save.es = vmcb02->save.es;
vmcb12->save.cs = vmcb02->save.cs;
vmcb12->save.ss = vmcb02->save.ss;
@@ -1156,7 +1233,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->save.efer = svm->vcpu.arch.efer;
vmcb12->save.cr0 = kvm_read_cr0(vcpu);
vmcb12->save.cr3 = kvm_read_cr3(vcpu);
- vmcb12->save.cr2 = vmcb02->save.cr2;
+ vmcb12->save.cr2 = vcpu->arch.cr2;
vmcb12->save.cr4 = svm->vcpu.arch.cr4;
vmcb12->save.rflags = kvm_get_rflags(vcpu);
vmcb12->save.rip = kvm_rip_read(vcpu);
@@ -1183,9 +1260,43 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
vmcb12->control.next_rip = vmcb02->control.next_rip;
+ if (nested_vmcb12_has_lbrv(vcpu))
+ svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+
+ vmcb12->control.event_inj = 0;
+ vmcb12->control.event_inj_err = 0;
vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
- vmcb12->control.event_inj = svm->nested.ctl.event_inj;
- vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
+
+ trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
+ vmcb12->control.exit_info_1,
+ vmcb12->control.exit_info_2,
+ vmcb12->control.exit_int_info,
+ vmcb12->control.exit_int_info_err,
+ KVM_ISA_SVM);
+
+ kvm_vcpu_unmap(vcpu, &map);
+ return 0;
+}
+
+void nested_svm_vmexit(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+
+ if (nested_svm_vmexit_update_vmcb12(vcpu))
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
+ /* Exit Guest-Mode */
+ leave_guest_mode(vcpu);
+ svm->nested.vmcb12_gpa = 0;
+
+ kvm_warn_on_nested_run_pending(vcpu);
+
+ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
+ /* in case we halted in L2 */
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
if (!kvm_pause_in_guest(vcpu->kvm)) {
vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
@@ -1194,11 +1305,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
}
/*
- * Invalidate bus_lock_rip unless KVM is still waiting for the guest
- * to make forward progress before re-enabling bus lock detection.
+ * Invalidate last_bus_lock_rip unless KVM is still waiting for the
+ * guest to make forward progress before re-enabling bus lock detection.
*/
if (!vmcb02->control.bus_lock_counter)
- svm->nested.ctl.bus_lock_rip = INVALID_GPA;
+ svm->nested.last_bus_lock_rip = INVALID_GPA;
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
@@ -1231,11 +1342,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (!nested_exit_on_intr(svm))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
- if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
- svm_copy_lbrs(vmcb12, vmcb02);
- else
- svm_copy_lbrs(vmcb01, vmcb02);
+ if (!nested_vmcb12_has_lbrv(vcpu)) {
+ svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+ vmcb_mark_dirty(vmcb01, VMCB_LBR);
+ }
svm_update_lbrv(vcpu);
@@ -1288,22 +1398,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm->vcpu.arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(&svm->vcpu);
- trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
- vmcb12->control.exit_info_1,
- vmcb12->control.exit_info_2,
- vmcb12->control.exit_int_info,
- vmcb12->control.exit_int_info_err,
- KVM_ISA_SVM);
-
- kvm_vcpu_unmap(vcpu, &map);
-
nested_svm_transition_tlb_flush(vcpu);
nested_svm_uninit_mmu_context(vcpu);
- rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
- if (rc)
- return 1;
+ if (nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true))
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
+ /* Drop tracking for L1->L2 injected NMIs and soft IRQs */
+ svm->nmi_l1_to_l2 = false;
+ svm->soft_int_injected = false;
/*
* Drop what we picked up for L2 via svm_complete_interrupts() so it
@@ -1328,8 +1432,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
*/
if (kvm_apicv_activated(vcpu->kvm))
__kvm_vcpu_update_apicv(vcpu);
-
- return 0;
}
static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
@@ -1399,7 +1501,7 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if (is_guest_mode(vcpu)) {
- svm->nested.nested_run_pending = 0;
+ vcpu->arch.nested_run_pending = 0;
svm->nested.vmcb12_gpa = INVALID_GPA;
leave_guest_mode(vcpu);
@@ -1584,7 +1686,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
* previously injected event, the pending exception occurred while said
* event was being delivered and thus needs to be handled.
*/
- bool block_nested_exceptions = svm->nested.nested_run_pending;
+ bool block_nested_exceptions = vcpu->arch.nested_run_pending;
/*
* New events (not exceptions) are only recognized at instruction
* boundaries. If an event needs reinjection, then KVM is handling a
@@ -1674,9 +1776,7 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
}
case SVM_EXIT_VMMCALL:
/* Hyper-V L2 TLB flush hypercall is handled by L0 */
- if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
- nested_svm_l2_tlb_flush_enabled(vcpu) &&
- kvm_hv_is_tlb_flush_hcall(vcpu))
+ if (nested_svm_is_l2_tlb_flush_hcall(vcpu))
return NESTED_EXIT_HOST;
break;
default:
@@ -1721,12 +1821,12 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
dst->exit_info_2 = from->exit_info_2;
dst->exit_int_info = from->exit_int_info;
dst->exit_int_info_err = from->exit_int_info_err;
- dst->nested_ctl = from->nested_ctl;
+ dst->misc_ctl = from->misc_ctl;
dst->event_inj = from->event_inj;
dst->event_inj_err = from->event_inj_err;
dst->next_rip = from->next_rip;
- dst->nested_cr3 = from->nested_cr3;
- dst->virt_ext = from->virt_ext;
+ dst->nested_cr3 = from->nested_cr3;
+ dst->misc_ctl2 = from->misc_ctl2;
dst->pause_filter_count = from->pause_filter_count;
dst->pause_filter_thresh = from->pause_filter_thresh;
/* 'clean' and 'hv_enlightenments' are not changed by KVM */
@@ -1761,7 +1861,7 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
- if (svm->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
}
@@ -1861,12 +1961,12 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
ret = -EINVAL;
__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
- if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
+ if (!nested_vmcb_check_controls(vcpu, &ctl_cached))
goto out_free;
/*
* Processor state contains L2 state. Check that it is
- * valid for guest mode (see nested_vmcb_check_save).
+ * valid for guest mode (see nested_vmcb_check_save()).
*/
cr0 = kvm_read_cr0(vcpu);
if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
@@ -1880,7 +1980,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
if (!(save->cr0 & X86_CR0_PG) ||
!(save->cr0 & X86_CR0_PE) ||
(save->rflags & X86_EFLAGS_VM) ||
- !__nested_vmcb_check_save(vcpu, &save_cached))
+ !nested_vmcb_check_save(vcpu, &save_cached))
goto out_free;
@@ -1898,8 +1998,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
- svm->nested.nested_run_pending =
- !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+ if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING_UNTRUSTED;
+ else
+ vcpu->arch.nested_run_pending = 0;
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
@@ -1907,7 +2009,13 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
nested_copy_vmcb_control_to_cache(svm, ctl);
svm_switch_vmcb(svm, &svm->nested.vmcb02);
- nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
+ nested_vmcb02_prepare_control(svm);
+
+ /*
+ * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
+ * dirty in vmcb01 instead of vmcb02, so mark all of vmcb02 dirty here.
+ */
+ vmcb_mark_all_dirty(svm->vmcb);
/*
* While the nested guest CR3 is already checked and set by
@@ -1922,6 +2030,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm->nested.force_msr_bitmap_recalc = true;
+ if (kvm_vcpu_apicv_active(vcpu))
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
out_free:
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 3f9c1aa39a0a..c2126b3c3072 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -52,18 +52,18 @@
#define SNP_GUEST_VMM_ERR_GENERIC (~0U)
/* enable/disable SEV support */
-static bool sev_enabled = true;
+static bool __ro_after_init sev_enabled = true;
module_param_named(sev, sev_enabled, bool, 0444);
/* enable/disable SEV-ES support */
-static bool sev_es_enabled = true;
+static bool __ro_after_init sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
/* enable/disable SEV-SNP support */
-static bool sev_snp_enabled = true;
+static bool __ro_after_init sev_snp_enabled = true;
module_param_named(sev_snp, sev_snp_enabled, bool, 0444);
-static unsigned int nr_ciphertext_hiding_asids;
+static unsigned int __ro_after_init nr_ciphertext_hiding_asids;
module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444);
#define AP_RESET_HOLD_NONE 0
@@ -107,6 +107,45 @@ static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
+static __always_inline void kvm_lockdep_assert_sev_lock_held(struct kvm *kvm)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ /*
+ * Querying SEV+ support is safe if there are no other references, i.e.
+ * if concurrent initialization of SEV+ is impossible.
+ */
+ if (!refcount_read(&kvm->users_count))
+ return;
+
+ /*
+ * Querying SEV+ support from vCPU context is always safe, as vCPUs can
+ * only be created after SEV+ is initialized (and KVM disallows all SEV
+ * sub-ioctls while vCPU creation is in-progress).
+ */
+ if (kvm_get_running_vcpu())
+ return;
+
+ lockdep_assert_held(&kvm->lock);
+#endif
+}
+
+static bool sev_guest(struct kvm *kvm)
+{
+ kvm_lockdep_assert_sev_lock_held(kvm);
+ return ____sev_guest(kvm);
+}
+static bool sev_es_guest(struct kvm *kvm)
+{
+ kvm_lockdep_assert_sev_lock_held(kvm);
+ return ____sev_es_guest(kvm);
+}
+
+static bool sev_snp_guest(struct kvm *kvm)
+{
+ kvm_lockdep_assert_sev_lock_held(kvm);
+ return ____sev_snp_guest(kvm);
+}
+
static int snp_decommission_context(struct kvm *kvm);
struct enc_region {
@@ -198,6 +237,28 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
misc_cg_uncharge(type, sev->misc_cg, 1);
}
+static unsigned int sev_alloc_asid(unsigned int min_asid, unsigned int max_asid)
+{
+ unsigned int asid;
+ bool retry = true;
+
+ guard(mutex)(&sev_bitmap_lock);
+
+again:
+ asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+ if (asid > max_asid) {
+ if (retry && __sev_recycle_asids(min_asid, max_asid)) {
+ retry = false;
+ goto again;
+ }
+
+ return asid;
+ }
+
+ __set_bit(asid, sev_asid_bitmap);
+ return asid;
+}
+
static int sev_asid_new(struct kvm_sev_info *sev, unsigned long vm_type)
{
/*
@@ -205,7 +266,6 @@ static int sev_asid_new(struct kvm_sev_info *sev, unsigned long vm_type)
* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
*/
unsigned int min_asid, max_asid, asid;
- bool retry = true;
int ret;
if (vm_type == KVM_X86_SNP_VM) {
@@ -229,37 +289,24 @@ static int sev_asid_new(struct kvm_sev_info *sev, unsigned long vm_type)
if (min_asid > max_asid)
return -ENOTTY;
- WARN_ON(sev->misc_cg);
+ WARN_ON_ONCE(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
ret = sev_misc_cg_try_charge(sev);
- if (ret) {
- put_misc_cg(sev->misc_cg);
- sev->misc_cg = NULL;
- return ret;
- }
-
- mutex_lock(&sev_bitmap_lock);
+ if (ret)
+ goto e_put_cg;
-again:
- asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+ asid = sev_alloc_asid(min_asid, max_asid);
if (asid > max_asid) {
- if (retry && __sev_recycle_asids(min_asid, max_asid)) {
- retry = false;
- goto again;
- }
- mutex_unlock(&sev_bitmap_lock);
ret = -EBUSY;
goto e_uncharge;
}
- __set_bit(asid, sev_asid_bitmap);
-
- mutex_unlock(&sev_bitmap_lock);
-
sev->asid = asid;
return 0;
+
e_uncharge:
sev_misc_cg_uncharge(sev);
+e_put_cg:
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
return ret;
@@ -678,40 +725,42 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned int flags)
{
struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
- unsigned long npages, size;
- int npinned;
- unsigned long locked, lock_limit;
+ unsigned long npages, total_npages, lock_limit;
struct page **pages;
- unsigned long first, last;
- int ret;
+ int npinned, ret;
lockdep_assert_held(&kvm->lock);
if (ulen == 0 || uaddr + ulen < uaddr)
return ERR_PTR(-EINVAL);
- /* Calculate number of pages. */
- first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
- last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
- npages = (last - first + 1);
+ /*
+ * Calculate the number of pages that need to be pinned to cover the
+ * entire range. Note! This isn't simply PFN_DOWN(ulen), as KVM
+ * doesn't require the incoming address+size to be page aligned!
+ */
+ npages = PFN_DOWN(uaddr + ulen - 1) - PFN_DOWN(uaddr) + 1;
+ if (npages > INT_MAX)
+ return ERR_PTR(-EINVAL);
+
+ total_npages = sev->pages_locked + npages;
+ if (total_npages > totalram_pages())
+ return ERR_PTR(-EINVAL);
- locked = sev->pages_locked + npages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
- pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
+ if (total_npages > lock_limit && !capable(CAP_IPC_LOCK)) {
+ pr_err("SEV: %lu total pages would exceed the lock limit of %lu.\n",
+ total_npages, lock_limit);
return ERR_PTR(-ENOMEM);
}
- if (WARN_ON_ONCE(npages > INT_MAX))
- return ERR_PTR(-EINVAL);
-
- /* Avoid using vmalloc for smaller buffers. */
- size = npages * sizeof(struct page *);
- if (size > PAGE_SIZE)
- pages = __vmalloc(size, GFP_KERNEL_ACCOUNT);
- else
- pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
-
+ /*
+ * Don't WARN if the kernel (rightly) thinks the total size is absurd,
+ * i.e. rely on the kernel to reject outrageous range sizes. The above
+ * check on the number of pages is purely to avoid truncation as
+ * pin_user_pages_fast() takes the number of pages as a 32-bit int.
+ */
+ pages = kvzalloc_objs(*pages, npages, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!pages)
return ERR_PTR(-ENOMEM);
@@ -724,7 +773,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
}
*n = npages;
- sev->pages_locked = locked;
+ sev->pages_locked = total_npages;
return pages;
@@ -882,6 +931,11 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
u8 *d;
int i;
+ lockdep_assert_held(&vcpu->mutex);
+
+ if (vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
/* Check some debug related fields before encrypting the VMSA */
if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
return -EINVAL;
@@ -1027,19 +1081,21 @@ static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!sev_es_guest(kvm))
return -ENOTTY;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- ret = mutex_lock_killable(&vcpu->mutex);
- if (ret)
- return ret;
+ if (kvm_is_vcpu_creation_in_progress(kvm))
+ return -EBUSY;
- ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
+ ret = kvm_lock_all_vcpus(kvm);
+ if (ret)
+ return ret;
- mutex_unlock(&vcpu->mutex);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
if (ret)
- return ret;
+ break;
}
- return 0;
+ kvm_unlock_all_vcpus(kvm);
+ return ret;
}
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -2047,8 +2103,8 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
struct kvm_vcpu *src_vcpu;
unsigned long i;
- if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
- dst->created_vcpus != atomic_read(&dst->online_vcpus))
+ if (kvm_is_vcpu_creation_in_progress(src) ||
+ kvm_is_vcpu_creation_in_progress(dst))
return -EBUSY;
if (!sev_es_guest(src))
@@ -2359,7 +2415,6 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
struct kvm_memory_slot *memslot;
long npages, count;
void __user *src;
- int ret = 0;
if (!sev_snp_guest(kvm) || !sev->snp_context)
return -EINVAL;
@@ -2404,13 +2459,11 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
* initial expected state and better guard against unexpected
* situations.
*/
- mutex_lock(&kvm->slots_lock);
+ guard(mutex)(&kvm->slots_lock);
memslot = gfn_to_memslot(kvm, params.gfn_start);
- if (!kvm_slot_has_gmem(memslot)) {
- ret = -EINVAL;
- goto out;
- }
+ if (!kvm_slot_has_gmem(memslot))
+ return -EINVAL;
sev_populate_args.sev_fd = argp->sev_fd;
sev_populate_args.type = params.type;
@@ -2421,22 +2474,18 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
argp->error = sev_populate_args.fw_error;
pr_debug("%s: kvm_gmem_populate failed, ret %ld (fw_error %d)\n",
__func__, count, argp->error);
- ret = -EIO;
- } else {
- params.gfn_start += count;
- params.len -= count * PAGE_SIZE;
- if (params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO)
- params.uaddr += count * PAGE_SIZE;
-
- ret = 0;
- if (copy_to_user(u64_to_user_ptr(argp->data), &params, sizeof(params)))
- ret = -EFAULT;
+ return -EIO;
}
-out:
- mutex_unlock(&kvm->slots_lock);
+ params.gfn_start += count;
+ params.len -= count * PAGE_SIZE;
+ if (params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO)
+ params.uaddr += count * PAGE_SIZE;
- return ret;
+ if (copy_to_user(u64_to_user_ptr(argp->data), &params, sizeof(params)))
+ return -EFAULT;
+
+ return 0;
}
static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -2447,6 +2496,13 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
unsigned long i;
int ret;
+ if (kvm_is_vcpu_creation_in_progress(kvm))
+ return -EBUSY;
+
+ ret = kvm_lock_all_vcpus(kvm);
+ if (ret)
+ return ret;
+
data.gctx_paddr = __psp_pa(sev->snp_context);
data.page_type = SNP_PAGE_TYPE_VMSA;
@@ -2456,12 +2512,12 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = sev_es_sync_vmsa(svm);
if (ret)
- return ret;
+ goto out;
/* Transition the VMSA page to a firmware state. */
ret = rmp_make_private(pfn, INITIAL_VMSA_GPA, PG_LEVEL_4K, sev->asid, true);
if (ret)
- return ret;
+ goto out;
/* Issue the SNP command to encrypt the VMSA */
data.address = __sme_pa(svm->sev_es.vmsa);
@@ -2470,7 +2526,7 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (ret) {
snp_page_reclaim(kvm, pfn);
- return ret;
+ goto out;
}
svm->vcpu.arch.guest_state_protected = true;
@@ -2484,7 +2540,9 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
svm_enable_lbrv(vcpu);
}
- return 0;
+out:
+ kvm_unlock_all_vcpus(kvm);
+ return ret;
}
static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -2587,30 +2645,24 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
return -EFAULT;
- mutex_lock(&kvm->lock);
+ guard(mutex)(&kvm->lock);
/* Only the enc_context_owner handles some memory enc operations. */
if (is_mirroring_enc_context(kvm) &&
- !is_cmd_allowed_from_mirror(sev_cmd.id)) {
- r = -EINVAL;
- goto out;
- }
+ !is_cmd_allowed_from_mirror(sev_cmd.id))
+ return -EINVAL;
/*
* Once KVM_SEV_INIT2 initializes a KVM instance as an SNP guest, only
* allow the use of SNP-specific commands.
*/
- if (sev_snp_guest(kvm) && sev_cmd.id < KVM_SEV_SNP_LAUNCH_START) {
- r = -EPERM;
- goto out;
- }
+ if (sev_snp_guest(kvm) && sev_cmd.id < KVM_SEV_SNP_LAUNCH_START)
+ return -EPERM;
switch (sev_cmd.id) {
case KVM_SEV_ES_INIT:
- if (!sev_es_enabled) {
- r = -ENOTTY;
- goto out;
- }
+ if (!sev_es_enabled)
+ return -ENOTTY;
fallthrough;
case KVM_SEV_INIT:
r = sev_guest_init(kvm, &sev_cmd);
@@ -2682,15 +2734,12 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
r = snp_enable_certs(kvm);
break;
default:
- r = -EINVAL;
- goto out;
+ return -EINVAL;
}
if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
r = -EFAULT;
-out:
- mutex_unlock(&kvm->lock);
return r;
}
@@ -2701,6 +2750,8 @@ int sev_mem_enc_register_region(struct kvm *kvm,
struct enc_region *region;
int ret = 0;
+ guard(mutex)(&kvm->lock);
+
if (!sev_guest(kvm))
return -ENOTTY;
@@ -2708,19 +2759,14 @@ int sev_mem_enc_register_region(struct kvm *kvm,
if (is_mirroring_enc_context(kvm))
return -EINVAL;
- if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
- return -EINVAL;
-
region = kzalloc_obj(*region, GFP_KERNEL_ACCOUNT);
if (!region)
return -ENOMEM;
- mutex_lock(&kvm->lock);
region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages,
FOLL_WRITE | FOLL_LONGTERM);
if (IS_ERR(region->pages)) {
ret = PTR_ERR(region->pages);
- mutex_unlock(&kvm->lock);
goto e_free;
}
@@ -2738,8 +2784,6 @@ int sev_mem_enc_register_region(struct kvm *kvm,
region->size = range->size;
list_add_tail(&region->list, &sev->regions_list);
- mutex_unlock(&kvm->lock);
-
return ret;
e_free:
@@ -2775,35 +2819,25 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
struct kvm_enc_region *range)
{
struct enc_region *region;
- int ret;
/* If kvm is mirroring encryption context it isn't responsible for it */
if (is_mirroring_enc_context(kvm))
return -EINVAL;
- mutex_lock(&kvm->lock);
+ guard(mutex)(&kvm->lock);
- if (!sev_guest(kvm)) {
- ret = -ENOTTY;
- goto failed;
- }
+ if (!sev_guest(kvm))
+ return -ENOTTY;
region = find_enc_region(kvm, range);
- if (!region) {
- ret = -EINVAL;
- goto failed;
- }
+ if (!region)
+ return -EINVAL;
sev_writeback_caches(kvm);
__unregister_enc_region_locked(kvm, region);
- mutex_unlock(&kvm->lock);
return 0;
-
-failed:
- mutex_unlock(&kvm->lock);
- return ret;
}
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
@@ -2898,6 +2932,28 @@ static int snp_decommission_context(struct kvm *kvm)
return 0;
}
+void sev_vm_init(struct kvm *kvm)
+{
+ switch (kvm->arch.vm_type) {
+ case KVM_X86_DEFAULT_VM:
+ case KVM_X86_SW_PROTECTED_VM:
+ break;
+ case KVM_X86_SNP_VM:
+ kvm->arch.has_private_mem = true;
+ fallthrough;
+ case KVM_X86_SEV_ES_VM:
+ kvm->arch.has_protected_state = true;
+ fallthrough;
+ case KVM_X86_SEV_VM:
+ kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
+ to_kvm_sev_info(kvm)->need_init = true;
+ break;
+ default:
+ WARN_ONCE(1, "Unsupported VM type %u", kvm->arch.vm_type);
+ break;
+ }
+}
+
void sev_vm_destroy(struct kvm *kvm)
{
struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
@@ -3244,8 +3300,14 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
* With SNP+gmem, private/encrypted memory is unreachable via the
* hva-based mmu notifiers, i.e. these events are explicitly scoped to
* shared pages, where there's no need to flush caches.
+ *
+ * Checking for SEV+ outside of kvm->lock is safe as __sev_guest_init()
+ * can only be done before vCPUs are created, caches can be incoherent
+ * if and only if a vCPU was run, and either this task will see the VM
+ * as being SEV+ or the vCPU won't be able to access the memory (because
+ * of the in-progress invalidation).
*/
- if (!sev_guest(kvm) || sev_snp_guest(kvm))
+ if (!____sev_guest(kvm) || ____sev_snp_guest(kvm))
return;
sev_writeback_caches(kvm);
@@ -3255,7 +3317,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
- if (!sev_es_guest(vcpu->kvm))
+ if (!is_sev_es_guest(vcpu))
return;
svm = to_svm(vcpu);
@@ -3265,7 +3327,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
* a guest-owned page. Transition the page to hypervisor state before
* releasing it back to the system.
*/
- if (sev_snp_guest(vcpu->kvm)) {
+ if (is_sev_snp_guest(vcpu)) {
u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K))
@@ -3466,7 +3528,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
goto vmgexit_err;
break;
case SVM_VMGEXIT_AP_CREATION:
- if (!sev_snp_guest(vcpu->kvm))
+ if (!is_sev_snp_guest(vcpu))
goto vmgexit_err;
if (lower_32_bits(control->exit_info_1) != SVM_VMGEXIT_AP_DESTROY)
if (!kvm_ghcb_rax_is_valid(svm))
@@ -3480,12 +3542,12 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
case SVM_VMGEXIT_TERM_REQUEST:
break;
case SVM_VMGEXIT_PSC:
- if (!sev_snp_guest(vcpu->kvm) || !kvm_ghcb_sw_scratch_is_valid(svm))
+ if (!is_sev_snp_guest(vcpu) || !kvm_ghcb_sw_scratch_is_valid(svm))
goto vmgexit_err;
break;
case SVM_VMGEXIT_GUEST_REQUEST:
case SVM_VMGEXIT_EXT_GUEST_REQUEST:
- if (!sev_snp_guest(vcpu->kvm) ||
+ if (!is_sev_snp_guest(vcpu) ||
!PAGE_ALIGNED(control->exit_info_1) ||
!PAGE_ALIGNED(control->exit_info_2) ||
control->exit_info_1 == control->exit_info_2)
@@ -3559,7 +3621,8 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
int pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- struct kvm *kvm = svm->vcpu.kvm;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm *kvm = vcpu->kvm;
unsigned int asid = sev_get_asid(kvm);
/*
@@ -3567,7 +3630,7 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
* VMSA, e.g. if userspace forces the vCPU to be RUNNABLE after an SNP
* AP Destroy event.
*/
- if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
+ if (is_sev_es_guest(vcpu) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
return -EINVAL;
/*
@@ -4113,15 +4176,13 @@ static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_
sev_ret_code fw_err = 0;
int ret;
- if (!sev_snp_guest(kvm))
+ if (!is_sev_snp_guest(&svm->vcpu))
return -EINVAL;
- mutex_lock(&sev->guest_req_mutex);
+ guard(mutex)(&sev->guest_req_mutex);
- if (kvm_read_guest(kvm, req_gpa, sev->guest_req_buf, PAGE_SIZE)) {
- ret = -EIO;
- goto out_unlock;
- }
+ if (kvm_read_guest(kvm, req_gpa, sev->guest_req_buf, PAGE_SIZE))
+ return -EIO;
data.gctx_paddr = __psp_pa(sev->snp_context);
data.req_paddr = __psp_pa(sev->guest_req_buf);
@@ -4134,21 +4195,16 @@ static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_
*/
ret = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &fw_err);
if (ret && !fw_err)
- goto out_unlock;
+ return ret;
- if (kvm_write_guest(kvm, resp_gpa, sev->guest_resp_buf, PAGE_SIZE)) {
- ret = -EIO;
- goto out_unlock;
- }
+ if (kvm_write_guest(kvm, resp_gpa, sev->guest_resp_buf, PAGE_SIZE))
+ return -EIO;
/* No action is requested *from KVM* if there was a firmware error. */
svm_vmgexit_no_action(svm, SNP_GUEST_ERR(0, fw_err));
- ret = 1; /* resume guest */
-
-out_unlock:
- mutex_unlock(&sev->guest_req_mutex);
- return ret;
+ /* resume guest */
+ return 1;
}
static int snp_req_certs_err(struct vcpu_svm *svm, u32 vmm_error)
@@ -4183,10 +4239,12 @@ static int snp_complete_req_certs(struct kvm_vcpu *vcpu)
static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
{
- struct kvm *kvm = svm->vcpu.kvm;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm *kvm = vcpu->kvm;
+
u8 msg_type;
- if (!sev_snp_guest(kvm))
+ if (!is_sev_snp_guest(vcpu))
return -EINVAL;
if (kvm_read_guest(kvm, req_gpa + offsetof(struct snp_guest_msg_hdr, msg_type),
@@ -4205,7 +4263,6 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
*/
if (msg_type == SNP_MSG_REPORT_REQ) {
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- struct kvm_vcpu *vcpu = &svm->vcpu;
u64 data_npages;
gpa_t data_gpa;
@@ -4322,7 +4379,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
break;
case GHCB_MSR_PREF_GPA_REQ:
- if (!sev_snp_guest(vcpu->kvm))
+ if (!is_sev_snp_guest(vcpu))
goto out_terminate;
set_ghcb_msr_bits(svm, GHCB_MSR_PREF_GPA_NONE, GHCB_MSR_GPA_VALUE_MASK,
@@ -4333,7 +4390,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
case GHCB_MSR_REG_GPA_REQ: {
u64 gfn;
- if (!sev_snp_guest(vcpu->kvm))
+ if (!is_sev_snp_guest(vcpu))
goto out_terminate;
gfn = get_ghcb_msr_bits(svm, GHCB_MSR_GPA_VALUE_MASK,
@@ -4348,7 +4405,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
break;
}
case GHCB_MSR_PSC_REQ:
- if (!sev_snp_guest(vcpu->kvm))
+ if (!is_sev_snp_guest(vcpu))
goto out_terminate;
ret = snp_begin_psc_msr(svm, control->ghcb_gpa);
@@ -4421,7 +4478,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
sev_es_sync_from_ghcb(svm);
/* SEV-SNP guest requires that the GHCB GPA must be registered */
- if (sev_snp_guest(svm->vcpu.kvm) && !ghcb_gpa_is_registered(svm, ghcb_gpa)) {
+ if (is_sev_snp_guest(vcpu) && !ghcb_gpa_is_registered(svm, ghcb_gpa)) {
vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB GPA [%#llx] is not registered.\n", ghcb_gpa);
return -EINVAL;
}
@@ -4434,25 +4491,17 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
switch (control->exit_code) {
case SVM_VMGEXIT_MMIO_READ:
- ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
- if (ret)
- break;
+ case SVM_VMGEXIT_MMIO_WRITE: {
+ bool is_write = control->exit_code == SVM_VMGEXIT_MMIO_WRITE;
- ret = kvm_sev_es_mmio_read(vcpu,
- control->exit_info_1,
- control->exit_info_2,
- svm->sev_es.ghcb_sa);
- break;
- case SVM_VMGEXIT_MMIO_WRITE:
- ret = setup_vmgexit_scratch(svm, false, control->exit_info_2);
+ ret = setup_vmgexit_scratch(svm, !is_write, control->exit_info_2);
if (ret)
break;
- ret = kvm_sev_es_mmio_write(vcpu,
- control->exit_info_1,
- control->exit_info_2,
- svm->sev_es.ghcb_sa);
+ ret = kvm_sev_es_mmio(vcpu, is_write, control->exit_info_1,
+ control->exit_info_2, svm->sev_es.ghcb_sa);
break;
+ }
case SVM_VMGEXIT_NMI_COMPLETE:
++vcpu->stat.nmi_window_exits;
svm->nmi_masked = false;
@@ -4599,7 +4648,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event)
struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
struct vmcb *vmcb = svm->vmcb01.ptr;
- svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
+ svm->vmcb->control.misc_ctl |= SVM_MISC_ENABLE_SEV_ES;
/*
* An SEV-ES guest requires a VMSA area that is a separate from the
@@ -4639,7 +4688,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event)
if (!sev_vcpu_has_debug_swap(svm)) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
- recalc_intercepts(svm);
+ svm_mark_intercepts_dirty(svm);
} else {
/*
* Disable #DB intercept iff DebugSwap is enabled. KVM doesn't
@@ -4670,7 +4719,7 @@ void sev_init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
+ svm->vmcb->control.misc_ctl |= SVM_MISC_ENABLE_SEV;
clr_exception_intercept(svm, UD_VECTOR);
/*
@@ -4679,10 +4728,10 @@ void sev_init_vmcb(struct vcpu_svm *svm, bool init_event)
*/
clr_exception_intercept(svm, GP_VECTOR);
- if (init_event && sev_snp_guest(vcpu->kvm))
+ if (init_event && is_sev_snp_guest(vcpu))
sev_snp_init_protected_guest_state(vcpu);
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
sev_es_init_vmcb(svm, init_event);
}
@@ -4693,7 +4742,7 @@ int sev_vcpu_create(struct kvm_vcpu *vcpu)
mutex_init(&svm->sev_es.snp_vmsa_mutex);
- if (!sev_es_guest(vcpu->kvm))
+ if (!is_sev_es_guest(vcpu))
return 0;
/*
@@ -4713,8 +4762,6 @@ int sev_vcpu_create(struct kvm_vcpu *vcpu)
void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
{
- struct kvm *kvm = svm->vcpu.kvm;
-
/*
* All host state for SEV-ES guests is categorized into three swap types
* based on how it is handled by hardware during a world switch:
@@ -4753,7 +4800,8 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are
* loaded with the correct values *if* the CPU writes the MSRs.
*/
if (sev_vcpu_has_debug_swap(svm) ||
- (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) {
+ (cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) &&
+ is_sev_snp_guest(&svm->vcpu))) {
hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
@@ -5117,7 +5165,7 @@ struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
int error = 0;
int ret;
- if (!sev_es_guest(vcpu->kvm))
+ if (!is_sev_es_guest(vcpu))
return NULL;
/*
@@ -5130,7 +5178,7 @@ struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
sev = to_kvm_sev_info(vcpu->kvm);
/* Check if the SEV policy allows debugging */
- if (sev_snp_guest(vcpu->kvm)) {
+ if (is_sev_snp_guest(vcpu)) {
if (!(sev->policy & SNP_POLICY_MASK_DEBUG))
return NULL;
} else {
@@ -5138,7 +5186,7 @@ struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
return NULL;
}
- if (sev_snp_guest(vcpu->kvm)) {
+ if (is_sev_snp_guest(vcpu)) {
struct sev_data_snp_dbg dbg = {0};
vmsa = snp_alloc_firmware_page(__GFP_ZERO);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8f8bc863e214..e7fdd7a9c280 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -44,6 +44,7 @@
#include <asm/traps.h>
#include <asm/reboot.h>
#include <asm/fpu/api.h>
+#include <asm/virt.h>
#include <trace/events/ipi.h>
@@ -52,6 +53,7 @@
#include "svm.h"
#include "svm_ops.h"
+#include "hyperv.h"
#include "kvm_onhyperv.h"
#include "svm_onhyperv.h"
@@ -77,6 +79,7 @@ static bool erratum_383_found __read_mostly;
* are published and we know what the new status bits are
*/
static uint64_t osvw_len = 4, osvw_status;
+static DEFINE_SPINLOCK(osvw_lock);
static DEFINE_PER_CPU(u64, current_tsc_ratio);
@@ -110,52 +113,52 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
* count only mode.
*/
-static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
+static unsigned short __ro_after_init pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
module_param(pause_filter_thresh, ushort, 0444);
-static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
+static unsigned short __ro_after_init pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
module_param(pause_filter_count, ushort, 0444);
/* Default doubles per-vcpu window every exit. */
-static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
+static unsigned short __ro_after_init pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
module_param(pause_filter_count_grow, ushort, 0444);
/* Default resets per-vcpu window every exit to pause_filter_count. */
-static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
+static unsigned short __ro_after_init pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(pause_filter_count_shrink, ushort, 0444);
/* Default is to compute the maximum so we can never overflow. */
-static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
+static unsigned short __ro_after_init pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444);
/*
* Use nested page tables by default. Note, NPT may get forced off by
* svm_hardware_setup() if it's unsupported by hardware or the host kernel.
*/
-bool npt_enabled = true;
+bool __ro_after_init npt_enabled = true;
module_param_named(npt, npt_enabled, bool, 0444);
/* allow nested virtualization in KVM/SVM */
-static int nested = true;
+static int __ro_after_init nested = true;
module_param(nested, int, 0444);
/* enable/disable Next RIP Save */
-int nrips = true;
+int __ro_after_init nrips = true;
module_param(nrips, int, 0444);
/* enable/disable Virtual VMLOAD VMSAVE */
-static int vls = true;
+static int __ro_after_init vls = true;
module_param(vls, int, 0444);
/* enable/disable Virtual GIF */
-int vgif = true;
+int __ro_after_init vgif = true;
module_param(vgif, int, 0444);
/* enable/disable LBR virtualization */
-int lbrv = true;
+int __ro_after_init lbrv = true;
module_param(lbrv, int, 0444);
-static int tsc_scaling = true;
+static int __ro_after_init tsc_scaling = true;
module_param(tsc_scaling, int, 0444);
module_param(enable_device_posted_irqs, bool, 0444);
@@ -164,19 +167,19 @@ bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
-bool intercept_smi = true;
+bool __ro_after_init intercept_smi = true;
module_param(intercept_smi, bool, 0444);
-bool vnmi = true;
+bool __ro_after_init vnmi = true;
module_param(vnmi, bool, 0444);
module_param(enable_mediated_pmu, bool, 0444);
-static bool svm_gp_erratum_intercept = true;
+static bool __ro_after_init svm_gp_erratum_intercept = true;
static u8 rsm_ins_bytes[] = "\x0f\xaa";
-static unsigned long iopm_base;
+static unsigned long __read_mostly iopm_base;
DEFINE_PER_CPU(struct svm_cpu_data, svm_data);
@@ -216,6 +219,19 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
if (!(efer & EFER_SVME)) {
+ /*
+ * Architecturally, clearing EFER.SVME while a guest is
+ * running yields undefined behavior, i.e. KVM can do
+ * literally anything. Force the vCPU back into L1 as
+ * that is the safest option for KVM, but synthesize a
+ * triple fault (for L1!) so that KVM at least doesn't
+ * run random L2 code in the context of L1. Do so if
+ * and only if the vCPU is actively running, e.g. to
+ * avoid false positives if userspace is stuffing state.
+ */
+ if (is_guest_mode(vcpu) && vcpu->wants_to_run)
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
svm_leave_nested(vcpu);
/* #GP intercept is still needed for vmware backdoor */
if (!enable_vmware_backdoor)
@@ -241,9 +257,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
* Never intercept #GP for SEV guests, KVM can't
* decrypt guest memory to workaround the erratum.
*/
- if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
+ if (svm_gp_erratum_intercept && !is_sev_guest(vcpu))
set_exception_intercept(svm, GP_VECTOR);
}
+
+ kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu);
}
svm->vmcb->save.efer = efer | EFER_SVME;
@@ -283,7 +301,7 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
* SEV-ES does not expose the next RIP. The RIP update is controlled by
* the type of exit and the #VC handler in the guest.
*/
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
goto done;
if (nrips && svm->vmcb->control.next_rip != 0) {
@@ -420,6 +438,48 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu)
vcpu->arch.osvw.status |= 1;
}
+static void svm_init_os_visible_workarounds(void)
+{
+ u64 len, status;
+
+ /*
+ * Get OS-Visible Workarounds (OSVW) bits.
+ *
+ * Note that it is possible to have a system with mixed processor
+ * revisions and therefore different OSVW bits. If bits are not the same
+ * on different processors then choose the worst case (i.e. if erratum
+ * is present on one processor and not on another then assume that the
+ * erratum is present everywhere).
+ *
+ * Note #2! The OSVW MSRs are used to communicate that an erratum is
+ * NOT present! Software must assume erratum as present if its bit is
+ * set in OSVW_STATUS *or* the bit number exceeds OSVW_ID_LENGTH. If
+ * either RDMSR fails, simply zero out the length to treat all errata
+ * as being present. Similarly, use the *minimum* length across all
+ * CPUs, not the maximum length.
+ *
+ * If the length is zero, then KVM is already treating all errata as
+ * being present and there's nothing left to do.
+ */
+ if (!osvw_len)
+ return;
+
+ if (!this_cpu_has(X86_FEATURE_OSVW) ||
+ native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &len) ||
+ native_read_msr_safe(MSR_AMD64_OSVW_STATUS, &status))
+ len = status = 0;
+
+ if (status == READ_ONCE(osvw_status) && len >= READ_ONCE(osvw_len))
+ return;
+
+ guard(spinlock)(&osvw_lock);
+
+ if (len < osvw_len)
+ osvw_len = len;
+ osvw_status |= status;
+ osvw_status &= (1ULL << osvw_len) - 1;
+}
+
static bool __kvm_is_svm_supported(void)
{
int cpu = smp_processor_id();
@@ -477,27 +537,9 @@ static __always_inline struct sev_es_save_area *sev_es_host_save_area(struct svm
return &sd->save_area->host_sev_es_save;
}
-static inline void kvm_cpu_svm_disable(void)
-{
- uint64_t efer;
-
- wrmsrq(MSR_VM_HSAVE_PA, 0);
- rdmsrq(MSR_EFER, efer);
- if (efer & EFER_SVME) {
- /*
- * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
- * NMI aren't blocked.
- */
- stgi();
- wrmsrq(MSR_EFER, efer & ~EFER_SVME);
- }
-}
-
static void svm_emergency_disable_virtualization_cpu(void)
{
- kvm_rebooting = true;
-
- kvm_cpu_svm_disable();
+ wrmsrq(MSR_VM_HSAVE_PA, 0);
}
static void svm_disable_virtualization_cpu(void)
@@ -506,7 +548,8 @@ static void svm_disable_virtualization_cpu(void)
if (tsc_scaling)
__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
- kvm_cpu_svm_disable();
+ x86_virt_put_ref(X86_FEATURE_SVM);
+ wrmsrq(MSR_VM_HSAVE_PA, 0);
amd_pmu_disable_virt();
}
@@ -515,12 +558,12 @@ static int svm_enable_virtualization_cpu(void)
{
struct svm_cpu_data *sd;
- uint64_t efer;
int me = raw_smp_processor_id();
+ int r;
- rdmsrq(MSR_EFER, efer);
- if (efer & EFER_SVME)
- return -EBUSY;
+ r = x86_virt_get_ref(X86_FEATURE_SVM);
+ if (r)
+ return r;
sd = per_cpu_ptr(&svm_data, me);
sd->asid_generation = 1;
@@ -528,8 +571,6 @@ static int svm_enable_virtualization_cpu(void)
sd->next_asid = sd->max_asid + 1;
sd->min_asid = max_sev_asid + 1;
- wrmsrq(MSR_EFER, efer | EFER_SVME);
-
wrmsrq(MSR_VM_HSAVE_PA, sd->save_area_pa);
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
@@ -540,34 +581,7 @@ static int svm_enable_virtualization_cpu(void)
__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
}
-
- /*
- * Get OSVW bits.
- *
- * Note that it is possible to have a system with mixed processor
- * revisions and therefore different OSVW bits. If bits are not the same
- * on different processors then choose the worst case (i.e. if erratum
- * is present on one processor and not on another then assume that the
- * erratum is present everywhere).
- */
- if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
- u64 len, status = 0;
- int err;
-
- err = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &len);
- if (!err)
- err = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, &status);
-
- if (err)
- osvw_status = osvw_len = 0;
- else {
- if (len < osvw_len)
- osvw_len = len;
- osvw_status |= status;
- osvw_status &= (1ULL << osvw_len) - 1;
- }
- } else
- osvw_status = osvw_len = 0;
+ svm_init_os_visible_workarounds();
svm_init_erratum_383();
@@ -635,7 +649,7 @@ static void set_dr_intercepts(struct vcpu_svm *svm)
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
- recalc_intercepts(svm);
+ svm_mark_intercepts_dirty(svm);
}
static void clr_dr_intercepts(struct vcpu_svm *svm)
@@ -644,7 +658,7 @@ static void clr_dr_intercepts(struct vcpu_svm *svm)
vmcb->control.intercepts[INTERCEPT_DR] = 0;
- recalc_intercepts(svm);
+ svm_mark_intercepts_dirty(svm);
}
static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
@@ -710,7 +724,7 @@ void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask)
static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- bool intercept = !(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK);
+ bool intercept = !(svm->vmcb->control.misc_ctl2 & SVM_MISC2_ENABLE_V_LBR);
if (intercept == svm->lbr_msrs_intercepted)
return;
@@ -720,7 +734,7 @@ static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu)
svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTINTFROMIP, MSR_TYPE_RW, intercept);
svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTINTTOIP, MSR_TYPE_RW, intercept);
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept);
svm->lbr_msrs_intercepted = intercept;
@@ -830,7 +844,7 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
svm_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, !shstk_enabled);
}
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
sev_es_recalc_msr_intercepts(vcpu);
svm_recalc_pmu_msr_intercepts(vcpu);
@@ -841,20 +855,9 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
*/
}
-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
-{
- to_vmcb->save.dbgctl = from_vmcb->save.dbgctl;
- to_vmcb->save.br_from = from_vmcb->save.br_from;
- to_vmcb->save.br_to = from_vmcb->save.br_to;
- to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from;
- to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to;
-
- vmcb_mark_dirty(to_vmcb, VMCB_LBR);
-}
-
static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
- to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+ to_svm(vcpu)->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_LBR;
}
void svm_enable_lbrv(struct kvm_vcpu *vcpu)
@@ -865,17 +868,17 @@ void svm_enable_lbrv(struct kvm_vcpu *vcpu)
static void __svm_disable_lbrv(struct kvm_vcpu *vcpu)
{
- KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
- to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+ KVM_BUG_ON(is_sev_es_guest(vcpu), vcpu->kvm);
+ to_svm(vcpu)->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_LBR;
}
void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
+ bool current_enable_lbrv = svm->vmcb->control.misc_ctl2 & SVM_MISC2_ENABLE_V_LBR;
bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) ||
(is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
+ (svm->nested.ctl.misc_ctl2 & SVM_MISC2_ENABLE_V_LBR));
if (enable_lbrv && !current_enable_lbrv)
__svm_enable_lbrv(vcpu);
@@ -1009,6 +1012,14 @@ void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
preempt_enable();
}
+static bool svm_has_pending_gif_event(struct vcpu_svm *svm)
+{
+ return svm->vcpu.arch.smi_pending ||
+ svm->vcpu.arch.nmi_pending ||
+ kvm_cpu_has_injectable_intr(&svm->vcpu) ||
+ kvm_apic_has_pending_init_or_sipi(&svm->vcpu);
+}
+
/* Evaluate instruction intercepts that depend on guest CPUID features. */
static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
{
@@ -1034,22 +1045,50 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
}
/*
- * No need to toggle VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK here, it is
- * always set if vls is enabled. If the intercepts are set, the bit is
- * meaningless anyway.
+ * Intercept instructions that #UD if EFER.SVME=0, as SVME must be set
+ * even when running the guest, i.e. hardware will only ever see
+ * EFER.SVME=1.
+ *
+ * No need to toggle any of the vgif/vls/etc. enable bits here, as they
+ * are set when the VMCB is initialized and never cleared (if the
+ * relevant intercepts are set, the enablements are meaningless anyway).
+ *
+ * FIXME: When #GP is not intercepted, a #GP on these instructions (e.g.
+ * due to CPL > 0) could be injected by hardware before the instruction
+ * is intercepted, leading to #GP taking precedence over #UD from the
+ * guest's perspective.
*/
- if (guest_cpuid_is_intel_compatible(vcpu)) {
+ if (!(vcpu->arch.efer & EFER_SVME)) {
svm_set_intercept(svm, INTERCEPT_VMLOAD);
svm_set_intercept(svm, INTERCEPT_VMSAVE);
+ svm_set_intercept(svm, INTERCEPT_CLGI);
+ svm_set_intercept(svm, INTERCEPT_STGI);
} else {
/*
* If hardware supports Virtual VMLOAD VMSAVE then enable it
* in VMCB and clear intercepts to avoid #VMEXIT.
*/
- if (vls) {
+ if (guest_cpuid_is_intel_compatible(vcpu)) {
+ svm_set_intercept(svm, INTERCEPT_VMLOAD);
+ svm_set_intercept(svm, INTERCEPT_VMSAVE);
+ } else if (vls) {
svm_clr_intercept(svm, INTERCEPT_VMLOAD);
svm_clr_intercept(svm, INTERCEPT_VMSAVE);
}
+
+ /*
+ * Process pending events when clearing STGI/CLGI intercepts if
+ * there's at least one pending event that is masked by GIF, so
+ * that KVM re-evaluates if the intercept needs to be set again
+ * to track when GIF is re-enabled (e.g. for NMI injection).
+ */
+ if (vgif) {
+ svm_clr_intercept(svm, INTERCEPT_CLGI);
+ svm_clr_intercept(svm, INTERCEPT_STGI);
+
+ if (svm_has_pending_gif_event(svm))
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ }
}
if (kvm_need_rdpmc_intercept(vcpu))
@@ -1077,8 +1116,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
svm_set_intercept(svm, INTERCEPT_CR3_WRITE);
svm_set_intercept(svm, INTERCEPT_CR4_WRITE);
- if (!kvm_vcpu_apicv_active(vcpu))
- svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
+ svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
set_dr_intercepts(svm);
@@ -1163,7 +1201,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
if (npt_enabled) {
/* Setup VMCB for Nested Paging */
- control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
+ control->misc_ctl |= SVM_MISC_ENABLE_NP;
svm_clr_intercept(svm, INTERCEPT_INVLPG);
clr_exception_intercept(svm, PF_VECTOR);
svm_clr_intercept(svm, INTERCEPT_CR3_READ);
@@ -1189,25 +1227,22 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
svm->vmcb->control.erap_ctl |= ERAP_CONTROL_ALLOW_LARGER_RAP;
- if (kvm_vcpu_apicv_active(vcpu))
+ if (enable_apicv && irqchip_in_kernel(vcpu->kvm))
avic_init_vmcb(svm, vmcb);
if (vnmi)
svm->vmcb->control.int_ctl |= V_NMI_ENABLE_MASK;
- if (vgif) {
- svm_clr_intercept(svm, INTERCEPT_STGI);
- svm_clr_intercept(svm, INTERCEPT_CLGI);
+ if (vgif)
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
- }
if (vls)
- svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
if (vcpu->kvm->arch.bus_lock_detection_enabled)
svm_set_intercept(svm, INTERCEPT_BUSLOCK);
- if (sev_guest(vcpu->kvm))
+ if (is_sev_guest(vcpu))
sev_init_vmcb(svm, init_event);
svm_hv_init_vmcb(vmcb);
@@ -1381,7 +1416,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
sev_es_unmap_ghcb(svm);
if (svm->guest_state_loaded)
@@ -1392,7 +1427,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* or subsequent vmload of host save area.
*/
vmsave(sd->save_area_pa);
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
sev_es_prepare_switch_to_guest(svm, sev_es_host_save_area(sd));
if (tsc_scaling)
@@ -1405,7 +1440,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* all CPUs support TSC_AUX virtualization).
*/
if (likely(tsc_aux_uret_slot >= 0) &&
- (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
+ (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !is_sev_es_guest(vcpu)))
kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE) &&
@@ -1472,7 +1507,7 @@ static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
{
struct vmcb *vmcb = to_svm(vcpu)->vmcb;
- return sev_es_guest(vcpu->kvm)
+ return is_sev_es_guest(vcpu)
? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK
: kvm_get_rflags(vcpu) & X86_EFLAGS_IF;
}
@@ -1706,7 +1741,7 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
* contents of the VMSA, and future VMCB save area updates won't be
* seen.
*/
- if (sev_es_guest(vcpu->kvm)) {
+ if (is_sev_es_guest(vcpu)) {
svm->vmcb->save.cr3 = cr3;
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
}
@@ -1761,7 +1796,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
* SEV-ES guests must always keep the CR intercepts cleared. CR
* tracking is done using the CR write traps.
*/
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
return;
if (hcr0 == cr0) {
@@ -1872,7 +1907,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
+ if (WARN_ON_ONCE(is_sev_es_guest(vcpu)))
return;
get_debugreg(vcpu->arch.db[0], 0);
@@ -1951,7 +1986,7 @@ static int npf_interception(struct kvm_vcpu *vcpu)
}
}
- if (sev_snp_guest(vcpu->kvm) && (error_code & PFERR_GUEST_ENC_MASK))
+ if (is_sev_snp_guest(vcpu) && (error_code & PFERR_GUEST_ENC_MASK))
error_code |= PFERR_PRIVATE_ACCESS;
trace_kvm_page_fault(vcpu, gpa, error_code);
@@ -2096,7 +2131,7 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
* The VM save area for SEV-ES guests has already been encrypted so it
* cannot be reinitialized, i.e. synthesizing INIT is futile.
*/
- if (!sev_es_guest(vcpu->kvm)) {
+ if (!is_sev_es_guest(vcpu)) {
clear_page(svm->vmcb);
#ifdef CONFIG_KVM_SMM
if (is_smm(vcpu))
@@ -2123,7 +2158,7 @@ static int io_interception(struct kvm_vcpu *vcpu)
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
if (string) {
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
return sev_es_string_io(svm, size, port, in);
else
return kvm_emulate_instruction(vcpu, 0);
@@ -2152,6 +2187,7 @@ static int intr_interception(struct kvm_vcpu *vcpu)
static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
{
+ u64 vmcb12_gpa = kvm_register_read(vcpu, VCPU_REGS_RAX);
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb12;
struct kvm_host_map map;
@@ -2160,13 +2196,14 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
if (nested_svm_check_permissions(vcpu))
return 1;
- ret = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
- if (ret) {
- if (ret == -EINVAL)
- kvm_inject_gp(vcpu, 0);
+ if (!page_address_valid(vcpu, vmcb12_gpa)) {
+ kvm_inject_gp(vcpu, 0);
return 1;
}
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map))
+ return kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
+
vmcb12 = map.hva;
ret = kvm_skip_emulated_instruction(vcpu);
@@ -2203,58 +2240,28 @@ static int vmrun_interception(struct kvm_vcpu *vcpu)
return nested_svm_vmrun(vcpu);
}
-enum {
- NONE_SVM_INSTR,
- SVM_INSTR_VMRUN,
- SVM_INSTR_VMLOAD,
- SVM_INSTR_VMSAVE,
-};
-
-/* Return NONE_SVM_INSTR if not SVM instrs, otherwise return decode result */
-static int svm_instr_opcode(struct kvm_vcpu *vcpu)
+/* Return 0 if not SVM instr, otherwise return associated exit_code */
+static u64 svm_get_decoded_instr_exit_code(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
- return NONE_SVM_INSTR;
+ return 0;
+
+ BUILD_BUG_ON(!SVM_EXIT_VMRUN || !SVM_EXIT_VMLOAD || !SVM_EXIT_VMSAVE);
switch (ctxt->modrm) {
case 0xd8: /* VMRUN */
- return SVM_INSTR_VMRUN;
+ return SVM_EXIT_VMRUN;
case 0xda: /* VMLOAD */
- return SVM_INSTR_VMLOAD;
+ return SVM_EXIT_VMLOAD;
case 0xdb: /* VMSAVE */
- return SVM_INSTR_VMSAVE;
+ return SVM_EXIT_VMSAVE;
default:
break;
}
- return NONE_SVM_INSTR;
-}
-
-static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
-{
- const int guest_mode_exit_codes[] = {
- [SVM_INSTR_VMRUN] = SVM_EXIT_VMRUN,
- [SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
- [SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
- };
- int (*const svm_instr_handlers[])(struct kvm_vcpu *vcpu) = {
- [SVM_INSTR_VMRUN] = vmrun_interception,
- [SVM_INSTR_VMLOAD] = vmload_interception,
- [SVM_INSTR_VMSAVE] = vmsave_interception,
- };
- struct vcpu_svm *svm = to_svm(vcpu);
- int ret;
-
- if (is_guest_mode(vcpu)) {
- /* Returns '1' or -errno on failure, '0' on success. */
- ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]);
- if (ret)
- return ret;
- return 1;
- }
- return svm_instr_handlers[opcode](vcpu);
+ return 0;
}
/*
@@ -2269,7 +2276,7 @@ static int gp_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u32 error_code = svm->vmcb->control.exit_info_1;
- int opcode;
+ u64 svm_exit_code;
/* Both #GP cases have zero error_code */
if (error_code)
@@ -2279,27 +2286,37 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
goto reinject;
- opcode = svm_instr_opcode(vcpu);
+ /* FIXME: Handle SVM instructions through the emulator */
+ svm_exit_code = svm_get_decoded_instr_exit_code(vcpu);
+ if (svm_exit_code) {
+ if (!is_guest_mode(vcpu))
+ return svm_invoke_exit_handler(vcpu, svm_exit_code);
+
+ if (nested_svm_check_permissions(vcpu))
+ return 1;
- if (opcode == NONE_SVM_INSTR) {
- if (!enable_vmware_backdoor)
+ if (!page_address_valid(vcpu, kvm_register_read(vcpu, VCPU_REGS_RAX)))
goto reinject;
/*
- * VMware backdoor emulation on #GP interception only handles
- * IN{S}, OUT{S}, and RDPMC.
+ * FIXME: Only synthesize a #VMEXIT if L1 sets the intercept,
+ * but only after the VMLOAD/VMSAVE exit handlers can properly
+ * handle VMLOAD/VMSAVE from L2 with VLS enabled in L1 (i.e.
+ * RAX is an L2 GPA that needs translation through L1's NPT).
*/
- if (!is_guest_mode(vcpu))
- return kvm_emulate_instruction(vcpu,
- EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
- } else {
- /* All SVM instructions expect page aligned RAX */
- if (svm->vmcb->save.rax & ~PAGE_MASK)
- goto reinject;
-
- return emulate_svm_instr(vcpu, opcode);
+ nested_svm_simple_vmexit(svm, svm_exit_code);
+ return 1;
}
+ /*
+ * VMware backdoor emulation on #GP interception only handles
+ * IN{S}, OUT{S}, and RDPMC, and only for L1.
+ */
+ if (!enable_vmware_backdoor || is_guest_mode(vcpu))
+ goto reinject;
+
+ return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
+
reinject:
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
return 1;
@@ -2320,10 +2337,7 @@ void svm_set_gif(struct vcpu_svm *svm, bool value)
svm_clear_vintr(svm);
enable_gif(svm);
- if (svm->vcpu.arch.smi_pending ||
- svm->vcpu.arch.nmi_pending ||
- kvm_cpu_has_injectable_intr(&svm->vcpu) ||
- kvm_apic_has_pending_init_or_sipi(&svm->vcpu))
+ if (svm_has_pending_gif_event(svm))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
} else {
disable_gif(svm);
@@ -2367,6 +2381,9 @@ static int invlpga_interception(struct kvm_vcpu *vcpu)
gva_t gva = kvm_rax_read(vcpu);
u32 asid = kvm_rcx_read(vcpu);
+ if (nested_svm_check_permissions(vcpu))
+ return 1;
+
/* FIXME: Handle an address size prefix. */
if (!is_long_mode(vcpu))
gva = (u32)gva;
@@ -2455,13 +2472,13 @@ static int task_switch_interception(struct kvm_vcpu *vcpu)
static void svm_clr_iret_intercept(struct vcpu_svm *svm)
{
- if (!sev_es_guest(svm->vcpu.kvm))
+ if (!is_sev_es_guest(&svm->vcpu))
svm_clr_intercept(svm, INTERCEPT_IRET);
}
static void svm_set_iret_intercept(struct vcpu_svm *svm)
{
- if (!sev_es_guest(svm->vcpu.kvm))
+ if (!is_sev_es_guest(&svm->vcpu))
svm_set_intercept(svm, INTERCEPT_IRET);
}
@@ -2469,7 +2486,7 @@ static int iret_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
+ WARN_ON_ONCE(is_sev_es_guest(vcpu));
++vcpu->stat.nmi_window_exits;
svm->awaiting_iret_completion = true;
@@ -2643,7 +2660,7 @@ static int dr_interception(struct kvm_vcpu *vcpu)
* SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
* for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
*/
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
return 1;
if (vcpu->guest_debug == 0) {
@@ -2674,9 +2691,11 @@ static int dr_interception(struct kvm_vcpu *vcpu)
static int cr8_write_interception(struct kvm_vcpu *vcpu)
{
+ u8 cr8_prev = kvm_get_cr8(vcpu);
int r;
- u8 cr8_prev = kvm_get_cr8(vcpu);
+ WARN_ON_ONCE(kvm_vcpu_apicv_active(vcpu));
+
/* instruction emulation calls kvm_set_cr8() */
r = cr_interception(vcpu);
if (lapic_in_kernel(vcpu))
@@ -2722,10 +2741,28 @@ static int svm_get_feature_msr(u32 msr, u64 *data)
return 0;
}
+static u64 *svm_vmcb_lbr(struct vcpu_svm *svm, u32 msr)
+{
+ switch (msr) {
+ case MSR_IA32_LASTBRANCHFROMIP:
+ return &svm->vmcb->save.br_from;
+ case MSR_IA32_LASTBRANCHTOIP:
+ return &svm->vmcb->save.br_to;
+ case MSR_IA32_LASTINTFROMIP:
+ return &svm->vmcb->save.last_excp_from;
+ case MSR_IA32_LASTINTTOIP:
+ return &svm->vmcb->save.last_excp_to;
+ default:
+ break;
+ }
+ KVM_BUG_ON(1, svm->vcpu.kvm);
+ return &svm->vmcb->save.br_from;
+}
+
static bool sev_es_prevent_msr_access(struct kvm_vcpu *vcpu,
struct msr_data *msr_info)
{
- return sev_es_guest(vcpu->kvm) && vcpu->arch.guest_state_protected &&
+ return is_sev_es_guest(vcpu) && vcpu->arch.guest_state_protected &&
msr_info->index != MSR_IA32_XSS &&
!msr_write_intercepted(vcpu, msr_info->index);
}
@@ -2795,19 +2832,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->tsc_aux;
break;
case MSR_IA32_DEBUGCTLMSR:
- msr_info->data = svm->vmcb->save.dbgctl;
+ msr_info->data = lbrv ? svm->vmcb->save.dbgctl : 0;
break;
case MSR_IA32_LASTBRANCHFROMIP:
- msr_info->data = svm->vmcb->save.br_from;
- break;
case MSR_IA32_LASTBRANCHTOIP:
- msr_info->data = svm->vmcb->save.br_to;
- break;
case MSR_IA32_LASTINTFROMIP:
- msr_info->data = svm->vmcb->save.last_excp_from;
- break;
case MSR_IA32_LASTINTTOIP:
- msr_info->data = svm->vmcb->save.last_excp_to;
+ msr_info->data = lbrv ? *svm_vmcb_lbr(svm, msr_info->index) : 0;
break;
case MSR_VM_HSAVE_PA:
msr_info->data = svm->nested.hsave_msr;
@@ -2861,7 +2892,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
+ if (!err || !is_sev_es_guest(vcpu) || WARN_ON_ONCE(!svm->sev_es.ghcb))
return kvm_complete_insn_gp(vcpu, err);
svm_vmgexit_inject_exception(svm, X86_TRAP_GP);
@@ -3042,7 +3073,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* required in this case because TSC_AUX is restored on #VMEXIT
* from the host save area.
*/
- if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm))
+ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && is_sev_es_guest(vcpu))
break;
/*
@@ -3082,6 +3113,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
svm_update_lbrv(vcpu);
break;
+ case MSR_IA32_LASTBRANCHFROMIP:
+ case MSR_IA32_LASTBRANCHTOIP:
+ case MSR_IA32_LASTINTFROMIP:
+ case MSR_IA32_LASTINTTOIP:
+ if (!lbrv)
+ return KVM_MSR_RET_UNSUPPORTED;
+ if (!msr->host_initiated)
+ return 1;
+ *svm_vmcb_lbr(svm, ecx) = data;
+ vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
+ break;
case MSR_VM_HSAVE_PA:
/*
* Old kernels did not validate the value written to
@@ -3130,20 +3172,6 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
svm_clear_vintr(to_svm(vcpu));
- /*
- * If not running nested, for AVIC, the only reason to end up here is ExtINTs.
- * In this case AVIC was temporarily disabled for
- * requesting the IRQ window and we have to re-enable it.
- *
- * If running nested, still remove the VM wide AVIC inhibit to
- * support case in which the interrupt window was requested when the
- * vCPU was not running nested.
-
- * All vCPUs which run still run nested, will remain to have their
- * AVIC still inhibited due to per-cpu AVIC inhibition.
- */
- kvm_clear_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
-
++vcpu->stat.irq_window_exits;
return 1;
}
@@ -3156,7 +3184,7 @@ static int pause_interception(struct kvm_vcpu *vcpu)
* vcpu->arch.preempted_in_kernel can never be true. Just
* set in_kernel to false as well.
*/
- in_kernel = !sev_es_guest(vcpu->kvm) && svm_get_cpl(vcpu) == 0;
+ in_kernel = !is_sev_es_guest(vcpu) && svm_get_cpl(vcpu) == 0;
grow_ple_window(vcpu);
@@ -3223,11 +3251,27 @@ static int bus_lock_exit(struct kvm_vcpu *vcpu)
vcpu->arch.complete_userspace_io = complete_userspace_buslock;
if (is_guest_mode(vcpu))
- svm->nested.ctl.bus_lock_rip = vcpu->arch.cui_linear_rip;
+ svm->nested.last_bus_lock_rip = vcpu->arch.cui_linear_rip;
return 0;
}
+static int vmmcall_interception(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Inject a #UD if L2 is active and the VMMCALL isn't a Hyper-V TLB
+ * hypercall, as VMMCALL #UDs if it's not intercepted, and this path is
+ * reachable if and only if L1 doesn't want to intercept VMMCALL or has
+ * enabled L0 (KVM) handling of Hyper-V L2 TLB flush hypercalls.
+ */
+ if (is_guest_mode(vcpu) && !nested_svm_is_l2_tlb_flush_hcall(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ return kvm_emulate_hypercall(vcpu);
+}
+
static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_READ_CR0] = cr_interception,
[SVM_EXIT_READ_CR3] = cr_interception,
@@ -3278,7 +3322,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
[SVM_EXIT_VMRUN] = vmrun_interception,
- [SVM_EXIT_VMMCALL] = kvm_emulate_hypercall,
+ [SVM_EXIT_VMMCALL] = vmmcall_interception,
[SVM_EXIT_VMLOAD] = vmload_interception,
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
@@ -3321,9 +3365,9 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
guard(mutex)(&vmcb_dump_mutex);
- vm_type = sev_snp_guest(vcpu->kvm) ? "SEV-SNP" :
- sev_es_guest(vcpu->kvm) ? "SEV-ES" :
- sev_guest(vcpu->kvm) ? "SEV" : "SVM";
+ vm_type = is_sev_snp_guest(vcpu) ? "SEV-SNP" :
+ is_sev_es_guest(vcpu) ? "SEV-ES" :
+ is_sev_guest(vcpu) ? "SEV" : "SVM";
pr_err("%s vCPU%u VMCB %p, last attempted VMRUN on CPU %d\n",
vm_type, vcpu->vcpu_id, svm->current_vmcb->ptr, vcpu->arch.last_vmentry_cpu);
@@ -3353,13 +3397,13 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
- pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
+ pr_err("%-20s%lld\n", "misc_ctl:", control->misc_ctl);
pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
- pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
+ pr_err("%-20s%lld\n", "misc_ctl2:", control->misc_ctl2);
pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
@@ -3368,7 +3412,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%016llx\n", "allowed_sev_features:", control->allowed_sev_features);
pr_err("%-20s%016llx\n", "guest_sev_features:", control->guest_sev_features);
- if (sev_es_guest(vcpu->kvm)) {
+ if (is_sev_es_guest(vcpu)) {
save = sev_decrypt_vmsa(vcpu);
if (!save)
goto no_vmsa;
@@ -3451,7 +3495,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
"excp_from:", save->last_excp_from,
"excp_to:", save->last_excp_to);
- if (sev_es_guest(vcpu->kvm)) {
+ if (is_sev_es_guest(vcpu)) {
struct sev_es_save_area *vmsa = (struct sev_es_save_area *)save;
pr_err("%-15s %016llx\n",
@@ -3512,7 +3556,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
}
no_vmsa:
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
sev_free_decrypted_vmsa(vcpu, save);
}
@@ -3601,7 +3645,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
struct kvm_run *kvm_run = vcpu->run;
/* SEV-ES guests must use the CR write traps to track CR registers. */
- if (!sev_es_guest(vcpu->kvm)) {
+ if (!is_sev_es_guest(vcpu)) {
if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
@@ -3637,6 +3681,16 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return svm_invoke_exit_handler(vcpu, svm->vmcb->control.exit_code);
}
+static void svm_set_nested_run_soft_int_state(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->soft_int_csbase = svm->vmcb->save.cs.base;
+ svm->soft_int_old_rip = kvm_rip_read(vcpu);
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+ svm->soft_int_next_rip = kvm_rip_read(vcpu);
+}
+
static int pre_svm_run(struct kvm_vcpu *vcpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
@@ -3653,7 +3707,7 @@ static int pre_svm_run(struct kvm_vcpu *vcpu)
svm->current_vmcb->cpu = vcpu->cpu;
}
- if (sev_guest(vcpu->kvm))
+ if (is_sev_guest(vcpu))
return pre_sev_run(svm, vcpu->cpu);
/* FIXME: handle wraparound of asid_generation */
@@ -3732,12 +3786,59 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
type = SVM_EVTINJ_TYPE_INTR;
}
+ /*
+ * If AVIC was inhibited in order to detect an IRQ window, and there's
+ * no other injectable interrupts pending or L2 is active (see below),
+ * then drop the inhibit as the window has served its purpose.
+ *
+ * If L2 is active, this path is reachable if L1 is not intercepting
+ * IRQs, i.e. if KVM is injecting L1 IRQs into L2. AVIC is locally
+ * inhibited while L2 is active; drop the VM-wide inhibit to optimize
+ * the case in which the interrupt window was requested while L1 was
+ * active (the vCPU was not running nested).
+ */
+ if (svm->avic_irq_window &&
+ (!kvm_cpu_has_injectable_intr(vcpu) || is_guest_mode(vcpu))) {
+ svm->avic_irq_window = false;
+ kvm_dec_apicv_irq_window_req(svm->vcpu.kvm);
+ }
+
trace_kvm_inj_virq(intr->nr, intr->soft, reinjected);
++vcpu->stat.irq_injections;
svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type;
}
+static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!is_guest_mode(vcpu) || !vcpu->arch.nested_run_pending)
+ return;
+
+ /*
+ * If nrips is supported in hardware but not exposed to L1, stuff the
+ * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
+ * responsible for advancing RIP prior to injecting the event). Once L2
+ * runs after L1 executes VMRUN, NextRIP is updated by the CPU and/or
+ * KVM, and this is no longer needed.
+ *
+ * This is done here (as opposed to when preparing vmcb02) to use the
+ * most up-to-date value of RIP regardless of the order of restoring
+ * registers and nested state in the vCPU save+restore path.
+ */
+ if (boot_cpu_has(X86_FEATURE_NRIPS) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+ svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
+
+ /*
+ * Similarly, initialize the soft int metadata here to use the most
+ * up-to-date values of RIP and CS base, regardless of restore order.
+ */
+ if (svm->soft_int_injected)
+ svm_set_nested_run_soft_int_state(vcpu);
+}
+
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
int trig_mode, int vector)
{
@@ -3796,7 +3897,7 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
* SEV-ES guests must always keep the CR intercepts cleared. CR
* tracking is done using the CR write traps.
*/
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
return;
if (nested_svm_virtualize_tpr(vcpu))
@@ -3860,7 +3961,7 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (svm->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
return -EBUSY;
if (svm_nmi_blocked(vcpu))
@@ -3902,7 +4003,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (svm->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
return -EBUSY;
if (svm_interrupt_blocked(vcpu))
@@ -3932,17 +4033,28 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
*/
if (vgif || gif_set(svm)) {
/*
- * IRQ window is not needed when AVIC is enabled,
- * unless we have pending ExtINT since it cannot be injected
- * via AVIC. In such case, KVM needs to temporarily disable AVIC,
- * and fallback to injecting IRQ via V_IRQ.
+ * KVM only enables IRQ windows when AVIC is enabled if there's
+ * pending ExtINT since it cannot be injected via AVIC (ExtINT
+ * bypasses the local APIC). V_IRQ is ignored by hardware when
+ * AVIC is enabled, and so KVM needs to temporarily disable
+ * AVIC in order to detect when it's ok to inject the ExtINT.
*
- * If running nested, AVIC is already locally inhibited
- * on this vCPU, therefore there is no need to request
- * the VM wide AVIC inhibition.
+ * If running nested, AVIC is already locally inhibited on this
+ * vCPU (L2 vCPUs use a different MMU that never maps the AVIC
+ * backing page), therefore there is no need to increment the
+ * VM-wide AVIC inhibit. KVM will re-evaluate events when the
+ * vCPU exits to L1 and enable an IRQ window if the ExtINT is
+ * still pending.
+ *
+ * Note, the IRQ window inhibit needs to be updated even if
+ * AVIC is inhibited for a different reason, as KVM needs to
+ * keep AVIC inhibited if the other reason is cleared and there
+ * is still an injectable interrupt pending.
*/
- if (!is_guest_mode(vcpu))
- kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
+ if (enable_apicv && !svm->avic_irq_window && !is_guest_mode(vcpu)) {
+ svm->avic_irq_window = true;
+ kvm_inc_apicv_irq_window_req(vcpu->kvm);
+ }
svm_set_vintr(svm);
}
@@ -3985,7 +4097,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
* ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
* supported NAEs in the GHCB protocol.
*/
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
return;
if (!gif_set(svm)) {
@@ -4106,6 +4218,18 @@ static void svm_complete_soft_interrupt(struct kvm_vcpu *vcpu, u8 vector,
struct vcpu_svm *svm = to_svm(vcpu);
/*
+ * Initialize the soft int fields *before* reading them below if KVM
+ * aborted entry to the guest with a nested VMRUN pending. To ensure
+ * KVM uses up-to-date values for RIP and CS base across save/restore,
+ * regardless of restore order, KVM waits to set the soft int fields
+ * until VMRUN is imminent. But when canceling injection, KVM requeues
+ * the soft int and will reinject it via the standard injection flow,
+ * and so KVM needs to grab the state from the pending nested VMRUN.
+ */
+ if (is_guest_mode(vcpu) && vcpu->arch.nested_run_pending)
+ svm_set_nested_run_soft_int_state(vcpu);
+
+ /*
* If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip that's
* associated with the original soft exception/interrupt. next_rip is
* cleared on all exits that can occur while vectoring an event, so KVM
@@ -4215,8 +4339,10 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
+#ifdef CONFIG_KVM_AMD_SEV
if (to_kvm_sev_info(vcpu->kvm)->need_init)
return -EINVAL;
+#endif
return 1;
}
@@ -4273,7 +4399,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
amd_clear_divider();
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
sev_es_host_save_area(sd));
else
@@ -4334,6 +4460,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS))
svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
+ svm_fixup_nested_rips(vcpu);
+
svm_hv_update_vp_id(svm->vmcb, vcpu);
/*
@@ -4354,7 +4482,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
* VM-Exit), as running with the host's DEBUGCTL can negatively affect
* guest state and can even be fatal, e.g. due to Bus Lock Detect.
*/
- if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
+ if (!(svm->vmcb->control.misc_ctl2 & SVM_MISC2_ENABLE_V_LBR) &&
vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
update_debugctlmsr(svm->vmcb->save.dbgctl);
@@ -4374,7 +4502,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
x86_spec_ctrl_restore_host(svm->virt_spec_ctrl);
- if (!sev_es_guest(vcpu->kvm)) {
+ if (!is_sev_es_guest(vcpu)) {
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
@@ -4385,7 +4513,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
+ if (!(svm->vmcb->control.misc_ctl2 & SVM_MISC2_ENABLE_V_LBR) &&
vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
update_debugctlmsr(vcpu->arch.host_debugctl);
@@ -4403,11 +4531,11 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
nested_sync_control_from_vmcb02(svm);
/* Track VMRUNs that have made past consistency checking */
- if (svm->nested.nested_run_pending &&
+ if (vcpu->arch.nested_run_pending &&
!svm_is_vmrun_failure(svm->vmcb->control.exit_code))
++vcpu->stat.nested_run;
- svm->nested.nested_run_pending = 0;
+ vcpu->arch.nested_run_pending = 0;
}
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
@@ -4435,6 +4563,16 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
svm_complete_interrupts(vcpu);
+ /*
+ * Update the cache after completing interrupts to get an accurate
+ * NextRIP, e.g. when re-injecting a soft interrupt.
+ *
+ * FIXME: Rework svm_get_nested_state() to not pull data from the
+ * cache (except for maybe int_ctl).
+ */
+ if (is_guest_mode(vcpu))
+ svm->nested.ctl.next_rip = svm->vmcb->control.next_rip;
+
return svm_exit_handlers_fastpath(vcpu);
}
@@ -4487,9 +4625,17 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
case MSR_IA32_SMBASE:
if (!IS_ENABLED(CONFIG_KVM_SMM))
return false;
- /* SEV-ES guests do not support SMM, so report false */
- if (kvm && sev_es_guest(kvm))
+
+#ifdef CONFIG_KVM_AMD_SEV
+ /*
+ * KVM can't access register state to emulate SMM for SEV-ES
+ * guests. Consuming stale data here is "fine", as KVM only
+ * checks for MSR_IA32_SMBASE support without a vCPU when
+ * userspace is querying KVM_CAP_X86_SMM.
+ */
+ if (kvm && ____sev_es_guest(kvm))
return false;
+#endif
break;
default:
break;
@@ -4524,7 +4670,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
if (guest_cpuid_is_intel_compatible(vcpu))
guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
- if (sev_guest(vcpu->kvm))
+ if (is_sev_guest(vcpu))
sev_vcpu_after_set_cpuid(svm);
}
@@ -4766,7 +4912,7 @@ bool svm_smi_blocked(struct kvm_vcpu *vcpu)
static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (svm->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
return -EBUSY;
if (svm_smi_blocked(vcpu))
@@ -4783,7 +4929,6 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map_save;
- int ret;
if (!is_guest_mode(vcpu))
return 0;
@@ -4803,9 +4948,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
- ret = nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
- if (ret)
- return ret;
+ nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
/*
* KVM uses VMCB01 to store L1 host state while L2 runs but
@@ -4879,12 +5022,15 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
vmcb12 = map.hva;
nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
- ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false);
- if (ret)
+ if (nested_svm_check_cached_vmcb12(vcpu) < 0)
+ goto unmap_save;
+
+ if (enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, false) != 0)
goto unmap_save;
- svm->nested.nested_run_pending = 1;
+ ret = 0;
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;
unmap_save:
kvm_vcpu_unmap(vcpu, &map_save);
@@ -4920,7 +5066,7 @@ static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
return X86EMUL_UNHANDLEABLE_VECTORING;
/* Emulation is always possible when KVM has access to all guest state. */
- if (!sev_guest(vcpu->kvm))
+ if (!is_sev_guest(vcpu))
return X86EMUL_CONTINUE;
/* #UD and #GP should never be intercepted for SEV guests. */
@@ -4932,7 +5078,7 @@ static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
* Emulation is impossible for SEV-ES guests as KVM doesn't have access
* to guest register state.
*/
- if (sev_es_guest(vcpu->kvm))
+ if (is_sev_es_guest(vcpu))
return X86EMUL_RETRY_INSTR;
/*
@@ -5069,7 +5215,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
- if (!sev_es_guest(vcpu->kvm))
+ if (!is_sev_es_guest(vcpu))
return kvm_vcpu_deliver_sipi_vector(vcpu, vector);
sev_vcpu_deliver_sipi_vector(vcpu, vector);
@@ -5085,17 +5231,7 @@ static void svm_vm_destroy(struct kvm *kvm)
static int svm_vm_init(struct kvm *kvm)
{
- int type = kvm->arch.vm_type;
-
- if (type != KVM_X86_DEFAULT_VM &&
- type != KVM_X86_SW_PROTECTED_VM) {
- kvm->arch.has_protected_state =
- (type == KVM_X86_SEV_ES_VM || type == KVM_X86_SNP_VM);
- to_kvm_sev_info(kvm)->need_init = true;
-
- kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM);
- kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
- }
+ sev_vm_init(kvm);
if (!pause_filter_count || !pause_filter_thresh)
kvm_disable_exits(kvm, KVM_X86_DISABLE_EXITS_PAUSE);
@@ -5405,14 +5541,10 @@ static __init int svm_hardware_setup(void)
pr_err_ratelimited("NX (Execute Disable) not supported\n");
return -EOPNOTSUPP;
}
- kvm_enable_efer_bits(EFER_NX);
kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
XFEATURE_MASK_BNDCSR);
- if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
- kvm_enable_efer_bits(EFER_FFXSR);
-
if (tsc_scaling) {
if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
tsc_scaling = false;
@@ -5426,9 +5558,6 @@ static __init int svm_hardware_setup(void)
tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
- if (boot_cpu_has(X86_FEATURE_AUTOIBRS))
- kvm_enable_efer_bits(EFER_AUTOIBRS);
-
/* Check for pause filtering support */
if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
pause_filter_count = 0;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index ebd7b36b1ceb..a10668d17a16 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -92,6 +92,7 @@ enum {
/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
+#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info {
bool active; /* SEV enabled guest */
bool es_active; /* SEV-ES enabled guest */
@@ -117,6 +118,7 @@ struct kvm_sev_info {
cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
bool snp_certs_enabled; /* SNP certificate-fetching support. */
};
+#endif
struct kvm_svm {
struct kvm kvm;
@@ -127,7 +129,9 @@ struct kvm_svm {
u64 *avic_physical_id_table;
struct hlist_node hnode;
+#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info sev_info;
+#endif
};
struct kvm_vcpu;
@@ -140,12 +144,32 @@ struct kvm_vmcb_info {
};
struct vmcb_save_area_cached {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg idtr;
+ u8 cpl;
u64 efer;
u64 cr4;
u64 cr3;
u64 cr0;
u64 dr7;
u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u64 rsp;
+ u64 s_cet;
+ u64 ssp;
+ u64 isst_addr;
+ u64 rax;
+ u64 cr2;
+ u64 dbgctl;
+ u64 br_from;
+ u64 br_to;
+ u64 last_excp_from;
+ u64 last_excp_to;
};
struct vmcb_ctrl_area_cached {
@@ -166,14 +190,13 @@ struct vmcb_ctrl_area_cached {
u64 exit_info_2;
u32 exit_int_info;
u32 exit_int_info_err;
- u64 nested_ctl;
+ u64 misc_ctl;
u32 event_inj;
u32 event_inj_err;
u64 next_rip;
u64 nested_cr3;
- u64 virt_ext;
+ u64 misc_ctl2;
u32 clean;
- u64 bus_lock_rip;
union {
#if IS_ENABLED(CONFIG_HYPERV) || IS_ENABLED(CONFIG_KVM_HYPERV)
struct hv_vmcb_enlightenments hv_enlightenments;
@@ -188,6 +211,7 @@ struct svm_nested_state {
u64 vm_cr_msr;
u64 vmcb12_gpa;
u64 last_vmcb12_gpa;
+ u64 last_bus_lock_rip;
/*
* The MSR permissions map used for vmcb02, which is the merge result
@@ -195,10 +219,6 @@ struct svm_nested_state {
*/
void *msrpm;
- /* A VMRUN has started but has not yet been performed, so
- * we cannot inject a nested vmexit yet. */
- bool nested_run_pending;
-
/* cache for control fields of the guest */
struct vmcb_ctrl_area_cached ctl;
@@ -333,6 +353,7 @@ struct vcpu_svm {
bool guest_state_loaded;
+ bool avic_irq_window;
bool x2avic_msrs_intercepted;
bool lbr_msrs_intercepted;
@@ -357,41 +378,63 @@ struct svm_cpu_data {
DECLARE_PER_CPU(struct svm_cpu_data, svm_data);
-void recalc_intercepts(struct vcpu_svm *svm);
-
static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
return container_of(kvm, struct kvm_svm, kvm);
}
+#ifdef CONFIG_KVM_AMD_SEV
static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm)
{
return &to_kvm_svm(kvm)->sev_info;
}
-#ifdef CONFIG_KVM_AMD_SEV
-static __always_inline bool sev_guest(struct kvm *kvm)
+static __always_inline bool ____sev_guest(struct kvm *kvm)
{
return to_kvm_sev_info(kvm)->active;
}
-static __always_inline bool sev_es_guest(struct kvm *kvm)
+static __always_inline bool ____sev_es_guest(struct kvm *kvm)
{
struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
return sev->es_active && !WARN_ON_ONCE(!sev->active);
}
-static __always_inline bool sev_snp_guest(struct kvm *kvm)
+static __always_inline bool ____sev_snp_guest(struct kvm *kvm)
{
struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
return (sev->vmsa_features & SVM_SEV_FEAT_SNP_ACTIVE) &&
- !WARN_ON_ONCE(!sev_es_guest(kvm));
+ !WARN_ON_ONCE(!____sev_es_guest(kvm));
+}
+
+static __always_inline bool is_sev_guest(struct kvm_vcpu *vcpu)
+{
+ return ____sev_guest(vcpu->kvm);
+}
+static __always_inline bool is_sev_es_guest(struct kvm_vcpu *vcpu)
+{
+ return ____sev_es_guest(vcpu->kvm);
+}
+
+static __always_inline bool is_sev_snp_guest(struct kvm_vcpu *vcpu)
+{
+ return ____sev_snp_guest(vcpu->kvm);
}
#else
-#define sev_guest(kvm) false
-#define sev_es_guest(kvm) false
-#define sev_snp_guest(kvm) false
+static __always_inline bool is_sev_guest(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+static __always_inline bool is_sev_es_guest(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
+static __always_inline bool is_sev_snp_guest(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
#endif
static inline bool ghcb_gpa_is_registered(struct vcpu_svm *svm, u64 val)
@@ -415,9 +458,9 @@ static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
vmcb->control.clean &= ~(1 << bit);
}
-static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
+static inline bool vmcb12_is_dirty(struct vmcb_ctrl_area_cached *control, int bit)
{
- return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
+ return !test_bit(bit, (unsigned long *)&control->clean);
}
static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
@@ -486,6 +529,22 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
}
+void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm);
+
+static inline void svm_mark_intercepts_dirty(struct vcpu_svm *svm)
+{
+ vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS);
+
+ /*
+ * If L2 is active, recalculate the intercepts for vmcb02 to account
+ * for the changes made to vmcb01. All intercept configuration is done
+ * for vmcb01 and then propagated to vmcb02 to combine KVM's intercepts
+ * with L1's intercepts (from the vmcb12 snapshot).
+ */
+ if (is_guest_mode(&svm->vcpu))
+ nested_vmcb02_recalc_intercepts(svm);
+}
+
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -493,7 +552,7 @@ static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
WARN_ON_ONCE(bit >= 32);
vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
- recalc_intercepts(svm);
+ svm_mark_intercepts_dirty(svm);
}
static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
@@ -503,7 +562,7 @@ static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
WARN_ON_ONCE(bit >= 32);
vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
- recalc_intercepts(svm);
+ svm_mark_intercepts_dirty(svm);
}
static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
@@ -512,7 +571,7 @@ static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
vmcb_set_intercept(&vmcb->control, bit);
- recalc_intercepts(svm);
+ svm_mark_intercepts_dirty(svm);
}
static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
@@ -521,7 +580,7 @@ static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
vmcb_clr_intercept(&vmcb->control, bit);
- recalc_intercepts(svm);
+ svm_mark_intercepts_dirty(svm);
}
static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
@@ -578,7 +637,7 @@ static inline bool gif_set(struct vcpu_svm *svm)
static inline bool nested_npt_enabled(struct vcpu_svm *svm)
{
- return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
+ return svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_NP;
}
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
@@ -713,8 +772,16 @@ static inline void *svm_vcpu_alloc_msrpm(void)
return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
}
+#define svm_copy_lbrs(to, from) \
+do { \
+ (to)->dbgctl = (from)->dbgctl; \
+ (to)->br_from = (from)->br_from; \
+ (to)->br_to = (from)->br_to; \
+ (to)->last_excp_from = (from)->last_excp_from; \
+ (to)->last_excp_to = (from)->last_excp_to; \
+} while (0)
+
void svm_vcpu_free_msrpm(void *msrpm);
-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);
@@ -776,8 +843,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
int __init nested_svm_init_msrpm_merge_offsets(void);
-int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
- u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, bool from_vmrun);
void svm_leave_nested(struct kvm_vcpu *vcpu);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
@@ -785,18 +851,19 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu);
void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
struct vmcb_save_area *from_save);
void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
-int nested_svm_vmexit(struct vcpu_svm *svm);
+void nested_svm_vmexit(struct vcpu_svm *svm);
-static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
+static inline void nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
{
svm->vmcb->control.exit_code = exit_code;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
- return nested_svm_vmexit(svm);
+ nested_svm_vmexit(svm);
}
int nested_svm_exit_handled(struct vcpu_svm *svm);
int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
+int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
@@ -885,6 +952,7 @@ static inline struct page *snp_safe_alloc_page(void)
int sev_vcpu_create(struct kvm_vcpu *vcpu);
void sev_free_vcpu(struct kvm_vcpu *vcpu);
+void sev_vm_init(struct kvm *kvm);
void sev_vm_destroy(struct kvm *kvm);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
@@ -911,6 +979,7 @@ static inline struct page *snp_safe_alloc_page(void)
static inline int sev_vcpu_create(struct kvm_vcpu *vcpu) { return 0; }
static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {}
+static inline void sev_vm_init(struct kvm *kvm) {}
static inline void sev_vm_destroy(struct kvm *kvm) {}
static inline void __init sev_set_cpu_caps(void) {}
static inline void __init sev_hardware_setup(void) {}
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 3392bcadfb89..d47c5c93c991 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -298,16 +298,16 @@ SYM_FUNC_START(__svm_vcpu_run)
RESTORE_GUEST_SPEC_CTRL_BODY
RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP)
-10: cmpb $0, _ASM_RIP(kvm_rebooting)
+10: cmpb $0, _ASM_RIP(virt_rebooting)
jne 2b
ud2
-30: cmpb $0, _ASM_RIP(kvm_rebooting)
+30: cmpb $0, _ASM_RIP(virt_rebooting)
jne 4b
ud2
-50: cmpb $0, _ASM_RIP(kvm_rebooting)
+50: cmpb $0, _ASM_RIP(virt_rebooting)
jne 6b
ud2
-70: cmpb $0, _ASM_RIP(kvm_rebooting)
+70: cmpb $0, _ASM_RIP(virt_rebooting)
jne 8b
ud2
@@ -394,7 +394,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
RESTORE_GUEST_SPEC_CTRL_BODY
RESTORE_HOST_SPEC_CTRL_BODY %sil
-3: cmpb $0, kvm_rebooting(%rip)
+3: cmpb $0, virt_rebooting(%rip)
jne 2b
ud2
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 4e371c93ae16..56cacc06225e 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -107,7 +107,7 @@ static inline bool cpu_has_load_perf_global_ctrl(void)
static inline bool cpu_has_load_cet_ctrl(void)
{
- return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE);
+ return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE;
}
static inline bool cpu_has_save_perf_global_ctrl(void)
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index a46ccd670785..dbebddf648be 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -29,10 +29,15 @@ static __init int vt_hardware_setup(void)
if (ret)
return ret;
+ return enable_tdx ? tdx_hardware_setup() : 0;
+}
+
+static void vt_hardware_unsetup(void)
+{
if (enable_tdx)
- tdx_hardware_setup();
+ tdx_hardware_unsetup();
- return 0;
+ vmx_hardware_unsetup();
}
static int vt_vm_init(struct kvm *kvm)
@@ -869,7 +874,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.check_processor_compatibility = vmx_check_processor_compat,
- .hardware_unsetup = vmx_hardware_unsetup,
+ .hardware_unsetup = vt_op(hardware_unsetup),
.enable_virtualization_cpu = vmx_enable_virtualization_cpu,
.disable_virtualization_cpu = vt_op(disable_virtualization_cpu),
@@ -1029,7 +1034,6 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
static void __exit vt_exit(void)
{
kvm_exit();
- tdx_cleanup();
vmx_exit();
}
module_exit(vt_exit);
@@ -1043,11 +1047,6 @@ static int __init vt_init(void)
if (r)
return r;
- /* tdx_init() has been taken */
- r = tdx_bringup();
- if (r)
- goto err_tdx_bringup;
-
/*
* TDX and VMX have different vCPU structures. Calculate the
* maximum size/align so that kvm_init() can use the larger
@@ -1074,8 +1073,6 @@ static int __init vt_init(void)
return 0;
err_kvm_init:
- tdx_cleanup();
-err_tdx_bringup:
vmx_exit();
return r;
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 248635da6766..3fe88f29be7a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2273,7 +2273,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
- if (vmx->nested.nested_run_pending &&
+ if (vmx->vcpu.arch.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
return vmcs12->guest_ia32_efer;
else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
@@ -2513,7 +2513,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
/*
* Interrupt/Exception Fields
*/
- if (vmx->nested.nested_run_pending) {
+ if (vmx->vcpu.arch.nested_run_pending) {
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
vmcs12->vm_entry_intr_info_field);
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
@@ -2621,7 +2621,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
}
- if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
+ if (kvm_mpx_supported() && vmx->vcpu.arch.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
}
@@ -2718,7 +2718,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
!(evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
}
- if (vmx->nested.nested_run_pending &&
+ if (vcpu->arch.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
@@ -2728,13 +2728,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
}
- if (!vmx->nested.nested_run_pending ||
+ if (!vcpu->arch.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE))
vmcs_write_cet_state(vcpu, vmx->nested.pre_vmenter_s_cet,
vmx->nested.pre_vmenter_ssp,
vmx->nested.pre_vmenter_ssp_tbl);
- if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
+ if (kvm_mpx_supported() && (!vcpu->arch.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs);
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
@@ -2747,7 +2747,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
- if (vmx->nested.nested_run_pending &&
+ if (vcpu->arch.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
vcpu->arch.pat = vmcs12->guest_ia32_pat;
@@ -3300,10 +3300,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
return -EINVAL;
- if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
- (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
- CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
- return -EINVAL;
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
+ u64 debugctl = vmcs12->guest_ia32_debugctl;
+
+ /*
+ * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in
+ * vmcs12's DEBUGCTL under a quirk for backwards compatibility.
+ * Note that the quirk only relaxes the consistency check. The
+ * vmcs02 bit is still under the control of the host. In
+ * particular, if a host administrator decides to clear the bit,
+ * then L1 has no say in the matter.
+ */
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM))
+ debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM;
+
+ if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
+ CC(!vmx_is_valid_debugctl(vcpu, debugctl, false)))
+ return -EINVAL;
+ }
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
@@ -3335,7 +3349,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
* to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
* CR0.PG) is 1.
*/
- if (to_vmx(vcpu)->nested.nested_run_pending &&
+ if (vcpu->arch.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
@@ -3613,15 +3627,15 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
kvm_service_local_tlb_flush_requests(vcpu);
- if (!vmx->nested.nested_run_pending ||
+ if (!vcpu->arch.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.pre_vmenter_debugctl = vmx_guest_debugctl_read();
if (kvm_mpx_supported() &&
- (!vmx->nested.nested_run_pending ||
+ (!vcpu->arch.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
- if (!vmx->nested.nested_run_pending ||
+ if (!vcpu->arch.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE))
vmcs_read_cet_state(vcpu, &vmx->nested.pre_vmenter_s_cet,
&vmx->nested.pre_vmenter_ssp,
@@ -3830,7 +3844,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* We're finally done with prerequisite checking, and can start with
* the nested entry.
*/
- vmx->nested.nested_run_pending = 1;
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;
vmx->nested.has_preemption_timer_deadline = false;
status = nested_vmx_enter_non_root_mode(vcpu, true);
if (unlikely(status != NVMX_VMENTRY_SUCCESS))
@@ -3862,12 +3876,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
!nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
!(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
(vmcs12->guest_rflags & X86_EFLAGS_IF))) {
- vmx->nested.nested_run_pending = 0;
+ vcpu->arch.nested_run_pending = 0;
return kvm_emulate_halt_noskip(vcpu);
}
break;
case GUEST_ACTIVITY_WAIT_SIPI:
- vmx->nested.nested_run_pending = 0;
+ vcpu->arch.nested_run_pending = 0;
kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
break;
default:
@@ -3877,7 +3891,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
vmentry_failed:
- vmx->nested.nested_run_pending = 0;
+ vcpu->arch.nested_run_pending = 0;
if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
return 0;
if (status == NVMX_VMENTRY_VMEXIT)
@@ -4274,7 +4288,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
* previously injected event, the pending exception occurred while said
* event was being delivered and thus needs to be handled.
*/
- bool block_nested_exceptions = vmx->nested.nested_run_pending;
+ bool block_nested_exceptions = vcpu->arch.nested_run_pending;
/*
* Events that don't require injection, i.e. that are virtualized by
* hardware, aren't blocked by a pending VM-Enter as KVM doesn't need
@@ -4643,7 +4657,7 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (nested_cpu_has_preemption_timer(vmcs12) &&
vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
- !vmx->nested.nested_run_pending)
+ !vcpu->arch.nested_run_pending)
vmcs12->vmx_preemption_timer_value =
vmx_get_preemption_timer_value(vcpu);
@@ -5042,7 +5056,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmx->nested.mtf_pending = false;
/* trying to cancel vmlaunch/vmresume is a bug */
- WARN_ON_ONCE(vmx->nested.nested_run_pending);
+ kvm_warn_on_nested_run_pending(vcpu);
#ifdef CONFIG_KVM_HYPERV
if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
@@ -6665,7 +6679,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
unsigned long exit_qual;
u32 exit_intr_info;
- WARN_ON_ONCE(vmx->nested.nested_run_pending);
+ kvm_warn_on_nested_run_pending(vcpu);
/*
* Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
@@ -6761,7 +6775,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu)) {
kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
- if (vmx->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
if (vmx->nested.mtf_pending)
@@ -6836,19 +6850,40 @@ out:
void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu)) {
- to_vmx(vcpu)->nested.nested_run_pending = 0;
+ vcpu->arch.nested_run_pending = 0;
nested_vmx_vmexit(vcpu, -1, 0, 0);
}
free_nested(vcpu);
}
+int nested_vmx_check_restored_vmcs12(struct kvm_vcpu *vcpu)
+{
+ enum vm_entry_failure_code ignored;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ if (nested_cpu_has_shadow_vmcs(vmcs12) &&
+ vmcs12->vmcs_link_pointer != INVALID_GPA) {
+ struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
+
+ if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
+ !shadow_vmcs12->hdr.shadow_vmcs)
+ return -EINVAL;
+ }
+
+ if (nested_vmx_check_controls(vcpu, vmcs12) ||
+ nested_vmx_check_host_state(vcpu, vmcs12) ||
+ nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
+ return -EINVAL;
+
+ return 0;
+}
+
static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12;
- enum vm_entry_failure_code ignored;
struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
&user_kvm_nested_state->data.vmx[0];
int ret;
@@ -6973,31 +7008,28 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
return 0;
- vmx->nested.nested_run_pending =
- !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+ if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING_UNTRUSTED;
+ else
+ vcpu->arch.nested_run_pending = 0;
vmx->nested.mtf_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
- ret = -EINVAL;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != INVALID_GPA) {
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
+ ret = -EINVAL;
if (kvm_state->size <
sizeof(*kvm_state) +
sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
goto error_guest_mode;
+ ret = -EFAULT;
if (copy_from_user(shadow_vmcs12,
user_vmx_nested_state->shadow_vmcs12,
- sizeof(*shadow_vmcs12))) {
- ret = -EFAULT;
- goto error_guest_mode;
- }
-
- if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
- !shadow_vmcs12->hdr.shadow_vmcs)
+ sizeof(*shadow_vmcs12)))
goto error_guest_mode;
}
@@ -7008,9 +7040,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
kvm_state->hdr.vmx.preemption_timer_deadline;
}
- if (nested_vmx_check_controls(vcpu, vmcs12) ||
- nested_vmx_check_host_state(vcpu, vmcs12) ||
- nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
+ ret = nested_vmx_check_restored_vmcs12(vcpu);
+ if (ret < 0)
goto error_guest_mode;
vmx->nested.dirty_vmcs12 = true;
@@ -7025,7 +7056,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return 0;
error_guest_mode:
- vmx->nested.nested_run_pending = 0;
+ vcpu->arch.nested_run_pending = 0;
return ret;
}
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index b844c5d59025..213a448104af 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -22,6 +22,7 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
+int nested_vmx_check_restored_vmcs12(struct kvm_vcpu *vcpu);
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index c5065f84b78b..04ce321ebdf3 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -6,6 +6,7 @@
#include <linux/misc_cgroup.h>
#include <linux/mmu_context.h>
#include <asm/tdx.h>
+#include <asm/virt.h>
#include "capabilities.h"
#include "mmu.h"
#include "x86_ops.h"
@@ -58,8 +59,6 @@ module_param_named(tdx, enable_tdx, bool, 0444);
#define TDX_SHARED_BIT_PWL_5 gpa_to_gfn(BIT_ULL(51))
#define TDX_SHARED_BIT_PWL_4 gpa_to_gfn(BIT_ULL(47))
-static enum cpuhp_state tdx_cpuhp_state;
-
static const struct tdx_sys_info *tdx_sysinfo;
void tdh_vp_rd_failed(struct vcpu_tdx *tdx, char *uclass, u32 field, u64 err)
@@ -75,7 +74,7 @@ void tdh_vp_wr_failed(struct vcpu_tdx *tdx, char *uclass, char *op, u32 field,
pr_err("TDH_VP_WR[%s.0x%x]%s0x%llx failed: 0x%llx\n", uclass, field, op, val, err);
}
-#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
+#define KVM_SUPPORTED_TDX_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
static __always_inline struct kvm_tdx *to_kvm_tdx(struct kvm *kvm)
{
@@ -89,7 +88,7 @@ static __always_inline struct vcpu_tdx *to_tdx(struct kvm_vcpu *vcpu)
static u64 tdx_get_supported_attrs(const struct tdx_sys_info_td_conf *td_conf)
{
- u64 val = KVM_SUPPORTED_TD_ATTRS;
+ u64 val = KVM_SUPPORTED_TDX_TD_ATTRS;
if ((val & td_conf->attributes_fixed1) != td_conf->attributes_fixed1)
return 0;
@@ -218,8 +217,6 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf,
*/
static DEFINE_MUTEX(tdx_lock);
-static atomic_t nr_configured_hkid;
-
static bool tdx_operand_busy(u64 err)
{
return (err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY;
@@ -267,7 +264,6 @@ static inline void tdx_hkid_free(struct kvm_tdx *kvm_tdx)
{
tdx_guest_keyid_free(kvm_tdx->hkid);
kvm_tdx->hkid = -1;
- atomic_dec(&nr_configured_hkid);
misc_cg_uncharge(MISC_CG_RES_TDX, kvm_tdx->misc_cg, 1);
put_misc_cg(kvm_tdx->misc_cg);
kvm_tdx->misc_cg = NULL;
@@ -1467,17 +1463,11 @@ static int tdx_emulate_mmio(struct kvm_vcpu *vcpu)
/* Request the device emulation to userspace device model. */
vcpu->mmio_is_write = write;
- if (!write)
- vcpu->arch.complete_userspace_io = tdx_complete_mmio_read;
- vcpu->run->mmio.phys_addr = gpa;
- vcpu->run->mmio.len = size;
- vcpu->run->mmio.is_write = write;
- vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ __kvm_prepare_emulated_mmio_exit(vcpu, gpa, size, &val, write);
- if (write) {
- memcpy(vcpu->run->mmio.data, &val, size);
- } else {
+ if (!write) {
+ vcpu->arch.complete_userspace_io = tdx_complete_mmio_read;
vcpu->mmio_fragments[0].gpa = gpa;
vcpu->mmio_fragments[0].len = size;
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, size, gpa, NULL);
@@ -1994,7 +1984,7 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
* TDX_SEAMCALL_VMFAILINVALID.
*/
if (unlikely((vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR)) {
- KVM_BUG_ON(!kvm_rebooting, vcpu->kvm);
+ KVM_BUG_ON(!virt_rebooting, vcpu->kvm);
goto unhandled_exit;
}
@@ -2397,8 +2387,6 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
ret = -ENOMEM;
- atomic_inc(&nr_configured_hkid);
-
tdr_page = alloc_page(GFP_KERNEL);
if (!tdr_page)
goto free_hkid;
@@ -3290,106 +3278,15 @@ int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private)
return PG_LEVEL_4K;
}
-static int tdx_online_cpu(unsigned int cpu)
-{
- unsigned long flags;
- int r;
-
- /* Sanity check CPU is already in post-VMXON */
- WARN_ON_ONCE(!(cr4_read_shadow() & X86_CR4_VMXE));
-
- local_irq_save(flags);
- r = tdx_cpu_enable();
- local_irq_restore(flags);
-
- return r;
-}
-
-static int tdx_offline_cpu(unsigned int cpu)
-{
- int i;
-
- /* No TD is running. Allow any cpu to be offline. */
- if (!atomic_read(&nr_configured_hkid))
- return 0;
-
- /*
- * In order to reclaim TDX HKID, (i.e. when deleting guest TD), need to
- * call TDH.PHYMEM.PAGE.WBINVD on all packages to program all memory
- * controller with pconfig. If we have active TDX HKID, refuse to
- * offline the last online cpu.
- */
- for_each_online_cpu(i) {
- /*
- * Found another online cpu on the same package.
- * Allow to offline.
- */
- if (i != cpu && topology_physical_package_id(i) ==
- topology_physical_package_id(cpu))
- return 0;
- }
-
- /*
- * This is the last cpu of this package. Don't offline it.
- *
- * Because it's hard for human operator to understand the
- * reason, warn it.
- */
-#define MSG_ALLPKG_ONLINE \
- "TDX requires all packages to have an online CPU. Delete all TDs in order to offline all CPUs of a package.\n"
- pr_warn_ratelimited(MSG_ALLPKG_ONLINE);
- return -EBUSY;
-}
-
-static void __do_tdx_cleanup(void)
+void tdx_hardware_unsetup(void)
{
- /*
- * Once TDX module is initialized, it cannot be disabled and
- * re-initialized again w/o runtime update (which isn't
- * supported by kernel). Only need to remove the cpuhp here.
- * The TDX host core code tracks TDX status and can handle
- * 'multiple enabling' scenario.
- */
- WARN_ON_ONCE(!tdx_cpuhp_state);
- cpuhp_remove_state_nocalls_cpuslocked(tdx_cpuhp_state);
- tdx_cpuhp_state = 0;
+ misc_cg_set_capacity(MISC_CG_RES_TDX, 0);
}
-static void __tdx_cleanup(void)
-{
- cpus_read_lock();
- __do_tdx_cleanup();
- cpus_read_unlock();
-}
-
-static int __init __do_tdx_bringup(void)
-{
- int r;
-
- /*
- * TDX-specific cpuhp callback to call tdx_cpu_enable() on all
- * online CPUs before calling tdx_enable(), and on any new
- * going-online CPU to make sure it is ready for TDX guest.
- */
- r = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
- "kvm/cpu/tdx:online",
- tdx_online_cpu, tdx_offline_cpu);
- if (r < 0)
- return r;
-
- tdx_cpuhp_state = r;
-
- r = tdx_enable();
- if (r)
- __do_tdx_cleanup();
-
- return r;
-}
-
-static int __init __tdx_bringup(void)
+static int __init __tdx_hardware_setup(void)
{
const struct tdx_sys_info_td_conf *td_conf;
- int r, i;
+ int i;
for (i = 0; i < ARRAY_SIZE(tdx_uret_msrs); i++) {
/*
@@ -3405,34 +3302,18 @@ static int __init __tdx_bringup(void)
}
}
- /*
- * Enabling TDX requires enabling hardware virtualization first,
- * as making SEAMCALLs requires CPU being in post-VMXON state.
- */
- r = kvm_enable_virtualization();
- if (r)
- return r;
-
- cpus_read_lock();
- r = __do_tdx_bringup();
- cpus_read_unlock();
-
- if (r)
- goto tdx_bringup_err;
-
- r = -EINVAL;
/* Get TDX global information for later use */
tdx_sysinfo = tdx_get_sysinfo();
- if (WARN_ON_ONCE(!tdx_sysinfo))
- goto get_sysinfo_err;
+ if (!tdx_sysinfo)
+ return -ENODEV;
/* Check TDX module and KVM capabilities */
if (!tdx_get_supported_attrs(&tdx_sysinfo->td_conf) ||
!tdx_get_supported_xfam(&tdx_sysinfo->td_conf))
- goto get_sysinfo_err;
+ return -EINVAL;
if (!(tdx_sysinfo->features.tdx_features0 & MD_FIELD_ID_FEATURES0_TOPOLOGY_ENUM))
- goto get_sysinfo_err;
+ return -EINVAL;
/*
* TDX has its own limit of maximum vCPUs it can support for all
@@ -3467,35 +3348,16 @@ static int __init __tdx_bringup(void)
if (td_conf->max_vcpus_per_td < num_present_cpus()) {
pr_err("Disable TDX: MAX_VCPU_PER_TD (%u) smaller than number of logical CPUs (%u).\n",
td_conf->max_vcpus_per_td, num_present_cpus());
- goto get_sysinfo_err;
+ return -EINVAL;
}
if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids()))
- goto get_sysinfo_err;
+ return -EINVAL;
- /*
- * Leave hardware virtualization enabled after TDX is enabled
- * successfully. TDX CPU hotplug depends on this.
- */
return 0;
-
-get_sysinfo_err:
- __tdx_cleanup();
-tdx_bringup_err:
- kvm_disable_virtualization();
- return r;
}
-void tdx_cleanup(void)
-{
- if (enable_tdx) {
- misc_cg_set_capacity(MISC_CG_RES_TDX, 0);
- __tdx_cleanup();
- kvm_disable_virtualization();
- }
-}
-
-int __init tdx_bringup(void)
+int __init tdx_hardware_setup(void)
{
int r, i;
@@ -3526,40 +3388,12 @@ int __init tdx_bringup(void)
goto success_disable_tdx;
}
- if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
- pr_err("tdx: MOVDIR64B is required for TDX\n");
- goto success_disable_tdx;
- }
-
- if (!cpu_feature_enabled(X86_FEATURE_SELFSNOOP)) {
- pr_err("Self-snoop is required for TDX\n");
- goto success_disable_tdx;
- }
-
if (!cpu_feature_enabled(X86_FEATURE_TDX_HOST_PLATFORM)) {
- pr_err("tdx: no TDX private KeyIDs available\n");
- goto success_disable_tdx;
- }
-
- if (!enable_virt_at_load) {
- pr_err("tdx: tdx requires kvm.enable_virt_at_load=1\n");
+ pr_err("TDX not supported by the host platform\n");
goto success_disable_tdx;
}
- /*
- * Ideally KVM should probe whether TDX module has been loaded
- * first and then try to bring it up. But TDX needs to use SEAMCALL
- * to probe whether the module is loaded (there is no CPUID or MSR
- * for that), and making SEAMCALL requires enabling virtualization
- * first, just like the rest steps of bringing up TDX module.
- *
- * So, for simplicity do everything in __tdx_bringup(); the first
- * SEAMCALL will return -ENODEV when the module is not loaded. The
- * only complication is having to make sure that initialization
- * SEAMCALLs don't return TDX_SEAMCALL_VMFAILINVALID in other
- * cases.
- */
- r = __tdx_bringup();
+ r = __tdx_hardware_setup();
if (r) {
/*
* Disable TDX only but don't fail to load module if the TDX
@@ -3574,24 +3408,11 @@ int __init tdx_bringup(void)
if (r == -ENODEV)
goto success_disable_tdx;
- enable_tdx = 0;
+ return r;
}
- return r;
-
-success_disable_tdx:
- enable_tdx = 0;
- return 0;
-}
-
-void __init tdx_hardware_setup(void)
-{
KVM_SANITY_CHECK_VM_STRUCT_SIZE(kvm_tdx);
- /*
- * Note, if the TDX module can't be loaded, KVM TDX support will be
- * disabled but KVM will continue loading (see tdx_bringup()).
- */
vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size, sizeof(struct kvm_tdx));
vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
@@ -3599,4 +3420,9 @@ void __init tdx_hardware_setup(void)
vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
+ return 0;
+
+success_disable_tdx:
+ enable_tdx = 0;
+ return 0;
}
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 45b5183ccb36..b5cd2ffb303e 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -8,9 +8,8 @@
#ifdef CONFIG_KVM_INTEL_TDX
#include "common.h"
-void tdx_hardware_setup(void);
-int tdx_bringup(void);
-void tdx_cleanup(void);
+int tdx_hardware_setup(void);
+void tdx_hardware_unsetup(void);
extern bool enable_tdx;
@@ -187,9 +186,6 @@ TDX_BUILD_TDVPS_ACCESSORS(8, MANAGEMENT, management);
TDX_BUILD_TDVPS_ACCESSORS(64, STATE_NON_ARCH, state_non_arch);
#else
-static inline int tdx_bringup(void) { return 0; }
-static inline void tdx_cleanup(void) {}
-
#define enable_tdx 0
struct kvm_tdx {
diff --git a/arch/x86/kvm/vmx/tdx_arch.h b/arch/x86/kvm/vmx/tdx_arch.h
index a30e880849e3..350143b9b145 100644
--- a/arch/x86/kvm/vmx/tdx_arch.h
+++ b/arch/x86/kvm/vmx/tdx_arch.h
@@ -75,12 +75,6 @@ struct tdx_cpuid_value {
u32 edx;
} __packed;
-#define TDX_TD_ATTR_DEBUG BIT_ULL(0)
-#define TDX_TD_ATTR_SEPT_VE_DISABLE BIT_ULL(28)
-#define TDX_TD_ATTR_PKS BIT_ULL(30)
-#define TDX_TD_ATTR_KL BIT_ULL(31)
-#define TDX_TD_ATTR_PERFMON BIT_ULL(63)
-
#define TDX_EXT_EXIT_QUAL_TYPE_MASK GENMASK(3, 0)
#define TDX_EXT_EXIT_QUAL_TYPE_PENDING_EPT_VIOLATION 6
/*
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 66d747e265b1..1f16ddeae9cb 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -22,17 +22,6 @@
#define VMCS12_IDX_TO_ENC(idx) ROL16(idx, 10)
#define ENC_TO_VMCS12_IDX(enc) ROL16(enc, 6)
-struct vmcs_hdr {
- u32 revision_id:31;
- u32 shadow_vmcs:1;
-};
-
-struct vmcs {
- struct vmcs_hdr hdr;
- u32 abort;
- char data[];
-};
-
DECLARE_PER_CPU(struct vmcs *, current_vmcs);
/*
diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
index cad128d1657b..67e821c2be6d 100644
--- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h
+++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
@@ -1,6 +1,6 @@
#if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW)
-BUILD_BUG_ON(1)
-#endif
+#error Must #define at least one of SHADOW_FIELD_RO or SHADOW_FIELD_RW
+#else
#ifndef SHADOW_FIELD_RO
#define SHADOW_FIELD_RO(x, y)
@@ -74,6 +74,7 @@ SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base)
/* 64-bit */
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address)
+#endif
#undef SHADOW_FIELD_RO
#undef SHADOW_FIELD_RW
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 4426d34811fc..8a481dae9cae 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -310,7 +310,7 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
RET
.Lfixup:
- cmpb $0, _ASM_RIP(kvm_rebooting)
+ cmpb $0, _ASM_RIP(virt_rebooting)
jne .Lvmfail
ud2
.Lvmfail:
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 967b58a8ab9d..a29896a9ef14 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -48,6 +48,7 @@
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
+#include <asm/virt.h>
#include <asm/vmx.h>
#include <trace/events/ipi.h>
@@ -579,7 +580,6 @@ noinline void invept_error(unsigned long ext, u64 eptp)
vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx\n", ext, eptp);
}
-static DEFINE_PER_CPU(struct vmcs *, vmxarea);
DEFINE_PER_CPU(struct vmcs *, current_vmcs);
/*
* We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
@@ -786,53 +786,17 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
return ret;
}
-/*
- * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
- *
- * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
- * atomically track post-VMXON state, e.g. this may be called in NMI context.
- * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
- * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
- * magically in RM, VM86, compat mode, or at CPL>0.
- */
-static int kvm_cpu_vmxoff(void)
-{
- asm goto("1: vmxoff\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- ::: "cc", "memory" : fault);
-
- cr4_clear_bits(X86_CR4_VMXE);
- return 0;
-
-fault:
- cr4_clear_bits(X86_CR4_VMXE);
- return -EIO;
-}
-
void vmx_emergency_disable_virtualization_cpu(void)
{
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v;
- kvm_rebooting = true;
-
- /*
- * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
- * set in task context. If this races with VMX is disabled by an NMI,
- * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
- * kvm_rebooting set.
- */
- if (!(__read_cr4() & X86_CR4_VMXE))
- return;
-
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
loaded_vmcss_on_cpu_link) {
vmcs_clear(v->vmcs);
if (v->shadow_vmcs)
vmcs_clear(v->shadow_vmcs);
}
-
- kvm_cpu_vmxoff();
}
static void __loaded_vmcs_clear(void *arg)
@@ -1149,7 +1113,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
}
vmx_add_auto_msr(&m->guest, msr, guest_val, VM_ENTRY_MSR_LOAD_COUNT, kvm);
- vmx_add_auto_msr(&m->guest, msr, host_val, VM_EXIT_MSR_LOAD_COUNT, kvm);
+ vmx_add_auto_msr(&m->host, msr, host_val, VM_EXIT_MSR_LOAD_COUNT, kvm);
}
static bool update_transition_efer(struct vcpu_vmx *vmx)
@@ -2927,12 +2891,16 @@ static bool __kvm_is_vmx_supported(void)
return false;
}
- if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
- !this_cpu_has(X86_FEATURE_VMX)) {
+ if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL)) {
pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu);
return false;
}
+ if (!this_cpu_has(X86_FEATURE_VMX)) {
+ pr_err("VMX not fully enabled on CPU %d. Check kernel logs and/or BIOS\n", cpu);
+ return false;
+ }
+
return true;
}
@@ -2984,34 +2952,9 @@ int vmx_check_processor_compat(void)
return 0;
}
-static int kvm_cpu_vmxon(u64 vmxon_pointer)
-{
- u64 msr;
-
- cr4_set_bits(X86_CR4_VMXE);
-
- asm goto("1: vmxon %[vmxon_pointer]\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- : : [vmxon_pointer] "m"(vmxon_pointer)
- : : fault);
- return 0;
-
-fault:
- WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
- rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
- cr4_clear_bits(X86_CR4_VMXE);
-
- return -EFAULT;
-}
-
int vmx_enable_virtualization_cpu(void)
{
int cpu = raw_smp_processor_id();
- u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
- int r;
-
- if (cr4_read_shadow() & X86_CR4_VMXE)
- return -EBUSY;
/*
* This can happen if we hot-added a CPU but failed to allocate
@@ -3020,15 +2963,7 @@ int vmx_enable_virtualization_cpu(void)
if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
return -EFAULT;
- intel_pt_handle_vmx(1);
-
- r = kvm_cpu_vmxon(phys_addr);
- if (r) {
- intel_pt_handle_vmx(0);
- return r;
- }
-
- return 0;
+ return x86_virt_get_ref(X86_FEATURE_VMX);
}
static void vmclear_local_loaded_vmcss(void)
@@ -3045,12 +2980,9 @@ void vmx_disable_virtualization_cpu(void)
{
vmclear_local_loaded_vmcss();
- if (kvm_cpu_vmxoff())
- kvm_spurious_fault();
+ x86_virt_put_ref(X86_FEATURE_VMX);
hv_reset_evmcs();
-
- intel_pt_handle_vmx(0);
}
struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
@@ -3128,47 +3060,6 @@ out_vmcs:
return -ENOMEM;
}
-static void free_kvm_area(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- free_vmcs(per_cpu(vmxarea, cpu));
- per_cpu(vmxarea, cpu) = NULL;
- }
-}
-
-static __init int alloc_kvm_area(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct vmcs *vmcs;
-
- vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
- if (!vmcs) {
- free_kvm_area();
- return -ENOMEM;
- }
-
- /*
- * When eVMCS is enabled, alloc_vmcs_cpu() sets
- * vmcs->revision_id to KVM_EVMCS_VERSION instead of
- * revision_id reported by MSR_IA32_VMX_BASIC.
- *
- * However, even though not explicitly documented by
- * TLFS, VMXArea passed as VMXON argument should
- * still be marked with revision_id reported by
- * physical CPU.
- */
- if (kvm_is_using_evmcs())
- vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic);
-
- per_cpu(vmxarea, cpu) = vmcs;
- }
- return 0;
-}
-
static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
struct kvm_segment *save)
{
@@ -5279,7 +5170,7 @@ bool vmx_nmi_blocked(struct kvm_vcpu *vcpu)
int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
- if (to_vmx(vcpu)->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
return -EBUSY;
/* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
@@ -5306,7 +5197,7 @@ bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu)
int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
- if (to_vmx(vcpu)->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
return -EBUSY;
/*
@@ -6118,7 +6009,7 @@ static bool vmx_unhandleable_emulation_required(struct kvm_vcpu *vcpu)
* only reachable if userspace modifies L2 guest state after KVM has
* performed the nested VM-Enter consistency checks.
*/
- if (vmx->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
return true;
/*
@@ -6802,7 +6693,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
* invalid guest state should never happen as that means KVM knowingly
* allowed a nested VM-Enter with an invalid vmcs12. More below.
*/
- if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
+ if (KVM_BUG_ON(vcpu->arch.nested_run_pending, vcpu->kvm))
return -EIO;
if (is_guest_mode(vcpu)) {
@@ -7730,11 +7621,11 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
* Track VMLAUNCH/VMRESUME that have made past guest state
* checking.
*/
- if (vmx->nested.nested_run_pending &&
+ if (vcpu->arch.nested_run_pending &&
!vmx_get_exit_reason(vcpu).failed_vmentry)
++vcpu->stat.nested_run;
- vmx->nested.nested_run_pending = 0;
+ vcpu->arch.nested_run_pending = 0;
}
if (unlikely(vmx->fail))
@@ -8491,7 +8382,7 @@ void vmx_setup_mce(struct kvm_vcpu *vcpu)
int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
/* we need a nested vmexit to enter SMM, postpone if run is pending */
- if (to_vmx(vcpu)->nested.nested_run_pending)
+ if (vcpu->arch.nested_run_pending)
return -EBUSY;
return !is_smm(vcpu);
}
@@ -8528,11 +8419,15 @@ int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
}
if (vmx->nested.smm.guest_mode) {
+ /* Triple fault if the state is invalid. */
+ if (nested_vmx_check_restored_vmcs12(vcpu) < 0)
+ return 1;
+
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- if (ret)
- return ret;
+ if (ret != NVMX_VMENTRY_SUCCESS)
+ return 1;
- vmx->nested.nested_run_pending = 1;
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;
vmx->nested.smm.guest_mode = false;
}
return 0;
@@ -8565,8 +8460,6 @@ void vmx_hardware_unsetup(void)
if (nested)
nested_vmx_hardware_unsetup();
-
- free_kvm_area();
}
void vmx_vm_destroy(struct kvm *kvm)
@@ -8694,10 +8587,6 @@ __init int vmx_hardware_setup(void)
vmx_setup_user_return_msrs();
-
- if (boot_cpu_has(X86_FEATURE_NX))
- kvm_enable_efer_bits(EFER_NX);
-
if (boot_cpu_has(X86_FEATURE_MPX)) {
rdmsrq(MSR_IA32_BNDCFGS, host_bndcfgs);
WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost");
@@ -8869,10 +8758,6 @@ __init int vmx_hardware_setup(void)
return r;
}
- r = alloc_kvm_area();
- if (r)
- goto err_kvm_area;
-
kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
/*
@@ -8899,11 +8784,6 @@ __init int vmx_hardware_setup(void)
kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
return 0;
-
-err_kvm_area:
- if (nested)
- nested_vmx_hardware_unsetup();
- return r;
}
void vmx_exit(void)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 70bfe81dea54..db84e8001da5 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -138,9 +138,6 @@ struct nested_vmx {
*/
bool enlightened_vmcs_enabled;
- /* L2 must run next, and mustn't decide to exit to L1. */
- bool nested_run_pending;
-
/* Pending MTF VM-exit into L1. */
bool mtf_pending;
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 96677576c836..81784befaaf4 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -119,7 +119,6 @@ do_exception:
#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
asm volatile("1: vmread %[field], %[output]\n\t"
- ".byte 0x3e\n\t" /* branch taken hint */
"ja 3f\n\t"
/*
@@ -191,7 +190,6 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
#define vmx_asm1(insn, op1, error_args...) \
do { \
asm goto("1: " __stringify(insn) " %0\n\t" \
- ".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
: : op1 : "cc" : error, fault); \
@@ -208,7 +206,6 @@ fault: \
#define vmx_asm2(insn, op1, op2, error_args...) \
do { \
asm goto("1: " __stringify(insn) " %1, %0\n\t" \
- ".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
: : op1, op2 : "cc" : error, fault); \
@@ -224,7 +221,7 @@ fault: \
static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
{
- vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value);
+ vmx_asm2(vmwrite, "r" (field), ASM_INPUT_RM (value), field, value);
}
static __always_inline void vmcs_write16(unsigned long field, u16 value)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a03530795707..0a1b63c63d1a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -83,6 +83,8 @@
#include <asm/intel_pt.h>
#include <asm/emulate_prefix.h>
#include <asm/sgx.h>
+#include <asm/virt.h>
+
#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
@@ -243,7 +245,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv);
bool __read_mostly enable_device_posted_irqs = true;
EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs);
-const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+const struct kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
STATS_DESC_COUNTER(VM, mmu_pte_write),
@@ -269,7 +271,7 @@ const struct kvm_stats_header kvm_vm_stats_header = {
sizeof(kvm_vm_stats_desc),
};
-const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+const struct kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
STATS_DESC_COUNTER(VCPU, pf_taken),
STATS_DESC_COUNTER(VCPU, pf_fixed),
@@ -351,6 +353,9 @@ static const u32 msrs_to_save_base[] = {
MSR_IA32_U_CET, MSR_IA32_S_CET,
MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
+ MSR_IA32_DEBUGCTLMSR,
+ MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP,
+ MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP,
};
static const u32 msrs_to_save_pmu[] = {
@@ -710,7 +715,7 @@ static void drop_user_return_notifiers(void)
noinstr void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
- BUG_ON(!kvm_rebooting);
+ BUG_ON(!virt_rebooting);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spurious_fault);
@@ -864,9 +869,6 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned int nr,
vcpu->arch.exception.error_code = error_code;
vcpu->arch.exception.has_payload = has_payload;
vcpu->arch.exception.payload = payload;
- if (!is_guest_mode(vcpu))
- kvm_deliver_exception_payload(vcpu,
- &vcpu->arch.exception);
return;
}
@@ -5531,18 +5533,8 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
return 0;
}
-static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
+static struct kvm_queued_exception *kvm_get_exception_to_save(struct kvm_vcpu *vcpu)
{
- struct kvm_queued_exception *ex;
-
- process_nmi(vcpu);
-
-#ifdef CONFIG_KVM_SMM
- if (kvm_check_request(KVM_REQ_SMI, vcpu))
- process_smi(vcpu);
-#endif
-
/*
* KVM's ABI only allows for one exception to be migrated. Luckily,
* the only time there can be two queued exceptions is if there's a
@@ -5553,21 +5545,46 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
if (vcpu->arch.exception_vmexit.pending &&
!vcpu->arch.exception.pending &&
!vcpu->arch.exception.injected)
- ex = &vcpu->arch.exception_vmexit;
- else
- ex = &vcpu->arch.exception;
+ return &vcpu->arch.exception_vmexit;
+
+ return &vcpu->arch.exception;
+}
+
+static void kvm_handle_exception_payload_quirk(struct kvm_vcpu *vcpu)
+{
+ struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
/*
- * In guest mode, payload delivery should be deferred if the exception
- * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1
- * intercepts #PF, ditto for DR6 and #DBs. If the per-VM capability,
- * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not
- * propagate the payload and so it cannot be safely deferred. Deliver
- * the payload if the capability hasn't been requested.
+ * If KVM_CAP_EXCEPTION_PAYLOAD is disabled, then (prematurely) deliver
+ * the pending exception payload when userspace saves *any* vCPU state
+ * that interacts with exception payloads to avoid breaking userspace.
+ *
+ * Architecturally, KVM must not deliver an exception payload until the
+ * exception is actually injected, e.g. to avoid losing pending #DB
+ * information (which VMX tracks in the VMCS), and to avoid clobbering
+ * state if the exception is never injected for whatever reason. But
+ * if KVM_CAP_EXCEPTION_PAYLOAD isn't enabled, then userspace may or
+ * may not propagate the payload across save+restore, and so KVM can't
+ * safely defer delivery of the payload.
*/
if (!vcpu->kvm->arch.exception_payload_enabled &&
ex->pending && ex->has_payload)
kvm_deliver_exception_payload(vcpu, ex);
+}
+
+static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
+
+ process_nmi(vcpu);
+
+#ifdef CONFIG_KVM_SMM
+ if (kvm_check_request(KVM_REQ_SMI, vcpu))
+ process_smi(vcpu);
+#endif
+
+ kvm_handle_exception_payload_quirk(vcpu);
memset(events, 0, sizeof(*events));
@@ -5746,6 +5763,8 @@ static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
vcpu->arch.guest_state_protected)
return -EINVAL;
+ kvm_handle_exception_payload_quirk(vcpu);
+
memset(dbgregs, 0, sizeof(*dbgregs));
BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
@@ -7768,11 +7787,14 @@ static void kvm_init_msr_lists(void)
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
- const void *v)
+ void *__v)
{
+ const void *v = __v;
int handled = 0;
int n;
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, addr, __v);
+
do {
n = min(len, 8);
if (!(lapic_in_kernel(vcpu) &&
@@ -7807,6 +7829,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
v += n;
} while (len);
+ if (len)
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, addr, NULL);
+
return handled;
}
@@ -8095,90 +8120,32 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
}
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
- const void *val, int bytes)
-{
- int ret;
-
- ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
- if (ret < 0)
- return 0;
- kvm_page_track_write(vcpu, gpa, val, bytes);
- return 1;
-}
-
struct read_write_emulator_ops {
- int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
- int bytes);
- int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
- void *val, int bytes);
+ int (*read_write_guest)(struct kvm_vcpu *vcpu, gpa_t gpa,
+ void *val, int bytes);
int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
int bytes, void *val);
- int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
- void *val, int bytes);
bool write;
};
-static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
-{
- if (vcpu->mmio_read_completed) {
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, val);
- vcpu->mmio_read_completed = 0;
- return 1;
- }
-
- return 0;
-}
-
-static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
- void *val, int bytes)
+static int emulator_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa,
+ void *val, int bytes)
{
return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
}
-static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
- void *val, int bytes)
-{
- return emulator_write_phys(vcpu, gpa, val, bytes);
-}
-
-static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
+static int emulator_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa,
+ void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
- return vcpu_mmio_write(vcpu, gpa, bytes, val);
-}
-
-static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
- void *val, int bytes)
-{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
- return X86EMUL_IO_NEEDED;
-}
-
-static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
- void *val, int bytes)
-{
- struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
+ int ret;
- memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
- return X86EMUL_CONTINUE;
+ ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
+ if (ret < 0)
+ return 0;
+ kvm_page_track_write(vcpu, gpa, val, bytes);
+ return 1;
}
-static const struct read_write_emulator_ops read_emultor = {
- .read_write_prepare = read_prepare,
- .read_write_emulate = read_emulate,
- .read_write_mmio = vcpu_mmio_read,
- .read_write_exit_mmio = read_exit_mmio,
-};
-
-static const struct read_write_emulator_ops write_emultor = {
- .read_write_emulate = write_emulate,
- .read_write_mmio = write_mmio,
- .read_write_exit_mmio = write_exit_mmio,
- .write = true,
-};
-
static int emulator_read_write_onepage(unsigned long addr, void *val,
unsigned int bytes,
struct x86_exception *exception,
@@ -8208,11 +8175,22 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
return X86EMUL_PROPAGATE_FAULT;
}
- if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
+ /*
+ * If the memory is not _known_ to be emulated MMIO, attempt to access
+ * guest memory. If accessing guest memory fails, e.g. because there's
+ * no memslot, then handle the access as MMIO. Note, treating the
+ * access as emulated MMIO is technically wrong if there is a memslot,
+ * i.e. if accessing host user memory failed, but this has been KVM's
+ * historical ABI for decades.
+ */
+ if (!ret && ops->read_write_guest(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE;
/*
- * Is this MMIO handled locally?
+ * Attempt to handle emulated MMIO within the kernel, e.g. for accesses
+ * to an in-kernel local or I/O APIC, or to an ioeventfd range attached
+ * to MMIO bus. If the access isn't fully resolved, insert an MMIO
+ * fragment with the relevant details.
*/
handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
if (handled == bytes)
@@ -8225,8 +8203,21 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
frag->gpa = gpa;
- frag->data = val;
+ if (write && bytes <= 8u) {
+ frag->val = 0;
+ frag->data = &frag->val;
+ memcpy(&frag->val, val, bytes);
+ } else {
+ frag->data = val;
+ }
frag->len = bytes;
+
+ /*
+ * Continue emulating, even though KVM needs to (eventually) do an MMIO
+ * exit to userspace. If the access splits multiple pages, then KVM
+ * needs to exit to userspace only after emulating both parts of the
+ * access.
+ */
return X86EMUL_CONTINUE;
}
@@ -8237,12 +8228,33 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
const struct read_write_emulator_ops *ops)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- gpa_t gpa;
int rc;
- if (ops->read_write_prepare &&
- ops->read_write_prepare(vcpu, val, bytes))
+ if (WARN_ON_ONCE((bytes > 8u || !ops->write) && object_is_on_stack(val)))
+ return X86EMUL_UNHANDLEABLE;
+
+ /*
+ * If the read was already completed via a userspace MMIO exit, there's
+ * nothing left to do except trace the MMIO read. When completing MMIO
+ * reads, KVM re-emulates the instruction to propagate the value into
+ * the correct destination, e.g. into the correct register, but the
+ * value itself has already been copied to the read cache.
+ *
+ * Note! This is *tightly* coupled to read_emulated() satisfying reads
+ * from the emulator's mem_read cache, so that the MMIO fragment data
+ * is copied to the correct chunk of the correct operand.
+ */
+ if (!ops->write && vcpu->mmio_read_completed) {
+ /*
+ * For simplicity, trace the entire MMIO read in one shot, even
+ * though the GPA might be incorrect if there are two fragments
+ * that aren't contiguous in the GPA space.
+ */
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+ vcpu->mmio_fragments[0].gpa, val);
+ vcpu->mmio_read_completed = 0;
return X86EMUL_CONTINUE;
+ }
vcpu->mmio_nr_fragments = 0;
@@ -8271,17 +8283,21 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
if (!vcpu->mmio_nr_fragments)
return X86EMUL_CONTINUE;
- gpa = vcpu->mmio_fragments[0].gpa;
-
vcpu->mmio_needed = 1;
vcpu->mmio_cur_fragment = 0;
+ vcpu->mmio_is_write = ops->write;
- vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
- vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
- vcpu->run->exit_reason = KVM_EXIT_MMIO;
- vcpu->run->mmio.phys_addr = gpa;
+ kvm_prepare_emulated_mmio_exit(vcpu, &vcpu->mmio_fragments[0]);
- return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
+ /*
+ * For MMIO reads, stop emulating and immediately exit to userspace, as
+ * KVM needs the value to correctly emulate the instruction. For MMIO
+ * writes, continue emulating as the write to MMIO is a side effect for
+ * all intents and purposes. KVM will still exit to userspace, but
+ * after completing emulation (see the check on vcpu->mmio_needed in
+ * x86_emulate_instruction()).
+ */
+ return ops->write ? X86EMUL_CONTINUE : X86EMUL_IO_NEEDED;
}
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
@@ -8290,8 +8306,13 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
unsigned int bytes,
struct x86_exception *exception)
{
- return emulator_read_write(ctxt, addr, val, bytes,
- exception, &read_emultor);
+ static const struct read_write_emulator_ops ops = {
+ .read_write_guest = emulator_read_guest,
+ .read_write_mmio = vcpu_mmio_read,
+ .write = false,
+ };
+
+ return emulator_read_write(ctxt, addr, val, bytes, exception, &ops);
}
static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
@@ -8300,8 +8321,13 @@ static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
unsigned int bytes,
struct x86_exception *exception)
{
- return emulator_read_write(ctxt, addr, (void *)val, bytes,
- exception, &write_emultor);
+ static const struct read_write_emulator_ops ops = {
+ .read_write_guest = emulator_write_guest,
+ .read_write_mmio = vcpu_mmio_write,
+ .write = true,
+ };
+
+ return emulator_read_write(ctxt, addr, (void *)val, bytes, exception, &ops);
}
#define emulator_try_cmpxchg_user(t, ptr, old, new) \
@@ -8890,6 +8916,11 @@ static bool emulator_is_canonical_addr(struct x86_emulate_ctxt *ctxt,
return !is_noncanonical_address(addr, emul_to_vcpu(ctxt), flags);
}
+static bool emulator_page_address_valid(struct x86_emulate_ctxt *ctxt, gpa_t gpa)
+{
+ return page_address_valid(emul_to_vcpu(ctxt), gpa);
+}
+
static const struct x86_emulate_ops emulate_ops = {
.vm_bugged = emulator_vm_bugged,
.read_gpr = emulator_read_gpr,
@@ -8937,6 +8968,7 @@ static const struct x86_emulate_ops emulate_ops = {
.set_xcr = emulator_set_xcr,
.get_untagged_addr = emulator_get_untagged_addr,
.is_canonical_addr = emulator_is_canonical_addr,
+ .page_address_valid = emulator_page_address_valid,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -9694,7 +9726,8 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
unsigned long val;
/* We should only ever be called with arch.pio.count equal to 1 */
- BUG_ON(vcpu->arch.pio.count != 1);
+ if (KVM_BUG_ON(vcpu->arch.pio.count != 1, vcpu->kvm))
+ return -EIO;
if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.cui_linear_rip))) {
vcpu->arch.pio.count = 0;
@@ -9998,6 +10031,18 @@ void kvm_setup_xss_caps(void)
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps);
+static void kvm_setup_efer_caps(void)
+{
+ if (kvm_cpu_cap_has(X86_FEATURE_NX))
+ kvm_enable_efer_bits(EFER_NX);
+
+ if (kvm_cpu_cap_has(X86_FEATURE_FXSR_OPT))
+ kvm_enable_efer_bits(EFER_FFXSR);
+
+ if (kvm_cpu_cap_has(X86_FEATURE_AUTOIBRS))
+ kvm_enable_efer_bits(EFER_AUTOIBRS);
+}
+
static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
{
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
@@ -10134,6 +10179,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (r != 0)
goto out_mmu_exit;
+ kvm_setup_efer_caps();
+
enable_device_posted_irqs &= enable_apicv &&
irq_remapping_cap(IRQ_POSTING_CAP);
@@ -10736,12 +10783,10 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
X86_EFLAGS_RF);
- if (vcpu->arch.exception.vector == DB_VECTOR) {
- kvm_deliver_exception_payload(vcpu, &vcpu->arch.exception);
- if (vcpu->arch.dr7 & DR7_GD) {
- vcpu->arch.dr7 &= ~DR7_GD;
- kvm_update_dr7(vcpu);
- }
+ if (vcpu->arch.exception.vector == DB_VECTOR &&
+ vcpu->arch.dr7 & DR7_GD) {
+ vcpu->arch.dr7 &= ~DR7_GD;
+ kvm_update_dr7(vcpu);
}
kvm_inject_exception(vcpu);
@@ -10973,7 +11018,11 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
old = new = kvm->arch.apicv_inhibit_reasons;
- set_or_clear_apicv_inhibit(&new, reason, set);
+ if (reason != APICV_INHIBIT_REASON_IRQWIN)
+ set_or_clear_apicv_inhibit(&new, reason, set);
+
+ set_or_clear_apicv_inhibit(&new, APICV_INHIBIT_REASON_IRQWIN,
+ atomic_read(&kvm->arch.apicv_nr_irq_window_req));
if (!!old != !!new) {
/*
@@ -11014,6 +11063,45 @@ void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_or_clear_apicv_inhibit);
+void kvm_inc_or_dec_irq_window_inhibit(struct kvm *kvm, bool inc)
+{
+ int add = inc ? 1 : -1;
+
+ if (!enable_apicv)
+ return;
+
+ /*
+ * IRQ windows are requested either because of ExtINT injections, or
+ * because APICv is already disabled/inhibited for another reason.
+ * While ExtINT injections are rare and should not happen while the
+ * vCPU is running its actual workload, it's worth avoiding thrashing
+ * if the IRQ window is being requested because APICv is already
+ * inhibited. So, toggle the actual inhibit (which requires taking
+ * the lock for write) if and only if there's no other inhibit.
+ * kvm_set_or_clear_apicv_inhibit() always evaluates the IRQ window
+ * count; thus the IRQ window inhibit call _will_ be lazily updated on
+ * the next call, if it ever happens.
+ */
+ if (READ_ONCE(kvm->arch.apicv_inhibit_reasons) & ~BIT(APICV_INHIBIT_REASON_IRQWIN)) {
+ guard(rwsem_read)(&kvm->arch.apicv_update_lock);
+ if (READ_ONCE(kvm->arch.apicv_inhibit_reasons) & ~BIT(APICV_INHIBIT_REASON_IRQWIN)) {
+ atomic_add(add, &kvm->arch.apicv_nr_irq_window_req);
+ return;
+ }
+ }
+
+ /*
+ * Strictly speaking, the lock is only needed if going 0->1 or 1->0,
+ * a la atomic_dec_and_mutex_lock. However, ExtINTs are rare and
+ * only target a single CPU, so that is the common case; do not
+ * bother eliding the down_write()/up_write() pair.
+ */
+ guard(rwsem_write)(&kvm->arch.apicv_update_lock);
+ if (atomic_add_return(add, &kvm->arch.apicv_nr_irq_window_req) == inc)
+ __kvm_set_or_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_IRQWIN, inc);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inc_or_dec_irq_window_inhibit);
+
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
if (!kvm_apic_present(vcpu))
@@ -11804,7 +11892,8 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
static int complete_emulated_pio(struct kvm_vcpu *vcpu)
{
- BUG_ON(!vcpu->arch.pio.count);
+ if (KVM_BUG_ON(!vcpu->arch.pio.count, vcpu->kvm))
+ return -EIO;
return complete_emulated_io(vcpu);
}
@@ -11833,7 +11922,8 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
struct kvm_mmio_fragment *frag;
unsigned len;
- BUG_ON(!vcpu->mmio_needed);
+ if (KVM_BUG_ON(!vcpu->mmio_needed, vcpu->kvm))
+ return -EIO;
/* Complete previous fragment */
frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
@@ -11846,6 +11936,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
frag++;
vcpu->mmio_cur_fragment++;
} else {
+ if (WARN_ON_ONCE(frag->data == &frag->val))
+ return -EIO;
+
/* Go forward to the next mmio piece. */
frag->data += len;
frag->gpa += len;
@@ -11862,12 +11955,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return complete_emulated_io(vcpu);
}
- run->exit_reason = KVM_EXIT_MMIO;
- run->mmio.phys_addr = frag->gpa;
- if (vcpu->mmio_is_write)
- memcpy(run->mmio.data, frag->data, min(8u, frag->len));
- run->mmio.len = min(8u, frag->len);
- run->mmio.is_write = vcpu->mmio_is_write;
+ kvm_prepare_emulated_mmio_exit(vcpu, frag);
vcpu->arch.complete_userspace_io = complete_emulated_mmio;
return 0;
}
@@ -11897,6 +11985,13 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
static int kvm_x86_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
/*
+ * Userspace may have modified vCPU state, mark nested_run_pending as
+ * "untrusted" to avoid triggering false-positive WARNs.
+ */
+ if (vcpu->arch.nested_run_pending == KVM_NESTED_RUN_PENDING)
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING_UNTRUSTED;
+
+ /*
* SIPI_RECEIVED is obsolete; KVM leaves the vCPU in Wait-For-SIPI and
* tracks the pending SIPI separately. SIPI_RECEIVED is still accepted
* by KVM_SET_VCPU_EVENTS for backwards compatibility, but should be
@@ -12136,6 +12231,8 @@ static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
if (vcpu->arch.guest_state_protected)
goto skip_protected_regs;
+ kvm_handle_exception_payload_quirk(vcpu);
+
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -12529,7 +12626,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
- if (kvm_is_exception_pending(vcpu))
+ if (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected)
goto out;
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
@@ -13073,12 +13170,12 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_deliver_sipi_vector);
void kvm_arch_enable_virtualization(void)
{
- cpu_emergency_register_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu);
+ x86_virt_register_emergency_callback(kvm_x86_ops.emergency_disable_virtualization_cpu);
}
void kvm_arch_disable_virtualization(void)
{
- cpu_emergency_unregister_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu);
+ x86_virt_unregister_emergency_callback(kvm_x86_ops.emergency_disable_virtualization_cpu);
}
int kvm_arch_enable_virtualization_cpu(void)
@@ -13177,6 +13274,25 @@ int kvm_arch_enable_virtualization_cpu(void)
return 0;
}
+void kvm_arch_shutdown(void)
+{
+ /*
+ * Set virt_rebooting to indicate that KVM has asynchronously disabled
+ * hardware virtualization, i.e. that errors and/or exceptions on SVM
+ * and VMX instructions are expected and should be ignored.
+ */
+ virt_rebooting = true;
+
+ /*
+ * Ensure virt_rebooting is visible before IPIs are sent to other CPUs
+ * to disable virtualization. Effectively pairs with the reception of
+ * the IPI (virt_rebooting is read in task/exception context, but only
+ * _needs_ to be read as %true after the IPI function callback disables
+ * virtualization).
+ */
+ smp_wmb();
+}
+
void kvm_arch_disable_virtualization_cpu(void)
{
kvm_x86_call(disable_virtualization_cpu)();
@@ -13191,7 +13307,7 @@ void kvm_arch_disable_virtualization_cpu(void)
* disable virtualization arrives. Handle the extreme edge case here
* instead of trying to account for it in the normal flows.
*/
- if (in_task() || WARN_ON_ONCE(!kvm_rebooting))
+ if (in_task() || WARN_ON_ONCE(!virt_rebooting))
drop_user_return_notifiers();
else
__module_get(THIS_MODULE);
@@ -14243,7 +14359,8 @@ static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
struct kvm_mmio_fragment *frag;
unsigned int len;
- BUG_ON(!vcpu->mmio_needed);
+ if (KVM_BUG_ON(!vcpu->mmio_needed, vcpu->kvm))
+ return -EIO;
/* Complete previous fragment */
frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
@@ -14265,73 +14382,32 @@ static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
vcpu->mmio_needed = 0;
- // VMG change, at this point, we're always done
- // RIP has already been advanced
+ /*
+ * All done, as frag->data always points at the GHCB scratch
+ * area and VMGEXIT is trap-like (RIP is advanced by hardware).
+ */
return 1;
}
// More MMIO is needed
- run->mmio.phys_addr = frag->gpa;
- run->mmio.len = min(8u, frag->len);
- run->mmio.is_write = vcpu->mmio_is_write;
- if (run->mmio.is_write)
- memcpy(run->mmio.data, frag->data, min(8u, frag->len));
- run->exit_reason = KVM_EXIT_MMIO;
-
+ kvm_prepare_emulated_mmio_exit(vcpu, frag);
vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
-
return 0;
}
-int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
- void *data)
+int kvm_sev_es_mmio(struct kvm_vcpu *vcpu, bool is_write, gpa_t gpa,
+ unsigned int bytes, void *data)
{
- int handled;
struct kvm_mmio_fragment *frag;
-
- if (!data)
- return -EINVAL;
-
- handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data);
- if (handled == bytes)
- return 1;
-
- bytes -= handled;
- gpa += handled;
- data += handled;
-
- /*TODO: Check if need to increment number of frags */
- frag = vcpu->mmio_fragments;
- vcpu->mmio_nr_fragments = 1;
- frag->len = bytes;
- frag->gpa = gpa;
- frag->data = data;
-
- vcpu->mmio_needed = 1;
- vcpu->mmio_cur_fragment = 0;
-
- vcpu->run->mmio.phys_addr = gpa;
- vcpu->run->mmio.len = min(8u, frag->len);
- vcpu->run->mmio.is_write = 1;
- memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
- vcpu->run->exit_reason = KVM_EXIT_MMIO;
-
- vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
-
- return 0;
-}
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_write);
-
-int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
- void *data)
-{
int handled;
- struct kvm_mmio_fragment *frag;
- if (!data)
+ if (!data || WARN_ON_ONCE(object_is_on_stack(data)))
return -EINVAL;
- handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data);
+ if (is_write)
+ handled = vcpu_mmio_write(vcpu, gpa, bytes, data);
+ else
+ handled = vcpu_mmio_read(vcpu, gpa, bytes, data);
if (handled == bytes)
return 1;
@@ -14339,26 +14415,25 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
gpa += handled;
data += handled;
- /*TODO: Check if need to increment number of frags */
+ /*
+ * TODO: Determine whether or not userspace plays nice with MMIO
+ * requests that split a page boundary.
+ */
frag = vcpu->mmio_fragments;
- vcpu->mmio_nr_fragments = 1;
frag->len = bytes;
frag->gpa = gpa;
frag->data = data;
vcpu->mmio_needed = 1;
vcpu->mmio_cur_fragment = 0;
+ vcpu->mmio_nr_fragments = 1;
+ vcpu->mmio_is_write = is_write;
- vcpu->run->mmio.phys_addr = gpa;
- vcpu->run->mmio.len = min(8u, frag->len);
- vcpu->run->mmio.is_write = 0;
- vcpu->run->exit_reason = KVM_EXIT_MMIO;
-
+ kvm_prepare_emulated_mmio_exit(vcpu, frag);
vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
-
return 0;
}
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_read);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio);
static void advance_sev_es_emulated_pio(struct kvm_vcpu *vcpu, unsigned count, int size)
{
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 94d4f07aaaa0..38a905fa86de 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -188,6 +188,16 @@ static inline bool kvm_can_set_cpuid_and_feature_msrs(struct kvm_vcpu *vcpu)
return vcpu->arch.last_vmentry_cpu == -1 && !is_guest_mode(vcpu);
}
+/*
+ * WARN if a nested VM-Enter is pending completion, and userspace hasn't gained
+ * control since the nested VM-Enter was initiated (in which case, userspace
+ * may have modified vCPU state to induce an architecturally invalid VM-Exit).
+ */
+static inline void kvm_warn_on_nested_run_pending(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.nested_run_pending == KVM_NESTED_RUN_PENDING);
+}
+
static inline void kvm_set_mp_state(struct kvm_vcpu *vcpu, int mp_state)
{
vcpu->arch.mp_state = mp_state;
@@ -712,14 +722,38 @@ static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
__reserved_bits; \
})
-int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes,
- void *dst);
-int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes,
- void *dst);
+int kvm_sev_es_mmio(struct kvm_vcpu *vcpu, bool is_write, gpa_t gpa,
+ unsigned int bytes, void *data);
int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
unsigned int port, void *data, unsigned int count,
int in);
+static inline void __kvm_prepare_emulated_mmio_exit(struct kvm_vcpu *vcpu,
+ gpa_t gpa, unsigned int len,
+ const void *data,
+ bool is_write)
+{
+ struct kvm_run *run = vcpu->run;
+
+ KVM_BUG_ON(len > 8, vcpu->kvm);
+
+ run->mmio.len = len;
+ run->mmio.is_write = is_write;
+ run->exit_reason = KVM_EXIT_MMIO;
+ run->mmio.phys_addr = gpa;
+ if (is_write)
+ memcpy(run->mmio.data, data, len);
+}
+
+static inline void kvm_prepare_emulated_mmio_exit(struct kvm_vcpu *vcpu,
+ struct kvm_mmio_fragment *frag)
+{
+ WARN_ON_ONCE(!vcpu->mmio_needed || !vcpu->mmio_nr_fragments);
+
+ __kvm_prepare_emulated_mmio_exit(vcpu, frag->gpa, min(8u, frag->len),
+ frag->data, vcpu->mmio_is_write);
+}
+
static inline bool user_exit_on_hypercall(struct kvm *kvm, unsigned long hc_nr)
{
return kvm->arch.hypercall_exit_enabled & BIT(hc_nr);
diff --git a/arch/x86/lib/copy_user_uncached_64.S b/arch/x86/lib/copy_user_uncached_64.S
index 18350b343c2a..8ed0ce3ad227 100644
--- a/arch/x86/lib/copy_user_uncached_64.S
+++ b/arch/x86/lib/copy_user_uncached_64.S
@@ -27,7 +27,7 @@
* Output:
* rax uncopied bytes or 0 if successful.
*/
-SYM_FUNC_START(__copy_user_nocache)
+SYM_FUNC_START(copy_to_nontemporal)
ANNOTATE_NOENDBR
/* If destination is not 7-byte aligned, we'll have to align it */
testb $7,%dil
@@ -240,5 +240,5 @@ _ASM_EXTABLE_UA(95b, .Ldone)
_ASM_EXTABLE_UA(52b, .Ldone0)
_ASM_EXTABLE_UA(53b, .Ldone0)
-SYM_FUNC_END(__copy_user_nocache)
-EXPORT_SYMBOL(__copy_user_nocache)
+SYM_FUNC_END(copy_to_nontemporal)
+EXPORT_SYMBOL(copy_to_nontemporal)
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index f6f436f1d573..ac27e39fc993 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -322,10 +322,11 @@ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n)
}
EXPORT_SYMBOL(__copy_user_ll);
-unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+unsigned long copy_from_user_inatomic_nontemporal(void *to, const void __user *from,
unsigned long n)
{
- __uaccess_begin_nospec();
+ if (!user_access_begin(from, n))
+ return n;
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_intel_nocache(to, from, n);
@@ -334,7 +335,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
#else
__copy_user(to, from, n);
#endif
- __uaccess_end();
+ user_access_end();
return n;
}
-EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
+EXPORT_SYMBOL(copy_from_user_inatomic_nontemporal);
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 654280aaa3e9..c47d8cd0e243 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -43,17 +43,17 @@ void arch_wb_cache_pmem(void *addr, size_t size)
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
-long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
+size_t copy_user_flushcache(void *dst, const void __user *src, size_t size)
{
unsigned long flushed, dest = (unsigned long) dst;
- long rc;
+ unsigned long rc;
- stac();
- rc = __copy_user_nocache(dst, src, size);
- clac();
+ src = masked_user_access_begin(src);
+ rc = copy_to_nontemporal(dst, (__force const void *)src, size);
+ user_access_end();
/*
- * __copy_user_nocache() uses non-temporal stores for the bulk
+ * copy_to_nontemporal() uses non-temporal stores for the bulk
* of the transfer, but we need to manually flush if the
* transfer is unaligned. A cached memory copy is used when
* destination or size is not naturally aligned. That is:
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 5b9908f13dcf..3a5364853eab 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -4,6 +4,8 @@ KCOV_INSTRUMENT_tlb.o := n
KCOV_INSTRUMENT_mem_encrypt.o := n
KCOV_INSTRUMENT_mem_encrypt_amd.o := n
KCOV_INSTRUMENT_pgprot.o := n
+# See the "Disable KCOV" comment in arch/x86/kernel/Makefile.
+KCOV_INSTRUMENT_physaddr.o := n
KASAN_SANITIZE_mem_encrypt.o := n
KASAN_SANITIZE_mem_encrypt_amd.o := n
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b83a06739b51..f0e77e084482 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1314,7 +1314,7 @@ void do_user_addr_fault(struct pt_regs *regs,
* to consider the PF_PK bit.
*/
if (is_vsyscall_vaddr(address)) {
- if (emulate_vsyscall(error_code, regs, address))
+ if (emulate_vsyscall_pf(error_code, regs, address))
return;
}
#endif
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 7a97327140df..99d0a9332c14 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -48,6 +48,8 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
+nodemask_t numa_phys_nodes_parsed __initdata;
+
int numa_cpu_node(int cpu)
{
u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
@@ -57,6 +59,11 @@ int numa_cpu_node(int cpu)
return NUMA_NO_NODE;
}
+int __init num_phys_nodes(void)
+{
+ return bitmap_weight(numa_phys_nodes_parsed.bits, MAX_NUMNODES);
+}
+
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);
@@ -210,6 +217,7 @@ static int __init dummy_numa_init(void)
0LLU, PFN_PHYS(max_pfn) - 1);
node_set(0, numa_nodes_parsed);
+ node_set(0, numa_phys_nodes_parsed);
numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
return 0;
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 40581a720fe8..cba907c39718 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1119,9 +1119,10 @@ set:
static int
__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
- struct page *base)
+ struct ptdesc *ptdesc)
{
unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1;
+ struct page *base = ptdesc_page(ptdesc);
pte_t *pbase = (pte_t *)page_address(base);
unsigned int i, level;
pgprot_t ref_prot;
@@ -1226,18 +1227,18 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
unsigned long address)
{
- struct page *base;
+ struct ptdesc *ptdesc;
if (!debug_pagealloc_enabled())
spin_unlock(&cpa_lock);
- base = alloc_pages(GFP_KERNEL, 0);
+ ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (!debug_pagealloc_enabled())
spin_lock(&cpa_lock);
- if (!base)
+ if (!ptdesc)
return -ENOMEM;
- if (__split_large_page(cpa, kpte, address, base))
- __free_page(base);
+ if (__split_large_page(cpa, kpte, address, ptdesc))
+ pagetable_free(ptdesc);
return 0;
}
@@ -1408,7 +1409,7 @@ static bool try_to_free_pte_page(pte_t *pte)
if (!pte_none(pte[i]))
return false;
- free_page((unsigned long)pte);
+ pte_free_kernel(&init_mm, pte);
return true;
}
@@ -1420,7 +1421,7 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
if (!pmd_none(pmd[i]))
return false;
- free_page((unsigned long)pmd);
+ pmd_free(&init_mm, pmd);
return true;
}
@@ -1539,7 +1540,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
static int alloc_pte_page(pmd_t *pmd)
{
- pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
+ pte_t *pte = pte_alloc_one_kernel(&init_mm);
if (!pte)
return -1;
@@ -1549,7 +1550,11 @@ static int alloc_pte_page(pmd_t *pmd)
static int alloc_pmd_page(pud_t *pud)
{
- pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+ /*
+ * Pass 0 as a placeholder for the second argument, since the
+ * generic implementation of pmd_alloc_one() does not use it.
+ */
+ pmd_t *pmd = pmd_alloc_one(&init_mm, 0);
if (!pmd)
return -1;
@@ -1743,7 +1748,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
pgd_entry = cpa->pgd + pgd_index(addr);
if (pgd_none(*pgd_entry)) {
- p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
+ /*
+ * Pass 0 as a placeholder for the second argument, since the
+ * generic implementation of p4d_alloc_one() does not use it.
+ */
+ p4d = p4d_alloc_one(&init_mm, 0);
if (!p4d)
return -1;
@@ -1755,7 +1764,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
*/
p4d = p4d_offset(pgd_entry, addr);
if (p4d_none(*p4d)) {
- pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
+ /*
+ * Pass 0 as a placeholder for the second argument, since the
+ * generic implementation of pud_alloc_one() does not use it.
+ */
+ pud = pud_alloc_one(&init_mm, 0);
if (!pud)
return -1;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 2e5ecfdce73c..da7f0a03cf90 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -443,10 +443,10 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
}
#endif
-int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
- int ret = 0;
+ bool ret = false;
if (pte_young(*ptep))
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
@@ -456,10 +456,10 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
}
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long addr, pmd_t *pmdp)
+bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
{
- int ret = 0;
+ bool ret = false;
if (pmd_young(*pmdp))
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
@@ -470,10 +470,10 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int pudp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long addr, pud_t *pudp)
+bool pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp)
{
- int ret = 0;
+ bool ret = false;
if (pud_young(*pudp))
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
@@ -483,8 +483,8 @@ int pudp_test_and_clear_young(struct vm_area_struct *vma,
}
#endif
-int ptep_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+bool ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
/*
* On x86 CPUs, clearing the accessed bit without a TLB flush
@@ -503,10 +503,10 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+bool pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
{
- int young;
+ bool young;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 7418c367e328..1dfcfaf77e23 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -42,8 +42,7 @@ int __execute_only_pkey(struct mm_struct *mm)
* Set up PKRU so that it denies access for everything
* other than execution.
*/
- ret = arch_set_user_pkey_access(current, execute_only_pkey,
- PKEY_DISABLE_ACCESS);
+ ret = arch_set_user_pkey_access(execute_only_pkey, PKEY_DISABLE_ACCESS);
/*
* If the PKRU-set operation failed somehow, just return
* 0 and effectively disable execute-only support.
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 6f8e0f21c710..44ca66651756 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -57,6 +57,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
}
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
+ node_set(node, numa_phys_nodes_parsed);
pr_debug("SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node);
}
@@ -97,6 +98,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
+ node_set(node, numa_phys_nodes_parsed);
pr_debug("SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node);
}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 621e09d049cb..af43d177087e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -972,27 +972,6 @@ reload_tlb:
}
/*
- * Please ignore the name of this function. It should be called
- * switch_to_kernel_thread().
- *
- * enter_lazy_tlb() is a hint from the scheduler that we are entering a
- * kernel thread or other context without an mm. Acceptable implementations
- * include doing nothing whatsoever, switching to init_mm, or various clever
- * lazy tricks to try to minimize TLB flushes.
- *
- * The scheduler reserves the right to call enter_lazy_tlb() several times
- * in a row. It will notify us that we're going back to a real mm by
- * calling switch_mm_irqs_off().
- */
-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
- if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
- return;
-
- this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
-}
-
-/*
* Using a temporary mm allows to set temporary mappings that are not accessible
* by other CPUs. Such mappings are needed to perform sensitive memory writes
* that override the kernel memory protections (e.g., W^X), without exposing the
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index c4ec39ad276b..e2de26b82940 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -153,7 +153,8 @@ skip_isa_ioresource_align(struct pci_dev *dev) {
*/
resource_size_t
pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
+ const struct resource *empty_res,
+ resource_size_t size, resource_size_t align)
{
struct pci_dev *dev = data;
resource_size_t start = res->start;
@@ -164,6 +165,8 @@ pcibios_align_resource(void *data, const struct resource *res,
if (start & 0x300)
start = (start + 0x3ff) & ~0x3ff;
} else if (res->flags & IORESOURCE_MEM) {
+ start = pci_align_resource(dev, res, empty_res, size, align);
+
/* The low 1MB range is reserved for ISA cards */
if (start < BIOS_END)
start = BIOS_END;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 74032f3ab9b0..0c39adb96b91 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -836,7 +836,7 @@ static void __init __efi_enter_virtual_mode(void)
}
efi_check_for_embedded_firmwares();
- efi_free_boot_services();
+ efi_unmap_boot_services();
if (!efi_is_mixed())
efi_native_runtime_setup();
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index b4409df2105a..5861008eab22 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -55,6 +55,7 @@
*/
static u64 efi_va = EFI_VA_START;
static struct mm_struct *efi_prev_mm;
+static unsigned long efi_cr4_lass;
/*
* We need our own copy of the higher levels of the page tables
@@ -443,16 +444,50 @@ static void efi_leave_mm(void)
unuse_temporary_mm(efi_prev_mm);
}
+/*
+ * Toggle LASS to allow EFI to access any 1:1 mapped region in the lower
+ * half.
+ *
+ * Disable LASS only after switching to EFI-mm, as userspace is not
+ * mapped in it. Similar to EFI-mm, these rely on preemption being
+ * disabled and the calls being serialized.
+ */
+
+static void efi_disable_lass(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_LASS))
+ return;
+
+ lockdep_assert_preemption_disabled();
+
+ /* Save current CR4.LASS state */
+ efi_cr4_lass = cr4_read_shadow() & X86_CR4_LASS;
+ cr4_clear_bits(efi_cr4_lass);
+}
+
+static void efi_enable_lass(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_LASS))
+ return;
+
+ lockdep_assert_preemption_disabled();
+
+ /* Reprogram CR4.LASS only if it was set earlier */
+ cr4_set_bits(efi_cr4_lass);
+}
+
void arch_efi_call_virt_setup(void)
{
efi_sync_low_kernel_mappings();
efi_fpu_begin();
firmware_restrict_branch_speculation_start();
efi_enter_mm();
+ efi_disable_lass();
}
void arch_efi_call_virt_teardown(void)
{
+ efi_enable_lass();
efi_leave_mm();
firmware_restrict_branch_speculation_end();
efi_fpu_end();
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 553f330198f2..79f0818131e8 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -341,7 +341,7 @@ void __init efi_reserve_boot_services(void)
/*
* Because the following memblock_reserve() is paired
- * with memblock_free_late() for this region in
+ * with free_reserved_area() for this region in
* efi_free_boot_services(), we must be extremely
* careful not to reserve, and subsequently free,
* critical regions of memory (like the kernel image) or
@@ -404,17 +404,33 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
pr_err("Failed to unmap VA mapping for 0x%llx\n", va);
}
-void __init efi_free_boot_services(void)
+struct efi_freeable_range {
+ u64 start;
+ u64 end;
+};
+
+static struct efi_freeable_range *ranges_to_free;
+
+void __init efi_unmap_boot_services(void)
{
struct efi_memory_map_data data = { 0 };
efi_memory_desc_t *md;
int num_entries = 0;
+ int idx = 0;
+ size_t sz;
void *new, *new_md;
/* Keep all regions for /sys/kernel/debug/efi */
if (efi_enabled(EFI_DBG))
return;
+ sz = sizeof(*ranges_to_free) * (efi.memmap.nr_map + 1);
+ ranges_to_free = kzalloc(sz, GFP_KERNEL);
+ if (!ranges_to_free) {
+ pr_err("Failed to allocate storage for freeable EFI regions\n");
+ return;
+ }
+
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -471,7 +487,15 @@ void __init efi_free_boot_services(void)
start = SZ_1M;
}
- memblock_free_late(start, size);
+ /*
+ * With CONFIG_DEFERRED_STRUCT_PAGE_INIT parts of the memory
+ * map are still not initialized and we can't reliably free
+ * memory here.
+ * Queue the ranges to free at a later point.
+ */
+ ranges_to_free[idx].start = start;
+ ranges_to_free[idx].end = start + size;
+ idx++;
}
if (!num_entries)
@@ -512,6 +536,31 @@ void __init efi_free_boot_services(void)
}
}
+static int __init efi_free_boot_services(void)
+{
+ struct efi_freeable_range *range = ranges_to_free;
+ unsigned long freed = 0;
+
+ if (!ranges_to_free)
+ return 0;
+
+ while (range->start) {
+ void *start = phys_to_virt(range->start);
+ void *end = phys_to_virt(range->end);
+
+ free_reserved_area(start, end, -1, NULL);
+ freed += (end - start);
+ range++;
+ }
+ kfree(ranges_to_free);
+
+ if (freed)
+ pr_info("Freeing EFI boot services memory: %ldK\n", freed / SZ_1K);
+
+ return 0;
+}
+arch_initcall(efi_free_boot_services);
+
/*
* A number of config table entries get remapped to virtual addresses
* after entering EFI virtual mode. However, the kexec kernel requires
diff --git a/arch/x86/platform/geode/geode-common.c b/arch/x86/platform/geode/geode-common.c
index 05189c5f7d2a..1843ae385e2d 100644
--- a/arch/x86/platform/geode/geode-common.c
+++ b/arch/x86/platform/geode/geode-common.c
@@ -28,8 +28,10 @@ static const struct software_node geode_gpio_keys_node = {
.properties = geode_gpio_keys_props,
};
-static struct property_entry geode_restart_key_props[] = {
- { /* Placeholder for GPIO property */ },
+static struct software_node_ref_args geode_restart_gpio_ref;
+
+static const struct property_entry geode_restart_key_props[] = {
+ PROPERTY_ENTRY_REF_ARRAY_LEN("gpios", &geode_restart_gpio_ref, 1),
PROPERTY_ENTRY_U32("linux,code", KEY_RESTART),
PROPERTY_ENTRY_STRING("label", "Reset button"),
PROPERTY_ENTRY_U32("debounce-interval", 100),
@@ -64,8 +66,7 @@ int __init geode_create_restart_key(unsigned int pin)
struct platform_device *pd;
int err;
- geode_restart_key_props[0] = PROPERTY_ENTRY_GPIO("gpios",
- &geode_gpiochip_node,
+ geode_restart_gpio_ref = SOFTWARE_NODE_REFERENCE(&geode_gpiochip_node,
pin, GPIO_ACTIVE_LOW);
err = software_node_register_node_group(geode_gpio_keys_swnodes);
@@ -99,6 +100,7 @@ int __init geode_create_leds(const char *label, const struct geode_led *leds,
const struct software_node *group[MAX_LEDS + 2] = { 0 };
struct software_node *swnodes;
struct property_entry *props;
+ struct software_node_ref_args *gpio_refs;
struct platform_device_info led_info = {
.name = "leds-gpio",
.id = PLATFORM_DEVID_NONE,
@@ -127,6 +129,12 @@ int __init geode_create_leds(const char *label, const struct geode_led *leds,
goto err_free_swnodes;
}
+ gpio_refs = kzalloc_objs(*gpio_refs, n_leds);
+ if (!gpio_refs) {
+ err = -ENOMEM;
+ goto err_free_props;
+ }
+
group[0] = &geode_gpio_leds_node;
for (i = 0; i < n_leds; i++) {
node_name = kasprintf(GFP_KERNEL, "%s:%d", label, i);
@@ -135,9 +143,11 @@ int __init geode_create_leds(const char *label, const struct geode_led *leds,
goto err_free_names;
}
+ gpio_refs[i] = SOFTWARE_NODE_REFERENCE(&geode_gpiochip_node,
+ leds[i].pin,
+ GPIO_ACTIVE_LOW);
props[i * 3 + 0] =
- PROPERTY_ENTRY_GPIO("gpios", &geode_gpiochip_node,
- leds[i].pin, GPIO_ACTIVE_LOW);
+ PROPERTY_ENTRY_REF_ARRAY_LEN("gpios", &gpio_refs[i], 1);
props[i * 3 + 1] =
PROPERTY_ENTRY_STRING("linux,default-trigger",
leds[i].default_on ?
@@ -171,6 +181,8 @@ err_unregister_group:
err_free_names:
while (--i >= 0)
kfree(swnodes[i].name);
+ kfree(gpio_refs);
+err_free_props:
kfree(props);
err_free_swnodes:
kfree(swnodes);
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
index 2263885d16ba..f2053cbe9b0c 100644
--- a/arch/x86/platform/pvh/enlighten.c
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -25,11 +25,6 @@ struct hvm_start_info __initdata pvh_start_info;
const unsigned int __initconst pvh_start_info_sz = sizeof(pvh_start_info);
-static u64 __init pvh_get_root_pointer(void)
-{
- return pvh_start_info.rsdp_paddr;
-}
-
/*
* Xen guests are able to obtain the memory map from the hypervisor via the
* HYPERVISOR_memory_op hypercall.
@@ -95,7 +90,7 @@ static void __init init_pvh_bootparams(bool xen_guest)
pvh_bootparams.hdr.version = (2 << 8) | 12;
pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
- x86_init.acpi.get_root_pointer = pvh_get_root_pointer;
+ pvh_bootparams.acpi_rsdp_addr = pvh_start_info.rsdp_paddr;
}
/*
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index a595953f1d6d..e72d26acae79 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -14,8 +14,6 @@
#include <linux/kdebug.h>
#include <linux/pgtable.h>
-#include <crypto/hash.h>
-
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/proto.h>
diff --git a/arch/x86/tools/vdso2c.c b/arch/x86/tools/vdso2c.c
index f84e8f8fa5fe..b8a555763f43 100644
--- a/arch/x86/tools/vdso2c.c
+++ b/arch/x86/tools/vdso2c.c
@@ -75,7 +75,6 @@ struct vdso_sym {
};
struct vdso_sym required_syms[] = {
- {"VDSO32_NOTE_MASK", true},
{"__kernel_vsyscall", true},
{"__kernel_sigreturn", true},
{"__kernel_rt_sigreturn", true},
diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h
index df7a3896f5dd..622d36d6ddff 100644
--- a/arch/x86/um/asm/vm-flags.h
+++ b/arch/x86/um/asm/vm-flags.h
@@ -9,11 +9,11 @@
#ifdef CONFIG_X86_32
-#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
+#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_TSK_EXEC
#else
-#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_DATA_FLAGS_EXEC)
+#define VMA_STACK_DEFAULT_FLAGS append_vma_flags(VMA_DATA_FLAGS_EXEC, VMA_GROWSDOWN_BIT)
#endif
#endif
diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile
index ea343fc392dc..6e485751650c 100644
--- a/arch/x86/virt/Makefile
+++ b/arch/x86/virt/Makefile
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += svm/ vmx/
+
+obj-$(subst m,y,$(CONFIG_KVM_X86)) += hw.o
\ No newline at end of file
diff --git a/arch/x86/virt/hw.c b/arch/x86/virt/hw.c
new file mode 100644
index 000000000000..f647557d38ac
--- /dev/null
+++ b/arch/x86/virt/hw.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/kvm_types.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+
+#include <asm/perf_event.h>
+#include <asm/processor.h>
+#include <asm/virt.h>
+#include <asm/vmx.h>
+
+struct x86_virt_ops {
+ int feature;
+ int (*enable_virtualization_cpu)(void);
+ int (*disable_virtualization_cpu)(void);
+ void (*emergency_disable_virtualization_cpu)(void);
+};
+static struct x86_virt_ops virt_ops __ro_after_init;
+
+__visible bool virt_rebooting;
+EXPORT_SYMBOL_FOR_KVM(virt_rebooting);
+
+static DEFINE_PER_CPU(int, virtualization_nr_users);
+
+static cpu_emergency_virt_cb __rcu *kvm_emergency_callback;
+
+void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback)
+{
+ if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback)))
+ return;
+
+ rcu_assign_pointer(kvm_emergency_callback, callback);
+}
+EXPORT_SYMBOL_FOR_KVM(x86_virt_register_emergency_callback);
+
+void x86_virt_unregister_emergency_callback(cpu_emergency_virt_cb *callback)
+{
+ if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback) != callback))
+ return;
+
+ rcu_assign_pointer(kvm_emergency_callback, NULL);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_FOR_KVM(x86_virt_unregister_emergency_callback);
+
+static void x86_virt_invoke_kvm_emergency_callback(void)
+{
+ cpu_emergency_virt_cb *kvm_callback;
+
+ kvm_callback = rcu_dereference(kvm_emergency_callback);
+ if (kvm_callback)
+ kvm_callback();
+}
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+static DEFINE_PER_CPU(struct vmcs *, root_vmcs);
+
+static int x86_virt_cpu_vmxon(void)
+{
+ u64 vmxon_pointer = __pa(per_cpu(root_vmcs, raw_smp_processor_id()));
+ u64 msr;
+
+ cr4_set_bits(X86_CR4_VMXE);
+
+ asm goto("1: vmxon %[vmxon_pointer]\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ : : [vmxon_pointer] "m"(vmxon_pointer)
+ : : fault);
+ return 0;
+
+fault:
+ WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
+ rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
+ cr4_clear_bits(X86_CR4_VMXE);
+
+ return -EFAULT;
+}
+
+static int x86_vmx_enable_virtualization_cpu(void)
+{
+ int r;
+
+ if (cr4_read_shadow() & X86_CR4_VMXE)
+ return -EBUSY;
+
+ intel_pt_handle_vmx(1);
+
+ r = x86_virt_cpu_vmxon();
+ if (r) {
+ intel_pt_handle_vmx(0);
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
+ *
+ * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
+ * atomically track post-VMXON state, e.g. this may be called in NMI context.
+ * Eat all faults, as all other faults on VMXOFF are mode related, i.e.
+ * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
+ * magically in RM, VM86, compat mode, or at CPL>0.
+ */
+static int x86_vmx_disable_virtualization_cpu(void)
+{
+ int r = -EIO;
+
+ asm goto("1: vmxoff\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ ::: "cc", "memory" : fault);
+ r = 0;
+
+fault:
+ cr4_clear_bits(X86_CR4_VMXE);
+ intel_pt_handle_vmx(0);
+ return r;
+}
+
+static void x86_vmx_emergency_disable_virtualization_cpu(void)
+{
+ virt_rebooting = true;
+
+ /*
+ * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
+ * set in task context. If this races with _another_ emergency call
+ * from NMI context, VMCLEAR (in KVM) and VMXOFF may #UD, but KVM and
+ * the kernel will eat those faults due to virt_rebooting being set by
+ * the interrupting NMI callback.
+ */
+ if (!(__read_cr4() & X86_CR4_VMXE))
+ return;
+
+ x86_virt_invoke_kvm_emergency_callback();
+
+ x86_vmx_disable_virtualization_cpu();
+}
+
+static __init void x86_vmx_exit(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ free_page((unsigned long)per_cpu(root_vmcs, cpu));
+ per_cpu(root_vmcs, cpu) = NULL;
+ }
+}
+
+static __init int __x86_vmx_init(void)
+{
+ const struct x86_virt_ops vmx_ops = {
+ .feature = X86_FEATURE_VMX,
+ .enable_virtualization_cpu = x86_vmx_enable_virtualization_cpu,
+ .disable_virtualization_cpu = x86_vmx_disable_virtualization_cpu,
+ .emergency_disable_virtualization_cpu = x86_vmx_emergency_disable_virtualization_cpu,
+ };
+
+ u64 basic_msr;
+ u32 rev_id;
+ int cpu;
+
+ if (!cpu_feature_enabled(X86_FEATURE_VMX))
+ return -EOPNOTSUPP;
+
+ rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);
+
+ /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
+ if (WARN_ON_ONCE(vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE))
+ return -EIO;
+
+ /*
+ * Even if eVMCS is enabled (or will be enabled?), and even though not
+ * explicitly documented by TLFS, the root VMCS passed to VMXON should
+ * still be marked with the revision_id reported by the physical CPU.
+ */
+ rev_id = vmx_basic_vmcs_revision_id(basic_msr);
+
+ for_each_possible_cpu(cpu) {
+ int node = cpu_to_node(cpu);
+ struct page *page;
+ struct vmcs *vmcs;
+
+ page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ if (WARN_ON_ONCE(!page)) {
+ x86_vmx_exit();
+ return -ENOMEM;
+ }
+
+ vmcs = page_address(page);
+ vmcs->hdr.revision_id = rev_id;
+ per_cpu(root_vmcs, cpu) = vmcs;
+ }
+
+ memcpy(&virt_ops, &vmx_ops, sizeof(virt_ops));
+ return 0;
+}
+
+static __init int x86_vmx_init(void)
+{
+ int r;
+
+ r = __x86_vmx_init();
+ if (r)
+ setup_clear_cpu_cap(X86_FEATURE_VMX);
+ return r;
+}
+#else
+static __init int x86_vmx_init(void) { return -EOPNOTSUPP; }
+static __init void x86_vmx_exit(void) { }
+#endif
+
+#if IS_ENABLED(CONFIG_KVM_AMD)
+static int x86_svm_enable_virtualization_cpu(void)
+{
+ u64 efer;
+
+ rdmsrq(MSR_EFER, efer);
+ if (efer & EFER_SVME)
+ return -EBUSY;
+
+ wrmsrq(MSR_EFER, efer | EFER_SVME);
+ return 0;
+}
+
+static int x86_svm_disable_virtualization_cpu(void)
+{
+ int r = -EIO;
+ u64 efer;
+
+ /*
+ * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
+ * NMI aren't blocked.
+ */
+ asm goto("1: stgi\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ ::: "memory" : fault);
+ r = 0;
+
+fault:
+ rdmsrq(MSR_EFER, efer);
+ wrmsrq(MSR_EFER, efer & ~EFER_SVME);
+ return r;
+}
+
+static void x86_svm_emergency_disable_virtualization_cpu(void)
+{
+ u64 efer;
+
+ virt_rebooting = true;
+
+ rdmsrq(MSR_EFER, efer);
+ if (!(efer & EFER_SVME))
+ return;
+
+ x86_virt_invoke_kvm_emergency_callback();
+
+ x86_svm_disable_virtualization_cpu();
+}
+
+static __init int x86_svm_init(void)
+{
+ const struct x86_virt_ops svm_ops = {
+ .feature = X86_FEATURE_SVM,
+ .enable_virtualization_cpu = x86_svm_enable_virtualization_cpu,
+ .disable_virtualization_cpu = x86_svm_disable_virtualization_cpu,
+ .emergency_disable_virtualization_cpu = x86_svm_emergency_disable_virtualization_cpu,
+ };
+
+ if (!cpu_feature_enabled(X86_FEATURE_SVM) ||
+ cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return -EOPNOTSUPP;
+
+ memcpy(&virt_ops, &svm_ops, sizeof(virt_ops));
+ return 0;
+}
+#else
+static __init int x86_svm_init(void) { return -EOPNOTSUPP; }
+#endif
+
+int x86_virt_get_ref(int feat)
+{
+ int r;
+
+ /* Ensure the !feature check can't get false positives. */
+ BUILD_BUG_ON(!X86_FEATURE_SVM || !X86_FEATURE_VMX);
+
+ if (!virt_ops.feature || virt_ops.feature != feat)
+ return -EOPNOTSUPP;
+
+ guard(preempt)();
+
+ if (this_cpu_inc_return(virtualization_nr_users) > 1)
+ return 0;
+
+ r = virt_ops.enable_virtualization_cpu();
+ if (r)
+ WARN_ON_ONCE(this_cpu_dec_return(virtualization_nr_users));
+
+ return r;
+}
+EXPORT_SYMBOL_FOR_KVM(x86_virt_get_ref);
+
+void x86_virt_put_ref(int feat)
+{
+ guard(preempt)();
+
+ if (WARN_ON_ONCE(!this_cpu_read(virtualization_nr_users)) ||
+ this_cpu_dec_return(virtualization_nr_users))
+ return;
+
+ BUG_ON(virt_ops.disable_virtualization_cpu() && !virt_rebooting);
+}
+EXPORT_SYMBOL_FOR_KVM(x86_virt_put_ref);
+
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+int x86_virt_emergency_disable_virtualization_cpu(void)
+{
+ if (!virt_ops.feature)
+ return -EOPNOTSUPP;
+
+ /*
+ * IRQs must be disabled as virtualization is enabled in hardware via
+ * function call IPIs, i.e. IRQs need to be disabled to guarantee
+ * virtualization stays disabled.
+ */
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
+ * other CPUs may have virtualization enabled.
+ *
+ * TODO: Track whether or not virtualization might be enabled on other
+ * CPUs? May not be worth avoiding the NMI shootdown...
+ */
+ virt_ops.emergency_disable_virtualization_cpu();
+ return 0;
+}
+
+void __init x86_virt_init(void)
+{
+ /*
+ * Attempt to initialize both SVM and VMX, and simply use whichever one
+ * is present. Refuse to enable/use SVM or VMX if both are somehow
+ * supported. No known CPU supports both SVM and VMX.
+ */
+ bool has_vmx = !x86_vmx_init();
+ bool has_svm = !x86_svm_init();
+
+ if (WARN_ON_ONCE(has_vmx && has_svm)) {
+ x86_vmx_exit();
+ memset(&virt_ops, 0, sizeof(virt_ops));
+ }
+}
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index a4f3a364fb65..41f76f15caa1 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -117,6 +117,8 @@ static u64 rmp_segment_mask;
static u64 rmp_cfg;
+static void *rmp_bookkeeping __ro_after_init;
+
/* Mask to apply to a PFN to get the first PFN of a 2MB page */
#define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
@@ -130,33 +132,23 @@ static unsigned long snp_nr_leaked_pages;
#undef pr_fmt
#define pr_fmt(fmt) "SEV-SNP: " fmt
-static int __mfd_enable(unsigned int cpu)
+static void mfd_reconfigure(void *arg)
{
- u64 val;
-
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
- return 0;
-
- rdmsrq(MSR_AMD64_SYSCFG, val);
-
- val |= MSR_AMD64_SYSCFG_MFDM;
-
- wrmsrq(MSR_AMD64_SYSCFG, val);
-
- return 0;
-}
+ return;
-static __init void mfd_enable(void *arg)
-{
- __mfd_enable(smp_processor_id());
+ if (arg)
+ msr_set_bit(MSR_AMD64_SYSCFG, MSR_AMD64_SYSCFG_MFDM_BIT);
+ else
+ msr_clear_bit(MSR_AMD64_SYSCFG, MSR_AMD64_SYSCFG_MFDM_BIT);
}
-static int __snp_enable(unsigned int cpu)
+static void snp_enable(void *arg)
{
u64 val;
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
- return 0;
+ return;
rdmsrq(MSR_AMD64_SYSCFG, val);
@@ -164,13 +156,6 @@ static int __snp_enable(unsigned int cpu)
val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
wrmsrq(MSR_AMD64_SYSCFG, val);
-
- return 0;
-}
-
-static __init void snp_enable(void *arg)
-{
- __snp_enable(smp_processor_id());
}
static void __init __snp_fixup_e820_tables(u64 pa)
@@ -260,21 +245,30 @@ void __init snp_fixup_e820_tables(void)
}
}
-static bool __init clear_rmptable_bookkeeping(void)
+static void clear_rmp(void)
{
- void *bk;
+ unsigned int i;
+ u64 val;
- bk = memremap(probed_rmp_base, RMPTABLE_CPU_BOOKKEEPING_SZ, MEMREMAP_WB);
- if (!bk) {
- pr_err("Failed to map RMP bookkeeping area\n");
- return false;
- }
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ return;
- memset(bk, 0, RMPTABLE_CPU_BOOKKEEPING_SZ);
+ /* Clearing the RMP while SNP is enabled will cause an exception */
+ rdmsrq(MSR_AMD64_SYSCFG, val);
+ if (WARN_ON_ONCE(val & MSR_AMD64_SYSCFG_SNP_EN))
+ return;
- memunmap(bk);
+ memset(rmp_bookkeeping, 0, RMPTABLE_CPU_BOOKKEEPING_SZ);
- return true;
+ for (i = 0; i < rst_max_index; i++) {
+ struct rmp_segment_desc *desc;
+
+ desc = rmp_segment_table[i];
+ if (!desc)
+ continue;
+
+ memset(desc->rmp_entry, 0, desc->size);
+ }
}
static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
@@ -494,30 +488,32 @@ e_free:
static bool __init setup_rmptable(void)
{
if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) {
- return setup_segmented_rmptable();
+ if (!setup_segmented_rmptable())
+ return false;
} else {
- return setup_contiguous_rmptable();
+ if (!setup_contiguous_rmptable())
+ return false;
}
-}
-/*
- * Do the necessary preparations which are verified by the firmware as
- * described in the SNP_INIT_EX firmware command description in the SNP
- * firmware ABI spec.
- */
-int __init snp_rmptable_init(void)
-{
- unsigned int i;
- u64 val;
+ rmp_bookkeeping = memremap(probed_rmp_base, RMPTABLE_CPU_BOOKKEEPING_SZ, MEMREMAP_WB);
+ if (!rmp_bookkeeping) {
+ pr_err("Failed to map RMP bookkeeping area\n");
+ free_rmp_segment_table();
- if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP)))
- return -ENOSYS;
+ return false;
+ }
- if (WARN_ON_ONCE(!amd_iommu_snp_en))
- return -ENOSYS;
+ return true;
+}
- if (!setup_rmptable())
- return -ENOSYS;
+static void clear_hsave_pa(void *arg)
+{
+ wrmsrq(MSR_VM_HSAVE_PA, 0);
+}
+
+void snp_prepare(void)
+{
+ u64 val;
/*
* Check if SEV-SNP is already enabled, this can happen in case of
@@ -525,35 +521,54 @@ int __init snp_rmptable_init(void)
*/
rdmsrq(MSR_AMD64_SYSCFG, val);
if (val & MSR_AMD64_SYSCFG_SNP_EN)
- goto skip_enable;
+ return;
- /* Zero out the RMP bookkeeping area */
- if (!clear_rmptable_bookkeeping()) {
- free_rmp_segment_table();
- return -ENOSYS;
- }
+ clear_rmp();
- /* Zero out the RMP entries */
- for (i = 0; i < rst_max_index; i++) {
- struct rmp_segment_desc *desc;
+ cpus_read_lock();
- desc = rmp_segment_table[i];
- if (!desc)
- continue;
+ /*
+ * MtrrFixDramModEn is not shared between threads on a core,
+ * therefore it must be set on all CPUs prior to enabling SNP.
+ */
+ on_each_cpu(mfd_reconfigure, (void *)1, 1);
+ on_each_cpu(snp_enable, NULL, 1);
- memset(desc->rmp_entry, 0, desc->size);
- }
+ /* SNP_INIT requires MSR_VM_HSAVE_PA to be cleared on all CPUs. */
+ on_each_cpu(clear_hsave_pa, NULL, 1);
- /* Flush the caches to ensure that data is written before SNP is enabled. */
- wbinvd_on_all_cpus();
+ cpus_read_unlock();
+}
+EXPORT_SYMBOL_FOR_MODULES(snp_prepare, "ccp");
- /* MtrrFixDramModEn must be enabled on all the CPUs prior to enabling SNP. */
- on_each_cpu(mfd_enable, NULL, 1);
+void snp_shutdown(void)
+{
+ u64 syscfg;
- on_each_cpu(snp_enable, NULL, 1);
+ rdmsrq(MSR_AMD64_SYSCFG, syscfg);
+ if (syscfg & MSR_AMD64_SYSCFG_SNP_EN)
+ return;
+
+ clear_rmp();
+ on_each_cpu(mfd_reconfigure, NULL, 1);
+}
+EXPORT_SYMBOL_FOR_MODULES(snp_shutdown, "ccp");
-skip_enable:
- cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
+/*
+ * Do the necessary preparations which are verified by the firmware as
+ * described in the SNP_INIT_EX firmware command description in the SNP
+ * firmware ABI spec.
+ */
+int __init snp_rmptable_init(void)
+{
+ if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP)))
+ return -ENOSYS;
+
+ if (WARN_ON_ONCE(!amd_iommu_snp_en))
+ return -ENOSYS;
+
+ if (!setup_rmptable())
+ return -ENOSYS;
/*
* Setting crash_kexec_post_notifiers to 'true' to ensure that SNP panic
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 8b8e165a2001..cb9b3210ab71 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -28,6 +28,7 @@
#include <linux/log2.h>
#include <linux/acpi.h>
#include <linux/suspend.h>
+#include <linux/syscore_ops.h>
#include <linux/idr.h>
#include <linux/kvm_types.h>
#include <asm/page.h>
@@ -39,6 +40,7 @@
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
+#include <asm/virt.h>
#include "tdx.h"
static u32 tdx_global_keyid __ro_after_init;
@@ -51,13 +53,11 @@ static DEFINE_PER_CPU(bool, tdx_lp_initialized);
static struct tdmr_info_list tdx_tdmr_list;
-static enum tdx_module_status_t tdx_module_status;
-static DEFINE_MUTEX(tdx_module_lock);
-
/* All TDX-usable memory regions. Protected by mem_hotplug_lock. */
static LIST_HEAD(tdx_memlist);
-static struct tdx_sys_info tdx_sysinfo;
+static struct tdx_sys_info tdx_sysinfo __ro_after_init;
+static bool tdx_module_initialized __ro_after_init;
typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
@@ -106,8 +106,7 @@ static __always_inline int sc_retry_prerr(sc_func_t func,
/*
* Do the module global initialization once and return its result.
- * It can be done on any cpu. It's always called with interrupts
- * disabled.
+ * It can be done on any cpu, and from task or IRQ context.
*/
static int try_init_module_global(void)
{
@@ -116,8 +115,6 @@ static int try_init_module_global(void)
static bool sysinit_done;
static int sysinit_ret;
- lockdep_assert_irqs_disabled();
-
raw_spin_lock(&sysinit_lock);
if (sysinit_done)
@@ -142,26 +139,15 @@ out:
}
/**
- * tdx_cpu_enable - Enable TDX on local cpu
- *
- * Do one-time TDX module per-cpu initialization SEAMCALL (and TDX module
- * global initialization SEAMCALL if not done) on local cpu to make this
- * cpu be ready to run any other SEAMCALLs.
- *
- * Always call this function via IPI function calls.
- *
- * Return 0 on success, otherwise errors.
+ * Enable VMXON and then do one-time TDX module per-cpu initialization SEAMCALL
+ * (and TDX module global initialization SEAMCALL if not done) on local cpu to
+ * make this cpu be ready to run any other SEAMCALLs.
*/
-int tdx_cpu_enable(void)
+static int tdx_cpu_enable(void)
{
struct tdx_module_args args = {};
int ret;
- if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
- return -ENODEV;
-
- lockdep_assert_irqs_disabled();
-
if (__this_cpu_read(tdx_lp_initialized))
return 0;
@@ -182,15 +168,101 @@ int tdx_cpu_enable(void)
return 0;
}
-EXPORT_SYMBOL_FOR_KVM(tdx_cpu_enable);
+
+static int tdx_online_cpu(unsigned int cpu)
+{
+ int ret;
+
+ ret = x86_virt_get_ref(X86_FEATURE_VMX);
+ if (ret)
+ return ret;
+
+ ret = tdx_cpu_enable();
+ if (ret)
+ x86_virt_put_ref(X86_FEATURE_VMX);
+
+ return ret;
+}
+
+static int tdx_offline_cpu(unsigned int cpu)
+{
+ int i;
+
+ /* No TD is running. Allow any cpu to be offline. */
+ if (ida_is_empty(&tdx_guest_keyid_pool))
+ goto done;
+
+ /*
+ * In order to reclaim a TDX HKID (i.e. when deleting a guest TD), the
+ * kernel needs to call TDH.PHYMEM.PAGE.WBINVD on all packages to program
+ * all memory controllers with pconfig. If there is an active TDX HKID,
+ * refuse to offline the last online cpu of a package.
+ */
+ for_each_online_cpu(i) {
+ /*
+ * Found another online cpu on the same package.
+ * Allow to offline.
+ */
+ if (i != cpu && topology_physical_package_id(i) ==
+ topology_physical_package_id(cpu))
+ goto done;
+ }
+
+ /*
+ * This is the last cpu of this package. Don't offline it.
+ *
+ * Because the reason is hard for a human operator to
+ * work out, print a warning.
+ */
+#define MSG_ALLPKG_ONLINE \
+ "TDX requires all packages to have an online CPU. Delete all TDs in order to offline all CPUs of a package.\n"
+ pr_warn_ratelimited(MSG_ALLPKG_ONLINE);
+ return -EBUSY;
+
+done:
+ x86_virt_put_ref(X86_FEATURE_VMX);
+ return 0;
+}
+
+static void tdx_shutdown_cpu(void *ign)
+{
+ x86_virt_put_ref(X86_FEATURE_VMX);
+}
+
+static void tdx_shutdown(void *ign)
+{
+ on_each_cpu(tdx_shutdown_cpu, NULL, 1);
+}
+
+static int tdx_suspend(void *ign)
+{
+ x86_virt_put_ref(X86_FEATURE_VMX);
+ return 0;
+}
+
+static void tdx_resume(void *ign)
+{
+ WARN_ON_ONCE(x86_virt_get_ref(X86_FEATURE_VMX));
+}
+
+static const struct syscore_ops tdx_syscore_ops = {
+ .suspend = tdx_suspend,
+ .resume = tdx_resume,
+ .shutdown = tdx_shutdown,
+};
+
+static struct syscore tdx_syscore = {
+ .ops = &tdx_syscore_ops,
+};
/*
* Add a memory region as a TDX memory block. The caller must make sure
* all memory regions are added in address ascending order and don't
* overlap.
*/
-static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
- unsigned long end_pfn, int nid)
+static __init int add_tdx_memblock(struct list_head *tmb_list,
+ unsigned long start_pfn,
+ unsigned long end_pfn, int nid)
{
struct tdx_memblock *tmb;
@@ -208,7 +280,7 @@ static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
return 0;
}
-static void free_tdx_memlist(struct list_head *tmb_list)
+static __init void free_tdx_memlist(struct list_head *tmb_list)
{
/* @tmb_list is protected by mem_hotplug_lock */
while (!list_empty(tmb_list)) {
@@ -226,7 +298,7 @@ static void free_tdx_memlist(struct list_head *tmb_list)
* ranges off in a secondary structure because memblock is modified
* in memory hotplug while TDX memory regions are fixed.
*/
-static int build_tdx_memlist(struct list_head *tmb_list)
+static __init int build_tdx_memlist(struct list_head *tmb_list)
{
unsigned long start_pfn, end_pfn;
int i, nid, ret;
@@ -258,7 +330,7 @@ err:
return ret;
}
-static int read_sys_metadata_field(u64 field_id, u64 *data)
+static __init int read_sys_metadata_field(u64 field_id, u64 *data)
{
struct tdx_module_args args = {};
int ret;
@@ -280,7 +352,7 @@ static int read_sys_metadata_field(u64 field_id, u64 *data)
#include "tdx_global_metadata.c"
-static int check_features(struct tdx_sys_info *sysinfo)
+static __init int check_features(struct tdx_sys_info *sysinfo)
{
u64 tdx_features0 = sysinfo->features.tdx_features0;
@@ -293,7 +365,7 @@ static int check_features(struct tdx_sys_info *sysinfo)
}
/* Calculate the actual TDMR size */
-static int tdmr_size_single(u16 max_reserved_per_tdmr)
+static __init int tdmr_size_single(u16 max_reserved_per_tdmr)
{
int tdmr_sz;
@@ -307,8 +379,8 @@ static int tdmr_size_single(u16 max_reserved_per_tdmr)
return ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT);
}
-static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
- struct tdx_sys_info_tdmr *sysinfo_tdmr)
+static __init int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
+ struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
size_t tdmr_sz, tdmr_array_sz;
void *tdmr_array;
@@ -339,7 +411,7 @@ static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
return 0;
}
-static void free_tdmr_list(struct tdmr_info_list *tdmr_list)
+static __init void free_tdmr_list(struct tdmr_info_list *tdmr_list)
{
free_pages_exact(tdmr_list->tdmrs,
tdmr_list->max_tdmrs * tdmr_list->tdmr_sz);
@@ -368,8 +440,8 @@ static inline u64 tdmr_end(struct tdmr_info *tdmr)
* preallocated @tdmr_list, following all the special alignment
* and size rules for TDMR.
*/
-static int fill_out_tdmrs(struct list_head *tmb_list,
- struct tdmr_info_list *tdmr_list)
+static __init int fill_out_tdmrs(struct list_head *tmb_list,
+ struct tdmr_info_list *tdmr_list)
{
struct tdx_memblock *tmb;
int tdmr_idx = 0;
@@ -445,8 +517,8 @@ static int fill_out_tdmrs(struct list_head *tmb_list,
* Calculate PAMT size given a TDMR and a page size. The returned
* PAMT size is always aligned up to 4K page boundary.
*/
-static unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
- u16 pamt_entry_size)
+static __init unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
+ u16 pamt_entry_size)
{
unsigned long pamt_sz, nr_pamt_entries;
@@ -477,7 +549,7 @@ static unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
* PAMT. This node will have some memory covered by the TDMR. The
* relative amount of memory covered is not considered.
*/
-static int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
+static __init int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
{
struct tdx_memblock *tmb;
@@ -506,9 +578,9 @@ static int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
* Allocate PAMTs from the local NUMA node of some memory in @tmb_list
* within @tdmr, and set up PAMTs for @tdmr.
*/
-static int tdmr_set_up_pamt(struct tdmr_info *tdmr,
- struct list_head *tmb_list,
- u16 pamt_entry_size[])
+static __init int tdmr_set_up_pamt(struct tdmr_info *tdmr,
+ struct list_head *tmb_list,
+ u16 pamt_entry_size[])
{
unsigned long pamt_base[TDX_PS_NR];
unsigned long pamt_size[TDX_PS_NR];
@@ -578,7 +650,7 @@ static void tdmr_get_pamt(struct tdmr_info *tdmr, unsigned long *pamt_base,
*pamt_size = pamt_sz;
}
-static void tdmr_do_pamt_func(struct tdmr_info *tdmr,
+static __init void tdmr_do_pamt_func(struct tdmr_info *tdmr,
void (*pamt_func)(unsigned long base, unsigned long size))
{
unsigned long pamt_base, pamt_size;
@@ -595,17 +667,17 @@ static void tdmr_do_pamt_func(struct tdmr_info *tdmr,
pamt_func(pamt_base, pamt_size);
}
-static void free_pamt(unsigned long pamt_base, unsigned long pamt_size)
+static __init void free_pamt(unsigned long pamt_base, unsigned long pamt_size)
{
free_contig_range(pamt_base >> PAGE_SHIFT, pamt_size >> PAGE_SHIFT);
}
-static void tdmr_free_pamt(struct tdmr_info *tdmr)
+static __init void tdmr_free_pamt(struct tdmr_info *tdmr)
{
tdmr_do_pamt_func(tdmr, free_pamt);
}
-static void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
+static __init void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
{
int i;
@@ -614,9 +686,9 @@ static void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
}
/* Allocate and set up PAMTs for all TDMRs */
-static int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list,
- struct list_head *tmb_list,
- u16 pamt_entry_size[])
+static __init int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list,
+ struct list_head *tmb_list,
+ u16 pamt_entry_size[])
{
int i, ret = 0;
@@ -665,12 +737,13 @@ void tdx_quirk_reset_page(struct page *page)
}
EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page);
-static void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr)
+static __init void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr)
+
{
tdmr_do_pamt_func(tdmr, tdx_quirk_reset_paddr);
}
-static void tdmrs_quirk_reset_pamt_all(struct tdmr_info_list *tdmr_list)
+static __init void tdmrs_quirk_reset_pamt_all(struct tdmr_info_list *tdmr_list)
{
int i;
@@ -678,7 +751,7 @@ static void tdmrs_quirk_reset_pamt_all(struct tdmr_info_list *tdmr_list)
tdmr_quirk_reset_pamt(tdmr_entry(tdmr_list, i));
}
-static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
+static __init unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
{
unsigned long pamt_size = 0;
int i;
@@ -693,8 +766,8 @@ static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
return pamt_size / 1024;
}
-static int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx, u64 addr,
- u64 size, u16 max_reserved_per_tdmr)
+static __init int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx,
+ u64 addr, u64 size, u16 max_reserved_per_tdmr)
{
struct tdmr_reserved_area *rsvd_areas = tdmr->reserved_areas;
int idx = *p_idx;
@@ -727,10 +800,10 @@ static int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx, u64 addr,
* those holes fall within @tdmr, set up a TDMR reserved area to cover
* the hole.
*/
-static int tdmr_populate_rsvd_holes(struct list_head *tmb_list,
- struct tdmr_info *tdmr,
- int *rsvd_idx,
- u16 max_reserved_per_tdmr)
+static __init int tdmr_populate_rsvd_holes(struct list_head *tmb_list,
+ struct tdmr_info *tdmr,
+ int *rsvd_idx,
+ u16 max_reserved_per_tdmr)
{
struct tdx_memblock *tmb;
u64 prev_end;
@@ -791,10 +864,10 @@ static int tdmr_populate_rsvd_holes(struct list_head *tmb_list,
* overlaps with @tdmr, set up a TDMR reserved area to cover the
* overlapping part.
*/
-static int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list,
- struct tdmr_info *tdmr,
- int *rsvd_idx,
- u16 max_reserved_per_tdmr)
+static __init int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list,
+ struct tdmr_info *tdmr,
+ int *rsvd_idx,
+ u16 max_reserved_per_tdmr)
{
int i, ret;
@@ -829,7 +902,7 @@ static int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list,
}
/* Compare function called by sort() for TDMR reserved areas */
-static int rsvd_area_cmp_func(const void *a, const void *b)
+static __init int rsvd_area_cmp_func(const void *a, const void *b)
{
struct tdmr_reserved_area *r1 = (struct tdmr_reserved_area *)a;
struct tdmr_reserved_area *r2 = (struct tdmr_reserved_area *)b;
@@ -848,10 +921,10 @@ static int rsvd_area_cmp_func(const void *a, const void *b)
* Populate reserved areas for the given @tdmr, including memory holes
* (via @tmb_list) and PAMTs (via @tdmr_list).
*/
-static int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr,
- struct list_head *tmb_list,
- struct tdmr_info_list *tdmr_list,
- u16 max_reserved_per_tdmr)
+static __init int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr,
+ struct list_head *tmb_list,
+ struct tdmr_info_list *tdmr_list,
+ u16 max_reserved_per_tdmr)
{
int ret, rsvd_idx = 0;
@@ -876,9 +949,9 @@ static int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr,
* Populate reserved areas for all TDMRs in @tdmr_list, including memory
* holes (via @tmb_list) and PAMTs.
*/
-static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
- struct list_head *tmb_list,
- u16 max_reserved_per_tdmr)
+static __init int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
+ struct list_head *tmb_list,
+ u16 max_reserved_per_tdmr)
{
int i;
@@ -899,9 +972,9 @@ static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
* to cover all TDX memory regions in @tmb_list based on the TDX module
* TDMR global information in @sysinfo_tdmr.
*/
-static int construct_tdmrs(struct list_head *tmb_list,
- struct tdmr_info_list *tdmr_list,
- struct tdx_sys_info_tdmr *sysinfo_tdmr)
+static __init int construct_tdmrs(struct list_head *tmb_list,
+ struct tdmr_info_list *tdmr_list,
+ struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
u16 pamt_entry_size[TDX_PS_NR] = {
sysinfo_tdmr->pamt_4k_entry_size,
@@ -933,7 +1006,8 @@ static int construct_tdmrs(struct list_head *tmb_list,
return ret;
}
-static int config_tdx_module(struct tdmr_info_list *tdmr_list, u64 global_keyid)
+static __init int config_tdx_module(struct tdmr_info_list *tdmr_list,
+ u64 global_keyid)
{
struct tdx_module_args args = {};
u64 *tdmr_pa_array;
@@ -968,7 +1042,7 @@ static int config_tdx_module(struct tdmr_info_list *tdmr_list, u64 global_keyid)
return ret;
}
-static int do_global_key_config(void *unused)
+static __init int do_global_key_config(void *unused)
{
struct tdx_module_args args = {};
@@ -986,7 +1060,7 @@ static int do_global_key_config(void *unused)
* KVM) can ensure success by ensuring sufficient CPUs are online and
* can run SEAMCALLs.
*/
-static int config_global_keyid(void)
+static __init int config_global_keyid(void)
{
cpumask_var_t packages;
int cpu, ret = -EINVAL;
@@ -1026,7 +1100,7 @@ static int config_global_keyid(void)
return ret;
}
-static int init_tdmr(struct tdmr_info *tdmr)
+static __init int init_tdmr(struct tdmr_info *tdmr)
{
u64 next;
@@ -1057,7 +1131,7 @@ static int init_tdmr(struct tdmr_info *tdmr)
return 0;
}
-static int init_tdmrs(struct tdmr_info_list *tdmr_list)
+static __init int init_tdmrs(struct tdmr_info_list *tdmr_list)
{
int i;
@@ -1076,7 +1150,7 @@ static int init_tdmrs(struct tdmr_info_list *tdmr_list)
return 0;
}
-static int init_tdx_module(void)
+static __init int init_tdx_module(void)
{
int ret;
@@ -1157,67 +1231,50 @@ err_free_tdxmem:
goto out_put_tdxmem;
}
-static int __tdx_enable(void)
+static __init int tdx_enable(void)
{
+ enum cpuhp_state state;
int ret;
- ret = init_tdx_module();
- if (ret) {
- pr_err("module initialization failed (%d)\n", ret);
- tdx_module_status = TDX_MODULE_ERROR;
- return ret;
+ if (!cpu_feature_enabled(X86_FEATURE_TDX_HOST_PLATFORM)) {
+ pr_err("TDX not supported by the host platform\n");
+ return -ENODEV;
}
- pr_info("module initialized\n");
- tdx_module_status = TDX_MODULE_INITIALIZED;
-
- return 0;
-}
+ if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+ pr_err("XSAVE is required for TDX\n");
+ return -EINVAL;
+ }
-/**
- * tdx_enable - Enable TDX module to make it ready to run TDX guests
- *
- * This function assumes the caller has: 1) held read lock of CPU hotplug
- * lock to prevent any new cpu from becoming online; 2) done both VMXON
- * and tdx_cpu_enable() on all online cpus.
- *
- * This function requires there's at least one online cpu for each CPU
- * package to succeed.
- *
- * This function can be called in parallel by multiple callers.
- *
- * Return 0 if TDX is enabled successfully, otherwise error.
- */
-int tdx_enable(void)
-{
- int ret;
+ if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
+ pr_err("MOVDIR64B is required for TDX\n");
+ return -EINVAL;
+ }
- if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
+ if (!cpu_feature_enabled(X86_FEATURE_SELFSNOOP)) {
+ pr_err("Self-snoop is required for TDX\n");
return -ENODEV;
+ }
- lockdep_assert_cpus_held();
-
- mutex_lock(&tdx_module_lock);
+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "virt/tdx:online",
+ tdx_online_cpu, tdx_offline_cpu);
+ if (state < 0)
+ return state;
- switch (tdx_module_status) {
- case TDX_MODULE_UNINITIALIZED:
- ret = __tdx_enable();
- break;
- case TDX_MODULE_INITIALIZED:
- /* Already initialized, great, tell the caller. */
- ret = 0;
- break;
- default:
- /* Failed to initialize in the previous attempts */
- ret = -EINVAL;
- break;
+ ret = init_tdx_module();
+ if (ret) {
+ pr_err("TDX-Module initialization failed (%d)\n", ret);
+ cpuhp_remove_state(state);
+ return ret;
}
- mutex_unlock(&tdx_module_lock);
+ register_syscore(&tdx_syscore);
- return ret;
+ tdx_module_initialized = true;
+ pr_info("TDX-Module initialized\n");
+ return 0;
}
-EXPORT_SYMBOL_FOR_KVM(tdx_enable);
+subsys_initcall(tdx_enable);
static bool is_pamt_page(unsigned long phys)
{
@@ -1468,15 +1525,10 @@ void __init tdx_init(void)
const struct tdx_sys_info *tdx_get_sysinfo(void)
{
- const struct tdx_sys_info *p = NULL;
-
- /* Make sure all fields in @tdx_sysinfo have been populated */
- mutex_lock(&tdx_module_lock);
- if (tdx_module_status == TDX_MODULE_INITIALIZED)
- p = (const struct tdx_sys_info *)&tdx_sysinfo;
- mutex_unlock(&tdx_module_lock);
+ if (!tdx_module_initialized)
+ return NULL;
- return p;
+ return (const struct tdx_sys_info *)&tdx_sysinfo;
}
EXPORT_SYMBOL_FOR_KVM(tdx_get_sysinfo);
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index 82bb82be8567..dde219c823b4 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -91,14 +91,6 @@ struct tdmr_info {
* Do not put any hardware-defined TDX structure representations below
* this comment!
*/
-
-/* Kernel defined TDX module status during module initialization. */
-enum tdx_module_status_t {
- TDX_MODULE_UNINITIALIZED,
- TDX_MODULE_INITIALIZED,
- TDX_MODULE_ERROR
-};
-
struct tdx_memblock {
struct list_head list;
unsigned long start_pfn;
diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
index 13ad2663488b..c7db393a9cfb 100644
--- a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
+++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
@@ -7,7 +7,22 @@
* Include this file to other C file instead.
*/
-static int get_tdx_sys_info_features(struct tdx_sys_info_features *sysinfo_features)
+static __init int get_tdx_sys_info_version(struct tdx_sys_info_version *sysinfo_version)
+{
+ int ret = 0;
+ u64 val;
+
+ if (!ret && !(ret = read_sys_metadata_field(0x0800000100000003, &val)))
+ sysinfo_version->minor_version = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x0800000100000004, &val)))
+ sysinfo_version->major_version = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x0800000100000005, &val)))
+ sysinfo_version->update_version = val;
+
+ return ret;
+}
+
+static __init int get_tdx_sys_info_features(struct tdx_sys_info_features *sysinfo_features)
{
int ret = 0;
u64 val;
@@ -18,7 +33,7 @@ static int get_tdx_sys_info_features(struct tdx_sys_info_features *sysinfo_featu
return ret;
}
-static int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *sysinfo_tdmr)
+static __init int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
int ret = 0;
u64 val;
@@ -37,7 +52,7 @@ static int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *sysinfo_tdmr)
return ret;
}
-static int get_tdx_sys_info_td_ctrl(struct tdx_sys_info_td_ctrl *sysinfo_td_ctrl)
+static __init int get_tdx_sys_info_td_ctrl(struct tdx_sys_info_td_ctrl *sysinfo_td_ctrl)
{
int ret = 0;
u64 val;
@@ -52,7 +67,7 @@ static int get_tdx_sys_info_td_ctrl(struct tdx_sys_info_td_ctrl *sysinfo_td_ctrl
return ret;
}
-static int get_tdx_sys_info_td_conf(struct tdx_sys_info_td_conf *sysinfo_td_conf)
+static __init int get_tdx_sys_info_td_conf(struct tdx_sys_info_td_conf *sysinfo_td_conf)
{
int ret = 0;
u64 val;
@@ -85,10 +100,17 @@ static int get_tdx_sys_info_td_conf(struct tdx_sys_info_td_conf *sysinfo_td_conf
return ret;
}
-static int get_tdx_sys_info(struct tdx_sys_info *sysinfo)
+static __init int get_tdx_sys_info(struct tdx_sys_info *sysinfo)
{
int ret = 0;
+ ret = ret ?: get_tdx_sys_info_version(&sysinfo->version);
+
+ pr_info("Module version: %u.%u.%02u\n",
+ sysinfo->version.major_version,
+ sysinfo->version.minor_version,
+ sysinfo->version.update_version);
+
ret = ret ?: get_tdx_sys_info_features(&sysinfo->features);
ret = ret ?: get_tdx_sys_info_tdmr(&sysinfo->tdmr);
ret = ret ?: get_tdx_sys_info_td_ctrl(&sysinfo->td_ctrl);
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index fe57ff85d004..2f9fa27e5a3c 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -151,6 +151,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs)
static int xen_cpu_up_prepare_hvm(unsigned int cpu)
{
+ u32 cpu_uid;
int rc = 0;
/*
@@ -161,8 +162,8 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
*/
xen_uninit_lock_cpu(cpu);
- if (cpu_acpi_id(cpu) != CPU_ACPIID_INVALID)
- per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
+ if (acpi_get_cpu_uid(cpu, &cpu_uid) == 0)
+ per_cpu(xen_vcpu_id, cpu) = cpu_uid;
else
per_cpu(xen_vcpu_id, cpu) = cpu;
xen_vcpu_setup(cpu);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 6e459e47cafd..ed2d7a3756ce 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -392,7 +392,7 @@ static void __init xen_init_capabilities(void)
/*
* Xen PV would need some work to support PCID: CR3 handling as well
- * as xen_flush_tlb_others() would need updating.
+ * as xen_flush_tlb_multi() would need updating.
*/
setup_clear_cpu_cap(X86_FEATURE_PCID);
@@ -1045,10 +1045,6 @@ static void xen_update_io_bitmap(void)
}
#endif
-static void xen_io_delay(void)
-{
-}
-
static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
static unsigned long xen_read_cr0(void)
@@ -1208,6 +1204,7 @@ void __init xen_setup_vcpu_info_placement(void)
static const struct pv_info xen_info __initconst = {
.extra_user_64bit_cs = FLAT_USER_CS64,
+ .io_delay = false,
.name = "Xen",
};
@@ -1391,7 +1388,6 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
pv_ops.cpu.invalidate_io_bitmap = xen_invalidate_io_bitmap;
pv_ops.cpu.update_io_bitmap = xen_update_io_bitmap;
#endif
- pv_ops.cpu.io_delay = xen_io_delay;
pv_ops.cpu.start_context_switch = xen_start_context_switch;
pv_ops.cpu.end_context_switch = xen_end_context_switch;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 3254eaa88471..c80d0058efd1 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -105,6 +105,9 @@ pte_t xen_make_pte_init(pteval_t pte);
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif
+static pud_t level3_ident_pgt[PTRS_PER_PUD] __page_aligned_bss;
+static pmd_t level2_ident_pgt[PTRS_PER_PMD] __page_aligned_bss;
+
/*
* Protects atomic reservation decrease/increase against concurrent increases.
* Also protects non-atomic updates of current_pages and balloon lists.
@@ -1777,6 +1780,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Zap identity mapping */
init_top_pgt[0] = __pgd(0);
+ init_top_pgt[pgd_index(__PAGE_OFFSET_BASE_L4)].pgd =
+ __pa_symbol(level3_ident_pgt) + _KERNPG_TABLE_NOENC;
+ init_top_pgt[pgd_index(__START_KERNEL_map)].pgd =
+ __pa_symbol(level3_kernel_pgt) + _PAGE_TABLE_NOENC;
+ level3_ident_pgt[0].pud = __pa_symbol(level2_ident_pgt) + _KERNPG_TABLE_NOENC;
+
/* Pre-constructed entries are in pfn, so convert to mfn */
/* L4[273] -> level3_ident_pgt */
/* L4[511] -> level3_kernel_pgt */