summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/apic/io_apic.c10
-rw-r--r--arch/x86/kernel/apic/msi.c18
-rw-r--r--arch/x86/kernel/apic/vector.c23
-rw-r--r--arch/x86/kernel/cpu/intel.c11
-rw-r--r--arch/x86/kernel/cpu/mce/core.c4
-rw-r--r--arch/x86/kernel/fpu/core.c6
-rw-r--r--arch/x86/kernel/ldt.c26
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/process.c18
-rw-r--r--arch/x86/kernel/traps.c16
10 files changed, 85 insertions, 50 deletions
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ce61e3e7d399..81ffcfbfaef2 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2316,12 +2316,12 @@ static int mp_irqdomain_create(int ioapic)
ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
(void *)(long)ioapic);
- /* Release fw handle if it was allocated above */
- if (!cfg->dev)
- irq_domain_free_fwnode(fn);
-
- if (!ip->irqdomain)
+ if (!ip->irqdomain) {
+ /* Release fw handle if it was allocated above */
+ if (!cfg->dev)
+ irq_domain_free_fwnode(fn);
return -ENOMEM;
+ }
ip->irqdomain->parent = parent;
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 5cbaca58af95..c2b2911feeef 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -263,12 +263,13 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
msi_default_domain =
pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
parent);
- irq_domain_free_fwnode(fn);
}
- if (!msi_default_domain)
+ if (!msi_default_domain) {
+ irq_domain_free_fwnode(fn);
pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
- else
+ } else {
msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
+ }
}
#ifdef CONFIG_IRQ_REMAP
@@ -301,7 +302,8 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent,
if (!fn)
return NULL;
d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent);
- irq_domain_free_fwnode(fn);
+ if (!d)
+ irq_domain_free_fwnode(fn);
return d;
}
#endif
@@ -364,7 +366,8 @@ static struct irq_domain *dmar_get_irq_domain(void)
if (fn) {
dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
x86_vector_domain);
- irq_domain_free_fwnode(fn);
+ if (!dmar_domain)
+ irq_domain_free_fwnode(fn);
}
out:
mutex_unlock(&dmar_lock);
@@ -489,7 +492,10 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
}
d = msi_create_irq_domain(fn, domain_info, parent);
- irq_domain_free_fwnode(fn);
+ if (!d) {
+ irq_domain_free_fwnode(fn);
+ kfree(domain_info);
+ }
return d;
}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index c48be6e1f676..7649da2478d8 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -446,12 +446,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
trace_vector_activate(irqd->irq, apicd->is_managed,
apicd->can_reserve, reserve);
- /* Nothing to do for fixed assigned vectors */
- if (!apicd->can_reserve && !apicd->is_managed)
- return 0;
-
raw_spin_lock_irqsave(&vector_lock, flags);
- if (reserve || irqd_is_managed_and_shutdown(irqd))
+ if (!apicd->can_reserve && !apicd->is_managed)
+ assign_irq_vector_any_locked(irqd);
+ else if (reserve || irqd_is_managed_and_shutdown(irqd))
vector_assign_managed_shutdown(irqd);
else if (apicd->is_managed)
ret = activate_managed(irqd);
@@ -709,7 +707,6 @@ int __init arch_early_irq_init(void)
x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
NULL);
BUG_ON(x86_vector_domain == NULL);
- irq_domain_free_fwnode(fn);
irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain);
@@ -775,20 +772,10 @@ void lapic_offline(void)
static int apic_set_affinity(struct irq_data *irqd,
const struct cpumask *dest, bool force)
{
- struct apic_chip_data *apicd = apic_chip_data(irqd);
int err;
- /*
- * Core code can call here for inactive interrupts. For inactive
- * interrupts which use managed or reservation mode there is no
- * point in going through the vector assignment right now as the
- * activation will assign a vector which fits the destination
- * cpumask. Let the core code store the destination mask and be
- * done with it.
- */
- if (!irqd_is_activated(irqd) &&
- (apicd->is_managed || apicd->can_reserve))
- return IRQ_SET_MASK_OK;
+ if (WARN_ON_ONCE(!irqd_is_activated(irqd)))
+ return -EIO;
raw_spin_lock(&vector_lock);
cpumask_and(vector_searchmask, dest, cpu_online_mask);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index c25a67a34bd3..0ab48f1cdf84 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -50,6 +50,13 @@ static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;
/*
+ * With a name like MSR_TEST_CTL it should go without saying, but don't touch
+ * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
+ * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
+ */
+static bool cpu_model_supports_sld __ro_after_init;
+
+/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
* CPU models in which having conflicting memory types still leads to
@@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on)
static void split_lock_init(void)
{
- split_lock_verify_msr(sld_state != sld_off);
+ if (cpu_model_supports_sld)
+ split_lock_verify_msr(sld_state != sld_off);
}
static void split_lock_warn(unsigned long ip)
@@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
return;
}
+ cpu_model_supports_sld = true;
split_lock_setup();
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index fbe89a92ff36..14e4b4d17ee5 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1901,6 +1901,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
{
+ WARN_ON_ONCE(user_mode(regs));
+
/*
* Only required when from kernel mode. See
* mce_check_crashing_cpu() for details.
@@ -1954,7 +1956,7 @@ DEFINE_IDTENTRY_MCE_USER(exc_machine_check)
}
#else
/* 32bit unified entry point */
-DEFINE_IDTENTRY_MCE(exc_machine_check)
+DEFINE_IDTENTRY_RAW(exc_machine_check)
{
unsigned long dr7;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 06c818967bb6..15247b96c6ea 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -101,6 +101,12 @@ void kernel_fpu_begin(void)
copy_fpregs_to_fpstate(&current->thread.fpu);
}
__cpu_invalidate_fpregs_state();
+
+ if (boot_cpu_has(X86_FEATURE_XMM))
+ ldmxcsr(MXCSR_DEFAULT);
+
+ if (boot_cpu_has(X86_FEATURE_FPU))
+ asm volatile ("fninit");
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 8748321c4486..b8aee71840ae 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -29,6 +29,8 @@
#include <asm/mmu_context.h>
#include <asm/pgtable_areas.h>
+#include <xen/xen.h>
+
/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
@@ -543,6 +545,28 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
return bytecount;
}
+static bool allow_16bit_segments(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_16BIT))
+ return false;
+
+#ifdef CONFIG_XEN_PV
+ /*
+ * Xen PV does not implement ESPFIX64, which means that 16-bit
+ * segments will not work correctly. Until either Xen PV implements
+ * ESPFIX64 and can signal this fact to the guest or unless someone
+ * provides compelling evidence that allowing broken 16-bit segments
+ * is worthwhile, disallow 16-bit segments under Xen PV.
+ */
+ if (xen_pv_domain()) {
+ pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
+ return false;
+ }
+#endif
+
+ return true;
+}
+
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
struct mm_struct *mm = current->mm;
@@ -574,7 +598,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
/* The user wants to clear the entry. */
memset(&ldt, 0, sizeof(ldt));
} else {
- if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+ if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
error = -EINVAL;
goto out;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 674a7d66d960..de2138ba38e5 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -324,7 +324,8 @@ struct paravirt_patch_template pv_ops = {
.cpu.swapgs = native_swapgs,
#ifdef CONFIG_X86_IOPL_IOPERM
- .cpu.update_io_bitmap = native_tss_update_io_bitmap,
+ .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap,
+ .cpu.update_io_bitmap = native_tss_update_io_bitmap,
#endif
.cpu.start_context_switch = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f362ce0d5ac0..fe67dbd76e51 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -322,20 +322,6 @@ void arch_setup_new_exec(void)
}
#ifdef CONFIG_X86_IOPL_IOPERM
-static inline void tss_invalidate_io_bitmap(struct tss_struct *tss)
-{
- /*
- * Invalidate the I/O bitmap by moving io_bitmap_base outside the
- * TSS limit so any subsequent I/O access from user space will
- * trigger a #GP.
- *
- * This is correct even when VMEXIT rewrites the TSS limit
- * to 0x67 as the only requirement is that the base points
- * outside the limit.
- */
- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
-}
-
static inline void switch_to_bitmap(unsigned long tifp)
{
/*
@@ -346,7 +332,7 @@ static inline void switch_to_bitmap(unsigned long tifp)
* user mode.
*/
if (tifp & _TIF_IO_BITMAP)
- tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw));
+ tss_invalidate_io_bitmap();
}
static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
@@ -380,7 +366,7 @@ void native_tss_update_io_bitmap(void)
u16 *base = &tss->x86_tss.io_bitmap_base;
if (!test_thread_flag(TIF_IO_BITMAP)) {
- tss_invalidate_io_bitmap(tss);
+ native_tss_invalidate_io_bitmap();
return;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f58679e487f6..b7cb3e0716f7 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -303,6 +303,8 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check)
do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
error_code, BUS_ADRALN, NULL);
+
+ local_irq_disable();
}
#ifdef CONFIG_VMAP_STACK
@@ -870,6 +872,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
trace_hardirqs_off_finish();
/*
+ * If something gets miswired and we end up here for a user mode
+ * #DB, we will malfunction.
+ */
+ WARN_ON_ONCE(user_mode(regs));
+
+ /*
* Catch SYSENTER with TF set and clear DR_STEP. If this hit a
* watchpoint at the same time then that will still be handled.
*/
@@ -887,6 +895,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
static __always_inline void exc_debug_user(struct pt_regs *regs,
unsigned long dr6)
{
+ /*
+ * If something gets miswired and we end up here for a kernel mode
+ * #DB, we will malfunction.
+ */
+ WARN_ON_ONCE(!user_mode(regs));
+
idtentry_enter_user(regs);
instrumentation_begin();
@@ -917,7 +931,7 @@ DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
}
#else
/* 32 bit does not have separate entry points. */
-DEFINE_IDTENTRY_DEBUG(exc_debug)
+DEFINE_IDTENTRY_RAW(exc_debug)
{
unsigned long dr6, dr7;