summaryrefslogtreecommitdiff
path: root/arch/x86/coco/tdx/tdx.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/coco/tdx/tdx.c')
-rw-r--r--arch/x86/coco/tdx/tdx.c82
1 files changed, 70 insertions, 12 deletions
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 0d9b090b4880..edab6d6049be 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -14,6 +14,7 @@
#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <asm/paravirt_types.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/traps.h>
@@ -32,9 +33,6 @@
#define VE_GET_PORT_NUM(e) ((e) >> 16)
#define VE_IS_IO_STRING(e) ((e) & BIT(4))
-#define ATTR_DEBUG BIT(0)
-#define ATTR_SEPT_VE_DISABLE BIT(28)
-
/* TDX Module call error codes */
#define TDCALL_RETURN_CODE(a) ((a) >> 32)
#define TDCALL_INVALID_OPERAND 0xc0000100
@@ -170,11 +168,11 @@ static void __noreturn tdx_panic(const char *msg)
/* Define register order according to the GHCI */
struct { u64 r14, r15, rbx, rdi, rsi, r8, r9, rdx; };
- char str[64];
+ char bytes[64] __nonstring;
} message;
/* VMM assumes '\0' in byte 65, if the message took all 64 bytes */
- strtomem_pad(message.str, msg, '\0');
+ strtomem_pad(message.bytes, msg, '\0');
args.r8 = message.r8;
args.r9 = message.r9;
@@ -200,14 +198,14 @@ static void __noreturn tdx_panic(const char *msg)
*
* TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
* controls if the guest will receive such #VE with TD attribute
- * ATTR_SEPT_VE_DISABLE.
+ * TDX_ATTR_SEPT_VE_DISABLE.
*
* Newer TDX modules allow the guest to control if it wants to receive SEPT
* violation #VEs.
*
* Check if the feature is available and disable SEPT #VE if possible.
*
- * If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
+ * If the TD is allowed to disable/enable SEPT #VEs, the TDX_ATTR_SEPT_VE_DISABLE
* attribute is no longer reliable. It reflects the initial state of the
* control for the TD, but it will not be updated if someone (e.g. bootloader)
* changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
@@ -216,14 +214,14 @@ static void __noreturn tdx_panic(const char *msg)
static void disable_sept_ve(u64 td_attr)
{
const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
- bool debug = td_attr & ATTR_DEBUG;
+ bool debug = td_attr & TDX_ATTR_DEBUG;
u64 config, controls;
/* Is this TD allowed to disable SEPT #VE */
tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
/* No SEPT #VE controls for the guest: check the attribute */
- if (td_attr & ATTR_SEPT_VE_DISABLE)
+ if (td_attr & TDX_ATTR_SEPT_VE_DISABLE)
return;
/* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
@@ -274,6 +272,20 @@ static void enable_cpu_topology_enumeration(void)
tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_ENUM_TOPOLOGY, TD_CTLS_ENUM_TOPOLOGY);
}
+static void reduce_unnecessary_ve(void)
+{
+ u64 err = tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_REDUCE_VE, TD_CTLS_REDUCE_VE);
+
+ if (err == TDX_SUCCESS)
+ return;
+
+ /*
+ * Enabling REDUCE_VE includes ENUM_TOPOLOGY. Only try to
+ * enable ENUM_TOPOLOGY if REDUCE_VE was not successful.
+ */
+ enable_cpu_topology_enumeration();
+}
+
static void tdx_setup(u64 *cc_mask)
{
struct tdx_module_args args = {};
@@ -305,7 +317,8 @@ static void tdx_setup(u64 *cc_mask)
tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
disable_sept_ve(td_attr);
- enable_cpu_topology_enumeration();
+
+ reduce_unnecessary_ve();
}
/*
@@ -380,13 +393,21 @@ static int handle_halt(struct ve_info *ve)
{
const bool irq_disabled = irqs_disabled();
+ /*
+ * HLT with IRQs enabled is unsafe, as an IRQ that is intended to be a
+ * wake event may be consumed before requesting HLT emulation, leaving
+ * the vCPU blocking indefinitely.
+ */
+ if (WARN_ONCE(!irq_disabled, "HLT emulation with IRQs enabled"))
+ return -EIO;
+
if (__halt(irq_disabled))
return -EIO;
return ve_instr_len(ve);
}
-void __cpuidle tdx_safe_halt(void)
+void __cpuidle tdx_halt(void)
{
const bool irq_disabled = false;
@@ -397,6 +418,16 @@ void __cpuidle tdx_safe_halt(void)
WARN_ONCE(1, "HLT instruction emulation failed\n");
}
+static void __cpuidle tdx_safe_halt(void)
+{
+ tdx_halt();
+ /*
+ * "__cpuidle" section doesn't support instrumentation, so stick
+ * with raw_* variant that avoids tracing hooks.
+ */
+ raw_local_irq_enable();
+}
+
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_module_args args = {
@@ -1025,6 +1056,20 @@ static void tdx_kexec_finish(void)
}
}
+static __init void tdx_announce(void)
+{
+ struct tdx_module_args args = {};
+ u64 controls;
+
+ pr_info("Guest detected\n");
+
+ tdcall(TDG_VP_INFO, &args);
+ tdx_dump_attributes(args.rdx);
+
+ tdg_vm_rd(TDCS_TD_CTLS, &controls);
+ tdx_dump_td_ctls(controls);
+}
+
void __init tdx_early_init(void)
{
u64 cc_mask;
@@ -1084,6 +1129,19 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
/*
+ * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
+ * will enable interrupts before HLT TDCALL invocation if executed
+ * in STI-shadow, possibly resulting in missed wakeup events.
+ *
+ * Modify all possible HLT execution paths to use TDX specific routines
+ * that directly execute TDCALL and toggle the interrupt state as
+ * needed after TDCALL completion. This also reduces HLT related #VEs
+ * in addition to having a reliable halt logic execution.
+ */
+ pv_ops.irq.safe_halt = tdx_safe_halt;
+ pv_ops.irq.halt = tdx_halt;
+
+ /*
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
* bringup low level code. That raises #VE which cannot be handled
* there.
@@ -1094,5 +1152,5 @@ void __init tdx_early_init(void)
*/
x86_cpuinit.parallel_bringup = false;
- pr_info("Guest detected\n");
+ tdx_announce();
}