1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
|
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2018, Red Hat, Inc.
*
* Tests for Enlightened VMCS, including nested guest state.
*/
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/bitmap.h>
#include "test_util.h"
#include "kvm_util.h"
#include "hyperv.h"
#include "vmx.h"
/*
 * Number of #UD exceptions taken by the guest: incremented by
 * guest_ud_handler() and checked by guest_code() at the end of the test.
 */
static int ud_count;
/*
 * Guest exception handler that main() installs for UD_VECTOR.
 *
 * Skips the instruction that raised the exception and records that the
 * exception was taken so that guest_code() can check the count afterwards.
 */
static void guest_ud_handler(struct ex_regs *regs)
{
	/* Step over the faulting instruction: VMLAUNCH, 3 bytes long. */
	regs->rip += 3;

	ud_count++;
}
/*
 * Guest exception handler that main() installs for NMI_VECTOR.
 *
 * The body is empty: no guest state is modified when the NMI is delivered.
 */
static void guest_nmi_handler(struct ex_regs *regs)
{
}
/*
 * Execute RDMSR for @msr (loaded into ECX through the "c" constraint).
 *
 * The asm has no output operands, so the value read is discarded; callers in
 * l2_guest_code() use it only for the exit it may or may not cause.
 */
static inline void rdmsr_from_l2(uint32_t msr)
{
/* Currently, L1 doesn't preserve GPRs during vmexits. */
/* Hence every GPR except RCX (the input), RBP and RSP is listed as clobbered. */
__asm__ __volatile__ ("rdmsr" : : "c"(msr) :
"rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
"r10", "r11", "r12", "r13", "r14", "r15");
}
/*
 * L2 guest body, started and resumed by L1 (guest_code()).
 *
 * The order of the sync points, VMCALLs, RDMSRs and Hyper-V hypercalls below
 * has to match the sequence of exit reasons that guest_code() asserts on
 * after each vmlaunch()/vmresume().
 */
void l2_guest_code(void)
{
/* Output argument for __hyperv_hypercall() below; its value is never read. */
u64 unused;
GUEST_SYNC(7);
GUEST_SYNC(8);
/* Forced exit to L1 upon restore */
GUEST_SYNC(9);
vmcall();
/* MSR-Bitmap tests */
rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
vmcall();
rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
/* L2 TLB flush tests */
hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
/* L1 expects this RDMSR, not the hypercall above, to be the next exit it sees. */
rdmsr_from_l2(MSR_FS_BASE);
/*
* Note: hypercall status (RAX) is not preserved correctly by L1 after
* synthetic vmexit, use unchecked version.
*/
__hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
&unused);
/* Done, exit to L1 and never come back. */
vmcall();
}
/*
 * L1 guest body.
 *
 * Enables the Hyper-V enlightenments and VMX, loads the enlightened VMCS,
 * then launches/resumes L2 (l2_guest_code()) and checks the exit reason after
 * every L2->L1 exit. Each GUEST_SYNC(n) is a stage that main() verifies in
 * order; stages 7-9 are issued by L2.
 *
 * @vmx_pages: VMX test pages passed to prepare_for_vmx_operation() and
 * prepare_vmcs(); ->msr is the bitmap modified by __set_bit() below.
 * @hv_pages: Hyper-V test pages (VP assist page, enlightened VMCS,
 * partition assist page).
 * @hv_hcall_page_gpa: value written to HV_X64_MSR_HYPERCALL.
 */
void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
vm_vaddr_t hv_hcall_page_gpa)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
/* Identify as a Hyper-V guest and set up the hypercall page. */
wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
x2apic_enable();
GUEST_SYNC(1);
GUEST_SYNC(2);
enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
evmcs_enable();
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
GUEST_SYNC(3);
GUEST_ASSERT(load_evmcs(hv_pages));
GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
/* The current-VMCS pointer is re-checked after each sync point below. */
GUEST_SYNC(4);
GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
/* The L2 stack pointer starts one past the last element of the array. */
prepare_vmcs(vmx_pages, l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
GUEST_SYNC(5);
GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
/* With a bogus revision_id, vmlaunch() is expected to return non-zero (failure). */
current_evmcs->revision_id = -1u;
GUEST_ASSERT(vmlaunch());
current_evmcs->revision_id = EVMCS_VERSION;
GUEST_SYNC(6);
vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
PIN_BASED_NMI_EXITING);
/* L2 TLB flush setup */
current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
current_evmcs->hv_vm_id = 1;
current_evmcs->hv_vp_id = 1;
current_vp_assist->nested_control.features.directhypercall = 1;
/* Cleared here; set to 1 further down under "Enable synthetic vmexit". */
*(u32 *)(hv_pages->partition_assist) = 0;
/* First successful launch: L2 runs until the NMI that main() injects forces an exit. */
GUEST_ASSERT(!vmlaunch());
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
/*
* NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is
* up-to-date (RIP points where it should and not at the beginning
* of l2_guest_code()). GUEST_SYNC(9) checks that.
*/
GUEST_ASSERT(!vmresume());
GUEST_SYNC(10);
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
current_evmcs->guest_rip += 3; /* vmcall */
/* Intercept RDMSR 0xc0000100 */
vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
CPU_BASED_USE_MSR_BITMAPS);
/* NOTE(review): 0x400 is presumably the offset of the read bitmap for high MSRs - confirm against the SDM. */
__set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
current_evmcs->guest_rip += 2; /* rdmsr */
/* Enable enlightened MSR bitmap */
current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
current_evmcs->guest_rip += 2; /* rdmsr */
/* Intercept RDMSR 0xc0000101 without telling KVM about it */
__set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
GUEST_ASSERT(!vmresume());
/* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
current_evmcs->guest_rip += 3; /* vmcall */
/* Now tell KVM we've changed MSR-Bitmap */
current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
current_evmcs->guest_rip += 2; /* rdmsr */
/*
* L2 TLB flush test. First VMCALL should be handled directly by L0,
* no VMCALL exit expected.
*/
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
current_evmcs->guest_rip += 2; /* rdmsr */
/* Enable synthetic vmexit */
*(u32 *)(hv_pages->partition_assist) = 1;
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
/* guest_rip is not adjusted after the synthetic exit; L2's final vmcall() follows. */
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
GUEST_SYNC(11);
/* Try enlightened vmptrld with an incorrect GPA */
evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
/* The launch must fail, and exactly one #UD must have been counted by guest_ud_handler(). */
GUEST_ASSERT(vmlaunch());
GUEST_ASSERT(ud_count == 1);
GUEST_DONE();
}
void inject_nmi(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_events events;
vcpu_events_get(vcpu, &events);
events.nmi.pending = 1;
events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
vcpu_events_set(vcpu, &events);
}
/*
 * Save the state of @vcpu, release @vm, recreate it with a single vCPU and
 * load the saved state into the new vCPU.
 *
 * The general-purpose registers are sampled before the VM is released and
 * again after the state is loaded; the test fails if the two snapshots differ.
 *
 * Returns the vCPU of the recreated VM. Callers are expected to replace
 * their vcpu pointer with the return value (as main() does).
 */
static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
					struct kvm_vcpu *vcpu)
{
	struct kvm_regs regs1, regs2;
	struct kvm_x86_state *state;

	state = vcpu_save_state(vcpu);

	/*
	 * Zero both snapshots before filling them so that the whole-struct
	 * memcmp() below compares only what vcpu_regs_get() wrote.
	 */
	memset(&regs1, 0, sizeof(regs1));
	vcpu_regs_get(vcpu, &regs1);

	kvm_vm_release(vm);

	/* Restore state in a new VM. */
	vcpu = vm_recreate_with_one_vcpu(vm);
	/* Same Hyper-V CPUID and eVMCS setup that main() applies to the original vCPU. */
	vcpu_set_hv_cpuid(vcpu);
	vcpu_enable_evmcs(vcpu);
	vcpu_load_state(vcpu, state);
	kvm_x86_state_cleanup(state);

	memset(&regs2, 0, sizeof(regs2));
	vcpu_regs_get(vcpu, &regs2);
	TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
		    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
		    (ulong) regs2.rdi, (ulong) regs2.rsi);

	return vcpu;
}
/*
 * Host side of the test.
 *
 * Creates a VM whose L1 guest (guest_code()) uses an enlightened VMCS to run
 * L2, then runs it one stage at a time. After every guest sync point the
 * stage number is checked and the VM is saved and restored through
 * save_restore_vm() before the guest is resumed.
 */
int main(int argc, char *argv[])
{
	vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
	vm_vaddr_t hcall_page;
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;
	struct ucall uc;
	int stage;

	/* Prerequisites for the test. */
	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
	TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));

	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	/* Zero-filled page whose GPA the guest writes to HV_X64_MSR_HYPERCALL. */
	hcall_page = vm_vaddr_alloc_pages(vm, 1);
	memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());

	vcpu_set_hv_cpuid(vcpu);
	vcpu_enable_evmcs(vcpu);

	/* The three arguments of guest_code(). */
	vcpu_alloc_vmx(vm, &vmx_pages_gva);
	vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
	vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
	vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);

	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
	vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);

	pr_info("Running L1 which uses EVMCS to run L2\n");

	for (stage = 1;; stage++) {
		uint64_t cmd;

		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		cmd = get_ucall(vcpu, &uc);
		if (cmd == UCALL_DONE)
			break;

		if (cmd == UCALL_ABORT) {
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		} else if (cmd != UCALL_SYNC) {
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}

		/* Only UCALL_SYNC gets this far: the guest must report the expected stage. */
		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
			    stage, (ulong)uc.args[1]);

		vcpu = save_restore_vm(vm, vcpu);

		switch (stage) {
		case 8:
			/* Force immediate L2->L1 exit before resuming */
			pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
			inject_nmi(vcpu);
			break;
		case 9:
			/*
			 * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a
			 * freshly restored VM (before the first KVM_RUN) to
			 * check that KVM_STATE_NESTED_EVMCS is not lost.
			 */
			pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
			vcpu = save_restore_vm(vm, vcpu);
			break;
		default:
			break;
		}
	}

	kvm_vm_free(vm);
}
|