summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/kvm/kvm-ia64.c33
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c47
-rw-r--r--arch/powerpc/kvm/e500_tlb.c2
-rw-r--r--arch/s390/include/asm/processor.h1
-rw-r--r--arch/s390/kernel/dis.c27
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/diag.c4
-rw-r--r--arch/s390/kvm/intercept.c11
-rw-r--r--arch/s390/kvm/interrupt.c23
-rw-r--r--arch/s390/kvm/kvm-s390.c10
-rw-r--r--arch/s390/kvm/priv.c9
-rw-r--r--arch/s390/kvm/sigp.c2
-rw-r--r--arch/s390/kvm/trace-s390.h210
-rw-r--r--arch/s390/kvm/trace.h341
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/emulate.c48
-rw-r--r--arch/x86/kvm/i8254.c64
-rw-r--r--arch/x86/kvm/i8254.h6
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/kvm_timer.h18
-rw-r--r--arch/x86/kvm/lapic.c52
-rw-r--r--arch/x86/kvm/lapic.h9
-rw-r--r--arch/x86/kvm/mmu.c119
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm.c4
-rw-r--r--arch/x86/kvm/timer.c47
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c63
32 files changed, 905 insertions, 266 deletions
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bd77cb507c1c..eac65380bd20 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -924,6 +924,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+ if (!irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+ irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event->irq, irq_event->level);
+ return 0;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -963,29 +973,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
}
break;
- case KVM_IRQ_LINE_STATUS:
- case KVM_IRQ_LINE: {
- struct kvm_irq_level irq_event;
-
- r = -EFAULT;
- if (copy_from_user(&irq_event, argp, sizeof irq_event))
- goto out;
- r = -ENXIO;
- if (irqchip_in_kernel(kvm)) {
- __s32 status;
- status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
- irq_event.irq, irq_event.level);
- if (ioctl == KVM_IRQ_LINE_STATUS) {
- r = -EFAULT;
- irq_event.status = status;
- if (copy_to_user(argp, &irq_event,
- sizeof irq_event))
- goto out;
- }
- r = 0;
- }
- break;
- }
case KVM_GET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
struct kvm_irqchip chip;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12fd7bf5..572ad0141268 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -52,6 +52,8 @@
struct kvm;
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d03eb6f7b058..3c635c0616b0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_put;
}
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp,
- unsigned long gfn))
+static int kvm_handle_hva_range(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ int (*handler)(struct kvm *kvm,
+ unsigned long *rmapp,
+ unsigned long gfn))
{
int ret;
int retval = 0;
@@ -767,15 +770,25 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
- unsigned long start = memslot->userspace_addr;
- unsigned long end;
+ unsigned long hva_start, hva_end;
+ gfn_t gfn, gfn_end;
- end = start + (memslot->npages << PAGE_SHIFT);
- if (hva >= start && hva < end) {
- gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+ /*
+ * {gfn(page) | page intersects with [hva_start, hva_end)} =
+ * {gfn, gfn+1, ..., gfn_end-1}.
+ */
+ gfn = hva_to_gfn_memslot(hva_start, memslot);
+ gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+ for (; gfn < gfn_end; ++gfn) {
+ gfn_t gfn_offset = gfn - memslot->base_gfn;
- ret = handler(kvm, &memslot->rmap[gfn_offset],
- memslot->base_gfn + gfn_offset);
+ ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
retval |= ret;
}
}
@@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return retval;
}
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn))
+{
+ return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
+}
+
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long gfn)
{
@@ -850,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ if (kvm->arch.using_mmu_notifiers)
+ kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+ return 0;
+}
+
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long gfn)
{
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c510fc961302..c8f6c5826742 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
if (likely(!pfnmap)) {
unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
- pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+ pfn = gfn_to_pfn_memslot(slot, gfn);
if (is_error_pfn(pfn)) {
printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
(long)gfn);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 11e4e3236937..eac4fb5fb826 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -140,6 +140,7 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
extern unsigned long thread_saved_pc(struct task_struct *t);
extern void show_code(struct pt_regs *regs);
+extern int insn_to_mnemonic(unsigned char *instruction, char buf[8]);
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 619c5d350726..ffb622b16ab5 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1468,6 +1468,33 @@ static struct insn *find_insn(unsigned char *code)
return NULL;
}
+/**
+ * insn_to_mnemonic - decode an s390 instruction
+ * @instruction: instruction to decode
+ * @buf: buffer to fill with mnemonic
+ *
+ * Decode the instruction at @instruction and store the corresponding
+ * mnemonic into @buf.
+ * @buf is left unchanged if the instruction could not be decoded.
+ * Returns:
+ * %0 on success, %-ENOENT if the instruction was not found.
+ */
+int insn_to_mnemonic(unsigned char *instruction, char buf[8])
+{
+ struct insn *insn;
+
+ insn = find_insn(instruction);
+ if (!insn)
+ return -ENOENT;
+ if (insn->name[0] == '\0')
+ snprintf(buf, sizeof(buf), "%s",
+ long_insn_name[(int) insn->name[1]]);
+ else
+ snprintf(buf, sizeof(buf), "%.5s", insn->name);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(insn_to_mnemonic);
+
static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
{
struct insn *insn;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 78eb9847008f..a6e2677724e1 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
depends on HAVE_KVM && EXPERIMENTAL
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index c88bb7793390..a390687feb13 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -14,6 +14,8 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include "kvm-s390.h"
+#include "trace.h"
+#include "trace-s390.h"
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
@@ -98,6 +100,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx",
vcpu->run->s390_reset_flags);
+ trace_kvm_s390_request_resets(vcpu->run->s390_reset_flags);
return -EREMOTE;
}
@@ -105,6 +108,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
{
int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+ trace_kvm_s390_handle_diag(vcpu, code);
switch (code) {
case 0x10:
return diag_release_pages(vcpu);
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index adae539f12e2..22798ec33fd1 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -19,6 +19,8 @@
#include "kvm-s390.h"
#include "gaccess.h"
+#include "trace.h"
+#include "trace-s390.h"
static int handle_lctlg(struct kvm_vcpu *vcpu)
{
@@ -45,6 +47,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
disp2);
+ trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
do {
rc = get_guest_u64(vcpu, useraddr,
@@ -82,6 +85,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
disp2);
+ trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
reg = reg1;
do {
@@ -135,6 +139,8 @@ static int handle_stop(struct kvm_vcpu *vcpu)
vcpu->stat.exit_stop_request++;
spin_lock_bh(&vcpu->arch.local_int.lock);
+ trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
+
if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
rc = SIE_INTERCEPT_RERUNVCPU;
@@ -171,6 +177,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
int rc;
vcpu->stat.exit_validity++;
+ trace_kvm_s390_intercept_validity(vcpu, viwhy);
if (viwhy == 0x37) {
vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
vcpu->arch.gmap);
@@ -213,6 +220,9 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
intercept_handler_t handler;
vcpu->stat.exit_instruction++;
+ trace_kvm_s390_intercept_instruction(vcpu,
+ vcpu->arch.sie_block->ipa,
+ vcpu->arch.sie_block->ipb);
handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
if (handler)
return handler(vcpu);
@@ -222,6 +232,7 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
static int handle_prog(struct kvm_vcpu *vcpu)
{
vcpu->stat.exit_program_interruption++;
+ trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index b7bc1aac8ed2..7556231fb073 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -19,6 +19,7 @@
#include <asm/uaccess.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include "trace-s390.h"
static int psw_extint_disabled(struct kvm_vcpu *vcpu)
{
@@ -130,6 +131,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
case KVM_S390_INT_EMERGENCY:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
vcpu->stat.deliver_emergency_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->emerg.code, 0);
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
if (rc == -EFAULT)
exception = 1;
@@ -152,6 +155,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
case KVM_S390_INT_EXTERNAL_CALL:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
vcpu->stat.deliver_external_call++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->extcall.code, 0);
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
if (rc == -EFAULT)
exception = 1;
@@ -175,6 +180,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
inti->ext.ext_params);
vcpu->stat.deliver_service_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->ext.ext_params, 0);
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
if (rc == -EFAULT)
exception = 1;
@@ -198,6 +205,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
inti->ext.ext_params, inti->ext.ext_params2);
vcpu->stat.deliver_virtio_interrupt++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->ext.ext_params,
+ inti->ext.ext_params2);
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
if (rc == -EFAULT)
exception = 1;
@@ -229,6 +239,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
case KVM_S390_SIGP_STOP:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
vcpu->stat.deliver_stop_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ 0, 0);
__set_intercept_indicator(vcpu, inti);
break;
@@ -236,12 +248,16 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
inti->prefix.address);
vcpu->stat.deliver_prefix_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->prefix.address, 0);
kvm_s390_set_prefix(vcpu, inti->prefix.address);
break;
case KVM_S390_RESTART:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
vcpu->stat.deliver_restart_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ 0, 0);
rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
@@ -259,6 +275,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
inti->pgm.code,
table[vcpu->arch.sie_block->ipa >> 14]);
vcpu->stat.deliver_program_int++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->pgm.code, 0);
rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
if (rc == -EFAULT)
exception = 1;
@@ -515,6 +533,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
inti->pgm.code = code;
VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
spin_lock_bh(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
@@ -556,6 +575,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
kfree(inti);
return -EINVAL;
}
+ trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
+ 2);
mutex_lock(&kvm->lock);
fi = &kvm->arch.float_int;
@@ -621,6 +642,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
kfree(inti);
return -EINVAL;
}
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
+ s390int->parm64, 2);
mutex_lock(&vcpu->kvm->lock);
li = &vcpu->arch.local_int;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d470ccbfabae..e83df7f0fedd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -32,6 +32,10 @@
#include "kvm-s390.h"
#include "gaccess.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#include "trace-s390.h"
+
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -242,6 +246,7 @@ out_err:
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+ trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id,
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
@@ -417,6 +422,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
goto out_free_sie_block;
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
vcpu->arch.sie_block);
+ trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
return vcpu;
out_free_sie_block:
@@ -607,18 +613,22 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
local_irq_enable();
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
atomic_read(&vcpu->arch.sie_block->cpuflags));
+ trace_kvm_s390_sie_enter(vcpu,
+ atomic_read(&vcpu->arch.sie_block->cpuflags));
rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
if (rc) {
if (kvm_is_ucontrol(vcpu->kvm)) {
rc = SIE_INTERCEPT_UCONTROL;
} else {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+ trace_kvm_s390_sie_fault(vcpu);
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = 0;
}
}
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
+ trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
local_irq_disable();
kvm_guest_exit();
local_irq_enable();
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 60da903d6f3e..ed256fdd7b58 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -20,6 +20,7 @@
#include <asm/sysinfo.h>
#include "gaccess.h"
#include "kvm-s390.h"
+#include "trace.h"
static int handle_set_prefix(struct kvm_vcpu *vcpu)
{
@@ -59,6 +60,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, address);
VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
+ trace_kvm_s390_handle_prefix(vcpu, 1, address);
out:
return 0;
}
@@ -91,6 +93,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
}
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+ trace_kvm_s390_handle_prefix(vcpu, 0, address);
out:
return 0;
}
@@ -119,6 +122,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
}
VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
+ trace_kvm_s390_handle_stap(vcpu, useraddr);
out:
return 0;
}
@@ -164,9 +168,11 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
&facility_list, sizeof(facility_list));
if (rc == -EFAULT)
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- else
+ else {
VCPU_EVENT(vcpu, 5, "store facility list value %x",
facility_list);
+ trace_kvm_s390_handle_stfl(vcpu, facility_list);
+ }
return 0;
}
@@ -278,6 +284,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out_mem;
}
+ trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
vcpu->run->s.regs.gprs[0] = 0;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 56f80e1f98f7..566ddf6e8dfb 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -18,6 +18,7 @@
#include <asm/sigp.h>
#include "gaccess.h"
#include "kvm-s390.h"
+#include "trace.h"
static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
u64 *reg)
@@ -344,6 +345,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
else
parameter = vcpu->run->s.regs.gprs[r1 + 1];
+ trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
switch (order_code) {
case SIGP_SENSE:
vcpu->stat.instruction_sigp_sense++;
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
new file mode 100644
index 000000000000..90fdf85b5ff7
--- /dev/null
+++ b/arch/s390/kvm/trace-s390.h
@@ -0,0 +1,210 @@
+#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMS390_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm-s390
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-s390
+
+/*
+ * Trace point for the creation of the kvm instance.
+ */
+TRACE_EVENT(kvm_s390_create_vm,
+ TP_PROTO(unsigned long type),
+ TP_ARGS(type),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, type)
+ ),
+
+ TP_fast_assign(
+ __entry->type = type;
+ ),
+
+ TP_printk("create vm%s",
+ __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "")
+ );
+
+/*
+ * Trace points for creation and destruction of vpcus.
+ */
+TRACE_EVENT(kvm_s390_create_vcpu,
+ TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu,
+ struct kvm_s390_sie_block *sie_block),
+ TP_ARGS(id, vcpu, sie_block),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(struct kvm_vcpu *, vcpu)
+ __field(struct kvm_s390_sie_block *, sie_block)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->vcpu = vcpu;
+ __entry->sie_block = sie_block;
+ ),
+
+ TP_printk("create cpu %d at %p, sie block at %p", __entry->id,
+ __entry->vcpu, __entry->sie_block)
+ );
+
+TRACE_EVENT(kvm_s390_destroy_vcpu,
+ TP_PROTO(unsigned int id),
+ TP_ARGS(id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ ),
+
+ TP_printk("destroy cpu %d", __entry->id)
+ );
+
+/*
+ * Trace points for injection of interrupts, either per machine or
+ * per vcpu.
+ */
+
+#define kvm_s390_int_type \
+ {KVM_S390_SIGP_STOP, "sigp stop"}, \
+ {KVM_S390_PROGRAM_INT, "program interrupt"}, \
+ {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \
+ {KVM_S390_RESTART, "sigp restart"}, \
+ {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \
+ {KVM_S390_INT_SERVICE, "sclp interrupt"}, \
+ {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \
+ {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
+
+TRACE_EVENT(kvm_s390_inject_vm,
+ TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
+ TP_ARGS(type, parm, parm64, who),
+
+ TP_STRUCT__entry(
+ __field(__u32, inttype)
+ __field(__u32, parm)
+ __field(__u64, parm64)
+ __field(int, who)
+ ),
+
+ TP_fast_assign(
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->parm = parm;
+ __entry->parm64 = parm64;
+ __entry->who = who;
+ ),
+
+ TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
+ (__entry->who == 1) ? " (from kernel)" :
+ (__entry->who == 2) ? " (from user)" : "",
+ __entry->inttype,
+ __print_symbolic(__entry->inttype, kvm_s390_int_type),
+ __entry->parm, __entry->parm64)
+ );
+
+TRACE_EVENT(kvm_s390_inject_vcpu,
+ TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
+ int who),
+ TP_ARGS(id, type, parm, parm64, who),
+
+ TP_STRUCT__entry(
+ __field(int, id)
+ __field(__u32, inttype)
+ __field(__u32, parm)
+ __field(__u64, parm64)
+ __field(int, who)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->parm = parm;
+ __entry->parm64 = parm64;
+ __entry->who = who;
+ ),
+
+ TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
+ (__entry->who == 1) ? " (from kernel)" :
+ (__entry->who == 2) ? " (from user)" : "",
+ __entry->id, __entry->inttype,
+ __print_symbolic(__entry->inttype, kvm_s390_int_type),
+ __entry->parm, __entry->parm64)
+ );
+
+/*
+ * Trace point for the actual delivery of interrupts.
+ */
+TRACE_EVENT(kvm_s390_deliver_interrupt,
+ TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1),
+ TP_ARGS(id, type, data0, data1),
+
+ TP_STRUCT__entry(
+ __field(int, id)
+ __field(__u32, inttype)
+ __field(__u32, data0)
+ __field(__u64, data1)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->data0 = data0;
+ __entry->data1 = data1;
+ ),
+
+ TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \
+ "data:%08x %016llx",
+ __entry->id, __entry->inttype,
+ __print_symbolic(__entry->inttype, kvm_s390_int_type),
+ __entry->data0, __entry->data1)
+ );
+
+/*
+ * Trace point for resets that may be requested from userspace.
+ */
+TRACE_EVENT(kvm_s390_request_resets,
+ TP_PROTO(__u64 resets),
+ TP_ARGS(resets),
+
+ TP_STRUCT__entry(
+ __field(__u64, resets)
+ ),
+
+ TP_fast_assign(
+ __entry->resets = resets;
+ ),
+
+ TP_printk("requesting userspace resets %llx",
+ __entry->resets)
+ );
+
+/*
+ * Trace point for a vcpu's stop requests.
+ */
+TRACE_EVENT(kvm_s390_stop_request,
+ TP_PROTO(unsigned int action_bits),
+ TP_ARGS(action_bits),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, action_bits)
+ ),
+
+ TP_fast_assign(
+ __entry->action_bits = action_bits;
+ ),
+
+ TP_printk("stop request, action_bits = %08x",
+ __entry->action_bits)
+ );
+
+
+#endif /* _TRACE_KVMS390_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
new file mode 100644
index 000000000000..2b29e62351d3
--- /dev/null
+++ b/arch/s390/kvm/trace.h
@@ -0,0 +1,341 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/sigp.h>
+#include <asm/debug.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Helpers for vcpu-specific tracepoints containing the same information
+ * as s390dbf VCPU_EVENTs.
+ */
+#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu
+#define VCPU_ARGS_COMMON vcpu
+#define VCPU_FIELD_COMMON __field(int, id) \
+ __field(unsigned long, pswmask) \
+ __field(unsigned long, pswaddr)
+#define VCPU_ASSIGN_COMMON do { \
+ __entry->id = vcpu->vcpu_id; \
+ __entry->pswmask = vcpu->arch.sie_block->gpsw.mask; \
+ __entry->pswaddr = vcpu->arch.sie_block->gpsw.addr; \
+ } while (0);
+#define VCPU_TP_PRINTK(p_str, p_args...) \
+ TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
+ __entry->pswmask, __entry->pswaddr, p_args)
+
+/*
+ * Tracepoints for SIE entry and exit.
+ */
+TRACE_EVENT(kvm_s390_sie_enter,
+ TP_PROTO(VCPU_PROTO_COMMON, int cpuflags),
+ TP_ARGS(VCPU_ARGS_COMMON, cpuflags),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, cpuflags)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->cpuflags = cpuflags;
+ ),
+
+ VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags)
+ );
+
+TRACE_EVENT(kvm_s390_sie_fault,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+
+ VCPU_TP_PRINTK("%s", "fault in sie instruction")
+ );
+
+#define sie_intercept_code \
+ {0x04, "Instruction"}, \
+ {0x08, "Program interruption"}, \
+ {0x0C, "Instruction and program interuption"}, \
+ {0x10, "External request"}, \
+ {0x14, "External interruption"}, \
+ {0x18, "I/O request"}, \
+ {0x1C, "Wait state"}, \
+ {0x20, "Validity"}, \
+ {0x28, "Stop request"}
+
+TRACE_EVENT(kvm_s390_sie_exit,
+ TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
+ TP_ARGS(VCPU_ARGS_COMMON, icptcode),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u8, icptcode)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->icptcode = icptcode;
+ ),
+
+ VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode,
+ __print_symbolic(__entry->icptcode,
+ sie_intercept_code))
+ );
+
+/*
+ * Trace point for intercepted instructions.
+ */
+TRACE_EVENT(kvm_s390_intercept_instruction,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+ TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u64, instruction)
+ __field(char, insn[8])
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->instruction = ((__u64)ipa << 48) |
+ ((__u64)ipb << 16);
+ ),
+
+ VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
+ __entry->instruction,
+ insn_to_mnemonic((unsigned char *)
+ &__entry->instruction,
+ __entry->insn) ?
+ "unknown" : __entry->insn)
+ );
+
+/*
+ * Trace point for intercepted program interruptions.
+ */
+TRACE_EVENT(kvm_s390_intercept_prog,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+ TP_ARGS(VCPU_ARGS_COMMON, code),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, code)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ ),
+
+ VCPU_TP_PRINTK("intercepted program interruption %04x",
+ __entry->code)
+ );
+
+/*
+ * Trace point for validity intercepts.
+ */
+TRACE_EVENT(kvm_s390_intercept_validity,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy),
+ TP_ARGS(VCPU_ARGS_COMMON, viwhy),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, viwhy)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->viwhy = viwhy;
+ ),
+
+ VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy)
+ );
+
+/*
+ * Trace points for instructions that are of special interest.
+ */
+
+#define sigp_order_codes \
+ {SIGP_SENSE, "sense"}, \
+ {SIGP_EXTERNAL_CALL, "external call"}, \
+ {SIGP_EMERGENCY_SIGNAL, "emergency signal"}, \
+ {SIGP_STOP, "stop"}, \
+ {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \
+ {SIGP_SET_ARCHITECTURE, "set architecture"}, \
+ {SIGP_SET_PREFIX, "set prefix"}, \
+ {SIGP_SENSE_RUNNING, "sense running"}, \
+ {SIGP_RESTART, "restart"}
+
+TRACE_EVENT(kvm_s390_handle_sigp,
+ TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
+ __u32 parameter),
+ TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u8, order_code)
+ __field(__u16, cpu_addr)
+ __field(__u32, parameter)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->order_code = order_code;
+ __entry->cpu_addr = cpu_addr;
+ __entry->parameter = parameter;
+ ),
+
+ VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \
+ "parameter %08x", __entry->order_code,
+ __print_symbolic(__entry->order_code,
+ sigp_order_codes),
+ __entry->cpu_addr, __entry->parameter)
+ );
+
+#define diagnose_codes \
+ {0x10, "release pages"}, \
+ {0x44, "time slice end"}, \
+ {0x308, "ipl functions"}, \
+ {0x500, "kvm hypercall"}, \
+ {0x501, "kvm breakpoint"}
+
+TRACE_EVENT(kvm_s390_handle_diag,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+ TP_ARGS(VCPU_ARGS_COMMON, code),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, code)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ ),
+
+ VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code,
+ __print_symbolic(__entry->code, diagnose_codes))
+ );
+
+TRACE_EVENT(kvm_s390_handle_lctl,
+ TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, g)
+ __field(int, reg1)
+ __field(int, reg3)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->g = g;
+ __entry->reg1 = reg1;
+ __entry->reg3 = reg3;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx",
+ __entry->g ? "lctlg" : "lctl",
+ __entry->reg1, __entry->reg3, __entry->addr)
+ );
+
+TRACE_EVENT(kvm_s390_handle_prefix,
+ TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
+ TP_ARGS(VCPU_ARGS_COMMON, set, address),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, set)
+ __field(u32, address)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->set = set;
+ __entry->address = address;
+ ),
+
+ VCPU_TP_PRINTK("%s prefix to %08x",
+ __entry->set ? "setting" : "storing",
+ __entry->address)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stap,
+ TP_PROTO(VCPU_PROTO_COMMON, u64 address),
+ TP_ARGS(VCPU_ARGS_COMMON, address),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u64, address)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->address = address;
+ ),
+
+ VCPU_TP_PRINTK("storing cpu address to %016llx",
+ __entry->address)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stfl,
+ TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list),
+ TP_ARGS(VCPU_ARGS_COMMON, facility_list),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(unsigned int, facility_list)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->facility_list = facility_list;
+ ),
+
+ VCPU_TP_PRINTK("store facility list value %08x",
+ __entry->facility_list)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stsi,
+ TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, fc)
+ __field(int, sel1)
+ __field(int, sel2)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->fc = fc;
+ __entry->sel1 = sel1;
+ __entry->sel2 = sel2;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx",
+ __entry->fc, __entry->sel1, __entry->sel2,
+ __entry->addr)
+ );
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09155d64cf7e..48e713188469 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -500,11 +500,11 @@ struct kvm_vcpu_arch {
};
struct kvm_lpage_info {
- unsigned long rmap_pde;
int write_count;
};
struct kvm_arch_memory_slot {
+ unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
@@ -957,6 +957,7 @@ extern bool kvm_rebooting;
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a28f338843ea..45c044f0fff7 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -37,6 +37,7 @@ config KVM
select TASK_DELAY_ACCT
select PERF_EVENTS
select HAVE_KVM_MSI
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4f579e8dcacf..04d30401c5cb 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
- i8254.o timer.o cpuid.o pmu.o
+ i8254.o cpuid.o pmu.o
kvm-intel-y += vmx.o
kvm-amd-y += svm.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0595f1397b7c..b496da684bd6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
case 7: {
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
- /* Mask ebx against host capbability word 9 */
+ /* Mask ebx against host capability word 9 */
if (index == 0) {
entry->ebx &= kvm_supported_word9_x86_features;
cpuid_mask(&entry->ebx, 9);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 97d9a9914ba8..10f0136f50c1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -642,7 +642,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
goto bad;
} else {
- /* exapand-down segment */
+ /* expand-down segment */
if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
goto bad;
lim = desc.d ? 0xffffffff : 0xffff;
@@ -1166,24 +1166,21 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
int rc;
struct read_cache *mc = &ctxt->mem_read;
- while (size) {
- int n = min(size, 8u);
- size -= n;
- if (mc->pos < mc->end)
- goto read_cached;
+ if (mc->pos < mc->end)
+ goto read_cached;
- rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
- &ctxt->exception);
- if (rc != X86EMUL_CONTINUE)
- return rc;
- mc->end += n;
+ WARN_ON((mc->end + size) >= sizeof(mc->data));
- read_cached:
- memcpy(dest, mc->data + mc->pos, n);
- mc->pos += n;
- dest += n;
- addr += n;
- }
+ rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
+ &ctxt->exception);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ mc->end += size;
+
+read_cached:
+ memcpy(dest, mc->data + mc->pos, size);
+ mc->pos += size;
return X86EMUL_CONTINUE;
}
@@ -1383,7 +1380,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
err_code = selector & 0xfffc;
err_vec = GP_VECTOR;
- /* can't load system descriptor into segment selecor */
+ /* can't load system descriptor into segment selector */
if (seg <= VCPU_SREG_GS && !seg_desc.s)
goto exception;
@@ -2038,12 +2035,6 @@ static void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
struct desc_struct *cs, struct desc_struct *ss)
{
- u16 selector;
-
- memset(cs, 0, sizeof(struct desc_struct));
- ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS);
- memset(ss, 0, sizeof(struct desc_struct));
-
cs->l = 0; /* will be adjusted later */
set_desc_base(cs, 0); /* flat segment */
cs->g = 1; /* 4kb granularity */
@@ -2053,6 +2044,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
cs->dpl = 0; /* will be adjusted later */
cs->p = 1;
cs->d = 1;
+ cs->avl = 0;
set_desc_base(ss, 0); /* flat segment */
set_desc_limit(ss, 0xfffff); /* 4GB limit */
@@ -2062,6 +2054,8 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
ss->d = 1; /* 32bit stack segment */
ss->dpl = 0;
ss->p = 1;
+ ss->l = 0;
+ ss->avl = 0;
}
static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
@@ -2398,7 +2392,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
/*
- * Now load segment descriptors. If fault happenes at this stage
+ * Now load segment descriptors. If fault happens at this stage
* it is handled in a context of new task
*/
ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
@@ -2640,7 +2634,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
*
* 1. jmp/call/int to task gate: Check against DPL of the task gate
* 2. Exception/IRQ/iret: No check is performed
- * 3. jmp/call to TSS: Check agains DPL of the TSS
+ * 3. jmp/call to TSS: Check against DPL of the TSS
*/
if (reason == TASK_SWITCH_GATE) {
if (idt_index != -1) {
@@ -2681,7 +2675,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
/* set back link to prev task only if NT bit is set in eflags
- note that old_tss_sel is not used afetr this point */
+ note that old_tss_sel is not used after this point */
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
old_tss_sel = 0xffff;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index adba28f88d1a..11300d2fa714 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -108,7 +108,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
ktime_t remaining;
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
- if (!ps->pit_timer.period)
+ if (!ps->period)
return 0;
/*
@@ -120,9 +120,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
* itself with the initial count and continues counting
* from there.
*/
- remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
- elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
- elapsed = mod_64(elapsed, ps->pit_timer.period);
+ remaining = hrtimer_get_remaining(&ps->timer);
+ elapsed = ps->period - ktime_to_ns(remaining);
+ elapsed = mod_64(elapsed, ps->period);
return elapsed;
}
@@ -238,12 +238,12 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
int value;
spin_lock(&ps->inject_lock);
- value = atomic_dec_return(&ps->pit_timer.pending);
+ value = atomic_dec_return(&ps->pending);
if (value < 0)
/* spurious acks can be generated if, for example, the
* PIC is being reset. Handle it gracefully here
*/
- atomic_inc(&ps->pit_timer.pending);
+ atomic_inc(&ps->pending);
else if (value > 0)
/* in this case, we had multiple outstanding pit interrupts
* that we needed to inject. Reinject
@@ -261,28 +261,17 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
if (!kvm_vcpu_is_bsp(vcpu) || !pit)
return;
- timer = &pit->pit_state.pit_timer.timer;
+ timer = &pit->pit_state.timer;
if (hrtimer_cancel(timer))
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
static void destroy_pit_timer(struct kvm_pit *pit)
{
- hrtimer_cancel(&pit->pit_state.pit_timer.timer);
+ hrtimer_cancel(&pit->pit_state.timer);
flush_kthread_work(&pit->expired);
}
-static bool kpit_is_periodic(struct kvm_timer *ktimer)
-{
- struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
- pit_timer);
- return ps->is_periodic;
-}
-
-static struct kvm_timer_ops kpit_ops = {
- .is_periodic = kpit_is_periodic,
-};
-
static void pit_do_work(struct kthread_work *work)
{
struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
@@ -322,16 +311,16 @@ static void pit_do_work(struct kthread_work *work)
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
{
- struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
- struct kvm_pit *pt = ktimer->kvm->arch.vpit;
+ struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
+ struct kvm_pit *pt = ps->kvm->arch.vpit;
- if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
- atomic_inc(&ktimer->pending);
+ if (ps->reinject || !atomic_read(&ps->pending)) {
+ atomic_inc(&ps->pending);
queue_kthread_work(&pt->worker, &pt->expired);
}
- if (ktimer->t_ops->is_periodic(ktimer)) {
- hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+ if (ps->is_periodic) {
+ hrtimer_add_expires_ns(&ps->timer, ps->period);
return HRTIMER_RESTART;
} else
return HRTIMER_NORESTART;
@@ -340,7 +329,6 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
{
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
- struct kvm_timer *pt = &ps->pit_timer;
s64 interval;
if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
@@ -351,19 +339,18 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
pr_debug("create pit timer, interval is %llu nsec\n", interval);
/* TODO The new value only affected after the retriggered */
- hrtimer_cancel(&pt->timer);
+ hrtimer_cancel(&ps->timer);
flush_kthread_work(&ps->pit->expired);
- pt->period = interval;
+ ps->period = interval;
ps->is_periodic = is_period;
- pt->timer.function = pit_timer_fn;
- pt->t_ops = &kpit_ops;
- pt->kvm = ps->pit->kvm;
+ ps->timer.function = pit_timer_fn;
+ ps->kvm = ps->pit->kvm;
- atomic_set(&pt->pending, 0);
+ atomic_set(&ps->pending, 0);
ps->irq_ack = 1;
- hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
HRTIMER_MODE_ABS);
}
@@ -639,7 +626,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
}
mutex_unlock(&pit->pit_state.lock);
- atomic_set(&pit->pit_state.pit_timer.pending, 0);
+ atomic_set(&pit->pit_state.pending, 0);
pit->pit_state.irq_ack = 1;
}
@@ -648,7 +635,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
if (!mask) {
- atomic_set(&pit->pit_state.pit_timer.pending, 0);
+ atomic_set(&pit->pit_state.pending, 0);
pit->pit_state.irq_ack = 1;
}
}
@@ -706,12 +693,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
pit_state = &pit->pit_state;
pit_state->pit = pit;
- hrtimer_init(&pit_state->pit_timer.timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
pit_state->irq_ack_notifier.gsi = 0;
pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
- pit_state->pit_timer.reinject = true;
+ pit_state->reinject = true;
mutex_unlock(&pit->pit_state.lock);
kvm_pit_reset(pit);
@@ -761,7 +747,7 @@ void kvm_free_pit(struct kvm *kvm)
kvm_unregister_irq_ack_notifier(kvm,
&kvm->arch.vpit->pit_state.irq_ack_notifier);
mutex_lock(&kvm->arch.vpit->pit_state.lock);
- timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ timer = &kvm->arch.vpit->pit_state.timer;
hrtimer_cancel(timer);
flush_kthread_work(&kvm->arch.vpit->expired);
kthread_stop(kvm->arch.vpit->worker_task);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index fdf40425ea1d..dd1b16b611b0 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -24,8 +24,12 @@ struct kvm_kpit_channel_state {
struct kvm_kpit_state {
struct kvm_kpit_channel_state channels[3];
u32 flags;
- struct kvm_timer pit_timer;
bool is_periodic;
+ s64 period; /* unit: ns */
+ struct hrtimer timer;
+ atomic_t pending; /* accumulated triggered timers */
+ bool reinject;
+ struct kvm *kvm;
u32 speaker_data_on;
struct mutex lock;
struct kvm_pit *pit;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2086f2bfba33..2d03568e9498 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -70,7 +70,7 @@ struct kvm_pic {
struct kvm_io_device dev_slave;
struct kvm_io_device dev_eclr;
void (*ack_notifier)(void *opaque, int irq);
- unsigned long irq_states[16];
+ unsigned long irq_states[PIC_NUM_PINS];
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h
deleted file mode 100644
index 497dbaa366d4..000000000000
--- a/arch/x86/kvm/kvm_timer.h
+++ /dev/null
@@ -1,18 +0,0 @@
-
-struct kvm_timer {
- struct hrtimer timer;
- s64 period; /* unit: ns */
- u32 timer_mode_mask;
- u64 tscdeadline;
- atomic_t pending; /* accumulated triggered timers */
- bool reinject;
- struct kvm_timer_ops *t_ops;
- struct kvm *kvm;
- struct kvm_vcpu *vcpu;
-};
-
-struct kvm_timer_ops {
- bool (*is_periodic)(struct kvm_timer *);
-};
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce878788a39f..0cd431c85d38 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -696,12 +696,14 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
val = apic_get_tmcct(apic);
break;
-
+ case APIC_PROCPRI:
+ apic_update_ppr(apic);
+ val = apic_get_reg(apic, offset);
+ break;
case APIC_TASKPRI:
report_tpr_access(apic, false);
/* fall thru */
default:
- apic_update_ppr(apic);
val = apic_get_reg(apic, offset);
break;
}
@@ -719,7 +721,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
{
unsigned char alignment = offset & 0xf;
u32 result;
- /* this bitmask has a bit cleared for each reserver register */
+ /* this bitmask has a bit cleared for each reserved register */
static const u64 rmask = 0x43ff01ffffffe70cULL;
if ((alignment + len) > 4) {
@@ -792,7 +794,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
atomic_set(&apic->lapic_timer.pending, 0);
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
- /* lapic timer in oneshot or peroidic mode */
+ /* lapic timer in oneshot or periodic mode */
now = apic->lapic_timer.timer.base->get_time();
apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
* APIC_BUS_CYCLE_NS * apic->divide_count;
@@ -1212,10 +1214,8 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
*----------------------------------------------------------------------
*/
-static bool lapic_is_periodic(struct kvm_timer *ktimer)
+static bool lapic_is_periodic(struct kvm_lapic *apic)
{
- struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
- lapic_timer);
return apic_lvtt_period(apic);
}
@@ -1251,15 +1251,40 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
kvm_apic_local_deliver(apic, APIC_LVT0);
}
-static struct kvm_timer_ops lapic_timer_ops = {
- .is_periodic = lapic_is_periodic,
-};
-
static const struct kvm_io_device_ops apic_mmio_ops = {
.read = apic_mmio_read,
.write = apic_mmio_write,
};
+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
+{
+ struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+ struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ wait_queue_head_t *q = &vcpu->wq;
+
+ /*
+ * There is a race window between reading and incrementing, but we do
+ * not care about potentially losing timer events in the !reinject
+ * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
+ * in vcpu_enter_guest.
+ */
+ if (!atomic_read(&ktimer->pending)) {
+ atomic_inc(&ktimer->pending);
+ /* FIXME: this code should not know anything about vcpus */
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ }
+
+ if (waitqueue_active(q))
+ wake_up_interruptible(q);
+
+ if (lapic_is_periodic(apic)) {
+ hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+ return HRTIMER_RESTART;
+ } else
+ return HRTIMER_NORESTART;
+}
+
int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic;
@@ -1283,10 +1308,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS);
- apic->lapic_timer.timer.function = kvm_timer_fn;
- apic->lapic_timer.t_ops = &lapic_timer_ops;
- apic->lapic_timer.kvm = vcpu->kvm;
- apic->lapic_timer.vcpu = vcpu;
+ apic->lapic_timer.timer.function = apic_timer_fn;
apic->base_address = APIC_DEFAULT_PHYS_BASE;
vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4af5405ae1e2..166766fffd9f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -2,10 +2,17 @@
#define __KVM_X86_LAPIC_H
#include "iodev.h"
-#include "kvm_timer.h"
#include <linux/kvm_host.h>
+struct kvm_timer {
+ struct hrtimer timer;
+ s64 period; /* unit: ns */
+ u32 timer_mode_mask;
+ u64 tscdeadline;
+ atomic_t pending; /* accumulated triggered timers */
+};
+
struct kvm_lapic {
unsigned long base_address;
struct kvm_io_device dev;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01ca00423938..a9a20528e700 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
return 0;
pfn = spte_to_pfn(old_spte);
+
+ /*
+ * KVM does not hold the refcount of the page used by
+ * kvm mmu, before reclaiming the page, we should
+ * unmap it from mmu first.
+ */
+ WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+
if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
@@ -960,13 +968,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
- struct kvm_lpage_info *linfo;
+ unsigned long idx;
if (likely(level == PT_PAGE_TABLE_LEVEL))
return &slot->rmap[gfn - slot->base_gfn];
- linfo = lpage_info_slot(gfn, slot, level);
- return &linfo->rmap_pde;
+ idx = gfn_to_index(gfn, slot->base_gfn, level);
+ return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
}
/*
@@ -1200,7 +1208,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
- unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1218,7 +1226,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
- unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1259,43 +1267,67 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
return 0;
}
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- unsigned long data,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp,
- unsigned long data))
+static int kvm_handle_hva_range(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ unsigned long data,
+ int (*handler)(struct kvm *kvm,
+ unsigned long *rmapp,
+ struct kvm_memory_slot *slot,
+ unsigned long data))
{
int j;
- int ret;
- int retval = 0;
+ int ret = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
- unsigned long start = memslot->userspace_addr;
- unsigned long end;
+ unsigned long hva_start, hva_end;
+ gfn_t gfn_start, gfn_end;
- end = start + (memslot->npages << PAGE_SHIFT);
- if (hva >= start && hva < end) {
- gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
- gfn_t gfn = memslot->base_gfn + gfn_offset;
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+ /*
+ * {gfn(page) | page intersects with [hva_start, hva_end)} =
+ * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+ */
+ gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+ gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
- ret = handler(kvm, &memslot->rmap[gfn_offset], data);
+ for (j = PT_PAGE_TABLE_LEVEL;
+ j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+ unsigned long idx, idx_end;
+ unsigned long *rmapp;
- for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
- struct kvm_lpage_info *linfo;
+ /*
+ * {idx(page_j) | page_j intersects with
+ * [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}.
+ */
+ idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
+ idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
- linfo = lpage_info_slot(gfn, memslot,
- PT_DIRECTORY_LEVEL + j);
- ret |= handler(kvm, &linfo->rmap_pde, data);
- }
- trace_kvm_age_page(hva, memslot, ret);
- retval |= ret;
+ rmapp = __gfn_to_rmap(gfn_start, j, memslot);
+
+ for (; idx <= idx_end; ++idx)
+ ret |= handler(kvm, rmapp++, memslot, data);
}
}
- return retval;
+ return ret;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+ unsigned long data,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ struct kvm_memory_slot *slot,
+ unsigned long data))
+{
+ return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1303,13 +1335,18 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
}
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+}
+
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
}
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data)
{
u64 *sptep;
struct rmap_iterator uninitialized_var(iter);
@@ -1323,8 +1360,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
* This has some overhead, but not as much as the cost of swapping
* out actively used pages or breaking up actively used hugepages.
*/
- if (!shadow_accessed_mask)
- return kvm_unmap_rmapp(kvm, rmapp, data);
+ if (!shadow_accessed_mask) {
+ young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+ goto out;
+ }
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
sptep = rmap_get_next(&iter)) {
@@ -1336,12 +1375,14 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
(unsigned long *)sptep);
}
}
-
+out:
+ /* @data has hva passed to kvm_age_hva(). */
+ trace_kvm_age_page(data, slot, young);
return young;
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1379,13 +1420,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
- return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+ return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -2472,14 +2513,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
unsigned long hva;
slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
- if (!slot) {
- get_page(fault_page);
- return page_to_pfn(fault_page);
- }
+ if (!slot)
+ return get_fault_pfn();
hva = gfn_to_hva_memslot(slot, gfn);
- return hva_to_pfn_atomic(vcpu->kvm, hva);
+ return hva_to_pfn_atomic(hva);
}
static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
@@ -3236,7 +3275,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
if (!async)
return false; /* *pfn has correct page already */
- put_page(pfn_to_page(*pfn));
+ kvm_release_pfn_clean(*pfn);
if (!prefault && can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(gva, gfn);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 9b7ec1150ab0..cfc258a6bf97 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -1,5 +1,5 @@
/*
- * Kernel-based Virtual Machine -- Performane Monitoring Unit support
+ * Kernel-based Virtual Machine -- Performance Monitoring Unit support
*
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index baead950d6c8..687d0c30e559 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2063,7 +2063,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
if (svm->nested.intercept & 1ULL) {
/*
* The #vmexit can't be emulated here directly because this
- * code path runs with irqs and preemtion disabled. A
+ * code path runs with irqs and preemption disabled. A
* #vmexit emulation might sleep. Only signal request for
* the #vmexit here.
*/
@@ -2409,7 +2409,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
/*
* This function merges the msr permission bitmaps of kvm and the
- * nested vmcb. It is omptimized in that it only merges the parts where
+ * nested vmcb. It is optimized in that it only merges the parts where
* the kvm msr permission bitmap may contain zero bits
*/
int i;
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
deleted file mode 100644
index 6b85cc647f34..000000000000
--- a/arch/x86/kvm/timer.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * This module enables machines with Intel VT-x extensions to run virtual
- * machines without emulation or binary translation.
- *
- * timer support
- *
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/hrtimer.h>
-#include <linux/atomic.h>
-#include "kvm_timer.h"
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
-{
- struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
- struct kvm_vcpu *vcpu = ktimer->vcpu;
- wait_queue_head_t *q = &vcpu->wq;
-
- /*
- * There is a race window between reading and incrementing, but we do
- * not care about potentially losing timer events in the !reinject
- * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
- * in vcpu_enter_guest.
- */
- if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
- atomic_inc(&ktimer->pending);
- /* FIXME: this code should not know anything about vcpus */
- kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
- }
-
- if (waitqueue_active(q))
- wake_up_interruptible(q);
-
- if (ktimer->t_ops->is_periodic(ktimer)) {
- hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
- return HRTIMER_RESTART;
- } else
- return HRTIMER_NORESTART;
-}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c00f03de1b79..d6e4cbc42b8e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1343,7 +1343,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
guest_efer = vmx->vcpu.arch.efer;
/*
- * NX is emulated; LMA and LME handled by hardware; SCE meaninless
+ * NX is emulated; LMA and LME handled by hardware; SCE meaningless
* outside long mode
*/
ignore_bits = EFER_NX | EFER_SCE;
@@ -3254,7 +3254,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
* qemu binaries.
* IA32 arch specifies that at the time of processor reset the
* "Accessed" bit in the AR field of segment registers is 1. And qemu
- * is setting it to 0 in the usedland code. This causes invalid guest
+ * is setting it to 0 in the userland code. This causes invalid guest
* state vmexit when "unrestricted guest" mode is turned on.
* Fix for this setup issue in cpu_reset is being pushed in the qemu
* tree. Newer qemu binaries with that qemu fix would not need this
@@ -4439,7 +4439,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xc1;
}
-/* called to set cr0 as approriate for a mov-to-cr0 exit. */
+/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
{
if (to_vmx(vcpu)->nested.vmxon &&
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 42bce48f6928..3ca90d74711d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -806,7 +806,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
* kvm-specific. Those are put in the beginning of the list.
*/
-#define KVM_SAVE_MSRS_BEGIN 9
+#define KVM_SAVE_MSRS_BEGIN 10
static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
@@ -1097,7 +1097,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
* For each generation, we track the original measured
* nanosecond time, offset, and write, so if TSCs are in
* sync, we can match exact offset, and if not, we can match
- * exact software computaion in compute_guest_tsc()
+ * exact software computation in compute_guest_tsc()
*
* These values are tracked in kvm->arch.cur_xxx variables.
*/
@@ -1504,7 +1504,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
gpa_t gpa = data & ~0x3f;
- /* Bits 2:5 are resrved, Should be zero */
+ /* Bits 2:5 are reserved, Should be zero */
if (data & 0x3c)
return 1;
@@ -1727,7 +1727,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
* Ignore all writes to this no longer documented MSR.
* Writes are only relevant for old K7 processors,
* all pre-dating SVM, but a recommended workaround from
- * AMD for these chips. It is possible to speicify the
+ * AMD for these chips. It is possible to specify the
* affected processor models on the command line, hence
* the need to ignore the workaround.
*/
@@ -2636,7 +2636,6 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
if (!vcpu->arch.time_page)
return -EINVAL;
src->flags |= PVCLOCK_GUEST_STOPPED;
- mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
return 0;
}
@@ -3087,7 +3086,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
if (!kvm->arch.vpit)
return -ENXIO;
mutex_lock(&kvm->arch.vpit->pit_state.lock);
- kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+ kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
return 0;
}
@@ -3170,6 +3169,16 @@ out:
return r;
}
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+ if (!irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+ irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event->irq, irq_event->level);
+ return 0;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -3276,29 +3285,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
create_pit_unlock:
mutex_unlock(&kvm->slots_lock);
break;
- case KVM_IRQ_LINE_STATUS:
- case KVM_IRQ_LINE: {
- struct kvm_irq_level irq_event;
-
- r = -EFAULT;
- if (copy_from_user(&irq_event, argp, sizeof irq_event))
- goto out;
- r = -ENXIO;
- if (irqchip_in_kernel(kvm)) {
- __s32 status;
- status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
- irq_event.irq, irq_event.level);
- if (ioctl == KVM_IRQ_LINE_STATUS) {
- r = -EFAULT;
- irq_event.status = status;
- if (copy_to_user(argp, &irq_event,
- sizeof irq_event))
- goto out;
- }
- r = 0;
- }
- break;
- }
case KVM_GET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
struct kvm_irqchip *chip;
@@ -4496,7 +4482,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
/*
* if emulation was due to access to shadowed page table
- * and it failed try to unshadow page and re-entetr the
+ * and it failed try to unshadow page and re-enter the
* guest to let CPU execute the instruction.
*/
if (kvm_mmu_unprotect_page_virt(vcpu, gva))
@@ -5592,7 +5578,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
/*
* We are here if userspace calls get_regs() in the middle of
* instruction emulation. Registers state needs to be copied
- * back from emulation context to vcpu. Usrapace shouldn't do
+ * back from emulation context to vcpu. Userspace shouldn't do
* that usually, but some bad designed PV devices (vmware
* backdoor interface) need this to work
*/
@@ -6121,7 +6107,7 @@ int kvm_arch_hardware_enable(void *garbage)
* as we reset last_host_tsc on all VCPUs to stop this from being
* called multiple times (one for each physical CPU bringup).
*
- * Platforms with unnreliable TSCs don't have to deal with this, they
+ * Platforms with unreliable TSCs don't have to deal with this, they
* will be compensated by the logic in vcpu_load, which sets the TSC to
* catchup mode. This will catchup all VCPUs to real time, but cannot
* guarantee that they stay in perfect synchronization.
@@ -6318,6 +6304,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
int i;
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
+ kvm_kvfree(free->arch.rmap_pde[i]);
+ free->arch.rmap_pde[i] = NULL;
+ }
if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
kvm_kvfree(free->arch.lpage_info[i]);
free->arch.lpage_info[i] = NULL;
@@ -6337,6 +6327,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
lpages = gfn_to_index(slot->base_gfn + npages - 1,
slot->base_gfn, level) + 1;
+ slot->arch.rmap_pde[i] =
+ kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
+ if (!slot->arch.rmap_pde[i])
+ goto out_free;
+
slot->arch.lpage_info[i] =
kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
if (!slot->arch.lpage_info[i])
@@ -6365,7 +6360,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
out_free:
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ kvm_kvfree(slot->arch.rmap_pde[i]);
kvm_kvfree(slot->arch.lpage_info[i]);
+ slot->arch.rmap_pde[i] = NULL;
slot->arch.lpage_info[i] = NULL;
}
return -ENOMEM;
@@ -6385,7 +6382,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
map_flags = MAP_SHARED | MAP_ANONYMOUS;
/*To keep backward compatibility with older userspace,
- *x86 needs to hanlde !user_alloc case.
+ *x86 needs to handle !user_alloc case.
*/
if (!user_alloc) {
if (npages && !old.rmap) {