Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/kvm_main.c | 761
1 file changed, 393 insertions, 368 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6ba7468bdbe3..a87df97e0b14 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -433,7 +433,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->preempted = false; vcpu->ready = false; preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); - vcpu->last_used_slot = 0; + vcpu->last_used_slot = NULL; } static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -545,7 +545,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm, range->start, range->end - 1) { unsigned long hva_start, hva_end; - slot = container_of(node, struct kvm_memory_slot, hva_node); + slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]); hva_start = max(range->start, slot->userspace_addr); hva_end = min(range->end, slot->userspace_addr + (slot->npages << PAGE_SHIFT)); @@ -876,20 +876,6 @@ static void kvm_destroy_pm_notifier(struct kvm *kvm) } #endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */ -static struct kvm_memslots *kvm_alloc_memslots(void) -{ - struct kvm_memslots *slots; - - slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); - if (!slots) - return NULL; - - slots->hva_tree = RB_ROOT_CACHED; - hash_init(slots->id_hash); - - return slots; -} - static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) { if (!memslot->dirty_bitmap) @@ -899,27 +885,33 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) memslot->dirty_bitmap = NULL; } +/* This does not remove the slot from struct kvm_memslots data structures */ static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { kvm_destroy_dirty_bitmap(slot); kvm_arch_free_memslot(kvm, slot); - slot->flags = 0; - slot->npages = 0; + kfree(slot); } static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) { + struct hlist_node *idnode; struct kvm_memory_slot *memslot; + int bkt; - if (!slots) + /* + * The same memslot objects live in both active and inactive sets, + * arbitrarily free using index '1' so the second invocation of this + * function isn't operating over a structure with dangling pointers + * (even though this function isn't actually touching them). + */ + if (!slots->node_idx) return; - kvm_for_each_memslot(memslot, slots) + hash_for_each_safe(slots->id_hash, bkt, idnode, memslot, id_node[1]) kvm_free_memslot(kvm, memslot); - - kvfree(slots); } static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc) @@ -1058,8 +1050,9 @@ int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm) static struct kvm *kvm_create_vm(unsigned long type) { struct kvm *kvm = kvm_arch_alloc_vm(); + struct kvm_memslots *slots; int r = -ENOMEM; - int i; + int i, j; if (!kvm) return ERR_PTR(-ENOMEM); @@ -1087,13 +1080,20 @@ static struct kvm *kvm_create_vm(unsigned long type) refcount_set(&kvm->users_count, 1); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - struct kvm_memslots *slots = kvm_alloc_memslots(); + for (j = 0; j < 2; j++) { + slots = &kvm->__memslots[i][j]; - if (!slots) - goto out_err_no_arch_destroy_vm; - /* Generations must be different for each address space. */ - slots->generation = i; - rcu_assign_pointer(kvm->memslots[i], slots); + atomic_long_set(&slots->last_used_slot, (unsigned long)NULL); + slots->hva_tree = RB_ROOT_CACHED; + slots->gfn_tree = RB_ROOT; + hash_init(slots->id_hash); + slots->node_idx = j; + + /* Generations must be different for each address space. 
*/ + slots->generation = i; + } + + rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]); } for (i = 0; i < KVM_NR_BUSES; i++) { @@ -1147,8 +1147,6 @@ out_err_no_arch_destroy_vm: WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm_get_bus(kvm, i)); - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); cleanup_srcu_struct(&kvm->irq_srcu); out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); @@ -1213,8 +1211,10 @@ static void kvm_destroy_vm(struct kvm *kvm) #endif kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + kvm_free_memslots(kvm, &kvm->__memslots[i][0]); + kvm_free_memslots(kvm, &kvm->__memslots[i][1]); + } cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -1284,227 +1284,136 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot) return 0; } -static void kvm_replace_memslot(struct kvm_memslots *slots, - struct kvm_memory_slot *old, - struct kvm_memory_slot *new) -{ - /* - * Remove the old memslot from the hash list and interval tree, copying - * the node data would corrupt the structures. - */ - if (old) { - hash_del(&old->id_node); - interval_tree_remove(&old->hva_node, &slots->hva_tree); - - if (!new) - return; - - /* Copy the source *data*, not the pointer, to the destination. */ - *new = *old; - } else { - /* If @old is NULL, initialize @new's hva range. */ - new->hva_node.start = new->userspace_addr; - new->hva_node.last = new->userspace_addr + - (new->npages << PAGE_SHIFT) - 1; - } - - /* (Re)Add the new memslot. */ - hash_add(slots->id_hash, &new->id_node, new->id); - interval_tree_insert(&new->hva_node, &slots->hva_tree); -} - -static void kvm_shift_memslot(struct kvm_memslots *slots, int dst, int src) +static struct kvm_memslots *kvm_get_inactive_memslots(struct kvm *kvm, int as_id) { - struct kvm_memory_slot *mslots = slots->memslots; + struct kvm_memslots *active = __kvm_memslots(kvm, as_id); + int node_idx_inactive = active->node_idx ^ 1; - kvm_replace_memslot(slots, &mslots[src], &mslots[dst]); + return &kvm->__memslots[as_id][node_idx_inactive]; } /* - * Delete a memslot by decrementing the number of used slots and shifting all - * other entries in the array forward one spot. - * @memslot is a detached dummy struct with just .id and .as_id filled. + * Helper to get the address space ID when one of memslot pointers may be NULL. + * This also serves as a sanity that at least one of the pointers is non-NULL, + * and that their address space IDs don't diverge. */ -static inline void kvm_memslot_delete(struct kvm_memslots *slots, - struct kvm_memory_slot *memslot) +static int kvm_memslots_get_as_id(struct kvm_memory_slot *a, + struct kvm_memory_slot *b) { - struct kvm_memory_slot *mslots = slots->memslots; - struct kvm_memory_slot *oldslot = id_to_memslot(slots, memslot->id); - int i; - - if (WARN_ON(!oldslot)) - return; - - slots->used_slots--; + if (WARN_ON_ONCE(!a && !b)) + return 0; - if (atomic_read(&slots->last_used_slot) >= slots->used_slots) - atomic_set(&slots->last_used_slot, 0); + if (!a) + return b->as_id; + if (!b) + return a->as_id; - /* - * Remove the to-be-deleted memslot from the list/tree _before_ shifting - * the trailing memslots forward, its data will be overwritten. 
- * Defer the (somewhat pointless) copying of the memslot until after - * the last slot has been shifted to avoid overwriting said last slot. - */ - kvm_replace_memslot(slots, oldslot, NULL); - - for (i = oldslot - mslots; i < slots->used_slots; i++) - kvm_shift_memslot(slots, i, i + 1); - mslots[i] = *memslot; + WARN_ON_ONCE(a->as_id != b->as_id); + return a->as_id; } -/* - * "Insert" a new memslot by incrementing the number of used slots. Returns - * the new slot's initial index into the memslots array. - */ -static inline int kvm_memslot_insert_back(struct kvm_memslots *slots) +static void kvm_insert_gfn_node(struct kvm_memslots *slots, + struct kvm_memory_slot *slot) { - return slots->used_slots++; -} - -/* - * Move a changed memslot backwards in the array by shifting existing slots - * with a higher GFN toward the front of the array. Note, the changed memslot - * itself is not preserved in the array, i.e. not swapped at this time, only - * its new index into the array is tracked. Returns the changed memslot's - * current index into the memslots array. - * The memslot at the returned index will not be in @slots->hva_tree or - * @slots->id_hash by then. - * @memslot is a detached struct with desired final data of the changed slot. - */ -static inline int kvm_memslot_move_backward(struct kvm_memslots *slots, - struct kvm_memory_slot *memslot) -{ - struct kvm_memory_slot *mslots = slots->memslots; - struct kvm_memory_slot *oldslot = id_to_memslot(slots, memslot->id); - int i; - - if (!oldslot || !slots->used_slots) - return -1; - - /* - * Delete the slot from the hash table and interval tree before sorting - * the remaining slots, the slot's data may be overwritten when copying - * slots as part of the sorting proccess. update_memslots() will - * unconditionally rewrite and re-add the entire slot. - */ - kvm_replace_memslot(slots, oldslot, NULL); - - /* - * Move the target memslot backward in the array by shifting existing - * memslots with a higher GFN (than the target memslot) towards the - * front of the array. - */ - for (i = oldslot - mslots; i < slots->used_slots - 1; i++) { - if (memslot->base_gfn > mslots[i + 1].base_gfn) - break; + struct rb_root *gfn_tree = &slots->gfn_tree; + struct rb_node **node, *parent; + int idx = slots->node_idx; - WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn); + parent = NULL; + for (node = &gfn_tree->rb_node; *node; ) { + struct kvm_memory_slot *tmp; - kvm_shift_memslot(slots, i, i + 1); + tmp = container_of(*node, struct kvm_memory_slot, gfn_node[idx]); + parent = *node; + if (slot->base_gfn < tmp->base_gfn) + node = &(*node)->rb_left; + else if (slot->base_gfn > tmp->base_gfn) + node = &(*node)->rb_right; + else + BUG(); } - return i; + + rb_link_node(&slot->gfn_node[idx], parent, node); + rb_insert_color(&slot->gfn_node[idx], gfn_tree); } -/* - * Move a changed memslot forwards in the array by shifting existing slots with - * a lower GFN toward the back of the array. Note, the changed memslot itself - * is not preserved in the array, i.e. not swapped at this time, only its new - * index into the array is tracked. Returns the changed memslot's final index - * into the memslots array. - * The memslot at the returned index will not be in @slots->hva_tree or - * @slots->id_hash by then. - * @memslot is a detached struct with desired final data of the new or - * changed slot. - * Assumes that the memslot at @start index is not in @slots->hva_tree or - * @slots->id_hash. 
- */ -static inline int kvm_memslot_move_forward(struct kvm_memslots *slots, - struct kvm_memory_slot *memslot, - int start) +static void kvm_erase_gfn_node(struct kvm_memslots *slots, + struct kvm_memory_slot *slot) { - struct kvm_memory_slot *mslots = slots->memslots; - int i; + rb_erase(&slot->gfn_node[slots->node_idx], &slots->gfn_tree); +} - for (i = start; i > 0; i--) { - if (memslot->base_gfn < mslots[i - 1].base_gfn) - break; +static void kvm_replace_gfn_node(struct kvm_memslots *slots, + struct kvm_memory_slot *old, + struct kvm_memory_slot *new) +{ + int idx = slots->node_idx; - WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn); + WARN_ON_ONCE(old->base_gfn != new->base_gfn); - kvm_shift_memslot(slots, i, i - 1); - } - return i; + rb_replace_node(&old->gfn_node[idx], &new->gfn_node[idx], + &slots->gfn_tree); } /* - * Re-sort memslots based on their GFN to account for an added, deleted, or - * moved memslot. Sorting memslots by GFN allows using a binary search during - * memslot lookup. - * - * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! I.e. the entry - * at memslots[0] has the highest GFN. - * - * The sorting algorithm takes advantage of having initially sorted memslots - * and knowing the position of the changed memslot. Sorting is also optimized - * by not swapping the updated memslot and instead only shifting other memslots - * and tracking the new index for the update memslot. Only once its final - * index is known is the updated memslot copied into its position in the array. - * - * - When deleting a memslot, the deleted memslot simply needs to be moved to - * the end of the array. - * - * - When creating a memslot, the algorithm "inserts" the new memslot at the - * end of the array and then it forward to its correct location. - * - * - When moving a memslot, the algorithm first moves the updated memslot - * backward to handle the scenario where the memslot's GFN was changed to a - * lower value. update_memslots() then falls through and runs the same flow - * as creating a memslot to move the memslot forward to handle the scenario - * where its GFN was changed to a higher value. + * Replace @old with @new in the inactive memslots. * - * Note, slots are sorted from highest->lowest instead of lowest->highest for - * historical reasons. Originally, invalid memslots where denoted by having - * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots - * to the end of the array. The current algorithm uses dedicated logic to - * delete a memslot and thus does not rely on invalid memslots having GFN=0. + * With NULL @old this simply adds @new. + * With NULL @new this simply removes @old. * - * The other historical motiviation for highest->lowest was to improve the - * performance of memslot lookup. KVM originally used a linear search starting - * at memslots[0]. On x86, the largest memslot usually has one of the highest, - * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a - * single memslot above the 4gb boundary. As the largest memslot is also the - * most likely to be referenced, sorting it to the front of the array was - * advantageous. The current binary search starts from the middle of the array - * and uses an LRU pointer to improve performance for all memslots and GFNs. - * - * @memslot is a detached struct, not a part of the current or new memslot - * array. + * If @new is non-NULL its hva_node[slots_idx] range has to be set + * appropriately. 
*/ -static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *memslot, - enum kvm_mr_change change) +static void kvm_replace_memslot(struct kvm *kvm, + struct kvm_memory_slot *old, + struct kvm_memory_slot *new) { - int i; + int as_id = kvm_memslots_get_as_id(old, new); + struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id); + int idx = slots->node_idx; - if (change == KVM_MR_DELETE) { - kvm_memslot_delete(slots, memslot); - } else { - if (change == KVM_MR_CREATE) - i = kvm_memslot_insert_back(slots); - else - i = kvm_memslot_move_backward(slots, memslot); - i = kvm_memslot_move_forward(slots, memslot, i); + if (old) { + hash_del(&old->id_node[idx]); + interval_tree_remove(&old->hva_node[idx], &slots->hva_tree); - if (WARN_ON_ONCE(i < 0)) + if ((long)old == atomic_long_read(&slots->last_used_slot)) + atomic_long_set(&slots->last_used_slot, (long)new); + + if (!new) { + kvm_erase_gfn_node(slots, old); return; + } + } - /* - * Copy the memslot to its new position in memslots and update - * its index accordingly. - */ - slots->memslots[i] = *memslot; - kvm_replace_memslot(slots, NULL, &slots->memslots[i]); + /* + * Initialize @new's hva range. Do this even when replacing an @old + * slot, kvm_copy_memslot() deliberately does not touch node data. + */ + new->hva_node[idx].start = new->userspace_addr; + new->hva_node[idx].last = new->userspace_addr + + (new->npages << PAGE_SHIFT) - 1; + + /* + * (Re)Add the new memslot. There is no O(1) interval_tree_replace(), + * hva_node needs to be swapped with remove+insert even though hva can't + * change when replacing an existing slot. + */ + hash_add(slots->id_hash, &new->id_node[idx], new->id); + interval_tree_insert(&new->hva_node[idx], &slots->hva_tree); + + /* + * If the memslot gfn is unchanged, rb_replace_node() can be used to + * switch the node in the gfn tree instead of removing the old and + * inserting the new as two separate operations. Replacement is a + * single O(1) operation versus two O(log(n)) operations for + * remove+insert. + */ + if (old && old->base_gfn == new->base_gfn) { + kvm_replace_gfn_node(slots, old, new); + } else { + if (old) + kvm_erase_gfn_node(slots, old); + kvm_insert_gfn_node(slots, new); } } @@ -1522,11 +1431,12 @@ static int check_memory_region_flags(const struct kvm_userspace_memory_region *m return 0; } -static struct kvm_memslots *install_new_memslots(struct kvm *kvm, - int as_id, struct kvm_memslots *slots) +static void kvm_swap_active_memslots(struct kvm *kvm, int as_id) { - struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); - u64 gen = old_memslots->generation; + struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id); + + /* Grab the generation from the activate memslots. */ + u64 gen = __kvm_memslots(kvm, as_id)->generation; WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; @@ -1577,58 +1487,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, kvm_arch_memslots_updated(kvm, gen); slots->generation = gen; - - return old_memslots; -} - -static size_t kvm_memslots_size(int slots) -{ - return sizeof(struct kvm_memslots) + - (sizeof(struct kvm_memory_slot) * slots); -} - -/* - * Note, at a minimum, the current number of used slots must be allocated, even - * when deleting a memslot, as we need a complete duplicate of the memslots for - * use when invalidating a memslot prior to deleting/moving the memslot. 
- */ -static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old, - enum kvm_mr_change change) -{ - struct kvm_memslots *slots; - size_t new_size; - struct kvm_memory_slot *memslot; - - if (change == KVM_MR_CREATE) - new_size = kvm_memslots_size(old->used_slots + 1); - else - new_size = kvm_memslots_size(old->used_slots); - - slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT); - if (unlikely(!slots)) - return NULL; - - memcpy(slots, old, kvm_memslots_size(old->used_slots)); - - slots->hva_tree = RB_ROOT_CACHED; - hash_init(slots->id_hash); - kvm_for_each_memslot(memslot, slots) { - interval_tree_insert(&memslot->hva_node, &slots->hva_tree); - hash_add(slots->id_hash, &memslot->id_node, memslot->id); - } - - return slots; -} - -static void kvm_copy_memslots_arch(struct kvm_memslots *to, - struct kvm_memslots *from) -{ - int i; - - WARN_ON_ONCE(to->used_slots != from->used_slots); - - for (i = 0; i < from->used_slots; i++) - to->memslots[i].arch = from->memslots[i].arch; } static int kvm_prepare_memory_region(struct kvm *kvm, @@ -1683,31 +1541,214 @@ static void kvm_commit_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, old, new, change); + switch (change) { + case KVM_MR_CREATE: + /* Nothing more to do. */ + break; + case KVM_MR_DELETE: + /* Free the old memslot and all its metadata. */ + kvm_free_memslot(kvm, old); + break; + case KVM_MR_MOVE: + case KVM_MR_FLAGS_ONLY: + /* + * Free the dirty bitmap as needed; the below check encompasses + * both the flags and whether a ring buffer is being used) + */ + if (old->dirty_bitmap && !new->dirty_bitmap) + kvm_destroy_dirty_bitmap(old); + + /* + * The final quirk. Free the detached, old slot, but only its + * memory, not any metadata. Metadata, including arch specific + * data, may be reused by @new. + */ + kfree(old); + break; + default: + BUG(); + } +} + +/* + * Activate @new, which must be installed in the inactive slots by the caller, + * by swapping the active slots and then propagating @new to @old once @old is + * unreachable and can be safely modified. + * + * With NULL @old this simply adds @new to @active (while swapping the sets). + * With NULL @new this simply removes @old from @active and frees it + * (while also swapping the sets). + */ +static void kvm_activate_memslot(struct kvm *kvm, + struct kvm_memory_slot *old, + struct kvm_memory_slot *new) +{ + int as_id = kvm_memslots_get_as_id(old, new); + + kvm_swap_active_memslots(kvm, as_id); + + /* Propagate the new memslot to the now inactive memslots. */ + kvm_replace_memslot(kvm, old, new); +} + +static void kvm_copy_memslot(struct kvm_memory_slot *dest, + const struct kvm_memory_slot *src) +{ + dest->base_gfn = src->base_gfn; + dest->npages = src->npages; + dest->dirty_bitmap = src->dirty_bitmap; + dest->arch = src->arch; + dest->userspace_addr = src->userspace_addr; + dest->flags = src->flags; + dest->id = src->id; + dest->as_id = src->as_id; +} + +static void kvm_invalidate_memslot(struct kvm *kvm, + struct kvm_memory_slot *old, + struct kvm_memory_slot *working_slot) +{ /* - * Free the old memslot's metadata. On DELETE, free the whole thing, - * otherwise free the dirty bitmap as needed (the below effectively - * checks both the flags and whether a ring buffer is being used). + * Mark the current slot INVALID. As with all memslot modifications, + * this must be done on an unreachable slot to avoid modifying the + * current slot in the active tree. 
*/ - if (change == KVM_MR_DELETE) - kvm_free_memslot(kvm, old); - else if (old->dirty_bitmap && !new->dirty_bitmap) - kvm_destroy_dirty_bitmap(old); + kvm_copy_memslot(working_slot, old); + working_slot->flags |= KVM_MEMSLOT_INVALID; + kvm_replace_memslot(kvm, old, working_slot); + + /* + * Activate the slot that is now marked INVALID, but don't propagate + * the slot to the now inactive slots. The slot is either going to be + * deleted or recreated as a new slot. + */ + kvm_swap_active_memslots(kvm, old->as_id); + + /* + * From this point no new shadow pages pointing to a deleted, or moved, + * memslot will be created. Validation of sp->gfn happens in: + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) + * - kvm_is_visible_gfn (mmu_check_root) + */ + kvm_arch_flush_shadow_memslot(kvm, working_slot); + + /* Was released by kvm_swap_active_memslots, reacquire. */ + mutex_lock(&kvm->slots_arch_lock); + + /* + * Copy the arch-specific field of the newly-installed slot back to the + * old slot as the arch data could have changed between releasing + * slots_arch_lock in install_new_memslots() and re-acquiring the lock + * above. Writers are required to retrieve memslots *after* acquiring + * slots_arch_lock, thus the active slot's data is guaranteed to be fresh. + */ + old->arch = working_slot->arch; +} + +static void kvm_create_memslot(struct kvm *kvm, + const struct kvm_memory_slot *new, + struct kvm_memory_slot *working) +{ + /* + * Add the new memslot to the inactive set as a copy of the + * new memslot data provided by userspace. + */ + kvm_copy_memslot(working, new); + kvm_replace_memslot(kvm, NULL, working); + kvm_activate_memslot(kvm, NULL, working); +} + +static void kvm_delete_memslot(struct kvm *kvm, + struct kvm_memory_slot *old, + struct kvm_memory_slot *invalid_slot) +{ + /* + * Remove the old memslot (in the inactive memslots) by passing NULL as + * the "new" slot. + */ + kvm_replace_memslot(kvm, old, NULL); + + /* And do the same for the invalid version in the active slot. */ + kvm_activate_memslot(kvm, invalid_slot, NULL); + + /* Free the invalid slot, the caller will clean up the old slot. */ + kfree(invalid_slot); +} + +static struct kvm_memory_slot *kvm_move_memslot(struct kvm *kvm, + struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + struct kvm_memory_slot *invalid_slot) +{ + struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, old->as_id); + + /* + * The memslot's gfn is changing, remove it from the inactive tree, it + * will be re-added with its updated gfn. Because its range is + * changing, an in-place replace is not possible. + */ + kvm_erase_gfn_node(slots, old); + + /* + * The old slot is now fully disconnected, reuse its memory for the + * persistent copy of "new". + */ + kvm_copy_memslot(old, new); + + /* Re-add to the gfn tree with the updated gfn */ + kvm_insert_gfn_node(slots, old); + + /* Replace the current INVALID slot with the updated memslot. */ + kvm_activate_memslot(kvm, invalid_slot, old); + + /* + * Clear the INVALID flag so that the invalid_slot is now a perfect + * copy of the old slot. Return it for cleanup in the caller. + */ + WARN_ON_ONCE(!(invalid_slot->flags & KVM_MEMSLOT_INVALID)); + invalid_slot->flags &= ~KVM_MEMSLOT_INVALID; + return invalid_slot; +} + +static void kvm_update_flags_memslot(struct kvm *kvm, + struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + struct kvm_memory_slot *working_slot) +{ + /* + * Similar to the MOVE case, but the slot doesn't need to be zapped as + * an intermediate step. 
Instead, the old memslot is simply replaced + * with a new, updated copy in both memslot sets. + */ + kvm_copy_memslot(working_slot, new); + kvm_replace_memslot(kvm, old, working_slot); + kvm_activate_memslot(kvm, old, working_slot); } static int kvm_set_memslot(struct kvm *kvm, + struct kvm_memory_slot *old, struct kvm_memory_slot *new, enum kvm_mr_change change) { - struct kvm_memory_slot *slot, old; - struct kvm_memslots *slots; + struct kvm_memory_slot *working; int r; /* - * Released in install_new_memslots. + * Modifications are done on an unreachable slot. Any changes are then + * (eventually) propagated to both the active and inactive slots. This + * allocation would ideally be on-demand (in helpers), but is done here + * to avoid having to handle failure after kvm_prepare_memory_region(). + */ + working = kzalloc(sizeof(*working), GFP_KERNEL_ACCOUNT); + if (!working) + return -ENOMEM; + + /* + * Released in kvm_swap_active_memslots. * * Must be held from before the current memslots are copied until * after the new memslots are installed with rcu_assign_pointer, - * then released before the synchronize srcu in install_new_memslots. + * then released before the synchronize srcu in kvm_swap_active_memslots. * * When modifying memslots outside of the slots_lock, must be held * before reading the pointer to the current memslots until after all @@ -1718,87 +1759,60 @@ static int kvm_set_memslot(struct kvm *kvm, */ mutex_lock(&kvm->slots_arch_lock); - slots = kvm_dup_memslots(__kvm_memslots(kvm, new->as_id), change); - if (!slots) { - mutex_unlock(&kvm->slots_arch_lock); - return -ENOMEM; - } - - if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { - /* - * Note, the INVALID flag needs to be in the appropriate entry - * in the freshly allocated memslots, not in @old or @new. - */ - slot = id_to_memslot(slots, new->id); - slot->flags |= KVM_MEMSLOT_INVALID; - - /* - * We can re-use the old memslots, the only difference from the - * newly installed memslots is the invalid flag, which will get - * dropped by update_memslots anyway. We'll also revert to the - * old memslots if preparing the new memory region fails. - */ - slots = install_new_memslots(kvm, new->as_id, slots); - - /* From this point no new shadow pages pointing to a deleted, - * or moved, memslot will be created. - * - * validation of sp->gfn happens in: - * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) - * - kvm_is_visible_gfn (mmu_check_root) - */ - kvm_arch_flush_shadow_memslot(kvm, slot); - - /* Released in install_new_memslots. */ - mutex_lock(&kvm->slots_arch_lock); + /* + * Invalidate the old slot if it's being deleted or moved. This is + * done prior to actually deleting/moving the memslot to allow vCPUs to + * continue running by ensuring there are no mappings or shadow pages + * for the memslot when it is deleted/moved. Without pre-invalidation + * (and without a lock), a window would exist between effecting the + * delete/move and committing the changes in arch code where KVM or a + * guest could access a non-existent memslot. + */ + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) + kvm_invalidate_memslot(kvm, old, working); + r = kvm_prepare_memory_region(kvm, old, new, change); + if (r) { /* - * The arch-specific fields of the now-active memslots could - * have been modified between releasing slots_arch_lock in - * install_new_memslots and re-acquiring slots_arch_lock above. - * Copy them to the inactive memslots. 
Arch code is required - * to retrieve memslots *after* acquiring slots_arch_lock, thus - * the active memslots are guaranteed to be fresh. + * For DELETE/MOVE, revert the above INVALID change. No + * modifications required since the original slot was preserved + * in the inactive slots. Changing the active memslots also + * release slots_arch_lock. */ - kvm_copy_memslots_arch(slots, __kvm_memslots(kvm, new->as_id)); + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) + kvm_activate_memslot(kvm, working, old); + else + mutex_unlock(&kvm->slots_arch_lock); + kfree(working); + return r; } /* - * Make a full copy of the old memslot, the pointer will become stale - * when the memslots are re-sorted by update_memslots(), and the old - * memslot needs to be referenced after calling update_memslots(), e.g. - * to free its resources and for arch specific behavior. This needs to - * happen *after* (re)acquiring slots_arch_lock. + * For DELETE and MOVE, the working slot is now active as the INVALID + * version of the old slot. MOVE is particularly special as it reuses + * the old slot and returns a copy of the old slot (in working_slot). + * For CREATE, there is no old slot. For DELETE and FLAGS_ONLY, the + * old slot is detached but otherwise preserved. */ - slot = id_to_memslot(slots, new->id); - if (slot) { - old = *slot; - } else { - WARN_ON_ONCE(change != KVM_MR_CREATE); - memset(&old, 0, sizeof(old)); - old.id = new->id; - old.as_id = new->as_id; - } - - r = kvm_prepare_memory_region(kvm, &old, new, change); - if (r) - goto out_slots; - - update_memslots(slots, new, change); - slots = install_new_memslots(kvm, new->as_id, slots); + if (change == KVM_MR_CREATE) + kvm_create_memslot(kvm, new, working); + else if (change == KVM_MR_DELETE) + kvm_delete_memslot(kvm, old, working); + else if (change == KVM_MR_MOVE) + old = kvm_move_memslot(kvm, old, new, working); + else if (change == KVM_MR_FLAGS_ONLY) + kvm_update_flags_memslot(kvm, old, new, working); + else + BUG(); - kvm_commit_memory_region(kvm, &old, new, change); + /* + * No need to refresh new->arch, changes after dropping slots_arch_lock + * will directly hit the final, active memsot. Architectures are + * responsible for knowing that new->arch may be stale. 
+ */ + kvm_commit_memory_region(kvm, old, new, change); - kvfree(slots); return 0; - -out_slots: - if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) - slots = install_new_memslots(kvm, new->as_id, slots); - else - mutex_unlock(&kvm->slots_arch_lock); - kvfree(slots); - return r; } /* @@ -1859,7 +1873,7 @@ int __kvm_set_memory_region(struct kvm *kvm, new.id = id; new.as_id = as_id; - return kvm_set_memslot(kvm, &new, KVM_MR_DELETE); + return kvm_set_memslot(kvm, old, &new, KVM_MR_DELETE); } new.as_id = as_id; @@ -1896,8 +1910,10 @@ int __kvm_set_memory_region(struct kvm *kvm, } if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + int bkt; + /* Check for overlaps */ - kvm_for_each_memslot(tmp, __kvm_memslots(kvm, as_id)) { + kvm_for_each_memslot(tmp, bkt, __kvm_memslots(kvm, as_id)) { if (tmp->id == id) continue; if (!((new.base_gfn + new.npages <= tmp->base_gfn) || @@ -1906,7 +1922,7 @@ int __kvm_set_memory_region(struct kvm *kvm, } } - return kvm_set_memslot(kvm, &new, change); + return kvm_set_memslot(kvm, old, &new, change); } EXPORT_SYMBOL_GPL(__kvm_set_memory_region); @@ -2211,21 +2227,30 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot); struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn) { struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + u64 gen = slots->generation; struct kvm_memory_slot *slot; - int slot_index; - slot = try_get_memslot(slots, vcpu->last_used_slot, gfn); + /* + * This also protects against using a memslot from a different address space, + * since different address spaces have different generation numbers. + */ + if (unlikely(gen != vcpu->last_used_slot_gen)) { + vcpu->last_used_slot = NULL; + vcpu->last_used_slot_gen = gen; + } + + slot = try_get_memslot(vcpu->last_used_slot, gfn); if (slot) return slot; /* * Fall back to searching all memslots. We purposely use * search_memslots() instead of __gfn_to_memslot() to avoid - * thrashing the VM-wide last_used_index in kvm_memslots. + * thrashing the VM-wide last_used_slot in kvm_memslots. */ - slot = search_memslots(slots, gfn, &slot_index, false); + slot = search_memslots(slots, gfn, false); if (slot) { - vcpu->last_used_slot = slot_index; + vcpu->last_used_slot = slot; return slot; } |