diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2016-03-08 12:12:18 +0100 |
---|---|---|
committer | Christian Borntraeger <borntraeger@de.ibm.com> | 2016-06-20 09:54:04 +0200 |
commit | 4be130a08420d6918d80c1067f8078f425eb98df (patch) | |
tree | c3e323bf6597eea8e588586550e378acf7652ce2 /arch/s390/include/asm | |
parent | 6ea427bbbd4078297bb1dbd6c5cb83f3f48aac46 (diff) | |
download | lwn-4be130a08420d6918d80c1067f8078f425eb98df.tar.gz lwn-4be130a08420d6918d80c1067f8078f425eb98df.zip |
s390/mm: add shadow gmap support
For a nested KVM guest the outer KVM host needs to create shadow
page tables for the nested guest. This patch adds the basic support
to the guest address space (gmap) code.
For each guest address space the inner KVM host creates, the first
outer KVM host needs to create shadow page tables. The address space
is identified by the ASCE loaded into the control register 1 at the
time the inner SIE instruction for the second nested KVM guest is
executed. The outer KVM host creates the shadow tables starting with
the table identified by the ASCE on a on-demand basis. The outer KVM
host will get repeated faults for all the shadow tables needed to
run the second KVM guest.
While a shadow page table for the second KVM guest is active the access
to the origin region, segment and page tables needs to be restricted
for the first KVM guest. For region and segment and page tables the first
KVM guest may read the memory, but write attempt has to lead to an
unshadow. This is done using the page invalid and read-only bits in the
page table of the first KVM guest. If the first guest re-accesses one of
the origin pages of a shadow, it gets a fault and the affected parts of
the shadow page table hierarchy needs to be removed again.
PGSTE tables don't have to be shadowed, as all interpretation assist can't
deal with the invalid bits in the shadow pte being set differently than
the original ones provided by the first KVM guest.
Many bug fixes and improvements by David Hildenbrand.
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Diffstat (limited to 'arch/s390/include/asm')
-rw-r--r-- | arch/s390/include/asm/gmap.h | 52 | ||||
-rw-r--r-- | arch/s390/include/asm/pgalloc.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 10 | ||||
-rw-r--r-- | arch/s390/include/asm/processor.h | 1 |
4 files changed, 62 insertions, 3 deletions
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index e69853ce55da..58e65ee5b2d2 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -10,6 +10,7 @@ /** * struct gmap_struct - guest address space + * @list: list head for the mm->context gmap list * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct * @guest_to_host: radix tree with guest to host address translation @@ -19,6 +20,13 @@ * @table: pointer to the page directory * @asce: address space control element for gmap page table * @pfault_enabled: defines if pfaults are applicable for the guest + * @host_to_rmap: radix tree with gmap_rmap lists + * @children: list of shadow gmap structures + * @pt_list: list of all page tables used in the shadow guest address space + * @shadow_lock: spinlock to protect the shadow gmap list + * @parent: pointer to the parent gmap for shadow guest address spaces + * @orig_asce: ASCE for which the shadow page table has been created + * @removed: flag to indicate if a shadow guest address space has been removed */ struct gmap { struct list_head list; @@ -33,9 +41,33 @@ struct gmap { unsigned long asce_end; void *private; bool pfault_enabled; + /* Additional data for shadow guest address spaces */ + struct radix_tree_root host_to_rmap; + struct list_head children; + struct list_head pt_list; + spinlock_t shadow_lock; + struct gmap *parent; + unsigned long orig_asce; + bool removed; }; /** + * struct gmap_rmap - reverse mapping for shadow page table entries + * @next: pointer to next rmap in the list + * @raddr: virtual rmap address in the shadow guest address space + */ +struct gmap_rmap { + struct gmap_rmap *next; + unsigned long raddr; +}; + +#define gmap_for_each_rmap(pos, head) \ + for (pos = (head); pos; pos = pos->next) + +#define gmap_for_each_rmap_safe(pos, n, head) \ + for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n) + +/** * struct gmap_notifier - notify function block for page invalidation * @notifier_call: address of callback function */ @@ -46,6 +78,11 @@ struct gmap_notifier { unsigned long end); }; +static inline int gmap_is_shadow(struct gmap *gmap) +{ + return !!gmap->parent; +} + struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit); void gmap_remove(struct gmap *gmap); struct gmap *gmap_get(struct gmap *gmap); @@ -64,9 +101,22 @@ void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); +int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val); + +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce); +int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t); +int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t); +int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt); +int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt); +int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, + unsigned long *pgt, int *dat_protection); +int gmap_shadow_page(struct gmap *sg, unsigned long saddr, + unsigned long paddr, int write); + void gmap_register_pte_notifier(struct gmap_notifier *); void gmap_unregister_pte_notifier(struct gmap_notifier *); -void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *); +void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *, + unsigned long bits); int gmap_mprotect_notify(struct gmap *, unsigned long start, unsigned long len, int prot); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index da34cb6b1f3b..f4eb9843eed4 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -19,8 +19,10 @@ unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); +struct page *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); +void page_table_free_pgste(struct page *page); extern int page_table_allocate_pgste; static inline void clear_table(unsigned long *s, unsigned long val, size_t n) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 35dde6afffcf..a6e7fc8f5b49 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -256,6 +256,7 @@ static inline int is_module_addr(void *addr) /* Bits in the region table entry */ #define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ #define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define _REGION_ENTRY_OFFSET 0xc0 /* region table offset */ #define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ #define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ #define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ @@ -327,6 +328,7 @@ static inline int is_module_addr(void *addr) #define PGSTE_GC_BIT 0x0002000000000000UL #define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ #define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ +#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL @@ -885,12 +887,16 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry); void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_notify(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long bits); int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr, - pte_t *ptep, int prot); + pte_t *ptep, int prot, unsigned long bit); void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_t *ptep , int reset); void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, + pte_t *sptep, pte_t *tptep, int write); +void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep); bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9d4d311d7e52..94c80b6d031d 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -109,6 +109,7 @@ struct thread_struct { unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_write_flag; /* gmap fault write indication */ unsigned int gmap_pfault; /* signal of a pending guest pfault */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ |