From 14988a4d350ce3b41ecad4f63c4f44c56f5ae34d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 18 Feb 2011 11:32:40 +0000 Subject: xen: set max_pfn_mapped to the last pfn mapped Do not set max_pfn_mapped to the end of the initial memory mappings, that also contain pages that don't belong in pfn space (like the mfn list). Set max_pfn_mapped to the last real pfn mapped in the initial memory mappings that is the pfn backing _end. Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/xen/mmu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a2d78ad35a55..6e27979506c1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1701,9 +1701,6 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { pte_t pte; - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - if (!pte_none(pte_page[pteidx])) continue; @@ -1761,6 +1758,12 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, pud_t *l3; pmd_t *l2; + /* max_pfn_mapped is the last pfn mapped in the initial memory + * mappings. Considering that on Xen after the kernel mappings we + * have the mappings of some pages that don't exist in pfn space, we + * set max_pfn_mapped to the last real pfn mapped. */ + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); @@ -1865,9 +1868,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, initial_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + - xen_start_info->nr_pt_frames * PAGE_SIZE + - 512*1024); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); -- cgit v1.2.3 From d8aa5ec3382e6a545b8f25178d1e0992d4927f19 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 9 Mar 2011 14:22:05 +0000 Subject: xen: update mask_rw_pte after kernel page tables init changes After "x86-64, mm: Put early page table high" already existing kernel page table pages can be mapped using early_ioremap too so we need to update mask_rw_pte to make sure these pages are still mapped RO. The reason why we have to do that is explain by the commit message of fef5ba797991f9335bcfc295942b684f9bf613a1: "Xen requires that all pages containing pagetable entries to be mapped read-only. If pages used for the initial pagetable are already mapped then we can change the mapping to RO. However, if they are initially unmapped, we need to make sure that when they are later mapped, they are also mapped RO. ..SNIP.. the pagetable setup code early_ioremaps the pages to write their entries, so we must make sure that mappings created in the early_ioremap fixmap area are mapped RW. (Those mappings are removed before the pages are presented to Xen as pagetable pages.)" We accomplish all this in mask_rw_pte by mapping RO all the pages mapped using early_ioremap apart from the last one that has been allocated because it is not a page table page yet (it has not been hooked into the page tables yet). Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/xen/mmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6e27979506c1..21058ad1e5e3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1488,10 +1488,12 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) /* * If the new pfn is within the range of the newly allocated * kernel pagetable, and it isn't being mapped into an - * early_ioremap fixmap slot, make sure it is RO. + * early_ioremap fixmap slot as a freshly allocated page, make sure + * it is RO. */ - if (!is_early_ioremap_ptep(ptep) && - pfn >= pgt_buf_start && pfn < pgt_buf_end) + if (((!is_early_ioremap_ptep(ptep) && + pfn >= pgt_buf_start && pfn < pgt_buf_end)) || + (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) pte = pte_wrprotect(pte); return pte; -- cgit v1.2.3 From b254244d2682fe975630f176c25a4444cc4e088d Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 24 Mar 2011 08:10:07 -0400 Subject: xen/p2m: Allocate p2m tracking pages on override It is possible to add a p2m override on pages that are currently mapped to INVALID_P2M_ENTRY; in particular, this will happen when using ballooned pages in gntdev. This means that set_phys_to_machine must be used instead of __set_phys_to_machine. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 215a3ce61068..2a44edd606e5 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -671,7 +671,9 @@ int m2p_add_override(unsigned long mfn, struct page *page) page->private = mfn; page->index = pfn_to_mfn(pfn); - __set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) + return -ENOMEM; + if (!PageHighMem(page)) /* Just zap old mapping for now */ pte_clear(&init_mm, address, ptep); @@ -709,7 +711,7 @@ int m2p_remove_override(struct page *page) spin_lock_irqsave(&m2p_override_lock, flags); list_del(&page->lru); spin_unlock_irqrestore(&m2p_override_lock, flags); - __set_phys_to_machine(pfn, page->index); + set_phys_to_machine(pfn, page->index); if (!PageHighMem(page)) set_pte_at(&init_mm, address, ptep, -- cgit v1.2.3 From b83c6e55ac482f08984504d61382ecf05f0afe32 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 24 Mar 2011 13:34:32 -0700 Subject: xen: fix p2m section mismatches Fix section mismatch warnings: set_phys_range_identity() is called by __init xen_set_identity(), so also mark set_phys_range_identity() as __init. then: __early_alloc_p2m() is called set_phys_range_identity(), so also mark __early_alloc_p2m() as __init. WARNING: arch/x86/built-in.o(.text+0x7856): Section mismatch in reference from the function __early_alloc_p2m() to the function .init.text:extend_brk() The function __early_alloc_p2m() references the function __init extend_brk(). This is often because __early_alloc_p2m lacks a __init annotation or the annotation of extend_brk is wrong. WARNING: arch/x86/built-in.o(.text+0x7967): Section mismatch in reference from the function set_phys_range_identity() to the function .init.text:extend_brk() The function set_phys_range_identity() references the function __init extend_brk(). This is often because set_phys_range_identity lacks a __init annotation or the annotation of extend_brk is wrong. [v2: Per Stephen Hemming recommonedation made __early_alloc_p2m static] Signed-off-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 2a44edd606e5..141eb0de8b06 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -497,7 +497,7 @@ static bool alloc_p2m(unsigned long pfn) return true; } -bool __early_alloc_p2m(unsigned long pfn) +static bool __init __early_alloc_p2m(unsigned long pfn) { unsigned topidx, mididx, idx; @@ -530,7 +530,7 @@ bool __early_alloc_p2m(unsigned long pfn) } return idx != 0; } -unsigned long set_phys_range_identity(unsigned long pfn_s, +unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) { unsigned long pfn; -- cgit v1.2.3 From d88885d0923ae27b01dfcec644f94829b1e46bea Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 4 Apr 2011 14:48:20 -0400 Subject: xen/debug: Don't be so verbose with WARN on 1-1 mapping errors. There are valid situations in which this error is not a warning. Mainly when QEMU maps a guest memory and uses the VM_IO flag to set the MFNs. For right now make the WARN be WARN_ONCE. In the future we will: 1). Remove the VM_IO code handling.. 2). .. which will also remove this debug facility. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index c82df6c9c0f0..a991b57f91fe 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -565,13 +565,13 @@ pte_t xen_make_pte_debug(pteval_t pte) if (io_page && (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; - WARN(addr != other_addr, + WARN_ONCE(addr != other_addr, "0x%lx is using VM_IO, but it is 0x%lx!\n", (unsigned long)addr, (unsigned long)other_addr); } else { pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; other_addr = (_pte.pte & PTE_PFN_MASK); - WARN((addr == other_addr) && (!io_page) && (!iomap_set), + WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set), "0x%lx is missing VM_IO (and wasn't fixed)!\n", (unsigned long)addr); } -- cgit v1.2.3 From 61f4237d5b005767a76f4f3694e68e6f78f392d9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 18 Sep 2010 22:25:30 -0700 Subject: xen: just completely disable XSAVE Some (old) versions of Xen just kill the domain if it tries to set any unknown bits in CR4, so we can't reliably probe for OSXSAVE in CR4. Since Xen doesn't support XSAVE for guests at the moment, and no such support is being worked on, there's no downside in just unconditionally masking XSAVE support. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 49dbd78ec3cb..66272a237622 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -250,23 +250,7 @@ static __init void xen_init_cpuid_mask(void) ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ (1 << X86_FEATURE_ACPI)); /* disable ACPI */ - ax = 1; - cx = 0; - xen_cpuid(&ax, &bx, &cx, &dx); - - /* cpuid claims we support xsave; try enabling it to see what happens */ - if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { - unsigned long cr4; - - set_in_cr4(X86_CR4_OSXSAVE); - - cr4 = read_cr4(); - - if ((cr4 & X86_CR4_OSXSAVE) == 0) - cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); - - clear_in_cr4(X86_CR4_OSXSAVE); - } + cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); /* disable XSAVE */ } static void xen_set_debugreg(int reg, unsigned long val) -- cgit v1.2.3 From 947ccf9c3c30307b774af3666ee74fcd9f47f646 Mon Sep 17 00:00:00 2001 From: Shan Haitao Date: Tue, 9 Nov 2010 11:43:36 -0800 Subject: xen: Allow PV-OPS kernel to detect whether XSAVE is supported Xen fails to mask XSAVE from the cpuid feature, despite not historically supporting guest use of XSAVE. However, now that XSAVE support has been added to Xen, we need to reliably detect its presence. The most reliable way to do this is to look at the OSXSAVE feature in cpuid which is set iff the OS (Xen, in this case), has set CR4.OSXSAVE. [ Cleaned up conditional a bit. - Jeremy ] Signed-off-by: Shan Haitao Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 66272a237622..e3c6a06cf725 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -238,6 +238,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, static __init void xen_init_cpuid_mask(void) { unsigned int ax, bx, cx, dx; + unsigned int xsave_mask; cpuid_leaf1_edx_mask = ~((1 << X86_FEATURE_MCE) | /* disable MCE */ @@ -249,8 +250,16 @@ static __init void xen_init_cpuid_mask(void) cpuid_leaf1_edx_mask &= ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + ax = 1; + xen_cpuid(&ax, &bx, &cx, &dx); - cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); /* disable XSAVE */ + xsave_mask = + (1 << (X86_FEATURE_XSAVE % 32)) | + (1 << (X86_FEATURE_OSXSAVE % 32)); + + /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ + if ((cx & xsave_mask) != xsave_mask) + cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ } static void xen_set_debugreg(int reg, unsigned long val) -- cgit v1.2.3 From d419e4c0f7584ffc5c72d9aeeaac485cc756ebcf Mon Sep 17 00:00:00 2001 From: Shriram Rajagopalan Date: Mon, 11 Apr 2011 22:54:48 +0200 Subject: fix XEN_SAVE_RESTORE Kconfig dependencies Make XEN_SAVE_RESTORE select HIBERNATE_CALLBACKS. Remove XEN_SAVE_RESTORE dependency from PM_SLEEP. Signed-off-by: Shriram Rajagopalan Acked-by: Ian Campbell Signed-off-by: Rafael J. Wysocki --- arch/x86/xen/Kconfig | 1 + kernel/power/Kconfig | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1c7121ba18ff..5cc821cb2e09 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -39,6 +39,7 @@ config XEN_MAX_DOMAIN_MEMORY config XEN_SAVE_RESTORE bool depends on XEN + select HIBERNATE_CALLBACKS default y config XEN_DEBUG_FS diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 049791468d37..6de9a8fc3417 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -89,7 +89,7 @@ config PM_STD_PARTITION config PM_SLEEP def_bool y - depends on SUSPEND || HIBERNATE_CALLBACKS || XEN_SAVE_RESTORE + depends on SUSPEND || HIBERNATE_CALLBACKS config PM_SLEEP_SMP def_bool y -- cgit v1.2.3 From 24bdb0b62cc82120924762ae6bc85afc8c3f2b26 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 12 Apr 2011 12:19:52 +0100 Subject: xen: do not create the extra e820 region at an addr lower than 4G Do not add the extra e820 region at a physical address lower than 4G because it breaks e820_end_of_low_ram_pfn(). It is OK for us to move the xen_extra_mem_start up and down because this is the index of the memory that can be ballooned in/out - it is memory not available to the kernel during bootup. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index fa0269a99377..90bac0aac3a5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -227,7 +227,7 @@ char * __init xen_memory_setup(void) memcpy(map_raw, map, sizeof(map)); e820.nr_map = 0; - xen_extra_mem_start = mem_end; + xen_extra_mem_start = max((1ULL << 32), mem_end); for (i = 0; i < memmap.nr_entries; i++) { unsigned long long end; -- cgit v1.2.3 From ee176455e28469e2420032aab3db11ac2ae3eaa8 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 19 Apr 2011 14:47:31 +0100 Subject: xen: mask_rw_pte: do not apply the early_ioremap checks on x86_32 The two "is_early_ioremap_ptep" checks in mask_rw_pte are only used on x86_64, in fact early_ioremap is not used at all to setup the initial pagetable on x86_32. Moreover on x86_32 the two checks are wrong because the range pgt_buf_start..pgt_buf_end initially should be mapped RW because the pages in the range are not pagetable pages yet and haven't been cleared yet. Afterwards considering the pgt_buf_start..pgt_buf_end is part of the initial mapping, xen_alloc_pte is capable of turning the ptes RO when they become pagetable pages. Fix the issue and improve the readability of the code providing two different implementation of mask_rw_pte for x86_32 and x86_64. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a991b57f91fe..aef7af92b28b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1473,16 +1473,20 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } +#ifdef CONFIG_X86_32 static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) { - unsigned long pfn = pte_pfn(pte); - -#ifdef CONFIG_X86_32 /* If there's an existing pte, then don't allow _PAGE_RW to be set */ if (pte_val_ma(*ptep) & _PAGE_PRESENT) pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & pte_val_ma(pte)); -#endif + + return pte; +} +#else /* CONFIG_X86_64 */ +static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); /* * If the new pfn is within the range of the newly allocated @@ -1497,6 +1501,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) return pte; } +#endif /* CONFIG_X86_64 */ /* Init-time set_pte while constructing initial pagetables, which doesn't allow RO pagetable pages to be remapped RW */ -- cgit v1.2.3