From 02f037d641dc6672be5cfe7875a48ab99b95b154 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:41:57 -0600 Subject: x86/mm/pat: Add support of non-default PAT MSR setting In preparation for fixing a regression caused by: 9cd25aac1f44 ("x86/mm/pat: Emulate PAT when it is disabled")' ... PAT needs to support a case that PAT MSR is initialized with a non-default value. When pat_init() is called and PAT is disabled, it initializes the PAT table with the BIOS default value. Xen, however, sets PAT MSR with a non-default value to enable WC. This causes inconsistency between the PAT table and PAT MSR when PAT is set to disable on Xen. Change pat_init() to handle the PAT disable cases properly. Add init_cache_modes() to handle two cases when PAT is set to disable. 1. CPU supports PAT: Set PAT table to be consistent with PAT MSR. 2. CPU does not support PAT: Set PAT table to be consistent with PWT and PCD bits in a PTE. Note, __init_cache_modes(), renamed from pat_init_cache_modes(), will be changed to a static function in a later patch. Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Toshi Kani Cc: elliott@hpe.com Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-2-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 73 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 20 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index faec01e7a17d..b4663885308f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -181,7 +181,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) * configuration. * Using lower indices is preferred, so we start with highest index. */ -void pat_init_cache_modes(u64 pat) +void __init_cache_modes(u64 pat) { enum page_cache_mode cache; char pat_msg[33]; @@ -207,9 +207,6 @@ static void pat_bsp_init(u64 pat) return; } - if (!pat_enabled()) - goto done; - rdmsrl(MSR_IA32_CR_PAT, tmp_pat); if (!tmp_pat) { pat_disable("PAT MSR is 0, disabled."); @@ -218,15 +215,11 @@ static void pat_bsp_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); -done: - pat_init_cache_modes(pat); + __init_cache_modes(pat); } static void pat_ap_init(u64 pat) { - if (!pat_enabled()) - return; - if (!cpu_has_pat) { /* * If this happens we are on a secondary CPU, but switched to @@ -238,18 +231,32 @@ static void pat_ap_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); } -void pat_init(void) +static void init_cache_modes(void) { - u64 pat; - struct cpuinfo_x86 *c = &boot_cpu_data; + u64 pat = 0; + static int init_cm_done; - if (!pat_enabled()) { + if (init_cm_done) + return; + + if (boot_cpu_has(X86_FEATURE_PAT)) { + /* + * CPU supports PAT. Set PAT table to be consistent with + * PAT MSR. This case supports "nopat" boot option, and + * virtual machine environments which support PAT without + * MTRRs. In specific, Xen has unique setup to PAT MSR. + * + * If PAT MSR returns 0, it is considered invalid and emulates + * as No PAT. + */ + rdmsrl(MSR_IA32_CR_PAT, pat); + } + + if (!pat) { /* * No PAT. Emulate the PAT table that corresponds to the two - * cache bits, PWT (Write Through) and PCD (Cache Disable). 
This - * setup is the same as the BIOS default setup when the system - * has PAT but the "nopat" boot option has been specified. This - * emulated PAT table is used when MSR_IA32_CR_PAT returns 0. + * cache bits, PWT (Write Through) and PCD (Cache Disable). + * This setup is also the same as the BIOS default setup. * * PTE encoding: * @@ -266,10 +273,36 @@ void pat_init(void) */ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } + + __init_cache_modes(pat); + + init_cm_done = 1; +} + +/** + * pat_init - Initialize PAT MSR and PAT table + * + * This function initializes PAT MSR and PAT table with an OS-defined value + * to enable additional cache attributes, WC and WT. + * + * This function must be called on all CPUs using the specific sequence of + * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this + * procedure for PAT. + */ +void pat_init(void) +{ + u64 pat; + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (!pat_enabled()) { + init_cache_modes(); + return; + } - } else if ((c->x86_vendor == X86_VENDOR_INTEL) && - (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || - ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { + if ((c->x86_vendor == X86_VENDOR_INTEL) && + (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || + ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { /* * PAT support with the lower four entries. Intel Pentium 2, * 3, M, and 4 are affected by PAT errata, which makes the -- cgit v1.2.3 From 224bb1e5d67ba0f2872c98002d6a6f991ac6fd4a Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:41:58 -0600 Subject: x86/mm/pat: Add pat_disable() interface In preparation for fixing a regression caused by: 9cd25aac1f44 ("x86/mm/pat: Emulate PAT when it is disabled") ... PAT needs to provide an interface that prevents the OS from initializing the PAT MSR. PAT MSR initialization must be done on all CPUs using the specific sequence of operations defined in the Intel SDM. This requires MTRRs to be enabled since pat_init() is called as part of MTRR init from mtrr_rendezvous_handler(). Make pat_disable() as the interface that prevents the OS from initializing the PAT MSR. MTRR will call this interface when it cannot provide the SDM-defined sequence to initialize PAT. This also assures that pat_disable() called from pat_bsp_init() will set the PAT table properly when CPU does not support PAT. Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Robert Elliott Cc: Toshi Kani Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-3-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 1 + arch/x86/mm/pat.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 97ea55bc2b54..0ad356c066ef 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -5,6 +5,7 @@ #include bool pat_enabled(void); +void pat_disable(const char *reason); extern void pat_init(void); void __init_cache_modes(u64); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index b4663885308f..1cc1d37f1de7 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -40,11 +40,22 @@ static bool boot_cpu_done; static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); +static void init_cache_modes(void); -static inline void pat_disable(const char *reason) +void pat_disable(const char *reason) { + if (!__pat_enabled) + return; + + if (boot_cpu_done) { + WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); + return; + } + __pat_enabled = 0; pr_info("x86/PAT: %s\n", reason); + + init_cache_modes(); } static int __init nopat(char *str) -- cgit v1.2.3 From d63dcf49cf5ae5605f4d14229e3888e104f294b1 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:41:59 -0600 Subject: x86/mm/pat: Replace cpu_has_pat with boot_cpu_has() Borislav Petkov suggested: > Please use on init paths boot_cpu_has(X86_FEATURE_PAT) and on fast > paths static_cpu_has(X86_FEATURE_PAT). No more of that cpu_has_XXX > ugliness. Replace the use of cpu_has_pat on init paths with boot_cpu_has(). Suggested-by: Borislav Petkov Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Robert Elliott Cc: Toshi Kani Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-4-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 1cc1d37f1de7..59ec038b9833 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -213,7 +213,7 @@ static void pat_bsp_init(u64 pat) { u64 tmp_pat; - if (!cpu_has_pat) { + if (!boot_cpu_has(X86_FEATURE_PAT)) { pat_disable("PAT not supported by CPU."); return; } @@ -231,7 +231,7 @@ static void pat_bsp_init(u64 pat) static void pat_ap_init(u64 pat) { - if (!cpu_has_pat) { + if (!boot_cpu_has(X86_FEATURE_PAT)) { /* * If this happens we are on a secondary CPU, but switched to * PAT on the boot CPU. We have no way to undo PAT. -- cgit v1.2.3 From 88ba281108ed0c25c9d292b48bd3f272fcb90dd0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:42:02 -0600 Subject: x86/xen, pat: Remove PAT table init code from Xen Xen supports PAT without MTRRs for its guests. In order to enable WC attribute, it was necessary for xen_start_kernel() to call pat_init_cache_modes() to update PAT table before starting guest kernel. 
Now that the kernel initializes PAT table to the BIOS handoff state when MTRR is disabled, this Xen-specific PAT init code is no longer necessary. Delete it from xen_start_kernel(). Also change __init_cache_modes() to a static function since PAT table should not be tweaked by other modules. Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Acked-by: Juergen Gross Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Toshi Kani Cc: elliott@hpe.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-7-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 1 - arch/x86/mm/pat.c | 2 +- arch/x86/xen/enlighten.c | 9 --------- 3 files changed, 1 insertion(+), 11 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 0ad356c066ef..0b1ff4c1c14e 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -7,7 +7,6 @@ bool pat_enabled(void); void pat_disable(const char *reason); extern void pat_init(void); -void __init_cache_modes(u64); extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 59ec038b9833..c4c3ddcc9069 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -192,7 +192,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) * configuration. * Using lower indices is preferred, so we start with highest index. */ -void __init_cache_modes(u64 pat) +static void __init_cache_modes(u64 pat) { enum page_cache_mode cache; char pat_msg[33]; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c469a7c7c309..d8cca75e3b3e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,7 +75,6 @@ #include #include #include -#include #include #ifdef CONFIG_ACPI @@ -1511,7 +1510,6 @@ asmlinkage __visible void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; - u64 pat; int rc; if (!xen_start_info) @@ -1618,13 +1616,6 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_start_info->nr_pages); xen_reserve_special_pages(); - /* - * Modify the cache mode translation tables to match Xen's PAT - * configuration. 
- */ - rdmsrl(MSR_IA32_CR_PAT, pat); - __init_cache_modes(pat); - /* keep using Xen gdt for now; no urgent need to change it */ #ifdef CONFIG_X86_32 -- cgit v1.2.3 From b8291adc191abec2095f03a130ac91506d345cae Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:41:58 +0200 Subject: x86/cpufeature: Remove cpu_has_gbpages Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459266123-21878-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kvm/mmu.c | 3 ++- arch/x86/mm/hugetlbpage.c | 4 ++-- arch/x86/mm/init.c | 2 +- arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/pageattr.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3da7aec9fca1..693b4aa43908 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -130,7 +130,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) -#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 70e95d097ef1..bc1e0b65909e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3836,7 +3836,8 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, false, - cpu_has_gbpages, true, true); + boot_cpu_has(X86_FEATURE_GBPAGES), + true, true); else __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, boot_cpu_data.x86_phys_bits, diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 740d7ac03a55..14a95054d4e0 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -162,7 +162,7 @@ static __init int setup_hugepagesz(char *opt) unsigned long ps = memparse(opt, &opt); if (ps == PMD_SIZE) { hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (ps == PUD_SIZE && cpu_has_gbpages) { + } else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); } else { printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", @@ -177,7 +177,7 @@ __setup("hugepagesz=", setup_hugepagesz); static __init int gigantic_pages_init(void) { /* With compaction or CMA we can allocate gigantic pages at runtime */ - if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT)) + if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT)) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); return 0; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 9d56f271d519..14377e98f279 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -173,7 +173,7 @@ static void __init probe_page_size_mask(void) __supported_pte_mask &= ~_PAGE_GLOBAL; /* Enable 1 GB linear kernel mappings if available: */ - if (direct_gbpages && cpu_has_gbpages) { + if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { printk(KERN_INFO "Using GB pages for direct mapping\n"); page_size_mask |= 1 << PG_LEVEL_1G; } else { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0d8d53d1f5cc..5a116ace9cbb 100644 --- 
a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -378,7 +378,7 @@ EXPORT_SYMBOL(iounmap); int __init arch_ioremap_pud_supported(void) { #ifdef CONFIG_X86_64 - return cpu_has_gbpages; + return boot_cpu_has(X86_FEATURE_GBPAGES); #else return 0; #endif diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 01be9ec3bf79..fb20c2ee0092 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1055,7 +1055,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* * Map everything starting from the Gb boundary, possibly with 1G pages */ - while (cpu_has_gbpages && end - start >= PUD_SIZE) { + while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) { set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); -- cgit v1.2.3 From 906bf7fda2c9cf5c1762ec607943ed54b6c5b203 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:41:59 +0200 Subject: x86/cpufeature: Remove cpu_has_clflush Use the fast variant in the DRM code. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Cc: intel-gfx@lists.freedesktop.org Link: http://lkml.kernel.org/r/1459266123-21878-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/tce_64.c | 2 +- arch/x86/mm/pageattr.c | 2 +- drivers/gpu/drm/drm_cache.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 693b4aa43908..a75154232db5 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -129,7 +129,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) -#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1f7fdb91a818..628a9f853b84 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -468,7 +468,7 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } - if (c->x86 == 6 && cpu_has_clflush && + if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) && (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index ab40954e113e..f386bad0984e 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -40,7 +40,7 @@ static inline void flush_tce(void* tceaddr) { /* a single tce can't cross a cache line */ - if (cpu_has_clflush) + if (boot_cpu_has(X86_FEATURE_CLFLUSH)) clflush(tceaddr); else wbinvd(); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fb20c2ee0092..bbf462ff9745 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1460,7 +1460,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, * error case we fall back to cpa_flush_all (which uses * WBINVD): */ - if (!ret && cpu_has_clflush) { + if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) { if (cpa.flags & (CPA_PAGES_ARRAY 
| CPA_ARRAY)) { cpa_flush_array(addr, numpages, cache, cpa.flags, pages); diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index 6743ff7dccfa..059f7c39c582 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -72,7 +72,7 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages) { #if defined(CONFIG_X86) - if (cpu_has_clflush) { + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { drm_cache_flush_clflush(pages, num_pages); return; } @@ -105,7 +105,7 @@ void drm_clflush_sg(struct sg_table *st) { #if defined(CONFIG_X86) - if (cpu_has_clflush) { + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { struct sg_page_iter sg_iter; mb(); @@ -129,7 +129,7 @@ void drm_clflush_virt_range(void *addr, unsigned long length) { #if defined(CONFIG_X86) - if (cpu_has_clflush) { + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { const int size = boot_cpu_data.x86_clflush_size; void *end = addr + length; addr = (void *)(((unsigned long)addr) & -size); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1328bc5021b4..b845f468dd74 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -488,7 +488,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, ret = relocate_entry_cpu(obj, reloc, target_offset); else if (obj->map_and_fenceable) ret = relocate_entry_gtt(obj, reloc, target_offset); - else if (cpu_has_clflush) + else if (static_cpu_has(X86_FEATURE_CLFLUSH)) ret = relocate_entry_clflush(obj, reloc, target_offset); else { WARN_ONCE(1, "Impossible case in relocation handling\n"); -- cgit v1.2.3 From c109bf95992b391bb40bc37c5d309d13fead99b5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:42:02 +0200 Subject: x86/cpufeature: Remove cpu_has_pge Use static_cpu_has() in __flush_tlb_all() due to the time-sensitivity of this one. 
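
[Illustrative aside, not part of the patch: a minimal sketch of the convention this series follows — boot_cpu_has() as the plain runtime check on init/slow paths, static_cpu_has() on time-sensitive paths such as __flush_tlb_all(). The function names below are hypothetical and only show the split.]

	#include <linux/init.h>
	#include <linux/printk.h>
	#include <asm/cpufeature.h>
	#include <asm/tlbflush.h>

	/* Hypothetical init-time check: a dynamic feature test is cheap here. */
	static int __init example_feature_init(void)
	{
		if (!boot_cpu_has(X86_FEATURE_PGE))
			pr_info("example: global pages not supported\n");
		return 0;
	}

	/*
	 * Hypothetical hot path: static_cpu_has() is patched via alternatives,
	 * so the common case costs a direct branch instead of a load and test.
	 */
	static void example_flush(void)
	{
		if (static_cpu_has(X86_FEATURE_PGE))
			__flush_tlb_global();
		else
			__flush_tlb();
	}
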
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459266123-21878-10-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/tlbflush.h | 2 +- arch/x86/kernel/cpu/intel.c | 6 +++--- arch/x86/kernel/cpu/mtrr/cyrix.c | 4 ++-- arch/x86/kernel/cpu/mtrr/generic.c | 4 ++-- arch/x86/mm/init.c | 2 +- arch/x86/xen/enlighten.c | 2 +- drivers/lguest/x86/core.c | 2 +- 8 files changed, 11 insertions(+), 12 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 5e02bc2e8444..f97b53417d44 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -121,7 +121,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) -#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index c24b4224d439..3628e6c5ebf4 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -181,7 +181,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (cpu_has_pge) + if (static_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1dba36fe73e5..f71a34944b56 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -152,9 +152,9 @@ static void early_init_intel(struct cpuinfo_x86 *c) * the TLB when any changes are made to any of the page table entries. * The operating system must reload CR3 to cause the TLB to be flushed" * - * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should - * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE - * to be modified + * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h + * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE + * to be modified. 
*/ if (c->x86 == 5 && c->x86_model == 9) { pr_info("Disabling PGE capability bit\n"); diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index f8c81ba0b465..b1086f79e57e 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -137,7 +137,7 @@ static void prepare_set(void) u32 cr0; /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4 = __read_cr4(); __write_cr4(cr4 & ~X86_CR4_PGE); } @@ -170,7 +170,7 @@ static void post_set(void) write_cr0(read_cr0() & ~X86_CR0_CD); /* Restore value of CR4 */ - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) __write_cr4(cr4); } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 19f57360dfd2..f1bed301bdb2 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -741,7 +741,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) wbinvd(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4 = __read_cr4(); __write_cr4(cr4 & ~X86_CR4_PGE); } @@ -771,7 +771,7 @@ static void post_set(void) __releases(set_atomicity_lock) write_cr0(read_cr0() & ~X86_CR0_CD); /* Restore value of CR4 */ - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) __write_cr4(cr4); raw_spin_unlock(&set_atomicity_lock); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 14377e98f279..05ff46a9c261 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -166,7 +166,7 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } else diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 880862c7d9dd..055f48ddb03c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1472,7 +1472,7 @@ static void xen_pvh_set_cr_flags(int cpu) if (cpu_has_pse) cr4_set_bits_and_update_boot(X86_CR4_PSE); - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) cr4_set_bits_and_update_boot(X86_CR4_PGE); } diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 6a4cd771a2be..65f22debf3c6 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -599,7 +599,7 @@ void __init lguest_arch_host_init(void) * doing this. */ get_online_cpus(); - if (cpu_has_pge) { /* We have a broader idea of "global". */ + if (boot_cpu_has(X86_FEATURE_PGE)) { /* We have a broader idea of "global". */ /* Remember that this was originally set (for cleanup). 
*/ cpu_had_pge = 1; /* -- cgit v1.2.3 From 16bf92261b1b6cb1a1c0671b445a2fcb5a1ecc96 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:42:03 +0200 Subject: x86/cpufeature: Remove cpu_has_pse Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459266123-21878-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/pgtable.h | 2 +- arch/x86/mm/init.c | 4 ++-- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- arch/x86/mm/ioremap.c | 2 +- arch/x86/power/hibernate_32.c | 2 +- arch/x86/xen/enlighten.c | 2 +- 8 files changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f97b53417d44..97e5f13ea471 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -119,7 +119,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 97f3242e133c..f86491a7bc9d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -183,7 +183,7 @@ static inline int pmd_trans_huge(pmd_t pmd) static inline int has_transparent_hugepage(void) { - return cpu_has_pse; + return boot_cpu_has(X86_FEATURE_PSE); } #ifdef __HAVE_ARCH_PTE_DEVMAP diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 05ff46a9c261..372aad2b3291 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -157,12 +157,12 @@ static void __init probe_page_size_mask(void) * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. 
*/ - if (cpu_has_pse && !debug_pagealloc_enabled()) + if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled()) page_size_mask |= 1 << PG_LEVEL_2M; #endif /* Enable PSE if available */ - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bd7a9b9e2e14..85af914e3d27 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -284,7 +284,7 @@ kernel_physical_mapping_init(unsigned long start, */ mapping_iter = 1; - if (!cpu_has_pse) + if (!boot_cpu_has(X86_FEATURE_PSE)) use_pse = 0; repeat: diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 214afda97911..89d97477c1d9 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1295,7 +1295,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) struct vmem_altmap *altmap = to_vmem_altmap(start); int err; - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) err = vmemmap_populate_hugepages(start, end, node, altmap); else if (altmap) { pr_err_once("%s: no cpu support for altmap allocations\n", @@ -1338,7 +1338,7 @@ void register_page_bootmem_memmap(unsigned long section_nr, } get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); - if (!cpu_has_pse) { + if (!boot_cpu_has(X86_FEATURE_PSE)) { next = (addr + PAGE_SIZE) & PAGE_MASK; pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 5a116ace9cbb..f0894910bdd7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -386,7 +386,7 @@ int __init arch_ioremap_pud_supported(void) int __init arch_ioremap_pmd_supported(void) { - return cpu_has_pse; + return boot_cpu_has(X86_FEATURE_PSE); } /* diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 291226b952a9..9f14bd34581d 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -106,7 +106,7 @@ static int resume_physical_mapping_init(pgd_t *pgd_base) * normal page tables. * NOTE: We can mark everything as executable here */ - if (cpu_has_pse) { + if (boot_cpu_has(X86_FEATURE_PSE)) { set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); pfn += PTRS_PER_PTE; } else { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 055f48ddb03c..ff2a2e6ef7af 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1469,7 +1469,7 @@ static void xen_pvh_set_cr_flags(int cpu) * For BSP, PSE PGE are set in probe_page_size_mask(), for APs * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). */ - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) cr4_set_bits_and_update_boot(X86_CR4_PSE); if (boot_cpu_has(X86_FEATURE_PGE)) -- cgit v1.2.3 From 1886297ce0c8d563a08c8a8c4c0b97743e06cd37 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Mon, 11 Apr 2016 13:36:00 -0600 Subject: x86/mm/pat: Fix BUG_ON() in mmap_mem() on QEMU/i386 The following BUG_ON() crash was reported on QEMU/i386: kernel BUG at arch/x86/mm/physaddr.c:79! Call Trace: phys_mem_access_prot_allowed mmap_mem ? mmap_region mmap_region do_mmap vm_mmap_pgoff SyS_mmap_pgoff do_int80_syscall_32 entry_INT80_32 after commit: edfe63ec97ed ("x86/mtrr: Fix Xorg crashes in Qemu sessions") PAT is now set to disabled state when MTRRs are disabled. Thus, reactivating the __pa(high_memory) check in phys_mem_access_prot_allowed(). When CONFIG_DEBUG_VIRTUAL is set, __pa() calls __phys_addr(), which in turn calls slow_virt_to_phys() for 'high_memory'. 
Because 'high_memory' is set to (the max direct mapped virt addr + 1), it is not a valid virtual address. Hence, slow_virt_to_phys() returns 0 and hit the BUG_ON. Using __pa_nodebug() instead of __pa() will fix this BUG_ON. However, this code block, originally written for Pentiums and earlier, is no longer adequate since a 32-bit Xen guest has MTRRs disabled and supports ZONE_HIGHMEM. In this setup, this code sets UC attribute for accessing RAM in high memory range. Delete this code block as it has been unused for a long time. Reported-by: kernel test robot Reviewed-by: Borislav Petkov Signed-off-by: Toshi Kani Cc: Andrew Morton Cc: David Vrabel Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1460403360-25441-1-git-send-email-toshi.kani@hpe.com Link: https://lkml.org/lkml/2016/4/1/608 Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index c4c3ddcc9069..fb0604f11eec 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -778,25 +778,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (file->f_flags & O_DSYNC) pcm = _PAGE_CACHE_MODE_UC_MINUS; -#ifdef CONFIG_X86_32 - /* - * On the PPro and successors, the MTRRs are used to set - * memory types for physical addresses outside main memory, - * so blindly setting UC or PWT on those pages is wrong. - * For Pentiums and earlier, the surround logic should disable - * caching for the high addresses through the KEN pin, but - * we maintain the tradition of paranoia in this code. - */ - if (!pat_enabled() && - !(boot_cpu_has(X86_FEATURE_MTRR) || - boot_cpu_has(X86_FEATURE_K6_MTRR) || - boot_cpu_has(X86_FEATURE_CYRIX_ARR) || - boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && - (pfn << PAGE_SHIFT) >= __pa(high_memory)) { - pcm = _PAGE_CACHE_MODE_UC; - } -#endif - *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | cachemode2protval(pcm)); return 1; -- cgit v1.2.3 From 7bbcdb1ca4d2fd69094ee89c18601b396531ca9f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:32 -0700 Subject: x86/head: Pass a real pt_regs and trapnr to early_fixup_exception() early_fixup_exception() is limited by the fact that it doesn't have a real struct pt_regs. Change both the 32-bit and 64-bit asm and the C code to pass and accept a real pt_regs. 
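
[Illustrative aside, not part of the patch: a sketch of what a real pt_regs makes possible, written in the style of the ex_handler_*() helpers in arch/x86/mm/extable.c (where ex_fixup_addr() is defined). The handler name and the -EFAULT convention are made up for illustration only.]

	/*
	 * Sketch: assumed to sit next to the other ex_handler_*() helpers in
	 * arch/x86/mm/extable.c. With a full pt_regs, a fixup callback can
	 * rewrite any register of the faulting context, not just the saved
	 * instruction pointer.
	 */
	static bool ex_handler_example(const struct exception_table_entry *fixup,
				       struct pt_regs *regs, int trapnr)
	{
		regs->ip = ex_fixup_addr(fixup);	/* resume at the fixup label */
		regs->ax = -EFAULT;			/* report failure to the caller */
		return true;				/* exception handled */
	}
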
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/e3fb680fcfd5e23e38237e8328b64a25cc121d37.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 2 +- arch/x86/kernel/head_32.S | 74 +++++++++++++++++++++++++++++------------- arch/x86/kernel/head_64.S | 68 ++++++++++++++++++++------------------ arch/x86/mm/extable.c | 6 ++-- 4 files changed, 92 insertions(+), 58 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a969ae607be8..b6fb311b7d75 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -110,7 +110,7 @@ struct exception_table_entry { extern int fixup_exception(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); -extern int early_fixup_exception(unsigned long *ip); +extern int early_fixup_exception(struct pt_regs *regs, int trapnr); /* * These are the main single-value transfer routines. They automatically diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 54cdbd2003fe..0904536cd45c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -568,29 +568,64 @@ early_idt_handler_common: je hlt_loop incl %ss:early_recursion_flag - push %eax # 16(%esp) - push %ecx # 12(%esp) - push %edx # 8(%esp) - push %ds # 4(%esp) - push %es # 0(%esp) - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es + /* The vector number is in pt_regs->gs */ - cmpl $(__KERNEL_CS),32(%esp) + cld + pushl %fs /* pt_regs->fs */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %es /* pt_regs->es */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %ds /* pt_regs->ds */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %eax /* pt_regs->ax */ + pushl %ebp /* pt_regs->bp */ + pushl %edi /* pt_regs->di */ + pushl %esi /* pt_regs->si */ + pushl %edx /* pt_regs->dx */ + pushl %ecx /* pt_regs->cx */ + pushl %ebx /* pt_regs->bx */ + + /* Fix up DS and ES */ + movl $(__KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + + /* Load the vector number into EDX */ + movl PT_GS(%esp), %edx + + /* Load GS into pt_regs->gs and clear high bits */ + movw %gs, PT_GS(%esp) + movw $0, PT_GS+2(%esp) + + cmpl $(__KERNEL_CS),PT_CS(%esp) jne 10f - leal 28(%esp),%eax # Pointer to %eip - call early_fixup_exception - andl %eax,%eax - jnz ex_entry /* found an exception entry */ + movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */ + call early_fixup_exception + andl %eax,%eax + jz 10f /* Exception wasn't fixed up */ + + popl %ebx /* pt_regs->bx */ + popl %ecx /* pt_regs->cx */ + popl %edx /* pt_regs->dx */ + popl %esi /* pt_regs->si */ + popl %edi /* pt_regs->di */ + popl %ebp /* pt_regs->bp */ + popl %eax /* pt_regs->ax */ + popl %ds /* pt_regs->ds */ + popl %es /* pt_regs->es */ + popl %fs /* pt_regs->fs */ + popl %gs /* pt_regs->gs */ + decl %ss:early_recursion_flag + addl $4, %esp /* pop pt_regs->orig_ax */ + iret 10: #ifdef CONFIG_PRINTK xorl %eax,%eax - movw %ax,2(%esp) /* clean up the segment values on some cpus */ - movw %ax,6(%esp) - movw %ax,34(%esp) + movw %ax,PT_FS+2(%esp) /* clean up the segment values on some cpus */ + movw %ax,PT_DS+2(%esp) + movw %ax,PT_ES+2(%esp) leal 40(%esp),%eax pushl %eax /* %esp before the exception */ pushl 
%ebx @@ -608,13 +643,6 @@ hlt_loop: hlt jmp hlt_loop -ex_entry: - pop %es - pop %ds - pop %edx - pop %ecx - pop %eax - decl %ss:early_recursion_flag .Lis_nmi: addl $8,%esp /* drop vector number and error code */ iret diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 22fbf9df61bb..9e8636d2cedd 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -20,6 +20,7 @@ #include #include #include +#include "../entry/calling.h" #ifdef CONFIG_PARAVIRT #include @@ -357,39 +358,52 @@ early_idt_handler_common: jz 1f incl early_recursion_flag(%rip) - pushq %rax # 64(%rsp) - pushq %rcx # 56(%rsp) - pushq %rdx # 48(%rsp) - pushq %rsi # 40(%rsp) - pushq %rdi # 32(%rsp) - pushq %r8 # 24(%rsp) - pushq %r9 # 16(%rsp) - pushq %r10 # 8(%rsp) - pushq %r11 # 0(%rsp) - - cmpl $__KERNEL_CS,96(%rsp) + /* The vector number is currently in the pt_regs->di slot. */ + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* RSI = vector number */ + movq %rdi, 8(%rsp) /* pt_regs->di = RDI */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->bx */ + pushq %rbp /* pt_regs->bp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + cmpl $__KERNEL_CS,CS(%rsp) jne 11f - cmpl $14,72(%rsp) # Page fault? + cmpq $14,%rsi /* Page fault? */ jnz 10f - GET_CR2_INTO(%rdi) # can clobber any volatile register if pv + GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ call early_make_pgtable andl %eax,%eax - jz 20f # All good + jz 20f /* All good */ 10: - leaq 88(%rsp),%rdi # Pointer to %rip + movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */ call early_fixup_exception andl %eax,%eax jnz 20f # Found an exception entry 11: #ifdef CONFIG_EARLY_PRINTK - GET_CR2_INTO(%r9) # can clobber any volatile register if pv - movl 80(%rsp),%r8d # error code - movl 72(%rsp),%esi # vector number - movl 96(%rsp),%edx # %cs - movq 88(%rsp),%rcx # %rip + /* + * On paravirt kernels, GET_CR2_INTO clobbers callee-clobbered regs. + * We only care about RSI, so we need to save it. 
+ */ + movq %rsi,%rbx /* Save vector number */ + GET_CR2_INTO(%r9) + movq ORIG_RAX(%rsp),%r8 /* error code */ + movq %rbx,%rsi /* vector number */ + movq CS(%rsp),%rdx + movq RIP(%rsp),%rcx xorl %eax,%eax leaq early_idt_msg(%rip),%rdi call early_printk @@ -398,24 +412,16 @@ early_idt_handler_common: call dump_stack #ifdef CONFIG_KALLSYMS leaq early_idt_ripmsg(%rip),%rdi - movq 40(%rsp),%rsi # %rip again + movq RIP(%rsp),%rsi # %rip again call __print_symbol #endif #endif /* EARLY_PRINTK */ 1: hlt jmp 1b -20: # Exception table entry found or page table generated - popq %r11 - popq %r10 - popq %r9 - popq %r8 - popq %rdi - popq %rsi - popq %rdx - popq %rcx - popq %rax +20: /* Exception table entry found or page table generated */ decl early_recursion_flag(%rip) + jmp restore_regs_and_iret .Lis_nmi: addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 82447b3fba38..1366e067a796 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -83,13 +83,13 @@ int fixup_exception(struct pt_regs *regs, int trapnr) } /* Restricted version used during very early boot */ -int __init early_fixup_exception(unsigned long *ip) +int __init early_fixup_exception(struct pt_regs *regs, int trapnr) { const struct exception_table_entry *e; unsigned long new_ip; ex_handler_t handler; - e = search_exception_tables(*ip); + e = search_exception_tables(regs->ip); if (!e) return 0; @@ -100,6 +100,6 @@ int __init early_fixup_exception(unsigned long *ip) if (handler != ex_handler_default) return 0; - *ip = new_ip; + regs->ip = new_ip; return 1; } -- cgit v1.2.3 From 0d0efc07f3df677d7622bb760f8e2920b5e33f42 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:33 -0700 Subject: x86/head: Move the early NMI fixup into C C is nicer than asm. 
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/dd068269f8d59fe44e9e43a50d0efd67da65c2b5.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 7 ------- arch/x86/kernel/head_64.S | 6 ------ arch/x86/mm/extable.c | 5 +++++ 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 0904536cd45c..184291c72c22 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -561,9 +561,6 @@ early_idt_handler_common: */ cld - cmpl $2,(%esp) # X86_TRAP_NMI - je .Lis_nmi # Ignore NMI - cmpl $2,%ss:early_recursion_flag je hlt_loop incl %ss:early_recursion_flag @@ -642,10 +639,6 @@ early_idt_handler_common: hlt_loop: hlt jmp hlt_loop - -.Lis_nmi: - addl $8,%esp /* drop vector number and error code */ - iret ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9e8636d2cedd..230843781dd4 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -351,9 +351,6 @@ early_idt_handler_common: */ cld - cmpl $2,(%rsp) # X86_TRAP_NMI - je .Lis_nmi # Ignore NMI - cmpl $2,early_recursion_flag(%rip) jz 1f incl early_recursion_flag(%rip) @@ -422,9 +419,6 @@ early_idt_handler_common: 20: /* Exception table entry found or page table generated */ decl early_recursion_flag(%rip) jmp restore_regs_and_iret -.Lis_nmi: - addq $16,%rsp # drop vector number and error code - INTERRUPT_RETURN ENDPROC(early_idt_handler_common) __INITDATA diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 1366e067a796..4be041910c2f 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,5 +1,6 @@ #include #include +#include typedef bool (*ex_handler_t)(const struct exception_table_entry *, struct pt_regs *, int); @@ -89,6 +90,10 @@ int __init early_fixup_exception(struct pt_regs *regs, int trapnr) unsigned long new_ip; ex_handler_t handler; + /* Ignore early NMIs. */ + if (trapnr == X86_TRAP_NMI) + return 1; + e = search_exception_tables(regs->ip); if (!e) return 0; -- cgit v1.2.3 From 0e861fbb5bda79b871341ef2a9a8059765cbe8a4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:34 -0700 Subject: x86/head: Move early exception panic code into early_fixup_exception() This removes a bunch of assembly and adds some C code instead. It changes the actual printouts on both 32-bit and 64-bit kernels, but they still seem okay. 
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/4085070316fc3ab29538d3fcfe282648d1d4ee2e.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 2 +- arch/x86/kernel/head_32.S | 49 +++++------------------------------------- arch/x86/kernel/head_64.S | 45 ++------------------------------------ arch/x86/mm/extable.c | 29 ++++++++++++++++++++----- 4 files changed, 32 insertions(+), 93 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b6fb311b7d75..d794fd1f582f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -110,7 +110,7 @@ struct exception_table_entry { extern int fixup_exception(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); -extern int early_fixup_exception(struct pt_regs *regs, int trapnr); +extern void early_fixup_exception(struct pt_regs *regs, int trapnr); /* * These are the main single-value transfer routines. They automatically diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 184291c72c22..6770865fde6b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -561,8 +561,6 @@ early_idt_handler_common: */ cld - cmpl $2,%ss:early_recursion_flag - je hlt_loop incl %ss:early_recursion_flag /* The vector number is in pt_regs->gs */ @@ -594,13 +592,8 @@ early_idt_handler_common: movw %gs, PT_GS(%esp) movw $0, PT_GS+2(%esp) - cmpl $(__KERNEL_CS),PT_CS(%esp) - jne 10f - movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */ call early_fixup_exception - andl %eax,%eax - jz 10f /* Exception wasn't fixed up */ popl %ebx /* pt_regs->bx */ popl %ecx /* pt_regs->cx */ @@ -616,29 +609,6 @@ early_idt_handler_common: decl %ss:early_recursion_flag addl $4, %esp /* pop pt_regs->orig_ax */ iret - -10: -#ifdef CONFIG_PRINTK - xorl %eax,%eax - movw %ax,PT_FS+2(%esp) /* clean up the segment values on some cpus */ - movw %ax,PT_DS+2(%esp) - movw %ax,PT_ES+2(%esp) - leal 40(%esp),%eax - pushl %eax /* %esp before the exception */ - pushl %ebx - pushl %ebp - pushl %esi - pushl %edi - movl %cr2,%eax - pushl %eax - pushl (20+6*4)(%esp) /* trapno */ - pushl $fault_msg - call printk -#endif - call dump_stack -hlt_loop: - hlt - jmp hlt_loop ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ @@ -674,10 +644,14 @@ ignore_int: popl %eax #endif iret + +hlt_loop: + hlt + jmp hlt_loop ENDPROC(ignore_int) __INITDATA .align 4 -early_recursion_flag: +GLOBAL(early_recursion_flag) .long 0 __REFDATA @@ -742,19 +716,6 @@ __INITRODATA int_msg: .asciz "Unknown interrupt or fault at: %p %p %p\n" -fault_msg: -/* fault info: */ - .ascii "BUG: Int %d: CR2 %p\n" -/* regs pushed in early_idt_handler: */ - .ascii " EDI %p ESI %p EBP %p EBX %p\n" - .ascii " ESP %p ES %p DS %p\n" - .ascii " EDX %p ECX %p EAX %p\n" -/* fault frame: */ - .ascii " vec %p err %p EIP %p CS %p flg %p\n" - .ascii "Stack: %p %p %p %p %p %p %p %p\n" - .ascii " %p %p %p %p %p %p %p %p\n" - .asciz " %p %p %p %p %p %p %p %p\n" - #include "../../x86/xen/xen-head.S" /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 230843781dd4..3de91a7e6c99 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -351,8 +351,6 @@ early_idt_handler_common: */ cld - cmpl 
$2,early_recursion_flag(%rip) - jz 1f incl early_recursion_flag(%rip) /* The vector number is currently in the pt_regs->di slot. */ @@ -373,9 +371,6 @@ early_idt_handler_common: pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ - cmpl $__KERNEL_CS,CS(%rsp) - jne 11f - cmpq $14,%rsi /* Page fault? */ jnz 10f GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ @@ -386,37 +381,8 @@ early_idt_handler_common: 10: movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */ call early_fixup_exception - andl %eax,%eax - jnz 20f # Found an exception entry - -11: -#ifdef CONFIG_EARLY_PRINTK - /* - * On paravirt kernels, GET_CR2_INTO clobbers callee-clobbered regs. - * We only care about RSI, so we need to save it. - */ - movq %rsi,%rbx /* Save vector number */ - GET_CR2_INTO(%r9) - movq ORIG_RAX(%rsp),%r8 /* error code */ - movq %rbx,%rsi /* vector number */ - movq CS(%rsp),%rdx - movq RIP(%rsp),%rcx - xorl %eax,%eax - leaq early_idt_msg(%rip),%rdi - call early_printk - cmpl $2,early_recursion_flag(%rip) - jz 1f - call dump_stack -#ifdef CONFIG_KALLSYMS - leaq early_idt_ripmsg(%rip),%rdi - movq RIP(%rsp),%rsi # %rip again - call __print_symbol -#endif -#endif /* EARLY_PRINTK */ -1: hlt - jmp 1b -20: /* Exception table entry found or page table generated */ +20: decl early_recursion_flag(%rip) jmp restore_regs_and_iret ENDPROC(early_idt_handler_common) @@ -424,16 +390,9 @@ ENDPROC(early_idt_handler_common) __INITDATA .balign 4 -early_recursion_flag: +GLOBAL(early_recursion_flag) .long 0 -#ifdef CONFIG_EARLY_PRINTK -early_idt_msg: - .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" -early_idt_ripmsg: - .asciz "RIP %s\n" -#endif /* CONFIG_EARLY_PRINTK */ - #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ GLOBAL(name) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4be041910c2f..da442f37ca8b 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -83,8 +83,10 @@ int fixup_exception(struct pt_regs *regs, int trapnr) return handler(e, regs, trapnr); } +extern unsigned int early_recursion_flag; + /* Restricted version used during very early boot */ -int __init early_fixup_exception(struct pt_regs *regs, int trapnr) +void __init early_fixup_exception(struct pt_regs *regs, int trapnr) { const struct exception_table_entry *e; unsigned long new_ip; @@ -92,19 +94,36 @@ int __init early_fixup_exception(struct pt_regs *regs, int trapnr) /* Ignore early NMIs. */ if (trapnr == X86_TRAP_NMI) - return 1; + return; + + if (early_recursion_flag > 2) + goto halt_loop; + + if (regs->cs != __KERNEL_CS) + goto fail; e = search_exception_tables(regs->ip); if (!e) - return 0; + goto fail; new_ip = ex_fixup_addr(e); handler = ex_fixup_handler(e); /* special handling not supported during early boot */ if (handler != ex_handler_default) - return 0; + goto fail; regs->ip = new_ip; - return 1; + return; + +fail: + early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", + (unsigned)trapnr, (unsigned long)regs->cs, regs->ip, + regs->orig_ax, read_cr2()); + + show_regs(regs); + +halt_loop: + while (true) + halt(); } -- cgit v1.2.3 From ae7ef45e12354a1e2f6013b46df0c9f5bbb6ffbe Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:35 -0700 Subject: x86/traps: Enable all exception handler callbacks early Now that early_fixup_exception() has pt_regs, we can just call fixup_exception() from it. This will make fancy exception handlers work early. 
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/20fc047d926150cb08cb9b9f2923519b07ec1a15.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/extable.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index da442f37ca8b..061a23758354 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -88,10 +88,6 @@ extern unsigned int early_recursion_flag; /* Restricted version used during very early boot */ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) { - const struct exception_table_entry *e; - unsigned long new_ip; - ex_handler_t handler; - /* Ignore early NMIs. */ if (trapnr == X86_TRAP_NMI) return; @@ -102,19 +98,8 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (regs->cs != __KERNEL_CS) goto fail; - e = search_exception_tables(regs->ip); - if (!e) - goto fail; - - new_ip = ex_fixup_addr(e); - handler = ex_fixup_handler(e); - - /* special handling not supported during early boot */ - if (handler != ex_handler_default) - goto fail; - - regs->ip = new_ip; - return; + if (fixup_exception(regs, trapnr)) + return; fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", -- cgit v1.2.3 From fbd704374d111bed16a19261176fa30e2379c87c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:37 -0700 Subject: x86/msr: Carry on after a non-"safe" MSR access fails This demotes an OOPS and likely panic due to a failed non-"safe" MSR access to a WARN_ONCE() and, for RDMSR, a return value of zero. To be clear, this type of failure should *not* happen. This patch exists to minimize the chance of nasty undebuggable failures happening when a CONFIG_PARAVIRT=y bug in the non-"safe" MSR helpers gets fixed. 
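
[Illustrative aside, not part of the patch: a hedged example of the new failure mode. The MSR number is deliberately bogus and the function is hypothetical; the WARN text is quoted from the handler added by this patch.]

	#include <linux/types.h>
	#include <linux/printk.h>
	#include <asm/msr.h>

	/* Hypothetical probe: 0x00000ffe is assumed not to exist on the CPU. */
	static void example_msr_probe(void)
	{
		u64 val;

		rdmsrl(0x00000ffe, val);
		/*
		 * Before this patch: the #GP leads to an OOPS and likely panic.
		 * After this patch:  "unchecked MSR access error: RDMSR from 0xffe"
		 *                    is WARNed once and the read appears to return 0.
		 */
		pr_info("example: MSR 0xffe reads back as %llu\n", val);
	}
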
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/26567b216aae70e795938f4b567eace5a0eb90ba.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 10 ++++++++-- arch/x86/mm/extable.c | 27 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7a79ee2778b3..25f169c6eb95 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -84,7 +84,10 @@ static inline unsigned long long native_read_msr(unsigned int msr) { DECLARE_ARGS(val, low, high); - asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr)); + asm volatile("1: rdmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) + : EAX_EDX_RET(val, low, high) : "c" (msr)); if (msr_tracepoint_active(__tracepoint_read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); @@ -111,7 +114,10 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { - asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + : : "c" (msr), "a"(low), "d" (high) : "memory"); if (msr_tracepoint_active(__tracepoint_read_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 061a23758354..fd9eb98c4f58 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -43,6 +43,33 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_ext); +bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n", + (unsigned int)regs->cx); + + /* Pretend that the read succeeded and returned 0. */ + regs->ip = ex_fixup_addr(fixup); + regs->ax = 0; + regs->dx = 0; + return true; +} +EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); + +bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n", + (unsigned int)regs->cx, + (unsigned int)regs->dx, (unsigned int)regs->ax); + + /* Pretend that the write succeeded. */ + regs->ip = ex_fixup_addr(fixup); + return true; +} +EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); + bool ex_has_fault_handler(unsigned long ip) { const struct exception_table_entry *e; -- cgit v1.2.3 From 60a0e2039e3df6c0a2b896bd78af36ff36fb629c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Apr 2016 08:46:22 -0700 Subject: x86/extable: Add a comment about early exception handlers Borislav asked for a comment explaining why all exception handlers are allowed early. 
Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/5f1dcd6919f4a5923959a8065cb2c04d9dac1412.1459784772.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/extable.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index fd9eb98c4f58..aaeda3ffaafe 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -125,6 +125,20 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (regs->cs != __KERNEL_CS) goto fail; + /* + * The full exception fixup machinery is available as soon as + * the early IDT is loaded. This means that it is the + * responsibility of extable users to either function correctly + * when handlers are invoked early or to simply avoid causing + * exceptions before they're ready to handle them. + * + * This is better than filtering which handlers can be used, + * because refusing to call a handler here is guaranteed to + * result in a hard-to-debug panic. + * + * Keep in mind that not all vectors actually get here. Early + * fage faults, for example, are special. + */ if (fixup_exception(regs, trapnr)) return; -- cgit v1.2.3 From 45e876f794e8e566bf827c25ef0791875081724f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:26 -0700 Subject: x86/segments/64: When loadsegment(fs, ...) fails, clear the base On AMD CPUs, a failed loadsegment currently may not clear the FS base. Fix it. While we're at it, prevent loadsegment(gs, xyz) from even compiling on 64-bit kernels. It shouldn't be used. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a084c1b93b7b1408b58d3fd0b5d6e47da8e7d7cf.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/segment.h | 42 +++++++++++++++++++++++++++++++++++++++--- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/mm/extable.c | 10 ++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 7d5a1929d76b..e1a4afd20223 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -2,6 +2,7 @@ #define _ASM_X86_SEGMENT_H #include +#include /* * Constructor for a conventional segment GDT (or LDT) entry. @@ -249,10 +250,13 @@ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDL #endif /* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. + * Load a segment. Fall back on loading the zero segment if something goes + * wrong. This variant assumes that loading zero fully clears the segment. + * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any + * failure to fully clear the cached descriptor is only observable for + * FS and GS. 
*/ -#define loadsegment(seg, value) \ +#define __loadsegment_simple(seg, value) \ do { \ unsigned short __val = (value); \ \ @@ -269,6 +273,38 @@ do { \ : "+r" (__val) : : "memory"); \ } while (0) +#define __loadsegment_ss(value) __loadsegment_simple(ss, (value)) +#define __loadsegment_ds(value) __loadsegment_simple(ds, (value)) +#define __loadsegment_es(value) __loadsegment_simple(es, (value)) + +#ifdef CONFIG_X86_32 + +/* + * On 32-bit systems, the hidden parts of FS and GS are unobservable if + * the selector is NULL, so there's no funny business here. + */ +#define __loadsegment_fs(value) __loadsegment_simple(fs, (value)) +#define __loadsegment_gs(value) __loadsegment_simple(gs, (value)) + +#else + +static inline void __loadsegment_fs(unsigned short value) +{ + asm volatile(" \n" + "1: movw %0, %%fs \n" + "2: \n" + + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs) + + : : "rm" (value) : "memory"); +} + +/* __loadsegment_gs is intentionally undefined. Use load_gs_index instead. */ + +#endif + +#define loadsegment(seg, value) __loadsegment_ ## seg (value) + /* * Save a segment register away: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bfa36de6d9f..088106140c4b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -430,7 +430,7 @@ void load_percpu_segment(int cpu) #ifdef CONFIG_X86_32 loadsegment(fs, __KERNEL_PERCPU); #else - loadsegment(gs, 0); + __loadsegment_simple(gs, 0); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif load_stack_canary_segment(); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index aaeda3ffaafe..4bb53b89f3c5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -70,6 +70,16 @@ bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); +bool ex_handler_clear_fs(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + if (static_cpu_has(X86_BUG_NULL_SEG)) + asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); + asm volatile ("mov %0, %%fs" : : "rm" (0)); + return ex_handler_default(fixup, regs, trapnr); +} +EXPORT_SYMBOL(ex_handler_clear_fs); + bool ex_has_fault_handler(unsigned long ip) { const struct exception_table_entry *e; -- cgit v1.2.3