From 1db491f77b6ed0f32f1d4a3ac40a5be9524f1914 Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Thu, 15 Jan 2015 20:30:01 -0800
Subject: x86/mm: Reduce PAE-mode per task pgd allocation overhead from 4K to 32 bytes

With more embedded systems emerging using Quark, among other things, the
32-bit kernel matters again. A 32-bit machine and kernel uses PAE paging,
which currently wastes at least 4K of memory per process on Linux, because
we have to reserve an entire page to hold a single 32-byte PGD structure.
It would be a very good thing if we could eliminate that wastage.

PAE paging is used to access more than 4GB of memory on x86-32, and it is
required for NX.

In this patch, we still allocate one page for the pgd on a Xen domain and
on a 64-bit kernel, because a one-page pgd is assumed in those cases. But
we can save memory by allocating only a 32-byte pgd for a 32-bit PAE
kernel when it is not running as a Xen domain.

Signed-off-by: Fenghua Yu
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Christoph Lameter
Cc: Dave Hansen
Cc: Glenn Williamson
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1421382601-46912-1-git-send-email-fenghua.yu@intel.com
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pgtable.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 3 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 6fb6927f9e76..d223e1fe1dd2 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -271,12 +271,87 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 	}
 }
 
+/*
+ * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
+ * assumes that pgd should be in one page.
+ *
+ * But kernel with PAE paging that is not running as a Xen domain
+ * only needs to allocate 32 bytes for pgd instead of one page.
+ */
+#ifdef CONFIG_X86_PAE
+
+#include
+
+#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
+#define PGD_ALIGN	32
+
+static struct kmem_cache *pgd_cache;
+
+static int __init pgd_cache_init(void)
+{
+	/*
+	 * When PAE kernel is running as a Xen domain, it does not use
+	 * shared kernel pmd. And this requires a whole page for pgd.
+	 */
+	if (!SHARED_KERNEL_PMD)
+		return 0;
+
+	/*
+	 * when PAE kernel is not running as a Xen domain, it uses
+	 * shared kernel pmd. Shared kernel pmd does not require a whole
+	 * page for pgd. We are able to just allocate a 32-byte slab for pgd.
+	 * During boot time, we create a 32-byte slab for pgd table allocation.
+	 */
+	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
+				      SLAB_PANIC, NULL);
+	if (!pgd_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+core_initcall(pgd_cache_init);
+
+static inline pgd_t *_pgd_alloc(void)
+{
+	/*
+	 * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
+	 * We allocate one page for pgd.
+	 */
+	if (!SHARED_KERNEL_PMD)
+		return (pgd_t *)__get_free_page(PGALLOC_GFP);
+
+	/*
+	 * Now PAE kernel is not running as a Xen domain. We can allocate
+	 * a 32-byte slab for pgd to save memory space.
+	 */
+	return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+	if (!SHARED_KERNEL_PMD)
+		free_page((unsigned long)pgd);
+	else
+		kmem_cache_free(pgd_cache, pgd);
+}
+#else
+static inline pgd_t *_pgd_alloc(void)
+{
+	return (pgd_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+#endif /* CONFIG_X86_PAE */
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
 
-	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+	pgd = _pgd_alloc();
 
 	if (pgd == NULL)
 		goto out;
@@ -306,7 +381,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 out_free_pmds:
 	free_pmds(pmds);
 out_free_pgd:
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 out:
 	return NULL;
 }
@@ -316,7 +391,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	pgd_mop_up_pmds(mm, pgd);
 	pgd_dtor(pgd);
 	paravirt_pgd_free(mm, pgd);
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 }
 
 /*
--
cgit v1.2.3
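Where the 32-byte figure comes from: on 32-bit PAE the top-level page table
holds only four 64-bit entries, so PGD_SIZE = PTRS_PER_PGD * sizeof(pgd_t)
works out to 32 bytes rather than a whole 4K page. A minimal user-space
sketch of that arithmetic, with the PAE constants hard-coded here as
assumptions rather than taken from kernel headers:

#include <stdio.h>

int main(void)
{
	/*
	 * Assumed PAE numbers (not read from kernel headers): 4 top-level
	 * entries of 8 bytes each, and a 4096-byte page.
	 */
	unsigned long ptrs_per_pgd   = 4;
	unsigned long pgd_entry_size = 8;
	unsigned long page_size      = 4096;

	unsigned long pgd_size = ptrs_per_pgd * pgd_entry_size;	/* 32 bytes */

	printf("PAE pgd size:   %lu bytes\n", pgd_size);
	printf("saved per task: %lu bytes\n", page_size - pgd_size);
	return 0;
}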
From 1f40a8bfa91adfa8d1ac5013c08c76f6fd6691ad Mon Sep 17 00:00:00 2001
From: Pavel Machek
Date: Sun, 28 Dec 2014 17:15:24 +0100
Subject: x86/mm/pat: Ensure different messages in STRICT_DEVMEM and PAT cases

STRICT_DEVMEM and PAT produce the same failure when accessing /dev/mem,
which is quite confusing to the user. Make the printk messages different
to lessen the confusion.

Signed-off-by: Pavel Machek
Cc: Thomas Gleixner
Cc: "H. Peter Anvin"
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7ac68698406c..35af6771a95a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 
 #ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	return 1;
@@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 
 	while (cursor < to) {
 		if (!devmem_is_allowed(pfn)) {
-			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
-				current->comm, from, to - 1);
+			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+				current->comm, from, to - 1);
 			return 0;
 		}
 		cursor += PAGE_SIZE;
--
cgit v1.2.3

From 6bbb614ec478961c7443086bdf7fd6784479c14a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 27 Feb 2015 15:55:40 +0100
Subject: x86/mm: Unexport set_memory_ro() and set_memory_rw()

This effectively unexports the set_memory_ro() and set_memory_rw()
functions, and thus reverts:

  a03352d2c1dc ("x86: export set_memory_ro and set_memory_rw")

They were introduced for debugging purposes in e1000e, but no module user
is left in the mainline kernel (anymore?), and we explicitly do not want
modules to use these functions, as they e.g. protect eBPF (interpreted &
JIT'ed) images from malicious modifications or bugs.

Outside of the eBPF scope, I believe the other set_memory_*() functions
should be unexported on x86 for modules as well.

Signed-off-by: Daniel Borkmann
Acked-by: Alexei Starovoitov
Cc: Arjan van de Ven
Cc: Borislav Petkov
Cc: Bruce Allan
Cc: H. Peter Anvin
Cc: Jesse Brandeburg
Cc: Thomas Gleixner
Cc: davem@davemloft.net
Link: http://lkml.kernel.org/r/a064393a0a5d319eebde5c761cfd743132d4f213.1425040940.git.daniel@iogearbox.net
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pageattr.c | 2 --
 1 file changed, 2 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 536ea2fb6e33..81e8282d8c2f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1654,13 +1654,11 @@ int set_memory_ro(unsigned long addr, int numpages)
 {
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {
--
cgit v1.2.3
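The motivation for keeping set_memory_ro()/set_memory_rw() away from modules
is the same one that makes user-space JITs write-protect their code buffers
once emission is finished. A rough user-space analogue of that pattern using
mprotect(); this illustrates the intent only, not the kernel-side mechanics
of set_memory_ro():

#include <string.h>
#include <sys/mman.h>

/*
 * Copy some code-like bytes into an anonymous mapping, then drop the write
 * permission so later bugs (or deliberate tampering) cannot patch the image.
 */
static void *emit_and_seal(const void *code, size_t len)
{
	size_t sz = 4096;			/* assume one page is enough here */
	void *buf;

	if (len > sz)
		return NULL;

	buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return NULL;

	memcpy(buf, code, len);

	/* Comparable in spirit to set_memory_ro() on a JIT'ed image: */
	if (mprotect(buf, sz, PROT_READ | PROT_EXEC)) {
		munmap(buf, sz);
		return NULL;
	}
	return buf;
}

int main(void)
{
	static const unsigned char ret_insn[] = { 0xc3 };	/* x86 'ret' */

	return emit_and_seal(ret_insn, sizeof(ret_insn)) ? 0 : 1;
}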
From d9fd579c218e22c897f0f1b9e132af9b436cf445 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez"
Date: Wed, 4 Mar 2015 17:24:11 -0800
Subject: x86/mm: Use IS_ENABLED() for direct_gbpages

Replace #ifdef eyesore with IS_ENABLED() use.

Signed-off-by: Luis R. Rodriguez
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: David Vrabel
Cc: Dexuan Cui
Cc: Greg Kroah-Hartman
Cc: H. Peter Anvin
Cc: JBeulich@suse.com
Cc: Jan Beulich
Cc: Joonsoo Kim
Cc: Juergen Gross
Cc: Linus Torvalds
Cc: Pavel Machek
Cc: Thomas Gleixner
Cc: Tony Lindgren
Cc: Toshi Kani
Cc: Vlastimil Babka
Cc: Xishi Qiu
Cc: julia.lawall@lip6.fr
Link: http://lkml.kernel.org/r/1425518654-3403-2-git-send-email-mcgrof@do-not-panic.com
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/init.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index a110efca6d06..74f2b37fd073 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -131,11 +131,7 @@ void __init early_alloc_pgt_buf(void)
 
 int after_bootmem;
 
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
-		   = 1
-#endif
-;
+int direct_gbpages = IS_ENABLED(CONFIG_DIRECT_GBPAGES);
 
 static void __init init_gbpages(void)
 {
--
cgit v1.2.3
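For readers who have not met IS_ENABLED(): it turns a Kconfig symbol into an
ordinary 0/1 constant expression, so a single initializer (or a normal if ())
replaces an #ifdef block while both branches stay visible to the compiler.
The stand-in below is a simplified approximation of the idea, not the
kernel's actual macro (which also handles =m symbols without an #ifdef at
the use site):

#include <stdio.h>

/*
 * Stand-in for the kernel's IS_ENABLED(): here simply driven by a
 * -DCONFIG_DIRECT_GBPAGES_DEMO compiler flag.
 */
#ifdef CONFIG_DIRECT_GBPAGES_DEMO
# define IS_ENABLED_DEMO 1
#else
# define IS_ENABLED_DEMO 0
#endif

int direct_gbpages_demo = IS_ENABLED_DEMO;

int main(void)
{
	/* Both branches are always compiled; the dead one is optimized away. */
	if (direct_gbpages_demo)
		printf("gbpages default-enabled by config\n");
	else
		printf("gbpages default-disabled by config\n");
	return 0;
}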
From e5008abe929c160d36e44b8c2b644d4330d2e389 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez"
Date: Wed, 4 Mar 2015 17:24:12 -0800
Subject: x86/mm: Simplify enabling direct_gbpages

direct_gbpages can be force-enabled as an early parameter without really
taking effect when DEBUG_PAGEALLOC or KMEMCHECK is enabled. You can also
enable direct_gbpages right now on an x86_64 build whose CPU does not
actually support the feature. In both cases PG_LEVEL_1G is never actually
enabled, yet direct_gbpages is used in other areas under the assumption
that PG_LEVEL_1G was set.

Fix this by collecting all the requirements under which the feature makes
sense to enable, and only then flipping on PG_LEVEL_1G and leaving
direct_gbpages set when they all hold. The feature is then only possible
on sensible builds, defined by the new ENABLE_DIRECT_GBPAGES option. If
the CPU supports it, you can enable it either through the DIRECT_GBPAGES
config option or through the early kernel parameter; if the platform
supports it, you can always force-disable it as well.

Signed-off-by: Luis R. Rodriguez
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: David Vrabel
Cc: Dexuan Cui
Cc: Greg Kroah-Hartman
Cc: H. Peter Anvin
Cc: JBeulich@suse.com
Cc: Jan Beulich
Cc: Joonsoo Kim
Cc: Juergen Gross
Cc: Linus Torvalds
Cc: Pavel Machek
Cc: Thomas Gleixner
Cc: Tony Lindgren
Cc: Toshi Kani
Cc: Vlastimil Babka
Cc: Xishi Qiu
Cc: julia.lawall@lip6.fr
Link: http://lkml.kernel.org/r/1425518654-3403-3-git-send-email-mcgrof@do-not-panic.com
Signed-off-by: Ingo Molnar
---
 arch/x86/Kconfig       | 18 +++++++++++++-----
 arch/x86/mm/init.c     | 17 +++++++++--------
 arch/x86/mm/pageattr.c |  2 --
 3 files changed, 22 insertions(+), 15 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c2fb8a87dccb..4d06e1c8294a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1299,14 +1299,22 @@ config ARCH_DMA_ADDR_T_64BIT
 	def_bool y
 	depends on X86_64 || HIGHMEM64G
 
+config ENABLE_DIRECT_GBPAGES
+	def_bool y
+	depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
+
 config DIRECT_GBPAGES
 	bool "Enable 1GB pages for kernel pagetables" if EXPERT
 	default y
-	depends on X86_64
-	---help---
-	  Allow the kernel linear mapping to use 1GB pages on CPUs that
-	  support it. This can improve the kernel's performance a tiny bit by
-	  reducing TLB pressure. If in doubt, say "Y".
+	depends on ENABLE_DIRECT_GBPAGES
+	---help---
+	  Enable by default the kernel linear mapping to use 1GB pages on CPUs
+	  that support it. This can improve the kernel's performance a tiny bit
+	  by reducing TLB pressure. If in doubt, say "Y". If you've disabled
+	  option but your platform is capable of handling support for this
+	  you can use the gbpages kernel parameter. Likewise if you've enabled
+	  this but you'd like to force disable this option you can use the
+	  nogbpages kernel parameter.
 
 # Common NUMA Features
 config NUMA
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 74f2b37fd073..2ce2c8e8c99c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -131,16 +131,21 @@ void __init early_alloc_pgt_buf(void)
 
 int after_bootmem;
 
+static int page_size_mask;
+
 int direct_gbpages = IS_ENABLED(CONFIG_DIRECT_GBPAGES);
 
 static void __init init_gbpages(void)
 {
-#ifdef CONFIG_X86_64
-	if (direct_gbpages && cpu_has_gbpages)
+	if (!IS_ENABLED(CONFIG_ENABLE_DIRECT_GBPAGES)) {
+		direct_gbpages = 0;
+		return;
+	}
+	if (direct_gbpages && cpu_has_gbpages) {
 		printk(KERN_INFO "Using GB pages for direct mapping\n");
-	else
+		page_size_mask |= 1 << PG_LEVEL_1G;
+	} else
 		direct_gbpages = 0;
-#endif
 }
 
 struct map_range {
@@ -149,8 +154,6 @@ struct map_range {
 	unsigned page_size_mask;
 };
 
-static int page_size_mask;
-
 static void __init probe_page_size_mask(void)
 {
@@ -161,8 +164,6 @@ static void __init probe_page_size_mask(void)
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
-	if (direct_gbpages)
-		page_size_mask |= 1 << PG_LEVEL_1G;
 	if (cpu_has_pse)
 		page_size_mask |= 1 << PG_LEVEL_2M;
 #endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 81e8282d8c2f..89af288ec674 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m)
 	seq_printf(m, "DirectMap4M:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_2M] << 12);
 #endif
-#ifdef CONFIG_X86_64
 	if (direct_gbpages)
 		seq_printf(m, "DirectMap1G:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_1G] << 20);
-#endif
 }
 #else
 static inline void split_page_count(int level) { }
--
cgit v1.2.3
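The shape this patch converges on is a two-level gate: a build-time switch
(the new ENABLE_DIRECT_GBPAGES) says whether 1GB pages can ever be used in
this kernel, and a runtime CPU-capability check decides whether they
actually are, with a bitmask recording the outcome. A stand-alone sketch of
that shape, using illustrative names and hard-coded values rather than the
kernel's:

#include <stdio.h>

enum pg_level { PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G };

static int page_size_mask;
static int direct_gbpages = 1;			/* default, e.g. from Kconfig */
static const int cpu_has_gbpages_demo = 1;	/* e.g. probed via CPUID      */

static void init_gbpages_demo(int build_supports_gbpages)
{
	if (!build_supports_gbpages) {			/* compile-time gate */
		direct_gbpages = 0;
		return;
	}
	if (direct_gbpages && cpu_has_gbpages_demo)	/* runtime gate */
		page_size_mask |= 1 << PG_LEVEL_1G;
	else
		direct_gbpages = 0;
}

int main(void)
{
	init_gbpages_demo(1);
	printf("1G pages usable: %d\n", !!(page_size_mask & (1 << PG_LEVEL_1G)));
	return 0;
}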
From 73c8c861dc5bddf1b24c6aeffee2292c96cf8db2 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez"
Date: Wed, 4 Mar 2015 17:24:14 -0800
Subject: x86/mm: Use early_param_on_off() for direct_gbpages

The enabler / disabler is pretty simple: just use the provided wrappers.
This lets us easily relate the variable to the associated Kconfig entry.

Signed-off-by: Luis R. Rodriguez
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: David Vrabel
Cc: Dexuan Cui
Cc: Greg Kroah-Hartman
Cc: H. Peter Anvin
Cc: JBeulich@suse.com
Cc: Jan Beulich
Cc: Joonsoo Kim
Cc: Juergen Gross
Cc: Linus Torvalds
Cc: Pavel Machek
Cc: Thomas Gleixner
Cc: Tony Lindgren
Cc: Toshi Kani
Cc: Vlastimil Babka
Cc: Xishi Qiu
Cc: julia.lawall@lip6.fr
Link: http://lkml.kernel.org/r/1425518654-3403-5-git-send-email-mcgrof@do-not-panic.com
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/init.c    |  3 ++-
 arch/x86/mm/init_64.c | 14 --------------
 2 files changed, 2 insertions(+), 15 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 2ce2c8e8c99c..c35ba8bce7cb 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -133,7 +133,8 @@ int after_bootmem;
 
 static int page_size_mask;
 
-int direct_gbpages = IS_ENABLED(CONFIG_DIRECT_GBPAGES);
+early_param_on_off("gbpages", "nogbpages",
+		   direct_gbpages, CONFIG_DIRECT_GBPAGES);
 
 static void __init init_gbpages(void)
 {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30eb05ae7061..3fba623e3ba5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	return 0;
 }
 
-static int __init parse_direct_gbpages_off(char *arg)
-{
-	direct_gbpages = 0;
-	return 0;
-}
-early_param("nogbpages", parse_direct_gbpages_off);
-
-static int __init parse_direct_gbpages_on(char *arg)
-{
-	direct_gbpages = 1;
-	return 0;
-}
-early_param("gbpages", parse_direct_gbpages_on);
-
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
--
cgit v1.2.3
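The early_param_on_off() wrapper used above replaces the two hand-written
parse_direct_gbpages_on()/_off() handlers removed from init_64.c. Its exact
definition is not shown in this series; the sketch below is only a guess at
the general shape such a helper has to take (a variable with a default plus
one parser per keyword), not the kernel's implementation:

#include <stdio.h>

/*
 * Guessed shape of an "on/off keyword" helper: NOT the kernel's
 * early_param_on_off(), just an illustration of what it has to generate.
 */
#define param_on_off(var, default_val)				\
	static int var = (default_val);				\
	static int parse_##var##_on(char *arg)			\
	{ (void)arg; var = 1; return 0; }			\
	static int parse_##var##_off(char *arg)			\
	{ (void)arg; var = 0; return 0; }

param_on_off(gbpages_demo, 1)

int main(void)
{
	parse_gbpages_demo_on(NULL);	/* as if "gbpages" were passed   */
	parse_gbpages_demo_off(NULL);	/* as if "nogbpages" were passed */
	printf("gbpages_demo = %d\n", gbpages_demo);
	return 0;
}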
From 10971ab269bbf22120edac95fcfa3c873a549bea Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 5 Mar 2015 08:18:23 +0100
Subject: x86/mm: Further simplify 1 GB kernel linear mappings handling

It's a bit pointless to allow Kconfig configuration for 1GB kernel
mappings: it's already hidden behind a 'default y' and CONFIG_EXPERT.

Remove this complication and simplify the code by renaming
CONFIG_ENABLE_DIRECT_GBPAGES to CONFIG_X86_DIRECT_GBPAGES, and document
the DEBUG_PAGEALLOC and KMEMCHECK quirks.

Cc: Luis R. Rodriguez
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: David Vrabel
Cc: Dexuan Cui
Cc: Greg Kroah-Hartman
Cc: H. Peter Anvin
Cc: JBeulich@suse.com
Cc: Jan Beulich
Cc: Joonsoo Kim
Cc: Juergen Gross
Cc: Linus Torvalds
Cc: Pavel Machek
Cc: Thomas Gleixner
Cc: Tony Lindgren
Cc: Toshi Kani
Cc: Vlastimil Babka
Cc: Xishi Qiu
Cc: julia.lawall@lip6.fr
Signed-off-by: Ingo Molnar
---
 arch/x86/Kconfig   | 20 ++++++--------------
 arch/x86/mm/init.c |  7 +------
 2 files changed, 7 insertions(+), 20 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4d06e1c8294a..d03847513b6d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1299,22 +1299,14 @@ config ARCH_DMA_ADDR_T_64BIT
 	def_bool y
 	depends on X86_64 || HIGHMEM64G
 
-config ENABLE_DIRECT_GBPAGES
+config X86_DIRECT_GBPAGES
 	def_bool y
 	depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
-
-config DIRECT_GBPAGES
-	bool "Enable 1GB pages for kernel pagetables" if EXPERT
-	default y
-	depends on ENABLE_DIRECT_GBPAGES
-	---help---
-	  Enable by default the kernel linear mapping to use 1GB pages on CPUs
-	  that support it. This can improve the kernel's performance a tiny bit
-	  by reducing TLB pressure. If in doubt, say "Y". If you've disabled
-	  option but your platform is capable of handling support for this
-	  you can use the gbpages kernel parameter. Likewise if you've enabled
-	  this but you'd like to force disable this option you can use the
-	  nogbpages kernel parameter.
+	---help---
+	  Certain kernel features effectively disable kernel
+	  linear 1 GB mappings (even if the CPU otherwise
+	  supports them), so don't confuse the user by printing
+	  that we have them enabled.
 
 # Common NUMA Features
 config NUMA
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index c35ba8bce7cb..8704153f2675 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -133,15 +133,10 @@ int after_bootmem;
 
 static int page_size_mask;
 
-early_param_on_off("gbpages", "nogbpages",
-		   direct_gbpages, CONFIG_DIRECT_GBPAGES);
+early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
 
 static void __init init_gbpages(void)
 {
-	if (!IS_ENABLED(CONFIG_ENABLE_DIRECT_GBPAGES)) {
-		direct_gbpages = 0;
-		return;
-	}
 	if (direct_gbpages && cpu_has_gbpages) {
 		printk(KERN_INFO "Using GB pages for direct mapping\n");
 		page_size_mask |= 1 << PG_LEVEL_1G;
--
cgit v1.2.3
From e61980a70245715ab39cbee2b9d6e6afc1ec37d4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 5 Mar 2015 08:25:01 +0100
Subject: x86/mm: Simplify probe_page_size_mask()

Now that we've simplified the gbpages config space, move the
'page_size_mask' initialization into probe_page_size_mask(), right next
to the PSE and PGE enablement lines.

Cc: Luis R. Rodriguez
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: David Vrabel
Cc: Dexuan Cui
Cc: Greg Kroah-Hartman
Cc: H. Peter Anvin
Cc: JBeulich@suse.com
Cc: Jan Beulich
Cc: Joonsoo Kim
Cc: Juergen Gross
Cc: Linus Torvalds
Cc: Pavel Machek
Cc: Thomas Gleixner
Cc: Tony Lindgren
Cc: Toshi Kani
Cc: Vlastimil Babka
Cc: Xishi Qiu
Cc: julia.lawall@lip6.fr
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/init.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 8704153f2675..6dc85d51cd98 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -131,29 +131,18 @@ void __init early_alloc_pgt_buf(void)
 
 int after_bootmem;
 
-static int page_size_mask;
-
 early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
 
-static void __init init_gbpages(void)
-{
-	if (direct_gbpages && cpu_has_gbpages) {
-		printk(KERN_INFO "Using GB pages for direct mapping\n");
-		page_size_mask |= 1 << PG_LEVEL_1G;
-	} else
-		direct_gbpages = 0;
-}
-
 struct map_range {
 	unsigned long start;
 	unsigned long end;
 	unsigned page_size_mask;
 };
 
+static int page_size_mask;
+
 static void __init probe_page_size_mask(void)
 {
-	init_gbpages();
-
 #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
@@ -173,6 +162,14 @@ static void __init probe_page_size_mask(void)
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
+
+	/* Enable 1 GB linear kernel mappings if available: */
+	if (direct_gbpages && cpu_has_gbpages) {
+		printk(KERN_INFO "Using GB pages for direct mapping\n");
+		page_size_mask |= 1 << PG_LEVEL_1G;
+	} else {
+		direct_gbpages = 0;
+	}
 }
 
 #ifdef CONFIG_X86_32
--
cgit v1.2.3
From c709feda56886c38af3116254f84cbe6a78b3a5d Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 5 Mar 2015 08:58:44 +0100
Subject: x86/mm/pat: Initialize __cachemode2pte_tbl[] and __pte2cachemode_tbl[] in a bit more readable fashion

The initialization of these two arrays is a bit difficult to follow:
restructure it optically so that a 2D structure shows which bit in the
PTE is set and which is not.

Also improve the comments a bit.

No code or data changed:

  # arch/x86/mm/init.o:

   text    data     bss     dec     hex filename
   4585     424   29776   34785    87e1 init.o.before
   4585     424   29776   34785    87e1 init.o.after

 md5:
   a82e11ff58bcfd0af3a94662a701f65d  init.o.before.asm
   a82e11ff58bcfd0af3a94662a701f65d  init.o.after.asm

Reviewed-by: Juergen Gross
Cc: Andy Lutomirski
Cc: Dave Hansen
Cc: Jan Beulich
Cc: Linus Torvalds
Cc: Luis R. Rodriguez
Cc: Toshi Kani
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/20150305082135.GB5969@gmail.com
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/init.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)
(limited to 'arch/x86/mm')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6dc85d51cd98..4469563f8c3b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,29 +29,33 @@
 
 /*
  * Tables translating between page_cache_type_t and pte encoding.
- * Minimal supported modes are defined statically, modified if more supported
- * cache modes are available.
- * Index into __cachemode2pte_tbl is the cachemode.
- * Index into __pte2cachemode_tbl are the caching attribute bits of the pte
- * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
+ *
+ * Minimal supported modes are defined statically, they are modified
+ * during bootup if more supported cache modes are available.
+ *
+ * Index into __cachemode2pte_tbl[] is the cachemode.
+ *
+ * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
+ * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
  */
 uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
-	[_PAGE_CACHE_MODE_WB]		= 0,
-	[_PAGE_CACHE_MODE_WC]		= _PAGE_PWT,
-	[_PAGE_CACHE_MODE_UC_MINUS]	= _PAGE_PCD,
-	[_PAGE_CACHE_MODE_UC]		= _PAGE_PCD | _PAGE_PWT,
-	[_PAGE_CACHE_MODE_WT]		= _PAGE_PCD,
-	[_PAGE_CACHE_MODE_WP]		= _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WB      ]	= 0         | 0        ,
+	[_PAGE_CACHE_MODE_WC      ]	= _PAGE_PWT | 0        ,
+	[_PAGE_CACHE_MODE_UC_MINUS]	= 0         | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_UC      ]	= _PAGE_PWT | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WT      ]	= 0         | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WP      ]	= 0         | _PAGE_PCD,
 };
 EXPORT_SYMBOL(__cachemode2pte_tbl);
+
 uint8_t __pte2cachemode_tbl[8] = {
-	[__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
-	[__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC,
-	[__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
-	[__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
+	[__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
+	[__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
+	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC,
+	[__pte2cm_idx( 0        | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
+	[__pte2cm_idx(_PAGE_PWT | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx( 0        | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
 EXPORT_SYMBOL(__pte2cachemode_tbl);
--
cgit v1.2.3
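The reason the reindented tables are easier to check by eye is that each
column corresponds to one PTE caching bit, and the __pte2cachemode_tbl[]
index simply packs those three bits (PWT, PCD, PAT) into index bit positions
0, 1 and 2, as the comment above states. A small stand-alone illustration of
that packing; the PTE bit positions below are written out for the example
rather than taken from kernel headers:

#include <stdio.h>

#define X_PAGE_PWT	(1UL << 3)	/* PTE bit 3 -> index bit 0 */
#define X_PAGE_PCD	(1UL << 4)	/* PTE bit 4 -> index bit 1 */
#define X_PAGE_PAT	(1UL << 7)	/* PTE bit 7 -> index bit 2 */

/* Pack the three caching-attribute bits of a pte into a 3-bit table index. */
static unsigned int pte2cm_idx_demo(unsigned long pte_flags)
{
	return (!!(pte_flags & X_PAGE_PWT) << 0) |
	       (!!(pte_flags & X_PAGE_PCD) << 1) |
	       (!!(pte_flags & X_PAGE_PAT) << 2);
}

int main(void)
{
	/* UC is PWT|PCD with PAT clear -> index 0b011 == 3 */
	printf("idx(PWT|PCD) = %u\n", pte2cm_idx_demo(X_PAGE_PWT | X_PAGE_PCD));
	return 0;
}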