summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2019-11-26 10:31:02 +0100
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2019-11-26 10:31:02 +0100
commit782b59711e1561ee0da06bc478ca5e8249aa8d09 (patch)
treea03ba1d7b9212480e707045619ae493d3be7a084
parent995e2ef08280dab8de6c517acd836613678fe2a3 (diff)
parent0f1839d0888700389e3062b4787046d61780d6d9 (diff)
downloadlwn-782b59711e1561ee0da06bc478ca5e8249aa8d09.tar.gz
lwn-782b59711e1561ee0da06bc478ca5e8249aa8d09.zip
Merge branch 'acpi-mm'
* acpi-mm: ACPI: HMAT: use %u instead of %d to print u32 values ACPI: NUMA: HMAT: fix a section mismatch ACPI: HMAT: don't mix pxm and nid when setting memory target processor_pxm ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device ACPI: NUMA: HMAT: Register HMAT at device_initcall level device-dax: Add a driver for "hmem" devices dax: Fix alloc_dax_region() compile warning lib: Uplevel the pmem "region" ida to a global allocator x86/efi: Add efi_fake_mem support for EFI_MEMORY_SP arm/efi: EFI soft reservation to memblock x86/efi: EFI soft reservation to E820 enumeration efi: Common enable/disable infrastructure for EFI soft reservation x86/efi: Push EFI_MEMMAP check into leaf routines efi: Enumerate EFI_MEMORY_SP ACPI: NUMA: Establish a new drivers/acpi/numa/ directory
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt19
-rw-r--r--arch/arm64/mm/mmu.c2
-rw-r--r--arch/x86/boot/compressed/eboot.c6
-rw-r--r--arch/x86/boot/compressed/kaslr.c46
-rw-r--r--arch/x86/include/asm/e820/types.h8
-rw-r--r--arch/x86/include/asm/efi.h17
-rw-r--r--arch/x86/kernel/e820.c12
-rw-r--r--arch/x86/kernel/setup.c18
-rw-r--r--arch/x86/platform/efi/efi.c54
-rw-r--r--arch/x86/platform/efi/quirks.c3
-rw-r--r--drivers/acpi/Kconfig9
-rw-r--r--drivers/acpi/Makefile3
-rw-r--r--drivers/acpi/hmat/Makefile2
-rw-r--r--drivers/acpi/numa/Kconfig (renamed from drivers/acpi/hmat/Kconfig)7
-rw-r--r--drivers/acpi/numa/Makefile3
-rw-r--r--drivers/acpi/numa/hmat.c (renamed from drivers/acpi/hmat/hmat.c)158
-rw-r--r--drivers/acpi/numa/srat.c (renamed from drivers/acpi/numa.c)0
-rw-r--r--drivers/dax/Kconfig27
-rw-r--r--drivers/dax/Makefile2
-rw-r--r--drivers/dax/bus.c2
-rw-r--r--drivers/dax/bus.h2
-rw-r--r--drivers/dax/dax-private.h2
-rw-r--r--drivers/dax/hmem.c56
-rw-r--r--drivers/firmware/efi/Kconfig21
-rw-r--r--drivers/firmware/efi/Makefile5
-rw-r--r--drivers/firmware/efi/arm-init.c9
-rw-r--r--drivers/firmware/efi/arm-runtime.c24
-rw-r--r--drivers/firmware/efi/efi.c13
-rw-r--r--drivers/firmware/efi/esrt.c3
-rw-r--r--drivers/firmware/efi/fake_mem.c26
-rw-r--r--drivers/firmware/efi/fake_mem.h10
-rw-r--r--drivers/firmware/efi/libstub/arm32-stub.c5
-rw-r--r--drivers/firmware/efi/libstub/efi-stub-helper.c19
-rw-r--r--drivers/firmware/efi/libstub/random.c4
-rw-r--r--drivers/firmware/efi/x86_fake_mem.c69
-rw-r--r--drivers/nvdimm/Kconfig1
-rw-r--r--drivers/nvdimm/core.c1
-rw-r--r--drivers/nvdimm/nd-core.h1
-rw-r--r--drivers/nvdimm/region_devs.c13
-rw-r--r--include/linux/efi.h16
-rw-r--r--include/linux/ioport.h1
-rw-r--r--include/linux/memregion.h23
-rw-r--r--lib/Kconfig3
-rw-r--r--lib/Makefile1
-rw-r--r--lib/memregion.c18
45 files changed, 644 insertions, 100 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a84a83f8881e..5eee3ea05ac5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1168,7 +1168,8 @@
Format: {"off" | "on" | "skip[mbr]"}
efi= [EFI]
- Format: { "old_map", "nochunk", "noruntime", "debug" }
+ Format: { "old_map", "nochunk", "noruntime", "debug",
+ "nosoftreserve" }
old_map [X86-64]: switch to the old ioremap-based EFI
runtime services mapping. 32-bit still uses this one by
default.
@@ -1177,6 +1178,12 @@
firmware implementations.
noruntime : disable EFI runtime services support
debug: enable misc debug output
+ nosoftreserve: The EFI_MEMORY_SP (Specific Purpose)
+ attribute may cause the kernel to reserve the
+ memory range for a memory mapping driver to
+ claim. Specify efi=nosoftreserve to disable this
+ reservation and treat the memory by its base type
+ (i.e. EFI_CONVENTIONAL_MEMORY / "System RAM").
efi_no_storage_paranoia [EFI; X86]
Using this parameter you can use more than 50% of
@@ -1189,15 +1196,21 @@
updating original EFI memory map.
Region of memory which aa attribute is added to is
from ss to ss+nn.
+
If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000
is specified, EFI_MEMORY_MORE_RELIABLE(0x10000)
attribute is added to range 0x100000000-0x180000000 and
0x10a0000000-0x1120000000.
+ If efi_fake_mem=8G@9G:0x40000 is specified, the
+ EFI_MEMORY_SP(0x40000) attribute is added to
+ range 0x240000000-0x43fffffff.
+
Using this parameter you can do debugging of EFI memmap
- related feature. For example, you can do debugging of
+ related features. For example, you can do debugging of
Address Range Mirroring feature even if your box
- doesn't support it.
+ doesn't support it, or mark specific memory as
+ "soft reserved".
efivar_ssdt= [EFI; X86] Name of an EFI variable that contains an SSDT
that is to be dynamically loaded by Linux. If there are
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 60c929f3683b..2c385fe05fde 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1061,6 +1061,8 @@ int arch_add_memory(int nid, u64 start, u64 size,
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
size, PAGE_KERNEL, __pgd_pgtable_alloc, flags);
+ memblock_clear_nomap(start, size);
+
return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
restrictions);
}
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 82bc60c8acb2..f2db8c5e4b06 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -554,7 +554,11 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
- e820_type = E820_TYPE_RAM;
+ if (efi_soft_reserve_enabled() &&
+ (d->attribute & EFI_MEMORY_SP))
+ e820_type = E820_TYPE_SOFT_RESERVED;
+ else
+ e820_type = E820_TYPE_RAM;
break;
case EFI_ACPI_MEMORY_NVS:
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 2e53c056ba20..da0eedd5635d 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -132,8 +132,14 @@ char *skip_spaces(const char *str)
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"
+enum parse_mode {
+ PARSE_MEMMAP,
+ PARSE_EFI,
+};
+
static int
-parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
+parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
+ enum parse_mode mode)
{
char *oldp;
@@ -156,8 +162,29 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
*start = memparse(p + 1, &p);
return 0;
case '@':
- /* memmap=nn@ss specifies usable region, should be skipped */
- *size = 0;
+ if (mode == PARSE_MEMMAP) {
+ /*
+ * memmap=nn@ss specifies usable region, should
+ * be skipped
+ */
+ *size = 0;
+ } else {
+ unsigned long long flags;
+
+ /*
+ * efi_fake_mem=nn@ss:attr the attr specifies
+ * flags that might imply a soft-reservation.
+ */
+ *start = memparse(p + 1, &p);
+ if (p && *p == ':') {
+ p++;
+ if (kstrtoull(p, 0, &flags) < 0)
+ *size = 0;
+ else if (flags & EFI_MEMORY_SP)
+ return 0;
+ }
+ *size = 0;
+ }
/* Fall through */
default:
/*
@@ -172,7 +199,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
return -EINVAL;
}
-static void mem_avoid_memmap(char *str)
+static void mem_avoid_memmap(enum parse_mode mode, char *str)
{
static int i;
@@ -187,7 +214,7 @@ static void mem_avoid_memmap(char *str)
if (k)
*k++ = 0;
- rc = parse_memmap(str, &start, &size);
+ rc = parse_memmap(str, &start, &size, mode);
if (rc < 0)
break;
str = k;
@@ -238,7 +265,6 @@ static void parse_gb_huge_pages(char *param, char *val)
}
}
-
static void handle_mem_options(void)
{
char *args = (char *)get_cmd_line_ptr();
@@ -271,7 +297,7 @@ static void handle_mem_options(void)
}
if (!strcmp(param, "memmap")) {
- mem_avoid_memmap(val);
+ mem_avoid_memmap(PARSE_MEMMAP, val);
} else if (strstr(param, "hugepages")) {
parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) {
@@ -284,6 +310,8 @@ static void handle_mem_options(void)
goto out;
mem_limit = mem_size;
+ } else if (!strcmp(param, "efi_fake_mem")) {
+ mem_avoid_memmap(PARSE_EFI, val);
}
}
@@ -760,6 +788,10 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
if (md->type != EFI_CONVENTIONAL_MEMORY)
continue;
+ if (efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ continue;
+
if (efi_mirror_found &&
!(md->attribute & EFI_MEMORY_MORE_RELIABLE))
continue;
diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h
index c3aa4b5e49e2..314f75d886d0 100644
--- a/arch/x86/include/asm/e820/types.h
+++ b/arch/x86/include/asm/e820/types.h
@@ -29,6 +29,14 @@ enum e820_type {
E820_TYPE_PRAM = 12,
/*
+ * Special-purpose memory is indicated to the system via the
+ * EFI_MEMORY_SP attribute. Define an e820 translation of this
+ * memory type for the purpose of reserving this range and
+ * marking it with the IORES_DESC_SOFT_RESERVED designation.
+ */
+ E820_TYPE_SOFT_RESERVED = 0xefffffff,
+
+ /*
* Reserved RAM used by the kernel itself if
* CONFIG_INTEL_TXT=y is enabled, memory of this type
* will be included in the S3 integrity calculation
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 43a82e59c59d..d028e9acdf1c 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -140,7 +140,6 @@ extern void efi_delete_dummy_variable(void);
extern void efi_switch_mm(struct mm_struct *mm);
extern void efi_recover_from_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void);
-extern void efi_reserve_boot_services(void);
struct efi_setup_data {
u64 fw_vendor;
@@ -244,6 +243,8 @@ static inline bool efi_is_64bit(void)
extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr);
+extern void efi_find_mirror(void);
+extern void efi_reserve_boot_services(void);
#else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
static inline bool efi_reboot_required(void)
@@ -254,6 +255,20 @@ static inline bool efi_is_table_address(unsigned long phys_addr)
{
return false;
}
+static inline void efi_find_mirror(void)
+{
+}
+static inline void efi_reserve_boot_services(void)
+{
+}
#endif /* CONFIG_EFI */
+#ifdef CONFIG_EFI_FAKE_MEMMAP
+extern void __init efi_fake_memmap_early(void);
+#else
+static inline void efi_fake_memmap_early(void)
+{
+}
+#endif
+
#endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7da2bcd2b8eb..9976106b57ec 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -190,6 +190,7 @@ static void __init e820_print_type(enum e820_type type)
case E820_TYPE_RAM: /* Fall through: */
case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break;
case E820_TYPE_RESERVED: pr_cont("reserved"); break;
+ case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved"); break;
case E820_TYPE_ACPI: pr_cont("ACPI data"); break;
case E820_TYPE_NVS: pr_cont("ACPI NVS"); break;
case E820_TYPE_UNUSABLE: pr_cont("unusable"); break;
@@ -1037,6 +1038,7 @@ static const char *__init e820_type_to_string(struct e820_entry *entry)
case E820_TYPE_PRAM: return "Persistent Memory (legacy)";
case E820_TYPE_PMEM: return "Persistent Memory";
case E820_TYPE_RESERVED: return "Reserved";
+ case E820_TYPE_SOFT_RESERVED: return "Soft Reserved";
default: return "Unknown E820 type";
}
}
@@ -1052,6 +1054,7 @@ static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry)
case E820_TYPE_PRAM: /* Fall-through: */
case E820_TYPE_PMEM: /* Fall-through: */
case E820_TYPE_RESERVED: /* Fall-through: */
+ case E820_TYPE_SOFT_RESERVED: /* Fall-through: */
default: return IORESOURCE_MEM;
}
}
@@ -1064,6 +1067,7 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY;
case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
case E820_TYPE_RESERVED: return IORES_DESC_RESERVED;
+ case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED;
case E820_TYPE_RESERVED_KERN: /* Fall-through: */
case E820_TYPE_RAM: /* Fall-through: */
case E820_TYPE_UNUSABLE: /* Fall-through: */
@@ -1078,11 +1082,12 @@ static bool __init do_mark_busy(enum e820_type type, struct resource *res)
return true;
/*
- * Treat persistent memory like device memory, i.e. reserve it
- * for exclusive use of a driver
+ * Treat persistent memory and other special memory ranges like
+ * device memory, i.e. reserve it for exclusive use of a driver
*/
switch (type) {
case E820_TYPE_RESERVED:
+ case E820_TYPE_SOFT_RESERVED:
case E820_TYPE_PRAM:
case E820_TYPE_PMEM:
return false;
@@ -1285,6 +1290,9 @@ void __init e820__memblock_setup(void)
if (end != (resource_size_t)end)
continue;
+ if (entry->type == E820_TYPE_SOFT_RESERVED)
+ memblock_reserve(entry->addr, entry->size);
+
if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
continue;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 77ea96b794bd..1c4b866bc184 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1122,17 +1122,15 @@ void __init setup_arch(char **cmdline_p)
reserve_bios_regions();
- if (efi_enabled(EFI_MEMMAP)) {
- efi_fake_memmap();
- efi_find_mirror();
- efi_esrt_init();
+ efi_fake_memmap();
+ efi_find_mirror();
+ efi_esrt_init();
- /*
- * The EFI specification says that boot service code won't be
- * called after ExitBootServices(). This is, in fact, a lie.
- */
- efi_reserve_boot_services();
- }
+ /*
+ * The EFI specification says that boot service code won't be
+ * called after ExitBootServices(). This is, in fact, a lie.
+ */
+ efi_reserve_boot_services();
/* preallocate 4k for mptable mpc */
e820__memblock_alloc_reserved_mpc_new();
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 425e025341db..38d44f36d5ed 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -128,6 +128,9 @@ void __init efi_find_mirror(void)
efi_memory_desc_t *md;
u64 mirror_size = 0, total_size = 0;
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
+
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -145,14 +148,18 @@ void __init efi_find_mirror(void)
/*
* Tell the kernel about the EFI memory map. This might include
- * more than the max 128 entries that can fit in the e820 legacy
- * (zeropage) memory map.
+ * more than the max 128 entries that can fit in the passed in e820
+ * legacy (zeropage) memory map, but the kernel's e820 table can hold
+ * E820_MAX_ENTRIES.
*/
static void __init do_add_efi_memmap(void)
{
efi_memory_desc_t *md;
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
+
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -164,7 +171,10 @@ static void __init do_add_efi_memmap(void)
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
- if (md->attribute & EFI_MEMORY_WB)
+ if (efi_soft_reserve_enabled()
+ && (md->attribute & EFI_MEMORY_SP))
+ e820_type = E820_TYPE_SOFT_RESERVED;
+ else if (md->attribute & EFI_MEMORY_WB)
e820_type = E820_TYPE_RAM;
else
e820_type = E820_TYPE_RESERVED;
@@ -190,11 +200,36 @@ static void __init do_add_efi_memmap(void)
e820_type = E820_TYPE_RESERVED;
break;
}
+
e820__range_add(start, size, e820_type);
}
e820__update_table(e820_table);
}
+/*
+ * Given add_efi_memmap defaults to 0 and there there is no alternative
+ * e820 mechanism for soft-reserved memory, import the full EFI memory
+ * map if soft reservations are present and enabled. Otherwise, the
+ * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is
+ * the efi=nosoftreserve option.
+ */
+static bool do_efi_soft_reserve(void)
+{
+ efi_memory_desc_t *md;
+
+ if (!efi_enabled(EFI_MEMMAP))
+ return false;
+
+ if (!efi_soft_reserve_enabled())
+ return false;
+
+ for_each_efi_memory_desc(md)
+ if (md->type == EFI_CONVENTIONAL_MEMORY &&
+ (md->attribute & EFI_MEMORY_SP))
+ return true;
+ return false;
+}
+
int __init efi_memblock_x86_reserve_range(void)
{
struct efi_info *e = &boot_params.efi_info;
@@ -224,9 +259,11 @@ int __init efi_memblock_x86_reserve_range(void)
if (rv)
return rv;
- if (add_efi_memmap)
+ if (add_efi_memmap || do_efi_soft_reserve())
do_add_efi_memmap();
+ efi_fake_memmap_early();
+
WARN(efi.memmap.desc_version != 1,
"Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
efi.memmap.desc_version);
@@ -779,6 +816,15 @@ static bool should_map_region(efi_memory_desc_t *md)
return false;
/*
+ * EFI specific purpose memory may be reserved by default
+ * depending on kernel config and boot options.
+ */
+ if (md->type == EFI_CONVENTIONAL_MEMORY &&
+ efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ return false;
+
+ /*
* Map all of RAM so that we can access arguments in the 1:1
* mapping when making EFI runtime calls.
*/
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 3b9fd679cea9..7675cf754d90 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -320,6 +320,9 @@ void __init efi_reserve_boot_services(void)
{
efi_memory_desc_t *md;
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
+
for_each_efi_memory_desc(md) {
u64 start = md->phys_addr;
u64 size = md->num_pages << EFI_PAGE_SHIFT;
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 0f23d8b22c42..4fb97511a16f 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -319,12 +319,6 @@ config ACPI_THERMAL
To compile this driver as a module, choose M here:
the module will be called thermal.
-config ACPI_NUMA
- bool "NUMA support"
- depends on NUMA
- depends on (X86 || IA64 || ARM64)
- default y if IA64 || ARM64
-
config ACPI_CUSTOM_DSDT_FILE
string "Custom DSDT Table file to include"
default ""
@@ -473,8 +467,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
If you are unsure what to do, do not enable this option.
source "drivers/acpi/nfit/Kconfig"
-source "drivers/acpi/hmat/Kconfig"
-
+source "drivers/acpi/numa/Kconfig"
source "drivers/acpi/apei/Kconfig"
source "drivers/acpi/dptf/Kconfig"
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 61d31c36ed8a..33fdaf67454e 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -55,7 +55,6 @@ acpi-$(CONFIG_X86) += acpi_cmos_rtc.o
acpi-$(CONFIG_X86) += x86/apple.o
acpi-$(CONFIG_X86) += x86/utils.o
acpi-$(CONFIG_DEBUG_FS) += debugfs.o
-acpi-$(CONFIG_ACPI_NUMA) += numa.o
acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
acpi-y += acpi_lpat.o
acpi-$(CONFIG_ACPI_LPIT) += acpi_lpit.o
@@ -80,7 +79,7 @@ obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
obj-$(CONFIG_ACPI) += container.o
obj-$(CONFIG_ACPI_THERMAL) += thermal.o
obj-$(CONFIG_ACPI_NFIT) += nfit/
-obj-$(CONFIG_ACPI_HMAT) += hmat/
+obj-$(CONFIG_ACPI_NUMA) += numa/
obj-$(CONFIG_ACPI) += acpi_memhotplug.o
obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
obj-$(CONFIG_ACPI_BATTERY) += battery.o
diff --git a/drivers/acpi/hmat/Makefile b/drivers/acpi/hmat/Makefile
deleted file mode 100644
index 1c20ef36a385..000000000000
--- a/drivers/acpi/hmat/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_ACPI_HMAT) := hmat.o
diff --git a/drivers/acpi/hmat/Kconfig b/drivers/acpi/numa/Kconfig
index 95a29964dbea..fcf2e556d69d 100644
--- a/drivers/acpi/hmat/Kconfig
+++ b/drivers/acpi/numa/Kconfig
@@ -1,8 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
+config ACPI_NUMA
+ bool "NUMA support"
+ depends on NUMA
+ depends on (X86 || IA64 || ARM64)
+ default y if IA64 || ARM64
+
config ACPI_HMAT
bool "ACPI Heterogeneous Memory Attribute Table Support"
depends on ACPI_NUMA
select HMEM_REPORTING
+ select MEMREGION
help
If set, this option has the kernel parse and report the
platform's ACPI HMAT (Heterogeneous Memory Attributes Table),
diff --git a/drivers/acpi/numa/Makefile b/drivers/acpi/numa/Makefile
new file mode 100644
index 000000000000..517a6c689a94
--- /dev/null
+++ b/drivers/acpi/numa/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_ACPI_NUMA) += srat.o
+obj-$(CONFIG_ACPI_HMAT) += hmat.o
diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/numa/hmat.c
index 8b0de8a3c647..2c32cfb72370 100644
--- a/drivers/acpi/hmat/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -8,12 +8,18 @@
* the applicable attributes with the node's interfaces.
*/
+#define pr_fmt(fmt) "acpi/hmat: " fmt
+#define dev_fmt(fmt) "acpi/hmat: " fmt
+
#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/platform_device.h>
#include <linux/list_sort.h>
+#include <linux/memregion.h>
#include <linux/memory.h>
#include <linux/mutex.h>
#include <linux/node.h>
@@ -49,6 +55,7 @@ struct memory_target {
struct list_head node;
unsigned int memory_pxm;
unsigned int processor_pxm;
+ struct resource memregions;
struct node_hmem_attrs hmem_attrs;
struct list_head caches;
struct node_cache_attrs cache_attrs;
@@ -104,22 +111,36 @@ static __init void alloc_memory_initiator(unsigned int cpu_pxm)
list_add_tail(&initiator->node, &initiators);
}
-static __init void alloc_memory_target(unsigned int mem_pxm)
+static __init void alloc_memory_target(unsigned int mem_pxm,
+ resource_size_t start, resource_size_t len)
{
struct memory_target *target;
target = find_mem_target(mem_pxm);
- if (target)
- return;
-
- target = kzalloc(sizeof(*target), GFP_KERNEL);
- if (!target)
- return;
+ if (!target) {
+ target = kzalloc(sizeof(*target), GFP_KERNEL);
+ if (!target)
+ return;
+ target->memory_pxm = mem_pxm;
+ target->processor_pxm = PXM_INVAL;
+ target->memregions = (struct resource) {
+ .name = "ACPI mem",
+ .start = 0,
+ .end = -1,
+ .flags = IORESOURCE_MEM,
+ };
+ list_add_tail(&target->node, &targets);
+ INIT_LIST_HEAD(&target->caches);
+ }
- target->memory_pxm = mem_pxm;
- target->processor_pxm = PXM_INVAL;
- list_add_tail(&target->node, &targets);
- INIT_LIST_HEAD(&target->caches);
+ /*
+ * There are potentially multiple ranges per PXM, so record each
+ * in the per-target memregions resource tree.
+ */
+ if (!__request_region(&target->memregions, start, len, "memory target",
+ IORESOURCE_MEM))
+ pr_warn("failed to reserve %#llx - %#llx in pxm: %d\n",
+ start, start + len, mem_pxm);
}
static __init const char *hmat_data_type(u8 type)
@@ -272,7 +293,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
u8 type, mem_hier;
if (hmat_loc->header.length < sizeof(*hmat_loc)) {
- pr_notice("HMAT: Unexpected locality header length: %d\n",
+ pr_notice("HMAT: Unexpected locality header length: %u\n",
hmat_loc->header.length);
return -EINVAL;
}
@@ -284,12 +305,12 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds +
sizeof(*inits) * ipds + sizeof(*targs) * tpds;
if (hmat_loc->header.length < total_size) {
- pr_notice("HMAT: Unexpected locality header length:%d, minimum required:%d\n",
+ pr_notice("HMAT: Unexpected locality header length:%u, minimum required:%u\n",
hmat_loc->header.length, total_size);
return -EINVAL;
}
- pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%d Target Domains:%d Base:%lld\n",
+ pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n",
hmat_loc->flags, hmat_data_type(type), ipds, tpds,
hmat_loc->entry_base_unit);
@@ -302,7 +323,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
value = hmat_normalize(entries[init * tpds + targ],
hmat_loc->entry_base_unit,
type);
- pr_info(" Initiator-Target[%d-%d]:%d%s\n",
+ pr_info(" Initiator-Target[%u-%u]:%u%s\n",
inits[init], targs[targ], value,
hmat_data_type_suffix(type));
@@ -329,13 +350,13 @@ static __init int hmat_parse_cache(union acpi_subtable_headers *header,
u32 attrs;
if (cache->header.length < sizeof(*cache)) {
- pr_notice("HMAT: Unexpected cache header length: %d\n",
+ pr_notice("HMAT: Unexpected cache header length: %u\n",
cache->header.length);
return -EINVAL;
}
attrs = cache->cache_attributes;
- pr_info("HMAT: Cache: Domain:%d Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
+ pr_info("HMAT: Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
cache->memory_PD, cache->cache_size, attrs,
cache->number_of_SMBIOShandles);
@@ -390,17 +411,17 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
struct memory_target *target = NULL;
if (p->header.length != sizeof(*p)) {
- pr_notice("HMAT: Unexpected address range header length: %d\n",
+ pr_notice("HMAT: Unexpected address range header length: %u\n",
p->header.length);
return -EINVAL;
}
if (hmat_revision == 1)
- pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%d Memory Domain:%d\n",
+ pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n",
p->reserved3, p->reserved4, p->flags, p->processor_PD,
p->memory_PD);
else
- pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n",
+ pr_info("HMAT: Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n",
p->flags, p->processor_PD, p->memory_PD);
if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
@@ -417,7 +438,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
pr_debug("HMAT: Invalid Processor Domain\n");
return -EINVAL;
}
- target->processor_pxm = p_node;
+ target->processor_pxm = p->processor_PD;
}
return 0;
@@ -452,7 +473,7 @@ static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header,
return -EINVAL;
if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
return 0;
- alloc_memory_target(ma->proximity_domain);
+ alloc_memory_target(ma->proximity_domain, ma->base_address, ma->length);
return 0;
}
@@ -613,11 +634,92 @@ static void hmat_register_target_perf(struct memory_target *target)
node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
}
+static void hmat_register_target_device(struct memory_target *target,
+ struct resource *r)
+{
+ /* define a clean / non-busy resource for the platform device */
+ struct resource res = {
+ .start = r->start,
+ .end = r->end,
+ .flags = IORESOURCE_MEM,
+ };
+ struct platform_device *pdev;
+ struct memregion_info info;
+ int rc, id;
+
+ rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
+ IORES_DESC_SOFT_RESERVED);
+ if (rc != REGION_INTERSECTS)
+ return;
+
+ id = memregion_alloc(GFP_KERNEL);
+ if (id < 0) {
+ pr_err("memregion allocation failure for %pr\n", &res);
+ return;
+ }
+
+ pdev = platform_device_alloc("hmem", id);
+ if (!pdev) {
+ pr_err("hmem device allocation failure for %pr\n", &res);
+ goto out_pdev;
+ }
+
+ pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm);
+ info = (struct memregion_info) {
+ .target_node = acpi_map_pxm_to_node(target->memory_pxm),
+ };
+ rc = platform_device_add_data(pdev, &info, sizeof(info));
+ if (rc < 0) {
+ pr_err("hmem memregion_info allocation failure for %pr\n", &res);
+ goto out_pdev;
+ }
+
+ rc = platform_device_add_resources(pdev, &res, 1);
+ if (rc < 0) {
+ pr_err("hmem resource allocation failure for %pr\n", &res);
+ goto out_resource;
+ }
+
+ rc = platform_device_add(pdev);
+ if (rc < 0) {
+ dev_err(&pdev->dev, "device add failed for %pr\n", &res);
+ goto out_resource;
+ }
+
+ return;
+
+out_resource:
+ put_device(&pdev->dev);
+out_pdev:
+ memregion_free(id);
+}
+
+static void hmat_register_target_devices(struct memory_target *target)
+{
+ struct resource *res;
+
+ /*
+ * Do not bother creating devices if no driver is available to
+ * consume them.
+ */
+ if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
+ return;
+
+ for (res = target->memregions.child; res; res = res->sibling)
+ hmat_register_target_device(target, res);
+}
+
static void hmat_register_target(struct memory_target *target)
{
int nid = pxm_to_node(target->memory_pxm);
/*
+ * Devices may belong to either an offline or online
+ * node, so unconditionally add them.
+ */
+ hmat_register_target_devices(target);
+
+ /*
* Skip offline nodes. This can happen when memory
* marked EFI_MEMORY_SP, "specific purpose", is applied
* to all the memory in a promixity domain leading to
@@ -677,11 +779,21 @@ static __init void hmat_free_structures(void)
struct target_cache *tcache, *cnext;
list_for_each_entry_safe(target, tnext, &targets, node) {
+ struct resource *res, *res_next;
+
list_for_each_entry_safe(tcache, cnext, &target->caches, node) {
list_del(&tcache->node);
kfree(tcache);
}
+
list_del(&target->node);
+ res = target->memregions.child;
+ while (res) {
+ res_next = res->sibling;
+ __release_region(&target->memregions, res->start,
+ resource_size(res));
+ res = res_next;
+ }
kfree(target);
}
@@ -748,4 +860,4 @@ out_put:
acpi_put_table(tbl);
return 0;
}
-subsys_initcall(hmat_init);
+device_initcall(hmat_init);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa/srat.c
index eadbf90e65d1..eadbf90e65d1 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa/srat.c
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index f33c73e4af41..3b6c06f07326 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -32,19 +32,36 @@ config DEV_DAX_PMEM
Say M if unsure
+config DEV_DAX_HMEM
+ tristate "HMEM DAX: direct access to 'specific purpose' memory"
+ depends on EFI_SOFT_RESERVE
+ default DEV_DAX
+ help
+ EFI 2.8 platforms, and others, may advertise 'specific purpose'
+ memory. For example, a high bandwidth memory pool. The
+ indication from platform firmware is meant to reserve the
+ memory from typical usage by default. This driver creates
+ device-dax instances for these memory ranges, and that also
+ enables the possibility to assign them to the DEV_DAX_KMEM
+ driver to override the reservation and add them to kernel
+ "System RAM" pool.
+
+ Say M if unsure.
+
config DEV_DAX_KMEM
tristate "KMEM DAX: volatile-use of persistent memory"
default DEV_DAX
depends on DEV_DAX
depends on MEMORY_HOTPLUG # for add_memory() and friends
help
- Support access to persistent memory as if it were RAM. This
- allows easier use of persistent memory by unmodified
- applications.
+ Support access to persistent, or other performance
+ differentiated memory as if it were System RAM. This allows
+ easier use of persistent memory by unmodified applications, or
+ adds core kernel memory services to heterogeneous memory types
+ (HMEM) marked "reserved" by platform firmware.
To use this feature, a DAX device must be unbound from the
- device_dax driver (PMEM DAX) and bound to this kmem driver
- on each boot.
+ device_dax driver and bound to this kmem driver on each boot.
Say N if unsure.
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
index 81f7d54dadfb..80065b38b3c4 100644
--- a/drivers/dax/Makefile
+++ b/drivers/dax/Makefile
@@ -2,9 +2,11 @@
obj-$(CONFIG_DAX) += dax.o
obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
+obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
dax-y := super.o
dax-y += bus.o
device_dax-y := device.o
+dax_hmem-y := hmem.o
obj-y += pmem/
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 8fafbeab510a..eccdda1f7b71 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -227,7 +227,7 @@ static void dax_region_unregister(void *region)
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, int target_node, unsigned int align,
- unsigned long pfn_flags)
+ unsigned long long pfn_flags)
{
struct dax_region *dax_region;
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index 8619e3299943..9e4eba67e8b9 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -11,7 +11,7 @@ struct dax_region;
void dax_region_put(struct dax_region *dax_region);
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, int target_node, unsigned int align,
- unsigned long flags);
+ unsigned long long flags);
enum dev_dax_subsys {
DEV_DAX_BUS,
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index 6ccca3b890d6..3107ce80e809 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -32,7 +32,7 @@ struct dax_region {
struct device *dev;
unsigned int align;
struct resource res;
- unsigned long pfn_flags;
+ unsigned long long pfn_flags;
};
/**
diff --git a/drivers/dax/hmem.c b/drivers/dax/hmem.c
new file mode 100644
index 000000000000..fe7214daf62e
--- /dev/null
+++ b/drivers/dax/hmem.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/platform_device.h>
+#include <linux/memregion.h>
+#include <linux/module.h>
+#include <linux/pfn_t.h>
+#include "bus.h"
+
+static int dax_hmem_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct dev_pagemap pgmap = { };
+ struct dax_region *dax_region;
+ struct memregion_info *mri;
+ struct dev_dax *dev_dax;
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENOMEM;
+
+ mri = dev->platform_data;
+ memcpy(&pgmap.res, res, sizeof(*res));
+
+ dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node,
+ PMD_SIZE, PFN_DEV|PFN_MAP);
+ if (!dax_region)
+ return -ENOMEM;
+
+ dev_dax = devm_create_dev_dax(dax_region, 0, &pgmap);
+ if (IS_ERR(dev_dax))
+ return PTR_ERR(dev_dax);
+
+ /* child dev_dax instances now own the lifetime of the dax_region */
+ dax_region_put(dax_region);
+ return 0;
+}
+
+static int dax_hmem_remove(struct platform_device *pdev)
+{
+ /* devm handles teardown */
+ return 0;
+}
+
+static struct platform_driver dax_hmem_driver = {
+ .probe = dax_hmem_probe,
+ .remove = dax_hmem_remove,
+ .driver = {
+ .name = "hmem",
+ },
+};
+
+module_platform_driver(dax_hmem_driver);
+
+MODULE_ALIAS("platform:hmem*");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index b248870a9806..bcc378c19ebe 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -75,6 +75,27 @@ config EFI_MAX_FAKE_MEM
Ranges can be set up to this value using comma-separated list.
The default value is 8.
+config EFI_SOFT_RESERVE
+ bool "Reserve EFI Specific Purpose Memory"
+ depends on EFI && EFI_STUB && ACPI_HMAT
+ default ACPI_HMAT
+ help
+ On systems that have mixed performance classes of memory EFI
+ may indicate specific purpose memory with an attribute (See
+ EFI_MEMORY_SP in UEFI 2.8). A memory range tagged with this
+ attribute may have unique performance characteristics compared
+ to the system's general purpose "System RAM" pool. On the
+ expectation that such memory has application specific usage,
+ and its base EFI memory type is "conventional" answer Y to
+ arrange for the kernel to reserve it as a "Soft Reserved"
+ resource, and set aside for direct-access (device-dax) by
+ default. The memory range can later be optionally assigned to
+ the page allocator by system administrator policy via the
+ device-dax kmem facility. Say N to have the kernel treat this
+ memory as "System RAM" by default.
+
+ If unsure, say Y.
+
config EFI_PARAMS_FROM_FDT
bool
help
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 4ac2de4dfa72..554d795270d9 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -20,13 +20,16 @@ obj-$(CONFIG_UEFI_CPER) += cper.o
obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o
obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o
obj-$(CONFIG_EFI_STUB) += libstub/
-obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o
+obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_map.o
obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o
obj-$(CONFIG_EFI_TEST) += test/
obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o
obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o
obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o
+fake_map-y += fake_mem.o
+fake_map-$(CONFIG_X86) += x86_fake_mem.o
+
arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o
obj-$(CONFIG_ARM) += $(arm-obj-y)
obj-$(CONFIG_ARM64) += $(arm-obj-y)
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 311cd349a862..904fa09e6a6b 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -164,6 +164,15 @@ static __init int is_usable_memory(efi_memory_desc_t *md)
case EFI_CONVENTIONAL_MEMORY:
case EFI_PERSISTENT_MEMORY:
/*
+ * Special purpose memory is 'soft reserved', which means it
+ * is set aside initially, but can be hotplugged back in or
+ * be assigned to the dax driver after boot.
+ */
+ if (efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ return false;
+
+ /*
* According to the spec, these regions are no longer reserved
* after calling ExitBootServices(). However, we can only use
* them as System RAM if they can be mapped writeback cacheable.
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index e2ac5fa5531b..899b803842bb 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -121,6 +121,30 @@ static int __init arm_enable_runtime_services(void)
return 0;
}
+ if (efi_soft_reserve_enabled()) {
+ efi_memory_desc_t *md;
+
+ for_each_efi_memory_desc(md) {
+ int md_size = md->num_pages << EFI_PAGE_SHIFT;
+ struct resource *res;
+
+ if (!(md->attribute & EFI_MEMORY_SP))
+ continue;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (WARN_ON(!res))
+ break;
+
+ res->start = md->phys_addr;
+ res->end = md->phys_addr + md_size - 1;
+ res->name = "Soft Reserved";
+ res->flags = IORESOURCE_MEM;
+ res->desc = IORES_DESC_SOFT_RESERVED;
+
+ insert_resource(&iomem_resource, res);
+ }
+ }
+
if (efi_runtime_disabled()) {
pr_info("EFI runtime services will be disabled.\n");
return 0;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 8a492ce16b3b..d101f072c8f8 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -81,6 +81,11 @@ bool efi_runtime_disabled(void)
return disable_runtime;
}
+bool __pure __efi_soft_reserve_enabled(void)
+{
+ return !efi_enabled(EFI_MEM_NO_SOFT_RESERVE);
+}
+
static int __init parse_efi_cmdline(char *str)
{
if (!str) {
@@ -94,6 +99,9 @@ static int __init parse_efi_cmdline(char *str)
if (parse_option_str(str, "noruntime"))
disable_runtime = true;
+ if (parse_option_str(str, "nosoftreserve"))
+ set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
+
return 0;
}
early_param("efi", parse_efi_cmdline);
@@ -842,15 +850,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size,
if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
- EFI_MEMORY_NV |
+ EFI_MEMORY_NV | EFI_MEMORY_SP |
EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
snprintf(pos, size, "|attr=0x%016llx]",
(unsigned long long)attr);
else
snprintf(pos, size,
- "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+ "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
+ attr & EFI_MEMORY_SP ? "SP" : "",
attr & EFI_MEMORY_NV ? "NV" : "",
attr & EFI_MEMORY_XP ? "XP" : "",
attr & EFI_MEMORY_RP ? "RP" : "",
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index d6dd5f503fa2..2762e0662bf4 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -246,6 +246,9 @@ void __init efi_esrt_init(void)
int rc;
phys_addr_t end;
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
+
pr_debug("esrt-init: loading.\n");
if (!esrt_table_exists())
return;
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index 9501edc0fcfb..bb9fc70d0cfa 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -17,12 +17,10 @@
#include <linux/memblock.h>
#include <linux/types.h>
#include <linux/sort.h>
-#include <asm/efi.h>
+#include "fake_mem.h"
-#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM
-
-static struct efi_mem_range fake_mems[EFI_MAX_FAKEMEM];
-static int nr_fake_mem;
+struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM];
+int nr_fake_mem;
static int __init cmp_fake_mem(const void *x1, const void *x2)
{
@@ -44,13 +42,13 @@ void __init efi_fake_memmap(void)
void *new_memmap;
int i;
- if (!nr_fake_mem)
+ if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem)
return;
/* count up the number of EFI memory descriptor */
for (i = 0; i < nr_fake_mem; i++) {
for_each_efi_memory_desc(md) {
- struct range *r = &fake_mems[i].range;
+ struct range *r = &efi_fake_mems[i].range;
new_nr_map += efi_memmap_split_count(md, r);
}
@@ -70,7 +68,7 @@ void __init efi_fake_memmap(void)
}
for (i = 0; i < nr_fake_mem; i++)
- efi_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]);
+ efi_memmap_insert(&efi.memmap, new_memmap, &efi_fake_mems[i]);
/* swap into new EFI memmap */
early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map);
@@ -104,22 +102,22 @@ static int __init setup_fake_mem(char *p)
if (nr_fake_mem >= EFI_MAX_FAKEMEM)
break;
- fake_mems[nr_fake_mem].range.start = start;
- fake_mems[nr_fake_mem].range.end = start + mem_size - 1;
- fake_mems[nr_fake_mem].attribute = attribute;
+ efi_fake_mems[nr_fake_mem].range.start = start;
+ efi_fake_mems[nr_fake_mem].range.end = start + mem_size - 1;
+ efi_fake_mems[nr_fake_mem].attribute = attribute;
nr_fake_mem++;
if (*p == ',')
p++;
}
- sort(fake_mems, nr_fake_mem, sizeof(struct efi_mem_range),
+ sort(efi_fake_mems, nr_fake_mem, sizeof(struct efi_mem_range),
cmp_fake_mem, NULL);
for (i = 0; i < nr_fake_mem; i++)
pr_info("efi_fake_mem: add attr=0x%016llx to [mem 0x%016llx-0x%016llx]",
- fake_mems[i].attribute, fake_mems[i].range.start,
- fake_mems[i].range.end);
+ efi_fake_mems[i].attribute, efi_fake_mems[i].range.start,
+ efi_fake_mems[i].range.end);
return *p == '\0' ? 0 : -EINVAL;
}
diff --git a/drivers/firmware/efi/fake_mem.h b/drivers/firmware/efi/fake_mem.h
new file mode 100644
index 000000000000..d52791af4b18
--- /dev/null
+++ b/drivers/firmware/efi/fake_mem.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __EFI_FAKE_MEM_H__
+#define __EFI_FAKE_MEM_H__
+#include <asm/efi.h>
+
+#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM
+
+extern struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM];
+extern int nr_fake_mem;
+#endif /* __EFI_FAKE_MEM_H__ */
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index 41213bf5fcf5..4566640de650 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -146,6 +146,11 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
continue;
case EFI_CONVENTIONAL_MEMORY:
+ /* Skip soft reserved conventional memory */
+ if (efi_soft_reserve_enabled() &&
+ (desc->attribute & EFI_MEMORY_SP))
+ continue;
+
/*
* Reserve the intersection between this entry and the
* region.
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 35dbc2791c97..e02579907f2e 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -32,6 +32,7 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
static int __section(.data) __nokaslr;
static int __section(.data) __quiet;
static int __section(.data) __novamap;
+static bool __section(.data) efi_nosoftreserve;
int __pure nokaslr(void)
{
@@ -45,6 +46,10 @@ int __pure novamap(void)
{
return __novamap;
}
+bool __pure __efi_soft_reserve_enabled(void)
+{
+ return !efi_nosoftreserve;
+}
#define EFI_MMAP_NR_SLACK_SLOTS 8
@@ -211,6 +216,10 @@ again:
if (desc->type != EFI_CONVENTIONAL_MEMORY)
continue;
+ if (efi_soft_reserve_enabled() &&
+ (desc->attribute & EFI_MEMORY_SP))
+ continue;
+
if (desc->num_pages < nr_pages)
continue;
@@ -305,6 +314,10 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
if (desc->type != EFI_CONVENTIONAL_MEMORY)
continue;
+ if (efi_soft_reserve_enabled() &&
+ (desc->attribute & EFI_MEMORY_SP))
+ continue;
+
if (desc->num_pages < nr_pages)
continue;
@@ -484,6 +497,12 @@ efi_status_t efi_parse_options(char const *cmdline)
__novamap = 1;
}
+ if (IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) &&
+ !strncmp(str, "nosoftreserve", 7)) {
+ str += strlen("nosoftreserve");
+ efi_nosoftreserve = 1;
+ }
+
/* Group words together, delimited by "," */
while (*str && *str != ' ' && *str != ',')
str++;
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
index b4b1d1dcb5fd..6c188695e730 100644
--- a/drivers/firmware/efi/libstub/random.c
+++ b/drivers/firmware/efi/libstub/random.c
@@ -46,6 +46,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
if (md->type != EFI_CONVENTIONAL_MEMORY)
return 0;
+ if (efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ return 0;
+
region_end = min((u64)ULONG_MAX, md->phys_addr + md->num_pages*EFI_PAGE_SIZE - 1);
first_slot = round_up(md->phys_addr, align);
diff --git a/drivers/firmware/efi/x86_fake_mem.c b/drivers/firmware/efi/x86_fake_mem.c
new file mode 100644
index 000000000000..e5d6d5a1b240
--- /dev/null
+++ b/drivers/firmware/efi/x86_fake_mem.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights reserved. */
+#include <linux/efi.h>
+#include <asm/e820/api.h>
+#include "fake_mem.h"
+
+void __init efi_fake_memmap_early(void)
+{
+ int i;
+
+ /*
+ * The late efi_fake_mem() call can handle all requests if
+ * EFI_MEMORY_SP support is disabled.
+ */
+ if (!efi_soft_reserve_enabled())
+ return;
+
+ if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem)
+ return;
+
+ /*
+ * Given that efi_fake_memmap() needs to perform memblock
+ * allocations it needs to run after e820__memblock_setup().
+ * However, if efi_fake_mem specifies EFI_MEMORY_SP for a given
+ * address range that potentially needs to mark the memory as
+ * reserved prior to e820__memblock_setup(). Update e820
+ * directly if EFI_MEMORY_SP is specified for an
+ * EFI_CONVENTIONAL_MEMORY descriptor.
+ */
+ for (i = 0; i < nr_fake_mem; i++) {
+ struct efi_mem_range *mem = &efi_fake_mems[i];
+ efi_memory_desc_t *md;
+ u64 m_start, m_end;
+
+ if ((mem->attribute & EFI_MEMORY_SP) == 0)
+ continue;
+
+ m_start = mem->range.start;
+ m_end = mem->range.end;
+ for_each_efi_memory_desc(md) {
+ u64 start, end;
+
+ if (md->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ start = md->phys_addr;
+ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+
+ if (m_start <= end && m_end >= start)
+ /* fake range overlaps descriptor */;
+ else
+ continue;
+
+ /*
+ * Trim the boundary of the e820 update to the
+ * descriptor in case the fake range overlaps
+ * !EFI_CONVENTIONAL_MEMORY
+ */
+ start = max(start, m_start);
+ end = min(end, m_end);
+
+ if (end <= start)
+ continue;
+ e820__range_update(start, end - start + 1, E820_TYPE_RAM,
+ E820_TYPE_SOFT_RESERVED);
+ e820__update_table(e820_table);
+ }
+ }
+}
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 36af7af6b7cf..b7d1eb38b27d 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -4,6 +4,7 @@ menuconfig LIBNVDIMM
depends on PHYS_ADDR_T_64BIT
depends on HAS_IOMEM
depends on BLK_DEV
+ select MEMREGION
help
Generic support for non-volatile memory devices including
ACPI-6-NFIT defined resources. On platforms that define an
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 9204f1e9fd14..e592c4964674 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -455,7 +455,6 @@ static __exit void libnvdimm_exit(void)
nd_region_exit();
nvdimm_exit();
nvdimm_bus_exit();
- nd_region_devs_exit();
nvdimm_devs_exit();
}
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 25fa121104d0..aa059439fca0 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -114,7 +114,6 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void);
void nvdimm_devs_exit(void);
-void nd_region_devs_exit(void);
struct nd_region;
void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev);
void nd_region_create_ns_seed(struct nd_region *nd_region);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index ef423ba1a711..fbf34cf688f4 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -3,6 +3,7 @@
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*/
#include <linux/scatterlist.h>
+#include <linux/memregion.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -19,7 +20,6 @@
*/
#include <linux/io-64-nonatomic-hi-lo.h>
-static DEFINE_IDA(region_ida);
static DEFINE_PER_CPU(int, flush_idx);
static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
@@ -133,7 +133,7 @@ static void nd_region_release(struct device *dev)
put_device(&nvdimm->dev);
}
free_percpu(nd_region->lane);
- ida_simple_remove(&region_ida, nd_region->id);
+ memregion_free(nd_region->id);
if (is_nd_blk(dev))
kfree(to_nd_blk_region(dev));
else
@@ -985,7 +985,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
if (!region_buf)
return NULL;
- nd_region->id = ida_simple_get(&region_ida, 0, 0, GFP_KERNEL);
+ nd_region->id = memregion_alloc(GFP_KERNEL);
if (nd_region->id < 0)
goto err_id;
@@ -1044,7 +1044,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
return nd_region;
err_percpu:
- ida_simple_remove(&region_ida, nd_region->id);
+ memregion_free(nd_region->id);
err_id:
kfree(region_buf);
return NULL;
@@ -1216,8 +1216,3 @@ int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
}
-
-void __exit nd_region_devs_exit(void)
-{
- ida_destroy(&region_ida);
-}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index d87acf62958e..88654910ce29 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -112,6 +112,7 @@ typedef struct {
#define EFI_MEMORY_MORE_RELIABLE \
((u64)0x0000000000010000ULL) /* higher reliability */
#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */
+#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */
#define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */
#define EFI_MEMORY_DESCRIPTOR_VERSION 1
@@ -1044,7 +1045,6 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos
extern efi_status_t efi_query_variable_store(u32 attributes,
unsigned long size,
bool nonblocking);
-extern void efi_find_mirror(void);
#else
static inline efi_status_t efi_query_variable_store(u32 attributes,
@@ -1202,6 +1202,7 @@ extern int __init efi_setup_pcdp_console(char *);
#define EFI_DBG 8 /* Print additional debug info at runtime */
#define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */
#define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */
+#define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */
#ifdef CONFIG_EFI
/*
@@ -1212,6 +1213,14 @@ static inline bool efi_enabled(int feature)
return test_bit(feature, &efi.flags) != 0;
}
extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
+
+bool __pure __efi_soft_reserve_enabled(void);
+
+static inline bool __pure efi_soft_reserve_enabled(void)
+{
+ return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE)
+ && __efi_soft_reserve_enabled();
+}
#else
static inline bool efi_enabled(int feature)
{
@@ -1225,6 +1234,11 @@ efi_capsule_pending(int *reset_type)
{
return false;
}
+
+static inline bool efi_soft_reserve_enabled(void)
+{
+ return false;
+}
#endif
extern int efi_status_to_err(efi_status_t status);
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 7bddddfc76d6..a9b9170b5dd2 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -134,6 +134,7 @@ enum {
IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5,
IORES_DESC_DEVICE_PRIVATE_MEMORY = 6,
IORES_DESC_RESERVED = 7,
+ IORES_DESC_SOFT_RESERVED = 8,
};
/*
diff --git a/include/linux/memregion.h b/include/linux/memregion.h
new file mode 100644
index 000000000000..e11595256cac
--- /dev/null
+++ b/include/linux/memregion.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MEMREGION_H_
+#define _MEMREGION_H_
+#include <linux/types.h>
+#include <linux/errno.h>
+
+struct memregion_info {
+ int target_node;
+};
+
+#ifdef CONFIG_MEMREGION
+int memregion_alloc(gfp_t gfp);
+void memregion_free(int id);
+#else
+static inline int memregion_alloc(gfp_t gfp)
+{
+ return -ENOMEM;
+}
+void memregion_free(int id)
+{
+}
+#endif
+#endif /* _MEMREGION_H_ */
diff --git a/lib/Kconfig b/lib/Kconfig
index 3321d04dfa5a..681b7e50490e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -605,6 +605,9 @@ config ARCH_NO_SG_CHAIN
config ARCH_HAS_PMEM_API
bool
+config MEMREGION
+ bool
+
# use memcpy to implement user copies for nommu architectures
config UACCESS_MEMCPY
bool
diff --git a/lib/Makefile b/lib/Makefile
index c5892807e06f..2fb7b47018f1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -212,6 +212,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
obj-$(CONFIG_SG_SPLIT) += sg_split.o
obj-$(CONFIG_SG_POOL) += sg_pool.o
+obj-$(CONFIG_MEMREGION) += memregion.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
obj-$(CONFIG_IRQ_POLL) += irq_poll.o
diff --git a/lib/memregion.c b/lib/memregion.c
new file mode 100644
index 000000000000..77c85b5251da
--- /dev/null
+++ b/lib/memregion.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* identifiers for device / performance-differentiated memory regions */
+#include <linux/idr.h>
+#include <linux/types.h>
+
+static DEFINE_IDA(memregion_ids);
+
+int memregion_alloc(gfp_t gfp)
+{
+ return ida_alloc(&memregion_ids, gfp);
+}
+EXPORT_SYMBOL(memregion_alloc);
+
+void memregion_free(int id)
+{
+ ida_free(&memregion_ids, id);
+}
+EXPORT_SYMBOL(memregion_free);