From d2f7cbe7b26a74dbbbf8f325b2a6fd01bc34032c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:08 +0100 Subject: x86/efi: Runtime services virtual mapping We map the EFI regions needed for runtime services non-contiguously, with preserved alignment on virtual addresses starting from -4G down for a total max space of 64G. This way, we provide for stable runtime services addresses across kernels so that a kexec'd kernel can still use them. Thus, they're mapped in a separate pagetable so that we don't pollute the kernel namespace. Add an efi= kernel command line parameter for passing miscellaneous options and chicken bits from the command line. While at it, add a chicken bit called "efi=old_map" which can be used as a fallback to the old runtime services mapping method in case there's some b0rkage with a particular EFI implementation (haha, it is hard to hold up the sarcasm here...). Also, add the UEFI RT VA space to Documentation/x86/x86_64/mm.txt. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- Documentation/kernel-parameters.txt | 6 ++++++ Documentation/x86/x86_64/mm.txt | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7a0f202d482e..ed43e92b0e7e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -835,6 +835,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. edd= [EDD] Format: {"off" | "on" | "skip[mbr]"} + efi= [EFI] + Format: { "old_map" } + old_map [X86-64]: switch to the old ioremap-based EFI + runtime services mapping. 32-bit still uses this one by + default. + efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of your efi variable storage. Use this parameter only if diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 881582f75c9c..c584a51add15 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -28,4 +28,11 @@ reference. Current X86-64 implementations only support 40 bits of address space, but we support up to 46 bits. This expands into MBZ space in the page tables. +->trampoline_pgd: + +We map EFI runtime services in the aforementioned PGD in the virtual +range of 64Gb (arbitrarily set, can be raised if needed) + +0xffffffef00000000 - 0xffffffff00000000 + -Andi Kleen, Jul 2004 -- cgit v1.2.3 From a0998eb15afeffbf52a2c2829318f67df9ac57b8 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 20 Dec 2013 18:02:17 +0800 Subject: efi: Export more EFI table variables to sysfs Export fw_vendor, runtime and config table physical addresses to /sys/firmware/efi/{fw_vendor,runtime,config_table} because kexec kernels need them. From EFI spec these 3 variables will be updated to virtual address after entering virtual mode. But kernel startup code will need the physical address. Signed-off-by: Dave Young Tested-by: Toshi Kani Signed-off-by: Matt Fleming --- Documentation/ABI/testing/sysfs-firmware-efi | 20 ++++++++++++++ arch/x86/platform/efi/efi.c | 4 +++ drivers/firmware/efi/efi.c | 41 +++++++++++++++++++++++++++- include/linux/efi.h | 3 ++ 4 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-firmware-efi (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi new file mode 100644 index 000000000000..05874da7ce80 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-efi @@ -0,0 +1,20 @@ +What: /sys/firmware/efi/fw_vendor +Date: December 2013 +Contact: Dave Young +Description: It shows the physical address of firmware vendor field in the + EFI system table. +Users: Kexec + +What: /sys/firmware/efi/runtime +Date: December 2013 +Contact: Dave Young +Description: It shows the physical address of runtime service table entry in + the EFI system table. +Users: Kexec + +What: /sys/firmware/efi/config_table +Date: December 2013 +Contact: Dave Young +Description: It shows the physical address of config table entry in the EFI + system table. +Users: Kexec diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4694632ef581..28591072fbb7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -653,6 +653,10 @@ void __init efi_init(void) set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); + efi.config_table = (unsigned long)efi.systab->tables; + efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; + efi.runtime = (unsigned long)efi.systab->runtime; + /* * Show what we know for posterity */ diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 2e2fbdec0845..72533af72b98 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -32,6 +32,9 @@ struct efi __read_mostly efi = { .hcdp = EFI_INVALID_TABLE_ADDR, .uga = EFI_INVALID_TABLE_ADDR, .uv_systab = EFI_INVALID_TABLE_ADDR, + .fw_vendor = EFI_INVALID_TABLE_ADDR, + .runtime = EFI_INVALID_TABLE_ADDR, + .config_table = EFI_INVALID_TABLE_ADDR, }; EXPORT_SYMBOL(efi); @@ -71,13 +74,49 @@ static ssize_t systab_show(struct kobject *kobj, static struct kobj_attribute efi_attr_systab = __ATTR(systab, 0400, systab_show, NULL); +#define EFI_FIELD(var) efi.var + +#define EFI_ATTR_SHOW(name) \ +static ssize_t name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \ +} + +EFI_ATTR_SHOW(fw_vendor); +EFI_ATTR_SHOW(runtime); +EFI_ATTR_SHOW(config_table); + +static struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor); +static struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime); +static struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table); + static struct attribute *efi_subsys_attrs[] = { &efi_attr_systab.attr, - NULL, /* maybe more in the future? */ + &efi_attr_fw_vendor.attr, + &efi_attr_runtime.attr, + &efi_attr_config_table.attr, + NULL, }; +static umode_t efi_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + umode_t mode = attr->mode; + + if (attr == &efi_attr_fw_vendor.attr) + return (efi.fw_vendor == EFI_INVALID_TABLE_ADDR) ? 0 : mode; + else if (attr == &efi_attr_runtime.attr) + return (efi.runtime == EFI_INVALID_TABLE_ADDR) ? 0 : mode; + else if (attr == &efi_attr_config_table.attr) + return (efi.config_table == EFI_INVALID_TABLE_ADDR) ? 0 : mode; + + return mode; +} + static struct attribute_group efi_subsys_attr_group = { .attrs = efi_subsys_attrs, + .is_visible = efi_attr_is_visible, }; static struct efivars generic_efivars; diff --git a/include/linux/efi.h b/include/linux/efi.h index 6c0ca528300c..fb60b10b7bd9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -556,6 +556,9 @@ extern struct efi { unsigned long hcdp; /* HCDP table */ unsigned long uga; /* UGA table */ unsigned long uv_systab; /* UV system table */ + unsigned long fw_vendor; /* fw_vendor */ + unsigned long runtime; /* runtime table */ + unsigned long config_table; /* config tables */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; -- cgit v1.2.3 From 926172d46038d7610b6b8d84e40db727cefb482d Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 20 Dec 2013 18:02:18 +0800 Subject: efi: Export EFI runtime memory mapping to sysfs kexec kernel will need exactly same mapping for EFI runtime memory ranges. Thus here export the runtime ranges mapping to sysfs, kexec-tools will assemble them and pass to 2nd kernel via setup_data. Introducing a new directory /sys/firmware/efi/runtime-map just like /sys/firmware/memmap. Containing below attribute in each file of that directory: attribute num_pages phys_addr type virt_addr Signed-off-by: Dave Young Tested-by: Toshi Kani Signed-off-by: Matt Fleming --- .../ABI/testing/sysfs-firmware-efi-runtime-map | 34 ++++ arch/x86/platform/efi/efi.c | 46 +++++- drivers/firmware/efi/Kconfig | 11 ++ drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/efi.c | 4 + drivers/firmware/efi/runtime-map.c | 181 +++++++++++++++++++++ include/linux/efi.h | 13 ++ 7 files changed, 287 insertions(+), 3 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-firmware-efi-runtime-map create mode 100644 drivers/firmware/efi/runtime-map.c (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map new file mode 100644 index 000000000000..c61b9b348e99 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map @@ -0,0 +1,34 @@ +What: /sys/firmware/efi/runtime-map/ +Date: December 2013 +Contact: Dave Young +Description: Switching efi runtime services to virtual mode requires + that all efi memory ranges which have the runtime attribute + bit set to be mapped to virtual addresses. + + The efi runtime services can only be switched to virtual + mode once without rebooting. The kexec kernel must maintain + the same physical to virtual address mappings as the first + kernel. The mappings are exported to sysfs so userspace tools + can reassemble them and pass them into the kexec kernel. + + /sys/firmware/efi/runtime-map/ is the directory the kernel + exports that information in. + + subdirectories are named with the number of the memory range: + + /sys/firmware/efi/runtime-map/0 + /sys/firmware/efi/runtime-map/1 + /sys/firmware/efi/runtime-map/2 + /sys/firmware/efi/runtime-map/3 + ... + + Each subdirectory contains five files: + + attribute : The attributes of the memory range. + num_pages : The size of the memory range in pages. + phys_addr : The physical address of the memory range. + type : The type of the memory range. + virt_addr : The virtual address of the memory range. + + Above values are all hexadecimal numbers with the '0x' prefix. +Users: Kexec diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 28591072fbb7..74fe7a719508 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -76,6 +76,9 @@ static __initdata efi_config_table_type_t arch_tables[] = { {NULL_GUID, NULL, NULL}, }; +static void *efi_runtime_map; +static int nr_efi_runtime_map; + /* * Returns 1 if 'facility' is enabled, 0 otherwise. */ @@ -824,6 +827,39 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) } } +static int __init save_runtime_map(void) +{ + efi_memory_desc_t *md; + void *tmp, *p, *q = NULL; + int count = 0; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + + if (!(md->attribute & EFI_MEMORY_RUNTIME) || + (md->type == EFI_BOOT_SERVICES_CODE) || + (md->type == EFI_BOOT_SERVICES_DATA)) + continue; + tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); + if (!tmp) + goto out; + q = tmp; + + memcpy(q + count * memmap.desc_size, md, memmap.desc_size); + count++; + } + + efi_runtime_map = q; + nr_efi_runtime_map = count; + efi_runtime_map_setup(efi_runtime_map, nr_efi_runtime_map, + boot_params.efi_info.efi_memdesc_size); + + return 0; +out: + kfree(q); + return -ENOMEM; +} + /* * Map efi memory ranges for runtime serivce and update new_memmap with virtual * addresses. @@ -849,7 +885,7 @@ static void * __init efi_map_regions(int *count) tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size, GFP_KERNEL); if (!tmp) - goto out_krealloc; + goto out; new_memmap = tmp; memcpy(new_memmap + (*count * memmap.desc_size), md, memmap.desc_size); @@ -857,7 +893,7 @@ static void * __init efi_map_regions(int *count) } return new_memmap; -out_krealloc: +out: kfree(new_memmap); return NULL; } @@ -883,7 +919,7 @@ void __init efi_enter_virtual_mode(void) { efi_status_t status; void *new_memmap = NULL; - int count = 0; + int err, count = 0; efi.systab = NULL; @@ -904,6 +940,10 @@ void __init efi_enter_virtual_mode(void) return; } + err = save_runtime_map(); + if (err) + pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); + BUG_ON(!efi.systab); efi_setup_page_tables(); diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 3150aa4874e8..730f5f2e8b7f 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -39,4 +39,15 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE config UEFI_CPER def_bool n +config EFI_RUNTIME_MAP + bool "Export efi runtime maps to sysfs" + depends on X86 && EFI && KEXEC + default y + help + Export efi runtime memory maps to /sys/firmware/efi/runtime-map. + That memory map is used for example by kexec to set up efi virtual + mapping the 2nd kernel, but can also be used for debugging purposes. + + See also Documentation/ABI/testing/sysfs-firmware-efi-runtime-map. + endmenu diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 9ba156d3c775..a58e0f183a08 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -5,3 +5,4 @@ obj-y += efi.o vars.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o obj-$(CONFIG_UEFI_CPER) += cper.o +obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 72533af72b98..4753bac65279 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -167,6 +167,10 @@ static int __init efisubsys_init(void) goto err_unregister; } + error = efi_runtime_map_init(efi_kobj); + if (error) + goto err_remove_group; + /* and the standard mountpoint for efivarfs */ efivars_kobj = kobject_create_and_add("efivars", efi_kobj); if (!efivars_kobj) { diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c new file mode 100644 index 000000000000..97cdd16a2169 --- /dev/null +++ b/drivers/firmware/efi/runtime-map.c @@ -0,0 +1,181 @@ +/* + * linux/drivers/efi/runtime-map.c + * Copyright (C) 2013 Red Hat, Inc., Dave Young + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include + +#include + +static void *efi_runtime_map; +static int nr_efi_runtime_map; +static u32 efi_memdesc_size; + +struct efi_runtime_map_entry { + efi_memory_desc_t md; + struct kobject kobj; /* kobject for each entry */ +}; + +static struct efi_runtime_map_entry **map_entries; + +struct map_attribute { + struct attribute attr; + ssize_t (*show)(struct efi_runtime_map_entry *entry, char *buf); +}; + +static inline struct map_attribute *to_map_attr(struct attribute *attr) +{ + return container_of(attr, struct map_attribute, attr); +} + +static ssize_t type_show(struct efi_runtime_map_entry *entry, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%x\n", entry->md.type); +} + +#define EFI_RUNTIME_FIELD(var) entry->md.var + +#define EFI_RUNTIME_U64_ATTR_SHOW(name) \ +static ssize_t name##_show(struct efi_runtime_map_entry *entry, char *buf) \ +{ \ + return snprintf(buf, PAGE_SIZE, "0x%llx\n", EFI_RUNTIME_FIELD(name)); \ +} + +EFI_RUNTIME_U64_ATTR_SHOW(phys_addr); +EFI_RUNTIME_U64_ATTR_SHOW(virt_addr); +EFI_RUNTIME_U64_ATTR_SHOW(num_pages); +EFI_RUNTIME_U64_ATTR_SHOW(attribute); + +static inline struct efi_runtime_map_entry *to_map_entry(struct kobject *kobj) +{ + return container_of(kobj, struct efi_runtime_map_entry, kobj); +} + +static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct efi_runtime_map_entry *entry = to_map_entry(kobj); + struct map_attribute *map_attr = to_map_attr(attr); + + return map_attr->show(entry, buf); +} + +static struct map_attribute map_type_attr = __ATTR_RO(type); +static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr); +static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr); +static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages); +static struct map_attribute map_attribute_attr = __ATTR_RO(attribute); + +/* + * These are default attributes that are added for every memmap entry. + */ +static struct attribute *def_attrs[] = { + &map_type_attr.attr, + &map_phys_addr_attr.attr, + &map_virt_addr_attr.attr, + &map_num_pages_attr.attr, + &map_attribute_attr.attr, + NULL +}; + +static const struct sysfs_ops map_attr_ops = { + .show = map_attr_show, +}; + +static void map_release(struct kobject *kobj) +{ + struct efi_runtime_map_entry *entry; + + entry = to_map_entry(kobj); + kfree(entry); +} + +static struct kobj_type __refdata map_ktype = { + .sysfs_ops = &map_attr_ops, + .default_attrs = def_attrs, + .release = map_release, +}; + +static struct kset *map_kset; + +static struct efi_runtime_map_entry * +add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) +{ + int ret; + struct efi_runtime_map_entry *entry; + + if (!map_kset) { + map_kset = kset_create_and_add("runtime-map", NULL, kobj); + if (!map_kset) + return ERR_PTR(-ENOMEM); + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + kset_unregister(map_kset); + return entry; + } + + memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size, + sizeof(efi_memory_desc_t)); + + kobject_init(&entry->kobj, &map_ktype); + entry->kobj.kset = map_kset; + ret = kobject_add(&entry->kobj, NULL, "%d", nr); + if (ret) { + kobject_put(&entry->kobj); + kset_unregister(map_kset); + return ERR_PTR(ret); + } + + return entry; +} + +void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) +{ + efi_runtime_map = map; + nr_efi_runtime_map = nr_entries; + efi_memdesc_size = desc_size; +} + +int __init efi_runtime_map_init(struct kobject *efi_kobj) +{ + int i, j, ret = 0; + struct efi_runtime_map_entry *entry; + + if (!efi_runtime_map) + return 0; + + map_entries = kzalloc(nr_efi_runtime_map * sizeof(entry), GFP_KERNEL); + if (!map_entries) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < nr_efi_runtime_map; i++) { + entry = add_sysfs_runtime_map_entry(efi_kobj, i); + if (IS_ERR(entry)) { + ret = PTR_ERR(entry); + goto out_add_entry; + } + *(map_entries + i) = entry; + } + + return 0; +out_add_entry: + for (j = i - 1; j > 0; j--) { + entry = *(map_entries + j); + kobject_put(&entry->kobj); + } + if (map_kset) + kset_unregister(map_kset); +out: + return ret; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index fb60b10b7bd9..e64540746c63 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -872,4 +872,17 @@ int efivars_sysfs_init(void); #endif /* CONFIG_EFI_VARS */ +#ifdef CONFIG_EFI_RUNTIME_MAP +int efi_runtime_map_init(struct kobject *); +void efi_runtime_map_setup(void *, int, u32); +#else +static inline int efi_runtime_map_init(struct kobject *kobj) +{ + return 0; +} + +static inline void +efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {} +#endif + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 456a29ddada79198c5965300e04103c40c481f62 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 20 Dec 2013 18:02:20 +0800 Subject: x86: Add xloadflags bit for EFI runtime support on kexec Old kexec-tools can not load new kernels. The reason is kexec-tools does not fill efi_info in x86 setup header previously, thus EFI failed to initialize. In new kexec-tools it will by default to fill efi_info and pass other EFI required infomation to 2nd kernel so kexec kernel EFI initialization can succeed finally. To prevent from breaking userspace, add a new xloadflags bit so kexec-tools can check the flag and switch to old logic. Signed-off-by: Dave Young Acked-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming --- Documentation/x86/boot.txt | 3 +++ arch/x86/boot/header.S | 9 ++++++++- arch/x86/include/uapi/asm/bootparam.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index f4f268c2b826..cb81741d3b0b 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -608,6 +608,9 @@ Protocol: 2.12+ - If 1, the kernel supports the 64-bit EFI handoff entry point given at handover_offset + 0x200. + Bit 4 (read): XLF_EFI_KEXEC + - If 1, the kernel supports kexec EFI boot with EFI runtime support. + Field name: cmdline_size Type: read Offset/size: 0x238/4 diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 9ec06a1f6d61..ec3b8ba68096 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -391,7 +391,14 @@ xloadflags: #else # define XLF23 0 #endif - .word XLF0 | XLF1 | XLF23 + +#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC) +# define XLF4 XLF_EFI_KEXEC +#else +# define XLF4 0 +#endif + + .word XLF0 | XLF1 | XLF23 | XLF4 cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, #added with boot protocol diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 64fe421aab65..225b0988043a 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -24,6 +24,7 @@ #define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) #define XLF_EFI_HANDOVER_32 (1<<2) #define XLF_EFI_HANDOVER_64 (1<<3) +#define XLF_EFI_KEXEC (1<<4) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 5039e316dde3fb71c79e95e97c5bca8e4724d8f2 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 20 Dec 2013 18:02:21 +0800 Subject: x86: Export x86 boot_params to sysfs kexec-tools use boot_params for getting the 1st kernel hardware_subarch, the kexec kernel EFI runtime support also needs to read the old efi_info from boot_params. Currently it exists in debugfs which is not a good place for such infomation. Per HPA, we should avoid "sploit debugfs". In this patch /sys/kernel/boot_params are exported, also the setup_data is exported as a subdirectory. kexec-tools is using debugfs for hardware_subarch for a long time now so we're not removing it yet. Structure is like below: /sys/kernel/boot_params |__ data /* boot_params in binary*/ |__ setup_data | |__ 0 /* the first setup_data node */ | | |__ data /* setup_data node 0 in binary*/ | | |__ type /* setup_data type of setup_data node 0, hex string */ [snip] |__ version /* boot protocal version (in hex, "0x" prefixed)*/ Signed-off-by: Dave Young Acked-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming --- Documentation/ABI/testing/sysfs-kernel-boot_params | 38 +++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/ksysfs.c | 339 +++++++++++++++++++++ 3 files changed, 378 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-kernel-boot_params create mode 100644 arch/x86/kernel/ksysfs.c (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-kernel-boot_params b/Documentation/ABI/testing/sysfs-kernel-boot_params new file mode 100644 index 000000000000..eca38ce2852d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-boot_params @@ -0,0 +1,38 @@ +What: /sys/kernel/boot_params +Date: December 2013 +Contact: Dave Young +Description: The /sys/kernel/boot_params directory contains two + files: "data" and "version" and one subdirectory "setup_data". + It is used to export the kernel boot parameters of an x86 + platform to userspace for kexec and debugging purpose. + + If there's no setup_data in boot_params the subdirectory will + not be created. + + "data" file is the binary representation of struct boot_params. + + "version" file is the string representation of boot + protocol version. + + "setup_data" subdirectory contains the setup_data data + structure in boot_params. setup_data is maintained in kernel + as a link list. In "setup_data" subdirectory there's one + subdirectory for each link list node named with the number + of the list nodes. The list node subdirectory contains two + files "type" and "data". "type" file is the string + representation of setup_data type. "data" file is the binary + representation of setup_data payload. + + The whole boot_params directory structure is like below: + /sys/kernel/boot_params + |__ data + |__ setup_data + | |__ 0 + | | |__ data + | | |__ type + | |__ 1 + | |__ data + | |__ type + |__ version + +Users: Kexec diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9b0a34e2cd79..510cca5c5390 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-y += syscall_$(BITS).o obj-$(CONFIG_X86_64) += vsyscall_64.o obj-$(CONFIG_X86_64) += vsyscall_emu_64.o +obj-$(CONFIG_SYSFS) += ksysfs.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c new file mode 100644 index 000000000000..eb53d153f307 --- /dev/null +++ b/arch/x86/kernel/ksysfs.c @@ -0,0 +1,339 @@ +/* + * Architecture specific sysfs attributes in /sys/kernel + * + * Copyright (C) 2007, Intel Corp. + * Huang Ying + * Copyright (C) 2013, 2013 Red Hat, Inc. + * Dave Young + * + * This file is released under the GPLv2 + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +static ssize_t version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%04x\n", boot_params.hdr.version); +} + +static struct kobj_attribute boot_params_version_attr = __ATTR_RO(version); + +static ssize_t boot_params_data_read(struct file *fp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + memcpy(buf, (void *)&boot_params + off, count); + return count; +} + +static struct bin_attribute boot_params_data_attr = { + .attr = { + .name = "data", + .mode = S_IRUGO, + }, + .read = boot_params_data_read, + .size = sizeof(boot_params), +}; + +static struct attribute *boot_params_version_attrs[] = { + &boot_params_version_attr.attr, + NULL, +}; + +static struct bin_attribute *boot_params_data_attrs[] = { + &boot_params_data_attr, + NULL, +}; + +static struct attribute_group boot_params_attr_group = { + .attrs = boot_params_version_attrs, + .bin_attrs = boot_params_data_attrs, +}; + +static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr) +{ + const char *name; + + name = kobject_name(kobj); + return kstrtoint(name, 10, nr); +} + +static int get_setup_data_paddr(int nr, u64 *paddr) +{ + int i = 0; + struct setup_data *data; + u64 pa_data = boot_params.hdr.setup_data; + + while (pa_data) { + if (nr == i) { + *paddr = pa_data; + return 0; + } + data = ioremap_cache(pa_data, sizeof(*data)); + if (!data) + return -ENOMEM; + + pa_data = data->next; + iounmap(data); + i++; + } + return -EINVAL; +} + +static int __init get_setup_data_size(int nr, size_t *size) +{ + int i = 0; + struct setup_data *data; + u64 pa_data = boot_params.hdr.setup_data; + + while (pa_data) { + data = ioremap_cache(pa_data, sizeof(*data)); + if (!data) + return -ENOMEM; + if (nr == i) { + *size = data->len; + iounmap(data); + return 0; + } + + pa_data = data->next; + iounmap(data); + i++; + } + return -EINVAL; +} + +static ssize_t type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int nr, ret; + u64 paddr; + struct setup_data *data; + + ret = kobj_to_setup_data_nr(kobj, &nr); + if (ret) + return ret; + + ret = get_setup_data_paddr(nr, &paddr); + if (ret) + return ret; + data = ioremap_cache(paddr, sizeof(*data)); + if (!data) + return -ENOMEM; + + ret = sprintf(buf, "0x%x\n", data->type); + iounmap(data); + return ret; +} + +static ssize_t setup_data_data_read(struct file *fp, + struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, + loff_t off, size_t count) +{ + int nr, ret = 0; + u64 paddr; + struct setup_data *data; + void *p; + + ret = kobj_to_setup_data_nr(kobj, &nr); + if (ret) + return ret; + + ret = get_setup_data_paddr(nr, &paddr); + if (ret) + return ret; + data = ioremap_cache(paddr, sizeof(*data)); + if (!data) + return -ENOMEM; + + if (off > data->len) { + ret = -EINVAL; + goto out; + } + + if (count > data->len - off) + count = data->len - off; + + if (!count) + goto out; + + ret = count; + p = ioremap_cache(paddr + sizeof(*data), data->len); + if (!p) { + ret = -ENOMEM; + goto out; + } + memcpy(buf, p + off, count); + iounmap(p); +out: + iounmap(data); + return ret; +} + +static struct kobj_attribute type_attr = __ATTR_RO(type); + +static struct bin_attribute data_attr = { + .attr = { + .name = "data", + .mode = S_IRUGO, + }, + .read = setup_data_data_read, +}; + +static struct attribute *setup_data_type_attrs[] = { + &type_attr.attr, + NULL, +}; + +static struct bin_attribute *setup_data_data_attrs[] = { + &data_attr, + NULL, +}; + +static struct attribute_group setup_data_attr_group = { + .attrs = setup_data_type_attrs, + .bin_attrs = setup_data_data_attrs, +}; + +static int __init create_setup_data_node(struct kobject *parent, + struct kobject **kobjp, int nr) +{ + int ret = 0; + size_t size; + struct kobject *kobj; + char name[16]; /* should be enough for setup_data nodes numbers */ + snprintf(name, 16, "%d", nr); + + kobj = kobject_create_and_add(name, parent); + if (!kobj) + return -ENOMEM; + + ret = get_setup_data_size(nr, &size); + if (ret) + goto out_kobj; + + data_attr.size = size; + ret = sysfs_create_group(kobj, &setup_data_attr_group); + if (ret) + goto out_kobj; + *kobjp = kobj; + + return 0; +out_kobj: + kobject_put(kobj); + return ret; +} + +static void __init cleanup_setup_data_node(struct kobject *kobj) +{ + sysfs_remove_group(kobj, &setup_data_attr_group); + kobject_put(kobj); +} + +static int __init get_setup_data_total_num(u64 pa_data, int *nr) +{ + int ret = 0; + struct setup_data *data; + + *nr = 0; + while (pa_data) { + *nr += 1; + data = ioremap_cache(pa_data, sizeof(*data)); + if (!data) { + ret = -ENOMEM; + goto out; + } + pa_data = data->next; + iounmap(data); + } + +out: + return ret; +} + +static int __init create_setup_data_nodes(struct kobject *parent) +{ + struct kobject *setup_data_kobj, **kobjp; + u64 pa_data; + int i, j, nr, ret = 0; + + pa_data = boot_params.hdr.setup_data; + if (!pa_data) + return 0; + + setup_data_kobj = kobject_create_and_add("setup_data", parent); + if (!setup_data_kobj) { + ret = -ENOMEM; + goto out; + } + + ret = get_setup_data_total_num(pa_data, &nr); + if (ret) + goto out_setup_data_kobj; + + kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL); + if (!kobjp) { + ret = -ENOMEM; + goto out_setup_data_kobj; + } + + for (i = 0; i < nr; i++) { + ret = create_setup_data_node(setup_data_kobj, kobjp + i, i); + if (ret) + goto out_clean_nodes; + } + + kfree(kobjp); + return 0; + +out_clean_nodes: + for (j = i - 1; j > 0; j--) + cleanup_setup_data_node(*(kobjp + j)); + kfree(kobjp); +out_setup_data_kobj: + kobject_put(setup_data_kobj); +out: + return ret; +} + +static int __init boot_params_ksysfs_init(void) +{ + int ret; + struct kobject *boot_params_kobj; + + boot_params_kobj = kobject_create_and_add("boot_params", + kernel_kobj); + if (!boot_params_kobj) { + ret = -ENOMEM; + goto out; + } + + ret = sysfs_create_group(boot_params_kobj, &boot_params_attr_group); + if (ret) + goto out_boot_params_kobj; + + ret = create_setup_data_nodes(boot_params_kobj); + if (ret) + goto out_create_group; + + return 0; +out_create_group: + sysfs_remove_group(boot_params_kobj, &boot_params_attr_group); +out_boot_params_kobj: + kobject_put(boot_params_kobj); +out: + return ret; +} + +arch_initcall(boot_params_ksysfs_init); -- cgit v1.2.3