diff options
25 files changed, 273 insertions, 331 deletions
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst index c513855a54bb..4fd492cb4970 100644 --- a/Documentation/arch/x86/boot.rst +++ b/Documentation/arch/x86/boot.rst @@ -878,7 +878,8 @@ Protocol: 2.10+ address if possible. A non-relocatable kernel will unconditionally move itself and to run - at this address. + at this address. A relocatable kernel will move itself to this address if it + loaded below this address. ============ ======= Field name: init_size diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 77e8c8c67950..e88f6f7b6b41 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2111,11 +2111,11 @@ config PHYSICAL_START help This gives the physical address where the kernel is loaded. - If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then - bzImage will decompress itself to above physical address and - run from there. Otherwise, bzImage will run from the address where - it has been loaded by the boot loader and will ignore above physical - address. + If the kernel is not relocatable (CONFIG_RELOCATABLE=n) then bzImage + will decompress itself to above physical address and run from there. + Otherwise, bzImage will run from the address where it has been loaded + by the boot loader. The only exception is if it is loaded below the + above physical address, in which case it will relocate itself there. In normal kdump cases one does not have to set/change this option as now bzImage can be compiled as a completely relocatable image diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index ff09ca6dbb87..909f2a35b60c 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -389,5 +389,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code) { - /* Empty handler to ignore NMI during early boot */ + spurious_nmi_count++; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b99e08e6815b..408507e305be 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -52,6 +52,7 @@ struct port_io_ops pio_ops; memptr free_mem_ptr; memptr free_mem_end_ptr; +int spurious_nmi_count; static char *vidmem; static int vidport; @@ -164,21 +165,34 @@ void __putstr(const char *s) outb(0xff & (pos >> 1), vidport+1); } -void __puthex(unsigned long value) +static noinline void __putnum(unsigned long value, unsigned int base, + int mindig) { - char alpha[2] = "0"; - int bits; + char buf[8*sizeof(value)+1]; + char *p; - for (bits = sizeof(value) * 8 - 4; bits >= 0; bits -= 4) { - unsigned long digit = (value >> bits) & 0xf; + p = buf + sizeof(buf); + *--p = '\0'; - if (digit < 0xA) - alpha[0] = '0' + digit; - else - alpha[0] = 'a' + (digit - 0xA); + while (mindig-- > 0 || value) { + unsigned char digit = value % base; + digit += (digit >= 10) ? ('a'-10) : '0'; + *--p = digit; - __putstr(alpha); + value /= base; } + + __putstr(p); +} + +void __puthex(unsigned long value) +{ + __putnum(value, 16, sizeof(value)*2); +} + +void __putdec(unsigned long value) +{ + __putnum(value, 10, 1); } #ifdef CONFIG_X86_NEED_RELOCS @@ -358,6 +372,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, } /* + * Set the memory encryption xloadflag based on the mem_encrypt= command line + * parameter, if provided. + */ +static void parse_mem_encrypt(struct setup_header *hdr) +{ + int on = cmdline_find_option_bool("mem_encrypt=on"); + int off = cmdline_find_option_bool("mem_encrypt=off"); + + if (on > off) + hdr->xloadflags |= XLF_MEM_ENCRYPTION; +} + +/* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel * image (VO) and the execution environment (.bss, .brk), which makes sure @@ -387,6 +414,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) /* Clear flags intended for solely in-kernel use. */ boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG; + parse_mem_encrypt(&boot_params_ptr->hdr); + sanitize_boot_params(boot_params_ptr); if (boot_params_ptr->screen_info.orig_video_mode == 7) { @@ -493,6 +522,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) /* Disable exception handling before booting the kernel */ cleanup_exception_handling(); + if (spurious_nmi_count) { + error_putstr("Spurious early NMIs ignored: "); + error_putdec(spurious_nmi_count); + error_putstr("\n"); + } + return output + entry_offset; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index bc2f0f17fb90..b353a7be380c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -59,12 +59,15 @@ extern char _head[], _end[]; /* misc.c */ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; +extern int spurious_nmi_count; void *malloc(int size); void free(void *where); void __putstr(const char *s); void __puthex(unsigned long value); +void __putdec(unsigned long value); #define error_putstr(__x) __putstr(__x) #define error_puthex(__x) __puthex(__x) +#define error_putdec(__x) __putdec(__x) #ifdef CONFIG_X86_VERBOSE_BOOTUP diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 97561eabfbef..ec71846d28c9 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -117,6 +117,9 @@ static bool fault_in_kernel_space(unsigned long address) #undef __init #define __init +#undef __head +#define __head + #define __BOOT_COMPRESSED /* Basic instruction decoding support needed */ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index a1bbedd989e4..b5c79f43359b 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -111,11 +111,7 @@ extra_header_fields: .long salign # SizeOfHeaders .long 0 # CheckSum .word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application) -#ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES .word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics -#else - .word 0 # DllCharacteristics -#endif #ifdef CONFIG_X86_32 .long 0 # SizeOfStackReserve .long 0 # SizeOfStackCommit diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index ec95fe44fa3a..62dc9f59ea76 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -46,6 +46,7 @@ struct gdt_page { } __attribute__((aligned(PAGE_SIZE))); DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); +DECLARE_INIT_PER_CPU(gdt_page); /* Provide the original GDT */ static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index b31eb9fd5954..f922b682b9b4 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -47,8 +47,8 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); -void __init sme_encrypt_kernel(struct boot_params *bp); -void __init sme_enable(struct boot_params *bp); +void sme_encrypt_kernel(struct boot_params *bp); +void sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); @@ -81,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } -static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } -static inline void __init sme_enable(struct boot_params *bp) { } +static inline void sme_encrypt_kernel(struct boot_params *bp) { } +static inline void sme_enable(struct boot_params *bp) { } static inline void sev_es_init_vc_handling(void) { } diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 38b54b992f32..9053dfe9fa03 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -21,9 +21,9 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; typedef struct { pmdval_t pmd; } pmd_t; -#ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled; +#ifdef CONFIG_X86_5LEVEL #ifdef USE_EARLY_PGTABLE_L5 /* * cpu_feature_enabled() is not available in early boot code. diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 5c83729c8e71..e61e68d71cba 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -48,7 +48,7 @@ extern unsigned long saved_video_mode; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp); -extern void startup_64_setup_env(unsigned long physbase); +extern void startup_64_setup_gdt_idt(void); extern void early_setup_idt(void); extern void __init do_early_exception(struct pt_regs *regs, int trapnr); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 10f9f1b259c3..9477b4053bce 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -214,16 +214,16 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) struct snp_guest_request_ioctl; void setup_ghcb(void); -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned long npages); -void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned long npages); +void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned long npages); void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void __init __noreturn snp_abort(void); +void __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 4a38e7917756..9b82eebd7add 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -24,6 +24,7 @@ #define XLF_EFI_KEXEC (1<<4) #define XLF_5LEVEL (1<<5) #define XLF_5LEVEL_ENABLED (1<<6) +#define XLF_MEM_ENCRYPTION (1<<7) #ifndef __ASSEMBLY__ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index dc0956067944..212e8e06aeba 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -22,6 +22,8 @@ #include <linux/cc_platform.h> #include <linux/pgtable.h> +#include <asm/asm.h> +#include <asm/page_64.h> #include <asm/processor.h> #include <asm/proto.h> #include <asm/smp.h> @@ -67,42 +69,11 @@ unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4; EXPORT_SYMBOL(vmemmap_base); #endif -/* - * GDT used on the boot CPU before switching to virtual addresses. - */ -static struct desc_struct startup_gdt[GDT_ENTRIES] __initdata = { - [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff), - [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(DESC_CODE64, 0, 0xfffff), - [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA64, 0, 0xfffff), -}; - -/* - * Address needs to be set at runtime because it references the startup_gdt - * while the kernel still uses a direct mapping. - */ -static struct desc_ptr startup_gdt_descr __initdata = { - .size = sizeof(startup_gdt)-1, - .address = 0, -}; - -static void __head *fixup_pointer(void *ptr, unsigned long physaddr) -{ - return ptr - (void *)_text + (void *)physaddr; -} - -static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr) -{ - return fixup_pointer(ptr, physaddr); -} - -#ifdef CONFIG_X86_5LEVEL -static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr) +static inline bool check_la57_support(void) { - return fixup_pointer(ptr, physaddr); -} + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + return false; -static bool __head check_la57_support(unsigned long physaddr) -{ /* * 5-level paging is detected and enabled at kernel decompression * stage. Only check if it has been enabled there. @@ -110,21 +81,8 @@ static bool __head check_la57_support(unsigned long physaddr) if (!(native_read_cr4() & X86_CR4_LA57)) return false; - *fixup_int(&__pgtable_l5_enabled, physaddr) = 1; - *fixup_int(&pgdir_shift, physaddr) = 48; - *fixup_int(&ptrs_per_p4d, physaddr) = 512; - *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; - *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5; - *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5; - return true; } -#else -static bool __head check_la57_support(unsigned long physaddr) -{ - return false; -} -#endif static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) { @@ -173,23 +131,22 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv * doesn't have to generate PC-relative relocations when accessing globals from * that function. Clang actually does not generate them, which leads to * boot-time crashes. To work around this problem, every global pointer must - * be adjusted using fixup_pointer(). + * be accessed using RIP_REL_REF(). */ unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { - unsigned long load_delta, *p; + pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts); unsigned long pgtable_flags; + unsigned long load_delta; pgdval_t *pgd; p4dval_t *p4d; pudval_t *pud; pmdval_t *pmd, pmd_entry; - pteval_t *mask_ptr; bool la57; int i; - unsigned int *next_pgt_ptr; - la57 = check_la57_support(physaddr); + la57 = check_la57_support(); /* Is the address too large? */ if (physaddr >> MAX_PHYSMEM_BITS) @@ -200,6 +157,7 @@ unsigned long __head __startup_64(unsigned long physaddr, * and the address I am actually running at. */ load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); + RIP_REL_REF(phys_base) = load_delta; /* Is the address not 2M aligned? */ if (load_delta & ~PMD_MASK) @@ -210,26 +168,21 @@ unsigned long __head __startup_64(unsigned long physaddr, /* Fixup the physical addresses in the page table */ - pgd = fixup_pointer(early_top_pgt, physaddr); - p = pgd + pgd_index(__START_KERNEL_map); - if (la57) - *p = (unsigned long)level4_kernel_pgt; - else - *p = (unsigned long)level3_kernel_pgt; - *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta; + pgd = &RIP_REL_REF(early_top_pgt)->pgd; + pgd[pgd_index(__START_KERNEL_map)] += load_delta; if (la57) { - p4d = fixup_pointer(level4_kernel_pgt, physaddr); - p4d[511] += load_delta; + p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt); + p4d[MAX_PTRS_PER_P4D - 1] += load_delta; + + pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC; } - pud = fixup_pointer(level3_kernel_pgt, physaddr); - pud[510] += load_delta; - pud[511] += load_delta; + RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta; + RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta; - pmd = fixup_pointer(level2_fixmap_pgt, physaddr); for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--) - pmd[i] += load_delta; + RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta; /* * Set up the identity mapping for the switchover. These @@ -238,15 +191,14 @@ unsigned long __head __startup_64(unsigned long physaddr, * it avoids problems around wraparound. */ - next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr); - pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); - pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); + pud = &early_pgts[0]->pmd; + pmd = &early_pgts[1]->pmd; + RIP_REL_REF(next_early_pgt) = 2; pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); if (la57) { - p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], - physaddr); + p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd; i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; @@ -267,8 +219,7 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ - mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr); - pmd_entry &= *mask_ptr; + pmd_entry &= RIP_REL_REF(__supported_pte_mask); pmd_entry += sme_get_me_mask(); pmd_entry += physaddr; @@ -294,7 +245,7 @@ unsigned long __head __startup_64(unsigned long physaddr, * error, causing the BIOS to halt the system. */ - pmd = fixup_pointer(level2_kernel_pgt, physaddr); + pmd = &RIP_REL_REF(level2_kernel_pgt)->pmd; /* invalidate pages before the kernel image */ for (i = 0; i < pmd_index((unsigned long)_text); i++) @@ -309,12 +260,6 @@ unsigned long __head __startup_64(unsigned long physaddr, for (; i < PTRS_PER_PMD; i++) pmd[i] &= ~_PAGE_PRESENT; - /* - * Fixup phys_base - remove the memory encryption mask to obtain - * the true physical address. - */ - *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask(); - return sme_postprocess_startup(bp, pmd); } @@ -486,6 +431,15 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode (__START_KERNEL & PGDIR_MASK))); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); + if (check_la57_support()) { + __pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; + page_offset_base = __PAGE_OFFSET_BASE_L5; + vmalloc_base = __VMALLOC_BASE_L5; + vmemmap_base = __VMEMMAP_BASE_L5; + } + cr4_init_shadow(); /* Kill off the identity-map trampoline */ @@ -569,62 +523,52 @@ void __init __noreturn x86_64_start_reservations(char *real_mode_data) */ static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data; -static struct desc_ptr bringup_idt_descr = { - .size = (NUM_EXCEPTION_VECTORS * sizeof(gate_desc)) - 1, - .address = 0, /* Set at runtime */ -}; - -static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler) +/* This may run while still in the direct mapping */ +static void __head startup_64_load_idt(void *vc_handler) { -#ifdef CONFIG_AMD_MEM_ENCRYPT + struct desc_ptr desc = { + .address = (unsigned long)&RIP_REL_REF(bringup_idt_table), + .size = sizeof(bringup_idt_table) - 1, + }; struct idt_data data; - gate_desc desc; - - init_idt_data(&data, n, handler); - idt_init_desc(&desc, &data); - native_write_idt_entry(idt, n, &desc); -#endif -} + gate_desc idt_desc; -/* This runs while still in the direct mapping */ -static void __head startup_64_load_idt(unsigned long physbase) -{ - struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase); - gate_desc *idt = fixup_pointer(bringup_idt_table, physbase); - - - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { - void *handler; - - /* VMM Communication Exception */ - handler = fixup_pointer(vc_no_ghcb, physbase); - set_bringup_idt_handler(idt, X86_TRAP_VC, handler); + /* @vc_handler is set only for a VMM Communication Exception */ + if (vc_handler) { + init_idt_data(&data, X86_TRAP_VC, vc_handler); + idt_init_desc(&idt_desc, &data); + native_write_idt_entry((gate_desc *)desc.address, X86_TRAP_VC, &idt_desc); } - desc->address = (unsigned long)idt; - native_load_idt(desc); + native_load_idt(&desc); } /* This is used when running on kernel addresses */ void early_setup_idt(void) { - /* VMM Communication Exception */ + void *handler = NULL; + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { setup_ghcb(); - set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb); + handler = vc_boot_ghcb; } - bringup_idt_descr.address = (unsigned long)bringup_idt_table; - native_load_idt(&bringup_idt_descr); + startup_64_load_idt(handler); } /* * Setup boot CPU state needed before kernel switches to virtual addresses. */ -void __head startup_64_setup_env(unsigned long physbase) +void __head startup_64_setup_gdt_idt(void) { + void *handler = NULL; + + struct desc_ptr startup_gdt_descr = { + .address = (unsigned long)&RIP_REL_REF(init_per_cpu_var(gdt_page.gdt)), + .size = GDT_SIZE - 1, + }; + /* Load GDT */ - startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase); native_load_gdt(&startup_gdt_descr); /* New GDT is live - reload data segment registers */ @@ -632,5 +576,8 @@ void __head startup_64_setup_env(unsigned long physbase) "movl %%eax, %%ss\n" "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory"); - startup_64_load_idt(physbase); + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) + handler = &RIP_REL_REF(vc_no_ghcb); + + startup_64_load_idt(handler); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3dbd05f93859..d8198fbd70e5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -40,7 +40,6 @@ L4_START_KERNEL = l4_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) - .text __HEAD .code64 SYM_CODE_START_NOALIGN(startup_64) @@ -69,8 +68,6 @@ SYM_CODE_START_NOALIGN(startup_64) /* Set up the stack for verify_cpu() */ leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp - leaq _text(%rip), %rdi - /* Setup GSBASE to allow stack canary access for C code */ movl $MSR_GS_BASE, %ecx leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx @@ -78,7 +75,7 @@ SYM_CODE_START_NOALIGN(startup_64) shrq $32, %rdx wrmsr - call startup_64_setup_env + call startup_64_setup_gdt_idt /* Now switch to __KERNEL_CS so IRET works reliably */ pushq $__KERNEL_CS @@ -114,13 +111,11 @@ SYM_CODE_START_NOALIGN(startup_64) call __startup_64 /* Form the CR3 value being sure to include the CR3 modifier */ - addq $(early_top_pgt - __START_KERNEL_map), %rax + leaq early_top_pgt(%rip), %rcx + addq %rcx, %rax #ifdef CONFIG_AMD_MEM_ENCRYPT mov %rax, %rdi - mov %rax, %r14 - - addq phys_base(%rip), %rdi /* * For SEV guests: Verify that the C-bit is correct. A malicious @@ -129,17 +124,23 @@ SYM_CODE_START_NOALIGN(startup_64) * the next RET instruction. */ call sev_verify_cbit +#endif /* - * Restore CR3 value without the phys_base which will be added - * below, before writing %cr3. + * Switch to early_top_pgt which still has the identity mappings + * present. */ - mov %r14, %rax -#endif + movq %rax, %cr3 - jmp 1f + /* Branch to the common startup code at its kernel virtual address */ + ANNOTATE_RETPOLINE_SAFE + jmp *0f(%rip) SYM_CODE_END(startup_64) + __INITRODATA +0: .quad common_startup_64 + + .text SYM_CODE_START(secondary_startup_64) UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR @@ -172,22 +173,39 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) ANNOTATE_NOENDBR /* Clear %R15 which holds the boot_params pointer on the boot CPU */ - xorq %r15, %r15 + xorl %r15d, %r15d + + /* Derive the runtime physical address of init_top_pgt[] */ + movq phys_base(%rip), %rax + addq $(init_top_pgt - __START_KERNEL_map), %rax /* * Retrieve the modifier (SME encryption mask if SME is active) to be * added to the initial pgdir entry that will be programmed into CR3. */ #ifdef CONFIG_AMD_MEM_ENCRYPT - movq sme_me_mask, %rax -#else - xorq %rax, %rax + addq sme_me_mask(%rip), %rax #endif + /* + * Switch to the init_top_pgt here, away from the trampoline_pgd and + * unmap the identity mapped ranges. + */ + movq %rax, %cr3 - /* Form the CR3 value being sure to include the CR3 modifier */ - addq $(init_top_pgt - __START_KERNEL_map), %rax -1: +SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL) + UNWIND_HINT_END_OF_STACK + ANNOTATE_NOENDBR + /* + * Create a mask of CR4 bits to preserve. Omit PGE in order to flush + * global 1:1 translations from the TLBs. + * + * From the SDM: + * "If CR4.PGE is changing from 0 to 1, there were no global TLB + * entries before the execution; if CR4.PGE is changing from 1 to 0, + * there will be no global TLB entries after the execution." + */ + movl $(X86_CR4_PAE | X86_CR4_LA57), %edx #ifdef CONFIG_X86_MCE /* * Preserve CR4.MCE if the kernel will enable #MC support. @@ -196,52 +214,20 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) * configured will crash the system regardless of the CR4.MCE value set * here. */ - movq %cr4, %rcx - andl $X86_CR4_MCE, %ecx -#else - movl $0, %ecx + orl $X86_CR4_MCE, %edx #endif + movq %cr4, %rcx + andl %edx, %ecx - /* Enable PAE mode, PSE, PGE and LA57 */ - orl $(X86_CR4_PAE | X86_CR4_PSE | X86_CR4_PGE), %ecx -#ifdef CONFIG_X86_5LEVEL - testb $1, __pgtable_l5_enabled(%rip) - jz 1f - orl $X86_CR4_LA57, %ecx -1: -#endif + /* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */ + btsl $X86_CR4_PSE_BIT, %ecx movq %rcx, %cr4 - /* Setup early boot stage 4-/5-level pagetables. */ - addq phys_base(%rip), %rax - /* - * Switch to new page-table - * - * For the boot CPU this switches to early_top_pgt which still has the - * identity mappings present. The secondary CPUs will switch to the - * init_top_pgt here, away from the trampoline_pgd and unmap the - * identity mapped ranges. + * Set CR4.PGE to re-enable global translations. */ - movq %rax, %cr3 - - /* - * Do a global TLB flush after the CR3 switch to make sure the TLB - * entries from the identity mapping are flushed. - */ - movq %cr4, %rcx - movq %rcx, %rax - xorq $X86_CR4_PGE, %rcx + btsl $X86_CR4_PGE_BIT, %ecx movq %rcx, %cr4 - movq %rax, %cr4 - - /* Ensure I am executing from virtual addresses */ - movq $1f, %rax - ANNOTATE_RETPOLINE_SAFE - jmp *%rax -1: - UNWIND_HINT_END_OF_STACK - ANNOTATE_NOENDBR // above #ifdef CONFIG_SMP /* @@ -298,7 +284,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) .Llookup_AP: /* EAX contains the APIC ID of the current CPU */ - xorq %rcx, %rcx + xorl %ecx, %ecx leaq cpuid_to_apicid(%rip), %rbx .Lfind_cpunr: @@ -429,39 +415,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) movq %r15, %rdi .Ljump_to_C_code: - /* - * Jump to run C code and to be on a real kernel address. - * Since we are running on identity-mapped space we have to jump - * to the full 64bit address, this is only possible as indirect - * jump. In addition we need to ensure %cs is set so we make this - * a far return. - * - * Note: do not change to far jump indirect with 64bit offset. - * - * AMD does not support far jump indirect with 64bit offset. - * AMD64 Architecture Programmer's Manual, Volume 3: states only - * JMP FAR mem16:16 FF /5 Far jump indirect, - * with the target specified by a far pointer in memory. - * JMP FAR mem16:32 FF /5 Far jump indirect, - * with the target specified by a far pointer in memory. - * - * Intel64 does support 64bit offset. - * Software Developer Manual Vol 2: states: - * FF /5 JMP m16:16 Jump far, absolute indirect, - * address given in m16:16 - * FF /5 JMP m16:32 Jump far, absolute indirect, - * address given in m16:32. - * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect, - * address given in m16:64. - */ - pushq $.Lafter_lret # put return address on stack for unwinder xorl %ebp, %ebp # clear frame pointer - movq initial_code(%rip), %rax - pushq $__KERNEL_CS # set correct cs - pushq %rax # target address in negative space - lretq -.Lafter_lret: - ANNOTATE_NOENDBR + ANNOTATE_RETPOLINE_SAFE + callq *initial_code(%rip) + ud2 SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" @@ -656,7 +613,8 @@ SYM_CODE_END(vc_no_ghcb) .balign 4 SYM_DATA_START_PTI_ALIGNED(early_top_pgt) - .fill 512,8,0 + .fill 511,8,0 + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .fill PTI_USER_PGD_FILL,8,0 SYM_DATA_END(early_top_pgt) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 2a422e00ed4b..cde167b0ea92 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -503,7 +503,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel, kbuf.bufsz = kernel_len - kern16_size; kbuf.memsz = PAGE_ALIGN(header->init_size); kbuf.buf_align = header->kernel_alignment; - kbuf.buf_min = MIN_KERNEL_LOAD_ADDR; + if (header->pref_address < MIN_KERNEL_LOAD_ADDR) + kbuf.buf_min = MIN_KERNEL_LOAD_ADDR; + else + kbuf.buf_min = header->pref_address; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index a200bd72fadc..8b04958da5e7 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -95,7 +95,8 @@ static bool __init sev_es_check_cpu_features(void) return true; } -static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) +static void __head __noreturn +sev_es_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ; @@ -332,13 +333,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid */ static const struct snp_cpuid_table *snp_cpuid_get_table(void) { - void *ptr; - - asm ("lea cpuid_table_copy(%%rip), %0" - : "=r" (ptr) - : "p" (&cpuid_table_copy)); - - return ptr; + return &RIP_REL_REF(cpuid_table_copy); } /* @@ -397,7 +392,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) return xsave_size; } -static bool +static bool __head snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -534,7 +529,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +static int __head +snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -576,7 +572,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le * page yet, so it only supports the MSR based communication with the * hypervisor and only the CPUID exit-code. */ -void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); @@ -1027,7 +1023,8 @@ struct cc_setup_data { * Search for a Confidential Computing blob passed in as a setup_data entry * via the Linux Boot Protocol. */ -static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) +static __head +struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) { struct cc_setup_data *sd = NULL; struct setup_data *hdr; @@ -1054,7 +1051,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) * mapping needs to be updated in sync with all the changes to virtual memory * layout and related mapping facilities throughout the boot process. */ -static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) { const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; int i; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 7d242898852f..b59b09c2f284 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -25,6 +25,7 @@ #include <linux/psp-sev.h> #include <uapi/linux/sev-guest.h> +#include <asm/init.h> #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> #include <asm/sev.h> @@ -701,8 +702,9 @@ static u64 __init get_jump_table_addr(void) return ret; } -static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, - unsigned long npages, enum psc_op op) +static void __head +early_set_pages_state(unsigned long vaddr, unsigned long paddr, + unsigned long npages, enum psc_op op) { unsigned long paddr_end; u64 val; @@ -758,7 +760,7 @@ e_term: sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); } -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { /* @@ -2081,7 +2083,7 @@ fail: * * Scan for the blob in that order. */ -static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2107,7 +2109,7 @@ found_cc_info: return cc_info; } -bool __init snp_init(struct boot_params *bp) +bool __head snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2129,7 +2131,7 @@ bool __init snp_init(struct boot_params *bp) return true; } -void __init __noreturn snp_abort(void) +void __head __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S index 3355e27c69eb..1ab65f6c6ae7 100644 --- a/arch/x86/kernel/sev_verify_cbit.S +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -77,7 +77,7 @@ SYM_FUNC_START(sev_verify_cbit) * The check failed, prevent any forward progress to prevent ROP * attacks, invalidate the stack and go into a hlt loop. */ - xorq %rsp, %rsp + xorl %esp, %esp subq $0x1000, %rsp 2: hlt jmp 2b diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 72cc9c90e9f3..6da73513f026 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,19 +14,6 @@ ifdef CONFIG_KCSAN CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE) endif -# Early boot use of cmdline; don't instrument it -ifdef CONFIG_AMD_MEM_ENCRYPT -KCOV_INSTRUMENT_cmdline.o := n -KASAN_SANITIZE_cmdline.o := n -KCSAN_SANITIZE_cmdline.o := n - -ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_cmdline.o = -pg -endif - -CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables -endif - inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt quiet_cmd_inat_tables = GEN $@ diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 0166ab1780cc..64b5005d49e5 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -41,9 +41,9 @@ #include <linux/mem_encrypt.h> #include <linux/cc_platform.h> +#include <asm/init.h> #include <asm/setup.h> #include <asm/sections.h> -#include <asm/cmdline.h> #include <asm/coco.h> #include <asm/sev.h> @@ -95,10 +95,7 @@ struct sme_populate_pgd_data { */ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); -static char sme_cmdline_arg[] __initdata = "mem_encrypt"; -static char sme_cmdline_on[] __initdata = "on"; - -static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; @@ -113,7 +110,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) memset(pgd_p, 0, pgd_size); } -static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) +static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd; p4d_t *p4d; @@ -150,7 +147,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) return pud; } -static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -166,7 +163,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); } -static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -192,7 +189,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); } -static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); @@ -202,7 +199,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) } } -static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd(ppd); @@ -212,7 +209,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) } } -static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, +static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, pmdval_t pmd_flags, pteval_t pte_flags) { unsigned long vaddr_end; @@ -236,22 +233,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, __sme_map_range_pte(ppd); } -static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); } -static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); } -static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); } -static unsigned long __init sme_pgtable_calc(unsigned long len) +static unsigned long __head sme_pgtable_calc(unsigned long len) { unsigned long entries = 0, tables = 0; @@ -288,7 +285,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return entries + tables; } -void __init sme_encrypt_kernel(struct boot_params *bp) +void __head sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; @@ -323,9 +320,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * memory from being cached. */ - /* Physical addresses gives us the identity mapped virtual addresses */ - kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); + kernel_start = (unsigned long)RIP_REL_REF(_text); + kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start; initrd_start = 0; @@ -343,14 +339,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp) #endif /* - * We're running identity mapped, so we must obtain the address to the - * SME encryption workarea using rip-relative addressing. - */ - asm ("lea sme_workarea(%%rip), %0" - : "=r" (workarea_start) - : "p" (sme_workarea)); - - /* * Calculate required number of workarea bytes needed: * executable encryption area size: * stack page (PAGE_SIZE) @@ -359,7 +347,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ - execute_start = workarea_start; + execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea); execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start; @@ -502,13 +490,11 @@ void __init sme_encrypt_kernel(struct boot_params *bp) native_write_cr3(__native_read_cr3()); } -void __init sme_enable(struct boot_params *bp) +void __head sme_enable(struct boot_params *bp) { - const char *cmdline_ptr, *cmdline_arg, *cmdline_on; unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; unsigned long me_mask; - char buffer[16]; bool snp; u64 msr; @@ -551,6 +537,9 @@ void __init sme_enable(struct boot_params *bp) /* Check if memory encryption is enabled */ if (feature_mask == AMD_SME_BIT) { + if (!(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION)) + return; + /* * No SME if Hypervisor bit is set. This check is here to * prevent a guest from trying to enable SME. For running as a @@ -570,31 +559,8 @@ void __init sme_enable(struct boot_params *bp) msr = __rdmsr(MSR_AMD64_SYSCFG); if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; - } else { - /* SEV state cannot be controlled by a command line option */ - goto out; } - /* - * Fixups have not been applied to phys_base yet and we're running - * identity mapped, so we must obtain the address to the SME command - * line argument data using rip-relative addressing. - */ - asm ("lea sme_cmdline_arg(%%rip), %0" - : "=r" (cmdline_arg) - : "p" (sme_cmdline_arg)); - asm ("lea sme_cmdline_on(%%rip), %0" - : "=r" (cmdline_on) - : "p" (sme_cmdline_on)); - - cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | - ((u64)bp->ext_cmd_line_ptr << 32)); - - if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 || - strncmp(buffer, cmdline_on, sizeof(buffer))) - return; - -out: RIP_REL_REF(sme_me_mask) = me_mask; physical_mask &= ~me_mask; cc_vendor = CC_VENDOR_AMD; diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index c9f76fae902e..14d9c7daf90f 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -37,13 +37,15 @@ .text .code16 -.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0 +.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0 lock_rip=0 /* * Make sure only one CPU fiddles with the realmode stack */ .Llock_rm\@: .if \lock_pa lock btsl $0, pa_tr_lock + .elseif \lock_rip + lock btsl $0, tr_lock(%rip) .else lock btsl $0, tr_lock .endif @@ -220,6 +222,35 @@ SYM_CODE_START(trampoline_start64) lidt tr_idt(%rip) lgdt tr_gdt64(%rip) + /* Check if paging mode has to be changed */ + movq %cr4, %rax + xorl tr_cr4(%rip), %eax + testl $X86_CR4_LA57, %eax + jnz .L_switch_paging + + /* Paging mode is correct proceed in 64-bit mode */ + + LOCK_AND_LOAD_REALMODE_ESP lock_rip=1 + + movw $__KERNEL_DS, %dx + movl %edx, %ss + addl $pa_real_mode_base, %esp + movl %edx, %ds + movl %edx, %es + movl %edx, %fs + movl %edx, %gs + + movl $pa_trampoline_pgd, %eax + movq %rax, %cr3 + + pushq $__KERNEL_CS + pushq tr_start(%rip) + lretq +.L_switch_paging: + /* + * To switch between 4- and 5-level paging modes, it is necessary + * to disable paging. This must be done in the compatibility mode. + */ ljmpl *tr_compat(%rip) SYM_CODE_END(trampoline_start64) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index bfa30625f5d0..3dc2f9aaf08d 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -24,6 +24,8 @@ static bool efi_noinitrd; static bool efi_nosoftreserve; static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA); +int efi_mem_encrypt; + bool __pure __efi_soft_reserve_enabled(void) { return !efi_nosoftreserve; @@ -75,6 +77,12 @@ efi_status_t efi_parse_options(char const *cmdline) efi_noinitrd = true; } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { efi_no5lvl = true; + } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) && + !strcmp(param, "mem_encrypt") && val) { + if (parse_option_str(val, "on")) + efi_mem_encrypt = 1; + else if (parse_option_str(val, "off")) + efi_mem_encrypt = -1; } else if (!strcmp(param, "efi") && val) { efi_nochunk = parse_option_str(val, "nochunk"); efi_novamap |= parse_option_str(val, "novamap"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index c04b82ea40f2..fc18fd649ed7 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -37,8 +37,8 @@ extern bool efi_no5lvl; extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; +extern int efi_mem_encrypt; extern bool efi_novamap; - extern const efi_system_table_t *efi_system_table; typedef union efi_dxe_services_table efi_dxe_services_table_t; diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 99429bc4b0c7..0336ed175e67 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -884,6 +884,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, } } + if (efi_mem_encrypt > 0) + hdr->xloadflags |= XLF_MEM_ENCRYPTION; + status = efi_decompress_kernel(&kernel_entry); if (status != EFI_SUCCESS) { efi_err("Failed to decompress kernel\n"); |