From dd649bd0b3aa012740059b1ba31ecad28a408f7f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 28 May 2020 16:13:48 -0400 Subject: x86/cpu: Add 'unsafe_fsgsbase' to enable CR4.FSGSBASE This is temporary. It will allow the next few patches to be tested incrementally. Setting unsafe_fsgsbase is a root hole. Don't do it. Signed-off-by: Andy Lutomirski Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin Signed-off-by: Thomas Gleixner Reviewed-by: Andi Kleen Reviewed-by: Andy Lutomirski Link: https://lkml.kernel.org/r/1557309753-24073-4-git-send-email-chang.seok.bae@intel.com Link: https://lkml.kernel.org/r/20200528201402.1708239-3-sashal@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb95fad81c79..7308db7308ba 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3079,6 +3079,9 @@ no5lvl [X86-64] Disable 5-level paging mode. Forces kernel to use 4-level paging instead. + unsafe_fsgsbase [X86] Allow FSGSBASE instructions. This will be + replaced with a nofsgsbase flag. + no_console_suspend [HW] Never suspend the console Disable suspending of consoles during suspend and -- cgit v1.2.3 From b745cfba44c152c34363eea9e052367b6b1d652b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 28 May 2020 16:13:58 -0400 Subject: x86/cpu: Enable FSGSBASE on 64bit by default and add a chicken bit Now that FSGSBASE is fully supported, remove unsafe_fsgsbase, enable FSGSBASE by default, and add nofsgsbase to disable it. Signed-off-by: Andy Lutomirski Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin Signed-off-by: Thomas Gleixner Reviewed-by: Andi Kleen Link: https://lkml.kernel.org/r/1557309753-24073-17-git-send-email-chang.seok.bae@intel.com Link: https://lkml.kernel.org/r/20200528201402.1708239-13-sashal@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 3 +-- arch/x86/kernel/cpu/common.c | 32 +++++++++++-------------- 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7308db7308ba..8c0d045f1dc7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3079,8 +3079,7 @@ no5lvl [X86-64] Disable 5-level paging mode. Forces kernel to use 4-level paging instead. - unsafe_fsgsbase [X86] Allow FSGSBASE instructions. This will be - replaced with a nofsgsbase flag. + nofsgsbase [X86] Disables FSGSBASE instructions. no_console_suspend [HW] Never suspend the console diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7438a318e5aa..18857ceb6bce 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -441,21 +441,21 @@ static void __init setup_cr_pinning(void) static_key_enable(&cr_pinning.key); } -/* - * Temporary hack: FSGSBASE is unsafe until a few kernel code paths are - * updated. This allows us to get the kernel ready incrementally. - * - * Once all the pieces are in place, these will go away and be replaced with - * a nofsgsbase chicken flag. - */ -static bool unsafe_fsgsbase; - -static __init int setup_unsafe_fsgsbase(char *arg) +static __init int x86_nofsgsbase_setup(char *arg) { - unsafe_fsgsbase = true; + /* Require an exact match without trailing characters. */ + if (strlen(arg)) + return 0; + + /* Do not emit a message if the feature is not present. */ + if (!boot_cpu_has(X86_FEATURE_FSGSBASE)) + return 1; + + setup_clear_cpu_cap(X86_FEATURE_FSGSBASE); + pr_info("FSGSBASE disabled via kernel command line\n"); return 1; } -__setup("unsafe_fsgsbase", setup_unsafe_fsgsbase); +__setup("nofsgsbase", x86_nofsgsbase_setup); /* * Protection Keys are not available in 32-bit mode. @@ -1512,12 +1512,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) setup_umip(c); /* Enable FSGSBASE instructions if available. */ - if (cpu_has(c, X86_FEATURE_FSGSBASE)) { - if (unsafe_fsgsbase) - cr4_set_bits(X86_CR4_FSGSBASE); - else - clear_cpu_cap(c, X86_FEATURE_FSGSBASE); - } + if (cpu_has(c, X86_FEATURE_FSGSBASE)) + cr4_set_bits(X86_CR4_FSGSBASE); /* * The vendor-specific functions might have changed features. -- cgit v1.2.3 From 82c0c7d24c1a034d94af4595dbd3910d7336a6dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 May 2020 16:14:00 -0400 Subject: Documentation/x86/64: Add documentation for GS/FS addressing mode Explain how the GS/FS based addressing can be utilized in user space applications along with the differences between the generic prctl() based GS/FS base control and the FSGSBASE version available on newer CPUs. Originally-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Sasha Levin Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Link: https://lkml.kernel.org/r/20200528201402.1708239-15-sashal@kernel.org --- Documentation/x86/x86_64/fsgs.rst | 199 +++++++++++++++++++++++++++++++++++++ Documentation/x86/x86_64/index.rst | 1 + 2 files changed, 200 insertions(+) create mode 100644 Documentation/x86/x86_64/fsgs.rst (limited to 'Documentation') diff --git a/Documentation/x86/x86_64/fsgs.rst b/Documentation/x86/x86_64/fsgs.rst new file mode 100644 index 000000000000..50960e09e1f6 --- /dev/null +++ b/Documentation/x86/x86_64/fsgs.rst @@ -0,0 +1,199 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Using FS and GS segments in user space applications +=================================================== + +The x86 architecture supports segmentation. Instructions which access +memory can use segment register based addressing mode. The following +notation is used to address a byte within a segment: + + Segment-register:Byte-address + +The segment base address is added to the Byte-address to compute the +resulting virtual address which is accessed. This allows to access multiple +instances of data with the identical Byte-address, i.e. the same code. The +selection of a particular instance is purely based on the base-address in +the segment register. + +In 32-bit mode the CPU provides 6 segments, which also support segment +limits. The limits can be used to enforce address space protections. + +In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is +always 0 to provide a full 64bit address space. The FS and GS segments are +still functional in 64-bit mode. + +Common FS and GS usage +------------------------------ + +The FS segment is commonly used to address Thread Local Storage (TLS). FS +is usually managed by runtime code or a threading library. Variables +declared with the '__thread' storage class specifier are instantiated per +thread and the compiler emits the FS: address prefix for accesses to these +variables. Each thread has its own FS base address so common code can be +used without complex address offset calculations to access the per thread +instances. Applications should not use FS for other purposes when they use +runtimes or threading libraries which manage the per thread FS. + +The GS segment has no common use and can be used freely by +applications. GCC and Clang support GS based addressing via address space +identifiers. + +Reading and writing the FS/GS base address +------------------------------------------ + +There exist two mechanisms to read and write the FS/GS base address: + + - the arch_prctl() system call + + - the FSGSBASE instruction family + +Accessing FS/GS base with arch_prctl() +-------------------------------------- + + The arch_prctl(2) based mechanism is available on all 64-bit CPUs and all + kernel versions. + + Reading the base: + + arch_prctl(ARCH_GET_FS, &fsbase); + arch_prctl(ARCH_GET_GS, &gsbase); + + Writing the base: + + arch_prctl(ARCH_SET_FS, fsbase); + arch_prctl(ARCH_SET_GS, gsbase); + + The ARCH_SET_GS prctl may be disabled depending on kernel configuration + and security settings. + +Accessing FS/GS base with the FSGSBASE instructions +--------------------------------------------------- + + With the Ivy Bridge CPU generation Intel introduced a new set of + instructions to access the FS and GS base registers directly from user + space. These instructions are also supported on AMD Family 17H CPUs. The + following instructions are available: + + =============== =========================== + RDFSBASE %reg Read the FS base register + RDGSBASE %reg Read the GS base register + WRFSBASE %reg Write the FS base register + WRGSBASE %reg Write the GS base register + =============== =========================== + + The instructions avoid the overhead of the arch_prctl() syscall and allow + more flexible usage of the FS/GS addressing modes in user space + applications. This does not prevent conflicts between threading libraries + and runtimes which utilize FS and applications which want to use it for + their own purpose. + +FSGSBASE instructions enablement +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If + available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs. + + The availability of the instructions does not enable them + automatically. The kernel has to enable them explicitly in CR4. The + reason for this is that older kernels make assumptions about the values in + the GS register and enforce them when GS base is set via + arch_prctl(). Allowing user space to write arbitrary values to GS base + would violate these assumptions and cause malfunction. + + On kernels which do not enable FSGSBASE the execution of the FSGSBASE + instructions will fault with a #UD exception. + + The kernel provides reliable information about the enabled state in the + ELF AUX vector. If the HWCAP2_FSGSBASE bit is set in the AUX vector, the + kernel has FSGSBASE instructions enabled and applications can use them. + The following code example shows how this detection works:: + + #include + #include + + /* Will be eventually in asm/hwcap.h */ + #ifndef HWCAP2_FSGSBASE + #define HWCAP2_FSGSBASE (1 << 1) + #endif + + .... + + unsigned val = getauxval(AT_HWCAP2); + + if (val & HWCAP2_FSGSBASE) + printf("FSGSBASE enabled\n"); + +FSGSBASE instructions compiler support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE +instructions. Clang 5 supports them as well. + + =================== =========================== + _readfsbase_u64() Read the FS base register + _readfsbase_u64() Read the GS base register + _writefsbase_u64() Write the FS base register + _writegsbase_u64() Write the GS base register + =================== =========================== + +To utilize these instrinsics must be included in the source +code and the compiler option -mfsgsbase has to be added. + +Compiler support for FS/GS based addressing +------------------------------------------- + +GCC version 6 and newer provide support for FS/GS based addressing via +Named Address Spaces. GCC implements the following address space +identifiers for x86: + + ========= ==================================== + __seg_fs Variable is addressed relative to FS + __seg_gs Variable is addressed relative to GS + ========= ==================================== + +The preprocessor symbols __SEG_FS and __SEG_GS are defined when these +address spaces are supported. Code which implements fallback modes should +check whether these symbols are defined. Usage example:: + + #ifdef __SEG_GS + + long data0 = 0; + long data1 = 1; + + long __seg_gs *ptr; + + /* Check whether FSGSBASE is enabled by the kernel (HWCAP2_FSGSBASE) */ + .... + + /* Set GS base to point to data0 */ + _writegsbase_u64(&data0); + + /* Access offset 0 of GS */ + ptr = 0; + printf("data0 = %ld\n", *ptr); + + /* Set GS base to point to data1 */ + _writegsbase_u64(&data1); + /* ptr still addresses offset 0! */ + printf("data1 = %ld\n", *ptr); + + +Clang does not provide the GCC address space identifiers, but it provides +address spaces via an attribute based mechanism in Clang 2.6 and newer +versions: + + ==================================== ===================================== + __attribute__((address_space(256)) Variable is addressed relative to GS + __attribute__((address_space(257)) Variable is addressed relative to FS + ==================================== ===================================== + +FS/GS based addressing with inline assembly +------------------------------------------- + +In case the compiler does not support address spaces, inline assembly can +be used for FS/GS based addressing mode:: + + mov %fs:offset, %reg + mov %gs:offset, %reg + + mov %reg, %fs:offset + mov %reg, %gs:offset diff --git a/Documentation/x86/x86_64/index.rst b/Documentation/x86/x86_64/index.rst index d6eaaa5a35fc..a56070fc8e77 100644 --- a/Documentation/x86/x86_64/index.rst +++ b/Documentation/x86/x86_64/index.rst @@ -14,3 +14,4 @@ x86_64 Support fake-numa-for-cpusets cpu-hotplug-spec machinecheck + fsgs -- cgit v1.2.3