diff options
author | Sami Tolvanen <samitolvanen@google.com> | 2020-12-11 10:46:19 -0800 |
---|---|---|
committer | Kees Cook <keescook@chromium.org> | 2021-01-14 08:21:08 -0800 |
commit | dc5723b02e523b2c4a68667f7e28c65018f7202f (patch) | |
tree | e31da4c11f359712314d351d6b4281739785adfe | |
parent | 3b15cdc15956673ba1551d79bceae471436ac6a9 (diff) | |
download | lwn-dc5723b02e523b2c4a68667f7e28c65018f7202f.tar.gz lwn-dc5723b02e523b2c4a68667f7e28c65018f7202f.zip |
kbuild: add support for Clang LTO
This change adds build system support for Clang's Link Time
Optimization (LTO). With -flto, instead of ELF object files, Clang
produces LLVM bitcode, which is compiled into native code at link
time, allowing the final binary to be optimized globally. For more
details, see:
https://llvm.org/docs/LinkTimeOptimization.html
The Kconfig option CONFIG_LTO_CLANG is implemented as a choice,
which defaults to LTO being disabled. To use LTO, the architecture
must select ARCH_SUPPORTS_LTO_CLANG and support:
- compiling with Clang,
- compiling all assembly code with Clang's integrated assembler,
- and linking with LLD.
While using CONFIG_LTO_CLANG_FULL results in the best runtime
performance, the compilation is not scalable in time or
memory. CONFIG_LTO_CLANG_THIN enables ThinLTO, which allows
parallel optimization and faster incremental builds. ThinLTO is
used by default if the architecture also selects
ARCH_SUPPORTS_LTO_CLANG_THIN:
https://clang.llvm.org/docs/ThinLTO.html
To enable LTO, LLVM tools must be used to handle bitcode files, by
passing LLVM=1 and LLVM_IAS=1 options to make:
$ make LLVM=1 LLVM_IAS=1 defconfig
$ scripts/config -e LTO_CLANG_THIN
$ make LLVM=1 LLVM_IAS=1
To prepare for LTO support with other compilers, common parts are
gated behind the CONFIG_LTO option, and LTO can be disabled for
specific files by filtering out CC_FLAGS_LTO.
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201211184633.3213045-3-samitolvanen@google.com
-rw-r--r-- | Makefile | 19 | ||||
-rw-r--r-- | arch/Kconfig | 91 | ||||
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 11 | ||||
-rw-r--r-- | scripts/Makefile.build | 9 | ||||
-rw-r--r-- | scripts/Makefile.modfinal | 9 | ||||
-rw-r--r-- | scripts/Makefile.modpost | 21 | ||||
-rwxr-xr-x | scripts/link-vmlinux.sh | 32 |
7 files changed, 174 insertions, 18 deletions
@@ -893,6 +893,21 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS) export CC_FLAGS_SCS endif +ifdef CONFIG_LTO_CLANG +ifdef CONFIG_LTO_CLANG_THIN +CC_FLAGS_LTO += -flto=thin -fsplit-lto-unit +KBUILD_LDFLAGS += --thinlto-cache-dir=$(extmod-prefix).thinlto-cache +else +CC_FLAGS_LTO += -flto +endif +CC_FLAGS_LTO += -fvisibility=hidden +endif + +ifdef CONFIG_LTO +KBUILD_CFLAGS += $(CC_FLAGS_LTO) +export CC_FLAGS_LTO +endif + ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B KBUILD_CFLAGS += -falign-functions=32 endif @@ -1479,7 +1494,7 @@ MRPROPER_FILES += include/config include/generated \ *.spec # Directories & files removed with 'make distclean' -DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS +DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS .thinlto-cache # clean - Delete most, but leave enough to build external modules # @@ -1725,7 +1740,7 @@ PHONY += compile_commands.json clean-dirs := $(KBUILD_EXTMOD) clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \ - $(KBUILD_EXTMOD)/compile_commands.json + $(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache PHONY += help help: diff --git a/arch/Kconfig b/arch/Kconfig index 78c6f05b10f9..9494e3931f4b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -631,6 +631,97 @@ config SHADOW_CALL_STACK reading and writing arbitrary memory may be able to locate them and hijack control flow by modifying the stacks. +config LTO + bool + help + Selected if the kernel will be built using the compiler's LTO feature. + +config LTO_CLANG + bool + select LTO + help + Selected if the kernel will be built using Clang's LTO feature. + +config ARCH_SUPPORTS_LTO_CLANG + bool + help + An architecture should select this option if it supports: + - compiling with Clang, + - compiling inline assembly with Clang's integrated assembler, + - and linking with LLD. + +config ARCH_SUPPORTS_LTO_CLANG_THIN + bool + help + An architecture should select this option if it can support Clang's + ThinLTO mode. + +config HAS_LTO_CLANG + def_bool y + # Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510 + depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD + depends on $(success,test $(LLVM) -eq 1) + depends on $(success,test $(LLVM_IAS) -eq 1) + depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm) + depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) + depends on ARCH_SUPPORTS_LTO_CLANG + depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT + depends on !KASAN + depends on !GCOV_KERNEL + depends on !MODVERSIONS + help + The compiler and Kconfig options support building with Clang's + LTO. + +choice + prompt "Link Time Optimization (LTO)" + default LTO_NONE + help + This option enables Link Time Optimization (LTO), which allows the + compiler to optimize binaries globally. + + If unsure, select LTO_NONE. Note that LTO is very resource-intensive + so it's disabled by default. + +config LTO_NONE + bool "None" + help + Build the kernel normally, without Link Time Optimization (LTO). + +config LTO_CLANG_FULL + bool "Clang Full LTO (EXPERIMENTAL)" + depends on HAS_LTO_CLANG + depends on !COMPILE_TEST + select LTO_CLANG + help + This option enables Clang's full Link Time Optimization (LTO), which + allows the compiler to optimize the kernel globally. If you enable + this option, the compiler generates LLVM bitcode instead of ELF + object files, and the actual compilation from bitcode happens at + the LTO link step, which may take several minutes depending on the + kernel configuration. More information can be found from LLVM's + documentation: + + https://llvm.org/docs/LinkTimeOptimization.html + + During link time, this option can use a large amount of RAM, and + may take much longer than the ThinLTO option. + +config LTO_CLANG_THIN + bool "Clang ThinLTO (EXPERIMENTAL)" + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN + select LTO_CLANG + help + This option enables Clang's ThinLTO, which allows for parallel + optimization and faster incremental compiles compared to the + CONFIG_LTO_CLANG_FULL option. More information can be found + from Clang's documentation: + + https://clang.llvm.org/docs/ThinLTO.html + + If unsure, say Y. +endchoice + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b2b3d81b1535..8988a2e445d8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -90,15 +90,18 @@ * .data. We don't want to pull in .data..other sections, which Linux * has defined. Same for text and bss. * + * With LTO_CLANG, the linker also splits sections by default, so we need + * these macros to combine the sections during the final link. + * * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* -#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else #define TEXT_MAIN .text diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 22654a463ad8..65d4bea937fa 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -111,7 +111,7 @@ endif # --------------------------------------------------------------------------- quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ - cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) -fverbose-asm -S -o $@ $< + cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) -fverbose-asm -S -o $@ $< $(obj)/%.s: $(src)/%.c FORCE $(call if_changed_dep,cc_s_c) @@ -421,8 +421,15 @@ $(obj)/lib.a: $(lib-y) FORCE # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object # module is turned into a multi object module, $^ will contain header file # dependencies recorded in the .*.cmd file. +ifdef CONFIG_LTO_CLANG +quiet_cmd_link_multi-m = AR [M] $@ +cmd_link_multi-m = \ + rm -f $@; \ + $(AR) cDPrsT $@ $(filter %.o,$^) +else quiet_cmd_link_multi-m = LD [M] $@ cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) +endif $(multi-used-m): FORCE $(call if_changed,link_multi-m) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index d49ec001825d..6de2c35b64e8 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -30,6 +30,12 @@ quiet_cmd_cc_o_c = CC [M] $@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) +ifdef CONFIG_LTO_CLANG +# With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to +# avoid a second slow LTO link +prelink-ext := .lto +endif + quiet_cmd_ld_ko_o = LD [M] $@ cmd_ld_ko_o = \ $(LD) -r $(KBUILD_LDFLAGS) \ @@ -53,8 +59,9 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \ $(cmd); \ printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) + # Re-generate module BTFs if either module's .ko or vmlinux changed -$(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE +$(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE +$(call if_changed_except,ld_ko_o,vmlinux) ifdef CONFIG_DEBUG_INFO_BTF_MODULES +$(if $(newer-prereqs),$(call cmd,btf_ko)) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f54b6ac37ac2..9ff8bfdb574d 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -43,6 +43,9 @@ __modpost: include include/config/auto.conf include scripts/Kbuild.include +# for ld_flags +include scripts/Makefile.lib + MODPOST = scripts/mod/modpost \ $(if $(CONFIG_MODVERSIONS),-m) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ @@ -102,12 +105,26 @@ $(input-symdump): @echo >&2 'WARNING: Symbol version dump "$@" is missing.' @echo >&2 ' Modules may not have dependencies or modversions.' +ifdef CONFIG_LTO_CLANG +# With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run +# LTO to compile them into native code before running modpost +prelink-ext := .lto + +quiet_cmd_cc_lto_link_modules = LTO [M] $@ +cmd_cc_lto_link_modules = $(LD) $(ld_flags) -r -o $@ --whole-archive $^ + +%.lto.o: %.o + $(call if_changed,cc_lto_link_modules) +endif + +modules := $(sort $(shell cat $(MODORDER))) + # Read out modules.order to pass in modpost. # Otherwise, allmodconfig would fail with "Argument list too long". quiet_cmd_modpost = MODPOST $@ - cmd_modpost = sed 's/ko$$/o/' $< | $(MODPOST) -T - + cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T - -$(output-symdump): $(MODORDER) $(input-symdump) FORCE +$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE $(call if_changed,modpost) targets += $(output-symdump) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 6eded325c837..596507573a48 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -56,6 +56,14 @@ modpost_link() ${KBUILD_VMLINUX_LIBS} \ --end-group" + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # This might take a while, so indicate that we're doing + # an LTO link + info LTO ${1} + else + info LD ${1} + fi + ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects} } @@ -103,13 +111,22 @@ vmlinux_link() fi if [ "${SRCARCH}" != "um" ]; then - objects="--whole-archive \ - ${KBUILD_VMLINUX_OBJS} \ - --no-whole-archive \ - --start-group \ - ${KBUILD_VMLINUX_LIBS} \ - --end-group \ - ${@}" + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # Use vmlinux.o instead of performing the slow LTO + # link again. + objects="--whole-archive \ + vmlinux.o \ + --no-whole-archive \ + ${@}" + else + objects="--whole-archive \ + ${KBUILD_VMLINUX_OBJS} \ + --no-whole-archive \ + --start-group \ + ${KBUILD_VMLINUX_LIBS} \ + --end-group \ + ${@}" + fi ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \ ${strip_debug#-Wl,} \ @@ -274,7 +291,6 @@ fi; ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1 #link vmlinux.o -info LD vmlinux.o modpost_link vmlinux.o objtool_link vmlinux.o |