diff --git a/Makefile b/Makefile index 80e78ab1cec4..3f0530ef8a35 100644 --- a/Makefile +++ b/Makefile @@ -654,6 +654,16 @@ RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS +# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure +# ar/cc/ld-* macros return correct values. +ifdef CONFIG_LTO_CLANG +# LTO produces LLVM IR instead of object files. Use llvm-ar and llvm-nm, so we +# can process these. +AR := llvm-ar +LLVM_NM := llvm-nm +export LLVM_NM +endif + include arch/$(SRCARCH)/Makefile ifdef need-config @@ -856,6 +866,22 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS) export CC_FLAGS_SCS endif +ifdef CONFIG_LTO_CLANG +ifdef CONFIG_THINLTO +CC_FLAGS_LTO_CLANG := -flto=thin $(call cc-option, -fsplit-lto-unit) +KBUILD_LDFLAGS += --thinlto-cache-dir=.thinlto-cache +else +CC_FLAGS_LTO_CLANG := -flto +endif +CC_FLAGS_LTO_CLANG += -fvisibility=default +endif + +ifdef CONFIG_LTO +CC_FLAGS_LTO := $(CC_FLAGS_LTO_CLANG) +KBUILD_CFLAGS += $(CC_FLAGS_LTO) +export CC_FLAGS_LTO +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) @@ -1682,7 +1708,8 @@ clean: $(clean-dirs) -o -name modules.builtin -o -name '.tmp_*.o.*' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ - -o -name '*.gcno' \) -type f -print | xargs rm -f + -o -name '*.gcno' \ + -o -name '*.*.symversions' \) -type f -print | xargs rm -f # Generate tags for editors # --------------------------------------------------------------------------- diff --git a/arch/Kconfig b/arch/Kconfig index 90a8c5ef4487..1766abf2f5c2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -554,6 +554,53 @@ config SHADOW_CALL_STACK_VMAP provides better stack exhaustion protection, but increases per-thread memory consumption as a full page is allocated for each shadow stack. +config LTO + bool + +config ARCH_SUPPORTS_LTO_CLANG + bool + help + An architecture should select this option if it supports: + - compiling with Clang, + - compiling inline assembly with Clang's integrated assembler, + - and linking with LLD. + +config ARCH_SUPPORTS_THINLTO + bool + help + An architecture should select this if it supports Clang ThinLTO. + +config THINLTO + bool "Use Clang's ThinLTO (EXPERIMENTAL)" + depends on LTO_CLANG && ARCH_SUPPORTS_THINLTO + default y + help + Use ThinLTO to speed up Link Time Optimization. + +choice + prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)" + default LTO_NONE + help + This option turns on Link-Time Optimization (LTO). + +config LTO_NONE + bool "None" + +config LTO_CLANG + bool "Use Clang's Link Time Optimization (LTO) (EXPERIMENTAL)" + depends on ARCH_SUPPORTS_LTO_CLANG + depends on !KASAN + depends on !FTRACE_MCOUNT_RECORD + depends on CC_IS_CLANG && CLANG_VERSION >= 100000 && LD_IS_LLD + select LTO + help + This option enables Clang's Link Time Optimization (LTO), which allows + the compiler to optimize the kernel globally at link time. If you + enable this option, the compiler generates LLVM IR instead of object + files, and the actual compilation from IR occurs at the LTO link step, + which may take several minutes. + +endchoice config HAVE_ARCH_WITHIN_STACK_FRAMES bool diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index dae64600ccbf..ec4827773569 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -63,10 +63,13 @@ * .data. We don't want to pull in .data..other sections, which Linux * has defined. Same for text and bss. * + * With LTO_CLANG, the linker also splits sections by default, so we need + * these macros to combine the sections during the final link. + * * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a9e47953ca53..7aefd2a1347e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -93,7 +93,7 @@ endif # --------------------------------------------------------------------------- quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ - cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $< + cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $< $(obj)/%.s: $(src)/%.c FORCE $(call if_changed_dep,cc_s_c) @@ -148,6 +148,15 @@ ifdef CONFIG_MODVERSIONS # the actual value of the checksum generated by genksyms # o remove .tmp_.o to .o +ifdef CONFIG_LTO_CLANG +# Generate .o.symversions files for each .o with exported symbols, and link these +# to the kernel and/or modules at the end. +cmd_modversions_c = \ + if $(LLVM_NM) $@ | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $@.symversions; \ + fi; +else cmd_modversions_c = \ if $(OBJDUMP) -h $@ | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ @@ -159,6 +168,7 @@ cmd_modversions_c = \ rm -f $(@D)/.tmp_$(@F:.o=.ver); \ fi endif +endif ifdef CONFIG_FTRACE_MCOUNT_RECORD ifndef CC_USING_RECORD_MCOUNT @@ -383,6 +393,21 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler # To build objects in subdirs, we need to descend into the directories $(sort $(subdir-obj-y)): $(subdir-ym) ; +# combine symversions for later processing +quiet_cmd_update_lto_symversions = SYMVER $@ +ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y) + cmd_update_lto_symversions = \ + rm -f $@.symversions; \ + for i in $(filter-out FORCE,$^); do \ + if [ -f $$i.symversions ]; then \ + cat $$i.symversions \ + >> $@.symversions; \ + fi; \ + done +else + cmd_update_lto_symversions = echo >/dev/null +endif + # # Rule to compile a set of .o files into one .a file (without symbol table) # @@ -391,8 +416,11 @@ ifdef builtin-target quiet_cmd_ar_builtin = AR $@ cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs) +quiet_cmd_ar_and_symver = AR $@ + cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin) + $(builtin-target): $(real-obj-y) FORCE - $(call if_changed,ar_builtin) + $(call if_changed,ar_and_symver) targets += $(builtin-target) endif # builtin-target @@ -412,16 +440,26 @@ $(modorder-target): $(subdir-ym) FORCE # ifdef lib-target +quiet_cmd_ar_lib = AR $@ + cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar) + $(lib-target): $(lib-y) FORCE - $(call if_changed,ar) + $(call if_changed,ar_lib) targets += $(lib-target) dummy-object = $(obj)/.lib_exports.o ksyms-lds = $(dot-target).lds +ifdef CONFIG_LTO_CLANG +# Objdump doesn't understand LLVM IR. Use llvm-nm to dump symbols. +dump_export_list = $(LLVM_NM) +else +dump_export_list = $(OBJDUMP) -h +endif + quiet_cmd_export_list = EXPORTS $@ -cmd_export_list = $(OBJDUMP) -h $< | \ +cmd_export_list = $(dump_export_list) $< | \ sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\ rm -f $(dummy-object);\ echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\ @@ -439,8 +477,16 @@ endif # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object # module is turned into a multi object module, $^ will contain header file # dependencies recorded in the .*.cmd file. +ifdef CONFIG_LTO_CLANG +quiet_cmd_link_multi-m = AR [M] $@ +cmd_link_multi-m = \ + $(cmd_update_lto_symversions); \ + rm -f $@; \ + $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter %.o,$^) +else quiet_cmd_link_multi-m = LD [M] $@ cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) +endif $(multi-used-m): FORCE $(call if_changed,link_multi-m) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 411c1e600e7d..2164a84a45a2 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -6,6 +6,7 @@ PHONY := __modfinal __modfinal: +include $(objtree)/include/config/auto.conf include $(srctree)/scripts/Kbuild.include # for c_flags @@ -30,12 +31,24 @@ quiet_cmd_cc_o_c = CC [M] $@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) quiet_cmd_ld_ko_o = LD [M] $@ + +ifdef CONFIG_LTO_CLANG + cmd_ld_ko_o = \ + $(LD) -r $(LDFLAGS) \ + $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ + $(addprefix -T , $(KBUILD_LDS_MODULE)) \ + $(shell [ -s $(@:.ko=.o.symversions) ] && \ + echo -T $(@:.ko=.o.symversions)) \ + -o $@ --whole-archive $(filter %.o, $^); \ + $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) +else cmd_ld_ko_o = \ $(LD) -r $(KBUILD_LDFLAGS) \ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ $(addprefix -T , $(KBUILD_LDS_MODULE)) \ -o $@ $(filter %.o, $^); \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) +endif $(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) FORCE +$(call if_changed,ld_ko_o) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 952fff485546..b8b447b62283 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -84,12 +84,32 @@ MODPOST += $(subst -i,-n,$(filter -i,$(MAKEFLAGS))) -s -T - $(wildcard vmlinux) # find all modules listed in modules.order modules := $(sort $(shell cat $(MODORDER))) +# With CONFIG_LTO_CLANG, .o files might be LLVM IR, so we need to link them +# into actual objects before passing them to modpost +modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,) + +ifdef CONFIG_LTO_CLANG +quiet_cmd_cc_lto_link_modules = LTO [M] $@ +cmd_cc_lto_link_modules = \ + $(LD) $(ld_flags) -r -o $(@) \ + $(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \ + echo -T $(@:$(modpost-ext).o=.o.symversions)) \ + --whole-archive $(filter-out FORCE,$^) + +$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE + $(call if_changed,cc_lto_link_modules) + +PHONY += FORCE +FORCE: + +endif + # Read out modules.order instead of expanding $(modules) to pass in modpost. # Otherwise, allmodconfig would fail with "Argument list too long". quiet_cmd_modpost = MODPOST $(words $(modules)) modules - cmd_modpost = sed 's/ko$$/o/' $(MODORDER) | $(MODPOST) + cmd_modpost = sed 's/\.ko$$/$(modpost-ext)\.o/' $(MODORDER) | $(MODPOST) -__modpost: +__modpost: $(modules:.ko=$(modpost-ext).o) @$(kecho) ' Building modules, stage 2.' $(call cmd,modpost) ifneq ($(KBUILD_MODPOST_NOFINAL),1) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 06495379fcd8..e589ce9b0acd 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -39,6 +39,30 @@ info() fi } +# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into +# .tmp_symversions +modversions() +{ + if [ -z "${CONFIG_LTO_CLANG}" ]; then + return + fi + if [ -z "${CONFIG_MODVERSIONS}" ]; then + return + fi + + rm -f .tmp_symversions + + for a in ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}; do + for o in $(${AR} t $a 2>/dev/null); do + if [ -f ${o}.symversions ]; then + cat ${o}.symversions >> .tmp_symversions + fi + done + done + + echo "-T .tmp_symversions" +} + # Link of vmlinux.o used for section mismatch analysis # ${1} output file modpost_link() @@ -52,7 +76,15 @@ modpost_link() ${KBUILD_VMLINUX_LIBS} \ --end-group" - ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects} + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # This might take a while, so indicate that we're doing + # an LTO link + info LTO ${1} + else + info LD ${1} + fi + + ${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects} } # Link of vmlinux @@ -70,13 +102,22 @@ vmlinux_link() shift if [ "${SRCARCH}" != "um" ]; then - objects="--whole-archive \ - ${KBUILD_VMLINUX_OBJS} \ - --no-whole-archive \ - --start-group \ - ${KBUILD_VMLINUX_LIBS} \ - --end-group \ - ${@}" + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # Use vmlinux.o instead of performing the slow LTO + # link again. + objects="--whole-archive \ + vmlinux.o \ + --no-whole-archive \ + ${@}" + else + objects="--whole-archive \ + ${KBUILD_VMLINUX_OBJS} \ + --no-whole-archive \ + --start-group \ + ${KBUILD_VMLINUX_LIBS} \ + --end-group \ + ${@}" + fi ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \ -o ${output} \ @@ -189,6 +230,7 @@ cleanup() rm -f .btf.* rm -f .tmp_System.map rm -f .tmp_kallsyms* + rm -f .tmp_symversions rm -f .tmp_vmlinux* rm -f System.map rm -f vmlinux @@ -240,7 +282,6 @@ fi; ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init #link vmlinux.o -info LD vmlinux.o modpost_link vmlinux.o # modpost vmlinux.o to check for section mismatches diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index d2a30a7b3f07..6ef098430b86 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -146,6 +146,9 @@ static struct module *new_module(const char *modname) p[strlen(p) - 2] = '\0'; mod->is_dot_o = 1; } + /* strip trailing .lto */ + if (strends(p, ".lto")) + p[strlen(p) - 4] = '\0'; /* add to list */ mod->name = p; @@ -2000,6 +2003,10 @@ static char *remove_dot(char *s) size_t m = strspn(s + n + 1, "0123456789"); if (m && (s[n + m] == '.' || s[n + m] == 0)) s[n] = 0; + + /* strip trailing .lto */ + if (strends(s, ".lto")) + s[strlen(s) - 4] = '\0'; } return s; }