ANDROID: kbuild: add support for Clang LTO
This change adds the configuration option CONFIG_LTO_CLANG, and build system support for Clang's Link Time Optimization (LTO). In preparation for LTO support with other compilers, potentially common parts of the changes are gated behind CONFIG_LTO instead.

With -flto, instead of object files, Clang produces LLVM bitcode, which is compiled into a native object at link time, allowing the final binary to be optimized globally. For more details, see:

  https://llvm.org/docs/LinkTimeOptimization.html

While the kernel normally uses GNU ld for linking, LLVM supports LTO only with LLD or GNU gold linkers. This change assumes LLD is used.

Bug: 145210207
Change-Id: If1164ff33d073358ee7d4bba84cbb06c349c4a88
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
This commit is contained in:
parent
548c06ceb9
commit
2e39b40dd2
29
Makefile
29
Makefile
@ -654,6 +654,16 @@ RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc
|
||||
export RETPOLINE_CFLAGS
|
||||
export RETPOLINE_VDSO_CFLAGS
|
||||
|
||||
# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure
|
||||
# ar/cc/ld-* macros return correct values.
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
# LTO produces LLVM IR instead of object files. Use llvm-ar and llvm-nm, so we
|
||||
# can process these.
|
||||
AR := llvm-ar
|
||||
LLVM_NM := llvm-nm
|
||||
export LLVM_NM
|
||||
endif
|
||||
|
||||
include arch/$(SRCARCH)/Makefile
|
||||
|
||||
ifdef need-config
|
||||
@ -856,6 +866,22 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS)
|
||||
export CC_FLAGS_SCS
|
||||
endif
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
ifdef CONFIG_THINLTO
|
||||
CC_FLAGS_LTO_CLANG := -flto=thin $(call cc-option, -fsplit-lto-unit)
|
||||
KBUILD_LDFLAGS += --thinlto-cache-dir=.thinlto-cache
|
||||
else
|
||||
CC_FLAGS_LTO_CLANG := -flto
|
||||
endif
|
||||
CC_FLAGS_LTO_CLANG += -fvisibility=default
|
||||
endif
|
||||
|
||||
ifdef CONFIG_LTO
|
||||
CC_FLAGS_LTO := $(CC_FLAGS_LTO_CLANG)
|
||||
KBUILD_CFLAGS += $(CC_FLAGS_LTO)
|
||||
export CC_FLAGS_LTO
|
||||
endif
|
||||
|
||||
# arch Makefile may override CC so keep this after arch Makefile is included
|
||||
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
|
||||
|
||||
@ -1682,7 +1708,8 @@ clean: $(clean-dirs)
|
||||
-o -name modules.builtin -o -name '.tmp_*.o.*' \
|
||||
-o -name '*.c.[012]*.*' \
|
||||
-o -name '*.ll' \
|
||||
-o -name '*.gcno' \) -type f -print | xargs rm -f
|
||||
-o -name '*.gcno' \
|
||||
-o -name '*.*.symversions' \) -type f -print | xargs rm -f
|
||||
|
||||
# Generate tags for editors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
47
arch/Kconfig
47
arch/Kconfig
@ -554,6 +554,53 @@ config SHADOW_CALL_STACK_VMAP
|
||||
provides better stack exhaustion protection, but increases per-thread
|
||||
memory consumption as a full page is allocated for each shadow stack.
|
||||
|
||||
config LTO
|
||||
bool
|
||||
|
||||
config ARCH_SUPPORTS_LTO_CLANG
|
||||
bool
|
||||
help
|
||||
An architecture should select this option if it supports:
|
||||
- compiling with Clang,
|
||||
- compiling inline assembly with Clang's integrated assembler,
|
||||
- and linking with LLD.
|
||||
|
||||
config ARCH_SUPPORTS_THINLTO
|
||||
bool
|
||||
help
|
||||
An architecture should select this if it supports Clang ThinLTO.
|
||||
|
||||
config THINLTO
|
||||
bool "Use Clang's ThinLTO (EXPERIMENTAL)"
|
||||
depends on LTO_CLANG && ARCH_SUPPORTS_THINLTO
|
||||
default y
|
||||
help
|
||||
Use ThinLTO to speed up Link Time Optimization.
|
||||
|
||||
choice
|
||||
prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
|
||||
default LTO_NONE
|
||||
help
|
||||
This option turns on Link-Time Optimization (LTO).
|
||||
|
||||
config LTO_NONE
|
||||
bool "None"
|
||||
|
||||
config LTO_CLANG
|
||||
bool "Use Clang's Link Time Optimization (LTO) (EXPERIMENTAL)"
|
||||
depends on ARCH_SUPPORTS_LTO_CLANG
|
||||
depends on !KASAN
|
||||
depends on !FTRACE_MCOUNT_RECORD
|
||||
depends on CC_IS_CLANG && CLANG_VERSION >= 100000 && LD_IS_LLD
|
||||
select LTO
|
||||
help
|
||||
This option enables Clang's Link Time Optimization (LTO), which allows
|
||||
the compiler to optimize the kernel globally at link time. If you
|
||||
enable this option, the compiler generates LLVM IR instead of object
|
||||
files, and the actual compilation from IR occurs at the LTO link step,
|
||||
which may take several minutes.
|
||||
|
||||
endchoice
|
||||
|
||||
config HAVE_ARCH_WITHIN_STACK_FRAMES
|
||||
bool
|
||||
|
@ -63,10 +63,13 @@
|
||||
* .data. We don't want to pull in .data..other sections, which Linux
|
||||
* has defined. Same for text and bss.
|
||||
*
|
||||
* With LTO_CLANG, the linker also splits sections by default, so we need
|
||||
* these macros to combine the sections during the final link.
|
||||
*
|
||||
* RODATA_MAIN is not used because existing code already defines .rodata.x
|
||||
* sections to be brought in with rodata.
|
||||
*/
|
||||
#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
|
||||
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
|
||||
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
|
||||
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
|
||||
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
|
||||
|
@ -93,7 +93,7 @@ endif
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
|
||||
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<
|
||||
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<
|
||||
|
||||
$(obj)/%.s: $(src)/%.c FORCE
|
||||
$(call if_changed_dep,cc_s_c)
|
||||
@ -148,6 +148,15 @@ ifdef CONFIG_MODVERSIONS
|
||||
# the actual value of the checksum generated by genksyms
|
||||
# o rename .tmp_<file>.o to <file>.o
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
# Generate .o.symversions files for each .o with exported symbols, and link these
|
||||
# to the kernel and/or modules at the end.
|
||||
cmd_modversions_c = \
|
||||
if $(LLVM_NM) $@ | grep -q __ksymtab; then \
|
||||
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
|
||||
> $@.symversions; \
|
||||
fi;
|
||||
else
|
||||
cmd_modversions_c = \
|
||||
if $(OBJDUMP) -h $@ | grep -q __ksymtab; then \
|
||||
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
|
||||
@ -159,6 +168,7 @@ cmd_modversions_c = \
|
||||
rm -f $(@D)/.tmp_$(@F:.o=.ver); \
|
||||
fi
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef CONFIG_FTRACE_MCOUNT_RECORD
|
||||
ifndef CC_USING_RECORD_MCOUNT
|
||||
@ -383,6 +393,21 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
|
||||
# To build objects in subdirs, we need to descend into the directories
|
||||
$(sort $(subdir-obj-y)): $(subdir-ym) ;
|
||||
|
||||
# combine symversions for later processing
|
||||
quiet_cmd_update_lto_symversions = SYMVER $@
|
||||
ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y)
|
||||
cmd_update_lto_symversions = \
|
||||
rm -f $@.symversions; \
|
||||
for i in $(filter-out FORCE,$^); do \
|
||||
if [ -f $$i.symversions ]; then \
|
||||
cat $$i.symversions \
|
||||
>> $@.symversions; \
|
||||
fi; \
|
||||
done
|
||||
else
|
||||
cmd_update_lto_symversions = echo >/dev/null
|
||||
endif
|
||||
|
||||
#
|
||||
# Rule to compile a set of .o files into one .a file (without symbol table)
|
||||
#
|
||||
@ -391,8 +416,11 @@ ifdef builtin-target
|
||||
quiet_cmd_ar_builtin = AR $@
|
||||
cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs)
|
||||
|
||||
quiet_cmd_ar_and_symver = AR $@
|
||||
cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin)
|
||||
|
||||
$(builtin-target): $(real-obj-y) FORCE
|
||||
$(call if_changed,ar_builtin)
|
||||
$(call if_changed,ar_and_symver)
|
||||
|
||||
targets += $(builtin-target)
|
||||
endif # builtin-target
|
||||
@ -412,16 +440,26 @@ $(modorder-target): $(subdir-ym) FORCE
|
||||
#
|
||||
ifdef lib-target
|
||||
|
||||
quiet_cmd_ar_lib = AR $@
|
||||
cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar)
|
||||
|
||||
$(lib-target): $(lib-y) FORCE
|
||||
$(call if_changed,ar)
|
||||
$(call if_changed,ar_lib)
|
||||
|
||||
targets += $(lib-target)
|
||||
|
||||
dummy-object = $(obj)/.lib_exports.o
|
||||
ksyms-lds = $(dot-target).lds
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
# Objdump doesn't understand LLVM IR. Use llvm-nm to dump symbols.
|
||||
dump_export_list = $(LLVM_NM)
|
||||
else
|
||||
dump_export_list = $(OBJDUMP) -h
|
||||
endif
|
||||
|
||||
quiet_cmd_export_list = EXPORTS $@
|
||||
cmd_export_list = $(OBJDUMP) -h $< | \
|
||||
cmd_export_list = $(dump_export_list) $< | \
|
||||
sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\
|
||||
rm -f $(dummy-object);\
|
||||
echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
|
||||
@ -439,8 +477,16 @@ endif
|
||||
# Do not replace $(filter %.o,^) with $(real-prereqs). When a single object
|
||||
# module is turned into a multi object module, $^ will contain header file
|
||||
# dependencies recorded in the .*.cmd file.
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
quiet_cmd_link_multi-m = AR [M] $@
|
||||
cmd_link_multi-m = \
|
||||
$(cmd_update_lto_symversions); \
|
||||
rm -f $@; \
|
||||
$(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter %.o,$^)
|
||||
else
|
||||
quiet_cmd_link_multi-m = LD [M] $@
|
||||
cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
|
||||
endif
|
||||
|
||||
$(multi-used-m): FORCE
|
||||
$(call if_changed,link_multi-m)
|
||||
|
@ -6,6 +6,7 @@
|
||||
PHONY := __modfinal
|
||||
__modfinal:
|
||||
|
||||
include $(objtree)/include/config/auto.conf
|
||||
include $(srctree)/scripts/Kbuild.include
|
||||
|
||||
# for c_flags
|
||||
@ -30,12 +31,24 @@ quiet_cmd_cc_o_c = CC [M] $@
|
||||
ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
|
||||
|
||||
quiet_cmd_ld_ko_o = LD [M] $@
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
cmd_ld_ko_o = \
|
||||
$(LD) -r $(LDFLAGS) \
|
||||
$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
|
||||
$(addprefix -T , $(KBUILD_LDS_MODULE)) \
|
||||
$(shell [ -s $(@:.ko=.o.symversions) ] && \
|
||||
echo -T $(@:.ko=.o.symversions)) \
|
||||
-o $@ --whole-archive $(filter %.o, $^); \
|
||||
$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
|
||||
else
|
||||
cmd_ld_ko_o = \
|
||||
$(LD) -r $(KBUILD_LDFLAGS) \
|
||||
$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
|
||||
$(addprefix -T , $(KBUILD_LDS_MODULE)) \
|
||||
-o $@ $(filter %.o, $^); \
|
||||
$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
|
||||
endif
|
||||
|
||||
$(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) FORCE
|
||||
+$(call if_changed,ld_ko_o)
|
||||
|
@ -84,12 +84,32 @@ MODPOST += $(subst -i,-n,$(filter -i,$(MAKEFLAGS))) -s -T - $(wildcard vmlinux)
|
||||
# find all modules listed in modules.order
|
||||
modules := $(sort $(shell cat $(MODORDER)))
|
||||
|
||||
# With CONFIG_LTO_CLANG, .o files might be LLVM IR, so we need to link them
|
||||
# into actual objects before passing them to modpost
|
||||
modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
quiet_cmd_cc_lto_link_modules = LTO [M] $@
|
||||
cmd_cc_lto_link_modules = \
|
||||
$(LD) $(ld_flags) -r -o $(@) \
|
||||
$(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \
|
||||
echo -T $(@:$(modpost-ext).o=.o.symversions)) \
|
||||
--whole-archive $(filter-out FORCE,$^)
|
||||
|
||||
$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE
|
||||
$(call if_changed,cc_lto_link_modules)
|
||||
|
||||
PHONY += FORCE
|
||||
FORCE:
|
||||
|
||||
endif
|
||||
|
||||
# Read out modules.order instead of expanding $(modules) to pass in modpost.
|
||||
# Otherwise, allmodconfig would fail with "Argument list too long".
|
||||
quiet_cmd_modpost = MODPOST $(words $(modules)) modules
|
||||
cmd_modpost = sed 's/ko$$/o/' $(MODORDER) | $(MODPOST)
|
||||
cmd_modpost = sed 's/\.ko$$/$(modpost-ext)\.o/' $(MODORDER) | $(MODPOST)
|
||||
|
||||
__modpost:
|
||||
__modpost: $(modules:.ko=$(modpost-ext).o)
|
||||
@$(kecho) ' Building modules, stage 2.'
|
||||
$(call cmd,modpost)
|
||||
ifneq ($(KBUILD_MODPOST_NOFINAL),1)
|
||||
|
@ -39,6 +39,30 @@ info()
|
||||
fi
|
||||
}
|
||||
|
||||
# If CONFIG_LTO_CLANG and CONFIG_MODVERSIONS are both selected, collect generated symbol versions into
|
||||
# .tmp_symversions
|
||||
modversions()
|
||||
{
|
||||
if [ -z "${CONFIG_LTO_CLANG}" ]; then
|
||||
return
|
||||
fi
|
||||
if [ -z "${CONFIG_MODVERSIONS}" ]; then
|
||||
return
|
||||
fi
|
||||
|
||||
rm -f .tmp_symversions
|
||||
|
||||
for a in ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}; do
|
||||
for o in $(${AR} t $a 2>/dev/null); do
|
||||
if [ -f ${o}.symversions ]; then
|
||||
cat ${o}.symversions >> .tmp_symversions
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
echo "-T .tmp_symversions"
|
||||
}
|
||||
|
||||
# Link of vmlinux.o used for section mismatch analysis
|
||||
# ${1} output file
|
||||
modpost_link()
|
||||
@ -52,7 +76,15 @@ modpost_link()
|
||||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group"
|
||||
|
||||
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
|
||||
if [ -n "${CONFIG_LTO_CLANG}" ]; then
|
||||
# This might take a while, so indicate that we're doing
|
||||
# an LTO link
|
||||
info LTO ${1}
|
||||
else
|
||||
info LD ${1}
|
||||
fi
|
||||
|
||||
${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects}
|
||||
}
|
||||
|
||||
# Link of vmlinux
|
||||
@ -70,13 +102,22 @@ vmlinux_link()
|
||||
shift
|
||||
|
||||
if [ "${SRCARCH}" != "um" ]; then
|
||||
objects="--whole-archive \
|
||||
${KBUILD_VMLINUX_OBJS} \
|
||||
--no-whole-archive \
|
||||
--start-group \
|
||||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group \
|
||||
${@}"
|
||||
if [ -n "${CONFIG_LTO_CLANG}" ]; then
|
||||
# Use vmlinux.o instead of performing the slow LTO
|
||||
# link again.
|
||||
objects="--whole-archive \
|
||||
vmlinux.o \
|
||||
--no-whole-archive \
|
||||
${@}"
|
||||
else
|
||||
objects="--whole-archive \
|
||||
${KBUILD_VMLINUX_OBJS} \
|
||||
--no-whole-archive \
|
||||
--start-group \
|
||||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group \
|
||||
${@}"
|
||||
fi
|
||||
|
||||
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \
|
||||
-o ${output} \
|
||||
@ -189,6 +230,7 @@ cleanup()
|
||||
rm -f .btf.*
|
||||
rm -f .tmp_System.map
|
||||
rm -f .tmp_kallsyms*
|
||||
rm -f .tmp_symversions
|
||||
rm -f .tmp_vmlinux*
|
||||
rm -f System.map
|
||||
rm -f vmlinux
|
||||
@ -240,7 +282,6 @@ fi;
|
||||
${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
|
||||
|
||||
#link vmlinux.o
|
||||
info LD vmlinux.o
|
||||
modpost_link vmlinux.o
|
||||
|
||||
# modpost vmlinux.o to check for section mismatches
|
||||
|
@ -146,6 +146,9 @@ static struct module *new_module(const char *modname)
|
||||
p[strlen(p) - 2] = '\0';
|
||||
mod->is_dot_o = 1;
|
||||
}
|
||||
/* strip trailing .lto */
|
||||
if (strends(p, ".lto"))
|
||||
p[strlen(p) - 4] = '\0';
|
||||
|
||||
/* add to list */
|
||||
mod->name = p;
|
||||
@ -2000,6 +2003,10 @@ static char *remove_dot(char *s)
|
||||
size_t m = strspn(s + n + 1, "0123456789");
|
||||
if (m && (s[n + m] == '.' || s[n + m] == 0))
|
||||
s[n] = 0;
|
||||
|
||||
/* strip trailing .lto */
|
||||
if (strends(s, ".lto"))
|
||||
s[strlen(s) - 4] = '\0';
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user