ANDROID: kbuild: add support for Clang LTO

This change adds the configuration option CONFIG_LTO_CLANG, and
build system support for Clang's Link Time Optimization (LTO). In
preparation for LTO support with other compilers, potentially common
parts of the changes are gated behind CONFIG_LTO instead.

With -flto, instead of object files, Clang produces LLVM bitcode,
which is compiled into a native object at link time, allowing the
final binary to be optimized globally. For more details, see:

  https://llvm.org/docs/LinkTimeOptimization.html

While the kernel normally uses GNU ld for linking, LLVM supports LTO
only with LLD or GNU gold linkers. This change assumes LLD is used.

Bug: 145210207
Change-Id: If1164ff33d073358ee7d4bba84cbb06c349c4a88
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
This commit is contained in:
Sami Tolvanen 2017-11-28 08:48:49 -08:00
parent 548c06ceb9
commit 2e39b40dd2
8 changed files with 221 additions and 17 deletions

View File

@ -654,6 +654,16 @@ RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc
export RETPOLINE_CFLAGS
export RETPOLINE_VDSO_CFLAGS
# Toolchain overrides must happen before arch/$(SRCARCH)/Makefile is included,
# so that the ar/cc/ld-* macros return correct values.
ifdef CONFIG_LTO_CLANG
# With LTO the compiler emits LLVM IR rather than object files; llvm-ar and
# llvm-nm are able to process these.
export LLVM_NM := llvm-nm
AR := llvm-ar
endif
include arch/$(SRCARCH)/Makefile
ifdef need-config
@ -856,6 +866,22 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS)
export CC_FLAGS_SCS
endif
# Clang LTO: choose between full LTO (-flto) and ThinLTO (-flto=thin).
# ThinLTO additionally points the linker at a cache directory named
# .thinlto-cache.
ifdef CONFIG_LTO_CLANG
ifndef CONFIG_THINLTO
CC_FLAGS_LTO_CLANG := -flto
else
CC_FLAGS_LTO_CLANG := -flto=thin $(call cc-option, -fsplit-lto-unit)
KBUILD_LDFLAGS += --thinlto-cache-dir=.thinlto-cache
endif
CC_FLAGS_LTO_CLANG += -fvisibility=default
endif

# Compiler-independent part: publish the selected LTO flags as CC_FLAGS_LTO
# (exported to sub-makes) and add them to the kernel-wide C flags.
ifdef CONFIG_LTO
export CC_FLAGS_LTO := $(CC_FLAGS_LTO_CLANG)
KBUILD_CFLAGS += $(CC_FLAGS_LTO)
endif
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
@ -1682,7 +1708,8 @@ clean: $(clean-dirs)
-o -name modules.builtin -o -name '.tmp_*.o.*' \
-o -name '*.c.[012]*.*' \
-o -name '*.ll' \
-o -name '*.gcno' \) -type f -print | xargs rm -f
-o -name '*.gcno' \
-o -name '*.*.symversions' \) -type f -print | xargs rm -f
# Generate tags for editors
# ---------------------------------------------------------------------------

View File

@ -554,6 +554,53 @@ config SHADOW_CALL_STACK_VMAP
provides better stack exhaustion protection, but increases per-thread
memory consumption as a full page is allocated for each shadow stack.
# Promptless symbol; it is enabled through "select LTO" from LTO_CLANG.
config LTO
	bool

# Promptless capability symbol, meant to be selected by architecture code.
config ARCH_SUPPORTS_LTO_CLANG
	bool
	help
	  An architecture should select this option if it supports:
	  - compiling with Clang,
	  - compiling inline assembly with Clang's integrated assembler,
	  - and linking with LLD.

# Promptless capability symbol, meant to be selected by architecture code.
config ARCH_SUPPORTS_THINLTO
	bool
	help
	  An architecture should select this if it supports Clang ThinLTO.

# User-visible switch; on by default whenever both dependencies are met.
config THINLTO
	bool "Use Clang's ThinLTO (EXPERIMENTAL)"
	depends on LTO_CLANG
	depends on ARCH_SUPPORTS_THINLTO
	default y
	help
	  Use ThinLTO to speed up Link Time Optimization.

# LTO mode selection: exactly one entry of the choice is active, and
# LTO_NONE is the default.
choice
	prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
	default LTO_NONE
	help
	  This option turns on Link-Time Optimization (LTO).

config LTO_NONE
	bool "None"

config LTO_CLANG
	bool "Use Clang's Link Time Optimization (LTO) (EXPERIMENTAL)"
	# Toolchain requirements.
	depends on CC_IS_CLANG
	depends on CLANG_VERSION >= 100000
	depends on LD_IS_LLD
	# Architecture opt-in and options that exclude LTO_CLANG.
	depends on ARCH_SUPPORTS_LTO_CLANG
	depends on !KASAN
	depends on !FTRACE_MCOUNT_RECORD
	select LTO
	help
	  This option enables Clang's Link Time Optimization (LTO), which allows
	  the compiler to optimize the kernel globally at link time. If you
	  enable this option, the compiler generates LLVM IR instead of object
	  files, and the actual compilation from IR occurs at the LTO link step,
	  which may take several minutes.

endchoice

config HAVE_ARCH_WITHIN_STACK_FRAMES
bool

View File

@ -63,10 +63,13 @@
* .data. We don't want to pull in .data..other sections, which Linux
* has defined. Same for text and bss.
*
* With LTO_CLANG, the linker also splits sections by default, so we need
* these macros to combine the sections during the final link.
*
* RODATA_MAIN is not used because existing code already defines .rodata.x
* sections to be brought in with rodata.
*/
#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*

View File

@ -93,7 +93,7 @@ endif
# ---------------------------------------------------------------------------
quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<
$(obj)/%.s: $(src)/%.c FORCE
$(call if_changed_dep,cc_s_c)
@ -148,6 +148,15 @@ ifdef CONFIG_MODVERSIONS
# the actual value of the checksum generated by genksyms
# o remove .tmp_<file>.o to <file>.o
ifdef CONFIG_LTO_CLANG
# Clang LTO variant of cmd_modversions_c.
#
# With LTO the .o file is LLVM IR, so it is inspected with $(LLVM_NM) here,
# whereas the non-LTO variant below uses $(OBJDUMP) -h. If the listing for $@
# mentions __ksymtab, the output of cmd_gensymtypes_c is written to
# $@.symversions. These per-object .symversions files are later handed to the
# linker (-T) when the kernel and/or modules are linked at the end.
cmd_modversions_c = \
if $(LLVM_NM) $@ | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
> $@.symversions; \
fi;
else
cmd_modversions_c = \
if $(OBJDUMP) -h $@ | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
@ -159,6 +168,7 @@ cmd_modversions_c = \
rm -f $(@D)/.tmp_$(@F:.o=.ver); \
fi
endif
endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
ifndef CC_USING_RECORD_MCOUNT
@ -383,6 +393,21 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
# To build objects in subdirs, we need to descend into the directories
$(sort $(subdir-obj-y)): $(subdir-ym) ;
# Merge the .symversions files of all prerequisites (FORCE excluded) into
# $@.symversions for later processing. Any stale $@.symversions is removed
# first. Real work is only done when CONFIG_LTO_CLANG and CONFIG_MODVERSIONS
# are both y; in every other configuration the command is a no-op.
quiet_cmd_update_lto_symversions = SYMVER $@
ifneq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y)
  cmd_update_lto_symversions = echo >/dev/null
else
  cmd_update_lto_symversions =						\
	rm -f $@.symversions;						\
	for obj in $(filter-out FORCE,$^); do				\
		if test -f $$obj.symversions; then			\
			cat $$obj.symversions >> $@.symversions;	\
		fi;							\
	done
endif
#
# Rule to compile a set of .o files into one .a file (without symbol table)
#
@ -391,8 +416,11 @@ ifdef builtin-target
quiet_cmd_ar_builtin = AR $@
cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs)
quiet_cmd_ar_and_symver = AR $@
cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin)
$(builtin-target): $(real-obj-y) FORCE
$(call if_changed,ar_builtin)
$(call if_changed,ar_and_symver)
targets += $(builtin-target)
endif # builtin-target
@ -412,16 +440,26 @@ $(modorder-target): $(subdir-ym) FORCE
#
ifdef lib-target
quiet_cmd_ar_lib = AR $@
cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar)
$(lib-target): $(lib-y) FORCE
$(call if_changed,ar)
$(call if_changed,ar_lib)
targets += $(lib-target)
dummy-object = $(obj)/.lib_exports.o
ksyms-lds = $(dot-target).lds
# Tool used to list exports: objdump doesn't understand LLVM IR, so with
# CONFIG_LTO_CLANG the symbols are dumped with llvm-nm; otherwise the section
# headers are dumped with $(OBJDUMP) -h.
dump_export_list = $(if $(CONFIG_LTO_CLANG),$(LLVM_NM),$(OBJDUMP) -h)
quiet_cmd_export_list = EXPORTS $@
cmd_export_list = $(OBJDUMP) -h $< | \
cmd_export_list = $(dump_export_list) $< | \
sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\
rm -f $(dummy-object);\
echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
@ -439,8 +477,16 @@ endif
# Do not replace $(filter %.o,^) with $(real-prereqs). When a single object
# module is turned into a multi object module, $^ will contain header file
# dependencies recorded in the .*.cmd file.
ifndef CONFIG_LTO_CLANG
# Default: pre-link the member objects into one relocatable object.
quiet_cmd_link_multi-m = LD [M] $@
cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
else
# Clang LTO: instead of pre-linking with $(LD) -r, merge the members'
# .symversions files and collect the member objects into an archive with
# $(AR). Any previous $@ is removed before the archive is created.
quiet_cmd_link_multi-m = AR [M] $@
cmd_link_multi-m =							\
	$(cmd_update_lto_symversions);					\
	rm -f $@;							\
	$(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter %.o,$^)
endif
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)

View File

@ -6,6 +6,7 @@
PHONY := __modfinal
__modfinal:
include $(objtree)/include/config/auto.conf
include $(srctree)/scripts/Kbuild.include
# for c_flags
@ -30,12 +31,24 @@ quiet_cmd_cc_o_c = CC [M] $@
ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
quiet_cmd_ld_ko_o = LD [M] $@
# Final link of <module>.ko from its %.o prerequisites, followed by the
# optional arch post-link step (or "true" when no Makefile.postlink exists).
#
# Both variants pass $(KBUILD_LDFLAGS), then the module-specific linker flags,
# then each $(KBUILD_LDS_MODULE) linker script via -T.
#
# The Clang LTO variant differs in two ways:
#  - if <module>.o.symversions exists and is non-empty, it is passed as an
#    additional linker script (-T) so the generated symbol versions are
#    applied;
#  - the inputs are linked with --whole-archive (NOTE(review): presumably
#    because multi-object modules are archives under LTO -- confirm).
#
# The LTO variant used to pass $(LDFLAGS) instead of $(KBUILD_LDFLAGS). That
# dropped the kbuild linker flags, including the --thinlto-cache-dir option
# added to KBUILD_LDFLAGS for ThinLTO, and was inconsistent with every other
# link command.
ifdef CONFIG_LTO_CLANG
      cmd_ld_ko_o =							\
	$(LD) -r $(KBUILD_LDFLAGS)					\
		$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)		\
		$(addprefix -T , $(KBUILD_LDS_MODULE))			\
		$(shell [ -s $(@:.ko=.o.symversions) ] &&		\
			echo -T $(@:.ko=.o.symversions))		\
		-o $@ --whole-archive $(filter %.o, $^);		\
	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
else
      cmd_ld_ko_o =							\
	$(LD) -r $(KBUILD_LDFLAGS)					\
		$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)		\
		$(addprefix -T , $(KBUILD_LDS_MODULE))			\
		-o $@ $(filter %.o, $^);				\
	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
endif
$(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) FORCE
+$(call if_changed,ld_ko_o)

View File

@ -84,12 +84,32 @@ MODPOST += $(subst -i,-n,$(filter -i,$(MAKEFLAGS))) -s -T - $(wildcard vmlinux)
# find all modules listed in modules.order
modules := $(sort $(shell cat $(MODORDER)))
# Under CONFIG_LTO_CLANG the module .o files might be LLVM IR, so they are
# first linked into actual objects (<module>.lto.o) and modpost is given
# those instead. modpost-ext is the extra suffix: ".lto" with LTO, empty
# otherwise.
modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)

ifdef CONFIG_LTO_CLANG
# Relocatable link of <module>.o into <module>.lto.o. If a non-empty
# <module>.o.symversions exists, it is passed to the linker as a script (-T).
quiet_cmd_cc_lto_link_modules = LTO [M] $@
      cmd_cc_lto_link_modules =						\
	$(LD) $(ld_flags) -r -o $@					\
		$(shell test -s $(patsubst %$(modpost-ext).o,%.o.symversions,$@) && \
			echo -T $(patsubst %$(modpost-ext).o,%.o.symversions,$@)) \
		--whole-archive $(filter-out FORCE,$^)

$(patsubst %.ko,%$(modpost-ext).o,$(modules)): %$(modpost-ext).o: %.o FORCE
	$(call if_changed,cc_lto_link_modules)

PHONY += FORCE
FORCE:
endif
# Read out modules.order instead of expanding $(modules) to pass in modpost.
# Otherwise, allmodconfig would fail with "Argument list too long".
quiet_cmd_modpost = MODPOST $(words $(modules)) modules
cmd_modpost = sed 's/ko$$/o/' $(MODORDER) | $(MODPOST)
cmd_modpost = sed 's/\.ko$$/$(modpost-ext)\.o/' $(MODORDER) | $(MODPOST)
__modpost:
__modpost: $(modules:.ko=$(modpost-ext).o)
@$(kecho) ' Building modules, stage 2.'
$(call cmd,modpost)
ifneq ($(KBUILD_MODPOST_NOFINAL),1)

View File

@ -39,6 +39,30 @@ info()
fi
}
# If CONFIG_LTO_CLANG and CONFIG_MODVERSIONS are both set, collect the
# generated symbol versions into .tmp_symversions and print the matching
# linker option ("-T .tmp_symversions") on stdout, for use in a command
# substitution on the linker command line.
#
# Prints nothing when either option is unset, or when no symbol versions were
# collected, so the linker is never pointed at a missing or empty script.
modversions()
{
	if [ -z "${CONFIG_LTO_CLANG}" ] || [ -z "${CONFIG_MODVERSIONS}" ]; then
		return
	fi

	# Start from scratch; the file is rebuilt by appending below.
	rm -f .tmp_symversions

	# Both variables are whitespace-separated lists, so they are left
	# unquoted on purpose to get word splitting.
	for a in ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}; do
		# Entries that ${AR} cannot list produce no members; the error
		# output is discarded and the entry is skipped.
		for o in $(${AR} t "${a}" 2>/dev/null); do
			if [ -f "${o}.symversions" ]; then
				cat "${o}.symversions" >> .tmp_symversions
			fi
		done
	done

	# Only emit -T if something was actually collected.
	if [ -s .tmp_symversions ]; then
		echo "-T .tmp_symversions"
	fi
}
# Link of vmlinux.o used for section mismatch analysis
# ${1} output file
modpost_link()
@ -52,7 +76,15 @@ modpost_link()
${KBUILD_VMLINUX_LIBS} \
--end-group"
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
if [ -n "${CONFIG_LTO_CLANG}" ]; then
# This might take a while, so indicate that we're doing
# an LTO link
info LTO ${1}
else
info LD ${1}
fi
${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects}
}
# Link of vmlinux
@ -70,13 +102,22 @@ vmlinux_link()
shift
if [ "${SRCARCH}" != "um" ]; then
objects="--whole-archive \
${KBUILD_VMLINUX_OBJS} \
--no-whole-archive \
--start-group \
${KBUILD_VMLINUX_LIBS} \
--end-group \
${@}"
if [ -n "${CONFIG_LTO_CLANG}" ]; then
# Use vmlinux.o instead of performing the slow LTO
# link again.
objects="--whole-archive \
vmlinux.o \
--no-whole-archive \
${@}"
else
objects="--whole-archive \
${KBUILD_VMLINUX_OBJS} \
--no-whole-archive \
--start-group \
${KBUILD_VMLINUX_LIBS} \
--end-group \
${@}"
fi
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \
-o ${output} \
@ -189,6 +230,7 @@ cleanup()
rm -f .btf.*
rm -f .tmp_System.map
rm -f .tmp_kallsyms*
rm -f .tmp_symversions
rm -f .tmp_vmlinux*
rm -f System.map
rm -f vmlinux
@ -240,7 +282,6 @@ fi;
${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
#link vmlinux.o
info LD vmlinux.o
modpost_link vmlinux.o
# modpost vmlinux.o to check for section mismatches

View File

@ -146,6 +146,9 @@ static struct module *new_module(const char *modname)
p[strlen(p) - 2] = '\0';
mod->is_dot_o = 1;
}
/* strip trailing .lto */
if (strends(p, ".lto"))
p[strlen(p) - 4] = '\0';
/* add to list */
mod->name = p;
@ -2000,6 +2003,10 @@ static char *remove_dot(char *s)
size_t m = strspn(s + n + 1, "0123456789");
if (m && (s[n + m] == '.' || s[n + m] == 0))
s[n] = 0;
/* strip trailing .lto */
if (strends(s, ".lto"))
s[strlen(s) - 4] = '\0';
}
return s;
}