diff options
author | Michal Marek <mmarek@suse.cz> | 2014-04-14 15:18:31 +0200 |
---|---|---|
committer | Michal Marek <mmarek@suse.cz> | 2014-04-14 15:18:31 +0200 |
commit | ac98dab39e55008f5e340f410059714a794aab55 (patch) | |
tree | 8a58753b48208254f23b29d21fe6800cff9590f9 | |
parent | c9eaa447e77efe77b7fa4c953bd62de8297fd6c5 (diff) | |
parent | 19a3cc83353e3bb4bc28769f8606139a3d350d2d (diff) |
Merge branch 'kbuild/lto' into kbuild/for-next
-rw-r--r-- | Documentation/lto-build | 173 | ||||
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | arch/x86/Kconfig | 2 | ||||
-rw-r--r-- | init/Kconfig | 73 | ||||
-rw-r--r-- | kernel/gcov/Kconfig | 2 | ||||
-rw-r--r-- | lib/Kconfig.debug | 2 | ||||
-rw-r--r-- | scripts/Makefile.lto | 84 | ||||
-rw-r--r-- | scripts/Makefile.modpost | 7 | ||||
-rw-r--r-- | scripts/link-vmlinux.sh | 2 |
9 files changed, 357 insertions, 8 deletions
diff --git a/Documentation/lto-build b/Documentation/lto-build new file mode 100644 index 000000000000..5dcce1e9cc25 --- /dev/null +++ b/Documentation/lto-build @@ -0,0 +1,173 @@ +Link time optimization (LTO) for the Linux kernel + +This is an experimental feature. + +Link Time Optimization allows the compiler to optimize the complete program +instead of just each file. LTO requires at least gcc 4.8 (but +works more efficiently with 4.9+). LTO requires Linux binutils (the normal FSF +releases used in many distributions do not work at the moment). + +The compiler can inline functions between files and do various other global +optimizations, like specializing functions for common parameters, +determining when global variables are clobbered, making functions pure/const, +propagating constants globally, removing unneeded data and others. + +It will also drop unused functions which can make the kernel +image smaller in some circumstances, in particular for small kernel +configurations. + +For small monolithic kernels it can throw away unused code very effectively +(especially when modules are disabled) and usually shrinks +the code size. + +Build time and memory consumption at build time will increase, depending +on the size of the largest binary. Modular kernels are less affected. +With LTO incremental builds are less incremental, as always the whole +binary needs to be re-optimized (but not re-parsed). + +Oops can be somewhat more difficult to read, due to the more aggressive +inlining. + +Normal "reasonable" builds work with less than 4GB of RAM, but very large +configurations like allyesconfig may need more memory. The actual +memory needed depends on the available memory (gcc sizes its garbage +collector pools based on that or on the ulimit -m limits) and +the compiler version. 
+ +gcc 4.9+ has much better build performance and less memory consumption. + +- A few kernel features are currently incompatible with LTO, in particular +function tracing, because they require special compiler flags for +specific files, which is not supported in LTO right now. +- Jobserver control for -j does not work correctly for the final +LTO phase due to some problems with the kernel's pipe code. +The makefiles hard-code -j<number of online cpus> for the final +LTO phase to work around this. + +Configuration: +- Enable CONFIG_LTO_MENU and then disable CONFIG_LTO_DISABLE. +This is mainly to not have allyesconfig default to LTO. +- FUNCTION_TRACER, STACK_TRACER, FUNCTION_GRAPH_TRACER, KALLSYMS_ALL, GCOV +have to be disabled because they are currently incompatible with LTO. +- MODVERSIONS has to be disabled (may work with 4.9+) + +Requirements: +- Enough memory: 4GB for a standard build, more for allyesconfig +The peak memory usage happens single threaded (when lto-wpa merges types), +so dialing back -j options will not help much. + +A 32bit compiler is unlikely to work due to the memory requirements. +You can however build a kernel targeted at 32bit on a 64bit host. + +Example build procedure: + +Simplified procedure for distributions that have gcc 4.8, but not +the Linux binutils (for example openSUSE 13.1 or FC20): + +The LTO build requires gcc-nm/gcc-ar. Some distributions ship +those in separate packages, which may need to be explicitly installed. + +- Get the latest Linux binutils from +http://www.kernel.org/pub/linux/devel/binutils/ +and unpack it. + +We install it in a separate directory to not overwrite the system binutils. + +# replace VERSION with respective version numbers + +cd binutils* +# don't forget the --enable-plugins! 
+./configure --prefix=/opt/binutils-VERSION --enable-plugins +make -j $(getconf _NPROCESSORS_ONLN) && sudo make install + +Fix up the kernel configuration to allow LTO: + +<start with a suitable kernel configuration> +./source/scripts/config --disable function_tracer \ + --disable function_graph_tracer \ + --disable stack_tracer --enable lto_menu \ + --disable lto_disable \ + --disable gcov \ + --disable kallsyms_all \ + --disable modversions +make oldconfig + +Then you can build with + +# The COMPILER_PATH is needed to let gcc use the new binutils +# as the LTO plugin linker +# if you installed gcc in a separate directory like below also +# add it to the PATH line below before the regular $PATH +# The COMPILER_PATH setting is only needed if the gcc was not built +# with --with-plugin-ld pointing to the Linux binutils ld +# The AR/NM setting works around a Makefile bug +COMPILER_PATH=/opt/binutils-VERSION/bin PATH=$COMPILER_PATH:$PATH \ +make -j$(getconf _NPROCESSORS_ONLN) AR=gcc-ar NM=gcc-nm + +If you don't have gcc 4.8+ as system compiler you would also need +to install that compiler. In this case I recommend getting +a gcc 4.9+ snapshot from http://gcc.gnu.org (or release when available), +as it builds much faster for LTO than 4.8. + +Here's an example build procedure: + +Assuming gcc is unpacked in gcc-VERSION + +cd gcc-VERSION +./contrib/download_prerequisites +cd .. + +mkdir obj-gcc +# please don't skip this cd. the build will not work correctly in the +# source dir, you have to use the separate object dir +cd obj-gcc +../gcc-VERSION/configure --prefix=/opt/gcc-VERSION --enable-lto \ +--with-plugin-ld=/opt/binutils-VERSION/bin/ld \ +--disable-nls --enable-languages=c,c++ \ +--disable-libstdcxx-pch +make -j$(getconf _NPROCESSORS_ONLN) +sudo make install-no-fixedincludes + +FAQs: + +Q: I get a section type attribute conflict +A: Usually because of someone doing +const __initdata (should be const __initconst) or const __read_mostly +(should be just const). 
Check both symbols reported by gcc. + +Q: I see lots of undefined symbols for memcmp etc. +A: Usually because NM=gcc-nm AR=gcc-ar are missing. +The Makefile tries to set those automatically, but it doesn't always +work. Better to set it manually on the make command line. + +Q: It's quite slow / uses too much memory. +A: Consider a gcc 4.9 snapshot/release (not released yet). +The main problem in 4.8 is the type merging in the single threaded WPA pass, +which has been improved considerably in 4.9 by running it distributed. + +Q: It's still slow +A: It'll always be somewhat slower than non-LTO, sorry. + +Q: What's up with .XXXXX numeric postfixes? +A: This is due to LTO turning (nearly) all symbols to static. +Use gcc 4.9, it avoids them in most cases. They are also filtered out +in kallsyms. + +References: + +Presentation on Kernel LTO +(note, performance numbers/details outdated. In particular gcc 4.9 fixed +most of the build time problems): +http://halobates.de/kernel-lto.pdf + +Generic gcc LTO: +http://www.ucw.cz/~hubicka/slides/labs2013.pdf +http://www.hipeac.net/system/files/barcelona.pdf + +Somewhat outdated too: +http://gcc.gnu.org/projects/lto/lto.pdf +http://gcc.gnu.org/projects/lto/whopr.pdf + +Happy Link-Time-Optimizing! 
+ +Andi Kleen @@ -349,9 +349,14 @@ include $(srctree)/scripts/Kbuild.include AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld +LDFINAL = $(LD) CC = $(CROSS_COMPILE)gcc CPP = $(CC) -E +ifdef CONFIG_LTO +AR = $(CROSS_COMPILE)gcc-ar +else AR = $(CROSS_COMPILE)ar +endif NM = $(CROSS_COMPILE)nm STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy @@ -410,7 +415,7 @@ KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(S export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP +export CPP AR NM STRIP OBJCOPY OBJDUMP LDFINAL export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS @@ -421,6 +426,17 @@ export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL export KBUILD_ARFLAGS
+ifdef CONFIG_LTO
+# LTO gcc creates a lot of files in TMPDIR, and with /tmp as tmpfs
+# it's easy to drive the machine OOM. Use the object directory
+# instead when TMPDIR is unset or empty (ifndef covers both cases).
+ifndef TMPDIR
+TMPDIR = $(objtree)
+export TMPDIR
+$(info setting TMPDIR=$(objtree) for LTO build)
+endif
+endif
+
 # When compiling out-of-tree modules, put MODVERDIR in the module # tree rather than in the kernel tree. The kernel tree might # even be read-only. 
@@ -731,6 +747,8 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO endif +include ${srctree}/scripts/Makefile.lto + # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments KBUILD_CPPFLAGS += $(KCPPFLAGS) KBUILD_AFLAGS += $(KAFLAGS) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 25d2c6f7325e..895560b1fd24 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -533,7 +533,7 @@ config X86_32_IRIS config SCHED_OMIT_FRAME_POINTER def_bool y - prompt "Single-depth WCHAN output" + prompt "Single-depth WCHAN output" if !LTO && !FRAME_POINTER depends on X86 ---help--- Calculate simpler /proc/<PID>/wchan values. If this option diff --git a/init/Kconfig b/init/Kconfig index 765018c24cf9..03260b759197 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1255,6 +1255,77 @@ config CC_OPTIMIZE_FOR_SIZE If unsure, say N. +config LTO_MENU + bool "Enable gcc link time optimization (LTO)" + # Only tested on X86 for now. For other architectures you likely + # have to fix some things first, like adding asmlinkages etc. + depends on X86 + # lto does not support excluding flags for specific files + # right now. Can be removed if that is fixed. + depends on !FUNCTION_TRACER + help + With this option gcc will do whole program optimizations for + the whole kernel and modules. This increases compile time, but can + lead to better code. It allows gcc to inline functions between + different files and do other optimizations. It might also trigger + bugs due to more aggressive optimization. It allows gcc to drop unused + code. On smaller monolithic kernel configurations + it usually leads to smaller kernels, especially when modules + are disabled. + + With this option gcc will also do some global checking over + different source files. It also disables a number of kernel + features. + + This option is recommended for release builds. 
With LTO + the kernel always has to be re-optimized (but not re-parsed) + on each build. + + This requires a gcc 4.8 or later compiler and + Linux binutils 2.21.51.0.3 or later. gcc 4.9 builds significantly + faster than 4.8. It does not currently work with an FSF release of + binutils or with the gold linker. + + On larger configurations this may need more than 4GB of RAM. + It will likely not work on those with a 32bit compiler. + + When the toolchain support is not available this will (hopefully) + be automatically disabled. + + For more information see Documentation/lto-build + +config LTO_DISABLE + bool "Disable LTO again" + depends on LTO_MENU + default n + help + This option is merely here so that allyesconfig or allmodconfig do + not enable LTO. If you want to actually use LTO do not enable it. + +config LTO + bool + default y + depends on LTO_MENU && !LTO_DISABLE + +config LTO_DEBUG + bool "Enable LTO compile time debugging" + depends on LTO + help + Enable LTO debugging in the compiler. The compiler dumps + some log files that make it easier to figure out LTO + behavior. The log files also allow to reconstruct + the global inlining and a global callgraph. + They however add some (single threaded) cost to the + compilation. When in doubt do not enable. + +config LTO_CP_CLONE + bool "Allow aggressive cloning for function specialization" + depends on LTO + help + Allow the compiler to clone and specialize functions for specific + arguments when it determines these arguments are very commonly + called. Experimental. Will increase text size. + config SYSCTL bool @@ -1744,6 +1815,8 @@ config MODULE_FORCE_UNLOAD config MODVERSIONS bool "Module versioning support" + # LTO should work with gcc 4.9 + depends on !LTO help Usually, you have to use modules compiled with your kernel. 
Saying Y here makes it sometimes possible to use modules diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index d04ce8ac4399..32f65b7aed46 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -2,7 +2,7 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" - depends on DEBUG_FS + depends on DEBUG_FS && !LTO select CONSTRUCTORS if !UML default n ---help--- diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 140b66a874c1..c653d645feea 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -180,7 +180,7 @@ config STRIP_ASM_SYMS config READABLE_ASM bool "Generate readable assembler code" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !LTO help Disable some compiler optimizations that tend to generate human unreadable assembler output. This may make the kernel slightly slower, but it helps
diff --git a/scripts/Makefile.lto b/scripts/Makefile.lto
new file mode 100644
index 000000000000..b8e9e4836f4b
--- /dev/null
+++ b/scripts/Makefile.lto
@@ -0,0 +1,84 @@
+#
+# Support for gcc link time optimization
+#
+
+DISABLE_LTO :=
+LTO_CFLAGS :=
+
+export DISABLE_LTO
+export LTO_CFLAGS
+
+ifdef CONFIG_LTO
+# 4.7 works mostly, but it sometimes loses symbols on large builds
+# This can be worked around by marking those symbols visible, but that
+# is fairly ugly and the problem is gone with 4.8, so require 4.8+.
+# Then probe that the compiler actually accepts -flto.
+ifeq ($(call cc-ifversion, -ge, 0408,y),y)
+ifneq ($(call cc-option,-flto,n),n)
+# We need HJ Lu's Linux binutils because mainline binutils does not
+# support mixing assembler and LTO code in the same ld -r object.
+# XXX check if the gcc plugin ld is the expected one too
+# XXX some Fedora binutils should also support it. How to check for that?
+ifeq ($(call ld-ifversion,-ge,22710001,y),y)
+        LTO_CFLAGS := -flto -fno-toplevel-reorder
+        LTO_FINAL_CFLAGS := -fuse-linker-plugin
+
+# the -fno-toplevel-reorder is to preserve the order of initcalls
+# everything else should tolerate reordering
+        LTO_FINAL_CFLAGS += -fno-toplevel-reorder
+
+# enable LTO and set the jobs used by the LTO phase; append (+=) so
+# that the -fuse-linker-plugin set above is kept. This should be
+# -flto=jobserver to coordinate with the parent make, but work around
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50639
+# use as many jobs as processors are online for now
+# this actually seems to be a kernel bug with the pipe code
+        LTO_FINAL_CFLAGS += -flto=$(shell getconf _NPROCESSORS_ONLN)
+        #LTO_FINAL_CFLAGS += -flto=jobserver
+
+        # requires plugin ar passed and very recent HJ binutils
+        LTO_CFLAGS += -fno-fat-lto-objects
+
+# Used to disable LTO for specific files (e.g. vdso)
+        DISABLE_LTO := -fno-lto
+
+        LTO_FINAL_CFLAGS += ${LTO_CFLAGS} -fwhole-program
+
+ifdef CONFIG_LTO_DEBUG
+        LTO_FINAL_CFLAGS += -dH -fdump-ipa-cgraph -fdump-ipa-inline-details
+        # -Wl,-plugin-save-temps -save-temps
+        LTO_CFLAGS +=
+endif
+ifdef CONFIG_LTO_CP_CLONE
+        LTO_FINAL_CFLAGS += -fipa-cp-clone
+        LTO_CFLAGS += -fipa-cp-clone
+endif
+
+        # In principle gcc should pass through options in the object files,
+        # but it doesn't always work. So do it here manually
+        # Note that special options for individual files does not
+        # work currently (except for some special cases that only
+        # affect the compiler frontend)
+        # The main offenders are FTRACE and GCOV -- we exclude
+        # those in the config.
+        LTO_FINAL_CFLAGS += $(filter -g%,${KBUILD_CFLAGS})
+        LTO_FINAL_CFLAGS += $(filter -O%,${KBUILD_CFLAGS})
+        LTO_FINAL_CFLAGS += $(filter -f%,${KBUILD_CFLAGS})
+        LTO_FINAL_CFLAGS += $(filter -m%,${KBUILD_CFLAGS})
+        LTO_FINAL_CFLAGS += $(filter -W%,${KBUILD_CFLAGS})
+
+        KBUILD_CFLAGS += ${LTO_CFLAGS}
+
+        LDFINAL := ${CONFIG_SHELL} ${srctree}/scripts/gcc-ld \
+                  ${LTO_FINAL_CFLAGS}
+
+else
+        $(warning "WARNING: Too old linker version $(call ld-version) for kernel LTO. You need Linux binutils. CONFIG_LTO disabled.")
+endif
+else
+        $(warning "WARNING: Compiler/Linker does not support LTO/WHOPR with linker plugin. CONFIG_LTO disabled.")
+endif
+else
+        $(warning "WARNING: GCC $(call cc-version) too old for LTO/WHOPR. CONFIG_LTO disabled")
+endif
+endif
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 69f0a1417e9a..9c40daea846c 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -77,7 +77,8 @@ modpost = scripts/mod/modpost \ $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ - $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) + $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \ + $(if $(CONFIG_LTO),-w) MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS))) @@ -115,8 +116,8 @@ $(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE targets += $(modules:.ko=.mod.o) # Step 6), final link of the modules -quiet_cmd_ld_ko_o = LD [M] $@ - cmd_ld_ko_o = $(LD) -r $(LDFLAGS) \ +quiet_cmd_ld_ko_o = LDFINAL [M] $@ + cmd_ld_ko_o = $(LDFINAL) -r $(LDFLAGS) \ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ -o $@ $(filter-out FORCE,$^) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 86a4fe75f453..ec9a8ae33f8f 100644 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -53,7 +53,7 @@ vmlinux_link() local lds="${objtree}/${KBUILD_LDS}" if [ "${SRCARCH}" != "um" ]; then - ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ + 
${LDFINAL} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ -T ${lds} ${KBUILD_VMLINUX_INIT} \ --start-group ${KBUILD_VMLINUX_MAIN} --end-group ${1} else |