From 227a06553fe6c785f23d76eece3bb10e2db5059c Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Mon, 7 Feb 2022 15:02:53 -0800
Subject: tools/objtool: Check for use of the ENQCMD instruction in the kernel

The ENQCMD instruction implicitly accesses the PASID_MSR to fill in the
pasid field of the descriptor being submitted to an accelerator. But
there is no precise (and stable across kernel changes) point at which
the PASID_MSR is updated from the value for one task to the next.

Kernel code that uses accelerators must always use the ENQCMDS instruction
which does not access the PASID_MSR.

Check for use of the ENQCMD instruction in the kernel and warn on its
usage.

Signed-off-by: Fenghua Yu
Signed-off-by: Borislav Petkov
Reviewed-by: Tony Luck
Acked-by: Josh Poimboeuf
Link: https://lore.kernel.org/r/20220207230254.3342514-11-fenghua.yu@intel.com
Signed-off-by: Peter Zijlstra
---
 tools/objtool/arch/x86/decode.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index c10ef78df050..479e769ca324 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -112,7 +112,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	const struct elf *elf = file->elf;
 	struct insn insn;
 	int x86_64, ret;
-	unsigned char op1, op2,
+	unsigned char op1, op2, op3,
 		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
 		      modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
 		      sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
@@ -139,6 +139,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec

 	op1 = insn.opcode.bytes[0];
 	op2 = insn.opcode.bytes[1];
+	op3 = insn.opcode.bytes[2];

 	if (insn.rex_prefix.nbytes) {
 		rex = insn.rex_prefix.bytes[0];
@@ -491,6 +492,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			/* nopl/nopw */
 			*type = INSN_NOP;

+		} else if (op2 == 0x38 && op3 == 0xf8) {
+			if (insn.prefixes.nbytes == 1 &&
+			    insn.prefixes.bytes[0] == 0xf2) {
+				/* ENQCMD cannot be used in the kernel. */
+				WARN("ENQCMD instruction at %s:%lx", sec->name,
+				     offset);
+			}
+
 		} else if (op2 == 0xa0 || op2 == 0xa8) {

 			/* push fs/gs */
--
cgit v1.2.3

From b44544fe0298ee2224960a31f795e317029e2a60 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 8 Mar 2022 16:30:12 +0100
Subject: static_call: Avoid building empty .static_call_sites

Without CONFIG_HAVE_STATIC_CALL_INLINE there's no point in creating
the .static_call_sites section and its related symbols.

Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Josh Poimboeuf
Link: https://lore.kernel.org/r/20220308154317.223798256@infradead.org
---
 include/asm-generic/vmlinux.lds.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 42f3866bca69..a41e62355160 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -404,6 +404,7 @@
 	KEEP(*(__jump_table))						\
 	__stop___jump_table = .;

+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
 #define STATIC_CALL_DATA						\
 	. = ALIGN(8);							\
 	__start_static_call_sites = .;					\
@@ -412,6 +413,9 @@
 	__start_static_call_tramp_key = .;				\
 	KEEP(*(.static_call_tramp_key))					\
 	__stop_static_call_tramp_key = .;
+#else
+#define STATIC_CALL_DATA
+#endif

 /*
  * Allow architectures to handle ro_after_init data on their
--
cgit v1.2.3

From f2d3a250897133cc36c13a641bd6a9b4dd5ad234 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 8 Mar 2022 16:30:13 +0100
Subject: objtool: Add --dry-run

Add a --dry-run argument to skip writing the modifications. This is
convenient for debugging.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Kees Cook
Reviewed-by: Miroslav Benes
Acked-by: Josh Poimboeuf
Link: https://lore.kernel.org/r/20220308154317.282720146@infradead.org
---
 tools/objtool/builtin-check.c           | 3 ++-
 tools/objtool/elf.c                     | 3 +++
 tools/objtool/include/objtool/builtin.h | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 38070f26105b..853af934c9fd 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -20,7 +20,7 @@
 #include

 bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
-     validate_dup, vmlinux, mcount, noinstr, backup, sls;
+     validate_dup, vmlinux, mcount, noinstr, backup, sls, dryrun;

 static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
@@ -46,6 +46,7 @@ const struct option check_options[] = {
 	OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
 	OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
 	OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
+	OPT_BOOLEAN(0, "dry-run", &dryrun, "don't write the modifications"),
 	OPT_END(),
 };

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 4b384c907027..456ac2206404 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -1019,6 +1019,9 @@ int elf_write(struct elf *elf)
 	struct section *sec;
 	Elf_Scn *s;

+	if (dryrun)
+		return 0;
+
 	/* Update changed relocation sections and section headers: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {

diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 89ba869ed08f..7b4b124b9032 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -9,7 +9,7 @@
 extern const struct option check_options[];

 extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
-	    validate_dup, vmlinux, mcount, noinstr, backup, sls;
+	    validate_dup, vmlinux, mcount, noinstr, backup, sls, dryrun;

 extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
--
cgit v1.2.3

From 1ffbe4e935f9b7308615c75be990aec07464d1e7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 8 Mar 2022 16:30:14 +0100
Subject: objtool: Default ignore INT3 for unreachable

Ignore all INT3 instructions for unreachable code warnings, similar to
NOP. This allows using INT3 for various paddings instead of NOPs.
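For instance (an illustrative sketch, not part of this patch), the SLS
mitigation pads a RET like so:

	ret
	int3		# SLS trap; unreachable by construction

The int3 is never executed, and with this change it is treated like a
NOP instead of being flagged as unreachable code.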
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.343312938@infradead.org --- tools/objtool/check.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 7c33ec67c4a9..311bfc6922c1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3115,9 +3115,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: - if (next_insn && next_insn->type == INSN_TRAP) { - next_insn->ignore = true; - } else if (sls && !insn->retpoline_safe) { + if (sls && !insn->retpoline_safe && + next_insn && next_insn->type != INSN_TRAP) { WARN_FUNC("missing int3 after ret", insn->sec, insn->offset); } @@ -3164,9 +3163,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: - if (next_insn && next_insn->type == INSN_TRAP) { - next_insn->ignore = true; - } else if (sls && !insn->retpoline_safe) { + if (sls && !insn->retpoline_safe && + next_insn && next_insn->type != INSN_TRAP) { WARN_FUNC("missing int3 after indirect jump", insn->sec, insn->offset); } @@ -3337,7 +3335,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio int i; struct instruction *prev_insn; - if (insn->ignore || insn->type == INSN_NOP) + if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP) return true; /* -- cgit v1.2.3 From 537da1ed54658e916141e50923a7f5b20c728856 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:15 +0100 Subject: objtool,efi: Update __efi64_thunk annotation The current annotation relies on not running objtool on the file; this won't work when running objtool on vmlinux.o. Instead explicitly mark __efi64_thunk() to be ignored. This preserves the status quo, which is somewhat unfortunate. Luckily this code is hardly ever used. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.402118218@infradead.org --- arch/x86/platform/efi/Makefile | 1 - arch/x86/platform/efi/efi_thunk_64.S | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 84b09c230cbd..a50245157685 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y KASAN_SANITIZE := n GCOV_PROFILE := n diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 25799d768624..854dd81804b7 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -20,12 +20,14 @@ */ #include +#include #include #include .text .code64 -SYM_CODE_START(__efi64_thunk) +SYM_FUNC_START(__efi64_thunk) +STACK_FRAME_NON_STANDARD __efi64_thunk push %rbp push %rbx @@ -79,7 +81,7 @@ SYM_CODE_START(__efi64_thunk) 2: pushl $__KERNEL_CS pushl %ebp lret -SYM_CODE_END(__efi64_thunk) +SYM_FUNC_END(__efi64_thunk) .bss .balign 8 -- cgit v1.2.3 From 5cff2086b01526b8c7deacc86473ffbab0cddfa9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:16 +0100 Subject: objtool: Have WARN_FUNC fall back to sym+off Currently WARN_FUNC() either prints func+off and failing that prints sec+off, add an intermediate sym+off. This is useful when playing around with entry code. 
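As a hypothetical illustration (the offset is made up), a warning in a
SYM_CODE region that previously fell back to sec+off:

	.entry.text+0x91: missing int3 after ret

can now name the containing (non-function) symbol:

	entry_SYSCALL_64+0x91: missing int3 after ret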
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Josh Poimboeuf
Link: https://lore.kernel.org/r/20220308154317.461283840@infradead.org
---
 tools/objtool/include/objtool/warn.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index d99c4675e4a5..802cfda0a6f6 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -22,6 +22,8 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 	unsigned long name_off;

 	func = find_func_containing(sec, offset);
+	if (!func)
+		func = find_symbol_containing(sec, offset);
 	if (func) {
 		name = func->name;
 		name_off = offset - func->offset;
--
cgit v1.2.3

From 156ff4a544ae13c5fd6759a09ecb069f7059c1a1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 8 Mar 2022 16:30:17 +0100
Subject: x86/ibt: Base IBT bits

Add Kconfig, Makefile and basic instruction support for x86 IBT.

(Ab)use __DISABLE_EXPORTS to disable IBT since it's already employed
to mark compressed and purgatory. Additionally mark realmode with it
as well to avoid inserting ENDBR instructions there. While ENDBR is
technically a NOP, inserting them was causing some grief due to code
growth. There's also a problem with using __noendbr in code compiled
without -fcf-protection=branch.

Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Josh Poimboeuf
Link: https://lore.kernel.org/r/20220308154317.519875203@infradead.org
---
 arch/x86/Kconfig           | 20 +++++++++++
 arch/x86/Makefile          | 16 +++++++--
 arch/x86/include/asm/ibt.h | 87 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/include/asm/ibt.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f5bd41bf660..19d16c054a96 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1861,6 +1861,26 @@ config X86_UMIP
 	  specific cases in protected and virtual-8086 modes. Emulated
 	  results are dummy.

+config CC_HAS_IBT
+	# GCC >= 9 and binutils >= 2.29
+	# Retpoline check to work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93654
+	# Clang/LLVM >= 14
+	# fentry check to work around https://reviews.llvm.org/D111108
+	def_bool ((CC_IS_GCC && $(cc-option, -fcf-protection=branch -mindirect-branch-register)) || \
+		  (CC_IS_CLANG && $(success,echo "void a(void) {}" | $(CC) -Werror $(CLANG_FLAGS) -fcf-protection=branch -mfentry -pg -x c - -c -o /dev/null))) && \
+		  $(as-instr,endbr64)
+
+config X86_KERNEL_IBT
+	prompt "Indirect Branch Tracking"
+	bool
+	depends on X86_64 && CC_HAS_IBT
+	help
+	  Build the kernel with support for Indirect Branch Tracking, a
+	  hardware-supported coarse-grain forward-edge Control Flow Integrity
+	  protection. It enforces that all indirect calls must land on
+	  an ENDBR instruction; the compiler will instrument the code
+	  with ENDBR instructions to make this happen.
+
 config X86_INTEL_MEMORY_PROTECTION_KEYS
 	prompt "Memory Protection Keys"
 	def_bool y

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e84cdd409b64..f29c2c9c3216 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -36,7 +36,7 @@ endif

 # How to compile the 16-bit code.  Note we always compile for -march=i386;
 # that way we can complain to the user if the CPU is insufficient.
-REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \ +REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) @@ -62,8 +62,20 @@ export BITS # KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -# Intel CET isn't enabled in the kernel +ifeq ($(CONFIG_X86_KERNEL_IBT),y) +# +# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate +# NOTRACK prefixes. Current generation compilers unconditionally employ NOTRACK +# for jump-tables, as such, disable jump-tables for now. +# +# (jump-tables are implicitly disabled by RETPOLINE) +# +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 +# +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) +else KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) +endif ifeq ($(CONFIG_X86_32),y) BITS := 32 diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h new file mode 100644 index 000000000000..fcaf6a41d373 --- /dev/null +++ b/arch/x86/include/asm/ibt.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IBT_H +#define _ASM_X86_IBT_H + +#include + +/* + * The rules for enabling IBT are: + * + * - CC_HAS_IBT: the toolchain supports it + * - X86_KERNEL_IBT: it is selected in Kconfig + * - !__DISABLE_EXPORTS: this is regular kernel code + * + * Esp. that latter one is a bit non-obvious, but some code like compressed, + * purgatory, realmode etc.. is built with custom CFLAGS that do not include + * -fcf-protection=branch and things will go *bang*. + * + * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0. + */ +#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS) + +#define HAS_KERNEL_IBT 1 + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_64 +#define ASM_ENDBR "endbr64\n\t" +#else +#define ASM_ENDBR "endbr32\n\t" +#endif + +#define __noendbr __attribute__((nocf_check)) + +static inline __attribute_const__ u32 gen_endbr(void) +{ + u32 endbr; + + /* + * Generate ENDBR64 in a way that is sure to not result in + * an ENDBR64 instruction as immediate. + */ + asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t" + "not %[endbr]\n\t" + : [endbr] "=&r" (endbr) ); + + return endbr; +} + +static inline bool is_endbr(u32 val) +{ + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ + return val == gen_endbr(); +} + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_64 +#define ENDBR endbr64 +#else +#define ENDBR endbr32 +#endif + +#endif /* __ASSEMBLY__ */ + +#else /* !IBT */ + +#define HAS_KERNEL_IBT 0 + +#ifndef __ASSEMBLY__ + +#define ASM_ENDBR + +#define __noendbr + +static inline bool is_endbr(u32 val) { return false; } + +#else /* __ASSEMBLY__ */ + +#define ENDBR + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_X86_KERNEL_IBT */ + +#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT) + +#endif /* _ASM_X86_IBT_H */ -- cgit v1.2.3 From c8c301abeae58ec756b8fcb2178a632bd3c9e284 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:18 +0100 Subject: x86/ibt: Add ANNOTATE_NOENDBR In order to have objtool warn about code references to !ENDBR instruction, we need an annotation to allow this for non-control-flow instances -- consider text range checks, text patching, or return trampolines etc. 
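For example, a later patch in this series applies it to the Xen iret
trampoline, whose address is taken for text patching but which is never
an indirect-branch target (abridged):

	SYM_CODE_START(xen_iret)
		UNWIND_HINT_EMPTY
		ANNOTATE_NOENDBR	/* code reference, not an indirect-branch target */
		pushq $0
		jmp hypercall_iret
	SYM_CODE_END(xen_iret)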
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.578968224@infradead.org --- include/linux/objtool.h | 16 ++++++++++++++++ tools/include/linux/objtool.h | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/linux/objtool.h b/include/linux/objtool.h index aca52db2f3f3..f797368820c8 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -77,6 +77,12 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD_FP(func) #endif +#define ANNOTATE_NOENDBR \ + "986: \n\t" \ + ".pushsection .discard.noendbr\n\t" \ + _ASM_PTR " 986b\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -129,6 +135,13 @@ struct unwind_hint { .popsection .endm +.macro ANNOTATE_NOENDBR +.Lhere_\@: + .pushsection .discard.noendbr + .quad .Lhere_\@ + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -139,12 +152,15 @@ struct unwind_hint { "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) +#define ANNOTATE_NOENDBR #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm +.macro ANNOTATE_NOENDBR +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index aca52db2f3f3..f797368820c8 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -77,6 +77,12 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD_FP(func) #endif +#define ANNOTATE_NOENDBR \ + "986: \n\t" \ + ".pushsection .discard.noendbr\n\t" \ + _ASM_PTR " 986b\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -129,6 +135,13 @@ struct unwind_hint { .popsection .endm +.macro ANNOTATE_NOENDBR +.Lhere_\@: + .pushsection .discard.noendbr + .quad .Lhere_\@ + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -139,12 +152,15 @@ struct unwind_hint { "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) +#define ANNOTATE_NOENDBR #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm +.macro ANNOTATE_NOENDBR +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ -- cgit v1.2.3 From bbf92368b0b1fe472d489e62d3340d7897e9c697 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:19 +0100 Subject: x86/text-patching: Make text_gen_insn() play nice with ANNOTATE_NOENDBR Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.638561109@infradead.org --- arch/x86/include/asm/text-patching.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index b7421780e4e9..1c4cfb1c6e4f 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -101,13 +101,21 @@ void *text_gen_insn(u8 opcode, const void *addr, const void *dest) static union text_poke_insn insn; /* per instance */ int size = text_opcode_size(opcode); + /* + * Hide the addresses to avoid the compiler folding in constants when + * referencing code, these can mess up annotations like + * ANNOTATE_NOENDBR. 
+ */ + OPTIMIZER_HIDE_VAR(addr); + OPTIMIZER_HIDE_VAR(dest); + insn.opcode = opcode; if (size > 1) { insn.disp = (long)dest - (long)(addr + size); if (size == 2) { /* - * Ensure that for JMP9 the displacement + * Ensure that for JMP8 the displacement * actually fits the signed byte. */ BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); -- cgit v1.2.3 From ba27d1a80871eb8dbeddf34ec7d396c149cbb8d7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:20 +0100 Subject: x86/ibt,paravirt: Use text_gen_insn() for paravirt_patch() Less duplication is more better. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.697253958@infradead.org --- arch/x86/include/asm/text-patching.h | 20 ++++++++++++++------ arch/x86/kernel/paravirt.c | 23 +++-------------------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 1c4cfb1c6e4f..c6015b407461 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -96,32 +96,40 @@ union text_poke_insn { }; static __always_inline -void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size) { - static union text_poke_insn insn; /* per instance */ - int size = text_opcode_size(opcode); + union text_poke_insn *insn = buf; + + BUG_ON(size < text_opcode_size(opcode)); /* * Hide the addresses to avoid the compiler folding in constants when * referencing code, these can mess up annotations like * ANNOTATE_NOENDBR. */ + OPTIMIZER_HIDE_VAR(insn); OPTIMIZER_HIDE_VAR(addr); OPTIMIZER_HIDE_VAR(dest); - insn.opcode = opcode; + insn->opcode = opcode; if (size > 1) { - insn.disp = (long)dest - (long)(addr + size); + insn->disp = (long)dest - (long)(addr + size); if (size == 2) { /* * Ensure that for JMP8 the displacement * actually fits the signed byte. 
*/ - BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); + BUG_ON((insn->disp >> 31) != (insn->disp >> 7)); } } +} +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode)); return &insn.text; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4420499f7bb4..06af2cf5181c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -69,29 +69,12 @@ noinstr void paravirt_BUG(void) BUG(); } -struct branch { - unsigned char opcode; - u32 delta; -} __attribute__((packed)); - static unsigned paravirt_patch_call(void *insn_buff, const void *target, unsigned long addr, unsigned len) { - const int call_len = 5; - struct branch *b = insn_buff; - unsigned long delta = (unsigned long)target - (addr+call_len); - - if (len < call_len) { - pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr); - /* Kernel might not be viable if patching fails, bail out: */ - BUG_ON(1); - } - - b->opcode = 0xe8; /* call */ - b->delta = delta; - BUILD_BUG_ON(sizeof(*b) != call_len); - - return call_len; + __text_gen_insn(insn_buff, CALL_INSN_OPCODE, + (void *)addr, target, CALL_INSN_SIZE); + return CALL_INSN_SIZE; } #ifdef CONFIG_PARAVIRT_XXL -- cgit v1.2.3 From 6cf3e4c0d29102c74aca1ce0c1710be9d02e440e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:21 +0100 Subject: x86/entry: Cleanup PARAVIRT Since commit 5c8f6a2e316e ("x86/xen: Add xenpv_restore_regs_and_return_to_usermode()") Xen will no longer reach this code and we can do away with the paravirt SWAPGS/INTERRUPT_RETURN. Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.756014488@infradead.org --- arch/x86/entry/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 466df3e50276..22e4e9aafc34 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -608,8 +608,8 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi - SWAPGS - INTERRUPT_RETURN + swapgs + jmp native_iret SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) -- cgit v1.2.3 From 8b87d8cec1b31ea710568ae49ba5f5146318da0d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:22 +0100 Subject: x86/entry,xen: Early rewrite of restore_regs_and_return_to_kernel() By doing an early rewrite of 'jmp native_iret` in restore_regs_and_return_to_kernel() we can get rid of the last INTERRUPT_RETURN user and paravirt_iret. Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.815039833@infradead.org --- arch/x86/entry/entry_64.S | 11 ++++++++--- arch/x86/include/asm/irqflags.h | 5 ----- arch/x86/include/asm/paravirt_types.h | 1 - arch/x86/kernel/head_64.S | 3 ++- arch/x86/kernel/paravirt.c | 4 ---- arch/x86/xen/enlighten_pv.c | 7 ++++++- arch/x86/xen/xen-asm.S | 1 + 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 22e4e9aafc34..f731393b5af4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -609,7 +609,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. 
*/ popq %rdi swapgs - jmp native_iret + jmp .Lnative_iret SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) @@ -626,9 +626,14 @@ SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler. */ - INTERRUPT_RETURN +#ifdef CONFIG_XEN_PV +SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + .byte 0xe9 + .long .Lnative_iret - (. + 4) +#endif -SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) +.Lnative_iret: UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 87761396e8cc..111104d1c2cd 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -141,13 +141,8 @@ static __always_inline void arch_local_irq_restore(unsigned long flags) #ifdef CONFIG_X86_64 #ifdef CONFIG_XEN_PV #define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV -#define INTERRUPT_RETURN \ - ANNOTATE_RETPOLINE_SAFE; \ - ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);", \ - X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;") #else #define SWAPGS swapgs -#define INTERRUPT_RETURN jmp native_iret #endif #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a69012e1903f..7cd2874628a0 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -272,7 +272,6 @@ struct paravirt_patch_template { extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; -extern void (*paravirt_iret)(void); #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9c63fc5988cd..023761cd6903 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -345,7 +345,6 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) /* Remove Error Code */ addq $8, %rsp - /* Pure iret required here - don't use INTERRUPT_RETURN */ iretq SYM_CODE_END(vc_boot_ghcb) #endif @@ -426,6 +425,8 @@ SYM_CODE_END(early_idt_handler_common) * early_idt_handler_array can't be used because it returns via the * paravirtualized INTERRUPT_RETURN and pv-ops don't work that early. * + * XXX it does, fix this. + * * This handler will end up in the .init.text section and not be * available to boot secondary CPUs. 
*/ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 06af2cf5181c..6ecbbb07b650 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -132,8 +132,6 @@ void paravirt_set_sched_clock(u64 (*func)(void)) } /* These are in entry.S */ -extern void native_iret(void); - static struct resource reserve_ioports = { .start = 0, .end = IO_SPACE_LIMIT, @@ -397,8 +395,6 @@ struct paravirt_patch_template pv_ops = { #ifdef CONFIG_PARAVIRT_XXL NOKPROBE_SYMBOL(native_load_idt); - -void (*paravirt_iret)(void) = native_iret; #endif EXPORT_SYMBOL(pv_ops); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index d47c3d176ae4..3c7a55c43f13 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1177,6 +1177,8 @@ static void __init xen_domu_set_legacy_features(void) x86_platform.legacy.rtc = 0; } +extern void early_xen_iret_patch(void); + /* First C function to be called on Xen boot */ asmlinkage __visible void __init xen_start_kernel(void) { @@ -1187,6 +1189,10 @@ asmlinkage __visible void __init xen_start_kernel(void) if (!xen_start_info) return; + __text_gen_insn(&early_xen_iret_patch, + JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, + JMP32_INSN_SIZE); + xen_domain_type = XEN_PV_DOMAIN; xen_start_flags = xen_start_info->flags; @@ -1195,7 +1201,6 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; pv_ops.cpu = xen_cpu_ops.cpu; - paravirt_iret = xen_iret; xen_init_irq_ops(); /* diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index e730e6200e64..ee17b94e3fc0 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -189,6 +189,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 */ SYM_CODE_START(xen_iret) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR pushq $0 jmp hypercall_iret SYM_CODE_END(xen_iret) -- cgit v1.2.3 From 5b2fc51576eff811a614e33cbbd0c3cb05022892 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:23 +0100 Subject: x86/ibt,xen: Sprinkle the ENDBR Even though Xen currently doesn't advertise IBT, prepare for when it will eventually do so and sprinkle the ENDBR dust accordingly. Even though most of the entry points are IRET like, the CPL0 Hypervisor can set WAIT-FOR-ENDBR and demand ENDBR at these sites. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.873919996@infradead.org --- arch/x86/entry/entry_64.S | 1 + arch/x86/include/asm/segment.h | 2 +- arch/x86/kernel/head_64.S | 1 + arch/x86/xen/enlighten_pv.c | 3 +++ arch/x86/xen/xen-asm.S | 9 +++++++++ arch/x86/xen/xen-head.S | 9 +++++++-- 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f731393b5af4..3fd38286302d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -809,6 +809,7 @@ SYM_CODE_END(exc_xen_hypervisor_callback) */ SYM_CODE_START(xen_failsafe_callback) UNWIND_HINT_EMPTY + ENDBR movl %ds, %ecx cmpw %cx, 0x10(%rsp) jne 1f diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index b228c9d44ee7..3a31d4ea61ea 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -283,7 +283,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to * max 8 bytes. 
*/ -#define XEN_EARLY_IDT_HANDLER_SIZE 8 +#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 023761cd6903..990960a8bdb4 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -383,6 +383,7 @@ SYM_CODE_START(early_idt_handler_array) .endr UNWIND_HINT_IRET_REGS offset=16 SYM_CODE_END(early_idt_handler_array) + ANNOTATE_NOENDBR // early_idt_handler_array[NUM_EXCEPTION_VECTORS] SYM_CODE_START_LOCAL(early_idt_handler_common) /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3c7a55c43f13..5038edb79ad5 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -624,6 +624,9 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_coprocessor_error, false ), TRAP_ENTRY(exc_alignment_check, false ), TRAP_ENTRY(exc_simd_coprocessor_error, false ), +#ifdef CONFIG_X86_KERNEL_IBT + TRAP_ENTRY(exc_control_protection, false ), +#endif }; static bool __ref get_trap_addr(void **addr, unsigned int ist) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index ee17b94e3fc0..caa9bc2fa100 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -122,6 +122,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) UNWIND_HINT_EMPTY + ENDBR pop %rcx pop %r11 jmp \name @@ -147,6 +148,9 @@ xen_pv_trap asm_exc_page_fault xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_alignment_check +#ifdef CONFIG_X86_KERNEL_IBT +xen_pv_trap asm_exc_control_protection +#endif #ifdef CONFIG_X86_MCE xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ @@ -162,6 +166,7 @@ SYM_CODE_START(xen_early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS UNWIND_HINT_EMPTY + ENDBR pop %rcx pop %r11 jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE @@ -231,6 +236,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) /* Normal 64-bit system call target */ SYM_CODE_START(xen_syscall_target) UNWIND_HINT_EMPTY + ENDBR popq %rcx popq %r11 @@ -250,6 +256,7 @@ SYM_CODE_END(xen_syscall_target) /* 32-bit compat syscall target */ SYM_CODE_START(xen_syscall32_target) UNWIND_HINT_EMPTY + ENDBR popq %rcx popq %r11 @@ -267,6 +274,7 @@ SYM_CODE_END(xen_syscall32_target) /* 32-bit compat sysenter target */ SYM_CODE_START(xen_sysenter_target) UNWIND_HINT_EMPTY + ENDBR /* * NB: Xen is polite and clears TF from EFLAGS for us. This means * that we don't need to guard against single step exceptions here. @@ -290,6 +298,7 @@ SYM_CODE_END(xen_sysenter_target) SYM_CODE_START(xen_syscall32_target) SYM_CODE_START(xen_sysenter_target) UNWIND_HINT_EMPTY + ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 11d286529fe5..ac17196e2518 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -25,8 +25,12 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC - .skip 31, 0x90 - RET + ANNOTATE_NOENDBR + ret + /* + * Xen will write the hypercall page, and sort out ENDBR. 
+ */ + .skip 31, 0xcc .endr #define HYPERCALL(n) \ @@ -74,6 +78,7 @@ SYM_CODE_END(startup_xen) .pushsection .text SYM_CODE_START(asm_cpu_bringup_and_idle) UNWIND_HINT_EMPTY + ENDBR call cpu_bringup_and_idle SYM_CODE_END(asm_cpu_bringup_and_idle) -- cgit v1.2.3 From 8f93402b92d443573d310250efa0b7f352fec992 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:24 +0100 Subject: x86/ibt,entry: Sprinkle ENDBR dust Kernel entry points should be having ENDBR on for IBT configs. The SYSCALL entry points are found through taking their respective address in order to program them in the MSRs, while the exception entry points are found through UNWIND_HINT_IRET_REGS. The rule is that any UNWIND_HINT_IRET_REGS at sym+0 should have an ENDBR, see the later objtool ibt validation patch. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.933157479@infradead.org --- arch/x86/entry/entry_64.S | 7 +++++++ arch/x86/entry/entry_64_compat.S | 3 +++ arch/x86/include/asm/idtentry.h | 20 +++++++++++--------- arch/x86/include/asm/segment.h | 3 ++- arch/x86/kernel/head_64.S | 4 +++- arch/x86/kernel/idt.c | 5 +++-- 6 files changed, 29 insertions(+), 13 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3fd38286302d..50b61186f8b2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -86,6 +86,7 @@ SYM_CODE_START(entry_SYSCALL_64) UNWIND_HINT_EMPTY + ENDBR swapgs /* tss.sp2 is scratch space. */ @@ -350,6 +351,7 @@ SYM_CODE_END(ret_from_fork) .macro idtentry vector asmsym cfunc has_error_code:req SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + ENDBR ASM_CLAC .if \has_error_code == 0 @@ -417,6 +419,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_mce_db vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -472,6 +475,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_vc vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC /* @@ -533,6 +537,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_df vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=8 + ENDBR ASM_CLAC /* paranoid_entry returns GS information for paranoid_exit in EBX. */ @@ -1069,6 +1074,7 @@ SYM_CODE_END(error_return) */ SYM_CODE_START(asm_exc_nmi) UNWIND_HINT_IRET_REGS + ENDBR /* * We allow breakpoints in NMIs. If a breakpoint occurs, then @@ -1427,6 +1433,7 @@ SYM_CODE_END(asm_exc_nmi) */ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY + ENDBR mov $-ENOSYS, %eax sysretl SYM_CODE_END(ignore_sysret) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0051cf5c792d..35a0e69cf387 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -48,6 +48,7 @@ */ SYM_CODE_START(entry_SYSENTER_compat) UNWIND_HINT_EMPTY + ENDBR /* Interrupts are off on entry. */ SWAPGS @@ -198,6 +199,7 @@ SYM_CODE_END(entry_SYSENTER_compat) */ SYM_CODE_START(entry_SYSCALL_compat) UNWIND_HINT_EMPTY + ENDBR /* Interrupts are off on entry. */ swapgs @@ -340,6 +342,7 @@ SYM_CODE_END(entry_SYSCALL_compat) */ SYM_CODE_START(entry_INT80_compat) UNWIND_HINT_EMPTY + ENDBR /* * Interrupts are off on entry. 
*/ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 1345088e9902..f84280ab213c 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -5,6 +5,8 @@ /* Interrupts/Exceptions */ #include +#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) + #ifndef __ASSEMBLY__ #include #include @@ -480,7 +482,7 @@ __visible noinstr void func(struct pt_regs *regs, \ /* * ASM code to emit the common vector entry stubs where each stub is - * packed into 8 bytes. + * packed into IDT_ALIGN bytes. * * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because * GCC treats the local vector variable as unsigned int and would expand @@ -492,33 +494,33 @@ __visible noinstr void func(struct pt_regs *regs, \ * point is to mask off the bits above bit 7 because the push is sign * extending. */ - .align 8 + .align IDT_ALIGN SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept NR_EXTERNAL_VECTORS UNWIND_HINT_IRET_REGS 0 : + ENDBR .byte 0x6a, vector jmp asm_common_interrupt - nop - /* Ensure that the above is 8 bytes max */ - . = 0b + 8 + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc vector = vector+1 .endr SYM_CODE_END(irq_entries_start) #ifdef CONFIG_X86_LOCAL_APIC - .align 8 + .align IDT_ALIGN SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR .rept NR_SYSTEM_VECTORS UNWIND_HINT_IRET_REGS 0 : + ENDBR .byte 0x6a, vector jmp asm_spurious_interrupt - nop - /* Ensure that the above is 8 bytes max */ - . = 0b + 8 + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc vector = vector+1 .endr SYM_CODE_END(spurious_entries_start) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 3a31d4ea61ea..656ed6531d03 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -4,6 +4,7 @@ #include #include +#include /* * Constructor for a conventional segment GDT (or LDT) entry. @@ -275,7 +276,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * vector has no error code (two bytes), a 'push $vector_number' (two * bytes), and a jump to the common entry code (up to five bytes). */ -#define EARLY_IDT_HANDLER_SIZE 9 +#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE) /* * xen_early_idt_handler_array is for Xen pv guests: for each entry in diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 990960a8bdb4..9b6fa760e1df 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -371,9 +371,11 @@ SYM_CODE_START(early_idt_handler_array) .rept NUM_EXCEPTION_VECTORS .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 UNWIND_HINT_IRET_REGS + ENDBR pushq $0 # Dummy error code, to make stack frame uniform .else UNWIND_HINT_IRET_REGS offset=8 + ENDBR .endif pushq $i # 72(%rsp) Vector number jmp early_idt_handler_common @@ -381,11 +383,11 @@ SYM_CODE_START(early_idt_handler_array) i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr - UNWIND_HINT_IRET_REGS offset=16 SYM_CODE_END(early_idt_handler_array) ANNOTATE_NOENDBR // early_idt_handler_array[NUM_EXCEPTION_VECTORS] SYM_CODE_START_LOCAL(early_idt_handler_common) + UNWIND_HINT_IRET_REGS offset=16 /* * The stack is the hardware frame, an error code or zero, and the * vector number. 
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index df0fa695bb09..7676e3444c83 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -10,6 +10,7 @@ #include #include #include +#include #define DPL0 0x0 #define DPL3 0x3 @@ -272,7 +273,7 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { - entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + entry = irq_entries_start + IDT_ALIGN * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } @@ -283,7 +284,7 @@ void __init idt_setup_apic_and_irq_gates(void) * system_vectors bitmap. Otherwise they show up in * /proc/interrupts. */ - entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); + entry = spurious_entries_start + IDT_ALIGN * (i - FIRST_SYSTEM_VECTOR); set_intr_gate(i, entry); } #endif -- cgit v1.2.3 From c4691712b546b93707a408368e3c57ae870a2dc4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:25 +0100 Subject: x86/linkage: Add ENDBR to SYM_FUNC_START*() Ensure the ASM functions have ENDBR on for IBT builds, this follows the ARM64 example. Unlike ARM64, we'll likely end up overwriting them with poison. Suggested-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.992708941@infradead.org --- arch/x86/include/asm/linkage.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 030907922bd0..85865f1645bd 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -3,6 +3,7 @@ #define _ASM_X86_LINKAGE_H #include +#include #undef notrace #define notrace __attribute__((no_instrument_function)) @@ -34,5 +35,35 @@ #endif /* __ASSEMBLY__ */ +/* SYM_FUNC_START -- use for global functions */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + ENDBR + #endif /* _ASM_X86_LINKAGE_H */ -- cgit v1.2.3 From c3b037917c6a4cbb09ab2d6ccf19f02335ad1847 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:26 +0100 Subject: x86/ibt,paravirt: Sprinkle ENDBR Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.051635891@infradead.org --- arch/x86/include/asm/paravirt.h | 1 + arch/x86/include/asm/qspinlock_paravirt.h | 3 +++ arch/x86/kernel/kvm.c | 3 ++- arch/x86/kernel/paravirt.c | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 
0d76502cc6f5..964442b99245 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -666,6 +666,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ PV_THUNK_NAME(func) ":" \ + ASM_ENDBR \ FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 1474cf96251d..892fd8c3a6f7 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -2,6 +2,8 @@ #ifndef __ASM_QSPINLOCK_PARAVIRT_H #define __ASM_QSPINLOCK_PARAVIRT_H +#include + /* * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit * registers. For i386, however, only 1 32-bit register needs to be saved @@ -39,6 +41,7 @@ asm (".pushsection .text;" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" PV_UNLOCK ": " + ASM_ENDBR FRAME_BEGIN "push %rdx;" "mov $0x1,%eax;" diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d77481ecb0d5..79e0b8d63ffa 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -1029,10 +1029,11 @@ asm( ".global __raw_callee_save___kvm_vcpu_is_preempted;" ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" "__raw_callee_save___kvm_vcpu_is_preempted:" +ASM_ENDBR "movq __per_cpu_offset(,%rdi,8), %rax;" "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" "setne %al;" -"ret;" +ASM_RET ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" ".popsection"); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6ecbbb07b650..7ca2d46c08cc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,6 +41,7 @@ extern void _paravirt_nop(void); asm (".pushsection .entry.text, \"ax\"\n" ".global _paravirt_nop\n" "_paravirt_nop:\n\t" + ASM_ENDBR ASM_RET ".size _paravirt_nop, . - _paravirt_nop\n\t" ".type _paravirt_nop, @function\n\t" @@ -50,6 +51,7 @@ asm (".pushsection .entry.text, \"ax\"\n" asm (".pushsection .entry.text, \"ax\"\n" ".global paravirt_ret0\n" "paravirt_ret0:\n\t" + ASM_ENDBR "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" ASM_RET ".size paravirt_ret0, . - paravirt_ret0\n\t" -- cgit v1.2.3 From 214b9a83b617367d53680812ad09687542370b8e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:27 +0100 Subject: x86/ibt,crypto: Add ENDBR for the jump-table entries The code does: ## branch into array mov jump_table(,%rax,8), %bufp JMP_NOSPEC bufp resulting in needing to mark the jump-table entries with ENDBR. 
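Schematically (abridged from the diff below), each jump-table target now
opens with an IBT landing pad:

		## branch into array
		mov	jump_table(,%rax,8), %bufp
		JMP_NOSPEC bufp			# indirect branch into the table

	LABEL crc_ %i
		ENDBR				# landing pad required under IBT
		crc32q	-i*8(block_0), crc_init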
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.110500806@infradead.org --- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 80c0d22fc42c..ec35915f0901 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -195,6 +195,7 @@ crc_array: .altmacro LABEL crc_ %i .noaltmacro + ENDBR crc32q -i*8(block_0), crc_init crc32q -i*8(block_1), crc1 crc32q -i*8(block_2), crc2 @@ -204,6 +205,7 @@ LABEL crc_ %i .altmacro LABEL crc_ %i .noaltmacro + ENDBR crc32q -i*8(block_0), crc_init crc32q -i*8(block_1), crc1 # SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet @@ -237,6 +239,7 @@ LABEL crc_ %i ################################################################ LABEL crc_ 0 + ENDBR mov tmp, len cmp $128*24, tmp jae full_block -- cgit v1.2.3 From 6649fa876da4c505548b8e8945a6fc48e62e427c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:28 +0100 Subject: x86/ibt,kvm: Add ENDBR to fastops Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.168850084@infradead.org --- arch/x86/kvm/emulate.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5719d8cfdbd9..08c4e9c1a382 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,7 +189,7 @@ #define X16(x...) X8(x), X8(x) #define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE 8 +#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) struct opcode { u64 flags; @@ -311,7 +311,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ - name ":\n\t" + name ":\n\t" \ + ASM_ENDBR #define FOP_FUNC(name) \ __FOP_FUNC(#name) @@ -433,6 +434,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); ".align 4 \n\t" \ ".type " #op ", @function \n\t" \ #op ": \n\t" \ + ASM_ENDBR \ #op " %al \n\t" \ __FOP_RET(#op) -- cgit v1.2.3 From aebfd12521d9c7d0b502cf6d06314cfbcdccfe3b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:29 +0100 Subject: x86/ibt,ftrace: Search for __fentry__ location Currently a lot of ftrace code assumes __fentry__ is at sym+0. However with Intel IBT enabled the first instruction of a function will most likely be ENDBR. Change ftrace_location() to not only return the __fentry__ location when called for the __fentry__ location, but also when called for the sym+0 location. Then audit/update all callsites of this function to consistently use these new semantics. 
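A sketch of the new semantics, assuming IBT puts ENDBR at sym+0 and the
__fentry__ call at sym+4 (offsets illustrative):

	ftrace_location(sym);		/* sym+0: returns sym + 4	*/
	ftrace_location(sym + 4);	/* the fentry site: returns sym + 4 */
	ftrace_location(sym + 8);	/* inside the body: returns 0	*/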
Suggested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.227581603@infradead.org --- arch/x86/kernel/kprobes/core.c | 11 ++-------- kernel/bpf/trampoline.c | 20 ++++-------------- kernel/kprobes.c | 8 ++----- kernel/trace/ftrace.c | 48 +++++++++++++++++++++++++++++++++++------- 4 files changed, 48 insertions(+), 39 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6290712cb36d..4d8086a1627e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -193,17 +193,10 @@ static unsigned long __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; - unsigned long faddr; + bool faddr; kp = get_kprobe((void *)addr); - faddr = ftrace_location(addr); - /* - * Addresses inside the ftrace location are refused by - * arch_check_ftrace_location(). Something went terribly wrong - * if such an address is checked here. - */ - if (WARN_ON(faddr && faddr != addr)) - return 0UL; + faddr = ftrace_location(addr) == addr; /* * Use the current code if it is not modified by Kprobe * and it cannot be modified by ftrace. diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5e7edf913060..455f1b4f312e 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -117,18 +117,6 @@ static void bpf_trampoline_module_put(struct bpf_trampoline *tr) tr->mod = NULL; } -static int is_ftrace_location(void *ip) -{ - long addr; - - addr = ftrace_location((long)ip); - if (!addr) - return 0; - if (WARN_ON_ONCE(addr != (long)ip)) - return -EFAULT; - return 1; -} - static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) { void *ip = tr->func.addr; @@ -160,12 +148,12 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad static int register_fentry(struct bpf_trampoline *tr, void *new_addr) { void *ip = tr->func.addr; + unsigned long faddr; int ret; - ret = is_ftrace_location(ip); - if (ret < 0) - return ret; - tr->func.ftrace_managed = ret; + faddr = ftrace_location((unsigned long)ip); + if (faddr) + tr->func.ftrace_managed = true; if (bpf_trampoline_module_get(tr)) return -ENOENT; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 94cab8c9ce56..6d1e11cda4f1 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1562,14 +1562,10 @@ static inline int warn_kprobe_rereg(struct kprobe *p) static int check_ftrace_location(struct kprobe *p) { - unsigned long ftrace_addr; + unsigned long addr = (unsigned long)p->addr; - ftrace_addr = ftrace_location((unsigned long)p->addr); - if (ftrace_addr) { + if (ftrace_location(addr) == addr) { #ifdef CONFIG_KPROBES_ON_FTRACE - /* Given address is not on the instruction boundary */ - if ((unsigned long)p->addr != ftrace_addr) - return -EILSEQ; p->flags |= KPROBE_FLAG_FTRACE; #else /* !CONFIG_KPROBES_ON_FTRACE */ return -EINVAL; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6105b7036482..2ae6fb9a5b12 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1568,17 +1568,34 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) } /** - * ftrace_location - return true if the ip giving is a traced location + * ftrace_location - return the ftrace location * @ip: the instruction pointer to check * - * Returns rec->ip if @ip given is a pointer to a ftrace location. - * That is, the instruction that is either a NOP or call to - * the function tracer. 
It checks the ftrace internal tables to - * determine if the address belongs or not. + * If @ip matches the ftrace location, return @ip. + * If @ip matches sym+0, return sym's ftrace location. + * Otherwise, return 0. */ unsigned long ftrace_location(unsigned long ip) { - return ftrace_location_range(ip, ip); + struct dyn_ftrace *rec; + unsigned long offset; + unsigned long size; + + rec = lookup_rec(ip, ip); + if (!rec) { + if (!kallsyms_lookup_size_offset(ip, &size, &offset)) + goto out; + + /* map sym+0 to __fentry__ */ + if (!offset) + rec = lookup_rec(ip, ip + size - 1); + } + + if (rec) + return rec->ip; + +out: + return 0; } /** @@ -4962,7 +4979,8 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) { struct ftrace_func_entry *entry; - if (!ftrace_location(ip)) + ip = ftrace_location(ip); + if (!ip) return -EINVAL; if (remove) { @@ -5110,11 +5128,16 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) struct ftrace_func_entry *entry; struct ftrace_hash *free_hash = NULL; struct dyn_ftrace *rec; - int ret = -EBUSY; + int ret = -ENODEV; mutex_lock(&direct_mutex); + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + /* See if there's a direct function at @ip already */ + ret = -EBUSY; if (ftrace_find_rec_direct(ip)) goto out_unlock; @@ -5222,6 +5245,10 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) mutex_lock(&direct_mutex); + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, NULL); if (!entry) goto out_unlock; @@ -5354,6 +5381,11 @@ int modify_ftrace_direct(unsigned long ip, mutex_lock(&direct_mutex); mutex_lock(&ftrace_lock); + + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, &rec); if (!entry) goto out_unlock; -- cgit v1.2.3 From d15cb3dab1e4f00e29599a4f5e1f6678a530d270 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:30 +0100 Subject: x86/livepatch: Validate __fentry__ location Currently livepatch assumes __fentry__ lives at func+0, which is most likely untrue with IBT on. Instead make it use ftrace_location() by default which both validates and finds the actual ip if there is any in the same symbol. Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.285971256@infradead.org --- arch/powerpc/include/asm/livepatch.h | 10 ---------- kernel/livepatch/patch.c | 19 ++----------------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4fe018cc207b..7b9dcd51af32 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -19,16 +19,6 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) regs_set_return_ip(regs, ip); } -#define klp_get_ftrace_location klp_get_ftrace_location -static inline unsigned long klp_get_ftrace_location(unsigned long faddr) -{ - /* - * Live patch works only with -mprofile-kernel on PPC. In this case, - * the ftrace location is always within the first 16 bytes. 
- */ - return ftrace_location_range(faddr, faddr + 16); -} - static inline void klp_init_thread_info(struct task_struct *p) { /* + 1 to account for STACK_END_MAGIC */ diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index fe316c021d73..c172bf92b576 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -124,19 +124,6 @@ unlock: ftrace_test_recursion_unlock(bit); } -/* - * Convert a function address into the appropriate ftrace location. - * - * Usually this is just the address of the function, but on some architectures - * it's more complicated so allow them to provide a custom behaviour. - */ -#ifndef klp_get_ftrace_location -static unsigned long klp_get_ftrace_location(unsigned long faddr) -{ - return faddr; -} -#endif - static void klp_unpatch_func(struct klp_func *func) { struct klp_ops *ops; @@ -153,8 +140,7 @@ static void klp_unpatch_func(struct klp_func *func) if (list_is_singular(&ops->func_stack)) { unsigned long ftrace_loc; - ftrace_loc = - klp_get_ftrace_location((unsigned long)func->old_func); + ftrace_loc = ftrace_location((unsigned long)func->old_func); if (WARN_ON(!ftrace_loc)) return; @@ -186,8 +172,7 @@ static int klp_patch_func(struct klp_func *func) if (!ops) { unsigned long ftrace_loc; - ftrace_loc = - klp_get_ftrace_location((unsigned long)func->old_func); + ftrace_loc = ftrace_location((unsigned long)func->old_func); if (!ftrace_loc) { pr_err("failed to find location for function '%s'\n", func->old_name); -- cgit v1.2.3 From e52fc2cf3f662828cc0d51c4b73bed73ad275fce Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:31 +0100 Subject: x86/ibt,ftrace: Make function-graph play nice Return trampoline must not use indirect branch to return; while this preserves the RSB, it is fundamentally incompatible with IBT. Instead use a retpoline like ROP gadget that defeats IBT while not unbalancing the RSB. And since ftrace_stub is no longer a plain RET, don't use it to copy from. Since RET is a trivial instruction, poke it directly. 
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.347296408@infradead.org --- arch/x86/kernel/ftrace.c | 9 ++------- arch/x86/kernel/ftrace_64.S | 21 +++++++++++++++++---- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 7cc540e6de0c..1e31c7d21597 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -316,12 +316,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long offset; unsigned long npages; unsigned long size; - unsigned long retq; unsigned long *ptr; void *trampoline; void *ip; /* 48 8b 15 is movq (%rip), %rdx */ unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; + unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; union ftrace_op_code_union op_ptr; int ret; @@ -359,12 +359,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - - /* The trampoline ends with ret(q) */ - retq = (unsigned long)ftrace_stub; - ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); - if (WARN_ON(ret < 0)) - goto fail; + memcpy(ip, retq, RET_SIZE); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 11ac028e30e4..e32b5cd6dc15 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -176,10 +176,10 @@ SYM_FUNC_END(ftrace_caller); SYM_FUNC_START(ftrace_epilogue) /* * This is weak to keep gas from relaxing the jumps. - * It is also used to copy the RET for trampolines. */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) UNWIND_HINT_FUNC + ENDBR RET SYM_FUNC_END(ftrace_epilogue) @@ -284,6 +284,7 @@ SYM_FUNC_START(__fentry__) jnz trace SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) + ENDBR RET trace: @@ -307,7 +308,7 @@ EXPORT_SYMBOL(__fentry__) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_FUNC_START(return_to_handler) - subq $24, %rsp + subq $16, %rsp /* Save the return values */ movq %rax, (%rsp) @@ -319,7 +320,19 @@ SYM_FUNC_START(return_to_handler) movq %rax, %rdi movq 8(%rsp), %rdx movq (%rsp), %rax - addq $24, %rsp - JMP_NOSPEC rdi + + addq $16, %rsp + /* + * Jump back to the old return address. This cannot be JMP_NOSPEC rdi + * since IBT would demand that contain ENDBR, which simply isn't so for + * return addresses. Use a retpoline here to keep the RSB balanced. + */ + ANNOTATE_INTRA_FUNCTION_CALL + call .Ldo_rop + int3 +.Ldo_rop: + mov %rdi, (%rsp) + UNWIND_HINT_FUNC + RET SYM_FUNC_END(return_to_handler) #endif -- cgit v1.2.3 From cc66bb91457827f62e2b6cb2518666820f0a6c48 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:32 +0100 Subject: x86/ibt,kprobes: Cure sym+0 equals fentry woes In order to allow kprobes to skip the ENDBR instructions at sym+0 for X86_KERNEL_IBT builds, change _kprobe_addr() to take an architecture callback to inspect the function at hand and modify the offset if needed. This streamlines the existing interface to cover more cases and require less hooks. Once PowerPC gets fully converted there will only be the one arch hook. 
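As a sketch of the new semantics (fragment only; addr stands in for a function that starts with a 4-byte ENDBR on an X86_KERNEL_IBT build):

	bool on_func_entry;
	kprobe_opcode_t *p;

	/* sym+0: the x86 hook below skips the ENDBR landing pad */
	p = arch_adjust_kprobe_addr(addr, 0, &on_func_entry);
	/* expected: p == addr + 4 and on_func_entry == true */

A probe at sym+4 resolves the same way, while any other offset is left alone and reports on_func_entry == false.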
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.405947704@infradead.org --- arch/powerpc/kernel/kprobes.c | 34 +++++++++++++--------- arch/x86/kernel/kprobes/core.c | 17 +++++++++++ include/linux/kprobes.h | 3 +- kernel/kprobes.c | 66 +++++++++++++++++++++++++++++++++--------- 4 files changed, 92 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9a492fdec1df..7dae0b01abfb 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -105,6 +105,27 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } +static bool arch_kprobe_on_func_entry(unsigned long offset) +{ +#ifdef PPC64_ELF_ABI_v2 +#ifdef CONFIG_KPROBES_ON_FTRACE + return offset <= 16; +#else + return offset <= 8; +#endif +#else + return !offset; +#endif +} + +/* XXX try and fold the magic of kprobe_lookup_name() in this */ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + *on_func_entry = arch_kprobe_on_func_entry(offset); + return (kprobe_opcode_t *)(addr + offset); +} + void *alloc_insn_page(void) { void *page; @@ -218,19 +239,6 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs kcb->kprobe_saved_msr = regs->msr; } -bool arch_kprobe_on_func_entry(unsigned long offset) -{ -#ifdef PPC64_ELF_ABI_v2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else - return !offset; -#endif -} - void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->link; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4d8086a1627e..9ea0e3e79896 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "common.h" @@ -294,6 +295,22 @@ static int can_probe(unsigned long paddr) return (addr == paddr); } +/* If x86 supports IBT (ENDBR) it must be skipped. 
*/ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + if (is_endbr(*(u32 *)addr)) { + *on_func_entry = !offset || offset == 4; + if (*on_func_entry) + offset = 4; + + } else { + *on_func_entry = !offset; + } + + return (kprobe_opcode_t *)(addr + offset); +} + /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 19b884353b15..9c28f7a0ef42 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -265,7 +265,6 @@ extern int arch_init_kprobes(void); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); -extern bool arch_kprobe_on_func_entry(unsigned long offset); extern int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); @@ -384,6 +383,8 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) } kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry); + int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6d1e11cda4f1..185badc780b7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1488,25 +1488,69 @@ bool within_kprobe_blacklist(unsigned long addr) return false; } +/* + * arch_adjust_kprobe_addr - adjust the address + * @addr: symbol base address + * @offset: offset within the symbol + * @on_func_entry: was this @addr+@offset on the function entry + * + * Typically returns @addr + @offset, except for special cases where the + * function might be prefixed by a CFI landing pad; in that case any offset + * inside the landing pad is mapped to the first 'real' instruction of the + * symbol. + * + * Specifically, for things like IBT/BTI, skip the resp. ENDBR/BTI.C + * instruction at +0. + */ +kprobe_opcode_t *__weak arch_adjust_kprobe_addr(unsigned long addr, + unsigned long offset, + bool *on_func_entry) +{ + *on_func_entry = !offset; + return (kprobe_opcode_t *)(addr + offset); +} + /* * If 'symbol_name' is specified, look it up and add the 'offset' * to it. This way, we can specify a relative address to a symbol. * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. */ -static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, - const char *symbol_name, unsigned int offset) +static kprobe_opcode_t * +_kprobe_addr(kprobe_opcode_t *addr, const char *symbol_name, + unsigned long offset, bool *on_func_entry) { if ((symbol_name && addr) || (!symbol_name && !addr)) goto invalid; if (symbol_name) { + /* + * Input: @sym + @offset + * Output: @addr + @offset + * + * NOTE: kprobe_lookup_name() does *NOT* fold the offset + * argument into its output! + */ addr = kprobe_lookup_name(symbol_name, offset); if (!addr) return ERR_PTR(-ENOENT); } - addr = (kprobe_opcode_t *)(((char *)addr) + offset); + /* + * So here we have @addr + @offset, displace it into a new + * @addr' + @offset' where @addr' is the symbol start address.
*/ + addr = (void *)addr + offset; + if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset)) + return ERR_PTR(-ENOENT); + addr = (void *)addr - offset; + + /* + * Then ask the architecture to re-combine them, taking care of + * magical function entry details while telling us if this was indeed + * at the start of the function. + */ + addr = arch_adjust_kprobe_addr((unsigned long)addr, offset, on_func_entry); if (addr) return addr; @@ -1516,7 +1560,8 @@ invalid: static kprobe_opcode_t *kprobe_addr(struct kprobe *p) { - return _kprobe_addr(p->addr, p->symbol_name, p->offset); + bool on_func_entry; + return _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry); } /* @@ -2043,11 +2088,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) } NOKPROBE_SYMBOL(pre_handler_kretprobe); -bool __weak arch_kprobe_on_func_entry(unsigned long offset) -{ - return !offset; -} - /** * kprobe_on_func_entry() -- check whether given address is function entry * @addr: Target address @@ -2063,15 +2103,13 @@ bool __weak arch_kprobe_on_func_entry(unsigned long offset) */ int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) { - kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); + bool on_func_entry; + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset, &on_func_entry); if (IS_ERR(kp_addr)) return PTR_ERR(kp_addr); - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset)) - return -ENOENT; - - if (!arch_kprobe_on_func_entry(offset)) + if (!on_func_entry) return -EINVAL; return 0; -- cgit v1.2.3 From 58912710558889629daae3e0824daacab663bd4a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:33 +0100 Subject: x86/ibt,bpf: Add ENDBR instructions to prologue and trampoline With IBT-enabled builds we need ENDBR instructions at indirect jump target sites. Since we start execution of the JIT'ed code through an indirect jump, the very first instruction needs to be ENDBR. Similarly, since eBPF tail-calls use indirect branches, their landing site needs to be an ENDBR too. The trampolines need similar adjustment.
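Schematically, a JIT'ed program on an IBT build then starts like this (layout sketch, not literal emitted bytes; the two-byte tail-call count init may be a nop2 depending on the program):

	endbr64			/* program entry: indirect-jump landing pad */
	nop5			/* patchable call slot, X86_PATCH_SIZE bytes */
	xor %eax, %eax		/* tail-call count init */
	push %rbp
	mov %rsp, %rbp
	endbr64			/* tail-call landing pad */
	sub $stack_depth, %rsp

Tail calls land on the second ENDBR, past the frame setup, which is why X86_TAIL_CALL_OFFSET grows by exactly one ENDBR_INSN_SIZE: only the entry ENDBR sits in front of it.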
Signed-off-by: Peter Zijlstra (Intel) Fixed-by: Kumar Kartikeya Dwivedi Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.464998838@infradead.org --- arch/x86/net/bpf_jit_comp.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0ecb140864b2..b592ea0fc150 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -46,6 +46,12 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT4_off32(b1, b2, b3, b4, off) \ do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) +#ifdef CONFIG_X86_KERNEL_IBT +#define EMIT_ENDBR() EMIT(gen_endbr(), 4) +#else +#define EMIT_ENDBR() +#endif + static bool is_imm8(int value) { return value <= 127 && value >= -128; @@ -241,7 +247,7 @@ struct jit_context { /* Number of bytes emit_patch() needs to generate instructions */ #define X86_PATCH_SIZE 5 /* Number of bytes that will be skipped on tailcall */ -#define X86_TAIL_CALL_OFFSET 11 +#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE) static void push_callee_regs(u8 **pprog, bool *callee_regs_used) { @@ -286,6 +292,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, /* BPF trampoline can be made to work without these nops, * but let's waste 5 bytes for now and optimize later */ + EMIT_ENDBR(); memcpy(prog, x86_nops[5], X86_PATCH_SIZE); prog += X86_PATCH_SIZE; if (!ebpf_from_cbpf) { @@ -296,6 +303,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + + /* X86_TAIL_CALL_OFFSET is here */ + EMIT_ENDBR(); + /* sub rsp, rounded_stack_depth */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); @@ -384,6 +395,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, /* BPF poking in modules is not supported */ return -EINVAL; + /* + * See emit_prologue(), for IBT builds the trampoline hook is preceded + * with an ENDBR instruction. + */ + if (is_endbr(*(u32 *)ip)) + ip += ENDBR_INSN_SIZE; + return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); } @@ -2024,14 +2042,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ip_off = stack_size; - if (flags & BPF_TRAMP_F_SKIP_FRAME) + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip patched call instruction and point orig_call to actual * body of the kernel function. 
*/ + if (is_endbr(*(u32 *)orig_call)) + orig_call += ENDBR_INSN_SIZE; orig_call += X86_PATCH_SIZE; + } prog = image; + EMIT_ENDBR(); EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ -- cgit v1.2.3 From 0aec21cfb51bc1856206f312d8c13bf1f368d78e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:34 +0100 Subject: x86/ibt,ftrace: Add ENDBR to samples/ftrace Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.523421433@infradead.org --- samples/ftrace/ftrace-direct-modify.c | 5 +++++ samples/ftrace/ftrace-direct-multi-modify.c | 10 +++++++--- samples/ftrace/ftrace-direct-multi.c | 5 ++++- samples/ftrace/ftrace-direct-too.c | 3 +++ samples/ftrace/ftrace-direct.c | 3 +++ 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 2c7c31893551..39146fa83e20 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -24,20 +24,25 @@ static unsigned long my_ip = (unsigned long)schedule; #ifdef CONFIG_X86_64 +#include + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp1, @function\n" " .globl my_tramp1\n" " my_tramp1:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " call my_direct_func1\n" " leave\n" " .size my_tramp1, .-my_tramp1\n" ASM_RET + " .type my_tramp2, @function\n" " .globl my_tramp2\n" " my_tramp2:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " call my_direct_func2\n" diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index 6f43a39decd0..65aa94d96f4e 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -22,11 +22,14 @@ extern void my_tramp2(void *); #ifdef CONFIG_X86_64 +#include + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp1, @function\n" " .globl my_tramp1\n" " my_tramp1:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" @@ -34,12 +37,13 @@ asm ( " call my_direct_func1\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp1, .-my_tramp1\n" + " .type my_tramp2, @function\n" -"\n" " .globl my_tramp2\n" " my_tramp2:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" @@ -47,7 +51,7 @@ asm ( " call my_direct_func2\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp2, .-my_tramp2\n" " .popsection\n" ); diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index 2fafc9afcbf0..41ded7c615c7 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -17,11 +17,14 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 +#include + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" " .globl my_tramp\n" " my_tramp:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" @@ -29,7 +32,7 @@ asm ( " call my_direct_func\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp, .-my_tramp\n" " .popsection\n" ); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index c93fb0e95173..6690468c5cc2 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -19,11 +19,14 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 +#include + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" " .globl my_tramp\n" " 
my_tramp:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 8b551e503a48..e8f1e440b9b8 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -16,11 +16,14 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 +#include + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" " .globl my_tramp\n" " my_tramp:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" -- cgit v1.2.3 From 991625f3dd2cbc4b787deb0213e2bcf8fa264b21 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:35 +0100 Subject: x86/ibt: Add IBT feature, MSR and #CP handling The bits required to make the hardware go.. Of note is that, provided the syscall entry points are covered with ENDBR, #CP doesn't need to be an IST because we'll never hit the syscall gap. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.582331711@infradead.org --- arch/x86/include/asm/cpu.h | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/idtentry.h | 5 ++ arch/x86/include/asm/msr-index.h | 20 +++++++- arch/x86/include/asm/traps.h | 2 + arch/x86/include/uapi/asm/processor-flags.h | 2 + arch/x86/kernel/cpu/common.c | 25 +++++++++- arch/x86/kernel/idt.c | 4 ++ arch/x86/kernel/traps.c | 75 +++++++++++++++++++++++++++++ 9 files changed, 133 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 33d41e350c79..a60025fdc3fc 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,7 @@ #include #include #include +#include #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 65d147974f8d..c5bda3553a28 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -390,6 +390,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ #define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index f84280ab213c..7924f27f5c8b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -617,6 +617,11 @@ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); #endif +/* #CP */ +#ifdef CONFIG_X86_KERNEL_IBT +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection); +#endif + /* #VC */ #ifdef CONFIG_AMD_MEM_ENCRYPT DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a4a39c3e0f19..65c3599b8f05 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -360,11 +360,29 @@ #define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c #define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d - #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 +/* Control-flow Enforcement Technology MSRs */ +#define MSR_IA32_U_CET 
0x000006a0 /* user mode cet */ +#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */ +#define CET_SHSTK_EN BIT_ULL(0) +#define CET_WRSS_EN BIT_ULL(1) +#define CET_ENDBR_EN BIT_ULL(2) +#define CET_LEG_IW_EN BIT_ULL(3) +#define CET_NO_TRACK_EN BIT_ULL(4) +#define CET_SUPPRESS_DISABLE BIT_ULL(5) +#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9)) +#define CET_SUPPRESS BIT_ULL(10) +#define CET_WAIT_ENDBR BIT_ULL(11) + +#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */ +#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */ +#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */ +#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */ +#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */ + /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 6221be7cafc3..35317c5c551d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -18,6 +18,8 @@ void __init trap_init(void); asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif +extern bool ibt_selftest(void); + #ifdef CONFIG_X86_F00F_BUG /* For handling the FOOF bug */ void handle_invalid_op(struct pt_regs *regs); diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index bcba3c643e63..c47cc7f2feeb 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -130,6 +130,8 @@ #define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT) #define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */ #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) +#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ +#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b8382c11788..db1f1494caea 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "cpu.h" @@ -361,7 +362,8 @@ out: /* These bits should not change their value after CPU init is finished. 
*/ static const unsigned long cr4_pinned_mask = - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | + X86_CR4_FSGSBASE | X86_CR4_CET; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -515,6 +517,24 @@ static __init int setup_disable_pku(char *arg) __setup("nopku", setup_disable_pku); #endif /* CONFIG_X86_64 */ +static __always_inline void setup_cet(struct cpuinfo_x86 *c) +{ + u64 msr = CET_ENDBR_EN; + + if (!HAS_KERNEL_IBT || + !cpu_feature_enabled(X86_FEATURE_IBT)) + return; + + wrmsrl(MSR_IA32_S_CET, msr); + cr4_set_bits(X86_CR4_CET); + + if (!ibt_selftest()) { + pr_err("IBT selftest: Failed!\n"); + setup_clear_cpu_cap(X86_FEATURE_IBT); + return; + } +} + /* * Some CPU features depend on higher CPUID levels, which may not always * be available due to CPUID level capping or broken virtualization @@ -1632,6 +1652,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) x86_init_rdrand(c); setup_pku(c); + setup_cet(c); /* * Clear/Set all flags overridden by options, need do it @@ -1698,6 +1719,8 @@ void enable_sep_cpu(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); + if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) + pr_info("CET detected: Indirect Branch Tracking enabled\n"); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 7676e3444c83..608eb63bf044 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -104,6 +104,10 @@ static const __initconst struct idt_data def_idts[] = { ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), #endif +#ifdef CONFIG_X86_KERNEL_IBT + INTG(X86_TRAP_CP, asm_exc_control_protection), +#endif + #ifdef CONFIG_AMD_MEM_ENCRYPT ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC), #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8143693a7ea6..c073cb5dfbec 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -209,6 +209,81 @@ DEFINE_IDTENTRY(exc_overflow) do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL); } +#ifdef CONFIG_X86_KERNEL_IBT + +static __ro_after_init bool ibt_fatal = true; + +extern void ibt_selftest_ip(void); /* code label defined in asm below */ + +enum cp_error_code { + CP_EC = (1 << 15) - 1, + + CP_RET = 1, + CP_IRET = 2, + CP_ENDBR = 3, + CP_RSTRORSSP = 4, + CP_SETSSBSY = 5, + + CP_ENCL = 1 << 15, +}; + +DEFINE_IDTENTRY_ERRORCODE(exc_control_protection) +{ + if (!cpu_feature_enabled(X86_FEATURE_IBT)) { + pr_err("Unexpected #CP\n"); + BUG(); + } + + if (WARN_ON_ONCE(user_mode(regs) || (error_code & CP_EC) != CP_ENDBR)) + return; + + if (unlikely(regs->ip == (unsigned long)&ibt_selftest_ip)) { + regs->ax = 0; + return; + } + + pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs)); + if (!ibt_fatal) { + printk(KERN_DEFAULT CUT_HERE); + __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL); + return; + } + BUG(); +} + +/* Must be noinline to ensure uniqueness of ibt_selftest_ip. 
*/ +noinline bool ibt_selftest(void) +{ + unsigned long ret; + + asm (" lea ibt_selftest_ip(%%rip), %%rax\n\t" + ANNOTATE_RETPOLINE_SAFE + " jmp *%%rax\n\t" + "ibt_selftest_ip:\n\t" + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + " nop\n\t" + + : "=a" (ret) : : "memory"); + + return !ret; +} + +static int __init ibt_setup(char *str) +{ + if (!strcmp(str, "off")) + setup_clear_cpu_cap(X86_FEATURE_IBT); + + if (!strcmp(str, "warn")) + ibt_fatal = false; + + return 1; +} + +__setup("ibt=", ibt_setup); + +#endif /* CONFIG_X86_KERNEL_IBT */ + #ifdef CONFIG_X86_F00F_BUG void handle_invalid_op(struct pt_regs *regs) #else -- cgit v1.2.3 From af22700390c2f1d92dadd3eedf2738525a3a2f3a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:36 +0100 Subject: x86/ibt,kexec: Disable CET on kexec Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.641454603@infradead.org --- arch/x86/include/asm/cpu.h | 3 +++ arch/x86/kernel/cpu/common.c | 6 ++++++ arch/x86/kernel/machine_kexec_64.c | 4 +++- arch/x86/kernel/relocate_kernel_64.S | 8 ++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index a60025fdc3fc..86e5e4e26fcb 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -73,4 +73,7 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c); #else static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} #endif + +extern __noendbr void cet_disable(void); + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index db1f1494caea..709acab25f3c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -535,6 +535,12 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c) } } +__noendbr void cet_disable(void) +{ + if (cpu_feature_enabled(X86_FEATURE_IBT)) + wrmsrl(MSR_IA32_S_CET, 0); +} + /* * Some CPU features depend on higher CPUID levels, which may not always * be available due to CPUID level capping or broken virtualization diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index f5da4a18070a..566bb8e17149 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI /* @@ -310,6 +311,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); hw_breakpoint_disable(); + cet_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC @@ -325,7 +327,7 @@ void machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page) + PAGE_SIZE; - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + __memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 399f075ccdc4..5b65f6ec5ee6 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -114,6 +114,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) /* store the start address on the stack */ pushq %rdx + /* + * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP + * below. 
+ */ + movq %cr4, %rax + andq $~(X86_CR4_CET), %rax + movq %rax, %cr4 + /* * Set cr0 to a known state: * - Paging enabled -- cgit v1.2.3 From 99c95c5d4f1027130d555fdb27b576520894827d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:37 +0100 Subject: x86/alternative: Simplify int3_selftest_ip Similar to ibt_selftest_ip, apply the same pattern. Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.700456643@infradead.org --- arch/x86/kernel/alternative.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index b4470eabf151..14d1003328e9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -719,7 +719,7 @@ asm ( " .popsection\n" ); -extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */ +extern void int3_selftest_ip(void); /* defined in asm below */ static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) @@ -733,14 +733,15 @@ int3_exception_notify(struct notifier_block *self, unsigned long val, void *data if (val != DIE_INT3) return NOTIFY_DONE; - if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip) + if (regs->ip - INT3_INSN_SIZE != (unsigned long)&int3_selftest_ip) return NOTIFY_DONE; int3_emulate_call(regs, (unsigned long)&int3_magic); return NOTIFY_STOP; } -static void __init int3_selftest(void) +/* Must be noinline to ensure uniqueness of int3_selftest_ip. */ +static noinline void __init int3_selftest(void) { static __initdata struct notifier_block int3_exception_nb = { .notifier_call = int3_exception_notify, @@ -753,18 +754,10 @@ static void __init int3_selftest(void) /* * Basically: int3_magic(&val); but really complicated :-) * - * Stick the address of the INT3 instruction into int3_selftest_ip, - * then trigger the INT3, padded with NOPs to match a CALL instruction - * length. + * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb + * notifier above will emulate CALL for us. */ - asm volatile ("1: int3; nop; nop; nop; nop\n\t" - ".pushsection .init.data,\"aw\"\n\t" - ".align " __ASM_SEL(4, 8) "\n\t" - ".type int3_selftest_ip, @object\n\t" - ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t" - "int3_selftest_ip:\n\t" - __ASM_SEL(.long, .quad) " 1b\n\t" - ".popsection\n\t" + asm volatile ("int3_selftest_ip: int3; nop; nop; nop; nop\n\t" : ASM_CALL_CONSTRAINT : __ASM_SEL_RAW(a, D) (&val) : "memory"); -- cgit v1.2.3 From fe379fa4d199abc52d5b4a256e52cf94eff685cf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:38 +0100 Subject: x86/ibt: Disable IBT around firmware Assume firmware isn't IBT clean and disable it across calls. 
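The resulting usage pattern reads like this (sketch; firmware_call() is a hypothetical stand-in for the real efi_call()/APM thunks wired up below):

	u64 ibt;

	ibt = ibt_save();	/* clears CET_ENDBR_EN if IBT is enabled */
	firmware_call();	/* firmware may branch indirectly without hitting ENDBR */
	ibt_restore(ibt);	/* puts MSR_IA32_S_CET back the way it was */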
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.759989383@infradead.org --- arch/x86/include/asm/efi.h | 9 +++++++-- arch/x86/include/asm/ibt.h | 6 ++++++ arch/x86/kernel/apm_32.c | 7 +++++++ arch/x86/kernel/cpu/common.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 03cb12775043..98938a68251c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -120,8 +121,12 @@ extern asmlinkage u64 __efi_call(void *fp, ...); efi_enter_mm(); \ }) -#define arch_efi_call_virt(p, f, args...) \ - efi_call((void *)p->f, args) \ +#define arch_efi_call_virt(p, f, args...) ({ \ + u64 ret, ibt = ibt_save(); \ + ret = efi_call((void *)p->f, args); \ + ibt_restore(ibt); \ + ret; \ +}) #define arch_efi_call_virt_teardown() \ ({ \ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index fcaf6a41d373..52fb05d66489 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -52,6 +52,9 @@ static inline bool is_endbr(u32 val) return val == gen_endbr(); } +extern __noendbr u64 ibt_save(void); +extern __noendbr void ibt_restore(u64 save); + #else /* __ASSEMBLY__ */ #ifdef CONFIG_X86_64 @@ -74,6 +77,9 @@ static inline bool is_endbr(u32 val) static inline bool is_endbr(u32 val) { return false; } +static inline u64 ibt_save(void) { return 0; } +static inline void ibt_restore(u64 save) { } + #else /* __ASSEMBLY__ */ #define ENDBR diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 241dda687eb9..60e330cdbd17 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -232,6 +232,7 @@ #include #include #include +#include #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -598,6 +599,7 @@ static long __apm_bios_call(void *_call) struct desc_struct save_desc_40; struct desc_struct *gdt; struct apm_bios_call *call = _call; + u64 ibt; cpu = get_cpu(); BUG_ON(cpu != 0); @@ -607,11 +609,13 @@ static long __apm_bios_call(void *_call) apm_irq_save(flags); firmware_restrict_branch_speculation_start(); + ibt = ibt_save(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + ibt_restore(ibt); firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; @@ -676,6 +680,7 @@ static long __apm_bios_call_simple(void *_call) struct desc_struct save_desc_40; struct desc_struct *gdt; struct apm_bios_call *call = _call; + u64 ibt; cpu = get_cpu(); BUG_ON(cpu != 0); @@ -685,10 +690,12 @@ static long __apm_bios_call_simple(void *_call) apm_irq_save(flags); firmware_restrict_branch_speculation_start(); + ibt = ibt_save(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + ibt_restore(ibt); firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 709acab25f3c..03bd73f16d74 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -517,6 +517,34 @@ static __init int setup_disable_pku(char *arg) __setup("nopku", setup_disable_pku); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_KERNEL_IBT + +__noendbr 
u64 ibt_save(void) +{ + u64 msr = 0; + + if (cpu_feature_enabled(X86_FEATURE_IBT)) { + rdmsrl(MSR_IA32_S_CET, msr); + wrmsrl(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN); + } + + return msr; +} + +__noendbr void ibt_restore(u64 save) +{ + u64 msr; + + if (cpu_feature_enabled(X86_FEATURE_IBT)) { + rdmsrl(MSR_IA32_S_CET, msr); + msr &= ~CET_ENDBR_EN; + msr |= (save & CET_ENDBR_EN); + wrmsrl(MSR_IA32_S_CET, msr); + } +} + +#endif + static __always_inline void setup_cet(struct cpuinfo_x86 *c) { u64 msr = CET_ENDBR_EN; -- cgit v1.2.3 From 3e3f069504344c241f89737e4af014f83fca0b27 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:40 +0100 Subject: x86/ibt: Annotate text references Annotate away some of the generic code references. This is things where we take the address of a symbol for exception handling or return addresses (eg. context switch). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.877758523@infradead.org --- arch/x86/entry/entry_64.S | 6 ++++++ arch/x86/entry/entry_64_compat.S | 1 + arch/x86/kernel/alternative.c | 10 ++++++++-- arch/x86/kernel/head_64.S | 4 ++++ arch/x86/kernel/kprobes/core.c | 1 + arch/x86/kernel/relocate_kernel_64.S | 2 ++ arch/x86/lib/error-inject.c | 2 ++ arch/x86/lib/retpoline.S | 1 + 8 files changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 50b61186f8b2..d76f14f581f7 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -277,6 +277,7 @@ SYM_FUNC_END(__switch_to_asm) .pushsection .text, "ax" SYM_CODE_START(ret_from_fork) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // copy_thread movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -569,6 +570,7 @@ __irqentry_text_start: .align 16 .globl __irqentry_text_end __irqentry_text_end: + ANNOTATE_NOENDBR SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) @@ -650,6 +652,7 @@ SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL) #endif SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // exc_double_fault /* * This may fault. Non-paranoid faults on return to userspace are * handled by fixup_bad_iret. These include #SS, #GP, and #NP. @@ -744,6 +747,7 @@ SYM_FUNC_START(asm_load_gs_index) FRAME_BEGIN swapgs .Lgs_change: + ANNOTATE_NOENDBR // error_entry movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE swapgs @@ -1322,6 +1326,7 @@ first_nmi: #endif repeat_nmi: + ANNOTATE_NOENDBR // this code /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1350,6 +1355,7 @@ repeat_nmi: .endr subq $(5*8), %rsp end_repeat_nmi: + ANNOTATE_NOENDBR // this code /* * Everything below this point can be preempted by a nested NMI. 
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 35a0e69cf387..74208a1236b3 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -148,6 +148,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) popfq jmp .Lsysenter_flags_fixed SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // is_sysenter_singlestep SYM_CODE_END(entry_SYSENTER_compat) /* diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 14d1003328e9..954d39c15724 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -713,6 +713,7 @@ asm ( " .pushsection .init.text, \"ax\", @progbits\n" " .type int3_magic, @function\n" "int3_magic:\n" + ANNOTATE_NOENDBR " movl $1, (%" _ASM_ARG1 ")\n" ASM_RET " .size int3_magic, .-int3_magic\n" @@ -724,16 +725,19 @@ extern void int3_selftest_ip(void); /* defined in asm below */ static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) { + unsigned long selftest = (unsigned long)&int3_selftest_ip; struct die_args *args = data; struct pt_regs *regs = args->regs; + OPTIMIZER_HIDE_VAR(selftest); + if (!regs || user_mode(regs)) return NOTIFY_DONE; if (val != DIE_INT3) return NOTIFY_DONE; - if (regs->ip - INT3_INSN_SIZE != (unsigned long)&int3_selftest_ip) + if (regs->ip - INT3_INSN_SIZE != selftest) return NOTIFY_DONE; int3_emulate_call(regs, (unsigned long)&int3_magic); @@ -757,7 +761,9 @@ static noinline void __init int3_selftest(void) * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb * notifier above will emulate CALL for us. */ - asm volatile ("int3_selftest_ip: int3; nop; nop; nop; nop\n\t" + asm volatile ("int3_selftest_ip:\n\t" + ANNOTATE_NOENDBR + " int3; nop; nop; nop; nop\n\t" : ASM_CALL_CONSTRAINT : __ASM_SEL_RAW(a, D) (&val) : "memory"); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9b6fa760e1df..462cc1e18919 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -99,6 +99,7 @@ SYM_CODE_END(startup_64) SYM_CODE_START(secondary_startup_64) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. @@ -127,6 +128,7 @@ SYM_CODE_START(secondary_startup_64) */ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * Retrieve the modifier (SME encryption mask if SME is active) to be @@ -192,6 +194,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) jmp *%rax 1: UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // above /* * We must switch to a new descriptor in kernel space for the GDT @@ -299,6 +302,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) pushq %rax # target address in negative space lretq .Lafter_lret: + ANNOTATE_NOENDBR SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 9ea0e3e79896..8ef933c03afa 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1033,6 +1033,7 @@ asm( ".type __kretprobe_trampoline, @function\n" "__kretprobe_trampoline:\n" #ifdef CONFIG_X86_64 + ANNOTATE_NOENDBR /* Push a fake return address to tell the unwinder it's a kretprobe. 
*/ " pushq $__kretprobe_trampoline\n" UNWIND_HINT_FUNC diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 5b65f6ec5ee6..c1d8626c53b6 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -42,6 +42,7 @@ .code64 SYM_CODE_START_NOALIGN(relocate_kernel) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * %rdi indirection_page * %rsi page_list @@ -223,6 +224,7 @@ SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // RET target, above movq RSP(%r8), %rsp movq CR4(%r8), %rax movq %rax, %cr4 diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 520897061ee0..1e3de0769b81 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -3,6 +3,7 @@ #include #include #include +#include asmlinkage void just_return_func(void); @@ -11,6 +12,7 @@ asm( ".type just_return_func, @function\n" ".globl just_return_func\n" "just_return_func:\n" + ANNOTATE_NOENDBR ASM_RET ".size just_return_func, .-just_return_func\n" ); diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index afbdda539b80..5f87bab4fb8d 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -55,6 +55,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) .align RETPOLINE_THUNK_SIZE SYM_CODE_START(__x86_indirect_thunk_array) + ANNOTATE_NOENDBR // apply_retpolines #define GEN(reg) THUNK reg #include -- cgit v1.2.3 From 3215de84c06d747bb748b98945add83e3ec8a6e2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:41 +0100 Subject: x86/ibt,ftrace: Annotate ftrace code patching These are code patching sites, not indirect targets. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.936599479@infradead.org --- arch/x86/kernel/ftrace_64.S | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index e32b5cd6dc15..4ec13608d3c6 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -145,6 +145,7 @@ SYM_FUNC_START(ftrace_caller) movq %rcx, RSP(%rsp) SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -155,6 +156,7 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) movq $0, CS(%rsp) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) + ANNOTATE_NOENDBR call ftrace_stub /* Handlers can change the RIP */ @@ -169,6 +171,7 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * layout here. */ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jmp ftrace_epilogue SYM_FUNC_END(ftrace_caller); @@ -192,6 +195,7 @@ SYM_FUNC_START(ftrace_regs_caller) /* save_mcount_regs fills in first two parameters */ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -221,6 +225,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) leaq (%rsp), %rcx SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) + ANNOTATE_NOENDBR call ftrace_stub /* Copy flags back to SS, to restore them */ @@ -248,6 +253,7 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) */ testq %rax, %rax SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jnz 1f restore_mcount_regs @@ -261,6 +267,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) * to the return. 
*/ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jmp ftrace_epilogue /* Swap the flags with orig_rax */ -- cgit v1.2.3 From e8d61bdf0fdfaeaf35fb5a63d6e67e60038b88e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:42 +0100 Subject: x86/ibt,sev: Annotations No IBT on AMD so far.. probably correct, who knows. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.995109889@infradead.org --- arch/x86/entry/entry_64.S | 1 + arch/x86/entry/entry_64_compat.S | 1 + arch/x86/kernel/head_64.S | 2 ++ 3 files changed, 4 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d76f14f581f7..6e5399104abd 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -95,6 +95,7 @@ SYM_CODE_START(entry_SYSCALL_64) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 74208a1236b3..4fdb007cddbd 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -214,6 +214,7 @@ SYM_CODE_START(entry_SYSCALL_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 462cc1e18919..b8e3019547a5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -332,6 +332,7 @@ SYM_CODE_END(start_cpu0) */ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ENDBR /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -439,6 +440,7 @@ SYM_CODE_END(early_idt_handler_common) */ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ENDBR /* Build pt_regs */ PUSH_AND_CLEAR_REGS -- cgit v1.2.3 From 2b6ff7dea670a4623fae1d2349806fc7f8e305d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:43 +0100 Subject: x86/ibt: Dont generate ENDBR in .discard.text Having ENDBR in discarded sections can easily lead to relocations into discarded sections which the linkers aren't really fond of. Objtool also shouldn't generate them, but why tempt fate. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.054842742@infradead.org --- arch/x86/include/asm/setup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index a12458a7a8d4..896e48d45828 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -8,6 +8,7 @@ #include #include +#include #ifdef __i386__ @@ -119,7 +120,7 @@ void *extend_brk(size_t size, size_t align); * executable.) 
*/ #define RESERVE_BRK(name,sz) \ - static void __section(".discard.text") __used notrace \ + static void __section(".discard.text") __noendbr __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ -- cgit v1.2.3 From cb9010f87dcbcdbb51cc96b922c6260848cecbd1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:44 +0100 Subject: x86/ibt: Ensure module init/exit points have references Since the module init/exit points only have external references, a module LTO run will consider them 'unused' and seal them, leading to an immediate failure on module load. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.113767246@infradead.org --- include/linux/cfi.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 879744aaa6e0..c6dfc1ed0626 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -34,8 +34,17 @@ static inline void cfi_module_remove(struct module *mod, unsigned long base_addr #else /* !CONFIG_CFI_CLANG */ -#define __CFI_ADDRESSABLE(fn, __attr) +#ifdef CONFIG_X86_KERNEL_IBT + +#define __CFI_ADDRESSABLE(fn, __attr) \ + const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn + +#endif /* CONFIG_X86_KERNEL_IBT */ #endif /* CONFIG_CFI_CLANG */ +#ifndef __CFI_ADDRESSABLE +#define __CFI_ADDRESSABLE(fn, __attr) +#endif + #endif /* _LINUX_CFI_H */ -- cgit v1.2.3 From 53f7109ef957315ab53205ba3a3f4f48874c0428 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:45 +0100 Subject: objtool: Rename --duplicate to --lto In order to prepare for LTO-like objtool runs for modules, rename the --duplicate argument to --lto.
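Usage then becomes, for example (invocation sketch; --lto must be combined with --vmlinux or --module, as enforced by the check.c hunk below):

	$ objtool check --lto --vmlinux vmlinux.o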
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.172584233@infradead.org --- scripts/link-vmlinux.sh | 2 +- tools/objtool/builtin-check.c | 4 ++-- tools/objtool/check.c | 7 ++++++- tools/objtool/include/objtool/builtin.h | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 666f7bbc13eb..9b08dca26f99 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -115,7 +115,7 @@ objtool_link() objtoolcmd="orc generate" fi - objtoolopt="${objtoolopt} --duplicate" + objtoolopt="${objtoolopt} --lto" if is_enabled CONFIG_FTRACE_MCOUNT_USE_OBJTOOL; then objtoolopt="${objtoolopt} --mcount" diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 853af934c9fd..5c2fcaa2c260 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,7 @@ #include bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls, dryrun; + lto, vmlinux, mcount, noinstr, backup, sls, dryrun; static const char * const check_usage[] = { "objtool check [] file.o", @@ -40,7 +40,7 @@ const struct option check_options[] = { OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), OPT_BOOLEAN('s', "stats", &stats, "print statistics"), - OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"), + OPT_BOOLEAN(0, "lto", &lto, "whole-archive like runs"), OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 311bfc6922c1..ae1d4f996803 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3499,6 +3499,11 @@ int check(struct objtool_file *file) { int ret, warnings = 0; + if (lto && !(vmlinux || module)) { + fprintf(stderr, "--lto requires: --vmlinux or --module\n"); + return 1; + } + arch_initial_func_cfi_state(&initial_func_cfi); init_cfi_state(&init_cfi); init_cfi_state(&func_cfi); @@ -3519,7 +3524,7 @@ int check(struct objtool_file *file) if (list_empty(&file->insn_list)) goto out; - if (vmlinux && !validate_dup) { + if (vmlinux && !lto) { ret = validate_vmlinux_functions(file); if (ret < 0) goto out; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 7b4b124b9032..0cbe739ab0c8 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls, dryrun; + lto, vmlinux, mcount, noinstr, backup, sls, dryrun; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); -- cgit v1.2.3 From 4adb23686795e9c88e3217b5d7b4524c0da9d04f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:46 +0100 Subject: objtool: Ignore extra-symbol code There's a fun implementation detail on linking STB_WEAK symbols. When the linker combines two translation units where one contains a weak function and the other an override for it, it simply strips the STB_WEAK symbol from the symbol table but doesn't actually remove the code.
The result is that when objtool is run in a whole-archive kind of way, it will encounter *heaps* of unused (and unreferenced) code, all rudiments of weak functions. Additionally, when a weak implementation is split into a .cold subfunction, that .cold symbol is left in place, even though completely unused. Teach objtool to ignore such rudiments by searching for symbol holes; that is, code ranges that fall outside the given symbol bounds. Specifically, ignore a sequence of unreachable instructions iff they occupy a single hole; additionally, ignore any .cold subfunctions they reference. Both ld.bfd and ld.lld behave like this. LTO builds otoh can (and do) properly DCE weak functions. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.232019347@infradead.org --- tools/objtool/check.c | 43 ++++++++++++++++++++++++++ tools/objtool/elf.c | 60 +++++++++++++++++++++++++++++++++++++ tools/objtool/include/objtool/elf.h | 1 + 3 files changed, 104 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ae1d4f996803..0e0e5b5a72c8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3346,6 +3346,49 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio !strcmp(insn->sec->name, ".altinstr_aux")) return true; + /* + * Whole archive runs might encounter dead code from weak symbols. + * This is where the linker will have dropped the weak symbol in + * favour of a regular symbol, but leaves the code in place. + * + * In this case we'll find a piece of code (whole function) that is not + * covered by a !section symbol. Ignore them. + */ + if (!insn->func && lto) { + int size = find_symbol_hole_containing(insn->sec, insn->offset); + unsigned long end = insn->offset + size; + + if (!size) /* not a hole */ + return false; + + if (size < 0) /* hole until the end */ + return true; + + sec_for_each_insn_continue(file, insn) { + /* + * If we reach a visited instruction at or before the + * end of the hole, ignore the unreachable. + */ + if (insn->visited) + return true; + + if (insn->offset >= end) + break; + + /* + * If this hole jumps to a .cold function, mark it ignored too. + */ + if (insn->jump_dest && insn->jump_dest->func && + strstr(insn->jump_dest->func->name, ".cold")) { + struct instruction *dest = insn->jump_dest; + func_for_each_insn(file, dest->func, dest) + dest->ignore = true; + } + } + + return false; + } + if (!insn->func) return false; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 456ac2206404..d7b99a737496 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -83,6 +83,31 @@ static int symbol_by_offset(const void *key, const struct rb_node *node) return 0; } +struct symbol_hole { + unsigned long key; + const struct symbol *sym; +}; + +/* + * Find !section symbol where @offset is after it. + */ +static int symbol_hole_by_offset(const void *key, const struct rb_node *node) +{ + const struct symbol *s = rb_entry(node, struct symbol, node); + struct symbol_hole *sh = (void *)key; + + if (sh->key < s->offset) + return -1; + + if (sh->key >= s->offset + s->len) { + if (s->type != STT_SECTION) + sh->sym = s; + return 1; + } + + return 0; +} + struct section *find_section_by_name(const struct elf *elf, const char *name) { struct section *sec; @@ -162,6 +187,41 @@ struct symbol *find_symbol_containing(const struct section *sec, unsigned long o return NULL; } +/* + * Returns size of hole starting at @offset.
+ */ +int find_symbol_hole_containing(const struct section *sec, unsigned long offset) +{ + struct symbol_hole hole = { + .key = offset, + .sym = NULL, + }; + struct rb_node *n; + struct symbol *s; + + /* + * Find the rightmost symbol for which @offset is after it. + */ + n = rb_find(&hole, &sec->symbol_tree, symbol_hole_by_offset); + + /* found a symbol that contains @offset */ + if (n) + return 0; /* not a hole */ + + /* didn't find a symbol for which @offset is after it */ + if (!hole.sym) + return 0; /* not a hole */ + + /* @offset >= sym->offset + sym->len, find symbol after it */ + n = rb_next(&hole.sym->node); + if (!n) + return -1; /* until end of address space */ + + /* hole until start of next symbol */ + s = rb_entry(n, struct symbol, node); + return s->offset - offset; +} + struct symbol *find_func_containing(struct section *sec, unsigned long offset) { struct rb_node *node; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index d22336781401..22ba7e2b816e 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -152,6 +152,7 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_name(const struct elf *elf, const char *name); struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset); +int find_symbol_hole_containing(const struct section *sec, unsigned long offset); struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset); struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len); -- cgit v1.2.3 From f9cdf7ca57cada055f61ef6d0eb4db21c3f200db Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:47 +0100 Subject: x86: Mark stop_this_cpu() __noreturn vmlinux.o: warning: objtool: smp_stop_nmi_callback()+0x2b: unreachable instruction 0000 0000000000047cf0 : ... 
0026 47d16: e8 00 00 00 00 call 47d1b 47d17: R_X86_64_PLT32 stop_this_cpu-0x4 002b 47d1b: b8 01 00 00 00 mov $0x1,%eax Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.290905453@infradead.org --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/process.c | 2 +- tools/objtool/check.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2c5f12ae7d04..dd34100455d2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -835,7 +835,7 @@ bool xen_set_default_idle(void); #define xen_set_default_idle 0 #endif -void stop_this_cpu(void *dummy); +void __noreturn stop_this_cpu(void *dummy); void microcode_check(void); enum l1tf_mitigations { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 81d8ef036637..a057a5c08618 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -747,7 +747,7 @@ bool xen_set_default_idle(void) } #endif -void stop_this_cpu(void *dummy) +void __noreturn stop_this_cpu(void *dummy) { local_irq_disable(); /* diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0e0e5b5a72c8..c3ddcecdab57 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -181,6 +181,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", + "stop_this_cpu", }; if (!func) -- cgit v1.2.3 From eae654f1c21216daa9fbb92591c0d9f5ae46cfc5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:48 +0100 Subject: exit: Mark do_group_exit() __noreturn vmlinux.o: warning: objtool: get_signal()+0x108: unreachable instruction 0000 000000000007f930 : ... 0103 7fa33: e8 00 00 00 00 call 7fa38 7fa34: R_X86_64_PLT32 do_group_exit-0x4 0108 7fa38: 41 8b 45 74 mov 0x74(%r13),%eax Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.351270711@infradead.org --- include/linux/sched/task.h | 2 +- kernel/exit.c | 2 +- tools/objtool/check.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index e84e54d1b490..719c9a6cac8d 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -79,7 +79,7 @@ static inline void exit_thread(struct task_struct *tsk) { } #endif -extern void do_group_exit(int); +extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); diff --git a/kernel/exit.c b/kernel/exit.c index b00a25bb4ab9..b71f9df9074e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -906,7 +906,7 @@ SYSCALL_DEFINE1(exit, int, error_code) * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). 
*/ -void +void __noreturn do_group_exit(int exit_code) { struct signal_struct *sig = current->signal; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c3ddcecdab57..9896562350a8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -181,6 +181,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", + "do_group_exit", "stop_this_cpu", }; -- cgit v1.2.3 From 105cd68596392cfe15056a891b0723609dcad247 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Mar 2022 17:58:35 +0100 Subject: x86: Mark __invalid_creds() __noreturn vmlinux.o: warning: objtool: ksys_unshare()+0x36c: unreachable instruction 0000 0000000000067040 : ... 0364 673a4: 4c 89 ef mov %r13,%rdi 0367 673a7: e8 00 00 00 00 call 673ac 673a8: R_X86_64_PLT32 __invalid_creds-0x4 036c 673ac: e9 28 ff ff ff jmp 672d9 0371 673b1: 41 bc f4 ff ff ff mov $0xfffffff4,%r12d 0377 673b7: e9 80 fd ff ff jmp 6713c Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Yi9gOW9f1GGwwUD6@hirez.programming.kicks-ass.net --- include/linux/cred.h | 2 +- kernel/cred.c | 2 +- tools/objtool/check.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index fcbc6885cc09..9ed9232af934 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -176,7 +176,7 @@ extern int set_cred_ucounts(struct cred *); * check for validity of credentials */ #ifdef CONFIG_DEBUG_CREDENTIALS -extern void __invalid_creds(const struct cred *, const char *, unsigned); +extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); extern void __validate_process_creds(struct task_struct *, const char *, unsigned); diff --git a/kernel/cred.c b/kernel/cred.c index 933155c96922..e10c15f51c1f 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -870,7 +870,7 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, /* * report use of invalid credentials */ -void __invalid_creds(const struct cred *cred, const char *file, unsigned line) +void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line) { printk(KERN_ERR "CRED: Invalid credentials\n"); printk(KERN_ERR "CRED: At %s:%u\n", file, line); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9896562350a8..0c857e74c852 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -183,6 +183,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "cpu_bringup_and_idle", "do_group_exit", "stop_this_cpu", + "__invalid_creds", }; if (!func) -- cgit v1.2.3 From 0e5b613b4d4be3345dda349fb90dd73d2103302f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:49 +0100 Subject: objtool: Rework ASM_REACHABLE Currently ASM_REACHABLE only works for UD2 instructions; reorder things to also allow over-riding dead_end_function(). To that end: - Mark INSN_BUG instructions in decode_instructions(), this saves having to iterate all instructions yet again. - Have add_call_destinations() set insn->dead_end for dead_end_function() calls. - Move add_dead_ends() *after* add_call_destinations() such that ASM_REACHABLE can clear the ->dead_end mark. - have validate_branch() only check ->dead_end. 
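A toy model of the resulting marking order, shown below (the names and
structures here are illustrative only, not objtool's actual code):

	#include <stdio.h>

	struct toy_insn {
		int is_trap;		/* decode_instructions(): INSN_BUG */
		int calls_noreturn;	/* add_call_destinations(): dead_end_function() */
		int reachable_hint;	/* add_dead_ends(): ASM_REACHABLE */
		int dead_end;
	};

	static void mark(struct toy_insn *in, int n)
	{
		for (int i = 0; i < n; i++) {
			if (in[i].is_trap || in[i].calls_noreturn)
				in[i].dead_end = 1;
			if (in[i].reachable_hint)	/* annotation overrides */
				in[i].dead_end = 0;
		}
	}

	int main(void)
	{
		struct toy_insn prog[] = { {1,0,0,0}, {0,1,1,0}, {0,1,0,0} };
		mark(prog, 3);
		for (int i = 0; i < 3; i++)
			printf("insn %d: dead_end=%d\n", i, prog[i].dead_end);
		return 0;	/* dead_end = 1, 0, 1 */
	}

The point of the reordering is exactly the middle case: a call that
dead_end_function() would condemn can now be rescued by an explicit
reachable annotation.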
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.410010807@infradead.org --- tools/objtool/check.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0c857e74c852..894c9a722555 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -395,6 +395,14 @@ static int decode_instructions(struct objtool_file *file) if (ret) goto err; + /* + * By default, "ud2" is a dead end unless otherwise + * annotated, because GCC 7 inserts it for certain + * divide-by-zero cases. + */ + if (insn->type == INSN_BUG) + insn->dead_end = true; + hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset)); list_add_tail(&insn->list, &file->insn_list); nr_insns++; @@ -523,14 +531,6 @@ static int add_dead_ends(struct objtool_file *file) struct reloc *reloc; struct instruction *insn; - /* - * By default, "ud2" is a dead end unless otherwise annotated, because - * GCC 7 inserts it for certain divide-by-zero cases. - */ - for_each_insn(file, insn) - if (insn->type == INSN_BUG) - insn->dead_end = true; - /* * Check for manually annotated dead ends. */ @@ -1114,6 +1114,9 @@ static void annotate_call_site(struct objtool_file *file, list_add_tail(&insn->call_node, &file->mcount_loc_list); return; } + + if (!sibling && dead_end_function(file, sym)) + insn->dead_end = true; } static void add_call_dest(struct objtool_file *file, struct instruction *insn, @@ -2089,10 +2092,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = add_dead_ends(file); - if (ret) - return ret; - add_ignores(file); add_uaccess_safe(file); @@ -2131,6 +2130,14 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be after add_call_destinations() such that it can override + * dead_end_function() marks. + */ + ret = add_dead_ends(file); + if (ret) + return ret; + ret = add_jump_table_alts(file); if (ret) return ret; @@ -3138,7 +3145,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - if (dead_end_function(file, insn->call_dest)) + if (insn->dead_end) return 0; break; -- cgit v1.2.3 From be0075951fde739f14ee2b659e2fd6e2499c46c0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:50 +0100 Subject: x86: Annotate call_on_stack() vmlinux.o: warning: objtool: page_fault_oops()+0x13c: unreachable instruction 0000 000000000005b460 : ... 0128 5b588: 49 89 23 mov %rsp,(%r11) 012b 5b58b: 4c 89 dc mov %r11,%rsp 012e 5b58e: 4c 89 f2 mov %r14,%rdx 0131 5b591: 48 89 ee mov %rbp,%rsi 0134 5b594: 4c 89 e7 mov %r12,%rdi 0137 5b597: e8 00 00 00 00 call 5b59c 5b598: R_X86_64_PLT32 handle_stack_overflow-0x4 013c 5b59c: 5c pop %rsp vmlinux.o: warning: objtool: sysvec_reboot()+0x6d: unreachable instruction 0000 00000000000033f0 : ... 005d 344d: 4c 89 dc mov %r11,%rsp 0060 3450: e8 00 00 00 00 call 3455 3451: R_X86_64_PLT32 irq_enter_rcu-0x4 0065 3455: 48 89 ef mov %rbp,%rdi 0068 3458: e8 00 00 00 00 call 345d 3459: R_X86_64_PC32 .text+0x47d0c 006d 345d: e8 00 00 00 00 call 3462 345e: R_X86_64_PLT32 irq_exit_rcu-0x4 0072 3462: 5c pop %rsp Both cases are due to a call_on_stack() calling a __noreturn function. Since that's an inline asm, GCC can't do anything about the instructions after the CALL. Therefore put in an explicit ASM_REACHABLE annotation to make sure objtool and gcc are consistently confused about control flow. 
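The shape of the fix, as a compilable sketch (the .discard.reachable bytes
follow the ASM_REACHABLE definition used by this series; the function name
is made up):

	extern void dies(void) __attribute__((noreturn));

	static inline void call_dies_on_other_stack(void)
	{
		asm volatile("call dies\n\t"
			     /* open-coded ASM_REACHABLE: record the address
			      * after the call so objtool keeps validating */
			     "998:\n\t"
			     ".pushsection .discard.reachable\n\t"
			     ".long 998b - .\n\t"
			     ".popsection\n\t");
	}

Without the annotation, the compiler still emits code after the asm
statement while objtool treats the call as a dead end and flags that code
unreachable; the annotation marks it reachable again.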
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.468805622@infradead.org --- arch/x86/include/asm/irq_stack.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index ae9d40f6c706..05af249d6bec 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -99,7 +99,8 @@ } #define ASM_CALL_ARG0 \ - "call %P[__func] \n" + "call %P[__func] \n" \ + ASM_REACHABLE #define ASM_CALL_ARG1 \ "movq %[arg1], %%rdi \n" \ -- cgit v1.2.3 From dca5da2abe406168b85f97e22109710ebe0bda08 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Mar 2022 18:05:52 +0100 Subject: x86,objtool: Move the ASM_REACHABLE annotation to objtool.h Because we need a variant for .S files too. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Yi9gOW9f1GGwwUD6@hirez.programming.kicks-ass.net --- arch/x86/include/asm/bug.h | 1 + arch/x86/include/asm/irq_stack.h | 1 + include/linux/compiler.h | 7 ------- include/linux/objtool.h | 16 ++++++++++++++++ tools/include/linux/objtool.h | 16 ++++++++++++++++ 5 files changed, 34 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index bab883c0b6fe..4d20a293c6fd 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -4,6 +4,7 @@ #include #include +#include /* * Despite that some emulators terminate on UD2, we use it for WARN(). diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 05af249d6bec..63f818aedf77 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -3,6 +3,7 @@ #define _ASM_X86_IRQ_STACK_H #include +#include #include diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0f7fd205ab7e..219aa5ddbc73 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -125,18 +125,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) -#define ASM_REACHABLE \ - "998:\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ - ".popsection\n\t" - /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table") #else #define annotate_unreachable() -# define ASM_REACHABLE #define __annotate_jump_table #endif diff --git a/include/linux/objtool.h b/include/linux/objtool.h index f797368820c8..586d35720f13 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -83,6 +83,12 @@ struct unwind_hint { _ASM_PTR " 986b\n\t" \ ".popsection\n\t" +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -142,6 +148,13 @@ struct unwind_hint { .popsection .endm +.macro REACHABLE +.Lhere_\@: + .pushsection .discard.reachable + .long .Lhere_\@ - . 
+ .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -153,6 +166,7 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) #define ANNOTATE_NOENDBR +#define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 @@ -161,6 +175,8 @@ struct unwind_hint { .endm .macro ANNOTATE_NOENDBR .endm +.macro REACHABLE +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index f797368820c8..586d35720f13 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -83,6 +83,12 @@ struct unwind_hint { _ASM_PTR " 986b\n\t" \ ".popsection\n\t" +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -142,6 +148,13 @@ struct unwind_hint { .popsection .endm +.macro REACHABLE +.Lhere_\@: + .pushsection .discard.reachable + .long .Lhere_\@ - . + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -153,6 +166,7 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) #define ANNOTATE_NOENDBR +#define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 @@ -161,6 +175,8 @@ struct unwind_hint { .endm .macro ANNOTATE_NOENDBR .endm +.macro REACHABLE +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ -- cgit v1.2.3 From 3515899bef545fc5b5f6b865e080bfe4c9a92a41 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Mar 2022 18:07:30 +0100 Subject: x86: Annotate idtentry_df() Without CONFIG_X86_ESPFIX64 exc_double_fault() is noreturn and objtool is clever enough to figure that out. vmlinux.o: warning: objtool: asm_exc_double_fault()+0x22: unreachable instruction 0000000000001260 : 1260: f3 0f 1e fa endbr64 1264: 90 nop 1265: 90 nop 1266: 90 nop 1267: e8 84 03 00 00 call 15f0 126c: 48 89 e7 mov %rsp,%rdi 126f: 48 8b 74 24 78 mov 0x78(%rsp),%rsi 1274: 48 c7 44 24 78 ff ff ff ff movq $0xffffffffffffffff,0x78(%rsp) 127d: e8 00 00 00 00 call 1282 127e: R_X86_64_PLT32 exc_double_fault-0x4 1282: e9 09 04 00 00 jmp 1690 Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Yi9gOW9f1GGwwUD6@hirez.programming.kicks-ass.net --- arch/x86/entry/entry_64.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6e5399104abd..4faac48ebec5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -551,6 +551,9 @@ SYM_CODE_START(\asmsym) movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ call \cfunc + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + jmp paranoid_exit _ASM_NOKPROBE(\asmsym) -- cgit v1.2.3 From 96db4a988d653a7f18b518c25367f7bf238f4667 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:52 +0100 Subject: objtool: Read the NOENDBR annotation Read the new NOENDBR annotation. While there, attempt to not bloat struct instruction. 
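The saving from the bitfield conversion done below is easy to demonstrate
outside objtool (standalone C, not part of the patch):

	#include <stdio.h>

	struct as_bools { _Bool dead_end, ignore, ignore_alts,
			        hint, retpoline_safe, noendbr; };
	struct as_bits  { unsigned char dead_end:1, ignore:1, ignore_alts:1,
			                hint:1, retpoline_safe:1, noendbr:1; };

	int main(void)
	{
		/* typically prints "6 vs 1" */
		printf("%zu vs %zu\n",
		       sizeof(struct as_bools), sizeof(struct as_bits));
		return 0;
	}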
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Josh Poimboeuf
Link: https://lore.kernel.org/r/20220308154319.586815435@infradead.org
---
 tools/objtool/check.c | 27 +++++++++++++++++++++++++++
 tools/objtool/include/objtool/check.h | 13 ++++++++++---
 2 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 894c9a722555..63993945ff9f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1866,6 +1866,29 @@ static int read_unwind_hints(struct objtool_file *file)
 	return 0;
 }
+static int read_noendbr_hints(struct objtool_file *file)
+{
+	struct section *sec;
+	struct instruction *insn;
+	struct reloc *reloc;
+
+	sec = find_section_by_name(file->elf, ".rela.discard.noendbr");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(reloc, &sec->reloc_list, list) {
+		insn = find_insn(file, reloc->sym->sec, reloc->sym->offset + reloc->addend);
+		if (!insn) {
+			WARN("bad .discard.noendbr entry");
+			return -1;
+		}
+
+		insn->noendbr = 1;
+	}
+
+	return 0;
+}
+
 static int read_retpoline_hints(struct objtool_file *file)
 {
 	struct section *sec;
@@ -2099,6 +2122,10 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
+	ret = read_noendbr_hints(file);
+	if (ret)
+		return ret;
+
 	/*
	 * Must be before add_{jump_call}_destination.
	 */
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 6cfff078897f..f10d7374f388 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -45,11 +45,18 @@ struct instruction {
 	unsigned int len;
 	enum insn_type type;
 	unsigned long immediate;
-	bool dead_end, ignore, ignore_alts;
-	bool hint;
-	bool retpoline_safe;
+
+	u8 dead_end		: 1,
+	   ignore		: 1,
+	   ignore_alts		: 1,
+	   hint			: 1,
+	   retpoline_safe	: 1,
+	   noendbr		: 1;
+		/* 2 bit hole */
 	s8 instr;
 	u8 visited;
+	/* u8 hole */
+
 	struct alt_group *alt_group;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
-- cgit v1.2.3
From 7d209d13e7c3a3d60dc262f11a8ae4e6b4454d30 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 8 Mar 2022 16:30:53 +0100
Subject: objtool: Add IBT/ENDBR decoding

Intel IBT requires the target of any indirect CALL or JMP instruction to
be the ENDBR instruction; optionally it allows those two instructions to
have a NOTRACK prefix in order to avoid this requirement.

The kernel will not enable the use of NOTRACK, as such any occurrence of
it in compiler generated code should be flagged.

Teach objtool to decode ENDBR instructions and WARN about NOTRACK
prefixes.
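For reference, ENDBR64 encodes as f3 0f 1e fa (ENDBR32 as f3 0f 1e fb) and
the NOTRACK prefix is the byte 3e. A standalone sketch of the byte-pattern
test, separate from objtool's real decoder:

	#include <stdbool.h>
	#include <stdio.h>

	static bool is_endbr_bytes(const unsigned char *p)
	{
		return p[0] == 0xf3 && p[1] == 0x0f && p[2] == 0x1e &&
		       (p[3] == 0xfa || p[3] == 0xfb);
	}

	int main(void)
	{
		const unsigned char endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa };
		const unsigned char nop4[]    = { 0x0f, 0x1f, 0x40, 0x00 };

		printf("%d %d\n", is_endbr_bytes(endbr64),
		       is_endbr_bytes(nop4));	/* prints "1 0" */
		return 0;
	}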
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Josh Poimboeuf
Link: https://lore.kernel.org/r/20220308154319.645963517@infradead.org
---
 tools/objtool/arch/x86/decode.c | 34 +++++++++++++++++++++++++++++-----
 tools/objtool/include/objtool/arch.h | 1 +
 2 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 479e769ca324..943cb41cddf7 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -103,6 +103,18 @@ unsigned long arch_jump_destination(struct instruction *insn)
 #define rm_is_mem(reg)	(mod_is_mem() && !is_RIP() && rm_is(reg))
 #define rm_is_reg(reg)	(mod_is_reg() && modrm_rm == (reg))
+static bool has_notrack_prefix(struct insn *insn)
+{
+	int i;
+
+	for (i = 0; i < insn->prefixes.nbytes; i++) {
+		if (insn->prefixes.bytes[i] == 0x3e)
+			return true;
+	}
+
+	return false;
+}
+
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, enum insn_type *type,
@@ -112,7 +124,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	const struct elf *elf = file->elf;
 	struct insn insn;
 	int x86_64, ret;
-	unsigned char op1, op2, op3,
+	unsigned char op1, op2, op3, prefix,
		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
		      modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
		      sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
@@ -137,6 +149,8 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	if (insn.vex_prefix.nbytes)
 		return 0;
+	prefix = insn.prefixes.bytes[0];
+
 	op1 = insn.opcode.bytes[0];
 	op2 = insn.opcode.bytes[1];
 	op3 = insn.opcode.bytes[2];
@@ -492,6 +506,12 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			/* nopl/nopw */
 			*type = INSN_NOP;
+		} else if (op2 == 0x1e) {
+
+			if (prefix == 0xf3 && (modrm == 0xfa || modrm == 0xfb))
+				*type = INSN_ENDBR;
+
+
 		} else if (op2 == 0x38 && op3 == 0xf8) {
 			if (insn.prefixes.nbytes == 1 &&
 			    insn.prefixes.bytes[0] == 0xf2) {
@@ -636,20 +656,24 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 	case 0xff:
-		if (modrm_reg == 2 || modrm_reg == 3)
+		if (modrm_reg == 2 || modrm_reg == 3) {
 			*type = INSN_CALL_DYNAMIC;
+			if (has_notrack_prefix(&insn))
+				WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
-		else if (modrm_reg == 4)
+		} else if (modrm_reg == 4) {
 			*type = INSN_JUMP_DYNAMIC;
+			if (has_notrack_prefix(&insn))
+				WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
-		else if (modrm_reg == 5)
+		} else if (modrm_reg == 5) {
 			/* jmpf */
 			*type = INSN_CONTEXT_SWITCH;
-		else if (modrm_reg == 6) {
+		} else if (modrm_reg == 6) {
 			/* push from mem */
 			ADD_OP(op) {
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 76bae3078286..9b19cc304195 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -27,6 +27,7 @@ enum insn_type {
 	INSN_STD,
 	INSN_CLD,
 	INSN_TRAP,
+	INSN_ENDBR,
 	INSN_OTHER,
 };
-- cgit v1.2.3
From 08f87a93c8ec709698edba66a5167077181fc978 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 8 Mar 2022 16:30:54 +0100
Subject: objtool: Validate IBT assumptions

Intel IBT requires that every indirect JMP/CALL targets an ENDBR
instruction; failing this, #CP happens and we die.

Similarly, all exception entries should be ENDBR.

Find all code relocations and ensure they're either an ENDBR instruction
or ANNOTATE_NOENDBR.
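A small userspace analogue of the constraint being validated (assumes a
CET-capable CPU and a toolchain building with -fcf-protection=branch):

	void target(void) { }		/* compiler emits endbr64 first */

	static void call_indirect(void (*fp)(void))
	{
		fp();			/* with IBT enabled, *fp must start
					 * with ENDBR or the CPU raises #CP */
	}

	int main(void)
	{
		call_indirect(target);
		return 0;
	}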
For the exceptions look for UNWIND_HINT_IRET_REGS at sym+0 not being ENDBR. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.705110141@infradead.org --- tools/objtool/builtin-check.c | 4 +- tools/objtool/check.c | 210 +++++++++++++++++++++++++++++++- tools/objtool/include/objtool/builtin.h | 3 +- tools/objtool/include/objtool/objtool.h | 3 + 4 files changed, 215 insertions(+), 5 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 5c2fcaa2c260..fc6975ab8b06 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,8 @@ #include bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - lto, vmlinux, mcount, noinstr, backup, sls, dryrun; + lto, vmlinux, mcount, noinstr, backup, sls, dryrun, + ibt; static const char * const check_usage[] = { "objtool check [] file.o", @@ -47,6 +48,7 @@ const struct option check_options[] = { OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), OPT_BOOLEAN(0, "dry-run", &dryrun, "don't write the modifications"), + OPT_BOOLEAN(0, "ibt", &ibt, "validate ENDBR placement"), OPT_END(), }; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 63993945ff9f..d4cf831edc28 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -418,8 +418,16 @@ static int decode_instructions(struct objtool_file *file) return -1; } - sym_for_each_insn(file, func, insn) + sym_for_each_insn(file, func, insn) { insn->func = func; + if (insn->type == INSN_ENDBR) { + if (insn->offset == insn->func->offset) { + file->nr_endbr++; + } else { + file->nr_endbr_int++; + } + } + } } } @@ -1171,6 +1179,19 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in annotate_call_site(file, insn, false); } + +static bool same_function(struct instruction *insn1, struct instruction *insn2) +{ + return insn1->func->pfunc == insn2->func->pfunc; +} + +static bool is_first_func_insn(struct instruction *insn) +{ + return insn->offset == insn->func->offset || + (insn->type == INSN_ENDBR && + insn->offset == insn->func->offset + insn->len); +} + /* * Find the destination instructions for all jumps. 
*/ @@ -1251,8 +1272,8 @@ static int add_jump_destinations(struct objtool_file *file) insn->func->cfunc = insn->jump_dest->func; insn->jump_dest->func->pfunc = insn->func; - } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && - insn->jump_dest->offset == insn->jump_dest->func->offset) { + } else if (!same_function(insn, insn->jump_dest) && + is_first_func_insn(insn->jump_dest)) { /* internal sibling call (without reloc) */ add_call_dest(file, insn, insn->jump_dest->func, true); } @@ -1842,6 +1863,16 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; + if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + + if (sym && sym->bind == STB_GLOBAL && + insn->type != INSN_ENDBR && !insn->noendbr) { + WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", + insn->sec, insn->offset); + } + } + if (hint->type == UNWIND_HINT_TYPE_FUNC) { insn->cfi = &func_cfi; continue; @@ -1883,6 +1914,9 @@ static int read_noendbr_hints(struct objtool_file *file) return -1; } + if (insn->type == INSN_ENDBR) + WARN_FUNC("ANNOTATE_NOENDBR on ENDBR", insn->sec, insn->offset); + insn->noendbr = 1; } @@ -2122,6 +2156,9 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be before read_unwind_hints() since that needs insn->noendbr. + */ ret = read_noendbr_hints(file); if (ret) return ret; @@ -3063,6 +3100,111 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file, return next_insn_same_sec(file, insn); } +static struct instruction * +validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc) +{ + struct instruction *dest; + struct section *sec; + unsigned long off; + + sec = reloc->sym->sec; + off = reloc->sym->offset; + + if ((reloc->sec->base->sh.sh_flags & SHF_EXECINSTR) && + (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32)) + off += arch_dest_reloc_offset(reloc->addend); + else + off += reloc->addend; + + dest = find_insn(file, sec, off); + if (!dest) + return NULL; + + if (dest->type == INSN_ENDBR) + return NULL; + + if (reloc->sym->static_call_tramp) + return NULL; + + return dest; +} + +static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset, + struct instruction *dest) +{ + WARN_FUNC("%srelocation to !ENDBR: %s+0x%lx", sec, offset, msg, + dest->func ? dest->func->name : dest->sec->name, + dest->func ? dest->offset - dest->func->offset : dest->offset); +} + +static void validate_ibt_dest(struct objtool_file *file, struct instruction *insn, + struct instruction *dest) +{ + if (dest->func && dest->func == insn->func) { + /* + * Anything from->to self is either _THIS_IP_ or IRET-to-self. + * + * There is no sane way to annotate _THIS_IP_ since the compiler treats the + * relocation as a constant and is happy to fold in offsets, skewing any + * annotation we do, leading to vast amounts of false-positives. + * + * There's also compiler generated _THIS_IP_ through KCOV and + * such which we have no hope of annotating. + * + * As such, blanket accept self-references without issue. 
+ */ + return; + } + + if (dest->noendbr) + return; + + warn_noendbr("", insn->sec, insn->offset, dest); +} + +static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn) +{ + struct instruction *dest; + struct reloc *reloc; + + switch (insn->type) { + case INSN_CALL: + case INSN_CALL_DYNAMIC: + case INSN_JUMP_CONDITIONAL: + case INSN_JUMP_UNCONDITIONAL: + case INSN_JUMP_DYNAMIC: + case INSN_JUMP_DYNAMIC_CONDITIONAL: + case INSN_RETURN: + /* + * We're looking for code references setting up indirect code + * flow. As such, ignore direct code flow and the actual + * dynamic branches. + */ + return; + + case INSN_NOP: + /* + * handle_group_alt() will create INSN_NOP instruction that + * don't belong to any section, ignore all NOP since they won't + * carry a (useful) relocation anyway. + */ + return; + + default: + break; + } + + for (reloc = insn_reloc(file, insn); + reloc; + reloc = find_reloc_by_dest_range(file->elf, insn->sec, + reloc->offset + 1, + (insn->offset + insn->len) - (reloc->offset + 1))) { + dest = validate_ibt_reloc(file, reloc); + if (dest) + validate_ibt_dest(file, insn, dest); + } +} + /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). Meanwhile, track the frame pointer state at @@ -3272,6 +3414,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; } + if (ibt) + validate_ibt_insn(file, insn); + if (insn->dead_end) return 0; @@ -3557,6 +3702,53 @@ static int validate_functions(struct objtool_file *file) return warnings; } +static int validate_ibt(struct objtool_file *file) +{ + struct section *sec; + struct reloc *reloc; + + for_each_sec(file, sec) { + bool is_data; + + /* already done in validate_branch() */ + if (sec->sh.sh_flags & SHF_EXECINSTR) + continue; + + if (!sec->reloc) + continue; + + if (!strncmp(sec->name, ".orc", 4)) + continue; + + if (!strncmp(sec->name, ".discard", 8)) + continue; + + if (!strncmp(sec->name, ".debug", 6)) + continue; + + if (!strcmp(sec->name, "_error_injection_whitelist")) + continue; + + if (!strcmp(sec->name, "_kprobe_blacklist")) + continue; + + is_data = strstr(sec->name, ".data") || strstr(sec->name, ".rodata"); + + list_for_each_entry(reloc, &sec->reloc->reloc_list, list) { + struct instruction *dest; + + dest = validate_ibt_reloc(file, reloc); + if (is_data && dest && !dest->noendbr) { + warn_noendbr("data ", reloc->sym->sec, + reloc->sym->offset + reloc->addend, + dest); + } + } + } + + return 0; +} + static int validate_reachable_instructions(struct objtool_file *file) { struct instruction *insn; @@ -3584,6 +3776,11 @@ int check(struct objtool_file *file) return 1; } + if (ibt && !lto) { + fprintf(stderr, "--ibt requires: --lto\n"); + return 1; + } + arch_initial_func_cfi_state(&initial_func_cfi); init_cfi_state(&init_cfi); init_cfi_state(&func_cfi); @@ -3630,6 +3827,13 @@ int check(struct objtool_file *file) goto out; warnings += ret; + if (ibt) { + ret = validate_ibt(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (!warnings) { ret = validate_reachable_instructions(file); if (ret < 0) diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 0cbe739ab0c8..c39dbfaef6dc 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,8 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - lto, vmlinux, mcount, noinstr, backup, sls, dryrun; 
+ lto, vmlinux, mcount, noinstr, backup, sls, dryrun, + ibt; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index f99fbc6078d5..fa3c7fa1ca9c 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -28,6 +28,9 @@ struct objtool_file { struct list_head mcount_loc_list; bool ignore_unreachables, c_file, hints, rodata; + unsigned int nr_endbr; + unsigned int nr_endbr_int; + unsigned long jl_short, jl_long; unsigned long jl_nop_short, jl_nop_long; -- cgit v1.2.3 From 89bc853eae4ad125030ef99f207ba76c2f00a26e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:55 +0100 Subject: objtool: Find unused ENDBR instructions Find all ENDBR instructions which are never referenced and stick them in a section such that the kernel can poison them, sealing the functions from ever being an indirect call target. This removes about 1-in-4 ENDBR instructions. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.763643193@infradead.org --- arch/x86/kernel/vmlinux.lds.S | 9 +++++ tools/objtool/check.c | 69 ++++++++++++++++++++++++++++++++- tools/objtool/include/objtool/objtool.h | 1 + tools/objtool/objtool.c | 1 + 4 files changed, 78 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 27f830345b6f..7fda7f27e762 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -285,6 +285,15 @@ SECTIONS } #endif +#ifdef CONFIG_X86_KERNEL_IBT + . = ALIGN(8); + .ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) { + __ibt_endbr_seal = .; + *(.ibt_endbr_seal) + __ibt_endbr_seal_end = .; + } +#endif + /* * struct alt_inst entries. 
From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities" diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d4cf831edc28..6de5085e3e5a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -383,6 +383,7 @@ static int decode_instructions(struct objtool_file *file) memset(insn, 0, sizeof(*insn)); INIT_LIST_HEAD(&insn->alts); INIT_LIST_HEAD(&insn->stack_ops); + INIT_LIST_HEAD(&insn->call_node); insn->sec = sec; insn->offset = offset; @@ -420,8 +421,9 @@ static int decode_instructions(struct objtool_file *file) sym_for_each_insn(file, func, insn) { insn->func = func; - if (insn->type == INSN_ENDBR) { + if (insn->type == INSN_ENDBR && list_empty(&insn->call_node)) { if (insn->offset == insn->func->offset) { + list_add_tail(&insn->call_node, &file->endbr_list); file->nr_endbr++; } else { file->nr_endbr_int++; @@ -742,6 +744,58 @@ static int create_retpoline_sites_sections(struct objtool_file *file) return 0; } +static int create_ibt_endbr_seal_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".ibt_endbr_seal"); + if (sec) { + WARN("file already has .ibt_endbr_seal, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->endbr_list, call_node) + idx++; + + if (stats) { + printf("ibt: ENDBR at function start: %d\n", file->nr_endbr); + printf("ibt: ENDBR inside functions: %d\n", file->nr_endbr_int); + printf("ibt: superfluous ENDBR: %d\n", idx); + } + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".ibt_endbr_seal", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .ibt_endbr_seal"); + return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->endbr_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .ibt_endbr_seal"); + return -1; + } + + idx++; + } + + return 0; +} + static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; @@ -3120,8 +3174,12 @@ validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc) if (!dest) return NULL; - if (dest->type == INSN_ENDBR) + if (dest->type == INSN_ENDBR) { + if (!list_empty(&dest->call_node)) + list_del_init(&dest->call_node); + return NULL; + } if (reloc->sym->static_call_tramp) return NULL; @@ -3860,6 +3918,13 @@ int check(struct objtool_file *file) warnings += ret; } + if (ibt) { + ret = create_ibt_endbr_seal_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (stats) { printf("nr_insns_visited: %ld\n", nr_insns_visited); printf("nr_cfi: %ld\n", nr_cfi); diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index fa3c7fa1ca9c..7a5c13a78f87 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -26,6 +26,7 @@ struct objtool_file { struct list_head retpoline_call_list; struct list_head static_call_list; struct list_head mcount_loc_list; + struct list_head endbr_list; bool ignore_unreachables, c_file, hints, rodata; unsigned int nr_endbr; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index bdf699f6552b..b09946f4e1d6 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -128,6 +128,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.retpoline_call_list); 
INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); + INIT_LIST_HEAD(&file.endbr_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; -- cgit v1.2.3 From ed53a0d971926e484d86cce617ec02a7ee85c3fe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:56 +0100 Subject: x86/alternative: Use .ibt_endbr_seal to seal indirect calls Objtool's --ibt option generates .ibt_endbr_seal which lists superfluous ENDBR instructions. That is those instructions for which the function is never indirectly called. Overwrite these ENDBR instructions with a NOP4 such that these function can never be indirect called, reducing the number of viable ENDBR targets in the kernel. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.822545231@infradead.org --- arch/um/kernel/um_arch.c | 4 ++++ arch/x86/Kconfig | 9 +++++++- arch/x86/include/asm/alternative.h | 1 + arch/x86/include/asm/ibt.h | 12 ++++++++++ arch/x86/kernel/alternative.c | 39 +++++++++++++++++++++++++++++++ arch/x86/kernel/module.c | 8 ++++++- scripts/Makefile.build | 47 ++++++++++++++++++++++++++++++-------- scripts/link-vmlinux.sh | 10 ++++++-- 8 files changed, 117 insertions(+), 13 deletions(-) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index abceeabe29b9..0760e24f2eba 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -424,6 +424,10 @@ void __init check_bugs(void) os_check_bugs(); } +void apply_ibt_endbr(s32 *start, s32 *end) +{ +} + void apply_retpolines(s32 *start, s32 *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 19d16c054a96..870e0d10452d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1873,7 +1873,7 @@ config CC_HAS_IBT config X86_KERNEL_IBT prompt "Indirect Branch Tracking" bool - depends on X86_64 && CC_HAS_IBT + depends on X86_64 && CC_HAS_IBT && STACK_VALIDATION help Build the kernel with support for Indirect Branch Tracking, a hardware support course-grain forward-edge Control Flow Integrity @@ -1881,6 +1881,13 @@ config X86_KERNEL_IBT an ENDBR instruction, as such, the compiler will instrument the code with them to make this happen. + In addition to building the kernel with IBT, seal all functions that + are not indirect call targets, avoiding them ever becomming one. + + This requires LTO like objtool runs and will slow down the build. It + does significantly reduce the number of ENDBR instructions in the + kernel image. 
+ config X86_INTEL_MEMORY_PROTECTION_KEYS prompt "Memory Protection Keys" def_bool y diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 58eee6402832..9b10c8c76087 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 52fb05d66489..689880eca9ba 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -46,8 +46,20 @@ static inline __attribute_const__ u32 gen_endbr(void) return endbr; } +static inline __attribute_const__ u32 gen_endbr_poison(void) +{ + /* + * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it + * will be unique to (former) ENDBR sites. + */ + return 0x001f0f66; /* osp nopl (%rax) */ +} + static inline bool is_endbr(u32 val) { + if (val == gen_endbr_poison()) + return true; + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ return val == gen_endbr(); } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 954d39c15724..a79196fd364f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -512,6 +513,42 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ +#ifdef CONFIG_X86_KERNEL_IBT + +/* + * Generated by: objtool --ibt + */ +void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + u32 endbr, poison = gen_endbr_poison(); + void *addr = (void *)s + *s; + + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) + continue; + + if (WARN_ON_ONCE(!is_endbr(endbr))) + continue; + + DPRINTK("ENDBR at: %pS (%px)", addr, addr); + + /* + * When we have IBT, the lack of ENDBR will trigger #CP + */ + DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); + DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); + text_poke_early(addr, &poison, 4); + } +} + +#else + +void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { } + +#endif /* CONFIG_X86_KERNEL_IBT */ + #ifdef CONFIG_SMP static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) @@ -830,6 +867,8 @@ void __init alternative_instructions(void) */ apply_alternatives(__alt_instructions, __alt_instructions_end); + apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. 
*/ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 96d7c27b7093..58bafbd19b1d 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL; + *retpolines = NULL, *ibt_endbr = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) + ibt_endbr = s; } /* @@ -290,6 +292,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } + if (ibt_endbr) { + void *iseg = (void *)ibt_endbr->sh_addr; + apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); + } if (locks && text) { void *lseg = (void *)locks->sh_addr; void *tseg = (void *)text->sh_addr; diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a4b89b757287..926d2549a59c 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -86,12 +86,18 @@ ifdef need-builtin targets-for-builtin += $(obj)/built-in.a endif -targets-for-modules := $(patsubst %.o, %.mod, $(filter %.o, $(obj-m))) +targets-for-modules := ifdef CONFIG_LTO_CLANG targets-for-modules += $(patsubst %.o, %.lto.o, $(filter %.o, $(obj-m))) endif +ifdef CONFIG_X86_KERNEL_IBT +targets-for-modules += $(patsubst %.o, %.objtool, $(filter %.o, $(obj-m))) +endif + +targets-for-modules += $(patsubst %.o, %.mod, $(filter %.o, $(obj-m))) + ifdef need-modorder targets-for-modules += $(obj)/modules.order endif @@ -230,6 +236,7 @@ objtool := $(objtree)/tools/objtool/objtool objtool_args = \ $(if $(CONFIG_UNWINDER_ORC),orc generate,check) \ $(if $(part-of-module), --module) \ + $(if $(CONFIG_X86_KERNEL_IBT), --lto --ibt) \ $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ $(if $(CONFIG_RETPOLINE), --retpoline) \ @@ -237,8 +244,8 @@ objtool_args = \ $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ $(if $(CONFIG_SLS), --sls) -cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $@) -cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) +cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $(@:.objtool=.o)) +cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$(@:.objtool=.o): $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) endif # CONFIG_STACK_VALIDATION @@ -247,6 +254,21 @@ ifdef CONFIG_LTO_CLANG # Skip objtool for LLVM bitcode $(obj)/%.o: objtool-enabled := +# objtool was skipped for LLVM bitcode, run it now that we have compiled +# modules into native code +$(obj)/%.lto.o: objtool-enabled = y +$(obj)/%.lto.o: part-of-module := y + +else ifdef CONFIG_X86_KERNEL_IBT + +# Skip objtool on individual files +$(obj)/%.o: objtool-enabled := + +# instead run objtool on the module as a whole, right before +# the final link pass with the linker script. 
+$(obj)/%.objtool: objtool-enabled = y +$(obj)/%.objtool: part-of-module := y + else # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory @@ -292,18 +314,13 @@ ifdef CONFIG_LTO_CLANG # Module .o files may contain LLVM bitcode, compile them into native code # before ELF processing quiet_cmd_cc_lto_link_modules = LTO [M] $@ -cmd_cc_lto_link_modules = \ + cmd_cc_lto_link_modules = \ $(LD) $(ld_flags) -r -o $@ \ $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ echo -T $(@:.lto.o=.o.symversions)) \ --whole-archive $(filter-out FORCE,$^) \ $(cmd_objtool) -# objtool was skipped for LLVM bitcode, run it now that we have compiled -# modules into native code -$(obj)/%.lto.o: objtool-enabled = y -$(obj)/%.lto.o: part-of-module := y - $(obj)/%.lto.o: $(obj)/%.o FORCE $(call if_changed,cc_lto_link_modules) endif @@ -316,6 +333,18 @@ cmd_mod = { \ $(obj)/%.mod: $(obj)/%$(mod-prelink-ext).o FORCE $(call if_changed,mod) +# +# Since objtool will re-write the file it will change the timestamps, therefore +# it is critical that the %.objtool file gets a timestamp *after* objtool runs. +# +# Additionally, care must be had with ordering this rule against the other rules +# that take %.o as a dependency. +# +cmd_objtool_mod = true $(cmd_objtool) ; touch $@ + +$(obj)/%.objtool: $(obj)/%$(mod-prelink-ext).o FORCE + $(call if_changed,objtool_mod) + quiet_cmd_cc_lst_c = MKLST $@ cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \ $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \ diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 9b08dca26f99..f704034ebbe6 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -108,7 +108,9 @@ objtool_link() local objtoolcmd; local objtoolopt; - if is_enabled CONFIG_LTO_CLANG && is_enabled CONFIG_STACK_VALIDATION; then + if is_enabled CONFIG_STACK_VALIDATION && \ + ( is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT ); then + # Don't perform vmlinux validation unless explicitly requested, # but run objtool on vmlinux.o now that we have an object file. if is_enabled CONFIG_UNWINDER_ORC; then @@ -117,6 +119,10 @@ objtool_link() objtoolopt="${objtoolopt} --lto" + if is_enabled CONFIG_X86_KERNEL_IBT; then + objtoolopt="${objtoolopt} --ibt" + fi + if is_enabled CONFIG_FTRACE_MCOUNT_USE_OBJTOOL; then objtoolopt="${objtoolopt} --mcount" fi @@ -168,7 +174,7 @@ vmlinux_link() # skip output file argument shift - if is_enabled CONFIG_LTO_CLANG; then + if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then # Use vmlinux.o instead of performing the slow LTO link again. objs=vmlinux.o libs= -- cgit v1.2.3 From 83a44a4f47ad20997aebb311fc678a13cde391d7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 14 Mar 2022 12:48:41 -0700 Subject: x86: Remove toolchain check for X32 ABI capability Commit 0bf6276392e9 ("x32: Warn and disable rather than error if binutils too old") added a small test in arch/x86/Makefile because binutils 2.22 or newer is needed to properly support elf32-x86-64. This check is no longer necessary, as the minimum supported version of binutils is 2.23, which is enforced at configuration time with scripts/min-tool-version.sh. Remove this check and replace all uses of CONFIG_X86_X32 with CONFIG_X86_X32_ABI, as two symbols are no longer necessary. 
[nathan: Rebase, fix up a few places where CONFIG_X86_X32 was still used, and simplify commit message to satisfy -tip requirements] Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220314194842.3452-2-nathan@kernel.org --- arch/x86/Kconfig | 8 ++------ arch/x86/Makefile | 16 ---------------- arch/x86/entry/syscalls/Makefile | 2 +- arch/x86/include/asm/syscall_wrapper.h | 6 +++--- arch/x86/include/asm/vdso.h | 2 +- arch/x86/kernel/process_64.c | 2 +- fs/fuse/ioctl.c | 2 +- fs/xfs/xfs_ioctl32.c | 2 +- sound/core/control_compat.c | 16 ++++++++-------- sound/core/pcm_compat.c | 20 ++++++++++---------- 10 files changed, 28 insertions(+), 48 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 870e0d10452d..b903bfcd713c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2861,7 +2861,7 @@ config IA32_AOUT help Support old a.out binaries in the 32bit emulation. -config X86_X32 +config X86_X32_ABI bool "x32 ABI for 64-bit mode" depends on X86_64 help @@ -2870,10 +2870,6 @@ config X86_X32 full 64-bit register file and wide data path while leaving pointers at 32 bits for smaller memory footprint. - You will need a recent binutils (2.22 or later) with - elf32_x86_64 support enabled to compile a kernel with this - option set. - config COMPAT_32 def_bool y depends on IA32_EMULATION || X86_32 @@ -2882,7 +2878,7 @@ config COMPAT_32 config COMPAT def_bool y - depends on IA32_EMULATION || X86_X32 + depends on IA32_EMULATION || X86_X32_ABI if COMPAT config COMPAT_FOR_U64_ALIGNMENT diff --git a/arch/x86/Makefile b/arch/x86/Makefile index f29c2c9c3216..63d50f65b828 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -152,22 +152,6 @@ else KBUILD_CFLAGS += -mcmodel=kernel endif -ifdef CONFIG_X86_X32 - x32_ld_ok := $(call try-run,\ - /bin/echo -e '1: .quad 1b' | \ - $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \ - $(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMP.o" && \ - $(LD) -m elf32_x86_64 "$$TMP.o" -o "$$TMP",y,n) - ifeq ($(x32_ld_ok),y) - CONFIG_X86_X32_ABI := y - KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI - KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI - else - $(warning CONFIG_X86_X32 enabled but no binutils support) - endif -endif -export CONFIG_X86_X32_ABI - # # If the function graph tracer is used with mcount instead of fentry, # '-maccumulate-outgoing-args' is needed to prevent a GCC bug diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile index 5b3efed0e4e8..7f3886eeb2ff 100644 --- a/arch/x86/entry/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -67,7 +67,7 @@ uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h -syshdr-$(CONFIG_X86_X32) += syscalls_x32.h +syshdr-$(CONFIG_X86_X32_ABI) += syscalls_x32.h syshdr-$(CONFIG_XEN) += xen-hypercalls.h uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 6a2827d0681f..59358d1bf880 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -159,7 +159,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #endif /* CONFIG_IA32_EMULATION */ -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware * of the x86-64-style parameter ordering of x32 syscalls. 
The syscalls common @@ -177,12 +177,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X32_COMPAT_SYS_NI(name) \ __SYS_NI(x64, compat_sys_##name) -#else /* CONFIG_X86_X32 */ +#else /* CONFIG_X86_X32_ABI */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) #define __X32_COMPAT_SYS_NI(name) -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ #ifdef CONFIG_COMPAT diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 98aa103eb4ab..2963a2f5dbc4 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -37,7 +37,7 @@ struct vdso_image { extern const struct vdso_image vdso_image_64; #endif -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI extern const struct vdso_image vdso_image_x32; #endif diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3402edec236c..e459253649be 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -681,7 +681,7 @@ void set_personality_64bit(void) static void __set_personality_x32(void) { -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (current->mm) current->mm->context.flags = 0; diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index df58966bc874..33cde4bbccdc 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -170,7 +170,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, #else if (flags & FUSE_IOCTL_COMPAT) { inarg.flags |= FUSE_IOCTL_32BIT; -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) inarg.flags |= FUSE_IOCTL_COMPAT_X32; #endif diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 004ed2a251e8..ca25ed89b706 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -217,7 +217,7 @@ xfs_compat_ioc_fsbulkstat( inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat; bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat; -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) { /* * ... 
but on x32 the input xfs_fsop_bulkreq has pointers diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index edff063e088d..d8a86d1a99d6 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -150,7 +150,7 @@ struct snd_ctl_elem_value32 { unsigned char reserved[128]; }; -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* x32 has a different alignment for 64bit values from ia32 */ struct snd_ctl_elem_value_x32 { struct snd_ctl_elem_id id; @@ -162,7 +162,7 @@ struct snd_ctl_elem_value_x32 { } value; unsigned char reserved[128]; }; -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ /* get the value type and count of the control */ static int get_ctl_type(struct snd_card *card, struct snd_ctl_elem_id *id, @@ -347,7 +347,7 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, return ctl_elem_write_user(file, data32, &data32->value); } -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI static int snd_ctl_elem_read_user_x32(struct snd_card *card, struct snd_ctl_elem_value_x32 __user *data32) { @@ -359,7 +359,7 @@ static int snd_ctl_elem_write_user_x32(struct snd_ctl_file *file, { return ctl_elem_write_user(file, data32, &data32->value); } -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ /* add or replace a user control */ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file, @@ -418,10 +418,10 @@ enum { SNDRV_CTL_IOCTL_ELEM_WRITE32 = _IOWR('U', 0x13, struct snd_ctl_elem_value32), SNDRV_CTL_IOCTL_ELEM_ADD32 = _IOWR('U', 0x17, struct snd_ctl_elem_info32), SNDRV_CTL_IOCTL_ELEM_REPLACE32 = _IOWR('U', 0x18, struct snd_ctl_elem_info32), -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI SNDRV_CTL_IOCTL_ELEM_READ_X32 = _IOWR('U', 0x12, struct snd_ctl_elem_value_x32), SNDRV_CTL_IOCTL_ELEM_WRITE_X32 = _IOWR('U', 0x13, struct snd_ctl_elem_value_x32), -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ }; static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -460,12 +460,12 @@ static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, uns return snd_ctl_elem_add_compat(ctl, argp, 0); case SNDRV_CTL_IOCTL_ELEM_REPLACE32: return snd_ctl_elem_add_compat(ctl, argp, 1); -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI case SNDRV_CTL_IOCTL_ELEM_READ_X32: return snd_ctl_elem_read_user_x32(ctl->card, argp); case SNDRV_CTL_IOCTL_ELEM_WRITE_X32: return snd_ctl_elem_write_user_x32(ctl, argp); -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ } down_read(&snd_ioctl_rwsem); diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index e4e176854ce7..917c5b4f19d7 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -147,13 +147,13 @@ static int snd_pcm_ioctl_channel_info_compat(struct snd_pcm_substream *substream return err; } -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* X32 ABI has the same struct as x86-64 for snd_pcm_channel_info */ static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream, struct snd_pcm_channel_info __user *src); #define snd_pcm_ioctl_channel_info_x32(s, p) \ snd_pcm_channel_info_user(s, p) -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ struct compat_snd_pcm_status64 { snd_pcm_state_t state; @@ -375,7 +375,7 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream, return err; } -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* X32 ABI has 64bit timespec and 64bit alignment */ struct snd_pcm_mmap_status_x32 { snd_pcm_state_t state; @@ -468,7 
+468,7 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, return 0; } -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ #ifdef __BIG_ENDIAN typedef char __pad_before_u32[4]; @@ -560,10 +560,10 @@ enum { SNDRV_PCM_IOCTL_READN_FRAMES32 = _IOR('A', 0x53, struct snd_xfern32), SNDRV_PCM_IOCTL_STATUS_COMPAT64 = _IOR('A', 0x20, struct compat_snd_pcm_status64), SNDRV_PCM_IOCTL_STATUS_EXT_COMPAT64 = _IOWR('A', 0x24, struct compat_snd_pcm_status64), -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI SNDRV_PCM_IOCTL_CHANNEL_INFO_X32 = _IOR('A', 0x32, struct snd_pcm_channel_info), SNDRV_PCM_IOCTL_SYNC_PTR_X32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr_x32), -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ }; static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -607,10 +607,10 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l case __SNDRV_PCM_IOCTL_SYNC_PTR32: return snd_pcm_common_ioctl(file, substream, cmd, argp); case __SNDRV_PCM_IOCTL_SYNC_PTR64: -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) return snd_pcm_ioctl_sync_ptr_x32(substream, argp); -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ return snd_pcm_ioctl_sync_ptr_buggy(substream, argp); case SNDRV_PCM_IOCTL_HW_REFINE32: return snd_pcm_ioctl_hw_params_compat(substream, 1, argp); @@ -642,10 +642,10 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l return snd_pcm_status_user_compat64(substream, argp, false); case SNDRV_PCM_IOCTL_STATUS_EXT_COMPAT64: return snd_pcm_status_user_compat64(substream, argp, true); -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI case SNDRV_PCM_IOCTL_CHANNEL_INFO_X32: return snd_pcm_ioctl_channel_info_x32(substream, argp); -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ } return -ENOIOCTLCMD; -- cgit v1.2.3 From aaeed6ecc1253ce1463fa1aca0b70a4ccbc9fa75 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 14 Mar 2022 12:48:42 -0700 Subject: x86/Kconfig: Do not allow CONFIG_X86_X32_ABI=y with llvm-objcopy There are two outstanding issues with CONFIG_X86_X32_ABI and llvm-objcopy, with similar root causes: 1. llvm-objcopy does not properly convert .note.gnu.property when going from x86_64 to x86_x32, resulting in a corrupted section when linking: https://github.com/ClangBuiltLinux/linux/issues/1141 2. llvm-objcopy produces corrupted compressed debug sections when going from x86_64 to x86_x32, also resulting in an error when linking: https://github.com/ClangBuiltLinux/linux/issues/514 After commit 41c5ef31ad71 ("x86/ibt: Base IBT bits"), the .note.gnu.property section is always generated when CONFIG_X86_KERNEL_IBT is enabled, which causes the first issue to become visible with an allmodconfig build: ld.lld: error: arch/x86/entry/vdso/vclock_gettime-x32.o:(.note.gnu.property+0x1c): program property is too short To avoid this error, do not allow CONFIG_X86_X32_ABI to be selected when using llvm-objcopy. If the two issues ever get fixed in llvm-objcopy, this can be turned into a feature check. 
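For background on the section involved, here is a minimal standalone sketch (illustrative only, not part of this patch; the constants come from the ELF/x86 psABI, and the root-cause framing follows the linked ClangBuiltLinux issues). A .note.gnu.property descriptor is padded to 8 bytes in ELF64 objects but to 4 bytes in ELF32 objects such as the x32 vDSO, so an x86_64 to x86_x32 conversion must rewrite that padding; a note left inconsistent with the target class is what the linker then rejects as "program property is too short":

	#include <stdint.h>
	#include <stdio.h>

	/* psABI constants -- not taken from this patch series */
	#define NT_GNU_PROPERTY_TYPE_0          5           /* note type, name "GNU\0" */
	#define GNU_PROPERTY_X86_FEATURE_1_AND  0xc0000002u /* property type */
	#define GNU_PROPERTY_X86_FEATURE_1_IBT  0x1u        /* payload bit */

	/* n_descsz of a one-property note: pr_type + pr_datasz + padded payload */
	static uint32_t note_descsz(uint32_t align)
	{
		uint32_t datasz = 4; /* one uint32_t feature bitmap */

		return 4 + 4 + ((datasz + align - 1) & ~(align - 1));
	}

	int main(void)
	{
		printf("n_type=%d pr_type=%#x payload=%#x\n",
		       NT_GNU_PROPERTY_TYPE_0, GNU_PROPERTY_X86_FEATURE_1_AND,
		       GNU_PROPERTY_X86_FEATURE_1_IBT);
		printf("ELF64 (x86_64)  n_descsz=%u\n", note_descsz(8)); /* 16 */
		printf("ELF32 (x86_x32) n_descsz=%u\n", note_descsz(4)); /* 12 */
		return 0;
	}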
Signed-off-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220314194842.3452-3-nathan@kernel.org --- arch/x86/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b903bfcd713c..0f0672d2c816 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2864,6 +2864,11 @@ config IA32_AOUT config X86_X32_ABI bool "x32 ABI for 64-bit mode" depends on X86_64 + # llvm-objcopy does not convert x86_64 .note.gnu.property or + # compressed debug sections to x86_x32 properly: + # https://github.com/ClangBuiltLinux/linux/issues/514 + # https://github.com/ClangBuiltLinux/linux/issues/1141 + depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm) help Include code to run binaries for the x32 native 32-bit ABI for 64-bit processors. An x32 process gets access to the -- cgit v1.2.3 From d31ed5d767c0452b4f49846d80a0bfeafa3a4ded Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Mar 2022 12:19:27 +0100 Subject: kbuild: Fixup the IBT kbuild changes Masahiro-san deemed my kbuild changes to support whole module objtool runs too terrible to live and gracefully provided an alternative. Suggested-by: Masahiro Yamada Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/CAK7LNAQ2mYMnOKMQheVi+6byUFE3KEkjm1zcndNUfe0tORGvug@mail.gmail.com --- scripts/Makefile.build | 66 +++++++++++++++----------------------------------- scripts/Makefile.lib | 4 +-- scripts/mod/modpost.c | 12 ++++----- 3 files changed, 27 insertions(+), 55 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 926d2549a59c..2173a6729f30 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -86,18 +86,12 @@ ifdef need-builtin targets-for-builtin += $(obj)/built-in.a endif -targets-for-modules := +targets-for-modules := $(patsubst %.o, %.mod, $(filter %.o, $(obj-m))) -ifdef CONFIG_LTO_CLANG -targets-for-modules += $(patsubst %.o, %.lto.o, $(filter %.o, $(obj-m))) -endif - -ifdef CONFIG_X86_KERNEL_IBT -targets-for-modules += $(patsubst %.o, %.objtool, $(filter %.o, $(obj-m))) +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) +targets-for-modules += $(patsubst %.o, %.prelink.o, $(filter %.o, $(obj-m))) endif -targets-for-modules += $(patsubst %.o, %.mod, $(filter %.o, $(obj-m))) - ifdef need-modorder targets-for-modules += $(obj)/modules.order endif @@ -244,31 +238,16 @@ objtool_args = \ $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ $(if $(CONFIG_SLS), --sls) -cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $(@:.objtool=.o)) -cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$(@:.objtool=.o): $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) +cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $@) +cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) endif # CONFIG_STACK_VALIDATION -ifdef CONFIG_LTO_CLANG +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) # Skip objtool for LLVM bitcode $(obj)/%.o: objtool-enabled := -# objtool was skipped for LLVM bitcode, run it now that we have compiled -# modules into native code -$(obj)/%.lto.o: objtool-enabled = y -$(obj)/%.lto.o: part-of-module := y - -else ifdef CONFIG_X86_KERNEL_IBT - -# Skip objtool on individual files -$(obj)/%.o: objtool-enabled := - -# instead run objtool on the module as a whole, right before -# the final link pass with the linker script. 
-$(obj)/%.objtool: objtool-enabled = y -$(obj)/%.objtool: part-of-module := y - else # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory @@ -310,19 +289,24 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE $(call if_changed_rule,cc_o_c) $(call cmd,force_checksrc) -ifdef CONFIG_LTO_CLANG +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) # Module .o files may contain LLVM bitcode, compile them into native code # before ELF processing -quiet_cmd_cc_lto_link_modules = LTO [M] $@ - cmd_cc_lto_link_modules = \ +quiet_cmd_cc_prelink_modules = LD [M] $@ + cmd_cc_prelink_modules = \ $(LD) $(ld_flags) -r -o $@ \ - $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ - echo -T $(@:.lto.o=.o.symversions)) \ + $(shell [ -s $(@:.prelink.o=.o.symversions) ] && \ + echo -T $(@:.prelink.o=.o.symversions)) \ --whole-archive $(filter-out FORCE,$^) \ $(cmd_objtool) -$(obj)/%.lto.o: $(obj)/%.o FORCE - $(call if_changed,cc_lto_link_modules) +# objtool was skipped for LLVM bitcode, run it now that we have compiled +# modules into native code +$(obj)/%.prelink.o: objtool-enabled = y +$(obj)/%.prelink.o: part-of-module := y + +$(obj)/%.prelink.o: $(obj)/%.o FORCE + $(call if_changed,cc_prelink_modules) endif cmd_mod = { \ @@ -333,18 +317,6 @@ cmd_mod = { \ $(obj)/%.mod: $(obj)/%$(mod-prelink-ext).o FORCE $(call if_changed,mod) -# -# Since objtool will re-write the file it will change the timestamps, therefore -# it is critical that the %.objtool file gets a timestamp *after* objtool runs. -# -# Additionally, care must be had with ordering this rule against the other rules -# that take %.o as a dependency. -# -cmd_objtool_mod = true $(cmd_objtool) ; touch $@ - -$(obj)/%.objtool: $(obj)/%$(mod-prelink-ext).o FORCE - $(call if_changed,objtool_mod) - quiet_cmd_cc_lst_c = MKLST $@ cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \ $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \ @@ -498,7 +470,7 @@ $(obj)/lib.a: $(lib-y) FORCE # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object # module is turned into a multi object module, $^ will contain header file # dependencies recorded in the .*.cmd file. -ifdef CONFIG_LTO_CLANG +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) quiet_cmd_link_multi-m = AR [M] $@ cmd_link_multi-m = \ $(cmd_update_lto_symversions); \ diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 79be57fdd32a..8bfc9238237c 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -230,11 +230,11 @@ dtc_cpp_flags = -Wp,-MMD,$(depfile).pre.tmp -nostdinc \ $(addprefix -I,$(DTC_INCLUDE)) \ -undef -D__DTS__ -ifeq ($(CONFIG_LTO_CLANG),y) +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) # With CONFIG_LTO_CLANG, .o files in modules might be LLVM bitcode, so we # need to run LTO to compile them into native code (.lto.o) before further # processing. -mod-prelink-ext := .lto +mod-prelink-ext := .prelink endif # Useful for describing the dependency of composite objects diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6bfa33217914..09c3ab0a9b37 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1989,9 +1989,9 @@ static char *remove_dot(char *s) if (m && (s[n + m] == '.' 
|| s[n + m] == 0)) s[n] = 0; - /* strip trailing .lto */ - if (strends(s, ".lto")) - s[strlen(s) - 4] = '\0'; + /* strip trailing .prelink */ + if (strends(s, ".prelink")) + s[strlen(s) - 8] = '\0'; } return s; } @@ -2015,9 +2015,9 @@ static void read_symbols(const char *modname) /* strip trailing .o */ tmp = NOFAIL(strdup(modname)); tmp[strlen(tmp) - 2] = '\0'; - /* strip trailing .lto */ - if (strends(tmp, ".lto")) - tmp[strlen(tmp) - 4] = '\0'; + /* strip trailing .prelink */ + if (strends(tmp, ".prelink")) + tmp[strlen(tmp) - 8] = '\0'; mod = new_module(tmp); free(tmp); } -- cgit v1.2.3
From 262448f3d18959d175b10e28a3b65f41d1d7313f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 18 Mar 2022 16:07:46 -0700 Subject: x86/Kconfig: Only enable CONFIG_CC_HAS_IBT for clang >= 14.0.0 Commit 156ff4a544ae ("x86/ibt: Base IBT bits") added a check for a crash with 'clang -fcf-protection=branch -mfentry -pg', which was intended to exclude Clang versions older than 14.0.0 from selecting CONFIG_X86_KERNEL_IBT. clang-11 does not have the issue that the check is testing for, so CONFIG_X86_KERNEL_IBT is selectable. Unfortunately, there is a different crash in clang-11 that was fixed in clang-12. To make matters worse, that crash does not appear to be entirely deterministic, as the compiler will sometimes crash and other times succeed on the same input, which makes a dynamic check for the crash, like the '-pg' one, unreliable. To make everything work properly for all common versions of clang, use a hard version check of 14.0.0, as that will be the first release upstream that has both bugs properly fixed. Signed-off-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220318230747.3900772-2-nathan@kernel.org --- arch/x86/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0f0672d2c816..921e4ebda564 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1865,9 +1865,10 @@ config CC_HAS_IBT # GCC >= 9 and binutils >= 2.29 # Retpoline check to work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93654 # Clang/LLVM >= 14 - # fentry check to work around https://reviews.llvm.org/D111108 + # https://github.com/llvm/llvm-project/commit/e0b89df2e0f0130881bf6c39bf31d7f6aac00e0f + # https://github.com/llvm/llvm-project/commit/dfcf69770bc522b9e411c66454934a37c1f35332 def_bool ((CC_IS_GCC && $(cc-option, -fcf-protection=branch -mindirect-branch-register)) || \ - (CC_IS_CLANG && $(success,echo "void a(void) {}" | $(CC) -Werror $(CLANG_FLAGS) -fcf-protection=branch -mfentry -pg -x c - -c -o /dev/null))) && \ + (CC_IS_CLANG && CLANG_VERSION >= 140000)) && \ $(as-instr,endbr64) config X86_KERNEL_IBT -- cgit v1.2.3
From f6a2c2b2de817078ac5a7e58c10e746165e7825d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 18 Mar 2022 16:07:47 -0700 Subject: x86/Kconfig: Only allow CONFIG_X86_KERNEL_IBT with ld.lld >= 14.0.0 With CONFIG_X86_KERNEL_IBT=y and a version of ld.lld prior to 14.0.0, there are numerous objtool warnings along the lines of: warning: objtool: .plt+0x6: indirect jump found in RETPOLINE build This is a known issue that has been resolved in ld.lld 14.0.0. Prevent CONFIG_X86_KERNEL_IBT from being selectable when using one of these problematic ld.lld versions.
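The 140000 threshold in the check added below uses the kernel's packed tool-version encoding, major * 10000 + minor * 100 + patch (the same packing behind the CLANG_VERSION >= 140000 test in the previous patch). A quick standalone sketch of the encoding, for illustration only:

	#include <stdio.h>

	/* Kernel-style packed version: major * 10000 + minor * 100 + patch. */
	static int packed_version(int major, int minor, int patch)
	{
		return major * 10000 + minor * 100 + patch;
	}

	int main(void)
	{
		printf("ld.lld 13.0.1 -> %d (rejected)\n", packed_version(13, 0, 1));
		printf("ld.lld 14.0.0 -> %d (allowed)\n", packed_version(14, 0, 0));
		return 0;
	}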
Signed-off-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220318230747.3900772-3-nathan@kernel.org --- arch/x86/Kconfig | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 921e4ebda564..87579264aa00 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1875,6 +1875,8 @@ config X86_KERNEL_IBT prompt "Indirect Branch Tracking" bool depends on X86_64 && CC_HAS_IBT && STACK_VALIDATION + # https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f + depends on !LD_IS_LLD || LLD_VERSION >= 140000 help Build the kernel with support for Indirect Branch Tracking, a hardware-supported coarse-grain forward-edge Control Flow Integrity -- cgit v1.2.3
From 3986f65d4f408ce9d0a361e3226a3246a5fb701c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 21 Mar 2022 10:13:12 +0100 Subject: kvm/emulate: Fix SETcc emulation for ENDBR Companion patch for commit fe83f5eae432 ("kvm/emulate: Fix SETcc emulation function offsets with SLS"), now extending it to cover the additional ENDBR instruction. Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/YjMVpfe%2f9ldmWX8W@hirez.programming.kicks-ass.net --- arch/x86/kvm/emulate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d98fb36c68ed..df5e6c0ddadd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -24,6 +24,7 @@ #include <linux/stringify.h> #include <asm/debugreg.h> #include <asm/nospec-branch.h> +#include <asm/ibt.h> #include "x86.h" #include "tss.h" @@ -434,15 +435,16 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); /* * Depending on .config the SETcc functions look like: * + * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] * SETcc %al [3 bytes] * RET [1 byte] * INT3 [1 byte; CONFIG_SLS] * - * Which gives possible sizes 4 or 5. When rounded up to the - * next power-of-two alignment they become 4 or 8. + * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the + * next power-of-two alignment they become 4, 8 or 16 resp. */ -#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS)) -#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS)) +#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS)) +#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT) static_assert(SETCC_LENGTH <= SETCC_ALIGN); #define FOP_SETCC(op) \ -- cgit v1.2.3
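To sanity-check the new macro arithmetic, here is a small userspace sketch (not kernel code; it mirrors the macros above under the assumption that ENDBR_INSN_SIZE is 4 with IBT enabled and 0 without, and that HAS_KERNEL_IBT and IS_ENABLED() evaluate to 0/1) tabulating SETCC_LENGTH and SETCC_ALIGN for the four IBT/SLS combinations:

	#include <stdio.h>

	int main(void)
	{
		for (int ibt = 0; ibt <= 1; ibt++) {
			for (int sls = 0; sls <= 1; sls++) {
				int endbr_insn_size = ibt ? 4 : 0;      /* ENDBR_INSN_SIZE */
				int length = endbr_insn_size + 4 + sls; /* SETCC_LENGTH */
				int align = 4 << sls << ibt;            /* SETCC_ALIGN */

				printf("IBT=%d SLS=%d: length=%d align=%d\n",
				       ibt, sls, length, align);
			}
		}
		return 0;
	}

This prints lengths 4, 5, 8 and 9 against alignments 4, 8, 8 and 16, matching the updated comment, and length never exceeds alignment, which is exactly what the static_assert() pins down.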