diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/alternative.c | 71 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 199 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/amd.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/module.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 18 |
9 files changed, 240 insertions, 94 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 72646d75b6ff..2dcf3a06af09 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -778,6 +778,8 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #ifdef CONFIG_X86_KERNEL_IBT +static void poison_cfi(void *addr); + static void __init_or_module poison_endbr(void *addr, bool warn) { u32 endbr, poison = gen_endbr_poison(); @@ -802,8 +804,11 @@ static void __init_or_module poison_endbr(void *addr, bool warn) /* * Generated by: objtool --ibt + * + * Seal the functions for indirect calls by clobbering the ENDBR instructions + * and the kCFI hash value. */ -void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) +void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) { s32 *s; @@ -812,13 +817,13 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) poison_endbr(addr, true); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_endbr(addr - 16, false); + poison_cfi(addr - 16); } } #else -void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { } +void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } #endif /* CONFIG_X86_KERNEL_IBT */ @@ -1063,6 +1068,17 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end) return 0; } +static void cfi_rewrite_endbr(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + + poison_endbr(addr+16, false); + } +} + /* .retpoline_sites */ static int cfi_rand_callers(s32 *start, s32 *end) { @@ -1157,14 +1173,19 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, return; case CFI_FINEIBT: + /* place the FineIBT preamble at func()-16 */ ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; + /* rewrite the callers to target func()-16 */ ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err; + /* now that nobody targets func()+0, remove ENDBR there */ + cfi_rewrite_endbr(start_cfi, end_cfi); + if (builtin) pr_info("Using FineIBT CFI\n"); return; @@ -1177,6 +1198,41 @@ err: pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n"); } +static inline void poison_hash(void *addr) +{ + *(u32 *)addr = 0; +} + +static void poison_cfi(void *addr) +{ + switch (cfi_mode) { + case CFI_FINEIBT: + /* + * __cfi_\func: + * osp nopl (%rax) + * subl $0, %r10d + * jz 1f + * ud2 + * 1: nop + */ + poison_endbr(addr, false); + poison_hash(addr + fineibt_preamble_hash); + break; + + case CFI_KCFI: + /* + * __cfi_\func: + * movl $0, %eax + * .skip 11, 0x90 + */ + poison_hash(addr + 1); + break; + + default: + break; + } +} + #else static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, @@ -1184,6 +1240,10 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, { } +#ifdef CONFIG_X86_KERNEL_IBT +static void poison_cfi(void *addr) { } +#endif + #endif void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, @@ -1565,7 +1625,10 @@ void __init alternative_instructions(void) */ callthunks_patch_builtin_calls(); - apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + /* + * Seal all functions that do not have their address taken. + */ + apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 571abf808ea3..26ad7ca423e7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -27,11 +27,6 @@ #include "cpu.h" -static const int amd_erratum_383[]; -static const int amd_erratum_400[]; -static const int amd_erratum_1054[]; -static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); - /* * nodes_per_socket: Stores the number of nodes per socket. * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX @@ -39,6 +34,78 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); */ static u32 nodes_per_socket = 1; +/* + * AMD errata checking + * + * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or + * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that + * have an OSVW id assigned, which it takes as first argument. Both take a + * variable number of family-specific model-stepping ranges created by + * AMD_MODEL_RANGE(). + * + * Example: + * + * const int amd_erratum_319[] = + * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), + * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), + * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); + */ + +#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 } +#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 } +#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ + ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end)) +#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff) +#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff) +#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff) + +static const int amd_erratum_400[] = + AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), + AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); + +static const int amd_erratum_383[] = + AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); + +/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ +static const int amd_erratum_1054[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); + +static const int amd_zenbleed[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf), + AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), + AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); + +static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) +{ + int osvw_id = *erratum++; + u32 range; + u32 ms; + + if (osvw_id >= 0 && osvw_id < 65536 && + cpu_has(cpu, X86_FEATURE_OSVW)) { + u64 osvw_len; + + rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); + if (osvw_id < osvw_len) { + u64 osvw_bits; + + rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), + osvw_bits); + return osvw_bits & (1ULL << (osvw_id & 0x3f)); + } + } + + /* OSVW unavailable or ID unknown, match family-model-stepping range */ + ms = (cpu->x86_model << 4) | cpu->x86_stepping; + while ((range = *erratum++)) + if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && + (ms >= AMD_MODEL_RANGE_START(range)) && + (ms <= AMD_MODEL_RANGE_END(range))) + return true; + + return false; +} + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { u32 gprs[8] = { 0 }; @@ -916,6 +983,47 @@ static void init_amd_zn(struct cpuinfo_x86 *c) } } +static bool cpu_has_zenbleed_microcode(void) +{ + u32 good_rev = 0; + + switch (boot_cpu_data.x86_model) { + case 0x30 ... 0x3f: good_rev = 0x0830107a; break; + case 0x60 ... 0x67: good_rev = 0x0860010b; break; + case 0x68 ... 0x6f: good_rev = 0x08608105; break; + case 0x70 ... 0x7f: good_rev = 0x08701032; break; + case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; + + default: + return false; + break; + } + + if (boot_cpu_data.microcode < good_rev) + return false; + + return true; +} + +static void zenbleed_check(struct cpuinfo_x86 *c) +{ + if (!cpu_has_amd_erratum(c, amd_zenbleed)) + return; + + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (!cpu_has(c, X86_FEATURE_AVX)) + return; + + if (!cpu_has_zenbleed_microcode()) { + pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n"); + msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT); + } else { + msr_clear_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT); + } +} + static void init_amd(struct cpuinfo_x86 *c) { early_init_amd(c); @@ -1020,6 +1128,8 @@ static void init_amd(struct cpuinfo_x86 *c) if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && cpu_has(c, X86_FEATURE_AUTOIBRS)) WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); + + zenbleed_check(c); } #ifdef CONFIG_X86_32 @@ -1115,73 +1225,6 @@ static const struct cpu_dev amd_cpu_dev = { cpu_dev_register(amd_cpu_dev); -/* - * AMD errata checking - * - * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or - * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that - * have an OSVW id assigned, which it takes as first argument. Both take a - * variable number of family-specific model-stepping ranges created by - * AMD_MODEL_RANGE(). - * - * Example: - * - * const int amd_erratum_319[] = - * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), - * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), - * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); - */ - -#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 } -#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 } -#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ - ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end)) -#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff) -#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff) -#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff) - -static const int amd_erratum_400[] = - AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), - AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); - -static const int amd_erratum_383[] = - AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); - -/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ -static const int amd_erratum_1054[] = - AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); - -static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) -{ - int osvw_id = *erratum++; - u32 range; - u32 ms; - - if (osvw_id >= 0 && osvw_id < 65536 && - cpu_has(cpu, X86_FEATURE_OSVW)) { - u64 osvw_len; - - rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); - if (osvw_id < osvw_len) { - u64 osvw_bits; - - rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), - osvw_bits); - return osvw_bits & (1ULL << (osvw_id & 0x3f)); - } - } - - /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_stepping; - while ((range = *erratum++)) - if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && - (ms >= AMD_MODEL_RANGE_START(range)) && - (ms <= AMD_MODEL_RANGE_END(range))) - return true; - - return false; -} - static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[4], amd_dr_addr_mask); static unsigned int amd_msr_dr_addr_masks[] = { @@ -1235,3 +1278,15 @@ u32 amd_get_highest_perf(void) return 255; } EXPORT_SYMBOL_GPL(amd_get_highest_perf); + +static void zenbleed_check_cpu(void *unused) +{ + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + + zenbleed_check(c); +} + +void amd_check_microcode(void) +{ + on_each_cpu(zenbleed_check_cpu, NULL, 1); +} diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9e2a91830f72..95507448e781 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1150,19 +1150,21 @@ spectre_v2_user_select_mitigation(void) } /* - * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP + * If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP * is not required. * - * Enhanced IBRS also protects against cross-thread branch target + * Intel's Enhanced IBRS also protects against cross-thread branch target * injection in user-mode as the IBRS bit remains always set which * implicitly enables cross-thread protections. However, in legacy IBRS * mode, the IBRS bit is set only on kernel entry and cleared on return - * to userspace. This disables the implicit cross-thread protection, - * so allow for STIBP to be selected in that case. + * to userspace. AMD Automatic IBRS also does not protect userspace. + * These modes therefore disable the implicit cross-thread protection, + * so allow for STIBP to be selected in those cases. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS))) return; /* @@ -2294,7 +2296,8 @@ static ssize_t mmio_stale_data_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS)) return ""; switch (spectre_v2_user_stibp) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 52683fddafaf..0ba1067f4e5f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2287,6 +2287,8 @@ void microcode_check(struct cpuinfo_x86 *prev_info) perf_check_microcode(); + amd_check_microcode(); + store_cpu_caps(&curr_info); if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability, diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 5e74610b39e7..c4ec4ca47e11 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1261,10 +1261,10 @@ static void __threshold_remove_blocks(struct threshold_bank *b) struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; - kobject_del(b->kobj); + kobject_put(b->kobj); list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) - kobject_del(&pos->kobj); + kobject_put(b->kobj); } static void threshold_remove_bank(struct threshold_bank *bank) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 01e8f34daf22..12df54ff0e81 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -282,7 +282,6 @@ static inline void tramp_free(void *tramp) { } /* Defined as markers to the end of the ftrace default trampolines */ extern void ftrace_regs_caller_end(void); -extern void ftrace_regs_caller_ret(void); extern void ftrace_caller_end(void); extern void ftrace_caller_op_ptr(void); extern void ftrace_regs_caller_op_ptr(void); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b05f62ee2344..5f71a0cf4399 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -358,7 +358,7 @@ int module_finalize(const Elf_Ehdr *hdr, } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; - apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); + apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size); } if (locks) { void *lseg = (void *)locks->sh_addr; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff9b80a0e3e3..72015dba72ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/static_call.h> #include <trace/events/power.h> #include <linux/hw_breakpoint.h> +#include <linux/entry-common.h> #include <asm/cpu.h> #include <asm/apic.h> #include <linux/uaccess.h> @@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls) return do_set_thread_area_64(p, ARCH_SET_FS, tls); } +__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, + int (*fn)(void *), void *fn_arg) +{ + schedule_tail(prev); + + /* Is this a kernel thread? */ + if (unlikely(fn)) { + fn(fn_arg); + /* + * A kernel thread is allowed to return here after successfully + * calling kernel_execve(). Exit to userspace to complete the + * execve() syscall. + */ + regs->ax = 0; + } + + syscall_exit_to_user_mode(regs); +} + int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; @@ -149,7 +169,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame = &fork_frame->frame; frame->bp = encode_frame_pointer(childregs); - frame->ret_addr = (unsigned long) ret_from_fork; + frame->ret_addr = (unsigned long) ret_from_fork_asm; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; p->thread.iopl_warn = 0; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 58b1f208eff5..4a817d20ce3b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -697,9 +697,10 @@ static bool try_fixup_enqcmd_gp(void) } static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr, - unsigned long error_code, const char *str) + unsigned long error_code, const char *str, + unsigned long address) { - if (fixup_exception(regs, trapnr, error_code, 0)) + if (fixup_exception(regs, trapnr, error_code, address)) return true; current->thread.error_code = error_code; @@ -759,7 +760,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) goto exit; } - if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc)) + if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0)) goto exit; if (error_code) @@ -1357,17 +1358,20 @@ DEFINE_IDTENTRY(exc_device_not_available) #define VE_FAULT_STR "VE fault" -static void ve_raise_fault(struct pt_regs *regs, long error_code) +static void ve_raise_fault(struct pt_regs *regs, long error_code, + unsigned long address) { if (user_mode(regs)) { gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR); return; } - if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR)) + if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, + VE_FAULT_STR, address)) { return; + } - die_addr(VE_FAULT_STR, regs, error_code, 0); + die_addr(VE_FAULT_STR, regs, error_code, address); } /* @@ -1431,7 +1435,7 @@ DEFINE_IDTENTRY(exc_virtualization_exception) * it successfully, treat it as #GP(0) and handle it. */ if (!tdx_handle_virt_exception(regs, &ve)) - ve_raise_fault(regs, 0); + ve_raise_fault(regs, 0, ve.gla); cond_local_irq_disable(regs); } |