From c182487da1b5281463f2255d2347885dba219c08 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:52 +0200 Subject: x86/debug: Sync BTF earlier Move the BTF sync near the DR6 load, as this will be the only common code guaranteed to run on every #DB. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200902133200.786888252@infradead.org --- arch/x86/kernel/traps.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 81a2fb711091..99456428fb98 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -749,6 +749,13 @@ static __always_inline unsigned long debug_read_clear_dr6(void) /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; + /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." Clear TIF_BLOCKSTEP to keep + * TIF_BLOCKSTEP in sync with the hardware BTF flag. + */ + clear_thread_flag(TIF_BLOCKSTEP); + return dr6; } @@ -782,13 +789,6 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user) bool user_icebp; int si_code; - /* - * The SDM says "The processor clears the BTF flag when it - * generates a debug exception." Clear TIF_BLOCKSTEP to keep - * TIF_BLOCKSTEP in sync with the hardware BTF flag. - */ - clear_thread_flag(TIF_BLOCKSTEP); - /* * If DR6 is zero, no point in trying to handle it. The kernel is * not using INT1. -- cgit v1.2.3 From 20a6e35a948284b8ab246ed35eefc56d674ad076 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:53 +0200 Subject: x86/debug: Move kprobe_debug_handler() into exc_debug_kernel() Kprobes are on kernel text, and thus only matter for #DB-from-kernel. Kprobes are ordered before the generic notifier, preserve that order. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Acked-by: Masami Hiramatsu Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200902133200.847465360@infradead.org --- arch/x86/include/asm/kprobes.h | 4 ++++ arch/x86/kernel/traps.c | 10 ++++------ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 143bc9abe99c..991a7ad540c7 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -106,5 +106,9 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, extern int kprobe_int3_handler(struct pt_regs *regs); extern int kprobe_debug_handler(struct pt_regs *regs); +#else + +static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; } + #endif /* CONFIG_KPROBES */ #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 99456428fb98..9cb39d30413d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -806,12 +806,6 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; -#ifdef CONFIG_KPROBES - if (kprobe_debug_handler(regs)) { - return; - } -#endif - if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0, SIGTRAP) == NOTIFY_STOP) { return; @@ -877,8 +871,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs)) dr6 &= ~DR_STEP; + if (kprobe_debug_handler(regs)) + goto out; + handle_debug(regs, dr6, false); +out: instrumentation_end(); idtentry_exit_nmi(regs, irq_state); -- cgit v1.2.3 From 7043679a989af969e9f20cc7d90195b36f54036f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:54 +0200 Subject: x86/debug: Remove handle_debug(.user) argument The handle_debug(.user) argument is used to terminate the #DB handler early for the INT1-from-kernel case, since the kernel doesn't use INT1. Remove the argument and handle this explicitly in #DB-from-kernel. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200902133200.907020598@infradead.org --- arch/x86/kernel/traps.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9cb39d30413d..58bc434c75de 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -783,25 +783,18 @@ static __always_inline unsigned long debug_read_clear_dr6(void) * * May run on IST stack. */ -static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user) +static void handle_debug(struct pt_regs *regs, unsigned long dr6) { struct task_struct *tsk = current; bool user_icebp; int si_code; - /* - * If DR6 is zero, no point in trying to handle it. The kernel is - * not using INT1. - */ - if (!user && !dr6) - return; - /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. */ - user_icebp = user && !dr6; + user_icebp = !dr6; /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; @@ -874,7 +867,13 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, if (kprobe_debug_handler(regs)) goto out; - handle_debug(regs, dr6, false); + /* + * The kernel doesn't use INT1 + */ + if (!dr6) + goto out; + + handle_debug(regs, dr6); out: instrumentation_end(); @@ -904,7 +903,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, irqentry_enter_from_user_mode(regs); instrumentation_begin(); - handle_debug(regs, dr6, true); + handle_debug(regs, dr6); instrumentation_end(); irqentry_exit_to_user_mode(regs); -- cgit v1.2.3 From 4182e9436916a48f16207f0619562f1d3843a0c8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:55 +0200 Subject: x86/debug: Simplify #DB signal code There's no point in calculating si_code if it's not going to be used. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20200902133200.967434217@infradead.org --- arch/x86/kernel/traps.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 58bc434c75de..24e09f825a93 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -786,15 +786,14 @@ static __always_inline unsigned long debug_read_clear_dr6(void) static void handle_debug(struct pt_regs *regs, unsigned long dr6) { struct task_struct *tsk = current; - bool user_icebp; - int si_code; + bool icebp; /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. */ - user_icebp = !dr6; + icebp = !dr6; /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; @@ -813,6 +812,11 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6) goto out; } + /* + * Reload dr6, the notifier might have changed it. + */ + dr6 = tsk->thread.debugreg6; + if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { /* * Historical junk that used to handle SYSENTER single-stepping. @@ -825,9 +829,8 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6) regs->flags &= ~X86_EFLAGS_TF; } - si_code = get_si_code(tsk->thread.debugreg6); - if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) - send_sigtrap(regs, 0, si_code); + if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) + send_sigtrap(regs, 0, get_si_code(dr6)); out: cond_local_irq_disable(regs); -- cgit v1.2.3 From 4eb5acc39187a7ba578fbb44f7bb1965057309ae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:56 +0200 Subject: x86/debug: Move historical SYSENTER junk into exc_debug_kernel() The historical SYSENTER junk is explicitly for from-kernel, so move it to the #DB-from-kernel handler. It is ordered after the notifier, which is important for KGDB which uses TF single-step and needs to consume the event before that point. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20200902133201.031099736@infradead.org --- arch/x86/kernel/traps.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 24e09f825a93..2605686ad31e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -783,7 +783,7 @@ static __always_inline unsigned long debug_read_clear_dr6(void) * * May run on IST stack. */ -static void handle_debug(struct pt_regs *regs, unsigned long dr6) +static bool handle_debug(struct pt_regs *regs, unsigned long *dr6) { struct task_struct *tsk = current; bool icebp; @@ -793,15 +793,13 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6) * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. */ - icebp = !dr6; + icebp = !*dr6; /* Store the virtualized DR6 value */ - tsk->thread.debugreg6 = dr6; + tsk->thread.debugreg6 = *dr6; - if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0, - SIGTRAP) == NOTIFY_STOP) { - return; - } + if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP) + return true; /* It's safe to allow irq's after DR6 has been saved */ cond_local_irq_enable(regs); @@ -815,25 +813,15 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6) /* * Reload dr6, the notifier might have changed it. */ - dr6 = tsk->thread.debugreg6; + *dr6 = tsk->thread.debugreg6; - if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { - /* - * Historical junk that used to handle SYSENTER single-stepping. - * This should be unreachable now. If we survive for a while - * without anyone hitting this warning, we'll turn this into - * an oops. - */ - tsk->thread.debugreg6 &= ~DR_STEP; - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - } - - if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) - send_sigtrap(regs, 0, get_si_code(dr6)); + if (*dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) + send_sigtrap(regs, 0, get_si_code(*dr6)); out: cond_local_irq_disable(regs); + + return false; } static __always_inline void exc_debug_kernel(struct pt_regs *regs, @@ -876,7 +864,20 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, if (!dr6) goto out; - handle_debug(regs, dr6); + if (handle_debug(regs, &dr6)) + goto out; + + if (WARN_ON_ONCE(dr6 & DR_STEP)) { + /* + * Historical junk that used to handle SYSENTER single-stepping. + * This should be unreachable now. If we survive for a while + * without anyone hitting this warning, we'll turn this into + * an oops. + */ + dr6 &= ~DR_STEP; + set_thread_flag(TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; + } out: instrumentation_end(); @@ -906,7 +907,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, irqentry_enter_from_user_mode(regs); instrumentation_begin(); - handle_debug(regs, dr6); + handle_debug(regs, &dr6); instrumentation_end(); irqentry_exit_to_user_mode(regs); -- cgit v1.2.3 From f0b67c39c190e19bc1604a13bcc985c4445a4b2f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:57 +0200 Subject: x86/debug: Move cond_local_irq_enable() block into exc_debug_user() The cond_local_irq_enable() block, dealing with vm86 and sending signals is only relevant for #DB-from-user, move it there. This then reduces handle_debug() to only the notifier call, so rename it to notify_debug(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20200902133201.094265982@infradead.org --- arch/x86/kernel/traps.c | 58 ++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 2605686ad31e..682af24ff0de 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -783,17 +783,10 @@ static __always_inline unsigned long debug_read_clear_dr6(void) * * May run on IST stack. */ -static bool handle_debug(struct pt_regs *regs, unsigned long *dr6) + +static bool notify_debug(struct pt_regs *regs, unsigned long *dr6) { struct task_struct *tsk = current; - bool icebp; - - /* - * If dr6 has no reason to give us about the origin of this trap, - * then it's very likely the result of an icebp/int01 trap. - * User wants a sigtrap for that. - */ - icebp = !*dr6; /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = *dr6; @@ -801,26 +794,9 @@ static bool handle_debug(struct pt_regs *regs, unsigned long *dr6) if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP) return true; - /* It's safe to allow irq's after DR6 has been saved */ - cond_local_irq_enable(regs); - - if (v8086_mode(regs)) { - handle_vm86_trap((struct kernel_vm86_regs *) regs, 0, - X86_TRAP_DB); - goto out; - } - - /* - * Reload dr6, the notifier might have changed it. - */ + /* Reload the DR6 value, the notifier might have changed it */ *dr6 = tsk->thread.debugreg6; - if (*dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) - send_sigtrap(regs, 0, get_si_code(*dr6)); - -out: - cond_local_irq_disable(regs); - return false; } @@ -864,7 +840,7 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, if (!dr6) goto out; - if (handle_debug(regs, &dr6)) + if (notify_debug(regs, &dr6)) goto out; if (WARN_ON_ONCE(dr6 & DR_STEP)) { @@ -889,6 +865,8 @@ out: static __always_inline void exc_debug_user(struct pt_regs *regs, unsigned long dr6) { + bool icebp; + /* * If something gets miswired and we end up here for a kernel mode * #DB, we will malfunction. @@ -907,8 +885,30 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, irqentry_enter_from_user_mode(regs); instrumentation_begin(); - handle_debug(regs, &dr6); + /* + * If dr6 has no reason to give us about the origin of this trap, + * then it's very likely the result of an icebp/int01 trap. + * User wants a sigtrap for that. + */ + icebp = !dr6; + + if (notify_debug(regs, &dr6)) + goto out; + /* It's safe to allow irq's after DR6 has been saved */ + local_irq_enable(); + + if (v8086_mode(regs)) { + handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB); + goto out_irq; + } + + if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) + send_sigtrap(regs, 0, get_si_code(dr6)); + +out_irq: + local_irq_disable(); +out: instrumentation_end(); irqentry_exit_to_user_mode(regs); } -- cgit v1.2.3 From 389cd0cd8b3790b555c3679da946f4aa4fba3bab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:58 +0200 Subject: x86/debug: Remove the historical junk Remove the historical junk and replace it with a WARN and a comment. The problem is that even though the kernel only uses TF single-step in kprobes and KGDB, both of which consume the event before this, QEMU/KVM has bugs in this area that can trigger this state so it has to be dealt with. Suggested-by: Brian Gerst Suggested-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20200902133201.170216274@infradead.org --- arch/x86/kernel/traps.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 682af24ff0de..1e890010a062 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -843,18 +843,19 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, if (notify_debug(regs, &dr6)) goto out; - if (WARN_ON_ONCE(dr6 & DR_STEP)) { - /* - * Historical junk that used to handle SYSENTER single-stepping. - * This should be unreachable now. If we survive for a while - * without anyone hitting this warning, we'll turn this into - * an oops. - */ - dr6 &= ~DR_STEP; - set_thread_flag(TIF_SINGLESTEP); + /* + * The kernel doesn't use TF single-step outside of: + * + * - Kprobes, consumed through kprobe_debug_handler() + * - KGDB, consumed through notify_debug() + * + * So if we get here with DR_STEP set, something is wonky. + * + * A known way to trigger this is through QEMU's GDB stub, + * which leaks #DB into the guest and causes IST recursion. + */ + if (WARN_ON_ONCE(current->thread.debugreg6 & DR_STEP)) regs->flags &= ~X86_EFLAGS_TF; - } - out: instrumentation_end(); idtentry_exit_nmi(regs, irq_state); -- cgit v1.2.3 From b84d42b6c6ac6a60519286e72b69f2dbf08dfb70 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:59 +0200 Subject: x86/debug: Remove aout_dump_debugregs() Unused remnants for the bit-bucket. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20200902133201.233022474@infradead.org --- arch/x86/include/asm/debugreg.h | 2 -- arch/x86/kernel/hw_breakpoint.c | 36 ------------------------------------ 2 files changed, 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index e89558a3fe4a..cfdf307ddc01 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -90,8 +90,6 @@ static __always_inline bool hw_breakpoint_active(void) return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } -extern void aout_dump_debugregs(struct user *dump); - extern void hw_breakpoint_restore(void); static __always_inline unsigned long local_db_save(void) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index b98ff620ba77..ca9de59ffb87 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -441,42 +441,6 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, return 0; } -/* - * Dump the debug register contents to the user. - * We can't dump our per cpu values because it - * may contain cpu wide breakpoint, something that - * doesn't belong to the current task. - * - * TODO: include non-ptrace user breakpoints (perf) - */ -void aout_dump_debugregs(struct user *dump) -{ - int i; - int dr7 = 0; - struct perf_event *bp; - struct arch_hw_breakpoint *info; - struct thread_struct *thread = ¤t->thread; - - for (i = 0; i < HBP_NUM; i++) { - bp = thread->ptrace_bps[i]; - - if (bp && !bp->attr.disabled) { - dump->u_debugreg[i] = bp->attr.bp_addr; - info = counter_arch_bp(bp); - dr7 |= encode_dr7(i, info->len, info->type); - } else { - dump->u_debugreg[i] = 0; - } - } - - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - - dump->u_debugreg[7] = dr7; -} -EXPORT_SYMBOL_GPL(aout_dump_debugregs); - /* * Release the user breakpoints used by ptrace */ -- cgit v1.2.3 From 21d44be7b6ff4c254dc971e2c99d4082dd470afd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:26:00 +0200 Subject: x86/debug: Simplify hw_breakpoint_handler() This is called with interrupts disabled, there's no point in using get_cpu() and per_cpu(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20200902133201.292906672@infradead.org --- arch/x86/kernel/hw_breakpoint.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ca9de59ffb87..7b7d9f23a9ad 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -487,7 +487,7 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore); */ static int hw_breakpoint_handler(struct die_args *args) { - int i, cpu, rc = NOTIFY_STOP; + int i, rc = NOTIFY_STOP; struct perf_event *bp; unsigned long dr6; unsigned long *dr6_p; @@ -505,12 +505,10 @@ static int hw_breakpoint_handler(struct die_args *args) return NOTIFY_DONE; /* - * Assert that local interrupts are disabled * Reset the DRn bits in the virtualized register value. * The ptrace trigger routine will add in whatever is needed. */ current->thread.debugreg6 &= ~DR_TRAP_BITS; - cpu = get_cpu(); /* Handle all the breakpoints that were triggered */ for (i = 0; i < HBP_NUM; ++i) { @@ -525,7 +523,7 @@ static int hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = per_cpu(bp_per_reg[i], cpu); + bp = this_cpu_read(bp_per_reg[i]); /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -560,8 +558,6 @@ static int hw_breakpoint_handler(struct die_args *args) (dr6 & (~DR_TRAP_BITS))) rc = NOTIFY_DONE; - put_cpu(); - return rc; } -- cgit v1.2.3 From f4956cf83ed12271bdbd5b547f3378add72bbffb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:26:01 +0200 Subject: x86/debug: Support negative polarity DR6 bits DR6 has a whole bunch of bits that have negative polarity; they were architecturally reserved and defined to be 1 and are now getting used. Since they're 1 by default, 0 becomes the signal value. Handle this by xor'ing the read DR6 value by the reserved mask, this will flip them around such that 1 is the signal value (positive polarity). Current Linux doesn't yet support any of these bits, but there's two defined: - DR6[11] Bus Lock Debug Exception (ISEr39) - DR6[16] Restricted Transactional Memory (SDM) Update ptrace_{set,get}_debugreg() to provide/consume the value in architectural polarity. Although afaict ptrace_set_debugreg(6) is pointless, the value is not consumed anywhere. Change hw_breakpoint_restore() to alway write the DR6_RESERVED value to DR6, again, no consumer for that write. Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20200902133201.354220797@infradead.org --- arch/x86/kernel/hw_breakpoint.c | 2 +- arch/x86/kernel/ptrace.c | 4 ++-- arch/x86/kernel/traps.c | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 7b7d9f23a9ad..d17a1daeff71 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -464,7 +464,7 @@ void hw_breakpoint_restore(void) set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1); set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2); set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3); - set_debugreg(current->thread.debugreg6, 6); + set_debugreg(DR6_RESERVED, 6); set_debugreg(__this_cpu_read(cpu_dr7), 7); } EXPORT_SYMBOL_GPL(hw_breakpoint_restore); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e7537c5440bb..5f982896dc0b 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -601,7 +601,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) if (bp) val = bp->hw.info.address; } else if (n == 6) { - val = thread->debugreg6; + val = thread->debugreg6 ^ DR6_RESERVED; /* Flip back to arch polarity */ } else if (n == 7) { val = thread->ptrace_dr7; } @@ -657,7 +657,7 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n, if (n < HBP_NUM) { rc = ptrace_set_breakpoint_addr(tsk, n, val); } else if (n == 6) { - thread->debugreg6 = val; + thread->debugreg6 = val ^ DR6_RESERVED; /* Flip to positive polarity */ rc = 0; } else if (n == 7) { rc = ptrace_write_dr7(tsk, val); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1e890010a062..114515b26168 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -745,9 +745,8 @@ static __always_inline unsigned long debug_read_clear_dr6(void) * Keep it simple: clear DR6 immediately. */ get_debugreg(dr6, 6); - set_debugreg(0, 6); - /* Filter out all the reserved bits which are preset to 1 */ - dr6 &= ~DR6_RESERVED; + set_debugreg(DR6_RESERVED, 6); + dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ /* * The SDM says "The processor clears the BTF flag when it -- cgit v1.2.3 From d53d9bc0cf783e93b374de3895145c7375e570ba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:26:02 +0200 Subject: x86/debug: Change thread.debugreg6 to thread.virtual_dr6 Current usage of thread.debugreg6 is convoluted at best. It starts life as a copy of the hardware DR6 value, but then various bits are cleared and set. Replace this with a new variable thread.virtual_dr6 that is initialized to 0 when DR6 is read and only gains bits, at the same time the actual (on stack) dr6 value which is read from the hardware only gets bits cleared. Suggested-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20200902133201.415372940@infradead.org --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/hw_breakpoint.c | 12 +++--------- arch/x86/kernel/kgdb.c | 5 +++-- arch/x86/kernel/ptrace.c | 6 +++--- arch/x86/kernel/traps.c | 25 ++++++++++++++++--------- 5 files changed, 26 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 97143d87994c..d8a82e650810 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -517,7 +517,7 @@ struct thread_struct { /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ - unsigned long debugreg6; + unsigned long virtual_dr6; /* Keep track of the exact dr7 value set by the user */ unsigned long ptrace_dr7; /* Fault info: */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d17a1daeff71..03aa33b58165 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -454,7 +454,7 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) t->ptrace_bps[i] = NULL; } - t->debugreg6 = 0; + t->virtual_dr6 = 0; t->ptrace_dr7 = 0; } @@ -489,8 +489,8 @@ static int hw_breakpoint_handler(struct die_args *args) { int i, rc = NOTIFY_STOP; struct perf_event *bp; - unsigned long dr6; unsigned long *dr6_p; + unsigned long dr6; /* The DR6 value is pointed by args->err */ dr6_p = (unsigned long *)ERR_PTR(args->err); @@ -504,12 +504,6 @@ static int hw_breakpoint_handler(struct die_args *args) if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; - /* - * Reset the DRn bits in the virtualized register value. - * The ptrace trigger routine will add in whatever is needed. - */ - current->thread.debugreg6 &= ~DR_TRAP_BITS; - /* Handle all the breakpoints that were triggered */ for (i = 0; i < HBP_NUM; ++i) { if (likely(!(dr6 & (DR_TRAP0 << i)))) @@ -554,7 +548,7 @@ static int hw_breakpoint_handler(struct die_args *args) * breakpoints (to generate signals) and b) when the system has * taken exception due to multiple causes */ - if ((current->thread.debugreg6 & DR_TRAP_BITS) || + if ((current->thread.virtual_dr6 & DR_TRAP_BITS) || (dr6 & (~DR_TRAP_BITS))) rc = NOTIFY_DONE; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index c2f02f308ecf..ff7878df96b4 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -629,9 +629,10 @@ static void kgdb_hw_overflow_handler(struct perf_event *event, struct task_struct *tsk = current; int i; - for (i = 0; i < 4; i++) + for (i = 0; i < 4; i++) { if (breakinfo[i].enabled) - tsk->thread.debugreg6 |= (DR_TRAP0 << i); + tsk->thread.virtual_dr6 |= (DR_TRAP0 << i); + } } void kgdb_arch_late(void) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 5f982896dc0b..bedca011459c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -465,7 +465,7 @@ static void ptrace_triggered(struct perf_event *bp, break; } - thread->debugreg6 |= (DR_TRAP0 << i); + thread->virtual_dr6 |= (DR_TRAP0 << i); } /* @@ -601,7 +601,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) if (bp) val = bp->hw.info.address; } else if (n == 6) { - val = thread->debugreg6 ^ DR6_RESERVED; /* Flip back to arch polarity */ + val = thread->virtual_dr6 ^ DR6_RESERVED; /* Flip back to arch polarity */ } else if (n == 7) { val = thread->ptrace_dr7; } @@ -657,7 +657,7 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n, if (n < HBP_NUM) { rc = ptrace_set_breakpoint_addr(tsk, n, val); } else if (n == 6) { - thread->debugreg6 = val ^ DR6_RESERVED; /* Flip to positive polarity */ + thread->virtual_dr6 = val ^ DR6_RESERVED; /* Flip to positive polarity */ rc = 0; } else if (n == 7) { rc = ptrace_write_dr7(tsk, val); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 114515b26168..df9c6554f83e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -748,6 +748,12 @@ static __always_inline unsigned long debug_read_clear_dr6(void) set_debugreg(DR6_RESERVED, 6); dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ + /* + * Clear the virtual DR6 value, ptrace routines will set bits here for + * things we want signals for. + */ + current->thread.virtual_dr6 = 0; + /* * The SDM says "The processor clears the BTF flag when it * generates a debug exception." Clear TIF_BLOCKSTEP to keep @@ -785,17 +791,16 @@ static __always_inline unsigned long debug_read_clear_dr6(void) static bool notify_debug(struct pt_regs *regs, unsigned long *dr6) { - struct task_struct *tsk = current; - - /* Store the virtualized DR6 value */ - tsk->thread.debugreg6 = *dr6; - + /* + * Notifiers will clear bits in @dr6 to indicate the event has been + * consumed - hw_breakpoint_handler(), single_stop_cont(). + * + * Notifiers will set bits in @virtual_dr6 to indicate the desire + * for signals - ptrace_triggered(), kgdb_hw_overflow_handler(). + */ if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP) return true; - /* Reload the DR6 value, the notifier might have changed it */ - *dr6 = tsk->thread.debugreg6; - return false; } @@ -853,7 +858,7 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, * A known way to trigger this is through QEMU's GDB stub, * which leaks #DB into the guest and causes IST recursion. */ - if (WARN_ON_ONCE(current->thread.debugreg6 & DR_STEP)) + if (WARN_ON_ONCE(dr6 & DR_STEP)) regs->flags &= ~X86_EFLAGS_TF; out: instrumentation_end(); @@ -903,6 +908,8 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, goto out_irq; } + /* Add the virtual_dr6 bits for signals. */ + dr6 |= current->thread.virtual_dr6; if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) send_sigtrap(regs, 0, get_si_code(dr6)); -- cgit v1.2.3