From 72f924783b8a87e4454516520ffb5f35e4930371 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:47:54 -0700
Subject: x86/entry, locking/lockdep: Move lockdep_sys_exit() to prepare_exit_to_usermode()

Rather than worrying about exactly where LOCKDEP_SYS_EXIT should go in
the asm code, add it to prepare_exit_to_usermode() and remove all of the
asm calls that are followed by prepare_exit_to_usermode().

LOCKDEP_SYS_EXIT now appears only in the syscall fast paths.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1736ebe948b845e68120b86b89091f3ec27f5e8e.1444091584.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 2 ++
 1 file changed, 2 insertions(+)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 80dcc9261ca3..d94a60c16029 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -220,6 +220,8 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 	if (WARN_ON(!irqs_disabled()))
 		local_irq_disable();
 
+	lockdep_sys_exit();
+
 	/*
 	 * In order to return to user mode, we need to have IRQs off with
 	 * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY,
--
cgit v1.2.3

From bd2d3a3ba67ac580f6e809aac36bf942f5447f91 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:08 -0700
Subject: x86/entry: Add do_syscall_32(), a C function to do 32-bit syscalls

System calls are really quite simple.  Add a helper to call a 32-bit
system call.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/a77ed179834c27da436fb4a7fb23c8ee77abc11c.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d94a60c16029..41d17508cf46 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -318,3 +318,46 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
 		local_irq_disable();
 	prepare_exit_to_usermode(regs);
 }
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+/*
+ * Does a 32-bit syscall.  Called with IRQs off and does all entry and
+ * exit work.
+ */
+__visible void do_int80_syscall_32(struct pt_regs *regs)
+{
+        struct thread_info *ti = pt_regs_to_thread_info(regs);
+        unsigned int nr = (unsigned int)regs->orig_ax;
+
+#ifdef CONFIG_IA32_EMULATION
+        ti->status |= TS_COMPAT;
+#endif
+
+        local_irq_enable();
+
+        if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
+                /*
+                 * Subtlety here: if ptrace pokes something larger than
+                 * 2^32-1 into orig_ax, this truncates it.  This may or
+                 * may not be necessary, but it matches the old asm
+                 * behavior.
+                 */
+                nr = syscall_trace_enter(regs);
+        }
+
+        if (nr < IA32_NR_syscalls) {
+                /*
+                 * It's possible that a 32-bit syscall implementation
+                 * takes a 64-bit parameter but nonetheless assumes that
+                 * the high bits are zero.  Make sure we zero-extend all
+                 * of the args.
+                 */
+                regs->ax = ia32_sys_call_table[nr](
+                        (unsigned int)regs->bx, (unsigned int)regs->cx,
+                        (unsigned int)regs->dx, (unsigned int)regs->si,
+                        (unsigned int)regs->di, (unsigned int)regs->bp);
+        }
+
+        syscall_return_slowpath(regs);
+}
+#endif
--
cgit v1.2.3
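A note between the patches: the register convention that do_int80_syscall_32()
unpacks from pt_regs is the standard i386 syscall ABI, which 32-bit user code
can exercise directly. A minimal sketch (illustrative only, not part of the
series; the syscall number 20 is __NR_getpid from the i386 table, everything
else is ordinary userspace C):

        /* int80-demo.c -- build with: gcc -m32 -static int80-demo.c */
        #include <stdio.h>

        int main(void)
        {
                long ret;

                /*
                 * i386 convention: syscall number in %eax; up to six args in
                 * %ebx, %ecx, %edx, %esi, %edi, %ebp -- the same registers
                 * do_int80_syscall_32() reads (zero-extended) out of pt_regs.
                 * The return value comes back in %eax.
                 */
                asm volatile ("int $0x80"
                              : "=a" (ret)
                              : "a" (20L)       /* __NR_getpid on i386 */
                              : "memory");

                printf("getpid() via int $0x80 -> %ld\n", ret);
                return 0;
        }

On a 64-bit kernel with CONFIG_IA32_EMULATION, the same instruction enters
through entry_INT80_compat and lands in this C helper as well.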
From 710246df58041106b7de645f4b45770f8a59a269 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:10 -0700
Subject: x86/entry: Add C code for fast system call entries

This handles both SYSENTER and SYSCALL.  The asm glue will take care
of the differences.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/6041a58a9b8ef6d2522ab4350deb1a1945eb563f.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 41d17508cf46..1b2606edc621 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -24,6 +24,8 @@
 #include <asm/desc.h>
 #include <asm/traps.h>
+#include <asm/vdso.h>
+#include <asm/uaccess.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -360,4 +362,45 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
 
         syscall_return_slowpath(regs);
 }
+
+__visible void do_fast_syscall_32(struct pt_regs *regs)
+{
+        /*
+         * Called using the internal vDSO SYSENTER/SYSCALL32 calling
+         * convention.  Adjust regs so it looks like we entered using int80.
+         */
+
+        unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
+                vdso_image_32.sym_int80_landing_pad;
+
+        /*
+         * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
+         * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
+         * Fix it up.
+         */
+        regs->ip = landing_pad;
+
+        /*
+         * Fetch ECX from where the vDSO stashed it.
+         *
+         * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
+         */
+        local_irq_enable();
+        if (get_user(*(u32 *)&regs->cx,
+                     (u32 __user __force *)(unsigned long)(u32)regs->sp)) {
+                /* User code screwed up. */
+                local_irq_disable();
+                regs->ax = -EFAULT;
+#ifdef CONFIG_CONTEXT_TRACKING
+                enter_from_user_mode();
+#endif
+                prepare_exit_to_usermode(regs);
+                return;
+        }
+        local_irq_disable();
+
+        /* Now this is just like a normal syscall. */
+        do_int80_syscall_32(regs);
+        return;
+}
 #endif
--
cgit v1.2.3
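The landing pad exists to keep syscall restart working: the signal code
re-arms an interrupted syscall by rewinding the saved user IP back over a
2-byte syscall instruction. A simplified sketch of that restart logic
(condensed from the era's arch/x86/kernel/signal.c; illustrative, not
verbatim):

        /* on -ERESTARTSYS and friends, just before returning to user mode: */
        regs->ax = regs->orig_ax;       /* restore the syscall number */
        regs->ip -= 2;                  /* "int $0x80", "sysenter" and
                                         * "syscall" are all 2 bytes */

Because do_fast_syscall_32() points regs->ip at a spot 2 bytes past an
int $0x80 in the vDSO, this rewind always re-executes int $0x80, no matter
whether SYSENTER, SYSCALL32 or INT80 originally entered the kernel.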
From 7841b408717d4c3b1b334c8f1fef7f18c98cd2bd Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:12 -0700
Subject: x86/entry/compat: Implement opportunistic SYSRETL for compat syscalls

If CS, SS and IP are as expected and FLAGS is compatible with SYSRETL,
then return from fast compat syscalls (both SYSCALL and SYSENTER) using
SYSRETL.

Unlike native 64-bit opportunistic SYSRET, this is not invisible to
user code: RCX and R8-R15 end up in a different state than the one
saved in pt_regs.  To compensate, we only do this when returning to
the vDSO fast syscall return path.  This won't interfere with syscall
restart, as we won't use SYSRETL when returning to the INT80 restart
instruction.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/aa15e49db33773eb10b73d73466b6d5466d7856a.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c          | 23 +++++++++++++++++++---
 arch/x86/entry/entry_64_compat.S | 42 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 60 insertions(+), 5 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 1b2606edc621..88dc5ba14d47 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -363,7 +363,8 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
         syscall_return_slowpath(regs);
 }
 
-__visible void do_fast_syscall_32(struct pt_regs *regs)
+/* Returns 0 to return using IRET or 1 to return using SYSRETL. */
+__visible long do_fast_syscall_32(struct pt_regs *regs)
 {
         /*
          * Called using the internal vDSO SYSENTER/SYSCALL32 calling
@@ -395,12 +396,28 @@ __visible void do_fast_syscall_32(struct pt_regs *regs)
                 enter_from_user_mode();
 #endif
                 prepare_exit_to_usermode(regs);
-                return;
+                return 0;       /* Keep it simple: use IRET. */
         }
         local_irq_disable();
 
         /* Now this is just like a normal syscall. */
         do_int80_syscall_32(regs);
-        return;
+
+#ifdef CONFIG_X86_64
+        /*
+         * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
+         * SYSRETL is available on all 64-bit CPUs, so we don't need to
+         * bother with SYSEXIT.
+         *
+         * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
+         * because the ECX fixup above will ensure that this is essentially
+         * never the case.
+         */
+        return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
+                regs->ip == landing_pad &&
+                (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
+#else
+        return 0;
+#endif
 }
 #endif

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 8f109de51d03..cf9641cd4796 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -115,7 +115,9 @@ sysenter_flags_fixed:
         movq    %rsp, %rdi
         call    do_fast_syscall_32
-        jmp     .Lsyscall_32_done
+        testl   %eax, %eax
+        jz      .Lsyscall_32_done
+        jmp     sysret32_from_system_call
 
 sysenter_fix_flags:
         pushq   $X86_EFLAGS_FIXED
@@ -192,7 +194,43 @@ ENTRY(entry_SYSCALL_compat)
         movq    %rsp, %rdi
         call    do_fast_syscall_32
-        jmp     .Lsyscall_32_done
+        testl   %eax, %eax
+        jz      .Lsyscall_32_done
+
+        /* Opportunistic SYSRET */
+sysret32_from_system_call:
+        TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
+        movq    RBX(%rsp), %rbx         /* pt_regs->rbx */
+        movq    RBP(%rsp), %rbp         /* pt_regs->rbp */
+        movq    EFLAGS(%rsp), %r11      /* pt_regs->flags (in r11) */
+        movq    RIP(%rsp), %rcx         /* pt_regs->ip (in rcx) */
+        addq    $RAX, %rsp              /* Skip r8-r15 */
+        popq    %rax                    /* pt_regs->rax */
+        popq    %rdx                    /* Skip pt_regs->cx */
+        popq    %rdx                    /* pt_regs->dx */
+        popq    %rsi                    /* pt_regs->si */
+        popq    %rdi                    /* pt_regs->di */
+
+        /*
+         * USERGS_SYSRET32 does:
+         *  GSBASE = user's GS base
+         *  EIP = ECX
+         *  RFLAGS = R11
+         *  CS = __USER32_CS
+         *  SS = __USER_DS
+         *
+         * ECX will not match pt_regs->cx, but we're returning to a vDSO
+         * trampoline that will fix up RCX, so this is okay.
+         *
+         * R12-R15 are callee-saved, so they contain whatever was in them
+         * when the system call started, which is already known to user
+         * code.  We zero R8-R10 to avoid info leaks.
+         */
+        xorq    %r8, %r8
+        xorq    %r9, %r9
+        xorq    %r10, %r10
+        movq    RSP-ORIG_RAX(%rsp), %rsp
+        USERGS_SYSRET32
 END(entry_SYSCALL_compat)
 
 /*
--
cgit v1.2.3
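A map for the register-restore asm above: RBX(%rsp), EFLAGS(%rsp), RAX and
friends are generated byte offsets into the x86_64 struct pt_regs, whose
field order (as in the era's arch/x86/include/asm/ptrace.h) is reproduced
here purely for orientation:

        struct pt_regs {
                unsigned long r15, r14, r13, r12;
                unsigned long bp, bx;
                unsigned long r11, r10, r9, r8;
                unsigned long ax, cx, dx, si, di;
                unsigned long orig_ax;
                unsigned long ip, cs, flags, sp, ss;
        };

So "addq $RAX, %rsp" advances the stack pointer past r15..r8 (plus bp and
bx, which were already reloaded with movq) straight to the ax slot, and the
popq sequence then consumes ax, cx (discarded into %rdx), dx, si and di in
layout order.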
From 5f310f739b4cc343f3f087681e41bbc2f0ce902d Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:15 -0700
Subject: x86/entry/32: Re-implement SYSENTER using the new C path

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/5b99659e8be70f3dd10cd8970a5c90293d9ad9a7.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c                  |  17 +++-
 arch/x86/entry/entry_32.S                | 132 ++++++++-----------------------
 arch/x86/entry/vdso/vdso32/system_call.S |   2 +
 3 files changed, 51 insertions(+), 100 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 88dc5ba14d47..0ed023d6a983 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -363,7 +363,7 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
         syscall_return_slowpath(regs);
 }
 
-/* Returns 0 to return using IRET or 1 to return using SYSRETL. */
+/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
 __visible long do_fast_syscall_32(struct pt_regs *regs)
 {
         /*
@@ -417,7 +417,20 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
                 regs->ip == landing_pad &&
                 (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
 #else
-        return 0;
+        /*
+         * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
+         *
+         * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
+         * because the ECX fixup above will ensure that this is essentially
+         * never the case.
+         *
+         * We don't allow syscalls at all from VM86 mode, but we still
+         * need to check VM, because we might be returning from sys_vm86.
+         */
+        return static_cpu_has(X86_FEATURE_SEP) &&
+                regs->cs == __USER_CS && regs->ss == __USER_DS &&
+                regs->ip == landing_pad &&
+                (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
 #endif
 }
 #endif

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 02881e528945..c1c7c6364216 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -287,76 +287,47 @@ need_resched:
 END(resume_kernel)
 #endif
 
-/*
- * SYSENTER_RETURN points to after the SYSENTER instruction
- * in the vsyscall page.  See vsyscall-sysentry.S, which defines
- * the symbol.
- */
-
         # SYSENTER call handler stub
 ENTRY(entry_SYSENTER_32)
         movl    TSS_sysenter_sp0(%esp), %esp
 sysenter_past_esp:
+        pushl   $__USER_DS              /* pt_regs->ss */
+        pushl   %ecx                    /* pt_regs->sp (stashed in cx) */
+        pushfl                          /* pt_regs->flags (except IF = 0) */
+        orl     $X86_EFLAGS_IF, (%esp)  /* Fix IF */
+        pushl   $__USER_CS              /* pt_regs->cs */
+        pushl   $0                      /* pt_regs->ip = 0 (placeholder) */
+        pushl   %eax                    /* pt_regs->orig_ax */
+        SAVE_ALL pt_regs_ax=$-ENOSYS    /* save rest */
+
         /*
-         * Interrupts are disabled here, but we can't trace it until
-         * enough kernel state to call TRACE_IRQS_OFF can be called - but
-         * we immediately enable interrupts at that point anyway.
-         */
-        pushl   $__USER_DS
-        pushl   %ebp
-        pushfl
-        orl     $X86_EFLAGS_IF, (%esp)
-        pushl   $__USER_CS
-        /*
-         * Push current_thread_info()->sysenter_return to the stack.
-         * A tiny bit of offset fixup is necessary: TI_sysenter_return
-         * is relative to thread_info, which is at the bottom of the
-         * kernel stack page.  4*4 means the 4 words pushed above;
-         * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
-         * and THREAD_SIZE takes us to the bottom.
+         * User mode is traced as though IRQs are on, and SYSENTER
+         * turned them off.
          */
-        pushl   ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
-
-        pushl   %eax
-        SAVE_ALL
-        ENABLE_INTERRUPTS(CLBR_NONE)
-
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
-        cmpl    $__PAGE_OFFSET-3, %ebp
-        jae     syscall_fault
-        ASM_STAC
-1:      movl    (%ebp), %ebp
-        ASM_CLAC
-        movl    %ebp, PT_EBP(%esp)
-        _ASM_EXTABLE(1b, syscall_fault)
-
-        GET_THREAD_INFO(%ebp)
-
-        testl   $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
-        jnz     syscall_trace_entry
-sysenter_do_call:
-        cmpl    $(NR_syscalls), %eax
-        jae     sysenter_badsys
-        call    *sys_call_table(, %eax, 4)
-sysenter_after_call:
-        movl    %eax, PT_EAX(%esp)
-        LOCKDEP_SYS_EXIT
-        DISABLE_INTERRUPTS(CLBR_ANY)
         TRACE_IRQS_OFF
-        movl    TI_flags(%ebp), %ecx
-        testl   $_TIF_ALLWORK_MASK, %ecx
-        jnz     syscall_exit_work_irqs_off
-sysenter_exit:
-/* if something modifies registers it must also disable sysexit */
-        movl    PT_EIP(%esp), %edx
-        movl    PT_OLDESP(%esp), %ecx
-        xorl    %ebp, %ebp
-        TRACE_IRQS_ON
+
+        movl    %esp, %eax
+        call    do_fast_syscall_32
+        testl   %eax, %eax
+        jz      .Lsyscall_32_done
+
+/* Opportunistic SYSEXIT */
+        TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
+        movl    PT_EIP(%esp), %edx      /* pt_regs->ip */
+        movl    PT_OLDESP(%esp), %ecx   /* pt_regs->sp */
+        popl    %ebx                    /* pt_regs->bx */
+        addl    $2*4, %esp              /* skip pt_regs->cx and pt_regs->dx */
+        popl    %esi                    /* pt_regs->si */
+        popl    %edi                    /* pt_regs->di */
+        popl    %ebp                    /* pt_regs->bp */
+        popl    %eax                    /* pt_regs->ax */
 1:      mov     PT_FS(%esp), %fs
         PTGS_TO_GS
+
+        /*
+         * Return back to the vDSO, which will pop ecx and edx.
+         * Don't bother with DS and ES (they already contain __USER_DS).
+         */
         ENABLE_INTERRUPTS_SYSEXIT
 
 .pushsection .fixup, "ax"
@@ -371,7 +342,7 @@ ENDPROC(entry_SYSENTER_32)
 ENTRY(entry_INT80_32)
         ASM_CLAC
         pushl   %eax                    /* pt_regs->orig_ax */
-        SAVE_ALL pt_regs_ax=$-ENOSYS    /* save rest, load -ENOSYS into ax */
+        SAVE_ALL pt_regs_ax=$-ENOSYS    /* save rest */
 
         /*
          * User mode is traced as though IRQs are on, and the interrupt gate
@@ -381,6 +352,7 @@ ENTRY(entry_INT80_32)
 
         movl    %esp, %eax
         call    do_int80_syscall_32
+.Lsyscall_32_done:
 
 restore_all:
         TRACE_IRQS_IRET
@@ -457,42 +429,6 @@ ldt_ss:
 #endif
 ENDPROC(entry_INT80_32)
 
-        # perform syscall exit tracing
-        ALIGN
-syscall_trace_entry:
-        movl    $-ENOSYS, PT_EAX(%esp)
-        movl    %esp, %eax
-        call    syscall_trace_enter
-        /* What it returned is what we'll actually use. */
-        cmpl    $(NR_syscalls), %eax
-        jnae    syscall_call
-        jmp     syscall_exit
-END(syscall_trace_entry)
-
-        # perform syscall exit tracing
-        ALIGN
-syscall_exit_work_irqs_off:
-        TRACE_IRQS_ON
-        ENABLE_INTERRUPTS(CLBR_ANY)
-
-syscall_exit_work:
-        movl    %esp, %eax
-        call    syscall_return_slowpath
-        jmp     restore_all
-END(syscall_exit_work)
-
-syscall_fault:
-        ASM_CLAC
-        GET_THREAD_INFO(%ebp)
-        movl    $-EFAULT, PT_EAX(%esp)
-        jmp     resume_userspace
-END(syscall_fault)
-
-sysenter_badsys:
-        movl    $-ENOSYS, %eax
-        jmp     sysenter_after_call
-END(sysenter_badsys)
-
 .macro FIXUP_ESPFIX_STACK

diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 00157cae71e0..93bd8452383f 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -34,6 +34,8 @@ __kernel_vsyscall:
         /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
         ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
                       "syscall", X86_FEATURE_SYSCALL32
+#else
+        ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
 #endif
 
         /* Enter using int $0x80 */
--
cgit v1.2.3
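The hand-rolled pushes in entry_SYSENTER_32 build the top of the 32-bit
struct pt_regs, and SAVE_ALL then fills the rest downward. For orientation,
the field order (as in the era's arch/x86/include/asm/ptrace.h; shown here
as a reference sketch, not part of the patch):

        struct pt_regs {
                /* filled by SAVE_ALL (pushed last, lowest addresses): */
                unsigned long bx, cx, dx, si, di, bp, ax;
                unsigned long ds, es, fs, gs;
                /* pushed by hand, in reverse order (highest addresses): */
                unsigned long orig_ax;  /* pushl %eax */
                unsigned long ip;       /* pushl $0 (placeholder) */
                unsigned long cs;       /* pushl $__USER_CS */
                unsigned long flags;    /* pushfl, then IF forced on */
                unsigned long sp;       /* pushl %ecx (see note below) */
                unsigned long ss;       /* pushl $__USER_DS */
        };

SYSENTER destroys the user stack pointer, so in this series' vDSO
convention the user ESP arrives stashed in %ecx (an assumption consistent
with do_fast_syscall_32() recovering the real ECX value from the user
stack); that is why the %ecx push fills the sp slot.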
From 8b13c2552ffc8e54e57598df36707183933e8e8c Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:17 -0700
Subject: x86/entry: Remove unnecessary IRQ twiddling in fast 32-bit syscalls

This is slightly messy, but it eliminates an unnecessary cli;sti pair.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/22f34b1096694a37326f36c53407b8dd90f37948.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 0ed023d6a983..0d1c842ef3ca 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -323,10 +323,10 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 /*
- * Does a 32-bit syscall.  Called with IRQs off and does all entry and
- * exit work.
+ * Does a 32-bit syscall.  Called with IRQs on and does all entry and
+ * exit work and returns with IRQs off.
  */
-__visible void do_int80_syscall_32(struct pt_regs *regs)
+static void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
         struct thread_info *ti = pt_regs_to_thread_info(regs);
         unsigned int nr = (unsigned int)regs->orig_ax;
@@ -335,8 +335,6 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
         ti->status |= TS_COMPAT;
 #endif
 
-        local_irq_enable();
-
         if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
                 /*
                  * Subtlety here: if ptrace pokes something larger than
@@ -363,6 +361,13 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
         syscall_return_slowpath(regs);
 }
 
+/* Handles int $0x80 */
+__visible void do_int80_syscall_32(struct pt_regs *regs)
+{
+        local_irq_enable();
+        do_syscall_32_irqs_on(regs);
+}
+
 /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
 __visible long do_fast_syscall_32(struct pt_regs *regs)
 {
@@ -398,10 +403,9 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
                 prepare_exit_to_usermode(regs);
                 return 0;       /* Keep it simple: use IRET. */
         }
-        local_irq_disable();
 
         /* Now this is just like a normal syscall. */
-        do_int80_syscall_32(regs);
+        do_syscall_32_irqs_on(regs);
--
cgit v1.2.3
From 460d12453e1afe20416ce9536cfecb31d17a9abd Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:18 -0700
Subject: x86/entry: Make irqs_disabled checks in exit code depend on lockdep

These checks are quite slow.  Disable them in non-lockdep kernels to
reduce the performance hit.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/eccff2a154ae6fb50f40228901003a6e9c24f3d0.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 0d1c842ef3ca..03aacd188458 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -219,7 +219,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 /* Called with IRQs disabled. */
 __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 {
-        if (WARN_ON(!irqs_disabled()))
+        if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
                 local_irq_disable();
 
         lockdep_sys_exit();
@@ -281,8 +281,8 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
 
         CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
 
-        if (WARN(irqs_disabled(), "syscall %ld left IRQs disabled",
-                 regs->orig_ax))
+        if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
+            WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
                 local_irq_enable();
 
         /*
--
cgit v1.2.3
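The trick in this patch (and in the CONFIG_DEBUG_ENTRY change a few patches
further down) is that IS_ENABLED() is a compile-time constant, so the
disabled check costs nothing at runtime. A self-contained userspace
illustration of the idiom (the real macro lives in include/linux/kconfig.h;
the stand-ins below are assumptions made to keep the example compilable):

        #include <stdio.h>

        /* stand-in for IS_ENABLED(CONFIG_PROVE_LOCKING): 0 or 1 */
        #define PROVE_LOCKING_ENABLED 0

        static int slow_checks_run;

        static int irqs_disabled(void)
        {
                slow_checks_run++;      /* models the "quite slow" check */
                return 1;
        }

        int main(void)
        {
                /*
                 * With the constant 0, the compiler deletes the whole
                 * branch, including the irqs_disabled() call.
                 */
                if (PROVE_LOCKING_ENABLED && !irqs_disabled())
                        puts("IRQs unexpectedly on -- would warn here");

                printf("slow checks executed: %d\n", slow_checks_run);
                return 0;
        }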
From 33c52129f45e06d9ce23e1a3d50bf9fd6770748b Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:19 -0700
Subject: x86/entry: Force inlining of 32-bit syscall code

On systems that support fast syscalls, we only really care about the
performance of the fast syscall path.  Forcibly inline it and add a
likely annotation.

This saves 4-6 cycles.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/8472036ff1f4b426b4c4c3e3d0b3bf5264407c0c.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 03aacd188458..d5eee851071c 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -324,9 +324,11 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 /*
  * Does a 32-bit syscall.  Called with IRQs on and does all entry and
- * exit work and returns with IRQs off.
+ * exit work and returns with IRQs off.  This function is extremely hot
+ * in workloads that use it, and it's usually called from
+ * do_fast_syscall_32, so forcibly inline it to improve performance.
  */
-static void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
         struct thread_info *ti = pt_regs_to_thread_info(regs);
         unsigned int nr = (unsigned int)regs->orig_ax;
@@ -345,7 +347,7 @@ static void do_syscall_32_irqs_on(struct pt_regs *regs)
                 nr = syscall_trace_enter(regs);
         }
 
-        if (nr < IA32_NR_syscalls) {
+        if (likely(nr < IA32_NR_syscalls)) {
                 /*
                  * It's possible that a 32-bit syscall implementation
                  * takes a 64-bit parameter but nonetheless assumes that
--
cgit v1.2.3

From c68ca6787bdd6d2df37cf950135aa11e71af358a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:20 -0700
Subject: x86/entry: Micro-optimize compat fast syscall arg fetch

We're following a 32-bit pointer, and the uaccess code isn't smart
enough to figure out that the access_ok() check isn't needed.

This saves about three cycles on a cache-hot fast syscall.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/bdff034e2f23c5eb974c760cf494cb5bddce8f29.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d5eee851071c..08a945d7915e 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -394,8 +394,20 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
          * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
          */
         local_irq_enable();
-        if (get_user(*(u32 *)&regs->cx,
-                     (u32 __user __force *)(unsigned long)(u32)regs->sp)) {
+        if (
+#ifdef CONFIG_X86_64
+                /*
+                 * Micro-optimization: the pointer we're following is explicitly
+                 * 32 bits, so it can't be out of range.
+                 */
+                __get_user(*(u32 *)&regs->cx,
+                            (u32 __user __force *)(unsigned long)(u32)regs->sp)
+#else
+                get_user(*(u32 *)&regs->cx,
+                         (u32 __user __force *)(unsigned long)(u32)regs->sp)
+#endif
+                ) {
+
                 /* User code screwed up. */
                 local_irq_disable();
                 regs->ax = -EFAULT;
--
cgit v1.2.3
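The optimization above leans on one fact: get_user() is an access_ok()
range check followed by the same fetch __get_user() performs (both still
recover from faults via the exception table, so a bad pointer yields
-EFAULT rather than an oops). The range check is redundant here because
the pointer is deliberately truncated to 32 bits. A tiny standalone
demonstration of that argument (illustrative userspace C, not kernel code):

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                /* even a wild 64-bit stack pointer value... */
                uint64_t sp = 0xdeadbeefcafef00dULL;

                /*
                 * ...truncates to at most 0xffffffff, far below the x86_64
                 * user/kernel boundary (TASK_SIZE_MAX is on the order of
                 * 2^47), so an access_ok() check could never fail.
                 */
                uint64_t ptr = (uint32_t)sp;    /* the (u32)regs->sp trick */

                printf("pointer actually dereferenced: %#llx\n",
                       (unsigned long long)ptr);
                return 0;
        }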
From 4aabd140f9cbe0361401a1368bac74df1010abf5 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:21 -0700
Subject: x86/entry: Hide two syscall entry assertions behind CONFIG_DEBUG_ENTRY

This shaves a few cycles off the slow paths.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/ce383fa9e129286ce6da6e00b53acd4c9fb5d06a.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 08a945d7915e..778ca70c22dd 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -71,7 +71,8 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
         unsigned long ret = 0;
         u32 work;
 
-        BUG_ON(regs != task_pt_regs(current));
+        if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+                BUG_ON(regs != task_pt_regs(current));
 
         work = ACCESS_ONCE(current_thread_info()->flags) &
                 _TIF_WORK_SYSCALL_ENTRY;
@@ -160,7 +161,8 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
         u32 work = ACCESS_ONCE(current_thread_info()->flags) &
                 _TIF_WORK_SYSCALL_ENTRY;
 
-        BUG_ON(regs != task_pt_regs(current));
+        if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+                BUG_ON(regs != task_pt_regs(current));
 
         /*
          * If we stepped into a sysenter/syscall insn, it trapped in
--
cgit v1.2.3
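Several patches in this stretch of the series justify themselves in cycles
(4-6 here, about three there, a few more above and below). For context, a
sketch of the kind of userspace harness that can resolve differences at
that scale (method and details are illustrative assumptions, not taken
from this series):

        /* bench.c -- build with: gcc -O2 bench.c */
        #define _GNU_SOURCE
        #include <stdio.h>
        #include <unistd.h>
        #include <sys/syscall.h>
        #include <x86intrin.h>          /* __rdtsc() */

        int main(void)
        {
                unsigned long long best = ~0ULL;

                for (int i = 0; i < 1000000; i++) {
                        unsigned long long t0 = __rdtsc();
                        syscall(SYS_getpid);    /* forces a real syscall */
                        unsigned long long dt = __rdtsc() - t0;
                        if (dt < best)
                                best = dt;
                }

                /* the minimum filters out interrupts and cache misses */
                printf("best getpid round trip: %llu cycles\n", best);
                return 0;
        }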
From dd636071c3d8044c802b7a365e9934724a929530 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:22 -0700
Subject: x86/entry: Use pt_regs_to_thread_info() in syscall entry tracing

It generates simpler and faster code than current_thread_info().

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/a3b6633e7dcb9f673c1b619afae602d29d27d2cf.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 778ca70c22dd..d0874210d5b5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -30,6 +30,13 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
+{
+        unsigned long top_of_stack =
+                (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
+        return (struct thread_info *)(top_of_stack - THREAD_SIZE);
+}
+
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
 __visible void enter_from_user_mode(void)
@@ -68,14 +75,14 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
  */
 unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
 {
+        struct thread_info *ti = pt_regs_to_thread_info(regs);
         unsigned long ret = 0;
         u32 work;
 
         if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
                 BUG_ON(regs != task_pt_regs(current));
 
-        work = ACCESS_ONCE(current_thread_info()->flags) &
-                _TIF_WORK_SYSCALL_ENTRY;
+        work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
 
 #ifdef CONFIG_CONTEXT_TRACKING
         /*
@@ -157,9 +164,9 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
 long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
                                 unsigned long phase1_result)
 {
+        struct thread_info *ti = pt_regs_to_thread_info(regs);
         long ret = 0;
-        u32 work = ACCESS_ONCE(current_thread_info()->flags) &
-                _TIF_WORK_SYSCALL_ENTRY;
+        u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
 
         if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
                 BUG_ON(regs != task_pt_regs(current));
@@ -211,13 +218,6 @@ long syscall_trace_enter(struct pt_regs *regs)
         return syscall_trace_enter_phase2(regs, arch, phase1_result);
 }
 
-static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
-{
-        unsigned long top_of_stack =
-                (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
-        return (struct thread_info *)(top_of_stack - THREAD_SIZE);
-}
-
 /* Called with IRQs disabled. */
 __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 {
--
cgit v1.2.3
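For contrast with the patch above, a sketch of why pt_regs_to_thread_info()
wins: current_thread_info() of this era had to locate the kernel stack
through a per-CPU load, roughly as below (simplified from the era's
asm/thread_info.h; treat the exact details as assumptions):

        /* needs a per-CPU memory access to locate the kernel stack: */
        static inline struct thread_info *current_thread_info(void)
        {
                return (struct thread_info *)
                        (current_top_of_stack() - THREAD_SIZE);
        }

        /* pure arithmetic on a pointer the caller already holds,
         * which the compiler folds into a single address computation: */
        static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
        {
                unsigned long top_of_stack =
                        (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
                return (struct thread_info *)(top_of_stack - THREAD_SIZE);
        }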
From 39b48e575e92e31251b74b4b48cea2129cee90bd Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:23 -0700
Subject: x86/entry: Split and inline prepare_exit_to_usermode()

GCC is unable to properly optimize functions that have a very short
likely case and a longer and register-heavier cold part -- it fails
to sink all of the register saving and stack frame setup code into
the unlikely part.

Help it out with prepare_exit_to_usermode() by splitting it into two
parts and inlining the hot part.

Saves 6-8 cycles for compat syscalls.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/9fc53eda4a5b924070952f12fa4ae3e477640a07.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 43 ++++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 15 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d0874210d5b5..66ccbd664d4c 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -218,14 +218,12 @@ long syscall_trace_enter(struct pt_regs *regs)
         return syscall_trace_enter_phase2(regs, arch, phase1_result);
 }
 
-/* Called with IRQs disabled. */
-__visible void prepare_exit_to_usermode(struct pt_regs *regs)
-{
-        if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
-                local_irq_disable();
-
-        lockdep_sys_exit();
+#define EXIT_TO_USERMODE_LOOP_FLAGS                             \
+        (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |   \
+         _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
 
+static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
+{
         /*
          * In order to return to user mode, we need to have IRQs off with
          * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY,
@@ -235,14 +233,6 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
          * work to clear some of the flags can sleep.
          */
         while (true) {
-                u32 cached_flags =
-                        READ_ONCE(pt_regs_to_thread_info(regs)->flags);
-
-                if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |
-                                      _TIF_UPROBE | _TIF_NEED_RESCHED |
-                                      _TIF_USER_RETURN_NOTIFY)))
-                        break;
-
                 /* We have work to do. */
                 local_irq_enable();
 
@@ -266,7 +256,30 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 
                 /* Disable IRQs and retry */
                 local_irq_disable();
+
+                cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+
+                if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
+                        break;
+        }
+}
+
+/* Called with IRQs disabled. */
+__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
+{
+        u32 cached_flags;
+
+        if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
+                local_irq_disable();
+
+        lockdep_sys_exit();
+
+        cached_flags =
+                READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+
+        if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
+                exit_to_usermode_loop(regs, cached_flags);
 
         user_enter();
 }
--
cgit v1.2.3
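The split-and-inline trick above (repeated for syscall_return_slowpath()
in the next patch) is a general pattern, not an entry-code special. A
self-contained illustration, with kernel-isms swapped for portable GCC
equivalents (all names below are invented for the example):

        #include <stdint.h>
        #include <stdio.h>

        struct state { uint32_t flags; };
        #define SLOW_WORK_MASK 0x7u

        /* cold part: register-heavy, rarely run, kept out of line so
         * the caller's prologue/epilogue stays minimal */
        __attribute__((noinline, cold))
        static void handle_slow_work(struct state *s, uint32_t flags)
        {
                printf("slow work for flags %#x\n", flags);
                s->flags = 0;
        }

        /* hot part: small enough to inline without register spills */
        static inline void exit_fast_path(struct state *s)
        {
                uint32_t flags = *(volatile uint32_t *)&s->flags;

                if (__builtin_expect(!!(flags & SLOW_WORK_MASK), 0))
                        handle_slow_work(s, flags);
        }

        int main(void)
        {
                struct state s = { .flags = 0x4 };

                exit_fast_path(&s);     /* takes the unlikely branch once */
                exit_fast_path(&s);     /* pure fast path */
                return 0;
        }

Without the split, GCC sets up the worst-case stack frame up front and the
fast path pays for registers only the slow path uses; with it, the hot
caller stays frameless.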
From f5e6a9753ac2965564a14e6285a06f44043ed9c8 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 5 Oct 2015 17:48:24 -0700
Subject: x86/entry: Split and inline syscall_return_slowpath()

GCC is unable to properly optimize functions that have a very short
likely case and a longer and register-heavier cold part -- it fails
to sink all of the register saving and stack frame setup code into
the unlikely part.

Help it out with syscall_return_slowpath() by splitting it into two
parts and inlining the hot part.

Saves 6 cycles for compat syscalls.

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/0f773a894ab15c589ac794c2d34ca6ba9b5335c9.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 21 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 66ccbd664d4c..b53e04d301a3 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -284,15 +284,40 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
         user_enter();
 }
 
+#define SYSCALL_EXIT_WORK_FLAGS                         \
+        (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |      \
+         _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+
+static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
+{
+        bool step;
+
+        audit_syscall_exit(regs);
+
+        if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
+                trace_sys_exit(regs, regs->ax);
+
+        /*
+         * If TIF_SYSCALL_EMU is set, we only get here because of
+         * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+         * We already reported this syscall instruction in
+         * syscall_trace_enter().
+         */
+        step = unlikely(
+                (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
+                == _TIF_SINGLESTEP);
+        if (step || cached_flags & _TIF_SYSCALL_TRACE)
+                tracehook_report_syscall_exit(regs, step);
+}
+
 /*
  * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
  * state such that we can immediately switch to user mode.
  */
-__visible void syscall_return_slowpath(struct pt_regs *regs)
+__visible inline void syscall_return_slowpath(struct pt_regs *regs)
 {
         struct thread_info *ti = pt_regs_to_thread_info(regs);
         u32 cached_flags = READ_ONCE(ti->flags);
-        bool step;
 
         CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
 
@@ -304,25 +329,8 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
          * First do one-time work.  If these work items are enabled, we
          * want to run them exactly once per syscall exit with IRQs on.
          */
-        if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |
-                            _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) {
-                audit_syscall_exit(regs);
-
-                if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
-                        trace_sys_exit(regs, regs->ax);
-
-                /*
-                 * If TIF_SYSCALL_EMU is set, we only get here because of
-                 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
-                 * We already reported this syscall instruction in
-                 * syscall_trace_enter().
-                 */
-                step = unlikely(
-                        (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
-                        == _TIF_SINGLESTEP);
-                if (step || cached_flags & _TIF_SYSCALL_TRACE)
-                        tracehook_report_syscall_exit(regs, step);
-        }
+        if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
+                syscall_slow_exit_work(regs, cached_flags);
 
 #ifdef CONFIG_COMPAT
         /*
--
cgit v1.2.3
From 657c1eea0019e80685a84cbb1919794243a187c9 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Fri, 16 Oct 2015 15:42:54 -0700
Subject: x86/entry/32: Fix entry_INT80_32() to expect interrupts to be on

When I rewrote entry_INT80_32, I thought that int80 was an interrupt
gate.  It's a trap gate.  *facepalm*

Thanks to Brian Gerst for pointing out that it's better to change the
entry code than to change the gate type.

Suggested-by: Brian Gerst
Reported-and-tested-by: Borislav Petkov
Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Fixes: 150ac78d63af ("x86/entry/32: Switch INT80 to the new C syscall path")
Link: http://lkml.kernel.org/r/dc09d9b574a5c1dcca996847875c73f8341ce0ad.1445035014.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/common.c          | 15 ++++++++++++---
 arch/x86/entry/entry_32.S        |  8 ++++----
 arch/x86/entry/entry_64_compat.S |  2 +-
 3 files changed, 17 insertions(+), 8 deletions(-)
(limited to 'arch/x86/entry/common.c')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index b53e04d301a3..a89fdbc1f0be 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -351,7 +351,14 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
  * in workloads that use it, and it's usually called from
  * do_fast_syscall_32, so forcibly inline it to improve performance.
  */
-static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+#ifdef CONFIG_X86_32
+/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
+__visible
+#else
+/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
+static
+#endif
+__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
         struct thread_info *ti = pt_regs_to_thread_info(regs);
         unsigned int nr = (unsigned int)regs->orig_ax;
@@ -386,12 +393,14 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
         syscall_return_slowpath(regs);
 }
 
-/* Handles int $0x80 */
-__visible void do_int80_syscall_32(struct pt_regs *regs)
+#ifdef CONFIG_X86_64
+/* Handles INT80 on 64-bit kernels */
+__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
 {
         local_irq_enable();
         do_syscall_32_irqs_on(regs);
 }
+#endif

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index c1c7c6364216..4f97f49261d3 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -345,13 +345,13 @@ ENTRY(entry_INT80_32)
         SAVE_ALL pt_regs_ax=$-ENOSYS    /* save rest */
 
         /*
-         * User mode is traced as though IRQs are on, and the interrupt gate
-         * turned them off.
+         * User mode is traced as though IRQs are on.  Unlike the 64-bit
+         * case, INT80 is a trap gate on 32-bit kernels, so interrupts
+         * are already on (unless user code is messing around with iopl).
          */
-        TRACE_IRQS_OFF
 
         movl    %esp, %eax
-        call    do_int80_syscall_32
+        call    do_syscall_32_irqs_on
 .Lsyscall_32_done:
 
 restore_all:
         TRACE_IRQS_IRET

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 92b0b27b43c6..c3201830a85e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -303,7 +303,7 @@ ENTRY(entry_INT80_compat)
         TRACE_IRQS_OFF
 
         movq    %rsp, %rdi
-        call    do_int80_syscall_32
+        call    do_syscall_32_irqs_off
 .Lsyscall_32_done:
 
         /* Go back to user mode. */
--
cgit v1.2.3
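The distinction behind this fix, for reference: an interrupt gate makes the
CPU clear EFLAGS.IF on delivery, a trap gate leaves it alone, and 32-bit
kernels install int $0x80 as a DPL-3 trap gate. A sketch of the gate
registration as it looked in the era's arch/x86/kernel/traps.c (helper
names reproduced from memory -- treat them as assumptions):

        #ifdef CONFIG_IA32_EMULATION
                /* interrupt gate: IRQs are off when the handler runs */
                set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
        #endif

        #ifdef CONFIG_X86_32
                /* trap gate: IRQs stay however user mode left them */
                set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
        #endif

Hence the 64-bit compat path keeps TRACE_IRQS_OFF and calls the new
do_syscall_32_irqs_off() wrapper, while the 32-bit path drops the
annotation and calls do_syscall_32_irqs_on() directly.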