From 3411506550b1f714a52b5db087666c08658d2698 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Nov 2021 08:19:50 -0800 Subject: x86/csum: Rewrite/optimize csum_partial() With more NICs supporting CHECKSUM_COMPLETE, and IPv6 being widely used csum_partial() is heavily used with small amount of bytes, and is consuming many cycles. IPv6 header size, for instance, is 40 bytes. Another thing to consider is that NET_IP_ALIGN is 0 on x86, meaning that network headers are not word-aligned, unless the driver forces this. This means that csum_partial() fetches one u16 to 'align the buffer', then performs three u64 additions with carry in a loop, then a remaining u32, then a remaining u16. With this new version, it performs a loop only for the 64 bytes blocks, then the remaining is bisected. Testing on various CPUs, all of them show a big reduction in csum_partial() cost (by 50 to 80 %) Before: 4.16% [kernel] [k] csum_partial After: 0.83% [kernel] [k] csum_partial If run in a loop 1,000,000 times: Before: 26,922,913 cycles # 3846130.429 GHz 80,302,961 instructions # 2.98 insn per cycle 21,059,816 branches # 3008545142.857 M/sec 2,896 branch-misses # 0.01% of all branches After: 17,960,709 cycles # 3592141.800 GHz 41,292,805 instructions # 2.30 insn per cycle 11,058,119 branches # 2211623800.000 M/sec 2,997 branch-misses # 0.03% of all branches [ bp: Massage, merge in subsequent fixes into a single patch: - um compilation error due to missing load_unaligned_zeropad(): - Reported-by: kernel test robot - Link: https://lkml.kernel.org/r/20211118175239.1525650-1-eric.dumazet@gmail.com - Fix initial seed for odd buffers - Reported-by: Noah Goldstein - Link: https://lkml.kernel.org/r/20211125141817.3541501-1-eric.dumazet@gmail.com ] Signed-off-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20211112161950.528886-1-eric.dumazet@gmail.com --- arch/x86/lib/csum-partial_64.c | 183 
++++++++++++++++++++--------------------- 1 file changed, 91 insertions(+), 92 deletions(-) diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index e7925d668b68..1f8a8f895173 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -9,6 +9,7 @@ #include <linux/compiler.h> #include <linux/export.h> #include <asm/checksum.h> +#include <asm/word-at-a-time.h> static inline unsigned short from32to16(unsigned a) { @@ -21,120 +22,119 @@ static inline unsigned short from32to16(unsigned a) } /* - * Do a 64-bit checksum on an arbitrary memory area. + * Do a checksum on an arbitrary memory area. * Returns a 32bit checksum. * * This isn't as time critical as it used to be because many NICs * do hardware checksumming these days. - * - * Things tried and found to not make it faster: - * Manual Prefetching - * Unrolling to an 128 bytes inner loop. - * Using interleaving with more registers to break the carry chains. + * + * Still, with CHECKSUM_COMPLETE this is called to compute + * checksums on IPv6 headers (40 bytes) and other small parts. + * it's best to have buff aligned on a 64-bit boundary */ -static unsigned do_csum(const unsigned char *buff, unsigned len) +__wsum csum_partial(const void *buff, int len, __wsum sum) { - unsigned odd, count; - unsigned long result = 0; + u64 temp64 = (__force u64)sum; + unsigned odd, result; - if (unlikely(len == 0)) - return result; odd = 1 & (unsigned long) buff; if (unlikely(odd)) { - result = *buff << 8; + if (unlikely(len == 0)) + return sum; + temp64 = ror32((__force u32)sum, 8); + temp64 += (*(unsigned char *)buff << 8); len--; buff++; } - count = len >> 1; /* nr of 16-bit words.. */ - if (count) { - if (2 & (unsigned long) buff) { - result += *(unsigned short *)buff; - count--; - len -= 2; - buff += 2; - } - count >>= 1; /* nr of 32-bit words.. */ - if (count) { - unsigned long zero; - unsigned count64; - if (4 & (unsigned long) buff) { - result += *(unsigned int *) buff; - count--; - len -= 4; - buff += 4; - } - count >>= 1; /* nr of 64-bit words.. 
*/ - /* main loop using 64byte blocks */ - zero = 0; - count64 = count >> 3; - while (count64) { - asm("addq 0*8(%[src]),%[res]\n\t" - "adcq 1*8(%[src]),%[res]\n\t" - "adcq 2*8(%[src]),%[res]\n\t" - "adcq 3*8(%[src]),%[res]\n\t" - "adcq 4*8(%[src]),%[res]\n\t" - "adcq 5*8(%[src]),%[res]\n\t" - "adcq 6*8(%[src]),%[res]\n\t" - "adcq 7*8(%[src]),%[res]\n\t" - "adcq %[zero],%[res]" - : [res] "=r" (result) - : [src] "r" (buff), [zero] "r" (zero), - "[res]" (result)); - buff += 64; - count64--; - } + while (unlikely(len >= 64)) { + asm("addq 0*8(%[src]),%[res]\n\t" + "adcq 1*8(%[src]),%[res]\n\t" + "adcq 2*8(%[src]),%[res]\n\t" + "adcq 3*8(%[src]),%[res]\n\t" + "adcq 4*8(%[src]),%[res]\n\t" + "adcq 5*8(%[src]),%[res]\n\t" + "adcq 6*8(%[src]),%[res]\n\t" + "adcq 7*8(%[src]),%[res]\n\t" + "adcq $0,%[res]" + : [res] "+r" (temp64) + : [src] "r" (buff) + : "memory"); + buff += 64; + len -= 64; + } + + if (len & 32) { + asm("addq 0*8(%[src]),%[res]\n\t" + "adcq 1*8(%[src]),%[res]\n\t" + "adcq 2*8(%[src]),%[res]\n\t" + "adcq 3*8(%[src]),%[res]\n\t" + "adcq $0,%[res]" + : [res] "+r" (temp64) + : [src] "r" (buff) + : "memory"); + buff += 32; + } + if (len & 16) { + asm("addq 0*8(%[src]),%[res]\n\t" + "adcq 1*8(%[src]),%[res]\n\t" + "adcq $0,%[res]" + : [res] "+r" (temp64) + : [src] "r" (buff) + : "memory"); + buff += 16; + } + if (len & 8) { + asm("addq 0*8(%[src]),%[res]\n\t" + "adcq $0,%[res]" + : [res] "+r" (temp64) + : [src] "r" (buff) + : "memory"); + buff += 8; + } + if (len & 7) { +#ifdef CONFIG_DCACHE_WORD_ACCESS + unsigned int shift = (8 - (len & 7)) * 8; + unsigned long trail; - /* last up to 7 8byte blocks */ - count %= 8; - while (count) { - asm("addq %1,%0\n\t" - "adcq %2,%0\n" - : "=r" (result) - : "m" (*(unsigned long *)buff), - "r" (zero), "0" (result)); - --count; - buff += 8; - } - result = add32_with_carry(result>>32, - result&0xffffffff); + trail = (load_unaligned_zeropad(buff) << shift) >> shift; - if (len & 4) { - result += *(unsigned int *) buff; - buff += 
4; - } + asm("addq %[trail],%[res]\n\t" + "adcq $0,%[res]" + : [res] "+r" (temp64) + : [trail] "r" (trail)); +#else + if (len & 4) { + asm("addq %[val],%[res]\n\t" + "adcq $0,%[res]" + : [res] "+r" (temp64) + : [val] "r" ((u64)*(u32 *)buff) + : "memory"); + buff += 4; } if (len & 2) { - result += *(unsigned short *) buff; + asm("addq %[val],%[res]\n\t" + "adcq $0,%[res]" + : [res] "+r" (temp64) + : [val] "r" ((u64)*(u16 *)buff) + : "memory"); buff += 2; } + if (len & 1) { + asm("addq %[val],%[res]\n\t" + "adcq $0,%[res]" + : [res] "+r" (temp64) + : [val] "r" ((u64)*(u8 *)buff) + : "memory"); + } +#endif } - if (len & 1) - result += *buff; - result = add32_with_carry(result>>32, result & 0xffffffff); - if (unlikely(odd)) { + result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); + if (unlikely(odd)) { result = from32to16(result); result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); } - return result; -} - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 64-bit boundary - */ -__wsum csum_partial(const void *buff, int len, __wsum sum) -{ - return (__force __wsum)add32_with_carry(do_csum(buff, len), - (__force u32)sum); + return (__force __wsum)result; } EXPORT_SYMBOL(csum_partial); @@ -147,4 +147,3 @@ __sum16 ip_compute_csum(const void *buff, int len) return csum_fold(csum_partial(buff,len,0)); } EXPORT_SYMBOL(ip_compute_csum); - -- cgit v1.2.3 From b2f825bfeda884f9d40386cc7d089d023017d2dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Nov 2021 17:50:24 +0100 Subject: x86: Move RETPOLINE*_CFLAGS to arch Makefile Currently, RETPOLINE*_CFLAGS are defined in the top-level Makefile but only x86 makes use of them. Move them there. 
If ever another architecture finds the need, it can be reconsidered. [ bp: Massage a bit. ] Suggested-by: Nick Desaulniers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Reviewed-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20211119165630.219152765@infradead.org --- Makefile | 11 ----------- arch/x86/Makefile | 11 +++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 8e35d7804fef..b9ee63d6660c 100644 --- a/Makefile +++ b/Makefile @@ -688,17 +688,6 @@ ifdef CONFIG_FUNCTION_TRACER CC_FLAGS_FTRACE := -pg endif -ifdef CONFIG_CC_IS_GCC -RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) -RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) -endif -ifdef CONFIG_CC_IS_CLANG -RETPOLINE_CFLAGS := -mretpoline-external-thunk -RETPOLINE_VDSO_CFLAGS := -mretpoline -endif -export RETPOLINE_CFLAGS -export RETPOLINE_VDSO_CFLAGS - include $(srctree)/arch/$(SRCARCH)/Makefile ifdef need-config diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 42243869216d..2f40de5d82a2 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -12,6 +12,17 @@ else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif +ifdef CONFIG_CC_IS_GCC +RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) +RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) +endif +ifdef CONFIG_CC_IS_CLANG +RETPOLINE_CFLAGS := -mretpoline-external-thunk +RETPOLINE_VDSO_CFLAGS := -mretpoline +endif +export RETPOLINE_CFLAGS +export RETPOLINE_VDSO_CFLAGS + # For gcc stack alignment is specified with -mpreferred-stack-boundary, # clang has the option -mstack-alignment for that purpose. 
ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) -- cgit v1.2.3 From 68cf4f2a72ef8786e6b7af6fd9a89f27ac0f520d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Nov 2021 17:50:25 +0100 Subject: x86: Use -mindirect-branch-cs-prefix for RETPOLINE builds In order to further enable commit: bbe2df3f6b6d ("x86/alternative: Try inline spectre_v2=retpoline,amd") add the new GCC flag -mindirect-branch-cs-prefix: https://gcc.gnu.org/g:2196a681d7810ad8b227bf983f38ba716620545e https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102952 https://bugs.llvm.org/show_bug.cgi?id=52323 to RETPOLINE=y builds. This should allow fully inlining retpoline,amd for GCC builds. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Acked-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20211119165630.276205624@infradead.org --- arch/x86/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2f40de5d82a2..c38b6577c103 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -14,6 +14,7 @@ endif ifdef CONFIG_CC_IS_GCC RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) +RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) endif ifdef CONFIG_CC_IS_CLANG -- cgit v1.2.3 From 22da5a07c75e1104caf6a42f189c97b83d070073 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Dec 2021 14:43:39 +0100 Subject: x86/lib/atomic64_386_32: Rename things Principally, in order to get rid of #define RET in this code to make place for a new RET, but also to clarify the code, rename a bunch of things: s/UNLOCK/IRQ_RESTORE/ s/LOCK/IRQ_SAVE/ s/BEGIN/BEGIN_IRQ_SAVE/ s/\<RET\>/RET_IRQ_RESTORE/ s/RET_ENDP/\tRET_IRQ_RESTORE\rENDP/ which then leaves RET unused so it can be removed. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211204134907.841623970@infradead.org --- arch/x86/lib/atomic64_386_32.S | 84 +++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 16bc9130e7a5..4ad6b97fdb6f 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -9,81 +9,83 @@ #include <asm/alternative.h> /* if you want SMP support, implement these with real spinlocks */ -.macro LOCK reg +.macro IRQ_SAVE reg pushfl cli .endm -.macro UNLOCK reg +.macro IRQ_RESTORE reg popfl .endm -#define BEGIN(op) \ +#define BEGIN_IRQ_SAVE(op) \ .macro endp; \ SYM_FUNC_END(atomic64_##op##_386); \ .purgem endp; \ .endm; \ SYM_FUNC_START(atomic64_##op##_386); \ - LOCK v; + IRQ_SAVE v; #define ENDP endp -#define RET \ - UNLOCK v; \ +#define RET_IRQ_RESTORE \ + IRQ_RESTORE v; \ ret -#define RET_ENDP \ - RET; \ - ENDP - #define v %ecx -BEGIN(read) +BEGIN_IRQ_SAVE(read) movl (v), %eax movl 4(v), %edx -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(set) +BEGIN_IRQ_SAVE(set) movl %ebx, (v) movl %ecx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(xchg) +BEGIN_IRQ_SAVE(xchg) movl (v), %eax movl 4(v), %edx movl %ebx, (v) movl %ecx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %ecx -BEGIN(add) +BEGIN_IRQ_SAVE(add) addl %eax, (v) adcl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %ecx -BEGIN(add_return) +BEGIN_IRQ_SAVE(add_return) addl (v), %eax adcl 4(v), %edx movl %eax, (v) movl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %ecx -BEGIN(sub) +BEGIN_IRQ_SAVE(sub) subl %eax, (v) sbbl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %ecx -BEGIN(sub_return) +BEGIN_IRQ_SAVE(sub_return) negl %edx negl %eax sbbl $0, %edx @@ -91,47 +93,52 @@ BEGIN(sub_return) adcl 4(v), %edx movl %eax, (v) movl %edx, 4(v) -RET_ENDP + 
RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(inc) +BEGIN_IRQ_SAVE(inc) addl $1, (v) adcl $0, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(inc_return) +BEGIN_IRQ_SAVE(inc_return) movl (v), %eax movl 4(v), %edx addl $1, %eax adcl $0, %edx movl %eax, (v) movl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(dec) +BEGIN_IRQ_SAVE(dec) subl $1, (v) sbbl $0, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(dec_return) +BEGIN_IRQ_SAVE(dec_return) movl (v), %eax movl 4(v), %edx subl $1, %eax sbbl $0, %edx movl %eax, (v) movl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(add_unless) +BEGIN_IRQ_SAVE(add_unless) addl %eax, %ecx adcl %edx, %edi addl (v), %eax @@ -143,7 +150,7 @@ BEGIN(add_unless) movl %edx, 4(v) movl $1, %eax 2: - RET + RET_IRQ_RESTORE 3: cmpl %edx, %edi jne 1b @@ -153,7 +160,7 @@ ENDP #undef v #define v %esi -BEGIN(inc_not_zero) +BEGIN_IRQ_SAVE(inc_not_zero) movl (v), %eax movl 4(v), %edx testl %eax, %eax @@ -165,7 +172,7 @@ BEGIN(inc_not_zero) movl %edx, 4(v) movl $1, %eax 2: - RET + RET_IRQ_RESTORE 3: testl %edx, %edx jne 1b @@ -174,7 +181,7 @@ ENDP #undef v #define v %esi -BEGIN(dec_if_positive) +BEGIN_IRQ_SAVE(dec_if_positive) movl (v), %eax movl 4(v), %edx subl $1, %eax @@ -183,5 +190,6 @@ BEGIN(dec_if_positive) movl %eax, (v) movl %edx, 4(v) 1: -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v -- cgit v1.2.3 From f94909ceb1ed4bfdb2ada72f93236305e6d6951f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Dec 2021 14:43:40 +0100 Subject: x86: Prepare asm files for straight-line-speculation Replace all ret/retq instructions with RET in preparation of making RET a macro. Since AS is case insensitive it's a big no-op without RET defined. 
find arch/x86/ -name \*.S | while read file do sed -i 's/\<ret[q]*\>/RET/' $file done Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211204134907.905503893@infradead.org --- arch/x86/boot/compressed/efi_thunk_64.S | 2 +- arch/x86/boot/compressed/head_64.S | 8 ++-- arch/x86/boot/compressed/mem_encrypt.S | 6 +-- arch/x86/crypto/aegis128-aesni-asm.S | 48 ++++++++++++------------ arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 2 +- arch/x86/crypto/aesni-intel_asm.S | 56 ++++++++++++++-------------- arch/x86/crypto/aesni-intel_avx-x86_64.S | 40 ++++++++++---------- arch/x86/crypto/blake2s-core.S | 4 +- arch/x86/crypto/blowfish-x86_64-asm_64.S | 12 +++--- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 14 +++---- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 14 +++---- arch/x86/crypto/camellia-x86_64-asm_64.S | 12 +++--- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 12 +++--- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 10 ++--- arch/x86/crypto/chacha-avx2-x86_64.S | 6 +-- arch/x86/crypto/chacha-avx512vl-x86_64.S | 6 +-- arch/x86/crypto/chacha-ssse3-x86_64.S | 8 ++-- arch/x86/crypto/crc32-pclmul_asm.S | 2 +- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 2 +- arch/x86/crypto/crct10dif-pcl-asm_64.S | 2 +- arch/x86/crypto/des3_ede-asm_64.S | 4 +- arch/x86/crypto/ghash-clmulni-intel_asm.S | 6 +-- arch/x86/crypto/nh-avx2-x86_64.S | 2 +- arch/x86/crypto/nh-sse2-x86_64.S | 2 +- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 10 ++--- arch/x86/crypto/serpent-avx2-asm_64.S | 10 ++--- arch/x86/crypto/serpent-sse2-i586-asm_32.S | 6 +-- arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 6 +-- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 2 +- arch/x86/crypto/sha1_ni_asm.S | 2 +- arch/x86/crypto/sha1_ssse3_asm.S | 2 +- arch/x86/crypto/sha256-avx-asm.S | 2 +- arch/x86/crypto/sha256-avx2-asm.S | 2 +- arch/x86/crypto/sha256-ssse3-asm.S | 2 +- arch/x86/crypto/sha256_ni_asm.S | 2 +- arch/x86/crypto/sha512-avx-asm.S | 2 +- arch/x86/crypto/sha512-avx2-asm.S | 2 +- 
arch/x86/crypto/sha512-ssse3-asm.S | 2 +- arch/x86/crypto/sm4-aesni-avx-asm_64.S | 12 +++--- arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 8 ++-- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 10 ++--- arch/x86/crypto/twofish-i586-asm_32.S | 4 +- arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 6 +-- arch/x86/crypto/twofish-x86_64-asm_64.S | 4 +- arch/x86/entry/entry_32.S | 2 +- arch/x86/entry/entry_64.S | 10 ++--- arch/x86/entry/thunk_32.S | 2 +- arch/x86/entry/thunk_64.S | 2 +- arch/x86/entry/vdso/vdso32/system_call.S | 2 +- arch/x86/entry/vdso/vsgx.S | 2 +- arch/x86/entry/vsyscall/vsyscall_emu_64.S | 6 +-- arch/x86/kernel/acpi/wakeup_32.S | 6 +-- arch/x86/kernel/ftrace_32.S | 6 +-- arch/x86/kernel/ftrace_64.S | 8 ++-- arch/x86/kernel/head_32.S | 2 +- arch/x86/kernel/irqflags.S | 2 +- arch/x86/kernel/relocate_kernel_32.S | 10 ++--- arch/x86/kernel/relocate_kernel_64.S | 10 ++--- arch/x86/kernel/sev_verify_cbit.S | 2 +- arch/x86/kernel/verify_cpu.S | 4 +- arch/x86/kvm/svm/vmenter.S | 4 +- arch/x86/kvm/vmx/vmenter.S | 14 +++---- arch/x86/lib/atomic64_386_32.S | 2 +- arch/x86/lib/atomic64_cx8_32.S | 16 ++++---- arch/x86/lib/checksum_32.S | 8 ++-- arch/x86/lib/clear_page_64.S | 6 +-- arch/x86/lib/cmpxchg16b_emu.S | 4 +- arch/x86/lib/cmpxchg8b_emu.S | 4 +- arch/x86/lib/copy_mc_64.S | 6 +-- arch/x86/lib/copy_page_64.S | 4 +- arch/x86/lib/copy_user_64.S | 10 ++--- arch/x86/lib/csum-copy_64.S | 2 +- arch/x86/lib/getuser.S | 22 +++++------ arch/x86/lib/hweight.S | 6 +-- arch/x86/lib/iomap_copy_64.S | 2 +- arch/x86/lib/memcpy_64.S | 12 +++--- arch/x86/lib/memmove_64.S | 4 +- arch/x86/lib/memset_64.S | 6 +-- arch/x86/lib/msr-reg.S | 4 +- arch/x86/lib/putuser.S | 6 +-- arch/x86/lib/retpoline.S | 2 +- arch/x86/math-emu/div_Xsig.S | 2 +- arch/x86/math-emu/div_small.S | 2 +- arch/x86/math-emu/mul_Xsig.S | 6 +-- arch/x86/math-emu/polynom_Xsig.S | 2 +- arch/x86/math-emu/reg_norm.S | 6 +-- arch/x86/math-emu/reg_round.S | 2 +- arch/x86/math-emu/reg_u_add.S | 2 +- 
arch/x86/math-emu/reg_u_div.S | 2 +- arch/x86/math-emu/reg_u_mul.S | 2 +- arch/x86/math-emu/reg_u_sub.S | 2 +- arch/x86/math-emu/round_Xsig.S | 4 +- arch/x86/math-emu/shr_Xsig.S | 8 ++-- arch/x86/math-emu/wm_shrx.S | 16 ++++---- arch/x86/mm/mem_encrypt_boot.S | 4 +- arch/x86/platform/efi/efi_stub_32.S | 2 +- arch/x86/platform/efi/efi_stub_64.S | 2 +- arch/x86/platform/efi/efi_thunk_64.S | 2 +- arch/x86/platform/olpc/xo1-wakeup.S | 6 +-- arch/x86/power/hibernate_asm_32.S | 4 +- arch/x86/power/hibernate_asm_64.S | 4 +- arch/x86/um/checksum_32.S | 4 +- arch/x86/um/setjmp_32.S | 2 +- arch/x86/um/setjmp_64.S | 2 +- arch/x86/xen/xen-asm.S | 12 +++--- arch/x86/xen/xen-head.S | 2 +- 106 files changed, 349 insertions(+), 349 deletions(-) diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index 8bb92e9f4e97..70052779b235 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -93,7 +93,7 @@ SYM_FUNC_START(__efi64_thunk) pop %rbx pop %rbp - ret + RET SYM_FUNC_END(__efi64_thunk) .code32 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 572c535cf45b..fd9441f40457 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -813,7 +813,7 @@ SYM_FUNC_START(efi32_pe_entry) 2: popl %edi // restore callee-save registers popl %ebx leave - ret + RET SYM_FUNC_END(efi32_pe_entry) .section ".rodata" @@ -868,7 +868,7 @@ SYM_FUNC_START(startup32_set_idt_entry) pop %ecx pop %ebx - ret + RET SYM_FUNC_END(startup32_set_idt_entry) #endif @@ -884,7 +884,7 @@ SYM_FUNC_START(startup32_load_idt) movl %eax, rva(boot32_idt_desc+2)(%ebp) lidt rva(boot32_idt_desc)(%ebp) #endif - ret + RET SYM_FUNC_END(startup32_load_idt) /* @@ -954,7 +954,7 @@ SYM_FUNC_START(startup32_check_sev_cbit) popl %ebx popl %eax #endif - ret + RET SYM_FUNC_END(startup32_check_sev_cbit) /* diff --git a/arch/x86/boot/compressed/mem_encrypt.S 
b/arch/x86/boot/compressed/mem_encrypt.S index c1e81a848b2a..a63424d13627 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -58,7 +58,7 @@ SYM_FUNC_START(get_sev_encryption_bit) #endif /* CONFIG_AMD_MEM_ENCRYPT */ - ret + RET SYM_FUNC_END(get_sev_encryption_bit) /** @@ -92,7 +92,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid) /* All good - return success */ xorl %eax, %eax 1: - ret + RET 2: movl $-1, %eax jmp 1b @@ -221,7 +221,7 @@ SYM_FUNC_START(set_sev_encryption_mask) #endif xor %rax, %rax - ret + RET SYM_FUNC_END(set_sev_encryption_mask) .data diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 51d46d93efbc..b48ddebb4748 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -122,7 +122,7 @@ SYM_FUNC_START_LOCAL(__load_partial) pxor T0, MSG .Lld_partial_8: - ret + RET SYM_FUNC_END(__load_partial) /* @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(__store_partial) mov %r10b, (%r9) .Lst_partial_1: - ret + RET SYM_FUNC_END(__store_partial) /* @@ -225,7 +225,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_init) movdqu STATE4, 0x40(STATEP) FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_init) /* @@ -337,7 +337,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) FRAME_END - ret + RET .Lad_out_1: movdqu STATE4, 0x00(STATEP) @@ -346,7 +346,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET .Lad_out_2: movdqu STATE3, 0x00(STATEP) @@ -355,7 +355,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) FRAME_END - ret + RET .Lad_out_3: movdqu STATE2, 0x00(STATEP) @@ -364,7 +364,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) FRAME_END - ret + RET .Lad_out_4: movdqu STATE1, 0x00(STATEP) @@ -373,11 +373,11 @@ 
SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) FRAME_END - ret + RET .Lad_out: FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_ad) .macro encrypt_block a s0 s1 s2 s3 s4 i @@ -452,7 +452,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out_1: movdqu STATE3, 0x00(STATEP) @@ -461,7 +461,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out_2: movdqu STATE2, 0x00(STATEP) @@ -470,7 +470,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out_3: movdqu STATE1, 0x00(STATEP) @@ -479,7 +479,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out_4: movdqu STATE0, 0x00(STATEP) @@ -488,11 +488,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out: FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_enc) /* @@ -532,7 +532,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) .macro decrypt_block a s0 s1 s2 s3 s4 i @@ -606,7 +606,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out_1: movdqu STATE3, 0x00(STATEP) @@ -615,7 +615,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out_2: movdqu STATE2, 0x00(STATEP) @@ -624,7 +624,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out_3: movdqu STATE1, 0x00(STATEP) @@ -633,7 +633,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE4, 
0x30(STATEP) movdqu STATE0, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out_4: movdqu STATE0, 0x00(STATEP) @@ -642,11 +642,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out: FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_dec) /* @@ -696,7 +696,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) /* @@ -743,5 +743,5 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) movdqu MSG, (%rsi) FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_final) diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index 3f0fc7dd87d7..c799838242a6 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -525,7 +525,7 @@ ddq_add_8: /* return updated IV */ vpshufb xbyteswap, xcounter, xcounter vmovdqu xcounter, (p_iv) - ret + RET .endm /* diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 4e3972570916..363699dd7220 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1594,7 +1594,7 @@ SYM_FUNC_START(aesni_gcm_dec) GCM_ENC_DEC dec GCM_COMPLETE arg10, arg11 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_dec) @@ -1683,7 +1683,7 @@ SYM_FUNC_START(aesni_gcm_enc) GCM_COMPLETE arg10, arg11 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_enc) /***************************************************************************** @@ -1701,7 +1701,7 @@ SYM_FUNC_START(aesni_gcm_init) FUNC_SAVE GCM_INIT %arg3, %arg4,%arg5, %arg6 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_init) /***************************************************************************** @@ -1716,7 +1716,7 @@ SYM_FUNC_START(aesni_gcm_enc_update) FUNC_SAVE GCM_ENC_DEC enc FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_enc_update) 
/***************************************************************************** @@ -1731,7 +1731,7 @@ SYM_FUNC_START(aesni_gcm_dec_update) FUNC_SAVE GCM_ENC_DEC dec FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_dec_update) /***************************************************************************** @@ -1746,7 +1746,7 @@ SYM_FUNC_START(aesni_gcm_finalize) FUNC_SAVE GCM_COMPLETE %arg3 %arg4 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_finalize) #endif @@ -1762,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a) pxor %xmm1, %xmm0 movaps %xmm0, (TKEYP) add $0x10, TKEYP - ret + RET SYM_FUNC_END(_key_expansion_256a) SYM_FUNC_END_ALIAS(_key_expansion_128) @@ -1787,7 +1787,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192a) shufps $0b01001110, %xmm2, %xmm1 movaps %xmm1, 0x10(TKEYP) add $0x20, TKEYP - ret + RET SYM_FUNC_END(_key_expansion_192a) SYM_FUNC_START_LOCAL(_key_expansion_192b) @@ -1806,7 +1806,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192b) movaps %xmm0, (TKEYP) add $0x10, TKEYP - ret + RET SYM_FUNC_END(_key_expansion_192b) SYM_FUNC_START_LOCAL(_key_expansion_256b) @@ -1818,7 +1818,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b) pxor %xmm1, %xmm2 movaps %xmm2, (TKEYP) add $0x10, TKEYP - ret + RET SYM_FUNC_END(_key_expansion_256b) /* @@ -1933,7 +1933,7 @@ SYM_FUNC_START(aesni_set_key) popl KEYP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_set_key) /* @@ -1957,7 +1957,7 @@ SYM_FUNC_START(aesni_enc) popl KEYP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_enc) /* @@ -2014,7 +2014,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc1) aesenc KEY, STATE movaps 0x70(TKEYP), KEY aesenclast KEY, STATE - ret + RET SYM_FUNC_END(_aesni_enc1) /* @@ -2122,7 +2122,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc4) aesenclast KEY, STATE2 aesenclast KEY, STATE3 aesenclast KEY, STATE4 - ret + RET SYM_FUNC_END(_aesni_enc4) /* @@ -2147,7 +2147,7 @@ SYM_FUNC_START(aesni_dec) popl KEYP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_dec) /* @@ -2204,7 +2204,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec1) aesdec KEY, 
STATE movaps 0x70(TKEYP), KEY aesdeclast KEY, STATE - ret + RET SYM_FUNC_END(_aesni_dec1) /* @@ -2312,7 +2312,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec4) aesdeclast KEY, STATE2 aesdeclast KEY, STATE3 aesdeclast KEY, STATE4 - ret + RET SYM_FUNC_END(_aesni_dec4) /* @@ -2372,7 +2372,7 @@ SYM_FUNC_START(aesni_ecb_enc) popl LEN #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_ecb_enc) /* @@ -2433,7 +2433,7 @@ SYM_FUNC_START(aesni_ecb_dec) popl LEN #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_ecb_dec) /* @@ -2477,7 +2477,7 @@ SYM_FUNC_START(aesni_cbc_enc) popl IVP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_cbc_enc) /* @@ -2570,7 +2570,7 @@ SYM_FUNC_START(aesni_cbc_dec) popl IVP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_cbc_dec) /* @@ -2627,7 +2627,7 @@ SYM_FUNC_START(aesni_cts_cbc_enc) popl IVP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_cts_cbc_enc) /* @@ -2688,7 +2688,7 @@ SYM_FUNC_START(aesni_cts_cbc_dec) popl IVP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_cts_cbc_dec) .pushsection .rodata @@ -2725,7 +2725,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc_init) mov $1, TCTR_LOW movq TCTR_LOW, INC movq CTR, TCTR_LOW - ret + RET SYM_FUNC_END(_aesni_inc_init) /* @@ -2753,7 +2753,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc) .Linc_low: movaps CTR, IV pshufb BSWAP_MASK, IV - ret + RET SYM_FUNC_END(_aesni_inc) /* @@ -2816,7 +2816,7 @@ SYM_FUNC_START(aesni_ctr_enc) movups IV, (IVP) .Lctr_enc_just_ret: FRAME_END - ret + RET SYM_FUNC_END(aesni_ctr_enc) #endif @@ -2932,7 +2932,7 @@ SYM_FUNC_START(aesni_xts_encrypt) popl IVP #endif FRAME_END - ret + RET .Lxts_enc_1x: add $64, LEN @@ -3092,7 +3092,7 @@ SYM_FUNC_START(aesni_xts_decrypt) popl IVP #endif FRAME_END - ret + RET .Lxts_dec_1x: add $64, LEN diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 98e3552b6e03..0852ab573fd3 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -1767,7 +1767,7 @@ 
SYM_FUNC_START(aesni_gcm_init_avx_gen2) FUNC_SAVE INIT GHASH_MUL_AVX, PRECOMPUTE_AVX FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_init_avx_gen2) ############################################################################### @@ -1788,15 +1788,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2) # must be 192 GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11 FUNC_RESTORE - ret + RET key_128_enc_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9 FUNC_RESTORE - ret + RET key_256_enc_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2) ############################################################################### @@ -1817,15 +1817,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2) # must be 192 GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11 FUNC_RESTORE - ret + RET key_128_dec_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9 FUNC_RESTORE - ret + RET key_256_dec_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2) ############################################################################### @@ -1846,15 +1846,15 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_gen2) # must be 192 GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4 FUNC_RESTORE - ret + RET key_128_finalize: GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4 FUNC_RESTORE - ret + RET key_256_finalize: GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) ############################################################################### @@ -2735,7 +2735,7 @@ 
SYM_FUNC_START(aesni_gcm_init_avx_gen4) FUNC_SAVE INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_init_avx_gen4) ############################################################################### @@ -2756,15 +2756,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4) # must be 192 GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11 FUNC_RESTORE - ret + RET key_128_enc_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9 FUNC_RESTORE - ret + RET key_256_enc_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4) ############################################################################### @@ -2785,15 +2785,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4) # must be 192 GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11 FUNC_RESTORE - ret + RET key_128_dec_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9 FUNC_RESTORE - ret + RET key_256_dec_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4) ############################################################################### @@ -2814,13 +2814,13 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_gen4) # must be 192 GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4 FUNC_RESTORE - ret + RET key_128_finalize4: GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4 FUNC_RESTORE - ret + RET key_256_finalize4: GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_finalize_avx_gen4) diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S index 
2ca79974f819..b50b35ff1fdb 100644 --- a/arch/x86/crypto/blake2s-core.S +++ b/arch/x86/crypto/blake2s-core.S @@ -171,7 +171,7 @@ SYM_FUNC_START(blake2s_compress_ssse3) movdqu %xmm1,0x10(%rdi) movdqu %xmm14,0x20(%rdi) .Lendofloop: - ret + RET SYM_FUNC_END(blake2s_compress_ssse3) #ifdef CONFIG_AS_AVX512 @@ -251,6 +251,6 @@ SYM_FUNC_START(blake2s_compress_avx512) vmovdqu %xmm1,0x10(%rdi) vmovdqu %xmm4,0x20(%rdi) vzeroupper - retq + RET SYM_FUNC_END(blake2s_compress_avx512) #endif /* CONFIG_AS_AVX512 */ diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 4222ac6d6584..802d71582689 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -135,10 +135,10 @@ SYM_FUNC_START(__blowfish_enc_blk) jnz .L__enc_xor; write_block(); - ret; + RET; .L__enc_xor: xor_block(); - ret; + RET; SYM_FUNC_END(__blowfish_enc_blk) SYM_FUNC_START(blowfish_dec_blk) @@ -170,7 +170,7 @@ SYM_FUNC_START(blowfish_dec_blk) movq %r11, %r12; - ret; + RET; SYM_FUNC_END(blowfish_dec_blk) /********************************************************************** @@ -322,14 +322,14 @@ SYM_FUNC_START(__blowfish_enc_blk_4way) popq %rbx; popq %r12; - ret; + RET; .L__enc_xor4: xor_block4(); popq %rbx; popq %r12; - ret; + RET; SYM_FUNC_END(__blowfish_enc_blk_4way) SYM_FUNC_START(blowfish_dec_blk_4way) @@ -364,5 +364,5 @@ SYM_FUNC_START(blowfish_dec_blk_4way) popq %rbx; popq %r12; - ret; + RET; SYM_FUNC_END(blowfish_dec_blk_4way) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index e2a0e0f4bf9d..2e1658ddbe1a 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -192,7 +192,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %rcx, (%r9)); - ret; + RET; 
SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 @@ -200,7 +200,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_a roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, %rax, (%r9)); - ret; + RET; SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* @@ -778,7 +778,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16) %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax)); FRAME_END - ret; + RET; .align 8 .Lenc_max32: @@ -865,7 +865,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk16) %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); FRAME_END - ret; + RET; .align 8 .Ldec_max32: @@ -906,7 +906,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way) %xmm8, %rsi); FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ecb_enc_16way) SYM_FUNC_START(camellia_ecb_dec_16way) @@ -936,7 +936,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way) %xmm8, %rsi); FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ecb_dec_16way) SYM_FUNC_START(camellia_cbc_dec_16way) @@ -987,5 +987,5 @@ SYM_FUNC_START(camellia_cbc_dec_16way) %xmm8, %rsi); FRAME_END - ret; + RET; SYM_FUNC_END(camellia_cbc_dec_16way) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 706f70829a07..0e4e9abbf4de 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -226,7 +226,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, %rcx, (%r9)); - ret; + RET; SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 @@ -234,7 +234,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_a roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, %ymm12, %ymm13, 
%ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, %rax, (%r9)); - ret; + RET; SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* @@ -814,7 +814,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32) %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax)); FRAME_END - ret; + RET; .align 8 .Lenc_max32: @@ -901,7 +901,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk32) %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); FRAME_END - ret; + RET; .align 8 .Ldec_max32: @@ -946,7 +946,7 @@ SYM_FUNC_START(camellia_ecb_enc_32way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ecb_enc_32way) SYM_FUNC_START(camellia_ecb_dec_32way) @@ -980,7 +980,7 @@ SYM_FUNC_START(camellia_ecb_dec_32way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ecb_dec_32way) SYM_FUNC_START(camellia_cbc_dec_32way) @@ -1047,5 +1047,5 @@ SYM_FUNC_START(camellia_cbc_dec_32way) addq $(16 * 32), %rsp; FRAME_END - ret; + RET; SYM_FUNC_END(camellia_cbc_dec_32way) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 1372e6408850..347c059f5940 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -213,13 +213,13 @@ SYM_FUNC_START(__camellia_enc_blk) enc_outunpack(mov, RT1); movq RR12, %r12; - ret; + RET; .L__enc_xor: enc_outunpack(xor, RT1); movq RR12, %r12; - ret; + RET; SYM_FUNC_END(__camellia_enc_blk) SYM_FUNC_START(camellia_dec_blk) @@ -257,7 +257,7 @@ SYM_FUNC_START(camellia_dec_blk) dec_outunpack(); movq RR12, %r12; - ret; + RET; SYM_FUNC_END(camellia_dec_blk) /********************************************************************** @@ -448,14 +448,14 @@ SYM_FUNC_START(__camellia_enc_blk_2way) movq RR12, %r12; popq %rbx; - ret; + RET; .L__enc2_xor: enc_outunpack2(xor, RT2); movq RR12, %r12; popq %rbx; - ret; + RET; SYM_FUNC_END(__camellia_enc_blk_2way) SYM_FUNC_START(camellia_dec_blk_2way) @@ -495,5 +495,5 @@ SYM_FUNC_START(camellia_dec_blk_2way) movq RR12, %r12; movq RXOR, %rbx; 
- ret; + RET; SYM_FUNC_END(camellia_dec_blk_2way) diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 8a6181b08b59..b258af420c92 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -279,7 +279,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16) outunpack_blocks(RR3, RL3, RTMP, RX, RKM); outunpack_blocks(RR4, RL4, RTMP, RX, RKM); - ret; + RET; SYM_FUNC_END(__cast5_enc_blk16) .align 16 @@ -352,7 +352,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16) outunpack_blocks(RR3, RL3, RTMP, RX, RKM); outunpack_blocks(RR4, RL4, RTMP, RX, RKM); - ret; + RET; .L__skip_dec: vpsrldq $4, RKR, RKR; @@ -393,7 +393,7 @@ SYM_FUNC_START(cast5_ecb_enc_16way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast5_ecb_enc_16way) SYM_FUNC_START(cast5_ecb_dec_16way) @@ -431,7 +431,7 @@ SYM_FUNC_START(cast5_ecb_dec_16way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast5_ecb_dec_16way) SYM_FUNC_START(cast5_cbc_dec_16way) @@ -483,7 +483,7 @@ SYM_FUNC_START(cast5_cbc_dec_16way) popq %r15; popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(cast5_cbc_dec_16way) SYM_FUNC_START(cast5_ctr_16way) @@ -559,5 +559,5 @@ SYM_FUNC_START(cast5_ctr_16way) popq %r15; popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(cast5_ctr_16way) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index fbddcecc3e3f..82b716fd5dba 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -289,7 +289,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8) outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); - ret; + RET; SYM_FUNC_END(__cast6_enc_blk8) .align 8 @@ -336,7 +336,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8) outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); - ret; + RET; SYM_FUNC_END(__cast6_dec_blk8) 
SYM_FUNC_START(cast6_ecb_enc_8way) @@ -359,7 +359,7 @@ SYM_FUNC_START(cast6_ecb_enc_8way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_ecb_enc_8way) SYM_FUNC_START(cast6_ecb_dec_8way) @@ -382,7 +382,7 @@ SYM_FUNC_START(cast6_ecb_dec_8way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_ecb_dec_8way) SYM_FUNC_START(cast6_cbc_dec_8way) @@ -408,5 +408,5 @@ SYM_FUNC_START(cast6_cbc_dec_8way) popq %r15; popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_cbc_dec_8way) diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S index ee9a40ab4109..f3d8fc018249 100644 --- a/arch/x86/crypto/chacha-avx2-x86_64.S +++ b/arch/x86/crypto/chacha-avx2-x86_64.S @@ -193,7 +193,7 @@ SYM_FUNC_START(chacha_2block_xor_avx2) .Ldone2: vzeroupper - ret + RET .Lxorpart2: # xor remaining bytes from partial register into output @@ -498,7 +498,7 @@ SYM_FUNC_START(chacha_4block_xor_avx2) .Ldone4: vzeroupper - ret + RET .Lxorpart4: # xor remaining bytes from partial register into output @@ -992,7 +992,7 @@ SYM_FUNC_START(chacha_8block_xor_avx2) .Ldone8: vzeroupper lea -8(%r10),%rsp - ret + RET .Lxorpart8: # xor remaining bytes from partial register into output diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S index bb193fde123a..946f74dd6fba 100644 --- a/arch/x86/crypto/chacha-avx512vl-x86_64.S +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_2block_xor_avx512vl) .Ldone2: vzeroupper - ret + RET .Lxorpart2: # xor remaining bytes from partial register into output @@ -432,7 +432,7 @@ SYM_FUNC_START(chacha_4block_xor_avx512vl) .Ldone4: vzeroupper - ret + RET .Lxorpart4: # xor remaining bytes from partial register into output @@ -812,7 +812,7 @@ SYM_FUNC_START(chacha_8block_xor_avx512vl) .Ldone8: vzeroupper - ret + RET .Lxorpart8: # xor remaining bytes from partial register into output diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S 
b/arch/x86/crypto/chacha-ssse3-x86_64.S index ca1788bfee16..7111949cd5b9 100644 --- a/arch/x86/crypto/chacha-ssse3-x86_64.S +++ b/arch/x86/crypto/chacha-ssse3-x86_64.S @@ -108,7 +108,7 @@ SYM_FUNC_START_LOCAL(chacha_permute) sub $2,%r8d jnz .Ldoubleround - ret + RET SYM_FUNC_END(chacha_permute) SYM_FUNC_START(chacha_block_xor_ssse3) @@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_block_xor_ssse3) .Ldone: FRAME_END - ret + RET .Lxorpart: # xor remaining bytes from partial register into output @@ -217,7 +217,7 @@ SYM_FUNC_START(hchacha_block_ssse3) movdqu %xmm3,0x10(%rsi) FRAME_END - ret + RET SYM_FUNC_END(hchacha_block_ssse3) SYM_FUNC_START(chacha_4block_xor_ssse3) @@ -762,7 +762,7 @@ SYM_FUNC_START(chacha_4block_xor_ssse3) .Ldone4: lea -8(%r10),%rsp - ret + RET .Lxorpart4: # xor remaining bytes from partial register into output diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S index 6e7d4c4d3208..c392a6edbfff 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -236,5 +236,5 @@ fold_64: pxor %xmm2, %xmm1 pextrd $0x01, %xmm1, %eax - ret + RET SYM_FUNC_END(crc32_pclmul_le_16) diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index ac1f303eed0f..80c0d22fc42c 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -306,7 +306,7 @@ do_return: popq %rsi popq %rdi popq %rbx - ret + RET SYM_FUNC_END(crc_pcl) .section .rodata, "a", @progbits diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index b2533d63030e..721474abfb71 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -257,7 +257,7 @@ SYM_FUNC_START(crc_t10dif_pcl) # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0. 
pextrw $0, %xmm0, %eax - ret + RET .align 16 .Lless_than_256_bytes: diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index fac0fdc3f25d..f4c760f4cade 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -243,7 +243,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk) popq %r12; popq %rbx; - ret; + RET; SYM_FUNC_END(des3_ede_x86_64_crypt_blk) /*********************************************************************** @@ -528,7 +528,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way) popq %r12; popq %rbx; - ret; + RET; SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way) .section .rodata, "a", @progbits diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 99ac25e18e09..2bf871899920 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -85,7 +85,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble) psrlq $1, T2 pxor T2, T1 pxor T1, DATA - ret + RET SYM_FUNC_END(__clmul_gf128mul_ble) /* void clmul_ghash_mul(char *dst, const u128 *shash) */ @@ -99,7 +99,7 @@ SYM_FUNC_START(clmul_ghash_mul) pshufb BSWAP, DATA movups DATA, (%rdi) FRAME_END - ret + RET SYM_FUNC_END(clmul_ghash_mul) /* @@ -128,5 +128,5 @@ SYM_FUNC_START(clmul_ghash_update) movups DATA, (%rdi) .Lupdate_just_ret: FRAME_END - ret + RET SYM_FUNC_END(clmul_ghash_update) diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S index b22c7b936272..6a0b15e7196a 100644 --- a/arch/x86/crypto/nh-avx2-x86_64.S +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -153,5 +153,5 @@ SYM_FUNC_START(nh_avx2) vpaddq T1, T0, T0 vpaddq T4, T0, T0 vmovdqu T0, (HASH) - ret + RET SYM_FUNC_END(nh_avx2) diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S index d7ae22dd6683..34c567bbcb4f 100644 --- a/arch/x86/crypto/nh-sse2-x86_64.S +++ b/arch/x86/crypto/nh-sse2-x86_64.S @@ -119,5 +119,5 @@ SYM_FUNC_START(nh_sse2) paddq PASS2_SUMS, T1 movdqu 
T0, 0x00(HASH) movdqu T1, 0x10(HASH) - ret + RET SYM_FUNC_END(nh_sse2) diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index b7ee24df7fba..82f2313f512b 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -601,7 +601,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_enc_blk8_avx) .align 8 @@ -655,7 +655,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_dec_blk8_avx) SYM_FUNC_START(serpent_ecb_enc_8way_avx) @@ -673,7 +673,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx) store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ecb_enc_8way_avx) SYM_FUNC_START(serpent_ecb_dec_8way_avx) @@ -691,7 +691,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx) store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ecb_dec_8way_avx) SYM_FUNC_START(serpent_cbc_dec_8way_avx) @@ -709,5 +709,5 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx) store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_cbc_dec_8way_avx) diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 9161b6e441f3..8ea34c9b9316 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -601,7 +601,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16) write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_enc_blk16) .align 8 @@ -655,7 +655,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk16) write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); write_blocks(RC2, RD2, 
RB2, RE2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_dec_blk16) SYM_FUNC_START(serpent_ecb_enc_16way) @@ -677,7 +677,7 @@ SYM_FUNC_START(serpent_ecb_enc_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ecb_enc_16way) SYM_FUNC_START(serpent_ecb_dec_16way) @@ -699,7 +699,7 @@ SYM_FUNC_START(serpent_ecb_dec_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ecb_dec_16way) SYM_FUNC_START(serpent_cbc_dec_16way) @@ -722,5 +722,5 @@ SYM_FUNC_START(serpent_cbc_dec_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_cbc_dec_16way) diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index 6379b99cb722..8ccb03ad7cef 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -553,12 +553,12 @@ SYM_FUNC_START(__serpent_enc_blk_4way) write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); - ret; + RET; .L__enc_xor4: xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); - ret; + RET; SYM_FUNC_END(__serpent_enc_blk_4way) SYM_FUNC_START(serpent_dec_blk_4way) @@ -612,5 +612,5 @@ SYM_FUNC_START(serpent_dec_blk_4way) movl arg_dst(%esp), %eax; write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); - ret; + RET; SYM_FUNC_END(serpent_dec_blk_4way) diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index efb6dc17dc90..e0998a011d1d 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ -675,13 +675,13 @@ SYM_FUNC_START(__serpent_enc_blk_8way) write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - ret; + RET; .L__enc_xor8: xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_enc_blk_8way) SYM_FUNC_START(serpent_dec_blk_8way) @@ -735,5 +735,5 @@ SYM_FUNC_START(serpent_dec_blk_8way) write_blocks(%rsi, RC1, RD1, 
RB1, RE1, RK0, RK1, RK2); write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(serpent_dec_blk_8way) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 5eed620f4676..a96b2fd26dab 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -674,7 +674,7 @@ _loop3: pop %r12 pop %rbx - ret + RET SYM_FUNC_END(\name) .endm diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 5d8415f482bd..2f94ec0e763b 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -290,7 +290,7 @@ SYM_FUNC_START(sha1_ni_transform) mov %rbp, %rsp pop %rbp - ret + RET SYM_FUNC_END(sha1_ni_transform) .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index d25668d2a1e9..263f916362e0 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -99,7 +99,7 @@ pop %rbp pop %r12 pop %rbx - ret + RET SYM_FUNC_END(\name) .endm diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 4739cd31b9db..3baa1ec39097 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -458,7 +458,7 @@ done_hash: popq %r13 popq %r12 popq %rbx - ret + RET SYM_FUNC_END(sha256_transform_avx) .section .rodata.cst256.K256, "aM", @progbits, 256 diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 4087f7432a7e..9bcdbc47b8b4 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -710,7 +710,7 @@ done_hash: popq %r13 popq %r12 popq %rbx - ret + RET SYM_FUNC_END(sha256_transform_rorx) .section .rodata.cst512.K256, "aM", @progbits, 512 diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index ddfa863b4ee3..c4a5db612c32 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ 
b/arch/x86/crypto/sha256-ssse3-asm.S @@ -472,7 +472,7 @@ done_hash: popq %r12 popq %rbx - ret + RET SYM_FUNC_END(sha256_transform_ssse3) .section .rodata.cst256.K256, "aM", @progbits, 256 diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 7abade04a3a3..94d50dd27cb5 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -326,7 +326,7 @@ SYM_FUNC_START(sha256_ni_transform) .Ldone_hash: - ret + RET SYM_FUNC_END(sha256_ni_transform) .section .rodata.cst256.K256, "aM", @progbits, 256 diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 3d8f0fd4eea8..1fefe6dd3a9e 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -361,7 +361,7 @@ updateblock: pop %rbx nowork: - ret + RET SYM_FUNC_END(sha512_transform_avx) ######################################################################## diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 072cb0f0deae..5cdaab7d6901 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -679,7 +679,7 @@ done_hash: pop %r12 pop %rbx - ret + RET SYM_FUNC_END(sha512_transform_rorx) ######################################################################## diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index bd51c9070bed..b84c22e06c5f 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -363,7 +363,7 @@ updateblock: pop %rbx nowork: - ret + RET SYM_FUNC_END(sha512_transform_ssse3) ######################################################################## diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index 1cc72b4804fa..4767ab61ff48 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -246,7 +246,7 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4) .Lblk4_store_output_done: vzeroall; FRAME_END - 
ret; + RET; SYM_FUNC_END(sm4_aesni_avx_crypt4) .align 8 @@ -356,7 +356,7 @@ SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) vpshufb RTMP2, RB3, RB3; FRAME_END - ret; + RET; SYM_FUNC_END(__sm4_crypt_blk8) /* @@ -412,7 +412,7 @@ SYM_FUNC_START(sm4_aesni_avx_crypt8) .Lblk8_store_output_done: vzeroall; FRAME_END - ret; + RET; SYM_FUNC_END(sm4_aesni_avx_crypt8) /* @@ -487,7 +487,7 @@ SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) vzeroall; FRAME_END - ret; + RET; SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) /* @@ -537,7 +537,7 @@ SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) vzeroall; FRAME_END - ret; + RET; SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) /* @@ -590,5 +590,5 @@ SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) vzeroall; FRAME_END - ret; + RET; SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8) diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S index 9c5d3f3ad45a..4732fe8bb65b 100644 --- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -268,7 +268,7 @@ SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) vpshufb RTMP2, RB3, RB3; FRAME_END - ret; + RET; SYM_FUNC_END(__sm4_crypt_blk16) #define inc_le128(x, minus_one, tmp) \ @@ -387,7 +387,7 @@ SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) vzeroall; FRAME_END - ret; + RET; SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) /* @@ -441,7 +441,7 @@ SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) vzeroall; FRAME_END - ret; + RET; SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) /* @@ -497,5 +497,5 @@ SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) vzeroall; FRAME_END - ret; + RET; SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 37e63b3c664e..31f9b2ec3857 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -267,7 +267,7 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8) outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); outunpack_blocks(RC2, 
RD2, RA2, RB2, RK1, RX0, RY0, RK2); - ret; + RET; SYM_FUNC_END(__twofish_enc_blk8) .align 8 @@ -307,7 +307,7 @@ SYM_FUNC_START_LOCAL(__twofish_dec_blk8) outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); - ret; + RET; SYM_FUNC_END(__twofish_dec_blk8) SYM_FUNC_START(twofish_ecb_enc_8way) @@ -327,7 +327,7 @@ SYM_FUNC_START(twofish_ecb_enc_8way) store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); FRAME_END - ret; + RET; SYM_FUNC_END(twofish_ecb_enc_8way) SYM_FUNC_START(twofish_ecb_dec_8way) @@ -347,7 +347,7 @@ SYM_FUNC_START(twofish_ecb_dec_8way) store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); FRAME_END - ret; + RET; SYM_FUNC_END(twofish_ecb_dec_8way) SYM_FUNC_START(twofish_cbc_dec_8way) @@ -372,5 +372,5 @@ SYM_FUNC_START(twofish_cbc_dec_8way) popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(twofish_cbc_dec_8way) diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index a6f09e4f2e46..3abcad661884 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -260,7 +260,7 @@ SYM_FUNC_START(twofish_enc_blk) pop %ebx pop %ebp mov $1, %eax - ret + RET SYM_FUNC_END(twofish_enc_blk) SYM_FUNC_START(twofish_dec_blk) @@ -317,5 +317,5 @@ SYM_FUNC_START(twofish_dec_blk) pop %ebx pop %ebp mov $1, %eax - ret + RET SYM_FUNC_END(twofish_dec_blk) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index bca4cea757ce..d2288bf38a8a 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -258,7 +258,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way) popq %rbx; popq %r12; popq %r13; - ret; + RET; .L__enc_xor3: outunpack_enc3(xor); @@ -266,7 +266,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way) popq %rbx; popq %r12; popq %r13; - ret; + RET; SYM_FUNC_END(__twofish_enc_blk_3way) SYM_FUNC_START(twofish_dec_blk_3way) @@ -301,5 +301,5 @@ 
SYM_FUNC_START(twofish_dec_blk_3way) popq %rbx; popq %r12; popq %r13; - ret; + RET; SYM_FUNC_END(twofish_dec_blk_3way) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index d2e56232494a..775af290cd19 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -252,7 +252,7 @@ SYM_FUNC_START(twofish_enc_blk) popq R1 movl $1,%eax - ret + RET SYM_FUNC_END(twofish_enc_blk) SYM_FUNC_START(twofish_dec_blk) @@ -304,5 +304,5 @@ SYM_FUNC_START(twofish_dec_blk) popq R1 movl $1,%eax - ret + RET SYM_FUNC_END(twofish_dec_blk) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ccb9d32768f3..00413e37feee 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -740,7 +740,7 @@ SYM_FUNC_START(schedule_tail_wrapper) popl %eax FRAME_END - ret + RET SYM_FUNC_END(schedule_tail_wrapper) .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 97b1f84bb53f..e23319ad3f42 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -738,7 +738,7 @@ SYM_FUNC_START(asm_load_gs_index) 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE swapgs FRAME_END - ret + RET SYM_FUNC_END(asm_load_gs_index) EXPORT_SYMBOL(asm_load_gs_index) @@ -889,7 +889,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * is needed here. 
*/ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - ret + RET .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ @@ -910,7 +910,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) .Lparanoid_kernel_gsbase: FENCE_SWAPGS_KERNEL_ENTRY - ret + RET SYM_CODE_END(paranoid_entry) /* @@ -989,7 +989,7 @@ SYM_CODE_START_LOCAL(error_entry) movq %rax, %rsp /* switch stack */ ENCODE_FRAME_POINTER pushq %r12 - ret + RET /* * There are two places in the kernel that can potentially fault with @@ -1020,7 +1020,7 @@ SYM_CODE_START_LOCAL(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY - ret + RET .Lbstep_iret: /* Fix truncated RIP */ diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index f1f96d4d8cd6..7591bab060f7 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -24,7 +24,7 @@ SYM_CODE_START_NOALIGN(\name) popl %edx popl %ecx popl %eax - ret + RET _ASM_NOKPROBE(\name) SYM_CODE_END(\name) .endm diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 496b11ec469d..505b488fcc65 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -50,7 +50,7 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) popq %rsi popq %rdi popq %rbp - ret + RET _ASM_NOKPROBE(__thunk_restore) SYM_CODE_END(__thunk_restore) #endif diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 6ddd7a937b3e..d33c6513fd2c 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -78,7 +78,7 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) popl %ecx CFI_RESTORE ecx CFI_ADJUST_CFA_OFFSET -4 - ret + RET CFI_ENDPROC .size __kernel_vsyscall,.-__kernel_vsyscall diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S index 99dafac992e2..d77d278ee9dd 100644 --- a/arch/x86/entry/vdso/vsgx.S +++ b/arch/x86/entry/vdso/vsgx.S @@ -81,7 +81,7 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave) pop %rbx leave 
.cfi_def_cfa %rsp, 8 - ret + RET /* The out-of-line code runs with the pre-leave stack frame. */ .cfi_def_cfa %rbp, 16 diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S index 2e203f3a25a7..15e35159ebb6 100644 --- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -19,17 +19,17 @@ __vsyscall_page: mov $__NR_gettimeofday, %rax syscall - ret + RET .balign 1024, 0xcc mov $__NR_time, %rax syscall - ret + RET .balign 1024, 0xcc mov $__NR_getcpu, %rax syscall - ret + RET .balign 4096, 0xcc diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index daf88f8143c5..cf69081073b5 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -60,7 +60,7 @@ save_registers: popl saved_context_eflags movl $ret_point, saved_eip - ret + RET restore_registers: @@ -70,7 +70,7 @@ restore_registers: movl saved_context_edi, %edi pushl saved_context_eflags popfl - ret + RET SYM_CODE_START(do_suspend_lowlevel) call save_processor_state @@ -86,7 +86,7 @@ SYM_CODE_START(do_suspend_lowlevel) ret_point: call restore_registers call restore_processor_state - ret + RET SYM_CODE_END(do_suspend_lowlevel) .data diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index e405fe1a8bf4..a0ed0e4a2c0c 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -19,7 +19,7 @@ #endif SYM_FUNC_START(__fentry__) - ret + RET SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) @@ -84,7 +84,7 @@ ftrace_graph_call: /* This is weak to keep gas from relaxing the jumps */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) - ret + RET SYM_CODE_END(ftrace_caller) SYM_CODE_START(ftrace_regs_caller) @@ -177,7 +177,7 @@ SYM_CODE_START(ftrace_graph_caller) popl %edx popl %ecx popl %eax - ret + RET SYM_CODE_END(ftrace_graph_caller) .globl return_to_handler diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 
7a879901f103..11ac028e30e4 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -132,7 +132,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE SYM_FUNC_START(__fentry__) - retq + RET SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) @@ -176,11 +176,11 @@ SYM_FUNC_END(ftrace_caller); SYM_FUNC_START(ftrace_epilogue) /* * This is weak to keep gas from relaxing the jumps. - * It is also used to copy the retq for trampolines. + * It is also used to copy the RET for trampolines. */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) UNWIND_HINT_FUNC - retq + RET SYM_FUNC_END(ftrace_epilogue) SYM_FUNC_START(ftrace_regs_caller) @@ -284,7 +284,7 @@ SYM_FUNC_START(__fentry__) jnz trace SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) - retq + RET trace: /* save_mcount_regs fills in first two parameters */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d8c64dab0efe..eb8656bac99b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -340,7 +340,7 @@ SYM_FUNC_END(startup_32_smp) __INIT setup_once: andl $0,setup_once_ref /* Once is enough, thanks */ - ret + RET SYM_FUNC_START(early_idt_handler_array) # 36(%esp) %eflags diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S index 760e1f293093..aaf9e776f323 100644 --- a/arch/x86/kernel/irqflags.S +++ b/arch/x86/kernel/irqflags.S @@ -11,7 +11,7 @@ SYM_FUNC_START(native_save_fl) pushf pop %_ASM_AX - ret + RET SYM_FUNC_END(native_save_fl) .popsection EXPORT_SYMBOL(native_save_fl) diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index f469153eca8a..fcc8a7699103 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -91,7 +91,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movl %edi, %eax addl $(identity_mapped - relocate_kernel), %eax pushl %eax - ret + RET SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -159,7 +159,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl 
%edx, %edx xorl %esi, %esi xorl %ebp, %ebp - ret + RET 1: popl %edx movl CP_PA_SWAP_PAGE(%edi), %esp @@ -190,7 +190,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movl %edi, %eax addl $(virtual_mapped - relocate_kernel), %eax pushl %eax - ret + RET SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -208,7 +208,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popl %edi popl %esi popl %ebx - ret + RET SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -271,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) popl %edi popl %ebx popl %ebp - ret + RET SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index c8fe74a28143..399f075ccdc4 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -104,7 +104,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* jump to identity mapped page */ addq $(identity_mapped - relocate_kernel), %r8 pushq %r8 - ret + RET SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -191,7 +191,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %r14d, %r14d xorl %r15d, %r15d - ret + RET 1: popq %rdx @@ -210,7 +210,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) call swap_pages movq $virtual_mapped, %rax pushq %rax - ret + RET SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -231,7 +231,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popq %r12 popq %rbp popq %rbx - ret + RET SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -288,7 +288,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) lea PAGE_SIZE(%rax), %rsi jmp 0b 3: - ret + RET SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S index ee04941a6546..3355e27c69eb 100644 --- a/arch/x86/kernel/sev_verify_cbit.S +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -85,5 +85,5 @@ 
SYM_FUNC_START(sev_verify_cbit) #endif /* Return page-table pointer */ movq %rdi, %rax - ret + RET SYM_FUNC_END(sev_verify_cbit) diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 641f0fe1e5b4..1258a5872d12 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -132,9 +132,9 @@ SYM_FUNC_START_LOCAL(verify_cpu) .Lverify_cpu_no_longmode: popf # Restore caller passed flags movl $1,%eax - ret + RET .Lverify_cpu_sse_ok: popf # Restore caller passed flags xorl %eax, %eax - ret + RET SYM_FUNC_END(verify_cpu) diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 4fa17df123cd..dfaeb47fcf2a 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -148,7 +148,7 @@ SYM_FUNC_START(__svm_vcpu_run) pop %edi #endif pop %_ASM_BP - ret + RET 3: cmpb $0, kvm_rebooting jne 2b @@ -202,7 +202,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) pop %edi #endif pop %_ASM_BP - ret + RET 3: cmpb $0, kvm_rebooting jne 2b diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 3a6461694fc2..435c187927c4 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -49,14 +49,14 @@ SYM_FUNC_START_LOCAL(vmx_vmenter) je 2f 1: vmresume - ret + RET 2: vmlaunch - ret + RET 3: cmpb $0, kvm_rebooting je 4f - ret + RET 4: ud2 _ASM_EXTABLE(1b, 3b) @@ -89,7 +89,7 @@ SYM_FUNC_START(vmx_vmexit) pop %_ASM_AX .Lvmexit_skip_rsb: #endif - ret + RET SYM_FUNC_END(vmx_vmexit) /** @@ -228,7 +228,7 @@ SYM_FUNC_START(__vmx_vcpu_run) pop %edi #endif pop %_ASM_BP - ret + RET /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. 
*/ 2: mov $1, %eax @@ -293,7 +293,7 @@ SYM_FUNC_START(vmread_error_trampoline) pop %_ASM_AX pop %_ASM_BP - ret + RET SYM_FUNC_END(vmread_error_trampoline) SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff) @@ -326,5 +326,5 @@ SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff) */ mov %_ASM_BP, %_ASM_SP pop %_ASM_BP - ret + RET SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff) diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 4ad6b97fdb6f..e768815e58ae 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -30,7 +30,7 @@ SYM_FUNC_START(atomic64_##op##_386); \ #define RET_IRQ_RESTORE \ IRQ_RESTORE v; \ - ret + RET #define v %ecx BEGIN_IRQ_SAVE(read) diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index ce6935690766..90afb488b396 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -18,7 +18,7 @@ SYM_FUNC_START(atomic64_read_cx8) read64 %ecx - ret + RET SYM_FUNC_END(atomic64_read_cx8) SYM_FUNC_START(atomic64_set_cx8) @@ -28,7 +28,7 @@ SYM_FUNC_START(atomic64_set_cx8) cmpxchg8b (%esi) jne 1b - ret + RET SYM_FUNC_END(atomic64_set_cx8) SYM_FUNC_START(atomic64_xchg_cx8) @@ -37,7 +37,7 @@ SYM_FUNC_START(atomic64_xchg_cx8) cmpxchg8b (%esi) jne 1b - ret + RET SYM_FUNC_END(atomic64_xchg_cx8) .macro addsub_return func ins insc @@ -68,7 +68,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8) popl %esi popl %ebx popl %ebp - ret + RET SYM_FUNC_END(atomic64_\func\()_return_cx8) .endm @@ -93,7 +93,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8) movl %ebx, %eax movl %ecx, %edx popl %ebx - ret + RET SYM_FUNC_END(atomic64_\func\()_return_cx8) .endm @@ -118,7 +118,7 @@ SYM_FUNC_START(atomic64_dec_if_positive_cx8) movl %ebx, %eax movl %ecx, %edx popl %ebx - ret + RET SYM_FUNC_END(atomic64_dec_if_positive_cx8) SYM_FUNC_START(atomic64_add_unless_cx8) @@ -149,7 +149,7 @@ SYM_FUNC_START(atomic64_add_unless_cx8) addl $8, %esp popl %ebx popl %ebp - ret + RET 4: cmpl %edx, 4(%esp) jne 2b @@ 
-176,5 +176,5 @@ SYM_FUNC_START(atomic64_inc_not_zero_cx8) movl $1, %eax 3: popl %ebx - ret + RET SYM_FUNC_END(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4304320e51f4..929ad1747dea 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -127,7 +127,7 @@ SYM_FUNC_START(csum_partial) 8: popl %ebx popl %esi - ret + RET SYM_FUNC_END(csum_partial) #else @@ -245,7 +245,7 @@ SYM_FUNC_START(csum_partial) 90: popl %ebx popl %esi - ret + RET SYM_FUNC_END(csum_partial) #endif @@ -371,7 +371,7 @@ EXC( movb %cl, (%edi) ) popl %esi popl %edi popl %ecx # equivalent to addl $4,%esp - ret + RET SYM_FUNC_END(csum_partial_copy_generic) #else @@ -447,7 +447,7 @@ EXC( movb %dl, (%edi) ) popl %esi popl %edi popl %ebx - ret + RET SYM_FUNC_END(csum_partial_copy_generic) #undef ROUND diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index c4c7dd115953..fe59b8ac4fcc 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -17,7 +17,7 @@ SYM_FUNC_START(clear_page_rep) movl $4096/8,%ecx xorl %eax,%eax rep stosq - ret + RET SYM_FUNC_END(clear_page_rep) EXPORT_SYMBOL_GPL(clear_page_rep) @@ -39,7 +39,7 @@ SYM_FUNC_START(clear_page_orig) leaq 64(%rdi),%rdi jnz .Lloop nop - ret + RET SYM_FUNC_END(clear_page_orig) EXPORT_SYMBOL_GPL(clear_page_orig) @@ -47,6 +47,6 @@ SYM_FUNC_START(clear_page_erms) movl $4096,%ecx xorl %eax,%eax rep stosb - ret + RET SYM_FUNC_END(clear_page_erms) EXPORT_SYMBOL_GPL(clear_page_erms) diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 3542502faa3b..33c70c0160ea 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -37,11 +37,11 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) popfq mov $1, %al - ret + RET .Lnot_same: popfq xor %al,%al - ret + RET SYM_FUNC_END(this_cpu_cmpxchg16b_emu) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index ca01ed6029f4..6a912d58fecc 100644 
--- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -32,7 +32,7 @@ SYM_FUNC_START(cmpxchg8b_emu) movl %ecx, 4(%esi) popfl - ret + RET .Lnot_same: movl (%esi), %eax @@ -40,7 +40,7 @@ SYM_FUNC_START(cmpxchg8b_emu) movl 4(%esi), %edx popfl - ret + RET SYM_FUNC_END(cmpxchg8b_emu) EXPORT_SYMBOL(cmpxchg8b_emu) diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S index 7334055157ba..23009792e19c 100644 --- a/arch/x86/lib/copy_mc_64.S +++ b/arch/x86/lib/copy_mc_64.S @@ -77,7 +77,7 @@ SYM_FUNC_START(copy_mc_fragile) .L_done_memcpy_trap: xorl %eax, %eax .L_done: - ret + RET SYM_FUNC_END(copy_mc_fragile) .section .fixup, "ax" @@ -132,7 +132,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_string) rep movsb /* Copy successful. Return zero */ xorl %eax, %eax - ret + RET SYM_FUNC_END(copy_mc_enhanced_fast_string) .section .fixup, "ax" @@ -145,7 +145,7 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string) * user-copy routines. */ movq %rcx, %rax - ret + RET .previous diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index db4b4f9197c7..30ea644bf446 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -17,7 +17,7 @@ SYM_FUNC_START(copy_page) ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD movl $4096/8, %ecx rep movsq - ret + RET SYM_FUNC_END(copy_page) EXPORT_SYMBOL(copy_page) @@ -85,5 +85,5 @@ SYM_FUNC_START_LOCAL(copy_page_regs) movq (%rsp), %rbx movq 1*8(%rsp), %r12 addq $2*8, %rsp - ret + RET SYM_FUNC_END(copy_page_regs) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 2797e630b9b1..8fb562f1dfaf 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -105,7 +105,7 @@ SYM_FUNC_START(copy_user_generic_unrolled) jnz 21b 23: xor %eax,%eax ASM_CLAC - ret + RET .section .fixup,"ax" 30: shll $6,%ecx @@ -173,7 +173,7 @@ SYM_FUNC_START(copy_user_generic_string) movsb xorl %eax,%eax ASM_CLAC - ret + RET .section .fixup,"ax" 11: leal (%rdx,%rcx,8),%ecx @@ -207,7 
+207,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string) movsb xorl %eax,%eax ASM_CLAC - ret + RET .section .fixup,"ax" 12: movl %ecx,%edx /* ecx is zerorest also */ @@ -237,7 +237,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) 1: rep movsb 2: mov %ecx,%eax ASM_CLAC - ret + RET _ASM_EXTABLE_CPY(1b, 2b) SYM_CODE_END(.Lcopy_user_handle_tail) @@ -348,7 +348,7 @@ SYM_FUNC_START(__copy_user_nocache) xorl %eax,%eax ASM_CLAC sfence - ret + RET .section .fixup,"ax" .L_fixup_4x8b_copy: diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 1fbd8ee9642d..d9e16a2cf285 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -201,7 +201,7 @@ SYM_FUNC_START(csum_partial_copy_generic) movq 3*8(%rsp), %r13 movq 4*8(%rsp), %r15 addq $5*8, %rsp - ret + RET .Lshort: movl %ecx, %r10d jmp .L1 diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index fa1bc2104b32..b70d98d79a9d 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -57,7 +57,7 @@ SYM_FUNC_START(__get_user_1) 1: movzbl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_1) EXPORT_SYMBOL(__get_user_1) @@ -71,7 +71,7 @@ SYM_FUNC_START(__get_user_2) 2: movzwl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_2) EXPORT_SYMBOL(__get_user_2) @@ -85,7 +85,7 @@ SYM_FUNC_START(__get_user_4) 3: movl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) @@ -100,7 +100,7 @@ SYM_FUNC_START(__get_user_8) 4: movq (%_ASM_AX),%rdx xor %eax,%eax ASM_CLAC - ret + RET #else LOAD_TASK_SIZE_MINUS_N(7) cmp %_ASM_DX,%_ASM_AX @@ -112,7 +112,7 @@ SYM_FUNC_START(__get_user_8) 5: movl 4(%_ASM_AX),%ecx xor %eax,%eax ASM_CLAC - ret + RET #endif SYM_FUNC_END(__get_user_8) EXPORT_SYMBOL(__get_user_8) @@ -124,7 +124,7 @@ SYM_FUNC_START(__get_user_nocheck_1) 6: movzbl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_nocheck_1) EXPORT_SYMBOL(__get_user_nocheck_1) 
@@ -134,7 +134,7 @@ SYM_FUNC_START(__get_user_nocheck_2) 7: movzwl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_nocheck_2) EXPORT_SYMBOL(__get_user_nocheck_2) @@ -144,7 +144,7 @@ SYM_FUNC_START(__get_user_nocheck_4) 8: movl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_nocheck_4) EXPORT_SYMBOL(__get_user_nocheck_4) @@ -159,7 +159,7 @@ SYM_FUNC_START(__get_user_nocheck_8) #endif xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_nocheck_8) EXPORT_SYMBOL(__get_user_nocheck_8) @@ -169,7 +169,7 @@ SYM_CODE_START_LOCAL(.Lbad_get_user_clac) bad_get_user: xor %edx,%edx mov $(-EFAULT),%_ASM_AX - ret + RET SYM_CODE_END(.Lbad_get_user_clac) #ifdef CONFIG_X86_32 @@ -179,7 +179,7 @@ bad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX - ret + RET SYM_CODE_END(.Lbad_get_user_8_clac) #endif diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S index dbf8cc97b7f5..12c16c6aa44a 100644 --- a/arch/x86/lib/hweight.S +++ b/arch/x86/lib/hweight.S @@ -32,7 +32,7 @@ SYM_FUNC_START(__sw_hweight32) imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 shrl $24, %eax # w = w_tmp >> 24 __ASM_SIZE(pop,) %__ASM_REG(dx) - ret + RET SYM_FUNC_END(__sw_hweight32) EXPORT_SYMBOL(__sw_hweight32) @@ -65,7 +65,7 @@ SYM_FUNC_START(__sw_hweight64) popq %rdx popq %rdi - ret + RET #else /* CONFIG_X86_32 */ /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ pushl %ecx @@ -77,7 +77,7 @@ SYM_FUNC_START(__sw_hweight64) addl %ecx, %eax # result popl %ecx - ret + RET #endif SYM_FUNC_END(__sw_hweight64) EXPORT_SYMBOL(__sw_hweight64) diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index cb5a1964506b..a1f9416bf67a 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -11,5 +11,5 @@ SYM_FUNC_START(__iowrite32_copy) movl %edx,%ecx rep movsd - ret + RET SYM_FUNC_END(__iowrite32_copy) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 
1cc9da6e29c7..59cf2343f3d9 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy) rep movsq movl %edx, %ecx rep movsb - ret + RET SYM_FUNC_END(memcpy) SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) @@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb - ret + RET SYM_FUNC_END(memcpy_erms) SYM_FUNC_START_LOCAL(memcpy_orig) @@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_16bytes: cmpl $8, %edx @@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_8bytes: cmpl $4, %edx @@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) - retq + RET .p2align 4 .Lless_3bytes: subl $1, %edx @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movb %cl, (%rdi) .Lend: - retq + RET SYM_FUNC_END(memcpy_orig) .popsection diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 64801010d312..e84d649620c4 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove) /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS + ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; RET", X86_FEATURE_ERMS /* * movsq instruction have many startup latency @@ -205,7 +205,7 @@ SYM_FUNC_START(__memmove) movb (%rsi), %r11b movb %r11b, (%rdi) 13: - retq + RET SYM_FUNC_END(__memmove) SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 9827ae267f96..d624f2bc42f1 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S 
@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) movl %edx,%ecx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) EXPORT_SYMBOL(memset) @@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms) movq %rdx,%rcx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(memset_erms) SYM_FUNC_START_LOCAL(memset_orig) @@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig) .Lende: movq %r10,%rax - ret + RET .Lbad_alignment: cmpq $7,%rdx diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index a2b9caa5274c..ebd259f31496 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -35,7 +35,7 @@ SYM_FUNC_START(\op\()_safe_regs) movl %edi, 28(%r10) popq %r12 popq %rbx - ret + RET 3: movl $-EIO, %r11d jmp 2b @@ -77,7 +77,7 @@ SYM_FUNC_START(\op\()_safe_regs) popl %esi popl %ebp popl %ebx - ret + RET 3: movl $-EIO, 4(%esp) jmp 2b diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 0ea344c5ea43..ecb2049c1273 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -52,7 +52,7 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL) 1: movb %al,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC - ret + RET SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) EXPORT_SYMBOL(__put_user_nocheck_1) @@ -66,7 +66,7 @@ SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL) 2: movw %ax,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC - ret + RET SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) EXPORT_SYMBOL(__put_user_nocheck_2) @@ -80,7 +80,7 @@ SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL) 3: movl %eax,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC - ret + RET SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) EXPORT_SYMBOL(__put_user_nocheck_4) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index cf0b39f97adc..a842866062c8 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -23,7 +23,7 @@ .Ldo_rop_\@: mov %\reg, (%_ASM_SP) UNWIND_HINT_FUNC - ret + RET .endm .macro THUNK reg diff --git a/arch/x86/math-emu/div_Xsig.S 
b/arch/x86/math-emu/div_Xsig.S index 951da2ad54bb..8c270ab415be 100644 --- a/arch/x86/math-emu/div_Xsig.S +++ b/arch/x86/math-emu/div_Xsig.S @@ -341,7 +341,7 @@ L_exit: popl %esi leave - ret + RET #ifdef PARANOID diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S index d047d1816abe..637439bfefa4 100644 --- a/arch/x86/math-emu/div_small.S +++ b/arch/x86/math-emu/div_small.S @@ -44,5 +44,5 @@ SYM_FUNC_START(FPU_div_small) popl %esi leave - ret + RET SYM_FUNC_END(FPU_div_small) diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S index 4afc7b1fa6e9..54a031b66142 100644 --- a/arch/x86/math-emu/mul_Xsig.S +++ b/arch/x86/math-emu/mul_Xsig.S @@ -62,7 +62,7 @@ SYM_FUNC_START(mul32_Xsig) popl %esi leave - ret + RET SYM_FUNC_END(mul32_Xsig) @@ -115,7 +115,7 @@ SYM_FUNC_START(mul64_Xsig) popl %esi leave - ret + RET SYM_FUNC_END(mul64_Xsig) @@ -175,5 +175,5 @@ SYM_FUNC_START(mul_Xsig_Xsig) popl %esi leave - ret + RET SYM_FUNC_END(mul_Xsig_Xsig) diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S index 702315eecb86..35fd723fc0df 100644 --- a/arch/x86/math-emu/polynom_Xsig.S +++ b/arch/x86/math-emu/polynom_Xsig.S @@ -133,5 +133,5 @@ L_accum_done: popl %edi popl %esi leave - ret + RET SYM_FUNC_END(polynomial_Xsig) diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S index cad1d60b1e84..594936eeed67 100644 --- a/arch/x86/math-emu/reg_norm.S +++ b/arch/x86/math-emu/reg_norm.S @@ -72,7 +72,7 @@ L_exit_valid: L_exit: popl %ebx leave - ret + RET L_zero: @@ -138,7 +138,7 @@ L_exit_nuo_valid: popl %ebx leave - ret + RET L_exit_nuo_zero: movl TAG_Zero,%eax @@ -146,5 +146,5 @@ L_exit_nuo_zero: popl %ebx leave - ret + RET SYM_FUNC_END(FPU_normalize_nuo) diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S index 4a9fc3cc5a4d..0bb2a092161a 100644 --- a/arch/x86/math-emu/reg_round.S +++ b/arch/x86/math-emu/reg_round.S @@ -437,7 +437,7 @@ fpu_Arith_exit: popl %edi popl %esi leave 
- ret + RET /* diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S index 9c9e2c810afe..07247287a3af 100644 --- a/arch/x86/math-emu/reg_u_add.S +++ b/arch/x86/math-emu/reg_u_add.S @@ -164,6 +164,6 @@ L_exit: popl %edi popl %esi leave - ret + RET #endif /* PARANOID */ SYM_FUNC_END(FPU_u_add) diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S index e2fb5c2644c5..b5a41e2fc484 100644 --- a/arch/x86/math-emu/reg_u_div.S +++ b/arch/x86/math-emu/reg_u_div.S @@ -468,7 +468,7 @@ L_exit: popl %esi leave - ret + RET #endif /* PARANOID */ SYM_FUNC_END(FPU_u_div) diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S index 0c779c87ac5b..e2588b24b8c2 100644 --- a/arch/x86/math-emu/reg_u_mul.S +++ b/arch/x86/math-emu/reg_u_mul.S @@ -144,7 +144,7 @@ L_exit: popl %edi popl %esi leave - ret + RET #endif /* PARANOID */ SYM_FUNC_END(FPU_u_mul) diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S index e9bb7c248649..4c900c29e4ff 100644 --- a/arch/x86/math-emu/reg_u_sub.S +++ b/arch/x86/math-emu/reg_u_sub.S @@ -270,5 +270,5 @@ L_exit: popl %edi popl %esi leave - ret + RET SYM_FUNC_END(FPU_u_sub) diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S index d9d7de8dbd7b..126c40473bad 100644 --- a/arch/x86/math-emu/round_Xsig.S +++ b/arch/x86/math-emu/round_Xsig.S @@ -78,7 +78,7 @@ L_exit: popl %esi popl %ebx leave - ret + RET SYM_FUNC_END(round_Xsig) @@ -138,5 +138,5 @@ L_n_exit: popl %esi popl %ebx leave - ret + RET SYM_FUNC_END(norm_Xsig) diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S index 726af985f758..f726bf6f6396 100644 --- a/arch/x86/math-emu/shr_Xsig.S +++ b/arch/x86/math-emu/shr_Xsig.S @@ -45,7 +45,7 @@ SYM_FUNC_START(shr_Xsig) popl %ebx popl %esi leave - ret + RET L_more_than_31: cmpl $64,%ecx @@ -61,7 +61,7 @@ L_more_than_31: movl $0,8(%esi) popl %esi leave - ret + RET L_more_than_63: cmpl $96,%ecx @@ -76,7 +76,7 @@ L_more_than_63: movl 
%edx,8(%esi) popl %esi leave - ret + RET L_more_than_95: xorl %eax,%eax @@ -85,5 +85,5 @@ L_more_than_95: movl %eax,8(%esi) popl %esi leave - ret + RET SYM_FUNC_END(shr_Xsig) diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S index 4fc89174caf0..f608a28a4c43 100644 --- a/arch/x86/math-emu/wm_shrx.S +++ b/arch/x86/math-emu/wm_shrx.S @@ -55,7 +55,7 @@ SYM_FUNC_START(FPU_shrx) popl %ebx popl %esi leave - ret + RET L_more_than_31: cmpl $64,%ecx @@ -70,7 +70,7 @@ L_more_than_31: movl $0,4(%esi) popl %esi leave - ret + RET L_more_than_63: cmpl $96,%ecx @@ -84,7 +84,7 @@ L_more_than_63: movl %edx,4(%esi) popl %esi leave - ret + RET L_more_than_95: xorl %eax,%eax @@ -92,7 +92,7 @@ L_more_than_95: movl %eax,4(%esi) popl %esi leave - ret + RET SYM_FUNC_END(FPU_shrx) @@ -146,7 +146,7 @@ SYM_FUNC_START(FPU_shrxs) popl %ebx popl %esi leave - ret + RET /* Shift by [0..31] bits */ Ls_less_than_32: @@ -163,7 +163,7 @@ Ls_less_than_32: popl %ebx popl %esi leave - ret + RET /* Shift by [64..95] bits */ Ls_more_than_63: @@ -189,7 +189,7 @@ Ls_more_than_63: popl %ebx popl %esi leave - ret + RET Ls_more_than_95: /* Shift by [96..inf) bits */ @@ -203,5 +203,5 @@ Ls_more_than_95: popl %ebx popl %esi leave - ret + RET SYM_FUNC_END(FPU_shrxs) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 17d292b7072f..3d1dba05fce4 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -65,7 +65,7 @@ SYM_FUNC_START(sme_encrypt_execute) movq %rbp, %rsp /* Restore original stack pointer */ pop %rbp - ret + RET SYM_FUNC_END(sme_encrypt_execute) SYM_FUNC_START(__enc_copy) @@ -151,6 +151,6 @@ SYM_FUNC_START(__enc_copy) pop %r12 pop %r15 - ret + RET .L__enc_copy_end: SYM_FUNC_END(__enc_copy) diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index 09ec84f6ef51..f3cfdb1c9a35 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -56,5 +56,5 @@ 
SYM_FUNC_START(efi_call_svam) movl 16(%esp), %ebx leave - ret + RET SYM_FUNC_END(efi_call_svam) diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 90380a17ab23..2206b8bc47b8 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -23,5 +23,5 @@ SYM_FUNC_START(__efi_call) mov %rsi, %rcx CALL_NOSPEC rdi leave - ret + RET SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index fd3dd1708eba..f2a8eec69f8f 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -63,7 +63,7 @@ SYM_CODE_START(__efi64_thunk) 1: movq 24(%rsp), %rsp pop %rbx pop %rbp - retq + RET .code32 2: pushl $__KERNEL_CS diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S index 75f4faff8468..3a5abffe5660 100644 --- a/arch/x86/platform/olpc/xo1-wakeup.S +++ b/arch/x86/platform/olpc/xo1-wakeup.S @@ -77,7 +77,7 @@ save_registers: pushfl popl saved_context_eflags - ret + RET restore_registers: movl saved_context_ebp, %ebp @@ -88,7 +88,7 @@ restore_registers: pushl saved_context_eflags popfl - ret + RET SYM_CODE_START(do_olpc_suspend_lowlevel) call save_processor_state @@ -109,7 +109,7 @@ ret_point: call restore_registers call restore_processor_state - ret + RET SYM_CODE_END(do_olpc_suspend_lowlevel) .data diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index 8786653ad3c0..5606a15cf9a1 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -32,7 +32,7 @@ SYM_FUNC_START(swsusp_arch_suspend) FRAME_BEGIN call swsusp_save FRAME_END - ret + RET SYM_FUNC_END(swsusp_arch_suspend) SYM_CODE_START(restore_image) @@ -108,5 +108,5 @@ SYM_FUNC_START(restore_registers) /* tell the hibernation core that we've just restored the memory */ movl %eax, in_suspend - ret + RET SYM_FUNC_END(restore_registers) diff --git 
a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index d9bed596d849..0a0539e1cc81 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -66,7 +66,7 @@ SYM_FUNC_START(restore_registers) /* tell the hibernation core that we've just restored the memory */ movq %rax, in_suspend(%rip) - ret + RET SYM_FUNC_END(restore_registers) SYM_FUNC_START(swsusp_arch_suspend) @@ -96,7 +96,7 @@ SYM_FUNC_START(swsusp_arch_suspend) FRAME_BEGIN call swsusp_save FRAME_END - ret + RET SYM_FUNC_END(swsusp_arch_suspend) SYM_FUNC_START(restore_image) diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S index 13f118dec74f..aed782ab7721 100644 --- a/arch/x86/um/checksum_32.S +++ b/arch/x86/um/checksum_32.S @@ -110,7 +110,7 @@ csum_partial: 7: popl %ebx popl %esi - ret + RET #else @@ -208,7 +208,7 @@ csum_partial: 80: popl %ebx popl %esi - ret + RET #endif EXPORT_SYMBOL(csum_partial) diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S index 62eaf8c80e04..2d991ddbcca5 100644 --- a/arch/x86/um/setjmp_32.S +++ b/arch/x86/um/setjmp_32.S @@ -34,7 +34,7 @@ kernel_setjmp: movl %esi,12(%edx) movl %edi,16(%edx) movl %ecx,20(%edx) # Return address - ret + RET .size kernel_setjmp,.-kernel_setjmp diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S index 1b5d40d4ff46..b46acb6a8ebd 100644 --- a/arch/x86/um/setjmp_64.S +++ b/arch/x86/um/setjmp_64.S @@ -33,7 +33,7 @@ kernel_setjmp: movq %r14,40(%rdi) movq %r15,48(%rdi) movq %rsi,56(%rdi) # Return address - ret + RET .size kernel_setjmp,.-kernel_setjmp diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 444d824775f6..e730e6200e64 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -29,7 +29,7 @@ */ SYM_FUNC_START(xen_irq_disable_direct) movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - ret + RET SYM_FUNC_END(xen_irq_disable_direct) /* @@ -58,7 +58,7 @@ SYM_FUNC_START(check_events) pop %rcx pop %rax FRAME_END - ret + RET 
SYM_FUNC_END(check_events) /* @@ -84,7 +84,7 @@ SYM_FUNC_START(xen_irq_enable_direct) call check_events 1: FRAME_END - ret + RET SYM_FUNC_END(xen_irq_enable_direct) /* @@ -100,7 +100,7 @@ SYM_FUNC_START(xen_save_fl_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah, %ah - ret + RET SYM_FUNC_END(xen_save_fl_direct) SYM_FUNC_START(xen_read_cr2) @@ -108,14 +108,14 @@ SYM_FUNC_START(xen_read_cr2) _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX FRAME_END - ret + RET SYM_FUNC_END(xen_read_cr2); SYM_FUNC_START(xen_read_cr2_direct) FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX FRAME_END - ret + RET SYM_FUNC_END(xen_read_cr2_direct); .popsection diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 6a64496edefb..11d286529fe5 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -26,7 +26,7 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC .skip 31, 0x90 - ret + RET .endr #define HYPERCALL(n) \ -- cgit v1.2.3 From b17c2baa305cccbd16bafa289fd743cc2db77966 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Dec 2021 14:43:41 +0100 Subject: x86: Prepare inline-asm for straight-line-speculation Replace all ret/retq instructions with ASM_RET in preparation of making it more than a single instruction. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211204134907.964635458@infradead.org --- arch/x86/include/asm/linkage.h | 4 ++++ arch/x86/include/asm/paravirt.h | 2 +- arch/x86/include/asm/qspinlock_paravirt.h | 4 ++-- arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/kprobes/core.c | 2 +- arch/x86/kernel/paravirt.c | 4 ++-- arch/x86/kvm/emulate.c | 4 ++-- arch/x86/lib/error-inject.c | 3 ++- samples/ftrace/ftrace-direct-modify.c | 4 ++-- samples/ftrace/ftrace-direct-too.c | 2 +- samples/ftrace/ftrace-direct.c | 2 +- 11 files changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 365111789cc6..ebddec2f3ba8 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -18,6 +18,10 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#define ASM_RET "ret\n\t" + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 21c4a694ca11..ce1148c5620b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -671,7 +671,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); "call " #func ";" \ PV_RESTORE_ALL_CALLER_REGS \ FRAME_END \ - "ret;" \ + ASM_RET \ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ ".popsection") diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 159622ee0674..1474cf96251d 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -48,7 +48,7 @@ asm (".pushsection .text;" "jne .slowpath;" "pop %rdx;" FRAME_END - "ret;" + ASM_RET ".slowpath: " "push %rsi;" "movzbl %al,%esi;" @@ -56,7 +56,7 @@ asm (".pushsection .text;" "pop %rsi;" "pop %rdx;" FRAME_END - "ret;" + ASM_RET ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" ".popsection"); diff --git 
a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 23fb4d51a5da..175cde66a1ae 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -714,7 +714,7 @@ asm ( " .type int3_magic, @function\n" "int3_magic:\n" " movl $1, (%" _ASM_ARG1 ")\n" -" ret\n" + ASM_RET " .size int3_magic, .-int3_magic\n" " .popsection\n" ); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index fce99e249d61..6290712cb36d 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1051,7 +1051,7 @@ asm( " addl $4, %esp\n" " popfl\n" #endif - " ret\n" + ASM_RET ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n" ); NOKPROBE_SYMBOL(__kretprobe_trampoline); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 7f7636aac620..4420499f7bb4 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,7 +41,7 @@ extern void _paravirt_nop(void); asm (".pushsection .entry.text, \"ax\"\n" ".global _paravirt_nop\n" "_paravirt_nop:\n\t" - "ret\n\t" + ASM_RET ".size _paravirt_nop, . - _paravirt_nop\n\t" ".type _paravirt_nop, @function\n\t" ".popsection"); @@ -51,7 +51,7 @@ asm (".pushsection .entry.text, \"ax\"\n" ".global paravirt_ret0\n" "paravirt_ret0:\n\t" "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" - "ret\n\t" + ASM_RET ".size paravirt_ret0, . 
- paravirt_ret0\n\t" ".type paravirt_ret0, @function\n\t" ".popsection"); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 28b1a4e57827..b026350c04c7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -315,7 +315,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); __FOP_FUNC(#name) #define __FOP_RET(name) \ - "ret \n\t" \ + ASM_RET \ ".size " name ", .-" name "\n\t" #define FOP_RET(name) \ @@ -435,7 +435,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); __FOP_RET(#op) asm(".pushsection .fixup, \"ax\"\n" - "kvm_fastop_exception: xor %esi, %esi; ret\n" + "kvm_fastop_exception: xor %esi, %esi; " ASM_RET ".popsection"); FOP_START(setcc) diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index be5b5fb1598b..520897061ee0 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -10,7 +11,7 @@ asm( ".type just_return_func, @function\n" ".globl just_return_func\n" "just_return_func:\n" - " ret\n" + ASM_RET ".size just_return_func, .-just_return_func\n" ); diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 690e4a9ff333..bc8f0d6cd6f8 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -31,7 +31,7 @@ asm ( " call my_direct_func1\n" " leave\n" " .size my_tramp1, .-my_tramp1\n" -" ret\n" + ASM_RET " .type my_tramp2, @function\n" " .globl my_tramp2\n" " my_tramp2:" @@ -39,7 +39,7 @@ asm ( " movq %rsp, %rbp\n" " call my_direct_func2\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp2, .-my_tramp2\n" " .popsection\n" ); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 6e0de725bf22..d1bec1cded58 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -31,7 +31,7 @@ asm ( " popq %rsi\n" " popq %rdi\n" " leave\n" -" ret\n" + 
ASM_RET " .size my_tramp, .-my_tramp\n" " .popsection\n" ); diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index a30aa42ec76a..51312e0ae2b1 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -25,7 +25,7 @@ asm ( " call my_direct_func\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp, .-my_tramp\n" " .popsection\n" ); -- cgit v1.2.3 From 1cc1e4c8aab4213bd4e6353dec2620476a233d6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Dec 2021 14:43:42 +0100 Subject: objtool: Add straight-line-speculation validation Teach objtool to validate the straight-line-speculation constraints: - speculation trap after indirect calls - speculation trap after RET Notable: when an instruction is annotated RETPOLINE_SAFE, indicating speculation isn't a problem, also don't care about sls for that instruction. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211204134908.023037659@infradead.org --- tools/objtool/arch/x86/decode.c | 13 +++++++++---- tools/objtool/builtin-check.c | 3 ++- tools/objtool/check.c | 14 ++++++++++++++ tools/objtool/include/objtool/arch.h | 1 + tools/objtool/include/objtool/builtin.h | 2 +- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 4d6d7fc13255..c10ef78df050 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -531,6 +531,11 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec } break; + case 0xcc: + /* int3 */ + *type = INSN_TRAP; + break; + case 0xe3: /* jecxz/jrcxz */ *type = INSN_JUMP_CONDITIONAL; @@ -697,10 +702,10 @@ const char *arch_ret_insn(int len) { static const char ret[5][5] = { { BYTE_RET }, - { BYTE_RET, BYTES_NOP1 }, - { BYTE_RET, BYTES_NOP2 }, - { BYTE_RET, BYTES_NOP3 }, - { BYTE_RET, BYTES_NOP4 }, + { BYTE_RET, 0xcc }, + { BYTE_RET, 0xcc, BYTES_NOP1 }, + 
{ BYTE_RET, 0xcc, BYTES_NOP2 }, + { BYTE_RET, 0xcc, BYTES_NOP3 }, }; if (len < 1 || len > 5) { diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 8b38b5d6fec7..38070f26105b 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,7 @@ #include bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup; + validate_dup, vmlinux, mcount, noinstr, backup, sls; static const char * const check_usage[] = { "objtool check [] file.o", @@ -45,6 +45,7 @@ const struct option check_options[] = { OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), + OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), OPT_END(), }; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 21735829b860..e28172f6e792 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3084,6 +3084,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: + if (next_insn && next_insn->type == INSN_TRAP) { + next_insn->ignore = true; + } else if (sls && !insn->retpoline_safe) { + WARN_FUNC("missing int3 after ret", + insn->sec, insn->offset); + } return validate_return(func, insn, &state); case INSN_CALL: @@ -3127,6 +3133,14 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: + if (next_insn && next_insn->type == INSN_TRAP) { + next_insn->ignore = true; + } else if (sls && !insn->retpoline_safe) { + WARN_FUNC("missing int3 after indirect jump", + insn->sec, insn->offset); + } + + /* fallthrough */ case INSN_JUMP_DYNAMIC_CONDITIONAL: if (is_sibling_call(insn)) { ret = validate_sibling_call(file, insn, &state); diff --git a/tools/objtool/include/objtool/arch.h 
b/tools/objtool/include/objtool/arch.h index 589ff58426ab..76bae3078286 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -26,6 +26,7 @@ enum insn_type { INSN_CLAC, INSN_STD, INSN_CLD, + INSN_TRAP, INSN_OTHER, }; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 15ac0b7d3d6a..89ba869ed08f 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup; + validate_dup, vmlinux, mcount, noinstr, backup, sls; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); -- cgit v1.2.3 From 26c44b776dba4ac692a0bf5a3836feb8a63fea6b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Dec 2021 14:43:43 +0100 Subject: x86/alternative: Relax text_poke_bp() constraint Currently, text_poke_bp() is very strict to only allow patching a single instruction; however with straight-line-speculation it will be required to patch: ret; int3, which is two instructions. As such, relax the constraints a little to allow int3 padding for all instructions that do not imply the execution of the next instruction, i.e.: RET, JMP.d8 and JMP.d32. While there, rename the text_poke_loc::rel32 field to ::disp. Note: this fills up the text_poke_loc structure which is now a round 16 bytes big. [ bp: Put comments on top instead of on the side. 
] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211204134908.082342723@infradead.org --- arch/x86/kernel/alternative.c | 49 ++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 175cde66a1ae..5007c3ffe96f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1113,10 +1113,13 @@ void text_poke_sync(void) } struct text_poke_loc { - s32 rel_addr; /* addr := _stext + rel_addr */ - s32 rel32; + /* addr := _stext + rel_addr */ + s32 rel_addr; + s32 disp; + u8 len; u8 opcode; const u8 text[POKE_MAX_OPCODE_SIZE]; + /* see text_poke_bp_batch() */ u8 old; }; @@ -1131,7 +1134,8 @@ static struct bp_patching_desc *bp_desc; static __always_inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) { - struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */ + /* rcu_dereference */ + struct bp_patching_desc *desc = __READ_ONCE(*descp); if (!desc || !arch_atomic_inc_not_zero(&desc->refs)) return NULL; @@ -1165,7 +1169,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs) { struct bp_patching_desc *desc; struct text_poke_loc *tp; - int len, ret = 0; + int ret = 0; void *ip; if (user_mode(regs)) @@ -1205,8 +1209,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs) goto out_put; } - len = text_opcode_size(tp->opcode); - ip += len; + ip += tp->len; switch (tp->opcode) { case INT3_INSN_OPCODE: @@ -1221,12 +1224,12 @@ noinstr int poke_int3_handler(struct pt_regs *regs) break; case CALL_INSN_OPCODE: - int3_emulate_call(regs, (long)ip + tp->rel32); + int3_emulate_call(regs, (long)ip + tp->disp); break; case JMP32_INSN_OPCODE: case JMP8_INSN_OPCODE: - int3_emulate_jmp(regs, (long)ip + tp->rel32); + int3_emulate_jmp(regs, (long)ip + tp->disp); break; default: @@ -1301,7 +1304,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, 
unsigned int nr_entries */ for (do_sync = 0, i = 0; i < nr_entries; i++) { u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, }; - int len = text_opcode_size(tp[i].opcode); + int len = tp[i].len; if (len - INT3_INSN_SIZE > 0) { memcpy(old + INT3_INSN_SIZE, @@ -1378,20 +1381,36 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, const void *opcode, size_t len, const void *emulate) { struct insn insn; - int ret; + int ret, i; memcpy((void *)tp->text, opcode, len); if (!emulate) emulate = opcode; ret = insn_decode_kernel(&insn, emulate); - BUG_ON(ret < 0); - BUG_ON(len != insn.length); tp->rel_addr = addr - (void *)_stext; + tp->len = len; tp->opcode = insn.opcode.bytes[0]; + switch (tp->opcode) { + case RET_INSN_OPCODE: + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: + /* + * Control flow instructions without implied execution of the + * next instruction can be padded with INT3. + */ + for (i = insn.length; i < len; i++) + BUG_ON(tp->text[i] != INT3_INSN_OPCODE); + break; + + default: + BUG_ON(len != insn.length); + }; + + switch (tp->opcode) { case INT3_INSN_OPCODE: case RET_INSN_OPCODE: @@ -1400,7 +1419,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, case CALL_INSN_OPCODE: case JMP32_INSN_OPCODE: case JMP8_INSN_OPCODE: - tp->rel32 = insn.immediate.value; + tp->disp = insn.immediate.value; break; default: /* assume NOP */ @@ -1408,13 +1427,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, case 2: /* NOP2 -- emulate as JMP8+0 */ BUG_ON(memcmp(emulate, x86_nops[len], len)); tp->opcode = JMP8_INSN_OPCODE; - tp->rel32 = 0; + tp->disp = 0; break; case 5: /* NOP5 -- emulate as JMP32+0 */ BUG_ON(memcmp(emulate, x86_nops[len], len)); tp->opcode = JMP32_INSN_OPCODE; - tp->rel32 = 0; + tp->disp = 0; break; default: /* unknown instruction */ -- cgit v1.2.3 From e463a09af2f0677b9485a7e8e4e70b396b2ffb6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Dec 2021 14:43:44 +0100 Subject: x86: Add 
straight-line-speculation mitigation Make use of an upcoming GCC feature to mitigate straight-line-speculation for x86: https://gcc.gnu.org/g:53a643f8568067d7700a9f2facc8ba39974973d3 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102952 https://bugs.llvm.org/show_bug.cgi?id=52323 It's build-tested on x86_64-allyesconfig using GCC-12 and GCC-11. Maintenance overhead of this should be fairly low due to objtool validation. Size overhead of all these additional int3 instructions comes to: text data bss dec hex filename 22267751 6933356 2011368 31212475 1dc43bb defconfig-build/vmlinux 22804126 6933356 1470696 31208178 1dc32f2 defconfig-build/vmlinux.sls Or roughly 2.4% additional text. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211204134908.140103474@infradead.org --- arch/x86/Kconfig | 12 ++++++++++++ arch/x86/Makefile | 4 ++++ arch/x86/include/asm/linkage.h | 10 ++++++++++ arch/x86/include/asm/static_call.h | 2 +- arch/x86/kernel/ftrace.c | 2 +- arch/x86/kernel/static_call.c | 5 +++-- arch/x86/lib/memmove_64.S | 2 +- arch/x86/lib/retpoline.S | 2 +- scripts/Makefile.build | 3 ++- scripts/link-vmlinux.sh | 3 +++ 10 files changed, 38 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7399327d1eff..dd13ba82ce7a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -472,6 +472,18 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. +config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + +config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 + default n + help + Compile the kernel with straight-line-speculation options to guard + against straight line speculation. The kernel image might be slightly + larger. 
+ config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c38b6577c103..e84cdd409b64 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -191,6 +191,10 @@ ifdef CONFIG_RETPOLINE endif endif +ifdef CONFIG_SLS + KBUILD_CFLAGS += -mharden-sls=all +endif + KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) ifdef CONFIG_LTO_CLANG diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index ebddec2f3ba8..030907922bd0 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -18,9 +18,19 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif +#ifdef CONFIG_SLS +#define RET ret; int3 +#else +#define RET ret +#endif + #else /* __ASSEMBLY__ */ +#ifdef CONFIG_SLS +#define ASM_RET "ret; int3\n\t" +#else #define ASM_RET "ret\n\t" +#endif #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 39ebe0511869..ed4f8bb6c2d9 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -36,7 +36,7 @@ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. 
+ 4)") #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ - __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop") + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") #define ARCH_ADD_TRAMP_KEY(name) \ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c39f906cdc4e..7cc540e6de0c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -303,7 +303,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE 1 +#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 9c407a33a774..531fb4cbb63f 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -17,6 +17,8 @@ enum insn_type { */ static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 }; +static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; + static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) { const void *emulate = NULL; @@ -42,8 +44,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case RET: - code = text_gen_insn(RET_INSN_OPCODE, insn, func); - size = RET_INSN_SIZE; + code = &retinsn; break; } diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index e84d649620c4..50ea390df712 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove) /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; RET", X86_FEATURE_ERMS + ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS /* * movsq instruction have many startup latency diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index a842866062c8..89b3fb244e15 100644 
--- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_AMD .endm diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 78656b527fe5..a4b89b757287 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -234,7 +234,8 @@ objtool_args = \ $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ $(if $(CONFIG_RETPOLINE), --retpoline) \ $(if $(CONFIG_X86_SMAP), --uaccess) \ - $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) + $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ + $(if $(CONFIG_SLS), --sls) cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $@) cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 5cdd9bc5c385..9716f285e404 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -139,6 +139,9 @@ objtool_link() if [ -n "${CONFIG_X86_SMAP}" ]; then objtoolopt="${objtoolopt} --uaccess" fi + if [ -n "${CONFIG_SLS}" ]; then + objtoolopt="${objtoolopt} --sls" + fi info OBJTOOL ${1} tools/objtool/objtool ${objtoolcmd} ${objtoolopt} ${1} fi -- cgit v1.2.3 From bff8c3848e071d387d8b0784dc91fa49cd563774 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:03 +0100 Subject: bitfield.h: Fix "type of reg too small for mask" test The test: 'mask > (typeof(_reg))~0ull' only works correctly when both sides are unsigned, consider: - 0xff000000 vs (int)~0ull - 0x000000ff vs (int)~0ull Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: 
https://lore.kernel.org/r/20211110101324.950210584@infradead.org --- include/linux/bitfield.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4e035aca6f7e..6093fa6db260 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -41,6 +41,22 @@ #define __bf_shf(x) (__builtin_ffsll(x) - 1) +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ ({ \ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ @@ -49,7 +65,8 @@ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ _pfx "value too large for the field"); \ - BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ _pfx "type of reg too small for mask"); \ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ (1ULL << __bf_shf(_mask))); \ -- cgit v1.2.3 From c6dbd3e5e69cf3ca47a3864115d4cbdd44619243 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Nov 2021 17:46:39 +0100 Subject: x86/mmx_32: Remove X86_USE_3DNOW This code puts an exception table entry on the PREFETCH instruction to overwrite it with a JMP.d8 when it triggers an exception. Except of course, our code is no longer writable, also SMP. Instead of fixing this broken mess, simply take it out. 
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Borislav Petkov Link: https://lkml.kernel.org/r/YZKQzUmeNuwyvZpk@hirez.programming.kicks-ass.net --- arch/x86/Kconfig | 2 +- arch/x86/Kconfig.cpu | 4 - arch/x86/include/asm/mmx.h | 15 - arch/x86/include/asm/page_32.h | 14 - arch/x86/include/asm/required-features.h | 4 - arch/x86/include/asm/string_32.h | 33 --- arch/x86/lib/Makefile | 1 - arch/x86/lib/memcpy_32.c | 4 - arch/x86/lib/mmx_32.c | 388 ------------------------- arch/x86/lib/usercopy_32.c | 1 - tools/arch/x86/include/asm/required-features.h | 4 - 11 files changed, 1 insertion(+), 469 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dd13ba82ce7a..0c9c680fd15a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1957,7 +1957,7 @@ config EFI config EFI_STUB bool "EFI stub support" - depends on EFI && !X86_USE_3DNOW + depends on EFI depends on $(cc-option,-mabi=ms) || X86_32 select RELOCATABLE help diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index eefc434351db..542377cd419d 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -342,10 +342,6 @@ config X86_USE_PPRO_CHECKSUM def_bool y depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM -config X86_USE_3DNOW - def_bool y - depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML - # # P6_NOPs are a relatively minor optimization that require a family >= # 6 processor, except that it is broken on certain VIA chips. diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h index f572d0f944bb..e69de29bb2d1 100644 --- a/arch/x86/include/asm/mmx.h +++ b/arch/x86/include/asm/mmx.h @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MMX_H -#define _ASM_X86_MMX_H - -/* - * MMX 3Dnow! 
helper operations - */ - -#include - -extern void *_mmx_memcpy(void *to, const void *from, size_t size); -extern void mmx_clear_page(void *page); -extern void mmx_copy_page(void *to, void *from); - -#endif /* _ASM_X86_MMX_H */ diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index b13f8488ac85..df42f8aa99e4 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -19,19 +19,6 @@ extern unsigned long __phys_addr(unsigned long); #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif /* CONFIG_FLATMEM */ -#ifdef CONFIG_X86_USE_3DNOW -#include - -static inline void clear_page(void *page) -{ - mmx_clear_page(page); -} - -static inline void copy_page(void *to, void *from) -{ - mmx_copy_page(to, from); -} -#else /* !CONFIG_X86_USE_3DNOW */ #include static inline void clear_page(void *page) @@ -43,7 +30,6 @@ static inline void copy_page(void *to, void *from) { memcpy(to, from, PAGE_SIZE); } -#endif /* CONFIG_X86_USE_3DNOW */ #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index b2d504f11937..aff774775c67 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -35,11 +35,7 @@ # define NEED_CMOV 0 #endif -#ifdef CONFIG_X86_USE_3DNOW -# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) -#else # define NEED_3DNOW 0 -#endif #if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) # define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index f74362b05619..32c0d981a82a 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -146,42 +146,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, extern void *memcpy(void *, const void *, size_t); #ifndef CONFIG_FORTIFY_SOURCE -#ifdef CONFIG_X86_USE_3DNOW - -#include - -/* - * This CPU favours 3DNow strongly 
(eg AMD Athlon) - */ - -static inline void *__constant_memcpy3d(void *to, const void *from, size_t len) -{ - if (len < 512) - return __constant_memcpy(to, from, len); - return _mmx_memcpy(to, from, len); -} - -static inline void *__memcpy3d(void *to, const void *from, size_t len) -{ - if (len < 512) - return __memcpy(to, from, len); - return _mmx_memcpy(to, from, len); -} - -#define memcpy(t, f, n) \ - (__builtin_constant_p((n)) \ - ? __constant_memcpy3d((t), (f), (n)) \ - : __memcpy3d((t), (f), (n))) - -#else - -/* - * No 3D Now! - */ #define memcpy(t, f, n) __builtin_memcpy(t, f, n) -#endif #endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMMOVE diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index c6506c6a7092..f76747862bd2 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -63,7 +63,6 @@ ifeq ($(CONFIG_X86_32),y) ifneq ($(CONFIG_X86_CMPXCHG64),y) lib-y += cmpxchg8b_emu.o atomic64_386_32.o endif - lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else obj-y += iomap_copy_64.o lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index e565d1c9019e..3a6e6cfe8c35 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c @@ -7,11 +7,7 @@ __visible void *memcpy(void *to, const void *from, size_t n) { -#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE) - return __memcpy3d(to, from, n); -#else return __memcpy(to, from, n); -#endif } EXPORT_SYMBOL(memcpy); diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index cc5f4ea943d3..e69de29bb2d1 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -1,388 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * MMX 3DNow! library helper functions - * - * To do: - * We can use MMX just for prefetch in IRQ's. This may be a win. - * (reported so on K6-III) - * We should use a better code neutral filler for the short jump - * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? 
- * We also want to clobber the filler register so we don't get any - * register forwarding stalls on the filler. - * - * Add *user handling. Checksums are not a win with MMX on any CPU - * tested so far for any MMX solution figured. - * - * 22/09/2000 - Arjan van de Ven - * Improved for non-engineering-sample Athlons - * - */ -#include -#include -#include -#include -#include - -#include -#include - -/* - * Use KFPU_387. MMX instructions are not affected by MXCSR, - * but both AMD and Intel documentation states that even integer MMX - * operations will result in #MF if an exception is pending in FCW. - * - * EMMS is not needed afterwards because, after calling kernel_fpu_end(), - * any subsequent user of the 387 stack will reinitialize it using - * KFPU_387. - */ - -void *_mmx_memcpy(void *to, const void *from, size_t len) -{ - void *p; - int i; - - if (unlikely(in_interrupt())) - return __memcpy(to, from, len); - - p = to; - i = len >> 6; /* len/64 */ - - kernel_fpu_begin_mask(KFPU_387); - - __asm__ __volatile__ ( - "1: prefetch (%0)\n" /* This set is 28 bytes */ - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" - "2: \n" - ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : : "r" (from)); - - for ( ; i > 5; i--) { - __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" - ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : : "r" (from), "r" (to) : "memory"); - - 
from += 64; - to += 64; - } - - for ( ; i > 0; i--) { - __asm__ __volatile__ ( - " movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" - : : "r" (from), "r" (to) : "memory"); - - from += 64; - to += 64; - } - /* - * Now do the tail of the block: - */ - __memcpy(to, from, len & 63); - kernel_fpu_end(); - - return p; -} -EXPORT_SYMBOL(_mmx_memcpy); - -#ifdef CONFIG_MK7 - -/* - * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and - * other MMX using processors do not. - */ - -static void fast_clear_page(void *page) -{ - int i; - - kernel_fpu_begin_mask(KFPU_387); - - __asm__ __volatile__ ( - " pxor %%mm0, %%mm0\n" : : - ); - - for (i = 0; i < 4096/64; i++) { - __asm__ __volatile__ ( - " movntq %%mm0, (%0)\n" - " movntq %%mm0, 8(%0)\n" - " movntq %%mm0, 16(%0)\n" - " movntq %%mm0, 24(%0)\n" - " movntq %%mm0, 32(%0)\n" - " movntq %%mm0, 40(%0)\n" - " movntq %%mm0, 48(%0)\n" - " movntq %%mm0, 56(%0)\n" - : : "r" (page) : "memory"); - page += 64; - } - - /* - * Since movntq is weakly-ordered, a "sfence" is needed to become - * ordered again: - */ - __asm__ __volatile__("sfence\n"::); - - kernel_fpu_end(); -} - -static void fast_copy_page(void *to, void *from) -{ - int i; - - kernel_fpu_begin_mask(KFPU_387); - - /* - * maybe the prefetch stuff can go before the expensive fnsave... - * but that is for later. 
-AV - */ - __asm__ __volatile__( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" - "2: \n" - ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from)); - - for (i = 0; i < (4096-320)/64; i++) { - __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movntq %%mm0, (%1)\n" - " movq 8(%0), %%mm1\n" - " movntq %%mm1, 8(%1)\n" - " movq 16(%0), %%mm2\n" - " movntq %%mm2, 16(%1)\n" - " movq 24(%0), %%mm3\n" - " movntq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm4\n" - " movntq %%mm4, 32(%1)\n" - " movq 40(%0), %%mm5\n" - " movntq %%mm5, 40(%1)\n" - " movq 48(%0), %%mm6\n" - " movntq %%mm6, 48(%1)\n" - " movq 56(%0), %%mm7\n" - " movntq %%mm7, 56(%1)\n" - ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory"); - - from += 64; - to += 64; - } - - for (i = (4096-320)/64; i < 4096/64; i++) { - __asm__ __volatile__ ( - "2: movq (%0), %%mm0\n" - " movntq %%mm0, (%1)\n" - " movq 8(%0), %%mm1\n" - " movntq %%mm1, 8(%1)\n" - " movq 16(%0), %%mm2\n" - " movntq %%mm2, 16(%1)\n" - " movq 24(%0), %%mm3\n" - " movntq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm4\n" - " movntq %%mm4, 32(%1)\n" - " movq 40(%0), %%mm5\n" - " movntq %%mm5, 40(%1)\n" - " movq 48(%0), %%mm6\n" - " movntq %%mm6, 48(%1)\n" - " movq 56(%0), %%mm7\n" - " movntq %%mm7, 56(%1)\n" - : : "r" (from), "r" (to) : "memory"); - from += 64; - to += 64; - } - /* - * Since movntq is weakly-ordered, a "sfence" is needed to become - * ordered again: - */ - __asm__ __volatile__("sfence \n"::); - kernel_fpu_end(); -} - -#else /* CONFIG_MK7 */ - -/* - * Generic MMX implementation without K7 specific streaming - */ -static void fast_clear_page(void *page) -{ - int i; - - kernel_fpu_begin_mask(KFPU_387); - - __asm__ __volatile__ ( - " pxor %%mm0, %%mm0\n" 
: : - ); - - for (i = 0; i < 4096/128; i++) { - __asm__ __volatile__ ( - " movq %%mm0, (%0)\n" - " movq %%mm0, 8(%0)\n" - " movq %%mm0, 16(%0)\n" - " movq %%mm0, 24(%0)\n" - " movq %%mm0, 32(%0)\n" - " movq %%mm0, 40(%0)\n" - " movq %%mm0, 48(%0)\n" - " movq %%mm0, 56(%0)\n" - " movq %%mm0, 64(%0)\n" - " movq %%mm0, 72(%0)\n" - " movq %%mm0, 80(%0)\n" - " movq %%mm0, 88(%0)\n" - " movq %%mm0, 96(%0)\n" - " movq %%mm0, 104(%0)\n" - " movq %%mm0, 112(%0)\n" - " movq %%mm0, 120(%0)\n" - : : "r" (page) : "memory"); - page += 128; - } - - kernel_fpu_end(); -} - -static void fast_copy_page(void *to, void *from) -{ - int i; - - kernel_fpu_begin_mask(KFPU_387); - - __asm__ __volatile__ ( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" - "2: \n" - ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from)); - - for (i = 0; i < 4096/64; i++) { - __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" - ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : : "r" (from), "r" (to) : "memory"); - - from += 64; - to += 64; - } - kernel_fpu_end(); -} - -#endif /* !CONFIG_MK7 */ - -/* - * Favour MMX for page clear and copy: - */ -static void slow_zero_page(void *page) -{ - int d0, d1; - - __asm__ __volatile__( - "cld\n\t" - "rep ; stosl" - - : "=&c" (d0), "=&D" (d1) - :"a" (0), "1" (page), "0" (1024) - :"memory"); -} - -void mmx_clear_page(void *page) -{ - 
if (unlikely(in_interrupt())) - slow_zero_page(page); - else - fast_clear_page(page); -} -EXPORT_SYMBOL(mmx_clear_page); - -static void slow_copy_page(void *to, void *from) -{ - int d0, d1, d2; - - __asm__ __volatile__( - "cld\n\t" - "rep ; movsl" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (1024), "1" ((long) to), "2" ((long) from) - : "memory"); -} - -void mmx_copy_page(void *to, void *from) -{ - if (unlikely(in_interrupt())) - slow_copy_page(to, from); - else - fast_copy_page(to, from); -} -EXPORT_SYMBOL(mmx_copy_page); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 7d290777246d..962006bdb8a8 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -8,7 +8,6 @@ */ #include #include -#include #include #ifdef CONFIG_X86_INTEL_USERCOPY diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index b2d504f11937..aff774775c67 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -35,11 +35,7 @@ # define NEED_CMOV 0 #endif -#ifdef CONFIG_X86_USE_3DNOW -# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) -#else # define NEED_3DNOW 0 -#endif #if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) # define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) -- cgit v1.2.3 From acba44d2436d463f60a54bf934d378dcf384a965 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:05 +0100 Subject: x86/copy_user_64: Remove .fixup usage Place the anonymous .fixup code at the tail of the regular functions. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Reviewed-by: Borislav Petkov Link: https://lore.kernel.org/r/20211110101325.068505810@infradead.org --- arch/x86/lib/copy_user_64.S | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 8fb562f1dfaf..e6ac38587b40 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -32,14 +32,10 @@ decl %ecx jnz 100b 102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp .Lcopy_user_handle_tail - .previous - _ASM_EXTABLE_CPY(100b, 103b) - _ASM_EXTABLE_CPY(101b, 103b) - .endm + _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align) + _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align) +.endm /* * copy_user_generic_unrolled - memory copy with exception handling. @@ -107,7 +103,6 @@ SYM_FUNC_START(copy_user_generic_unrolled) ASM_CLAC RET - .section .fixup,"ax" 30: shll $6,%ecx addl %ecx,%edx jmp 60f @@ -115,7 +110,6 @@ SYM_FUNC_START(copy_user_generic_unrolled) jmp 60f 50: movl %ecx,%edx 60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */ - .previous _ASM_EXTABLE_CPY(1b, 30b) _ASM_EXTABLE_CPY(2b, 30b) @@ -166,20 +160,16 @@ SYM_FUNC_START(copy_user_generic_string) movl %edx,%ecx shrl $3,%ecx andl $7,%edx -1: rep - movsq +1: rep movsq 2: movl %edx,%ecx -3: rep - movsb +3: rep movsb xorl %eax,%eax ASM_CLAC RET - .section .fixup,"ax" 11: leal (%rdx,%rcx,8),%ecx 12: movl %ecx,%edx /* ecx is zerorest also */ jmp .Lcopy_user_handle_tail - .previous _ASM_EXTABLE_CPY(1b, 11b) _ASM_EXTABLE_CPY(3b, 12b) @@ -203,16 +193,13 @@ SYM_FUNC_START(copy_user_enhanced_fast_string) cmpl $64,%edx jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */ movl %edx,%ecx -1: rep - movsb +1: rep movsb xorl %eax,%eax ASM_CLAC RET - .section .fixup,"ax" 12: movl %ecx,%edx /* ecx is zerorest also */ jmp .Lcopy_user_handle_tail - .previous _ASM_EXTABLE_CPY(1b, 12b) 
SYM_FUNC_END(copy_user_enhanced_fast_string) @@ -240,6 +227,11 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) RET _ASM_EXTABLE_CPY(1b, 2b) + +.Lcopy_user_handle_align: + addl %ecx,%edx /* ecx is zerorest also */ + jmp .Lcopy_user_handle_tail + SYM_CODE_END(.Lcopy_user_handle_tail) /* @@ -350,7 +342,6 @@ SYM_FUNC_START(__copy_user_nocache) sfence RET - .section .fixup,"ax" .L_fixup_4x8b_copy: shll $6,%ecx addl %ecx,%edx @@ -366,7 +357,6 @@ SYM_FUNC_START(__copy_user_nocache) .L_fixup_handle_tail: sfence jmp .Lcopy_user_handle_tail - .previous _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy) _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy) -- cgit v1.2.3 From ab0fedcc714aafaac6ac996b51791aee0d1cd8fd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:06 +0100 Subject: x86/copy_mc_64: Remove .fixup usage Place the anonymous .fixup code at the tail of the regular functions. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Reviewed-by: Borislav Petkov Link: https://lore.kernel.org/r/20211110101325.127055887@infradead.org --- arch/x86/lib/copy_mc_64.S | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S index 23009792e19c..c859a8a09860 100644 --- a/arch/x86/lib/copy_mc_64.S +++ b/arch/x86/lib/copy_mc_64.S @@ -78,9 +78,7 @@ SYM_FUNC_START(copy_mc_fragile) xorl %eax, %eax .L_done: RET -SYM_FUNC_END(copy_mc_fragile) - .section .fixup, "ax" /* * Return number of bytes not copied for any failure. 
Note that * there is no "tail" handling since the source buffer is 8-byte @@ -105,14 +103,14 @@ SYM_FUNC_END(copy_mc_fragile) movl %ecx, %edx jmp copy_mc_fragile_handle_tail - .previous - _ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT_MCE_SAFE) _ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT_MCE_SAFE) _ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT_MCE_SAFE) _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) _ASM_EXTABLE(.L_write_words, .E_write_words) _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) + +SYM_FUNC_END(copy_mc_fragile) #endif /* CONFIG_X86_MCE */ /* @@ -133,9 +131,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_string) /* Copy successful. Return zero */ xorl %eax, %eax RET -SYM_FUNC_END(copy_mc_enhanced_fast_string) - .section .fixup, "ax" .E_copy: /* * On fault %rcx is updated such that the copy instruction could @@ -147,7 +143,7 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string) movq %rcx, %rax RET - .previous - _ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT_MCE_SAFE) + +SYM_FUNC_END(copy_mc_enhanced_fast_string) #endif /* !CONFIG_UML */ -- cgit v1.2.3 From 16e617d05ef0c521d000c989796412ce713f28c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:07 +0100 Subject: x86/entry_64: Remove .fixup usage Place the anonymous .fixup code at the tail of the regular functions. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Reviewed-by: Borislav Petkov Reviewed-by: Lai Jiangshan Link: https://lore.kernel.org/r/20211110101325.186049322@infradead.org --- arch/x86/entry/entry_64.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e23319ad3f42..1ffdbfaad2e2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -739,13 +739,9 @@ SYM_FUNC_START(asm_load_gs_index) swapgs FRAME_END RET -SYM_FUNC_END(asm_load_gs_index) -EXPORT_SYMBOL(asm_load_gs_index) - _ASM_EXTABLE(.Lgs_change, .Lbad_gs) - .section .fixup, "ax" /* running with kernelgs */ -SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) +.Lbad_gs: swapgs /* switch back to user gs */ .macro ZAP_GS /* This can't be a string because the preprocessor needs to see it. */ @@ -756,8 +752,11 @@ SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) xorl %eax, %eax movl %eax, %gs jmp 2b -SYM_CODE_END(.Lbad_gs) - .previous + + _ASM_EXTABLE(.Lgs_change, .Lbad_gs) + +SYM_FUNC_END(asm_load_gs_index) +EXPORT_SYMBOL(asm_load_gs_index) #ifdef CONFIG_XEN_PV /* -- cgit v1.2.3 From aa93e2ad7464ffb90155a5ffdde963816f86d5dc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:08 +0100 Subject: x86/entry_32: Remove .fixup usage Where possible, push the .fixup into code, at the tail of functions. This is hard for macros since they're used in multiple functions, therefore introduce a new extable handler to pop zeros. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.245184699@infradead.org --- arch/x86/entry/entry_32.S | 28 ++++++++-------------------- arch/x86/include/asm/extable_fixup_types.h | 2 ++ arch/x86/mm/extable.c | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 00413e37feee..e0a95d8a6553 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -270,17 +270,9 @@ 3: popl %fs addl $(4 + \pop), %esp /* pop the unused "gs" slot */ IRET_FRAME -.pushsection .fixup, "ax" -4: movl $0, (%esp) - jmp 1b -5: movl $0, (%esp) - jmp 2b -6: movl $0, (%esp) - jmp 3b -.popsection - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 5b) - _ASM_EXTABLE(3b, 6b) + _ASM_EXTABLE_TYPE(1b, 1b, EX_TYPE_POP_ZERO) + _ASM_EXTABLE_TYPE(2b, 2b, EX_TYPE_POP_ZERO) + _ASM_EXTABLE_TYPE(3b, 3b, EX_TYPE_POP_ZERO) .endm .macro RESTORE_ALL_NMI cr3_reg:req pop=0 @@ -925,10 +917,8 @@ SYM_FUNC_START(entry_SYSENTER_32) sti sysexit -.pushsection .fixup, "ax" -2: movl $0, PT_FS(%esp) - jmp 1b -.popsection +2: movl $0, PT_FS(%esp) + jmp 1b _ASM_EXTABLE(1b, 2b) .Lsysenter_fix_flags: @@ -996,8 +986,7 @@ restore_all_switch_stack: */ iret -.section .fixup, "ax" -SYM_CODE_START(asm_iret_error) +.Lasm_iret_error: pushl $0 # no error code pushl $iret_error @@ -1014,9 +1003,8 @@ SYM_CODE_START(asm_iret_error) #endif jmp handle_exception -SYM_CODE_END(asm_iret_error) -.previous - _ASM_EXTABLE(.Lirq_return, asm_iret_error) + + _ASM_EXTABLE(.Lirq_return, .Lasm_iret_error) SYM_FUNC_END(entry_INT80_32) .macro FIXUP_ESPFIX_STACK diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 409524d5d2eb..4d709a2768bb 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -19,4 +19,6 @@ #define EX_TYPE_DEFAULT_MCE_SAFE 12 #define EX_TYPE_FAULT_MCE_SAFE 13 +#define 
EX_TYPE_POP_ZERO 14 + #endif diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 5cd2a88930a9..fb0c4752df1a 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -99,6 +99,18 @@ static bool ex_handler_clear_fs(const struct exception_table_entry *fixup, return ex_handler_default(fixup, regs); } +static bool ex_handler_pop_zero(const struct exception_table_entry *fixup, + struct pt_regs *regs) +{ + /* + * Typically used for when "pop %seg" traps, in which case we'll clear + * the stack slot and re-try the instruction, which will then succeed + * to pop zero. + */ + *((unsigned long *)regs->sp) = 0; + return ex_handler_default(fixup, regs); +} + int ex_get_fixup_type(unsigned long ip) { const struct exception_table_entry *e = search_exception_tables(ip); @@ -156,6 +168,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, case EX_TYPE_WRMSR_IN_MCE: ex_handler_msr_mce(regs, true); break; + case EX_TYPE_POP_ZERO: + return ex_handler_pop_zero(e, regs); } BUG(); } -- cgit v1.2.3 From 4b5305decc8436bfe363d1c1773e8fa1c828b14d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:09 +0100 Subject: x86/extable: Extend extable functionality In order to remove further .fixup usage, extend the extable infrastructure to take additional information from the extable entry sites. Specifically add _ASM_EXTABLE_TYPE_REG() and EX_TYPE_IMM_REG that extend the existing _ASM_EXTABLE_TYPE() by taking an additional register argument and encoding that and an s16 immediate into the existing s32 type field. This limits the actual types to the first byte, 255 seem plenty. Also add a few flags into the type word, specifically CLEAR_AX and CLEAR_DX which clear the return and extended return register. Notes: - due to the % in our register names it's hard to make it more generally usable as arm64 did. - the s16 is far larger than used in these patches, future extensions can easily shrink this to get more bits.
- without the bitfield fix this will not compile, because: 0xFF > -1 and we can't even extract the TYPE field. [nathanchance: Build fix for clang-lto builds: https://lkml.kernel.org/r/20211210234953.3420108-1-nathan@kernel.org ] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Link: https://lore.kernel.org/r/20211110101325.303890153@infradead.org --- arch/x86/include/asm/asm.h | 37 +++++++++++++++++ arch/x86/include/asm/extable.h | 6 +-- arch/x86/include/asm/extable_fixup_types.h | 24 +++++++++++ arch/x86/include/asm/insn-eval.h | 2 + arch/x86/lib/insn-eval.c | 66 +++++++++++++++++++----------- arch/x86/mm/extable.c | 40 ++++++++++++++++-- arch/x86/net/bpf_jit_comp.c | 2 +- 7 files changed, 145 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 3a168483bc8e..c878fed3056f 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -152,6 +152,33 @@ #else /* ! 
__ASSEMBLY__ */ +# define DEFINE_EXTABLE_TYPE_REG \ + ".macro extable_type_reg type:req reg:req\n" \ + ".set found, 0\n" \ + ".set regnr, 0\n" \ + ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set found, found+1\n" \ + ".long \\type + (regnr << 8)\n" \ + ".endif\n" \ + ".set regnr, regnr+1\n" \ + ".endr\n" \ + ".set regnr, 0\n" \ + ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set found, found+1\n" \ + ".long \\type + (regnr << 8)\n" \ + ".endif\n" \ + ".set regnr, regnr+1\n" \ + ".endr\n" \ + ".if (found != 1)\n" \ + ".error \"extable_type_reg: bad register argument\"\n" \ + ".endif\n" \ + ".endm\n" + +# define UNDEFINE_EXTABLE_TYPE_REG \ + ".purgem extable_type_reg\n" + # define _ASM_EXTABLE_TYPE(from, to, type) \ " .pushsection \"__ex_table\",\"a\"\n" \ " .balign 4\n" \ @@ -160,6 +187,16 @@ " .long " __stringify(type) " \n" \ " .popsection\n" +# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \ + " .pushsection \"__ex_table\",\"a\"\n" \ + " .balign 4\n" \ + " .long (" #from ") - .\n" \ + " .long (" #to ") - .\n" \ + DEFINE_EXTABLE_TYPE_REG \ + "extable_type_reg reg=" __stringify(reg) ", type=" __stringify(type) " \n"\ + UNDEFINE_EXTABLE_TYPE_REG \ + " .popsection\n" + /* For C file, we already have NOKPROBE_SYMBOL macro */ /* diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index 93f400eb728f..155c991ba95e 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -21,7 +21,7 @@ */ struct exception_table_entry { - int insn, fixup, type; + int insn, fixup, data; }; struct pt_regs; @@ -31,8 +31,8 @@ struct pt_regs; do { \ (a)->fixup = (b)->fixup + (delta); \ (b)->fixup = (tmp).fixup - (delta); \ - (a)->type = (b)->type; \ - (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ } while (0) extern int fixup_exception(struct pt_regs *regs, int trapnr, diff 
--git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 4d709a2768bb..944f8329022a 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -2,6 +2,29 @@ #ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H #define _ASM_X86_EXTABLE_FIXUP_TYPES_H +/* + * Our IMM is signed, as such it must live at the top end of the word. Also, + * since C99 hex constants are of ambiguous type, force cast the mask to 'int' + * so that FIELD_GET() will DTRT and sign extend the value when it extracts it. + */ +#define EX_DATA_TYPE_MASK ((int)0x000000FF) +#define EX_DATA_REG_MASK ((int)0x00000F00) +#define EX_DATA_FLAG_MASK ((int)0x0000F000) +#define EX_DATA_IMM_MASK ((int)0xFFFF0000) + +#define EX_DATA_REG_SHIFT 8 +#define EX_DATA_FLAG_SHIFT 12 +#define EX_DATA_IMM_SHIFT 16 + +#define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT) +#define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT) + +/* flags */ +#define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1) +#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2) +#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3) + +/* types */ #define EX_TYPE_NONE 0 #define EX_TYPE_DEFAULT 1 #define EX_TYPE_FAULT 2 @@ -20,5 +43,6 @@ #define EX_TYPE_FAULT_MCE_SAFE 13 #define EX_TYPE_POP_ZERO 14 +#define EX_TYPE_IMM_REG 15 /* reg := (long)imm */ #endif diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 4ec3613551e3..3df123f437c9 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -15,6 +15,8 @@ #define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf) #define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4)) +int pt_regs_offset(struct pt_regs *regs, int regno); + bool insn_has_rep_prefix(struct insn *insn); void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index
eb3ccffb9b9d..7760d228041b 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -412,32 +412,39 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) #endif /* CONFIG_X86_64 */ } -static int get_reg_offset(struct insn *insn, struct pt_regs *regs, - enum reg_type type) +static const int pt_regoff[] = { + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di), +#ifdef CONFIG_X86_64 + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), + offsetof(struct pt_regs, r10), + offsetof(struct pt_regs, r11), + offsetof(struct pt_regs, r12), + offsetof(struct pt_regs, r13), + offsetof(struct pt_regs, r14), + offsetof(struct pt_regs, r15), +#endif +}; + +int pt_regs_offset(struct pt_regs *regs, int regno) { + if ((unsigned)regno < ARRAY_SIZE(pt_regoff)) + return pt_regoff[regno]; + return -EDOM; +} + +static int get_regno(struct insn *insn, enum reg_type type) +{ + int nr_registers = ARRAY_SIZE(pt_regoff); int regno = 0; - static const int regoff[] = { - offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), - offsetof(struct pt_regs, dx), - offsetof(struct pt_regs, bx), - offsetof(struct pt_regs, sp), - offsetof(struct pt_regs, bp), - offsetof(struct pt_regs, si), - offsetof(struct pt_regs, di), -#ifdef CONFIG_X86_64 - offsetof(struct pt_regs, r8), - offsetof(struct pt_regs, r9), - offsetof(struct pt_regs, r10), - offsetof(struct pt_regs, r11), - offsetof(struct pt_regs, r12), - offsetof(struct pt_regs, r13), - offsetof(struct pt_regs, r14), - offsetof(struct pt_regs, r15), -#endif - }; - int nr_registers = ARRAY_SIZE(regoff); /* * Don't possibly decode a 32-bit instructions as * reading a 64-bit-only register. 
@@ -505,7 +512,18 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, WARN_ONCE(1, "decoded an instruction with an invalid register"); return -EINVAL; } - return regoff[regno]; + return regno; +} + +static int get_reg_offset(struct insn *insn, struct pt_regs *regs, + enum reg_type type) +{ + int regno = get_regno(insn, type); + + if (regno < 0) + return regno; + + return pt_regs_offset(regs, regno); } /** diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index fb0c4752df1a..cef8901b3e6f 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -2,12 +2,25 @@ #include #include #include +#include #include #include #include #include #include +#include + +static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr) +{ + int reg_offset = pt_regs_offset(regs, nr); + static unsigned long __dummy; + + if (WARN_ON_ONCE(reg_offset < 0)) + return &__dummy; + + return (unsigned long *)((unsigned long)regs + reg_offset); +} static inline unsigned long ex_fixup_addr(const struct exception_table_entry *x) @@ -15,10 +28,15 @@ ex_fixup_addr(const struct exception_table_entry *x) return (unsigned long)&x->fixup + x->fixup; } -static bool ex_handler_default(const struct exception_table_entry *fixup, +static bool ex_handler_default(const struct exception_table_entry *e, struct pt_regs *regs) { - regs->ip = ex_fixup_addr(fixup); + if (e->data & EX_FLAG_CLEAR_AX) + regs->ax = 0; + if (e->data & EX_FLAG_CLEAR_DX) + regs->dx = 0; + + regs->ip = ex_fixup_addr(e); return true; } @@ -111,17 +129,25 @@ static bool ex_handler_pop_zero(const struct exception_table_entry *fixup, return ex_handler_default(fixup, regs); } +static bool ex_handler_imm_reg(const struct exception_table_entry *fixup, + struct pt_regs *regs, int reg, int imm) +{ + *pt_regs_nr(regs, reg) = (long)imm; + return ex_handler_default(fixup, regs); +} + int ex_get_fixup_type(unsigned long ip) { const struct exception_table_entry *e = search_exception_tables(ip); - return e ? 
e->type : EX_TYPE_NONE; + return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE; } int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { const struct exception_table_entry *e; + int type, reg, imm; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { @@ -141,7 +167,11 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, if (!e) return 0; - switch (e->type) { + type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); + reg = FIELD_GET(EX_DATA_REG_MASK, e->data); + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); + + switch (type) { case EX_TYPE_DEFAULT: case EX_TYPE_DEFAULT_MCE_SAFE: return ex_handler_default(e, regs); @@ -170,6 +200,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, break; case EX_TYPE_POP_ZERO: return ex_handler_pop_zero(e, regs); + case EX_TYPE_IMM_REG: + return ex_handler_imm_reg(e, regs, reg, imm); } BUG(); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 726700fabca6..de10dc4d6e1d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1291,7 +1291,7 @@ st: if (is_imm8(insn->off)) } ex->insn = delta; - ex->type = EX_TYPE_BPF; + ex->data = EX_TYPE_BPF; if (dst_reg > BPF_REG_9) { pr_err("verifier error\n"); -- cgit v1.2.3 From d52a7344bdfa9c3442d3f86fb3501d9343726c76 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:10 +0100 Subject: x86/msr: Remove .fixup usage Rework the MSR accessors to remove .fixup usage. Add two new extable types (to the 4 already existing msr ones) using the new register infrastructure to record which register should get the error value. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.364084212@infradead.org --- arch/x86/include/asm/extable_fixup_types.h | 23 +++++++------- arch/x86/include/asm/msr.h | 26 +++++---------- arch/x86/mm/extable.c | 51 ++++++++++++++++-------------- 3 files changed, 47 insertions(+), 53 deletions(-) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 944f8329022a..9d597fe1017d 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -32,17 +32,16 @@ #define EX_TYPE_COPY 4 #define EX_TYPE_CLEAR_FS 5 #define EX_TYPE_FPU_RESTORE 6 -#define EX_TYPE_WRMSR 7 -#define EX_TYPE_RDMSR 8 -#define EX_TYPE_BPF 9 - -#define EX_TYPE_WRMSR_IN_MCE 10 -#define EX_TYPE_RDMSR_IN_MCE 11 - -#define EX_TYPE_DEFAULT_MCE_SAFE 12 -#define EX_TYPE_FAULT_MCE_SAFE 13 - -#define EX_TYPE_POP_ZERO 14 -#define EX_TYPE_IMM_REG 15 /* reg := (long)imm */ +#define EX_TYPE_BPF 7 +#define EX_TYPE_WRMSR 8 +#define EX_TYPE_RDMSR 9 +#define EX_TYPE_WRMSR_SAFE 10 /* reg := -EIO */ +#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */ +#define EX_TYPE_WRMSR_IN_MCE 12 +#define EX_TYPE_RDMSR_IN_MCE 13 +#define EX_TYPE_DEFAULT_MCE_SAFE 14 +#define EX_TYPE_FAULT_MCE_SAFE 15 +#define EX_TYPE_POP_ZERO 16 +#define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ #endif diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 6b52182e178a..d42e6c6b47b1 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -137,17 +137,11 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, { DECLARE_ARGS(val, low, high); - asm volatile("2: rdmsr ; xor %[err],%[err]\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err]\n\t" - "xorl %%eax, %%eax\n\t" - "xorl %%edx, %%edx\n\t" - "jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) + asm volatile("1: rdmsr ; xor %[err],%[err]\n" + "2:\n\t" + 
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err]) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), [fault] "i" (-EIO)); + : "c" (msr)); if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); return EAX_EDX_VAL(val, low, high); @@ -169,15 +163,11 @@ native_write_msr_safe(unsigned int msr, u32 low, u32 high) { int err; - asm volatile("2: wrmsr ; xor %[err],%[err]\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err] ; jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) + asm volatile("1: wrmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err]) : [err] "=a" (err) - : "c" (msr), "0" (low), "d" (high), - [fault] "i" (-EIO) + : "c" (msr), "0" (low), "d" (high) : "memory"); if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index cef8901b3e6f..717cd35fe41c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -83,28 +83,29 @@ static bool ex_handler_copy(const struct exception_table_entry *fixup, return ex_handler_fault(fixup, regs, trapnr); } -static bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs) +static bool ex_handler_msr(const struct exception_table_entry *fixup, + struct pt_regs *regs, bool wrmsr, bool safe, int reg) { - if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", + if (!safe && wrmsr && + pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, + (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) + show_stack_regs(regs); + + if (!safe && !wrmsr && + pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) show_stack_regs(regs); - /* Pretend that the read succeeded and 
returned 0. */ - regs->ax = 0; - regs->dx = 0; - return ex_handler_default(fixup, regs); -} + if (!wrmsr) { + /* Pretend that the read succeeded and returned 0. */ + regs->ax = 0; + regs->dx = 0; + } -static bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs) -{ - if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, (unsigned int)regs->dx, - (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) - show_stack_regs(regs); + if (safe) + *pt_regs_nr(regs, reg) = -EIO; - /* Pretend that the write succeeded. */ return ex_handler_default(fixup, regs); } @@ -186,18 +187,22 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, return ex_handler_clear_fs(e, regs); case EX_TYPE_FPU_RESTORE: return ex_handler_fprestore(e, regs); - case EX_TYPE_RDMSR: - return ex_handler_rdmsr_unsafe(e, regs); - case EX_TYPE_WRMSR: - return ex_handler_wrmsr_unsafe(e, regs); case EX_TYPE_BPF: return ex_handler_bpf(e, regs); - case EX_TYPE_RDMSR_IN_MCE: - ex_handler_msr_mce(regs, false); - break; + case EX_TYPE_WRMSR: + return ex_handler_msr(e, regs, true, false, reg); + case EX_TYPE_RDMSR: + return ex_handler_msr(e, regs, false, false, reg); + case EX_TYPE_WRMSR_SAFE: + return ex_handler_msr(e, regs, true, true, reg); + case EX_TYPE_RDMSR_SAFE: + return ex_handler_msr(e, regs, false, true, reg); case EX_TYPE_WRMSR_IN_MCE: ex_handler_msr_mce(regs, true); break; + case EX_TYPE_RDMSR_IN_MCE: + ex_handler_msr_mce(regs, false); + break; case EX_TYPE_POP_ZERO: return ex_handler_pop_zero(e, regs); case EX_TYPE_IMM_REG: -- cgit v1.2.3 From 4c132d1d844a53fc4e4b5c34e36ef10d6124b783 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:11 +0100 Subject: x86/futex: Remove .fixup usage Use the new EX_TYPE_IMM_REG to store -EFAULT into the designated 'ret' register, this removes the need for anonymous .fixup code. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.426016322@infradead.org --- arch/x86/include/asm/extable_fixup_types.h | 2 ++ arch/x86/include/asm/futex.h | 28 ++++++++-------------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 9d597fe1017d..7469038de100 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -42,6 +42,8 @@ #define EX_TYPE_DEFAULT_MCE_SAFE 14 #define EX_TYPE_FAULT_MCE_SAFE 15 #define EX_TYPE_POP_ZERO 16 + #define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ +#define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) #endif diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index f9c00110a69a..99d345b686fa 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -17,13 +17,9 @@ do { \ int oldval = 0, ret; \ asm volatile("1:\t" insn "\n" \ "2:\n" \ - "\t.section .fixup,\"ax\"\n" \ - "3:\tmov\t%3, %1\n" \ - "\tjmp\t2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ - : "i" (-EFAULT), "0" (oparg), "1" (0)); \ + : "0" (oparg), "1" (0)); \ if (ret) \ goto label; \ *oval = oldval; \ @@ -39,15 +35,11 @@ do { \ "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t2b\n" \ "4:\n" \ - "\t.section .fixup,\"ax\"\n" \ - "5:\tmov\t%5, %1\n" \ - "\tjmp\t4b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 5b) \ - _ASM_EXTABLE_UA(3b, 5b) \ + _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \ + _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \ : "=&a" (oldval), "=&r" (ret), \ "+m" (*uaddr), "=&r" (tem) \ - : "r" (oparg), "i" (-EFAULT), "1" (0)); \ + : "r" (oparg), "1" (0)); \ if (ret) \ goto label; \ *oval = oldval; \ @@ -95,15 +87,11 @@ static inline int 
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; asm volatile("\n" - "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" + "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" "2:\n" - "\t.section .fixup, \"ax\"\n" - "3:\tmov %3, %0\n" - "\tjmp 2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \ : "+r" (ret), "=a" (oldval), "+m" (*uaddr) - : "i" (-EFAULT), "r" (newval), "1" (oldval) + : "r" (newval), "1" (oldval) : "memory" ); user_access_end(); -- cgit v1.2.3 From 99641e094d6ccf547b3eba833aea9a34fdf5681e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:12 +0100 Subject: x86/uaccess: Remove .fixup usage For the !CC_AS_ASM_GOTO_OUTPUT (aka. the legacy codepath), remove the .fixup usage by employing both EX_TYPE_EFAULT_REG and EX_FLAG_CLEAR. Like was already done for X86_32's version of __get_user_asm_u64() use the "a" register for output, specifically so we can use CLEAR_AX. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.485154848@infradead.org --- arch/x86/include/asm/uaccess.h | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 33a68407def3..ac6233a15856 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -351,24 +351,22 @@ do { \ "1: movl %[lowbits],%%eax\n" \ "2: movl %[highbits],%%edx\n" \ "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: mov %[efault],%[errout]\n" \ - " xorl %%eax,%%eax\n" \ - " xorl %%edx,%%edx\n" \ - " jmp 3b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 4b) \ - _ASM_EXTABLE_UA(2b, 4b) \ + _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX_DX, \ + %[errout]) \ + _ASM_EXTABLE_TYPE_REG(2b, 3b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX_DX, \ + %[errout]) \ : [errout] "=r" (retval), \ [output] "=&A"(x) \ : [lowbits] "m" (__m(__ptr)), \ [highbits] "m" __m(((u32 __user *)(__ptr)) + 1), \ - [efault] "i" (-EFAULT), "0" (retval)); \ + "0" (retval)); \ }) #else #define __get_user_asm_u64(x, ptr, retval) \ - __get_user_asm(x, ptr, retval, "q", "=r") + __get_user_asm(x, ptr, retval, "q") #endif #define __get_user_size(x, ptr, size, retval) \ @@ -379,14 +377,14 @@ do { \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ - __get_user_asm(x_u8__, ptr, retval, "b", "=q"); \ + __get_user_asm(x_u8__, ptr, retval, "b"); \ (x) = x_u8__; \ break; \ case 2: \ - __get_user_asm(x, ptr, retval, "w", "=r"); \ + __get_user_asm(x, ptr, retval, "w"); \ break; \ case 4: \ - __get_user_asm(x, ptr, retval, "l", "=r"); \ + __get_user_asm(x, ptr, retval, "l"); \ break; \ case 8: \ __get_user_asm_u64(x, ptr, retval); \ @@ -396,20 +394,17 @@ do { \ } \ } while (0) -#define __get_user_asm(x, addr, err, itype, ltype) \ +#define __get_user_asm(x, addr, err, itype) \ asm volatile("\n" \ "1: mov"itype" 
%[umem],%[output]\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %[efault],%[errout]\n" \ - " xorl %k[output],%k[output]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX, \ + %[errout]) \ : [errout] "=r" (err), \ - [output] ltype(x) \ + [output] "=a" (x) \ : [umem] "m" (__m(addr)), \ - [efault] "i" (-EFAULT), "0" (err)) + "0" (err)) #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT -- cgit v1.2.3 From e2b48e43284c0916ebf8e4240199b9d9747e337a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:13 +0100 Subject: x86/xen: Remove .fixup usage Employ the fancy new EX_TYPE_IMM_REG to store -EFAULT in the return register and use this to remove some Xen .fixup usage. All callers of these functions only test for 0 return, so the actual return value change from -1 to -EFAULT is immaterial. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juergen Gross Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.545019822@infradead.org --- arch/x86/include/asm/xen/page.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a162e559753..e989bc2269f5 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -96,11 +96,7 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) asm volatile("1: mov %[val], %[ptr]\n" "2:\n" - ".section .fixup, \"ax\"\n" - "3: sub $1, %[ret]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret]) : [ret] "+r" (ret), [ptr] "=m" (*addr) : [val] "r" (val)); @@ -110,16 +106,12 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) static inline int xen_safe_read_ulong(const unsigned long *addr, unsigned long *val) { - int ret = 0; unsigned long rval = ~0ul; + int ret = 0; asm volatile("1: 
mov %[ptr], %[rval]\n" "2:\n" - ".section .fixup, \"ax\"\n" - "3: sub $1, %[ret]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret]) : [ret] "+r" (ret), [rval] "+r" (rval) : [ptr] "m" (*addr)); *val = rval; -- cgit v1.2.3 From 1c3b9091d084d92c70a4260553853509637276b9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:14 +0100 Subject: x86/fpu: Remove .fixup usage Employ EX_TYPE_EFAULT_REG to store '-EFAULT' into the %[err] register on exception. All the callers only ever test for 0, so the change from -1 to -EFAULT is immaterial. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.604494664@infradead.org --- arch/x86/kernel/fpu/legacy.h | 6 +----- arch/x86/kernel/fpu/xstate.h | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/fpu/legacy.h b/arch/x86/kernel/fpu/legacy.h index 17c26b164c63..098f367bb8a7 100644 --- a/arch/x86/kernel/fpu/legacy.h +++ b/arch/x86/kernel/fpu/legacy.h @@ -35,11 +35,7 @@ static inline void ldmxcsr(u32 mxcsr) int err; \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[err]) \ : [err] "=r" (err), output \ : "0"(0), input); \ err; \ diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 86ea7c0fa2f6..e0c9264b1dd0 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -108,11 +108,7 @@ static inline u64 xfeatures_mask_independent(void) "\n" \ "xor %[err], %[err]\n" \ "3:\n" \ - ".pushsection .fixup,\"ax\"\n" \ - "4: movl $-2, %[err]\n" \ - "jmp 3b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(661b, 4b) \ + _ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \ : [err] "=r" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") -- cgit 
v1.2.3 From 5fc77b916cb82fe476ae2344e0ec37445227a4f8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:15 +0100 Subject: x86/segment: Remove .fixup usage Create and use EX_TYPE_ZERO_REG to clear the register and retry the segment load on exception. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.663529463@infradead.org --- arch/x86/include/asm/extable_fixup_types.h | 1 + arch/x86/include/asm/segment.h | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 7469038de100..a43b8c128e74 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -45,5 +45,6 @@ #define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ #define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) +#define EX_TYPE_ZERO_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(0)) #endif diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 8dd8e8ec9fa5..b228c9d44ee7 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -307,14 +307,7 @@ do { \ \ asm volatile(" \n" \ "1: movl %k0,%%" #seg " \n" \ - \ - ".section .fixup,\"ax\" \n" \ - "2: xorl %k0,%k0 \n" \ - " jmp 1b \n" \ - ".previous \n" \ - \ - _ASM_EXTABLE(1b, 2b) \ - \ + _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k0)\ : "+r" (__val) : : "memory"); \ } while (0) -- cgit v1.2.3 From c9a34c3f4ece192f6d804039fe6aac9618f0d236 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:16 +0100 Subject: x86/kvm: Remove .fixup usage KVM instruction emulation has a gnarly hack where the .fixup does a return, however there's already a ret right after the 10b label, so mark that as 11 and have the exception clear %esi to remove the .fixup. 
Suggested-by: Paolo Bonzini Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.722157053@infradead.org --- arch/x86/include/asm/extable_fixup_types.h | 1 + arch/x86/kvm/emulate.c | 16 ++++------------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index a43b8c128e74..31ad42f1a76e 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -46,5 +46,6 @@ #define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ #define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) #define EX_TYPE_ZERO_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(0)) +#define EX_TYPE_ONE_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(1)) #endif diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b026350c04c7..1e19a4de441f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -315,7 +315,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); __FOP_FUNC(#name) #define __FOP_RET(name) \ - ASM_RET \ + "11: " ASM_RET \ ".size " name ", .-" name "\n\t" #define FOP_RET(name) \ @@ -344,7 +344,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); __FOP_RET(#op "_" #dst) #define FOP1EEX(op, dst) \ - FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception) + FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi) #define FASTOP1(op) \ FOP_START(op) \ @@ -434,10 +434,6 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #op " %al \n\t" \ __FOP_RET(#op) -asm(".pushsection .fixup, \"ax\"\n" - "kvm_fastop_exception: xor %esi, %esi; " ASM_RET - ".popsection"); - FOP_START(setcc) FOP_SETCC(seto) FOP_SETCC(setno) @@ -473,12 +469,8 @@ FOP_END; \ asm volatile("1:" insn "\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: movl $1, %[_fault]\n" \ - " jmp 2b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [_fault] "+qm"(_fault) 
inoutclob ); \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \ + : [_fault] "+r"(_fault) inoutclob ); \ \ _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \ }) -- cgit v1.2.3 From 3e8ea7803a1dedf19120a2fef12c590e90e4b469 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:17 +0100 Subject: x86/vmx: Remove .fixup usage In the vmread exception path, use the, thus far, unused output register to push the @fault argument onto the stack. This, in turn, enables the exception handler to not do pushes and only modify that register when an exception does occur. As noted by Sean the output constraint needs to be changed to "=&r" to avoid the value and field occupying the same register. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paolo Bonzini Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.781308550@infradead.org --- arch/x86/kvm/vmx/vmx_ops.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 9e9ef47e988c..35d9324c2f2a 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -80,9 +80,11 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) * @field, and bounce through the trampoline to preserve * volatile registers. */ - "push $0\n\t" + "xorl %k1, %k1\n\t" + "2:\n\t" + "push %1\n\t" "push %2\n\t" - "2:call vmread_error_trampoline\n\t" + "call vmread_error_trampoline\n\t" /* * Unwind the stack. Note, the trampoline zeros out the @@ -93,13 +95,9 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) "3:\n\t" /* VMREAD faulted. As above, except push '1' for @fault.
*/ - ".pushsection .fixup, \"ax\"\n\t" - "4: push $1\n\t" - "push %2\n\t" - "jmp 2b\n\t" - ".popsection\n\t" - _ASM_EXTABLE(1b, 4b) - : ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc"); + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %1) + + : ASM_CALL_CONSTRAINT, "=&r"(value) : "r"(field) : "cc"); return value; } -- cgit v1.2.3 From fedb24cda1ca5407e1965b261e349ea85d6c03dc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:19 +0100 Subject: x86/checksum_32: Remove .fixup usage Simply add EX_FLAG_CLEAR_AX to do as the .fixup used to do. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.899657959@infradead.org --- arch/x86/lib/checksum_32.S | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 929ad1747dea..23318c338db0 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -260,9 +260,9 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, * Copy from ds while checksumming, otherwise like csum_partial */ -#define EXC(y...) \ - 9999: y; \ - _ASM_EXTABLE_UA(9999b, 6001f) +#define EXC(y...) 
\ + 9999: y; \ + _ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX) #ifndef CONFIG_X86_USE_PPRO_CHECKSUM @@ -358,15 +358,6 @@ EXC( movb %cl, (%edi) ) adcl $0, %eax 7: -# Exception handler: -.section .fixup, "ax" - -6001: - xorl %eax, %eax - jmp 7b - -.previous - popl %ebx popl %esi popl %edi @@ -439,10 +430,6 @@ EXC( movb %dl, (%edi) ) 6: addl %edx, %eax adcl $0, %eax 7: -.section .fixup, "ax" -6001: xorl %eax, %eax - jmp 7b -.previous popl %esi popl %edi -- cgit v1.2.3 From 5ce8e39f55521c762f0e6d1bba9597284b1f2e69 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:20 +0100 Subject: x86/sgx: Remove .fixup usage Create EX_TYPE_FAULT_SGX which does as EX_TYPE_FAULT does, except adds this extra bit that SGX really fancies having. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.961246679@infradead.org --- arch/x86/include/asm/extable_fixup_types.h | 2 ++ arch/x86/include/asm/sgx.h | 18 +++++++++++++++ arch/x86/kernel/cpu/sgx/encls.h | 36 +++++------------------------- arch/x86/mm/extable.c | 10 +++++++++ 4 files changed, 35 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 31ad42f1a76e..8278ed220c17 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -48,4 +48,6 @@ #define EX_TYPE_ZERO_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(0)) #define EX_TYPE_ONE_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(1)) +#define EX_TYPE_FAULT_SGX 18 + #endif diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 05f3e21f01a7..3f9334ef67cd 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -45,6 +45,24 @@ enum sgx_encls_function { EMODT = 0x0F, }; +/** + * SGX_ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr + * + * ENCLS has its own (positive value) error codes and also generates + * ENCLS specific #GP 
and #PF faults. And the ENCLS values get munged + * with system error codes as everything percolates back up the stack. + * Unfortunately (for us), we need to precisely identify each unique + * error code, e.g. the action taken if EWB fails varies based on the + * type of fault and on the exact SGX error code, i.e. we can't simply + * convert all faults to -EFAULT. + * + * To make all three error types coexist, we set bit 30 to identify an + * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate + * between positive (faults and SGX error codes) and negative (system + * error codes) values. + */ +#define SGX_ENCLS_FAULT_FLAG 0x40000000 + /** * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h index 9b204843b78d..fa04a73daf9c 100644 --- a/arch/x86/kernel/cpu/sgx/encls.h +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -11,26 +11,8 @@ #include #include "sgx.h" -/** - * ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr - * - * ENCLS has its own (positive value) error codes and also generates - * ENCLS specific #GP and #PF faults. And the ENCLS values get munged - * with system error codes as everything percolates back up the stack. - * Unfortunately (for us), we need to precisely identify each unique - * error code, e.g. the action taken if EWB fails varies based on the - * type of fault and on the exact SGX error code, i.e. we can't simply - * convert all faults to -EFAULT. - * - * To make all three error types coexist, we set bit 30 to identify an - * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate - * between positive (faults and SGX error codes) and negative (system - * error codes) values. - */ -#define ENCLS_FAULT_FLAG 0x40000000 - /* Retrieve the encoded trapnr from the specified return code. 
*/ -#define ENCLS_TRAPNR(r) ((r) & ~ENCLS_FAULT_FLAG) +#define ENCLS_TRAPNR(r) ((r) & ~SGX_ENCLS_FAULT_FLAG) /* Issue a WARN() about an ENCLS function. */ #define ENCLS_WARN(r, name) { \ @@ -50,7 +32,7 @@ */ static inline bool encls_faulted(int ret) { - return ret & ENCLS_FAULT_FLAG; + return ret & SGX_ENCLS_FAULT_FLAG; } /** @@ -88,11 +70,7 @@ static inline bool encls_failed(int ret) asm volatile( \ "1: .byte 0x0f, 0x01, 0xcf;\n\t" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: orl $"__stringify(ENCLS_FAULT_FLAG)",%%eax\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_FAULT(1b, 3b) \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \ : "=a"(ret) \ : "a"(rax), inputs \ : "memory", "cc"); \ @@ -127,7 +105,7 @@ static inline bool encls_failed(int ret) * * Return: * 0 on success, - * trapnr with ENCLS_FAULT_FLAG set on fault + * trapnr with SGX_ENCLS_FAULT_FLAG set on fault */ #define __encls_N(rax, rbx_out, inputs...) \ ({ \ @@ -136,11 +114,7 @@ static inline bool encls_failed(int ret) "1: .byte 0x0f, 0x01, 0xcf;\n\t" \ " xor %%eax,%%eax;\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: orl $"__stringify(ENCLS_FAULT_FLAG)",%%eax\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_FAULT(1b, 3b) \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \ : "=a"(ret), "=b"(rbx_out) \ : "a"(rax), inputs \ : "memory"); \ diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 717cd35fe41c..c869f43e8a2e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -10,6 +10,7 @@ #include #include #include +#include static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr) { @@ -47,6 +48,13 @@ static bool ex_handler_fault(const struct exception_table_entry *fixup, return ex_handler_default(fixup, regs); } +static bool ex_handler_sgx(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + regs->ax = trapnr | SGX_ENCLS_FAULT_FLAG; + return ex_handler_default(fixup, regs); +} + /* * Handler for when we fail to restore a 
task's FPU state. We should never get * here because the FPU state of a task using the FPU (task->thread.fpu.state) @@ -207,6 +215,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, return ex_handler_pop_zero(e, regs); case EX_TYPE_IMM_REG: return ex_handler_imm_reg(e, regs, reg, imm); + case EX_TYPE_FAULT_SGX: + return ex_handler_sgx(e, regs, trapnr); } BUG(); } -- cgit v1.2.3 From 13e4bf1bddcb65dd028aaa492789e8d61efaafa1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:21 +0100 Subject: x86/usercopy_32: Simplify __copy_user_intel_nocache() Having an exception jump to a .fixup only to immediately jump out is daft; jump to the right place in one go. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101326.021517780@infradead.org --- arch/x86/lib/usercopy_32.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 962006bdb8a8..1eb15060c436 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -256,28 +256,28 @@ static unsigned long __copy_user_intel_nocache(void *to, "8:\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" - "16: jmp 8b\n" + " jmp 8b\n" ".previous\n" - _ASM_EXTABLE_UA(0b, 16b) - _ASM_EXTABLE_UA(1b, 16b) - _ASM_EXTABLE_UA(2b, 16b) - _ASM_EXTABLE_UA(21b, 16b) - _ASM_EXTABLE_UA(3b, 16b) - _ASM_EXTABLE_UA(31b, 16b) - _ASM_EXTABLE_UA(4b, 16b) - _ASM_EXTABLE_UA(41b, 16b) - _ASM_EXTABLE_UA(10b, 16b) - _ASM_EXTABLE_UA(51b, 16b) - _ASM_EXTABLE_UA(11b, 16b) - _ASM_EXTABLE_UA(61b, 16b) - _ASM_EXTABLE_UA(12b, 16b) - _ASM_EXTABLE_UA(71b, 16b) - _ASM_EXTABLE_UA(13b, 16b) - _ASM_EXTABLE_UA(81b, 16b) - _ASM_EXTABLE_UA(14b, 16b) - _ASM_EXTABLE_UA(91b, 16b) + _ASM_EXTABLE_UA(0b, 8b) + _ASM_EXTABLE_UA(1b, 8b) + _ASM_EXTABLE_UA(2b, 8b) + _ASM_EXTABLE_UA(21b, 8b) + _ASM_EXTABLE_UA(3b, 8b) + _ASM_EXTABLE_UA(31b, 8b) +
_ASM_EXTABLE_UA(4b, 8b) + _ASM_EXTABLE_UA(41b, 8b) + _ASM_EXTABLE_UA(10b, 8b) + _ASM_EXTABLE_UA(51b, 8b) + _ASM_EXTABLE_UA(11b, 8b) + _ASM_EXTABLE_UA(61b, 8b) + _ASM_EXTABLE_UA(12b, 8b) + _ASM_EXTABLE_UA(71b, 8b) + _ASM_EXTABLE_UA(13b, 8b) + _ASM_EXTABLE_UA(81b, 8b) + _ASM_EXTABLE_UA(14b, 8b) + _ASM_EXTABLE_UA(91b, 8b) _ASM_EXTABLE_UA(6b, 9b) - _ASM_EXTABLE_UA(7b, 16b) + _ASM_EXTABLE_UA(7b, 8b) : "=&c"(size), "=&D" (d0), "=&S" (d1) : "1"(to), "2"(from), "0"(size) : "eax", "edx", "memory"); -- cgit v1.2.3 From d5d797dcbd781cb7c526ad32f31c7fd96babfdb2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:22 +0100 Subject: x86/usercopy: Remove .fixup usage Typically usercopy does whole word copies followed by a number of byte copies to finish the tail. This means that on exception it needs to compute the remaining length as: words*sizeof(long) + bytes. Create a new extable handler to do just this. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101326.081701085@infradead.org --- arch/x86/include/asm/extable_fixup_types.h | 5 +++++ arch/x86/lib/usercopy_32.c | 28 +++++----------------------- arch/x86/lib/usercopy_64.c | 8 +++----- arch/x86/mm/extable.c | 9 +++++++++ 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 8278ed220c17..b5ab333e064a 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -50,4 +50,9 @@ #define EX_TYPE_FAULT_SGX 18 +#define EX_TYPE_UCOPY_LEN 19 /* cx := reg + imm*cx */ +#define EX_TYPE_UCOPY_LEN1 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(1)) +#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4)) +#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8)) + #endif diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1eb15060c436..422257c350c6 100644 --- a/arch/x86/lib/usercopy_32.c 
+++ b/arch/x86/lib/usercopy_32.c @@ -42,11 +42,7 @@ do { \ " movl %2,%0\n" \ "1: rep; stosb\n" \ "2: " ASM_CLAC "\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%2,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(0b, 3b) \ + _ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN4, %2) \ _ASM_EXTABLE_UA(1b, 2b) \ : "=&c"(size), "=&D" (__d0) \ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ @@ -148,10 +144,6 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size) "36: movl %%eax, %0\n" "37: rep; movsb\n" "100:\n" - ".section .fixup,\"ax\"\n" - "101: lea 0(%%eax,%0,4),%0\n" - " jmp 100b\n" - ".previous\n" _ASM_EXTABLE_UA(1b, 100b) _ASM_EXTABLE_UA(2b, 100b) _ASM_EXTABLE_UA(3b, 100b) @@ -189,7 +181,7 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size) _ASM_EXTABLE_UA(35b, 100b) _ASM_EXTABLE_UA(36b, 100b) _ASM_EXTABLE_UA(37b, 100b) - _ASM_EXTABLE_UA(99b, 101b) + _ASM_EXTABLE_TYPE_REG(99b, 100b, EX_TYPE_UCOPY_LEN4, %%eax) : "=&c"(size), "=&D" (d0), "=&S" (d1) : "1"(to), "2"(from), "0"(size) : "eax", "edx", "memory"); @@ -254,10 +246,6 @@ static unsigned long __copy_user_intel_nocache(void *to, " movl %%eax,%0\n" "7: rep; movsb\n" "8:\n" - ".section .fixup,\"ax\"\n" - "9: lea 0(%%eax,%0,4),%0\n" - " jmp 8b\n" - ".previous\n" _ASM_EXTABLE_UA(0b, 8b) _ASM_EXTABLE_UA(1b, 8b) _ASM_EXTABLE_UA(2b, 8b) @@ -276,7 +264,7 @@ static unsigned long __copy_user_intel_nocache(void *to, _ASM_EXTABLE_UA(81b, 8b) _ASM_EXTABLE_UA(14b, 8b) _ASM_EXTABLE_UA(91b, 8b) - _ASM_EXTABLE_UA(6b, 9b) + _ASM_EXTABLE_TYPE_REG(6b, 8b, EX_TYPE_UCOPY_LEN4, %%eax) _ASM_EXTABLE_UA(7b, 8b) : "=&c"(size), "=&D" (d0), "=&S" (d1) : "1"(to), "2"(from), "0"(size) @@ -314,14 +302,8 @@ do { \ " movl %3,%0\n" \ "1: rep; movsb\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "5: addl %3,%0\n" \ - " jmp 2b\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(4b, 5b) \ - _ASM_EXTABLE_UA(0b, 3b) \ + _ASM_EXTABLE_TYPE_REG(4b, 2b, 
EX_TYPE_UCOPY_LEN1, %3) \ + _ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN4, %3) \ _ASM_EXTABLE_UA(1b, 2b) \ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ : "3"(size), "0"(size), "1"(to), "2"(from) \ diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 508c81e97ab1..0402a749f3a0 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -35,12 +35,10 @@ unsigned long __clear_user(void __user *addr, unsigned long size) " incq %[dst]\n" " decl %%ecx ; jnz 1b\n" "2:\n" - ".section .fixup,\"ax\"\n" - "3: lea 0(%[size1],%[size8],8),%[size8]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE_UA(0b, 3b) + + _ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN8, %[size1]) _ASM_EXTABLE_UA(1b, 2b) + : [size8] "=&c"(size), [dst] "=&D" (__d0) : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr)); clac(); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index c869f43e8a2e..41eaa648349e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -145,6 +145,13 @@ static bool ex_handler_imm_reg(const struct exception_table_entry *fixup, return ex_handler_default(fixup, regs); } +static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, int reg, int imm) +{ + regs->cx = imm * regs->cx + *pt_regs_nr(regs, reg); + return ex_handler_uaccess(fixup, regs, trapnr); +} + int ex_get_fixup_type(unsigned long ip) { const struct exception_table_entry *e = search_exception_tables(ip); @@ -217,6 +224,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, return ex_handler_imm_reg(e, regs, reg, imm); case EX_TYPE_FAULT_SGX: return ex_handler_sgx(e, regs, trapnr); + case EX_TYPE_UCOPY_LEN: + return ex_handler_ucopy_len(e, regs, trapnr, reg, imm); } BUG(); } -- cgit v1.2.3 From b7760780257354bb14de62abed868405b844fa13 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:23 +0100 Subject: x86/word-at-a-time: Remove .fixup usage 
Rewrite load_unaligned_zeropad() to not require .fixup text. This is easiest done using asm-goto-output, where we can stick a C label in the exception table entry. The fallback version isn't nearly so nice but should work. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101326.141775772@infradead.org --- arch/x86/include/asm/word-at-a-time.h | 66 +++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index 06006b0351f3..8338b0432b50 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -77,30 +77,58 @@ static inline unsigned long find_zero(unsigned long mask) * and the next page not being mapped, take the exception and * return zeroes in the non-existing part. */ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, dummy; + unsigned long offset, data; + unsigned long ret; + + asm_volatile_goto( + "1: mov %[mem], %[ret]\n" + + _ASM_EXTABLE(1b, %l[do_exception]) + + : [ret] "=r" (ret) + : [mem] "m" (*(unsigned long *)addr) + : : do_exception); + + return ret; + +do_exception: + offset = (unsigned long)addr & (sizeof(long) - 1); + addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1)); + data = *(unsigned long *)addr; + ret = data >> offset * 8; + + return ret; +} - asm( - "1:\tmov %2,%0\n" +#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long offset, data; + unsigned long ret, err = 0; + + asm( "1: mov %[mem], %[ret]\n" "2:\n" - ".section .fixup,\"ax\"\n" - "3:\t" - "lea %2,%1\n\t" - "and %3,%1\n\t" - "mov (%1),%0\n\t" - "leal %2,%%ecx\n\t" - "andl %4,%%ecx\n\t" - "shll $3,%%ecx\n\t" - "shr %%cl,%0\n\t" - "jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - :"=&r" (ret),"=&c" (dummy) - 
:"m" (*(unsigned long *)addr), - "i" (-sizeof(unsigned long)), - "i" (sizeof(unsigned long)-1)); + + _ASM_EXTABLE_FAULT(1b, 2b) + + : [ret] "=&r" (ret), "+a" (err) + : [mem] "m" (*(unsigned long *)addr)); + + if (unlikely(err)) { + offset = (unsigned long)addr & (sizeof(long) - 1); + addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1)); + data = *(unsigned long *)addr; + ret = data >> offset * 8; + } + return ret; } +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + #endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit v1.2.3 From e5eefda5aa51f3178821b58806e1dddd798c0934 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:24 +0100 Subject: x86: Remove .fixup section No moar users, kill it dead. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101326.201590122@infradead.org --- arch/x86/entry/vdso/vdso-layout.lds.S | 1 - arch/x86/kernel/vmlinux.lds.S | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index dc8da7695859..bafa73f09e92 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -77,7 +77,6 @@ SECTIONS .text : { *(.text*) - *(.fixup) } :text =0x90909090, diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3d6dc12d198f..27f830345b6f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -137,7 +137,6 @@ SECTIONS ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT STATIC_CALL_TEXT - *(.fixup) *(.gnu.warning) #ifdef CONFIG_RETPOLINE -- cgit v1.2.3 From 82a8954acd93ae95d6252fb93a3d210c8f71b093 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:25 +0100 Subject: objtool: Remove .fixup handling The .fixup has gone the way of the Dodo, that test will always be false. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101326.261496792@infradead.org --- tools/objtool/check.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e28172f6e792..9fd81490ff07 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3310,14 +3310,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return true; /* - * Ignore any unused exceptions. This can happen when a whitelisted - * function has an exception table entry. - * - * Also ignore alternative replacement instructions. This can happen + * Ignore alternative replacement instructions. This can happen * when a whitelisted function uses one of the ALTERNATIVE macros. */ - if (!strcmp(insn->sec->name, ".fixup") || - !strcmp(insn->sec->name, ".altinstr_replacement") || + if (!strcmp(insn->sec->name, ".altinstr_replacement") || !strcmp(insn->sec->name, ".altinstr_aux")) return true; -- cgit v1.2.3 From 9cdbeec4096804083944d05da96bbaf59a1eb4f9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 11 Jan 2022 12:11:14 +0100 Subject: x86/entry_32: Fix segment exceptions The LKP robot reported that commit in Fixes: caused a failure. Turns out the ldt_gdt_32 selftest turns into an infinite loop trying to clear the segment. As discovered by Sean, what happens is that PARANOID_EXIT_TO_KERNEL_MODE in the handle_exception_return path overwrites the entry stack data with the task stack data, restoring the "bad" segment value. Instead of having the exception retry the instruction, have it emulate the full instruction. Replace EX_TYPE_POP_ZERO with EX_TYPE_POP_REG which will do the equivalent of: POP %reg; MOV $imm, %reg. In order to encode the segment registers, add them as registers 8-11 for 32-bit. 
By setting regs->[defg]s the (nested) RESTORE_REGS will pop this value at the end of the exception handler and by increasing regs->sp, it will have skipped the stack slot. This was debugged by Sean Christopherson. [ bp: Add EX_REG_GS too. ] Fixes: aa93e2ad7464 ("x86/entry_32: Remove .fixup usage") Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/Yd1l0gInc4zRcnt/@hirez.programming.kicks-ass.net --- arch/x86/entry/entry_32.S | 13 +++++++++---- arch/x86/include/asm/extable_fixup_types.h | 11 ++++++++++- arch/x86/lib/insn-eval.c | 5 +++++ arch/x86/mm/extable.c | 17 +++-------------- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e0a95d8a6553..a7ec22b1d06c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -268,11 +268,16 @@ 1: popl %ds 2: popl %es 3: popl %fs - addl $(4 + \pop), %esp /* pop the unused "gs" slot */ +4: addl $(4 + \pop), %esp /* pop the unused "gs" slot */ IRET_FRAME - _ASM_EXTABLE_TYPE(1b, 1b, EX_TYPE_POP_ZERO) - _ASM_EXTABLE_TYPE(2b, 2b, EX_TYPE_POP_ZERO) - _ASM_EXTABLE_TYPE(3b, 3b, EX_TYPE_POP_ZERO) + + /* + * There is no _ASM_EXTABLE_TYPE_REG() for ASM, however since this is + * ASM the registers are known and we can trivially hard-code them. 
+ */ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_POP_ZERO|EX_REG_DS) + _ASM_EXTABLE_TYPE(2b, 3b, EX_TYPE_POP_ZERO|EX_REG_ES) + _ASM_EXTABLE_TYPE(3b, 4b, EX_TYPE_POP_ZERO|EX_REG_FS) .endm .macro RESTORE_ALL_NMI cr3_reg:req pop=0 diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index b5ab333e064a..503622627400 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -16,9 +16,16 @@ #define EX_DATA_FLAG_SHIFT 12 #define EX_DATA_IMM_SHIFT 16 +#define EX_DATA_REG(reg) ((reg) << EX_DATA_REG_SHIFT) #define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT) #define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT) +/* segment regs */ +#define EX_REG_DS EX_DATA_REG(8) +#define EX_REG_ES EX_DATA_REG(9) +#define EX_REG_FS EX_DATA_REG(10) +#define EX_REG_GS EX_DATA_REG(11) + /* flags */ #define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1) #define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2) @@ -41,7 +48,9 @@ #define EX_TYPE_RDMSR_IN_MCE 13 #define EX_TYPE_DEFAULT_MCE_SAFE 14 #define EX_TYPE_FAULT_MCE_SAFE 15 -#define EX_TYPE_POP_ZERO 16 + +#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */ +#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0)) #define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ #define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 7760d228041b..c8a962c2e653 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -430,6 +430,11 @@ static const int pt_regoff[] = { offsetof(struct pt_regs, r13), offsetof(struct pt_regs, r14), offsetof(struct pt_regs, r15), +#else + offsetof(struct pt_regs, ds), + offsetof(struct pt_regs, es), + offsetof(struct pt_regs, fs), + offsetof(struct pt_regs, gs), #endif }; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 41eaa648349e..dba2197c05c3 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -126,18 +126,6 @@ static bool 
ex_handler_clear_fs(const struct exception_table_entry *fixup, return ex_handler_default(fixup, regs); } -static bool ex_handler_pop_zero(const struct exception_table_entry *fixup, - struct pt_regs *regs) -{ - /* - * Typically used for when "pop %seg" traps, in which case we'll clear - * the stack slot and re-try the instruction, which will then succeed - * to pop zero. - */ - *((unsigned long *)regs->sp) = 0; - return ex_handler_default(fixup, regs); -} - static bool ex_handler_imm_reg(const struct exception_table_entry *fixup, struct pt_regs *regs, int reg, int imm) { @@ -218,8 +206,9 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, case EX_TYPE_RDMSR_IN_MCE: ex_handler_msr_mce(regs, false); break; - case EX_TYPE_POP_ZERO: - return ex_handler_pop_zero(e, regs); + case EX_TYPE_POP_REG: + regs->sp += sizeof(long); + fallthrough; case EX_TYPE_IMM_REG: return ex_handler_imm_reg(e, regs, reg, imm); case EX_TYPE_FAULT_SGX: -- cgit v1.2.3