summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-28 12:53:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-28 12:53:24 -0700
commit03b2cd72aad1103127282f39c614e4722e5d9e8f (patch)
tree32592bdc0cded33751b992d13ce29e0abb9cc441 /arch
parent0ff0edb550e256597e505eff308f90d9a0b6677c (diff)
parent7d3d10e0e85fb7c23a86a70f795b1eabd2bc030b (diff)
Merge tag 'objtool-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull objtool updates from Ingo Molnar: - Standardize the crypto asm code so that it looks like compiler- generated code to objtool - so that it can understand it. This enables unwinding from crypto asm code - and also fixes the last known remaining objtool warnings for LTO and more. - x86 decoder fixes: clean up and fix the decoder, and also extend it a bit - Misc fixes and cleanups * tag 'objtool-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits) x86/crypto: Enable objtool in crypto code x86/crypto/sha512-ssse3: Standardize stack alignment prologue x86/crypto/sha512-avx2: Standardize stack alignment prologue x86/crypto/sha512-avx: Standardize stack alignment prologue x86/crypto/sha256-avx2: Standardize stack alignment prologue x86/crypto/sha1_avx2: Standardize stack alignment prologue x86/crypto/sha_ni: Standardize stack alignment prologue x86/crypto/crc32c-pcl-intel: Standardize jump table x86/crypto/camellia-aesni-avx2: Unconditionally allocate stack buffer x86/crypto/aesni-intel_avx: Standardize stack alignment prologue x86/crypto/aesni-intel_avx: Fix register usage comments x86/crypto/aesni-intel_avx: Remove unused macros objtool: Support asm jump tables objtool: Parse options from OBJTOOL_ARGS objtool: Collate parse_options() users objtool: Add --backup objtool,x86: More ModRM sugar objtool,x86: Rewrite ADD/SUB/AND objtool,x86: Support %riz encodings objtool,x86: Simplify register decode ...
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S28
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S5
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S7
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S8
-rw-r--r--arch/x86/crypto/sha1_ni_asm.S8
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S13
-rw-r--r--arch/x86/crypto/sha512-avx-asm.S41
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S42
-rw-r--r--arch/x86/crypto/sha512-ssse3-asm.S41
10 files changed, 85 insertions, 110 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b28e36b7c96b..d0959e7b809f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,8 +2,6 @@
#
# x86 crypto algorithms
-OBJECT_FILES_NON_STANDARD := y
-
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 2cf8e94d986a..98e3552b6e03 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -212,10 +212,6 @@ HashKey_8_k = 16*21 # store XOR of HashKey^8 <<1 mod poly here (for Karatsu
#define arg4 %rcx
#define arg5 %r8
#define arg6 %r9
-#define arg7 STACK_OFFSET+8*1(%r14)
-#define arg8 STACK_OFFSET+8*2(%r14)
-#define arg9 STACK_OFFSET+8*3(%r14)
-#define arg10 STACK_OFFSET+8*4(%r14)
#define keysize 2*15*16(arg1)
i = 0
@@ -237,9 +233,6 @@ define_reg j %j
.noaltmacro
.endm
-# need to push 4 registers into stack to maintain
-STACK_OFFSET = 8*4
-
TMP1 = 16*0 # Temporary storage for AAD
TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
TMP3 = 16*2 # Temporary storage for AES State 3
@@ -256,25 +249,22 @@ VARIABLE_OFFSET = 16*8
################################
.macro FUNC_SAVE
- #the number of pushes must equal STACK_OFFSET
push %r12
push %r13
- push %r14
push %r15
- mov %rsp, %r14
-
-
+ push %rbp
+ mov %rsp, %rbp
sub $VARIABLE_OFFSET, %rsp
and $~63, %rsp # align rsp to 64 bytes
.endm
.macro FUNC_RESTORE
- mov %r14, %rsp
+ mov %rbp, %rsp
+ pop %rbp
pop %r15
- pop %r14
pop %r13
pop %r12
.endm
@@ -294,7 +284,7 @@ VARIABLE_OFFSET = 16*8
# combined for GCM encrypt and decrypt functions
# clobbering all xmm registers
-# clobbering r10, r11, r12, r13, r14, r15
+# clobbering r10, r11, r12, r13, r15, rax
.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP
vmovdqu AadHash(arg2), %xmm8
vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey
@@ -996,7 +986,7 @@ _partial_block_done_\@:
## num_initial_blocks = b mod 4#
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
-## arg1, arg3, arg4, r14 are used as a pointer only, not modified
+## arg1, arg2, arg3, arg4 are used as pointers only, not modified
.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
i = (8-\num_initial_blocks)
@@ -1231,7 +1221,7 @@ _initial_blocks_done\@:
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
-# arg1, arg3, arg4 are used as pointers only, not modified
+# arg1, arg2, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
@@ -1944,7 +1934,7 @@ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
## num_initial_blocks = b mod 4#
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
-## arg1, arg3, arg4, r14 are used as a pointer only, not modified
+## arg1, arg2, arg3, arg4 are used as pointers only, not modified
.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
i = (8-\num_initial_blocks)
@@ -2186,7 +2176,7 @@ _initial_blocks_done\@:
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
-# arg1, arg3, arg4 are used as pointers only, not modified
+# arg1, arg2, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 782e9712a1ec..706f70829a07 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -990,6 +990,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
* %rdx: src (32 blocks)
*/
FRAME_BEGIN
+ subq $(16 * 32), %rsp;
vzeroupper;
@@ -1002,7 +1003,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
%ymm15, %rdx, (key_table)(CTX, %r8, 8));
- movq %rsp, %r10;
cmpq %rsi, %rdx;
je .Lcbc_dec_use_stack;
@@ -1015,7 +1015,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
* dst still in-use (because dst == src), so use stack for temporary
* storage.
*/
- subq $(16 * 32), %rsp;
movq %rsp, %rax;
.Lcbc_dec_continue:
@@ -1025,7 +1024,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
vpxor %ymm7, %ymm7, %ymm7;
vinserti128 $1, (%rdx), %ymm7, %ymm7;
vpxor (%rax), %ymm7, %ymm7;
- movq %r10, %rsp;
vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6;
vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5;
vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4;
@@ -1047,6 +1045,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
vzeroupper;
+ addq $(16 * 32), %rsp;
FRAME_END
ret;
SYM_FUNC_END(camellia_cbc_dec_32way)
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 884dc767b051..ac1f303eed0f 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -53,7 +53,7 @@
.endm
.macro JMPTBL_ENTRY i
-.word crc_\i - crc_array
+.quad crc_\i
.endm
.macro JNC_LESS_THAN j
@@ -168,10 +168,7 @@ continue_block:
xor crc2, crc2
## branch into array
- lea jump_table(%rip), %bufp
- movzwq (%bufp, %rax, 2), len
- lea crc_array(%rip), %bufp
- lea (%bufp, len, 1), %bufp
+ mov jump_table(,%rax,8), %bufp
JMP_NOSPEC bufp
################################################################
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1e594d60afa5..5eed620f4676 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -645,9 +645,9 @@ _loop3:
RESERVE_STACK = (W_SIZE*4 + 8+24)
/* Align stack */
- mov %rsp, %rbx
+ push %rbp
+ mov %rsp, %rbp
and $~(0x20-1), %rsp
- push %rbx
sub $RESERVE_STACK, %rsp
avx2_zeroupper
@@ -665,8 +665,8 @@ _loop3:
avx2_zeroupper
- add $RESERVE_STACK, %rsp
- pop %rsp
+ mov %rbp, %rsp
+ pop %rbp
pop %r15
pop %r14
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index 11efe3a45a1f..5d8415f482bd 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -59,8 +59,6 @@
#define DATA_PTR %rsi /* 2nd arg */
#define NUM_BLKS %rdx /* 3rd arg */
-#define RSPSAVE %rax
-
/* gcc conversion */
#define FRAME_SIZE 32 /* space for 2x16 bytes */
@@ -96,7 +94,8 @@
.text
.align 32
SYM_FUNC_START(sha1_ni_transform)
- mov %rsp, RSPSAVE
+ push %rbp
+ mov %rsp, %rbp
sub $FRAME_SIZE, %rsp
and $~0xF, %rsp
@@ -288,7 +287,8 @@ SYM_FUNC_START(sha1_ni_transform)
pextrd $3, E0, 1*16(DIGEST_PTR)
.Ldone_hash:
- mov RSPSAVE, %rsp
+ mov %rbp, %rsp
+ pop %rbp
ret
SYM_FUNC_END(sha1_ni_transform)
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 11ff60c29c8b..4087f7432a7e 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -117,15 +117,13 @@ _XMM_SAVE_SIZE = 0
_INP_END_SIZE = 8
_INP_SIZE = 8
_CTX_SIZE = 8
-_RSP_SIZE = 8
_XFER = 0
_XMM_SAVE = _XFER + _XFER_SIZE
_INP_END = _XMM_SAVE + _XMM_SAVE_SIZE
_INP = _INP_END + _INP_END_SIZE
_CTX = _INP + _INP_SIZE
-_RSP = _CTX + _CTX_SIZE
-STACK_SIZE = _RSP + _RSP_SIZE
+STACK_SIZE = _CTX + _CTX_SIZE
# rotate_Xs
# Rotate values of symbols X0...X3
@@ -533,11 +531,11 @@ SYM_FUNC_START(sha256_transform_rorx)
pushq %r14
pushq %r15
- mov %rsp, %rax
+ push %rbp
+ mov %rsp, %rbp
+
subq $STACK_SIZE, %rsp
and $-32, %rsp # align rsp to 32 byte boundary
- mov %rax, _RSP(%rsp)
-
shl $6, NUM_BLKS # convert to bytes
jz done_hash
@@ -704,7 +702,8 @@ only_one_block:
done_hash:
- mov _RSP(%rsp), %rsp
+ mov %rbp, %rsp
+ pop %rbp
popq %r15
popq %r14
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 684d58c8bc4f..3d8f0fd4eea8 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -76,14 +76,10 @@ tmp0 = %rax
W_SIZE = 80*8
# W[t] + K[t] | W[t+1] + K[t+1]
WK_SIZE = 2*8
-RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 5*8
frame_W = 0
frame_WK = frame_W + W_SIZE
-frame_RSPSAVE = frame_WK + WK_SIZE
-frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
-frame_size = frame_GPRSAVE + GPRSAVE_SIZE
+frame_size = frame_WK + WK_SIZE
# Useful QWORD "arrays" for simpler memory references
# MSG, DIGEST, K_t, W_t are arrays
@@ -281,18 +277,18 @@ SYM_FUNC_START(sha512_transform_avx)
test msglen, msglen
je nowork
+ # Save GPRs
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
# Allocate Stack Space
- mov %rsp, %rax
+ push %rbp
+ mov %rsp, %rbp
sub $frame_size, %rsp
and $~(0x20 - 1), %rsp
- mov %rax, frame_RSPSAVE(%rsp)
-
- # Save GPRs
- mov %rbx, frame_GPRSAVE(%rsp)
- mov %r12, frame_GPRSAVE +8*1(%rsp)
- mov %r13, frame_GPRSAVE +8*2(%rsp)
- mov %r14, frame_GPRSAVE +8*3(%rsp)
- mov %r15, frame_GPRSAVE +8*4(%rsp)
updateblock:
@@ -353,15 +349,16 @@ updateblock:
dec msglen
jnz updateblock
- # Restore GPRs
- mov frame_GPRSAVE(%rsp), %rbx
- mov frame_GPRSAVE +8*1(%rsp), %r12
- mov frame_GPRSAVE +8*2(%rsp), %r13
- mov frame_GPRSAVE +8*3(%rsp), %r14
- mov frame_GPRSAVE +8*4(%rsp), %r15
-
# Restore Stack Pointer
- mov frame_RSPSAVE(%rsp), %rsp
+ mov %rbp, %rsp
+ pop %rbp
+
+ # Restore GPRs
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
nowork:
ret
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 3a44bdcfd583..072cb0f0deae 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -102,17 +102,13 @@ SRND_SIZE = 1*8
INP_SIZE = 1*8
INPEND_SIZE = 1*8
CTX_SIZE = 1*8
-RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 5*8
frame_XFER = 0
frame_SRND = frame_XFER + XFER_SIZE
frame_INP = frame_SRND + SRND_SIZE
frame_INPEND = frame_INP + INP_SIZE
frame_CTX = frame_INPEND + INPEND_SIZE
-frame_RSPSAVE = frame_CTX + CTX_SIZE
-frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
-frame_size = frame_GPRSAVE + GPRSAVE_SIZE
+frame_size = frame_CTX + CTX_SIZE
## assume buffers not aligned
#define VMOVDQ vmovdqu
@@ -570,18 +566,18 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
# "blocks" is the message length in SHA512 blocks
########################################################################
SYM_FUNC_START(sha512_transform_rorx)
+ # Save GPRs
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
# Allocate Stack Space
- mov %rsp, %rax
+ push %rbp
+ mov %rsp, %rbp
sub $frame_size, %rsp
and $~(0x20 - 1), %rsp
- mov %rax, frame_RSPSAVE(%rsp)
-
- # Save GPRs
- mov %rbx, 8*0+frame_GPRSAVE(%rsp)
- mov %r12, 8*1+frame_GPRSAVE(%rsp)
- mov %r13, 8*2+frame_GPRSAVE(%rsp)
- mov %r14, 8*3+frame_GPRSAVE(%rsp)
- mov %r15, 8*4+frame_GPRSAVE(%rsp)
shl $7, NUM_BLKS # convert to bytes
jz done_hash
@@ -672,15 +668,17 @@ loop2:
done_hash:
-# Restore GPRs
- mov 8*0+frame_GPRSAVE(%rsp), %rbx
- mov 8*1+frame_GPRSAVE(%rsp), %r12
- mov 8*2+frame_GPRSAVE(%rsp), %r13
- mov 8*3+frame_GPRSAVE(%rsp), %r14
- mov 8*4+frame_GPRSAVE(%rsp), %r15
-
# Restore Stack Pointer
- mov frame_RSPSAVE(%rsp), %rsp
+ mov %rbp, %rsp
+ pop %rbp
+
+ # Restore GPRs
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+
ret
SYM_FUNC_END(sha512_transform_rorx)
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index 50812af0b083..bd51c9070bed 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -74,14 +74,10 @@ tmp0 = %rax
W_SIZE = 80*8
WK_SIZE = 2*8
-RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 5*8
frame_W = 0
frame_WK = frame_W + W_SIZE
-frame_RSPSAVE = frame_WK + WK_SIZE
-frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
-frame_size = frame_GPRSAVE + GPRSAVE_SIZE
+frame_size = frame_WK + WK_SIZE
# Useful QWORD "arrays" for simpler memory references
# MSG, DIGEST, K_t, W_t are arrays
@@ -283,18 +279,18 @@ SYM_FUNC_START(sha512_transform_ssse3)
test msglen, msglen
je nowork
+ # Save GPRs
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
# Allocate Stack Space
- mov %rsp, %rax
+ push %rbp
+ mov %rsp, %rbp
sub $frame_size, %rsp
and $~(0x20 - 1), %rsp
- mov %rax, frame_RSPSAVE(%rsp)
-
- # Save GPRs
- mov %rbx, frame_GPRSAVE(%rsp)
- mov %r12, frame_GPRSAVE +8*1(%rsp)
- mov %r13, frame_GPRSAVE +8*2(%rsp)
- mov %r14, frame_GPRSAVE +8*3(%rsp)
- mov %r15, frame_GPRSAVE +8*4(%rsp)
updateblock:
@@ -355,15 +351,16 @@ updateblock:
dec msglen
jnz updateblock
- # Restore GPRs
- mov frame_GPRSAVE(%rsp), %rbx
- mov frame_GPRSAVE +8*1(%rsp), %r12
- mov frame_GPRSAVE +8*2(%rsp), %r13
- mov frame_GPRSAVE +8*3(%rsp), %r14
- mov frame_GPRSAVE +8*4(%rsp), %r15
-
# Restore Stack Pointer
- mov frame_RSPSAVE(%rsp), %rsp
+ mov %rbp, %rsp
+ pop %rbp
+
+ # Restore GPRs
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
nowork:
ret