From 9b7fb990e080f3a7c5dff9a829d11247e629b98f Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 23 Jul 2012 17:20:28 +0200 Subject: s390/dis: Instruction decoding interface Provide a new function, insn_to_mnemonic, by which e.g. kvm can obtain a human-readable decoding of an instruction's opcode. Reviewed-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Cornelia Huck Signed-off-by: Avi Kivity --- arch/s390/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c40fa91e38a8..31feac630544 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -138,6 +138,7 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); extern unsigned long thread_saved_pc(struct task_struct *t); extern void show_code(struct pt_regs *regs); +extern int insn_to_mnemonic(unsigned char *instruction, char buf[8]); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ -- cgit v1.2.3 From c10302efe569bfd646b4c22df29577a4595b4580 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 Jul 2012 16:23:59 +0200 Subject: s390/bpf,jit: BPF Just In Time compiler for s390 The s390 implementation of the JIT compiler for packet filter speedup. Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kbuild | 1 + arch/s390/Kconfig | 1 + arch/s390/include/asm/processor.h | 1 + arch/s390/kernel/dis.c | 23 ++ arch/s390/net/Makefile | 4 + arch/s390/net/bpf_jit.S | 130 +++++++ arch/s390/net/bpf_jit_comp.c | 738 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 898 insertions(+) create mode 100644 arch/s390/net/Makefile create mode 100644 arch/s390/net/bpf_jit.S create mode 100644 arch/s390/net/bpf_jit_comp.c (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index 9858476fa0fe..cc45d25487b0 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -5,3 +5,4 @@ obj-$(CONFIG_CRYPTO_HW) += crypto/ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-$(CONFIG_MATHEMU) += math-emu/ +obj-y += net/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 107610e01a29..a7088dc06d2c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -84,6 +84,7 @@ config S390 select HAVE_KERNEL_XZ select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 + select HAVE_BPF_JIT if 64BIT && PACK_STACK select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_MEMBLOCK diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 11e4e3236937..d4477ba99a16 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -140,6 +140,7 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); extern unsigned long thread_saved_pc(struct task_struct *t); extern void show_code(struct pt_regs *regs); +extern void print_fn_code(unsigned char *code, unsigned long len); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 619c5d350726..84fd7e920bfe 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1601,3 +1601,26 @@ void show_code(struct pt_regs *regs) } printk("\n"); } + +void print_fn_code(unsigned char *code, unsigned long len) +{ + char buffer[64], *ptr; + int opsize, i; + + while (len) { + ptr = buffer; + opsize = insn_length(*code); + ptr += sprintf(ptr, "%p: ", code); + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[i]); + *ptr++ = '\t'; + if (i < 4) + *ptr++ = '\t'; + ptr += print_insn(ptr, code, (unsigned long) code); + *ptr++ = '\n'; + *ptr++ = 0; + printk(buffer); + code += opsize; + len -= opsize; + } +} diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile new file mode 100644 index 000000000000..90568c33ddb0 --- /dev/null +++ b/arch/s390/net/Makefile @@ -0,0 +1,4 @@ +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S new file mode 100644 index 000000000000..7e45d13816c1 --- /dev/null +++ b/arch/s390/net/bpf_jit.S @@ -0,0 +1,130 @@ +/* + * BPF Jit compiler for s390, help functions. + * + * Copyright IBM Corp. 2012 + * + * Author(s): Martin Schwidefsky + */ +#include + +/* + * Calling convention: + * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved + * %r2: skb pointer + * %r3: offset parameter + * %r5: BPF A accumulator + * %r8: return address + * %r9: save register for skb pointer + * %r10: skb->data + * %r11: skb->len - skb->data_len (headlen) + * %r12: BPF X accumulator + * + * skb_copy_bits takes 4 parameters: + * %r2 = skb pointer + * %r3 = offset into skb data + * %r4 = length to copy + * %r5 = pointer to temp buffer + */ +#define SKBDATA %r8 + + /* A = *(u32 *) (skb->data+K+X) */ +ENTRY(sk_load_word_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u32 *) (skb->data+K) */ +ENTRY(sk_load_word) + llgfr %r1,%r3 # extend offset + ahi %r3,4 # offset + 4 + clr %r11,%r3 # hlen <= offset + 4 ? + jl sk_load_word_slow + l %r5,0(%r1,%r10) # get word from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_word_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,4 # 4 bytes + la %r5,160(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = *(u16 *) (skb->data+K+X) */ +ENTRY(sk_load_half_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u16 *) (skb->data+K) */ +ENTRY(sk_load_half) + llgfr %r1,%r3 # extend offset + ahi %r3,2 # offset + 2 + clr %r11,%r3 # hlen <= offset + 2 ? + jl sk_load_half_slow + llgh %r5,0(%r1,%r10) # get half from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_half_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,2 # 2 bytes + la %r5,162(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(2,%r15),160(%r15) + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = *(u8 *) (skb->data+K+X) */ +ENTRY(sk_load_byte_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u8 *) (skb->data+K) */ +ENTRY(sk_load_byte) + llgfr %r1,%r3 # extend offset + clr %r11,%r3 # hlen < offset ? + jle sk_load_byte_slow + lhi %r5,0 + ic %r5,0(%r1,%r10) # get byte from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_byte_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,1 # 1 bytes + la %r5,163(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(3,%r15),160(%r15) + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = (*(u8 *)(skb->data+K) & 0xf) << 2 */ +ENTRY(sk_load_byte_msh) + llgfr %r1,%r3 # extend offset + clr %r11,%r3 # hlen < offset ? + jle sk_load_byte_slow + lhi %r12,0 + ic %r12,0(%r1,%r10) # get byte from skb + nill %r12,0x0f + sll %r12,2 + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_byte_msh_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,2 # 2 bytes + la %r5,162(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(3,%r15),160(%r15) + l %r12,160(%r15) # load result from temp buffer + nill %r12,0x0f + sll %r12,2 + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c new file mode 100644 index 000000000000..b07aa3dc5eed --- /dev/null +++ b/arch/s390/net/bpf_jit_comp.c @@ -0,0 +1,738 @@ +/* + * BPF Jit compiler for s390. + * + * Copyright IBM Corp. 2012 + * + * Author(s): Martin Schwidefsky + */ +#include +#include +#include +#include +#include + +/* + * Conventions: + * %r2 = skb pointer + * %r3 = offset parameter + * %r4 = scratch register / length parameter + * %r5 = BPF A accumulator + * %r8 = return address + * %r9 = save register for skb pointer + * %r10 = skb->data + * %r11 = skb->len - skb->data_len (headlen) + * %r12 = BPF X accumulator + * %r13 = literal pool pointer + * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS + */ +int bpf_jit_enable __read_mostly; + +/* + * assembly code in arch/x86/net/bpf_jit.S + */ +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; + +struct bpf_jit { + unsigned int seen; + u8 *start; + u8 *prg; + u8 *mid; + u8 *lit; + u8 *end; + u8 *base_ip; + u8 *ret0_ip; + u8 *exit_ip; + unsigned int off_load_word; + unsigned int off_load_half; + unsigned int off_load_byte; + unsigned int off_load_bmsh; + unsigned int off_load_iword; + unsigned int off_load_ihalf; + unsigned int off_load_ibyte; +}; + +#define BPF_SIZE_MAX 4096 /* Max size for program */ + +#define SEEN_DATAREF 1 /* might call external helpers */ +#define SEEN_XREG 2 /* ebx is used */ +#define SEEN_MEM 4 /* use mem[] for temporary storage */ +#define SEEN_RET0 8 /* pc_ret0 points to a valid return 0 */ +#define SEEN_LITERAL 16 /* code uses literals */ +#define SEEN_LOAD_WORD 32 /* code uses sk_load_word */ +#define SEEN_LOAD_HALF 64 /* code uses sk_load_half */ +#define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */ +#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */ +#define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */ +#define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */ +#define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */ + +#define EMIT2(op) \ +({ \ + if (jit->prg + 2 <= jit->mid) \ + *(u16 *) jit->prg = op; \ + jit->prg += 2; \ +}) + +#define EMIT4(op) \ +({ \ + if (jit->prg + 4 <= jit->mid) \ + *(u32 *) jit->prg = op; \ + jit->prg += 4; \ +}) + +#define EMIT4_DISP(op, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + EMIT4(op | __disp); \ +}) + +#define EMIT4_IMM(op, imm) \ +({ \ + unsigned int __imm = (imm) & 0xffff; \ + EMIT4(op | __imm); \ +}) + +#define EMIT4_PCREL(op, pcrel) \ +({ \ + long __pcrel = ((pcrel) >> 1) & 0xffff; \ + EMIT4(op | __pcrel); \ +}) + +#define EMIT6(op1, op2) \ +({ \ + if (jit->prg + 6 <= jit->mid) { \ + *(u32 *) jit->prg = op1; \ + *(u16 *) (jit->prg + 4) = op2; \ + } \ + jit->prg += 6; \ +}) + +#define EMIT6_DISP(op1, op2, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + EMIT6(op1 | __disp, op2); \ +}) + +#define EMIT_CONST(val) \ +({ \ + unsigned int ret; \ + ret = (unsigned int) (jit->lit - jit->base_ip); \ + jit->seen |= SEEN_LITERAL; \ + if (jit->lit + 4 <= jit->end) \ + *(u32 *) jit->lit = val; \ + jit->lit += 4; \ + ret; \ +}) + +#define EMIT_FN_CONST(bit, fn) \ +({ \ + unsigned int ret; \ + ret = (unsigned int) (jit->lit - jit->base_ip); \ + if (jit->seen & bit) { \ + jit->seen |= SEEN_LITERAL; \ + if (jit->lit + 8 <= jit->end) \ + *(void **) jit->lit = fn; \ + jit->lit += 8; \ + } \ + ret; \ +}) + +static void bpf_jit_prologue(struct bpf_jit *jit) +{ + /* Save registers and create stack frame if necessary */ + if (jit->seen & SEEN_DATAREF) { + /* stmg %r8,%r15,88(%r15) */ + EMIT6(0xeb8ff058, 0x0024); + /* lgr %r14,%r15 */ + EMIT4(0xb90400ef); + /* ahi %r15, */ + EMIT4_IMM(0xa7fa0000, (jit->seen & SEEN_MEM) ? -112 : -80); + /* stg %r14,152(%r15) */ + EMIT6(0xe3e0f098, 0x0024); + } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) + /* stmg %r12,%r13,120(%r15) */ + EMIT6(0xebcdf078, 0x0024); + else if (jit->seen & SEEN_XREG) + /* stg %r12,120(%r15) */ + EMIT6(0xe3c0f078, 0x0024); + else if (jit->seen & SEEN_LITERAL) + /* stg %r13,128(%r15) */ + EMIT6(0xe3d0f080, 0x0024); + + /* Setup literal pool */ + if (jit->seen & SEEN_LITERAL) { + /* basr %r13,0 */ + EMIT2(0x0dd0); + jit->base_ip = jit->prg; + } + jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word); + jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half); + jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte); + jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh); + jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind); + jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind); + jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind); + + /* Filter needs to access skb data */ + if (jit->seen & SEEN_DATAREF) { + /* l %r11,(%r2) */ + EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len)); + /* s %r11,(%r2) */ + EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len)); + /* lg %r10,(%r2) */ + EMIT6_DISP(0xe3a02000, 0x0004, + offsetof(struct sk_buff, data)); + } +} + +static void bpf_jit_epilogue(struct bpf_jit *jit) +{ + /* Return 0 */ + if (jit->seen & SEEN_RET0) { + jit->ret0_ip = jit->prg; + /* lghi %r2,0 */ + EMIT4(0xa7290000); + } + jit->exit_ip = jit->prg; + /* Restore registers */ + if (jit->seen & SEEN_DATAREF) + /* lmg %r8,%r15,(%r15) */ + EMIT6_DISP(0xeb8ff000, 0x0004, + (jit->seen & SEEN_MEM) ? 200 : 168); + else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) + /* lmg %r12,%r13,120(%r15) */ + EMIT6(0xebcdf078, 0x0004); + else if (jit->seen & SEEN_XREG) + /* lg %r12,120(%r15) */ + EMIT6(0xe3c0f078, 0x0004); + else if (jit->seen & SEEN_LITERAL) + /* lg %r13,128(%r15) */ + EMIT6(0xe3d0f080, 0x0004); + /* br %r14 */ + EMIT2(0x07fe); +} + +/* + * make sure we dont leak kernel information to user + */ +static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) +{ + /* Clear temporary memory if (seen & SEEN_MEM) */ + if (jit->seen & SEEN_MEM) + /* xc 0(64,%r15),0(%r15) */ + EMIT6(0xd73ff000, 0xf000); + /* Clear X if (seen & SEEN_XREG) */ + if (jit->seen & SEEN_XREG) + /* lhi %r12,0 */ + EMIT4(0xa7c80000); + /* Clear A if the first register does not set it. */ + switch (filter[0].code) { + case BPF_S_LD_W_ABS: + case BPF_S_LD_H_ABS: + case BPF_S_LD_B_ABS: + case BPF_S_LD_W_LEN: + case BPF_S_LD_W_IND: + case BPF_S_LD_H_IND: + case BPF_S_LD_B_IND: + case BPF_S_LDX_B_MSH: + case BPF_S_LD_IMM: + case BPF_S_LD_MEM: + case BPF_S_MISC_TXA: + case BPF_S_ANC_PROTOCOL: + case BPF_S_ANC_PKTTYPE: + case BPF_S_ANC_IFINDEX: + case BPF_S_ANC_MARK: + case BPF_S_ANC_QUEUE: + case BPF_S_ANC_HATYPE: + case BPF_S_ANC_RXHASH: + case BPF_S_ANC_CPU: + case BPF_S_RET_K: + /* first instruction sets A register */ + break; + default: /* A = 0 */ + /* lhi %r5,0 */ + EMIT4(0xa7580000); + } +} + +static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, + unsigned int *addrs, int i, int last) +{ + unsigned int K; + int offset; + unsigned int mask; + + K = filter->k; + switch (filter->code) { + case BPF_S_ALU_ADD_X: /* A += X */ + jit->seen |= SEEN_XREG; + /* ar %r5,%r12 */ + EMIT2(0x1a5c); + break; + case BPF_S_ALU_ADD_K: /* A += K */ + if (!K) + break; + if (K <= 16383) + /* ahi %r5, */ + EMIT4_IMM(0xa75a0000, K); + else + /* a %r5,(%r13) */ + EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_SUB_X: /* A -= X */ + jit->seen |= SEEN_XREG; + /* sr %r5,%r12 */ + EMIT2(0x1b5c); + break; + case BPF_S_ALU_SUB_K: /* A -= K */ + if (!K) + break; + if (K <= 16384) + /* ahi %r5,-K */ + EMIT4_IMM(0xa75a0000, -K); + else + /* s %r5,(%r13) */ + EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_MUL_X: /* A *= X */ + jit->seen |= SEEN_XREG; + /* msr %r5,%r12 */ + EMIT4(0xb252005c); + break; + case BPF_S_ALU_MUL_K: /* A *= K */ + if (K <= 16383) + /* mhi %r5,K */ + EMIT4_IMM(0xa75c0000, K); + else + /* ms %r5,(%r13) */ + EMIT4_DISP(0x7150d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_DIV_X: /* A /= X */ + jit->seen |= SEEN_XREG | SEEN_RET0; + /* ltr %r12,%r12 */ + EMIT2(0x12cc); + /* jz */ + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dr %r4,%r12 */ + EMIT2(0x1d4c); + break; + case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */ + /* m %r4,(%r13) */ + EMIT4_DISP(0x5c40d000, EMIT_CONST(K)); + /* lr %r5,%r4 */ + EMIT2(0x1854); + break; + case BPF_S_ALU_AND_X: /* A &= X */ + jit->seen |= SEEN_XREG; + /* nr %r5,%r12 */ + EMIT2(0x145c); + break; + case BPF_S_ALU_AND_K: /* A &= K */ + /* n %r5,(%r13) */ + EMIT4_DISP(0x5450d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_OR_X: /* A |= X */ + jit->seen |= SEEN_XREG; + /* or %r5,%r12 */ + EMIT2(0x165c); + break; + case BPF_S_ALU_OR_K: /* A |= K */ + /* o %r5,(%r13) */ + EMIT4_DISP(0x5650d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_LSH_X: /* A <<= X; */ + jit->seen |= SEEN_XREG; + /* sll %r5,0(%r12) */ + EMIT4(0x8950c000); + break; + case BPF_S_ALU_LSH_K: /* A <<= K */ + if (K == 0) + break; + /* sll %r5,K */ + EMIT4_DISP(0x89500000, K); + break; + case BPF_S_ALU_RSH_X: /* A >>= X; */ + jit->seen |= SEEN_XREG; + /* srl %r5,0(%r12) */ + EMIT4(0x8850c000); + break; + case BPF_S_ALU_RSH_K: /* A >>= K; */ + if (K == 0) + break; + /* srl %r5,K */ + EMIT4_DISP(0x88500000, K); + break; + case BPF_S_ALU_NEG: /* A = -A */ + /* lnr %r5,%r5 */ + EMIT2(0x1155); + break; + case BPF_S_JMP_JA: /* ip += K */ + offset = addrs[i + K] + jit->start - jit->prg; + EMIT4_PCREL(0xa7f40000, offset); + break; + case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */ + mask = 0x200000; /* jh */ + goto kbranch; + case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */ + mask = 0xa00000; /* jhe */ + goto kbranch; + case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */ + mask = 0x800000; /* je */ +kbranch: /* Emit compare if the branch targets are different */ + if (filter->jt != filter->jf) { + if (K <= 16383) + /* chi %r5, */ + EMIT4_IMM(0xa75e0000, K); + else + /* c %r5,(%r13) */ + EMIT4_DISP(0x5950d000, EMIT_CONST(K)); + } +branch: if (filter->jt == filter->jf) { + if (filter->jt == 0) + break; + /* j */ + offset = addrs[i + filter->jt] + jit->start - jit->prg; + EMIT4_PCREL(0xa7f40000, offset); + break; + } + if (filter->jt != 0) { + /* brc , */ + offset = addrs[i + filter->jt] + jit->start - jit->prg; + EMIT4_PCREL(0xa7040000 | mask, offset); + } + if (filter->jf != 0) { + /* brc , */ + offset = addrs[i + filter->jf] + jit->start - jit->prg; + EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); + } + break; + case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */ + mask = 0x700000; /* jnz */ + /* Emit test if the branch targets are different */ + if (filter->jt != filter->jf) { + if (K > 65535) { + /* lr %r4,%r5 */ + EMIT2(0x1845); + /* n %r4,(%r13) */ + EMIT4_DISP(0x5440d000, EMIT_CONST(K)); + } else + /* tmll %r5,K */ + EMIT4_IMM(0xa7510000, K); + } + goto branch; + case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */ + mask = 0x200000; /* jh */ + goto xbranch; + case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */ + mask = 0xa00000; /* jhe */ + goto xbranch; + case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */ + mask = 0x800000; /* je */ +xbranch: /* Emit compare if the branch targets are different */ + if (filter->jt != filter->jf) { + jit->seen |= SEEN_XREG; + /* cr %r5,%r12 */ + EMIT2(0x195c); + } + goto branch; + case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */ + mask = 0x700000; /* jnz */ + /* Emit test if the branch targets are different */ + if (filter->jt != filter->jf) { + jit->seen |= SEEN_XREG; + /* lr %r4,%r5 */ + EMIT2(0x1845); + /* nr %r4,%r12 */ + EMIT2(0x144c); + } + goto branch; + case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; + offset = jit->off_load_word; + goto load_abs; + case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; + offset = jit->off_load_half; + goto load_abs; + case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; + offset = jit->off_load_byte; +load_abs: if ((int) K < 0) + goto out; +call_fn: /* lg %r1,(%r13) */ + EMIT6_DISP(0xe310d000, 0x0004, offset); + /* l %r3,(%r13) */ + EMIT4_DISP(0x5830d000, EMIT_CONST(K)); + /* basr %r8,%r1 */ + EMIT2(0x0d81); + /* jnz */ + EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); + break; + case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; + offset = jit->off_load_iword; + goto call_fn; + case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; + offset = jit->off_load_ihalf; + goto call_fn; + case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; + offset = jit->off_load_ibyte; + goto call_fn; + case BPF_S_LDX_B_MSH: + /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ + jit->seen |= SEEN_RET0; + if ((int) K < 0) { + /* j */ + EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg)); + break; + } + jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; + offset = jit->off_load_bmsh; + goto call_fn; + case BPF_S_LD_W_LEN: /* A = skb->len; */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + /* l %r5,(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); + break; + case BPF_S_LDX_W_LEN: /* X = skb->len; */ + jit->seen |= SEEN_XREG; + /* l %r12,(%r2) */ + EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); + break; + case BPF_S_LD_IMM: /* A = K */ + if (K <= 16383) + /* lhi %r5,K */ + EMIT4_IMM(0xa7580000, K); + else + /* l %r5,(%r13) */ + EMIT4_DISP(0x5850d000, EMIT_CONST(K)); + break; + case BPF_S_LDX_IMM: /* X = K */ + jit->seen |= SEEN_XREG; + if (K <= 16383) + /* lhi %r12, */ + EMIT4_IMM(0xa7c80000, K); + else + /* l %r12,(%r13) */ + EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); + break; + case BPF_S_LD_MEM: /* A = mem[K] */ + jit->seen |= SEEN_MEM; + /* l %r5,(%r15) */ + EMIT4_DISP(0x5850f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_S_LDX_MEM: /* X = mem[K] */ + jit->seen |= SEEN_XREG | SEEN_MEM; + /* l %r12,(%r15) */ + EMIT4_DISP(0x58c0f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_S_MISC_TAX: /* X = A */ + jit->seen |= SEEN_XREG; + /* lr %r12,%r5 */ + EMIT2(0x18c5); + break; + case BPF_S_MISC_TXA: /* A = X */ + jit->seen |= SEEN_XREG; + /* lr %r5,%r12 */ + EMIT2(0x185c); + break; + case BPF_S_RET_K: + if (K == 0) { + jit->seen |= SEEN_RET0; + if (last) + break; + /* j */ + EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg); + } else { + if (K <= 16383) + /* lghi %r2,K */ + EMIT4_IMM(0xa7290000, K); + else + /* llgf %r2,(%r13) */ + EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K)); + /* j */ + if (last && !(jit->seen & SEEN_RET0)) + break; + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + } + break; + case BPF_S_RET_A: + /* llgfr %r2,%r5 */ + EMIT4(0xb9160025); + /* j */ + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + break; + case BPF_S_ST: /* mem[K] = A */ + jit->seen |= SEEN_MEM; + /* st %r5,(%r15) */ + EMIT4_DISP(0x5050f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + jit->seen |= SEEN_XREG | SEEN_MEM; + /* st %r12,(%r15) */ + EMIT4_DISP(0x50c0f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); + break; + case BPF_S_ANC_IFINDEX: /* if (!skb->dev) return 0; + * A = skb->dev->ifindex */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + jit->seen |= SEEN_RET0; + /* lg %r1,(%r2) */ + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); + /* ltgr %r1,%r1 */ + EMIT4(0xb9020011); + /* jz */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* l %r5,(%r1) */ + EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); + break; + case BPF_S_ANC_MARK: /* A = skb->mark */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + /* l %r5,(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); + break; + case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); + break; + case BPF_S_ANC_HATYPE: /* if (!skb->dev) return 0; + * A = skb->dev->type */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + jit->seen |= SEEN_RET0; + /* lg %r1,(%r2) */ + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); + /* ltgr %r1,%r1 */ + EMIT4(0xb9020011); + /* jz */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,(%r1) */ + EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); + break; + case BPF_S_ANC_RXHASH: /* A = skb->rxhash */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + /* l %r5,(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash)); + break; + case BPF_S_ANC_CPU: /* A = smp_processor_id() */ +#ifdef CONFIG_SMP + /* l %r5, */ + EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); +#else + /* lhi %r5,0 */ + EMIT4(0xa7580000); +#endif + break; + default: /* too complex, give up */ + goto out; + } + addrs[i] = jit->prg - jit->start; + return 0; +out: + return -1; +} + +void bpf_jit_compile(struct sk_filter *fp) +{ + unsigned long size, prg_len, lit_len; + struct bpf_jit jit, cjit; + unsigned int *addrs; + int pass, i; + + if (!bpf_jit_enable) + return; + addrs = kmalloc(fp->len * sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return; + memset(addrs, 0, fp->len * sizeof(*addrs)); + memset(&jit, 0, sizeof(cjit)); + memset(&cjit, 0, sizeof(cjit)); + + for (pass = 0; pass < 10; pass++) { + jit.prg = jit.start; + jit.lit = jit.mid; + + bpf_jit_prologue(&jit); + bpf_jit_noleaks(&jit, fp->insns); + for (i = 0; i < fp->len; i++) { + if (bpf_jit_insn(&jit, fp->insns + i, addrs, i, + i == fp->len - 1)) + goto out; + } + bpf_jit_epilogue(&jit); + if (jit.start) { + WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit); + if (memcmp(&jit, &cjit, sizeof(jit)) == 0) + break; + } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) { + prg_len = jit.prg - jit.start; + lit_len = jit.lit - jit.mid; + size = max_t(unsigned long, prg_len + lit_len, + sizeof(struct work_struct)); + if (size >= BPF_SIZE_MAX) + goto out; + jit.start = module_alloc(size); + if (!jit.start) + goto out; + jit.prg = jit.mid = jit.start + prg_len; + jit.lit = jit.end = jit.start + prg_len + lit_len; + jit.base_ip += (unsigned long) jit.start; + jit.exit_ip += (unsigned long) jit.start; + jit.ret0_ip += (unsigned long) jit.start; + } + cjit = jit; + } + if (bpf_jit_enable > 1) { + pr_err("flen=%d proglen=%lu pass=%d image=%p\n", + fp->len, jit.end - jit.start, pass, jit.start); + if (jit.start) { + printk(KERN_ERR "JIT code:\n"); + print_fn_code(jit.start, jit.mid - jit.start); + print_hex_dump(KERN_ERR, "JIT literals:\n", + DUMP_PREFIX_ADDRESS, 16, 1, + jit.mid, jit.end - jit.mid, false); + } + } + if (jit.start) + fp->bpf_func = (void *) jit.start; +out: + kfree(addrs); +} + +static void jit_free_defer(struct work_struct *arg) +{ + module_free(NULL, arg); +} + +/* run from softirq, we must use a work_struct to call + * module_free() from process context + */ +void bpf_jit_free(struct sk_filter *fp) +{ + struct work_struct *work; + + if (fp->bpf_func == sk_run_filter) + return; + work = (struct work_struct *)fp->bpf_func; + INIT_WORK(work, jit_free_defer); + schedule_work(work); +} -- cgit v1.2.3 From e4b8b3f33fcaa0ed6e6b5482a606091d8cd20beb Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Tue, 31 Jul 2012 10:52:05 +0200 Subject: s390: add support for runtime instrumentation Allow user-space threads to use runtime instrumentation (RI). To enable RI for a thread there is a new s390 specific system call, sys_s390_runtime_instr, that takes as parameter a realtime signal number. If the RI facility is available the system call sets up a control block for the calling thread with the appropriate permissions for the thread to modify the control block. The user-space thread can then use the store and modify RI instructions to alter the control block and start/stop the instrumentation via RION/RIOFF. If the user specified program buffer runs full RI triggers an external interrupt. The external interrupt is translated to a real-time signal that is delivered to the thread that enabled RI on that CPU. The number of the real-time signal is the number specified in the RI system call. So, user-space can select any available real-time signal number in case the application itself uses real-time signals for other purposes. The kernel saves the RI control blocks on task switch only if the running thread was enabled for RI. Therefore, the performance impact on task switch should be negligible if RI is not used. RI is only enabled for user-space mode and is disabled for the supervisor state. Reviewed-by: Heiko Carstens Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 4 + arch/s390/include/asm/irq.h | 1 + arch/s390/include/asm/processor.h | 4 + arch/s390/include/asm/ptrace.h | 4 +- arch/s390/include/asm/runtime_instr.h | 98 ++++++++++++++++++++++ arch/s390/include/asm/switch_to.h | 2 + arch/s390/include/asm/unistd.h | 3 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/compat_wrapper.S | 5 ++ arch/s390/kernel/dis.c | 21 +++++ arch/s390/kernel/irq.c | 1 + arch/s390/kernel/process.c | 7 ++ arch/s390/kernel/runtime_instr.c | 150 ++++++++++++++++++++++++++++++++++ arch/s390/kernel/syscalls.S | 1 + 14 files changed, 300 insertions(+), 3 deletions(-) create mode 100644 arch/s390/include/asm/runtime_instr.h create mode 100644 arch/s390/kernel/runtime_instr.c (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index a3afecdae145..35f0020b7ba7 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -21,11 +21,15 @@ #define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ #define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ #define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ +#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */ +#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program + buffer full */ #define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) #define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ CPU_MF_INT_SF_LSDA) +#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL) /* CPU measurement facility support */ static inline int cpum_cf_avail(void) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 33cc59071581..6703dd986fd4 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -19,6 +19,7 @@ enum interruption_class { EXTINT_IUC, EXTINT_CMS, EXTINT_CMC, + EXTINT_CMR, IOINT_CIO, IOINT_QAI, IOINT_DAS, diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index d4477ba99a16..0fff583d2c7c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * Default implementation of macro that returns current @@ -78,6 +79,9 @@ struct thread_struct { /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; struct list_head list; + /* cpu runtime instrumentation */ + struct runtime_instr_cb *ri_cb; + int ri_signum; }; typedef struct thread_struct thread_struct; diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index d5f08ea566ed..5c32bae6b760 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -235,6 +235,7 @@ typedef struct #define PSW_MASK_ASC 0x0000C000UL #define PSW_MASK_CC 0x00003000UL #define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_RI 0x00000000UL #define PSW_MASK_EA 0x00000000UL #define PSW_MASK_BA 0x00000000UL @@ -264,10 +265,11 @@ typedef struct #define PSW_MASK_ASC 0x0000C00000000000UL #define PSW_MASK_CC 0x0000300000000000UL #define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_RI 0x0000008000000000UL #define PSW_MASK_EA 0x0000000100000000UL #define PSW_MASK_BA 0x0000000080000000UL -#define PSW_MASK_USER 0x00003F0180000000UL +#define PSW_MASK_USER 0x00003F8180000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h new file mode 100644 index 000000000000..830da737ff85 --- /dev/null +++ b/arch/s390/include/asm/runtime_instr.h @@ -0,0 +1,98 @@ +#ifndef _RUNTIME_INSTR_H +#define _RUNTIME_INSTR_H + +#define S390_RUNTIME_INSTR_START 0x1 +#define S390_RUNTIME_INSTR_STOP 0x2 + +struct runtime_instr_cb { + __u64 buf_current; + __u64 buf_origin; + __u64 buf_limit; + + __u32 valid : 1; + __u32 pstate : 1; + __u32 pstate_set_buf : 1; + __u32 home_space : 1; + __u32 altered : 1; + __u32 : 3; + __u32 pstate_sample : 1; + __u32 sstate_sample : 1; + __u32 pstate_collect : 1; + __u32 sstate_collect : 1; + __u32 : 1; + __u32 halted_int : 1; + __u32 int_requested : 1; + __u32 buffer_full_int : 1; + __u32 key : 4; + __u32 : 9; + __u32 rgs : 3; + + __u32 mode : 4; + __u32 next : 1; + __u32 mae : 1; + __u32 : 2; + __u32 call_type_br : 1; + __u32 return_type_br : 1; + __u32 other_type_br : 1; + __u32 bc_other_type : 1; + __u32 emit : 1; + __u32 tx_abort : 1; + __u32 : 2; + __u32 bp_xn : 1; + __u32 bp_xt : 1; + __u32 bp_ti : 1; + __u32 bp_ni : 1; + __u32 suppr_y : 1; + __u32 suppr_z : 1; + + __u32 dc_miss_extra : 1; + __u32 lat_lev_ignore : 1; + __u32 ic_lat_lev : 4; + __u32 dc_lat_lev : 4; + + __u64 reserved1; + __u64 scaling_factor; + __u64 rsic; + __u64 reserved2; +} __packed __aligned(8); + +extern struct runtime_instr_cb runtime_instr_empty_cb; + +static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */ + : : "Q" (*cb)); +} + +static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */ + : "=Q" (*cb) : : "cc"); +} + +static inline void save_ri_cb(struct runtime_instr_cb *cb_prev) +{ +#ifdef CONFIG_64BIT + if (cb_prev) + store_runtime_instr_cb(cb_prev); +#endif +} + +static inline void restore_ri_cb(struct runtime_instr_cb *cb_next, + struct runtime_instr_cb *cb_prev) +{ +#ifdef CONFIG_64BIT + if (cb_next) + load_runtime_instr_cb(cb_next); + else if (cb_prev) + load_runtime_instr_cb(&runtime_instr_empty_cb); +#endif +} + +#ifdef CONFIG_64BIT +extern void exit_thread_runtime_instr(void); +#else +static inline void exit_thread_runtime_instr(void) { } +#endif + +#endif /* _RUNTIME_INSTR_H */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index f223068b7822..dc4967b0e056 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -80,10 +80,12 @@ static inline void restore_access_regs(unsigned int *acrs) if (prev->mm) { \ save_fp_regs(&prev->thread.fp_regs); \ save_access_regs(&prev->thread.acrs[0]); \ + save_ri_cb(prev->thread.ri_cb); \ } \ if (next->mm) { \ restore_fp_regs(&next->thread.fp_regs); \ restore_access_regs(&next->thread.acrs[0]); \ + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ update_per_regs(next); \ } \ prev = __switch_to(prev,next); \ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 6756e78f4808..da2c2f4f7642 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -277,7 +277,8 @@ #define __NR_setns 339 #define __NR_process_vm_readv 340 #define __NR_process_vm_writev 341 -#define NR_syscalls 342 +#define __NR_s390_runtime_instr 342 +#define NR_syscalls 343 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 9a111b621f4e..bba01bf678a6 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o -obj-$(CONFIG_64BIT) += cache.o +obj-$(CONFIG_64BIT) += runtime_instr.o cache.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 2d82cfcbce5b..5f7928fe34ce 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1646,3 +1646,8 @@ ENTRY(compat_sys_process_vm_writev_wrapper) llgf %r0,164(%r15) # unsigned long stg %r0,160(%r15) jg compat_sys_process_vm_writev + +ENTRY(sys_s390_runtime_instr_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_s390_runtime_instr diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 84fd7e920bfe..4bc67db63522 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -315,6 +315,8 @@ enum { LONG_INSN_POPCNT, LONG_INSN_RISBHG, LONG_INSN_RISBLG, + LONG_INSN_RINEXT, + LONG_INSN_RIEMIT, }; static char *long_insn_name[] = { @@ -330,6 +332,8 @@ static char *long_insn_name[] = { [LONG_INSN_POPCNT] = "popcnt", [LONG_INSN_RISBHG] = "risbhg", [LONG_INSN_RISBLG] = "risblk", + [LONG_INSN_RINEXT] = "rinext", + [LONG_INSN_RIEMIT] = "riemit", }; static struct insn opcode[] = { @@ -582,6 +586,17 @@ static struct insn opcode_a7[] = { { "", 0, INSTR_INVALID } }; +static struct insn opcode_aa[] = { +#ifdef CONFIG_64BIT + { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI }, + { "rion", 0x01, INSTR_RI_RI }, + { "tric", 0x02, INSTR_RI_RI }, + { "rioff", 0x03, INSTR_RI_RI }, + { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI }, +#endif + { "", 0, INSTR_INVALID } +}; + static struct insn opcode_b2[] = { #ifdef CONFIG_64BIT { "sske", 0x2b, INSTR_RRF_M0RR }, @@ -1210,6 +1225,9 @@ static struct insn opcode_eb[] = { { "cliy", 0x55, INSTR_SIY_URD }, { "oiy", 0x56, INSTR_SIY_URD }, { "xiy", 0x57, INSTR_SIY_URD }, + { "lric", 0x60, INSTR_RSY_RDRM }, + { "stric", 0x61, INSTR_RSY_RDRM }, + { "mric", 0x62, INSTR_RSY_RDRM }, { "icmh", 0x80, INSTR_RSE_RURD }, { "icmh", 0x80, INSTR_RSY_RURD }, { "icmy", 0x81, INSTR_RSY_RURD }, @@ -1408,6 +1426,9 @@ static struct insn *find_insn(unsigned char *code) case 0xa7: table = opcode_a7; break; + case 0xaa: + table = opcode_aa; + break; case 0xb2: table = opcode_b2; break; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index a22fdca6a663..c1b44934fe64 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -44,6 +44,7 @@ static const struct irq_class intrclass_names[] = { {.name = "IUC", .desc = "[EXT] IUCV" }, {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling" }, {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter" }, + {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI" }, {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 733175373a4c..cb4328e49c75 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -132,6 +133,7 @@ EXPORT_SYMBOL(kernel_thread); */ void exit_thread(void) { + exit_thread_runtime_instr(); } void flush_thread(void) @@ -170,6 +172,11 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); + /* Don't copy runtime instrumentation info */ + p->thread.ri_cb = NULL; + p->thread.ri_signum = 0; + frame->childregs.psw.mask &= ~PSW_MASK_RI; + #ifndef CONFIG_64BIT /* * save fprs to current->thread.fp_regs to merge them with diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c new file mode 100644 index 000000000000..e27e23da0e60 --- /dev/null +++ b/arch/s390/kernel/runtime_instr.c @@ -0,0 +1,150 @@ +/* + * Copyright IBM Corp. 2012 + * Author(s): Jan Glauber + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* empty control block to disable RI by loading it */ +struct runtime_instr_cb runtime_instr_empty_cb; + +static int runtime_instr_avail(void) +{ + return test_facility(64); +} + +static void disable_runtime_instr(void) +{ + struct pt_regs *regs = task_pt_regs(current); + + load_runtime_instr_cb(&runtime_instr_empty_cb); + + /* + * Make sure the RI bit is deleted from the PSW. If the user did not + * switch off RI before the system call the process will get a + * specification exception otherwise. + */ + regs->psw.mask &= ~PSW_MASK_RI; +} + +static void init_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + cb->buf_limit = 0xfff; + if (addressing_mode == HOME_SPACE_MODE) + cb->home_space = 1; + cb->int_requested = 1; + cb->pstate = 1; + cb->pstate_set_buf = 1; + cb->pstate_sample = 1; + cb->pstate_collect = 1; + cb->key = PAGE_DEFAULT_KEY; + cb->valid = 1; +} + +void exit_thread_runtime_instr(void) +{ + struct task_struct *task = current; + + if (!task->thread.ri_cb) + return; + disable_runtime_instr(); + kfree(task->thread.ri_cb); + task->thread.ri_signum = 0; + task->thread.ri_cb = NULL; +} + +static void runtime_instr_int_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct siginfo info; + + if (!(param32 & CPU_MF_INT_RI_MASK)) + return; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_CMR]++; + + if (!current->thread.ri_cb) + return; + if (current->thread.ri_signum < SIGRTMIN || + current->thread.ri_signum > SIGRTMAX) { + WARN_ON_ONCE(1); + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = current->thread.ri_signum; + info.si_code = SI_QUEUE; + if (param32 & CPU_MF_INT_RI_BUF_FULL) + info.si_int = ENOBUFS; + else if (param32 & CPU_MF_INT_RI_HALTED) + info.si_int = ECANCELED; + else + return; /* unknown reason */ + + send_sig_info(current->thread.ri_signum, &info, current); +} + +SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) +{ + struct runtime_instr_cb *cb; + + if (!runtime_instr_avail()) + return -EOPNOTSUPP; + + if (command == S390_RUNTIME_INSTR_STOP) { + preempt_disable(); + exit_thread_runtime_instr(); + preempt_enable(); + return 0; + } + + if (command != S390_RUNTIME_INSTR_START || + (signum < SIGRTMIN || signum > SIGRTMAX)) + return -EINVAL; + + if (!current->thread.ri_cb) { + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + } else { + cb = current->thread.ri_cb; + memset(cb, 0, sizeof(*cb)); + } + + init_runtime_instr_cb(cb); + current->thread.ri_signum = signum; + + /* now load the control block to make it available */ + preempt_disable(); + current->thread.ri_cb = cb; + load_runtime_instr_cb(cb); + preempt_enable(); + return 0; +} + +static int __init runtime_instr_init(void) +{ + int rc; + + if (!runtime_instr_avail()) + return 0; + + measurement_alert_subclass_register(); + rc = register_external_interrupt(0x1407, runtime_instr_int_handler); + if (rc) + measurement_alert_subclass_unregister(); + else + pr_info("Runtime instrumentation facility initialized\n"); + return rc; +} +device_initcall(runtime_instr_init); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index bcab2f04ba58..539f0a7701e4 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -350,3 +350,4 @@ SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) +SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper) -- cgit v1.2.3 From d35339a42dd1f53b0bb86cf75418a9b7cf5f0f30 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 Jul 2012 11:03:04 +0200 Subject: s390: add support for transactional memory Allow user-space processes to use transactional execution (TX). If the TX facility is available user space programs can use transactions for fine-grained serialization based on the data objects that are referenced during a transaction. This is useful for lockless data structures and speculative compiler optimizations. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 1 + arch/s390/include/asm/lowcore.h | 6 +++- arch/s390/include/asm/processor.h | 6 ++++ arch/s390/include/asm/ptrace.h | 8 +++-- arch/s390/include/asm/setup.h | 3 ++ arch/s390/kernel/asm-offsets.c | 2 ++ arch/s390/kernel/dis.c | 12 +++++++ arch/s390/kernel/early.c | 3 +- arch/s390/kernel/entry64.S | 12 ++++--- arch/s390/kernel/processor.c | 6 ++-- arch/s390/kernel/ptrace.c | 69 +++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/setup.c | 6 ++++ arch/s390/kernel/traps.c | 37 +++++++++++++++------ include/linux/elf.h | 1 + 14 files changed, 151 insertions(+), 21 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 9b94a160fe7f..db57594e94b0 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -101,6 +101,7 @@ #define HWCAP_S390_HPAGE 128 #define HWCAP_S390_ETF3EH 256 #define HWCAP_S390_HIGH_GPRS 512 +#define HWCAP_S390_TE 1024 /* * These are used to set parameters in the core dumps. diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index aab5555bbbda..bbf8141408cd 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -329,9 +329,13 @@ struct _lowcore { __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */ __u32 access_regs_save_area[16]; /* 0x1340 */ __u64 cregs_save_area[16]; /* 0x1380 */ + __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */ + + /* Transaction abort diagnostic block */ + __u8 pgm_tdb[256]; /* 0x1800 */ /* align to the top of the prefix area */ - __u8 pad_0x1400[0x2000-0x1400]; /* 0x1400 */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed; #endif /* CONFIG_32BIT */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0fff583d2c7c..46fe1fbf91c5 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -76,14 +76,20 @@ struct thread_struct { unsigned long gmap_addr; /* address of last gmap fault. */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ + unsigned long per_flags; /* Flags to control debug behavior */ /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; int ri_signum; +#ifdef CONFIG_64BIT + unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ +#endif }; +#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */ + typedef struct thread_struct thread_struct; /* diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 5c32bae6b760..ce20a53afe91 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -361,17 +361,19 @@ struct per_struct_kernel { unsigned char access_id; /* PER trap access identification */ }; -#define PER_EVENT_MASK 0xE9000000UL +#define PER_EVENT_MASK 0xEB000000UL #define PER_EVENT_BRANCH 0x80000000UL #define PER_EVENT_IFETCH 0x40000000UL #define PER_EVENT_STORE 0x20000000UL #define PER_EVENT_STORE_REAL 0x08000000UL +#define PER_EVENT_TRANSACTION_END 0x02000000UL #define PER_EVENT_NULLIFICATION 0x01000000UL -#define PER_CONTROL_MASK 0x00a00000UL +#define PER_CONTROL_MASK 0x00e00000UL #define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL +#define PER_CONTROL_SUSPENSION 0x00400000UL #define PER_CONTROL_ALTERATION 0x00200000UL #endif @@ -485,6 +487,8 @@ typedef struct #define PTRACE_GET_LAST_BREAK 0x5006 #define PTRACE_PEEK_SYSTEM_CALL 0x5007 #define PTRACE_POKE_SYSTEM_CALL 0x5008 +#define PTRACE_ENABLE_TE 0x5009 +#define PTRACE_DISABLE_TE 0x5010 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index e6859d16ee2d..908f68871393 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -80,6 +80,7 @@ extern unsigned int addressing_mode; #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_TE (1UL << 15) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -98,6 +99,7 @@ extern unsigned int addressing_mode; #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) #define MACHINE_HAS_TOPOLOGY (0) +#define MACHINE_HAS_TE (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -109,6 +111,7 @@ extern unsigned int addressing_mode; #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #endif /* CONFIG_64BIT */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 45ef1a7b08f9..fface87056eb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -157,6 +157,8 @@ int main(void) DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); + DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); + DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); #endif /* CONFIG_32BIT */ return 0; diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 4bc67db63522..7b6ad271155d 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -317,6 +317,9 @@ enum { LONG_INSN_RISBLG, LONG_INSN_RINEXT, LONG_INSN_RIEMIT, + LONG_INSN_TABORT, + LONG_INSN_TBEGIN, + LONG_INSN_TBEGINC, }; static char *long_insn_name[] = { @@ -334,6 +337,9 @@ static char *long_insn_name[] = { [LONG_INSN_RISBLG] = "risblk", [LONG_INSN_RINEXT] = "rinext", [LONG_INSN_RIEMIT] = "riemit", + [LONG_INSN_TABORT] = "tabort", + [LONG_INSN_TBEGIN] = "tbegin", + [LONG_INSN_TBEGINC] = "tbeginc", }; static struct insn opcode[] = { @@ -609,6 +615,9 @@ static struct insn opcode_b2[] = { { "lpswe", 0xb2, INSTR_S_RD }, { "srnmt", 0xb9, INSTR_S_RD }, { "lfas", 0xbd, INSTR_S_RD }, + { "etndg", 0xec, INSTR_RRE_R0 }, + { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD }, + { "tend", 0xf8, INSTR_S_RD }, #endif { "stidp", 0x02, INSTR_S_RD }, { "sck", 0x04, INSTR_S_RD }, @@ -1165,6 +1174,7 @@ static struct insn opcode_e3[] = { { "stfh", 0xcb, INSTR_RXY_RRRD }, { "chf", 0xcd, INSTR_RXY_RRRD }, { "clhf", 0xcf, INSTR_RXY_RRRD }, + { "ntstg", 0x25, INSTR_RXY_RRRD }, #endif { "lrv", 0x1e, INSTR_RXY_RRRD }, { "lrvh", 0x1f, INSTR_RXY_RRRD }, @@ -1188,6 +1198,8 @@ static struct insn opcode_e5[] = { { "mvhhi", 0x44, INSTR_SIL_RDI }, { "mvhi", 0x4c, INSTR_SIL_RDI }, { "mvghi", 0x48, INSTR_SIL_RDI }, + { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU }, + { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU }, #endif { "lasp", 0x00, INSTR_SSE_RDRD }, { "tprot", 0x01, INSTR_SSE_RDRD }, diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index e48407962b0f..1345ba452c83 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -370,6 +370,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; + if (test_facility(50) && test_facility(73)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TE; #endif } @@ -439,7 +441,6 @@ static void __init setup_boot_command_line(void) append_to_cmdline(append_ipl_scpdata); } - /* * Save ipl parameters, clear bss memory, initialize storage keys * and create a kernel NSS at startup if the SAVESYS= parm is defined diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 349b7eeb348a..95e9d93d4f41 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -412,6 +412,11 @@ ENTRY(pgm_check_handler) 1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER LAST_BREAK %r14 lg %r15,__LC_KERNEL_STACK + lg %r14,__TI_task(%r12) + lghi %r13,__LC_PGM_TDB + tm __LC_PGM_ILC+2,0x02 # check for transaction abort + jz 2f + mvc __THREAD_trap_tdb(256,%r14),0(%r13) 2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -422,13 +427,12 @@ ENTRY(pgm_check_handler) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception jz 0f - lg %r1,__TI_task(%r12) tmhh %r8,0x0001 # kernel per event ? jz pgm_kprobe oi __TI_flags+7(%r12),_TIF_PER_TRAP - mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE - mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID + mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE + mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID 0: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 572d4c9cb33b..22bf8f0ee093 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -39,9 +39,9 @@ void __cpuinit cpu_init(void) */ static int show_cpuinfo(struct seq_file *m, void *v) { - static const char *hwcap_str[10] = { + static const char *hwcap_str[11] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs" + "edat", "etf3eh", "highgprs", "te" }; unsigned long n = (unsigned long) v - 1; int i; @@ -54,7 +54,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) num_online_cpus(), loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ))%100); seq_puts(m, "features\t: "); - for (i = 0; i < 10; i++) + for (i = 0; i < 11; i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); seq_puts(m, "\n"); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e4be113fbac6..b817cc5e49ae 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,6 +42,7 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_TDB, REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; @@ -52,6 +53,21 @@ void update_per_regs(struct task_struct *task) struct thread_struct *thread = &task->thread; struct per_regs old, new; + /* Take care of the enable/disable of transactional execution. */ + if (MACHINE_HAS_TE) { + unsigned long cr0, cr0_new; + + __ctl_store(cr0, 0, 0); + /* set or clear transaction execution bits 8 and 9. */ + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr0_new = cr0 & ~(3UL << 54); + else + cr0_new = cr0 | (3UL << 54); + /* Only load control register 0 if necessary. */ + if (cr0 != cr0_new) + __ctl_load(cr0_new, 0, 0); + } + /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -60,6 +76,10 @@ void update_per_regs(struct task_struct *task) /* merge TIF_SINGLE_STEP into user specified PER registers. */ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { new.control |= PER_EVENT_IFETCH; +#ifdef CONFIG_64BIT + new.control |= PER_CONTROL_SUSPENSION; + new.control |= PER_EVENT_TRANSACTION_END; +#endif new.start = 0; new.end = PSW_ADDR_INSN; } @@ -100,6 +120,7 @@ void ptrace_disable(struct task_struct *task) memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); clear_tsk_thread_flag(task, TIF_PER_TRAP); + task->thread.per_flags = 0; } #ifndef CONFIG_64BIT @@ -416,6 +437,16 @@ long arch_ptrace(struct task_struct *child, long request, put_user(task_thread_info(child)->last_break, (unsigned long __user *) data); return 0; + case PTRACE_ENABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags &= ~PER_FLAG_NO_TE; + return 0; + case PTRACE_DISABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags |= PER_FLAG_NO_TE; + return 0; default: /* Removing high order bit from addr (only for 31 bit). */ addr &= PSW_ADDR_INSN; @@ -903,6 +934,28 @@ static int s390_last_break_set(struct task_struct *target, return 0; } +static int s390_tdb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + unsigned char *data; + + if (!(regs->int_code & 0x200)) + return -ENODATA; + data = target->thread.trap_tdb; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); +} + +static int s390_tdb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + #endif static int s390_system_call_get(struct task_struct *target, @@ -951,6 +1004,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, .set = s390_last_break_set, }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, #endif [REGSET_SYSTEM_CALL] = { .core_note_type = NT_S390_SYSTEM_CALL, @@ -1148,6 +1209,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_last_break_get, .set = s390_compat_last_break_set, }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, [REGSET_SYSTEM_CALL] = { .core_note_type = NT_S390_SYSTEM_CALL, .n = 1, diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 40b57693de38..7b59fff85f2f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -980,6 +980,12 @@ static void __init setup_hwcaps(void) * HWCAP_S390_HIGH_GPRS is bit 9. */ elf_hwcap |= HWCAP_S390_HIGH_GPRS; + + /* + * Transactional execution support HWCAP_S390_TE is bit 10. + */ + if (test_facility(50) && test_facility(73)) + elf_hwcap |= HWCAP_S390_TE; #endif get_cpu_id(&cpu_id); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 01775c04a90e..29af52628e8b 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -57,6 +57,23 @@ static int kstack_depth_to_print = 12; static int kstack_depth_to_print = 20; #endif /* CONFIG_64BIT */ +static inline void __user *get_trap_ip(struct pt_regs *regs) +{ +#ifdef CONFIG_64BIT + unsigned long address; + + if (regs->int_code & 0x200) + address = *(unsigned long *)(current->thread.trap_tdb + 24); + else + address = regs->psw.addr; + return (void __user *) + ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN); +#else + return (void __user *) + ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); +#endif +} + /* * For show_trace we have tree different stack to consider: * - the panic stack which is used if the kernel stack has overflown @@ -285,12 +302,6 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static inline void __user *get_psw_address(struct pt_regs *regs) -{ - return (void __user *) - ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); -} - static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { @@ -304,7 +315,7 @@ static void __kprobes do_trap(struct pt_regs *regs, info.si_signo = si_signo; info.si_errno = 0; info.si_code = si_code; - info.si_addr = get_psw_address(regs); + info.si_addr = get_trap_ip(regs); force_sig_info(si_signo, &info, current); report_user_fault(regs, si_signo); } else { @@ -381,6 +392,11 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, "translation exception") +#ifdef CONFIG_64BIT +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, + "transaction constraint exception") +#endif + static inline void do_fp_trap(struct pt_regs *regs, int fpc) { int si_code = 0; @@ -408,7 +424,7 @@ static void __kprobes illegal_op(struct pt_regs *regs) __u16 __user *location; int signal = 0; - location = get_psw_address(regs); + location = get_trap_ip(regs); if (user_mode(regs)) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) @@ -476,7 +492,7 @@ void specification_exception(struct pt_regs *regs) __u16 __user *location = NULL; int signal = 0; - location = (__u16 __user *) get_psw_address(regs); + location = (__u16 __user *) get_trap_ip(regs); if (user_mode(regs)) { get_user(*((__u16 *) opcode), location); @@ -525,7 +541,7 @@ static void data_exception(struct pt_regs *regs) __u16 __user *location; int signal = 0; - location = get_psw_address(regs); + location = get_trap_ip(regs); if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); @@ -641,6 +657,7 @@ void __init trap_init(void) pgm_check_table[0x12] = &translation_exception; pgm_check_table[0x13] = &special_op_exception; #ifdef CONFIG_64BIT + pgm_check_table[0x18] = &transaction_exception; pgm_check_table[0x38] = &do_asce_exception; pgm_check_table[0x39] = &do_dat_exception; pgm_check_table[0x3A] = &do_dat_exception; diff --git a/include/linux/elf.h b/include/linux/elf.h index 999b4f52e8e5..f930b1a390ab 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -387,6 +387,7 @@ typedef struct elf64_shdr { #define NT_S390_PREFIX 0x305 /* s390 prefix register */ #define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ #define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ +#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ -- cgit v1.2.3 From 6668022c7bde3fdc96d3d257294a7216c7a46829 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 29 Aug 2012 14:12:20 +0200 Subject: s390/cache: add cpu cache information to /proc/cpuinfo Add a line for each cpu cache to /proc/cpuinfo. Since we only have information of private cpu caches in sysfs we add a line for each cpu cache in /proc/cpuinfo which will also contain information about shared caches. For a z196 machine /proc/cpuinfo now looks like: vendor_id : IBM/S390 bogomips per cpu: 14367.00 features : esan3 zarch stfle msa ldisp eimm dfp etf3eh highgprs cache0 : level=1 type=Data scope=Private size=64K line_size=256 associativity=4 cache1 : level=1 type=Instruction scope=Private size=128K line_size=256 associativity=8 cache2 : level=2 type=Unified scope=Private size=1536K line_size=256 associativity=12 cache3 : level=3 type=Unified scope=Shared size=24576K line_size=256 associativity=12 cache4 : level=4 type=Unified scope=Shared size=196608K line_size=256 associativity=24 processor 0: version = FF, identification = 000123, machine = 2817 processor 1: version = FF, identification = 100123, machine = 2817 processor 2: version = FF, identification = 200123, machine = 2817 processor 3: version = FF, identification = 200123, machine = 2817 Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 6 +++++ arch/s390/kernel/cache.c | 46 ++++++++++++++++++++++++++++++--------- arch/s390/kernel/processor.c | 1 + 3 files changed, 43 insertions(+), 10 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 46fe1fbf91c5..7e81ff17a89b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -140,6 +140,12 @@ struct task_struct; struct mm_struct; struct seq_file; +#ifdef CONFIG_64BIT +extern void show_cacheinfo(struct seq_file *m); +#else +static inline void show_cacheinfo(struct seq_file *m) { } +#endif + /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 5e20bab4df22..8df8d8a19c98 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -17,8 +18,9 @@ struct cache { unsigned int line_size; unsigned int associativity; unsigned int nr_sets; - int level; - int type; + unsigned int level : 3; + unsigned int type : 2; + unsigned int private : 1; struct list_head list; }; @@ -83,6 +85,24 @@ static const char * const cache_type_string[] = { static struct cache_dir *cache_dir_cpu[NR_CPUS]; static LIST_HEAD(cache_list); +void show_cacheinfo(struct seq_file *m) +{ + struct cache *cache; + int index = 0; + + list_for_each_entry(cache, &cache_list, list) { + seq_printf(m, "cache%-11d: ", index); + seq_printf(m, "level=%d ", cache->level); + seq_printf(m, "type=%s ", cache_type_string[cache->type]); + seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared"); + seq_printf(m, "size=%luK ", cache->size >> 10); + seq_printf(m, "line_size=%u ", cache->line_size); + seq_printf(m, "associativity=%d", cache->associativity); + seq_puts(m, "\n"); + index++; + } +} + static inline unsigned long ecag(int ai, int li, int ti) { unsigned long cmd, val; @@ -93,7 +113,7 @@ static inline unsigned long ecag(int ai, int li, int ti) return val; } -static int __init cache_add(int level, int type) +static int __init cache_add(int level, int private, int type) { struct cache *cache; int ti; @@ -107,8 +127,9 @@ static int __init cache_add(int level, int type) cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); cache->nr_sets = cache->size / cache->associativity; cache->nr_sets /= cache->line_size; + cache->private = private; cache->level = level + 1; - cache->type = type; + cache->type = type - 1; list_add_tail(&cache->list, &cache_list); return 0; } @@ -117,23 +138,26 @@ static void __init cache_build_info(void) { struct cache *cache, *next; union cache_topology ct; - int level, rc; + int level, private, rc; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); for (level = 0; level < CACHE_MAX_LEVEL; level++) { switch (ct.ci[level].scope) { case CACHE_SCOPE_NOTEXISTS: case CACHE_SCOPE_RESERVED: - case CACHE_SCOPE_SHARED: return; + case CACHE_SCOPE_SHARED: + private = 0; + break; case CACHE_SCOPE_PRIVATE: + private = 1; break; } if (ct.ci[level].type == CACHE_TYPE_SEPARATE) { - rc = cache_add(level, CACHE_TYPE_DATA); - rc |= cache_add(level, CACHE_TYPE_INSTRUCTION); + rc = cache_add(level, private, CACHE_TYPE_DATA); + rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION); } else { - rc = cache_add(level, ct.ci[level].type); + rc = cache_add(level, private, ct.ci[level].type); } if (rc) goto error; @@ -208,7 +232,7 @@ DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10); DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size); DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets); DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity); -DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type - 1]); +DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]); DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level); static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf) @@ -298,6 +322,8 @@ static int __cpuinit cache_add_cpu(int cpu) if (!cache_dir) return -ENOMEM; list_for_each_entry(cache, &cache_list, list) { + if (!cache->private) + break; rc = cache_create_index_dir(cache_dir, cache, index, cpu); if (rc) return rc; diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 22bf8f0ee093..63f3b76e37a9 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -58,6 +58,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); seq_puts(m, "\n"); + show_cacheinfo(m); } get_online_cpus(); if (cpu_online(n)) { -- cgit v1.2.3 From eb608fb366de123a97227437e5306f731f4a63c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Sep 2012 13:26:11 +0200 Subject: s390/exceptions: switch to relative exception table entries This is the s390 port of 70627654 "x86, extable: Switch to relative exception table entries". Reduces the size of our exception tables by 50% on 64 bit builds. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 42 +++++++++++--------- arch/s390/include/asm/uaccess.h | 15 +++++++- arch/s390/kernel/early.c | 4 +- arch/s390/kernel/entry64.S | 5 +-- arch/s390/kernel/kprobes.c | 2 +- arch/s390/kernel/traps.c | 2 +- arch/s390/mm/Makefile | 2 +- arch/s390/mm/extable.c | 81 +++++++++++++++++++++++++++++++++++++++ arch/s390/mm/fault.c | 2 +- scripts/sortextable.c | 10 ++--- 10 files changed, 132 insertions(+), 33 deletions(-) create mode 100644 arch/s390/mm/extable.c (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 7e81ff17a89b..f3e0aabfc6bc 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -11,6 +11,8 @@ #ifndef __ASM_S390_PROCESSOR_H #define __ASM_S390_PROCESSOR_H +#ifndef __ASSEMBLY__ + #include #include #include @@ -348,23 +350,6 @@ extern void (*s390_base_ext_handler_fn)(void); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -/* - * Helper macro for exception table entries - */ -#ifndef CONFIG_64BIT -#define EX_TABLE(_fault,_target) \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long " #_fault "," #_target "\n" \ - ".previous\n" -#else -#define EX_TABLE(_fault,_target) \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad " #_fault "," #_target "\n" \ - ".previous\n" -#endif - extern int memcpy_real(void *, void *, size_t); extern void memcpy_absolute(void *, void *, size_t); @@ -375,4 +360,25 @@ extern void memcpy_absolute(void *, void *, size_t); memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \ } -#endif /* __ASM_S390_PROCESSOR_H */ +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ + ".long (" #_fault ") - .\n" \ + ".long (" #_target ") - .\n" \ + ".previous\n" + +#else /* __ASSEMBLY__ */ + +#define EX_TABLE(_fault, _target) \ + .section __ex_table,"a" ; \ + .align 4 ; \ + .long (_fault) - . ; \ + .long (_target) - . ; \ + .previous + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a8ab18b18b54..34268df959a3 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -76,9 +76,22 @@ static inline int __range_ok(unsigned long addr, unsigned long size) struct exception_table_entry { - unsigned long insn, fixup; + int insn, fixup; }; +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + +#define ARCH_HAS_SORT_EXTABLE +#define ARCH_HAS_SEARCH_EXTABLE + struct uaccess_ops { size_t (*copy_from_user)(size_t, const void __user *, void *); size_t (*copy_from_user_small)(size_t, const void __user *, void *); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index f4bcdc01bfc8..e8000d5ff533 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -255,14 +255,14 @@ static __init void setup_topology(void) static void early_pgm_check_handler(void) { - unsigned long addr; const struct exception_table_entry *fixup; + unsigned long addr; addr = S390_lowcore.program_old_psw.addr; fixup = search_exception_tables(addr & PSW_ADDR_INSN); if (!fixup) disabled_wait(0); - S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE; + S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; } static noinline __init void setup_lowcore_early(void) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 95e9d93d4f41..7549985402f7 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -1008,9 +1009,7 @@ sie_fault: .Lhost_id: .quad 0 - .section __ex_table,"a" - .quad sie_loop,sie_fault - .previous + EX_TABLE(sie_loop,sie_fault) #endif .section .rodata, "a" diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 8aa634f5944b..d1c7214e157c 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -547,7 +547,7 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) */ entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (entry) { - regs->psw.addr = entry->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE; return 1; } diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 29af52628e8b..befc761fc154 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -322,7 +322,7 @@ static void __kprobes do_trap(struct pt_regs *regs, const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; else { enum bug_trap_type btt; diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index d98fe9004a52..0f5536b0c1a1 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,7 +3,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ - page-states.o gup.o + page-states.o gup.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c new file mode 100644 index 000000000000..4d1ee88864e8 --- /dev/null +++ b/arch/s390/mm/extable.c @@ -0,0 +1,81 @@ +#include +#include +#include + +/* + * Search one exception table for an entry corresponding to the + * given instruction address, and return the address of the entry, + * or NULL if none is found. + * We use a binary search, and thus we assume that the table is + * already sorted. + */ +const struct exception_table_entry * +search_extable(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + const struct exception_table_entry *mid; + unsigned long addr; + + while (first <= last) { + mid = ((last - first) >> 1) + first; + addr = extable_insn(mid); + if (addr < value) + first = mid + 1; + else if (addr > value) + last = mid - 1; + else + return mid; + } + return NULL; +} + +/* + * The exception table needs to be sorted so that the binary + * search that we use to find entries in it works properly. + * This is used both for the kernel exception table and for + * the exception tables of modules that get loaded. + * + */ +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *x = a, *y = b; + + /* This compare is only valid after normalization. */ + return x->insn - y->insn; +} + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + struct exception_table_entry *p; + int i; + + /* Normalize entries to being relative to the start of the section */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn += i; + sort(start, finish - start, sizeof(*start), cmp_ex, NULL); + /* Denormalize all entries */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn -= i; +} + +#ifdef CONFIG_MODULES +/* + * If the exception table is sorted, any referring to the module init + * will be at the beginning or the end. + */ +void trim_init_extable(struct module *m) +{ + /* Trim the beginning */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /* Trim the end */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 8b2cac1ddbc5..ac9122ca1152 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -163,7 +163,7 @@ static noinline void do_no_context(struct pt_regs *regs) /* Are we prepared to handle this kernel fault? */ fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) { - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; return; } diff --git a/scripts/sortextable.c b/scripts/sortextable.c index 6acf83449105..f19ddc47304c 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -161,7 +161,7 @@ typedef void (*table_sort_t)(char *, int); #define SORTEXTABLE_64 #include "sortextable.h" -static int compare_x86_table(const void *a, const void *b) +static int compare_relative_table(const void *a, const void *b) { int32_t av = (int32_t)r(a); int32_t bv = (int32_t)r(b); @@ -173,7 +173,7 @@ static int compare_x86_table(const void *a, const void *b) return 0; } -static void sort_x86_table(char *extab_image, int image_size) +static void sort_relative_table(char *extab_image, int image_size) { int i; @@ -188,7 +188,7 @@ static void sort_x86_table(char *extab_image, int image_size) i += 4; } - qsort(extab_image, image_size / 8, 8, compare_x86_table); + qsort(extab_image, image_size / 8, 8, compare_relative_table); /* Now denormalize. */ i = 0; @@ -245,9 +245,9 @@ do_file(char const *const fname) break; case EM_386: case EM_X86_64: - custom_sort = sort_x86_table; - break; case EM_S390: + custom_sort = sort_relative_table; + break; case EM_MIPS: break; } /* end switch */ -- cgit v1.2.3 From 65f22a906e154e8086ed561904d09c3586de85f4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Sep 2012 15:48:11 -0400 Subject: s390: fold execve_tail() into start_thread(), convert to generic sys_execve() Signed-off-by: Al Viro --- arch/s390/include/asm/processor.h | 3 +++ arch/s390/include/asm/unistd.h | 1 + arch/s390/kernel/compat_linux.c | 26 -------------------------- arch/s390/kernel/compat_linux.h | 2 -- arch/s390/kernel/compat_wrapper.S | 2 +- arch/s390/kernel/entry.S | 3 --- arch/s390/kernel/entry.h | 3 --- arch/s390/kernel/entry64.S | 1 - arch/s390/kernel/process.c | 25 ------------------------- 9 files changed, 5 insertions(+), 61 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 11e4e3236937..0bc77619c3a3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -32,6 +32,7 @@ static inline void get_cpu_id(struct cpuid *ptr) extern void s390_adjust_jiffies(void); extern const struct seq_operations cpuinfo_op; extern int sysctl_ieee_emulation_warnings; +extern void execve_tail(void); /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. @@ -114,6 +115,7 @@ struct stack_frame { regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \ regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ regs->gprs[15] = new_stackp; \ + execve_tail(); \ } while (0) #define start_thread31(regs, new_psw, new_stackp) do { \ @@ -123,6 +125,7 @@ struct stack_frame { __tlb_flush_mm(current->mm); \ crst_table_downgrade(current->mm, 1UL << 31); \ update_mm(current->mm, current); \ + execve_tail(); \ } while (0) /* Forward declaration, a strange C thing */ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 6756e78f4808..9afacff38edc 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -415,6 +415,7 @@ # define __ARCH_WANT_COMPAT_SYS_TIME # define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND # endif +#define __ARCH_WANT_SYS_EXECVE /* * "Conditional" syscalls diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index f606d935f495..d03d733dadfe 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -420,32 +420,6 @@ sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) return ret; } -/* - * sys32_execve() executes a new program after the asm stub has set - * things up for us. This should basically do what I want it to. - */ -asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv, - compat_uptr_t __user *envp) -{ - struct pt_regs *regs = task_pt_regs(current); - char *filename; - long rc; - - filename = getname(name); - rc = PTR_ERR(filename); - if (IS_ERR(filename)) - return rc; - rc = compat_do_execve(filename, argv, envp, regs); - if (rc) - goto out; - current->thread.fp_regs.fpc=0; - asm volatile("sfpc %0,0" : : "d" (0)); - rc = regs->gprs[2]; -out: - putname(filename); - return rc; -} - asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo) { diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 9635d759c2b9..5daff1d34b8e 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -193,8 +193,6 @@ long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, compat_sigset_t __user *oset, size_t sigsetsize); long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize); long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo); -long sys32_execve(const char __user *name, compat_uptr_t __user *argv, - compat_uptr_t __user *envp); long sys32_init_module(void __user *umod, unsigned long len, const char __user *uargs); long sys32_delete_module(const char __user *name_user, unsigned int flags); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 2d82cfcbce5b..cf1d60f46922 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1576,7 +1576,7 @@ ENTRY(sys32_execve_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # compat_uptr_t * llgtr %r4,%r4 # compat_uptr_t * - jg sys32_execve # branch to system call + jg compat_sys_execve # branch to system call ENTRY(sys_fanotify_init_wrapper) llgfr %r2,%r2 # unsigned int diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 870bad6d56fc..6286985a1039 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -368,8 +368,6 @@ ENTRY(kernel_execve) l %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) ssm __LC_SVC_NEW_PSW # reenable interrupts - l %r1,BASED(.Lexecve_tail) - basr %r14,%r1 # call execve_tail j sysc_return /* @@ -932,7 +930,6 @@ cleanup_idle_wait: .Ldo_notify_resume: .long do_notify_resume .Ldo_per_trap: .long do_per_trap .Ldo_execve: .long do_execve -.Lexecve_tail: .long execve_tail .Ljump_table: .long pgm_check_table .Lschedule: .long schedule #ifdef CONFIG_PREEMPT diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index a5f4dc42a5db..d0d3f69a7346 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -58,9 +58,6 @@ long sys_fork(void); long sys_clone(unsigned long newsp, unsigned long clone_flags, int __user *parent_tidptr, int __user *child_tidptr); long sys_vfork(void); -void execve_tail(void); -long sys_execve(const char __user *name, const char __user *const __user *argv, - const char __user *const __user *envp); long sys_sigsuspend(int history0, int history1, old_sigset_t mask); long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 349b7eeb348a..1f776f2edda5 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -386,7 +386,6 @@ ENTRY(kernel_execve) lg %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) ssm __LC_SVC_NEW_PSW # reenable interrupts - brasl %r14,execve_tail j sysc_return /* diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 733175373a4c..e540251e1dd1 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -249,31 +249,6 @@ asmlinkage void execve_tail(void) asm volatile("sfpc %0,%0" : : "d" (0)); } -/* - * sys_execve() executes a new program. - */ -SYSCALL_DEFINE3(execve, const char __user *, name, - const char __user *const __user *, argv, - const char __user *const __user *, envp) -{ - struct pt_regs *regs = task_pt_regs(current); - char *filename; - long rc; - - filename = getname(name); - rc = PTR_ERR(filename); - if (IS_ERR(filename)) - return rc; - rc = do_execve(filename, argv, envp, regs); - if (rc) - goto out; - execve_tail(); - rc = regs->gprs[2]; -out: - putname(filename); - return rc; -} - /* * fill in the FPU structure for a core dump. */ -- cgit v1.2.3 From f9a7e025dfc28783fd2b05812e663251acf4bf21 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Sep 2012 20:48:32 -0400 Subject: s390: switch to generic kernel_thread() Signed-off-by: Al Viro --- arch/s390/Kconfig | 1 + arch/s390/include/asm/processor.h | 1 - arch/s390/kernel/process.c | 72 ++++++++++++++++++--------------------- 3 files changed, 34 insertions(+), 40 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 107610e01a29..57442393bac7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -125,6 +125,7 @@ config S390 select GENERIC_CLOCKEVENTS select KTIME_SCALAR if 32BIT select HAVE_ARCH_SECCOMP_FILTER + select GENERIC_KERNEL_THREAD config SCHED_OMIT_FRAME_POINTER def_bool y diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0bc77619c3a3..da6f5baeee5c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -135,7 +135,6 @@ struct seq_file; /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); /* * Return saved PC of a blocked thread. diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2868a364ff94..bab088de4569 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -98,25 +98,6 @@ void cpu_idle(void) extern void __kprobes kernel_thread_starter(void); -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - regs.psw.mask = psw_kernel_bits | - PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; - regs.gprs[9] = (unsigned long) fn; - regs.gprs[10] = (unsigned long) arg; - regs.gprs[11] = (unsigned long) do_exit; - regs.orig_gpr2 = -1; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, - 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - /* * Free current thread data structures etc.. */ @@ -133,7 +114,7 @@ void release_thread(struct task_struct *dead_task) } int copy_thread(unsigned long clone_flags, unsigned long new_stackp, - unsigned long unused, + unsigned long arg, struct task_struct *p, struct pt_regs *regs) { struct thread_info *ti; @@ -145,20 +126,44 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, frame = container_of(task_pt_regs(p), struct fake_frame, childregs); p->thread.ksp = (unsigned long) frame; - /* Store access registers to kernel stack of new process. */ - frame->childregs = *regs; - frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ - frame->childregs.gprs[15] = new_stackp; - frame->sf.back_chain = 0; + /* Save access registers to new thread structure. */ + save_access_regs(&p->thread.acrs[0]); + /* start new process with ar4 pointing to the correct address space */ + p->thread.mm_segment = get_fs(); + /* Don't copy debug registers */ + memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); + memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); + clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + clear_tsk_thread_flag(p, TIF_PER_TRAP); + /* Initialize per thread user and system timer values */ + ti = task_thread_info(p); + ti->user_timer = 0; + ti->system_timer = 0; + frame->sf.back_chain = 0; /* new return point is ret_from_fork */ frame->sf.gprs[8] = (unsigned long) ret_from_fork; - /* fake return stack for resume(), don't go back to schedule */ frame->sf.gprs[9] = (unsigned long) frame; - /* Save access registers to new thread structure. */ - save_access_regs(&p->thread.acrs[0]); + /* Store access registers to kernel stack of new process. */ + if (unlikely(!regs)) { + /* kernel thread */ + memset(&frame->childregs, 0, sizeof(struct pt_regs)); + frame->childregs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + frame->childregs.psw.addr = PSW_ADDR_AMODE | + (unsigned long) kernel_thread_starter; + frame->childregs.gprs[9] = new_stackp; /* function */ + frame->childregs.gprs[10] = arg; + frame->childregs.gprs[11] = (unsigned long) do_exit; + frame->childregs.orig_gpr2 = -1; + + return 0; + } + frame->childregs = *regs; + frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ + frame->childregs.gprs[15] = new_stackp; #ifndef CONFIG_64BIT /* @@ -184,17 +189,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, } } #endif /* CONFIG_64BIT */ - /* start new process with ar4 pointing to the correct address space */ - p->thread.mm_segment = get_fs(); - /* Don't copy debug registers */ - memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); - memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); - clear_tsk_thread_flag(p, TIF_SINGLE_STEP); - clear_tsk_thread_flag(p, TIF_PER_TRAP); - /* Initialize per thread user and system timer values */ - ti = task_thread_info(p); - ti->user_timer = 0; - ti->system_timer = 0; return 0; } -- cgit v1.2.3