From f1b1583d5faa86cb3dcb7b740594868debad7c30 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:48 +0000 Subject: powerpc/bpf: Move common helpers into bpf_jit.h Move functions bpf_flush_icache(), bpf_is_seen_register() and bpf_set_seen_register() in order to reuse them in future bpf_jit_comp32.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/28e8d5a75e64807d7e9d39a4b52658755e259f8c.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch/powerpc/net/bpf_jit.h') diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index d0a67a1bbaf1..b8fa6908fc5e 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -108,6 +108,41 @@ static inline bool is_nearbranch(int offset) #define COND_LT (CR0_LT | COND_CMP_TRUE) #define COND_LE (CR0_GT | COND_CMP_FALSE) +#define SEEN_FUNC 0x1000 /* might call external helpers */ +#define SEEN_STACK 0x2000 /* uses BPF stack */ +#define SEEN_TAILCALL 0x4000 /* uses tail calls */ + +struct codegen_context { + /* + * This is used to track register usage as well + * as calls to external helpers. + * - register usage is tracked with corresponding + * bits (r3-r10 and r27-r31) + * - rest of the bits can be used to track other + * things -- for now, we use bits 16 to 23 + * encoded in SEEN_* macros above + */ + unsigned int seen; + unsigned int idx; + unsigned int stack_size; +}; + +static inline void bpf_flush_icache(void *start, void *end) +{ + smp_wmb(); /* smp write barrier */ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) +{ + return ctx->seen & (1 << (31 - i)); +} + +static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen |= 1 << (31 - i); +} + #endif #endif -- cgit v1.2.3 From 4ea76e90a97d22f86adbb10044d29d919e620f2e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:49 +0000 Subject: powerpc/bpf: Move common functions into bpf_jit_comp.c Move into bpf_jit_comp.c the functions that will remain common to PPC64 and PPC32 when we add support of EBPF for PPC32. 
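The helpers moved into bpf_jit.h above encode register usage as one bit per GPR, with register r tracked in bit (31 - r) of ctx->seen, so the prologue and epilogue only save and restore what a program actually touches. A minimal userspace sketch of that bookkeeping (an illustration, not the kernel code itself):

    #include <stdio.h>

    /* Mirrors the codegen_context convention: GPR r <-> bit (31 - r). */
    struct codegen_context {
            unsigned int seen;
    };

    static int bpf_is_seen_register(struct codegen_context *ctx, int i)
    {
            return (ctx->seen & (1u << (31 - i))) != 0;
    }

    static void bpf_set_seen_register(struct codegen_context *ctx, int i)
    {
            ctx->seen |= 1u << (31 - i);
    }

    int main(void)
    {
            struct codegen_context ctx = { 0 };

            bpf_set_seen_register(&ctx, 27);        /* codegen used r27 */

            /* Only r27 would get a save/restore slot in the prologue. */
            printf("r27 seen: %d, r28 seen: %d\n",
                   bpf_is_seen_register(&ctx, 27),
                   bpf_is_seen_register(&ctx, 28));
            return 0;
    }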
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2c339d77fb168ef12b213ccddfee3cb6c8ce8ae1.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/Makefile | 2 +- arch/powerpc/net/bpf_jit.h | 6 + arch/powerpc/net/bpf_jit_comp.c | 269 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/net/bpf_jit_comp64.c | 263 +------------------------------------ 4 files changed, 281 insertions(+), 259 deletions(-) create mode 100644 arch/powerpc/net/bpf_jit_comp.c (limited to 'arch/powerpc/net/bpf_jit.h') diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 52c939cef5b2..969cde177880 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,4 +2,4 @@ # # Arch-specific network modules # -obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp64.o diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b8fa6908fc5e..b34abfce15a6 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -143,6 +143,12 @@ static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) ctx->seen |= 1 << (31 - i); } +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass); +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); + #endif #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c new file mode 100644 index 000000000000..efac89964873 --- /dev/null +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * eBPF JIT compiler + * + * Copyright 2016 Naveen N. Rao + * IBM Corporation + * + * Based on the powerpc classic BPF JIT compiler by Matt Evans + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit.h" + +static void bpf_jit_fill_ill_insns(void *area, unsigned int size) +{ + memset32(area, BREAKPOINT_INSTRUCTION, size / 4); +} + +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) +{ + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; + + for (i = 0; i < fp->len; i++) { + /* + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. + */ + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; + + /* + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. + */ + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + /* + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. 
+ */ + ctx->idx = tmp_idx; + } + } + + return 0; +} + +struct powerpc64_jit_data { + struct bpf_binary_header *header; + u32 *addrs; + u8 *image; + u32 proglen; + struct codegen_context ctx; +}; + +bool bpf_jit_needs_zext(void) +{ + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) +{ + u32 proglen; + u32 alloclen; + u8 *image = NULL; + u32 *code_base; + u32 *addrs; + struct powerpc64_jit_data *jit_data; + struct codegen_context cgctx; + int pass; + int flen; + struct bpf_binary_header *bpf_hdr; + struct bpf_prog *org_fp = fp; + struct bpf_prog *tmp_fp; + bool bpf_blinded = false; + bool extra_pass = false; + + if (!fp->jit_requested) + return org_fp; + + tmp_fp = bpf_jit_blind_constants(org_fp); + if (IS_ERR(tmp_fp)) + return org_fp; + + if (tmp_fp != org_fp) { + bpf_blinded = true; + fp = tmp_fp; + } + + jit_data = fp->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + fp = org_fp; + goto out; + } + fp->aux->jit_data = jit_data; + } + + flen = fp->len; + addrs = jit_data->addrs; + if (addrs) { + cgctx = jit_data->ctx; + image = jit_data->image; + bpf_hdr = jit_data->header; + proglen = jit_data->proglen; + alloclen = proglen + FUNCTION_DESCR_SIZE; + extra_pass = true; + goto skip_init_ctx; + } + + addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) { + fp = org_fp; + goto out_addrs; + } + + memset(&cgctx, 0, sizeof(struct codegen_context)); + + /* Make sure that the stack is quadword aligned. */ + cgctx.stack_size = round_up(fp->aux->stack_depth, 16); + + /* Scouting faux-generate pass 0 */ + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + /* We hit something illegal or unsupported. */ + fp = org_fp; + goto out_addrs; + } + + /* + * If we have seen a tail call, we need a second pass. + * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } + + /* + * Pretend to build prologue, given the features we've seen. This will + * update ctgtx.idx as it pretends to output instructions, then we can + * calculate total size from idx. + */ + bpf_jit_build_prologue(0, &cgctx); + bpf_jit_build_epilogue(0, &cgctx); + + proglen = cgctx.idx * 4; + alloclen = proglen + FUNCTION_DESCR_SIZE; + + bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); + if (!bpf_hdr) { + fp = org_fp; + goto out_addrs; + } + +skip_init_ctx: + code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } + + /* Code generation passes 1-2 */ + for (pass = 1; pass < 3; pass++) { + /* Now build the prologue, body code & epilogue for real. 
*/ + cgctx.idx = 0; + bpf_jit_build_prologue(code_base, &cgctx); + bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); + bpf_jit_build_epilogue(code_base, &cgctx); + + if (bpf_jit_enable > 1) + pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, + proglen - (cgctx.idx * 4), cgctx.seen); + } + +skip_codegen_passes: + if (bpf_jit_enable > 1) + /* + * Note that we output the base address of the code_base + * rather than image, since opcodes are in code_base. + */ + bpf_jit_dump(flen, proglen, pass, code_base); + +#ifdef PPC64_ELF_ABI_v1 + /* Function descriptor nastiness: Address + TOC */ + ((u64 *)image)[0] = (u64)code_base; + ((u64 *)image)[1] = local_paca->kernel_toc; +#endif + + fp->bpf_func = (void *)image; + fp->jited = 1; + fp->jited_len = alloclen; + + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + if (!fp->is_func || extra_pass) { + bpf_prog_fill_jited_linfo(fp, addrs); +out_addrs: + kfree(addrs); + kfree(jit_data); + fp->aux->jit_data = NULL; + } else { + jit_data->addrs = addrs; + jit_data->ctx = cgctx; + jit_data->proglen = proglen; + jit_data->image = image; + jit_data->header = bpf_hdr; + } + +out: + if (bpf_blinded) + bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); + + return fp; +} + +/* Overriding bpf_jit_free() as we don't set images read-only. */ +void bpf_jit_free(struct bpf_prog *fp) +{ + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *bpf_hdr = (void *)addr; + + if (fp->jited) + bpf_jit_binary_free(bpf_hdr); + + bpf_prog_unlock_free(fp); +} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 111451bc5cc0..8a1f9fb00e78 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -18,11 +18,6 @@ #include "bpf_jit64.h" -static void bpf_jit_fill_ill_insns(void *area, unsigned int size) -{ - memset32(area, BREAKPOINT_INSTRUCTION, size/4); -} - static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { /* @@ -69,7 +64,7 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) BUG(); } -static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; @@ -136,7 +131,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx } } -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { bpf_jit_emit_common_epilogue(image, ctx); @@ -171,8 +166,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, EMIT(PPC_RAW_BLRL()); } -static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, - u64 func) +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; @@ -273,9 +267,8 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 } /* Assemble the body code between the prologue & epilogue */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - u32 *addrs, bool extra_pass) +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -1010,249 +1003,3 @@ cond_branch: return 0; } - -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, 
u32 *image, - struct codegen_context *ctx, u32 *addrs) -{ - const struct bpf_insn *insn = fp->insnsi; - bool func_addr_fixed; - u64 func_addr; - u32 tmp_idx; - int i, ret; - - for (i = 0; i < fp->len; i++) { - /* - * During the extra pass, only the branch target addresses for - * the subprog calls need to be fixed. All other instructions - * can left untouched. - * - * The JITed image length does not change because we already - * ensure that the JITed instruction sequence for these calls - * are of fixed length by padding them with NOPs. - */ - if (insn[i].code == (BPF_JMP | BPF_CALL) && - insn[i].src_reg == BPF_PSEUDO_CALL) { - ret = bpf_jit_get_func_addr(fp, &insn[i], true, - &func_addr, - &func_addr_fixed); - if (ret < 0) - return ret; - - /* - * Save ctx->idx as this would currently point to the - * end of the JITed image and set it to the offset of - * the instruction sequence corresponding to the - * subprog call temporarily. - */ - tmp_idx = ctx->idx; - ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); - - /* - * Restore ctx->idx here. This is safe as the length - * of the JITed sequence remains unchanged. - */ - ctx->idx = tmp_idx; - } - } - - return 0; -} - -struct powerpc64_jit_data { - struct bpf_binary_header *header; - u32 *addrs; - u8 *image; - u32 proglen; - struct codegen_context ctx; -}; - -bool bpf_jit_needs_zext(void) -{ - return true; -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) -{ - u32 proglen; - u32 alloclen; - u8 *image = NULL; - u32 *code_base; - u32 *addrs; - struct powerpc64_jit_data *jit_data; - struct codegen_context cgctx; - int pass; - int flen; - struct bpf_binary_header *bpf_hdr; - struct bpf_prog *org_fp = fp; - struct bpf_prog *tmp_fp; - bool bpf_blinded = false; - bool extra_pass = false; - - if (!fp->jit_requested) - return org_fp; - - tmp_fp = bpf_jit_blind_constants(org_fp); - if (IS_ERR(tmp_fp)) - return org_fp; - - if (tmp_fp != org_fp) { - bpf_blinded = true; - fp = tmp_fp; - } - - jit_data = fp->aux->jit_data; - if (!jit_data) { - jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); - if (!jit_data) { - fp = org_fp; - goto out; - } - fp->aux->jit_data = jit_data; - } - - flen = fp->len; - addrs = jit_data->addrs; - if (addrs) { - cgctx = jit_data->ctx; - image = jit_data->image; - bpf_hdr = jit_data->header; - proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; - extra_pass = true; - goto skip_init_ctx; - } - - addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) { - fp = org_fp; - goto out_addrs; - } - - memset(&cgctx, 0, sizeof(struct codegen_context)); - - /* Make sure that the stack is quadword aligned. */ - cgctx.stack_size = round_up(fp->aux->stack_depth, 16); - - /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - /* We hit something illegal or unsupported. */ - fp = org_fp; - goto out_addrs; - } - - /* - * If we have seen a tail call, we need a second pass. - * This is because bpf_jit_emit_common_epilogue() is called - * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. - */ - if (cgctx.seen & SEEN_TAILCALL) { - cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - fp = org_fp; - goto out_addrs; - } - } - - /* - * Pretend to build prologue, given the features we've seen. This will - * update ctgtx.idx as it pretends to output instructions, then we can - * calculate total size from idx. 
- */ - bpf_jit_build_prologue(0, &cgctx); - bpf_jit_build_epilogue(0, &cgctx); - - proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; - - bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, - bpf_jit_fill_ill_insns); - if (!bpf_hdr) { - fp = org_fp; - goto out_addrs; - } - -skip_init_ctx: - code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); - - if (extra_pass) { - /* - * Do not touch the prologue and epilogue as they will remain - * unchanged. Only fix the branch target address for subprog - * calls in the body. - * - * This does not change the offsets and lengths of the subprog - * call instruction sequences and hence, the size of the JITed - * image as well. - */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); - - /* There is no need to perform the usual passes. */ - goto skip_codegen_passes; - } - - /* Code generation passes 1-2 */ - for (pass = 1; pass < 3; pass++) { - /* Now build the prologue, body code & epilogue for real. */ - cgctx.idx = 0; - bpf_jit_build_prologue(code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); - bpf_jit_build_epilogue(code_base, &cgctx); - - if (bpf_jit_enable > 1) - pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, - proglen - (cgctx.idx * 4), cgctx.seen); - } - -skip_codegen_passes: - if (bpf_jit_enable > 1) - /* - * Note that we output the base address of the code_base - * rather than image, since opcodes are in code_base. - */ - bpf_jit_dump(flen, proglen, pass, code_base); - -#ifdef PPC64_ELF_ABI_v1 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; -#endif - - fp->bpf_func = (void *)image; - fp->jited = 1; - fp->jited_len = alloclen; - - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); - if (!fp->is_func || extra_pass) { - bpf_prog_fill_jited_linfo(fp, addrs); -out_addrs: - kfree(addrs); - kfree(jit_data); - fp->aux->jit_data = NULL; - } else { - jit_data->addrs = addrs; - jit_data->ctx = cgctx; - jit_data->proglen = proglen; - jit_data->image = image; - jit_data->header = bpf_hdr; - } - -out: - if (bpf_blinded) - bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); - - return fp; -} - -/* Overriding bpf_jit_free() as we don't set images read-only. */ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *bpf_hdr = (void *)addr; - - if (fp->jited) - bpf_jit_binary_free(bpf_hdr); - - bpf_prog_unlock_free(fp); -} -- cgit v1.2.3 From c426810fcf9f96e3b43d16039e41ecb959f6dc29 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:50 +0000 Subject: powerpc/bpf: Change values of SEEN_ flags Because PPC32 will use more non volatile registers, move SEEN_ flags to positions 0-2 which corresponds to special registers. 
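With the flags moved to the top three bit positions, every GPR from r3 up to r31 gets a private bit at position (31 - r) with no collision against SEEN_FUNC, SEEN_STACK or SEEN_TAILCALL. A standalone check of that layout (illustrative only, reusing the macro values from the diff below):

    #include <stdio.h>

    #define SEEN_FUNC       0x20000000 /* might call external helpers */
    #define SEEN_STACK      0x40000000 /* uses BPF stack */
    #define SEEN_TAILCALL   0x80000000 /* uses tail calls */

    int main(void)
    {
            unsigned int reg_mask = 0;
            int r;

            /* Bits used for register tracking: (31 - r) for r3..r31. */
            for (r = 3; r <= 31; r++)
                    reg_mask |= 1u << (31 - r);

            /* Must print 0: flag bits and register bits never overlap. */
            printf("overlap = 0x%x\n",
                   reg_mask & (SEEN_FUNC | SEEN_STACK | SEEN_TAILCALL));
            return 0;
    }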
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/608faa1dc3ecfead649e15392abd07b00313d2ba.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/net/bpf_jit.h') diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b34abfce15a6..fb4656986fb9 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -108,18 +108,18 @@ static inline bool is_nearbranch(int offset) #define COND_LT (CR0_LT | COND_CMP_TRUE) #define COND_LE (CR0_GT | COND_CMP_FALSE) -#define SEEN_FUNC 0x1000 /* might call external helpers */ -#define SEEN_STACK 0x2000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x4000 /* uses tail calls */ +#define SEEN_FUNC 0x20000000 /* might call external helpers */ +#define SEEN_STACK 0x40000000 /* uses BPF stack */ +#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ struct codegen_context { /* * This is used to track register usage as well * as calls to external helpers. * - register usage is tracked with corresponding - * bits (r3-r10 and r27-r31) + * bits (r3-r31) * - rest of the bits can be used to track other - * things -- for now, we use bits 16 to 23 + * things -- for now, we use bits 0 to 2 * encoded in SEEN_* macros above */ unsigned int seen; -- cgit v1.2.3 From 51c66ad849a703d9bbfd7704c941827aed0fd9fd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:52 +0000 Subject: powerpc/bpf: Implement extended BPF on PPC32 Implement Extended Berkeley Packet Filter on Powerpc 32 Test result with test_bpf module: test_bpf: Summary: 378 PASSED, 0 FAILED, [354/366 JIT'ed] Registers mapping: [BPF_REG_0] = r11-r12 /* function arguments */ [BPF_REG_1] = r3-r4 [BPF_REG_2] = r5-r6 [BPF_REG_3] = r7-r8 [BPF_REG_4] = r9-r10 [BPF_REG_5] = r21-r22 (Args 9 and 10 come in via the stack) /* non volatile registers */ [BPF_REG_6] = r23-r24 [BPF_REG_7] = r25-r26 [BPF_REG_8] = r27-r28 [BPF_REG_9] = r29-r30 /* frame pointer aka BPF_REG_10 */ [BPF_REG_FP] = r17-r18 /* eBPF jit internal registers */ [BPF_REG_AX] = r19-r20 [TMP_REG] = r31 As PPC32 doesn't have a redzone in the stack, a stack frame must always be set in order to host at least the tail count counter. The stack frame remains for tail calls, it is set by the first callee and freed by the last callee. r0 is used as temporary register as much as possible. It is referenced directly in the code in order to avoid misusing it, because some instructions interpret it as value 0 instead of register r0 (ex: addi, addis, stw, lwz, ...) 
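Since each 64-bit BPF register lives in a pair of 32-bit GPRs (low word in the mapped register, high word in the register just below it, per the mapping above), 64-bit ALU operations are decomposed into 32-bit ones with explicit carry handling. A C model of the addc/adde pattern this JIT emits for BPF_ALU64 | BPF_ADD | BPF_X, with hypothetical helper names:

    #include <stdint.h>
    #include <stdio.h>

    /* Models PPC_RAW_ADDC (add, record carry) followed by PPC_RAW_ADDE
     * (add extended, consuming the carry) on a hi/lo register pair. */
    static void add64(uint32_t *dst_h, uint32_t *dst_l,
                      uint32_t src_h, uint32_t src_l)
    {
            uint32_t lo = *dst_l + src_l;
            uint32_t carry = lo < *dst_l;   /* carry out of the low add */

            *dst_l = lo;
            *dst_h += src_h + carry;        /* adde folds the carry in */
    }

    int main(void)
    {
            uint32_t hi = 0, lo = 0xffffffff;

            add64(&hi, &lo, 0, 1);          /* low word wraps, carries */
            printf("0x%x%08x\n", hi, lo);   /* prints 0x100000000 */
            return 0;
    }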
The following operations are not implemented: case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ The following operations are only implemented for power of two constants: case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/61d8b149176ddf99e7d5cef0b6dc1598583ca202.1616430991.git.christophe.leroy@csgroup.eu --- Documentation/admin-guide/sysctl/net.rst | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/net/Makefile | 2 +- arch/powerpc/net/bpf_jit.h | 4 + arch/powerpc/net/bpf_jit_comp32.c | 1069 ++++++++++++++++++++++++++++++ 5 files changed, 1076 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/net/bpf_jit_comp32.c (limited to 'arch/powerpc/net/bpf_jit.h') diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index f2ab8a5b6a4b..685cc13f567b 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -64,6 +64,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - arm64 - arm32 - ppc64 + - ppc32 - sparc64 - mips64 - s390x @@ -73,7 +74,6 @@ two flavors of JITs, the newer eBPF JIT currently supported on: And the older cBPF JIT supported on the following archs: - mips - - ppc - sparc eBPF JITs are a superset of cBPF JITs, meaning the kernel will diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 29217437b8ac..316ab0bf8112 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -202,7 +202,7 @@ config PPC select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_EBPF_JIT if PPC64 + select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 969cde177880..8e60af32e51e 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,4 +2,4 @@ # # Arch-specific network modules # -obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp64.o +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index fb4656986fb9..a45b8266355d 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -42,6 +42,10 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) +#ifdef CONFIG_PPC32 +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) +#endif + #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..29ce802d7534 --- /dev/null +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -0,0 +1,1069 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * eBPF JIT compiler for PPC32 + * + * Copyright 2020 Christophe Leroy + * CS GROUP France + * + * Based on PPC64 eBPF JIT compiler by Naveen N. 
Rao + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit.h" + +/* + * Stack layout: + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 16 * 4 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 16 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers r17 to r31 (14) + tail call */ +#define BPF_PPC_STACK_SAVE (15 * 4 + 4) +/* stack frame, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) + +/* BPF register usage */ +#define TMP_REG (MAX_BPF_JIT_REG + 0) + +/* BPF to ppc register mappings */ +static const int b2p[] = { + /* function return value */ + [BPF_REG_0] = 12, + /* function arguments */ + [BPF_REG_1] = 4, + [BPF_REG_2] = 6, + [BPF_REG_3] = 8, + [BPF_REG_4] = 10, + [BPF_REG_5] = 22, + /* non volatile registers */ + [BPF_REG_6] = 24, + [BPF_REG_7] = 26, + [BPF_REG_8] = 28, + [BPF_REG_9] = 30, + /* frame pointer aka BPF_REG_10 */ + [BPF_REG_FP] = 18, + /* eBPF jit internal registers */ + [BPF_REG_AX] = 20, + [TMP_REG] = 31, /* 32 bits */ +}; + +static int bpf_to_ppc(struct codegen_context *ctx, int reg) +{ + return b2p[reg]; +} + +/* PPC NVR range -- update this if we ever use NVRs below r17 */ +#define BPF_PPC_NVR_MIN 17 +#define BPF_PPC_TC 16 + +static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) +{ + if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) + return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg); + + WARN(true, "BPF JIT is asking about unknown registers, will crash the stack"); + /* Use the hole we have left for alignment */ + return BPF_PPC_STACKFRAME(ctx) - 4; +} + +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx))); + + /* + * Initialize tail_call_cnt in stack frame if we do tail calls. + * Otherwise, put in NOPs so that it can be skipped when we are + * invoked through a tail call. 
+	 */
+	if (ctx->seen & SEEN_TAILCALL) {
+		EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+	} else {
+		EMIT(PPC_RAW_NOP());
+	}
+
+#define BPF_TAILCALL_PROLOGUE_SIZE 16
+
+	/*
+	 * We need a stack frame, but we don't necessarily need to
+	 * save/restore LR unless we call other functions
+	 */
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_MFLR(__REG_R0));
+
+	/*
+	 * Back up non-volatile regs -- registers r18-r31
+	 */
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+
+	/* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/
+	if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
+		EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx) + 8));
+		EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx) + 12));
+	}
+
+	/* Setup frame pointer to point to the bpf stack area */
+	if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) {
+		EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0));
+		EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1,
+				  STACK_FRAME_MIN_SIZE + ctx->stack_size));
+	}
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+}
+
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	int i;
+
+	/* Restore NVRs */
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0)));
+
+	bpf_jit_emit_common_epilogue(image, ctx);
+
+	/* Tear down our stack frame */
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+	EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx)));
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_MTLR(__REG_R0));
+
+	EMIT(PPC_RAW_BLR());
+}
+
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
+{
+	/* Load function address into r0 */
+	EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
+	EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
+	EMIT(PPC_RAW_MTLR(__REG_R0));
+	EMIT(PPC_RAW_BLRL());
+}
+
+static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+	/*
+	 * By now, the eBPF program has already setup parameters in r3-r6
+	 * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+	 * r5-r6/BPF_REG_2 - pointer to bpf_array
+	 * r7-r8/BPF_REG_3 - index in bpf_array
+	 */
+	int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2);
+	int b2p_index = bpf_to_ppc(ctx, BPF_REG_3);
+
+	/*
+	 * if (index >= array->map.max_entries)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+	EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0));
+	EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+	PPC_BCC(COND_GE, out);
+
+	/*
+	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT));
+	/* tail_call_cnt++; */
+	EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1));
+	PPC_BCC(COND_GT, out);
+
+	/* prog = array->ptrs[index]; */
+	EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29));
+	EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array));
+	EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs)));
+
EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + + /* + * if (prog == NULL) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R3, 0)); + PPC_BCC(COND_EQ, out); + + /* goto *(prog->bpf_func + prologue_size); */ + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE)); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_MTCTR(__REG_R3)); + + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1))); + + /* tear restore NVRs, ... */ + bpf_jit_emit_common_epilogue(image, ctx); + + EMIT(PPC_RAW_BCTR()); + /* out: */ +} + +/* Assemble the body code between the prologue & epilogue */ +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) +{ + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; + int i, ret; + + /* Start of epilogue code - will only be valid 2nd pass onwards */ + u32 exit_addr = addrs[flen]; + + for (i = 0; i < flen; i++) { + u32 code = insn[i].code; + u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg_h = dst_reg - 1; + u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg_h = src_reg - 1; + u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + s16 off = insn[i].off; + s32 imm = insn[i].imm; + bool func_addr_fixed; + u64 func_addr; + u32 true_cond; + + /* + * addrs[] maps a BPF bytecode address into a real offset from + * the start of the body code. + */ + addrs[i] = ctx->idx * 4; + + /* + * As an optimization, we note down which registers + * are used so that we can only save/restore those in our + * prologue and epilogue. We do this here regardless of whether + * the actual BPF instruction uses src/dst registers or not + * (for instance, BPF_CALL does not use them). The expectation + * is that those instructions will have src_reg/dst_reg set to + * 0. Even otherwise, we just lose some prologue/epilogue + * optimization but everything else should work without + * any issues. 
+ */ + if (dst_reg >= 3 && dst_reg < 32) { + bpf_set_seen_register(ctx, dst_reg); + bpf_set_seen_register(ctx, dst_reg_h); + } + + if (src_reg >= 3 && src_reg < 32) { + bpf_set_seen_register(ctx, src_reg); + bpf_set_seen_register(ctx, src_reg_h); + } + + switch (code) { + /* + * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG + */ + case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ + EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg)); + EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ + imm = -imm; + fallthrough; + case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ + if (IMM_HA(imm) & 0xffff) + EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm))); + if (IMM_L(imm)) + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); + break; + case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ + imm = -imm; + fallthrough; + case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) + break; + + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0)); + } + if (imm >= 0) + EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); + else + EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h)); + break; + case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ + if (!imm) { + PPC_LI32(dst_reg, 0); + PPC_LI32(dst_reg_h, 0); + break; + } + if (imm == 1) + break; + if (imm == -1) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + } + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); + if (imm < 0) + EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); + EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + break; + case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ + return -EOPNOTSUPP; + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src 
*/ + return -EOPNOTSUPP; + case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ + if (!imm) + return -EINVAL; + if (imm == 1) + break; + + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ + if (!imm) + return -EINVAL; + + if (!is_power_of_2((u32)imm)) { + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + } + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31)); + + break; + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ + if (!imm) + return -EINVAL; + if (imm < 0) + imm = -imm; + if (!is_power_of_2(imm)) + return -EOPNOTSUPP; + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ + if (!imm) + return -EINVAL; + if (!is_power_of_2(abs(imm))) + return -EOPNOTSUPP; + + if (imm < 0) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + imm = -imm; + } + if (imm == 1) + break; + imm = ilog2(imm); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ + EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); + break; + case BPF_ALU64 | BPF_NEG: /* dst = -dst */ + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + + /* + * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH + */ + case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ + if (imm >= 0) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + fallthrough; + case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ + if (!IMM_H(imm)) { + EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); + } else if (!IMM_L(imm)) { + EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm))); + } else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(i) - 1)) - 1))) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, + 32 - fls(imm), 32 - ffs(imm))); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ + /* Sign-extended */ + if (imm < 0) + EMIT(PPC_RAW_LI(dst_reg_h, -1)); + fallthrough; + case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ + if (dst_reg == src_reg) { + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } else { + 
EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h)); + } + break; + case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ + if (dst_reg == src_reg) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ + if (imm < 0) + EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h)); + fallthrough; + case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ + EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); + PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg, __REG_R0)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + PPC_JMP((ctx->idx + 6) * 4); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm)); + EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31)); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm)); + else + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + break; + case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ + EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); + PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + PPC_JMP((ctx->idx + 6) * 4); + EMIT(PPC_RAW_SUBFIC(0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31)); + else + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ + EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); + PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_SRAW(dst_reg, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, 
dst_reg_h, 31)); + PPC_JMP((ctx->idx + 6) * 4); + EMIT(PPC_RAW_SUBFIC(0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32)); + else + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31)); + break; + + /* + * MOV + */ + case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + if (dst_reg == src_reg) + break; + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ + /* special mov32 for zext */ + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else if (dst_reg != src_reg) + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ + PPC_LI32(dst_reg, imm); + PPC_EX32(dst_reg_h, imm); + break; + case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ + PPC_LI32(dst_reg, imm); + break; + + /* + * BPF_FROM_BE/LE + */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* Copy 16 bits to upper part */ + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15)); + /* Rotate 8 bits right & mask */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31)); + break; + case 32: + /* + * Rotate word left by 8 bits: + * 2 bytes are already in their final position + * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) + */ + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + break; + case 64: + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); + break; + } + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 32 bits */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31)); + break; + case 32: + case 64: + /* nop */ + break; + } + break; + + /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ + 
PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off)); + EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4)); + break; + case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4)); + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + + /* + * BPF_STX XADD (atomic_add) + */ + case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */ + bpf_set_seen_register(ctx, tmp_reg); + /* Get offset into TMP_REG */ + EMIT(PPC_RAW_LI(tmp_reg, off)); + /* load value from memory into r0 */ + EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0)); + /* add value from src_reg into this */ + EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg)); + /* store result back */ + EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg)); + /* we're done if this succeeded */ + PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); + break; + + case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ + return -EOPNOTSUPP; + + /* + * BPF_LDX + */ + case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + + /* + * Doubleword load + * 16 byte instruction that uses two 'struct bpf_insn' + */ + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); + PPC_LI32(dst_reg, (u32)insn[i].imm); + /* Adjust for two bpf instructions */ + addrs[++i] = ctx->idx * 4; + break; + + /* + * Return/Exit + */ + case BPF_JMP | BPF_EXIT: + /* + * If this isn't the very last instruction, branch to + * the epilogue. If we _are_ the last instruction, + * we'll just fall through to the epilogue. 
+ */ + if (i != flen - 1) + PPC_JMP(exit_addr); + /* else fall through to the epilogue */ + break; + + /* + * Call kernel helper or bpf function + */ + case BPF_JMP | BPF_CALL: + ctx->seen |= SEEN_FUNC; + + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12)); + } + + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4)); + break; + + /* + * Jumps and branches + */ + case BPF_JMP | BPF_JA: + PPC_JMP(addrs[i + 1 + off]); + break; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + true_cond = COND_GT; + goto cond_branch; + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + true_cond = COND_LT; + goto cond_branch; + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + true_cond = COND_GE; + goto cond_branch; + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + true_cond = COND_LE; + goto cond_branch; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: + true_cond = COND_EQ; + goto cond_branch; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + true_cond = COND_NE; + goto cond_branch; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + true_cond = COND_NE; + /* fallthrough; */ + +cond_branch: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + /* signed comparison */ + 
EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + /* signed comparison */ + EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSET | BPF_X: + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSET | BPF_X: { + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + /* + * Need sign-extended load, so only positive + * values can be used as imm in cmplwi + */ + if (imm >= 0 && imm < 32768) { + EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load ... but unsigned comparison */ + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + } + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load */ + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? 
-1 : 0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + /* + * signed comparison, so any 16-bit value + * can be used in cmpwi + */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_CMPWI(dst_reg, imm)); + } else { + /* sign-extending load */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= 0 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + if (imm < 0) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + } + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= -32768 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + } + PPC_BCC(true_cond, addrs[i + 1 + off]); + break; + + /* + * Tail call + */ + case BPF_JMP | BPF_TAIL_CALL: + ctx->seen |= SEEN_TAILCALL; + bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + break; + + default: + /* + * The filter contains something cruel & unusual. + * We don't handle it, but also there shouldn't be + * anything missing from our list. + */ + pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i); + return -EOPNOTSUPP; + } + if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext && + !insn_is_zext(&insn[i + 1])) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } + + /* Set end-of-body-code address for exit. */ + addrs[i] = ctx->idx * 4; + + return 0; +} -- cgit v1.2.3 From 40272035e1d0edcd515ad45be297c4cce044536d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:53 +0000 Subject: powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32 When the BPF routine doesn't call any function, the non volatile registers can be reallocated to volatile registers in order to avoid having to save them/restore on the stack. 
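The idea reduces to a walk over the register map: as long as the program has not been seen to call helpers, retarget each used non-volatile GPR to a still-free volatile one. A standalone model of that walk, simplified from the kernel's bpf_jit_realloc_regs() below (the real loop also keeps the two halves of a 64-bit register pair adjacent):

    #include <stdio.h>

    static unsigned int seen_bit(int r)
    {
            return 1u << (31 - r);  /* same bit layout as ctx->seen */
    }

    int main(void)
    {
            /* Suppose codegen marked non-volatile r24 and r26 as used. */
            unsigned int seen = seen_bit(24) | seen_bit(26);
            int r, new;

            for (r = 14; r <= 31; r++) {
                    if (!(seen & seen_bit(r)))
                            continue;
                    /* Find a volatile register (r3-r12) not yet in use. */
                    for (new = 3; new <= 12 && (seen & seen_bit(new)); new++)
                            ;
                    if (new > 12)
                            break;  /* out of volatile registers */
                    seen = (seen & ~seen_bit(r)) | seen_bit(new);
                    printf("remap r%d -> r%d\n", r, new);
            }
            return 0;
    }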
Before this patch, the test #359 ADD default X is:

   0:	7c 64 1b 78	mr	r4,r3
   4:	38 60 00 00	li	r3,0
   8:	94 21 ff b0	stwu	r1,-80(r1)
   c:	60 00 00 00	nop
  10:	92 e1 00 2c	stw	r23,44(r1)
  14:	93 01 00 30	stw	r24,48(r1)
  18:	93 21 00 34	stw	r25,52(r1)
  1c:	93 41 00 38	stw	r26,56(r1)
  20:	39 80 00 00	li	r12,0
  24:	39 60 00 00	li	r11,0
  28:	3b 40 00 00	li	r26,0
  2c:	3b 20 00 00	li	r25,0
  30:	7c 98 23 78	mr	r24,r4
  34:	7c 77 1b 78	mr	r23,r3
  38:	39 80 00 42	li	r12,66
  3c:	39 60 00 00	li	r11,0
  40:	7d 8c d2 14	add	r12,r12,r26
  44:	39 60 00 00	li	r11,0
  48:	7d 83 63 78	mr	r3,r12
  4c:	82 e1 00 2c	lwz	r23,44(r1)
  50:	83 01 00 30	lwz	r24,48(r1)
  54:	83 21 00 34	lwz	r25,52(r1)
  58:	83 41 00 38	lwz	r26,56(r1)
  5c:	38 21 00 50	addi	r1,r1,80
  60:	4e 80 00 20	blr

After this patch, the same test has become:

   0:	7c 64 1b 78	mr	r4,r3
   4:	38 60 00 00	li	r3,0
   8:	94 21 ff b0	stwu	r1,-80(r1)
   c:	60 00 00 00	nop
  10:	39 80 00 00	li	r12,0
  14:	39 60 00 00	li	r11,0
  18:	39 00 00 00	li	r8,0
  1c:	38 e0 00 00	li	r7,0
  20:	7c 86 23 78	mr	r6,r4
  24:	7c 65 1b 78	mr	r5,r3
  28:	39 80 00 42	li	r12,66
  2c:	39 60 00 00	li	r11,0
  30:	7d 8c 42 14	add	r12,r12,r8
  34:	39 60 00 00	li	r11,0
  38:	7d 83 63 78	mr	r3,r12
  3c:	38 21 00 50	addi	r1,r1,80
  40:	4e 80 00 20	blr

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/net/bpf_jit.h        | 16 ++++++++++++++++
 arch/powerpc/net/bpf_jit64.h      |  2 +-
 arch/powerpc/net/bpf_jit_comp.c   |  2 ++
 arch/powerpc/net/bpf_jit_comp32.c | 30 ++++++++++++++++++++++++++++--
 arch/powerpc/net/bpf_jit_comp64.c |  4 ++++
 5 files changed, 51 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/net/bpf_jit.h')

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index a45b8266355d..776abef4d2a0 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -116,6 +116,15 @@ static inline bool is_nearbranch(int offset)
 #define SEEN_STACK	0x40000000 /* uses BPF stack */
 #define SEEN_TAILCALL	0x80000000 /* uses tail calls */
 
+#define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
+
+#ifdef CONFIG_PPC64
+extern const int b2p[MAX_BPF_JIT_REG + 2];
+#else
+extern const int b2p[MAX_BPF_JIT_REG + 1];
+#endif
+
 struct codegen_context {
 	/*
 	 * This is used to track register usage as well
@@ -129,6 +138,7 @@ struct codegen_context {
 	unsigned int seen;
 	unsigned int idx;
 	unsigned int stack_size;
+	int b2p[ARRAY_SIZE(b2p)];
 };
 
 static inline void bpf_flush_icache(void *start, void *end)
@@ -147,11 +157,17 @@ static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
 	ctx->seen |= 1 << (31 - i);
 }
 
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+	ctx->seen &= ~(1 << (31 - i));
+}
+
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
 		       u32 *addrs, bool extra_pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index b05f2e67bba1..7b713edfa7e2 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -39,7 +39,7 @@
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
 
 /* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 2] = {
 	/* function return value */
 	[BPF_REG_0] = 8,
 	/* function arguments */
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index efac89964873..798ac4350a82 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -143,6 +143,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	}
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));
+	memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
 
 	/* Make sure that the stack is quadword aligned. */
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
@@ -167,6 +168,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		}
 	}
 
+	bpf_jit_realloc_regs(&cgctx);
 	/*
 	 * Pretend to build prologue, given the features we've seen. This will
 	 * update ctgtx.idx as it pretends to output instructions, then we can
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 29ce802d7534..003843273b43 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -37,7 +37,7 @@
 #define TMP_REG	(MAX_BPF_JIT_REG + 0)
 
 /* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 1] = {
 	/* function return value */
 	[BPF_REG_0] = 12,
 	/* function arguments */
@@ -60,7 +60,7 @@ static const int b2p[] = {
 
 static int bpf_to_ppc(struct codegen_context *ctx, int reg)
 {
-	return b2p[reg];
+	return ctx->b2p[reg];
 }
 
 /* PPC NVR range -- update this if we ever use NVRs below r17 */
@@ -77,6 +77,32 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	return BPF_PPC_STACKFRAME(ctx) - 4;
 }
 
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+	if (ctx->seen & SEEN_FUNC)
+		return;
+
+	while (ctx->seen & SEEN_NVREG_MASK &&
+	      (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+		int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab));
+		int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
+		int i;
+
+		for (i = BPF_REG_0; i <= TMP_REG; i++) {
+			if (ctx->b2p[i] != old)
+				continue;
+			ctx->b2p[i] = new;
+			bpf_set_seen_register(ctx, new);
+			bpf_clear_seen_register(ctx, old);
+			if (i != TMP_REG) {
+				bpf_set_seen_register(ctx, new - 1);
+				bpf_clear_seen_register(ctx, old - 1);
+			}
+			break;
+		}
+	}
+}
+
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8a1f9fb00e78..57a8c1153851 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -64,6 +64,10 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	BUG();
 }
 
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
--
cgit v1.2.3

From ee7c3ec3b4b1222b30272624897826bc40d79bc5 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Mon, 12 Apr 2021 11:44:18 +0000
Subject: powerpc/ebpf32: Use standard function call for functions within 32M distance

If the target of a function call is within 32 Mbytes of the call site,
use a standard function call with 'bl' instead of the four-instruction
'lis/ori/mtlr/blrl' sequence.

In the first pass, no memory has been allocated yet and the code
position is not known yet (the image pointer is NULL). That pass only
calculates the amount of memory to allocate for the EBPF code, so
assume the four-instruction sequence is always required, so that
enough memory is allocated.
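The 32 Mbytes limit comes straight from the instruction format: 'bl'
carries a signed 26-bit, word-aligned displacement in bits 2-25 of the
instruction. A hedged stand-alone sketch of the range check and encoding
used in the patch below; encode_bl() and the main() driver are my names
for illustration, not kernel helpers:

	#include <stdint.h>
	#include <stdio.h>

	#define PPC_INST_BL	0x48000001	/* opcode 18, AA=0, LK=1 */

	/*
	 * Try to encode 'bl target' at address 'pc'. Returns 1 and stores
	 * the instruction if the target is reachable, 0 if the caller must
	 * fall back to the 4-instruction lis/ori/mtlr/blrl sequence.
	 */
	static int encode_bl(uint32_t pc, uint32_t target, uint32_t *insn)
	{
		int32_t rel = (int32_t)(target - pc);

		/* Signed 26-bit displacement: -0x2000000 <= rel < 0x2000000 */
		if (rel >= -0x2000000 && rel < 0x2000000) {
			*insn = PPC_INST_BL | ((uint32_t)rel & 0x03fffffc);
			return 1;
		}
		return 0;
	}

	int main(void)
	{
		uint32_t insn;

		if (encode_bl(0x10000000, 0x10001000, &insn))
			printf("bl encoded as %08x\n", insn);	/* prints 48001001 */
		return 0;
	}

This also shows why the first pass must assume the long sequence: with
image == NULL the address of the call site is meaningless, so the
worst-case size has to be reserved.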
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/74944a1e3e5cfecc141e440a6ccd37920e186b70.1618227846.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/ppc-opcode.h |  1 +
 arch/powerpc/net/bpf_jit.h            |  3 +++
 arch/powerpc/net/bpf_jit_comp32.c     | 16 +++++++++++-----
 3 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc/net/bpf_jit.h')

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 5b60020dc1f4..ac41776661e9 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -265,6 +265,7 @@
 #define PPC_INST_ORI			0x60000000
 #define PPC_INST_ORIS			0x64000000
 #define PPC_INST_BRANCH			0x48000000
+#define PPC_INST_BL			0x48000001
 #define PPC_INST_BRANCH_COND		0x40800000
 
 /* Prefixes */
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 776abef4d2a0..99fad093f43e 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -26,6 +26,9 @@
 /* Long jump; (unconditional 'branch') */
 #define PPC_JMP(dest)	EMIT(PPC_INST_BRANCH |			      \
 			     (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+/* bl (unconditional 'branch' with link) to absolute address */
+#define PPC_BL_ABS(dest)	EMIT(PPC_INST_BL |			      \
+				     (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc))
 /* "cond" here covers BO:BI fields. */
 #define PPC_BCC_SHORT(cond, dest)	EMIT(PPC_INST_BRANCH_COND |	      \
 					     (((cond) & 0x3ff) << 16) |	      \
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index ef21b09df76e..bbb16099e8c7 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -187,11 +187,17 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
 {
-	/* Load function address into r0 */
-	EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
-	EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
-	EMIT(PPC_RAW_MTLR(__REG_R0));
-	EMIT(PPC_RAW_BLRL());
+	s32 rel = (s32)func - (s32)(image + ctx->idx);
+
+	if (image && rel < 0x2000000 && rel >= -0x2000000) {
+		PPC_BL_ABS(func);
+	} else {
+		/* Load function address into r0 */
+		EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
+		EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
+		EMIT(PPC_RAW_MTLR(__REG_R0));
+		EMIT(PPC_RAW_BLRL());
+	}
 }
 
 static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
--
cgit v1.2.3
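For completeness, the fallback path's lis/ori pair simply rebuilds the
32-bit address from its two halfwords; because ori performs a plain OR of
a zero-extended immediate, no carry correction of the high half is needed
(unlike an addis/addi pair). A small illustration, with IMM_H/IMM_L
written out locally under that assumption rather than taken from the
kernel headers:

	#include <stdint.h>
	#include <stdio.h>

	/* Halfword split as used by the lis/ori fallback (local definitions). */
	#define IMM_H(i)	(((uint32_t)(i) >> 16) & 0xffff)
	#define IMM_L(i)	((uint32_t)(i) & 0xffff)

	int main(void)
	{
		uint32_t func = 0xc0804321;	/* example target address */

		/* lis r0,IMM_H(func) sets the high half;
		 * ori r0,r0,IMM_L(func) ORs in the low half. */
		uint32_t r0 = (IMM_H(func) << 16) | IMM_L(func);

		printf("r0 = 0x%08x, round-trip %s\n", r0,
		       r0 == func ? "ok" : "broken");
		return 0;
	}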