diff options
author | Alexei Starovoitov <ast@kernel.org> | 2022-07-19 09:33:18 -0700 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2022-07-19 09:36:42 -0700 |
commit | ab850abbcf4d52ecc0e17224bcbcf749f327d6d0 (patch) | |
tree | 074a905be90e01e8d5ea77462471940ca63c51f7 /tools/lib/bpf/bpf_tracing.h | |
parent | 9ff5efdeb0897547cffc275e88d2a55462d9b08e (diff) | |
parent | d814ed62d3d24eb5c5f904b897e0414c1ccb5740 (diff) |
Merge branch 'Add SEC("ksyscall") support'
Andrii Nakryiko says:
====================
Add SEC("ksyscall")/SEC("kretsyscall") sections and corresponding
bpf_program__attach_ksyscall() API that simplifies tracing kernel syscalls
through kprobe mechanism. Kprobing syscalls isn't trivial due to varying
syscall handler names in the kernel and various ways syscall argument are
passed, depending on kernel architecture and configuration. SEC("ksyscall")
allows user to not care about such details and just get access to syscall
input arguments, while libbpf takes care of necessary feature detection logic.
There are still more quirks that are not straightforward to hide completely
(see comments about mmap(), clone() and compat syscalls), so in such more
advanced scenarios user might need to fall back to plain SEC("kprobe")
approach, but for absolute majority of users SEC("ksyscall") is a big
improvement.
As part of this patch set libbpf adds two more virtual __kconfig externs, in
addition to existing LINUX_KERNEL_VERSION: LINUX_HAS_BPF_COOKIE and
LINUX_HAS_SYSCALL_WRAPPER, which let's libbpf-provided BPF-side code minimize
external dependencies and assumptions and let's user-space part of libbpf to
perform all the feature detection logic. This benefits USDT support code,
which now doesn't depend on BPF CO-RE for its functionality.
v1->v2:
- normalize extern variable-related warn and debug message formats (Alan);
rfc->v1:
- drop dependency on kallsyms and speed up SYSCALL_WRAPPER detection (Alexei);
- drop dependency on /proc/config.gz in bpf_tracing.h (Yaniv);
- add doc comment and ephasize mmap(), clone() and compat quirks that are
not supported (Ilya);
- use mechanism similar to LINUX_KERNEL_VERSION to also improve USDT code.
====================
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'tools/lib/bpf/bpf_tracing.h')
-rw-r--r-- | tools/lib/bpf/bpf_tracing.h | 51 |
1 files changed, 38 insertions, 13 deletions
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 11f9096407fc..f4d3e1e2abe2 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -2,6 +2,8 @@ #ifndef __BPF_TRACING_H__ #define __BPF_TRACING_H__ +#include <bpf/bpf_helpers.h> + /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ #if defined(__TARGET_ARCH_x86) #define bpf_target_x86 @@ -140,7 +142,7 @@ struct pt_regs___s390 { #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15] #define __PT_IP_REG psw.addr -#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) #define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2) #elif defined(bpf_target_arm) @@ -174,7 +176,7 @@ struct pt_regs___arm64 { #define __PT_RC_REG regs[0] #define __PT_SP_REG sp #define __PT_IP_REG pc -#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) #define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0) #elif defined(bpf_target_mips) @@ -493,39 +495,62 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */ #define ___bpf_syscall_args0() ctx -#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) -#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) -#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) -#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) -#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs) +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs) +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs) +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs) +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs) #define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) +/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */ +#define ___bpf_syswrap_args0() ctx +#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args) + /* - * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for + * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for * tracing syscall functions, like __x64_sys_close. It hides the underlying * platform-specific low-level way of getting syscall input arguments from * struct pt_regs, and provides a familiar typed and named function arguments * syntax and semantics of accessing syscall input parameters. * - * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * Original struct pt_regs * context is preserved as 'ctx' argument. This might * be necessary when using BPF helpers like bpf_perf_event_output(). * - * This macro relies on BPF CO-RE support. + * At the moment BPF_KSYSCALL does not handle all the calling convention + * quirks for mmap(), clone() and compat syscalls transparrently. This may or + * may not change in the future. User needs to take extra measures to handle + * such quirks explicitly, if necessary. + * + * This macro relies on BPF CO-RE support and virtual __kconfig externs. */ -#define BPF_KPROBE_SYSCALL(name, args...) \ +#define BPF_KSYSCALL(name, args...) \ name(struct pt_regs *ctx); \ +extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ - struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \ + struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \ + ? (struct pt_regs *)PT_REGS_PARM1(ctx) \ + : ctx; \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_syscall_args(args)); \ + if (LINUX_HAS_SYSCALL_WRAPPER) \ + return ____##name(___bpf_syswrap_args(args)); \ + else \ + return ____##name(___bpf_syscall_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) +#define BPF_KPROBE_SYSCALL BPF_KSYSCALL + #endif |