From f04deb90e516e8e48bf8693397529bc942a9e80b Mon Sep 17 00:00:00 2001
From: Andrey Grafin
Date: Wed, 17 Jan 2024 16:06:18 +0300
Subject: libbpf: Apply map_set_def_max_entries() for inner_maps on creation

This patch allows bpf_object__load() to automatically create
BPF_MAP_TYPE_ARRAY_OF_MAPS and BPF_MAP_TYPE_HASH_OF_MAPS maps with values
of type BPF_MAP_TYPE_PERF_EVENT_ARRAY. Previously, a zero-filled
btf_map_def was created for inner maps and used for map creation, but the
Linux kernel forbids creating a BPF_MAP_TYPE_PERF_EVENT_ARRAY map with
max_entries=0.

Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support")
Signed-off-by: Andrey Grafin
Signed-off-by: Andrii Nakryiko
Acked-by: Yonghong Song
Acked-by: Hou Tao
Link: https://lore.kernel.org/bpf/20240117130619.9403-1-conquistador@yandex-team.ru
Signed-off-by: Alexei Starovoitov
---
 tools/lib/bpf/libbpf.c | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'tools/lib/bpf/libbpf.c')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index afd09571c482..b8b00da62907 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -70,6 +70,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
+static int map_set_def_max_entries(struct bpf_map *map);

 static const char * const attach_type_name[] = {
 	[BPF_CGROUP_INET_INGRESS]	= "cgroup_inet_ingress",
@@ -5172,6 +5173,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b

 	if (bpf_map_type__is_map_in_map(def->type)) {
 		if (map->inner_map) {
+			err = map_set_def_max_entries(map->inner_map);
+			if (err)
+				return err;
 			err = bpf_object__create_map(obj, map->inner_map, true);
 			if (err) {
 				pr_warn("map '%s': failed to create inner map: %d\n",
-- cgit v1.2.3


From 9e926acda0c2e21bca431a1818665ddcd6939755 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee
Date: Fri, 19 Jan 2024 14:50:03 -0800
Subject: libbpf: Find correct module BTFs for struct_ops maps and progs.

Locate the module BTFs for struct_ops maps and progs and pass them to the
kernel. This ensures that the kernel correctly resolves type IDs from the
appropriate module BTFs.

For the map of a struct_ops object, the FD of the module BTF is set on the
bpf_map to keep a reference to the module BTF. The FD is passed to the
kernel as value_type_btf_obj_fd when the struct_ops object is loaded.

For a bpf_struct_ops prog, attach_btf_obj_fd of bpf_prog is the FD of a
module BTF in the kernel.
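For illustration only (not part of the patch), a direct bpf_map_create()
caller could exercise the new option roughly as follows; the map name and
the mod_btf_fd, vtype_id, and value_size variables are hypothetical:

	/* Hypothetical sketch: create a struct_ops map whose value type is
	 * defined in a kernel module's BTF. mod_btf_fd is assumed to be an
	 * FD referring to that module BTF object, and vtype_id the value
	 * type ID resolved within it.
	 */
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		.btf_vmlinux_value_type_id = vtype_id,
		.value_type_btf_obj_fd = mod_btf_fd,	/* field added by this patch */
		.map_flags = BPF_F_VTYPE_BTF_OBJ_FD,	/* tell the kernel the FD is set */
	);
	int map_fd = bpf_map_create(BPF_MAP_TYPE_STRUCT_OPS, "test_ops",
				    sizeof(int) /* 4-byte key */, value_size,
				    1, &opts);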
Signed-off-by: Kui-Feng Lee Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240119225005.668602-13-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf.c | 4 +++- tools/lib/bpf/bpf.h | 4 +++- tools/lib/bpf/libbpf.c | 41 +++++++++++++++++++++++++++++++---------- tools/lib/bpf/libbpf_probes.c | 1 + 4 files changed, 38 insertions(+), 12 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9dc9625651dc..3a35472a17c5 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -169,7 +169,8 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + const size_t attr_sz = offsetofend(union bpf_attr, + value_type_btf_obj_fd); union bpf_attr attr; int fd; @@ -191,6 +192,7 @@ int bpf_map_create(enum bpf_map_type map_type, attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); + attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, -1); attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); attr.map_flags = OPTS_GET(opts, map_flags, 0); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index d0f53772bdc0..53b0bee053ad 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,8 +51,10 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; + __s32 value_type_btf_obj_fd; + size_t:0; }; -#define bpf_map_create_opts__last_field map_ifindex +#define bpf_map_create_opts__last_field value_type_btf_obj_fd LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b8b00da62907..0569b4973a4f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -528,6 +528,7 @@ struct bpf_map { struct bpf_map_def def; __u32 numa_node; __u32 btf_var_idx; + int mod_btf_fd; __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 btf_vmlinux_value_type_id; @@ -931,22 +932,29 @@ find_member_by_name(const struct btf *btf, const struct btf_type *t, return NULL; } +static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, + __u16 kind, struct btf **res_btf, + struct module_btf **res_mod_btf); + #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind); static int -find_struct_ops_kern_types(const struct btf *btf, const char *tname, +find_struct_ops_kern_types(struct bpf_object *obj, const char *tname, + struct module_btf **mod_btf, const struct btf_type **type, __u32 *type_id, const struct btf_type **vtype, __u32 *vtype_id, const struct btf_member **data_member) { const struct btf_type *kern_type, *kern_vtype; const struct btf_member *kern_data_member; + struct btf *btf; __s32 kern_vtype_id, kern_type_id; __u32 i; - kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT, + &btf, mod_btf); if (kern_type_id < 0) { pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname); @@ -1000,14 +1008,16 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map) } /* Init the map's fields that depend on kern_btf */ -static int bpf_map__init_kern_struct_ops(struct bpf_map *map, - const struct btf *btf, - const struct btf *kern_btf) +static int 
bpf_map__init_kern_struct_ops(struct bpf_map *map) { const struct btf_member *member, *kern_member, *kern_data_member; const struct btf_type *type, *kern_type, *kern_vtype; __u32 i, kern_type_id, kern_vtype_id, kern_data_off; + struct bpf_object *obj = map->obj; + const struct btf *btf = obj->btf; struct bpf_struct_ops *st_ops; + const struct btf *kern_btf; + struct module_btf *mod_btf; void *data, *kern_data; const char *tname; int err; @@ -1015,16 +1025,19 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, st_ops = map->st_ops; type = st_ops->type; tname = st_ops->tname; - err = find_struct_ops_kern_types(kern_btf, tname, + err = find_struct_ops_kern_types(obj, tname, &mod_btf, &kern_type, &kern_type_id, &kern_vtype, &kern_vtype_id, &kern_data_member); if (err) return err; + kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux; + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", map->name, st_ops->type_id, kern_type_id, kern_vtype_id); + map->mod_btf_fd = mod_btf ? mod_btf->fd : -1; map->def.value_size = kern_vtype->size; map->btf_vmlinux_value_type_id = kern_vtype_id; @@ -1100,6 +1113,8 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, return -ENOTSUP; } + if (mod_btf) + prog->attach_btf_obj_fd = mod_btf->fd; prog->attach_btf_id = kern_type_id; prog->expected_attach_type = kern_member_idx; @@ -1142,8 +1157,7 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) if (!bpf_map__is_struct_ops(map)) continue; - err = bpf_map__init_kern_struct_ops(map, obj->btf, - obj->btf_vmlinux); + err = bpf_map__init_kern_struct_ops(map); if (err) return err; } @@ -5162,8 +5176,13 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; - if (bpf_map__is_struct_ops(map)) + if (bpf_map__is_struct_ops(map)) { create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; + if (map->mod_btf_fd >= 0) { + create_attr.value_type_btf_obj_fd = map->mod_btf_fd; + create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; + } + } if (obj->btf && btf__fd(obj->btf) >= 0) { create_attr.btf_fd = btf__fd(obj->btf); @@ -9970,7 +9989,9 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac *btf_obj_fd = 0; *btf_type_id = 1; } else { - err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + err = find_kernel_btf_id(prog->obj, attach_name, + attach_type, btf_obj_fd, + btf_type_id); } if (err) { pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9c4db90b92b6..98373d126d9d 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -326,6 +326,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ opts.btf_vmlinux_value_type_id = 1; + opts.value_type_btf_obj_fd = -1; exp_err = -524; /* -ENOTSUPP */ break; case BPF_MAP_TYPE_BLOOM_FILTER: -- cgit v1.2.3 From ea4d587354eb5e32dfa93cebb055b072f518b193 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:18 -0800 Subject: libbpf: Split feature detectors definitions from cached results Split a list of supported feature detectors with their corresponding callbacks from actual cached supported/missing values. 
This will allow for more flexible per-token or per-object feature
detectors in subsequent refactorings.

Signed-off-by: Andrii Nakryiko
Signed-off-by: Alexei Starovoitov
Acked-by: John Fastabend
Link: https://lore.kernel.org/bpf/20240124022127.2379740-22-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
(limited to 'tools/lib/bpf/libbpf.c')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0569b4973a4f..227f9f63e259 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4958,12 +4958,17 @@ enum kern_feature_result {
 	FEAT_MISSING = 2,
 };

+struct kern_feature_cache {
+	enum kern_feature_result res[__FEAT_CNT];
+};
+
 typedef int (*feature_probe_fn)(void);

+static struct kern_feature_cache feature_cache;
+
 static struct kern_feature_desc {
 	const char *desc;
 	feature_probe_fn probe;
-	enum kern_feature_result res;
 } feature_probes[__FEAT_CNT] = {
 	[FEAT_PROG_NAME] = {
 		"BPF program name", probe_kern_prog_name,
@@ -5031,6 +5036,7 @@ static struct kern_feature_desc {
 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 {
 	struct kern_feature_desc *feat = &feature_probes[feat_id];
+	struct kern_feature_cache *cache = &feature_cache;
 	int ret;

 	if (obj && obj->gen_loader)
@@ -5039,19 +5045,19 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 		 */
 		return true;

-	if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
+	if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) {
 		ret = feat->probe();
 		if (ret > 0) {
-			WRITE_ONCE(feat->res, FEAT_SUPPORTED);
+			WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED);
 		} else if (ret == 0) {
-			WRITE_ONCE(feat->res, FEAT_MISSING);
+			WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
 		} else {
 			pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
-			WRITE_ONCE(feat->res, FEAT_MISSING);
+			WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
 		}
 	}

-	return READ_ONCE(feat->res) == FEAT_SUPPORTED;
+	return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED;
 }

 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
-- cgit v1.2.3


From d6dd1d49367ab03832b3c4b6f8211765d488c82b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Tue, 23 Jan 2024 18:21:19 -0800
Subject: libbpf: Further decouple feature checking logic from bpf_object

Add a feat_supported() helper that accepts a feature cache instead of a
bpf_object. This allows low-level code in bpf.c not to know or care about
the higher-level concept of bpf_object, yet still be able to utilize
custom feature checking in cases where a BPF token might influence the
outcome.
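A minimal usage sketch of the decoupled helper (the my_cache object and
wrapper function below are hypothetical, for illustration only): passing
NULL selects the global cache, while a caller-owned cache keeps probe
results separate:

	static struct kern_feature_cache my_cache;	/* zeroed => all FEAT_UNKNOWN */

	static bool can_use_prog_names(void)
	{
		/* NULL would select the global feature_cache; &my_cache keeps
		 * independent results, e.g. for probes that will later run
		 * with a BPF token.
		 */
		return feat_supported(&my_cache, FEAT_PROG_NAME);
	}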
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-23-andrii@kernel.org --- tools/lib/bpf/bpf.c | 6 +++--- tools/lib/bpf/libbpf.c | 22 +++++++++++++++------- tools/lib/bpf/libbpf_internal.h | 5 ++++- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 42cde79a67c7..1ebf224b8c56 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -146,7 +146,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -181,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -266,7 +266,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.kern_version = OPTS_GET(opts, kern_version, 0); attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 227f9f63e259..4d791660bb56 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5033,17 +5033,14 @@ static struct kern_feature_desc { }, }; -bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; - struct kern_feature_cache *cache = &feature_cache; int ret; - if (obj && obj->gen_loader) - /* To generate loader program assume the latest kernel - * to avoid doing extra prog_load, map_create syscalls. - */ - return true; + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { ret = feat->probe(); @@ -5060,6 +5057,17 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; } +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +{ + if (obj && obj->gen_loader) + /* To generate loader program assume the latest kernel + * to avoid doing extra prog_load, map_create syscalls. 
+ */ + return true; + + return feat_supported(NULL, feat_id); +} + static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 58c547d473e0..622892fc841d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -361,8 +361,11 @@ enum kern_feature_id { __FEAT_CNT, }; -int probe_memcg_account(void); +struct kern_feature_cache; +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); + +int probe_memcg_account(void); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); -- cgit v1.2.3 From 05f9cdd55d61cf9c6283fd3dc0edc7cad09bd7fe Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:20 -0800 Subject: libbpf: Move feature detection code into its own file It's quite a lot of well isolated code, so it seems like a good candidate to move it out of libbpf.c to reduce its size. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-24-andrii@kernel.org --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/elf.c | 2 - tools/lib/bpf/features.c | 463 ++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 463 +--------------------------------------- tools/lib/bpf/libbpf_internal.h | 14 +- tools/lib/bpf/str_error.h | 3 + 6 files changed, 481 insertions(+), 466 deletions(-) create mode 100644 tools/lib/bpf/features.c (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 2d0c282c8588..b6619199a706 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o + usdt.o zip.o elf.o features.o diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b02faec748a5..c92e02394159 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,8 +11,6 @@ #include "libbpf_internal.h" #include "str_error.h" -#define STRERR_BUFSIZE 128 - /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c new file mode 100644 index 000000000000..a4664526ab7f --- /dev/null +++ b/tools/lib/bpf/features.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ +#include +#include +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "str_error.h" + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func_global(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_datasec(void) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int 
probe_kern_btf_float(void) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_decl_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_type_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); +} + +static int probe_prog_bind_map(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. 
*/ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_cache feature_cache; + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; + + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } + } + + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; +} diff --git a/tools/lib/bpf/libbpf.c 
b/tools/lib/bpf/libbpf.c index 4d791660bb56..a2b767bc0c5b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4596,467 +4596,6 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const 
char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. 
*/ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(void) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(void) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_syscall_wrapper(void); - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -struct kern_feature_cache { - enum kern_feature_result res[__FEAT_CNT]; -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_cache feature_cache; - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) -{ - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - /* assume global feature cache, unless custom one is provided */ - if (!cache) - cache = &feature_cache; - - if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } else { - pr_warn("Detection of 
kernel %s support failed: %d\n", feat->desc, ret);
-			WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
-		}
-	}
-
-	return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED;
-}
-
 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 {
 	if (obj && obj->gen_loader)
@@ -11067,7 +10606,7 @@ static const char *arch_specific_syscall_pfx(void)
 #endif
 }

-static int probe_kern_syscall_wrapper(void)
+int probe_kern_syscall_wrapper(void)
 {
 	char syscall_name[64];
 	const char *ksys_pfx;

diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 622892fc841d..fa25e1232bc8 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -361,10 +361,20 @@ enum kern_feature_id {
 	__FEAT_CNT,
 };

-struct kern_feature_cache;
+enum kern_feature_result {
+	FEAT_UNKNOWN = 0,
+	FEAT_SUPPORTED = 1,
+	FEAT_MISSING = 2,
+};
+
+struct kern_feature_cache {
+	enum kern_feature_result res[__FEAT_CNT];
+};
+
 bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);

+int probe_kern_syscall_wrapper(void);
 int probe_memcg_account(void);
 int bump_rlimit_memlock(void);

@@ -626,4 +636,6 @@ int elf_resolve_syms_offsets(const char *binary_path, int cnt,
 int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
 				unsigned long **poffsets, size_t *pcnt);

+int probe_fd(int fd);
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */

diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
index a139334d57b6..626d7ffb03d6 100644
--- a/tools/lib/bpf/str_error.h
+++ b/tools/lib/bpf/str_error.h
@@ -2,5 +2,8 @@
 #ifndef __LIBBPF_STR_ERROR_H
 #define __LIBBPF_STR_ERROR_H

+#define STRERR_BUFSIZE 128
+
 char *libbpf_strerror_r(int err, char *dst, int len);
+
 #endif /* __LIBBPF_STR_ERROR_H */
-- cgit v1.2.3


From f3dcee938f485cf403ba2acf1f1548afe637c904 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Tue, 23 Jan 2024 18:21:21 -0800
Subject: libbpf: Wire up token_fd into feature probing logic

Adjust feature probing callbacks to take into account an optional
token_fd. In unprivileged contexts, some feature detectors would fail to
detect kernel support just because a BPF program, BPF map, or BTF object
can't be loaded due to the privileged nature of those operations. So when
a BPF object is loaded with a BPF token, this token should be used for
feature probing.

This patch sets up support for this scenario, but we don't yet pass a
non-zero token FD. That will be added in the next patch.

We also switch the BPF cookie detector from using a kprobe program to a
tracepoint one, as a tracepoint is a somewhat less dangerous BPF program
type and has a higher likelihood of being allowed through a BPF token in
the future. This change has no effect on detection behavior.
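The same pattern recurs in every converted detector; a condensed sketch
(probe_with_optional_token is a hypothetical name, not a real libbpf
function): thread the optional token_fd into the load and set
BPF_F_TOKEN_FD only when a token is actually present (token_fd == 0 means
"no token"):

	static int probe_with_optional_token(int token_fd)
	{
		LIBBPF_OPTS(bpf_prog_load_opts, opts,
			.token_fd = token_fd,
			.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
		);
		struct bpf_insn insns[] = {
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		};
		int fd;

		fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
				   insns, ARRAY_SIZE(insns), &opts);
		return probe_fd(fd);	/* closes fd; 1 if load succeeded, 0 otherwise */
	}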
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-25-andrii@kernel.org --- tools/lib/bpf/bpf.c | 5 +- tools/lib/bpf/features.c | 116 +++++++++++++++++++++++++++------------- tools/lib/bpf/libbpf.c | 4 +- tools/lib/bpf/libbpf_internal.h | 8 +-- tools/lib/bpf/libbpf_probes.c | 11 ++-- 5 files changed, 97 insertions(+), 47 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1ebf224b8c56..97ec005c3c47 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(void) +int probe_memcg_account(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,6 +120,9 @@ int probe_memcg_account(void) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); + attr.prog_token_fd = token_fd; + if (token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index a4664526ab7f..5a5c766bf615 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -20,7 +20,7 @@ int probe_fd(int fd) return fd >= 0; } -static int probe_kern_prog_name(void) +static int probe_kern_prog_name(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, prog_name); struct bpf_insn insns[] = { @@ -35,6 +35,9 @@ static int probe_kern_prog_name(void) attr.license = ptr_to_u64("GPL"); attr.insns = ptr_to_u64(insns); attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + attr.prog_token_fd = token_fd; + if (token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); /* make sure loading with name works */ @@ -42,7 +45,7 @@ static int probe_kern_prog_name(void) return probe_fd(ret); } -static int probe_kern_global_data(void) +static int probe_kern_global_data(int token_fd) { char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { @@ -51,9 +54,17 @@ static int probe_kern_global_data(void) BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? 
BPF_F_TOKEN_FD : 0, + ); int ret, map, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -64,12 +75,12 @@ static int probe_kern_global_data(void) insns[0].imm = map; - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); close(map); return probe_fd(ret); } -static int probe_kern_btf(void) +static int probe_kern_btf(int token_fd) { static const char strs[] = "\0int"; __u32 types[] = { @@ -78,10 +89,10 @@ static int probe_kern_btf(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_func(void) +static int probe_kern_btf_func(int token_fd) { static const char strs[] = "\0int\0x\0a"; /* void x(int a) {} */ @@ -96,10 +107,10 @@ static int probe_kern_btf_func(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_func_global(void) +static int probe_kern_btf_func_global(int token_fd) { static const char strs[] = "\0int\0x\0a"; /* static void x(int a) {} */ @@ -114,10 +125,10 @@ static int probe_kern_btf_func_global(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_datasec(void) +static int probe_kern_btf_datasec(int token_fd) { static const char strs[] = "\0x\0.data"; /* static int a; */ @@ -133,10 +144,10 @@ static int probe_kern_btf_datasec(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_float(void) +static int probe_kern_btf_float(int token_fd) { static const char strs[] = "\0float"; __u32 types[] = { @@ -145,10 +156,10 @@ static int probe_kern_btf_float(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_decl_tag(void) +static int probe_kern_btf_decl_tag(int token_fd) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -162,10 +173,10 @@ static int probe_kern_btf_decl_tag(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_type_tag(void) +static int probe_kern_btf_type_tag(int token_fd) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -178,21 +189,28 @@ static int probe_kern_btf_type_tag(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_array_mmap(void) +static int probe_kern_array_mmap(int token_fd) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_MMAPABLE | (token_fd ? 
BPF_F_TOKEN_FD : 0), + .token_fd = token_fd, + ); int fd; fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); return probe_fd(fd); } -static int probe_kern_exp_attach_type(void) +static int probe_kern_exp_attach_type(int token_fd) { - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -208,8 +226,12 @@ static int probe_kern_exp_attach_type(void) return probe_fd(fd); } -static int probe_kern_probe_read_kernel(void) +static int probe_kern_probe_read_kernel(int token_fd) { + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); struct bpf_insn insns[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ @@ -220,20 +242,28 @@ static int probe_kern_probe_read_kernel(void) }; int fd, insn_cnt = ARRAY_SIZE(insns); - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); return probe_fd(fd); } -static int probe_prog_bind_map(void) +static int probe_prog_bind_map(int token_fd) { char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -242,7 +272,7 @@ static int probe_prog_bind_map(void) return ret; } - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); if (prog < 0) { close(map); return 0; @@ -256,7 +286,7 @@ static int probe_prog_bind_map(void) return ret >= 0; } -static int probe_module_btf(void) +static int probe_module_btf(int token_fd) { static const char strs[] = "\0int"; __u32 types[] = { @@ -268,7 +298,7 @@ static int probe_module_btf(void) char name[16]; int fd, err; - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); if (fd < 0) return 0; /* BTF not supported at all */ @@ -285,16 +315,20 @@ static int probe_module_btf(void) return !err; } -static int probe_perf_link(void) +static int probe_perf_link(int token_fd) { struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? 
BPF_F_TOKEN_FD : 0, + ); int prog_fd, link_fd, err; prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); + insns, ARRAY_SIZE(insns), &opts); if (prog_fd < 0) return -errno; @@ -311,10 +345,12 @@ static int probe_perf_link(void) return link_fd < 0 && err == -EBADF; } -static int probe_uprobe_multi_link(void) +static int probe_uprobe_multi_link(int token_fd) { LIBBPF_OPTS(bpf_prog_load_opts, load_opts, .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, ); LIBBPF_OPTS(bpf_link_create_opts, link_opts); struct bpf_insn insns[] = { @@ -344,19 +380,23 @@ static int probe_uprobe_multi_link(void) return link_fd < 0 && err == -EBADF; } -static int probe_kern_bpf_cookie(void) +static int probe_kern_bpf_cookie(int token_fd) { struct bpf_insn insns[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int ret, insn_cnt = ARRAY_SIZE(insns); - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); return probe_fd(ret); } -static int probe_kern_btf_enum64(void) +static int probe_kern_btf_enum64(int token_fd) { static const char strs[] = "\0enum64"; __u32 types[] = { @@ -364,10 +404,10 @@ static int probe_kern_btf_enum64(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -typedef int (*feature_probe_fn)(void); +typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -448,7 +488,7 @@ bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_ cache = &feature_cache; if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(); + ret = feat->probe(cache->token_fd); if (ret > 0) { WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); } else if (ret == 0) { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a2b767bc0c5b..a1d100df0c71 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6440,7 +6440,7 @@ static int probe_kern_arg_ctx_tag(void) if (cached_result >= 0) return cached_result; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), 0); if (btf_fd < 0) return 0; @@ -10606,7 +10606,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(int token_fd) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index fa25e1232bc8..28fabed1cd8f 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -369,19 +369,21 @@ enum kern_feature_result { struct kern_feature_cache { enum kern_feature_result res[__FEAT_CNT]; + int token_fd; }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); -int probe_kern_syscall_wrapper(void); -int probe_memcg_account(void); +int probe_kern_syscall_wrapper(int token_fd); +int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int 
parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); + const char *str_sec, size_t str_len, + int token_fd); int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 98373d126d9d..ee9b1dbea9eb 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) + const char *str_sec, size_t str_len, + int token_fd) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -229,6 +230,10 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; + LIBBPF_OPTS(bpf_btf_load_opts, opts, + .token_fd = token_fd, + .btf_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int btf_fd, btf_len; __u8 *raw_btf; @@ -241,7 +246,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); free(raw_btf); return btf_fd; @@ -271,7 +276,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); + strs, sizeof(strs), 0); } static int probe_map_create(enum bpf_map_type map_type) -- cgit v1.2.3 From 6b434b61b4d9e0e59f2947ce0f58f6fb4de048d8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:22 -0800 Subject: libbpf: Wire up BPF token support at BPF object level Add BPF token support to BPF object-level functionality. BPF token is supported by BPF object logic either as an explicitly provided BPF token from outside (through BPF FS path), or implicitly (unless prevented through bpf_object_open_opts). Implicit mode is assumed to be the most common one for user namespaced unprivileged workloads. The assumption is that a privileged container manager sets up the default BPF FS mount point at /sys/fs/bpf with BPF token delegation options (delegate_{cmds,maps,progs,attachs} mount options). During loading, the BPF object will attempt to create a BPF token from the /sys/fs/bpf location and pass it for all relevant operations (currently, map creation, BTF load, and program load). In this implicit mode, if BPF token creation fails for whatever reason (BPF FS is not mounted, the kernel doesn't support BPF token, etc.), this is not considered an error. The BPF object loading sequence will proceed with no BPF token. In explicit BPF token mode, the user explicitly provides a custom BPF FS mount point path. In that case, the BPF object will attempt to create a BPF token from the provided BPF FS location. If BPF token creation fails, that is considered a critical error and BPF object load fails with an error. Libbpf provides a way to disable implicit BPF token creation, if it causes any trouble (BPF token is designed to be completely optional and shouldn't cause any problems even if provided, but in the world of BPF LSM, custom security logic can be installed that might change the outcome depending on the presence of a BPF token).
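As an illustrative sketch (relying only on the bpf_token_path option added by this patch; the mount path and object file name are hypothetical), explicit mode usage boils down to:

    LIBBPF_OPTS(bpf_object_open_opts, open_opts,
        .bpf_token_path = "/mnt/bpffs-token", /* custom delegated BPF FS */
    );
    struct bpf_object *obj;

    obj = bpf_object__open_file("prog.bpf.o", &open_opts);
    /* with an explicit path, failure to create a BPF token fails the load */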
To disable libbpf's default BPF token creation behavior, the user should set the bpf_token_path option to an empty string. BPF token presence can influence libbpf's feature probing, so if a BPF object has an associated BPF token, feature probing is instructed to use the BPF object-specific feature detection cache and token FD. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-26-andrii@kernel.org --- tools/lib/bpf/btf.c | 10 +++- tools/lib/bpf/libbpf.c | 102 +++++++++++++++++++++++++++++++++++++--- tools/lib/bpf/libbpf.h | 13 ++++- tools/lib/bpf/libbpf_internal.h | 17 ++++++- 4 files changed, 131 insertions(+), 11 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ee95fd379d4d..ec92b87cae01 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1367,6 +1369,10 @@ retry_load: opts.log_level = log_level; } + opts.token_fd = token_fd; + if (token_fd) + opts.btf_flags |= BPF_F_TOKEN_FD; + btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1394,7 +1400,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a1d100df0c71..cefa607be335 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,6 +59,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -695,6 +697,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -2231,7 +2237,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = "/sys/fs/bpf"; + path = BPF_FS_DEFAULT_PATH; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3240,7 +3246,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -4561,6 +4567,58 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } +static int bpf_object_prepare_token(struct bpf_object *obj) +{ + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is explicitly prevented */ + if (obj->token_path && obj->token_path[0] == '\0') { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + return 0; + } + + mandatory = obj->token_path != NULL; + level = mandatory ?
LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", + obj->name, err, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, 0); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4570,6 +4628,10 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = obj->token_fd, + .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0, + ); if (obj->gen_loader) return 0; @@ -4579,9 +4641,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4604,6 +4666,9 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; + if (obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); + return feat_supported(NULL, feat_id); } @@ -4728,6 +4793,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; + if (obj->token_fd) + create_attr.map_flags |= BPF_F_TOKEN_FD; if (bpf_map__is_struct_ops(map)) { create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -7049,6 +7117,10 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.prog_flags = prog->prog_flags; load_attr.fd_array = obj->fd_array; + load_attr.token_fd = obj->token_fd; + if (obj->token_fd) + load_attr.prog_flags |= BPF_F_TOKEN_FD; + /* adjust load_attr if sec_def provides custom preload callback */ if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); @@ -7494,7 +7566,7 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *obj_name, *kconfig, 
*btf_tmp_path, *token_path; struct bpf_object *obj; char tmp_name[64]; int err; @@ -7531,6 +7603,10 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, NULL); + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7539,6 +7615,14 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -8049,7 +8133,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object__probe_loading(obj); + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_maps(obj); @@ -8584,6 +8669,11 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); + free(obj); } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..535ae15ed493 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,10 +177,21 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; + /* Path to BPF FS mount point to derive BPF token from. + * + * Created BPF token will be used for all bpf() syscall operations + * that accept BPF token (e.g., map creation, BTF and program loads, + * etc) automatically within instantiated BPF object. + * + * Setting bpf_token_path option to empty string disables libbpf's + * automatic attempt to create BPF token from default BPF FS mount + * point (/sys/fs/bpf), in case this default behavior is undesirable. + */ + const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field bpf_token_path /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 28fabed1cd8f..930cc9616527 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -384,7 +384,9 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len, int token_fd); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -548,6 +550,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. + * Original FD is not closed or altered in any other way. 
+ * Preserves original FD value, if it's invalid (negative). + */ +static inline int dup_good_fd(int fd) +{ + if (fd < 0) + return fd; + return fcntl(fd, F_DUPFD_CLOEXEC, 3); +} + /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -559,7 +572,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + fd = dup_good_fd(fd); saved_errno = errno; close(old_fd); errno = saved_errno; -- cgit v1.2.3 From cac270ad79afe212ed7986e8d271c72521cd8212 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:25 -0800 Subject: libbpf: Support BPF token path setting through LIBBPF_BPF_TOKEN_PATH envvar To allow external admin authority to override default BPF FS location (/sys/fs/bpf) for implicit BPF token creation, teach libbpf to recognize LIBBPF_BPF_TOKEN_PATH envvar. If it is specified and user application didn't explicitly specify bpf_token_path option, it will be treated exactly like bpf_token_path option, overriding default /sys/fs/bpf location and making BPF token mandatory. Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-29-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 6 ++++++ tools/lib/bpf/libbpf.h | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cefa607be335..fa7094ff3e66 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7604,6 +7604,12 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, return ERR_PTR(-EINVAL); token_path = OPTS_GET(opts, bpf_token_path, NULL); + /* if user didn't specify bpf_token_path explicitly, check if + * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path + * option + */ + if (!token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); if (token_path && strlen(token_path) >= PATH_MAX) return ERR_PTR(-ENAMETOOLONG); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 535ae15ed493..5723cbbfcc41 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -183,6 +183,14 @@ struct bpf_object_open_opts { * that accept BPF token (e.g., map creation, BTF and program loads, * etc) automatically within instantiated BPF object. * + * If bpf_token_path is not specified, libbpf will consult + * LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will be + * taken as a value of bpf_token_path option and will force libbpf to + * either create BPF token from provided custom BPF FS path, or will + * disable implicit BPF token creation, if envvar value is an empty + * string. bpf_token_path overrides LIBBPF_BPF_TOKEN_PATH, if both are + * set at the same time. + * * Setting bpf_token_path option to empty string disables libbpf's * automatic attempt to create BPF token from default BPF FS mount * point (/sys/fs/bpf), in case this default behavior is undesirable. -- cgit v1.2.3 From 0e6d0a9d2348b64df74239e859fa9d6e86cdcdef Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 25 Jan 2024 12:55:04 -0800 Subject: libbpf: integrate __arg_ctx feature detector into kernel_supports() Now that feature detection code is in bpf-next tree, integrate __arg_ctx kernel-side support into kernel_supports() framework. 
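For reference, the detected capability concerns global subprogs whose context argument is tagged with __arg_ctx, roughly of this shape (a sketch, assuming the __arg_ctx macro from bpf_helpers.h):

    /* global subprog whose pointer arg must be treated as program context */
    __noinline int handle_ctx(void *ctx __arg_ctx)
    {
        return 0;
    }

On kernels with native support the verifier honors the tag by itself; on older kernels libbpf keeps rewriting the argument's BTF type, guarded by the probe added below.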
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240125205510.3642094-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/features.c | 58 ++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 65 +---------------------------------------- tools/lib/bpf/libbpf_internal.h | 2 ++ 3 files changed, 61 insertions(+), 64 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 5a5c766bf615..6b0738ad7063 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -407,6 +407,61 @@ static int probe_kern_btf_enum64(int token_fd) strs, sizeof(strs), token_fd)); } +static int probe_kern_arg_ctx_tag(int token_fd) +{ + static const char strs[] = "\0a\0b\0arg:ctx\0"; + const __u32 types[] = { + /* [1] INT */ + BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), + /* [2] PTR -> VOID */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + /* [3] FUNC_PROTO `int(void *a)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(1 /* "a" */, 2), + /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ + BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), + /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(3 /* "b" */, 2), + /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ + BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), + /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ + BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), + }; + const struct bpf_insn insns[] = { + /* main prog */ + BPF_CALL_REL(+1), + BPF_EXIT_INSN(), + /* global subprog */ + BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ + BPF_EXIT_INSN(), + }; + const struct bpf_func_info_min func_infos[] = { + { 0, 4 }, /* main prog -> FUNC 'a' */ + { 2, 6 }, /* subprog -> FUNC 'b' */ + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); + if (btf_fd < 0) + return 0; + + opts.prog_btf_fd = btf_fd; + opts.func_info = &func_infos; + opts.func_info_cnt = ARRAY_SIZE(func_infos); + opts.func_info_rec_size = sizeof(func_infos[0]); + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", + "GPL", insns, insn_cnt, &opts); + close(btf_fd); + + return probe_fd(prog_fd); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -476,6 +531,9 @@ static struct kern_feature_desc { [FEAT_UPROBE_MULTI_LINK] = { "BPF multi-uprobe link support", probe_uprobe_multi_link, }, + [FEAT_ARG_CTX_TAG] = { + "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fa7094ff3e66..41ab7a21f868 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6462,69 +6462,6 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr return fn_id; } -static int probe_kern_arg_ctx_tag(void) -{ - /* To minimize merge conflicts with BPF token series that refactors - * feature detection code a lot, we don't integrate - * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection - * framework yet, doing our own caching internally. 
- * This will be cleaned up a bit later when bpf/bpf-next trees settle. - */ - static int cached_result = -1; - static const char strs[] = "\0a\0b\0arg:ctx\0"; - const __u32 types[] = { - /* [1] INT */ - BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), - /* [2] PTR -> VOID */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), - /* [3] FUNC_PROTO `int(void *a)` */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), - BTF_PARAM_ENC(1 /* "a" */, 2), - /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ - BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), - /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), - BTF_PARAM_ENC(3 /* "b" */, 2), - /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ - BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), - /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ - BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), - }; - const struct bpf_insn insns[] = { - /* main prog */ - BPF_CALL_REL(+1), - BPF_EXIT_INSN(), - /* global subprog */ - BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ - BPF_EXIT_INSN(), - }; - const struct bpf_func_info_min func_infos[] = { - { 0, 4 }, /* main prog -> FUNC 'a' */ - { 2, 6 }, /* subprog -> FUNC 'b' */ - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); - - if (cached_result >= 0) - return cached_result; - - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), 0); - if (btf_fd < 0) - return 0; - - opts.prog_btf_fd = btf_fd; - opts.func_info = &func_infos; - opts.func_info_cnt = ARRAY_SIZE(func_infos); - opts.func_info_rec_size = sizeof(func_infos[0]); - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", - "GPL", insns, insn_cnt, &opts); - close(btf_fd); - - cached_result = probe_fd(prog_fd); - return cached_result; -} - /* Check if main program or global subprog's function prototype has `arg:ctx` * argument tags, and, if necessary, substitute correct type to match what BPF * verifier would expect, taking into account specific program type. This @@ -6549,7 +6486,7 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra return 0; /* don't do any fix ups if kernel natively supports __arg_ctx */ - if (probe_kern_arg_ctx_tag() > 0) + if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) return 0; /* some BPF program types just don't have named context structs, so diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 5b30f3b67a02..ad936ac5e639 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -372,6 +372,8 @@ enum kern_feature_id { FEAT_SYSCALL_WRAPPER, /* BPF multi-uprobe link support */ FEAT_UPROBE_MULTI_LINK, + /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */ + FEAT_ARG_CTX_TAG, __FEAT_CNT, }; -- cgit v1.2.3 From 9eea8fafe33eb70868f6ace2fc1e17c4ff5539c3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 25 Jan 2024 12:55:05 -0800 Subject: libbpf: fix __arg_ctx type enforcement for perf_event programs Adjust PERF_EVENT type enforcement around __arg_ctx to match exactly what kernel is doing. 
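As a sketch of the shape being matched (a hypothetical program, on an architecture where bpf_user_pt_regs_t is struct pt_regs):

    __noinline int sub(struct pt_regs *regs __arg_ctx)
    {
        return 0;
    }

    SEC("perf_event")
    int prog(struct bpf_perf_event_data *ctx)
    {
        return sub((void *)ctx);
    }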
Fixes: 76ec90a996e3 ("libbpf: warn on unexpected __arg_ctx type when rewriting BTF") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240125205510.3642094-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 41ab7a21f868..db65ea59a05a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -6339,6 +6340,14 @@ static struct { /* all other program types don't have "named" context structs */ }; +/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, + * for below __builtin_types_compatible_p() checks; + * with this approach we don't need any extra arch-specific #ifdef guards + */ +struct pt_regs; +struct user_pt_regs; +struct user_regs_struct; + static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, const char *subprog_name, int arg_idx, int arg_type_id, const char *ctx_name) @@ -6379,11 +6388,21 @@ static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_pro /* special cases */ switch (prog->type) { case BPF_PROG_TYPE_KPROBE: - case BPF_PROG_TYPE_PERF_EVENT: /* `struct pt_regs *` is expected, but we need to fix up */ if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) return true; break; + case BPF_PROG_TYPE_PERF_EVENT: + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && + btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return 0; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && + btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) + return 0; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && + btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) + return 0; + break; case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: /* allow u64* as ctx */ -- cgit v1.2.3 From 8263b3382d8c1af0fffa27095a9f1db6f2dad899 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 31 Jan 2024 23:26:15 +0200 Subject: libbpf: Remove unnecessary null check in kernel_supports() After recent changes, Coverity complained about inconsistent null checks in kernel_supports() function: kernel_supports(const struct bpf_object *obj, ...) [...] // var_compare_op: Comparing obj to null implies that obj might be null if (obj && obj->gen_loader) return true; // var_deref_op: Dereferencing null pointer obj if (obj->token_fd) return feat_supported(obj->feat_cache, feat_id); [...] - The original null check was introduced by commit [0], which introduced a call `kernel_supports(NULL, ...)` in function bump_rlimit_memlock(); - This call was refactored to use `feat_supported(NULL, ...)` in commit [1]. Looking at all places where kernel_supports() is called: - There is either `obj->...` access before the call; - Or `obj` comes from `prog->obj` expression, where `prog` comes from enumeration of programs in `obj`; - Or `obj` comes from `prog->obj`, where `prog` is a parameter to one of the API functions: - bpf_program__attach_kprobe_opts; - bpf_program__attach_kprobe; - bpf_program__attach_ksyscall. Assuming correct API usage, it appears that `obj` can never be null when passed to kernel_supports(). Silence the Coverity warning by removing redundant null check. 
[0] e542f2c4cd16 ("libbpf: Auto-bump RLIMIT_MEMLOCK if kernel needs it for BPF") [1] d6dd1d49367a ("libbpf: Further decouple feature checking logic from bpf_object") Signed-off-by: Eduard Zingerman Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240131212615.20112-1-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index db65ea59a05a..f6953d7faff1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4661,7 +4661,7 @@ bpf_object__probe_loading(struct bpf_object *obj) bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { - if (obj && obj->gen_loader) + if (obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ -- cgit v1.2.3 From 9fa5e1a180aa639fb156a16e453ab820b7e7860b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Feb 2024 09:20:23 -0800 Subject: libbpf: Call memfd_create() syscall directly Some versions of Android do not implement memfd_create() wrapper in their libc implementation, leading to build failures ([0]). On the other hand, memfd_create() is available as a syscall on quite old kernels (3.17+, while bpf() syscall itself is available since 3.18+), so it is ok to assume that syscall availability and call into it with syscall() helper to avoid Android-specific workarounds. Validated in libbpf-bootstrap's CI ([1]). [0] https://github.com/libbpf/libbpf-bootstrap/actions/runs/7701003207/job/20986080319#step:5:83 [1] https://github.com/libbpf/libbpf-bootstrap/actions/runs/7715988887/job/21031767212?pr=253 Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240201172027.604869-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f6953d7faff1..6932f2c4ddfd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1525,11 +1525,20 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + static int create_placeholder_fd(void) { int fd; - fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); + fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); if (fd < 0) return -errno; return fd; -- cgit v1.2.3 From d7bc416aa5cc183691287e8f0b1d5b182a7ce9c3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 5 Feb 2024 16:22:43 -0800 Subject: libbpf: fix return value for PERF_EVENT __arg_ctx type fix up check If PERF_EVENT program has __arg_ctx argument with matching architecture-specific pt_regs/user_pt_regs/user_regs_struct pointer type, libbpf should still perform type rewrite for old kernels, but not emit the warning. Fix copy/paste from kernel code where 0 is meant to signify "no error" condition. For libbpf we need to return "true" to proceed with type rewrite (which for PERF_EVENT program will be a canonical `struct bpf_perf_event_data *` type). 
Fixes: 9eea8fafe33e ("libbpf: fix __arg_ctx type enforcement for perf_event programs") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240206002243.1439450-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6932f2c4ddfd..01f407591a92 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6404,13 +6404,13 @@ static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_pro case BPF_PROG_TYPE_PERF_EVENT: if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) - return 0; + return true; if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) - return 0; + return true; if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) - return 0; + return true; break; case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: -- cgit v1.2.3 From 3644d285462a60c80ac225d508fcfe705640d2b4 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 28 Feb 2024 22:45:19 -0800 Subject: libbpf: Set btf_value_type_id of struct bpf_map for struct_ops. For a struct_ops map, btf_value_type_id is the type ID of its struct type. This value is required by bpftool to generate a skeleton that includes pointers to shadow types. The code generator gets the type ID from bpf_map__btf_value_type_id() in order to get the type information of the struct type of a map. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240229064523.2091270-2-thinker.li@gmail.com --- tools/lib/bpf/libbpf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 01f407591a92..c759628f4250 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1229,6 +1229,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->name = strdup(var_name); if (!map->name) return -ENOMEM; + map->btf_value_type_id = type_id; map->def.type = BPF_MAP_TYPE_STRUCT_OPS; map->def.key_size = sizeof(int); @@ -4857,6 +4858,10 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; + break; + case BPF_MAP_TYPE_STRUCT_OPS: + create_attr.btf_value_type_id = 0; + break; default: break; } -- cgit v1.2.3 From 69e4a9d2b3f5adf5af4feeab0a9f505da971265a Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 28 Feb 2024 22:45:20 -0800 Subject: libbpf: Convert st_ops->data to shadow type. Convert st_ops->data to the shadow type of the struct_ops map. The shadow type of a struct_ops type is a variant of the original struct type providing a way to access/change the values in the maps of the struct_ops type. bpf_map__initial_value() will return st_ops->data for struct_ops types. The skeleton is going to use it as the pointer to the shadow type of the original struct type. One of the main differences between the original struct type and the shadow type is that all function pointers of the shadow type are converted to pointers to struct bpf_program. Users can replace these bpf_program pointers with other BPF programs.
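With a bpftool-generated skeleton this is intended to allow assignments of the following shape before load (map and program names are hypothetical):

    /* swap in a different implementation via the shadow struct */
    skel->struct_ops.my_ops->handle_event = skel->progs.alt_handle_event;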
The st_ops->progs[] will be updated before updating the value of a map to reflect the changes made by users. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240229064523.2091270-3-thinker.li@gmail.com --- tools/lib/bpf/libbpf.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c759628f4250..6c2979f1b471 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1014,6 +1014,19 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map) return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; } +static bool is_valid_st_ops_program(struct bpf_object *obj, + const struct bpf_program *prog) +{ + int i; + + for (i = 0; i < obj->nr_programs; i++) { + if (&obj->programs[i] == prog) + return prog->type == BPF_PROG_TYPE_STRUCT_OPS; + } + + return false; +} + /* Init the map's fields that depend on kern_btf */ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { @@ -1102,9 +1115,16 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) if (btf_is_ptr(mtype)) { struct bpf_program *prog; - prog = st_ops->progs[i]; + /* Update the value from the shadow type */ + prog = *(void **)mdata; + st_ops->progs[i] = prog; if (!prog) continue; + if (!is_valid_st_ops_program(obj, prog)) { + pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n", + map->name, mname); + return -ENOTSUP; + } kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, @@ -9310,7 +9330,9 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, return NULL; } -/* Collect the reloc from ELF and populate the st_ops->progs[] */ +/* Collect the reloc from ELF, populate the st_ops->progs[], and update + * st_ops->data for shadow type. + */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { @@ -9424,6 +9446,14 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } st_ops->progs[member_idx] = prog; + + /* st_ops->data will be exposed to users, being returned by + * bpf_map__initial_value() as a pointer to the shadow + * type. All function pointers in the original struct type + * should be converted to a pointer to struct bpf_program + * in the shadow type. + */ + *((struct bpf_program **)(st_ops->data + moff)) = prog; } return 0; @@ -9882,6 +9912,12 @@ int bpf_map__set_initial_value(struct bpf_map *map, void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) { + if (bpf_map__is_struct_ops(map)) { + if (psize) + *psize = map->def.value_size; + return map->st_ops->data; + } + if (!map->mmaped) return NULL; *psize = map->def.value_size; -- cgit v1.2.3 From a2a5172cf1eb39472bd2038079f65c6f676906a5 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:15 +0200 Subject: libbpf: Allow version suffixes (___smth) for struct_ops types E.g. allow the following struct_ops definitions: struct bpf_testmod_ops___v1 { int (*test)(void); }; struct bpf_testmod_ops___v2 { int (*test)(void); }; SEC(".struct_ops.link") struct bpf_testmod_ops___v1 a = { .test = ... } SEC(".struct_ops.link") struct bpf_testmod_ops___v2 b = { .test = ... } Where both bpf_testmod_ops___v1 and bpf_testmod_ops___v2 would be resolved as 'struct bpf_testmod_ops' from kernel BTF.
Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-2-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6c2979f1b471..e2a4c409980b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -948,7 +948,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind); static int -find_struct_ops_kern_types(struct bpf_object *obj, const char *tname, +find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, struct module_btf **mod_btf, const struct btf_type **type, __u32 *type_id, const struct btf_type **vtype, __u32 *vtype_id, @@ -958,8 +958,12 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname, const struct btf_member *kern_data_member; struct btf *btf; __s32 kern_vtype_id, kern_type_id; + char tname[256]; __u32 i; + snprintf(tname, sizeof(tname), "%.*s", + (int)bpf_core_essential_name_len(tname_raw), tname_raw); + kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT, &btf, mod_btf); if (kern_type_id < 0) { -- cgit v1.2.3 From d9ab2f76ef5abb76190ffb42d83bdc6caede807e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:16 +0200 Subject: libbpf: Tie struct_ops programs to kernel BTF ids, not to local ids Enforce the following existing limitation on struct_ops programs based on kernel BTF id instead of program-local BTF id: struct_ops BPF prog can be re-used between multiple .struct_ops & .struct_ops.link as long as it's the same struct_ops struct definition and the same function pointer field This allows reusing same BPF program for versioned struct_ops map definitions, e.g.: SEC("struct_ops/test") int BPF_PROG(foo) { ... 
} struct some_ops___v1 { int (*test)(void); }; struct some_ops___v2 { int (*test)(void); }; SEC(".struct_ops.link") struct some_ops___v1 a = { .test = foo } SEC(".struct_ops.link") struct some_ops___v2 b = { .test = foo } Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-3-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2a4c409980b..2c0cb72bc7a4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1146,8 +1146,32 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) if (mod_btf) prog->attach_btf_obj_fd = mod_btf->fd; - prog->attach_btf_id = kern_type_id; - prog->expected_attach_type = kern_member_idx; + + /* if we haven't yet processed this BPF program, record proper + * attach_btf_id and member_idx + */ + if (!prog->attach_btf_id) { + prog->attach_btf_id = kern_type_id; + prog->expected_attach_type = kern_member_idx; + } + + /* struct_ops BPF prog can be re-used between multiple + * .struct_ops & .struct_ops.link as long as it's the + * same struct_ops struct definition and the same + * function pointer field + */ + if (prog->attach_btf_id != kern_type_id) { + pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n", + map->name, mname, prog->name, prog->sec_name, prog->type, + prog->attach_btf_id, kern_type_id); + return -EINVAL; + } + if (prog->expected_attach_type != kern_member_idx) { + pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n", + map->name, mname, prog->name, prog->sec_name, prog->type, + prog->expected_attach_type, kern_member_idx); + return -EINVAL; + } st_ops->kern_func_off[i] = kern_data_off + kern_moff; @@ -9428,27 +9452,6 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -EINVAL; } - /* if we haven't yet processed this BPF program, record proper - * attach_btf_id and member_idx - */ - if (!prog->attach_btf_id) { - prog->attach_btf_id = st_ops->type_id; - prog->expected_attach_type = member_idx; - } - - /* struct_ops BPF prog can be re-used between multiple - * .struct_ops & .struct_ops.link as long as it's the - * same struct_ops struct definition and the same - * function pointer field - */ - if (prog->attach_btf_id != st_ops->type_id || - prog->expected_attach_type != member_idx) { - pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", - map->name, prog->name, prog->sec_name, prog->type, - prog->attach_btf_id, prog->expected_attach_type, name); - return -EINVAL; - } - st_ops->progs[member_idx] = prog; /* st_ops->data will be exposed to users, being returned by -- cgit v1.2.3 From 8db052615a9780b45e26d6afabc7abefe1ba20ac Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:17 +0200 Subject: libbpf: Honor autocreate flag for struct_ops maps Skip load steps for struct_ops maps not marked for automatic creation. This should allow to load bpf object in situations like below: SEC("struct_ops/foo") int BPF_PROG(foo) { ... } SEC("struct_ops/bar") int BPF_PROG(bar) { ... 
} struct test_ops___v1 { int (*foo)(void); }; struct test_ops___v2 { int (*foo)(void); int (*does_not_exist)(void); }; SEC(".struct_ops.link") struct test_ops___v1 map_for_old = { .foo = (void *)foo }; SEC(".struct_ops.link") struct test_ops___v2 map_for_new = { .foo = (void *)foo, .does_not_exist = (void *)bar }; Suppose the program is loaded on an old kernel that does not have a definition for the 'does_not_exist' struct_ops member. After this commit it becomes possible to load such an object file with the following tweaks: bpf_program__set_autoload(skel->progs.bar, false); bpf_map__set_autocreate(skel->maps.map_for_new, false); Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20240306104529.6453-4-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2c0cb72bc7a4..1fb9a4f237b4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1212,6 +1212,9 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) if (!bpf_map__is_struct_ops(map)) continue; + if (!map->autocreate) + continue; + err = bpf_map__init_kern_struct_ops(map); if (err) return err; @@ -8133,11 +8136,20 @@ static void bpf_map_prepare_vdata(const struct bpf_map *map) static int bpf_object_prepare_struct_ops(struct bpf_object *obj) { + struct bpf_map *map; int i; - for (i = 0; i < obj->nr_maps; i++) - if (bpf_map__is_struct_ops(&obj->maps[i])) - bpf_map_prepare_vdata(&obj->maps[i]); + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + + if (!bpf_map__is_struct_ops(map)) + continue; + + if (!map->autocreate) + continue; + + bpf_map_prepare_vdata(map); + } return 0; } -- cgit v1.2.3 From fe9d049c3da06373a1a35914b7f695509e4cb1fe Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:22 +0200 Subject: libbpf: Sync progs autoload with maps autocreate for struct_ops maps Automatically select which struct_ops programs to load depending on which struct_ops maps are selected for automatic creation. E.g. for the BPF code below: SEC("struct_ops/test_1") int BPF_PROG(foo) { ... } SEC("struct_ops/test_2") int BPF_PROG(bar) { ... } SEC(".struct_ops.link") struct test_ops___v1 A = { .foo = (void *)foo }; SEC(".struct_ops.link") struct test_ops___v2 B = { .foo = (void *)foo, .bar = (void *)bar, }; And the following libbpf API calls: bpf_map__set_autocreate(skel->maps.A, true); bpf_map__set_autocreate(skel->maps.B, false); The autoload would be enabled for program 'foo' and disabled for program 'bar'. During load, for each struct_ops program P, referenced from some struct_ops map M: - set P.autoload = true if M.autocreate is true for some M; - set P.autoload = false if M.autocreate is false for all M; - don't change P.autoload, if P is not referenced from any map. Do this after bpf_object__init_kern_struct_ops_maps() to make sure that shadow vars assignment is done.
Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-9-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1fb9a4f237b4..595c287d84ef 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1031,6 +1031,48 @@ static bool is_valid_st_ops_program(struct bpf_object *obj, return false; } +/* For each struct_ops program P, referenced from some struct_ops map M, + * enable P.autoload if there are Ms for which M.autocreate is true, + * disable P.autoload if for all Ms M.autocreate is false. + * Don't change P.autoload for programs that are not referenced from any maps. + */ +static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj) +{ + struct bpf_program *prog, *slot_prog; + struct bpf_map *map; + int i, j, k, vlen; + + for (i = 0; i < obj->nr_programs; ++i) { + int should_load = false; + int use_cnt = 0; + + prog = &obj->programs[i]; + if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) + continue; + + for (j = 0; j < obj->nr_maps; ++j) { + map = &obj->maps[j]; + if (!bpf_map__is_struct_ops(map)) + continue; + + vlen = btf_vlen(map->st_ops->type); + for (k = 0; k < vlen; ++k) { + slot_prog = map->st_ops->progs[k]; + if (prog != slot_prog) + continue; + + use_cnt++; + if (map->autocreate) + should_load = true; + } + } + if (use_cnt) + prog->autoload = should_load; + } + + return 0; +} + /* Init the map's fields that depend on kern_btf */ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { @@ -8175,6 +8217,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object_adjust_struct_ops_autoload(obj); err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__create_maps(obj); -- cgit v1.2.3 From 240bf8a5162e8c43cf368909582a01082d494d79 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:24 +0200 Subject: libbpf: Replace elf_state->st_ops_* fields with SEC_ST_OPS sec_type The next patch would add two new section names for struct_ops maps. To make working with multiple struct_ops sections more convenient: - remove fields like elf_state->st_ops_{shndx,link_shndx}; - mark section descriptions hosting struct_ops as elf_sec_desc->sec_type == SEC_ST_OPS; After these changes struct_ops sections could be processed uniformly by iterating bpf_object->efile.secs entries. 
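I.e. all struct_ops sections can now be located with a loop of this shape (a sketch mirroring the reworked bpf_object_init_struct_ops() below):

    for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; sec_idx++) {
        if (obj->efile.secs[sec_idx].sec_type != SEC_ST_OPS)
            continue;
        /* process one struct_ops section */
    }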
Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-11-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 61 ++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 29 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 595c287d84ef..cff72de42bf4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -612,6 +612,7 @@ enum sec_type { SEC_BSS, SEC_DATA, SEC_RODATA, + SEC_ST_OPS, }; struct elf_sec_desc { @@ -627,8 +628,6 @@ struct elf_state { Elf *elf; Elf64_Ehdr *ehdr; Elf_Data *symbols; - Elf_Data *st_ops_data; - Elf_Data *st_ops_link_data; size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct elf_sec_desc *secs; @@ -637,8 +636,7 @@ struct elf_state { __u32 btf_maps_sec_btf_id; int text_shndx; int symbols_shndx; - int st_ops_shndx; - int st_ops_link_shndx; + bool has_st_ops; }; struct usdt_manager; @@ -1266,7 +1264,7 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) } static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, - int shndx, Elf_Data *data, __u32 map_flags) + int shndx, Elf_Data *data) { const struct btf_type *type, *datasec; const struct btf_var_secinfo *vsi; @@ -1328,7 +1326,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->def.key_size = sizeof(int); map->def.value_size = type->size; map->def.max_entries = 1; - map->def.map_flags = map_flags; + map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -1363,15 +1361,25 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, static int bpf_object_init_struct_ops(struct bpf_object *obj) { - int err; + const char *sec_name; + int sec_idx, err; - err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx, - obj->efile.st_ops_data, 0); - err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC, - obj->efile.st_ops_link_shndx, - obj->efile.st_ops_link_data, - BPF_F_LINK); - return err; + for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) { + struct elf_sec_desc *desc = &obj->efile.secs[sec_idx]; + + if (desc->sec_type != SEC_ST_OPS) + continue; + + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + if (!sec_name) + return -LIBBPF_ERRNO__FORMAT; + + err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data); + if (err) + return err; + } + + return 0; } static struct bpf_object *bpf_object__new(const char *path, @@ -1409,8 +1417,6 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; - obj->efile.st_ops_shndx = -1; - obj->efile.st_ops_link_shndx = -1; obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); @@ -1427,8 +1433,6 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; obj->efile.symbols = NULL; - obj->efile.st_ops_data = NULL; - obj->efile.st_ops_link_data = NULL; zfree(&obj->efile.secs); obj->efile.sec_cnt = 0; @@ -2973,14 +2977,13 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) static bool libbpf_needs_btf(const struct bpf_object *obj) { return obj->efile.btf_maps_shndx >= 0 || - obj->efile.st_ops_shndx >= 0 || - obj->efile.st_ops_link_shndx >= 0 || + 
obj->efile.has_st_ops || obj->nr_extern > 0; } static bool kernel_needs_btf(const struct bpf_object *obj) { - return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0; + return obj->efile.has_st_ops; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -3681,12 +3684,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RODATA; sec_desc->shdr = sh; sec_desc->data = data; - } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { - obj->efile.st_ops_data = data; - obj->efile.st_ops_shndx = idx; - } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { - obj->efile.st_ops_link_data = data; - obj->efile.st_ops_link_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || + strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { + sec_desc->sec_type = SEC_ST_OPS; + sec_desc->shdr = sh; + sec_desc->data = data; + obj->efile.has_st_ops = true; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -6999,12 +7002,12 @@ static int bpf_object__collect_relos(struct bpf_object *obj) data = sec_desc->data; idx = shdr->sh_info; - if (shdr->sh_type != SHT_REL) { + if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) { pr_warn("internal error at %d\n", __LINE__); return -LIBBPF_ERRNO__INTERNAL; } - if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx) + if (obj->efile.secs[idx].sec_type == SEC_ST_OPS) err = bpf_object__collect_st_ops_relos(obj, shdr, data); else if (idx == obj->efile.btf_maps_shndx) err = bpf_object__collect_map_relos(obj, shdr, data); -- cgit v1.2.3 From 5ad0ecbe056a4ea5ffaa73e58503a2f87b119a59 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:25 +0200 Subject: libbpf: Struct_ops in SEC("?.struct_ops") / SEC("?.struct_ops.link") Allow using two new section names for struct_ops maps: - SEC("?.struct_ops") - SEC("?.struct_ops.link") To specify maps that have bpf_map->autocreate == false after open. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-12-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cff72de42bf4..ec0f508b853d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1322,6 +1322,15 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, return -ENOMEM; map->btf_value_type_id = type_id; + /* Follow same convention as for programs autoload: + * SEC("?.struct_ops") means map is not created by default. + */ + if (sec_name[0] == '?') { + map->autocreate = false; + /* from now on forget there was ? in section name */ + sec_name++; + } + map->def.type = BPF_MAP_TYPE_STRUCT_OPS; map->def.key_size = sizeof(int); map->def.value_size = type->size; @@ -3685,7 +3694,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->shdr = sh; sec_desc->data = data; } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || - strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { + strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || + strcmp(name, "?" STRUCT_OPS_SEC) == 0 || + strcmp(name, "?" 
STRUCT_OPS_LINK_SEC) == 0) { sec_desc->sec_type = SEC_ST_OPS; sec_desc->shdr = sh; sec_desc->data = data; @@ -3705,6 +3716,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && + strcmp(name, ".rel?" STRUCT_OPS_SEC) && + strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", idx, name, targ_sec_idx, -- cgit v1.2.3 From 6ebaa3fb88bbe4c33a0e01ce27007e1dd4fd133c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:26 +0200 Subject: libbpf: Rewrite btf datasec names starting from '?' Optional struct_ops maps are defined using question mark at the start of the section name, e.g.: SEC("?.struct_ops") struct test_ops optional_map = { ... }; This commit teaches libbpf to detect if kernel allows '?' prefix in datasec names, and if it doesn't then to rewrite such names by replacing '?' with '_', e.g.: DATASEC ?.struct_ops -> DATASEC _.struct_ops Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-13-eddyz87@gmail.com --- tools/lib/bpf/features.c | 22 ++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 19 +++++++++++++++++-- tools/lib/bpf/libbpf_internal.h | 2 ++ 3 files changed, 41 insertions(+), 2 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 6b0738ad7063..4e783cc7fc4b 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -147,6 +147,25 @@ static int probe_kern_btf_datasec(int token_fd) strs, sizeof(strs), token_fd)); } +static int probe_kern_btf_qmark_datasec(int token_fd) +{ + static const char strs[] = "\0x\0?.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC ?.data */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + static int probe_kern_btf_float(int token_fd) { static const char strs[] = "\0float"; @@ -534,6 +553,9 @@ static struct kern_feature_desc { [FEAT_ARG_CTX_TAG] = { "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag, }, + [FEAT_BTF_QMARK_DATASEC] = { + "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ec0f508b853d..672fca94ff53 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2869,6 +2869,11 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) return sh->sh_flags & SHF_EXECINSTR; } +static bool starts_with_qmark(const char *s) +{ + return s && s[0] == '?'; +} + static bool btf_needs_sanitization(struct bpf_object *obj) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); @@ -2878,9 +2883,10 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); return 
!has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag || !has_enum64; + !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; } static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2892,6 +2898,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); int enum64_placeholder_id = 0; struct btf_type *t; int i, j, vlen; @@ -2918,7 +2925,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) name = (char *)btf__name_by_offset(btf, t->name_off); while (*name) { - if (*name == '.') + if (*name == '.' || *name == '?') *name = '_'; name++; } @@ -2933,6 +2940,14 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) vt = (void *)btf__type_by_id(btf, v->type); m->name_off = vt->name_off; } + } else if (!has_qmark_datasec && btf_is_datasec(t) && + starts_with_qmark(btf__name_by_offset(btf, t->name_off))) { + /* replace '?' prefix with '_' for DATASEC names */ + char *name; + + name = (char *)btf__name_by_offset(btf, t->name_off); + if (name[0] == '?') + name[0] = '_'; } else if (!has_func && btf_is_func_proto(t)) { /* replace FUNC_PROTO with ENUM */ vlen = btf_vlen(t); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index ad936ac5e639..864b36177424 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -374,6 +374,8 @@ enum kern_feature_id { FEAT_UPROBE_MULTI_LINK, /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */ FEAT_ARG_CTX_TAG, + /* Kernel supports '?' at the front of datasec names */ + FEAT_BTF_QMARK_DATASEC, __FEAT_CNT, }; -- cgit v1.2.3 From d147357e2e5977c5fe9218457a1e359fd1d36609 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Mar 2024 19:12:26 -0800 Subject: libbpf: Allow specifying 64-bit integers in map BTF. The __uint() macro is used to specify map attributes like: __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); It is limited to 32 bits, since BTF_KIND_ARRAY has a u32 "number of elements" field in "struct btf_array". Introduce the __ulong() macro, which allows specifying values bigger than 32 bits. In the map definition, "map_extra" is the only u64 field so far.
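For illustration, here is how the two macros might be combined in a map definition (a sketch based on the arena example shown in a later patch in this series; the concrete values are arbitrary):

    struct {
            __uint(type, BPF_MAP_TYPE_ARENA);
            __uint(map_flags, BPF_F_MMAPABLE);
            __uint(max_entries, 1000);      /* fits in 32 bits: __uint() works */
            __ulong(map_extra, 2ull << 44); /* 64-bit value: needs __ulong() */
    } arena SEC(".maps");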
Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240307031228.42896-5-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf_helpers.h | 1 + tools/lib/bpf/libbpf.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 79eaa581be98..112b1504e072 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -13,6 +13,7 @@ #define __uint(name, val) int (*name)[val] #define __type(name, val) typeof(val) *name #define __array(name, val) typeof(val) *name[] +#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name /* * Helper macro to place programs, maps, license in diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 672fca94ff53..567ad367e7aa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2335,6 +2335,46 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return true; } +static bool get_map_field_long(const char *map_name, const struct btf *btf, + const struct btf_member *m, __u64 *res) +{ + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); + const char *name = btf__name_by_offset(btf, m->name_off); + + if (btf_is_ptr(t)) { + __u32 res32; + bool ret; + + ret = get_map_field_int(map_name, btf, m, &res32); + if (ret) + *res = (__u64)res32; + return ret; + } + + if (!btf_is_enum(t) && !btf_is_enum64(t)) { + pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", + map_name, name, btf_kind_str(t)); + return false; + } + + if (btf_vlen(t) != 1) { + pr_warn("map '%s': attr '%s': invalid __ulong\n", + map_name, name); + return false; + } + + if (btf_is_enum(t)) { + const struct btf_enum *e = btf_enum(t); + + *res = e->val; + } else { + const struct btf_enum64 *e = btf_enum64(t); + + *res = btf_enum64_value(e); + } + return true; +} + static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) { int len; @@ -2568,9 +2608,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; } else if (strcmp(name, "map_extra") == 0) { - __u32 map_extra; + __u64 map_extra; - if (!get_map_field_int(map_name, btf, m, &map_extra)) + if (!get_map_field_long(map_name, btf, m, &map_extra)) return -EINVAL; map_def->map_extra = map_extra; map_def->parts |= MAP_DEF_MAP_EXTRA; -- cgit v1.2.3 From 79ff13e99169ddb0e2277e046dbfb112f77dfac5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:06 -0800 Subject: libbpf: Add support for bpf_arena. mmap() bpf_arena right after creation, since the kernel needs to remember the address returned from mmap. This is user_vm_start. LLVM will generate bpf_arena_cast_user() instructions where necessary and the JIT will add the upper 32 bits of user_vm_start to such pointers. Fix up bpf_map_mmap_sz() to compute mmap size as map->value_size * map->max_entries for arrays and PAGE_SIZE * map->max_entries for arena. Don't set BTF at arena creation time, since the arena map type doesn't support it.
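As a sketch of the resulting flow (illustrative values, not code from the patch; assumes 4 KiB pages), creating an arena map and mmap()'ing it right after creation could look like:

    #include <sys/mman.h>
    #include <bpf/bpf.h>

    LIBBPF_OPTS(bpf_map_create_opts, opts,
                .map_flags = BPF_F_MMAPABLE,
                .map_extra = 0);    /* 0 lets the kernel pick user_vm_start */

    /* key/value sizes are 0; max_entries counts pages, not elements */
    int fd = bpf_map_create(BPF_MAP_TYPE_ARENA, "arena", 0, 0, 100, &opts);

    /* mmap size follows the rule above: PAGE_SIZE * max_entries */
    void *base = mmap(NULL, 100 * 4096, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, 0);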
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240308010812.89848-9-alexei.starovoitov@gmail.com --- tools/lib/bpf/libbpf.c | 47 +++++++++++++++++++++++++++++++++++-------- tools/lib/bpf/libbpf_probes.c | 7 +++++++ 2 files changed, 46 insertions(+), 8 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 567ad367e7aa..34b722071874 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -185,6 +185,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", + [BPF_MAP_TYPE_ARENA] = "arena", }; static const char * const prog_type_name[] = { @@ -1684,7 +1685,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return map; } -static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) +static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) { const long page_sz = sysconf(_SC_PAGE_SIZE); size_t map_sz; @@ -1694,6 +1695,20 @@ static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) return map_sz; } +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + const long page_sz = sysconf(_SC_PAGE_SIZE); + + switch (map->def.type) { + case BPF_MAP_TYPE_ARRAY: + return array_map_mmap_sz(map->def.value_size, map->def.max_entries); + case BPF_MAP_TYPE_ARENA: + return page_sz * map->def.max_entries; + default: + return 0; /* not supported */ + } +} + static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) { void *mmaped; @@ -1847,7 +1862,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); - mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + mmap_sz = bpf_map_mmap_sz(map); map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (map->mmaped == MAP_FAILED) { @@ -5017,6 +5032,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_ARENA: create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -5261,7 +5277,19 @@ retry: if (err < 0) goto err_out; } - + if (map->def.type == BPF_MAP_TYPE_ARENA) { + map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map), + PROT_READ | PROT_WRITE, + map->map_extra ? 
MAP_SHARED | MAP_FIXED : MAP_SHARED, + map->fd, 0); + if (map->mmaped == MAP_FAILED) { + err = -errno; + map->mmaped = NULL; + pr_warn("map '%s': failed to mmap arena: %d\n", + map->name, err); + return err; + } + } if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { err = init_map_in_map_slots(obj, map); if (err < 0) @@ -8761,7 +8789,7 @@ static void bpf_map__destroy(struct bpf_map *map) if (map->mmaped) { size_t mmap_sz; - mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + mmap_sz = bpf_map_mmap_sz(map); munmap(map->mmaped, mmap_sz); map->mmaped = NULL; } @@ -9995,11 +10023,14 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size) return libbpf_err(-EBUSY); if (map->mmaped) { - int err; size_t mmap_old_sz, mmap_new_sz; + int err; + + if (map->def.type != BPF_MAP_TYPE_ARRAY) + return -EOPNOTSUPP; - mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); - mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries); + mmap_old_sz = bpf_map_mmap_sz(map); + mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); if (err) { pr_warn("map '%s': failed to resize memory-mapped region: %d\n", @@ -13530,7 +13561,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->map_cnt; i++) { struct bpf_map *map = *s->maps[i].map; - size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + size_t mmap_sz = bpf_map_mmap_sz(map); int prot, map_fd = map->fd; void **mmaped = s->maps[i].mmaped; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index ee9b1dbea9eb..302188122439 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -338,6 +338,13 @@ static int probe_map_create(enum bpf_map_type map_type) key_size = 0; max_entries = 1; break; + case BPF_MAP_TYPE_ARENA: + key_size = 0; + value_size = 0; + max_entries = 1; /* one page */ + opts.map_extra = 0; /* can mmap() at any address */ + opts.map_flags = BPF_F_MMAPABLE; + break; case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PROG_ARRAY: -- cgit v1.2.3 From 2e7ba4f8fd1fa879b37db0b738c23ba2af8292ee Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 7 Mar 2024 17:08:08 -0800 Subject: libbpf: Recognize __arena global variables. LLVM automatically places __arena variables into the ".arena.1" ELF section. In order to use such global variables, the bpf program must include a definition of the arena map in the ".maps" section, like: struct { __uint(type, BPF_MAP_TYPE_ARENA); __uint(map_flags, BPF_F_MMAPABLE); __uint(max_entries, 1000); /* number of pages */ __ulong(map_extra, 2ull << 44); /* start of mmap() region */ } arena SEC(".maps"); libbpf recognizes both uses of arena and creates a single `struct bpf_map *` instance in libbpf APIs. The ".arena.1" ELF section data is used as the initial data image, which is exposed through the skeleton and bpf_map__initial_value() to the user, if they need to tune it before the load phase. During the load phase, this initial image is copied over into the mmap()'ed region corresponding to the arena, and discarded. A few small checks here and there had to be added to make sure this approach works with bpf_map__initial_value(), mostly due to the hard-coded assumption that map->mmaped is set up with the mmap() syscall and should be munmap()'ed. For arena, .arena.1 can be (much) smaller than the maximum arena size, so this smaller data size has to be tracked separately. Given it is enforced that there is only one arena for the entire bpf_object instance, we just keep it in a separate field. This can be generalized if necessary later. All global variables from the ".arena.1" section are accessible from user space via skel->arena->name_of_var. For bss/data/rodata the skeleton/libbpf perform the following sequence: 1. addr = mmap(MAP_ANONYMOUS) 2. user space optionally modifies global vars 3. map_fd = bpf_create_map() 4. bpf_update_map_elem(map_fd, addr) // to store values into the kernel 5. mmap(addr, MAP_FIXED, map_fd) After step 5, user space sees the values it wrote at step 2 at the same addresses. Arena doesn't support update_map_elem. Hence skeleton/libbpf do: 1. addr = malloc(sizeof SEC ".arena.1") 2. user space optionally modifies global vars 3. map_fd = bpf_create_map(MAP_TYPE_ARENA) 4. real_addr = mmap(map->map_extra, MAP_SHARED | MAP_FIXED, map_fd) 5. memcpy(real_addr, addr) // this will fault-in and allocate pages In the end, the look and feel of global data vs __arena global data is the same from the bpf prog's pov. Another complication is: struct { __uint(type, BPF_MAP_TYPE_ARENA); } arena SEC(".maps"); int __arena foo; int bar; ptr1 = &foo; // relocation against ".arena.1" section ptr2 = &arena; // relocation against ".maps" section ptr3 = &bar; // relocation against ".bss" section For the kernel, ptr1 and ptr2 point to the same arena's map_fd, while ptr3 points to a different global array's map_fd. For the verifier: ptr1->type == unknown_scalar ptr2->type == const_ptr_to_map ptr3->type == ptr_to_map_value After verification, from the JIT's pov all 3 ptr-s are normal ld_imm64 insns. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240308010812.89848-11-alexei.starovoitov@gmail.com --- tools/bpf/bpftool/gen.c | 13 ++++++ tools/lib/bpf/libbpf.c | 118 +++++++++++++++++++++++++++++++++++++++++++----- tools/lib/bpf/libbpf.h | 2 +- 3 files changed, 120 insertions(+), 13 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index a3d72be347b0..4fa4ade1ce74 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -120,6 +120,12 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; int i, n; + /* recognize hard coded LLVM section name */ + if (strcmp(sec_name, ".arena.1") == 0) { + /* this is the name to use in skeleton */ + snprintf(buf, buf_sz, "arena"); + return true; + } for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) { const char *pfx = pfxs[i]; @@ -250,6 +256,13 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz) { + size_t tmp_sz; + + if (bpf_map__type(map) == BPF_MAP_TYPE_ARENA && bpf_map__initial_value(map, &tmp_sz)) { + snprintf(buf, sz, "arena"); + return true; + } + if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) return false; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 34b722071874..efab29b8935b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -498,6 +498,7 @@ struct bpf_struct_ops { #define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" #define STRUCT_OPS_LINK_SEC ".struct_ops.link" +#define ARENA_SEC ".arena.1" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -629,6 +630,7 @@ struct elf_state { Elf *elf;
Elf64_Ehdr *ehdr; Elf_Data *symbols; + Elf_Data *arena_data; size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct elf_sec_desc *secs; @@ -638,6 +640,7 @@ struct elf_state { int text_shndx; int symbols_shndx; bool has_st_ops; + int arena_data_shndx; }; struct usdt_manager; @@ -697,6 +700,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct bpf_map *arena_map; + void *arena_data; + size_t arena_data_sz; + struct kern_feature_cache *feat_cache; char *token_path; int token_fd; @@ -1443,6 +1450,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; obj->efile.symbols = NULL; + obj->efile.arena_data = NULL; zfree(&obj->efile.secs); obj->efile.sec_cnt = 0; @@ -1851,7 +1859,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def->value_size = data_sz; def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG - ? BPF_F_RDONLY_PROG : 0; + ? BPF_F_RDONLY_PROG : 0; /* failures are fine because of maps like .rodata.str1.1 */ (void) map_fill_btf_type_info(obj, map); @@ -2843,6 +2851,32 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return 0; } +static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, + const char *sec_name, int sec_idx, + void *data, size_t data_sz) +{ + const long page_sz = sysconf(_SC_PAGE_SIZE); + size_t mmap_sz; + + mmap_sz = bpf_map_mmap_sz(obj->arena_map); + if (roundup(data_sz, page_sz) > mmap_sz) { + pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", + sec_name, mmap_sz, data_sz); + return -E2BIG; + } + + obj->arena_data = malloc(data_sz); + if (!obj->arena_data) + return -ENOMEM; + memcpy(obj->arena_data, data, data_sz); + obj->arena_data_sz = data_sz; + + /* make bpf_map__init_value() work for ARENA maps */ + map->mmaped = obj->arena_data; + + return 0; +} + static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, const char *pin_root_path) { @@ -2892,6 +2926,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } + for (i = 0; i < obj->nr_maps; i++) { + struct bpf_map *map = &obj->maps[i]; + + if (map->def.type != BPF_MAP_TYPE_ARENA) + continue; + + if (obj->arena_map) { + pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", + map->name, obj->arena_map->name); + return -EINVAL; + } + obj->arena_map = map; + + if (obj->efile.arena_data) { + err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, + obj->efile.arena_data->d_buf, + obj->efile.arena_data->d_size); + if (err) + return err; + } + } + if (obj->efile.arena_data && !obj->arena_map) { + pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", + ARENA_SEC); + return -ENOENT; + } + return 0; } @@ -3771,6 +3832,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->shdr = sh; sec_desc->data = data; obj->efile.has_st_ops = true; + } else if (strcmp(name, ARENA_SEC) == 0) { + obj->efile.arena_data = data; + obj->efile.arena_data_shndx = idx; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -4400,6 +4464,15 @@ static int bpf_program__record_reloc(struct bpf_program *prog, type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); + /* arena data 
relocation */ + if (shdr_idx == obj->efile.arena_data_shndx) { + reloc_desc->type = RELO_DATA; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = obj->arena_map - obj->maps; + reloc_desc->sym_off = sym->st_value; + return 0; + } + /* generic map reference relocation */ if (type == LIBBPF_MAP_UNSPEC) { if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { @@ -4940,6 +5013,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); return 0; } + err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; @@ -5289,6 +5363,10 @@ retry: map->name, err); return err; } + if (obj->arena_data) { + memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); + zfree(&obj->arena_data); + } } if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { err = init_map_in_map_slots(obj, map); @@ -8786,13 +8864,9 @@ static void bpf_map__destroy(struct bpf_map *map) zfree(&map->init_slots); map->init_slots_sz = 0; - if (map->mmaped) { - size_t mmap_sz; - - mmap_sz = bpf_map_mmap_sz(map); - munmap(map->mmaped, mmap_sz); - map->mmaped = NULL; - } + if (map->mmaped && map->mmaped != map->obj->arena_data) + munmap(map->mmaped, bpf_map_mmap_sz(map)); + map->mmaped = NULL; if (map->st_ops) { zfree(&map->st_ops->data); @@ -8852,6 +8926,8 @@ void bpf_object__close(struct bpf_object *obj) if (obj->token_fd > 0) close(obj->token_fd); + zfree(&obj->arena_data); + free(obj); } @@ -10063,18 +10139,26 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size) { + size_t actual_sz; + if (map->obj->loaded || map->reused) return libbpf_err(-EBUSY); - if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || - size != map->def.value_size) + if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) + return libbpf_err(-EINVAL); + + if (map->def.type == BPF_MAP_TYPE_ARENA) + actual_sz = map->obj->arena_data_sz; + else + actual_sz = map->def.value_size; + if (size != actual_sz) return libbpf_err(-EINVAL); memcpy(map->mmaped, data, size); return 0; } -void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize) { if (bpf_map__is_struct_ops(map)) { if (psize) @@ -10084,7 +10168,12 @@ void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) if (!map->mmaped) return NULL; - *psize = map->def.value_size; + + if (map->def.type == BPF_MAP_TYPE_ARENA) + *psize = map->obj->arena_data_sz; + else + *psize = map->def.value_size; + return map->mmaped; } @@ -13573,6 +13662,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) continue; } + if (map->def.type == BPF_MAP_TYPE_ARENA) { + *mmaped = map->mmaped; + continue; + } + if (map->def.map_flags & BPF_F_RDONLY_PROG) prot = PROT_READ; else diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5723cbbfcc41..7b510761f545 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1014,7 +1014,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); -LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize); /** * @brief **bpf_map__is_internal()** tells the caller whether or not the -- cgit v1.2.3 From 10ebe835c937a11870690aa44c7c970fe906ff54 Mon Sep 17 
00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Mar 2024 19:18:32 -0700 Subject: libbpf, selftests/bpf: Adjust libbpf, bpftool, selftests to match LLVM The selftests use the __BPF_FEATURE_ARENA_CAST macro and __attribute__((address_space(N))) annotations to tell LLVM about special pointers. For LLVM there is nothing "arena" about them. They are simply pointers in a different address space. Hence LLVM diff https://github.com/llvm/llvm-project/pull/85161 renamed: . macro __BPF_FEATURE_ARENA_CAST -> __BPF_FEATURE_ADDR_SPACE_CAST . global variables in __attribute__((address_space(N))) are now placed in a section named ".addr_space.N" instead of ".arena.N". Adjust libbpf, bpftool, and selftests to match LLVM. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20240315021834.62988-3-alexei.starovoitov@gmail.com --- tools/bpf/bpftool/gen.c | 2 +- tools/lib/bpf/libbpf.c | 2 +- tools/testing/selftests/bpf/bpf_arena_common.h | 2 +- tools/testing/selftests/bpf/progs/arena_htab.c | 2 +- tools/testing/selftests/bpf/progs/arena_list.c | 10 +++++----- tools/testing/selftests/bpf/progs/verifier_arena.c | 4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 4fa4ade1ce74..540c0f2c4fda 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -121,7 +121,7 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) int i, n; /* recognize hard coded LLVM section name */ - if (strcmp(sec_name, ".arena.1") == 0) { + if (strcmp(sec_name, ".addr_space.1") == 0) { /* this is the name to use in skeleton */ snprintf(buf, buf_sz, "arena"); return true; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index efab29b8935b..36e26f4f5997 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -498,7 +498,7 @@ struct bpf_struct_ops { #define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" #define STRUCT_OPS_LINK_SEC ".struct_ops.link" -#define ARENA_SEC ".arena.1" +#define ARENA_SEC ".addr_space.1" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index bcf195c64a45..567491f3e1b5 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -32,7 +32,7 @@ */ #endif -#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM) #define __arena __attribute__((address_space(1))) #define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ #define cast_user(ptr) /* nop for bpf prog.
emitted by LLVM */ diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c index b7bb712cacfd..1e6ac187a6a0 100644 --- a/tools/testing/selftests/bpf/progs/arena_htab.c +++ b/tools/testing/selftests/bpf/progs/arena_htab.c @@ -22,7 +22,7 @@ int zero = 0; SEC("syscall") int arena_htab_llvm(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM) struct htab __arena *htab; __u64 i; diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c index cd35b8448435..c0422c58cee2 100644 --- a/tools/testing/selftests/bpf/progs/arena_list.c +++ b/tools/testing/selftests/bpf/progs/arena_list.c @@ -30,13 +30,13 @@ int list_sum; int cnt; bool skip = false; -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST long __arena arena_sum; int __arena test_val = 1; struct arena_list_head __arena global_head; #else -long arena_sum SEC(".arena.1"); -int test_val SEC(".arena.1"); +long arena_sum SEC(".addr_space.1"); +int test_val SEC(".addr_space.1"); #endif int zero; @@ -44,7 +44,7 @@ int zero; SEC("syscall") int arena_list_add(void *ctx) { -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST __u64 i; list_head = &global_head; @@ -66,7 +66,7 @@ int arena_list_add(void *ctx) SEC("syscall") int arena_list_del(void *ctx) { -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST struct elem __arena *n; int sum = 0; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 5540b05ff9ee..969bc091060b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -19,7 +19,7 @@ SEC("syscall") __success __retval(0) int basic_alloc1(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) volatile int __arena *page1, *page2, *no_page, *page3; page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); @@ -58,7 +58,7 @@ SEC("syscall") __success __retval(0) int basic_alloc2(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) volatile char __arena *page1, *page2, *page3, *page4; page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0); -- cgit v1.2.3 From 5ab8cb89dbb6f3e111c8b0a9a86496da23c94439 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 19 Mar 2024 14:51:43 -0700 Subject: libbpf: fix u64-to-pointer cast on 32-bit arches It's been reported that (void *)map->map_extra is causing compilation warnings on 32-bit architectures. It's easy enough to fix this by casting to long first. 
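To see why (a generic illustration, not code from the patch): map_extra is a __u64, so on a 32-bit target a direct cast narrows the integer and GCC warns, while going through long first makes the narrowing explicit:

    #include <linux/types.h>

    __u64 map_extra = 0;                /* 64-bit map attribute */
    void *p1 = (void *)map_extra;       /* 32-bit build warns: cast to pointer
                                         * from integer of different size */
    void *p2 = (void *)(long)map_extra; /* explicit narrowing, no warning */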
Fixes: 79ff13e99169 ("libbpf: Add support for bpf_arena.") Reported-by: Ryan Eatmon Signed-off-by: Andrii Nakryiko Message-ID: <20240319215143.1279312-1-andrii@kernel.org> Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 36e26f4f5997..1e3e697b98bc 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5352,8 +5352,8 @@ retry: goto err_out; } if (map->def.type == BPF_MAP_TYPE_ARENA) { - map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map), - PROT_READ | PROT_WRITE, + map->mmaped = mmap((void *)(long)map->map_extra, + bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, map->fd, 0); if (map->mmaped == MAP_FAILED) { -- cgit v1.2.3 From ddb2ffdc474a3000887dc776b971d04bde29decc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Mar 2024 13:01:58 -0300 Subject: libbpf: Define MFD_CLOEXEC if not available Since libbpf now goes directly to the syscall to cope with systems where memfd_create() is not available, do the same for its MFD_CLOEXEC flag, defining it if not available. This fixes the build on those systems; the problem was noticed while building perf on a set of build containers. Fixes: 9fa5e1a180aa639f ("libbpf: Call memfd_create() syscall directly") Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/ZfxZ9nCyKvwmpKkE@x1 --- tools/lib/bpf/libbpf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/lib/bpf/libbpf.c') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1e3e697b98bc..a2061fcd612d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1650,6 +1650,10 @@ static int sys_memfd_create(const char *name, unsigned flags) return syscall(__NR_memfd_create, name, flags); } +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + static int create_placeholder_fd(void) { int fd; -- cgit v1.2.3
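Taken together with the earlier "libbpf: Call memfd_create() syscall directly" change, the resulting pattern looks roughly like this (a standalone sketch of the idea, not the verbatim libbpf code):

    #include <unistd.h>
    #include <sys/syscall.h>

    /* some older libc headers ship neither memfd_create() nor its flags */
    #ifndef MFD_CLOEXEC
    #define MFD_CLOEXEC 0x0001U
    #endif

    static int sys_memfd_create(const char *name, unsigned int flags)
    {
            /* go straight to the syscall so systems without a libc
             * memfd_create() wrapper still build and run
             */
            return syscall(__NR_memfd_create, name, flags);
    }

    /* usage: int fd = sys_memfd_create("libbpf-placeholder", MFD_CLOEXEC); */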