summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/bpf_iter.c9
-rw-r--r--kernel/bpf/bpf_lsm.c81
-rw-r--r--kernel/bpf/bpf_struct_ops.c7
-rw-r--r--kernel/bpf/btf.c234
-rw-r--r--kernel/bpf/cgroup.c416
-rw-r--r--kernel/bpf/core.c32
-rw-r--r--kernel/bpf/helpers.c12
-rw-r--r--kernel/bpf/percpu_freelist.c20
-rw-r--r--kernel/bpf/syscall.c25
-rw-r--r--kernel/bpf/trampoline.c262
-rw-r--r--kernel/bpf/verifier.c287
-rw-r--r--kernel/events/core.c16
-rw-r--r--kernel/sysctl.c41
-rw-r--r--kernel/trace/bpf_trace.c4
-rw-r--r--kernel/trace/trace_uprobe.c7
15 files changed, 1184 insertions, 269 deletions
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index d5d96ceca105..7e8fd49406f6 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -723,9 +723,6 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.arg4_type = ARG_ANYTHING,
};
-/* maximum number of loops */
-#define MAX_LOOPS BIT(23)
-
BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
u64, flags)
{
@@ -733,9 +730,13 @@ BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
u64 ret;
u32 i;
+ /* Note: these safety checks are also verified when bpf_loop
+ * is inlined, be careful to modify this code in sync. See
+ * function verifier.c:inline_bpf_loop.
+ */
if (flags)
return -EINVAL;
- if (nr_loops > MAX_LOOPS)
+ if (nr_loops > BPF_MAX_LOOPS)
return -E2BIG;
for (i = 0; i < nr_loops; i++) {
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index c1351df9f7ee..d469b7f3deef 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -16,6 +16,7 @@
#include <linux/bpf_local_storage.h>
#include <linux/btf_ids.h>
#include <linux/ima.h>
+#include <linux/bpf-cgroup.h>
/* For every LSM hook that allows attachment of BPF programs, declare a nop
* function where a BPF program can be attached.
@@ -35,6 +36,57 @@ BTF_SET_START(bpf_lsm_hooks)
#undef LSM_HOOK
BTF_SET_END(bpf_lsm_hooks)
+/* List of LSM hooks that should operate on 'current' cgroup regardless
+ * of function signature.
+ */
+BTF_SET_START(bpf_lsm_current_hooks)
+/* operate on freshly allocated sk without any cgroup association */
+BTF_ID(func, bpf_lsm_sk_alloc_security)
+BTF_ID(func, bpf_lsm_sk_free_security)
+BTF_SET_END(bpf_lsm_current_hooks)
+
+/* List of LSM hooks that trigger while the socket is properly locked.
+ */
+BTF_SET_START(bpf_lsm_locked_sockopt_hooks)
+BTF_ID(func, bpf_lsm_socket_sock_rcv_skb)
+BTF_ID(func, bpf_lsm_sock_graft)
+BTF_ID(func, bpf_lsm_inet_csk_clone)
+BTF_ID(func, bpf_lsm_inet_conn_established)
+BTF_SET_END(bpf_lsm_locked_sockopt_hooks)
+
+/* List of LSM hooks that trigger while the socket is _not_ locked,
+ * but it's ok to call bpf_{g,s}etsockopt because the socket is still
+ * in the early init phase.
+ */
+BTF_SET_START(bpf_lsm_unlocked_sockopt_hooks)
+BTF_ID(func, bpf_lsm_socket_post_create)
+BTF_ID(func, bpf_lsm_socket_socketpair)
+BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks)
+
+void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
+ bpf_func_t *bpf_func)
+{
+ const struct btf_param *args;
+
+ if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
+ btf_id_set_contains(&bpf_lsm_current_hooks,
+ prog->aux->attach_btf_id)) {
+ *bpf_func = __cgroup_bpf_run_lsm_current;
+ return;
+ }
+
+ args = btf_params(prog->aux->attach_func_proto);
+
+#ifdef CONFIG_NET
+ if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET])
+ *bpf_func = __cgroup_bpf_run_lsm_socket;
+ else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK])
+ *bpf_func = __cgroup_bpf_run_lsm_sock;
+ else
+#endif
+ *bpf_func = __cgroup_bpf_run_lsm_current;
+}
+
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
{
@@ -158,6 +210,35 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
case BPF_FUNC_get_attach_cookie:
return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
+ case BPF_FUNC_get_local_storage:
+ return prog->expected_attach_type == BPF_LSM_CGROUP ?
+ &bpf_get_local_storage_proto : NULL;
+ case BPF_FUNC_set_retval:
+ return prog->expected_attach_type == BPF_LSM_CGROUP ?
+ &bpf_set_retval_proto : NULL;
+ case BPF_FUNC_get_retval:
+ return prog->expected_attach_type == BPF_LSM_CGROUP ?
+ &bpf_get_retval_proto : NULL;
+ case BPF_FUNC_setsockopt:
+ if (prog->expected_attach_type != BPF_LSM_CGROUP)
+ return NULL;
+ if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks,
+ prog->aux->attach_btf_id))
+ return &bpf_sk_setsockopt_proto;
+ if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks,
+ prog->aux->attach_btf_id))
+ return &bpf_unlocked_sk_setsockopt_proto;
+ return NULL;
+ case BPF_FUNC_getsockopt:
+ if (prog->expected_attach_type != BPF_LSM_CGROUP)
+ return NULL;
+ if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks,
+ prog->aux->attach_btf_id))
+ return &bpf_sk_getsockopt_proto;
+ if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks,
+ prog->aux->attach_btf_id))
+ return &bpf_unlocked_sk_getsockopt_proto;
+ return NULL;
default:
return tracing_prog_func_proto(func_id, prog);
}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index d9a3c9207240..7e0068c3399c 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto unlock;
}
- /* Error during st_ops->reg(). It is very unlikely since
- * the above init_member() should have caught it earlier
- * before reg(). The only possibility is if there was a race
- * in registering the struct_ops (under the same name) to
+ /* Error during st_ops->reg(). Can happen if this struct_ops needs to be
+ * verified as a whole, after all init_member() calls. Can also happen if
+ * there was a race in registering the struct_ops (under the same name) to
* a sub-system through different struct_ops's maps.
*/
set_memory_nx((long)st_map->image, 1);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index eb12d4f705cc..4423045b8ff3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -309,6 +309,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
[BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
};
const char *btf_type_str(const struct btf_type *t)
@@ -666,6 +667,7 @@ static bool btf_type_has_size(const struct btf_type *t)
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM64:
return true;
}
@@ -711,6 +713,11 @@ static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t)
return (const struct btf_decl_tag *)(t + 1);
}
+static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t)
+{
+ return (const struct btf_enum64 *)(t + 1);
+}
+
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
@@ -1019,6 +1026,7 @@ static const char *btf_show_name(struct btf_show *show)
parens = "{";
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
prefix = "enum";
break;
default:
@@ -1834,6 +1842,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM64:
size = type->size;
goto resolved;
@@ -3670,6 +3679,7 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
{
const struct btf_enum *enums = btf_type_enum(t);
struct btf *btf = env->btf;
+ const char *fmt_str;
u16 i, nr_enums;
u32 meta_needed;
@@ -3683,11 +3693,6 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
- btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
- return -EINVAL;
- }
-
if (t->size > 8 || !is_power_of_2(t->size)) {
btf_verifier_log_type(env, t, "Unexpected size");
return -EINVAL;
@@ -3718,7 +3723,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
if (env->log.level == BPF_LOG_KERNEL)
continue;
- btf_verifier_log(env, "\t%s val=%d\n",
+ fmt_str = btf_type_kflag(t) ? "\t%s val=%d\n" : "\t%s val=%u\n";
+ btf_verifier_log(env, fmt_str,
__btf_name_by_offset(btf, enums[i].name_off),
enums[i].val);
}
@@ -3759,7 +3765,10 @@ static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
return;
}
- btf_show_type_value(show, "%d", v);
+ if (btf_type_kflag(t))
+ btf_show_type_value(show, "%d", v);
+ else
+ btf_show_type_value(show, "%u", v);
btf_show_end_type(show);
}
@@ -3772,6 +3781,109 @@ static struct btf_kind_operations enum_ops = {
.show = btf_enum_show,
};
+static s32 btf_enum64_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_enum64 *enums = btf_type_enum64(t);
+ struct btf *btf = env->btf;
+ const char *fmt_str;
+ u16 i, nr_enums;
+ u32 meta_needed;
+
+ nr_enums = btf_type_vlen(t);
+ meta_needed = nr_enums * sizeof(*enums);
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (t->size > 8 || !is_power_of_2(t->size)) {
+ btf_verifier_log_type(env, t, "Unexpected size");
+ return -EINVAL;
+ }
+
+ /* enum type either no name or a valid one */
+ if (t->name_off &&
+ !btf_name_valid_identifier(env->btf, t->name_off)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ for (i = 0; i < nr_enums; i++) {
+ if (!btf_name_offset_valid(btf, enums[i].name_off)) {
+ btf_verifier_log(env, "\tInvalid name_offset:%u",
+ enums[i].name_off);
+ return -EINVAL;
+ }
+
+ /* enum member must have a valid name */
+ if (!enums[i].name_off ||
+ !btf_name_valid_identifier(btf, enums[i].name_off)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ if (env->log.level == BPF_LOG_KERNEL)
+ continue;
+
+ fmt_str = btf_type_kflag(t) ? "\t%s val=%lld\n" : "\t%s val=%llu\n";
+ btf_verifier_log(env, fmt_str,
+ __btf_name_by_offset(btf, enums[i].name_off),
+ btf_enum64_value(enums + i));
+ }
+
+ return meta_needed;
+}
+
+static void btf_enum64_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct btf_show *show)
+{
+ const struct btf_enum64 *enums = btf_type_enum64(t);
+ u32 i, nr_enums = btf_type_vlen(t);
+ void *safe_data;
+ s64 v;
+
+ safe_data = btf_show_start_type(show, t, type_id, data);
+ if (!safe_data)
+ return;
+
+ v = *(u64 *)safe_data;
+
+ for (i = 0; i < nr_enums; i++) {
+ if (v != btf_enum64_value(enums + i))
+ continue;
+
+ btf_show_type_value(show, "%s",
+ __btf_name_by_offset(btf,
+ enums[i].name_off));
+
+ btf_show_end_type(show);
+ return;
+ }
+
+ if (btf_type_kflag(t))
+ btf_show_type_value(show, "%lld", v);
+ else
+ btf_show_type_value(show, "%llu", v);
+ btf_show_end_type(show);
+}
+
+static struct btf_kind_operations enum64_ops = {
+ .check_meta = btf_enum64_check_meta,
+ .resolve = btf_df_resolve,
+ .check_member = btf_enum_check_member,
+ .check_kflag_member = btf_enum_check_kflag_member,
+ .log_details = btf_enum_log,
+ .show = btf_enum64_show,
+};
+
static s32 btf_func_proto_check_meta(struct btf_verifier_env *env,
const struct btf_type *t,
u32 meta_left)
@@ -4438,6 +4550,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_FLOAT] = &float_ops,
[BTF_KIND_DECL_TAG] = &decl_tag_ops,
[BTF_KIND_TYPE_TAG] = &modifier_ops,
+ [BTF_KIND_ENUM64] = &enum64_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -5255,6 +5368,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (arg == nr_args) {
switch (prog->expected_attach_type) {
+ case BPF_LSM_CGROUP:
case BPF_LSM_MAC:
case BPF_TRACE_FEXIT:
/* When LSM programs are attached to void LSM hooks
@@ -5304,7 +5418,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* skip modifiers */
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_small_int(t) || btf_type_is_enum(t))
+ if (btf_type_is_small_int(t) || btf_is_any_enum(t))
/* accessing a scalar */
return true;
if (!btf_type_is_ptr(t)) {
@@ -5768,7 +5882,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id,
if (btf_type_is_ptr(t))
/* kernel size of pointer. Not BPF's size of pointer*/
return sizeof(void *);
- if (btf_type_is_int(t) || btf_type_is_enum(t))
+ if (btf_type_is_int(t) || btf_is_any_enum(t))
return t->size;
*bad_type = t;
return -EINVAL;
@@ -5916,7 +6030,7 @@ static int btf_check_func_type_match(struct bpf_verifier_log *log,
* to context only. And only global functions can be replaced.
* Hence type check only those types.
*/
- if (btf_type_is_int(t1) || btf_type_is_enum(t1))
+ if (btf_type_is_int(t1) || btf_is_any_enum(t1))
continue;
if (!btf_type_is_ptr(t1)) {
bpf_log(log,
@@ -6414,7 +6528,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
t = btf_type_by_id(btf, t->type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (!btf_type_is_int(t) && !btf_type_is_enum(t)) {
+ if (!btf_type_is_int(t) && !btf_is_any_enum(t)) {
bpf_log(log,
"Global function %s() doesn't return scalar. Only those are supported.\n",
tname);
@@ -6429,7 +6543,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
t = btf_type_by_id(btf, args[i].type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+ if (btf_type_is_int(t) || btf_is_any_enum(t)) {
reg->type = SCALAR_VALUE;
continue;
}
@@ -7308,95 +7422,15 @@ EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
#define MAX_TYPES_ARE_COMPAT_DEPTH 2
-static
-int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
- const struct btf *targ_btf, __u32 targ_id,
- int level)
-{
- const struct btf_type *local_type, *targ_type;
- int depth = 32; /* max recursion depth */
-
- /* caller made sure that names match (ignoring flavor suffix) */
- local_type = btf_type_by_id(local_btf, local_id);
- targ_type = btf_type_by_id(targ_btf, targ_id);
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
-recur:
- depth--;
- if (depth < 0)
- return -EINVAL;
-
- local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
- targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
- switch (btf_kind(local_type)) {
- case BTF_KIND_UNKN:
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION:
- case BTF_KIND_ENUM:
- case BTF_KIND_FWD:
- return 1;
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
- case BTF_KIND_PTR:
- local_id = local_type->type;
- targ_id = targ_type->type;
- goto recur;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *local_p = btf_params(local_type);
- struct btf_param *targ_p = btf_params(targ_type);
- __u16 local_vlen = btf_vlen(local_type);
- __u16 targ_vlen = btf_vlen(targ_type);
- int i, err;
-
- if (local_vlen != targ_vlen)
- return 0;
-
- for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
- if (level <= 0)
- return -EINVAL;
-
- btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
- btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);
- err = __bpf_core_types_are_compat(local_btf, local_id,
- targ_btf, targ_id,
- level - 1);
- if (err <= 0)
- return err;
- }
-
- /* tail recurse for return type check */
- btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
- btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
- goto recur;
- }
- default:
- return 0;
- }
-}
-
/* Check local and target types for compatibility. This check is used for
* type-based CO-RE relocations and follow slightly different rules than
* field-based relocations. This function assumes that root types were already
* checked for name match. Beyond that initial root-level name check, names
* are completely ignored. Compatibility rules are as follows:
- * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are considered compatible, but
* kind should match for local and target types (i.e., STRUCT is not
* compatible with UNION);
- * - for ENUMs, the size is ignored;
+ * - for ENUMs/ENUM64s, the size is ignored;
* - for INT, size and signedness are ignored;
* - for ARRAY, dimensionality is ignored, element types are checked for
* compatibility recursively;
@@ -7410,11 +7444,19 @@ recur:
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id)
{
- return __bpf_core_types_are_compat(local_btf, local_id,
- targ_btf, targ_id,
+ return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
MAX_TYPES_ARE_COMPAT_DEPTH);
}
+#define MAX_TYPES_MATCH_DEPTH 2
+
+int bpf_core_types_match(const struct btf *local_btf, u32 local_id,
+ const struct btf *targ_btf, u32 targ_id)
+{
+ return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false,
+ MAX_TYPES_MATCH_DEPTH);
+}
+
static bool bpf_core_is_flavor_sep(const char *s)
{
/* check X___Y name pattern, where X and Y are not underscores */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index afb414b26d01..59b7eb60d5b4 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -14,6 +14,8 @@
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
+#include <linux/bpf_lsm.h>
+#include <linux/bpf_verifier.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>
@@ -61,6 +63,132 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
return run_ctx.retval;
}
+unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
+ const struct bpf_insn *insn)
+{
+ const struct bpf_prog *shim_prog;
+ struct sock *sk;
+ struct cgroup *cgrp;
+ int ret = 0;
+ u64 *args;
+
+ args = (u64 *)ctx;
+ sk = (void *)(unsigned long)args[0];
+ /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+ shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ if (likely(cgrp))
+ ret = bpf_prog_run_array_cg(&cgrp->bpf,
+ shim_prog->aux->cgroup_atype,
+ ctx, bpf_prog_run, 0, NULL);
+ return ret;
+}
+
+unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
+ const struct bpf_insn *insn)
+{
+ const struct bpf_prog *shim_prog;
+ struct socket *sock;
+ struct cgroup *cgrp;
+ int ret = 0;
+ u64 *args;
+
+ args = (u64 *)ctx;
+ sock = (void *)(unsigned long)args[0];
+ /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+ shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+ cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
+ if (likely(cgrp))
+ ret = bpf_prog_run_array_cg(&cgrp->bpf,
+ shim_prog->aux->cgroup_atype,
+ ctx, bpf_prog_run, 0, NULL);
+ return ret;
+}
+
+unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
+ const struct bpf_insn *insn)
+{
+ const struct bpf_prog *shim_prog;
+ struct cgroup *cgrp;
+ int ret = 0;
+
+ /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+ shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+ /* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */
+ cgrp = task_dfl_cgroup(current);
+ if (likely(cgrp))
+ ret = bpf_prog_run_array_cg(&cgrp->bpf,
+ shim_prog->aux->cgroup_atype,
+ ctx, bpf_prog_run, 0, NULL);
+ return ret;
+}
+
+#ifdef CONFIG_BPF_LSM
+struct cgroup_lsm_atype {
+ u32 attach_btf_id;
+ int refcnt;
+};
+
+static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];
+
+static enum cgroup_bpf_attach_type
+bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
+{
+ int i;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+ if (attach_type != BPF_LSM_CGROUP)
+ return to_cgroup_bpf_attach_type(attach_type);
+
+ for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
+ if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id)
+ return CGROUP_LSM_START + i;
+
+ for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
+ if (cgroup_lsm_atype[i].attach_btf_id == 0)
+ return CGROUP_LSM_START + i;
+
+ return -E2BIG;
+
+}
+
+void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
+{
+ int i = cgroup_atype - CGROUP_LSM_START;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+ WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id &&
+ cgroup_lsm_atype[i].attach_btf_id != attach_btf_id);
+
+ cgroup_lsm_atype[i].attach_btf_id = attach_btf_id;
+ cgroup_lsm_atype[i].refcnt++;
+}
+
+void bpf_cgroup_atype_put(int cgroup_atype)
+{
+ int i = cgroup_atype - CGROUP_LSM_START;
+
+ mutex_lock(&cgroup_mutex);
+ if (--cgroup_lsm_atype[i].refcnt <= 0)
+ cgroup_lsm_atype[i].attach_btf_id = 0;
+ WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
+ mutex_unlock(&cgroup_mutex);
+}
+#else
+static enum cgroup_bpf_attach_type
+bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
+{
+ if (attach_type != BPF_LSM_CGROUP)
+ return to_cgroup_bpf_attach_type(attach_type);
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_BPF_LSM */
+
void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
@@ -157,15 +285,22 @@ static void cgroup_bpf_release(struct work_struct *work)
mutex_lock(&cgroup_mutex);
for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
- struct list_head *progs = &cgrp->bpf.progs[atype];
- struct bpf_prog_list *pl, *pltmp;
+ struct hlist_head *progs = &cgrp->bpf.progs[atype];
+ struct bpf_prog_list *pl;
+ struct hlist_node *pltmp;
- list_for_each_entry_safe(pl, pltmp, progs, node) {
- list_del(&pl->node);
- if (pl->prog)
+ hlist_for_each_entry_safe(pl, pltmp, progs, node) {
+ hlist_del(&pl->node);
+ if (pl->prog) {
+ if (pl->prog->expected_attach_type == BPF_LSM_CGROUP)
+ bpf_trampoline_unlink_cgroup_shim(pl->prog);
bpf_prog_put(pl->prog);
- if (pl->link)
+ }
+ if (pl->link) {
+ if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP)
+ bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
bpf_cgroup_link_auto_detach(pl->link);
+ }
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key[atype]);
}
@@ -217,12 +352,12 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
/* count number of elements in the list.
* it's slow but the list cannot be long
*/
-static u32 prog_list_length(struct list_head *head)
+static u32 prog_list_length(struct hlist_head *head)
{
struct bpf_prog_list *pl;
u32 cnt = 0;
- list_for_each_entry(pl, head, node) {
+ hlist_for_each_entry(pl, head, node) {
if (!prog_list_prog(pl))
continue;
cnt++;
@@ -291,7 +426,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
continue;
- list_for_each_entry(pl, &p->bpf.progs[atype], node) {
+ hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
if (!prog_list_prog(pl))
continue;
@@ -342,7 +477,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
cgroup_bpf_get(p);
for (i = 0; i < NR; i++)
- INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+ INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);
INIT_LIST_HEAD(&cgrp->bpf.storages);
@@ -418,7 +553,7 @@ cleanup:
#define BPF_CGROUP_MAX_PROGS 64
-static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
+static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
struct bpf_prog *prog,
struct bpf_cgroup_link *link,
struct bpf_prog *replace_prog,
@@ -428,12 +563,12 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
/* single-attach case */
if (!allow_multi) {
- if (list_empty(progs))
+ if (hlist_empty(progs))
return NULL;
- return list_first_entry(progs, typeof(*pl), node);
+ return hlist_entry(progs->first, typeof(*pl), node);
}
- list_for_each_entry(pl, progs, node) {
+ hlist_for_each_entry(pl, progs, node) {
if (prog && pl->prog == prog && prog != replace_prog)
/* disallow attaching the same prog twice */
return ERR_PTR(-EINVAL);
@@ -444,7 +579,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
/* direct prog multi-attach w/ replacement case */
if (replace_prog) {
- list_for_each_entry(pl, progs, node) {
+ hlist_for_each_entry(pl, progs, node) {
if (pl->prog == replace_prog)
/* a match found */
return pl;
@@ -478,9 +613,10 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+ struct bpf_prog *new_prog = prog ? : link->link.prog;
enum cgroup_bpf_attach_type atype;
struct bpf_prog_list *pl;
- struct list_head *progs;
+ struct hlist_head *progs;
int err;
if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
@@ -494,7 +630,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
/* replace_prog implies BPF_F_REPLACE, and vice versa */
return -EINVAL;
- atype = to_cgroup_bpf_attach_type(type);
+ atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
if (atype < 0)
return -EINVAL;
@@ -503,7 +639,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
if (!hierarchy_allows_attach(cgrp, atype))
return -EPERM;
- if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
+ if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
/* Disallow attaching non-overridable on top
* of existing overridable in this cgroup.
* Disallow attaching multi-prog if overridable or none
@@ -525,12 +661,22 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
if (pl) {
old_prog = pl->prog;
} else {
+ struct hlist_node *last = NULL;
+
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl) {
bpf_cgroup_storages_free(new_storage);
return -ENOMEM;
}
- list_add_tail(&pl->node, progs);
+ if (hlist_empty(progs))
+ hlist_add_head(&pl->node, progs);
+ else
+ hlist_for_each(last, progs) {
+ if (last->next)
+ continue;
+ hlist_add_behind(&pl->node, last);
+ break;
+ }
}
pl->prog = prog;
@@ -538,17 +684,30 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
bpf_cgroup_storages_assign(pl->storage, storage);
cgrp->bpf.flags[atype] = saved_flags;
+ if (type == BPF_LSM_CGROUP) {
+ err = bpf_trampoline_link_cgroup_shim(new_prog, atype);
+ if (err)
+ goto cleanup;
+ }
+
err = update_effective_progs(cgrp, atype);
if (err)
- goto cleanup;
+ goto cleanup_trampoline;
- if (old_prog)
+ if (old_prog) {
+ if (type == BPF_LSM_CGROUP)
+ bpf_trampoline_unlink_cgroup_shim(old_prog);
bpf_prog_put(old_prog);
- else
+ } else {
static_branch_inc(&cgroup_bpf_enabled_key[atype]);
+ }
bpf_cgroup_storages_link(new_storage, cgrp, type);
return 0;
+cleanup_trampoline:
+ if (type == BPF_LSM_CGROUP)
+ bpf_trampoline_unlink_cgroup_shim(new_prog);
+
cleanup:
if (old_prog) {
pl->prog = old_prog;
@@ -556,7 +715,7 @@ cleanup:
}
bpf_cgroup_storages_free(new_storage);
if (!old_prog) {
- list_del(&pl->node);
+ hlist_del(&pl->node);
kfree(pl);
}
return err;
@@ -587,7 +746,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
struct cgroup_subsys_state *css;
struct bpf_prog_array *progs;
struct bpf_prog_list *pl;
- struct list_head *head;
+ struct hlist_head *head;
struct cgroup *cg;
int pos;
@@ -603,7 +762,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
continue;
head = &cg->bpf.progs[atype];
- list_for_each_entry(pl, head, node) {
+ hlist_for_each_entry(pl, head, node) {
if (!prog_list_prog(pl))
continue;
if (pl->link == link)
@@ -637,10 +796,10 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
struct bpf_prog_list *pl;
- struct list_head *progs;
+ struct hlist_head *progs;
bool found = false;
- atype = to_cgroup_bpf_attach_type(link->type);
+ atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
if (atype < 0)
return -EINVAL;
@@ -649,7 +808,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
if (link->link.prog->type != new_prog->type)
return -EINVAL;
- list_for_each_entry(pl, progs, node) {
+ hlist_for_each_entry(pl, progs, node) {
if (pl->link == link) {
found = true;
break;
@@ -688,7 +847,7 @@ out_unlock:
return ret;
}
-static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
+static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
struct bpf_prog *prog,
struct bpf_cgroup_link *link,
bool allow_multi)
@@ -696,14 +855,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
struct bpf_prog_list *pl;
if (!allow_multi) {
- if (list_empty(progs))
+ if (hlist_empty(progs))
/* report error when trying to detach and nothing is attached */
return ERR_PTR(-ENOENT);
/* to maintain backward compatibility NONE and OVERRIDE cgroups
* allow detaching with invalid FD (prog==NULL) in legacy mode
*/
- return list_first_entry(progs, typeof(*pl), node);
+ return hlist_entry(progs->first, typeof(*pl), node);
}
if (!prog && !link)
@@ -713,7 +872,7 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
return ERR_PTR(-EINVAL);
/* find the prog or link and detach it */
- list_for_each_entry(pl, progs, node) {
+ hlist_for_each_entry(pl, progs, node) {
if (pl->prog == prog && pl->link == link)
return pl;
}
@@ -721,6 +880,60 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
}
/**
+ * purge_effective_progs() - After compute_effective_progs fails to alloc new
+ * cgrp->bpf.inactive table we can recover by
+ * recomputing the array in place.
+ *
+ * @cgrp: The cgroup which descendants to travers
+ * @prog: A program to detach or NULL
+ * @link: A link to detach or NULL
+ * @atype: Type of detach operation
+ */
+static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
+ struct bpf_cgroup_link *link,
+ enum cgroup_bpf_attach_type atype)
+{
+ struct cgroup_subsys_state *css;
+ struct bpf_prog_array *progs;
+ struct bpf_prog_list *pl;
+ struct hlist_head *head;
+ struct cgroup *cg;
+ int pos;
+
+ /* recompute effective prog array in place */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ if (percpu_ref_is_zero(&desc->bpf.refcnt))
+ continue;
+
+ /* find position of link or prog in effective progs array */
+ for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
+ if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ head = &cg->bpf.progs[atype];
+ hlist_for_each_entry(pl, head, node) {
+ if (!prog_list_prog(pl))
+ continue;
+ if (pl->prog == prog && pl->link == link)
+ goto found;
+ pos++;
+ }
+ }
+found:
+ BUG_ON(!cg);
+ progs = rcu_dereference_protected(
+ desc->bpf.effective[atype],
+ lockdep_is_held(&cgroup_mutex));
+
+ /* Remove the program from the array */
+ WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
+ "Failed to purge a prog from array at index %d", pos);
+ }
+}
+
+/**
* __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
* propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse
@@ -737,11 +950,16 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
struct bpf_prog_list *pl;
- struct list_head *progs;
+ struct hlist_head *progs;
+ u32 attach_btf_id = 0;
u32 flags;
- int err;
- atype = to_cgroup_bpf_attach_type(type);
+ if (prog)
+ attach_btf_id = prog->aux->attach_btf_id;
+ if (link)
+ attach_btf_id = link->link.prog->aux->attach_btf_id;
+
+ atype = bpf_cgroup_atype_find(type, attach_btf_id);
if (atype < 0)
return -EINVAL;
@@ -761,26 +979,27 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
pl->link = NULL;
- err = update_effective_progs(cgrp, atype);
- if (err)
- goto cleanup;
+ if (update_effective_progs(cgrp, atype)) {
+ /* if update effective array failed replace the prog with a dummy prog*/
+ pl->prog = old_prog;
+ pl->link = link;
+ purge_effective_progs(cgrp, old_prog, link, atype);
+ }
/* now can actually delete it from this cgroup list */
- list_del(&pl->node);
+ hlist_del(&pl->node);
+
kfree(pl);
- if (list_empty(progs))
+ if (hlist_empty(progs))
/* last program was detached, reset flags to zero */
cgrp->bpf.flags[atype] = 0;
- if (old_prog)
+ if (old_prog) {
+ if (type == BPF_LSM_CGROUP)
+ bpf_trampoline_unlink_cgroup_shim(old_prog);
bpf_prog_put(old_prog);
+ }
static_branch_dec(&cgroup_bpf_enabled_key[atype]);
return 0;
-
-cleanup:
- /* restore back prog or link */
- pl->prog = old_prog;
- pl->link = link;
- return err;
}
static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
@@ -798,57 +1017,90 @@ static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
+ __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
enum bpf_attach_type type = attr->query.attach_type;
+ enum cgroup_bpf_attach_type from_atype, to_atype;
enum cgroup_bpf_attach_type atype;
struct bpf_prog_array *effective;
- struct list_head *progs;
- struct bpf_prog *prog;
int cnt, ret = 0, i;
+ int total_cnt = 0;
u32 flags;
- atype = to_cgroup_bpf_attach_type(type);
- if (atype < 0)
- return -EINVAL;
-
- progs = &cgrp->bpf.progs[atype];
- flags = cgrp->bpf.flags[atype];
+ if (type == BPF_LSM_CGROUP) {
+ if (attr->query.prog_cnt && prog_ids && !prog_attach_flags)
+ return -EINVAL;
- effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
- lockdep_is_held(&cgroup_mutex));
+ from_atype = CGROUP_LSM_START;
+ to_atype = CGROUP_LSM_END;
+ flags = 0;
+ } else {
+ from_atype = to_cgroup_bpf_attach_type(type);
+ if (from_atype < 0)
+ return -EINVAL;
+ to_atype = from_atype;
+ flags = cgrp->bpf.flags[from_atype];
+ }
- if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
- cnt = bpf_prog_array_length(effective);
- else
- cnt = prog_list_length(progs);
+ for (atype = from_atype; atype <= to_atype; atype++) {
+ if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+ effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
+ lockdep_is_held(&cgroup_mutex));
+ total_cnt += bpf_prog_array_length(effective);
+ } else {
+ total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
+ }
+ }
if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
return -EFAULT;
- if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
+ if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
return -EFAULT;
- if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
+ if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
/* return early if user requested only program count + flags */
return 0;
- if (attr->query.prog_cnt < cnt) {
- cnt = attr->query.prog_cnt;
+
+ if (attr->query.prog_cnt < total_cnt) {
+ total_cnt = attr->query.prog_cnt;
ret = -ENOSPC;
}
- if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
- return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
- } else {
- struct bpf_prog_list *pl;
- u32 id;
-
- i = 0;
- list_for_each_entry(pl, progs, node) {
- prog = prog_list_prog(pl);
- id = prog->aux->id;
- if (copy_to_user(prog_ids + i, &id, sizeof(id)))
- return -EFAULT;
- if (++i == cnt)
- break;
+ for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
+ if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+ effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
+ lockdep_is_held(&cgroup_mutex));
+ cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
+ ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
+ } else {
+ struct hlist_head *progs;
+ struct bpf_prog_list *pl;
+ struct bpf_prog *prog;
+ u32 id;
+
+ progs = &cgrp->bpf.progs[atype];
+ cnt = min_t(int, prog_list_length(progs), total_cnt);
+ i = 0;
+ hlist_for_each_entry(pl, progs, node) {
+ prog = prog_list_prog(pl);
+ id = prog->aux->id;
+ if (copy_to_user(prog_ids + i, &id, sizeof(id)))
+ return -EFAULT;
+ if (++i == cnt)
+ break;
+ }
}
+
+ if (prog_attach_flags) {
+ flags = cgrp->bpf.flags[atype];
+
+ for (i = 0; i < cnt; i++)
+ if (copy_to_user(prog_attach_flags + i, &flags, sizeof(flags)))
+ return -EFAULT;
+ prog_attach_flags += cnt;
+ }
+
+ prog_ids += cnt;
+ total_cnt -= cnt;
}
return ret;
}
@@ -937,6 +1189,8 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
cg_link->type));
+ if (cg_link->type == BPF_LSM_CGROUP)
+ bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);
cg = cg_link->cgroup;
cg_link->cgroup = NULL;
@@ -1281,7 +1535,7 @@ BPF_CALL_0(bpf_get_retval)
return ctx->retval;
}
-static const struct bpf_func_proto bpf_get_retval_proto = {
+const struct bpf_func_proto bpf_get_retval_proto = {
.func = bpf_get_retval,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1296,7 +1550,7 @@ BPF_CALL_1(bpf_set_retval, int, retval)
return 0;
}
-static const struct bpf_func_proto bpf_set_retval_proto = {
+const struct bpf_func_proto bpf_set_retval_proto = {
.func = bpf_set_retval,
.gpl_only = false,
.ret_type = RET_INTEGER,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index e7961508a47d..bfeb9b937315 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -109,6 +109,9 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
fp->blinding_requested = bpf_jit_blinding_enabled(fp);
+#ifdef CONFIG_CGROUP_BPF
+ aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID;
+#endif
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
mutex_init(&fp->aux->used_maps_mutex);
@@ -178,7 +181,7 @@ void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
* here is relative to the prog itself instead of the main prog.
* This array has one entry for each xlated bpf insn.
*
- * jited_off is the byte off to the last byte of the jited insn.
+ * jited_off is the byte off to the end of the jited insn.
*
* Hence, with
* insn_start:
@@ -2281,6 +2284,21 @@ void bpf_prog_array_free(struct bpf_prog_array *progs)
kfree_rcu(progs, rcu);
}
+static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
+{
+ struct bpf_prog_array *progs;
+
+ progs = container_of(rcu, struct bpf_prog_array, rcu);
+ kfree_rcu(progs, rcu);
+}
+
+void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
+{
+ if (!progs || progs == &bpf_empty_prog_array.hdr)
+ return;
+ call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb);
+}
+
int bpf_prog_array_length(struct bpf_prog_array *array)
{
struct bpf_prog_array_item *item;
@@ -2557,6 +2575,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
#ifdef CONFIG_BPF_SYSCALL
bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
#endif
+#ifdef CONFIG_CGROUP_BPF
+ if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID)
+ bpf_cgroup_atype_put(aux->cgroup_atype);
+#endif
bpf_free_used_maps(aux);
bpf_free_used_btfs(aux);
if (bpf_prog_is_dev_bound(aux))
@@ -2653,6 +2675,8 @@ const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
+const struct bpf_func_proto bpf_set_retval_proto __weak;
+const struct bpf_func_proto bpf_get_retval_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
@@ -2716,6 +2740,12 @@ bool __weak bpf_jit_needs_zext(void)
return false;
}
+/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool __weak bpf_jit_supports_subprog_tailcalls(void)
+{
+ return false;
+}
+
bool __weak bpf_jit_supports_kfunc_call(void)
{
return false;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bb1254f07667..1f961f9982d2 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -584,7 +584,7 @@ BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
return strncmp(s1, s2, s1_sz);
}
-const struct bpf_func_proto bpf_strncmp_proto = {
+static const struct bpf_func_proto bpf_strncmp_proto = {
.func = bpf_strncmp,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1402,7 +1402,7 @@ BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
*/
#define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
-const struct bpf_func_proto bpf_kptr_xchg_proto = {
+static const struct bpf_func_proto bpf_kptr_xchg_proto = {
.func = bpf_kptr_xchg,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
@@ -1487,7 +1487,7 @@ error:
return err;
}
-const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
+static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
.func = bpf_dynptr_from_mem,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1514,7 +1514,7 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
return 0;
}
-const struct bpf_func_proto bpf_dynptr_read_proto = {
+static const struct bpf_func_proto bpf_dynptr_read_proto = {
.func = bpf_dynptr_read,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1542,7 +1542,7 @@ BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
return 0;
}
-const struct bpf_func_proto bpf_dynptr_write_proto = {
+static const struct bpf_func_proto bpf_dynptr_write_proto = {
.func = bpf_dynptr_write,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1570,7 +1570,7 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
return (unsigned long)(ptr->data + ptr->offset + offset);
}
-const struct bpf_func_proto bpf_dynptr_data_proto = {
+static const struct bpf_func_proto bpf_dynptr_data_proto = {
.func = bpf_dynptr_data,
.gpl_only = false,
.ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 3d897de89061..00b874c8e889 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -31,7 +31,7 @@ static inline void pcpu_freelist_push_node(struct pcpu_freelist_head *head,
struct pcpu_freelist_node *node)
{
node->next = head->first;
- head->first = node;
+ WRITE_ONCE(head->first, node);
}
static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
@@ -130,14 +130,17 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
orig_cpu = cpu = raw_smp_processor_id();
while (1) {
head = per_cpu_ptr(s->freelist, cpu);
+ if (!READ_ONCE(head->first))
+ goto next_cpu;
raw_spin_lock(&head->lock);
node = head->first;
if (node) {
- head->first = node->next;
+ WRITE_ONCE(head->first, node->next);
raw_spin_unlock(&head->lock);
return node;
}
raw_spin_unlock(&head->lock);
+next_cpu:
cpu = cpumask_next(cpu, cpu_possible_mask);
if (cpu >= nr_cpu_ids)
cpu = 0;
@@ -146,10 +149,12 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
}
/* per cpu lists are all empty, try extralist */
+ if (!READ_ONCE(s->extralist.first))
+ return NULL;
raw_spin_lock(&s->extralist.lock);
node = s->extralist.first;
if (node)
- s->extralist.first = node->next;
+ WRITE_ONCE(s->extralist.first, node->next);
raw_spin_unlock(&s->extralist.lock);
return node;
}
@@ -164,15 +169,18 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
orig_cpu = cpu = raw_smp_processor_id();
while (1) {
head = per_cpu_ptr(s->freelist, cpu);
+ if (!READ_ONCE(head->first))
+ goto next_cpu;
if (raw_spin_trylock(&head->lock)) {
node = head->first;
if (node) {
- head->first = node->next;
+ WRITE_ONCE(head->first, node->next);
raw_spin_unlock(&head->lock);
return node;
}
raw_spin_unlock(&head->lock);
}
+next_cpu:
cpu = cpumask_next(cpu, cpu_possible_mask);
if (cpu >= nr_cpu_ids)
cpu = 0;
@@ -181,11 +189,11 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
}
/* cannot pop from per cpu lists, try extralist */
- if (!raw_spin_trylock(&s->extralist.lock))
+ if (!READ_ONCE(s->extralist.first) || !raw_spin_trylock(&s->extralist.lock))
return NULL;
node = s->extralist.first;
if (node)
- s->extralist.first = node->next;
+ WRITE_ONCE(s->extralist.first, node->next);
raw_spin_unlock(&s->extralist.lock);
return node;
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2b69306d3c6e..ab688d85b2c6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3416,6 +3416,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_SK_LOOKUP;
case BPF_XDP:
return BPF_PROG_TYPE_XDP;
+ case BPF_LSM_CGROUP:
+ return BPF_PROG_TYPE_LSM;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -3469,6 +3471,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_LSM:
+ if (ptype == BPF_PROG_TYPE_LSM &&
+ prog->expected_attach_type != BPF_LSM_CGROUP)
+ return -EINVAL;
+
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
break;
default:
@@ -3506,13 +3513,14 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_LSM:
return cgroup_bpf_prog_detach(attr, ptype);
default:
return -EINVAL;
}
}
-#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
+#define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags
static int bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -3548,6 +3556,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_SYSCTL:
case BPF_CGROUP_GETSOCKOPT:
case BPF_CGROUP_SETSOCKOPT:
+ case BPF_LSM_CGROUP:
return cgroup_bpf_prog_query(attr, uattr);
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
@@ -4058,6 +4067,11 @@ static int bpf_prog_get_info_by_fd(struct file *file,
if (prog->aux->btf)
info.btf_id = btf_obj_id(prog->aux->btf);
+ info.attach_btf_id = prog->aux->attach_btf_id;
+ if (prog->aux->attach_btf)
+ info.attach_btf_obj_id = btf_obj_id(prog->aux->attach_btf);
+ else if (prog->aux->dst_prog)
+ info.attach_btf_obj_id = btf_obj_id(prog->aux->dst_prog->aux->attach_btf);
ulen = info.nr_func_info;
info.nr_func_info = prog->aux->func_info_cnt;
@@ -4090,14 +4104,15 @@ static int bpf_prog_get_info_by_fd(struct file *file,
info.nr_jited_line_info = 0;
if (info.nr_jited_line_info && ulen) {
if (bpf_dump_raw_ok(file->f_cred)) {
+ unsigned long line_addr;
__u64 __user *user_linfo;
u32 i;
user_linfo = u64_to_user_ptr(info.jited_line_info);
ulen = min_t(u32, info.nr_jited_line_info, ulen);
for (i = 0; i < ulen; i++) {
- if (put_user((__u64)(long)prog->aux->jited_linfo[i],
- &user_linfo[i]))
+ line_addr = (unsigned long)prog->aux->jited_linfo[i];
+ if (put_user((__u64)line_addr, &user_linfo[i]))
return -EFAULT;
}
} else {
@@ -4539,6 +4554,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = bpf_raw_tp_link_attach(prog, NULL);
else if (prog->expected_attach_type == BPF_TRACE_ITER)
ret = bpf_iter_link_attach(attr, uattr, prog);
+ else if (prog->expected_attach_type == BPF_LSM_CGROUP)
+ ret = cgroup_bpf_link_attach(attr, prog);
else
ret = bpf_tracing_prog_attach(prog,
attr->link_create.target_fd,
@@ -5130,7 +5147,7 @@ BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flag
return *res ? 0 : -ENOENT;
}
-const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
+static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
.func = bpf_kallsyms_lookup_name,
.gpl_only = false,
.ret_type = RET_INTEGER,
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 93c7675f0c9e..6cd226584c33 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -11,6 +11,8 @@
#include <linux/rcupdate_wait.h>
#include <linux/module.h>
#include <linux/static_call.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf_lsm.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -410,7 +412,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
}
}
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
enum bpf_tramp_prog_type kind;
struct bpf_tramp_link *link_exiting;
@@ -418,44 +420,33 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
int cnt = 0, i;
kind = bpf_attach_type_to_tramp(link->link.prog);
- mutex_lock(&tr->mutex);
- if (tr->extension_prog) {
+ if (tr->extension_prog)
/* cannot attach fentry/fexit if extension prog is attached.
* cannot overwrite extension prog either.
*/
- err = -EBUSY;
- goto out;
- }
+ return -EBUSY;
for (i = 0; i < BPF_TRAMP_MAX; i++)
cnt += tr->progs_cnt[i];
if (kind == BPF_TRAMP_REPLACE) {
/* Cannot attach extension if fentry/fexit are in use. */
- if (cnt) {
- err = -EBUSY;
- goto out;
- }
+ if (cnt)
+ return -EBUSY;
tr->extension_prog = link->link.prog;
- err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
- link->link.prog->bpf_func);
- goto out;
- }
- if (cnt >= BPF_MAX_TRAMP_LINKS) {
- err = -E2BIG;
- goto out;
+ return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
+ link->link.prog->bpf_func);
}
- if (!hlist_unhashed(&link->tramp_hlist)) {
+ if (cnt >= BPF_MAX_TRAMP_LINKS)
+ return -E2BIG;
+ if (!hlist_unhashed(&link->tramp_hlist))
/* prog already linked */
- err = -EBUSY;
- goto out;
- }
+ return -EBUSY;
hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
if (link_exiting->link.prog != link->link.prog)
continue;
/* prog already linked */
- err = -EBUSY;
- goto out;
+ return -EBUSY;
}
hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
@@ -465,34 +456,220 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
}
-out:
+ return err;
+}
+
+int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+{
+ int err;
+
+ mutex_lock(&tr->mutex);
+ err = __bpf_trampoline_link_prog(link, tr);
mutex_unlock(&tr->mutex);
return err;
}
-/* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
enum bpf_tramp_prog_type kind;
int err;
kind = bpf_attach_type_to_tramp(link->link.prog);
- mutex_lock(&tr->mutex);
if (kind == BPF_TRAMP_REPLACE) {
WARN_ON_ONCE(!tr->extension_prog);
err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
tr->extension_prog->bpf_func, NULL);
tr->extension_prog = NULL;
- goto out;
+ return err;
}
hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
- err = bpf_trampoline_update(tr);
-out:
+ return bpf_trampoline_update(tr);
+}
+
+/* bpf_trampoline_unlink_prog() should never fail. */
+int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+{
+ int err;
+
+ mutex_lock(&tr->mutex);
+ err = __bpf_trampoline_unlink_prog(link, tr);
+ mutex_unlock(&tr->mutex);
+ return err;
+}
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+static void bpf_shim_tramp_link_release(struct bpf_link *link)
+{
+ struct bpf_shim_tramp_link *shim_link =
+ container_of(link, struct bpf_shim_tramp_link, link.link);
+
+ /* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
+ if (!shim_link->trampoline)
+ return;
+
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
+ bpf_trampoline_put(shim_link->trampoline);
+}
+
+static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_shim_tramp_link *shim_link =
+ container_of(link, struct bpf_shim_tramp_link, link.link);
+
+ kfree(shim_link);
+}
+
+static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
+ .release = bpf_shim_tramp_link_release,
+ .dealloc = bpf_shim_tramp_link_dealloc,
+};
+
+static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
+ bpf_func_t bpf_func,
+ int cgroup_atype)
+{
+ struct bpf_shim_tramp_link *shim_link = NULL;
+ struct bpf_prog *p;
+
+ shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
+ if (!shim_link)
+ return NULL;
+
+ p = bpf_prog_alloc(1, 0);
+ if (!p) {
+ kfree(shim_link);
+ return NULL;
+ }
+
+ p->jited = false;
+ p->bpf_func = bpf_func;
+
+ p->aux->cgroup_atype = cgroup_atype;
+ p->aux->attach_func_proto = prog->aux->attach_func_proto;
+ p->aux->attach_btf_id = prog->aux->attach_btf_id;
+ p->aux->attach_btf = prog->aux->attach_btf;
+ btf_get(p->aux->attach_btf);
+ p->type = BPF_PROG_TYPE_LSM;
+ p->expected_attach_type = BPF_LSM_MAC;
+ bpf_prog_inc(p);
+ bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
+ &bpf_shim_tramp_link_lops, p);
+ bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
+
+ return shim_link;
+}
+
+static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
+ bpf_func_t bpf_func)
+{
+ struct bpf_tramp_link *link;
+ int kind;
+
+ for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+ hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
+ struct bpf_prog *p = link->link.prog;
+
+ if (p->bpf_func == bpf_func)
+ return container_of(link, struct bpf_shim_tramp_link, link);
+ }
+ }
+
+ return NULL;
+}
+
+int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+ int cgroup_atype)
+{
+ struct bpf_shim_tramp_link *shim_link = NULL;
+ struct bpf_attach_target_info tgt_info = {};
+ struct bpf_trampoline *tr;
+ bpf_func_t bpf_func;
+ u64 key;
+ int err;
+
+ err = bpf_check_attach_target(NULL, prog, NULL,
+ prog->aux->attach_btf_id,
+ &tgt_info);
+ if (err)
+ return err;
+
+ key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+ prog->aux->attach_btf_id);
+
+ bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+ tr = bpf_trampoline_get(key, &tgt_info);
+ if (!tr)
+ return -ENOMEM;
+
+ mutex_lock(&tr->mutex);
+
+ shim_link = cgroup_shim_find(tr, bpf_func);
+ if (shim_link) {
+ /* Reusing existing shim attached by the other program. */
+ bpf_link_inc(&shim_link->link.link);
+
+ mutex_unlock(&tr->mutex);
+ bpf_trampoline_put(tr); /* bpf_trampoline_get above */
+ return 0;
+ }
+
+ /* Allocate and install new shim. */
+
+ shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
+ if (!shim_link) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = __bpf_trampoline_link_prog(&shim_link->link, tr);
+ if (err)
+ goto err;
+
+ shim_link->trampoline = tr;
+ /* note, we're still holding tr refcnt from above */
+
mutex_unlock(&tr->mutex);
+
+ return 0;
+err:
+ mutex_unlock(&tr->mutex);
+
+ if (shim_link)
+ bpf_link_put(&shim_link->link.link);
+
+ /* have to release tr while _not_ holding its mutex */
+ bpf_trampoline_put(tr); /* bpf_trampoline_get above */
+
return err;
}
+void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
+{
+ struct bpf_shim_tramp_link *shim_link = NULL;
+ struct bpf_trampoline *tr;
+ bpf_func_t bpf_func;
+ u64 key;
+
+ key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+ prog->aux->attach_btf_id);
+
+ bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+ tr = bpf_trampoline_lookup(key);
+ if (WARN_ON_ONCE(!tr))
+ return;
+
+ mutex_lock(&tr->mutex);
+ shim_link = cgroup_shim_find(tr, bpf_func);
+ mutex_unlock(&tr->mutex);
+
+ if (shim_link)
+ bpf_link_put(&shim_link->link.link);
+
+ bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
+}
+#endif
+
struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info)
{
@@ -625,6 +802,31 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
rcu_read_unlock();
}
+u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
+ struct bpf_tramp_run_ctx *run_ctx)
+ __acquires(RCU)
+{
+ /* Runtime stats are exported via actual BPF_LSM_CGROUP
+ * programs, not the shims.
+ */
+ rcu_read_lock();
+ migrate_disable();
+
+ run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
+ return NO_START_TIME;
+}
+
+void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
+ struct bpf_tramp_run_ctx *run_ctx)
+ __releases(RCU)
+{
+ bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
+ migrate_enable();
+ rcu_read_unlock();
+}
+
u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
{
rcu_read_lock_trace();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0efbac0fd126..328cfab3af60 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5847,6 +5847,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_arg_type arg_type = fn->arg_type[arg];
enum bpf_reg_type type = reg->type;
+ u32 *arg_btf_id = NULL;
int err = 0;
if (arg_type == ARG_DONTCARE)
@@ -5883,7 +5884,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
*/
goto skip_type_check;
- err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
+ /* arg_btf_id and arg_size are in a union. */
+ if (base_type(arg_type) == ARG_PTR_TO_BTF_ID)
+ arg_btf_id = fn->arg_btf_id[arg];
+
+ err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
if (err)
return err;
@@ -6010,6 +6015,11 @@ skip_type_check:
* next is_mem_size argument below.
*/
meta->raw_mode = arg_type & MEM_UNINIT;
+ if (arg_type & MEM_FIXED_SIZE) {
+ err = check_helper_mem_access(env, regno,
+ fn->arg_size[arg], false,
+ meta);
+ }
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
@@ -6143,7 +6153,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
{
- return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
+ return env->prog->jit_requested &&
+ bpf_jit_supports_subprog_tailcalls();
}
static int check_map_func_compatibility(struct bpf_verifier_env *env,
@@ -6399,11 +6410,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
return count <= 1;
}
-static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
- enum bpf_arg_type arg_next)
+static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
{
- return (base_type(arg_curr) == ARG_PTR_TO_MEM) !=
- arg_type_is_mem_size(arg_next);
+ bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
+ bool has_size = fn->arg_size[arg] != 0;
+ bool is_next_size = false;
+
+ if (arg + 1 < ARRAY_SIZE(fn->arg_type))
+ is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
+
+ if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
+ return is_next_size;
+
+ return has_size == is_next_size || is_next_size == is_fixed;
}
static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
@@ -6414,11 +6433,11 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
* helper function specification.
*/
if (arg_type_is_mem_size(fn->arg1_type) ||
- base_type(fn->arg5_type) == ARG_PTR_TO_MEM ||
- check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
- check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
- check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
- check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
+ check_args_pair_invalid(fn, 0) ||
+ check_args_pair_invalid(fn, 1) ||
+ check_args_pair_invalid(fn, 2) ||
+ check_args_pair_invalid(fn, 3) ||
+ check_args_pair_invalid(fn, 4))
return false;
return true;
@@ -6459,7 +6478,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false;
- if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
+ if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
+ /* arg_btf_id and arg_size are in a union. */
+ (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
+ !(fn->arg_type[i] & MEM_FIXED_SIZE)))
return false;
}
@@ -7100,6 +7122,41 @@ static int check_get_func_ip(struct bpf_verifier_env *env)
return -ENOTSUPP;
}
+static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+{
+ return &env->insn_aux_data[env->insn_idx];
+}
+
+static bool loop_flag_is_zero(struct bpf_verifier_env *env)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = &regs[BPF_REG_4];
+ bool reg_is_null = register_is_null(reg);
+
+ if (reg_is_null)
+ mark_chain_precision(env, BPF_REG_4);
+
+ return reg_is_null;
+}
+
+static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
+{
+ struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
+
+ if (!state->initialized) {
+ state->initialized = 1;
+ state->fit_for_inline = loop_flag_is_zero(env);
+ state->callback_subprogno = subprogno;
+ return;
+ }
+
+ if (!state->fit_for_inline)
+ return;
+
+ state->fit_for_inline = (loop_flag_is_zero(env) &&
+ state->callback_subprogno == subprogno);
+}
+
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -7252,6 +7309,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
err = check_bpf_snprintf_call(env, regs);
break;
case BPF_FUNC_loop:
+ update_loop_inline_state(env, meta.subprogno);
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_loop_callback_state);
break;
@@ -7261,6 +7319,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
reg_type_str(env, regs[BPF_REG_1].type));
return -EACCES;
}
+ break;
+ case BPF_FUNC_set_retval:
+ if (env->prog->expected_attach_type == BPF_LSM_CGROUP) {
+ if (!env->prog->aux->attach_func_proto->type) {
+ /* Make sure programs that attach to void
+ * hooks don't try to modify return value.
+ */
+ verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
+ return -EINVAL;
+ }
+ }
+ break;
}
if (err)
@@ -7658,11 +7728,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
return true;
}
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
-{
- return &env->insn_aux_data[env->insn_idx];
-}
-
enum {
REASON_BOUNDS = -1,
REASON_TYPE = -2,
@@ -9033,7 +9098,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (opcode == BPF_END || opcode == BPF_NEG) {
if (opcode == BPF_NEG) {
- if (BPF_SRC(insn->code) != 0 ||
+ if (BPF_SRC(insn->code) != BPF_K ||
insn->src_reg != BPF_REG_0 ||
insn->off != 0 || insn->imm != 0) {
verbose(env, "BPF_NEG uses reserved fields\n");
@@ -10360,11 +10425,21 @@ static int check_return_code(struct bpf_verifier_env *env)
const bool is_subprog = frame->subprogno;
/* LSM and struct_ops func-ptr's return type could be "void" */
- if (!is_subprog &&
- (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
- prog_type == BPF_PROG_TYPE_LSM) &&
- !prog->aux->attach_func_proto->type)
- return 0;
+ if (!is_subprog) {
+ switch (prog_type) {
+ case BPF_PROG_TYPE_LSM:
+ if (prog->expected_attach_type == BPF_LSM_CGROUP)
+ /* See below, can be 0 or 0-1 depending on hook. */
+ break;
+ fallthrough;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ if (!prog->aux->attach_func_proto->type)
+ return 0;
+ break;
+ default:
+ break;
+ }
+ }
/* eBPF calling convention is such that R0 is used
* to return the value from eBPF program.
@@ -10455,6 +10530,22 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_SK_LOOKUP:
range = tnum_range(SK_DROP, SK_PASS);
break;
+
+ case BPF_PROG_TYPE_LSM:
+ if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
+ /* Regular BPF_PROG_TYPE_LSM programs can return
+ * any value.
+ */
+ return 0;
+ }
+ if (!env->prog->aux->attach_func_proto->type) {
+ /* Make sure programs that attach to void
+ * hooks don't try to modify return value.
+ */
+ range = tnum_range(1, 1);
+ }
+ break;
+
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.
@@ -10471,6 +10562,10 @@ static int check_return_code(struct bpf_verifier_env *env)
if (!tnum_in(range, reg->var_off)) {
verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
+ if (prog->expected_attach_type == BPF_LSM_CGROUP &&
+ prog_type == BPF_PROG_TYPE_LSM &&
+ !prog->aux->attach_func_proto->type)
+ verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
return -EINVAL;
}
@@ -10882,7 +10977,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
goto err_free;
ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
scalar_return =
- btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
+ btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
goto err_free;
@@ -14275,6 +14370,142 @@ patch_call_imm:
return 0;
}
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+ int position,
+ s32 stack_base,
+ u32 callback_subprogno,
+ u32 *cnt)
+{
+ s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+ s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+ s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+ int reg_loop_max = BPF_REG_6;
+ int reg_loop_cnt = BPF_REG_7;
+ int reg_loop_ctx = BPF_REG_8;
+
+ struct bpf_prog *new_prog;
+ u32 callback_start;
+ u32 call_insn_offset;
+ s32 callback_offset;
+
+ /* This represents an inlined version of bpf_iter.c:bpf_loop,
+ * be careful to modify this code in sync.
+ */
+ struct bpf_insn insn_buf[] = {
+ /* Return error and jump to the end of the patch if
+ * expected number of iterations is too big.
+ */
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
+ BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 16),
+ /* spill R6, R7, R8 to use these as loop vars */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
+ /* initialize loop vars */
+ BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
+ BPF_MOV32_IMM(reg_loop_cnt, 0),
+ BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
+ /* loop header,
+ * if reg_loop_cnt >= reg_loop_max skip the loop body
+ */
+ BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
+ /* callback call,
+ * correct callback offset would be set after patching
+ */
+ BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
+ BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
+ BPF_CALL_REL(0),
+ /* increment loop counter */
+ BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
+ /* jump to loop header if callback returned 0 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
+ /* return value of bpf_loop,
+ * set R0 to the number of iterations
+ */
+ BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
+ /* restore original values of R6, R7, R8 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
+ };
+
+ *cnt = ARRAY_SIZE(insn_buf);
+ new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+ if (!new_prog)
+ return new_prog;
+
+ /* callback start is known only after patching */
+ callback_start = env->subprog_info[callback_subprogno].start;
+ /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
+ call_insn_offset = position + 12;
+ callback_offset = callback_start - call_insn_offset - 1;
+ new_prog->insnsi[call_insn_offset].imm = callback_offset;
+
+ return new_prog;
+}
+
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == 0 &&
+ insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main) check
+ * insn_aux_data to see if there are bpf_loop calls that require
+ * inlining. If such calls are found the calls are replaced with a
+ * sequence of instructions produced by `inline_bpf_loop` function and
+ * subprog stack_depth is increased by the size of 3 registers.
+ * This stack space is used to spill values of the R6, R7, R8. These
+ * registers are used to store the loop bound, counter and context
+ * variables.
+ */
+static int optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprogs = env->subprog_info;
+ int i, cur_subprog = 0, cnt, delta = 0;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ u16 stack_depth = subprogs[cur_subprog].stack_depth;
+ u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+ u16 stack_depth_extra = 0;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ struct bpf_loop_inline_state *inline_state =
+ &env->insn_aux_data[i + delta].loop_inline_state;
+
+ if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+ struct bpf_prog *new_prog;
+
+ stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+ new_prog = inline_bpf_loop(env,
+ i + delta,
+ -(stack_depth + stack_depth_extra),
+ inline_state->callback_subprogno,
+ &cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ }
+
+ if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+ subprogs[cur_subprog].stack_depth += stack_depth_extra;
+ cur_subprog++;
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+ stack_depth_extra = 0;
+ }
+ }
+
+ env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+ return 0;
+}
+
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl, *sln;
@@ -14694,6 +14925,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
fallthrough;
case BPF_MODIFY_RETURN:
case BPF_LSM_MAC:
+ case BPF_LSM_CGROUP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
if (!btf_type_is_func(t)) {
@@ -14810,8 +15042,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
- prog->type != BPF_PROG_TYPE_LSM) {
- verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
+ prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
+ verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
return -EINVAL;
}
@@ -15012,6 +15244,9 @@ skip_full_check:
ret = check_max_stack_depth(env);
/* instruction rewrites happen after this point */
+ if (ret == 0)
+ ret = optimize_bpf_loop(env);
+
if (is_priv) {
if (ret == 0)
opt_hard_wire_dead_code_branches(env);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d2b354991bf5..07d51f95d6a6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10068,26 +10068,30 @@ static inline bool perf_event_is_tracing(struct perf_event *event)
int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
u64 bpf_cookie)
{
- bool is_kprobe, is_tracepoint, is_syscall_tp;
+ bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp;
if (!perf_event_is_tracing(event))
return perf_event_set_bpf_handler(event, prog, bpf_cookie);
- is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
+ is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_KPROBE;
+ is_uprobe = event->tp_event->flags & TRACE_EVENT_FL_UPROBE;
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
is_syscall_tp = is_syscall_trace_event(event->tp_event);
- if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
+ if (!is_kprobe && !is_uprobe && !is_tracepoint && !is_syscall_tp)
/* bpf programs can only be attached to u/kprobe or tracepoint */
return -EINVAL;
- if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
+ if (((is_kprobe || is_uprobe) && prog->type != BPF_PROG_TYPE_KPROBE) ||
(is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
(is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT))
return -EINVAL;
+ if (prog->type == BPF_PROG_TYPE_KPROBE && prog->aux->sleepable && !is_uprobe)
+ /* only uprobe programs are allowed to be sleepable */
+ return -EINVAL;
+
/* Kprobe override only works for kprobes, not uprobes. */
- if (prog->kprobe_override &&
- !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
+ if (prog->kprobe_override && !is_kprobe)
return -EINVAL;
if (is_tracepoint || is_syscall_tp) {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 35d034219513..b233714a1c78 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1240,6 +1240,30 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
return 0;
}
+static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
+ int *valp, int write, void *data)
+{
+ int tmp, ret;
+ struct do_proc_dointvec_minmax_conv_param *param = data;
+ /*
+ * If writing, first do so via a temporary local int so we can
+ * bounds-check it before touching *valp.
+ */
+ int *ip = write ? &tmp : valp;
+
+ ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
+ if (ret)
+ return ret;
+
+ if (write) {
+ if ((param->min && *param->min > tmp) ||
+ (param->max && *param->max < tmp))
+ return -EINVAL;
+ *valp = tmp;
+ }
+ return 0;
+}
+
/**
* proc_dointvec_jiffies - read a vector of integers as seconds
* @table: the sysctl table
@@ -1262,6 +1286,17 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write,
do_proc_dointvec_jiffies_conv,NULL);
}
+int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct do_proc_dointvec_minmax_conv_param param = {
+ .min = (int *) table->extra1,
+ .max = (int *) table->extra2,
+ };
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
+ do_proc_dointvec_ms_jiffies_minmax_conv, &param);
+}
+
/**
* proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
* @table: the sysctl table
@@ -1526,6 +1561,12 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write,
return -ENOSYS;
}
+int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 88589d74a892..68e5cdd24cef 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1936,7 +1936,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
event->prog = prog;
event->bpf_cookie = bpf_cookie;
rcu_assign_pointer(event->tp_event->prog_array, new_array);
- bpf_prog_array_free(old_array);
+ bpf_prog_array_free_sleepable(old_array);
unlock:
mutex_unlock(&bpf_event_mutex);
@@ -1962,7 +1962,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
bpf_prog_array_delete_safe(old_array, event->prog);
} else {
rcu_assign_pointer(event->tp_event->prog_array, new_array);
- bpf_prog_array_free(old_array);
+ bpf_prog_array_free_sleepable(old_array);
}
bpf_prog_put(event->prog);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c3dc4f859a6b..88ba5b4bd0c5 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -16,6 +16,7 @@
#include <linux/namei.h>
#include <linux/string.h>
#include <linux/rculist.h>
+#include <linux/filter.h>
#include "trace_dynevent.h"
#include "trace_probe.h"
@@ -1342,15 +1343,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
int size, esize;
int rctx;
+#ifdef CONFIG_BPF_EVENTS
if (bpf_prog_array_valid(call)) {
u32 ret;
- preempt_disable();
- ret = trace_call_bpf(call, regs);
- preempt_enable();
+ ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run);
if (!ret)
return;
}
+#endif /* CONFIG_BPF_EVENTS */
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));