summaryrefslogtreecommitdiff
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2020-03-30 15:29:22 -0700
committerAlexei Starovoitov <ast@kernel.org>2020-03-30 17:36:41 -0700
commit8596a75f6c830a693ec86e6467a58b225713a7f1 (patch)
tree1d894047fd961e9736b3a45e3bca2da53ac7a5b5 /kernel/bpf/syscall.c
parente5ffcc9191ca72ddf1c999a80ae9165705ac4790 (diff)
parent7cccee42bf76efc9de69fa2e5b8dbe58bfc8ecdf (diff)
Merge branch 'cgroup-bpf_link'
Andrii Nakryiko says: ==================== bpf_link abstraction itself was formalized in [0] with justifications for why its semantics is a good fit for attaching BPF programs of various types. This patch set adds bpf_link-based BPF program attachment mechanism for cgroup BPF programs. Cgroup BPF link is semantically compatible with current BPF_F_ALLOW_MULTI semantics of attaching cgroup BPF programs directly. Thus cgroup bpf_link can co-exist with legacy BPF program multi-attachment. bpf_link is destroyed and automatically detached when the last open FD holding the reference to bpf_link is closed. This means that by default, when the process that created bpf_link exits, attached BPF program will be automatically detached due to bpf_link's clean up code. Cgroup bpf_link, like any other bpf_link, can be pinned in BPF FS and by those means survive the exit of process that created the link. This is useful in many scenarios to provide long-living BPF program attachments. Pinning also means that there could be many owners of bpf_link through independent FDs. Additionally, auto-detachmet of cgroup bpf_link is implemented. When cgroup is dying it will automatically detach all active bpf_links. This ensures that cgroup clean up is not delayed due to active bpf_link even despite no chance for any BPF program to be run for a given cgroup. In that sense it's similar to existing behavior of dropping refcnt of attached bpf_prog. But in the case of bpf_link, bpf_link is not destroyed and is still available to user as long as at least one active FD is still open (or if it's pinned in BPF FS). There are two main cgroup-specific differences between bpf_link-based and direct bpf_prog-based attachment. First, as opposed to direct bpf_prog attachment, cgroup itself doesn't "own" bpf_link, which makes it possible to auto-clean up attached bpf_link when user process abruptly exits without explicitly detaching BPF program. This makes for a safe default behavior proven in BPF tracing program types. But bpf_link doesn't bump cgroup->bpf.refcnt as well and because of that doesn't prevent cgroup from cleaning up its BPF state. Second, only owners of bpf_link (those who created bpf_link in the first place or obtained a new FD by opening bpf_link from BPF FS) can detach and/or update it. This makes sure that no other process can accidentally remove/replace BPF program. This patch set also implements LINK_UPDATE sub-command, which allows to replace bpf_link's underlying bpf_prog, similarly to BPF_F_REPLACE flag behavior for direct bpf_prog cgroup attachment. Similarly to LINK_CREATE, it is supposed to be generic command for different types of bpf_links. [0] https://lore.kernel.org/bpf/20200228223948.360936-1-andriin@fb.com/ v2->v3: - revert back to just MULTI mode (Alexei); - fix tinyconfig compilation warning (kbuild test robot); v1->v2: - implement exclusive and overridable exclusive modes (Andrey Ignatov); - fix build for !CONFIG_CGROUP_BPF build; - add more selftests for non-multi mode and inter-operability; ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c119
1 files changed, 110 insertions, 9 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a616b63f23b4..e0a3b34d7039 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2175,13 +2175,6 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
-struct bpf_link {
- atomic64_t refcnt;
- const struct bpf_link_ops *ops;
- struct bpf_prog *prog;
- struct work_struct work;
-};
-
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog)
{
@@ -2195,8 +2188,8 @@ void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
* anon_inode's release() call. This helper manages marking bpf_link as
* defunct, releases anon_inode file and puts reserved FD.
*/
-static void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
- int link_fd)
+void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
+ int link_fd)
{
link->prog = NULL;
fput(link_file);
@@ -2266,6 +2259,10 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
link_type = "raw_tracepoint";
else if (link->ops == &bpf_tracing_link_lops)
link_type = "tracing";
+#ifdef CONFIG_CGROUP_BPF
+ else if (link->ops == &bpf_cgroup_link_lops)
+ link_type = "cgroup";
+#endif
else
link_type = "unknown";
@@ -3553,6 +3550,104 @@ err_put:
return err;
}
+#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
+static int link_create(union bpf_attr *attr)
+{
+ enum bpf_prog_type ptype;
+ struct bpf_prog *prog;
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (CHECK_ATTR(BPF_LINK_CREATE))
+ return -EINVAL;
+
+ ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+ if (ptype == BPF_PROG_TYPE_UNSPEC)
+ return -EINVAL;
+
+ prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ ret = bpf_prog_attach_check_attach_type(prog,
+ attr->link_create.attach_type);
+ if (ret)
+ goto err_out;
+
+ switch (ptype) {
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ ret = cgroup_bpf_link_attach(attr, prog);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+err_out:
+ if (ret < 0)
+ bpf_prog_put(prog);
+ return ret;
+}
+
+#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
+
+static int link_update(union bpf_attr *attr)
+{
+ struct bpf_prog *old_prog = NULL, *new_prog;
+ struct bpf_link *link;
+ u32 flags;
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (CHECK_ATTR(BPF_LINK_UPDATE))
+ return -EINVAL;
+
+ flags = attr->link_update.flags;
+ if (flags & ~BPF_F_REPLACE)
+ return -EINVAL;
+
+ link = bpf_link_get_from_fd(attr->link_update.link_fd);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+
+ new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
+ if (IS_ERR(new_prog))
+ return PTR_ERR(new_prog);
+
+ if (flags & BPF_F_REPLACE) {
+ old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
+ if (IS_ERR(old_prog)) {
+ ret = PTR_ERR(old_prog);
+ old_prog = NULL;
+ goto out_put_progs;
+ }
+ }
+
+#ifdef CONFIG_CGROUP_BPF
+ if (link->ops == &bpf_cgroup_link_lops) {
+ ret = cgroup_bpf_replace(link, old_prog, new_prog);
+ goto out_put_progs;
+ }
+#endif
+ ret = -EINVAL;
+
+out_put_progs:
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ if (ret)
+ bpf_prog_put(new_prog);
+ return ret;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -3663,6 +3758,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_DELETE_BATCH:
err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
break;
+ case BPF_LINK_CREATE:
+ err = link_create(&attr);
+ break;
+ case BPF_LINK_UPDATE:
+ err = link_update(&attr);
+ break;
default:
err = -EINVAL;
break;