summaryrefslogtreecommitdiff
path: root/net/core/sock_map.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/sock_map.c')
-rw-r--r--net/core/sock_map.c211
1 files changed, 150 insertions, 61 deletions
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 6f1b82b8ad49..a660baedd9e7 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -41,18 +41,18 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
- stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT);
+ stab = bpf_map_area_alloc(sizeof(*stab), NUMA_NO_NODE);
if (!stab)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&stab->map, attr);
raw_spin_lock_init(&stab->lock);
- stab->sks = bpf_map_area_alloc(stab->map.max_entries *
+ stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
stab->map.numa_node);
if (!stab->sks) {
- kfree(stab);
+ bpf_map_area_free(stab);
return ERR_PTR(-ENOMEM);
}
@@ -167,8 +167,11 @@ static void sock_map_del_link(struct sock *sk,
write_lock_bh(&sk->sk_callback_lock);
if (strp_stop)
sk_psock_stop_strp(sk, psock);
- else
+ if (verdict_stop)
sk_psock_stop_verdict(sk, psock);
+
+ if (psock->psock_update_sk_prot)
+ psock->psock_update_sk_prot(sk, psock, false);
write_unlock_bh(&sk->sk_callback_lock);
}
}
@@ -211,8 +214,6 @@ out:
return psock;
}
-static bool sock_map_redirect_allowed(const struct sock *sk);
-
static int sock_map_link(struct bpf_map *map, struct sock *sk)
{
struct sk_psock_progs *progs = sock_map_progs(map);
@@ -223,13 +224,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
struct sk_psock *psock;
int ret;
- /* Only sockets we can redirect into/from in BPF need to hold
- * refs to parser/verdict progs and have their sk_data_ready
- * and sk_write_space callbacks overridden.
- */
- if (!sock_map_redirect_allowed(sk))
- goto no_progs;
-
stream_verdict = READ_ONCE(progs->stream_verdict);
if (stream_verdict) {
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
@@ -264,7 +258,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
}
}
-no_progs:
psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
@@ -292,32 +285,38 @@ no_progs:
if (msg_parser)
psock_set_prog(&psock->progs.msg_parser, msg_parser);
+ if (stream_parser)
+ psock_set_prog(&psock->progs.stream_parser, stream_parser);
+ if (stream_verdict)
+ psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
+ if (skb_verdict)
+ psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+ /* msg_* and stream_* programs references tracked in psock after this
+ * point. Reference dec and cleanup will occur through psock destructor
+ */
ret = sock_map_init_proto(sk, psock);
- if (ret < 0)
- goto out_drop;
+ if (ret < 0) {
+ sk_psock_put(sk, psock);
+ goto out;
+ }
write_lock_bh(&sk->sk_callback_lock);
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
ret = sk_psock_init_strp(sk, psock);
- if (ret)
- goto out_unlock_drop;
- psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
- psock_set_prog(&psock->progs.stream_parser, stream_parser);
+ if (ret) {
+ write_unlock_bh(&sk->sk_callback_lock);
+ sk_psock_put(sk, psock);
+ goto out;
+ }
sk_psock_start_strp(sk, psock);
} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
- psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
sk_psock_start_verdict(sk,psock);
} else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) {
- psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
sk_psock_start_verdict(sk, psock);
}
write_unlock_bh(&sk->sk_callback_lock);
return 0;
-out_unlock_drop:
- write_unlock_bh(&sk->sk_callback_lock);
-out_drop:
- sk_psock_put(sk, psock);
out_progs:
if (skb_verdict)
bpf_prog_put(skb_verdict);
@@ -330,6 +329,7 @@ out_put_stream_parser:
out_put_stream_verdict:
if (stream_verdict)
bpf_prog_put(stream_verdict);
+out:
return ret;
}
@@ -361,7 +361,7 @@ static void sock_map_free(struct bpf_map *map)
synchronize_rcu();
bpf_map_area_free(stab->sks);
- kfree(stab);
+ bpf_map_area_free(stab);
}
static void sock_map_release_progs(struct bpf_map *map)
@@ -521,18 +521,6 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
}
-static bool sk_is_tcp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_STREAM &&
- sk->sk_protocol == IPPROTO_TCP;
-}
-
-static bool sk_is_udp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_DGRAM &&
- sk->sk_protocol == IPPROTO_UDP;
-}
-
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
@@ -550,10 +538,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
- else if (sk_is_udp(sk))
- return sk_hashed(sk);
-
- return false;
+ return true;
}
static int sock_hash_update_common(struct bpf_map *map, void *key,
@@ -798,17 +783,26 @@ static int sock_map_init_seq_private(void *priv_data,
{
struct sock_map_seq_info *info = priv_data;
+ bpf_map_inc_with_uref(aux->map);
info->map = aux->map;
return 0;
}
+static void sock_map_fini_seq_private(void *priv_data)
+{
+ struct sock_map_seq_info *info = priv_data;
+
+ bpf_map_put_with_uref(info->map);
+}
+
static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
.seq_ops = &sock_map_seq_ops,
.init_seq_private = sock_map_init_seq_private,
+ .fini_seq_private = sock_map_fini_seq_private,
.seq_priv_size = sizeof(struct sock_map_seq_info),
};
-static int sock_map_btf_id;
+BTF_ID_LIST_SINGLE(sock_map_btf_ids, struct, bpf_stab)
const struct bpf_map_ops sock_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_map_alloc,
@@ -820,8 +814,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_stab",
- .map_btf_id = &sock_map_btf_id,
+ .map_btf_id = &sock_map_btf_ids[0],
.iter_seq_info = &sock_map_iter_seq_info,
};
@@ -1092,7 +1085,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
if (attr->key_size > MAX_BPF_STACK)
return ERR_PTR(-E2BIG);
- htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
+ htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -1122,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
return &htab->map;
free_htab:
- kfree(htab);
+ bpf_map_area_free(htab);
return ERR_PTR(err);
}
@@ -1175,7 +1168,7 @@ static void sock_hash_free(struct bpf_map *map)
synchronize_rcu();
bpf_map_area_free(htab->buckets);
- kfree(htab);
+ bpf_map_area_free(htab);
}
static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
@@ -1385,22 +1378,31 @@ static const struct seq_operations sock_hash_seq_ops = {
};
static int sock_hash_init_seq_private(void *priv_data,
- struct bpf_iter_aux_info *aux)
+ struct bpf_iter_aux_info *aux)
{
struct sock_hash_seq_info *info = priv_data;
+ bpf_map_inc_with_uref(aux->map);
info->map = aux->map;
info->htab = container_of(aux->map, struct bpf_shtab, map);
return 0;
}
+static void sock_hash_fini_seq_private(void *priv_data)
+{
+ struct sock_hash_seq_info *info = priv_data;
+
+ bpf_map_put_with_uref(info->map);
+}
+
static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
.seq_ops = &sock_hash_seq_ops,
.init_seq_private = sock_hash_init_seq_private,
+ .fini_seq_private = sock_hash_fini_seq_private,
.seq_priv_size = sizeof(struct sock_hash_seq_info),
};
-static int sock_hash_map_btf_id;
+BTF_ID_LIST_SINGLE(sock_hash_map_btf_ids, struct, bpf_shtab)
const struct bpf_map_ops sock_hash_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_hash_alloc,
@@ -1412,8 +1414,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_shtab",
- .map_btf_id = &sock_hash_map_btf_id,
+ .map_btf_id = &sock_hash_map_btf_ids[0],
.iter_seq_info = &sock_hash_iter_seq_info,
};
@@ -1431,38 +1432,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL;
}
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+ u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
- struct bpf_prog **pprog;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- pprog = &progs->msg_parser;
+ *pprog = &progs->msg_parser;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
- pprog = &progs->stream_parser;
+ *pprog = &progs->stream_parser;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
if (progs->skb_verdict)
return -EBUSY;
- pprog = &progs->stream_verdict;
+ *pprog = &progs->stream_verdict;
break;
case BPF_SK_SKB_VERDICT:
if (progs->stream_verdict)
return -EBUSY;
- pprog = &progs->skb_verdict;
+ *pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}
+ return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+ struct bpf_prog *old, u32 which)
+{
+ struct bpf_prog **pprog;
+ int ret;
+
+ ret = sock_map_prog_lookup(map, &pprog, which);
+ if (ret)
+ return ret;
+
if (old)
return psock_replace_prog(pprog, prog, old);
@@ -1470,6 +1483,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
return 0;
}
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+ u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+ struct bpf_prog **pprog;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ u32 id = 0;
+ int ret;
+
+ if (attr->query.query_flags)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ rcu_read_lock();
+
+ ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+ if (ret)
+ goto end;
+
+ prog = *pprog;
+ prog_cnt = !prog ? 0 : 1;
+
+ if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+ goto end;
+
+ /* we do not hold the refcnt, the bpf prog may be released
+ * asynchronously and the id would be set to 0.
+ */
+ id = data_race(prog->aux->id);
+ if (id == 0)
+ prog_cnt = 0;
+
+end:
+ rcu_read_unlock();
+
+ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+ (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+ copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+ ret = -EFAULT;
+
+ fdput(f);
+ return ret;
+}
+
static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
{
switch (link->map->map_type) {
@@ -1513,6 +1577,30 @@ void sock_map_unhash(struct sock *sk)
rcu_read_unlock();
saved_unhash(sk);
}
+EXPORT_SYMBOL_GPL(sock_map_unhash);
+
+void sock_map_destroy(struct sock *sk)
+{
+ void (*saved_destroy)(struct sock *sk);
+ struct sk_psock *psock;
+
+ rcu_read_lock();
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ if (sk->sk_prot->destroy)
+ sk->sk_prot->destroy(sk);
+ return;
+ }
+
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock, false);
+ sk_psock_put(sk, psock);
+ saved_destroy(sk);
+}
+EXPORT_SYMBOL_GPL(sock_map_destroy);
void sock_map_close(struct sock *sk, long timeout)
{
@@ -1536,6 +1624,7 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+EXPORT_SYMBOL_GPL(sock_map_close);
static int sock_map_iter_attach_target(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
@@ -1582,7 +1671,7 @@ static struct bpf_iter_reg sock_map_iter_reg = {
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__sockmap, key),
- PTR_TO_RDONLY_BUF_OR_NULL },
+ PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY },
{ offsetof(struct bpf_iter__sockmap, sk),
PTR_TO_BTF_ID_OR_NULL },
},