Diffstat (limited to 'include')
-rw-r--r--  include/linux/bpf-cgroup.h      |  2
-rw-r--r--  include/linux/bpf.h             | 41
-rw-r--r--  include/linux/filter.h          | 37
-rw-r--r--  include/linux/inet_diag.h       | 27
-rw-r--r--  include/linux/kernel.h          |  7
-rw-r--r--  include/linux/netlink.h         |  4
-rw-r--r--  include/linux/preempt.h         | 30
-rw-r--r--  include/net/bpf_sk_storage.h    | 27
-rw-r--r--  include/uapi/linux/bpf.h        |  2
-rw-r--r--  include/uapi/linux/inet_diag.h  |  5
-rw-r--r--  include/uapi/linux/sock_diag.h  | 26
11 files changed, 179 insertions(+), 29 deletions(-)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a11d5b7dbbf3..a7cd5c7a2509 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -36,7 +36,7 @@ struct bpf_cgroup_storage_map;
struct bpf_storage_buffer {
struct rcu_head rcu;
- char data[0];
+ char data[];
};
struct bpf_cgroup_storage {
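
[note] The [0] -> [] conversions in this series replace the old GNU zero-length array with a standard C flexible array member; neither form contributes to sizeof(), so allocation sizing is unchanged. A minimal sketch of how such a trailing buffer is sized at allocation time (alloc_storage_buffer() is a hypothetical helper, not part of this patch):

	#include <linux/slab.h>
	#include <linux/overflow.h>

	/* Hypothetical helper: allocate a bpf_storage_buffer with 'bytes' of
	 * trailing data.  struct_size() computes sizeof(*buf) + bytes with
	 * overflow checking; the flexible array member itself has no size.
	 */
	static struct bpf_storage_buffer *alloc_storage_buffer(size_t bytes)
	{
		struct bpf_storage_buffer *buf;

		buf = kmalloc(struct_size(buf, data, bytes), GFP_KERNEL);
		return buf;
	}
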
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49b1a70e12c8..6015a4daf118 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -859,7 +859,7 @@ struct bpf_prog_array_item {
struct bpf_prog_array {
struct rcu_head rcu;
- struct bpf_prog_array_item items[0];
+ struct bpf_prog_array_item items[];
};
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
- preempt_disable(); \
+ migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
@@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
} \
_out: \
rcu_read_unlock(); \
- preempt_enable(); \
+ migrate_enable(); \
_ret; \
})
@@ -932,7 +932,7 @@ _out: \
u32 ret; \
u32 _ret = 1; \
u32 _cn = 0; \
- preempt_disable(); \
+ migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
@@ -944,7 +944,7 @@ _out: \
_item++; \
} \
rcu_read_unlock(); \
- preempt_enable(); \
+ migrate_enable(); \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
@@ -961,6 +961,36 @@ _out: \
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
+/*
+ * Block execution of BPF programs attached to instrumentation (perf,
+ * kprobes, tracepoints) to prevent deadlocks on map operations as any of
+ * these events can happen inside a region which holds a map bucket lock
+ * and can deadlock on it.
+ *
+ * Use the preemption safe inc/dec variants on RT because migrate disable
+ * is preemptible on RT and preemption in the middle of the RMW operation
+ * might lead to inconsistent state. Use the raw variants for non RT
+ * kernels as migrate_disable() maps to preempt_disable() so the slightly
+ * more expensive save operation can be avoided.
+ */
+static inline void bpf_disable_instrumentation(void)
+{
+ migrate_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_inc(bpf_prog_active);
+ else
+ __this_cpu_inc(bpf_prog_active);
+}
+
+static inline void bpf_enable_instrumentation(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_dec(bpf_prog_active);
+ else
+ __this_cpu_dec(bpf_prog_active);
+ migrate_enable();
+}
+
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
@@ -993,6 +1023,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
+struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
void bpf_map_inc(struct bpf_map *map);
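
[note] A hedged sketch of how the new bpf_disable_instrumentation() pair is meant to be used; the function below is illustrative only, not one of the call sites converted later in the series. The guard pins the task to one CPU and marks bpf_prog_active so that perf/kprobe/tracepoint programs fired inside the critical section back off instead of deadlocking on a map bucket lock:

	#include <linux/bpf.h>

	/* Illustrative only: guard a syscall-path map update against recursion
	 * from instrumentation-attached BPF programs.
	 */
	static int update_elem_guarded(struct bpf_map *map, void *key,
				       void *value, u64 flags)
	{
		int ret;

		bpf_disable_instrumentation();	/* migrate_disable() + bpf_prog_active++ */
		ret = map->ops->map_update_elem(map, key, value, flags);
		bpf_enable_instrumentation();	/* bpf_prog_active-- + migrate_enable() */

		return ret;
	}
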
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f349e2c0884c..43b5e455d2f5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
u32 ret; \
- cant_sleep(); \
+ cant_migrate(); \
if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
struct bpf_prog_stats *stats; \
u64 start = sched_clock(); \
@@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
} \
ret; })
-#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \
- bpf_dispatcher_nopfunc)
+#define BPF_PROG_RUN(prog, ctx) \
+ __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)
+
+/*
+ * Use in preemptible and therefore migratable context to make sure that
+ * the execution of the BPF program runs on one CPU.
+ *
+ * This uses migrate_disable/enable() explicitly to document that the
+ * invocation of a BPF program does not require reentrancy protection
+ * against a BPF program which is invoked from a preempting task.
+ *
+ * For non RT enabled kernels migrate_disable/enable() maps to
+ * preempt_disable/enable(), i.e. it disables also preemption.
+ */
+static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
+ const void *ctx)
+{
+ u32 ret;
+
+ migrate_disable();
+ ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
+ migrate_enable();
+ return ret;
+}
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
@@ -655,6 +677,7 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
return qdisc_skb_cb(skb)->data;
}
+/* Must be invoked with migration disabled */
static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
struct sk_buff *skb)
{
@@ -680,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
{
u32 res;
- preempt_disable();
+ migrate_disable();
res = __bpf_prog_run_save_cb(prog, skb);
- preempt_enable();
+ migrate_enable();
return res;
}
@@ -695,9 +718,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
if (unlikely(prog->cb_access))
memset(cb_data, 0, BPF_SKB_CB_LEN);
- preempt_disable();
- res = BPF_PROG_RUN(prog, skb);
- preempt_enable();
+ res = bpf_prog_run_pin_on_cpu(prog, skb);
return res;
}
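
[note] A small sketch of the two calling conventions this filter.h change establishes (both functions are illustrative names, not kernel call sites): callers that already run with migration disabled keep using BPF_PROG_RUN(), which now asserts cant_migrate(); callers in fully preemptible task context use bpf_prog_run_pin_on_cpu() so the program and its per-CPU state stay on one CPU:

	/* Illustrative only */
	static u32 run_from_softirq(const struct bpf_prog *prog, struct sk_buff *skb)
	{
		/* RX/softirq path: migration is already impossible here, so the
		 * raw macro is fine and cant_migrate() in __BPF_PROG_RUN() holds.
		 */
		return BPF_PROG_RUN(prog, skb);
	}

	static u32 run_from_task(const struct bpf_prog *prog, const void *ctx)
	{
		/* Preemptible task context: pin to one CPU for the duration of
		 * the program; on !RT this still maps to preempt_disable().
		 */
		return bpf_prog_run_pin_on_cpu(prog, ctx);
	}
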
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 39faaaf843e1..e4ba25d63913 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -15,11 +15,9 @@ struct netlink_callback;
struct inet_diag_handler {
void (*dump)(struct sk_buff *skb,
struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r,
- struct nlattr *bc);
+ const struct inet_diag_req_v2 *r);
- int (*dump_one)(struct sk_buff *in_skb,
- const struct nlmsghdr *nlh,
+ int (*dump_one)(struct netlink_callback *cb,
const struct inet_diag_req_v2 *req);
void (*idiag_get_info)(struct sock *sk,
@@ -40,18 +38,25 @@ struct inet_diag_handler {
__u16 idiag_info_size;
};
+struct bpf_sk_storage_diag;
+struct inet_diag_dump_data {
+ struct nlattr *req_nlas[__INET_DIAG_REQ_MAX];
+#define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE]
+#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
+
+ struct bpf_sk_storage_diag *bpf_stg_diag;
+};
+
struct inet_connection_sock;
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
- struct sk_buff *skb, const struct inet_diag_req_v2 *req,
- struct user_namespace *user_ns,
- u32 pid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh, bool net_admin);
+ struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *req,
+ u16 nlmsg_flags, bool net_admin);
void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r,
- struct nlattr *bc);
+ const struct inet_diag_req_v2 *r);
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
- struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct netlink_callback *cb,
const struct inet_diag_req_v2 *req);
struct sock *inet_diag_find_one_icsk(struct net *net,
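
[note] A sketch of what a handler's dump() looks like under the new signature, assuming (as the new struct inet_diag_dump_data and its inet_diag_nla_bc alias suggest) that the inet_diag core parses the request attributes once and stores them in cb->data; the explicit 'bc' argument the old prototype carried is recovered from there:

	/* Illustrative only: not an actual handler in this series. */
	static void example_diag_dump(struct sk_buff *skb,
				      struct netlink_callback *cb,
				      const struct inet_diag_req_v2 *r)
	{
		struct inet_diag_dump_data *cb_data = cb->data;
		struct nlattr *bc = cb_data->inet_diag_nla_bc;	/* old 'bc' arg */

		/* ... walk sockets, apply the bytecode filter in 'bc',
		 * fill replies into 'skb' ...
		 */
	}
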
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0d9db2a14f44..9b7a8d74a9d6 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -257,6 +257,13 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
#define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
+#ifndef CONFIG_PREEMPT_RT
+# define cant_migrate() cant_sleep()
+#else
+ /* Placeholder for now */
+# define cant_migrate() do { } while (0)
+#endif
+
/**
* abs - return absolute value of an argument
* @x: the value. If it is unsigned type, it is converted to signed type first.
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 205fa7b1f07a..788969ccbbde 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -188,10 +188,10 @@ struct netlink_callback {
struct module *module;
struct netlink_ext_ack *extack;
u16 family;
- u16 min_dump_alloc;
- bool strict_check;
u16 answer_flags;
+ u32 min_dump_alloc;
unsigned int prev_seq, seq;
+ bool strict_check;
union {
u8 ctx[48];
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index bbb68dba37cc..bc3f1aecaa19 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -322,4 +322,34 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
#endif
+/**
+ * migrate_disable - Prevent migration of the current task
+ *
+ * Maps to preempt_disable() which also disables preemption. Use
+ * migrate_disable() to annotate that the intent is to prevent migration,
+ * but not necessarily preemption.
+ *
+ * Can be invoked nested like preempt_disable() and needs the corresponding
+ * number of migrate_enable() invocations.
+ */
+static __always_inline void migrate_disable(void)
+{
+ preempt_disable();
+}
+
+/**
+ * migrate_enable - Allow migration of the current task
+ *
+ * Counterpart to migrate_disable().
+ *
+ * As migrate_disable() can be invoked nested, only the outermost invocation
+ * reenables migration.
+ *
+ * Currently mapped to preempt_enable().
+ */
+static __always_inline void migrate_enable(void)
+{
+ preempt_enable();
+}
+
#endif /* __LINUX_PREEMPT_H */
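
[note] A minimal sketch of the nesting rule stated in the kernel-doc above; only the outermost migrate_enable() makes migration possible again:

	static void nested_example(void)
	{
		migrate_disable();
		migrate_disable();	/* nested: task stays pinned */
		/* ... per-CPU work ... */
		migrate_enable();	/* still pinned, outer section continues */
		migrate_enable();	/* migration (and, on !RT, preemption) allowed again */
	}
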
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 8e4f831d2e52..5036c94c0503 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -10,14 +10,41 @@ void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+struct bpf_sk_storage_diag;
+struct sk_buff;
+struct nlattr;
+struct sock;
+
#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
+struct bpf_sk_storage_diag *
+bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs);
+void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag);
+int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
+ struct sock *sk, struct sk_buff *skb,
+ int stg_array_type,
+ unsigned int *res_diag_size);
#else
static inline int bpf_sk_storage_clone(const struct sock *sk,
struct sock *newsk)
{
return 0;
}
+static inline struct bpf_sk_storage_diag *
+bpf_sk_storage_diag_alloc(const struct nlattr *nla)
+{
+ return NULL;
+}
+static inline void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
+{
+}
+static inline int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
+ struct sock *sk, struct sk_buff *skb,
+ int stg_array_type,
+ unsigned int *res_diag_size)
+{
+ return 0;
+}
#endif
#endif /* _BPF_SK_STORAGE_H */
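
[note] A hedged sketch of the intended call sequence for the new diag API. The function is illustrative and compresses into one place what a real dumper would spread across its start/fill/done callbacks; it also assumes ERR_PTR-style error returns from the allocator and treats the NULL return of the !CONFIG_BPF_SYSCALL stub as "nothing to dump":

	#include <linux/err.h>
	#include <linux/inet_diag.h>
	#include <net/bpf_sk_storage.h>

	/* Illustrative only */
	static int example_dump_sk_storages(struct sock *sk, struct sk_buff *skb,
					    const struct nlattr *bpf_stgs_attr)
	{
		struct bpf_sk_storage_diag *diag;
		unsigned int res_size = 0;
		int err;

		/* Parse the requested map FDs (ideally once per dump). */
		diag = bpf_sk_storage_diag_alloc(bpf_stgs_attr);
		if (IS_ERR_OR_NULL(diag))
			return PTR_ERR_OR_ZERO(diag);

		/* Emit one nested attribute holding sk's requested storages. */
		err = bpf_sk_storage_diag_put(diag, sk, skb,
					      INET_DIAG_SK_BPF_STORAGES,
					      &res_size);

		bpf_sk_storage_diag_free(diag);
		return err;
	}
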
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 906e9f2752db..8e98ced0963b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -73,7 +73,7 @@ struct bpf_insn {
/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
- __u8 data[0]; /* Arbitrary size */
+ __u8 data[]; /* Arbitrary size */
};
struct bpf_cgroup_storage_key {
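
[note] Same [0] -> [] story on the UAPI side; sizing from user space is unchanged because neither form occupies space in the struct. A small hedged sketch of building a lookup key for an IPv4 LPM trie map (make_v4_key() is a hypothetical helper):

	#include <stdlib.h>
	#include <string.h>
	#include <linux/bpf.h>

	/* Hypothetical helper: prefixlen up to 32 for AF_INET, data holds the
	 * address bytes in network byte order.
	 */
	static struct bpf_lpm_trie_key *make_v4_key(__u32 prefixlen, const void *addr)
	{
		struct bpf_lpm_trie_key *key = malloc(sizeof(*key) + 4);

		if (!key)
			return NULL;
		key->prefixlen = prefixlen;
		memcpy(key->data, addr, 4);
		return key;
	}
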
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index a1ff345b3f33..75dffd78363a 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -64,9 +64,11 @@ struct inet_diag_req_raw {
enum {
INET_DIAG_REQ_NONE,
INET_DIAG_REQ_BYTECODE,
+ INET_DIAG_REQ_SK_BPF_STORAGES,
+ __INET_DIAG_REQ_MAX,
};
-#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE
+#define INET_DIAG_REQ_MAX (__INET_DIAG_REQ_MAX - 1)
/* Bytecode is sequence of 4 byte commands followed by variable arguments.
* All the commands identified by "code" are conditional jumps forward:
@@ -154,6 +156,7 @@ enum {
INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */
INET_DIAG_MD5SIG,
INET_DIAG_ULP_INFO,
+ INET_DIAG_SK_BPF_STORAGES,
__INET_DIAG_MAX,
};
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index e5925009a652..5f74a5f6091d 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -36,4 +36,30 @@ enum sknetlink_groups {
};
#define SKNLGRP_MAX (__SKNLGRP_MAX - 1)
+enum {
+ SK_DIAG_BPF_STORAGE_REQ_NONE,
+ SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
+ __SK_DIAG_BPF_STORAGE_REQ_MAX,
+};
+
+#define SK_DIAG_BPF_STORAGE_REQ_MAX (__SK_DIAG_BPF_STORAGE_REQ_MAX - 1)
+
+enum {
+ SK_DIAG_BPF_STORAGE_REP_NONE,
+ SK_DIAG_BPF_STORAGE,
+ __SK_DIAG_BPF_STORAGE_REP_MAX,
+};
+
+#define SK_DIAB_BPF_STORAGE_REP_MAX (__SK_DIAG_BPF_STORAGE_REP_MAX - 1)
+
+enum {
+ SK_DIAG_BPF_STORAGE_NONE,
+ SK_DIAG_BPF_STORAGE_PAD,
+ SK_DIAG_BPF_STORAGE_MAP_ID,
+ SK_DIAG_BPF_STORAGE_MAP_VALUE,
+ __SK_DIAG_BPF_STORAGE_MAX,
+};
+
+#define SK_DIAG_BPF_STORAGE_MAX (__SK_DIAG_BPF_STORAGE_MAX - 1)
+
#endif /* _UAPI__SOCK_DIAG_H__ */
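
[note] For orientation, the nesting these three enums imply: the first holds request attributes, the second is the per-socket reply container, the third the per-storage attributes. The exact layout is defined by the sock_diag/bpf_sk_storage implementation rather than this header alone, so treat this as a reading aid:

	INET_DIAG_REQ_SK_BPF_STORAGES          (nested, in the request)
	  SK_DIAG_BPF_STORAGE_REQ_MAP_FD       u32 map fd, may be repeated

	INET_DIAG_SK_BPF_STORAGES              (nested, in the reply, per socket)
	  SK_DIAG_BPF_STORAGE                  (nested, one per requested storage)
	    SK_DIAG_BPF_STORAGE_MAP_ID         u32 map id
	    SK_DIAG_BPF_STORAGE_MAP_VALUE      raw map value bytes
	    SK_DIAG_BPF_STORAGE_PAD            alignment padding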