summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/bpf_sk_storage.h14
-rw-r--r--include/net/cfg80211.h3
-rw-r--r--include/net/dst.h2
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/inet_sock.h7
-rw-r--r--include/net/ipv6_stubs.h3
-rw-r--r--include/net/mac80211.h35
-rw-r--r--include/net/netlink.h58
-rw-r--r--include/net/request_sock.h9
-rw-r--r--include/net/sock.h6
-rw-r--r--include/net/tcp.h69
-rw-r--r--include/net/xdp_sock.h30
-rw-r--r--include/net/xdp_sock_drv.h122
-rw-r--r--include/net/xsk_buff_pool.h53
14 files changed, 259 insertions, 154 deletions
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 5036c94c0503..119f4c9c3a9c 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,13 +3,27 @@
#ifndef _BPF_SK_STORAGE_H
#define _BPF_SK_STORAGE_H
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_local_storage.h>
+
struct sock;
void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+extern const struct bpf_func_proto sk_storage_get_btf_proto;
+extern const struct bpf_func_proto sk_storage_delete_btf_proto;
+struct bpf_local_storage_elem;
struct bpf_sk_storage_diag;
struct sk_buff;
struct nlattr;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d9e6b9fbd95b..c9bce9bba511 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -678,7 +678,10 @@ struct cfg80211_bitrate_mask {
u32 legacy;
u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
u16 vht_mcs[NL80211_VHT_NSS_MAX];
+ u16 he_mcs[NL80211_HE_NSS_MAX];
enum nl80211_txrate_gi gi;
+ enum nl80211_he_gi he_gi;
+ enum nl80211_he_ltf he_ltf;
} control[NUM_NL80211_BANDS];
};
diff --git a/include/net/dst.h b/include/net/dst.h
index 6ae2e625050d..8ea8812b0b41 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -214,7 +214,7 @@ dst_allfrag(const struct dst_entry *dst)
static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
- return dst_metric(dst, RTAX_LOCK) & (1<<metric);
+ return dst_metric(dst, RTAX_LOCK) & (1 << metric);
}
static inline void dst_hold(struct dst_entry *dst)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index aa8893c68c50..c738abeb3265 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -86,6 +86,8 @@ struct inet_connection_sock {
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
+ __u32 icsk_rto_min;
+ __u32 icsk_delack_max;
__u32 icsk_pmtu_cookie;
const struct tcp_congestion_ops *icsk_ca_ops;
const struct inet_connection_sock_af_ops *icsk_af_ops;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a3702d1d4875..89163ef8cf4b 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -296,13 +296,6 @@ static inline void __inet_sk_copy_descendant(struct sock *sk_to,
memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
sk_from->sk_prot->obj_size - ancestor_size);
}
-#if !(IS_ENABLED(CONFIG_IPV6))
-static inline void inet_sk_copy_descendant(struct sock *sk_to,
- const struct sock *sk_from)
-{
- __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock));
-}
-#endif
int inet_sk_rebuild_header(struct sock *sk);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index d7a7f7c81e7b..8fce558b5fea 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -63,6 +63,9 @@ struct ipv6_stub {
int encap_type);
#endif
struct neigh_table *nd_tbl;
+
+ int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *));
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 66e2bfd165e8..ec148b3e9c41 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3736,7 +3736,7 @@ enum ieee80211_reconfig_type {
* decremented, and when they reach 1 the driver must call
* ieee80211_csa_finish(). Drivers which use ieee80211_beacon_get()
* get the csa counter decremented by mac80211, but must check if it is
- * 1 using ieee80211_csa_is_complete() after the beacon has been
+ * 1 using ieee80211_beacon_counter_is_complete() after the beacon has been
* transmitted and then call ieee80211_csa_finish().
* If the CSA count starts as zero or 1, this function will not be called,
* since there won't be any time to beacon before the switch anyway.
@@ -4763,21 +4763,21 @@ void ieee80211_tx_status_8023(struct ieee80211_hw *hw,
*/
void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
-#define IEEE80211_MAX_CSA_COUNTERS_NUM 2
+#define IEEE80211_MAX_CNTDWN_COUNTERS_NUM 2
/**
* struct ieee80211_mutable_offsets - mutable beacon offsets
* @tim_offset: position of TIM element
* @tim_length: size of TIM element
- * @csa_counter_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets
- * to CSA counters. This array can contain zero values which
+ * @cntdwn_counter_offs: array of IEEE80211_MAX_CNTDWN_COUNTERS_NUM offsets
+ * to countdown counters. This array can contain zero values which
* should be ignored.
*/
struct ieee80211_mutable_offsets {
u16 tim_offset;
u16 tim_length;
- u16 csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM];
+ u16 cntdwn_counter_offs[IEEE80211_MAX_CNTDWN_COUNTERS_NUM];
};
/**
@@ -4846,31 +4846,31 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
}
/**
- * ieee80211_csa_update_counter - request mac80211 to decrement the csa counter
+ * ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
*
- * The csa counter should be updated after each beacon transmission.
+ * The beacon counter should be updated after each beacon transmission.
* This function is called implicitly when
* ieee80211_beacon_get/ieee80211_beacon_get_tim are called, however if the
* beacon frames are generated by the device, the driver should call this
- * function after each beacon transmission to sync mac80211's csa counters.
+ * function after each beacon transmission to sync mac80211's beacon countdown.
*
- * Return: new csa counter value
+ * Return: new countdown value
*/
-u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif);
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif);
/**
- * ieee80211_csa_set_counter - request mac80211 to set csa counter
+ * ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @counter: the new value for the counter
*
- * The csa counter can be changed by the device, this API should be
+ * The beacon countdown can be changed by the device, this API should be
* used by the device driver to update csa counter in mac80211.
*
- * It should never be used together with ieee80211_csa_update_counter(),
+ * It should never be used together with ieee80211_beacon_update_cntdwn(),
* as it will cause a race condition around the counter value.
*/
-void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
+void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter);
/**
* ieee80211_csa_finish - notify mac80211 about channel switch
@@ -4883,13 +4883,12 @@ void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
void ieee80211_csa_finish(struct ieee80211_vif *vif);
/**
- * ieee80211_csa_is_complete - find out if counters reached 1
+ * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
*
- * This function returns whether the channel switch counters reached zero.
+ * This function returns whether the countdown reached zero.
*/
-bool ieee80211_csa_is_complete(struct ieee80211_vif *vif);
-
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
/**
* ieee80211_proberesp_get - retrieve a Probe Response template
diff --git a/include/net/netlink.h b/include/net/netlink.h
index c0411f14fb53..fdd317f8fde4 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -181,8 +181,6 @@ enum {
NLA_S64,
NLA_BITFIELD32,
NLA_REJECT,
- NLA_EXACT_LEN,
- NLA_MIN_LEN,
__NLA_TYPE_MAX,
};
@@ -199,11 +197,11 @@ struct netlink_range_validation_signed {
enum nla_policy_validation {
NLA_VALIDATE_NONE,
NLA_VALIDATE_RANGE,
+ NLA_VALIDATE_RANGE_WARN_TOO_LONG,
NLA_VALIDATE_MIN,
NLA_VALIDATE_MAX,
NLA_VALIDATE_RANGE_PTR,
NLA_VALIDATE_FUNCTION,
- NLA_VALIDATE_WARN_TOO_LONG,
};
/**
@@ -222,7 +220,7 @@ enum nla_policy_validation {
* NLA_NUL_STRING Maximum length of string (excluding NUL)
* NLA_FLAG Unused
* NLA_BINARY Maximum length of attribute payload
- * NLA_MIN_LEN Minimum length of attribute payload
+ * (but see also below with the validation type)
* NLA_NESTED,
* NLA_NESTED_ARRAY Length verification is done by checking len of
* nested header (or empty); len field is used if
@@ -237,11 +235,6 @@ enum nla_policy_validation {
* just like "All other"
* NLA_BITFIELD32 Unused
* NLA_REJECT Unused
- * NLA_EXACT_LEN Attribute should have exactly this length, otherwise
- * it is rejected or warned about, the latter happening
- * if and only if the `validation_type' is set to
- * NLA_VALIDATE_WARN_TOO_LONG.
- * NLA_MIN_LEN Minimum length of attribute payload
* All other Minimum length of attribute payload
*
* Meaning of validation union:
@@ -296,6 +289,11 @@ enum nla_policy_validation {
* pointer to a struct netlink_range_validation_signed
* that indicates the min/max values.
* Use NLA_POLICY_FULL_RANGE_SIGNED().
+ *
+ * NLA_BINARY If the validation type is like the ones for integers
+ * above, then the min/max length (not value like for
+ * integers) of the attribute is enforced.
+ *
* All other Unused - but note that it's a union
*
* Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
@@ -309,7 +307,7 @@ enum nla_policy_validation {
* static const struct nla_policy my_policy[ATTR_MAX+1] = {
* [ATTR_FOO] = { .type = NLA_U16 },
* [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
- * [ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) },
+ * [ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)),
* [ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags),
* };
*/
@@ -335,9 +333,10 @@ struct nla_policy {
* nesting validation starts here.
*
* Additionally, it means that NLA_UNSPEC is actually NLA_REJECT
- * for any types >= this, so need to use NLA_MIN_LEN to get the
- * previous pure { .len = xyz } behaviour. The advantage of this
- * is that types not specified in the policy will be rejected.
+ * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to
+ * get the previous pure { .len = xyz } behaviour. The advantage
+ * of this is that types not specified in the policy will be
+ * rejected.
*
* For completely new families it should be set to 1 so that the
* validation is enforced for all attributes. For existing ones
@@ -349,12 +348,6 @@ struct nla_policy {
};
};
-#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len }
-#define NLA_POLICY_EXACT_LEN_WARN(_len) \
- { .type = NLA_EXACT_LEN, .len = _len, \
- .validation_type = NLA_VALIDATE_WARN_TOO_LONG, }
-#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_MIN_LEN, .len = _len }
-
#define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN)
#define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
@@ -370,19 +363,21 @@ struct nla_policy {
{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
-#define NLA_ENSURE_UINT_TYPE(tp) \
+#define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp) \
(__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 || \
tp == NLA_U32 || tp == NLA_U64 || \
- tp == NLA_MSECS) + tp)
+ tp == NLA_MSECS || \
+ tp == NLA_BINARY) + tp)
#define NLA_ENSURE_SINT_TYPE(tp) \
(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16 || \
tp == NLA_S32 || tp == NLA_S64) + tp)
-#define NLA_ENSURE_INT_TYPE(tp) \
+#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \
(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 || \
tp == NLA_S16 || tp == NLA_U16 || \
tp == NLA_S32 || tp == NLA_U32 || \
tp == NLA_S64 || tp == NLA_U64 || \
- tp == NLA_MSECS) + tp)
+ tp == NLA_MSECS || \
+ tp == NLA_BINARY) + tp)
#define NLA_ENSURE_NO_VALIDATION_PTR(tp) \
(__NLA_ENSURE(tp != NLA_BITFIELD32 && \
tp != NLA_REJECT && \
@@ -390,14 +385,14 @@ struct nla_policy {
tp != NLA_NESTED_ARRAY) + tp)
#define NLA_POLICY_RANGE(tp, _min, _max) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_RANGE, \
.min = _min, \
.max = _max \
}
#define NLA_POLICY_FULL_RANGE(tp, _range) { \
- .type = NLA_ENSURE_UINT_TYPE(tp), \
+ .type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_RANGE_PTR, \
.range = _range, \
}
@@ -409,13 +404,13 @@ struct nla_policy {
}
#define NLA_POLICY_MIN(tp, _min) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_MIN, \
.min = _min, \
}
#define NLA_POLICY_MAX(tp, _max) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_MAX, \
.max = _max, \
}
@@ -427,6 +422,15 @@ struct nla_policy {
.len = __VA_ARGS__ + 0, \
}
+#define NLA_POLICY_EXACT_LEN(_len) NLA_POLICY_RANGE(NLA_BINARY, _len, _len)
+#define NLA_POLICY_EXACT_LEN_WARN(_len) { \
+ .type = NLA_BINARY, \
+ .validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG, \
+ .min = _len, \
+ .max = _len \
+}
+#define NLA_POLICY_MIN_LEN(_len) NLA_POLICY_MIN(NLA_BINARY, _len)
+
/**
* struct nl_info - netlink source information
* @nlh: Netlink message header of original request
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b2eb8b4ba697..29e41ff3ec93 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -41,6 +41,13 @@ struct request_sock_ops {
int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
+struct saved_syn {
+ u32 mac_hdrlen;
+ u32 network_hdrlen;
+ u32 tcp_hdrlen;
+ u8 data[];
+};
+
/* struct request_sock - mini sock to represent a connection request
*/
struct request_sock {
@@ -60,7 +67,7 @@ struct request_sock {
struct timer_list rsk_timer;
const struct request_sock_ops *rsk_ops;
struct sock *sk;
- u32 *saved_syn;
+ struct saved_syn *saved_syn;
u32 secid;
u32 peer_secid;
};
diff --git a/include/net/sock.h b/include/net/sock.h
index 064637d1ddf6..7dd3051551fb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -246,7 +246,7 @@ struct sock_common {
/* public: */
};
-struct bpf_sk_storage;
+struct bpf_local_storage;
/**
* struct sock - network layer representation of sockets
@@ -517,7 +517,7 @@ struct sock {
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
#ifdef CONFIG_BPF_SYSCALL
- struct bpf_sk_storage __rcu *sk_bpf_storage;
+ struct bpf_local_storage __rcu *sk_bpf_storage;
#endif
struct rcu_head sk_rcu;
};
@@ -1478,7 +1478,7 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
{
if (!sk_has_account(sk))
return true;
- return size<= sk->sk_forward_alloc ||
+ return size <= sk->sk_forward_alloc ||
__sk_mem_schedule(sk, size, SK_MEM_RECV) ||
skb_pfmemalloc(skb);
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eab6c7510b5b..e85d564446c6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -394,7 +394,7 @@ void tcp_metrics_init(void);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
__poll_t tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
@@ -455,7 +455,8 @@ enum tcp_synack_type {
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
int tcp_disconnect(struct sock *sk, int flags);
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
@@ -699,7 +700,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
static inline u32 tcp_rto_min(struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = TCP_RTO_MIN;
+ u32 rto_min = inet_csk(sk)->icsk_rto_min;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
@@ -941,16 +942,6 @@ INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
#endif
-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
- return true;
-#endif
- return false;
-}
-
/* TCP_SKB_CB reference means this can not be used from early demux */
static inline int tcp_v4_sdif(struct sk_buff *skb)
{
@@ -2035,7 +2026,8 @@ struct tcp_request_sock_ops {
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl, struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
};
extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
@@ -2233,6 +2225,55 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
+#ifdef CONFIG_CGROUP_BPF
+/* Copy the listen sk's HDR_OPT_CB flags to its child.
+ *
+ * During 3-Way-HandShake, the synack is usually sent from
+ * the listen sk with the HDR_OPT_CB flags set so that
+ * bpf-prog will be called to write the BPF hdr option.
+ *
+ * In fastopen, the child sk is used to send synack instead
+ * of the listen sk. Thus, inheriting the HDR_OPT_CB flags
+ * from the listen sk gives the bpf-prog a chance to write
+ * BPF hdr option in the synack pkt during fastopen.
+ *
+ * Both fastopen and non-fastopen child will inherit the
+ * HDR_OPT_CB flags to keep the bpf-prog having a consistent
+ * behavior when deciding to clear this cb flags (or not)
+ * during the PASSIVE_ESTABLISHED_CB.
+ *
+ * In the future, other cb flags could be inherited here also.
+ */
+static inline void bpf_skops_init_child(const struct sock *sk,
+ struct sock *child)
+{
+ tcp_sk(child)->bpf_sock_ops_cb_flags =
+ tcp_sk(sk)->bpf_sock_ops_cb_flags &
+ (BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+}
+
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+ skops->skb = skb;
+ skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_child(const struct sock *sk,
+ struct sock *child)
+{
+}
+
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+}
+#endif
+
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
* program does not support the chosen operation or there is no BPF
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index c9d87cc40c11..1a9559c0cbdd 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -18,25 +18,19 @@ struct xsk_queue;
struct xdp_buff;
struct xdp_umem {
- struct xsk_queue *fq;
- struct xsk_queue *cq;
- struct xsk_buff_pool *pool;
+ void *addrs;
u64 size;
u32 headroom;
u32 chunk_size;
+ u32 chunks;
+ u32 npgs;
struct user_struct *user;
refcount_t users;
- struct work_struct work;
- struct page **pgs;
- u32 npgs;
- u16 queue_id;
- u8 need_wakeup;
u8 flags;
- int id;
- struct net_device *dev;
bool zc;
- spinlock_t xsk_tx_list_lock;
- struct list_head xsk_tx_list;
+ struct page **pgs;
+ int id;
+ struct list_head xsk_dma_list;
};
struct xsk_map {
@@ -48,10 +42,11 @@ struct xsk_map {
struct xdp_sock {
/* struct sock must be the first member of struct xdp_sock */
struct sock sk;
- struct xsk_queue *rx;
+ struct xsk_queue *rx ____cacheline_aligned_in_smp;
struct net_device *dev;
struct xdp_umem *umem;
struct list_head flush_node;
+ struct xsk_buff_pool *pool;
u16 queue_id;
bool zc;
enum {
@@ -59,10 +54,9 @@ struct xdp_sock {
XSK_BOUND,
XSK_UNBOUND,
} state;
- /* Protects multiple processes in the control path */
- struct mutex mutex;
+
struct xsk_queue *tx ____cacheline_aligned_in_smp;
- struct list_head list;
+ struct list_head tx_list;
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
* in the SKB destructor callback.
*/
@@ -77,6 +71,10 @@ struct xdp_sock {
struct list_head map_list;
/* Protects map_list */
spinlock_t map_list_lock;
+ /* Protects multiple processes in the control path */
+ struct mutex mutex;
+ struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
+ struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
};
#ifdef CONFIG_XDP_SOCKETS
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index ccf848f7efa4..5b1ee8a9976d 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -11,47 +11,50 @@
#ifdef CONFIG_XDP_SOCKETS
-void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
-void xsk_umem_consume_tx_done(struct xdp_umem *umem);
-struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
-void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
-bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
+void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
+bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
+void xsk_tx_release(struct xsk_buff_pool *pool);
+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
+ u16 queue_id);
+void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool);
+bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool);
-static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
{
- return XDP_PACKET_HEADROOM + umem->headroom;
+ return XDP_PACKET_HEADROOM + pool->headroom;
}
-static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
{
- return umem->chunk_size;
+ return pool->chunk_size;
}
-static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
{
- return xsk_umem_get_chunk_size(umem) - xsk_umem_get_headroom(umem);
+ return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
}
-static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem,
+static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
- xp_set_rxq_info(umem->pool, rxq);
+ xp_set_rxq_info(pool, rxq);
}
-static inline void xsk_buff_dma_unmap(struct xdp_umem *umem,
+static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
unsigned long attrs)
{
- xp_dma_unmap(umem->pool, attrs);
+ xp_dma_unmap(pool, attrs);
}
-static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev,
- unsigned long attrs)
+static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
+ struct device *dev, unsigned long attrs)
{
- return xp_dma_map(umem->pool, dev, attrs, umem->pgs, umem->npgs);
+ struct xdp_umem *umem = pool->umem;
+
+ return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
}
static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
@@ -68,14 +71,14 @@ static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
return xp_get_frame_dma(xskb);
}
-static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem)
+static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
{
- return xp_alloc(umem->pool);
+ return xp_alloc(pool);
}
-static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count)
+static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
- return xp_can_alloc(umem->pool, count);
+ return xp_can_alloc(pool, count);
}
static inline void xsk_buff_free(struct xdp_buff *xdp)
@@ -85,100 +88,104 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
xp_free(xskb);
}
-static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr)
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
+ u64 addr)
{
- return xp_raw_get_dma(umem->pool, addr);
+ return xp_raw_get_dma(pool, addr);
}
-static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr)
+static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
- return xp_raw_get_data(umem->pool, addr);
+ return xp_raw_get_data(pool, addr);
}
-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ if (!pool->dma_need_sync)
+ return;
+
xp_dma_sync_for_cpu(xskb);
}
-static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
+static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_addr_t dma,
size_t size)
{
- xp_dma_sync_for_device(umem->pool, dma, size);
+ xp_dma_sync_for_device(pool, dma, size);
}
#else
-static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+static inline void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
}
-static inline bool xsk_umem_consume_tx(struct xdp_umem *umem,
- struct xdp_desc *desc)
+static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool,
+ struct xdp_desc *desc)
{
return false;
}
-static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+static inline void xsk_tx_release(struct xsk_buff_pool *pool)
{
}
-static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
- u16 queue_id)
+static inline struct xsk_buff_pool *
+xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id)
{
return NULL;
}
-static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
}
-static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
{
}
-static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
{
}
-static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
{
}
-static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+static inline bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
{
return false;
}
-static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
{
return 0;
}
-static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
{
return 0;
}
-static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
{
return 0;
}
-static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem,
+static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
}
-static inline void xsk_buff_dma_unmap(struct xdp_umem *umem,
+static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
unsigned long attrs)
{
}
-static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev,
- unsigned long attrs)
+static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
+ struct device *dev, unsigned long attrs)
{
return 0;
}
@@ -193,12 +200,12 @@ static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
return 0;
}
-static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem)
+static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
{
return NULL;
}
-static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count)
+static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
return false;
}
@@ -207,21 +214,22 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
{
}
-static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr)
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
+ u64 addr)
{
return 0;
}
-static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr)
+static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
return NULL;
}
-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
}
-static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
+static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_addr_t dma,
size_t size)
{
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 6842990e2712..0140d086dc84 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -13,6 +13,8 @@ struct xsk_buff_pool;
struct xdp_rxq_info;
struct xsk_queue;
struct xdp_desc;
+struct xdp_umem;
+struct xdp_sock;
struct device;
struct page;
@@ -26,34 +28,68 @@ struct xdp_buff_xsk {
struct list_head free_list_node;
};
+struct xsk_dma_map {
+ dma_addr_t *dma_pages;
+ struct device *dev;
+ struct net_device *netdev;
+ refcount_t users;
+ struct list_head list; /* Protected by the RTNL_LOCK */
+ u32 dma_pages_cnt;
+ bool dma_need_sync;
+};
+
struct xsk_buff_pool {
- struct xsk_queue *fq;
+ /* Members only used in the control path first. */
+ struct device *dev;
+ struct net_device *netdev;
+ struct list_head xsk_tx_list;
+ /* Protects modifications to the xsk_tx_list */
+ spinlock_t xsk_tx_list_lock;
+ refcount_t users;
+ struct xdp_umem *umem;
+ struct work_struct work;
struct list_head free_list;
+ u32 heads_cnt;
+ u16 queue_id;
+
+ /* Data path members as close to free_heads at the end as possible. */
+ struct xsk_queue *fq ____cacheline_aligned_in_smp;
+ struct xsk_queue *cq;
+ /* For performance reasons, each buff pool has its own array of dma_pages
+ * even when they are identical.
+ */
dma_addr_t *dma_pages;
struct xdp_buff_xsk *heads;
u64 chunk_mask;
u64 addrs_cnt;
u32 free_list_cnt;
u32 dma_pages_cnt;
- u32 heads_cnt;
u32 free_heads_cnt;
u32 headroom;
u32 chunk_size;
u32 frame_len;
+ u8 cached_need_wakeup;
+ bool uses_need_wakeup;
bool dma_need_sync;
bool unaligned;
void *addrs;
- struct device *dev;
struct xdp_buff_xsk *free_heads[];
};
/* AF_XDP core. */
-struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
- u32 chunk_size, u32 headroom, u64 size,
- bool unaligned);
-void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq);
+struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
+ struct xdp_umem *umem);
+int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
+ u16 queue_id, u16 flags);
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+ struct net_device *dev, u16 queue_id);
void xp_destroy(struct xsk_buff_pool *pool);
void xp_release(struct xdp_buff_xsk *xskb);
+void xp_get_pool(struct xsk_buff_pool *pool);
+void xp_put_pool(struct xsk_buff_pool *pool);
+void xp_clear_dev(struct xsk_buff_pool *pool);
+void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
+void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
/* AF_XDP, and XDP core. */
void xp_free(struct xdp_buff_xsk *xskb);
@@ -80,9 +116,6 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
{
- if (!xskb->pool->dma_need_sync)
- return;
-
xp_dma_sync_for_cpu_slow(xskb);
}