summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c20
-rw-r--r--net/core/dev.h3
-rw-r--r--net/core/dev_ioctl.c47
-rw-r--r--net/core/devmem.c10
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c46
-rw-r--r--net/core/netpoll.c10
-rw-r--r--net/core/page_pool.c137
-rw-r--r--net/core/pktgen.c7
-rw-r--r--net/core/rtnetlink.c108
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/sock.c26
-rw-r--r--net/core/timestamping.c52
-rw-r--r--net/core/xdp.c327
14 files changed, 563 insertions, 234 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index faa23042df38..e7223972b9aa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -460,7 +460,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
* PP consumers must pay attention to run APIs in the appropriate context
* (e.g. NAPI context).
*/
-static DEFINE_PER_CPU(struct page_pool *, system_page_pool);
+DEFINE_PER_CPU(struct page_pool *, system_page_pool);
#ifdef CONFIG_LOCKDEP
/*
@@ -4933,7 +4933,7 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
}
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
@@ -5035,7 +5035,7 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
}
static int
-netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
+netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog)
{
struct sk_buff *skb = *pskb;
int err, hroom, troom;
@@ -5059,7 +5059,7 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
struct sk_buff *skb = *pskb;
u32 mac_len, act = XDP_DROP;
@@ -5112,7 +5112,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
* and DDOS attacks will be more effective. In-driver-XDP use dedicated TX
* queues, so they do not have this starvation issue.
*/
-void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
@@ -5137,7 +5137,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
+int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
@@ -12154,11 +12154,18 @@ static int net_page_pool_create(int cpuid)
.nid = cpu_to_mem(cpuid),
};
struct page_pool *pp_ptr;
+ int err;
pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
if (IS_ERR(pp_ptr))
return -ENOMEM;
+ err = xdp_reg_page_pool(pp_ptr);
+ if (err) {
+ page_pool_destroy(pp_ptr);
+ return err;
+ }
+
per_cpu(system_page_pool, cpuid) = pp_ptr;
#endif
return 0;
@@ -12292,6 +12299,7 @@ out:
if (!pp_ptr)
continue;
+ xdp_unreg_page_pool(pp_ptr);
page_pool_destroy(pp_ptr);
per_cpu(system_page_pool, i) = NULL;
}
diff --git a/net/core/dev.h b/net/core/dev.h
index d043dee25a68..aa91eed55a40 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -310,5 +310,8 @@ static inline void dev_xmit_recursion_dec(void)
int dev_set_hwtstamp_phylib(struct net_device *dev,
struct kernel_hwtstamp_config *cfg,
struct netlink_ext_ack *extack);
+int dev_get_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg);
+int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg);
#endif
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 46d43b950471..087a57b7e4fa 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -6,6 +6,7 @@
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
#include <linux/phylib_stubs.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/wireless.h>
#include <linux/if_bridge.h>
#include <net/dsa_stubs.h>
@@ -184,7 +185,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
return err;
}
-static int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg)
+int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg)
{
enum hwtstamp_tx_types tx_type;
enum hwtstamp_rx_filters rx_filter;
@@ -266,9 +267,24 @@ static int dev_eth_ioctl(struct net_device *dev,
* -EOPNOTSUPP for phylib for now, which is still more accurate than letting
* the netdev handle the GET request.
*/
-static int dev_get_hwtstamp_phylib(struct net_device *dev,
- struct kernel_hwtstamp_config *cfg)
+int dev_get_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
{
+ struct hwtstamp_provider *hwprov;
+
+ hwprov = rtnl_dereference(dev->hwprov);
+ if (hwprov) {
+ cfg->qualifier = hwprov->desc.qualifier;
+ if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB &&
+ hwprov->phydev)
+ return phy_hwtstamp_get(hwprov->phydev, cfg);
+
+ if (hwprov->source == HWTSTAMP_SOURCE_NETDEV)
+ return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);
+
+ return -EOPNOTSUPP;
+ }
+
if (phy_is_default_hwtstamp(dev->phydev))
return phy_hwtstamp_get(dev->phydev, cfg);
@@ -324,11 +340,32 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
- bool phy_ts = phy_is_default_hwtstamp(dev->phydev);
struct kernel_hwtstamp_config old_cfg = {};
+ struct hwtstamp_provider *hwprov;
+ struct phy_device *phydev;
bool changed = false;
+ bool phy_ts;
int err;
+ hwprov = rtnl_dereference(dev->hwprov);
+ if (hwprov) {
+ if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB &&
+ hwprov->phydev) {
+ phy_ts = true;
+ phydev = hwprov->phydev;
+ } else if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) {
+ phy_ts = false;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ cfg->qualifier = hwprov->desc.qualifier;
+ } else {
+ phy_ts = phy_is_default_hwtstamp(dev->phydev);
+ if (phy_ts)
+ phydev = dev->phydev;
+ }
+
cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV;
if (phy_ts && dev->see_all_hwtstamp_requests) {
@@ -350,7 +387,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
changed = kernel_hwtstamp_config_changed(&old_cfg, cfg);
if (phy_ts) {
- err = phy_hwtstamp_set(dev->phydev, cfg, extack);
+ err = phy_hwtstamp_set(phydev, cfg, extack);
if (err) {
if (changed)
ops->ndo_hwtstamp_set(dev, &old_cfg, NULL);
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 11b91c12ee11..0b6ed7525b22 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -331,11 +331,11 @@ int mp_dmabuf_devmem_init(struct page_pool *pool)
if (!binding)
return -EINVAL;
- if (!pool->dma_map)
- return -EOPNOTSUPP;
-
- if (pool->dma_sync)
- return -EOPNOTSUPP;
+ /* dma-buf dma addresses do not need and should not be used with
+ * dma_sync_for_cpu/device. Force disable dma_sync.
+ */
+ pool->dma_sync = false;
+ pool->dma_sync_for_cpu = false;
if (pool->p.order != 0)
return -E2BIG;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 34185d138c95..e684ba3ebb38 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -770,6 +770,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
[FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
+ [FRA_FLOWLABEL] = { .type = NLA_BE32 },
+ [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 },
};
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/filter.c b/net/core/filter.c
index 834614071727..b957cf57299e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4128,13 +4128,13 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
}
static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
- struct xdp_mem_info *mem_info, bool release)
+ enum xdp_mem_type mem_type, bool release)
{
struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
if (release) {
xsk_buff_del_tail(zc_frag);
- __xdp_return(NULL, mem_info, false, zc_frag);
+ __xdp_return(0, mem_type, false, zc_frag);
} else {
zc_frag->data_end -= shrink;
}
@@ -4143,19 +4143,16 @@ static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
int shrink)
{
- struct xdp_mem_info *mem_info = &xdp->rxq->mem;
+ enum xdp_mem_type mem_type = xdp->rxq->mem.type;
bool release = skb_frag_size(frag) == shrink;
- if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) {
- bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release);
+ if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
+ bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
goto out;
}
- if (release) {
- struct page *page = skb_frag_page(frag);
-
- __xdp_return(page_address(page), mem_info, false, NULL);
- }
+ if (release)
+ __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
out:
return release;
@@ -4357,9 +4354,9 @@ u32 xdp_master_redirect(struct xdp_buff *xdp)
EXPORT_SYMBOL_GPL(xdp_master_redirect);
static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
- struct net_device *dev,
+ const struct net_device *dev,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
@@ -4380,10 +4377,10 @@ err:
return err;
}
-static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
- struct net_device *dev,
- struct xdp_frame *xdpf,
- struct bpf_prog *xdp_prog)
+static __always_inline int
+__xdp_do_redirect_frame(struct bpf_redirect_info *ri, struct net_device *dev,
+ struct xdp_frame *xdpf,
+ const struct bpf_prog *xdp_prog)
{
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
@@ -4452,7 +4449,7 @@ err:
}
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
@@ -4466,7 +4463,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
EXPORT_SYMBOL_GPL(xdp_do_redirect);
int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
- struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
+ struct xdp_frame *xdpf,
+ const struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
@@ -4481,9 +4479,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, void *fwd,
- enum bpf_map_type map_type, u32 map_id,
- u32 flags)
+ const struct bpf_prog *xdp_prog,
+ void *fwd, enum bpf_map_type map_type,
+ u32 map_id, u32 flags)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
struct bpf_map *map;
@@ -4537,7 +4535,8 @@ err:
}
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
+ struct xdp_buff *xdp,
+ const struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
@@ -9079,7 +9078,8 @@ static bool xdp_is_valid_access(int off, int size,
return __is_valid_xdp_access(off, size);
}
-void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act)
+void bpf_warn_invalid_xdp_action(const struct net_device *dev,
+ const struct bpf_prog *prog, u32 act)
{
const u32 act_max = XDP_REDIRECT;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2e459b9d88eb..6f2647b000b8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -390,7 +390,7 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
}
EXPORT_SYMBOL(netpoll_send_skb);
-void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
+int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
int total_len, ip_len, udp_len;
struct sk_buff *skb;
@@ -414,7 +414,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
skb = find_skb(np, total_len + np->dev->needed_tailroom,
total_len - len);
if (!skb)
- return;
+ return -ENOMEM;
skb_copy_to_linear_data(skb, msg, len);
skb_put(skb, len);
@@ -490,7 +490,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
skb->dev = np->dev;
- netpoll_send_skb(np, skb);
+ return (int)netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);
@@ -634,7 +634,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
goto out;
}
- if (!rcu_access_pointer(ndev->npinfo)) {
+ npinfo = rtnl_dereference(ndev->npinfo);
+ if (!npinfo) {
npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
if (!npinfo) {
err = -ENOMEM;
@@ -654,7 +655,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
goto free_npinfo;
}
} else {
- npinfo = rtnl_dereference(ndev->npinfo);
refcount_inc(&npinfo->refcnt);
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index f89cf93f6eb4..9733206d6406 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -201,6 +201,7 @@ static int page_pool_init(struct page_pool *pool,
memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
pool->cpuid = cpuid;
+ pool->dma_sync_for_cpu = true;
/* Validate only known flags were used */
if (pool->slow.flags & ~PP_FLAG_ALL)
@@ -287,6 +288,9 @@ static int page_pool_init(struct page_pool *pool,
}
if (pool->mp_priv) {
+ if (!pool->dma_map || !pool->dma_sync)
+ return -EOPNOTSUPP;
+
err = mp_dmabuf_devmem_init(pool);
if (err) {
pr_warn("%s() mem-provider init failed %d\n", __func__,
@@ -574,7 +578,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
/* For using page_pool replace: alloc_pages() API calls, but provide
* synchronization guarantee for allocation side.
*/
-netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp)
+netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp)
{
netmem_ref netmem;
@@ -590,11 +594,11 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp)
netmem = __page_pool_alloc_pages_slow(pool, gfp);
return netmem;
}
-EXPORT_SYMBOL(page_pool_alloc_netmem);
+EXPORT_SYMBOL(page_pool_alloc_netmems);
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
{
- return netmem_to_page(page_pool_alloc_netmem(pool, gfp));
+ return netmem_to_page(page_pool_alloc_netmems(pool, gfp));
}
EXPORT_SYMBOL(page_pool_alloc_pages);
ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);
@@ -839,69 +843,104 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
}
EXPORT_SYMBOL(page_pool_put_unrefed_page);
-/**
- * page_pool_put_page_bulk() - release references on multiple pages
- * @pool: pool from which pages were allocated
- * @data: array holding page pointers
- * @count: number of pages in @data
- *
- * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring
- * producer lock. If the ptr_ring is full, page_pool_put_page_bulk()
- * will release leftover pages to the page allocator.
- * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
- * completion loop for the XDP_REDIRECT use case.
- *
- * Please note the caller must not use data area after running
- * page_pool_put_page_bulk(), as this function overwrites it.
- */
-void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count)
+static void page_pool_recycle_ring_bulk(struct page_pool *pool,
+ netmem_ref *bulk,
+ u32 bulk_len)
{
- int i, bulk_len = 0;
- bool allow_direct;
bool in_softirq;
+ u32 i;
- allow_direct = page_pool_napi_local(pool);
-
- for (i = 0; i < count; i++) {
- netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i]));
-
- /* It is not the last user for the page frag case */
- if (!page_pool_is_last_ref(netmem))
- continue;
-
- netmem = __page_pool_put_page(pool, netmem, -1, allow_direct);
- /* Approved for bulk recycling in ptr_ring cache */
- if (netmem)
- data[bulk_len++] = (__force void *)netmem;
- }
-
- if (!bulk_len)
- return;
-
- /* Bulk producer into ptr_ring page_pool cache */
+ /* Bulk produce into ptr_ring page_pool cache */
in_softirq = page_pool_producer_lock(pool);
+
for (i = 0; i < bulk_len; i++) {
- if (__ptr_ring_produce(&pool->ring, data[i])) {
+ if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) {
/* ring full */
recycle_stat_inc(pool, ring_full);
break;
}
}
- recycle_stat_add(pool, ring, i);
+
page_pool_producer_unlock(pool, in_softirq);
+ recycle_stat_add(pool, ring, i);
- /* Hopefully all pages was return into ptr_ring */
+ /* Hopefully all pages were returned into ptr_ring */
if (likely(i == bulk_len))
return;
- /* ptr_ring cache full, free remaining pages outside producer lock
- * since put_page() with refcnt == 1 can be an expensive operation
+ /*
+ * ptr_ring cache is full, free remaining pages outside producer lock
+ * since put_page() with refcnt == 1 can be an expensive operation.
*/
for (; i < bulk_len; i++)
- page_pool_return_page(pool, (__force netmem_ref)data[i]);
+ page_pool_return_page(pool, bulk[i]);
+}
+
+/**
+ * page_pool_put_netmem_bulk() - release references on multiple netmems
+ * @data: array holding netmem references
+ * @count: number of entries in @data
+ *
+ * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring
+ * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk()
+ * will release leftover netmems to the memory provider.
+ * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx
+ * completion loop for the XDP_REDIRECT use case.
+ *
+ * Please note the caller must not use data area after running
+ * page_pool_put_netmem_bulk(), as this function overwrites it.
+ */
+void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
+{
+ u32 bulk_len = 0;
+
+ for (u32 i = 0; i < count; i++) {
+ netmem_ref netmem = netmem_compound_head(data[i]);
+
+ if (page_pool_unref_and_test(netmem))
+ data[bulk_len++] = netmem;
+ }
+
+ count = bulk_len;
+ while (count) {
+ netmem_ref bulk[XDP_BULK_QUEUE_SIZE];
+ struct page_pool *pool = NULL;
+ bool allow_direct;
+ u32 foreign = 0;
+
+ bulk_len = 0;
+
+ for (u32 i = 0; i < count; i++) {
+ struct page_pool *netmem_pp;
+ netmem_ref netmem = data[i];
+
+ netmem_pp = netmem_get_pp(netmem);
+ if (unlikely(!pool)) {
+ pool = netmem_pp;
+ allow_direct = page_pool_napi_local(pool);
+ } else if (netmem_pp != pool) {
+ /*
+ * If the netmem belongs to a different
+ * page_pool, save it for another round.
+ */
+ data[foreign++] = netmem;
+ continue;
+ }
+
+ netmem = __page_pool_put_page(pool, netmem, -1,
+ allow_direct);
+ /* Approved for bulk recycling in ptr_ring cache */
+ if (netmem)
+ bulk[bulk_len++] = netmem;
+ }
+
+ if (bulk_len)
+ page_pool_recycle_ring_bulk(pool, bulk, bulk_len);
+
+ count = foreign;
+ }
}
-EXPORT_SYMBOL(page_pool_put_page_bulk);
+EXPORT_SYMBOL(page_pool_put_netmem_bulk);
static netmem_ref page_pool_drain_frag(struct page_pool *pool,
netmem_ref netmem)
@@ -957,7 +996,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
}
if (!netmem) {
- netmem = page_pool_alloc_netmem(pool, gfp);
+ netmem = page_pool_alloc_netmems(pool, gfp);
if (unlikely(!netmem)) {
pool->frag_page = 0;
return 0;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7e23cacbe66e..ee95dbb0539a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3883,17 +3883,14 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
list_add_tail(&t->th_list, &pn->pktgen_threads);
init_completion(&t->start_done);
- p = kthread_create_on_node(pktgen_thread_worker,
- t,
- cpu_to_node(cpu),
- "kpktgend_%d", cpu);
+ p = kthread_create_on_cpu(pktgen_thread_worker, t, cpu, "kpktgend_%d");
if (IS_ERR(p)) {
pr_err("kthread_create_on_node() failed for cpu %d\n", t->cpu);
list_del(&t->th_list);
kfree(t);
return PTR_ERR(p);
}
- kthread_bind(p, cpu);
+
t->tsk = p;
pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d9f959c619d9..6b745096809d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4765,15 +4765,16 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
int *idx,
struct netdev_hw_addr_list *list)
{
+ struct ndo_fdb_dump_context *ctx = (void *)cb->ctx;
struct netdev_hw_addr *ha;
- int err;
u32 portid, seq;
+ int err;
portid = NETLINK_CB(cb->skb).portid;
seq = cb->nlh->nlmsg_seq;
list_for_each_entry(ha, &list->list, list) {
- if (*idx < cb->args[2])
+ if (*idx < ctx->fdb_idx)
goto skip;
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
@@ -4912,18 +4913,16 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net_device *dev;
- struct net_device *br_dev = NULL;
- const struct net_device_ops *ops = NULL;
- const struct net_device_ops *cops = NULL;
+ const struct net_device_ops *ops = NULL, *cops = NULL;
+ struct ndo_fdb_dump_context *ctx = (void *)cb->ctx;
+ struct net_device *dev, *br_dev = NULL;
struct net *net = sock_net(skb->sk);
- struct hlist_head *head;
int brport_idx = 0;
int br_idx = 0;
- int h, s_h;
- int idx = 0, s_idx;
- int err = 0;
int fidx = 0;
+ int err;
+
+ NL_ASSERT_CTX_FITS(struct ndo_fdb_dump_context);
if (cb->strict_check)
err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx,
@@ -4942,70 +4941,51 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
ops = br_dev->netdev_ops;
}
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
-
- if (brport_idx && (dev->ifindex != brport_idx))
- continue;
-
- if (!br_idx) { /* user did not specify a specific bridge */
- if (netif_is_bridge_port(dev)) {
- br_dev = netdev_master_upper_dev_get(dev);
- cops = br_dev->netdev_ops;
- }
- } else {
- if (dev != br_dev &&
- !netif_is_bridge_port(dev))
- continue;
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ if (brport_idx && (dev->ifindex != brport_idx))
+ continue;
- if (br_dev != netdev_master_upper_dev_get(dev) &&
- !netif_is_bridge_master(dev))
- continue;
- cops = ops;
+ if (!br_idx) { /* user did not specify a specific bridge */
+ if (netif_is_bridge_port(dev)) {
+ br_dev = netdev_master_upper_dev_get(dev);
+ cops = br_dev->netdev_ops;
}
+ } else {
+ if (dev != br_dev &&
+ !netif_is_bridge_port(dev))
+ continue;
- if (idx < s_idx)
- goto cont;
+ if (br_dev != netdev_master_upper_dev_get(dev) &&
+ !netif_is_bridge_master(dev))
+ continue;
+ cops = ops;
+ }
- if (netif_is_bridge_port(dev)) {
- if (cops && cops->ndo_fdb_dump) {
- err = cops->ndo_fdb_dump(skb, cb,
- br_dev, dev,
- &fidx);
- if (err == -EMSGSIZE)
- goto out;
- }
+ if (netif_is_bridge_port(dev)) {
+ if (cops && cops->ndo_fdb_dump) {
+ err = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
+ &fidx);
+ if (err == -EMSGSIZE)
+ break;
}
+ }
- if (dev->netdev_ops->ndo_fdb_dump)
- err = dev->netdev_ops->ndo_fdb_dump(skb, cb,
- dev, NULL,
- &fidx);
- else
- err = ndo_dflt_fdb_dump(skb, cb, dev, NULL,
- &fidx);
- if (err == -EMSGSIZE)
- goto out;
+ if (dev->netdev_ops->ndo_fdb_dump)
+ err = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
+ &fidx);
+ else
+ err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, &fidx);
+ if (err == -EMSGSIZE)
+ break;
- cops = NULL;
+ cops = NULL;
- /* reset fdb offset to 0 for rest of the interfaces */
- cb->args[2] = 0;
- fidx = 0;
-cont:
- idx++;
- }
+ /* reset fdb offset to 0 for rest of the interfaces */
+ ctx->fdb_idx = 0;
+ fidx = 0;
}
-out:
- cb->args[0] = h;
- cb->args[1] = idx;
- cb->args[2] = fidx;
+ ctx->fdb_idx = fidx;
return skb->len;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6841e61a6bd0..a441613a1e6c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1009,7 +1009,7 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
EXPORT_SYMBOL(skb_pp_cow_data);
int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
- struct bpf_prog *prog)
+ const struct bpf_prog *prog)
{
if (!prog->aux->xdp_has_frags)
return -EINVAL;
diff --git a/net/core/sock.c b/net/core/sock.c
index be84885f9290..eae2ae70a2e0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -454,6 +454,13 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
return 0;
}
+static bool sk_set_prio_allowed(const struct sock *sk, int val)
+{
+ return ((val >= TC_PRIO_BESTEFFORT && val <= TC_PRIO_INTERACTIVE) ||
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN));
+}
+
static bool sock_needs_netstamp(const struct sock *sk)
{
switch (sk->sk_family) {
@@ -1193,9 +1200,7 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
/* handle options which do not require locking the socket. */
switch (optname) {
case SO_PRIORITY:
- if ((val >= 0 && val <= 6) ||
- sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
- sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ if (sk_set_prio_allowed(sk, val)) {
sock_set_priority(sk, val);
return 0;
}
@@ -1517,6 +1522,10 @@ set_sndbuf:
sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
break;
+ case SO_RCVPRIORITY:
+ sock_valbool_flag(sk, SOCK_RCVPRIORITY, valbool);
+ break;
+
case SO_RXQ_OVFL:
sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
break;
@@ -1945,6 +1954,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
v.val = sock_flag(sk, SOCK_RCVMARK);
break;
+ case SO_RCVPRIORITY:
+ v.val = sock_flag(sk, SOCK_RCVPRIORITY);
+ break;
+
case SO_RXQ_OVFL:
v.val = sock_flag(sk, SOCK_RXQ_OVFL);
break;
@@ -2945,6 +2958,13 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
case SCM_RIGHTS:
case SCM_CREDENTIALS:
break;
+ case SO_PRIORITY:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+ if (!sk_set_prio_allowed(sk, *(u32 *)CMSG_DATA(cmsg)))
+ return -EPERM;
+ sockc->priority = *(u32 *)CMSG_DATA(cmsg);
+ break;
default:
return -EINVAL;
}
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 3717fb152ecc..a50a7ef49ae8 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -9,6 +9,7 @@
#include <linux/ptp_classify.h>
#include <linux/skbuff.h>
#include <linux/export.h>
+#include <linux/ptp_clock_kernel.h>
static unsigned int classify(const struct sk_buff *skb)
{
@@ -21,19 +22,39 @@ static unsigned int classify(const struct sk_buff *skb)
void skb_clone_tx_timestamp(struct sk_buff *skb)
{
+ struct hwtstamp_provider *hwprov;
struct mii_timestamper *mii_ts;
+ struct phy_device *phydev;
struct sk_buff *clone;
unsigned int type;
- if (!skb->sk || !skb->dev ||
- !phy_is_default_hwtstamp(skb->dev->phydev))
+ if (!skb->sk || !skb->dev)
return;
+ rcu_read_lock();
+ hwprov = rcu_dereference(skb->dev->hwprov);
+ if (hwprov) {
+ if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB ||
+ !hwprov->phydev) {
+ rcu_read_unlock();
+ return;
+ }
+
+ phydev = hwprov->phydev;
+ } else {
+ phydev = skb->dev->phydev;
+ if (!phy_is_default_hwtstamp(phydev)) {
+ rcu_read_unlock();
+ return;
+ }
+ }
+ rcu_read_unlock();
+
type = classify(skb);
if (type == PTP_CLASS_NONE)
return;
- mii_ts = skb->dev->phydev->mii_ts;
+ mii_ts = phydev->mii_ts;
if (likely(mii_ts->txtstamp)) {
clone = skb_clone_sk(skb);
if (!clone)
@@ -45,12 +66,33 @@ EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
bool skb_defer_rx_timestamp(struct sk_buff *skb)
{
+ struct hwtstamp_provider *hwprov;
struct mii_timestamper *mii_ts;
+ struct phy_device *phydev;
unsigned int type;
- if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev))
+ if (!skb->dev)
return false;
+ rcu_read_lock();
+ hwprov = rcu_dereference(skb->dev->hwprov);
+ if (hwprov) {
+ if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB ||
+ !hwprov->phydev) {
+ rcu_read_unlock();
+ return false;
+ }
+
+ phydev = hwprov->phydev;
+ } else {
+ phydev = skb->dev->phydev;
+ if (!phy_is_default_hwtstamp(phydev)) {
+ rcu_read_unlock();
+ return false;
+ }
+ }
+ rcu_read_unlock();
+
if (skb_headroom(skb) < ETH_HLEN)
return false;
@@ -63,7 +105,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return false;
- mii_ts = skb->dev->phydev->mii_ts;
+ mii_ts = phydev->mii_ts;
if (likely(mii_ts->rxtstamp))
return mii_ts->rxtstamp(mii_ts, skb, type);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index bcc5551c6424..67b53fc7191e 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -358,6 +358,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
if (IS_ERR(xdp_alloc))
return PTR_ERR(xdp_alloc);
+ if (type == MEM_TYPE_XSK_BUFF_POOL && allocator)
+ xsk_pool_set_rxq_info(allocator, xdp_rxq);
+
if (trace_mem_connect_enabled() && xdp_alloc)
trace_mem_connect(xdp_alloc, xdp_rxq);
return 0;
@@ -365,33 +368,87 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
+/**
+ * xdp_reg_page_pool - register &page_pool as a memory provider for XDP
+ * @pool: &page_pool to register
+ *
+ * Can be used to register pools manually without connecting to any XDP RxQ
+ * info, so that the XDP layer will be aware of them. Then, they can be
+ * attached to an RxQ info manually via xdp_rxq_info_attach_page_pool().
+ *
+ * Return: %0 on success, -errno on error.
+ */
+int xdp_reg_page_pool(struct page_pool *pool)
+{
+ struct xdp_mem_info mem;
+
+ return xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pool);
+}
+EXPORT_SYMBOL_GPL(xdp_reg_page_pool);
+
+/**
+ * xdp_unreg_page_pool - unregister &page_pool from the memory providers list
+ * @pool: &page_pool to unregister
+ *
+ * A shorthand for manual unregistering page pools. If the pool was previously
+ * attached to an RxQ info, it must be detached first.
+ */
+void xdp_unreg_page_pool(const struct page_pool *pool)
+{
+ struct xdp_mem_info mem = {
+ .type = MEM_TYPE_PAGE_POOL,
+ .id = pool->xdp_mem_id,
+ };
+
+ xdp_unreg_mem_model(&mem);
+}
+EXPORT_SYMBOL_GPL(xdp_unreg_page_pool);
+
+/**
+ * xdp_rxq_info_attach_page_pool - attach registered pool to RxQ info
+ * @xdp_rxq: XDP RxQ info to attach the pool to
+ * @pool: pool to attach
+ *
+ * If the pool was registered manually, this function must be called instead
+ * of xdp_rxq_info_reg_mem_model() to connect it to the RxQ info.
+ */
+void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq,
+ const struct page_pool *pool)
+{
+ struct xdp_mem_info mem = {
+ .type = MEM_TYPE_PAGE_POOL,
+ .id = pool->xdp_mem_id,
+ };
+
+ xdp_rxq_info_attach_mem_model(xdp_rxq, &mem);
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
+
/* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
* while still leveraging this protection. The @napi_direct boolean
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
-void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
- struct xdp_buff *xdp)
+void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
+ bool napi_direct, struct xdp_buff *xdp)
{
- struct page *page;
-
- switch (mem->type) {
+ switch (mem_type) {
case MEM_TYPE_PAGE_POOL:
- page = virt_to_head_page(data);
+ netmem = netmem_compound_head(netmem);
if (napi_direct && xdp_return_frame_no_direct())
napi_direct = false;
/* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
* as mem->type knows this a page_pool page
*/
- page_pool_put_full_page(page->pp, page, napi_direct);
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem,
+ napi_direct);
break;
case MEM_TYPE_PAGE_SHARED:
- page_frag_free(data);
+ page_frag_free(__netmem_address(netmem));
break;
case MEM_TYPE_PAGE_ORDER0:
- page = virt_to_page(data); /* Assumes order0 page*/
- put_page(page);
+ put_page(__netmem_to_page(netmem));
break;
case MEM_TYPE_XSK_BUFF_POOL:
/* NB! Only valid from an xdp_buff! */
@@ -399,7 +456,7 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
break;
default:
/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
- WARN(1, "Incorrect XDP memory type (%d) usage", mem->type);
+ WARN(1, "Incorrect XDP memory type (%d) usage", mem_type);
break;
}
}
@@ -407,38 +464,34 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
void xdp_return_frame(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;
- int i;
if (likely(!xdp_frame_has_frags(xdpf)))
goto out;
sinfo = xdp_get_shared_info_from_frame(xdpf);
- for (i = 0; i < sinfo->nr_frags; i++) {
- struct page *page = skb_frag_page(&sinfo->frags[i]);
+ for (u32 i = 0; i < sinfo->nr_frags; i++)
+ __xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
+ false, NULL);
- __xdp_return(page_address(page), &xdpf->mem, false, NULL);
- }
out:
- __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+ __xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;
- int i;
if (likely(!xdp_frame_has_frags(xdpf)))
goto out;
sinfo = xdp_get_shared_info_from_frame(xdpf);
- for (i = 0; i < sinfo->nr_frags; i++) {
- struct page *page = skb_frag_page(&sinfo->frags[i]);
+ for (u32 i = 0; i < sinfo->nr_frags; i++)
+ __xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
+ true, NULL);
- __xdp_return(page_address(page), &xdpf->mem, true, NULL);
- }
out:
- __xdp_return(xdpf->data, &xdpf->mem, true, NULL);
+ __xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
@@ -452,46 +505,19 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
* xdp_frame_bulk is usually stored/allocated on the function
* call-stack to avoid locking penalties.
*/
-void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
-{
- struct xdp_mem_allocator *xa = bq->xa;
-
- if (unlikely(!xa || !bq->count))
- return;
-
- page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count);
- /* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
- bq->count = 0;
-}
-EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
/* Must be called with rcu_read_lock held */
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq)
{
- struct xdp_mem_info *mem = &xdpf->mem;
- struct xdp_mem_allocator *xa;
-
- if (mem->type != MEM_TYPE_PAGE_POOL) {
+ if (xdpf->mem_type != MEM_TYPE_PAGE_POOL) {
xdp_return_frame(xdpf);
return;
}
- xa = bq->xa;
- if (unlikely(!xa)) {
- xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
- bq->count = 0;
- bq->xa = xa;
- }
-
if (bq->count == XDP_BULK_QUEUE_SIZE)
xdp_flush_frame_bulk(bq);
- if (unlikely(mem->id != xa->mem.id)) {
- xdp_flush_frame_bulk(bq);
- bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
- }
-
if (unlikely(xdp_frame_has_frags(xdpf))) {
struct skb_shared_info *sinfo;
int i;
@@ -500,31 +526,40 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
for (i = 0; i < sinfo->nr_frags; i++) {
skb_frag_t *frag = &sinfo->frags[i];
- bq->q[bq->count++] = skb_frag_address(frag);
+ bq->q[bq->count++] = skb_frag_netmem(frag);
if (bq->count == XDP_BULK_QUEUE_SIZE)
xdp_flush_frame_bulk(bq);
}
}
- bq->q[bq->count++] = xdpf->data;
+ bq->q[bq->count++] = virt_to_netmem(xdpf->data);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
+/**
+ * xdp_return_frag -- free one XDP frag or decrement its refcount
+ * @netmem: network memory reference to release
+ * @xdp: &xdp_buff to release the frag for
+ */
+void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp)
+{
+ __xdp_return(netmem, xdp->rxq->mem.type, true, NULL);
+}
+EXPORT_SYMBOL_GPL(xdp_return_frag);
+
void xdp_return_buff(struct xdp_buff *xdp)
{
struct skb_shared_info *sinfo;
- int i;
if (likely(!xdp_buff_has_frags(xdp)))
goto out;
sinfo = xdp_get_shared_info_from_buff(xdp);
- for (i = 0; i < sinfo->nr_frags; i++) {
- struct page *page = skb_frag_page(&sinfo->frags[i]);
+ for (u32 i = 0; i < sinfo->nr_frags; i++)
+ __xdp_return(skb_frag_netmem(&sinfo->frags[i]),
+ xdp->rxq->mem.type, true, xdp);
- __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
- }
out:
- __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
+ __xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, true, xdp);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
@@ -570,7 +605,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
xdpf->headroom = 0;
xdpf->metasize = metasize;
xdpf->frame_sz = PAGE_SIZE;
- xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
+ xdpf->mem_type = MEM_TYPE_PAGE_ORDER0;
xsk_buff_free(xdp);
return xdpf;
@@ -594,6 +629,173 @@ int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk);
+/**
+ * xdp_build_skb_from_buff - create an skb from &xdp_buff
+ * @xdp: &xdp_buff to convert to an skb
+ *
+ * Perform common operations to create a new skb to pass up the stack from
+ * &xdp_buff: allocate an skb head from the NAPI percpu cache, initialize
+ * skb data pointers and offsets, set the recycle bit if the buff is
+ * PP-backed, Rx queue index, protocol and update frags info.
+ *
+ * Return: new &sk_buff on success, %NULL on error.
+ */
+struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp)
+{
+ const struct xdp_rxq_info *rxq = xdp->rxq;
+ const struct skb_shared_info *sinfo;
+ struct sk_buff *skb;
+ u32 nr_frags = 0;
+ int metalen;
+
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
+
+ skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ __skb_put(skb, xdp->data_end - xdp->data);
+
+ metalen = xdp->data - xdp->data_meta;
+ if (metalen > 0)
+ skb_metadata_set(skb, metalen);
+
+ if (rxq->mem.type == MEM_TYPE_PAGE_POOL)
+ skb_mark_for_recycle(skb);
+
+ skb_record_rx_queue(skb, rxq->queue_index);
+
+ if (unlikely(nr_frags)) {
+ u32 tsize;
+
+ tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz;
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size, tsize,
+ xdp_buff_is_frag_pfmemalloc(xdp));
+ }
+
+ skb->protocol = eth_type_trans(skb, rxq->dev);
+
+ return skb;
+}
+EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
+
+/**
+ * xdp_copy_frags_from_zc - copy frags from XSk buff to skb
+ * @skb: skb to copy frags to
+ * @xdp: XSk &xdp_buff from which the frags will be copied
+ * @pp: &page_pool backing page allocation, if available
+ *
+ * Copy all frags from XSk &xdp_buff to the skb to pass it up the stack.
+ * Allocate a new buffer for each frag, copy it and attach to the skb.
+ *
+ * Return: true on success, false on netmem allocation fail.
+ */
+static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
+ const struct xdp_buff *xdp,
+ struct page_pool *pp)
+{
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+ const struct skb_shared_info *xinfo;
+ u32 nr_frags, tsize = 0;
+ bool pfmemalloc = false;
+
+ xinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = xinfo->nr_frags;
+
+ for (u32 i = 0; i < nr_frags; i++) {
+ u32 len = skb_frag_size(&xinfo->frags[i]);
+ u32 offset, truesize = len;
+ netmem_ref netmem;
+
+ netmem = page_pool_dev_alloc_netmem(pp, &offset, &truesize);
+ if (unlikely(!netmem)) {
+ sinfo->nr_frags = i;
+ return false;
+ }
+
+ memcpy(__netmem_address(netmem),
+ __netmem_address(xinfo->frags[i].netmem),
+ LARGEST_ALIGN(len));
+ __skb_fill_netmem_desc_noacc(sinfo, i, netmem, offset, len);
+
+ tsize += truesize;
+ pfmemalloc |= netmem_is_pfmemalloc(netmem);
+ }
+
+ xdp_update_skb_shared_info(skb, nr_frags, xinfo->xdp_frags_size,
+ tsize, pfmemalloc);
+
+ return true;
+}
+
+/**
+ * xdp_build_skb_from_zc - create an skb from XSk &xdp_buff
+ * @xdp: source XSk buff
+ *
+ * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb
+ * head, new buffer for the head, copy the data and initialize the skb fields.
+ * If there are frags, allocate new buffers for them and copy.
+ * Buffers are allocated from the system percpu pools to try recycling them.
+ * If new skb was built successfully, @xdp is returned to XSk pool's freelist.
+ * On error, it remains untouched and the caller must take care of this.
+ *
+ * Return: new &sk_buff on success, %NULL on error.
+ */
+struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
+{
+ struct page_pool *pp = this_cpu_read(system_page_pool);
+ const struct xdp_rxq_info *rxq = xdp->rxq;
+ u32 len = xdp->data_end - xdp->data_meta;
+ u32 truesize = xdp->frame_sz;
+ struct sk_buff *skb;
+ int metalen;
+ void *data;
+
+ if (!IS_ENABLED(CONFIG_PAGE_POOL))
+ return NULL;
+
+ data = page_pool_dev_alloc_va(pp, &truesize);
+ if (unlikely(!data))
+ return NULL;
+
+ skb = napi_build_skb(data, truesize);
+ if (unlikely(!skb)) {
+ page_pool_free_va(pp, data, true);
+ return NULL;
+ }
+
+ skb_mark_for_recycle(skb);
+ skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+
+ memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len));
+
+ metalen = xdp->data - xdp->data_meta;
+ if (metalen > 0) {
+ skb_metadata_set(skb, metalen);
+ __skb_pull(skb, metalen);
+ }
+
+ skb_record_rx_queue(skb, rxq->queue_index);
+
+ if (unlikely(xdp_buff_has_frags(xdp)) &&
+ unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
+ napi_consume_skb(skb, true);
+ return NULL;
+ }
+
+ xsk_buff_free(xdp);
+
+ skb->protocol = eth_type_trans(skb, rxq->dev);
+
+ return skb;
+}
+EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
+
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
struct net_device *dev)
@@ -640,7 +842,7 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
* - RX ring dev queue index (skb_record_rx_queue)
*/
- if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
+ if (xdpf->mem_type == MEM_TYPE_PAGE_POOL)
skb_mark_for_recycle(skb);
/* Allow SKB to reuse area used by xdp_frame */
@@ -687,8 +889,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
nxdpf = addr;
nxdpf->data = addr + headroom;
nxdpf->frame_sz = PAGE_SIZE;
- nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
- nxdpf->mem.id = 0;
+ nxdpf->mem_type = MEM_TYPE_PAGE_ORDER0;
return nxdpf;
}