diff options
author | Jakub Kicinski <kuba@kernel.org> | 2025-03-25 10:04:55 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2025-03-25 10:06:49 -0700 |
commit | 7bd2e6b74ad56a49459ba84e8d4fa3730055ab5e (patch) | |
tree | f3a934ee1001a31e62f38e7cef6c014efa7f6f56 | |
parent | 51068769cc8c699eaba7d411f214bc969b35708b (diff) | |
parent | b52458652eca5a551ddb55605201b136f091b04d (diff) |
Merge branch 'net-skip-taking-rtnl_lock-for-queue-get'
Jakub Kicinski says:
====================
net: skip taking rtnl_lock for queue GET (prep)
Skip taking rtnl_lock for queue GET ops on devices which opt
into running all ops under the instance lock. In preparation
for performing queue ops without rtnl lock, clarify the protection
of queue-related fields.
v1: https://lore.kernel.org/20250312223507.805719-1-kuba@kernel.org
====================
Link: https://patch.msgid.link/20250324224537.248800-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | include/linux/netdevice.h | 44 | ||||
-rw-r--r-- | include/net/netdev_lock.h | 20 | ||||
-rw-r--r-- | include/net/netdev_rx_queue.h | 2 | ||||
-rw-r--r-- | net/core/dev.c | 11 | ||||
-rw-r--r-- | net/core/devmem.c | 2 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 2 | ||||
-rw-r--r-- | net/core/netdev-genl.c | 9 | ||||
-rw-r--r-- | net/core/netdev_rx_queue.c | 3 | ||||
-rw-r--r-- | net/core/page_pool.c | 7 |
9 files changed, 65 insertions, 35 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f22cca7c03ad..fa79145518d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -710,7 +710,7 @@ struct netdev_queue { * slow- / control-path part */ /* NAPI instance for the queue - * Readers and writers must hold RTNL + * "ops protected", see comment about net_device::lock */ struct napi_struct *napi; @@ -2496,18 +2496,38 @@ struct net_device { * Should always be taken using netdev_lock() / netdev_unlock() helpers. * Drivers are free to use it for other protection. * - * Protects: + * For the drivers that implement shaper or queue API, the scope + * of this lock is expanded to cover most ndo/queue/ethtool/sysfs + * operations. Drivers may opt-in to this behavior by setting + * @request_ops_lock. + * + * @lock protection mixes with rtnl_lock in multiple ways, fields are + * either: + * + * - simply protected by the instance @lock; + * + * - double protected - writers hold both locks, readers hold either; + * + * - ops protected - protected by the lock held around the NDOs + * and other callbacks, that is the instance lock on devices for + * which netdev_need_ops_lock() returns true, otherwise by rtnl_lock; + * + * - double ops protected - always protected by rtnl_lock but for + * devices for which netdev_need_ops_lock() returns true - also + * the instance lock. + * + * Simply protects: * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, * @net_shaper_hierarchy, @reg_state, @threaded * - * Partially protects (writers must hold both @lock and rtnl_lock): + * Double protects: * @up * - * Also protects some fields in struct napi_struct. + * Double ops protects: + * @real_num_rx_queues, @real_num_tx_queues * - * For the drivers that implement shaper or queue API, the scope - * of this lock is expanded to cover most ndo/queue/ethtool/sysfs - * operations. 
+ * Also protects some fields in: + * struct napi_struct, struct netdev_queue, struct netdev_rx_queue * * Ordering: take after rtnl_lock. */ @@ -4062,17 +4082,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev) } int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); - -#ifdef CONFIG_SYSFS int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); -#else -static inline int netif_set_real_num_rx_queues(struct net_device *dev, - unsigned int rxqs) -{ - dev->real_num_rx_queues = rxqs; - return 0; -} -#endif int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq); diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 99631fbd7f54..1c0c9a94cc22 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -5,25 +5,27 @@ #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/rtnetlink.h> static inline bool netdev_trylock(struct net_device *dev) { return mutex_trylock(&dev->lock); } -static inline void netdev_assert_locked(struct net_device *dev) +static inline void netdev_assert_locked(const struct net_device *dev) { lockdep_assert_held(&dev->lock); } -static inline void netdev_assert_locked_or_invisible(struct net_device *dev) +static inline void +netdev_assert_locked_or_invisible(const struct net_device *dev) { if (dev->reg_state == NETREG_REGISTERED || dev->reg_state == NETREG_UNREGISTERING) netdev_assert_locked(dev); } -static inline bool netdev_need_ops_lock(struct net_device *dev) +static inline bool netdev_need_ops_lock(const struct net_device *dev) { bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops; @@ -46,10 +48,20 @@ static inline void netdev_unlock_ops(struct net_device *dev) netdev_unlock(dev); } -static inline void netdev_ops_assert_locked(struct net_device *dev) +static inline void netdev_ops_assert_locked(const struct net_device *dev) { if (netdev_need_ops_lock(dev)) lockdep_assert_held(&dev->lock); + else 
+ ASSERT_RTNL(); +} + +static inline void +netdev_ops_assert_locked_or_invisible(const struct net_device *dev) +{ + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) + netdev_ops_assert_locked(dev); } static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index af40842f229d..b2238b551dce 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -24,7 +24,7 @@ struct netdev_rx_queue { struct xsk_buff_pool *pool; #endif /* NAPI instance for the queue - * Readers and writers must hold RTNL + * "ops protected", see comment about net_device::lock */ struct napi_struct *napi; struct pp_memory_provider_params mp_params; diff --git a/net/core/dev.c b/net/core/dev.c index b6b1c7898281..b597cc27a115 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3130,6 +3130,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->reg_state == NETREG_REGISTERED || dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); + netdev_ops_assert_locked(dev); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, txq); @@ -3160,7 +3161,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -#ifdef CONFIG_SYSFS /** * netif_set_real_num_rx_queues - set actual number of RX queues used * @dev: Network device @@ -3180,6 +3180,7 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); + netdev_ops_assert_locked(dev); rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, rxq); @@ -3191,7 +3192,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) return 0; } EXPORT_SYMBOL(netif_set_real_num_rx_queues); -#endif /** * netif_set_real_num_queues - set actual number of RX and TX queues used @@ -6901,8 +6901,7 @@ void 
netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, if (WARN_ON_ONCE(napi && !napi->dev)) return; - if (dev->reg_state >= NETREG_REGISTERED) - ASSERT_RTNL(); + netdev_ops_assert_locked_or_invisible(dev); switch (type) { case NETDEV_QUEUE_TYPE_RX: @@ -10359,7 +10358,7 @@ u32 dev_get_min_mp_channel_count(const struct net_device *dev) { int i; - ASSERT_RTNL(); + netdev_ops_assert_locked(dev); for (i = dev->real_num_rx_queues - 1; i >= 0; i--) if (dev->_rx[i].mp_params.mp_priv) @@ -11963,9 +11962,9 @@ void unregister_netdevice_many_notify(struct list_head *head, dev_tcx_uninstall(dev); netdev_lock_ops(dev); dev_xdp_uninstall(dev); + dev_memory_provider_uninstall(dev); netdev_unlock_ops(dev); bpf_dev_bound_netdev_unregister(dev); - dev_memory_provider_uninstall(dev); netdev_offload_xstats_disable_all(dev); diff --git a/net/core/devmem.c b/net/core/devmem.c index 6802e82a4d03..ee145a2aa41c 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -128,12 +128,10 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) rxq->mp_params.mp_priv = NULL; rxq->mp_params.mp_ops = NULL; - netdev_lock(binding->dev); rxq_idx = get_netdev_rx_queue_index(rxq); err = netdev_rx_queue_restart(binding->dev, rxq_idx); WARN_ON(err && err != -ENETDOWN); - netdev_unlock(binding->dev); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b6fbe629ccee..1ace0cd01adc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -2148,8 +2148,10 @@ static void remove_queue_kobjects(struct net_device *dev) net_rx_queue_update_kobjects(dev, real_rx, 0); netdev_queue_update_kobjects(dev, real_tx, 0); + netdev_lock_ops(dev); dev->real_num_rx_queues = 0; dev->real_num_tx_queues = 0; + netdev_unlock_ops(dev); #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index a186fea63c09..fd1cfa9707dc 100644 --- a/net/core/netdev-genl.c 
+++ b/net/core/netdev-genl.c @@ -867,6 +867,13 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock_sock; } + if (!netdev_need_ops_lock(netdev)) { + err = -EOPNOTSUPP; + NL_SET_BAD_ATTR(info->extack, + info->attrs[NETDEV_A_DEV_IFINDEX]); + goto err_unlock; + } + if (dev_xdp_prog_count(netdev)) { NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached"); err = -EEXIST; @@ -947,7 +954,9 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) mutex_lock(&priv->lock); list_for_each_entry_safe(binding, temp, &priv->bindings, list) { + netdev_lock(binding->dev); net_devmem_unbind_dmabuf(binding); + netdev_unlock(binding->dev); } mutex_unlock(&priv->lock); } diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index a5b234b33cd5..3af716f77a13 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -92,6 +92,9 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, struct netdev_rx_queue *rxq; int ret; + if (!netdev_need_ops_lock(dev)) + return -EOPNOTSUPP; + if (ifq_idx >= dev->real_num_rx_queues) return -EINVAL; ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index acef1fcd8ddc..7745ad924ae2 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/device.h> +#include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <net/page_pool/memory_provider.h> @@ -279,11 +280,7 @@ static int page_pool_init(struct page_pool *pool, get_device(pool->p.dev); if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { - /* We rely on rtnl_lock()ing to make sure netdev_rx_queue - * configuration doesn't change while we're initializing - * the page_pool. 
- */ - ASSERT_RTNL(); + netdev_assert_locked(pool->slow.netdev); rxq = __netif_get_rx_queue(pool->slow.netdev, pool->slow.queue_idx); pool->mp_priv = rxq->mp_params.mp_priv; |