Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en')
49 files changed, 1631 insertions, 516 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 66180ffb4606..08fd1370a8b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -178,16 +178,28 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); } +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz) +{ +#define UMR_WQE_BULK (2) + return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1); +} + u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(params, xsk) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + + if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) + return linear_headroom; + + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return linear_headroom; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + return linear_headroom; - return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ? - mlx5e_get_linear_rq_headroom(params, xsk) : 0; + return 0; } u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -196,13 +208,13 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par u16 stop_room; stop_room = mlx5e_tls_get_stop_room(mdev, params); - stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); if (is_mpwqe) /* A MPWQE can take up to the maximum-sized WQE + all the normal * stop room can be taken if a new packet breaks the active * MPWQE session and allocates its WQEs right away. */ - stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); return stop_room; } @@ -359,12 +371,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, { /* Prefer Striding RQ, unless any of the following holds: * - Striding RQ configuration is not possible/supported. - * - Slow PCI heuristic. + * - CQE compression is ON, and stride_index mini_cqe layout is not supported. * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. * * No XSK params: checking the availability of striding RQ in general. */ - if (!slow_pci_heuristic(mdev) && + if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) || + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) && mlx5e_striding_rq_possible(mdev, params) && (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || !mlx5e_rx_is_linear_skb(params, NULL))) @@ -385,16 +398,29 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e }; } +static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp) +{ + if (xdp) + /* XDP requires all fragments to be of the same size. */ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size; + + /* Optimization for small packets: the last fragment is bigger than the others. 
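+ * (Worked example under assumed values PAGE_SIZE == 4096, frag_size ==
+ * DEFAULT_FRAG_SIZE == 2048 and MLX5E_MAX_RX_FRAGS == 4: the XDP branch
+ * above gives first_frag_size + 3 * 2048 bytes of MTU budget, while the
+ * return below gives first_frag_size + 2 * 2048 + 4096, i.e. 2048 extra
+ * bytes because the last fragment may span a whole page.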
*/ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; +} + #define DEFAULT_FRAG_SIZE (2048) -static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) +static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_frags_info *info) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); int frag_size_max = DEFAULT_FRAG_SIZE; + int first_frag_size_max; u32 buf_size = 0; + u16 headroom; + int max_mtu; int i; if (mlx5_fpga_is_ipsec_device(mdev)) @@ -413,21 +439,48 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, goto out; } - if (byte_count > PAGE_SIZE + - (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) + headroom = mlx5e_get_linear_rq_headroom(params, xsk); + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, + params->xdp_prog); + if (byte_count > max_mtu || params->xdp_prog) { frag_size_max = PAGE_SIZE; + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, + params->xdp_prog); + if (byte_count > max_mtu) { + mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n", + params->sw_mtu, max_mtu); + return -EINVAL; + } + } i = 0; while (buf_size < byte_count) { int frag_size = byte_count - buf_size; - if (i < MLX5E_MAX_RX_FRAGS - 1) + if (i == 0) + frag_size = min(frag_size, first_frag_size_max); + else if (i < MLX5E_MAX_RX_FRAGS - 1) frag_size = min(frag_size, frag_size_max); info->arr[i].frag_size = frag_size; - info->arr[i].frag_stride = roundup_pow_of_two(frag_size); - buf_size += frag_size; + + if (params->xdp_prog) { + /* XDP multi buffer expects fragments of the same size. */ + info->arr[i].frag_stride = frag_size_max; + } else { + if (i == 0) { + /* Ensure that headroom and tailroom are included. 
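+ * (Illustration under an assumed 256-byte headroom and 1500-byte first
+ * fragment: the stride becomes roundup_pow_of_two(256 + 1500 +
+ * SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) rather than
+ * roundup_pow_of_two(1500), so both the headroom in front of the data
+ * and the shared-info tailroom behind it fit in the stride.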
*/ + frag_size += headroom; + frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); + } + i++; } info->num_frags = i; @@ -437,6 +490,8 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, out: info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); info->log_num_frags = order_base_2(info->num_frags); + + return 0; } static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) @@ -533,6 +588,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); int ndsegs = 1; + int err; switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { @@ -572,7 +628,9 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); + err = mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); + if (err) + return err; ndsegs = param->frags_info.num_frags; } @@ -717,7 +775,7 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); u32 wqebbs; - max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE; + max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; rest = max_hd_per_wqe % max_klm_per_umr; @@ -774,10 +832,10 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(mdev, param); - param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ + param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */ param->is_tls = mlx5e_accel_is_ktls_rx(mdev); if (param->is_tls) - param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */ + param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); MLX5_SET(wq, wq, log_wq_sz, log_wq_size); mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); @@ -785,6 +843,7 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_sq_param *param) { void *sqc = param->sqc; @@ -793,6 +852,7 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); + param->is_xdp_mb = !mlx5e_rx_is_linear_skb(params, xsk); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } @@ -812,7 +872,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); mlx5e_build_sq_param(mdev, params, &cparam->txq_sq); - mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); + mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq); mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq); mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 433e6967692d..f5c46e78eebc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -31,6 +31,7 @@ struct mlx5e_sq_param { struct 
mlx5_wq_param wq; bool is_mpw; bool is_tls; + bool is_xdp_mb; u16 stop_room; }; @@ -129,6 +130,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); @@ -154,6 +156,7 @@ void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, struct mlx5e_cq_param *param); void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_sq_param *param); int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 82baafd3c00c..335b20b6383b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -195,7 +195,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, int node; sq->pdev = c->pdev; - sq->tstamp = c->tstamp; sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; @@ -449,7 +448,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + param->stop_room = mlx5e_stop_room_for_max_wqe(mdev); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index c1e07496c89c..9db677e9ca9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -50,7 +50,6 @@ static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv) struct mlx5e_qos_node { struct hlist_node hnode; - struct rcu_head rcu; struct mlx5e_qos_node *parent; u64 rate; u32 bw_share; @@ -132,7 +131,11 @@ static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node __clear_bit(node->qid, priv->htb.qos_used_qids); mlx5e_update_tx_netdev_queues(priv); } - kfree_rcu(node, rcu); + /* Make sure this qid is no longer selected by mlx5e_select_queue, so + * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue. + */ + synchronize_net(); + kfree(node); } /* TX datapath API */ @@ -273,10 +276,18 @@ err_free_sq: static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) { struct mlx5e_txqsq *sq; + u16 qid; sq = mlx5e_get_qos_sq(priv, node->qid); - WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq); + qid = mlx5e_qid_from_qos(&priv->channels, node->qid); + + /* If it's a new queue, it will be marked as started at this point. + * Stop it before updating txq2sq. + */ + mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid)); + + priv->txq2sq[qid] = sq; /* Make the change to txq2sq visible before the queue is started. * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, @@ -299,8 +310,13 @@ static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); mlx5e_deactivate_txqsq(sq); - /* The queue is disabled, no synchronization with datapath is needed. 
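 * (an assumption this patch drops: a deactivated HTB SQ can now be
 * reactivated, so the NULL store that follows must be published with
 * the smp_wmb() added below)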
*/ priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; + + /* Make the change to txq2sq visible before the queue is started again. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); } static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) @@ -485,9 +501,11 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (opened) { + mlx5e_selq_prepare(&priv->selq, &priv->channels.params, true); + err = mlx5e_qos_alloc_queues(priv, &priv->channels); if (err) - return err; + goto err_cancel_selq; } root = mlx5e_sw_node_create_root(priv); @@ -508,6 +526,9 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, */ smp_store_release(&priv->htb.maj_id, htb_maj_id); + if (opened) + mlx5e_selq_apply(&priv->selq); + return 0; err_sw_node_delete: @@ -516,6 +537,8 @@ err_sw_node_delete: err_free_queues: if (opened) mlx5e_qos_close_all_queues(&priv->channels); +err_cancel_selq: + mlx5e_selq_cancel(&priv->selq); return err; } @@ -526,8 +549,15 @@ int mlx5e_htb_root_del(struct mlx5e_priv *priv) qos_dbg(priv->mdev, "TC_HTB_DESTROY\n"); + /* Wait until real_num_tx_queues is updated for mlx5e_select_queue, + * so that we can safely switch to its non-HTB non-PTP fastpath. + */ + synchronize_net(); + + mlx5e_selq_prepare(&priv->selq, &priv->channels.params, false); + mlx5e_selq_apply(&priv->selq); + WRITE_ONCE(priv->htb.maj_id, 0); - synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */ root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT); if (!root) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h index b7558907ba20..5d9bd91d86c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -18,7 +18,6 @@ int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv); /* TX datapath API */ int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid); -struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid); /* SQ lifecycle */ int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 0991345c4ae5..86fa0bdbee36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -263,14 +263,14 @@ int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) INIT_LIST_HEAD(&uplink_priv->unready_flows); /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + err = mlx5e_tc_esw_init(uplink_priv); return err; } void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) { /* delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv); mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index c1cdd8c2e37a..7f93426b88b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -442,7 +442,7 @@ int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, goto inner_tir; err = mlx5e_tir_modify(tir, builder); if (err) { - mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err 
= %d\n", + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n", mlx5e_tir_get_tirn(tir), tt, err); if (!final_err) final_err = err; @@ -457,7 +457,7 @@ inner_tir: continue; err = mlx5e_tir_modify(tir, builder); if (err) { - mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n", mlx5e_tir_get_tirn(tir), tt, err); if (!final_err) final_err = err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c new file mode 100644 index 000000000000..d98a277eb7f8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "selq.h" +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include "en.h" +#include "en/ptp.h" + +struct mlx5e_selq_params { + unsigned int num_regular_queues; + unsigned int num_channels; + unsigned int num_tcs; + union { + u8 is_special_queues; + struct { + bool is_htb : 1; + bool is_ptp : 1; + }; + }; +}; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) +{ + struct mlx5e_selq_params *init_params; + + selq->state_lock = state_lock; + + selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL); + if (!selq->standby) + return -ENOMEM; + + init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL); + if (!init_params) { + kvfree(selq->standby); + selq->standby = NULL; + return -ENOMEM; + } + /* Assign dummy values, so that mlx5e_select_queue won't crash. */ + *init_params = (struct mlx5e_selq_params) { + .num_regular_queues = 1, + .num_channels = 1, + .num_tcs = 1, + .is_htb = false, + .is_ptp = false, + }; + rcu_assign_pointer(selq->active, init_params); + + return 0; +} + +void mlx5e_selq_cleanup(struct mlx5e_selq *selq) +{ + WARN_ON_ONCE(selq->is_prepared); + + kvfree(selq->standby); + selq->standby = NULL; + selq->is_prepared = true; + + mlx5e_selq_apply(selq); + + kvfree(selq->standby); + selq->standby = NULL; +} + +void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq->standby->num_channels = params->num_channels; + selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params); + selq->standby->num_regular_queues = + selq->standby->num_channels * selq->standby->num_tcs; + selq->standby->is_htb = htb; + selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS); +} + +void mlx5e_selq_apply(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *old_params; + + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; + + old_params = rcu_replace_pointer(selq->active, selq->standby, + lockdep_is_held(selq->state_lock)); + synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. 
*/ + selq->standby = old_params; +} + +void mlx5e_selq_cancel(struct mlx5e_selq *selq) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; +} + +#ifdef CONFIG_MLX5_CORE_EN_DCB +static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + +static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP) + return mlx5e_get_dscp_up(priv, skb); +#endif + if (skb_vlan_tag_present(skb)) + return skb_vlan_tag_get_prio(skb); + return 0; +} + +static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int up; + + up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0; + + return selq->num_regular_queues + up; +} + +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + u16 classid; + + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ + if ((TC_H_MAJ(skb->priority) >> 16) == smp_load_acquire(&priv->htb.maj_id)) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(priv->htb.defcls); + + if (!classid) + return 0; + + return mlx5e_get_txq_by_classid(priv, classid); +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_selq_params *selq; + int txq_ix, up; + + selq = rcu_dereference_bh(priv->selq.active); + + /* This is a workaround needed only for the mlx5e_netdev_change_profile + * flow that zeroes out the whole priv without unregistering the netdev + * and without preventing ndo_select_queue from being called. + */ + if (unlikely(!selq)) + return 0; + + if (likely(!selq->is_special_queues)) { + /* No special queues, netdev_pick_tx returns one of the regular ones. */ + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + /* Normalize any picked txq_ix to [0, num_channels), + * So we can return a txq_ix that matches the channel and + * packet UP. + */ + return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) + + up * selq->num_channels; + } + + if (unlikely(selq->is_htb)) { + /* num_tcs == 1, shortcut for PTP */ + + txq_ix = mlx5e_select_htb_queue(priv, skb); + if (txq_ix > 0) + return txq_ix; + + if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb))) + return selq->num_channels; + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. + * If they are selected, switch to regular queues. + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). + */ + return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels); + } + + /* PTP is enabled */ + + if (mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb, selq); + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Normalize any picked txq_ix to [0, num_channels). Queues in range + * [0, num_regular_queues) will be mapped to the corresponding channel + * index, so that we can apply the packet's UP (if num_tcs > 1). 
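+ * (Worked example with assumed num_channels == 4 and num_tcs == 3:
+ * netdev_pick_tx() may return txq_ix == 10; mlx5e_txq_to_ch_ix()
+ * folds it to channel 10 % 4 == 2, and a packet with UP 1 is then
+ * sent on queue 2 + 1 * 4 == 6.)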
+ * If netdev_pick_tx() picks ptp_channel, switch to a regular queue, + * because driver should select the PTP only at mlx5e_select_ptpsq(). + */ + txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + return txq_ix + up * selq->num_channels; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h new file mode 100644 index 000000000000..6c070141d8f1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_SELQ_H__ +#define __MLX5_EN_SELQ_H__ + +#include <linux/kernel.h> + +struct mlx5e_selq_params; + +struct mlx5e_selq { + struct mlx5e_selq_params __rcu *active; + struct mlx5e_selq_params *standby; + struct mutex *state_lock; /* points to priv->state_lock */ + bool is_prepared; +}; + +struct mlx5e_params; +struct net_device; +struct sk_buff; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock); +void mlx5e_selq_cleanup(struct mlx5e_selq *selq); +void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb); +void mlx5e_selq_apply(struct mlx5e_selq *selq); +void mlx5e_selq_cancel(struct mlx5e_selq *selq); + +static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels) +{ + while (unlikely(txq >= num_channels)) + txq -= num_channels; + return txq; +} + +static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels) +{ + if (unlikely(txq >= num_channels)) { + if (unlikely(txq >= num_channels << 3)) + txq %= num_channels; + else + do + txq -= num_channels; + while (txq >= num_channels); + } + return txq; +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); + +#endif /* __MLX5_EN_SELQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c index b0de6b999675..21aab96357b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -18,9 +19,8 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_ACCEPT; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index e600924e30ea..af37a8d247a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -2,6 +2,7 @@ // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
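The prepare/apply/cancel contract of the selq API above is easiest to see from the caller's side. A minimal sketch, assuming a hypothetical apply_hw_config() helper standing in for the real reconfiguration step (illustration only, not part of this patch; compare mlx5e_htb_root_add above):

static int example_switch_params(struct mlx5e_priv *priv,
				 struct mlx5e_params *new_params, bool htb)
{
	int err;

	/* Stage the new parameters in the standby set (under state_lock). */
	mlx5e_selq_prepare(&priv->selq, new_params, htb);

	err = apply_hw_config(priv, new_params); /* hypothetical helper */
	if (err) {
		/* Roll back: the staged standby set is simply dropped. */
		mlx5e_selq_cancel(&priv->selq);
		return err;
	}

	/* Swap active <- standby and wait for in-flight readers. */
	mlx5e_selq_apply(&priv->selq);
	return 0;
}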
#include "act.h" +#include "en/tc/post_act.h" #include "en/tc_priv.h" #include "mlx5_core.h" @@ -34,6 +35,13 @@ static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = { NULL, /* FLOW_ACTION_CT_METADATA, */ &mlx5e_tc_act_mpls_push, &mlx5e_tc_act_mpls_pop, + NULL, /* FLOW_ACTION_MPLS_MANGLE, */ + NULL, /* FLOW_ACTION_GATE, */ + NULL, /* FLOW_ACTION_PPPOE_PUSH, */ + NULL, /* FLOW_ACTION_JUMP, */ + NULL, /* FLOW_ACTION_PIPE, */ + &mlx5e_tc_act_vlan, + &mlx5e_tc_act_vlan, }; /* Must be aligned with enum flow_action_id. */ @@ -101,3 +109,75 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, parse_state->num_actions = flow_action->num_entries; parse_state->extack = extack; } + +void +mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, + struct mlx5e_tc_flow_action *flow_action_reorder) +{ + struct flow_action_entry *act; + int i, j = 0; + + flow_action_for_each(i, act, flow_action) { + /* Add CT action to be first. */ + if (act->id == FLOW_ACTION_CT) + flow_action_reorder->entries[j++] = act; + } + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT) + continue; + flow_action_reorder->entries[j++] = act; + } +} + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) +{ + struct flow_action_entry *act; + struct mlx5e_tc_act *tc_act; + struct mlx5e_priv *priv; + int err = 0, i; + + priv = parse_state->flow->priv; + + flow_action_for_each(i, act, flow_action) { + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act || !tc_act->post_parse || + !tc_act->can_offload(parse_state, act, i, attr)) + continue; + + err = tc_act->post_parse(parse_state, priv, attr); + if (err) + goto out; + } + +out: + return err; +} + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr) +{ + struct mlx5_core_dev *mdev = flow->priv->mdev; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + int err; + + mod_acts = &attr->parse_attr->mod_hdr_acts; + + /* Set handle on current post act rule to next post act rule. 
*/ + err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts); + if (err) { + mlx5_core_warn(mdev, "Failed setting post action handle"); + return err; + } + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 9cc844bd00f5..f34714c5ddd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -16,14 +16,17 @@ struct mlx5e_tc_act_parse_state { unsigned int num_actions; struct mlx5e_tc_flow *flow; struct netlink_ext_ack *extack; + u32 actions; + bool ct; bool ct_clear; bool encap; bool decap; bool mpls_push; + bool eth_push; + bool eth_pop; bool ptype_host; const struct ip_tunnel_info *tun_info; struct mlx5e_mpls_info mpls_info; - struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; int if_count; struct mlx5_tc_ct_priv *ct_priv; @@ -32,7 +35,8 @@ struct mlx5e_tc_act_parse_state { struct mlx5e_tc_act { bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index); + int act_index, + struct mlx5_flow_attr *attr); int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -42,6 +46,15 @@ struct mlx5e_tc_act { int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr); + + bool (*is_multi_table_act)(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr); +}; + +struct mlx5e_tc_flow_action { + unsigned int num_entries; + struct flow_action_entry **entries; }; extern struct mlx5e_tc_act mlx5e_tc_act_drop; @@ -74,4 +87,19 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, struct flow_action *flow_action, struct netlink_ext_ack *extack); +void +mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, + struct mlx5e_tc_flow_action *flow_action_reorder); + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type); + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr); + #endif /* __MLX5_EN_TC_ACT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c index 29920ef0180a..c0f08ae6a57f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c @@ -38,11 +38,12 @@ csum_offload_supported(struct mlx5e_priv *priv, static bool tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct mlx5e_tc_flow *flow = parse_state->flow; - return csum_offload_supported(flow->priv, flow->attr->action, + return csum_offload_supported(flow->priv, attr->action, act->csum_flags, parse_state->extack); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index 58cc33f1363d..b9d38fe807df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -8,13 +8,14 
@@ static bool tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; struct netlink_ext_ack *extack = parse_state->extack; - if (flow_flag_test(parse_state->flow, SAMPLE)) { - NL_SET_ERR_MSG_MOD(extack, - "Sample action with connection tracking is not supported"); + if (parse_state->ct && !clear_action) { + NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported"); return false; } @@ -40,18 +41,34 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - flow_flag_set(parse_state->flow, CT); if (mlx5e_is_eswitch_flow(parse_state->flow)) attr->esw_attr->split_count = attr->esw_attr->out_count; + if (!clear_action) { + attr->flags |= MLX5_ATTR_FLAG_CT; + flow_flag_set(parse_state->flow, CT); + parse_state->ct = true; + } parse_state->ct_clear = clear_action; return 0; } +static bool +tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + if (act->ct.action & TCA_CT_ACT_CLEAR) + return false; + + return true; +} + struct mlx5e_tc_act mlx5e_tc_act_ct = { .can_offload = tc_act_can_offload_ct, .parse_action = tc_act_parse_ct, + .is_multi_table_act = tc_act_is_multi_table_act_ct, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c index 2e29a23bed12..dd025a95c439 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -18,8 +19,7 @@ tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c index f44515061228..4726bcb46eec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c @@ -8,6 +8,7 @@ static int validate_goto_chain(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { @@ -32,7 +33,7 @@ validate_goto_chain(struct mlx5e_priv *priv, } if (!mlx5_chains_backwards_supported(chains) && - dest_chain <= flow->attr->chain) { + dest_chain <= attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported"); return -EOPNOTSUPP; } @@ -43,8 +44,8 @@ validate_goto_chain(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (flow->attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_DECAP) && + if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && !reformat_and_fwd) { NL_SET_ERR_MSG_MOD(extack, "Goto chain is not allowed if action has reformat or decap"); @@ -57,12 +58,13 @@ validate_goto_chain(struct mlx5e_priv *priv, static bool tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int 
act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; - if (validate_goto_chain(flow->priv, flow, act, extack)) + if (validate_goto_chain(flow->priv, flow, attr, act, extack)) return false; return true; @@ -74,8 +76,7 @@ tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; attr->dest_chain = act->chain_index; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c index d775c3d9edf3..e8d227595b3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 2e615e0ba972..2b002c6a2e73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -99,7 +99,8 @@ get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev) static bool tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; @@ -108,8 +109,8 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv = flow->priv; struct mlx5_esw_flow_attr *esw_attr; - parse_attr = flow->attr->parse_attr; - esw_attr = flow->attr->esw_attr; + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; if (!out_dev) { /* out_dev is NULL when filters with @@ -124,6 +125,16 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, return false; } + if (parse_state->eth_pop && !parse_state->mpls_push) { + NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push"); + return false; + } + + if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push"); + return false; + } + if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) { /* Ignore forward to self rules generated * by adding both mlx5 devs to the flow table @@ -301,8 +312,7 @@ tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c index 2c74567b6d25..90b4c1b34776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_mirred_nic(struct 
mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; @@ -39,8 +40,7 @@ tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, { attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex; flow_flag_set(parse_state->flow, HAIRPIN); - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c index 89ca88c78840..f106190bf37c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -8,7 +8,8 @@ static bool tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_priv *priv = parse_state->flow->priv; @@ -47,21 +48,22 @@ tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, static bool tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; - struct mlx5e_tc_flow *flow = parse_state->flow; struct net_device *filter_dev; - filter_dev = flow->attr->parse_attr->filter_dev; + filter_dev = attr->parse_attr->filter_dev; /* we only support mpls pop if it is the first action + * or it is second action after tunnel key unset * and the filter net device is bareudp. Subsequent * actions can be pedit and the last can be mirred * egress redirect. */ - if (act_index) { - NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action"); + if ((act_index == 1 && !parse_state->decap) || act_index > 1) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap"); return false; } @@ -79,7 +81,7 @@ tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->parse_attr->eth.h_proto = act->mpls_pop.proto; + attr->esw_attr->eth.h_proto = act->mpls_pop.proto; attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; flow_flag_set(parse_state->flow, L3_TO_L2_DECAP); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 79addbbef087..47597c524e59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -42,12 +42,11 @@ out_err: return -EOPNOTSUPP; } -static int -parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - struct netlink_ext_ack *extack) +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) { u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 
0 : 1; u8 htype = act->mangle.htype; @@ -79,51 +78,11 @@ out_err: return err; } -static int -parse_pedit_to_reformat(const struct flow_action_entry *act, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct netlink_ext_ack *extack) -{ - u32 mask, val, offset; - u32 *p; - - if (act->id != FLOW_ACTION_MANGLE) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported action id"); - return -EOPNOTSUPP; - } - - if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { - NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); - return -EOPNOTSUPP; - } - - mask = ~act->mangle.mask; - val = act->mangle.val; - offset = act->mangle.offset; - p = (u32 *)&parse_attr->eth; - *(p + (offset >> 2)) |= (val & mask); - - return 0; -} - -int -mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) -{ - if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) - return parse_pedit_to_reformat(act, parse_attr, extack); - - return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, hdrs, extack); -} - static bool tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -141,21 +100,16 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, ns_type = mlx5e_get_flow_namespace(flow); - err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, - attr->parse_attr, parse_state->hdrs, - flow, parse_state->extack); + err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs, + parse_state->extack); if (err) return err; - if (flow_flag_test(flow, L3_TO_L2_DECAP)) - goto out; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; if (ns_type == MLX5_FLOW_NAMESPACE_FDB) esw_attr->split_count = esw_attr->out_count; -out: return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h index da8ab03af58f..434c8bd710a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h @@ -24,9 +24,7 @@ struct pedit_headers_action { int mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct pedit_headers_action *hdrs, - struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack); #endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c index 0819110193dc..6454b031ff7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c index 1c32e24e528d..ad09a8a5f36e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -7,16 +7,16 @@ static bool 
tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; - struct mlx5e_tc_flow *flow = parse_state->flow; struct mlx5e_tc_flow_parse_attr *parse_attr; struct net_device *out_dev = act->dev; struct mlx5_esw_flow_attr *esw_attr; - parse_attr = flow->attr->parse_attr; - esw_attr = flow->attr->esw_attr; + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; if (!out_dev) return false; @@ -58,8 +58,7 @@ tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, struct net_device *out_dev = act->dev; int err; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, MLX5E_TC_INT_PORT_INGRESS, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c index 6699bdf5cf01..2c0196431302 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -4,17 +4,21 @@ #include <net/psample.h> #include "act.h" #include "en/tc_priv.h" +#include "en/tc/act/sample.h" static bool tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; + bool ct_nat; - if (flow_flag_test(parse_state->flow, CT)) { - NL_SET_ERR_MSG_MOD(extack, - "Sample action with connection tracking is not supported"); + ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + + if (flow_flag_test(parse_state->flow, CT) && ct_nat) { + NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported"); return false; } @@ -27,11 +31,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - struct mlx5e_sample_attr *sample_attr; - - sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); - if (!sample_attr) - return -ENOMEM; + struct mlx5e_sample_attr *sample_attr = &attr->sample_attr; sample_attr->rate = act->sample.rate; sample_attr->group_num = act->sample.psample_group->group_num; @@ -39,13 +39,33 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, if (act->sample.truncate) sample_attr->trunc_size = act->sample.trunc_size; - attr->sample_attr = sample_attr; + attr->flags |= MLX5_ATTR_FLAG_SAMPLE; flow_flag_set(parse_state->flow, SAMPLE); return 0; } +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr) +{ + if (MLX5_CAP_GEN(mdev, reg_c_preserve) || + attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return true; + + return false; +} + +static bool +tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr); +} + struct mlx5e_tc_act mlx5e_tc_act_sample = { .can_offload = tc_act_can_offload_sample, .parse_action = tc_act_parse_sample, + .is_multi_table_act = tc_act_is_multi_table_act_sample, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h new file mode 100644 index 000000000000..3efb3a15c5d2 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__ +#define __MLX5_EN_TC_ACT_SAMPLE_H__ + +#include <net/flow_offload.h> +#include "en/tc_priv.h" + +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr); + +#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c index 046b64c2cec4..a7d9eab19e4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; @@ -25,9 +26,8 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c index 6f4a2cf46afd..b4fa2de9711d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c @@ -8,7 +8,8 @@ static bool tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { if (!act->tunnel) { NL_SET_ERR_MSG_MOD(parse_state->extack, @@ -34,7 +35,8 @@ tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, static bool tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index 70fc0c2d8813..b86ac604d0c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -9,7 +9,6 @@ static int add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack) { const struct flow_action_entry prio_tag_act = { @@ -26,7 +25,7 @@ add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, }; return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, - &prio_tag_act, parse_attr, hdrs, action, + &prio_tag_act, parse_attr, action, extack); } @@ -35,7 +34,8 @@ parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, u32 *action, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct mlx5e_tc_act_parse_state *parse_state) { u8 vlan_idx = attr->total_vlan; @@ -85,6 +85,16 @@ parse_tc_vlan_action(struct mlx5e_priv *priv, *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; } break; + case FLOW_ACTION_VLAN_POP_ETH: + 
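+ /* Only record the pop here; tc_act_can_offload_mirred() later
+  * rejects eth_pop without an accompanying MPLS push. */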
parse_state->eth_pop = true; + break; + case FLOW_ACTION_VLAN_PUSH_ETH: + if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP)) + return -EOPNOTSUPP; + parse_state->eth_push = true; + memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN); + memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN); + break; default: NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); return -EINVAL; @@ -110,7 +120,7 @@ mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, }; int err; - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL); if (err) return err; @@ -140,7 +150,7 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, priv->netdev->lower_level; while (nest_level--) { err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, - extack); + extack, NULL); if (err) return err; } @@ -151,7 +161,8 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, static bool tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -170,11 +181,11 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, /* Replace vlan pop+push with vlan modify */ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act, - attr->parse_attr, parse_state->hdrs, - &attr->action, parse_state->extack); + attr->parse_attr, &attr->action, + parse_state->extack); } else { err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action, - parse_state->extack); + parse_state->extack, parse_state); } if (err) @@ -191,7 +202,6 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5_flow_attr *attr) { struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; - struct pedit_headers_action *hdrs = parse_state->hdrs; struct netlink_ext_ack *extack = parse_state->extack; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -202,7 +212,7 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, * tag rewrite. 
*/ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, + err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, &attr->action, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h index 3d62f13ab61f..2fa58c6f44eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h @@ -24,7 +24,6 @@ int mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, const struct flow_action_entry *act, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack); #endif /* __MLX5_EN_TC_ACT_VLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c index 63e36e7f53e3..9a8a1a6bd99e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -12,7 +12,6 @@ int mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, const struct flow_action_entry *act, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack) { u16 mask16 = VLAN_VID_MASK; @@ -44,8 +43,8 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, return -EOPNOTSUPP; } - err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, hdrs, - NULL, extack); + err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs, + extack); *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; return err; @@ -54,7 +53,8 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, static bool tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -69,8 +69,7 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, int err; ns_type = mlx5e_get_flow_namespace(parse_state->flow); - err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, - attr->parse_attr, parse_state->hdrs, + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr, &attr->action, parse_state->extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h new file mode 100644 index 000000000000..bb6b1a979ba1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. 
*/ + +#ifndef __MLX5_EN_TC_CT_FS_H__ +#define __MLX5_EN_TC_CT_FS_H__ + +struct mlx5_ct_fs { + const struct net_device *netdev; + struct mlx5_core_dev *dev; + + /* private data */ + void *priv_data[]; +}; + +struct mlx5_ct_fs_rule { +}; + +struct mlx5_ct_fs_ops { + int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct); + void (*destroy)(struct mlx5_ct_fs *fs); + + struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct flow_rule *flow_rule); + void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule); + + size_t priv_size; +}; + +static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs) +{ + return &fs->priv_data; +} + +struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void); + +#if IS_ENABLED(CONFIG_MLX5_SW_STEERING) +struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void); +#else +static inline struct mlx5_ct_fs_ops * +mlx5_ct_fs_smfs_ops_get(void) +{ + return NULL; +} +#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */ + +#endif /* __MLX5_EN_TC_CT_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c new file mode 100644 index 000000000000..ae4f55be48ce --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include "en_tc.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +#define ct_dbg(fmt, args...)\ + netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args) + +struct mlx5_ct_fs_dmfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; +}; + +static int +mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + return 0; +} + +static void +mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs) +{ +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5e_priv *priv = netdev_priv(fs->netdev); + struct mlx5_ct_fs_dmfs_rule *dmfs_rule; + int err; + + dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL); + if (!dmfs_rule) + return ERR_PTR(-ENOMEM); + + dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); + if (IS_ERR(dmfs_rule->rule)) { + err = PTR_ERR(dmfs_rule->rule); + ct_dbg("Failed to add ct entry fs rule"); + goto err_insert; + } + + dmfs_rule->attr = attr; + + return &dmfs_rule->fs_rule; + +err_insert: + kfree(dmfs_rule); + return ERR_PTR(err); +} + +static void +mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_dmfs_rule, + fs_rule); + + mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr); + kfree(dmfs_rule); +} + +static struct mlx5_ct_fs_ops dmfs_ops = { + .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del, + + .init = mlx5_ct_fs_dmfs_init, + .destroy = mlx5_ct_fs_dmfs_destroy, +}; + +struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void) +{ + return &dmfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c new file mode 100644 index 
000000000000..59988e24b704 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include <linux/refcount.h> + +#include "en_tc.h" +#include "en/tc_priv.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +#include "lib/smfs.h" + +#define INIT_ERR_PREFIX "ct_fs_smfs init failed" +#define ct_dbg(fmt, args...)\ + netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args) +#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16) + +struct mlx5_ct_fs_smfs_matcher { + struct mlx5dr_matcher *dr_matcher; + struct list_head list; + int prio; + refcount_t ref; +}; + +struct mlx5_ct_fs_smfs_matchers { + struct mlx5_ct_fs_smfs_matcher smfs_matchers[4]; + struct list_head used; +}; + +struct mlx5_ct_fs_smfs { + struct mlx5dr_table *ct_tbl, *ct_nat_tbl; + struct mlx5_ct_fs_smfs_matchers matchers; + struct mlx5_ct_fs_smfs_matchers matchers_nat; + struct mlx5dr_action *fwd_action; + struct mlx5_flow_table *ct_nat; + struct mutex lock; /* Guards matchers */ +}; + +struct mlx5_ct_fs_smfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5dr_rule *rule; + struct mlx5dr_action *count_action; + struct mlx5_ct_fs_smfs_matcher *smfs_matcher; +}; + +static inline void +mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + + if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version))) + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); + else + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + if (likely(ipv4)) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xFF, + MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xFF, + MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4, + src_ipv4_src_ipv6.ipv6_layout.ipv6)); + } + + if (likely(tcp)) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(MLX5_CT_TCP_FLAGS_MASK)); + } else { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport); + } + + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK); +} + +static struct mlx5dr_matcher * +mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4, + bool tcp, u32 priority) +{ + struct mlx5dr_matcher *dr_matcher; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS; + + dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec); + kfree(spec); + if (!dr_matcher) + return ERR_PTR(-EINVAL); + + return dr_matcher; +} + +static struct mlx5_ct_fs_smfs_matcher * 
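
/* The mlx5_ct_fs_smfs_matcher_get() that follows uses the classic
 * "inc-not-zero, then lock and re-check" pattern so that concurrent
 * callers never build the same matcher twice. A minimal userspace
 * analogue with pthreads and C11 atomics (obj_get() and the payload
 * are illustrative stand-ins, not driver API):
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct cached_obj {
    atomic_int ref;        /* 0 means "not created yet" */
    void *payload;         /* stands in for the dr_matcher */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int ref_inc_not_zero(atomic_int *ref)
{
    int v = atomic_load(ref);

    while (v != 0)
        if (atomic_compare_exchange_weak(ref, &v, v + 1))
            return 1;
    return 0;
}

static struct cached_obj *obj_get(struct cached_obj *obj)
{
    if (ref_inc_not_zero(&obj->ref))
        return obj;        /* fast path, no lock taken */

    pthread_mutex_lock(&lock);
    /* re-check: another thread may have created it before we locked */
    if (ref_inc_not_zero(&obj->ref))
        goto out_unlock;

    obj->payload = malloc(64);    /* matcher creation goes here */
    atomic_store(&obj->ref, 1);
out_unlock:
    pthread_mutex_unlock(&lock);
    return obj;
}
/* The put side mirrors it: dec-and-lock, destroy on reaching zero. */
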
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher; + struct mlx5_ct_fs_smfs_matchers *matchers; + struct mlx5dr_matcher *dr_matcher; + struct mlx5dr_table *tbl; + struct list_head *prev; + int prio; + + matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers; + smfs_matcher = &matchers->smfs_matchers[ipv4 * 2 + tcp]; + + if (refcount_inc_not_zero(&smfs_matcher->ref)) + return smfs_matcher; + + mutex_lock(&fs_smfs->lock); + + /* Retry with lock, as another thread might have already created the relevant matcher + * till we acquired the lock + */ + if (refcount_inc_not_zero(&smfs_matcher->ref)) + goto out_unlock; + + // Find next available priority in sorted used list + prio = 0; + prev = &matchers->used; + list_for_each_entry(m, &matchers->used, list) { + prev = &m->list; + + if (m->prio == prio) + prio = m->prio + 1; + else + break; + } + + tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl; + dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, prio); + if (IS_ERR(dr_matcher)) { + netdev_warn(fs->netdev, + "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d), err: %ld\n", + nat, ipv4, tcp, PTR_ERR(dr_matcher)); + + smfs_matcher = ERR_CAST(dr_matcher); + goto out_unlock; + } + + smfs_matcher->dr_matcher = dr_matcher; + smfs_matcher->prio = prio; + list_add(&smfs_matcher->list, prev); + refcount_set(&smfs_matcher->ref, 1); + +out_unlock: + mutex_unlock(&fs_smfs->lock); + return smfs_matcher; +} + +static void +mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock)) + return; + + mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher); + list_del(&smfs_matcher->list); + mutex_unlock(&fs_smfs->lock); +} + +static int +mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl; + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct); + ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat); + ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct); + fs_smfs->ct_nat = ct_nat; + + if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) { + netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables"); + return -EOPNOTSUPP; + } + + ct_dbg("using smfs steering"); + + fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl); + if (!fs_smfs->fwd_action) { + return -EINVAL; + } + + fs_smfs->ct_tbl = ct_tbl; + fs_smfs->ct_nat_tbl = ct_nat_tbl; + mutex_init(&fs_smfs->lock); + INIT_LIST_HEAD(&fs_smfs->matchers.used); + INIT_LIST_HEAD(&fs_smfs->matchers_nat.used); + + return 0; +} + +static void +mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + mlx5_smfs_action_destroy(fs_smfs->fwd_action); +} + +static inline bool +mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys) +{ +#define DISSECTOR_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name) + const u32 basic_keys = DISSECTOR_BIT(BASIC) | DISSECTOR_BIT(CONTROL) | + DISSECTOR_BIT(PORTS) | DISSECTOR_BIT(META); + const u32 ipv4_tcp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS) | DISSECTOR_BIT(TCP); + const u32 ipv4_udp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS); + const u32 
ipv6_tcp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS) | DISSECTOR_BIT(TCP); + const u32 ipv6_udp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS); + + return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || + used_keys == ipv6_udp); +} + +static bool +mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule) +{ + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; + struct flow_match_control control; + struct flow_match_basic basic; + struct flow_match_ports ports; + struct flow_match_tcp tcp; + + if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { + ct_dbg("rule uses unexpected dissectors (0x%08x)", + flow_rule->match.dissector->used_keys); + return false; + } + + flow_rule_match_basic(flow_rule, &basic); + flow_rule_match_control(flow_rule, &control); + flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); + flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); + flow_rule_match_ports(flow_rule, &ports); + flow_rule_match_tcp(flow_rule, &tcp); + + if (basic.mask->n_proto != htons(0xFFFF) || + (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || + basic.mask->ip_proto != 0xFF || + (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP)) { + ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", + ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), + basic.key->ip_proto, basic.mask->ip_proto); + return false; + } + + if (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF)) { + ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)", + ports.mask->src, ports.mask->dst); + return false; + } + + if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { + ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags); + return false; + } + + return true; +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5_ct_fs_smfs_matcher *smfs_matcher; + struct mlx5_ct_fs_smfs_rule *smfs_rule; + struct mlx5dr_action *actions[5]; + struct mlx5dr_rule *rule; + int num_actions = 0, err; + bool nat, tcp, ipv4; + + if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule)) + return ERR_PTR(-EOPNOTSUPP); + + smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL); + if (!smfs_rule) + return ERR_PTR(-ENOMEM); + + smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter)); + if (!smfs_rule->count_action) { + err = -EINVAL; + goto err_count; + } + + actions[num_actions++] = smfs_rule->count_action; + actions[num_actions++] = attr->modify_hdr->action.dr_action; + actions[num_actions++] = fs_smfs->fwd_action; + + nat = (attr->ft == fs_smfs->ct_nat); + ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4; + tcp = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_TCP; + + smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp); + if (IS_ERR(smfs_matcher)) { + err = PTR_ERR(smfs_matcher); + goto err_matcher; + } + + rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions, + MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT); + if (!rule) { + err = -EINVAL; + goto err_create; + } + + smfs_rule->rule = rule; + smfs_rule->smfs_matcher = smfs_matcher; + + return &smfs_rule->fs_rule; + +err_create: + 
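
/* The used_keys validation above accepts exactly four key sets: the
 * base keys plus IPv4 or IPv6 addresses, with the TCP key present only
 * for TCP tuples. Standalone illustration with made-up bit positions
 * (the kernel uses the FLOW_DISSECTOR_KEY_* enum values):
 */
#include <stdbool.h>
#include <stdint.h>

#define KEY(bit) (UINT32_C(1) << (bit))
#define KEY_BASIC   KEY(0)
#define KEY_CONTROL KEY(1)
#define KEY_PORTS   KEY(2)
#define KEY_META    KEY(3)
#define KEY_IPV4    KEY(4)
#define KEY_IPV6    KEY(5)
#define KEY_TCP     KEY(6)

static bool valid_used_keys(uint32_t used)
{
    const uint32_t base = KEY_BASIC | KEY_CONTROL | KEY_PORTS | KEY_META;
    const uint32_t allowed[] = {
        base | KEY_IPV4 | KEY_TCP,    /* ipv4 tcp */
        base | KEY_IPV4,              /* ipv4 udp */
        base | KEY_IPV6 | KEY_TCP,    /* ipv6 tcp */
        base | KEY_IPV6,              /* ipv6 udp */
    };

    for (unsigned int i = 0; i < sizeof(allowed) / sizeof(allowed[0]); i++)
        if (used == allowed[i])
            return true;
    return false;    /* any extra or missing key keeps the rule off SMFS */
}
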
mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher); +err_matcher: + mlx5_smfs_action_destroy(smfs_rule->count_action); +err_count: + kfree(smfs_rule); + return ERR_PTR(err); +} + +static void +mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_smfs_rule, + fs_rule); + + mlx5_smfs_rule_destroy(smfs_rule->rule); + mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher); + mlx5_smfs_action_destroy(smfs_rule->count_action); + kfree(smfs_rule); +} + +static struct mlx5_ct_fs_ops fs_smfs_ops = { + .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del, + + .init = mlx5_ct_fs_smfs_init, + .destroy = mlx5_ct_fs_smfs_destroy, + + .priv_size = sizeof(struct mlx5_ct_fs_smfs), +}; + +struct mlx5_ct_fs_ops * +mlx5_ct_fs_smfs_ops_get(void) +{ + return &fs_smfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 31b4e39be2d3..dea137dd744b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +#include "en/tc_priv.h" #include "en_tc.h" #include "post_act.h" #include "mlx5_core.h" @@ -75,21 +76,47 @@ mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act) kfree(post_act); } +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Post action rule matches on fte_id and executes original rule's tc rule action */ + mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK); + + handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr); + if (IS_ERR(handle->rule)) { + err = PTR_ERR(handle->rule); + netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); + goto err_rule; + } + + kvfree(spec); + return 0; + +err_rule: + kvfree(spec); + return err; +} + struct mlx5e_post_act_handle * mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr) { u32 attr_sz = ns_to_attr_sz(post_act->ns_type); - struct mlx5e_post_act_handle *handle = NULL; - struct mlx5_flow_attr *post_attr = NULL; - struct mlx5_flow_spec *spec = NULL; + struct mlx5e_post_act_handle *handle; + struct mlx5_flow_attr *post_attr; int err; handle = kzalloc(sizeof(*handle), GFP_KERNEL); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); post_attr = mlx5_alloc_flow_attr(post_act->ns_type); - if (!handle || !spec || !post_attr) { + if (!handle || !post_attr) { kfree(post_attr); - kvfree(spec); kfree(handle); return ERR_PTR(-ENOMEM); } @@ -100,7 +127,7 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at post_attr->ft = post_act->ft; post_attr->inner_match_level = MLX5_MATCH_NONE; post_attr->outer_match_level = MLX5_MATCH_NONE; - post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP; handle->ns_type = post_act->ns_type; /* Splits were handled before post action */ @@ -112,36 +139,29 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at if (err) goto err_xarray; - /* Post action rule matches on fte_id and executes original rule's - * 
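
/* The post_act refactor above splits what used to be a single add()
 * into a two-phase lifecycle: add() only reserves an ID and keeps the
 * attribute, offload() installs the rule that matches on that ID,
 * unoffload() removes it, and del() releases everything. Compact
 * sketch with a counter standing in for the xarray allocator and
 * malloc standing in for rule insertion (illustrative only):
 */
#include <stdlib.h>

struct post_act_handle {
    unsigned int id;    /* reserved at add() time */
    void *rule;         /* non-NULL only while offloaded */
};

static unsigned int next_id;

static struct post_act_handle *post_act_add(void)
{
    struct post_act_handle *h = calloc(1, sizeof(*h));

    if (h)
        h->id = ++next_id;    /* no rule in hardware yet */
    return h;
}

static int post_act_offload(struct post_act_handle *h)
{
    h->rule = malloc(1);    /* insert rule matching on h->id */
    return h->rule ? 0 : -1;
}

static void post_act_unoffload(struct post_act_handle *h)
{
    free(h->rule);
    h->rule = NULL;
}

static void post_act_del(struct post_act_handle *h)
{
    if (h->rule)    /* mirrors the IS_ERR_OR_NULL() guard in del() */
        post_act_unoffload(h);
    free(h);
}
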
tc rule action - */ - mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, - handle->id, MLX5_POST_ACTION_MASK); - - handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr); - if (IS_ERR(handle->rule)) { - err = PTR_ERR(handle->rule); - netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); - goto err_rule; - } handle->attr = post_attr; - kvfree(spec); return handle; -err_rule: - xa_erase(&post_act->ids, handle->id); err_xarray: kfree(post_attr); - kvfree(spec); kfree(handle); return ERR_PTR(err); } void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr); + handle->rule = NULL; +} + +void mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle) { - mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr); + if (!IS_ERR_OR_NULL(handle->rule)) + mlx5e_tc_post_act_unoffload(post_act, handle); xa_erase(&post_act->ids, handle->id); kfree(handle->attr); kfree(handle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h index b530ec1981a5..f476774c0b75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h @@ -24,6 +24,14 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at void mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle); +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + +void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + struct mlx5_flow_table * mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index ff4b4f8a5a9d..fd4504518578 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -5,6 +5,7 @@ #include <net/psample.h> #include "en/mapping.h" #include "en/tc/post_act.h" +#include "en/tc/act/sample.h" #include "en/mod_hdr.h" #include "sample.h" #include "eswitch.h" @@ -46,14 +47,12 @@ struct mlx5e_sample_flow { struct mlx5_flow_handle *pre_rule; struct mlx5_flow_attr *post_attr; struct mlx5_flow_handle *post_rule; - struct mlx5e_post_act_handle *post_act_handle; }; struct mlx5e_sample_restore { struct hlist_node hlist; struct mlx5_modify_hdr *modify_hdr; struct mlx5_flow_handle *rule; - struct mlx5e_post_act_handle *post_act_handle; u32 obj_id; int count; }; @@ -231,69 +230,46 @@ sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler) */ static struct mlx5_modify_hdr * sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, - struct mlx5e_post_act_handle *handle) + struct mlx5e_tc_mod_hdr_acts *mod_acts) { - struct mlx5e_tc_mod_hdr_acts mod_acts = {}; struct mlx5_modify_hdr *modify_hdr; int err; - err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB, CHAIN_TO_REG, obj_id); if (err) goto err_set_regc0; - if (handle) { - err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts); - if (err) - goto err_post_act; - } - modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, - mod_acts.num_actions, - mod_acts.actions); + 
mod_acts->num_actions, + mod_acts->actions); if (IS_ERR(modify_hdr)) { err = PTR_ERR(modify_hdr); goto err_modify_hdr; } - mlx5e_mod_hdr_dealloc(&mod_acts); + mlx5e_mod_hdr_dealloc(mod_acts); return modify_hdr; err_modify_hdr: -err_post_act: - mlx5e_mod_hdr_dealloc(&mod_acts); + mlx5e_mod_hdr_dealloc(mod_acts); err_set_regc0: return ERR_PTR(err); } -static u32 -restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle) -{ - return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0); -} - -static bool -restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id, - struct mlx5e_post_act_handle *post_act_handle) -{ - return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle; -} - static struct mlx5e_sample_restore * sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, - struct mlx5e_post_act_handle *post_act_handle) + struct mlx5e_tc_mod_hdr_acts *mod_acts) { struct mlx5_eswitch *esw = tc_psample->esw; struct mlx5_core_dev *mdev = esw->dev; struct mlx5e_sample_restore *restore; struct mlx5_modify_hdr *modify_hdr; - u32 hash_key; int err; mutex_lock(&tc_psample->restore_lock); - hash_key = restore_hash(obj_id, post_act_handle); - hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key) - if (restore_equal(restore, obj_id, post_act_handle)) + hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id) + if (restore->obj_id == obj_id) goto add_ref; restore = kzalloc(sizeof(*restore), GFP_KERNEL); @@ -302,9 +278,8 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, goto err_alloc; } restore->obj_id = obj_id; - restore->post_act_handle = post_act_handle; - modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle); + modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts); if (IS_ERR(modify_hdr)) { err = PTR_ERR(modify_hdr); goto err_modify_hdr; @@ -317,7 +292,7 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, goto err_restore; } - hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key); + hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id); add_ref: restore->count++; mutex_unlock(&tc_psample->restore_lock); @@ -403,7 +378,7 @@ add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, post_attr->chain = 0; post_attr->prio = 0; post_attr->ft = default_tbl; - post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT; /* When offloading sample and encap action, if there is no valid * neigh data struct, a slow path rule is offloaded first. 
Source @@ -492,16 +467,16 @@ del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id) + struct mlx5_flow_attr *attr) { - struct mlx5e_post_act_handle *post_act_handle = NULL; struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_esw_flow_attr *pre_esw_attr; struct mlx5_mapped_obj restore_obj = {}; + struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5e_sample_flow *sample_flow; struct mlx5e_sample_attr *sample_attr; struct mlx5_flow_attr *pre_attr; + u32 tunnel_id = attr->tunnel_id; struct mlx5_eswitch *esw; u32 default_tbl_id; u32 obj_id; @@ -513,7 +488,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); if (!sample_flow) return ERR_PTR(-ENOMEM); - sample_attr = attr->sample_attr; + sample_attr = &attr->sample_attr; sample_attr->sample_flow = sample_flow; /* For NICs with reg_c_preserve support or decap action, use @@ -522,18 +497,11 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, * original flow table. */ esw = tc_psample->esw; - if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) || - attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) { struct mlx5_flow_table *ft; ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act); default_tbl_id = ft->id; - post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr); - if (IS_ERR(post_act_handle)) { - err = PTR_ERR(post_act_handle); - goto err_post_act; - } - sample_flow->post_act_handle = post_act_handle; } else { err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id); if (err) @@ -546,6 +514,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, err = PTR_ERR(sample_flow->sampler); goto err_sampler; } + sample_attr->sampler_id = sample_flow->sampler->sampler_id; /* Create an id mapping reg_c0 value to sample object. */ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; @@ -559,7 +528,8 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, sample_attr->restore_obj_id = obj_id; /* Create sample restore context. 
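
/* With the post-act handle dropped from the key, sample_restore_get()
 * above reduces to a get-or-create cache keyed by obj_id alone, with a
 * plain use count instead of jhash_2words() over two keys. Minimal
 * sketch using a fixed bucket array in place of the kernel hashtable
 * (locking omitted for brevity; the driver holds restore_lock):
 */
#include <stdlib.h>

struct restore_entry {
    struct restore_entry *next;
    unsigned int obj_id;
    int count;
};

static struct restore_entry *buckets[256];

static struct restore_entry *restore_get(unsigned int obj_id)
{
    struct restore_entry **head = &buckets[obj_id & 255], *r;

    for (r = *head; r; r = r->next)
        if (r->obj_id == obj_id)
            goto add_ref;

    r = calloc(1, sizeof(*r));
    if (!r)
        return NULL;
    r->obj_id = obj_id;
    r->next = *head;
    *head = r;
add_ref:
    r->count++;
    return r;
}
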
*/ - sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle); + mod_acts = &attr->parse_attr->mod_hdr_acts; + sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts); if (IS_ERR(sample_flow->restore)) { err = PTR_ERR(sample_flow->restore); goto err_sample_restore; @@ -580,13 +550,13 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, if (tunnel_id) pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; pre_attr->modify_hdr = sample_flow->restore->modify_hdr; - pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE; + pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE; pre_attr->inner_match_level = attr->inner_match_level; pre_attr->outer_match_level = attr->outer_match_level; pre_attr->chain = attr->chain; pre_attr->prio = attr->prio; - pre_attr->sample_attr = attr->sample_attr; - sample_attr->sampler_id = sample_flow->sampler->sampler_id; + pre_attr->ft = attr->ft; + pre_attr->sample_attr = *sample_attr; pre_esw_attr = pre_attr->esw_attr; pre_esw_attr->in_mdev = esw_attr->in_mdev; pre_esw_attr->in_rep = esw_attr->in_rep; @@ -611,9 +581,6 @@ err_sampler: if (sample_flow->post_rule) del_post_rule(esw, sample_flow, attr); err_post_rule: - if (post_act_handle) - mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle); -err_post_act: kfree(sample_flow); return ERR_PTR(err); } @@ -633,15 +600,13 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, * will hit fw syndromes. */ esw = tc_psample->esw; - sample_flow = attr->sample_attr->sample_flow; + sample_flow = attr->sample_attr.sample_flow; mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); sample_restore_put(tc_psample, sample_flow->restore); - mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id); sampler_put(tc_psample, sample_flow->sampler); - if (sample_flow->post_act_handle) - mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle); - else + if (sample_flow->post_rule) del_post_rule(esw, sample_flow, attr); kfree(sample_flow->pre_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h index 9ef8a49d7801..a569367eae4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h @@ -26,8 +26,7 @@ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id); + struct mlx5_flow_attr *attr); void mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv, @@ -45,8 +44,7 @@ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); static inline struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id) + struct mlx5_flow_attr *attr) { return ERR_PTR(-EOPNOTSUPP); } static inline void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 4a0d38d219ed..e49f51124c74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -18,15 +18,16 @@ #include "lib/fs_chains.h" #include "en/tc_ct.h" +#include "en/tc/ct_fs.h" +#include "en/tc_priv.h" #include "en/mod_hdr.h" #include 
"en/mapping.h" #include "en/tc/post_act.h" #include "en.h" #include "en_tc.h" #include "en_rep.h" +#include "fs_core.h" -#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen) -#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) #define MLX5_CT_STATE_TRK_BIT BIT(2) #define MLX5_CT_STATE_NAT_BIT BIT(3) @@ -62,19 +63,20 @@ struct mlx5_tc_ct_priv { struct mapping_ctx *labels_mapping; enum mlx5_flow_namespace_type ns_type; struct mlx5_fs_chains *chains; + struct mlx5_ct_fs *fs; + struct mlx5_ct_fs_ops *fs_ops; spinlock_t ht_lock; /* protects ft entries */ }; struct mlx5_ct_flow { struct mlx5_flow_attr *pre_ct_attr; struct mlx5_flow_handle *pre_ct_rule; - struct mlx5e_post_act_handle *post_act_handle; struct mlx5_ct_ft *ft; u32 chain_mapping; }; struct mlx5_ct_zone_rule { - struct mlx5_flow_handle *rule; + struct mlx5_ct_fs_rule *rule; struct mlx5e_mod_hdr_handle *mh; struct mlx5_flow_attr *attr; bool nat; @@ -258,7 +260,8 @@ mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) return -EOPNOTSUPP; } } else { - return -EOPNOTSUPP; + if (tuple->ip_proto != IPPROTO_GRE) + return -EOPNOTSUPP; } return 0; @@ -505,7 +508,7 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); - mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr); + ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); kfree(attr); @@ -807,16 +810,20 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->dest_chain = 0; attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); attr->ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; - attr->outer_match_level = MLX5_MATCH_L4; + if (entry->tuple.ip_proto == IPPROTO_TCP || + entry->tuple.ip_proto == IPPROTO_UDP) + attr->outer_match_level = MLX5_MATCH_L4; + else + attr->outer_match_level = MLX5_MATCH_L3; attr->counter = entry->counter->counter; - attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) attr->esw_attr->in_mdev = priv->mdev; mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); - zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); + zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); if (IS_ERR(zone_rule->rule)) { err = PTR_ERR(zone_rule->rule); ct_dbg("Failed to add ct entry rule, nat: %d", nat); @@ -1154,7 +1161,6 @@ mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, } rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); - mlx5_tc_ct_entry_remove_from_tuples(entry); spin_unlock_bh(&ct_priv->ht_lock); mlx5_tc_ct_entry_put(entry); @@ -1224,16 +1230,20 @@ mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, struct flow_keys flow_keys; skb_reset_network_header(skb); - skb_flow_dissect_flow_keys(skb, &flow_keys, 0); + skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); tuple->zone = zone; if (flow_keys.basic.ip_proto != IPPROTO_TCP && - flow_keys.basic.ip_proto != IPPROTO_UDP) + flow_keys.basic.ip_proto != IPPROTO_UDP && + flow_keys.basic.ip_proto != IPPROTO_GRE) return false; - tuple->port.src = flow_keys.ports.src; - tuple->port.dst = flow_keys.ports.dst; + if (flow_keys.basic.ip_proto == IPPROTO_TCP || + flow_keys.basic.ip_proto == IPPROTO_UDP) { + tuple->port.src = flow_keys.ports.src; + tuple->port.dst = flow_keys.ports.dst; + } tuple->n_proto = flow_keys.basic.n_proto; tuple->ip_proto = flow_keys.basic.ip_proto; @@ -1756,7 +1766,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) /* We translate the tc filter with CT action to the following HW model: * * +---------------------+ - * + ft prio (tc chain) + + * + ft prio (tc chain) + * + original match + * +---------------------+ * | set chain miss mapping @@ -1766,7 +1776,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * v * +---------------------+ * + pre_ct/pre_ct_nat + if matches +-------------------------+ - * + zone+nat match +---------------->+ post_act (see below) + + * + zone+nat match +---------------->+ post_act (see below) + * +---------------------+ set zone +-------------------------+ * | set zone * v @@ -1781,21 +1791,19 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | do nat (if needed) * v * +--------------+ - * + post_act + original filter actions + * + post_act + original filter actions * + fte_id match +------------------------> * +--------------+ */ static struct mlx5_flow_handle * __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *orig_spec, struct mlx5_flow_attr *attr) { bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5e_tc_mod_hdr_acts *pre_mod_acts; u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5e_post_act_handle *handle; struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr 
*mod_hdr; struct mlx5_ct_flow *ct_flow; @@ -1818,14 +1826,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } ct_flow->ft = ft; - handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - ct_dbg("Failed to allocate post action handle"); - goto err_post_act_handle; - } - ct_flow->post_act_handle = handle; - /* Base flow attributes of both rules on original rule attribute */ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); if (!ct_flow->pre_ct_attr) { @@ -1835,6 +1835,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, pre_ct_attr = ct_flow->pre_ct_attr; memcpy(pre_ct_attr, attr, attr_sz); + pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts; /* Modify the original rule's action to fwd and modify, leave decap */ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; @@ -1853,30 +1854,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } ct_flow->chain_mapping = chain_mapping; - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, + err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, CHAIN_TO_REG, chain_mapping); if (err) { ct_dbg("Failed to set chain register mapping"); goto err_mapping; } - err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts); - if (err) { - ct_dbg("Failed to set post action handle"); - goto err_mapping; - } - /* If original flow is decap, we do it before going into ct table * so add a rewrite for the tunnel match_id. */ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && attr->chain == 0) { - u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); - - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, TUNNEL_TO_REG, - tun_id); + attr->tunnel_id); if (err) { ct_dbg("Failed to set tunnel register mapping"); goto err_mapping; @@ -1884,8 +1877,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, - pre_mod_acts.num_actions, - pre_mod_acts.actions); + pre_mod_acts->num_actions, + pre_mod_acts->actions); if (IS_ERR(mod_hdr)) { err = PTR_ERR(mod_hdr); ct_dbg("Failed to create pre ct mod hdr"); @@ -1905,20 +1898,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } attr->ct_attr.ct_flow = ct_flow; - mlx5e_mod_hdr_dealloc(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(pre_mod_acts); return ct_flow->pre_ct_rule; err_insert_orig: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: - mlx5e_mod_hdr_dealloc(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(pre_mod_acts); mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); err_get_chain: kfree(ct_flow->pre_ct_attr); err_alloc_pre: - mlx5e_tc_post_act_del(ct_priv->post_act, handle); -err_post_act_handle: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: kfree(ct_flow); @@ -1926,87 +1917,19 @@ err_ft: return ERR_PTR(err); } -static struct mlx5_flow_handle * -__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_flow_spec *orig_spec, - struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_acts) -{ - struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5_flow_attr *pre_ct_attr; - struct mlx5_modify_hdr *mod_hdr; - struct mlx5_flow_handle *rule; - struct mlx5_ct_flow *ct_flow; - int err; - - ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); - if (!ct_flow) - return 
ERR_PTR(-ENOMEM); - - /* Base esw attributes on original rule attribute */ - pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); - if (!pre_ct_attr) { - err = -ENOMEM; - goto err_attr; - } - - memcpy(pre_ct_attr, attr, attr_sz); - - mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, - mod_acts->num_actions, - mod_acts->actions); - if (IS_ERR(mod_hdr)) { - err = PTR_ERR(mod_hdr); - ct_dbg("Failed to add create ct clear mod hdr"); - goto err_mod_hdr; - } - - pre_ct_attr->modify_hdr = mod_hdr; - - rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - ct_dbg("Failed to add ct clear rule"); - goto err_insert; - } - - attr->ct_attr.ct_flow = ct_flow; - ct_flow->pre_ct_attr = pre_ct_attr; - ct_flow->pre_ct_rule = rule; - return rule; - -err_insert: - mlx5_modify_header_dealloc(priv->mdev, mod_hdr); -err_mod_hdr: - netdev_warn(priv->netdev, - "Failed to offload ct clear flow, err %d\n", err); - kfree(pre_ct_attr); -err_attr: - kfree(ct_flow); - - return ERR_PTR(err); -} - struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; struct mlx5_flow_handle *rule; if (!priv) return ERR_PTR(-EOPNOTSUPP); mutex_lock(&priv->control_lock); - - if (clear_action) - rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); - else - rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); + rule = __mlx5_tc_ct_flow_offload(priv, spec, attr); mutex_unlock(&priv->control_lock); return rule; @@ -2014,21 +1937,17 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, static void __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5e_tc_flow *flow, - struct mlx5_ct_flow *ct_flow) + struct mlx5_ct_flow *ct_flow, + struct mlx5_flow_attr *attr) { struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, - pre_ct_attr); + mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); - if (ct_flow->post_act_handle) { - mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); - mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle); - mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); - } + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); + mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); kfree(ct_flow->pre_ct_attr); kfree(ct_flow); @@ -2036,7 +1955,6 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; @@ -2048,11 +1966,43 @@ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, return; mutex_lock(&priv->control_lock); - __mlx5_tc_ct_delete_flow(priv, flow, ct_flow); + __mlx5_tc_ct_delete_flow(priv, ct_flow, attr); mutex_unlock(&priv->control_lock); } static int +mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act); + struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); + int err; + + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && + ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { + ct_dbg("Using SMFS ct flow 
steering provider"); + fs_ops = mlx5_ct_fs_smfs_ops_get(); + } + + ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); + if (!ct_priv->fs) + return -ENOMEM; + + ct_priv->fs->netdev = ct_priv->netdev; + ct_priv->fs->dev = ct_priv->dev; + ct_priv->fs_ops = fs_ops; + + err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct); + if (err) + goto err_init; + + return 0; + +err_init: + kfree(ct_priv->fs); + return err; +} + +static int mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, const char **err_msg) { @@ -2190,8 +2140,14 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) goto err_ct_tuples_nat_ht; + err = mlx5_tc_ct_fs_init(ct_priv); + if (err) + goto err_init_fs; + return ct_priv; +err_init_fs: + rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); err_ct_tuples_nat_ht: rhashtable_destroy(&ct_priv->ct_tuples_ht); err_ct_tuples_ht: @@ -2222,6 +2178,9 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) chains = ct_priv->chains; + ct_priv->fs_ops->destroy(ct_priv->fs); + kfree(ct_priv->fs); + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); mlx5_chains_destroy_global_table(chains, ct_priv->ct); mapping_destroy(ct_priv->zone_mapping); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 99662af1e41a..36d3652bf829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -86,6 +86,8 @@ struct mlx5_ct_attr { #define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen) #define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset) +#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen) +#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) #if IS_ENABLED(CONFIG_MLX5_TC_CT) @@ -116,13 +118,11 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); bool @@ -183,7 +183,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, static inline struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) @@ -193,7 +192,6 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, static inline void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 70b40ae384e4..3b74a6fd5c43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -38,9 +38,9 @@ struct mlx5e_tc_flow_parse_attr { struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; - struct ethhdr eth; struct mlx5e_tc_act_parse_state parse_state; }; @@ -108,10 +108,20 @@ struct 
mlx5e_tc_flow { struct rcu_head rcu_head; struct completion init_done; struct completion del_hw_done; - int tunnel_id; /* the mapped tunnel id of this flow */ struct mlx5_flow_attr *attr; + struct list_head attrs; }; +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer); struct mlx5_flow_handle * @@ -120,6 +130,12 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr); +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow); + +void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow); +int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow); + bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow); bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow); @@ -174,6 +190,7 @@ struct mlx5_flow_handle * mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec); + void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index d39d0dae22fc..5aff97914367 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -173,19 +173,29 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW)) continue; - attr = flow->attr; - esw_attr = attr->esw_attr; - spec = &attr->parse_attr->spec; + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; /* Do not offload flows with unresolved neighbors */ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) continue; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + continue; + } + /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); @@ -214,12 +224,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) continue; - attr = flow->attr; - esw_attr = attr->esw_attr; - spec = &attr->parse_attr->spec; + spec = &flow->attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; /* mark the flow's encap dest as non-valid */ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; @@ -230,7 +241,8 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, continue; } - 
mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5e_tc_unoffload_flow_post_acts(flow); flow->rule[0] = rule; /* was unset when fast path rule removed */ flow_flag_set(flow, OFFLOADED); @@ -488,12 +500,17 @@ static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, int out_index); void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index) { struct mlx5e_encap_entry *e = flow->encaps[out_index].e; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - if (flow->attr->esw_attr->dests[out_index].flags & + if (!mlx5e_is_eswitch_flow(flow)) + return; + + if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) mlx5e_detach_encap_route(priv, flow, out_index); @@ -733,6 +750,7 @@ static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv) static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct mlx5e_encap_entry *e, bool new_encap_entry, unsigned long tbl_time_before, @@ -740,6 +758,7 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, @@ -748,7 +767,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; const struct mlx5e_mpls_info *mpls_info; unsigned long tbl_time_before = 0; @@ -837,8 +855,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, e->compl_result = 1; attach_flow: - err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before, - out_index); + err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created, + tbl_time_before, out_index); if (err) goto out_err; @@ -888,20 +906,18 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5_pkt_reformat_params reformat_params; - struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_decap_entry *d; struct mlx5e_decap_key key; uintptr_t hash_key; int err = 0; - parse_attr = flow->attr->parse_attr; - if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { NL_SET_ERR_MSG_MOD(extack, "encap header larger than max supported"); return -EOPNOTSUPP; } - key.key = parse_attr->eth; + key.key = attr->eth; hash_key = hash_decap_info(&key); mutex_lock(&esw->offloads.decap_tbl_lock); d = mlx5e_decap_get(priv, &key, hash_key); @@ -931,8 +947,8 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; - reformat_params.size = sizeof(parse_attr->eth); - reformat_params.data = &parse_attr->eth; + reformat_params.size = sizeof(attr->eth); + reformat_params.data = &attr->eth; d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); @@ -1201,6 +1217,7 @@ out: static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct mlx5e_encap_entry *e, bool 
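
/* The encap update paths above share a swap-with-rollback shape: the
 * post actions are offloaded first, the slow-path rule is then swapped
 * for the encap rule, and a failed swap unoffloads the post actions
 * again so the flow stays on the slow path. Simplified control flow
 * with stub helpers (ordering condensed; illustrative only):
 */
struct flow_sk { void *rule; };

static int offload_post_acts(struct flow_sk *f) { (void)f; return 0; }
static void unoffload_post_acts(struct flow_sk *f) { (void)f; }
static void *offload_encap_rule(struct flow_sk *f) { (void)f; return (void *)1; }
static void remove_slow_path_rule(struct flow_sk *f) { (void)f; }

static int swap_to_encap_rule(struct flow_sk *f)
{
    void *rule;

    if (offload_post_acts(f))
        return -1;                 /* keep the slow-path rule as-is */

    rule = offload_encap_rule(f);
    if (!rule) {
        unoffload_post_acts(f);    /* roll back, stay on slow path */
        return -1;
    }

    remove_slow_path_rule(f);
    f->rule = rule;
    return 0;
}
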
new_encap_entry, unsigned long tbl_time_before, @@ -1209,7 +1226,6 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned long tbl_time_after = tbl_time_before; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; struct mlx5_esw_flow_attr *esw_attr; struct mlx5e_route_entry *r; @@ -1360,17 +1376,19 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, list_for_each_entry(flow, encap_flows, tmp_list) { struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; struct mlx5_flow_spec *spec; if (flow_flag_test(flow, FAILED)) continue; + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; parse_attr = attr->parse_attr; - spec = &parse_attr->spec; err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts, e->out_dev, e->route_dev_ifindex, @@ -1380,7 +1398,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, continue; } - err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); if (err) { mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", err); @@ -1392,9 +1410,18 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) goto offload_to_slow_path; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + goto offload_to_slow_path; + } + /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index 3391504d9a08..d542b8476491 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -7,15 +7,19 @@ #include "tc_priv.h" void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index); + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index); int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, struct net_device **encap_dev, bool *encap_valid); + int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index b789af07829c..c208ea307bff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -9,19 +9,6 @@ #define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. 
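
/* Worked numbers behind the removed bound: one WQEBB carries 4 DS
 * (MLX5_SEND_WQEBB_NUM_DS), and the ctrl segment's DS count field is
 * 6 bits, so it tops out at 63. A full 16-WQEBB session would need
 * 64 DS, hence the one- or two-WQEBB reduction (two on 128-byte
 * cachelines, keeping a full session cache-aligned). The diff replaces
 * this compile-time cap with a per-device maximum queried at runtime.
 * Compile-time check of the arithmetic:
 */
#include <assert.h>

#define SEND_WQE_MAX_WQEBBS 16
#define SEND_WQEBB_NUM_DS   4
#define DS_FIELD_MAX        ((1 << 6) - 1)    /* 6-bit DS count: 63 */

static_assert(SEND_WQE_MAX_WQEBBS * SEND_WQEBB_NUM_DS > DS_FIELD_MAX,
              "a full 16-WQEBB session (64 DS) overflows the DS field");
static_assert((SEND_WQE_MAX_WQEBBS - 1) * SEND_WQEBB_NUM_DS <= DS_FIELD_MAX,
              "15 WQEBBs (60 DS) fits");
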
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. - */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif - -#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) @@ -57,10 +44,8 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); /* RX */ -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info); -void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info, - bool recycle); +void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page); +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); @@ -68,8 +53,6 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); /* TX */ -u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); @@ -308,9 +291,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more); void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); -static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session) +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) { - return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS; + return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; } static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) @@ -431,10 +414,10 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, } } -static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) -{ - BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS); +#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1) +static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) +{ /* A WQE must not cross the page boundary, hence two conditions: * 1. Its size must not exceed the page size. * 2. If the WQE size is X, and the space remaining in a page is less @@ -443,18 +426,28 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) * stop room of X-1 + X. * WQE size is also limited by the hardware limit. 
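
/* Worked example of the stop-room rule stated above: a WQE of X WQEBBs
 * must not cross a page boundary, so in the worst case X-1 WQEBBs left
 * at the end of a page are filled with NOPs and the WQE itself takes X
 * more, i.e. 2*X - 1 WQEBBs must stay reserved. That is exactly what
 * the MLX5E_STOP_ROOM() macro in the diff computes:
 */
#define STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1)

/* e.g. a 16-WQEBB maximum WQE reserves 31 WQEBBs of stop room */
_Static_assert(STOP_ROOM(16) == 31, "16-WQEBB max WQE -> 31 WQEBBs");
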
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 56e10c84a706..8f321a6c0809 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -57,12 +57,14 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
 
 static inline bool
 mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
-		    struct mlx5e_dma_info *di, struct xdp_buff *xdp)
+		    struct page *page, struct xdp_buff *xdp)
 {
+	struct skb_shared_info *sinfo = NULL;
 	struct mlx5e_xmit_data xdptxd;
 	struct mlx5e_xdp_info xdpi;
 	struct xdp_frame *xdpf;
 	dma_addr_t dma_addr;
+	int i;
 
 	xdpf = xdp_convert_buff_to_frame(xdp);
 	if (unlikely(!xdpf))
@@ -96,46 +98,77 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 		xdptxd.dma_addr = dma_addr;
 		xdpi.frame.xdpf = xdpf;
 		xdpi.frame.dma_addr = dma_addr;
-	} else {
-		/* Driver assumes that xdp_convert_buff_to_frame returns
-		 * an xdp_frame that points to the same memory region as
-		 * the original xdp_buff. It allows to map the memory only
-		 * once and to use the DMA_BIDIRECTIONAL mode.
-		 */
-		xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+		if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+					      mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
+			return false;
+
+		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+		return true;
+	}
+
+	/* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
+	 * that points to the same memory region as the original xdp_buff. It
+	 * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
+	 * mode.
+	 */
+
+	xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+	xdpi.page.rq = rq;
+
+	dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
+	dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_TO_DEVICE);
+
+	if (unlikely(xdp_frame_has_frags(xdpf))) {
+		sinfo = xdp_get_shared_info_from_frame(xdpf);
-		dma_addr = di->addr + (xdpf->data - (void *)xdpf);
-		dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
-					   DMA_TO_DEVICE);
+		for (i = 0; i < sinfo->nr_frags; i++) {
+			skb_frag_t *frag = &sinfo->frags[i];
+			dma_addr_t addr;
+			u32 len;
-		xdptxd.dma_addr = dma_addr;
-		xdpi.page.rq = rq;
-		xdpi.page.di = *di;
+			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+				skb_frag_off(frag);
+			len = skb_frag_size(frag);
+			dma_sync_single_for_device(sq->pdev, addr, len,
+						   DMA_TO_DEVICE);
+		}
 	}
 
-	return INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
-			       mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
+	xdptxd.dma_addr = dma_addr;
+
+	if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+				      mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
+		return false;
+
+	xdpi.page.page = page;
+	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+
+	if (unlikely(xdp_frame_has_frags(xdpf))) {
+		for (i = 0; i < sinfo->nr_frags; i++) {
+			skb_frag_t *frag = &sinfo->frags[i];
+
+			xdpi.page.page = skb_frag_page(frag);
+			mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+		}
+	}
+
+	return true;
 }
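
[Editor's aside, not part of the patch] A user-space model of the DMA-address arithmetic used above for XDP_TX from page-pool memory. The page address and struct layout are stand-ins; the point is that the payload's offset inside the frame's page translates one-to-one into an offset from the page's DMA mapping:

#include <stdint.h>
#include <stdio.h>

struct fake_xdp_frame {
	void *data;	/* payload pointer inside the same page */
};

int main(void)
{
	static unsigned char page[4096];
	uint64_t page_dma = 0x80000000ULL;	/* pretend page_pool DMA address */
	struct fake_xdp_frame *xdpf = (struct fake_xdp_frame *)page;

	xdpf->data = page + 256;	/* headroom before the payload */

	/* same arithmetic as the patch: offset of the payload within the
	 * frame, applied to the page's DMA mapping */
	uint64_t dma_addr = page_dma +
			    (uint64_t)((char *)xdpf->data - (char *)xdpf);

	printf("sync for device at 0x%llx\n", (unsigned long long)dma_addr);
	return 0;
}
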
 
 /* returns true if packet was consumed by xdp */
-bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
-		      u32 *len, struct xdp_buff *xdp)
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+		      struct bpf_prog *prog, struct xdp_buff *xdp)
 {
-	struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
 	u32 act;
 	int err;
 
-	if (!prog)
-		return false;
-
 	act = bpf_prog_run_xdp(prog, xdp);
 	switch (act) {
 	case XDP_PASS:
-		*len = xdp->data_end - xdp->data;
 		return false;
 	case XDP_TX:
-		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp)))
+		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp)))
 			goto xdp_abort;
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
 		return true;
@@ -147,7 +180,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
 		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
 		if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
-			mlx5e_page_dma_unmap(rq, di);
+			mlx5e_page_dma_unmap(rq, page);
 		rq->stats->xdp_redirect++;
 		return true;
 	default:
@@ -199,7 +232,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
 	struct mlx5e_tx_wqe *wqe;
 	u16 pi;
 
-	pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+	pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
 	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 	net_prefetchw(wqe->data);
 
@@ -245,10 +278,8 @@ enum {
 INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
 {
 	if (unlikely(!sq->mpwqe.wqe)) {
-		const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
-
 		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
-						     stop_room))) {
+						     sq->stop_room))) {
 			/* SQ is full, ring doorbell */
 			mlx5e_xmit_xdp_doorbell(sq);
 			sq->stats->full++;
@@ -262,12 +293,26 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
 }
 
 INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+		     struct skb_shared_info *sinfo, int check_result);
+
+INDIRECT_CALLABLE_SCOPE bool
 mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
-			   struct mlx5e_xdp_info *xdpi, int check_result)
+			   struct skb_shared_info *sinfo, int check_result)
 {
 	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
 	struct mlx5e_xdpsq_stats *stats = sq->stats;
 
+	if (unlikely(sinfo)) {
+		/* MPWQE is enabled, but a multi-buffer packet is queued for
+		 * transmission. MPWQE can't send fragmented packets, so close
+		 * the current session and fall back to a regular WQE.
+		 */
+		if (unlikely(sq->mpwqe.wqe))
+			mlx5e_xdp_mpwqe_complete(sq);
+		return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+	}
+
 	if (unlikely(xdptxd->len > sq->hw_mtu)) {
 		stats->err++;
 		return false;
@@ -288,17 +333,16 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
 
 	mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
 
-	if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
+	if (unlikely(mlx5e_xdp_mpqwe_is_full(session, sq->max_sq_mpw_wqebbs)))
 		mlx5e_xdp_mpwqe_complete(sq);
 
-	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
 	stats->xmit++;
 	return true;
 }
 
-INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
 {
-	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
 		/* SQ is full, ring doorbell */
 		mlx5e_xmit_xdp_doorbell(sq);
 		sq->stats->full++;
@@ -308,45 +352,76 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
 	return MLX5E_XDP_CHECK_OK;
 }
 
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+	return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
+}
+
 INDIRECT_CALLABLE_SCOPE bool
 mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
-		     struct mlx5e_xdp_info *xdpi, int check_result)
+		     struct skb_shared_info *sinfo, int check_result)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
-	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-
-	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
-	struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
-	struct mlx5_wqe_data_seg *dseg = wqe->data;
+	struct mlx5_wqe_ctrl_seg *cseg;
+	struct mlx5_wqe_data_seg *dseg;
+	struct mlx5_wqe_eth_seg *eseg;
+	struct mlx5e_tx_wqe *wqe;
 	dma_addr_t dma_addr = xdptxd->dma_addr;
 	u32 dma_len = xdptxd->len;
+	u16 ds_cnt, inline_hdr_sz;
+	u8 num_wqebbs = 1;
+	int num_frags = 0;
+	u16 pi;
 
 	struct mlx5e_xdpsq_stats *stats = sq->stats;
 
-	net_prefetchw(wqe);
-
 	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
 		stats->err++;
 		return false;
 	}
 
-	if (!check_result)
-		check_result = mlx5e_xmit_xdp_frame_check(sq);
+	ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
+		ds_cnt++;
+
+	/* check_result must be 0 if sinfo is passed. */
+	if (!check_result) {
+		int stop_room = 1;
+
+		if (unlikely(sinfo)) {
+			ds_cnt += sinfo->nr_frags;
+			num_frags = sinfo->nr_frags;
+			num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+			/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
+			 * enough to hold all fragments.
+			 */
+			stop_room = MLX5E_STOP_ROOM(num_wqebbs);
+		}
+
+		check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
+	}
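
[Editor's aside, not part of the patch] The descriptor accounting above, worked through for a hypothetical 3-fragment frame. Constants mirror the mlx5 layout (4 data segments per 64-byte WQEBB, 2 DS of ctrl+eth overhead) and are assumptions of this sketch:

#include <stdio.h>

#define WQEBB_NUM_DS    4	/* data segments per 64-byte WQEBB */
#define TX_WQE_EMPTY_DS 2	/* ctrl + eth segments */

int main(void)
{
	unsigned int nr_frags = 3;	/* hypothetical multi-buffer frame */
	unsigned int inline_ds = 1;	/* min_inline_mode != NONE */
	/* empty DS + linear part + inline DS + one DS per fragment = 7 */
	unsigned int ds_cnt = TX_WQE_EMPTY_DS + 1 + inline_ds + nr_frags;
	/* DIV_ROUND_UP(7, 4) = 2 WQEBBs */
	unsigned int num_wqebbs = (ds_cnt + WQEBB_NUM_DS - 1) / WQEBB_NUM_DS;

	printf("ds_cnt=%u -> num_wqebbs=%u, stop room=%u\n",
	       ds_cnt, num_wqebbs, num_wqebbs * 2 - 1);
	return 0;
}
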
 
 	if (unlikely(check_result < 0))
 		return false;
 
-	cseg->fm_ce_se = 0;
+	pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
+	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+	net_prefetchw(wqe);
+
+	cseg = &wqe->ctrl;
+	eseg = &wqe->eth;
+	dseg = wqe->data;
+
+	inline_hdr_sz = 0;
 
 	/* copy the inline part if required */
 	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
 		memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
-		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
 		memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
 		       MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
 		dma_len  -= MLX5E_XDP_MIN_INLINE;
 		dma_addr += MLX5E_XDP_MIN_INLINE;
+		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
 		dseg++;
 	}
 
@@ -356,11 +431,45 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
 
-	sq->pc++;
+	if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
+		u8 num_pkts = 1 + num_frags;
+		int i;
+
+		memset(&cseg->trailer, 0, sizeof(cseg->trailer));
+		memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
+
+		eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+		dseg->lkey = sq->mkey_be;
+
+		for (i = 0; i < num_frags; i++) {
+			skb_frag_t *frag = &sinfo->frags[i];
+			dma_addr_t addr;
+
+			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+				skb_frag_off(frag);
+
+			dseg++;
+			dseg->addr = cpu_to_be64(addr);
+			dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
+			dseg->lkey = sq->mkey_be;
+		}
+
+		cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+
+		sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
+			.num_wqebbs = num_wqebbs,
+			.num_pkts = num_pkts,
+		};
+
+		sq->pc += num_wqebbs;
+	} else {
+		cseg->fm_ce_se = 0;
+
+		sq->pc++;
+	}
 
 	sq->doorbell_cseg = cseg;
 
-	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
 	stats->xmit++;
 	return true;
 }
@@ -386,7 +495,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 		break;
 	case MLX5E_XDP_XMIT_MODE_PAGE:
 		/* XDP_TX from the regular RQ */
-		mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+		mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
 		break;
 	case MLX5E_XDP_XMIT_MODE_XSK:
 		/* AF_XDP send */
@@ -539,12 +648,13 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		xdpi.frame.dma_addr = xdptxd.dma_addr;
 
 		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
-				      mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
+				      mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
 		if (unlikely(!ret)) {
 			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
 					 xdptxd.len, DMA_TO_DEVICE);
 			break;
 		}
+		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
 		nxmit++;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 8d991c3b7a50..287e17911251 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -38,7 +38,6 @@
 #include "en/txrx.h"
 
 #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */)
 
 #define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
 #define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
@@ -47,8 +46,8 @@ struct mlx5e_xsk_param;
 int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
-bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
-		      u32 *len, struct xdp_buff *xdp);
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+		      struct bpf_prog *prog, struct xdp_buff *xdp);
 void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
@@ -59,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
 							   struct mlx5e_xmit_data *xdptxd,
-							   struct mlx5e_xdp_info *xdpi,
+							   struct skb_shared_info *sinfo,
 							   int check_result));
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
 						    struct mlx5e_xmit_data *xdptxd,
-						    struct mlx5e_xdp_info *xdpi,
+						    struct skb_shared_info *sinfo,
 						    int check_result));
 INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
 INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
@@ -123,12 +122,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
 	return cur;
 }
 
-static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
 {
 	if (session->inline_on)
 		return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
-		       MLX5E_TX_MPW_MAX_NUM_DS;
-	return mlx5e_tx_mpwqe_is_full(session);
+		       max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+
+	return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
 }
 
 struct mlx5e_xdp_wqe_info {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 8e7b877d8a12..021da085e603 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -4,6 +4,7 @@
 #include "rx.h"
 #include "en/xdp.h"
 #include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
 
 /* RX data path */
 
@@ -30,7 +31,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 						    u32 page_idx)
 {
 	struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
-	u32 cqe_bcnt32 = cqe_bcnt;
+	struct bpf_prog *prog;
 
 	/* Check packet size. Note LRO doesn't use linear SKB */
 	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -45,7 +46,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 	 */
 	WARN_ON_ONCE(head_offset);
 
-	xdp->data_end = xdp->data + cqe_bcnt32;
+	xdp->data_end = xdp->data + cqe_bcnt;
 	xdp_set_data_meta_invalid(xdp);
 	xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
 	net_prefetch(xdp->data);
@@ -65,7 +66,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 	 * allocated first from the Reuse Ring, so it has enough space.
 	 */
 
-	if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
+	prog = rcu_dereference(rq->xdp_prog);
+	if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) {
 		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
 			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
 		return NULL; /* page/packet was consumed by XDP */
@@ -74,7 +76,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 
 	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
 	 * frame. On SKB allocation failure, NULL is returned.
 	 */
-	return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32);
+	return mlx5e_xsk_construct_skb(rq, xdp->data, xdp->data_end - xdp->data);
 }
 
 struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
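
[Editor's aside, not part of the patch] Why the hunks above re-derive the byte count from the xdp_buff: an XDP program may move data or data_end before returning XDP_PASS, so the CQE byte count can be stale by the time the SKB is built. A minimal model:

#include <stdio.h>

struct fake_xdp_buff {
	unsigned char *data;
	unsigned char *data_end;
};

int main(void)
{
	static unsigned char frame[2048];
	struct fake_xdp_buff xdp = { frame, frame + 1500 };	/* cqe_bcnt == 1500 */

	xdp.data += 14;		/* an XDP program pops the Ethernet header */

	/* what the patch now passes to mlx5e_xsk_construct_skb() */
	printf("skb length: %ld (CQE said 1500)\n",
	       (long)(xdp.data_end - xdp.data));
	return 0;
}
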
@@ -83,6 +85,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 					      u32 cqe_bcnt)
 {
 	struct xdp_buff *xdp = wi->di->xsk;
+	struct bpf_prog *prog;
 
 	/* wi->offset is not used in this function, because xdp->data and the
 	 * DMA address point directly to the necessary place. Furthermore, the
@@ -101,12 +104,13 @@
 		return NULL;
 	}
 
-	if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
+	prog = rcu_dereference(rq->xdp_prog);
+	if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp)))
 		return NULL; /* page/packet was consumed by XDP */
 
 	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
 	 * will be handled by mlx5e_put_rx_frag.
 	 * On SKB allocation failure, NULL is returned.
 	 */
-	return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt);
+	return mlx5e_xsk_construct_skb(rq, xdp->data, xdp->data_end - xdp->data);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 25eac9e20342..3ad7f1301fa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -43,7 +43,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
 				   struct mlx5e_channel_param *cparam)
 {
 	mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq);
-	mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
+	mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq);
 }
 
 static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 8e96260fce1d..3ec0c17db010 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -103,12 +103,15 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
 		xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
 
 		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
-				      mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, check_result);
+				      mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+				      check_result);
 		if (unlikely(!ret)) {
 			if (sq->mpwqe.wqe)
 				mlx5e_xdp_mpwqe_complete(sq);
 
 			mlx5e_xsk_tx_post_err(sq, &xdpi);
+		} else {
+			mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
 		}
 
 		flush = true;
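
[Editor's aside, not part of the patch] The xsk/tx.c hunk above moves the xdpi FIFO push to the success path. The completion handler pops exactly one info entry per completed descriptor, so pushing before a failed send would leave an orphan entry and desynchronize completions. A minimal model of push-on-success; all names are local to the sketch:

#include <stdbool.h>
#include <stdio.h>

struct info_fifo {
	int entries[8];
	unsigned int pc, cc;	/* producer and consumer counters */
};

static void fifo_push(struct info_fifo *f, int v) { f->entries[f->pc++ & 7] = v; }
static int  fifo_pop(struct info_fifo *f)         { return f->entries[f->cc++ & 7]; }

static bool try_xmit(int frame) { return frame != 13; }	/* pretend one send fails */

int main(void)
{
	struct info_fifo fifo = { {0}, 0, 0 };
	int frame;

	for (frame = 12; frame <= 14; frame++) {
		if (!try_xmit(frame))
			continue;		/* nothing posted: push nothing */
		fifo_push(&fifo, frame);	/* one entry per posted descriptor */
	}

	while (fifo.cc != fifo.pc)	/* completion path: one pop per completion */
		printf("completed frame %d\n", fifo_pop(&fifo));
	return 0;
}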