summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2021-08-10 13:19:16 -0700
committerJakub Kicinski <kuba@kernel.org>2021-08-10 13:19:17 -0700
commitebd0d30cc5e44ed3a6db7683bd357b3eea636e74 (patch)
tree599bfe946190cbbc57fcdf95b479dc90893b5ef3 /drivers/net/ethernet/mellanox
parentd1a4e0a9576fd2b29a0d13b306a9f52440908ab4 (diff)
parent598fe77df855feeeca9dfda2ffe622ac7724e5c3 (diff)
Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
Saeed Mahameed says: ==================== pull-request: mlx5-next 2020-08-9 This pulls mlx5-next branch which includes patches already reviewed on net-next and rdma mailing lists. 1) mlx5 single E-Switch FDB for lag 2) IB/mlx5: Rename is_apu_thread_cq function to is_apu_cq 3) Add DCS caps & fields support [1] https://patchwork.kernel.org/project/netdevbpf/cover/20210803231959.26513-1-saeed@kernel.org/ [2] https://patchwork.kernel.org/project/netdevbpf/patch/0e3364dab7e0e4eea5423878b01aa42470be8d36.1626609184.git.leonro@nvidia.com/ [3] https://patchwork.kernel.org/project/netdevbpf/patch/55e1d69bef1fbfa5cf195c0bfcbe35c8019de35e.1624258894.git.leonro@nvidia.com/ * 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux: net/mlx5: Lag, Create shared FDB when in switchdev mode net/mlx5: E-Switch, add logic to enable shared FDB net/mlx5: Lag, move lag destruction to a workqueue net/mlx5: Lag, properly lock eswitch if needed net/mlx5: Add send to vport rules on paired device net/mlx5: E-Switch, Add event callback for representors net/mlx5e: Use shared mappings for restoring from metadata net/mlx5e: Add an option to create a shared mapping net/mlx5: E-Switch, set flow source for send to uplink rule RDMA/mlx5: Add shared FDB support {net, RDMA}/mlx5: Extend send to vport rules RDMA/mlx5: Fill port info based on the relevant eswitch net/mlx5: Lag, add initial logic for shared FDB net/mlx5: Return mdev from eswitch IB/mlx5: Rename is_apu_thread_cq function to is_apu_cq net/mlx5: Add DCS caps & fields support ==================== Link: https://lore.kernel.org/r/20210809202522.316930-1-saeed@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c88
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c383
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c2
22 files changed, 926 insertions, 69 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index df3e4938ecdd..99ec278d0370 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -89,7 +89,8 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen, u32 *out, int outlen)
{
- int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+ c_eqn_or_apu_element);
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
struct mlx5_eq_comp *eq;
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
index ea321e528749..4e72ca8070e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
@@ -5,11 +5,15 @@
#include <linux/slab.h>
#include <linux/xarray.h>
#include <linux/hashtable.h>
+#include <linux/refcount.h>
#include "mapping.h"
#define MAPPING_GRACE_PERIOD 2000
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
struct mapping_ctx {
struct xarray xarray;
DECLARE_HASHTABLE(ht, 8);
@@ -20,6 +24,10 @@ struct mapping_ctx {
struct delayed_work dwork;
struct list_head pending_list;
spinlock_t pending_list_lock; /* Guards pending list */
+ u64 id;
+ u8 type;
+ struct list_head list;
+ refcount_t refcount;
};
struct mapping_item {
@@ -205,11 +213,48 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
mutex_init(&ctx->lock);
xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
+ refcount_set(&ctx->refcount, 1);
+ INIT_LIST_HEAD(&ctx->list);
+
+ return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+ struct mapping_ctx *ctx;
+
+ mutex_lock(&shared_ctx_lock);
+ list_for_each_entry(ctx, &shared_ctx_list, list) {
+ if (ctx->id == id && ctx->type == type) {
+ if (refcount_inc_not_zero(&ctx->refcount))
+ goto unlock;
+ break;
+ }
+ }
+
+ ctx = mapping_create(data_size, max_id, delayed_removal);
+ if (IS_ERR(ctx))
+ goto unlock;
+
+ ctx->id = id;
+ ctx->type = type;
+ list_add(&ctx->list, &shared_ctx_list);
+
+unlock:
+ mutex_unlock(&shared_ctx_lock);
return ctx;
}
void mapping_destroy(struct mapping_ctx *ctx)
{
+ if (!refcount_dec_and_test(&ctx->refcount))
+ return;
+
+ mutex_lock(&shared_ctx_lock);
+ list_del(&ctx->list);
+ mutex_unlock(&shared_ctx_lock);
+
mapping_flush_work(ctx);
xa_destroy(&ctx->xarray);
mutex_destroy(&ctx->lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
index 285525cc5470..4e2119f0f4c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
@@ -24,4 +24,9 @@ struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
bool delayed_removal);
void mapping_destroy(struct mapping_ctx *ctx);
+/* Adds a mapping with an id, or gets an existing mapping with the same id and type
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
#endif /* __MLX5_MAPPING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 91e7a01e32be..b1707b86aa16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -2138,6 +2138,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
struct mlx5_tc_ct_priv *ct_priv;
struct mlx5_core_dev *dev;
const char *msg;
+ u64 mapping_id;
int err;
dev = priv->mdev;
@@ -2153,13 +2154,17 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
if (!ct_priv)
goto err_alloc;
- ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+ sizeof(u16), 0, true);
if (IS_ERR(ct_priv->zone_mapping)) {
err = PTR_ERR(ct_priv->zone_mapping);
goto err_mapping_zone;
}
- ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
+ ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+ sizeof(u32) * 4, 0, true);
if (IS_ERR(ct_priv->labels_mapping)) {
err = PTR_ERR(ct_priv->labels_mapping);
goto err_mapping_labels;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 25a0b5f0984a..ccc569c4ee50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1627,7 +1627,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
(__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 1e520640f7e0..c54aaef521b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -49,6 +49,7 @@
#include "en/devlink.h"
#include "fs_core.h"
#include "lib/mlx5.h"
+#include "lib/devcom.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
#include "en_accel/ipsec.h"
@@ -310,6 +311,8 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
rpriv = mlx5e_rep_to_rep_priv(rep);
list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ if (rep_sq->send_to_vport_rule_peer)
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
list_del(&rep_sq->list);
kfree(rep_sq);
}
@@ -319,6 +322,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep,
u32 *sqns_array, int sqns_num)
{
+ struct mlx5_eswitch *peer_esw = NULL;
struct mlx5_flow_handle *flow_rule;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_rep_sq *rep_sq;
@@ -329,6 +333,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
return 0;
rpriv = mlx5e_rep_to_rep_priv(rep);
+ if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
for (i = 0; i < sqns_num; i++) {
rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
if (!rep_sq) {
@@ -337,7 +345,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
}
/* Add re-inject rule to the PF/representor sqs */
- flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
sqns_array[i]);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
@@ -345,12 +353,34 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
goto out_err;
}
rep_sq->send_to_vport_rule = flow_rule;
+ rep_sq->sqn = sqns_array[i];
+
+ if (peer_esw) {
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+ rep, sqns_array[i]);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ kfree(rep_sq);
+ goto out_err;
+ }
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
list_add(&rep_sq->list, &rpriv->vport_sqs_list);
}
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
return 0;
out_err:
mlx5e_sqs2vport_stop(esw, rep);
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
return err;
}
@@ -1247,10 +1277,64 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
return rpriv->netdev;
}
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (!rep_sq->send_to_vport_rule_peer)
+ continue;
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+ rep_sq->send_to_vport_rule_peer = NULL;
+ }
+}
+
+static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ struct mlx5_eswitch *peer_esw)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (rep_sq->send_to_vport_rule_peer)
+ continue;
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
+ if (IS_ERR(flow_rule))
+ goto err_out;
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
+ return 0;
+err_out:
+ mlx5e_vport_rep_event_unpair(rep);
+ return PTR_ERR(flow_rule);
+}
+
+static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ enum mlx5_switchdev_event event,
+ void *data)
+{
+ int err = 0;
+
+ if (event == MLX5_SWITCHDEV_EVENT_PAIR)
+ err = mlx5e_vport_rep_event_pair(esw, rep, data);
+ else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
+ mlx5e_vport_rep_event_unpair(rep);
+
+ return err;
+}
+
static const struct mlx5_eswitch_rep_ops rep_ops = {
.load = mlx5e_vport_rep_load,
.unload = mlx5e_vport_rep_unload,
- .get_proto_dev = mlx5e_vport_rep_get_proto_dev
+ .get_proto_dev = mlx5e_vport_rep_get_proto_dev,
+ .event = mlx5e_vport_rep_event,
};
static int mlx5e_rep_probe(struct auxiliary_device *adev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 47a2dfb7792a..8f0c82448eec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -207,6 +207,8 @@ struct mlx5e_encap_entry {
struct mlx5e_rep_sq {
struct mlx5_flow_handle *send_to_vport_rule;
+ struct mlx5_flow_handle *send_to_vport_rule_peer;
+ u32 sqn;
struct list_head list;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 349a93e0213d..e5c4344a114e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4851,6 +4851,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
struct mlx5_core_dev *dev = priv->mdev;
struct mapping_ctx *chains_mapping;
struct mlx5_chains_attr attr = {};
+ u64 mapping_id;
int err;
mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
@@ -4864,8 +4865,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
- chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj),
- MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+
if (IS_ERR(chains_mapping)) {
err = PTR_ERR(chains_mapping);
goto err_mapping;
@@ -4954,6 +4959,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
struct mapping_ctx *mapping;
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
+ u64 mapping_id;
int err = 0;
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
@@ -4970,8 +4976,12 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
#endif
- mapping = mapping_create(sizeof(struct tunnel_match_key),
- TUNNEL_INFO_BITS_MASK, true);
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
+ sizeof(struct tunnel_match_key),
+ TUNNEL_INFO_BITS_MASK, true);
+
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_tun_mapping;
@@ -4979,7 +4989,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
uplink_priv->tunnel_mapping = mapping;
/* 0xFFF is reserved for stack devices slow path table mark */
- mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_enc_opts_mapping;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
index 505bf811984a..2e504c7461c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -15,6 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
vport->egress.offloads.fwd_rule = NULL;
}
+static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->egress.offloads.bounce_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
+ vport->egress.offloads.bounce_rule = NULL;
+}
+
static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
struct mlx5_vport *vport,
struct mlx5_flow_destination *fwd_dest)
@@ -87,6 +96,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
{
esw_acl_egress_vlan_destroy(vport);
esw_acl_egress_ofld_fwd2vport_destroy(vport);
+ esw_acl_egress_ofld_bounce_rule_destroy(vport);
}
static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
@@ -145,6 +155,12 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
vport->egress.offloads.fwd_grp = NULL;
}
+
+ if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
+ mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+ vport->egress.offloads.bounce_grp = NULL;
+ }
+
esw_acl_egress_vlan_grp_destroy(vport);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 2b90388ef209..7ffea2350f44 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1458,8 +1458,6 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
esw->mode = mode;
- mlx5_lag_update(esw->dev);
-
if (mode == MLX5_ESWITCH_LEGACY) {
err = esw_legacy_enable(esw);
} else {
@@ -1506,6 +1504,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
if (!mlx5_esw_allowed(esw))
return 0;
+ mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
if (esw->mode == MLX5_ESWITCH_NONE) {
ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1519,6 +1518,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
esw->esw_funcs.num_vfs = num_vfs;
}
up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
return ret;
}
@@ -1550,8 +1550,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
old_mode = esw->mode;
esw->mode = MLX5_ESWITCH_NONE;
- mlx5_lag_update(esw->dev);
-
if (old_mode == MLX5_ESWITCH_OFFLOADS)
mlx5_rescan_drivers(esw->dev);
@@ -1567,10 +1565,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
if (!mlx5_esw_allowed(esw))
return;
+ mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
mlx5_eswitch_disable_locked(esw, clear_vf);
esw->esw_funcs.num_vfs = 0;
up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
}
static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
@@ -1759,7 +1759,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
mutex_init(&esw->state_lock);
+ lockdep_register_key(&esw->mode_lock_key);
init_rwsem(&esw->mode_lock);
+ lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
esw->enabled_vports = 0;
esw->mode = MLX5_ESWITCH_NONE;
@@ -1793,6 +1795,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
+ lockdep_unregister_key(&esw->mode_lock_key);
mutex_destroy(&esw->state_lock);
WARN_ON(!xa_empty(&esw->offloads.vhca_map));
xa_destroy(&esw->offloads.vhca_map);
@@ -2364,10 +2367,23 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
*/
void mlx5_esw_unlock(struct mlx5_eswitch *esw)
{
+ if (!mlx5_esw_allowed(esw))
+ return;
up_write(&esw->mode_lock);
}
/**
+ * mlx5_esw_lock() - Take write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_lock(struct mlx5_eswitch *esw)
+{
+ if (!mlx5_esw_allowed(esw))
+ return;
+ down_write(&esw->mode_lock);
+}
+
+/**
* mlx5_eswitch_get_total_vports - Get total vports of the eswitch
*
* @dev: Pointer to core device
@@ -2382,3 +2398,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+/**
+ * mlx5_eswitch_get_core_dev - Get the mdev device
+ * @esw : eswitch device.
+ *
+ * Return the mellanox core device which manages the eswitch.
+ */
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+ return mlx5_esw_allowed(esw) ? esw->dev : NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 41eff9dd1bf6..01e8dfb994d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -86,6 +86,14 @@ struct mlx5_mapped_obj {
#define esw_chains(esw) \
((esw)->fdb_table.offloads.esw_chains_priv)
+enum {
+ MAPPING_TYPE_CHAIN,
+ MAPPING_TYPE_TUNNEL,
+ MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ MAPPING_TYPE_LABELS,
+ MAPPING_TYPE_ZONE,
+};
+
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_handle *allow_rule;
@@ -124,6 +132,8 @@ struct vport_egress {
struct {
struct mlx5_flow_group *fwd_grp;
struct mlx5_flow_handle *fwd_rule;
+ struct mlx5_flow_handle *bounce_rule;
+ struct mlx5_flow_group *bounce_grp;
} offloads;
};
};
@@ -315,6 +325,7 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
+ struct lock_class_key mode_lock_key;
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -697,11 +708,18 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
void mlx5_esw_put(struct mlx5_core_dev *dev);
int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+void mlx5_esw_lock(struct mlx5_eswitch *esw);
void esw_vport_change_handle_locked(struct mlx5_vport *vport);
bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -717,6 +735,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
return ERR_PTR(-EOPNOTSUPP);
}
+static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
+static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
+
static inline struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
@@ -729,6 +750,23 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
{
return vport_num;
}
+
+static inline int
+mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ return 0;
+}
+
+static inline void
+mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw) {}
+
+static inline int
+mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ return 0;
+}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index feecf44994a9..0e3645c4fd0d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -926,6 +926,7 @@ out:
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ struct mlx5_eswitch *from_esw,
struct mlx5_eswitch_rep *rep,
u32 sqn)
{
@@ -944,10 +945,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
/* source vport is the esw manager */
- MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
+ MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(rep->esw->dev, vhca_id));
+ MLX5_CAP_GEN(from_esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -963,6 +964,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow_rule))
@@ -1613,7 +1617,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
goto ns_err;
}
- table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ /* To be strictly correct:
+ * MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
+ * should be:
+ * esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ * peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
+ * but as the peer device might not be in switchdev mode it's not
+ * possible. We use the fact that by default FW sets max vfs and max sfs
+ * to the same value on both devices. If this needs to change in the future, note that
+ * the peer miss group should also be created based on the number of
+ * total vports of the peer (currently it also uses esw->total_vports).
+ */
+ table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
/* create the slow path fdb with encap set, so further table instances
@@ -1670,7 +1685,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
source_eswitch_owner_vhca_id_valid, 1);
}
- ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+ /* See comment above table_size calculation */
+ ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
@@ -2310,14 +2326,293 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
}
+static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ struct mlx5_eswitch *esw;
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_vport *vport;
+ int err;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL);
+ MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+ MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK);
+
+ if (master) {
+ esw = master->priv.eswitch;
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1);
+ MLX5_SET(set_flow_table_root_in, in, table_vport_number,
+ MLX5_VPORT_UPLINK);
+
+ ns = mlx5_get_flow_vport_acl_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ vport->index);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ } else {
+ esw = slave->priv.eswitch;
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ ns = mlx5_get_flow_vport_acl_namespace(slave,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ vport->index);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id);
+ }
+
+ err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+
+static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+ int err;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+
+ if (master) {
+ ns = mlx5_get_flow_namespace(master,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ } else {
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ }
+
+ err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+
+static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_table *acl)
+{
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(slave, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = slave->priv.eswitch->manager_vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
+ &dest, 1);
+ if (IS_ERR(flow_rule))
+ err = PTR_ERR(flow_rule);
+ else
+ vport->egress.offloads.bounce_rule = flow_rule;
+
+ kvfree(spec);
+ return err;
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_eswitch *esw = master->priv.eswitch;
+ struct mlx5_flow_table_attr ft_attr = {
+ .max_fte = 1, .prio = 0, .level = 0,
+ };
+ struct mlx5_flow_namespace *egress_ns;
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *g;
+ struct mlx5_vport *vport;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ vport->index);
+ if (!egress_ns)
+ return -EINVAL;
+
+ if (vport->egress.acl)
+ return -EINVAL;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ goto out;
+ }
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ goto err_group;
+ }
+
+ err = __esw_set_master_egress_rule(master, slave, vport, acl);
+ if (err)
+ goto err_rule;
+
+ vport->egress.acl = acl;
+ vport->egress.offloads.bounce_grp = g;
+
+ kvfree(flow_group_in);
+
+ return 0;
+
+err_rule:
+ mlx5_destroy_flow_group(g);
+err_group:
+ mlx5_destroy_flow_table(acl);
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
+ dev->priv.eswitch->manager_vport);
+
+ esw_acl_egress_ofld_cleanup(vport);
+}
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ int err;
+
+ err = esw_set_uplink_slave_ingress_root(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ return -EINVAL;
+
+ err = esw_set_slave_root_fdb(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ goto err_fdb;
+
+ err = esw_set_master_egress_rule(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ goto err_acl;
+
+ return err;
+
+err_acl:
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+
+err_fdb:
+ esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+
+ return err;
+}
+
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ esw_unset_master_egress_rule(master_esw->dev);
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+ esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+}
+
#define ESW_OFFLOADS_DEVCOM_PAIR (0)
#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
-static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
- struct mlx5_eswitch *peer_esw)
+/*
+ * Deliver MLX5_SWITCHDEV_EVENT_UNPAIR to the representors of @esw.
+ * For each rep the rep types are visited from the highest index down to 0;
+ * a type is notified only when it is in REP_LOADED state and its ops
+ * provide an ->event callback. The callback's return value is not checked.
+ */
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
{
+ const struct mlx5_eswitch_rep_ops *rep_ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long idx;
+ u8 type;
- return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ mlx5_esw_for_each_rep(esw, idx, rep) {
+ for (type = NUM_REP_TYPES; type-- > 0; ) {
+ if (atomic_read(&rep->rep_data[type].state) != REP_LOADED)
+ continue;
+
+ rep_ops = esw->offloads.rep_ops[type];
+ if (rep_ops->event)
+ rep_ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+ }
+ }
}
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
@@ -2325,9 +2620,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
mlx5e_tc_clean_fdb_peer_flows(esw);
#endif
+ mlx5_esw_offloads_rep_event_unpair(esw);
esw_del_fdb_peer_miss_rules(esw);
}
+/*
+ * Pair @esw with @peer_esw: add the FDB peer miss rules, then deliver
+ * MLX5_SWITCHDEV_EVENT_PAIR (with @peer_esw as payload) to every loaded
+ * representor type whose ops implement ->event.
+ *
+ * Returns 0 on success. If adding the miss rules fails, that error is
+ * returned directly. If an event callback fails, mlx5_esw_offloads_unpair()
+ * is run on @esw before the callback's error is returned.
+ */
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw)
+{
+ const struct mlx5_eswitch_rep_ops *rep_ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long idx;
+ u8 type;
+ int ret;
+
+ ret = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ if (ret)
+ return ret;
+
+ mlx5_esw_for_each_rep(esw, idx, rep) {
+ for (type = 0; type < NUM_REP_TYPES; type++) {
+ if (atomic_read(&rep->rep_data[type].state) != REP_LOADED)
+ continue;
+
+ rep_ops = esw->offloads.rep_ops[type];
+ if (!rep_ops->event)
+ continue;
+
+ ret = rep_ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
+ if (ret) {
+ mlx5_esw_offloads_unpair(esw);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
struct mlx5_eswitch *peer_esw,
bool pair)
@@ -2618,6 +2946,31 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
esw_vport_destroy_offloads_acl_tables(esw, vport);
}
+/*
+ * Re-run representor loading for an E-Switch in offloads mode.
+ *
+ * Nothing is done (and 0 is returned) when @esw is NULL, is not in
+ * MLX5_ESWITCH_OFFLOADS mode, or its uplink REP_ETH representor is not in
+ * REP_LOADED state. Otherwise the uplink rep is loaded first and a failure
+ * there is returned. The load call is then issued for every rep whose
+ * REP_ETH state is REP_LOADED; those return values are ignored.
+ */
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long idx;
+ int err;
+
+ if (!esw)
+ return 0;
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+ return 0;
+
+ err = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
+ if (err)
+ return err;
+
+ mlx5_esw_for_each_rep(esw, idx, rep) {
+ if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+ continue;
+ mlx5_esw_offloads_rep_load(esw, rep->vport);
+ }
+
+ return 0;
+}
+
static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
{
struct mlx5_esw_indir_table *indir;
@@ -2787,6 +3140,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
struct mapping_ctx *reg_c0_obj_pool;
struct mlx5_vport *vport;
unsigned long i;
+ u64 mapping_id;
int err;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
@@ -2810,9 +3164,13 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
if (err)
goto err_vport_metadata;
- reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj),
- ESW_REG_C0_USER_DATA_METADATA_MASK,
- true);
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ ESW_REG_C0_USER_DATA_METADATA_MASK,
+ true);
+
if (IS_ERR(reg_c0_obj_pool)) {
err = PTR_ERR(reg_c0_obj_pool);
goto err_pool;
@@ -2990,10 +3348,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
- return err;
+ goto enable_lag;
}
cur_mlx5_mode = err;
err = 0;
@@ -3010,6 +3369,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
unlock:
mlx5_esw_unlock(esw);
+enable_lag:
+ mlx5_lag_enable_change(esw->dev);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index bd66ab2af5b5..9b2cca6d9620 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -454,7 +454,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 896a6c3dbdb7..7db8df64a60e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -152,17 +152,56 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
return 0;
}
+/*
+ * Issue SET_FLOW_TABLE_ROOT for the FDB table type on @slave.
+ *
+ * With @ft_id_valid set, the command names table @ft_id and marks it as
+ * owned by the E-Switch identified by @master's vhca_id. Otherwise the
+ * command carries the id of the root flow table of @slave's own FDB
+ * namespace and @master/@ft_id are not used.
+ *
+ * Returns the result of mlx5_cmd_exec() on @slave.
+ */
+static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ bool ft_id_valid,
+ u32 ft_id)
+{
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_namespace *fdb_ns;
+ u32 root_ft_id = ft_id;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_FDB);
+
+ if (!ft_id_valid) {
+ /* Fall back to the slave's own FDB root table. */
+ fdb_ns = mlx5_get_flow_namespace(slave, MLX5_FLOW_NAMESPACE_FDB);
+ root_ns = find_root(&fdb_ns->node);
+ root_ft_id = root_ns->root_ft->id;
+ } else {
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ }
+
+ MLX5_SET(set_flow_table_root_in, in, table_id, root_ft_id);
+
+ return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+}
+
static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft, u32 underlay_qpn,
bool disconnect)
{
u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
+ int err;
if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
underlay_qpn == 0)
return 0;
+ if (ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ !mlx5_lag_is_master(dev))
+ return 0;
+
MLX5_SET(set_flow_table_root_in, in, opcode,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
@@ -177,7 +216,24 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
MLX5_SET(set_flow_table_root_in, in, other_vport,
!!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
- return mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ err = mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ if (!err &&
+ ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ mlx5_lag_is_master(dev)) {
+ err = mlx5_cmd_set_slave_root_fdb(dev,
+ mlx5_lag_get_peer_mdev(dev),
+ !disconnect, (!disconnect) ?
+ ft->id : 0);
+ if (err && !disconnect) {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ns->root_ft->id);
+ mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ }
+ }
+
+ return err;
}
static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c0697e1b7118..8481027e493c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -413,7 +413,7 @@ static bool check_valid_spec(const struct mlx5_flow_spec *spec)
return true;
}
-static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
{
struct fs_node *root;
struct mlx5_flow_namespace *ns;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 7317cdeab661..98240badc342 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -294,6 +294,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
+
#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
#define fs_list_for_each_entry(pos, root) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 5c043c5cc403..f4dfa55c8c7e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -32,7 +32,9 @@
#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
@@ -45,7 +47,7 @@
static DEFINE_SPINLOCK(lag_lock);
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2)
+ u8 remap_port2, bool shared_fdb)
{
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+ MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
return mlx5_cmd_exec_in(dev, create_lag, in);
}
@@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
}
+/*
+ * Create the hardware LAG on the first port and, when @shared_fdb is set,
+ * configure the two E-Switches for single FDB operation.
+ *
+ * @ldev: LAG device; its v2p_map entries are handed to
+ * mlx5_infer_tx_affinity_mapping() together with @tracker.
+ * @tracker: bonding state used to infer the TX affinity mapping.
+ * @shared_fdb: request single (shared) FDB mode.
+ *
+ * Returns 0 on success or the failing step's error. If the single FDB
+ * setup fails, the LAG that was just created is destroyed again before the
+ * error is returned.
+ */
static int mlx5_create_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
+ struct lag_tracker *tracker,
+ bool shared_fdb)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
int err;
mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
&ldev->v2p_map[MLX5_LAG_P2]);
- mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
- ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+ mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d\n",
+ ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+ shared_fdb);
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
- if (err)
+ ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
+ if (err) {
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
err);
+ return err;
+ }
+
+ if (shared_fdb) {
+ err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+ dev1->priv.eswitch);
+ if (err)
+ mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+ else
+ mlx5_core_info(dev0, "Operation mode is single FDB\n");
+ }
+
+ /* err can only be set here by the single FDB setup above. */
+ if (err) {
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+ if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+ mlx5_core_err(dev0,
+ "Failed to destroy LAG after single FDB setup failure; driver restart required\n");
+ }
+
return err;
}
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- u8 flags)
+ u8 flags,
+ bool shared_fdb)
{
bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
int err;
- err = mlx5_create_lag(ldev, tracker);
+ err = mlx5_create_lag(ldev, tracker, shared_fdb);
if (err) {
if (roce_lag) {
mlx5_core_err(dev0,
@@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
}
ldev->flags |= flags;
+ ldev->shared_fdb = shared_fdb;
return 0;
}
@@ -278,6 +306,12 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+ if (ldev->shared_fdb) {
+ mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
+ ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
+ ldev->shared_fdb = false;
+ }
+
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
if (err) {
@@ -333,6 +367,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
if (!ldev->pf[i].dev)
continue;
+ if (ldev->pf[i].dev->priv.flags &
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ continue;
+
ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(ldev->pf[i].dev);
}
@@ -342,12 +380,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ bool shared_fdb = ldev->shared_fdb;
bool roce_lag;
int err;
roce_lag = __mlx5_lag_is_roce(ldev);
- if (roce_lag) {
+ if (shared_fdb) {
+ mlx5_lag_remove_devices(ldev);
+ } else if (roce_lag) {
if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
@@ -359,8 +400,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
if (err)
return;
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_add_devices(ldev);
+
+ if (shared_fdb) {
+ if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ }
+}
+
+/*
+ * Report whether the two LAG ports can run with a shared FDB. All of the
+ * following must hold: both devices are in switchdev mode with vport match
+ * metadata enabled, the first port's ESW_OFFLOADS devcom component is
+ * paired, the second port has the lag_native_fdb_selection and
+ * root_ft_on_other_esw capabilities, and the first port has
+ * esw_shared_ingress_acl.
+ */
+static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+ return is_mdev_switchdev_mode(dev0) &&
+ is_mdev_switchdev_mode(dev1) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+ mlx5_devcom_is_paired(dev0->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS) &&
+ MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+ MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+ MLX5_CAP_ESW(dev0, esw_shared_ingress_acl);
}
static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -371,14 +438,17 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
bool do_bond, roce_lag;
int err;
- if (!mlx5_lag_is_ready(ldev))
- return;
-
- tracker = ldev->tracker;
+ if (!mlx5_lag_is_ready(ldev)) {
+ do_bond = false;
+ } else {
+ tracker = ldev->tracker;
- do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+ }
if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
roce_lag = !mlx5_sriov_is_enabled(dev0) &&
!mlx5_sriov_is_enabled(dev1);
@@ -388,23 +458,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_remove_devices(ldev);
err = mlx5_activate_lag(ldev, &tracker,
roce_lag ? MLX5_LAG_FLAG_ROCE :
- MLX5_LAG_FLAG_SRIOV);
+ MLX5_LAG_FLAG_SRIOV,
+ shared_fdb);
if (err) {
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_add_devices(ldev);
return;
- }
-
- if (roce_lag) {
+ } else if (roce_lag) {
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
mlx5_nic_vport_enable_roce(dev1);
+ } else if (shared_fdb) {
+ dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+
+ err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!err)
+ err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+ if (err) {
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ mlx5_deactivate_lag(ldev);
+ mlx5_lag_add_devices(ldev);
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ mlx5_core_err(dev0, "Failed to enable lag\n");
+ return;
+ }
}
} else if (do_bond && __mlx5_lag_is_active(ldev)) {
mlx5_modify_lag(ldev, &tracker);
@@ -418,21 +505,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
+/*
+ * Take the E-Switch lock of each non-NULL device, @dev0 first and then
+ * @dev1. mlx5_lag_unlock_eswitches() releases them in the opposite order.
+ */
+static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ struct mlx5_core_dev *devs[] = { dev0, dev1 };
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(devs); i++) {
+ if (devs[i])
+ mlx5_esw_lock(devs[i]->priv.eswitch);
+ }
+}
+
+/*
+ * Release the E-Switch lock of each non-NULL device, @dev1 first and then
+ * @dev0, i.e. the reverse of the order used by mlx5_lag_lock_eswitches().
+ */
+static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ struct mlx5_core_dev *devs[] = { dev0, dev1 };
+ unsigned int i;
+
+ for (i = ARRAY_SIZE(devs); i-- > 0; ) {
+ if (devs[i])
+ mlx5_esw_unlock(devs[i]->priv.eswitch);
+ }
+}
+
+/*
+ * Delayed-work handler that runs mlx5_do_bond() for the owning mlx5_lag.
+ *
+ * The work re-queues itself with a delay of HZ jiffies and returns when the
+ * device list lock cannot be taken with trylock, or while
+ * ldev->mode_changes_in_progress is non-zero. Otherwise mlx5_do_bond() runs
+ * with the device list lock and the E-Switch locks of both ports held.
+ */
static void mlx5_do_bond_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
bond_work);
+ /* NOTE(review): both port devices are sampled before the device list lock is taken - confirm they cannot change in between. */
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
int status;
status = mlx5_dev_list_trylock();
if (!status) {
- /* 1 sec delay. */
mlx5_queue_bond_work(ldev, HZ);
return;
}
+ /* Back off while a mode change is in flight; mlx5_lag_enable_change() queues this work again once the counter is dropped. */
+ if (ldev->mode_changes_in_progress) {
+ mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mlx5_lag_lock_eswitches(dev0, dev1);
mlx5_do_bond(ldev);
+ mlx5_lag_unlock_eswitches(dev0, dev1);
mlx5_dev_list_unlock();
}
@@ -630,7 +744,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
}
/* Must be called with intf_mutex held */
-static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
@@ -638,7 +752,7 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
- return;
+ return 0;
tmp_dev = mlx5_get_next_phys_dev(dev);
if (tmp_dev)
@@ -648,15 +762,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
ldev = mlx5_lag_dev_alloc(dev);
if (!ldev) {
mlx5_core_err(dev, "Failed to alloc lag dev\n");
- return;
+ return 0;
}
} else {
+ if (ldev->mode_changes_in_progress)
+ return -EAGAIN;
mlx5_ldev_get(ldev);
}
mlx5_ldev_add_mdev(ldev, dev);
- return;
+ return 0;
}
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
@@ -667,7 +783,13 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
if (!ldev)
return;
+recheck:
mlx5_dev_list_lock();
+ if (ldev->mode_changes_in_progress) {
+ mlx5_dev_list_unlock();
+ msleep(100);
+ goto recheck;
+ }
mlx5_ldev_remove_mdev(ldev, dev);
mlx5_dev_list_unlock();
mlx5_ldev_put(ldev);
@@ -675,8 +797,16 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
+/*
+ * Call __mlx5_lag_dev_add_mdev() for @dev under the device list lock.
+ *
+ * While that call reports an error (it returns -EAGAIN when the existing
+ * LAG device has mode_changes_in_progress set), the lock is dropped, the
+ * caller sleeps for 100 ms and the attempt is repeated. There is no retry
+ * limit.
+ */
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
+ int err;
+
+recheck:
mlx5_dev_list_lock();
- __mlx5_lag_dev_add_mdev(dev);
+ err = __mlx5_lag_dev_add_mdev(dev);
+ if (err) {
+ mlx5_dev_list_unlock();
+ msleep(100);
+ goto recheck;
+ }
mlx5_dev_list_unlock();
}
@@ -690,11 +820,11 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
if (!ldev)
return;
- if (__mlx5_lag_is_active(ldev))
- mlx5_disable_lag(ldev);
-
mlx5_ldev_remove_netdev(ldev, netdev);
ldev->flags &= ~MLX5_LAG_FLAG_READY;
+
+ if (__mlx5_lag_is_active(ldev))
+ mlx5_queue_bond_work(ldev, 0);
}
/* Must be called with intf_mutex held */
@@ -716,6 +846,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
if (i >= MLX5_MAX_PORTS)
ldev->flags |= MLX5_LAG_FLAG_READY;
+ mlx5_queue_bond_work(ldev, 0);
}
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -746,6 +877,21 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_lag_is_active);
+/*
+ * Return true when @dev has a LAG device, that LAG is active, and @dev is
+ * its MLX5_LAG_P1 port. The check is made under lag_lock.
+ */
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool is_master = false;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev && __mlx5_lag_is_active(ldev))
+ is_master = (dev == ldev->pf[MLX5_LAG_P1].dev);
+ spin_unlock(&lag_lock);
+
+ return is_master;
+}
+EXPORT_SYMBOL(mlx5_lag_is_master);
+
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
@@ -760,19 +906,50 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);
-void mlx5_lag_update(struct mlx5_core_dev *dev)
+/*
+ * Return true when @dev has a LAG device in SR-IOV LAG mode whose
+ * shared_fdb flag is set. The check is made under lag_lock.
+ */
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool shared = false;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev && __mlx5_lag_is_sriov(ldev))
+ shared = ldev->shared_fdb;
+ spin_unlock(&lag_lock);
+
+ return shared;
+}
+EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
+
+/*
+ * Block LAG state changes for @dev's LAG device while an E-Switch mode
+ * change runs: bump mode_changes_in_progress and, if the LAG is currently
+ * active, disable it with both E-Switch locks held.
+ *
+ * A device may have no LAG device at all (__mlx5_lag_dev_add_mdev() returns
+ * without creating one when the LAG capabilities are missing or the
+ * allocation fails), so a NULL result from mlx5_lag_dev() is a no-op here.
+ * Must be balanced by mlx5_lag_enable_change().
+ */
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
+ struct mlx5_core_dev *dev0;
+ struct mlx5_core_dev *dev1;
struct mlx5_lag *ldev;
mlx5_dev_list_lock();
+
ldev = mlx5_lag_dev(dev);
if (!ldev)
goto unlock;
+ dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ dev1 = ldev->pf[MLX5_LAG_P2].dev;
- mlx5_do_bond(ldev);
+ ldev->mode_changes_in_progress++;
+ if (__mlx5_lag_is_active(ldev)) {
+ mlx5_lag_lock_eswitches(dev0, dev1);
+ mlx5_disable_lag(ldev);
+ mlx5_lag_unlock_eswitches(dev0, dev1);
+ }
unlock:
mlx5_dev_list_unlock();
}
+
+/*
+ * Counterpart of mlx5_lag_disable_change(): drop mode_changes_in_progress
+ * and queue the bond work with no delay. Does nothing when @dev has no LAG
+ * device.
+ */
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ mlx5_dev_list_lock();
+ ldev = mlx5_lag_dev(dev);
+ if (ldev)
+ ldev->mode_changes_in_progress--;
+ mlx5_dev_list_unlock();
+
+ if (ldev)
+ mlx5_queue_bond_work(ldev, 0);
+}
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -827,6 +1004,26 @@ unlock:
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+/*
+ * Return the other port's device of @dev's LAG: the MLX5_LAG_P2 device when
+ * @dev is the MLX5_LAG_P1 device, otherwise the MLX5_LAG_P1 device.
+ * Returns NULL when @dev has no LAG device. The lookup is made under
+ * lag_lock; the returned pointer is used by the caller after the lock has
+ * been released.
+ */
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer = NULL;
+ struct mlx5_lag *ldev;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev) {
+ if (ldev->pf[MLX5_LAG_P1].dev == dev)
+ peer = ldev->pf[MLX5_LAG_P2].dev;
+ else
+ peer = ldev->pf[MLX5_LAG_P1].dev;
+ }
+ spin_unlock(&lag_lock);
+
+ return peer;
+}
+EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
u64 *values,
int num_counters,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index 191392c37558..d4bae528954e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -39,6 +39,8 @@ struct lag_tracker {
*/
struct mlx5_lag {
u8 flags;
+ int mode_changes_in_progress;
+ bool shared_fdb;
u8 v2p_map[MLX5_MAX_PORTS];
struct kref ref;
struct lag_func pf[MLX5_MAX_PORTS];
@@ -71,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- u8 flags);
+ u8 flags,
+ bool shared_fdb);
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index c4bf8b679541..011b639b29bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
struct lag_tracker tracker;
tracker = ldev->tracker;
- mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+ mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
}
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a8efd9f1af4c..6fe560307c05 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1179,6 +1179,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_ec;
}
+ mlx5_lag_add_mdev(dev);
err = mlx5_sriov_attach(dev);
if (err) {
mlx5_core_err(dev, "sriov init failed %d\n", err);
@@ -1186,11 +1187,11 @@ static int mlx5_load(struct mlx5_core_dev *dev)
}
mlx5_sf_dev_table_create(dev);
- mlx5_lag_add_mdev(dev);
return 0;
err_sriov:
+ mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
err_ec:
mlx5_sf_hw_table_destroy(dev);
@@ -1222,9 +1223,9 @@ err_irq_table:
static void mlx5_unload(struct mlx5_core_dev *dev)
{
- mlx5_lag_remove_mdev(dev);
mlx5_sf_dev_table_destroy(dev);
mlx5_sriov_detach(dev);
+ mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
mlx5_sf_hw_table_destroy(dev);
mlx5_vhca_event_stop(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 343807ac2036..14ffd74eeabe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
int mlx5_events_init(struct mlx5_core_dev *dev);
void mlx5_events_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 12cf323a5943..754f89222858 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -790,7 +790,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);