-rw-r--r--  drivers/infiniband/hw/mlx5/main.c                 | 463
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h              |  45
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  52
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h  |   9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 601
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.h |  14
-rw-r--r--  include/linux/mlx5/device.h                       |   2
-rw-r--r--  include/linux/mlx5/fs.h                           |  18
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h                     | 105
9 files changed, 1233 insertions(+), 76 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 7e97cb55a6bf..b0ec175cc6ba 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -43,6 +43,9 @@
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
+#include <linux/in.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/fs.h>
#include "user.h"
#include "mlx5_ib.h"
@@ -835,6 +838,457 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
return 0;
}
+static bool outer_header_zero(u32 *match_criteria)
+{
+ int size = MLX5_ST_SZ_BYTES(fte_match_param);
+ char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers);
+
+ return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
+ outer_headers_c + 1,
+ size - 1);
+}
+
+static int parse_flow_attr(u32 *match_c, u32 *match_v,
+ union ib_flow_spec *ib_spec)
+{
+ void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ if (ib_spec->size != sizeof(ib_spec->eth))
+ return -EINVAL;
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dmac_47_16),
+ ib_spec->eth.mask.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ vlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ vlan_tag, 1);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_vid, ntohs(ib_spec->eth.val.vlan_tag));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_cfi,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_cfi,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 12);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_prio,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_prio,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 13);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ ethertype, ntohs(ib_spec->eth.mask.ether_type));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ethertype, ntohs(ib_spec->eth.val.ether_type));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ if (ib_spec->size != sizeof(ib_spec->ipv4))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ethertype, ETH_P_IP);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.src_ip,
+ sizeof(ib_spec->ipv4.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.src_ip,
+ sizeof(ib_spec->ipv4.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.dst_ip,
+ sizeof(ib_spec->ipv4.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.dst_ip,
+ sizeof(ib_spec->ipv4.val.dst_ip));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ if (ib_spec->size != sizeof(ib_spec->tcp_udp))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ IPPROTO_TCP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_UDP:
+ if (ib_spec->size != sizeof(ib_spec->tcp_udp))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ IPPROTO_UDP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* A flow that could match both multicast and unicast packets must not
+ * be placed in the multicast flow steering table, where it could steal
+ * multicast packets from other rules.
+ */
+static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
+{
+ struct ib_flow_spec_eth *eth_spec;
+
+ if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
+ ib_attr->size < sizeof(struct ib_flow_attr) +
+ sizeof(struct ib_flow_spec_eth) ||
+ ib_attr->num_of_specs < 1)
+ return false;
+
+ eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
+ if (eth_spec->type != IB_FLOW_SPEC_ETH ||
+ eth_spec->size != sizeof(*eth_spec))
+ return false;
+
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+}
+
+static bool is_valid_attr(struct ib_flow_attr *flow_attr)
+{
+ union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ bool has_ipv4_spec = false;
+ bool eth_type_ipv4 = true;
+ unsigned int spec_index;
+
+ /* Validate that ethertype is correct */
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ if (ib_spec->type == IB_FLOW_SPEC_ETH &&
+ ib_spec->eth.mask.ether_type) {
+ if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
+ ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
+ eth_type_ipv4 = false;
+ } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
+ has_ipv4_spec = true;
+ }
+ ib_spec = (void *)ib_spec + ib_spec->size;
+ }
+ return !has_ipv4_spec || eth_type_ipv4;
+}
+
+static void put_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *prio, bool ft_added)
+{
+ prio->refcount -= !!ft_added;
+ if (!prio->refcount) {
+ mlx5_destroy_flow_table(prio->flow_table);
+ prio->flow_table = NULL;
+ }
+}
+
+static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
+ struct mlx5_ib_flow_handler *handler = container_of(flow_id,
+ struct mlx5_ib_flow_handler,
+ ibflow);
+ struct mlx5_ib_flow_handler *iter, *tmp;
+
+ mutex_lock(&dev->flow_db.lock);
+
+ list_for_each_entry_safe(iter, tmp, &handler->list, list) {
+ mlx5_del_flow_rule(iter->rule);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+
+ mlx5_del_flow_rule(handler->rule);
+ put_flow_table(dev, &dev->flow_db.prios[handler->prio], true);
+ mutex_unlock(&dev->flow_db.lock);
+
+ kfree(handler);
+
+ return 0;
+}
+
+#define MLX5_FS_MAX_TYPES 10
+#define MLX5_FS_MAX_ENTRIES 32000UL
+static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
+ struct ib_flow_attr *flow_attr)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_table *ft;
+ int num_entries;
+ int num_groups;
+ int priority;
+ int err = 0;
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ if (flow_is_multicast_only(flow_attr))
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = flow_attr->priority;
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_BYPASS);
+ num_entries = MLX5_FS_MAX_ENTRIES;
+ num_groups = MLX5_FS_MAX_TYPES;
+ prio = &dev->flow_db.prios[priority];
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS);
+ build_leftovers_ft_param(&priority,
+ &num_entries,
+ &num_groups);
+ prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ }
+
+ if (!ns)
+ return ERR_PTR(-ENOTSUPP);
+
+ ft = prio->flow_table;
+ if (!ft) {
+ ft = mlx5_create_auto_grouped_flow_table(ns, priority,
+ num_entries,
+ num_groups);
+
+ if (!IS_ERR(ft)) {
+ prio->refcount = 0;
+ prio->flow_table = ft;
+ } else {
+ err = PTR_ERR(ft);
+ }
+ }
+
+ return err ? ERR_PTR(err) : prio;
+}
+
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ struct mlx5_ib_flow_handler *handler;
+ void *ib_flow = flow_attr + 1;
+ u8 match_criteria_enable = 0;
+ unsigned int spec_index;
+ u32 *match_c;
+ u32 *match_v;
+ int err = 0;
+
+ if (!is_valid_attr(flow_attr))
+ return ERR_PTR(-EINVAL);
+
+ match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !match_c || !match_v) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ err = parse_flow_attr(match_c, match_v, ib_flow);
+ if (err < 0)
+ goto free;
+
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ }
+
+ /* Outer header support only */
+ match_criteria_enable = (!outer_header_zero(match_c)) << 0;
+ handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
+ match_c, match_v,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_DEFAULT_FLOW_TAG,
+ dst);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ handler->prio = ft_prio - dev->flow_db.prios;
+
+ ft_prio->flow_table = ft;
+free:
+ if (err)
+ kfree(handler);
+ kfree(match_c);
+ kfree(match_v);
+ return err ? ERR_PTR(err) : handler;
+}
+
+enum {
+ LEFTOVERS_MC,
+ LEFTOVERS_UC,
+};
+
+static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_ucast = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ static struct {
+ struct ib_flow_attr flow_attr;
+ struct ib_flow_spec_eth eth_flow;
+ } leftovers_specs[] = {
+ [LEFTOVERS_MC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {0x1} }
+ }
+ },
+ [LEFTOVERS_UC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {} }
+ }
+ }
+ };
+
+ handler = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_MC].flow_attr,
+ dst);
+ if (!IS_ERR(handler) &&
+ flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
+ handler_ucast = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_UC].flow_attr,
+ dst);
+ if (IS_ERR(handler_ucast)) {
+ kfree(handler);
+ handler = handler_ucast;
+ } else {
+ list_add(&handler_ucast->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
+
+static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_flow_handler *handler = NULL;
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_ib_flow_prio *ft_prio;
+ int err;
+
+ if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
+ return ERR_PTR(-ENOSPC);
+
+ if (domain != IB_FLOW_DOMAIN_USER ||
+ flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
+ flow_attr->flags)
+ return ERR_PTR(-EINVAL);
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&dev->flow_db.lock);
+
+ ft_prio = get_flow_table(dev, flow_attr);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ handler = create_flow_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else {
+ err = -EINVAL;
+ goto destroy_ft;
+ }
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ handler = NULL;
+ goto destroy_ft;
+ }
+
+ ft_prio->refcount++;
+ mutex_unlock(&dev->flow_db.lock);
+ kfree(dst);
+
+ return &handler->ibflow;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+unlock:
+ mutex_unlock(&dev->flow_db.lock);
+ kfree(dst);
+ kfree(handler);
+ return ERR_PTR(err);
+}
+
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -1439,10 +1893,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+ IB_LINK_LAYER_ETHERNET) {
+ dev->ib_dev.create_flow = mlx5_ib_create_flow;
+ dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ }
err = init_node_data(dev);
if (err)
goto err_dealloc;
+ mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
err = create_dev_resources(&dev->devr);
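Editor's note: mlx5_ib_create_flow() above consumes a variable-length buffer -- an ib_flow_attr header followed immediately by num_of_specs packed ib_flow_spec unions, which parse_flow_attr() and is_valid_attr() walk spec by spec (outer_header_zero() then uses the buf[0] == 0 && !memcmp(buf, buf + 1, size - 1) idiom to test that a buffer stayed all-zero). Below is a minimal sketch of such a buffer, assuming <rdma/ib_verbs.h> and <linux/if_ether.h>; the port number and the TCP/80 match are made-up illustrative values, not from the patch:

struct {
	struct ib_flow_attr         attr;
	struct ib_flow_spec_eth     eth;
	struct ib_flow_spec_tcp_udp tcp;
} __packed flow_buf = {
	.attr = {
		.type         = IB_FLOW_ATTR_NORMAL,
		.size         = sizeof(flow_buf),
		.num_of_specs = 2,
		.port         = 1,
	},
	.eth = {
		.type = IB_FLOW_SPEC_ETH,
		.size = sizeof(struct ib_flow_spec_eth),
		/* exact-match the IPv4 ethertype, as is_valid_attr()
		 * requires whenever an IPv4 spec is present */
		.mask = { .ether_type = cpu_to_be16(0xffff) },
		.val  = { .ether_type = cpu_to_be16(ETH_P_IP) },
	},
	.tcp = {
		.type = IB_FLOW_SPEC_TCP,
		.size = sizeof(struct ib_flow_spec_tcp_udp),
		.mask = { .dst_port = cpu_to_be16(0xffff) },
		.val  = { .dst_port = cpu_to_be16(80) }, /* steer TCP/80 */
	},
};

The rule would then be installed through the verbs entry point registered by this patch: flow = ib_create_flow(qp, &flow_buf.attr, IB_FLOW_DOMAIN_USER);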
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 633347260b79..1474cccd1e0f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -105,6 +105,36 @@ struct mlx5_ib_pd {
u32 pdn;
};
+#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
+#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1)
+#if (MLX5_IB_FLOW_LAST_PRIO <= 0)
+#error "Invalid number of bypass priorities"
+#endif
+#define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1)
+
+#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
+struct mlx5_ib_flow_prio {
+ struct mlx5_flow_table *flow_table;
+ unsigned int refcount;
+};
+
+struct mlx5_ib_flow_handler {
+ struct list_head list;
+ struct ib_flow ibflow;
+ unsigned int prio;
+ struct mlx5_flow_rule *rule;
+};
+
+struct mlx5_ib_flow_db {
+ struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
+ /* Protect flow steering bypass flow tables
+ * when adding/deleting flow rules.
+ * Only a single addition/removal of a flow steering rule can be
+ * done at a time.
+ */
+ struct mutex lock;
+};
+
/* Use macros here so that don't have to duplicate
* enum ib_send_flags and enum ib_qp_type for low-level driver
*/
@@ -171,9 +201,21 @@ struct mlx5_ib_pfault {
struct mlx5_pagefault mpfault;
};
+struct mlx5_ib_rq {
+ u32 tirn;
+};
+
+struct mlx5_ib_raw_packet_qp {
+ struct mlx5_ib_rq rq;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
- struct mlx5_core_qp mqp;
+ union {
+ struct mlx5_core_qp mqp;
+ struct mlx5_ib_raw_packet_qp raw_packet_qp;
+ };
+
struct mlx5_buf buf;
struct mlx5_db db;
@@ -431,6 +473,7 @@ struct mlx5_ib_dev {
*/
struct srcu_struct mr_srcu;
#endif
+ struct mlx5_ib_flow_db flow_db;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
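Editor's note: with MLX5_BY_PASS_NUM_PRIOS defined as 9 (see the device.h hunk below), the priority macros above work out to:

    MLX5_IB_FLOW_MCAST_PRIO     = 9 - 1 = 8   (last bypass slot, reserved for multicast)
    MLX5_IB_FLOW_LAST_PRIO      = 8 - 1 = 7   (user rules may use priorities 0..7)
    MLX5_IB_FLOW_LEFTOVERS_PRIO = 8 + 1 = 9
    MLX5_IB_NUM_FLOW_FT         = 9 + 1 = 10  (size of the prios[] array)

The #error would fire only if MLX5_BY_PASS_NUM_PRIOS ever dropped to 2 or fewer.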
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 5096f4f336bd..a9894d2e8e26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -38,9 +38,28 @@
#include "fs_cmd.h"
#include "mlx5_core.h"
+int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)];
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)];
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
+ MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+ sizeof(out));
+}
+
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
enum fs_flow_table_type type, unsigned int level,
- unsigned int log_size, unsigned int *table_id)
+ unsigned int log_size, struct mlx5_flow_table
+ *next_ft, unsigned int *table_id)
{
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
@@ -51,6 +70,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
MLX5_SET(create_flow_table_in, in, opcode,
MLX5_CMD_OP_CREATE_FLOW_TABLE);
+ if (next_ft) {
+ MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
+ MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
+ }
MLX5_SET(create_flow_table_in, in, table_type, type);
MLX5_SET(create_flow_table_in, in, level, level);
MLX5_SET(create_flow_table_in, in, log_size, log_size);
@@ -83,6 +106,33 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
sizeof(out));
}
+int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)];
+ u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)];
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(modify_flow_table_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_FLOW_TABLE);
+ MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
+ MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
+ MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+ }
+
+ return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+ sizeof(out));
+}
+
int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
u32 *in,
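Editor's note: a minimal sketch (not part of the patch) of how the new miss-table plumbing composes; the helper name is hypothetical and error handling is abbreviated:

static int chain_then_unchain(struct mlx5_core_dev *dev,
			      struct mlx5_flow_table *a,
			      struct mlx5_flow_table *b)
{
	int err;

	/* table_miss_mode = 1: packets that miss in a continue to b */
	err = mlx5_cmd_modify_flow_table(dev, a, b);
	if (err)
		return err;

	/* passing NULL selects table_miss_mode = 0 (default miss action) */
	return mlx5_cmd_modify_flow_table(dev, a, NULL);
}

This is the same pattern connect_fts_in_prio() in fs_core.c below applies across every table of a priority.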
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index f39304ede186..9814d4784803 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -35,11 +35,16 @@
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
enum fs_flow_table_type type, unsigned int level,
- unsigned int log_size, unsigned int *table_id);
+ unsigned int log_size, struct mlx5_flow_table
+ *next_ft, unsigned int *table_id);
int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft);
+int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft);
+
int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
u32 *in, unsigned int *group_id);
@@ -62,4 +67,6 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
unsigned int index);
+int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f7d62fe595f6..6f68dba8d7ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -40,51 +40,83 @@
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
-#define INIT_PRIO(min_level_val, max_ft_val,\
- start_level_val, ...) {.type = FS_TYPE_PRIO,\
+#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\
+ ...) {.type = FS_TYPE_PRIO,\
.min_ft_level = min_level_val,\
- .start_level = start_level_val,\
.max_ft = max_ft_val,\
+ .num_leaf_prios = num_prios_val,\
+ .caps = caps_val,\
.children = (struct init_tree_node[]) {__VA_ARGS__},\
.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
-#define ADD_PRIO(min_level_val, max_ft_val, start_level_val, ...)\
- INIT_PRIO(min_level_val, max_ft_val, start_level_val,\
- __VA_ARGS__)\
-
-#define ADD_FT_PRIO(max_ft_val, start_level_val, ...)\
- INIT_PRIO(0, max_ft_val, start_level_val,\
- __VA_ARGS__)\
+#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\
+ ADD_PRIO(num_prios_val, 0, max_ft_val, {},\
+ __VA_ARGS__)\
#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\
.children = (struct init_tree_node[]) {__VA_ARGS__},\
.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
-#define KERNEL_START_LEVEL 0
-#define KERNEL_P0_START_LEVEL KERNEL_START_LEVEL
+#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
+ sizeof(long))
+
+#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
+
+#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
+ .caps = (long[]) {__VA_ARGS__} }
+
+#define LEFTOVERS_MAX_FT 1
+#define LEFTOVERS_NUM_PRIOS 1
+#define BY_PASS_PRIO_MAX_FT 1
+#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
+ LEFTOVERS_MAX_FT)
+
#define KERNEL_MAX_FT 2
+#define KERNEL_NUM_PRIOS 1
#define KENREL_MIN_LEVEL 2
+
+struct node_caps {
+ size_t arr_sz;
+ long *caps;
+};
static struct init_tree_node {
enum fs_node_type type;
struct init_tree_node *children;
int ar_size;
+ struct node_caps caps;
int min_ft_level;
+ int num_leaf_prios;
int prio;
int max_ft;
- int start_level;
} root_fs = {
.type = FS_TYPE_NAMESPACE,
- .ar_size = 1,
+ .ar_size = 3,
.children = (struct init_tree_node[]) {
- ADD_PRIO(KENREL_MIN_LEVEL, KERNEL_MAX_FT,
- KERNEL_START_LEVEL,
- ADD_NS(ADD_FT_PRIO(KERNEL_MAX_FT,
- KERNEL_P0_START_LEVEL))),
+ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
+ FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
+ FS_CAP(flow_table_properties_nic_receive.modify_root),
+ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
+ FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
+ ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))),
+ ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {},
+ ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))),
+ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
+ FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
+ FS_CAP(flow_table_properties_nic_receive.modify_root),
+ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
+ FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
+ ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))),
}
};
+enum fs_i_mutex_lock_class {
+ FS_MUTEX_GRANDPARENT,
+ FS_MUTEX_PARENT,
+ FS_MUTEX_CHILD
+};
+
static void del_rule(struct fs_node *node);
static void del_flow_table(struct fs_node *node);
static void del_flow_group(struct fs_node *node);
@@ -119,10 +151,11 @@ static void tree_get_node(struct fs_node *node)
atomic_inc(&node->refcount);
}
-static void nested_lock_ref_node(struct fs_node *node)
+static void nested_lock_ref_node(struct fs_node *node,
+ enum fs_i_mutex_lock_class class)
{
if (node) {
- mutex_lock_nested(&node->lock, SINGLE_DEPTH_NESTING);
+ mutex_lock_nested(&node->lock, class);
atomic_inc(&node->refcount);
}
}
@@ -436,10 +469,162 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte,
return ft;
}
+/* If reverse is false, then we search for the first flow table in the
+ * root sub-tree starting from start (closest from the right); otherwise
+ * we search for the last flow table in the root sub-tree up to start
+ * (closest from the left).
+ */
+static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
+ struct list_head *start,
+ bool reverse)
+{
+#define list_advance_entry(pos, reverse) \
+ ((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))
+
+#define list_for_each_advance_continue(pos, head, reverse) \
+ for (pos = list_advance_entry(pos, reverse); \
+ &pos->list != (head); \
+ pos = list_advance_entry(pos, reverse))
+
+ struct fs_node *iter = list_entry(start, struct fs_node, list);
+ struct mlx5_flow_table *ft = NULL;
+
+ if (!root)
+ return NULL;
+
+ list_for_each_advance_continue(iter, &root->children, reverse) {
+ if (iter->type == FS_TYPE_FLOW_TABLE) {
+ fs_get_obj(ft, iter);
+ return ft;
+ }
+ ft = find_closest_ft_recursive(iter, &iter->children, reverse);
+ if (ft)
+ return ft;
+ }
+
+ return ft;
+}
+
+/* If reverse is false, return the first flow table in the next priority
+ * after prio in the tree; otherwise return the last flow table in the
+ * previous priority before prio in the tree.
+ */
+static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
+{
+ struct mlx5_flow_table *ft = NULL;
+ struct fs_node *curr_node;
+ struct fs_node *parent;
+
+ parent = prio->node.parent;
+ curr_node = &prio->node;
+ while (!ft && parent) {
+ ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
+ curr_node = parent;
+ parent = curr_node->parent;
+ }
+ return ft;
+}
+
+/* Assuming the whole tree is locked by the chain lock mutex */
+static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
+{
+ return find_closest_ft(prio, false);
+}
+
+/* Assuming the whole tree is locked by the chain lock mutex */
+static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
+{
+ return find_closest_ft(prio, true);
+}
+
+static int connect_fts_in_prio(struct mlx5_core_dev *dev,
+ struct fs_prio *prio,
+ struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_table *iter;
+ int i = 0;
+ int err;
+
+ fs_for_each_ft(iter, prio) {
+ i++;
+ err = mlx5_cmd_modify_flow_table(dev,
+ iter,
+ ft);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to modify flow table %d\n",
+ iter->id);
+ /* The driver is out of sync with the FW */
+ if (i > 1)
+ WARN_ON(true);
+ return err;
+ }
+ }
+ return 0;
+}
+
+/* Connect flow tables from previous priority of prio to ft */
+static int connect_prev_fts(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct mlx5_flow_table *prev_ft;
+
+ prev_ft = find_prev_chained_ft(prio);
+ if (prev_ft) {
+ struct fs_prio *prev_prio;
+
+ fs_get_obj(prev_prio, prev_ft->node.parent);
+ return connect_fts_in_prio(dev, prev_prio, ft);
+ }
+ return 0;
+}
+
+static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
+ *prio)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+ int min_level = INT_MAX;
+ int err;
+
+ if (root->root_ft)
+ min_level = root->root_ft->level;
+
+ if (ft->level >= min_level)
+ return 0;
+
+ err = mlx5_cmd_update_root_ft(root->dev, ft);
+ if (err)
+ mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
+ ft->id);
+ else
+ root->root_ft = ft;
+
+ return err;
+}
+
+static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ int err = 0;
+
+ /* Connect_prev_fts and update_root_ft_create are mutually exclusive */
+
+ if (list_empty(&prio->node.children)) {
+ err = connect_prev_fts(dev, ft, prio);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.modify_root))
+ err = update_root_ft_create(ft, prio);
+ return err;
+}
+
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int max_fte)
{
+ struct mlx5_flow_table *next_ft = NULL;
struct mlx5_flow_table *ft;
int err;
int log_table_sz;
@@ -452,14 +637,15 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
return ERR_PTR(-ENODEV);
}
+ mutex_lock(&root->chain_lock);
fs_prio = find_prio(ns, prio);
- if (!fs_prio)
- return ERR_PTR(-EINVAL);
-
- lock_ref_node(&fs_prio->node);
+ if (!fs_prio) {
+ err = -EINVAL;
+ goto unlock_root;
+ }
if (fs_prio->num_ft == fs_prio->max_ft) {
err = -ENOSPC;
- goto unlock_prio;
+ goto unlock_root;
}
ft = alloc_flow_table(find_next_free_level(fs_prio),
@@ -467,32 +653,63 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
root->table_type);
if (!ft) {
err = -ENOMEM;
- goto unlock_prio;
+ goto unlock_root;
}
tree_init_node(&ft->node, 1, del_flow_table);
log_table_sz = ilog2(ft->max_fte);
+ next_ft = find_next_chained_ft(fs_prio);
err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level,
- log_table_sz, &ft->id);
+ log_table_sz, next_ft, &ft->id);
if (err)
goto free_ft;
+ err = connect_flow_table(root->dev, ft, fs_prio);
+ if (err)
+ goto destroy_ft;
+ lock_ref_node(&fs_prio->node);
tree_add_node(&ft->node, &fs_prio->node);
list_add_tail(&ft->node.list, &fs_prio->node.children);
fs_prio->num_ft++;
unlock_ref_node(&fs_prio->node);
-
+ mutex_unlock(&root->chain_lock);
return ft;
-
+destroy_ft:
+ mlx5_cmd_destroy_flow_table(root->dev, ft);
free_ft:
kfree(ft);
-unlock_prio:
- unlock_ref_node(&fs_prio->node);
+unlock_root:
+ mutex_unlock(&root->chain_lock);
return ERR_PTR(err);
}
-struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
- u32 *fg_in)
+struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
+ int prio,
+ int num_flow_table_entries,
+ int max_num_groups)
+{
+ struct mlx5_flow_table *ft;
+
+ if (max_num_groups > num_flow_table_entries)
+ return ERR_PTR(-EINVAL);
+
+ ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries);
+ if (IS_ERR(ft))
+ return ft;
+
+ ft->autogroup.active = true;
+ ft->autogroup.required_groups = max_num_groups;
+
+ return ft;
+}
+EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
+
+/* Flow table should be locked */
+static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft,
+ u32 *fg_in,
+ struct list_head
+ *prev_fg,
+ bool is_auto_fg)
{
struct mlx5_flow_group *fg;
struct mlx5_core_dev *dev = get_dev(&ft->node);
@@ -505,18 +722,33 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
if (IS_ERR(fg))
return fg;
- lock_ref_node(&ft->node);
err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
if (err) {
kfree(fg);
- unlock_ref_node(&ft->node);
return ERR_PTR(err);
}
+
+ if (ft->autogroup.active)
+ ft->autogroup.num_groups++;
/* Add node to tree */
- tree_init_node(&fg->node, 1, del_flow_group);
+ tree_init_node(&fg->node, !is_auto_fg, del_flow_group);
tree_add_node(&fg->node, &ft->node);
/* Add node to group list */
list_add(&fg->node.list, ft->node.children.prev);
+
+ return fg;
+}
+
+struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
+ u32 *fg_in)
+{
+ struct mlx5_flow_group *fg;
+
+ if (ft->autogroup.active)
+ return ERR_PTR(-EPERM);
+
+ lock_ref_node(&ft->node);
+ fg = create_flow_group_common(ft, fg_in, &ft->node.children, false);
unlock_ref_node(&ft->node);
return fg;
@@ -614,7 +846,63 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
return fte;
}
-/* Assuming parent fg(flow table) is locked */
+static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
+ u8 match_criteria_enable,
+ u32 *match_criteria)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct list_head *prev = &ft->node.children;
+ unsigned int candidate_index = 0;
+ struct mlx5_flow_group *fg;
+ void *match_criteria_addr;
+ unsigned int group_size = 0;
+ u32 *in;
+
+ if (!ft->autogroup.active)
+ return ERR_PTR(-ENOENT);
+
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ if (ft->autogroup.num_groups < ft->autogroup.required_groups)
+ /* We reserve room for flow groups in addition to the max types */
+ group_size = ft->max_fte / (ft->autogroup.required_groups + 1);
+
+ /* ft->max_fte == ft->autogroup.max_types */
+ if (group_size == 0)
+ group_size = 1;
+
+ /* sorted by start_index */
+ fs_for_each_fg(fg, ft) {
+ if (candidate_index + group_size > fg->start_index)
+ candidate_index = fg->start_index + fg->max_ftes;
+ else
+ break;
+ prev = &fg->node.list;
+ }
+
+ if (candidate_index + group_size > ft->max_fte) {
+ fg = ERR_PTR(-ENOSPC);
+ goto out;
+ }
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ match_criteria_enable);
+ MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index +
+ group_size - 1);
+ match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
+ in, match_criteria);
+ memcpy(match_criteria_addr, match_criteria,
+ MLX5_ST_SZ_BYTES(fte_match_param));
+
+ fg = create_flow_group_common(ft, in, prev, true);
+out:
+ kvfree(in);
+ return fg;
+}
+
static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
u32 *match_value,
u8 action,
@@ -626,9 +914,9 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
struct mlx5_flow_table *ft;
struct list_head *prev;
- lock_ref_node(&fg->node);
+ nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT);
fs_for_each_fte(fte, fg) {
- nested_lock_ref_node(&fte->node);
+ nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
if (compare_match_value(&fg->mask, match_value, &fte->val) &&
action == fte->action && flow_tag == fte->flow_tag) {
rule = add_rule_fte(fte, fg, dest);
@@ -669,6 +957,33 @@ unlock_fg:
return rule;
}
+static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft,
+ u8 match_criteria_enable,
+ u32 *match_criteria,
+ u32 *match_value,
+ u8 action,
+ u32 flow_tag,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *rule;
+ struct mlx5_flow_group *g;
+
+ g = create_autogroup(ft, match_criteria_enable, match_criteria);
+ if (IS_ERR(g))
+ return (void *)g;
+
+ rule = add_rule_fg(g, match_value,
+ action, flow_tag, dest);
+ if (IS_ERR(rule)) {
+ /* Remove assumes refcount > 0 and autogroup creates a group
+ * with a refcount = 0.
+ */
+ tree_get_node(&g->node);
+ tree_remove_node(&g->node);
+ }
+ return rule;
+}
+
struct mlx5_flow_rule *
mlx5_add_flow_rule(struct mlx5_flow_table *ft,
u8 match_criteria_enable,
@@ -679,39 +994,115 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
struct mlx5_flow_destination *dest)
{
struct mlx5_flow_group *g;
- struct mlx5_flow_rule *rule = ERR_PTR(-EINVAL);
+ struct mlx5_flow_rule *rule;
- tree_get_node(&ft->node);
- lock_ref_node(&ft->node);
+ nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
fs_for_each_fg(g, ft)
if (compare_match_criteria(g->mask.match_criteria_enable,
match_criteria_enable,
g->mask.match_criteria,
match_criteria)) {
- unlock_ref_node(&ft->node);
rule = add_rule_fg(g, match_value,
action, flow_tag, dest);
- goto put;
+ if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
+ goto unlock;
}
+
+ rule = add_rule_to_auto_fg(ft, match_criteria_enable, match_criteria,
+ match_value, action, flow_tag, dest);
+unlock:
unlock_ref_node(&ft->node);
-put:
- tree_put_node(&ft->node);
return rule;
}
+EXPORT_SYMBOL(mlx5_add_flow_rule);
void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
{
tree_remove_node(&rule->node);
}
+EXPORT_SYMBOL(mlx5_del_flow_rule);
+
+/* Assuming prio->node.children (the flow tables) is sorted by level */
+static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
+{
+ struct fs_prio *prio;
+
+ fs_get_obj(prio, ft->node.parent);
+
+ if (!list_is_last(&ft->node.list, &prio->node.children))
+ return list_next_entry(ft, node.list);
+ return find_next_chained_ft(prio);
+}
+
+static int update_root_ft_destroy(struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_flow_table *new_root_ft = NULL;
+
+ if (root->root_ft != ft)
+ return 0;
+
+ new_root_ft = find_next_ft(ft);
+ if (new_root_ft) {
+ int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft);
+
+ if (err) {
+ mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
+ ft->id);
+ return err;
+ }
+ root->root_ft = new_root_ft;
+ }
+ return 0;
+}
+
+/* Connect flow table from previous priority to
+ * the next flow table.
+ */
+static int disconnect_flow_table(struct mlx5_flow_table *ft)
+{
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ struct mlx5_flow_table *next_ft;
+ struct fs_prio *prio;
+ int err = 0;
+
+ err = update_root_ft_destroy(ft);
+ if (err)
+ return err;
+
+ fs_get_obj(prio, ft->node.parent);
+ if (!(list_first_entry(&prio->node.children,
+ struct mlx5_flow_table,
+ node.list) == ft))
+ return 0;
+
+ next_ft = find_next_chained_ft(prio);
+ err = connect_prev_fts(dev, next_ft, prio);
+ if (err)
+ mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
+ ft->id);
+ return err;
+}
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ int err = 0;
+
+ mutex_lock(&root->chain_lock);
+ err = disconnect_flow_table(ft);
+ if (err) {
+ mutex_unlock(&root->chain_lock);
+ return err;
+ }
if (tree_remove_node(&ft->node))
mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
ft->id);
+ mutex_unlock(&root->chain_lock);
- return 0;
+ return err;
}
+EXPORT_SYMBOL(mlx5_destroy_flow_table);
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
@@ -732,8 +1123,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return NULL;
switch (type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
case MLX5_FLOW_NAMESPACE_KERNEL:
- prio = 0;
+ case MLX5_FLOW_NAMESPACE_LEFTOVERS:
+ prio = type;
break;
case MLX5_FLOW_NAMESPACE_FDB:
if (dev->priv.fdb_root_ns)
@@ -754,10 +1147,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return ns;
}
+EXPORT_SYMBOL(mlx5_get_flow_namespace);
static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
- unsigned prio, int max_ft,
- int start_level)
+ unsigned prio, int max_ft)
{
struct fs_prio *fs_prio;
@@ -770,7 +1163,6 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
tree_add_node(&fs_prio->node, &ns->node);
fs_prio->max_ft = max_ft;
fs_prio->prio = prio;
- fs_prio->start_level = start_level;
list_add_tail(&fs_prio->node.list, &ns->node.children);
return fs_prio;
@@ -800,11 +1192,45 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
return ns;
}
-static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *init_node,
+static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node
+ *prio_metadata)
+{
+ struct fs_prio *fs_prio;
+ int i;
+
+ for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
+ fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft);
+ if (IS_ERR(fs_prio))
+ return PTR_ERR(fs_prio);
+ }
+ return 0;
+}
+
+#define FLOW_TABLE_BIT_SZ 1
+#define GET_FLOW_TABLE_CAP(dev, offset) \
+ ((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) + \
+ offset / 32)) >> \
+ (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
+static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
+{
+ int i;
+
+ for (i = 0; i < caps->arr_sz; i++) {
+ if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
+ return false;
+ }
+ return true;
+}
+
+static int init_root_tree_recursive(struct mlx5_core_dev *dev,
+ struct init_tree_node *init_node,
struct fs_node *fs_parent_node,
struct init_tree_node *init_parent_node,
int index)
{
+ int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.
+ max_ft_level);
struct mlx5_flow_namespace *fs_ns;
struct fs_prio *fs_prio;
struct fs_node *base;
@@ -812,12 +1238,14 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini
int err;
if (init_node->type == FS_TYPE_PRIO) {
- if (init_node->min_ft_level > max_ft_level)
- return -ENOTSUPP;
+ if ((init_node->min_ft_level > max_ft_level) ||
+ !has_required_caps(dev, &init_node->caps))
+ return 0;
fs_get_obj(fs_ns, fs_parent_node);
- fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft,
- init_node->start_level);
+ if (init_node->num_leaf_prios)
+ return create_leaf_prios(fs_ns, init_node);
+ fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft);
if (IS_ERR(fs_prio))
return PTR_ERR(fs_prio);
base = &fs_prio->node;
@@ -831,9 +1259,8 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini
return -EINVAL;
}
for (i = 0; i < init_node->ar_size; i++) {
- err = init_root_tree_recursive(max_ft_level,
- &init_node->children[i], base,
- init_node, i);
+ err = init_root_tree_recursive(dev, &init_node->children[i],
+ base, init_node, i);
if (err)
return err;
}
@@ -841,7 +1268,8 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini
return 0;
}
-static int init_root_tree(int max_ft_level, struct init_tree_node *init_node,
+static int init_root_tree(struct mlx5_core_dev *dev,
+ struct init_tree_node *init_node,
struct fs_node *fs_parent_node)
{
int i;
@@ -850,8 +1278,7 @@ static int init_root_tree(int max_ft_level, struct init_tree_node *init_node,
fs_get_obj(fs_ns, fs_parent_node);
for (i = 0; i < init_node->ar_size; i++) {
- err = init_root_tree_recursive(max_ft_level,
- &init_node->children[i],
+ err = init_root_tree_recursive(dev, &init_node->children[i],
&fs_ns->node,
init_node, i);
if (err)
@@ -877,25 +1304,65 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev
ns = &root_ns->ns;
fs_init_namespace(ns);
+ mutex_init(&root_ns->chain_lock);
tree_init_node(&ns->node, 1, NULL);
tree_add_node(&ns->node, NULL);
return root_ns;
}
+static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
+
+static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
+{
+ struct fs_prio *prio;
+
+ fs_for_each_prio(prio, ns) {
+ /* This updates prio start_level and max_ft */
+ set_prio_attrs_in_prio(prio, acc_level);
+ acc_level += prio->max_ft;
+ }
+ return acc_level;
+}
+
+static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
+{
+ struct mlx5_flow_namespace *ns;
+ int acc_level_ns = acc_level;
+
+ prio->start_level = acc_level;
+ fs_for_each_ns(ns, prio)
+ /* This updates start_level and max_ft of ns's priority descendants */
+ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
+ if (!prio->max_ft)
+ prio->max_ft = acc_level_ns - prio->start_level;
+ WARN_ON(prio->max_ft < acc_level_ns - prio->start_level);
+}
+
+static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
+{
+ struct mlx5_flow_namespace *ns = &root_ns->ns;
+ struct fs_prio *prio;
+ int start_level = 0;
+
+ fs_for_each_prio(prio, ns) {
+ set_prio_attrs_in_prio(prio, start_level);
+ start_level += prio->max_ft;
+ }
+}
+
static int init_root_ns(struct mlx5_core_dev *dev)
{
- int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
- flow_table_properties_nic_receive.
- max_ft_level);
dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX);
if (IS_ERR_OR_NULL(dev->priv.root_ns))
goto cleanup;
- if (init_root_tree(max_ft_level, &root_fs, &dev->priv.root_ns->ns.node))
+ if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node))
goto cleanup;
+ set_prio_attrs(dev->priv.root_ns);
+
return 0;
cleanup:
@@ -1019,7 +1486,7 @@ static int init_fdb_root_ns(struct mlx5_core_dev *dev)
return -ENOMEM;
/* Create single prio */
- prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1, 0);
+ prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1);
if (IS_ERR(prio)) {
cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
return PTR_ERR(prio);
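Editor's note: a worked example of create_autogroup()'s sizing, using the values the mlx5_ib consumer passes (MLX5_FS_MAX_ENTRIES = 32000 entries, MLX5_FS_MAX_TYPES = 10 groups):

    group_size = max_fte / (required_groups + 1) = 32000 / 11 = 2909

i.e. each auto-created group gets 2909 FTEs, and one group's worth of entries is held back -- the room the code reserves "in addition to the max types" -- so a rule with a brand-new match mask can still get a group after the 10 expected masks exist. The fs_for_each_fg() walk then places the new group in the first start_index gap wide enough for group_size entries, relying on the group list being sorted by start_index.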
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 4ebb97fd5544..00245fd7e4bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -77,6 +77,11 @@ struct mlx5_flow_table {
unsigned int max_fte;
unsigned int level;
enum fs_flow_table_type type;
+ struct {
+ bool active;
+ unsigned int required_groups;
+ unsigned int num_groups;
+ } autogroup;
};
/* Type of children is mlx5_flow_rule */
@@ -124,6 +129,9 @@ struct mlx5_flow_root_namespace {
struct mlx5_flow_namespace ns;
enum fs_flow_table_type table_type;
struct mlx5_core_dev *dev;
+ struct mlx5_flow_table *root_ft;
+ /* Should be held when chaining flow tables */
+ struct mutex chain_lock;
};
int mlx5_init_fs(struct mlx5_core_dev *dev);
@@ -143,6 +151,12 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
#define fs_for_each_prio(pos, ns) \
fs_list_for_each_entry(pos, &(ns)->node.children)
+#define fs_for_each_ns(pos, prio) \
+ fs_list_for_each_entry(pos, &(prio)->node.children)
+
+#define fs_for_each_ft(pos, prio) \
+ fs_list_for_each_entry(pos, &(prio)->node.children)
+
#define fs_for_each_fg(pos, ft) \
fs_list_for_each_entry(pos, &(ft)->node.children)
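Editor's note (a summary derived from this patch, not normative): root_ns->chain_lock is the outermost lock, held by mlx5_create_flow_table() and mlx5_destroy_flow_table() while tables are connected or disconnected; beneath it, node mutexes nest in FS_MUTEX_GRANDPARENT (flow table) -> FS_MUTEX_PARENT (flow group) -> FS_MUTEX_CHILD (FTE) order, which is why nested_lock_ref_node() now takes an explicit lockdep class instead of SINGLE_DEPTH_NESTING.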
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index df2f79ef3cac..7be845e30689 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1258,4 +1258,6 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
}
+#define MLX5_BY_PASS_NUM_PRIOS 9
+
#endif /* MLX5_DEVICE_H */
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index bc7ad019afde..8230caa3fb6e 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -38,8 +38,20 @@
#define MLX5_FS_DEFAULT_FLOW_TAG 0x0
+#define LEFTOVERS_RULE_NUM 2
+static inline void build_leftovers_ft_param(int *priority,
+ int *n_ent,
+ int *n_grp)
+{
+ *priority = 0; /* Priority of leftovers_prio-0 */
+ *n_ent = LEFTOVERS_RULE_NUM;
+ *n_grp = LEFTOVERS_RULE_NUM;
+}
+
enum mlx5_flow_namespace_type {
+ MLX5_FLOW_NAMESPACE_BYPASS,
MLX5_FLOW_NAMESPACE_KERNEL,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS,
MLX5_FLOW_NAMESPACE_FDB,
};
@@ -62,6 +74,12 @@ mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type);
struct mlx5_flow_table *
+mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
+ int prio,
+ int num_flow_table_entries,
+ int max_num_groups);
+
+struct mlx5_flow_table *
mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries);
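Editor's note: a minimal consumer sketch of the API this header now exports, mirroring what mlx5_ib_create_flow() above does. mdev, tirn, match_c and match_v are assumed to exist, 1 << 0 selects outer-header matching as in the IB driver, and the table sizes are illustrative:

static struct mlx5_flow_rule *
bypass_fwd_to_tir(struct mlx5_core_dev *mdev, u32 tirn,
		  u32 *match_c, u32 *match_v)
{
	struct mlx5_flow_destination dst = {
		.type    = MLX5_FLOW_DESTINATION_TYPE_TIR,
		.tir_num = tirn,
	};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;

	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns)
		return ERR_PTR(-ENOTSUPP);

	ft = mlx5_create_auto_grouped_flow_table(ns, 0, 32000, 10);
	if (IS_ERR(ft))
		return (void *)ft;

	return mlx5_add_flow_rule(ft, 1 << 0 /* outer headers */,
				  match_c, match_v,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				  MLX5_FS_DEFAULT_FLOW_TAG, &dst);
}

The rule and table are torn down with mlx5_del_flow_rule() and mlx5_destroy_flow_table(), as in mlx5_ib_destroy_flow() above.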
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1780a85a8797..68d73f82e009 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -185,6 +185,7 @@ enum {
MLX5_CMD_OP_MODIFY_RQT = 0x917,
MLX5_CMD_OP_DESTROY_RQT = 0x918,
MLX5_CMD_OP_QUERY_RQT = 0x919,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT = 0x92f,
MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930,
MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931,
MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932,
@@ -193,7 +194,8 @@ enum {
MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935,
MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936,
MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937,
- MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938
+ MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938,
+ MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c
};
struct mlx5_ifc_flow_table_fields_supported_bits {
@@ -258,7 +260,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 ft_support[0x1];
u8 reserved_0[0x2];
u8 flow_modify_en[0x1];
- u8 reserved_1[0x1c];
+ u8 modify_root[0x1];
+ u8 identified_miss_table_mode[0x1];
+ u8 flow_table_modify[0x1];
+ u8 reserved_1[0x19];
u8 reserved_2[0x2];
u8 log_max_ft_size[0x6];
@@ -293,6 +298,22 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
u8 reserved_1[0x1a];
};
+struct mlx5_ifc_ipv4_layout_bits {
+ u8 reserved_0[0x60];
+
+ u8 ipv4[0x20];
+};
+
+struct mlx5_ifc_ipv6_layout_bits {
+ u8 ipv6[16][0x8];
+};
+
+union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
+ struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
+ struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
+ u8 reserved_0[0x80];
+};
+
struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 smac_47_16[0x20];
@@ -323,9 +344,9 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 udp_sport[0x10];
u8 udp_dport[0x10];
- u8 src_ip[4][0x20];
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
- u8 dst_ip[4][0x20];
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
};
struct mlx5_ifc_fte_match_set_misc_bits {
@@ -5667,12 +5688,16 @@ struct mlx5_ifc_create_flow_table_in_bits {
u8 reserved_4[0x20];
- u8 reserved_5[0x8];
+ u8 reserved_5[0x4];
+ u8 table_miss_mode[0x4];
u8 level[0x8];
u8 reserved_6[0x8];
u8 log_size[0x8];
- u8 reserved_7[0x120];
+ u8 reserved_7[0x8];
+ u8 table_miss_id[0x18];
+
+ u8 reserved_8[0x100];
};
struct mlx5_ifc_create_flow_group_out_bits {
@@ -6946,4 +6971,72 @@ union mlx5_ifc_uplink_pci_interface_document_bits {
u8 reserved_0[0x20060];
};
+struct mlx5_ifc_set_flow_table_root_out_bits {
+ u8 status[0x8];
+ u8 reserved_0[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_1[0x40];
+};
+
+struct mlx5_ifc_set_flow_table_root_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_0[0x10];
+
+ u8 reserved_1[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_2[0x40];
+
+ u8 table_type[0x8];
+ u8 reserved_3[0x18];
+
+ u8 reserved_4[0x8];
+ u8 table_id[0x18];
+
+ u8 reserved_5[0x140];
+};
+
+enum {
+ MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1,
+};
+
+struct mlx5_ifc_modify_flow_table_out_bits {
+ u8 status[0x8];
+ u8 reserved_0[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_flow_table_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_0[0x10];
+
+ u8 reserved_1[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_2[0x20];
+
+ u8 reserved_3[0x10];
+ u8 modify_field_select[0x10];
+
+ u8 table_type[0x8];
+ u8 reserved_4[0x18];
+
+ u8 reserved_5[0x8];
+ u8 table_id[0x18];
+
+ u8 reserved_6[0x4];
+ u8 table_miss_mode[0x4];
+ u8 reserved_7[0x18];
+
+ u8 reserved_8[0x8];
+ u8 table_miss_id[0x18];
+
+ u8 reserved_9[0x100];
+};
+
#endif /* MLX5_IFC_H */
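Editor's note: in these ifc structs every field width is in bits, and the widths of a command layout must sum to its wire size. For set_flow_table_root_in above: 0x20 + 0x20 + 0x40 + 0x20 + 0x20 + 0x140 = 0x200 bits = 64 bytes, so the u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] buffer in mlx5_cmd_update_root_ft() comes to 16 dwords; modify_flow_table_in sums to the same 0x200 bits.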