author    Linus Torvalds <torvalds@linux-foundation.org>  2018-04-06 17:35:43 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-04-06 17:35:43 -0700
commit    19fd08b85bc7e0502b55cd726f466df82ee7e777 (patch)
tree      b042de4b9a8a9478c528ea950b14d34487375695 /drivers/infiniband/hw/mlx5/main.c
parent    28da7be5ebc096ada5e6bc526c623bdd8c47800a (diff)
parent    efc365e7290d040fbd43f60b0e97653489a739d4 (diff)
Merge tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "Doug and I are at a conference next week, so if another PR is sent I
  expect it to only be bug fixes. Parav noted yesterday that there are
  some fringe-case behavior changes in his work that he would like to
  fix, and I see that Intel posted a number of rc-looking patches for
  HFI1 yesterday.

  Parav is again the biggest contributor by patch count with his
  ongoing work to enable container support in the RDMA stack, followed
  by Leon doing syzkaller-inspired cleanups, though most of the actual
  fixing went to -rc.

  There is one uncomfortable series here fixing the user ABI to
  actually work as intended in 32-bit mode. There are lots of notes in
  the commit messages, but the basic summary is that we don't think
  there is an actual 32-bit kernel user of drivers/infiniband, for
  several good reasons. However, we are seeing people who want to use a
  32-bit user space with a 64-bit kernel, which didn't completely work
  until now. Fixing it requires 32-bit rxe users to upgrade their
  userspace; rxe users are already quite rare and we think a 32-bit one
  is non-existent.

   - Fix RDMA uapi headers to actually compile in userspace and be more
     complete

   - Three pull requests shared with netdev from Mellanox:

      * Seven patches, mostly to net, with one IB-related patch at the
        end. This series addresses an IRQ performance issue (patch 1),
        cleanups related to that fix (patches 2-6), and then extends
        the fragmented completion queue support that already exists on
        the net side of the driver to the IB side (patch 7).

      * Mostly IB, with 5 patches to net that are needed to support the
        remaining 10 patches to the IB subsystem. This series extends
        the current 'representor' framework, used when the mlx5 driver
        is in switchdev mode, from a netdev-only construct to a
        netdev/IB dev construct. The IB dev is limited to raw Ethernet
        queue pairs only, but having an IB dev of this type attached to
        the representor for a switchdev port enables DPDK to work on
        the switchdev device.

      * All net related, but needed as infrastructure for the rdma
        driver

   - Updates for the hns, i40iw, bnxt_re, cxgb3 and cxgb4 drivers

   - SRP performance updates

   - IB uverbs write path cleanup patch series from Leon

   - Add RDMA_CM support to ib_srpt. This is disabled by default; users
     need to set the port for ib_srpt to listen on in configfs in order
     to enable it
     (/sys/kernel/config/target/srpt/discovery_auth/rdma_cm_port)

   - TSO and Scatter FCS support in mlx4

   - Refactor of the modify_qp routine to resolve problems seen while
     working on forthcoming code

   - More refactoring and updates of RDMA CM for container support from
     Parav

   - mlx5 'fine grained packet pacing', 'ipsec offload' and 'device
     memory' user API features

   - Infrastructure updates for the new IOCTL interface, based on
     increased usage

   - ABI compatibility bug fixes to fully support 32-bit userspace on a
     64-bit kernel, as was originally intended. See the commit messages
     for extensive details

   - Syzkaller bugs and code cleanups motivated by them"

* tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (199 commits)
  IB/rxe: Fix for oops in rxe_register_device on ppc64le arch
  IB/mlx5: Device memory mr registration support
  net/mlx5: Mkey creation command adjustments
  IB/mlx5: Device memory support in mlx5_ib
  net/mlx5: Query device memory capabilities
  IB/uverbs: Add device memory registration ioctl support
  IB/uverbs: Add alloc/free dm uverbs ioctl support
  IB/uverbs: Add device memory capabilities reporting
  IB/uverbs: Expose device memory capabilities to user
  RDMA/qedr: Fix wmb usage in qedr
  IB/rxe: Removed GID add/del dummy routines
  RDMA/qedr: Zero stack memory before copying to user space
  IB/mlx5: Add ability to hash by IPSEC_SPI when creating a TIR
  IB/mlx5: Add information for querying IPsec capabilities
  IB/mlx5: Add IPsec support for egress and ingress
  {net,IB}/mlx5: Add ipsec helper
  IB/mlx5: Add modify_flow_action_esp verb
  IB/mlx5: Add implementation for create and destroy action_xfrm
  IB/uverbs: Introduce ESP steering match filter
  IB/uverbs: Add modify ESP flow_action
  ...
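Of the features above, the 'device memory' (MEMIC) user API is the one most visible in this file's diff: it adds mlx5_ib_alloc_dm()/mlx5_ib_dealloc_dm() and the MLX5_IB_MMAP_DEVICE_MEM mmap path. Below is a minimal userspace sketch of how such an API would be consumed, assuming the rdma-core verbs ibv_alloc_dm()/ibv_reg_dm_mr() that pair with this kernel support; the sizes, flags and error handling are illustrative only, not part of this patch.

```c
#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	if (!list || !list[0])
		return 1;

	struct ibv_context *ctx = ibv_open_device(list[0]);
	struct ibv_pd *pd = ctx ? ibv_alloc_pd(ctx) : NULL;
	if (!pd)
		return 1;

	/* Ask the device for a small chunk of on-chip memory (MEMIC). */
	struct ibv_alloc_dm_attr dm_attr = {
		.length = 4096,
		.log_align_req = 0,	/* no special alignment */
	};
	struct ibv_dm *dm = ibv_alloc_dm(ctx, &dm_attr);
	if (!dm) {
		fprintf(stderr, "device memory not supported/available\n");
		goto out;
	}

	/* Register the device memory so it can be used as an MR target;
	 * DM MRs are addressed from offset 0, hence IBV_ACCESS_ZERO_BASED. */
	struct ibv_mr *mr = ibv_reg_dm_mr(pd, dm, 0, dm_attr.length,
					  IBV_ACCESS_LOCAL_WRITE |
					  IBV_ACCESS_REMOTE_READ |
					  IBV_ACCESS_ZERO_BASED);
	if (mr)
		ibv_dereg_mr(mr);
	ibv_free_dm(dm);
out:
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}
```

On the kernel side shown in this diff, the allocation lands in the MEMIC BAR and is exposed to userspace via the MLX5_IB_MMAP_DEVICE_MEM mmap command, with the per-context dm_pages bitmap tracking which BAR pages a given ucontext is allowed to map.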
Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 572
1 file changed, 535 insertions(+), 37 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 071fd9a7b919..daa919e5a442 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -38,6 +38,7 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
+#include <linux/bitmap.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
@@ -51,6 +52,7 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/fs_helpers.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -60,6 +62,13 @@
#include "ib_rep.h"
#include "cmd.h"
#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/accel.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -92,6 +101,12 @@ static LIST_HEAD(mlx5_ib_dev_list);
*/
static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
+/* We can't use an array for xlt_emergency_page because dma_map_single
+ * doesn't work on kernel module memory
+ */
+static unsigned long xlt_emergency_page;
+static struct mutex xlt_emergency_page_mutex;
+
struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
{
struct mlx5_ib_dev *dev;
@@ -399,6 +414,9 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
if (err)
goto out;
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = IB_SPEED_QDR;
+
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width);
@@ -493,18 +511,19 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
vlan_id, port_num);
}
-static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+static int mlx5_ib_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
+ return set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, gid, attr);
}
-static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, __always_unused void **context)
+static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
+ __always_unused void **context)
{
- return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
+ return set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, NULL, NULL);
}
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
@@ -516,9 +535,6 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
return 0;
- if (!attr.ndev)
- return 0;
-
dev_put(attr.ndev);
if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -538,9 +554,6 @@ int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
if (ret)
return ret;
- if (!attr.ndev)
- return -ENODEV;
-
dev_put(attr.ndev);
*gid_type = attr.gid_type;
@@ -844,6 +857,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_RX_HASH_SRC_PORT_UDP |
MLX5_RX_HASH_DST_PORT_UDP |
MLX5_RX_HASH_INNER;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ resp.rss_caps.rx_hash_fields_mask |=
+ MLX5_RX_HASH_IPSEC_SPI;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
@@ -875,6 +892,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
}
+ if (MLX5_CAP_DEV_MEM(mdev, memic)) {
+ props->max_dm_size =
+ MLX5_CAP_DEV_MEM(mdev, max_memic_size);
+ }
+
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -980,6 +1002,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
resp.packet_pacing_caps.supported_qpts |=
1 << IB_QPT_RAW_PACKET;
+ if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) &&
+ MLX5_CAP_QOS(mdev, packet_pacing_typical_size))
+ resp.packet_pacing_caps.cap_flags |=
+ MLX5_IB_PP_SUPPORT_BURST;
}
resp.response_length += sizeof(resp.packet_pacing_caps);
}
@@ -1665,6 +1691,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+ if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
+ if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
+ /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+ }
+
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -1702,17 +1740,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
- context->upd_xlt_page = __get_free_page(GFP_KERNEL);
- if (!context->upd_xlt_page) {
- err = -ENOMEM;
- goto out_uars;
- }
- mutex_init(&context->upd_xlt_page_mutex);
-
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
- goto out_page;
+ goto out_uars;
}
INIT_LIST_HEAD(&context->vma_private_list);
@@ -1789,9 +1820,6 @@ out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_ib_dealloc_transport_domain(dev, context->tdn);
-out_page:
- free_page(context->upd_xlt_page);
-
out_uars:
deallocate_uars(dev, context);
@@ -1817,7 +1845,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_ib_dealloc_transport_domain(dev, context->tdn);
- free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
kfree(bfregi->count);
@@ -1993,6 +2020,8 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
return "best effort WC";
case MLX5_IB_MMAP_NC_PAGE:
return "NC";
+ case MLX5_IB_MMAP_DEVICE_MEM:
+ return "Device Memory";
default:
return NULL;
}
@@ -2151,6 +2180,34 @@ free_bfreg:
return err;
}
+static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct mlx5_ib_ucontext *mctx = to_mucontext(context);
+ struct mlx5_ib_dev *dev = to_mdev(context->device);
+ u16 page_idx = get_extended_index(vma->vm_pgoff);
+ size_t map_size = vma->vm_end - vma->vm_start;
+ u32 npages = map_size >> PAGE_SHIFT;
+ phys_addr_t pfn;
+ pgprot_t prot;
+
+ if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
+ page_idx + npages)
+ return -EINVAL;
+
+ pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
+ MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
+ PAGE_SHIFT) +
+ page_idx;
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ vma->vm_page_prot = prot;
+
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return mlx5_ib_set_vma_data(vma, mctx);
+}
+
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2195,6 +2252,9 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
+ case MLX5_IB_MMAP_DEVICE_MEM:
+ return dm_mmap(ibcontext, vma);
+
default:
return -EINVAL;
}
@@ -2202,6 +2262,87 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return 0;
}
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
+ struct mlx5_memic *memic = &to_mdev(ibdev)->memic;
+ phys_addr_t memic_addr;
+ struct mlx5_ib_dm *dm;
+ u64 start_offset;
+ u32 page_idx;
+ int err;
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n",
+ attr->length, act_size, attr->alignment);
+
+ err = mlx5_cmd_alloc_memic(memic, &memic_addr,
+ act_size, attr->alignment);
+ if (err)
+ goto err_free;
+
+ start_offset = memic_addr & ~PAGE_MASK;
+ page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
+ PAGE_SHIFT;
+
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ goto err_dealloc;
+
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ goto err_dealloc;
+
+ bitmap_set(to_mucontext(context)->dm_pages, page_idx,
+ DIV_ROUND_UP(act_size, PAGE_SIZE));
+
+ dm->dev_addr = memic_addr;
+
+ return &dm->ibdm;
+
+err_dealloc:
+ mlx5_cmd_dealloc_memic(memic, memic_addr,
+ act_size);
+err_free:
+ kfree(dm);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
+{
+ struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic;
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+ u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE);
+ u32 page_idx;
+ int ret;
+
+ ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size);
+ if (ret)
+ return ret;
+
+ page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
+ PAGE_SHIFT;
+ bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
+ page_idx,
+ DIV_ROUND_UP(act_size, PAGE_SIZE));
+
+ kfree(dm);
+
+ return 0;
+}
+
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
@@ -2317,8 +2458,28 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
+static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_act *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
+
+ switch (maction->ib_action.type) {
+ case IB_FLOW_ACTION_ESP:
+ /* Currently only AES_GCM keymat is supported by the driver */
+ action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
+ action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
+ MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
u32 *match_v, const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
struct mlx5_flow_act *action)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
@@ -2328,6 +2489,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
void *headers_c;
void *headers_v;
int match_ipv;
+ int ret;
if (ib_spec->type & IB_FLOW_SPEC_INNER) {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
@@ -2478,7 +2640,15 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
ntohl(ib_spec->ipv6.mask.flow_label),
ntohl(ib_spec->ipv6.val.flow_label),
ib_spec->type & IB_FLOW_SPEC_INNER);
+ break;
+ case IB_FLOW_SPEC_ESP:
+ if (ib_spec->esp.mask.seq)
+ return -EOPNOTSUPP;
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
+ ntohl(ib_spec->esp.mask.spi));
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ ntohl(ib_spec->esp.val.spi));
break;
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
@@ -2546,6 +2716,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
return -EOPNOTSUPP;
action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ ret = parse_flow_flow_action(ib_spec, flow_attr, action);
+ if (ret)
+ return ret;
+ break;
default:
return -EINVAL;
}
@@ -2587,6 +2762,46 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
return false;
}
+enum valid_spec {
+ VALID_SPEC_INVALID,
+ VALID_SPEC_VALID,
+ VALID_SPEC_NA,
+};
+
+static enum valid_spec
+is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ const u32 *match_c = spec->match_criteria;
+ bool is_crypto =
+ (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
+ bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
+ bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /*
+ * Currently only crypto is supported in egress; once regular egress
+ * rules are supported, always return VALID_SPEC_NA.
+ */
+ if (!is_crypto)
+ return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
+
+ return is_crypto && is_ipsec &&
+ (!egress || (!is_drop && !flow_act->has_flow_tag)) ?
+ VALID_SPEC_VALID : VALID_SPEC_INVALID;
+}
+
+static bool is_valid_spec(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ /* We currently only support IPsec egress flow */
+ return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
+}
+
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
const struct ib_flow_attr *flow_attr,
bool check_inner)
@@ -2711,13 +2926,17 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
+ if (ft_type == MLX5_IB_FT_TX)
+ priority = 0;
+ else if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
dont_trap);
ns = mlx5_get_flow_namespace(dev->mdev,
+ ft_type == MLX5_IB_FT_TX ?
+ MLX5_FLOW_NAMESPACE_EGRESS :
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
@@ -2804,6 +3023,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
unsigned int spec_index;
int err = 0;
int dest_num = 1;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
@@ -2820,7 +3040,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
spec->match_value,
- ib_flow, &flow_act);
+ ib_flow, flow_attr, &flow_act);
if (err < 0)
goto free;
@@ -2843,12 +3063,23 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
+
+ if (is_egress &&
+ !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
+ err = -EINVAL;
+ goto free;
+ }
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
rule_dst = NULL;
dest_num = 0;
} else {
- flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_egress)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ else
+ flow_act.action |=
+ dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
if (flow_act.has_flow_tag &&
@@ -3022,6 +3253,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
struct mlx5_ib_flow_prio *ft_prio;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
int err;
int underlay_qpn;
@@ -3030,7 +3262,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
+ (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
+ IB_FLOW_ATTR_FLAGS_EGRESS)))
+ return ERR_PTR(-EINVAL);
+
+ if (is_egress &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT))
return ERR_PTR(-EINVAL);
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
@@ -3039,7 +3277,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
mutex_lock(&dev->flow_db->lock);
- ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
+ ft_prio = get_flow_table(dev, flow_attr,
+ is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -3053,11 +3292,15 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
}
}
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->flags & MLX5_IB_QP_RSS)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ if (is_egress) {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ if (mqp->flags & MLX5_IB_QP_RSS)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ }
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
@@ -3102,6 +3345,170 @@ unlock:
return ERR_PTR(err);
}
+static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
+{
+ u32 flags = 0;
+
+ if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
+ flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
+
+ return flags;
+}
+
+#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
+static struct ib_flow_action *
+mlx5_ib_create_flow_action_esp(struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(device);
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
+ struct mlx5_ib_flow_action *action;
+ u64 action_flags;
+ u64 flags;
+ int err = 0;
+
+ if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS)))
+ return ERR_PTR(-EFAULT);
+
+ if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
+
+ /* We currently only support a subset of the standard features. Only a
+ * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
+ * (with overlap). Full offload mode isn't supported.
+ */
+ if (!attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (attr->keymat->protocol !=
+ IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ aes_gcm = &attr->keymat->keymat.aes_gcm;
+
+ if (aes_gcm->icv_len != 16 ||
+ aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ action = kmalloc(sizeof(*action), GFP_KERNEL);
+ if (!action)
+ return ERR_PTR(-ENOMEM);
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+ memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
+ sizeof(accel_attrs.keymat.aes_gcm.aes_key));
+ accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
+ memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
+ sizeof(accel_attrs.keymat.aes_gcm.salt));
+ memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
+ sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
+ accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
+ accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
+ accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
+ accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
+
+ action->esp_aes_gcm.ctx =
+ mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
+ if (IS_ERR(action->esp_aes_gcm.ctx)) {
+ err = PTR_ERR(action->esp_aes_gcm.ctx);
+ goto err_parse;
+ }
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+
+ return &action->ib_action;
+
+err_parse:
+ kfree(action);
+ return ERR_PTR(err);
+}
+
+static int
+mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs;
+ int err = 0;
+
+ if (attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
+ return -EOPNOTSUPP;
+
+ /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
+ * be modified.
+ */
+ if (!(maction->esp_aes_gcm.ib_flags &
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
+ attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
+ return -EINVAL;
+
+ memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
+ sizeof(accel_attrs));
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ else
+ accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
+ &accel_attrs);
+ if (err)
+ return err;
+
+ maction->esp_aes_gcm.ib_flags &=
+ ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+ maction->esp_aes_gcm.ib_flags |=
+ attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+
+ return 0;
+}
+
+static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+
+ switch (action->type) {
+ case IB_FLOW_ACTION_ESP:
+ /*
+ * We only support aes_gcm for now, so we implicitly know this is
+ * the underlying crypto.
+ */
+ mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ kfree(maction);
+ return 0;
+}
+
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4553,6 +4960,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
+ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
+ UVERBS_METHOD_DM_ALLOC,
+ &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+#define NUM_TREES 2
+static int populate_specs_root(struct mlx5_ib_dev *dev)
+{
+ const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
+ uverbs_default_get_objects()};
+ size_t num_trees = 1;
+
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+ !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
+ default_root[num_trees++] = &mlx5_ib_flow_action;
+
+ if (MLX5_CAP_DEV_MEM(dev->mdev, memic) &&
+ !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
+ default_root[num_trees++] = &mlx5_ib_dm;
+
+ dev->ib_dev.specs_root =
+ uverbs_alloc_spec_tree(num_trees, default_root);
+
+ return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root);
+}
+
+static void depopulate_specs_root(struct mlx5_ib_dev *dev)
+{
+ uverbs_free_spec_tree(dev->ib_dev.specs_root);
+}
+
void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -4616,6 +5064,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
+ spin_lock_init(&dev->memic.memic_lock);
+ dev->memic.dev = mdev;
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
err = init_srcu_struct(&dev->mr_srcu);
if (err)
@@ -4778,11 +5229,21 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ if (MLX5_CAP_DEV_MEM(mdev, memic)) {
+ dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
+ dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
+ dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr;
+ }
+
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
+ dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
+ dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
+ dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
err = init_node_data(dev);
if (err)
@@ -4997,11 +5458,21 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
+static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
+{
+ return populate_specs_root(dev);
+}
+
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
+static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev)
+{
+ depopulate_specs_root(dev);
+}
+
void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
@@ -5136,6 +5607,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SPECS,
+ mlx5_ib_stage_populate_specs,
+ mlx5_ib_stage_depopulate_specs),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -5181,6 +5655,9 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SPECS,
+ mlx5_ib_stage_populate_specs,
+ mlx5_ib_stage_depopulate_specs),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -5301,13 +5778,32 @@ static struct mlx5_interface mlx5_ib_interface = {
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
+unsigned long mlx5_ib_get_xlt_emergency_page(void)
+{
+ mutex_lock(&xlt_emergency_page_mutex);
+ return xlt_emergency_page;
+}
+
+void mlx5_ib_put_xlt_emergency_page(void)
+{
+ mutex_unlock(&xlt_emergency_page_mutex);
+}
+
static int __init mlx5_ib_init(void)
{
int err;
+ xlt_emergency_page = __get_free_page(GFP_KERNEL);
+ if (!xlt_emergency_page)
+ return -ENOMEM;
+
+ mutex_init(&xlt_emergency_page_mutex);
+
mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
- if (!mlx5_ib_event_wq)
+ if (!mlx5_ib_event_wq) {
+ free_page(xlt_emergency_page);
return -ENOMEM;
+ }
mlx5_ib_odp_init();
@@ -5320,6 +5816,8 @@ static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
destroy_workqueue(mlx5_ib_event_wq);
+ mutex_destroy(&xlt_emergency_page_mutex);
+ free_page(xlt_emergency_page);
}
module_init(mlx5_ib_init);
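The xlt_emergency_page helpers added at the end of this diff implement a simple fallback pattern: a single page is allocated once at module init and handed out under a mutex, so at most one caller at a time can fall back to it when a larger allocation fails (the consumer sits elsewhere in the driver, outside this file's diff). A standalone userspace analogue of the same idea, purely illustrative (names and sizes are made up, and pthreads stands in for the kernel mutex):

```c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* One pre-allocated fallback buffer, shared by all users. */
static void *emergency_page;
static pthread_mutex_t emergency_lock = PTHREAD_MUTEX_INITIALIZER;

/* get() hands out the page and keeps the lock held until put(),
 * so at most one caller uses the emergency page at a time. */
static void *get_emergency_page(void)
{
	pthread_mutex_lock(&emergency_lock);
	return emergency_page;
}

static void put_emergency_page(void)
{
	pthread_mutex_unlock(&emergency_lock);
}

int main(void)
{
	emergency_page = malloc(4096);
	if (!emergency_page)
		return 1;

	/* Normal path: try a regular (large) allocation first. Here we
	 * pretend it failed, to exercise the fallback. */
	void *buf = NULL;
	int used_emergency = 0;

	if (!buf) {
		buf = get_emergency_page();
		used_emergency = 1;
	}

	/* ... work on buf one page at a time ... */
	printf("using %s buffer\n", used_emergency ? "emergency" : "regular");

	if (used_emergency)
		put_emergency_page();
	else
		free(buf);

	free(emergency_page);
	return 0;
}
```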