summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/Kconfig11
-rw-r--r--drivers/infiniband/core/cm.c5
-rw-r--r--drivers/infiniband/core/cma.c68
-rw-r--r--drivers/infiniband/core/netlink.c2
-rw-r--r--drivers/infiniband/core/sysfs.c1
-rw-r--r--drivers/infiniband/core/ucma.c4
-rw-r--r--drivers/infiniband/core/uverbs.h36
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c109
-rw-r--r--drivers/infiniband/core/uverbs_main.c128
-rw-r--r--drivers/infiniband/core/verbs.c17
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_sdma.c7
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c9
-rw-r--r--drivers/infiniband/hw/mlx4/main.c8
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c25
-rw-r--r--drivers/infiniband/hw/mlx5/main.c3
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h6
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c167
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c21
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c8
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h53
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c11
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.h14
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h10
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c14
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c24
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c12
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c29
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c106
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c196
-rw-r--r--include/linux/mlx5/device.h13
-rw-r--r--include/linux/mlx5/driver.h18
-rw-r--r--include/rdma/ib_verbs.h20
-rw-r--r--include/uapi/rdma/ib_user_verbs.h95
46 files changed, 895 insertions, 477 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index b84791f03a27..5ceda710f516 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -31,17 +31,6 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
<http://www.openfabrics.org/git/>.
-config INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
- bool "Experimental and unstable ABI for userspace access to flow steering verbs"
- depends on INFINIBAND_USER_ACCESS
- depends on STAGING
- ---help---
- The final ABI for userspace access to flow steering verbs
- has not been defined. To use the current ABI, *WHICH WILL
- CHANGE IN THE FUTURE*, say Y here.
-
- If unsure, say N.
-
config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 784b97cb05b0..f2ef7ef0f36f 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -383,14 +383,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
int id;
- static int next_id;
idr_preload(GFP_KERNEL);
spin_lock_irqsave(&cm.lock, flags);
- id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT);
- if (id >= 0)
- next_id = max(id + 1, 0);
+ id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
spin_unlock_irqrestore(&cm.lock, flags);
idr_preload_end();
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index dab4b41f1715..830c983fdeff 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -328,28 +328,6 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
return ret;
}
-static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
-{
- int i;
- int err;
- struct ib_port_attr props;
- union ib_gid tmp;
-
- err = ib_query_port(device, port_num, &props);
- if (err)
- return err;
-
- for (i = 0; i < props.gid_tbl_len; ++i) {
- err = ib_query_gid(device, port_num, i, &tmp);
- if (err)
- return err;
- if (!memcmp(&tmp, gid, sizeof tmp))
- return 0;
- }
-
- return -EADDRNOTAVAIL;
-}
-
static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -371,13 +349,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
return ret;
}
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv,
+ struct rdma_id_private *listen_id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct cma_device *cma_dev;
union ib_gid gid, iboe_gid;
int ret = -ENODEV;
- u8 port;
+ u8 port, found_port;
enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
@@ -389,17 +368,39 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
iboe_addr_get_sgid(dev_addr, &iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
+ if (listen_id_priv &&
+ rdma_port_get_link_layer(listen_id_priv->id.device,
+ listen_id_priv->id.port_num) == dev_ll) {
+ cma_dev = listen_id_priv->cma_dev;
+ port = listen_id_priv->id.port_num;
+ if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+ ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
+ &found_port, NULL);
+ else
+ ret = ib_find_cached_gid(cma_dev->device, &gid,
+ &found_port, NULL);
+
+ if (!ret && (port == found_port)) {
+ id_priv->id.port_num = found_port;
+ goto out;
+ }
+ }
list_for_each_entry(cma_dev, &dev_list, list) {
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+ if (listen_id_priv &&
+ listen_id_priv->cma_dev == cma_dev &&
+ listen_id_priv->id.port_num == port)
+ continue;
if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
- ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+ ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
else
- ret = find_gid_port(cma_dev->device, &gid, port);
+ ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
- if (!ret) {
- id_priv->id.port_num = port;
+ if (!ret && (port == found_port)) {
+ id_priv->id.port_num = found_port;
goto out;
}
}
@@ -1292,7 +1293,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- ret = cma_acquire_dev(conn_id);
+ ret = cma_acquire_dev(conn_id, listen_id);
if (ret)
goto err2;
@@ -1451,7 +1452,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
{
struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
- struct net_device *dev = NULL;
struct rdma_cm_event event;
int ret;
struct ib_device_attr attr;
@@ -1481,7 +1481,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
}
- ret = cma_acquire_dev(conn_id);
+ ret = cma_acquire_dev(conn_id, listen_id);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -1529,8 +1529,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
cma_deref_id(conn_id);
out:
- if (dev)
- dev_put(dev);
mutex_unlock(&listen_id->handler_mutex);
return ret;
}
@@ -2050,7 +2048,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
goto out;
if (!status && !id_priv->cma_dev)
- status = cma_acquire_dev(id_priv);
+ status = cma_acquire_dev(id_priv, NULL);
if (status) {
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2547,7 +2545,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err1;
- ret = cma_acquire_dev(id_priv);
+ ret = cma_acquire_dev(id_priv, NULL);
if (ret)
goto err1;
}
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index da06abde9e0d..a1e9cba84944 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -148,7 +148,7 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
list_for_each_entry(client, &client_list, list) {
if (client->index == index) {
if (op < 0 || op >= client->nops ||
- !client->cb_table[RDMA_NL_GET_OP(op)].dump)
+ !client->cb_table[op].dump)
return -EINVAL;
{
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index cde1e7b5b85d..faad2caf22b1 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -612,6 +612,7 @@ static ssize_t show_node_type(struct device *device,
switch (dev->node_type) {
case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
+ case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index b0f189be543b..ab8b1c30b36b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -57,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
static unsigned int max_backlog = 1024;
static struct ctl_table_header *ucma_ctl_table_hdr;
-static ctl_table ucma_ctl_table[] = {
+static struct ctl_table ucma_ctl_table[] = {
{
.procname = "max_backlog",
.data = &max_backlog,
@@ -271,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
goto out;
}
ctx->backlog--;
- } else if (!ctx->uid) {
+ } else if (!ctx->uid || ctx->cm_id != cm_id) {
/*
* We ignore events for new connections until userspace has set
* their context. This can only happen if an error occurs on a
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index d8f9c6c272d7..bdc842e9faef 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -47,6 +47,14 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (void __user *) (ibuf); \
+ (udata)->outbuf = (void __user *) (obuf); \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
/*
* Our lifetime rules for these structs are the following:
*
@@ -178,6 +186,22 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+struct ib_uverbs_flow_spec {
+ union {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_spec_eth eth;
+ struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
+ };
+};
+
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
const char __user *buf, int in_len, \
@@ -217,9 +241,13 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
IB_UVERBS_DECLARE_CMD(create_xsrq);
IB_UVERBS_DECLARE_CMD(open_xrcd);
IB_UVERBS_DECLARE_CMD(close_xrcd);
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-IB_UVERBS_DECLARE_CMD(create_flow);
-IB_UVERBS_DECLARE_CMD(destroy_flow);
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+
+#define IB_UVERBS_DECLARE_EX_CMD(name) \
+ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
+ struct ib_udata *ucore, \
+ struct ib_udata *uhw)
+
+IB_UVERBS_DECLARE_EX_CMD(create_flow);
+IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2f0f01b70e3b..65f6e7dc380c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -54,17 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
-
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
- do { \
- (udata)->inbuf = (void __user *) (ibuf); \
- (udata)->outbuf = (void __user *) (obuf); \
- (udata)->inlen = (ilen); \
- (udata)->outlen = (olen); \
- } while (0)
/*
* The ib_uobject locking scheme is as follows:
@@ -939,13 +929,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
- /*
- * Local write permission is required if remote write or
- * remote atomic permission is also requested.
- */
- if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
- !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
- return -EINVAL;
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ return ret;
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
@@ -2128,6 +2114,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+ if (next->opcode == IB_WR_SEND_WITH_IMM)
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
} else {
switch (next->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2601,8 +2590,7 @@ out_put:
return ret ? ret : in_len;
}
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
{
ib_spec->type = kern_spec->type;
@@ -2642,28 +2630,31 @@ static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
return 0;
}
-ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
struct ib_uobject *uobj;
struct ib_flow *flow_id;
- struct ib_kern_flow_attr *kern_flow_attr;
+ struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
struct ib_qp *qp;
int err = 0;
void *kern_spec;
void *ib_spec;
int i;
- int kern_attr_size;
- if (out_len < sizeof(resp))
+ if (ucore->outlen < sizeof(resp))
return -ENOSPC;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
+
+ ucore->inbuf += sizeof(cmd);
+ ucore->inlen -= sizeof(cmd);
if (cmd.comp_mask)
return -EINVAL;
@@ -2672,32 +2663,27 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
!capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
return -EPERM;
- if (cmd.flow_attr.num_of_specs < 0 ||
- cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
return -EINVAL;
- kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
- sizeof(struct ib_uverbs_cmd_hdr_ex);
-
- if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
- kern_attr_size < 0 || kern_attr_size >
- (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+ if (cmd.flow_attr.size > ucore->inlen ||
+ cmd.flow_attr.size >
+ (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
return -EINVAL;
if (cmd.flow_attr.num_of_specs) {
- kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
+ GFP_KERNEL);
if (!kern_flow_attr)
return -ENOMEM;
memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
- if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
- kern_attr_size)) {
- err = -EFAULT;
+ err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+ cmd.flow_attr.size);
+ if (err)
goto err_free_attr;
- }
} else {
kern_flow_attr = &cmd.flow_attr;
- kern_attr_size = sizeof(cmd.flow_attr);
}
uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
@@ -2714,7 +2700,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
goto err_uobj;
}
- flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
if (!flow_attr) {
err = -ENOMEM;
goto err_put;
@@ -2729,19 +2715,22 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
kern_spec = kern_flow_attr + 1;
ib_spec = flow_attr + 1;
- for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+ for (i = 0; i < flow_attr->num_of_specs &&
+ cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
+ cmd.flow_attr.size >=
+ ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
err = kern_spec_to_ib_spec(kern_spec, ib_spec);
if (err)
goto err_free;
flow_attr->size +=
((union ib_flow_spec *) ib_spec)->size;
- kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size;
- kern_spec += ((struct ib_kern_spec *) kern_spec)->size;
+ cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
+ kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
ib_spec += ((union ib_flow_spec *) ib_spec)->size;
}
- if (kern_attr_size) {
- pr_warn("create flow failed, %d bytes left from uverb cmd\n",
- kern_attr_size);
+ if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
+ pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+ i, cmd.flow_attr.size);
goto err_free;
}
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
@@ -2760,11 +2749,10 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
- if (copy_to_user((void __user *)(unsigned long) cmd.response,
- &resp, sizeof(resp))) {
- err = -EFAULT;
+ err = ib_copy_to_udata(ucore,
+ &resp, sizeof(resp));
+ if (err)
goto err_copy;
- }
put_qp_read(qp);
mutex_lock(&file->mutex);
@@ -2777,7 +2765,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return in_len;
+ return 0;
err_copy:
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
destroy_flow:
@@ -2794,16 +2782,18 @@ err_free_attr:
return err;
}
-ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len) {
+int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
struct ib_uverbs_destroy_flow cmd;
struct ib_flow *flow_id;
struct ib_uobject *uobj;
int ret;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
file->ucontext);
@@ -2825,9 +2815,8 @@ ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
put_uobj(uobj);
- return ret ? ret : in_len;
+ return ret;
}
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_uverbs_create_xsrq *cmd,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2df31f68ea09..34386943ebcf 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -115,10 +115,13 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
[IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
[IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
- [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow,
- [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+};
+
+static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw) = {
+ [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
+ [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -589,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_cmd_hdr hdr;
+ __u32 flags;
if (count < sizeof hdr)
return -EINVAL;
@@ -596,45 +600,105 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
- if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[hdr.command])
- return -EINVAL;
+ flags = (hdr.command &
+ IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
- if (!file->ucontext &&
- hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
- return -EINVAL;
+ if (!flags) {
+ __u32 command;
- if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
- return -ENOSYS;
+ if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
- if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
- struct ib_uverbs_cmd_hdr_ex hdr_ex;
+ command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
- if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
- return -EFAULT;
+ if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
+ !uverbs_cmd_table[command])
+ return -EINVAL;
- if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
+ if (!file->ucontext &&
+ command != IB_USER_VERBS_CMD_GET_CONTEXT)
return -EINVAL;
- return uverbs_cmd_table[hdr.command](file,
- buf + sizeof(hdr_ex),
- (hdr_ex.in_words +
- hdr_ex.provider_in_words) * 4,
- (hdr_ex.out_words +
- hdr_ex.provider_out_words) * 4);
- } else {
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+ if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
if (hdr.in_words * 4 != count)
return -EINVAL;
- return uverbs_cmd_table[hdr.command](file,
- buf + sizeof(hdr),
- hdr.in_words * 4,
- hdr.out_words * 4);
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
+ return uverbs_cmd_table[command](file,
+ buf + sizeof(hdr),
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+
+ } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ __u32 command;
+
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
+ struct ib_udata ucore;
+ struct ib_udata uhw;
+ int err;
+ size_t written_count = count;
+
+ if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
+
+ command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
+ !uverbs_ex_cmd_table[command])
+ return -ENOSYS;
+
+ if (!file->ucontext)
+ return -EINVAL;
+
+ if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
+
+ count -= sizeof(hdr) + sizeof(ex_hdr);
+ buf += sizeof(hdr) + sizeof(ex_hdr);
+
+ if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
+ return -EINVAL;
+
+ if (ex_hdr.response) {
+ if (!hdr.out_words && !ex_hdr.provider_out_words)
+ return -EINVAL;
+ } else {
+ if (hdr.out_words || ex_hdr.provider_out_words)
+ return -EINVAL;
+ }
+
+ INIT_UDATA(&ucore,
+ (hdr.in_words) ? buf : 0,
+ (unsigned long)ex_hdr.response,
+ hdr.in_words * 8,
+ hdr.out_words * 8);
+
+ INIT_UDATA(&uhw,
+ (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
+ (ex_hdr.provider_out_words) ? (unsigned long)ex_hdr.response + ucore.outlen : 0,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
+
+ err = uverbs_ex_cmd_table[command](file,
+ &ucore,
+ &uhw);
+
+ if (err)
+ return err;
+
+ return written_count;
}
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+
+ return -ENOSYS;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index a321df28bab2..d4f6ddf72ffa 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -114,6 +114,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
return RDMA_TRANSPORT_IB;
case RDMA_NODE_RNIC:
return RDMA_TRANSPORT_IWARP;
+ case RDMA_NODE_USNIC:
+ return RDMA_TRANSPORT_USNIC;
default:
BUG();
return 0;
@@ -130,6 +132,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
case RDMA_TRANSPORT_IB:
return IB_LINK_LAYER_INFINIBAND;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_USNIC:
return IB_LINK_LAYER_ETHERNET;
default:
return IB_LINK_LAYER_UNSPECIFIED;
@@ -958,6 +961,11 @@ EXPORT_SYMBOL(ib_resize_cq);
struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
struct ib_mr *mr;
+ int err;
+
+ err = ib_check_mr_access(mr_access_flags);
+ if (err)
+ return ERR_PTR(err);
mr = pd->device->get_dma_mr(pd, mr_access_flags);
@@ -980,6 +988,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
u64 *iova_start)
{
struct ib_mr *mr;
+ int err;
+
+ err = ib_check_mr_access(mr_access_flags);
+ if (err)
+ return ERR_PTR(err);
if (!pd->device->reg_phys_mr)
return ERR_PTR(-ENOSYS);
@@ -1010,6 +1023,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
struct ib_pd *old_pd;
int ret;
+ ret = ib_check_mr_access(mr_access_flags);
+ if (ret)
+ return ret;
+
if (!mr->device->rereg_phys_mr)
return -ENOSYS;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 33d2cc6ab562..4a033853312e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -602,10 +602,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->lldi.vr->qp.size,
rdev->lldi.vr->cq.start,
rdev->lldi.vr->cq.size);
- PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
+ PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
"qpmask 0x%x cqshift %lu cqmask 0x%x\n",
(unsigned)pci_resource_len(rdev->lldi.pdev, 2),
- (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2),
+ (u64)pci_resource_start(rdev->lldi.pdev, 2),
rdev->lldi.db_reg,
rdev->lldi.gts_reg,
rdev->qpshift, rdev->qpmask,
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index f5cb13b21445..cc04b7ba3488 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -280,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
int j;
int ret;
- ret = get_user_pages(current, current->mm, addr,
- npages, 0, 1, pages, NULL);
-
+ ret = get_user_pages_fast(addr, npages, 0, pages);
if (ret != npages) {
int i;
@@ -811,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
while (dim) {
const int mxp = 8;
- down_write(&current->mm->mmap_sem);
ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
- up_write(&current->mm->mmap_sem);
-
if (ret <= 0)
goto done_unlock;
else {
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d5e60f44ba5a..66dbf8062374 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -324,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
u32 i;
i = cq->mcq.cons_index;
- while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+ while (get_sw_cqe(cq, i))
++i;
return i - cq->mcq.cons_index;
@@ -365,7 +365,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mutex_lock(&cq->resize_mutex);
- if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+ if (entries < 1) {
err = -EINVAL;
goto out;
}
@@ -376,6 +376,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
goto out;
}
+ if (entries > dev->dev->caps.max_cqes) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (ibcq->uobject) {
err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
if (err)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f0612645de99..1aad9b3e6bdd 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1691,11 +1691,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
}
mlx4_ib_alloc_eqs(dev, ibdev);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 344ab03948a3..b72627429745 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -556,7 +556,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
goto err_db;
}
mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
- (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
*index = to_mucontext(context)->uuari.uars[0].index;
@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
}
mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
- (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+ (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
*index = dev->mdev.priv.uuari.uars[0].index;
return 0;
@@ -653,8 +653,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
int eqn;
int err;
+ if (entries < 0)
+ return ERR_PTR(-EINVAL);
+
entries = roundup_pow_of_two(entries + 1);
- if (entries < 1 || entries > dev->mdev.caps.max_cqes)
+ if (entries > dev->mdev.caps.max_cqes)
return ERR_PTR(-EINVAL);
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -747,17 +750,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)
return 0;
}
-static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
- u32 rsn)
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
- u32 lrsn;
-
- if (srq)
- lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
- else
- lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
-
- return rsn == lrsn;
+ return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
@@ -787,8 +782,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
- if (is_equal_rsn(cqe64, srq, rsn)) {
- if (srq)
+ if (is_equal_rsn(cqe64, rsn)) {
+ if (srq && (ntohl(cqe64->srqn) & 0xffffff))
mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
++nfreed;
} else if (nfreed) {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b1a6cb3a2809..306534109627 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -745,7 +745,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
- err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
+ NULL, NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
goto err_in;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 836be9157242..4c134d93d4fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -262,6 +262,9 @@ struct mlx5_ib_mr {
int npages;
struct completion done;
enum ib_wc_status status;
+ struct mlx5_ib_dev *dev;
+ struct mlx5_create_mkey_mbox_out out;
+ unsigned long start;
};
struct mlx5_ib_fast_reg_page_list {
@@ -323,6 +326,7 @@ struct mlx5_cache_ent {
struct mlx5_ib_dev *dev;
struct work_struct work;
struct delayed_work dwork;
+ int pending;
};
struct mlx5_mr_cache {
@@ -358,6 +362,8 @@ struct mlx5_ib_dev {
spinlock_t mr_lock;
struct mlx5_ib_resources devr;
struct mlx5_mr_cache cache;
+ struct timer_list delay_timer;
+ int fill_delay;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3453580b1eb2..039c3e40fcb4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -35,11 +35,12 @@
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
+#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
enum {
- DEF_CACHE_SIZE = 10,
+ MAX_PENDING_REG_MR = 8,
};
enum {
@@ -63,6 +64,51 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
return order - cache->ent[0].order;
}
+static void reg_mr_callback(int status, void *context)
+{
+ struct mlx5_ib_mr *mr = context;
+ struct mlx5_ib_dev *dev = mr->dev;
+ struct mlx5_mr_cache *cache = &dev->cache;
+ int c = order2idx(dev, mr->order);
+ struct mlx5_cache_ent *ent = &cache->ent[c];
+ u8 key;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ent->lock, flags);
+ ent->pending--;
+ spin_unlock_irqrestore(&ent->lock, flags);
+ if (status) {
+ mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+ kfree(mr);
+ dev->fill_delay = 1;
+ mod_timer(&dev->delay_timer, jiffies + HZ);
+ return;
+ }
+
+ if (mr->out.hdr.status) {
+ mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
+ mr->out.hdr.status,
+ be32_to_cpu(mr->out.hdr.syndrome));
+ kfree(mr);
+ dev->fill_delay = 1;
+ mod_timer(&dev->delay_timer, jiffies + HZ);
+ return;
+ }
+
+ spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
+ key = dev->mdev.priv.mkey_key++;
+ spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
+ mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+
+ cache->last_add = jiffies;
+
+ spin_lock_irqsave(&ent->lock, flags);
+ list_add_tail(&mr->list, &ent->head);
+ ent->cur++;
+ ent->size++;
+ spin_unlock_irqrestore(&ent->lock, flags);
+}
+
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
struct mlx5_mr_cache *cache = &dev->cache;
@@ -78,36 +124,39 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
return -ENOMEM;
for (i = 0; i < num; i++) {
+ if (ent->pending >= MAX_PENDING_REG_MR) {
+ err = -EAGAIN;
+ break;
+ }
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
err = -ENOMEM;
- goto out;
+ break;
}
mr->order = ent->order;
mr->umred = 1;
+ mr->dev = dev;
in->seg.status = 1 << 6;
in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
in->seg.log2_page_size = 12;
+ spin_lock_irq(&ent->lock);
+ ent->pending++;
+ spin_unlock_irq(&ent->lock);
+ mr->start = jiffies;
err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
- sizeof(*in));
+ sizeof(*in), reg_mr_callback,
+ mr, &mr->out);
if (err) {
mlx5_ib_warn(dev, "create mkey failed %d\n", err);
kfree(mr);
- goto out;
+ break;
}
- cache->last_add = jiffies;
-
- spin_lock(&ent->lock);
- list_add_tail(&mr->list, &ent->head);
- ent->cur++;
- ent->size++;
- spin_unlock(&ent->lock);
}
-out:
kfree(in);
return err;
}
@@ -121,16 +170,16 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
int i;
for (i = 0; i < num; i++) {
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
return;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->cur--;
ent->size--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
if (err)
mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -162,9 +211,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
return -EINVAL;
if (var > ent->size) {
- err = add_keys(dev, c, var - ent->size);
- if (err)
- return err;
+ do {
+ err = add_keys(dev, c, var - ent->size);
+ if (err && err != -EAGAIN)
+ return err;
+
+ usleep_range(3000, 5000);
+ } while (err);
} else if (var < ent->size) {
remove_keys(dev, c, ent->size - var);
}
@@ -280,23 +333,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
struct mlx5_ib_dev *dev = ent->dev;
struct mlx5_mr_cache *cache = &dev->cache;
int i = order2idx(dev, ent->order);
+ int err;
if (cache->stopped)
return;
ent = &dev->cache.ent[i];
- if (ent->cur < 2 * ent->limit) {
- add_keys(dev, i, 1);
- if (ent->cur < 2 * ent->limit)
- queue_work(cache->wq, &ent->work);
+ if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+ err = add_keys(dev, i, 1);
+ if (ent->cur < 2 * ent->limit) {
+ if (err == -EAGAIN) {
+ mlx5_ib_dbg(dev, "returned eagain, order %d\n",
+ i + 2);
+ queue_delayed_work(cache->wq, &ent->dwork,
+ msecs_to_jiffies(3));
+ } else if (err) {
+ mlx5_ib_warn(dev, "command failed order %d, err %d\n",
+ i + 2, err);
+ queue_delayed_work(cache->wq, &ent->dwork,
+ msecs_to_jiffies(1000));
+ } else {
+ queue_work(cache->wq, &ent->work);
+ }
+ }
} else if (ent->cur > 2 * ent->limit) {
if (!someone_adding(cache) &&
- time_after(jiffies, cache->last_add + 60 * HZ)) {
+ time_after(jiffies, cache->last_add + 300 * HZ)) {
remove_keys(dev, i, 1);
if (ent->cur > ent->limit)
queue_work(cache->wq, &ent->work);
} else {
- queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+ queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
}
}
}
@@ -336,18 +403,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (!list_empty(&ent->head)) {
mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
list);
list_del(&mr->list);
ent->cur--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
if (ent->cur < ent->limit)
queue_work(cache->wq, &ent->work);
break;
}
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
queue_work(cache->wq, &ent->work);
@@ -374,12 +441,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return;
}
ent = &cache->ent[c];
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
ent->cur++;
if (ent->cur > 2 * ent->limit)
shrink = 1;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
if (shrink)
queue_work(cache->wq, &ent->work);
@@ -394,16 +461,16 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
cancel_delayed_work(&ent->dwork);
while (1) {
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
return;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->cur--;
ent->size--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
if (err)
mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -464,12 +531,18 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
debugfs_remove_recursive(dev->cache.root);
}
+static void delay_time_func(unsigned long ctx)
+{
+ struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
+
+ dev->fill_delay = 0;
+}
+
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
int limit;
- int size;
int err;
int i;
@@ -479,6 +552,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
return -ENOMEM;
}
+ setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
INIT_LIST_HEAD(&cache->ent[i].head);
spin_lock_init(&cache->ent[i].lock);
@@ -489,13 +563,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->order = i + 2;
ent->dev = dev;
- if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
- size = dev->mdev.profile->mr_cache[i].size;
+ if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
limit = dev->mdev.profile->mr_cache[i].limit;
- } else {
- size = DEF_CACHE_SIZE;
+ else
limit = 0;
- }
+
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
ent->limit = limit;
@@ -522,6 +594,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
clean_keys(dev, i);
destroy_workqueue(dev->cache.wq);
+ del_timer_sync(&dev->delay_timer);
return 0;
}
@@ -551,7 +624,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
- err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+ NULL);
if (err)
goto err_in;
@@ -660,14 +734,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
int err;
int i;
- for (i = 0; i < 10; i++) {
+ for (i = 0; i < 1; i++) {
mr = alloc_cached_mr(dev, order);
if (mr)
break;
err = add_keys(dev, order2idx(dev, order), 1);
- if (err) {
- mlx5_ib_warn(dev, "add_keys failed\n");
+ if (err && err != -EAGAIN) {
+ mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
break;
}
}
@@ -759,8 +833,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
in->seg.log2_page_size = page_shift;
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
- in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
- err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
+ in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
+ 1 << page_shift));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
+ NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
@@ -944,7 +1020,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
* TBD not needed - issue 197292 */
in->seg.log2_page_size = PAGE_SHIFT;
- err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
+ NULL, NULL);
kfree(in);
if (err)
goto err_free;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 5659ea880741..7c6b4ba49bec 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
- cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+ cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -648,7 +648,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
goto err_buf;
}
(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
- (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+ (*in)->ctx.log_pg_sz_remote_qpn =
+ cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
/* Set "fast registration enabled" for all kernel QPs */
(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
@@ -1317,9 +1318,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_RNR_TIMEOUT |
- MLX5_QP_OPTPAR_PM_STATE,
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PM_STATE,
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_SRQN |
MLX5_QP_OPTPAR_CQN_RCV,
@@ -1550,7 +1553,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
mlx5_st = to_mlx5_st(ibqp->qp_type);
- if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0)
+ if (mlx5_st < 0)
goto out;
optpar = ib_mask_to_mlx5_opt(attr_mask);
@@ -1744,6 +1747,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
MLX5_MKEY_MASK_PD |
MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
MLX5_MKEY_MASK_A |
@@ -1800,7 +1804,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
seg->len = cpu_to_be64(wr->wr.fast_reg.length);
seg->log2_page_size = wr->wr.fast_reg.page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
+ mlx5_mkey_variant(wr->wr.fast_reg.rkey));
}
static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -1913,6 +1918,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
if (!li) {
+ if (unlikely(wr->wr.fast_reg.page_list_len >
+ wr->wr.fast_reg.page_list->max_page_list_len))
+ return -ENOMEM;
+
set_frwr_pages(*seg, wr, mdev, pd, writ);
*seg += sizeof(struct mlx5_wqe_data_seg);
*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 0aa478bc291a..210b3eaf188a 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
goto err_in;
}
- (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
return 0;
@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
- (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
return 0;
@@ -390,9 +390,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
ib_umem_release(msrq->umem);
} else {
- kfree(msrq->wrid);
- mlx5_buf_free(&dev->mdev, &msrq->buf);
- mlx5_db_free(&dev->mdev, &msrq->db);
+ destroy_srq_kernel(dev, msrq);
}
kfree(srq);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5b53ca5a2284..8308e3634767 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2834,7 +2834,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->qp_context = nesqp->ibqp.qp_context;
init_attr->send_cq = nesqp->ibqp.send_cq;
init_attr->recv_cq = nesqp->ibqp.recv_cq;
- init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq;
+ init_attr->srq = nesqp->ibqp.srq;
init_attr->cap = attr->cap;
return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index adc11d14f878..294dd27b601e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -122,6 +122,32 @@ struct mqe_ctx {
bool cmd_done;
};
+struct ocrdma_hw_mr {
+ u32 lkey;
+ u8 fr_mr;
+ u8 remote_atomic;
+ u8 remote_rd;
+ u8 remote_wr;
+ u8 local_rd;
+ u8 local_wr;
+ u8 mw_bind;
+ u8 rsvd;
+ u64 len;
+ struct ocrdma_pbl *pbl_table;
+ u32 num_pbls;
+ u32 num_pbes;
+ u32 pbl_size;
+ u32 pbe_size;
+ u64 fbo;
+ u64 va;
+};
+
+struct ocrdma_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct ocrdma_hw_mr hwmr;
+};
+
struct ocrdma_dev {
struct ib_device ibdev;
struct ocrdma_dev_attr attr;
@@ -169,7 +195,7 @@ struct ocrdma_dev {
struct list_head entry;
struct rcu_head rcu;
int id;
- u64 stag_arr[OCRDMA_MAX_STAG];
+ struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG];
u16 pvid;
};
@@ -294,31 +320,6 @@ struct ocrdma_qp {
u16 db_cache;
};
-struct ocrdma_hw_mr {
- u32 lkey;
- u8 fr_mr;
- u8 remote_atomic;
- u8 remote_rd;
- u8 remote_wr;
- u8 local_rd;
- u8 local_wr;
- u8 mw_bind;
- u8 rsvd;
- u64 len;
- struct ocrdma_pbl *pbl_table;
- u32 num_pbls;
- u32 num_pbes;
- u32 pbl_size;
- u32 pbe_size;
- u64 fbo;
- u64 va;
-};
-
-struct ocrdma_mr {
- struct ib_mr ibmr;
- struct ib_umem *umem;
- struct ocrdma_hw_mr hwmr;
-};
struct ocrdma_ucontext {
struct ib_ucontext ibucontext;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 50219ab2279d..56bf32fcb62c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1783,7 +1783,7 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
u32 max_sges = attrs->cap.max_send_sge;
/* QP1 may exceed 127 */
- max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1,
+ max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
dev->attr.max_wqe);
status = ocrdma_build_q_conf(&max_wqe_allocated,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 0ce7674621ea..91443bcb9e0e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -452,9 +452,6 @@ static void ocrdma_remove_free(struct rcu_head *rcu)
{
struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
- ocrdma_free_resources(dev);
- ocrdma_cleanup_hw(dev);
-
idr_remove(&ocrdma_dev_id, dev->id);
kfree(dev->mbx_cmd);
ib_dealloc_device(&dev->ibdev);
@@ -470,6 +467,10 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
spin_lock(&ocrdma_devlist_lock);
list_del_rcu(&dev->entry);
spin_unlock(&ocrdma_devlist_lock);
+
+ ocrdma_free_resources(dev);
+ ocrdma_cleanup_hw(dev);
+
call_rcu(&dev->rcu, ocrdma_remove_free);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 69f1d1221a6b..7686dceadd29 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1981,9 +1981,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
- if ((wr->wr.fast_reg.page_list_len >
- qp->dev->attr.max_pages_per_frmr) ||
- (wr->wr.fast_reg.length > 0xffffffffULL))
+ if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
return -EINVAL;
hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
@@ -2839,7 +2837,7 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
goto mbx_err;
mr->ibmr.rkey = mr->hwmr.lkey;
mr->ibmr.lkey = mr->hwmr.lkey;
- dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr;
+ dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr;
return &mr->ibmr;
mbx_err:
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 016e7429adf6..5bfc02f450e6 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6190,21 +6190,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
{
struct qib_devdata *dd;
unsigned long val;
- int ret;
-
+ char *n;
if (strlen(str) >= MAX_ATTEN_LEN) {
pr_info("txselect_values string too long\n");
return -ENOSPC;
}
- ret = kstrtoul(str, 0, &val);
- if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ val = simple_strtoul(str, &n, 0);
+ if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
TXDDS_MFG_SZ)) {
pr_info("txselect_values must start with a number < %d\n",
TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
- return ret ? ret : -EINVAL;
+ return -EINVAL;
}
-
strcpy(txselect_list, str);
+
list_for_each_entry(dd, &qib_dev_list, list)
if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
set_no_qsfp_atten(dd, 1);
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 28874f8606f8..941d4d50d8e7 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -54,7 +54,7 @@ struct ib_node_info {
__be32 revision;
u8 local_port_num;
u8 vendor_id[3];
-} __attribute__ ((packed));
+} __packed;
struct ib_mad_notice_attr {
u8 generic_type;
@@ -73,7 +73,7 @@ struct ib_mad_notice_attr {
__be16 reserved;
__be16 lid; /* where violation happened */
u8 port_num; /* where violation happened */
- } __attribute__ ((packed)) ntc_129_131;
+ } __packed ntc_129_131;
struct {
__be16 reserved;
@@ -83,14 +83,14 @@ struct ib_mad_notice_attr {
__be32 new_cap_mask; /* new capability mask */
u8 reserved3;
u8 change_flags; /* low 3 bits only */
- } __attribute__ ((packed)) ntc_144;
+ } __packed ntc_144;
struct {
__be16 reserved;
__be16 lid; /* lid where sys guid changed */
__be16 reserved2;
__be64 new_sys_guid;
- } __attribute__ ((packed)) ntc_145;
+ } __packed ntc_145;
struct {
__be16 reserved;
@@ -104,7 +104,7 @@ struct ib_mad_notice_attr {
u8 reserved3;
u8 dr_trunc_hop;
u8 dr_rtn_path[30];
- } __attribute__ ((packed)) ntc_256;
+ } __packed ntc_256;
struct {
__be16 reserved;
@@ -115,7 +115,7 @@ struct ib_mad_notice_attr {
__be32 qp2; /* high 8 bits reserved */
union ib_gid gid1;
union ib_gid gid2;
- } __attribute__ ((packed)) ntc_257_258;
+ } __packed ntc_257_258;
} details;
};
@@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong {
__be64 port_rcv_packets;
__be64 port_xmit_wait;
__be64 port_adr_events;
-} __attribute__ ((packed));
+} __packed;
#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index d0a0ea0c14d6..165aee2ca8a0 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -594,8 +594,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
else
j = npages;
- ret = get_user_pages(current, current->mm, addr,
- j, 0, 1, pages, NULL);
+ ret = get_user_pages_fast(addr, j, 0, pages);
if (ret != j) {
i = 0;
j = ret;
@@ -1294,11 +1293,8 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
int mxp = 8;
int ndesc = 0;
- down_write(&current->mm->mmap_sem);
ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
iov, dim, &list, &mxp, &ndesc);
- up_write(&current->mm->mmap_sem);
-
if (ret < 0)
goto done_unlock;
else {
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 012e2c7575ad..a01c7d2cf541 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -150,14 +150,14 @@ struct ib_reth {
__be64 vaddr;
__be32 rkey;
__be32 length;
-} __attribute__ ((packed));
+} __packed;
struct ib_atomic_eth {
__be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
-} __attribute__ ((packed));
+} __packed;
struct qib_other_headers {
__be32 bth[3];
@@ -178,7 +178,7 @@ struct qib_other_headers {
__be32 aeth;
struct ib_atomic_eth atomic_eth;
} u;
-} __attribute__ ((packed));
+} __packed;
/*
* Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -195,12 +195,12 @@ struct qib_ib_header {
} l;
struct qib_other_headers oth;
} u;
-} __attribute__ ((packed));
+} __packed;
struct qib_pio_header {
__le32 pbc[2];
struct qib_ib_header hdr;
-} __attribute__ ((packed));
+} __packed;
/*
* There is one struct qib_mcast for each multicast GID.
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index eb71aaa26a9a..c639f90cfda4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -101,6 +101,7 @@ enum {
IPOIB_MCAST_FLAG_SENDONLY = 1,
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
IPOIB_MCAST_FLAG_ATTACHED = 3,
+ IPOIB_MCAST_JOIN_STARTED = 4,
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
@@ -151,6 +152,7 @@ struct ipoib_mcast {
struct sk_buff_head pkt_queue;
struct net_device *dev;
+ struct completion done;
};
struct ipoib_rx_buf {
@@ -299,7 +301,7 @@ struct ipoib_dev_priv {
unsigned long flags;
- struct mutex vlan_mutex;
+ struct rw_semaphore vlan_rwsem;
struct rb_root path_tree;
struct list_head path_list;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 7a3175400b2a..1377f85911c2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -140,7 +140,8 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
struct ipoib_cm_rx_buf *rx_ring,
int id, int frags,
- u64 mapping[IPOIB_CM_RX_SG])
+ u64 mapping[IPOIB_CM_RX_SG],
+ gfp_t gfp)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
@@ -164,7 +165,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
}
for (i = 0; i < frags; i++) {
- struct page *page = alloc_page(GFP_ATOMIC);
+ struct page *page = alloc_page(gfp);
if (!page)
goto partial_error;
@@ -382,7 +383,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
for (i = 0; i < ipoib_recvq_size; ++i) {
if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
- rx->rx_ring[i].mapping)) {
+ rx->rx_ring[i].mapping,
+ GFP_KERNEL)) {
ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
ret = -ENOMEM;
goto err_count;
@@ -639,7 +641,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
(unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
- newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping);
+ newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
+ mapping, GFP_ATOMIC);
if (unlikely(!newskb)) {
/*
* If we can't allocate a new RX buffer, dump
@@ -1556,7 +1559,8 @@ int ipoib_cm_dev_init(struct net_device *dev)
for (i = 0; i < ipoib_recvq_size; ++i) {
if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
priv->cm.num_frags - 1,
- priv->cm.srq_ring[i].mapping)) {
+ priv->cm.srq_ring[i].mapping,
+ GFP_KERNEL)) {
ipoib_warn(priv, "failed to allocate "
"receive buffer %d\n", i);
ipoib_cm_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 196b1d13cbcb..6a7003ddb0be 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -685,15 +685,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- ipoib_ib_dev_stop(dev, 1);
- return -1;
+ goto dev_stop;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
- ipoib_ib_dev_stop(dev, 1);
- return -1;
+ goto dev_stop;
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
@@ -704,6 +702,11 @@ int ipoib_ib_dev_open(struct net_device *dev)
napi_enable(&priv->napi);
return 0;
+dev_stop:
+ if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_enable(&priv->napi);
+ ipoib_ib_dev_stop(dev, 1);
+ return -1;
}
static void ipoib_pkey_dev_check_presence(struct net_device *dev)
@@ -746,10 +749,8 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
mutex_lock(&pkey_mutex);
set_bit(IPOIB_PKEY_STOP, &priv->flags);
- cancel_delayed_work(&priv->pkey_poll_task);
+ cancel_delayed_work_sync(&priv->pkey_poll_task);
mutex_unlock(&pkey_mutex);
- if (flush)
- flush_workqueue(ipoib_workqueue);
}
ipoib_mcast_stop_thread(dev, flush);
@@ -974,7 +975,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
u16 new_index;
int result;
- mutex_lock(&priv->vlan_mutex);
+ down_read(&priv->vlan_rwsem);
/*
* Flush any child interfaces too -- they might be up even if
@@ -983,7 +984,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
list_for_each_entry(cpriv, &priv->child_intfs, list)
__ipoib_ib_dev_flush(cpriv, level);
- mutex_unlock(&priv->vlan_mutex);
+ up_read(&priv->vlan_rwsem);
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
/* for non-child devices must check/update the pkey value here */
@@ -1081,6 +1082,11 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
ipoib_dbg(priv, "cleaning up ib_dev\n");
+ /*
+ * We must make sure there are no more (path) completions
+ * that may wish to touch priv fields that are no longer valid
+ */
+ ipoib_flush_paths(dev);
ipoib_mcast_stop_thread(dev, 1);
ipoib_mcast_dev_flush(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 82cec1af902c..d64ed05fb082 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -119,7 +119,7 @@ int ipoib_open(struct net_device *dev)
struct ipoib_dev_priv *cpriv;
/* Bring up any child interfaces too */
- mutex_lock(&priv->vlan_mutex);
+ down_read(&priv->vlan_rwsem);
list_for_each_entry(cpriv, &priv->child_intfs, list) {
int flags;
@@ -129,7 +129,7 @@ int ipoib_open(struct net_device *dev)
dev_change_flags(cpriv->dev, flags | IFF_UP);
}
- mutex_unlock(&priv->vlan_mutex);
+ up_read(&priv->vlan_rwsem);
}
netif_start_queue(dev);
@@ -162,7 +162,7 @@ static int ipoib_stop(struct net_device *dev)
struct ipoib_dev_priv *cpriv;
/* Bring down any child interfaces too */
- mutex_lock(&priv->vlan_mutex);
+ down_read(&priv->vlan_rwsem);
list_for_each_entry(cpriv, &priv->child_intfs, list) {
int flags;
@@ -172,7 +172,7 @@ static int ipoib_stop(struct net_device *dev)
dev_change_flags(cpriv->dev, flags & ~IFF_UP);
}
- mutex_unlock(&priv->vlan_mutex);
+ up_read(&priv->vlan_rwsem);
}
return 0;
@@ -1350,7 +1350,7 @@ void ipoib_setup(struct net_device *dev)
ipoib_set_ethtool_ops(dev);
- netif_napi_add(dev, &priv->napi, ipoib_poll, 100);
+ netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
dev->watchdog_timeo = HZ;
@@ -1372,7 +1372,7 @@ void ipoib_setup(struct net_device *dev)
spin_lock_init(&priv->lock);
- mutex_init(&priv->vlan_mutex);
+ init_rwsem(&priv->vlan_rwsem);
INIT_LIST_HEAD(&priv->path_list);
INIT_LIST_HEAD(&priv->child_intfs);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index cecb98a4c662..d4e005720d01 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -386,8 +386,10 @@ static int ipoib_mcast_join_complete(int status,
mcast->mcmember.mgid.raw, status);
/* We trap for port events ourselves. */
- if (status == -ENETRESET)
- return 0;
+ if (status == -ENETRESET) {
+ status = 0;
+ goto out;
+ }
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
@@ -407,7 +409,8 @@ static int ipoib_mcast_join_complete(int status,
if (mcast == priv->broadcast)
queue_work(ipoib_workqueue, &priv->carrier_on_task);
- return 0;
+ status = 0;
+ goto out;
}
if (mcast->logcount++ < 20) {
@@ -434,7 +437,8 @@ static int ipoib_mcast_join_complete(int status,
mcast->backoff * HZ);
spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
-
+out:
+ complete(&mcast->done);
return status;
}
@@ -484,11 +488,15 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
}
set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
+
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
&rec, comp_mask, GFP_KERNEL,
ipoib_mcast_join_complete, mcast);
if (IS_ERR(mcast->mc)) {
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ complete(&mcast->done);
ret = PTR_ERR(mcast->mc);
ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
@@ -510,10 +518,18 @@ void ipoib_mcast_join_task(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, mcast_task.work);
struct net_device *dev = priv->dev;
+ struct ib_port_attr port_attr;
if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
return;
+ if (ib_query_port(priv->ca, priv->port, &port_attr) ||
+ port_attr.state != IB_PORT_ACTIVE) {
+ ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
+ port_attr.state);
+ return;
+ }
+
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
@@ -751,6 +767,11 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
spin_unlock_irqrestore(&priv->lock, flags);
+ /* seperate between the wait to the leave*/
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+ if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+ wait_for_completion(&mcast->done);
+
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
ipoib_mcast_leave(dev, mcast);
ipoib_mcast_free(mcast);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index f81abe16cf09..c29b5c838833 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -142,10 +142,10 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
priv = netdev_priv(dev);
ppriv = netdev_priv(priv->parent);
- mutex_lock(&ppriv->vlan_mutex);
+ down_write(&ppriv->vlan_rwsem);
unregister_netdevice_queue(dev, head);
list_del(&priv->list);
- mutex_unlock(&ppriv->vlan_mutex);
+ up_write(&ppriv->vlan_rwsem);
}
static size_t ipoib_get_size(const struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 8292554bccb5..9fad7b5ac8b9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -140,7 +140,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
if (!rtnl_trylock())
return restart_syscall();
- mutex_lock(&ppriv->vlan_mutex);
+ down_write(&ppriv->vlan_rwsem);
/*
* First ensure this isn't a duplicate. We check the parent device and
@@ -163,7 +163,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
out:
- mutex_unlock(&ppriv->vlan_mutex);
+ up_write(&ppriv->vlan_rwsem);
if (result)
free_netdev(priv->dev);
@@ -185,7 +185,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
if (!rtnl_trylock())
return restart_syscall();
- mutex_lock(&ppriv->vlan_mutex);
+
+ down_write(&ppriv->vlan_rwsem);
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
@@ -195,7 +196,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
break;
}
}
- mutex_unlock(&ppriv->vlan_mutex);
+ up_write(&ppriv->vlan_rwsem);
+
rtnl_unlock();
if (dev) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 6ca30739625f..8675d26a678b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -98,6 +98,7 @@ enum {
static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
struct mlx5_cmd_msg *in,
struct mlx5_cmd_msg *out,
+ void *uout, int uout_size,
mlx5_cmd_cbk_t cbk,
void *context, int page_queue)
{
@@ -110,6 +111,8 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
ent->in = in;
ent->out = out;
+ ent->uout = uout;
+ ent->uout_size = uout_size;
ent->callback = cbk;
ent->context = context;
ent->cmd = cmd;
@@ -534,6 +537,7 @@ static void cmd_work_handler(struct work_struct *work)
ent->lay = lay;
memset(lay, 0, sizeof(*lay));
memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
+ ent->op = be32_to_cpu(lay->in[0]) >> 16;
if (ent->in->next)
lay->in_ptr = cpu_to_be64(ent->in->next->dma);
lay->inlen = cpu_to_be32(ent->in->len);
@@ -628,7 +632,8 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
* 2. page queue commands do not support asynchrous completion
*/
static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
- struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t callback,
void *context, int page_queue, u8 *status)
{
struct mlx5_cmd *cmd = &dev->cmd;
@@ -642,7 +647,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (callback && page_queue)
return -EINVAL;
- ent = alloc_cmd(cmd, in, out, callback, context, page_queue);
+ ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
+ page_queue);
if (IS_ERR(ent))
return PTR_ERR(ent);
@@ -670,10 +676,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
if (op < ARRAY_SIZE(cmd->stats)) {
stats = &cmd->stats[op];
- spin_lock(&stats->lock);
+ spin_lock_irq(&stats->lock);
stats->sum += ds;
++stats->n;
- spin_unlock(&stats->lock);
+ spin_unlock_irq(&stats->lock);
}
mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
"fw exec time for %s is %lld nsec\n",
@@ -826,7 +832,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
int n;
int i;
- msg = kzalloc(sizeof(*msg), GFP_KERNEL);
+ msg = kzalloc(sizeof(*msg), flags);
if (!msg)
return ERR_PTR(-ENOMEM);
@@ -1109,6 +1115,19 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
up(&cmd->sem);
}
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+{
+ unsigned long flags;
+
+ if (msg->cache) {
+ spin_lock_irqsave(&msg->cache->lock, flags);
+ list_add_tail(&msg->list, &msg->cache->head);
+ spin_unlock_irqrestore(&msg->cache->lock, flags);
+ } else {
+ mlx5_free_cmd_msg(dev, msg);
+ }
+}
+
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
{
struct mlx5_cmd *cmd = &dev->cmd;
@@ -1117,6 +1136,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
void *context;
int err;
int i;
+ ktime_t t1, t2, delta;
+ s64 ds;
+ struct mlx5_cmd_stats *stats;
+ unsigned long flags;
for (i = 0; i < (1 << cmd->log_sz); i++) {
if (test_bit(i, &vector)) {
@@ -1141,9 +1164,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
}
free_ent(cmd, ent->idx);
if (ent->callback) {
+ t1 = timespec_to_ktime(ent->ts1);
+ t2 = timespec_to_ktime(ent->ts2);
+ delta = ktime_sub(t2, t1);
+ ds = ktime_to_ns(delta);
+ if (ent->op < ARRAY_SIZE(cmd->stats)) {
+ stats = &cmd->stats[ent->op];
+ spin_lock_irqsave(&stats->lock, flags);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irqrestore(&stats->lock, flags);
+ }
+
callback = ent->callback;
context = ent->context;
err = ent->ret;
+ if (!err)
+ err = mlx5_copy_from_msg(ent->uout,
+ ent->out,
+ ent->uout_size);
+
+ mlx5_free_cmd_msg(dev, ent->out);
+ free_msg(dev, ent->in);
+
free_cmd(ent);
callback(err, context);
} else {
@@ -1160,7 +1203,8 @@ static int status_to_err(u8 status)
return status ? -1 : 0; /* TBD more meaningful codes */
}
-static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
+ gfp_t gfp)
{
struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
struct mlx5_cmd *cmd = &dev->cmd;
@@ -1172,7 +1216,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
ent = &cmd->cache.med;
if (ent) {
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (!list_empty(&ent->head)) {
msg = list_entry(ent->head.next, typeof(*msg), list);
/* For cached lists, we must explicitly state what is
@@ -1181,43 +1225,34 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
msg->len = in_size;
list_del(&msg->list);
}
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
}
if (IS_ERR(msg))
- msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size);
+ msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
return msg;
}
-static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
-{
- if (msg->cache) {
- spin_lock(&msg->cache->lock);
- list_add_tail(&msg->list, &msg->cache->head);
- spin_unlock(&msg->cache->lock);
- } else {
- mlx5_free_cmd_msg(dev, msg);
- }
-}
-
static int is_manage_pages(struct mlx5_inbox_hdr *in)
{
return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
}
-int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
- int out_size)
+static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size, mlx5_cmd_cbk_t callback, void *context)
{
struct mlx5_cmd_msg *inb;
struct mlx5_cmd_msg *outb;
int pages_queue;
+ gfp_t gfp;
int err;
u8 status = 0;
pages_queue = is_manage_pages(in);
+ gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
- inb = alloc_msg(dev, in_size);
+ inb = alloc_msg(dev, in_size, gfp);
if (IS_ERR(inb)) {
err = PTR_ERR(inb);
return err;
@@ -1229,13 +1264,14 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
goto out_in;
}
- outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size);
+ outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
if (IS_ERR(outb)) {
err = PTR_ERR(outb);
goto out_in;
}
- err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status);
+ err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
+ pages_queue, &status);
if (err)
goto out_out;
@@ -1248,14 +1284,30 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
err = mlx5_copy_from_msg(out, outb, out_size);
out_out:
- mlx5_free_cmd_msg(dev, outb);
+ if (!callback)
+ mlx5_free_cmd_msg(dev, outb);
out_in:
- free_msg(dev, inb);
+ if (!callback)
+ free_msg(dev, inb);
return err;
}
+
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size)
+{
+ return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
+}
EXPORT_SYMBOL(mlx5_cmd_exec);
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size, mlx5_cmd_cbk_t callback,
+ void *context)
+{
+ return cmd_exec(dev, in, in_size, out, out_size, callback, context);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_cb);
+
static void destroy_msg_cache(struct mlx5_core_dev *dev)
{
struct mlx5_cmd *cmd = &dev->cmd;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 9c7194b26ee2..80f6d127257a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -154,10 +154,10 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
return 0;
stats = filp->private_data;
- spin_lock(&stats->lock);
+ spin_lock_irq(&stats->lock);
if (stats->n)
field = div64_u64(stats->sum, stats->n);
- spin_unlock(&stats->lock);
+ spin_unlock_irq(&stats->lock);
ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
if (ret > 0) {
if (copy_to_user(buf, tbuf, ret))
@@ -175,10 +175,10 @@ static ssize_t average_write(struct file *filp, const char __user *buf,
struct mlx5_cmd_stats *stats;
stats = filp->private_data;
- spin_lock(&stats->lock);
+ spin_lock_irq(&stats->lock);
stats->sum = 0;
stats->n = 0;
- spin_unlock(&stats->lock);
+ spin_unlock_irq(&stats->lock);
*pos += count;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 2231d93cc7ad..64a61b286b2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
in->ctx.intr = vecidx;
- in->ctx.log_page_size = PAGE_SHIFT - 12;
+ in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->events_mask = cpu_to_be64(mask);
err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index bc0f5fb66e24..40a9f5ed814d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -159,6 +159,36 @@ struct mlx5_reg_host_endianess {
u8 rsvd[15];
};
+
+#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
+
+enum {
+ MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
+ CAP_MASK(MLX5_CAP_OFF_DCT, 1),
+};
+
+/* selectively copy writable fields clearing any reserved area
+ */
+static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from)
+{
+ u64 v64;
+
+ to->log_max_qp = from->log_max_qp & 0x1f;
+ to->log_max_ra_req_dc = from->log_max_ra_req_dc & 0x3f;
+ to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f;
+ to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f;
+ to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f;
+ to->log_max_atomic_size_qp = from->log_max_atomic_size_qp;
+ to->log_max_atomic_size_dc = from->log_max_atomic_size_dc;
+ v64 = be64_to_cpu(from->flags) & MLX5_CAP_BITS_RW_MASK;
+ to->flags = cpu_to_be64(v64);
+}
+
+enum {
+ HCA_CAP_OPMOD_GET_MAX = 0,
+ HCA_CAP_OPMOD_GET_CUR = 1,
+};
+
static int handle_hca_cap(struct mlx5_core_dev *dev)
{
struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
@@ -180,7 +210,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
}
query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
- query_ctx.hdr.opmod = cpu_to_be16(0x1);
+ query_ctx.hdr.opmod = cpu_to_be16(HCA_CAP_OPMOD_GET_CUR);
err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
query_out, sizeof(*query_out));
if (err)
@@ -192,8 +222,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
goto query_ex;
}
- memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
- sizeof(set_ctx->hca_cap));
+ copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap);
if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 5b44e2e46daf..35e514dc7b7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -37,31 +37,41 @@
#include "mlx5_core.h"
int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
- struct mlx5_create_mkey_mbox_in *in, int inlen)
+ struct mlx5_create_mkey_mbox_in *in, int inlen,
+ mlx5_cmd_cbk_t callback, void *context,
+ struct mlx5_create_mkey_mbox_out *out)
{
- struct mlx5_create_mkey_mbox_out out;
+ struct mlx5_create_mkey_mbox_out lout;
int err;
u8 key;
- memset(&out, 0, sizeof(out));
- spin_lock(&dev->priv.mkey_lock);
+ memset(&lout, 0, sizeof(lout));
+ spin_lock_irq(&dev->priv.mkey_lock);
key = dev->priv.mkey_key++;
- spin_unlock(&dev->priv.mkey_lock);
+ spin_unlock_irq(&dev->priv.mkey_lock);
in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
- err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+ if (callback) {
+ err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out),
+ callback, context);
+ return err;
+ } else {
+ err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout));
+ }
+
if (err) {
mlx5_core_dbg(dev, "cmd exec faile %d\n", err);
return err;
}
- if (out.hdr.status) {
- mlx5_core_dbg(dev, "status %d\n", out.hdr.status);
- return mlx5_cmd_status_to_err(&out.hdr);
+ if (lout.hdr.status) {
+ mlx5_core_dbg(dev, "status %d\n", lout.hdr.status);
+ return mlx5_cmd_status_to_err(&lout.hdr);
}
- mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key;
- mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key);
+ mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
+ mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
+ be32_to_cpu(lout.mkey), key, mr->key);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 7b12acf210f8..37b6ad1f9a1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -57,10 +57,13 @@ struct mlx5_pages_req {
};
struct fw_page {
- struct rb_node rb_node;
- u64 addr;
- struct page *page;
- u16 func_id;
+ struct rb_node rb_node;
+ u64 addr;
+ struct page *page;
+ u16 func_id;
+ unsigned long bitmask;
+ struct list_head list;
+ unsigned free_count;
};
struct mlx5_query_pages_inbox {
@@ -94,6 +97,11 @@ enum {
MAX_RECLAIM_TIME_MSECS = 5000,
};
+enum {
+ MLX5_MAX_RECLAIM_TIME_MILI = 5000,
+ MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / 4096,
+};
+
static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
{
struct rb_root *root = &dev->priv.page_root;
@@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
struct rb_node *parent = NULL;
struct fw_page *nfp;
struct fw_page *tfp;
+ int i;
while (*new) {
parent = *new;
@@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
return -EEXIST;
}
- nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
+ nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
if (!nfp)
return -ENOMEM;
nfp->addr = addr;
nfp->page = page;
nfp->func_id = func_id;
+ nfp->free_count = MLX5_NUM_4K_IN_PAGE;
+ for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
+ set_bit(i, &nfp->bitmask);
rb_link_node(&nfp->rb_node, parent, new);
rb_insert_color(&nfp->rb_node, root);
+ list_add(&nfp->list, &dev->priv.free_list);
return 0;
}
-static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
{
struct rb_root *root = &dev->priv.page_root;
struct rb_node *tmp = root->rb_node;
- struct page *result = NULL;
+ struct fw_page *result = NULL;
struct fw_page *tfp;
while (tmp) {
@@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
} else if (tfp->addr > addr) {
tmp = tmp->rb_right;
} else {
- rb_erase(&tfp->rb_node, root);
- result = tfp->page;
- kfree(tfp);
+ result = tfp;
break;
}
}
@@ -176,12 +187,98 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
return err;
}
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+{
+ struct fw_page *fp;
+ unsigned n;
+
+ if (list_empty(&dev->priv.free_list)) {
+ return -ENOMEM;
+ mlx5_core_warn(dev, "\n");
+ }
+
+ fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+ n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+ if (n >= MLX5_NUM_4K_IN_PAGE) {
+ mlx5_core_warn(dev, "alloc 4k bug\n");
+ return -ENOENT;
+ }
+ clear_bit(n, &fp->bitmask);
+ fp->free_count--;
+ if (!fp->free_count)
+ list_del(&fp->list);
+
+ *addr = fp->addr + n * 4096;
+
+ return 0;
+}
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+{
+ struct fw_page *fwp;
+ int n;
+
+ fwp = find_fw_page(dev, addr & PAGE_MASK);
+ if (!fwp) {
+ mlx5_core_warn(dev, "page not found\n");
+ return;
+ }
+
+ n = (addr & ~PAGE_MASK) % 4096;
+ fwp->free_count++;
+ set_bit(n, &fwp->bitmask);
+ if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+ rb_erase(&fwp->rb_node, &dev->priv.page_root);
+ if (fwp->free_count != 1)
+ list_del(&fwp->list);
+ dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(fwp->page);
+ kfree(fwp);
+ } else if (fwp->free_count == 1) {
+ list_add(&fwp->list, &dev->priv.free_list);
+ }
+}
+
+static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
+{
+ struct page *page;
+ u64 addr;
+ int err;
+
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page) {
+ mlx5_core_warn(dev, "failed to allocate page\n");
+ return -ENOMEM;
+ }
+ addr = dma_map_page(&dev->pdev->dev, page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&dev->pdev->dev, addr)) {
+ mlx5_core_warn(dev, "failed dma mapping page\n");
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ err = insert_page(dev, addr, page, func_id);
+ if (err) {
+ mlx5_core_err(dev, "failed to track allocated page\n");
+ goto out_mapping;
+ }
+
+ return 0;
+
+out_mapping:
+ dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+out_alloc:
+ __free_page(page);
+
+ return err;
+}
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
int notify_fail)
{
struct mlx5_manage_pages_inbox *in;
struct mlx5_manage_pages_outbox out;
- struct page *page;
+ struct mlx5_manage_pages_inbox *nin;
int inlen;
u64 addr;
int err;
@@ -196,27 +293,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
memset(&out, 0, sizeof(out));
for (i = 0; i < npages; i++) {
- page = alloc_page(GFP_HIGHUSER);
- if (!page) {
- err = -ENOMEM;
- mlx5_core_warn(dev, "failed to allocate page\n");
- goto out_alloc;
- }
- addr = dma_map_page(&dev->pdev->dev, page, 0,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
- if (dma_mapping_error(&dev->pdev->dev, addr)) {
- mlx5_core_warn(dev, "failed dma mapping page\n");
- __free_page(page);
- err = -ENOMEM;
- goto out_alloc;
- }
- err = insert_page(dev, addr, page, func_id);
+retry:
+ err = alloc_4k(dev, &addr);
if (err) {
- mlx5_core_err(dev, "failed to track allocated page\n");
- dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
- __free_page(page);
- err = -ENOMEM;
- goto out_alloc;
+ if (err == -ENOMEM)
+ err = alloc_system_page(dev, func_id);
+ if (err)
+ goto out_4k;
+
+ goto retry;
}
in->pas[i] = cpu_to_be64(addr);
}
@@ -226,7 +311,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
in->func_id = cpu_to_be16(func_id);
in->num_entries = cpu_to_be32(npages);
err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
- mlx5_core_dbg(dev, "err %d\n", err);
if (err) {
mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
goto out_alloc;
@@ -247,25 +331,22 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
out_alloc:
if (notify_fail) {
- memset(in, 0, inlen);
- memset(&out, 0, sizeof(out));
- in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
- in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
- if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)))
- mlx5_core_warn(dev, "\n");
- }
- for (i--; i >= 0; i--) {
- addr = be64_to_cpu(in->pas[i]);
- page = remove_page(dev, addr);
- if (!page) {
- mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
- addr);
- continue;
+ nin = kzalloc(sizeof(*nin), GFP_KERNEL);
+ if (!nin) {
+ mlx5_core_warn(dev, "allocation failed\n");
+ goto out_4k;
}
- dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
- __free_page(page);
+ memset(&out, 0, sizeof(out));
+ nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+ nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
+ if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out)))
+ mlx5_core_warn(dev, "page notify failed\n");
+ kfree(nin);
}
+out_4k:
+ for (i--; i >= 0; i--)
+ free_4k(dev, be64_to_cpu(in->pas[i]));
out_free:
mlx5_vfree(in);
return err;
@@ -276,7 +357,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
{
struct mlx5_manage_pages_inbox in;
struct mlx5_manage_pages_outbox *out;
- struct page *page;
int num_claimed;
int outlen;
u64 addr;
@@ -315,13 +395,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
for (i = 0; i < num_claimed; i++) {
addr = be64_to_cpu(out->pas[i]);
- page = remove_page(dev, addr);
- if (!page) {
- mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
- } else {
- dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
- __free_page(page);
- }
+ free_4k(dev, addr);
}
out_free:
@@ -381,14 +455,19 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
return give_pages(dev, func_id, npages, 0);
}
+enum {
+ MLX5_BLKS_FOR_RECLAIM_PAGES = 12
+};
+
static int optimal_reclaimed_pages(void)
{
struct mlx5_cmd_prot_block *block;
struct mlx5_cmd_layout *lay;
int ret;
- ret = (sizeof(lay->in) + sizeof(block->data) -
- sizeof(struct mlx5_manage_pages_outbox)) / 8;
+ ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
+ sizeof(struct mlx5_manage_pages_outbox)) /
+ FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);
return ret;
}
@@ -427,6 +506,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{
dev->priv.page_root = RB_ROOT;
+ INIT_LIST_HEAD(&dev->priv.free_list);
}
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 5eb4e31af22b..da78875807fc 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -230,6 +230,15 @@ enum {
MLX5_MAX_PAGE_SHIFT = 31
};
+enum {
+ MLX5_ADAPTER_PAGE_SHIFT = 12
+};
+
+enum {
+ MLX5_CAP_OFF_DCT = 41,
+ MLX5_CAP_OFF_CMDIF_CSUM = 46,
+};
+
struct mlx5_inbox_hdr {
__be16 opcode;
u8 rsvd[4];
@@ -319,9 +328,9 @@ struct mlx5_hca_cap {
u8 rsvd25[42];
__be16 log_uar_page_sz;
u8 rsvd26[28];
- u8 log_msx_atomic_size_qp;
+ u8 log_max_atomic_size_qp;
u8 rsvd27[2];
- u8 log_msx_atomic_size_dc;
+ u8 log_max_atomic_size_dc;
u8 rsvd28[76];
};
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6b8c496572c8..554548cd3dd4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -483,6 +483,7 @@ struct mlx5_priv {
struct rb_root page_root;
int fw_pages;
int reg_pages;
+ struct list_head free_list;
struct mlx5_core_health health;
@@ -557,9 +558,11 @@ typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
struct mlx5_cmd_work_ent {
struct mlx5_cmd_msg *in;
struct mlx5_cmd_msg *out;
+ void *uout;
+ int uout_size;
mlx5_cmd_cbk_t callback;
void *context;
- int idx;
+ int idx;
struct completion done;
struct mlx5_cmd *cmd;
struct work_struct work;
@@ -570,6 +573,7 @@ struct mlx5_cmd_work_ent {
u8 token;
struct timespec ts1;
struct timespec ts2;
+ u16 op;
};
struct mlx5_pas {
@@ -653,6 +657,9 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int out_size);
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size, mlx5_cmd_cbk_t callback,
+ void *context);
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
@@ -676,7 +683,9 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq);
int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
- struct mlx5_create_mkey_mbox_in *in, int inlen);
+ struct mlx5_create_mkey_mbox_in *in, int inlen,
+ mlx5_cmd_cbk_t callback, void *context,
+ struct mlx5_create_mkey_mbox_out *out);
int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
struct mlx5_query_mkey_mbox_out *out, int outlen);
@@ -745,6 +754,11 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
return mkey_idx << 8;
}
+static inline u8 mlx5_mkey_variant(u32 mkey)
+{
+ return mkey & 0xff;
+}
+
enum {
MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0,
MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e393171e2fac..979874c627ee 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -67,12 +67,14 @@ enum rdma_node_type {
RDMA_NODE_IB_CA = 1,
RDMA_NODE_IB_SWITCH,
RDMA_NODE_IB_ROUTER,
- RDMA_NODE_RNIC
+ RDMA_NODE_RNIC,
+ RDMA_NODE_USNIC,
};
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
- RDMA_TRANSPORT_IWARP
+ RDMA_TRANSPORT_IWARP,
+ RDMA_TRANSPORT_USNIC
};
enum rdma_transport_type
@@ -1436,6 +1438,7 @@ struct ib_device {
int uverbs_abi_ver;
u64 uverbs_cmd_mask;
+ u64 uverbs_ex_cmd_mask;
char node_desc[64];
__be64 node_guid;
@@ -2384,4 +2387,17 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr, int domain);
int ib_destroy_flow(struct ib_flow *flow_id);
+static inline int ib_check_mr_access(int flags)
+{
+ /*
+ * Local write permission is required if remote write or
+ * remote atomic permission is also requested.
+ */
+ if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+ !(flags & IB_ACCESS_LOCAL_WRITE))
+ return -EINVAL;
+
+ return 0;
+}
+
#endif /* IB_VERBS_H */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index e3ddd86c90a6..cbfdd4ca9510 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -87,10 +87,11 @@ enum {
IB_USER_VERBS_CMD_CLOSE_XRCD,
IB_USER_VERBS_CMD_CREATE_XSRQ,
IB_USER_VERBS_CMD_OPEN_QP,
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
- IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
- IB_USER_VERBS_CMD_DESTROY_FLOW
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+};
+
+enum {
+ IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
+ IB_USER_VERBS_EX_CMD_DESTROY_FLOW
};
/*
@@ -122,22 +123,24 @@ struct ib_uverbs_comp_event_desc {
* the rest of the command struct based on these value.
*/
+#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff
+#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u
+#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24
+
+#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80
+
struct ib_uverbs_cmd_hdr {
__u32 command;
__u16 in_words;
__u16 out_words;
};
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-struct ib_uverbs_cmd_hdr_ex {
- __u32 command;
- __u16 in_words;
- __u16 out_words;
+struct ib_uverbs_ex_cmd_hdr {
+ __u64 response;
__u16 provider_in_words;
__u16 provider_out_words;
__u32 cmd_hdr_reserved;
};
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
struct ib_uverbs_get_context {
__u64 response;
@@ -700,62 +703,71 @@ struct ib_uverbs_detach_mcast {
__u64 driver_data[0];
};
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-struct ib_kern_eth_filter {
+struct ib_uverbs_flow_spec_hdr {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ /* followed by flow_spec */
+ __u64 flow_spec_data[0];
+};
+
+struct ib_uverbs_flow_eth_filter {
__u8 dst_mac[6];
__u8 src_mac[6];
__be16 ether_type;
__be16 vlan_tag;
};
-struct ib_kern_spec_eth {
- __u32 type;
- __u16 size;
- __u16 reserved;
- struct ib_kern_eth_filter val;
- struct ib_kern_eth_filter mask;
+struct ib_uverbs_flow_spec_eth {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_eth_filter val;
+ struct ib_uverbs_flow_eth_filter mask;
};
-struct ib_kern_ipv4_filter {
+struct ib_uverbs_flow_ipv4_filter {
__be32 src_ip;
__be32 dst_ip;
};
-struct ib_kern_spec_ipv4 {
- __u32 type;
- __u16 size;
- __u16 reserved;
- struct ib_kern_ipv4_filter val;
- struct ib_kern_ipv4_filter mask;
+struct ib_uverbs_flow_spec_ipv4 {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_ipv4_filter val;
+ struct ib_uverbs_flow_ipv4_filter mask;
};
-struct ib_kern_tcp_udp_filter {
+struct ib_uverbs_flow_tcp_udp_filter {
__be16 dst_port;
__be16 src_port;
};
-struct ib_kern_spec_tcp_udp {
- __u32 type;
- __u16 size;
- __u16 reserved;
- struct ib_kern_tcp_udp_filter val;
- struct ib_kern_tcp_udp_filter mask;
-};
-
-struct ib_kern_spec {
+struct ib_uverbs_flow_spec_tcp_udp {
union {
+ struct ib_uverbs_flow_spec_hdr hdr;
struct {
__u32 type;
__u16 size;
__u16 reserved;
};
- struct ib_kern_spec_eth eth;
- struct ib_kern_spec_ipv4 ipv4;
- struct ib_kern_spec_tcp_udp tcp_udp;
};
+ struct ib_uverbs_flow_tcp_udp_filter val;
+ struct ib_uverbs_flow_tcp_udp_filter mask;
};
-struct ib_kern_flow_attr {
+struct ib_uverbs_flow_attr {
__u32 type;
__u16 size;
__u16 priority;
@@ -767,13 +779,13 @@ struct ib_kern_flow_attr {
* struct ib_flow_spec_xxx
* struct ib_flow_spec_yyy
*/
+ struct ib_uverbs_flow_spec_hdr flow_specs[0];
};
struct ib_uverbs_create_flow {
__u32 comp_mask;
- __u64 response;
__u32 qp_handle;
- struct ib_kern_flow_attr flow_attr;
+ struct ib_uverbs_flow_attr flow_attr;
};
struct ib_uverbs_create_flow_resp {
@@ -785,7 +797,6 @@ struct ib_uverbs_destroy_flow {
__u32 comp_mask;
__u32 flow_handle;
};
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
struct ib_uverbs_create_srq {
__u64 response;