summaryrefslogtreecommitdiff
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile1
-rw-r--r--drivers/infiniband/core/agent.c1
-rw-r--r--drivers/infiniband/core/cache.c166
-rw-r--r--drivers/infiniband/core/cgroup.c62
-rw-r--r--drivers/infiniband/core/cm.c74
-rw-r--r--drivers/infiniband/core/cma.c225
-rw-r--r--drivers/infiniband/core/cma_configfs.c42
-rw-r--r--drivers/infiniband/core/core_priv.h45
-rw-r--r--drivers/infiniband/core/cq.c16
-rw-r--r--drivers/infiniband/core/device.c45
-rw-r--r--drivers/infiniband/core/fmr_pool.c1
-rw-r--r--drivers/infiniband/core/iwcm.c21
-rw-r--r--drivers/infiniband/core/iwpm_msg.c1
-rw-r--r--drivers/infiniband/core/iwpm_util.c12
-rw-r--r--drivers/infiniband/core/mad.c50
-rw-r--r--drivers/infiniband/core/multicast.c7
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c85
-rw-r--r--drivers/infiniband/core/sysfs.c2
-rw-r--r--drivers/infiniband/core/ucm.c9
-rw-r--r--drivers/infiniband/core/ucma.c5
-rw-r--r--drivers/infiniband/core/umem.c10
-rw-r--r--drivers/infiniband/core/umem_odp.c96
-rw-r--r--drivers/infiniband/core/umem_rbtree.c21
-rw-r--r--drivers/infiniband/core/user_mad.c6
-rw-r--r--drivers/infiniband/core/uverbs.h2
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c386
-rw-r--r--drivers/infiniband/core/uverbs_main.c30
-rw-r--r--drivers/infiniband/core/verbs.c142
28 files changed, 1135 insertions, 428 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index edaae9f9853c..e426ac877d19 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -13,6 +13,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
+ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
ib_cm-y := cm.o
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 4fa524dfb6cf..11dacd97a667 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -156,7 +156,6 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- dev_err(&device->dev, "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 1a2984c28b95..b1371eb9f46c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -314,14 +314,13 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
int ret = 0;
struct net_device *idev;
int empty;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
@@ -369,11 +368,10 @@ out_unlock:
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -399,12 +397,11 @@ out_unlock:
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *ndev)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
bool deleted = false;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -428,10 +425,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (index < 0 || index >= table->sz)
return -EINVAL;
@@ -455,14 +451,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
unsigned long mask,
u8 *port, u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
u8 p;
int local_index;
unsigned long flags;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
- table = ports_table[p];
+ table = ib_dev->cache.ports[p].gid;
read_lock_irqsave(&table->rwlock, flags);
local_index = find_gid(table, gid, val, false, mask, NULL);
if (local_index >= 0) {
@@ -503,18 +498,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
u16 *index)
{
int local_index;
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
unsigned long flags;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev))
+ if (!rdma_is_port_valid(ib_dev, port))
return -ENOENT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -562,21 +555,17 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
void *context,
u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned int i;
unsigned long flags;
bool found = false;
- if (!ports_table)
- return -EOPNOTSUPP;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev) ||
+ if (!rdma_is_port_valid(ib_dev, port) ||
!rdma_protocol_roce(ib_dev, port))
return -EPROTONOSUPPORT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
@@ -668,14 +657,13 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
make_default_gid(ndev, &gid);
memset(&gid_attr, 0, sizeof(gid_attr));
@@ -766,75 +754,64 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
- struct ib_gid_table **table;
+ struct ib_gid_table *table;
int err = 0;
- table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
-
- if (!table) {
- pr_warn("failed to allocate ib gid cache for %s\n",
- ib_dev->name);
- return -ENOMEM;
- }
-
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
- table[port] =
+ table =
alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
- if (!table[port]) {
+ if (!table) {
err = -ENOMEM;
goto rollback_table_setup;
}
err = gid_table_reserve_default(ib_dev,
port + rdma_start_port(ib_dev),
- table[port]);
+ table);
if (err)
goto rollback_table_setup;
+ ib_dev->cache.ports[port].gid = table;
}
- ib_dev->cache.gid_cache = table;
return 0;
rollback_table_setup:
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
- release_gid_table(table[port]);
+ table);
+ release_gid_table(table);
}
- kfree(table);
return err;
}
static void gid_table_release_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
- release_gid_table(table[port]);
-
- kfree(table);
- ib_dev->cache.gid_cache = NULL;
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+ release_gid_table(table);
+ ib_dev->cache.ports[port].gid = NULL;
+ }
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
+ table);
+ }
}
static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -864,12 +841,12 @@ int ib_get_cached_gid(struct ib_device *device,
{
int res;
unsigned long flags;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
- struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+ struct ib_gid_table *table;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ table = device->cache.ports[port_num - rdma_start_port(device)].gid;
read_lock_irqsave(&table->rwlock, flags);
res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
read_unlock_irqrestore(&table->rwlock, flags);
@@ -916,12 +893,12 @@ int ib_get_cached_pkey(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
@@ -945,12 +922,12 @@ int ib_find_cached_pkey(struct ib_device *device,
int ret = -ENOENT;
int partial_ix = -1;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -985,12 +962,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
int i;
int ret = -ENOENT;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -1014,17 +991,36 @@ int ib_get_cached_lmc(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
+ *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
+int ib_get_cached_port_state(struct ib_device *device,
+ u8 port_num,
+ enum ib_port_state *port_state)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+ *port_state = device->cache.ports[port_num
+ - rdma_start_port(device)].port_state;
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_port_state);
+
static void ib_cache_update(struct ib_device *device,
u8 port)
{
@@ -1037,14 +1033,13 @@ static void ib_cache_update(struct ib_device *device,
int i;
int ret;
struct ib_gid_table *table;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
bool use_roce_gid_table =
rdma_cap_roce_gid_table(device, port);
- if (port < rdma_start_port(device) || port > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port))
return;
- table = ports_table[port - rdma_start_port(device)];
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
@@ -1096,9 +1091,10 @@ static void ib_cache_update(struct ib_device *device,
write_lock_irq(&device->cache.lock);
- old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];
+ old_pkey_cache = device->cache.ports[port -
+ rdma_start_port(device)].pkey;
- device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
+ device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
if (!use_roce_gid_table) {
write_lock(&table->rwlock);
for (i = 0; i < gid_cache->table_len; i++) {
@@ -1108,7 +1104,9 @@ static void ib_cache_update(struct ib_device *device,
write_unlock(&table->rwlock);
}
- device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].port_state =
+ tprops->state;
write_unlock_irq(&device->cache.lock);
@@ -1161,23 +1159,17 @@ int ib_cache_setup_one(struct ib_device *device)
rwlock_init(&device->cache.lock);
- device->cache.pkey_cache =
- kzalloc(sizeof *device->cache.pkey_cache *
+ device->cache.ports =
+ kzalloc(sizeof(*device->cache.ports) *
(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
- device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
- (rdma_end_port(device) -
- rdma_start_port(device) + 1),
- GFP_KERNEL);
- if (!device->cache.pkey_cache ||
- !device->cache.lmc_cache) {
- pr_warn("Couldn't allocate cache for %s\n", device->name);
- return -ENOMEM;
+ if (!device->cache.ports) {
+ err = -ENOMEM;
+ goto out;
}
err = gid_table_setup_one(device);
if (err)
- /* Allocated memory will be cleaned in the release function */
- return err;
+ goto out;
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device));
@@ -1192,6 +1184,7 @@ int ib_cache_setup_one(struct ib_device *device)
err:
gid_table_cleanup_one(device);
+out:
return err;
}
@@ -1205,14 +1198,11 @@ void ib_cache_release_one(struct ib_device *device)
* all the device's resources when the cache could no
* longer be accessed.
*/
- if (device->cache.pkey_cache)
- for (p = 0;
- p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- kfree(device->cache.pkey_cache[p]);
+ for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
+ kfree(device->cache.ports[p].pkey);
gid_table_release_one(device);
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+ kfree(device->cache.ports);
}
void ib_cache_cleanup_one(struct ib_device *device)
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
new file mode 100644
index 000000000000..126ac5f99db7
--- /dev/null
+++ b/drivers/infiniband/core/cgroup.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "core_priv.h"
+
+/**
+ * ib_device_register_rdmacg - register with rdma cgroup.
+ * @device: device to register to participate in resource
+ * accounting by rdma cgroup.
+ *
+ * Register with the rdma cgroup. Should be called before
+ * exposing rdma device to user space applications to avoid
+ * resource accounting leak.
+ * Returns 0 on success or otherwise failure code.
+ */
+int ib_device_register_rdmacg(struct ib_device *device)
+{
+ device->cg_device.name = device->name;
+ return rdmacg_register_device(&device->cg_device);
+}
+
+/**
+ * ib_device_unregister_rdmacg - unregister with rdma cgroup.
+ * @device: device to unregister.
+ *
+ * Unregister with the rdma cgroup. Should be called after
+ * all the resources are deallocated, and after a stage when any
+ * other resource allocation by user application cannot be done
+ * for this device to avoid any leak in accounting.
+ */
+void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+ rdmacg_unregister_device(&device->cg_device);
+}
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ return rdmacg_try_charge(&cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_try_charge);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ rdmacg_uncharge(cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_uncharge);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 71c7c4c328ef..6535f09dc575 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -57,6 +57,54 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
+static const char * const ibcm_rej_reason_strs[] = {
+ [IB_CM_REJ_NO_QP] = "no QP",
+ [IB_CM_REJ_NO_EEC] = "no EEC",
+ [IB_CM_REJ_NO_RESOURCES] = "no resources",
+ [IB_CM_REJ_TIMEOUT] = "timeout",
+ [IB_CM_REJ_UNSUPPORTED] = "unsupported",
+ [IB_CM_REJ_INVALID_COMM_ID] = "invalid comm ID",
+ [IB_CM_REJ_INVALID_COMM_INSTANCE] = "invalid comm instance",
+ [IB_CM_REJ_INVALID_SERVICE_ID] = "invalid service ID",
+ [IB_CM_REJ_INVALID_TRANSPORT_TYPE] = "invalid transport type",
+ [IB_CM_REJ_STALE_CONN] = "stale conn",
+ [IB_CM_REJ_RDC_NOT_EXIST] = "RDC not exist",
+ [IB_CM_REJ_INVALID_GID] = "invalid GID",
+ [IB_CM_REJ_INVALID_LID] = "invalid LID",
+ [IB_CM_REJ_INVALID_SL] = "invalid SL",
+ [IB_CM_REJ_INVALID_TRAFFIC_CLASS] = "invalid traffic class",
+ [IB_CM_REJ_INVALID_HOP_LIMIT] = "invalid hop limit",
+ [IB_CM_REJ_INVALID_PACKET_RATE] = "invalid packet rate",
+ [IB_CM_REJ_INVALID_ALT_GID] = "invalid alt GID",
+ [IB_CM_REJ_INVALID_ALT_LID] = "invalid alt LID",
+ [IB_CM_REJ_INVALID_ALT_SL] = "invalid alt SL",
+ [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS] = "invalid alt traffic class",
+ [IB_CM_REJ_INVALID_ALT_HOP_LIMIT] = "invalid alt hop limit",
+ [IB_CM_REJ_INVALID_ALT_PACKET_RATE] = "invalid alt packet rate",
+ [IB_CM_REJ_PORT_CM_REDIRECT] = "port CM redirect",
+ [IB_CM_REJ_PORT_REDIRECT] = "port redirect",
+ [IB_CM_REJ_INVALID_MTU] = "invalid MTU",
+ [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources",
+ [IB_CM_REJ_CONSUMER_DEFINED] = "consumer defined",
+ [IB_CM_REJ_INVALID_RNR_RETRY] = "invalid RNR retry",
+ [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID] = "duplicate local comm ID",
+ [IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
+ [IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label",
+ [IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
+};
+
+const char *__attribute_const__ ibcm_reject_msg(int reason)
+{
+ size_t index = reason;
+
+ if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
+ ibcm_rej_reason_strs[index])
+ return ibcm_rej_reason_strs[index];
+ else
+ return "unrecognized reason";
+}
+EXPORT_SYMBOL(ibcm_reject_msg);
+
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
@@ -1582,6 +1630,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
+ struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1603,10 +1652,18 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
+ if (cur_cm_id_priv) {
+ cm_id = &cur_cm_id_priv->id;
+ ib_send_cm_dreq(cm_id, NULL, 0);
+ cm_deref_id(cur_cm_id_priv);
+ }
return NULL;
}
@@ -1984,6 +2041,9 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_rep_msg *rep_msg;
int ret;
+ struct cm_id_private *cur_cm_id_priv;
+ struct ib_cm_id *cm_id;
+ struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
@@ -2018,16 +2078,26 @@ static int cm_rep_handler(struct cm_work *work)
goto error;
}
/* Check for a stale connection. */
- if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
+ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
+ if (timewait_info) {
rb_erase(&cm_id_priv->timewait_info->remote_id_node,
&cm.remote_id_table);
cm_id_priv->timewait_info->inserted_remote_id = 0;
+ cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
+ if (cur_cm_id_priv) {
+ cm_id = &cur_cm_id_priv->id;
+ ib_send_cm_dreq(cm_id, NULL, 0);
+ cm_deref_id(cur_cm_id_priv);
+ }
+
goto error;
}
spin_unlock(&cm.lock);
@@ -3339,6 +3409,8 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
+ pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
+ state, ib_wc_status_msg(wc_status));
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2a6fc47a1dfb..acd10d666f1c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -101,6 +101,49 @@ const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
}
EXPORT_SYMBOL(rdma_event_msg);
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+ int reason)
+{
+ if (rdma_ib_or_roce(id->device, id->port_num))
+ return ibcm_reject_msg(reason);
+
+ if (rdma_protocol_iwarp(id->device, id->port_num))
+ return iwcm_reject_msg(reason);
+
+ WARN_ON_ONCE(1);
+ return "unrecognized transport";
+}
+EXPORT_SYMBOL(rdma_reject_msg);
+
+bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+{
+ if (rdma_ib_or_roce(id->device, id->port_num))
+ return reason == IB_CM_REJ_CONSUMER_DEFINED;
+
+ if (rdma_protocol_iwarp(id->device, id->port_num))
+ return reason == -ECONNREFUSED;
+
+ WARN_ON_ONCE(1);
+ return false;
+}
+EXPORT_SYMBOL(rdma_is_consumer_reject);
+
+const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
+ struct rdma_cm_event *ev, u8 *data_len)
+{
+ const void *p;
+
+ if (rdma_is_consumer_reject(id, ev->status)) {
+ *data_len = ev->param.conn.private_data_len;
+ p = ev->param.conn.private_data;
+ } else {
+ *data_len = 0;
+ p = NULL;
+ }
+ return p;
+}
+EXPORT_SYMBOL(rdma_consumer_reject_data);
+
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
@@ -116,7 +159,7 @@ static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
-static int cma_pernet_id;
+static unsigned int cma_pernet_id;
struct cma_pernet {
struct idr tcp_ps;
@@ -155,6 +198,7 @@ struct cma_device {
atomic_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
+ u8 *default_roce_tos;
};
struct rdma_bind_list {
@@ -226,8 +270,7 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
int cma_get_default_gid_type(struct cma_device *cma_dev,
unsigned int port)
{
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
@@ -239,8 +282,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
{
unsigned long supported_gids;
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
@@ -254,6 +296,25 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+ u8 default_roce_tos)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
+ default_roce_tos;
+
+ return 0;
+}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
return cma_dev->device;
@@ -300,6 +361,7 @@ struct rdma_id_private {
u32 options;
u8 srq;
u8 tos;
+ bool tos_set;
u8 reuseaddr;
u8 afonly;
enum ib_gid_type gid_type;
@@ -666,6 +728,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
union ib_gid gid, sgid, *dgid;
u16 pkey, index;
u8 p;
+ enum ib_port_state port_state;
int i;
cma_dev = NULL;
@@ -681,6 +744,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
continue;
+ if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
+ continue;
for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
&gid, NULL);
i++) {
@@ -692,7 +757,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
}
if (!cma_dev && (gid.global.subnet_prefix ==
- dgid->global.subnet_prefix)) {
+ dgid->global.subnet_prefix) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
sgid = gid;
id_priv->id.port_num = p;
@@ -735,6 +801,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
+ id_priv->tos_set = false;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
@@ -1646,6 +1713,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
return 0;
reject:
+ pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
@@ -1717,6 +1785,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
+ pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
+ ib_event->param.rej_rcvd.reason));
cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
@@ -2223,6 +2293,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->tos = (u8) tos;
+ id_priv->tos_set = true;
}
EXPORT_SYMBOL(rdma_set_service_type);
@@ -2242,6 +2313,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
}
queue_work(cma_wq, &work->work);
@@ -2424,14 +2497,12 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
struct net_device *dev;
prio = rt_tos2priority(tos);
- dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_real_dev(ndev) : ndev;
-
+ dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
#if IS_ENABLED(CONFIG_VLAN_8021Q)
- if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(ndev))
return (vlan_dev_get_egress_qos_mask(ndev, prio) &
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
#endif
@@ -2457,6 +2528,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
+ u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -2530,7 +2604,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
- route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
+ route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
+ route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
@@ -2609,8 +2684,8 @@ static void cma_set_loopback(struct sockaddr *addr)
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev, *cur_dev;
- struct ib_port_attr port_attr;
union ib_gid gid;
+ enum ib_port_state port_state;
u16 pkey;
int ret;
u8 p;
@@ -2626,8 +2701,8 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
cma_dev = cur_dev;
for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
- if (!ib_query_port(cur_dev->device, p, &port_attr) &&
- port_attr.state == IB_PORT_ACTIVE) {
+ if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
goto port_found;
}
@@ -2677,8 +2752,14 @@ static void addr_handler(int status, struct sockaddr *src_addr,
goto out;
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
- if (!status && !id_priv->cma_dev)
+ if (!status && !id_priv->cma_dev) {
status = cma_acquire_dev(id_priv, NULL);
+ if (status)
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
+ status);
+ } else {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
+ }
if (status) {
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2768,7 +2849,8 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (!src_addr || !src_addr->sa_family) {
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
src_addr->sa_family = dst_addr->sa_family;
- if (dst_addr->sa_family == AF_INET6) {
+ if (IS_ENABLED(CONFIG_IPV6) &&
+ dst_addr->sa_family == AF_INET6) {
struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
@@ -2789,20 +2871,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
+ if (ret) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return ret;
+ }
}
- if (cma_family(id_priv) != dst_addr->sa_family)
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
atomic_inc(&id_priv->refcount);
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
@@ -2918,6 +3006,43 @@ err:
return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
}
+static int cma_port_is_unique(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *cur_id;
+ struct sockaddr *daddr = cma_dst_addr(id_priv);
+ struct sockaddr *saddr = cma_src_addr(id_priv);
+ __be16 dport = cma_port(daddr);
+
+ hlist_for_each_entry(cur_id, &bind_list->owners, node) {
+ struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
+ struct sockaddr *cur_saddr = cma_src_addr(cur_id);
+ __be16 cur_dport = cma_port(cur_daddr);
+
+ if (id_priv == cur_id)
+ continue;
+
+ /* different dest port -> unique */
+ if (!cma_any_port(cur_daddr) &&
+ (dport != cur_dport))
+ continue;
+
+ /* different src address -> unique */
+ if (!cma_any_addr(saddr) &&
+ !cma_any_addr(cur_saddr) &&
+ cma_addr_cmp(saddr, cur_saddr))
+ continue;
+
+ /* different dst address -> unique */
+ if (!cma_any_addr(cur_daddr) &&
+ cma_addr_cmp(daddr, cur_daddr))
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ return 0;
+}
+
static int cma_alloc_any_port(enum rdma_port_space ps,
struct rdma_id_private *id_priv)
{
@@ -2930,9 +3055,19 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
retry:
- if (last_used_port != rover &&
- !cma_ps_find(net, ps, (unsigned short)rover)) {
- int ret = cma_alloc_port(ps, id_priv, rover);
+ if (last_used_port != rover) {
+ struct rdma_bind_list *bind_list;
+ int ret;
+
+ bind_list = cma_ps_find(net, ps, (unsigned short)rover);
+
+ if (!bind_list) {
+ ret = cma_alloc_port(ps, id_priv, rover);
+ } else {
+ ret = cma_port_is_unique(bind_list, id_priv);
+ if (!ret)
+ cma_bind_port(bind_list, id_priv);
+ }
/*
* Remember previously used port number in order to avoid
* re-using same port immediately after it is closed.
@@ -3161,6 +3296,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
int ret;
+ struct sockaddr *daddr;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
@@ -3200,6 +3336,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err2;
+ daddr = cma_dst_addr(id_priv);
+ daddr->sa_family = addr->sa_family;
+
return 0;
err2:
if (id_priv->cma_dev)
@@ -3264,10 +3403,13 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (rep->status != IB_SIDR_SUCCESS) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = ib_event->param.sidr_rep_rcvd.status;
+ pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
+ event.status);
break;
}
ret = cma_set_qkey(id_priv, rep->qkey);
if (ret) {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret);
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = ret;
break;
@@ -3539,6 +3681,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
+ if (!conn_param)
+ return -EINVAL;
+
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
@@ -3716,10 +3861,17 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ else
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
+ status);
mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp)
+ if (!status && id_priv->id.qp) {
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
be16_to_cpu(multicast->rec.mlid));
+ if (status)
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
+ status);
+ }
mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
@@ -4185,15 +4337,21 @@ static void cma_add_one(struct ib_device *device)
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type) {
- kfree(cma_dev);
- return;
- }
+ if (!cma_dev->default_gid_type)
+ goto free_cma_dev;
+
+ cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_roce_tos),
+ GFP_KERNEL);
+ if (!cma_dev->default_roce_tos)
+ goto free_gid_type;
+
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
+ cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
}
init_completion(&cma_dev->comp);
@@ -4206,6 +4364,16 @@ static void cma_add_one(struct ib_device *device)
list_for_each_entry(id_priv, &listen_any_list, list)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
+
+ return;
+
+free_gid_type:
+ kfree(cma_dev->default_gid_type);
+
+free_cma_dev:
+ kfree(cma_dev);
+
+ return;
}
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
@@ -4274,6 +4442,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 41573df1d9fc..54076a3e8007 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -139,8 +139,50 @@ static ssize_t default_roce_mode_store(struct config_item *item,
CONFIGFS_ATTR(, default_roce_mode);
+static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ tos = cma_get_default_roce_tos(cma_dev, group->port_num);
+ cma_configfs_params_put(cma_dev);
+
+ return sprintf(buf, "%u\n", tos);
+}
+
+static ssize_t default_roce_tos_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = kstrtou8(buf, 0, &tos);
+ if (ret)
+ return ret;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ ret = cma_set_default_roce_tos(cma_dev, group->port_num, tos);
+ cma_configfs_params_put(cma_dev);
+
+ return ret ? ret : strnlen(buf, count);
+}
+
+CONFIGFS_ATTR(, default_roce_tos);
+
static struct configfs_attribute *cma_configfs_attributes[] = {
&attr_default_roce_mode,
+ &attr_default_roce_tos,
NULL,
};
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 19d499dcab76..cb7d372e4bdf 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -35,6 +35,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/cgroup_rdma.h>
#include <rdma/ib_verbs.h>
@@ -62,6 +63,9 @@ int cma_get_default_gid_type(struct cma_device *cma_dev,
int cma_set_default_gid_type(struct cma_device *cma_dev,
unsigned int port,
enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port,
+ u8 default_roce_tos);
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
int ib_device_register_sysfs(struct ib_device *device,
@@ -72,9 +76,6 @@ void ib_device_unregister_sysfs(struct ib_device *device);
void ib_cache_setup(void);
void ib_cache_cleanup(void);
-int ib_resolve_eth_dmac(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask);
-
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
@@ -124,17 +125,39 @@ int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+#ifdef CONFIG_CGROUP_RDMA
+int ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_unregister_rdmacg(struct ib_device *device);
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+#else
+static inline int ib_device_register_rdmacg(struct ib_device *device)
+{ return 0; }
+
+static inline void ib_device_unregister_rdmacg(struct ib_device *device)
+{ }
+
+static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ return 0; }
+
+static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ }
+#endif
+
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
struct net_device *upper)
{
- struct net_device *_upper = NULL;
- struct list_head *iter;
-
- netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
- if (_upper == upper)
- break;
-
- return _upper == upper;
+ return netdev_has_upper_dev_all_rcu(dev, upper);
}
int addr_init(void);
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a754fc727de5..f2ae75fa3128 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -29,7 +29,13 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
{
int i, n, completed = 0;
- while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+ /*
+ * budget might be (-1) if the caller does not
+ * want to bound this call, thus we need unsigned
+ * minimum here.
+ */
+ while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
+ budget - completed), cq->wc)) > 0) {
for (i = 0; i < n; i++) {
struct ib_wc *wc = &cq->wc[i];
@@ -58,8 +64,8 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
* context and does not ask for completion interrupts from the HCA.
*
- * Note: for compatibility reasons -1 can be passed in %budget for unlimited
- * polling. Do not use this feature in new code, it will be removed soon.
+ * Note: do not pass -1 as %budget unless it is guaranteed that the number
+ * of completions that will be processed is small.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
@@ -120,7 +126,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
- * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
@@ -196,7 +202,7 @@ void ib_free_cq(struct ib_cq *cq)
irq_poll_disable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
- flush_work(&cq->work);
+ cancel_work_sync(&cq->work);
break;
default:
WARN_ON_ONCE(1);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 760ef603a468..7c9e34d679d3 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -254,11 +254,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
unsigned long flags;
context = kmalloc(sizeof *context, GFP_KERNEL);
- if (!context) {
- pr_warn("Couldn't allocate client context for %s/%s\n",
- device->name, client->name);
+ if (!context)
return -ENOMEM;
- }
context->client = client;
context->data = NULL;
@@ -336,6 +333,29 @@ int ib_register_device(struct ib_device *device,
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+ struct device *parent = device->dev.parent;
+
+ WARN_ON_ONCE(!parent);
+ WARN_ON_ONCE(device->dma_device);
+ if (device->dev.dma_ops) {
+ /*
+ * The caller provided custom DMA operations. Copy the
+ * DMA-related fields that are used by e.g. dma_alloc_coherent()
+ * into device->dev.
+ */
+ device->dma_device = &device->dev;
+ if (!device->dev.dma_mask)
+ device->dev.dma_mask = parent->dma_mask;
+ if (!device->dev.coherent_dma_mask)
+ device->dev.coherent_dma_mask =
+ parent->coherent_dma_mask;
+ } else {
+ /*
+ * The caller did not provide custom DMA operations. Use the
+ * DMA mapping operations of the parent device.
+ */
+ device->dma_device = parent;
+ }
mutex_lock(&device_mutex);
@@ -363,10 +383,18 @@ int ib_register_device(struct ib_device *device,
goto out;
}
+ ret = ib_device_register_rdmacg(device);
+ if (ret) {
+ pr_warn("Couldn't register device with rdma cgroup\n");
+ ib_cache_cleanup_one(device);
+ goto out;
+ }
+
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
+ ib_device_unregister_rdmacg(device);
ib_cache_cleanup_one(device);
goto out;
}
@@ -375,6 +403,7 @@ int ib_register_device(struct ib_device *device,
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
+ ib_device_unregister_rdmacg(device);
ib_cache_cleanup_one(device);
goto out;
}
@@ -424,6 +453,7 @@ void ib_unregister_device(struct ib_device *device)
mutex_unlock(&device_mutex);
+ ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
ib_cache_cleanup_one(device);
@@ -662,7 +692,7 @@ int ib_query_port(struct ib_device *device,
union ib_gid gid;
int err;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
memset(port_attr, 0, sizeof(*port_attr));
@@ -828,7 +858,7 @@ int ib_modify_port(struct ib_device *device,
if (!device->modify_port)
return -ENOSYS;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
@@ -999,8 +1029,7 @@ static int __init ib_core_init(void)
return -ENOMEM;
ib_comp_wq = alloc_workqueue("ib-comp-wq",
- WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
- WQ_UNBOUND_MAX_ACTIVE);
+ WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
if (!ib_comp_wq) {
ret = -ENOMEM;
goto err;
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdbb1f1a6d97..cdfad5f26212 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -247,7 +247,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
GFP_KERNEL);
if (!pool->cache_bucket) {
- pr_warn(PFX "Failed to allocate cache in pool\n");
ret = -ENOMEM;
goto out_free_pool;
}
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 5495e22839a7..31661b5c1743 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -59,6 +59,27 @@ MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");
+static const char * const iwcm_rej_reason_strs[] = {
+ [ECONNRESET] = "reset by remote host",
+ [ECONNREFUSED] = "refused by remote application",
+ [ETIMEDOUT] = "setup timeout",
+};
+
+const char *__attribute_const__ iwcm_reject_msg(int reason)
+{
+ size_t index;
+
+ /* iWARP uses negative errnos */
+ index = -reason;
+
+ if (index < ARRAY_SIZE(iwcm_rej_reason_strs) &&
+ iwcm_rej_reason_strs[index])
+ return iwcm_rej_reason_strs[index];
+ else
+ return "unrecognized reason";
+}
+EXPORT_SYMBOL(iwcm_reject_msg);
+
static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 1c41b95cefec..a0e7c16d8bd8 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -604,7 +604,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
}
rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC);
if (!rem_info) {
- pr_err("%s: Unable to allocate a remote info\n", __func__);
ret = -ENOMEM;
return ret;
}
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index ade71e7f0131..3ef51a96bbf1 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -62,7 +62,6 @@ int iwpm_init(u8 nl_client)
sizeof(struct hlist_head), GFP_KERNEL);
if (!iwpm_hash_bucket) {
ret = -ENOMEM;
- pr_err("%s Unable to create mapinfo hash table\n", __func__);
goto init_exit;
}
iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE *
@@ -70,7 +69,6 @@ int iwpm_init(u8 nl_client)
if (!iwpm_reminfo_bucket) {
kfree(iwpm_hash_bucket);
ret = -ENOMEM;
- pr_err("%s Unable to create reminfo hash table\n", __func__);
goto init_exit;
}
}
@@ -128,10 +126,9 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
if (!iwpm_valid_client(nl_client))
return ret;
map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
- if (!map_info) {
- pr_err("%s: Unable to allocate a mapping info\n", __func__);
+ if (!map_info)
return -ENOMEM;
- }
+
memcpy(&map_info->local_sockaddr, local_sockaddr,
sizeof(struct sockaddr_storage));
memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
@@ -309,10 +306,9 @@ struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
unsigned long flags;
nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
- if (!nlmsg_request) {
- pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+ if (!nlmsg_request)
return NULL;
- }
+
spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 40cbd6bdb73b..57f231f1c721 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -316,7 +316,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid port %d\n",
+ port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
}
@@ -769,7 +771,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
* If we are at the start of the LID routed part, don't update the
* hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec.
*/
- if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) {
+ if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
u32 opa_drslid;
if ((opa_get_smp_direction(opa_smp)
@@ -816,7 +818,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local = kmalloc(sizeof *local, GFP_ATOMIC);
if (!local) {
ret = -ENOMEM;
- dev_err(&device->dev, "No memory for ib_mad_local_private\n");
goto out;
}
local->mad_priv = NULL;
@@ -824,7 +825,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
if (!mad_priv) {
ret = -ENOMEM;
- dev_err(&device->dev, "No memory for local response MAD\n");
kfree(local);
goto out;
}
@@ -947,9 +947,6 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
- dev_err(&send_buf->mad_agent->device->dev,
- "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
- sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
@@ -1362,12 +1359,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
{
/* Allocate management method table */
*method = kzalloc(sizeof **method, GFP_ATOMIC);
- if (!*method) {
- pr_err("No memory for ib_mad_mgmt_method_table\n");
- return -ENOMEM;
- }
-
- return 0;
+ return (*method) ? 0 : (-ENOMEM);
}
/*
@@ -1458,8 +1450,6 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate management class table for "new" class version */
*class = kzalloc(sizeof **class, GFP_ATOMIC);
if (!*class) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_class_table\n");
ret = -ENOMEM;
goto error1;
}
@@ -1524,22 +1514,16 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
if (!*vendor_table) {
/* Allocate mgmt vendor class table for "new" class version */
vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
- if (!vendor) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_vendor_class_table\n");
+ if (!vendor)
goto error1;
- }
*vendor_table = vendor;
}
if (!(*vendor_table)->vendor_class[vclass]) {
/* Allocate table for this management vendor class */
vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
- if (!vendor_class) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_vendor_class\n");
+ if (!vendor_class)
goto error2;
- }
(*vendor_table)->vendor_class[vclass] = vendor_class;
}
@@ -1746,7 +1730,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
if (!class)
goto out;
if (convert_mgmt_class(mad_hdr->mgmt_class) >=
- IB_MGMT_MAX_METHODS)
+ ARRAY_SIZE(class->method_table))
goto out;
method = class->method_table[convert_mgmt_class(
mad_hdr->mgmt_class)];
@@ -2167,7 +2151,7 @@ handle_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
- mad_hdr->class_version == OPA_SMI_CLASS_VERSION)
+ mad_hdr->class_version == OPA_SM_CLASS_VERSION)
return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
response);
@@ -2238,11 +2222,8 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
mad_size = recv->mad_size;
response = alloc_mad_private(mad_size, GFP_KERNEL);
- if (!response) {
- dev_err(&port_priv->device->dev,
- "%s: no memory for response buffer\n", __func__);
+ if (!response)
goto out;
- }
if (rdma_cap_ib_switch(port_priv->device))
port_num = wc->port_num;
@@ -2869,8 +2850,6 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
GFP_ATOMIC);
if (!mad_priv) {
- dev_err(&qp_info->port_priv->device->dev,
- "No memory for receive buffer\n");
ret = -ENOMEM;
break;
}
@@ -2961,11 +2940,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
u16 pkey_index;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
- if (!attr) {
- dev_err(&port_priv->device->dev,
- "Couldn't kmalloc ib_qp_attr\n");
+ if (!attr)
return -ENOMEM;
- }
ret = ib_find_pkey(port_priv->device, port_priv->port_num,
IB_DEFAULT_PKEY_FULL, &pkey_index);
@@ -3135,10 +3111,8 @@ static int ib_mad_port_open(struct ib_device *device,
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
- if (!port_priv) {
- dev_err(&device->dev, "No memory for ib_mad_port_private\n");
+ if (!port_priv)
return -ENOMEM;
- }
port_priv->device = device;
port_priv->port_num = port_num;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index e51b739f6ea3..322cb67b07a9 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -518,8 +518,11 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
process_join_error(group, status);
else {
int mgids_changed, is_mgid0;
- ib_find_pkey(group->port->dev->device, group->port->port_num,
- be16_to_cpu(rec->pkey), &pkey_index);
+
+ if (ib_find_pkey(group->port->dev->device,
+ group->port->port_num, be16_to_cpu(rec->pkey),
+ &pkey_index))
+ pkey_index = MCAST_INVALID_PKEY_INDEX;
spin_lock_irq(&group->port->lock);
if (group->state == MCAST_BUSY &&
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 06556c34606d..db958d3207ef 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -144,7 +144,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
struct net_device *real_dev;
int res;
@@ -152,11 +151,11 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
return 0;
rcu_read_lock();
- real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ real_dev = rdma_vlan_dev_real_dev(cookie);
if (!real_dev)
- real_dev = event_ndev;
+ real_dev = cookie;
- res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
@@ -192,17 +191,16 @@ static int pass_all_filter(struct ib_device *ib_dev, u8 port,
static int upper_device_filter(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
int res;
if (!rdma_ndev)
return 0;
- if (rdma_ndev == event_ndev)
+ if (rdma_ndev == cookie)
return 1;
rcu_read_lock();
- res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
rcu_read_unlock();
return res;
@@ -304,10 +302,9 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
for_ifa(in_dev) {
struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry) {
- pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv4 update\n");
+ if (!entry)
continue;
- }
+
entry->ip.sin_family = AF_INET;
entry->ip.sin_addr.s_addr = ifa->ifa_address;
list_add_tail(&entry->list, &sin_list);
@@ -348,10 +345,8 @@ static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry) {
- pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv6 update\n");
+ if (!entry)
continue;
- }
entry->sin6.sin6_family = AF_INET6;
entry->sin6.sin6_addr = ifp->addr;
@@ -382,18 +377,14 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
- _add_netdev_ips(ib_dev, port, event_ndev);
+ enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
+ _add_netdev_ips(ib_dev, port, cookie);
}
static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
+ ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
@@ -437,37 +428,39 @@ static void callback_for_addr_gid_device_scan(struct ib_device *device,
&parsed->gid_attr);
}
+struct upper_list {
+ struct list_head list;
+ struct net_device *upper;
+};
+
+static int netdev_upper_walk(struct net_device *upper, void *data)
+{
+ struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ struct list_head *upper_list = data;
+
+ if (!entry)
+ return 0;
+
+ list_add_tail(&entry->list, upper_list);
+ dev_hold(upper);
+ entry->upper = upper;
+
+ return 0;
+}
+
static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
void *cookie,
void (*handle_netdev)(struct ib_device *ib_dev,
u8 port,
struct net_device *ndev))
{
- struct net_device *ndev = (struct net_device *)cookie;
- struct upper_list {
- struct list_head list;
- struct net_device *upper;
- };
- struct net_device *upper;
- struct list_head *iter;
+ struct net_device *ndev = cookie;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
rcu_read_lock();
- netdev_for_each_all_upper_dev_rcu(ndev, upper, iter) {
- struct upper_list *entry = kmalloc(sizeof(*entry),
- GFP_ATOMIC);
-
- if (!entry) {
- pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n");
- continue;
- }
-
- list_add_tail(&entry->list, &upper_list);
- dev_hold(upper);
- entry->upper = upper;
- }
+ netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
rcu_read_unlock();
handle_netdev(ib_dev, port, ndev);
@@ -520,9 +513,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
+ bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
}
/* The following functions operate on all IB devices. netdevice_event and
@@ -555,10 +546,8 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
struct netdev_event_work *ndev_work =
kmalloc(sizeof(*ndev_work), GFP_KERNEL);
- if (!ndev_work) {
- pr_warn("roce_gid_mgmt: can't allocate work for netdevice_event\n");
+ if (!ndev_work)
return NOTIFY_DONE;
- }
memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
@@ -692,10 +681,8 @@ static int addr_event(struct notifier_block *this, unsigned long event,
}
work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (!work) {
- pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
+ if (!work)
return NOTIFY_DONE;
- }
INIT_WORK(&work->work, update_gid_event_work_handler);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c1fb545e8d78..daadf3130c9f 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1258,7 +1258,7 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
- device->dev.parent = device->dma_device;
+ WARN_ON_ONCE(!device->dev.parent);
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 7713ef089c3c..cc0d51fb06e3 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -46,7 +46,7 @@
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <rdma/ib.h>
#include <rdma/ib_cm.h>
@@ -1104,8 +1104,11 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
struct ib_ucm_cmd_hdr hdr;
ssize_t result;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (len < sizeof(hdr))
return -EINVAL;
@@ -1287,7 +1290,7 @@ static void ib_ucm_add_one(struct ib_device *device)
goto err;
ucm_dev->dev.class = &cm_class;
- ucm_dev->dev.parent = device->dma_device;
+ ucm_dev->dev.parent = device->dev.parent;
ucm_dev->dev.devt = ucm_dev->cdev.dev;
ucm_dev->dev.release = ib_ucm_release_dev;
dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 9520154f1d7c..e12f8faf8c23 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1584,8 +1584,11 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
struct rdma_ucm_cmd_hdr hdr;
ssize_t ret;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (len < sizeof(hdr))
return -EINVAL;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 84b4eff90395..27f155d2df8d 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -34,7 +34,8 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
@@ -51,7 +52,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
if (umem->nmap > 0)
ib_dma_unmap_sg(dev, umem->sg_head.sgl,
- umem->nmap,
+ umem->npages,
DMA_BIDIRECTIONAL);
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
@@ -99,9 +100,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
- if (!size)
- return ERR_PTR(-EINVAL);
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
@@ -134,6 +132,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
if (access & IB_ACCESS_ON_DEMAND) {
+ put_pid(umem->pid);
ret = ib_umem_odp_get(context, umem);
if (ret) {
kfree(umem);
@@ -149,6 +148,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
+ put_pid(umem->pid);
kfree(umem);
return ERR_PTR(-ENOMEM);
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 1f0fe3217f23..cb2742b548bb 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -32,6 +32,8 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/pid.h>
#include <linux/slab.h>
#include <linux/export.h>
@@ -239,6 +241,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr,
+ size_t size)
+{
+ struct ib_umem *umem;
+ struct ib_umem_odp *odp_data;
+ int pages = size >> PAGE_SHIFT;
+ int ret;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_size = PAGE_SIZE;
+ umem->writable = 1;
+
+ odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+ if (!odp_data) {
+ ret = -ENOMEM;
+ goto out_umem;
+ }
+ odp_data->umem = umem;
+
+ mutex_init(&odp_data->umem_mutex);
+ init_completion(&odp_data->notifier_completion);
+
+ odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
+ if (!odp_data->page_list) {
+ ret = -ENOMEM;
+ goto out_odp_data;
+ }
+
+ odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
+ if (!odp_data->dma_list) {
+ ret = -ENOMEM;
+ goto out_page_list;
+ }
+
+ down_write(&context->umem_rwsem);
+ context->odp_mrs_count++;
+ rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
+ if (likely(!atomic_read(&context->notifier_count)))
+ odp_data->mn_counters_active = true;
+ else
+ list_add(&odp_data->no_private_counters,
+ &context->no_private_counters);
+ up_write(&context->umem_rwsem);
+
+ umem->odp_data = odp_data;
+
+ return umem;
+
+out_page_list:
+ vfree(odp_data->page_list);
+out_odp_data:
+ kfree(odp_data);
+out_umem:
+ kfree(umem);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_odp_umem);
+
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
{
int ret_val;
@@ -270,18 +337,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
init_completion(&umem->odp_data->notifier_completion);
- umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+ if (ib_umem_num_pages(umem)) {
+ umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->page_list));
- if (!umem->odp_data->page_list) {
- ret_val = -ENOMEM;
- goto out_odp_data;
- }
+ if (!umem->odp_data->page_list) {
+ ret_val = -ENOMEM;
+ goto out_odp_data;
+ }
- umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+ umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->dma_list));
- if (!umem->odp_data->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
+ if (!umem->odp_data->dma_list) {
+ ret_val = -ENOMEM;
+ goto out_page_list;
+ }
}
/*
@@ -466,6 +535,7 @@ static int ib_umem_odp_map_dma_single_page(
}
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page;
+ umem->npages++;
stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask;
@@ -505,7 +575,8 @@ out:
* for failure.
* An -EAGAIN error code is returned when a concurrent mmu notifier prevents
* the function from completing its task.
- *
+ * An -ENOENT error code indicates that userspace process is being terminated
+ * and mm was already destroyed.
* @umem: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -553,7 +624,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
- ret = -EINVAL;
+ ret = -ENOENT;
goto out_put_task;
}
@@ -578,7 +649,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
*/
npages = get_user_pages_remote(owning_process, owning_mm,
user_virt, gup_num_pages,
- flags, local_page_list, NULL);
+ flags, local_page_list, NULL, NULL);
up_read(&owning_mm->mmap_sem);
if (npages < 0)
@@ -665,6 +736,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
put_page(page);
umem->odp_data->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0;
+ umem->npages--;
}
}
mutex_unlock(&umem->odp_data->umem_mutex);
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
index 727d788448f5..d176597b4d78 100644
--- a/drivers/infiniband/core/umem_rbtree.c
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
void *cookie)
{
int ret_val = 0;
- struct umem_odp_node *node;
+ struct umem_odp_node *node, *next;
struct ib_umem_odp *umem;
if (unlikely(start == last))
return ret_val;
- for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
- node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+ for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+ node; node = next) {
+ next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
}
return ret_val;
}
+EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
+
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+ u64 addr, u64 length)
+{
+ struct umem_odp_node *node;
+
+ node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
+ if (node)
+ return container_of(node, struct ib_umem_odp, interval_tree);
+ return NULL;
+
+}
+EXPORT_SYMBOL(rbt_ib_umem_lookup);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 415a3185cde7..aca7ff7abedc 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -50,7 +50,7 @@
#include <linux/semaphore.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_user_mad.h>
@@ -1188,7 +1188,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
- port->dev = device_create(umad_class, device->dma_device,
+ port->dev = device_create(umad_class, device->dev.parent,
port->cdev.dev, port,
"umad%d", port->dev_num);
if (IS_ERR(port->dev))
@@ -1207,7 +1207,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
- port->sm_dev = device_create(umad_class, device->dma_device,
+ port->sm_dev = device_create(umad_class, device->dev.parent,
port->sm_cdev.dev, port,
"issm%d", port->dev_num);
if (IS_ERR(port->sm_dev))
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index df26a741cda6..e1bedf0bac04 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
+ struct ib_uverbs_flow_spec_action_tag flow_tag;
};
};
@@ -289,5 +290,6 @@ IB_UVERBS_DECLARE_EX_CMD(modify_wq);
IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
+IB_UVERBS_DECLARE_EX_CMD(modify_qp);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index cb3f515a2285..7b7a76e1279a 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -38,7 +38,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "uverbs.h"
#include "core_priv.h"
@@ -316,6 +316,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_udata udata;
struct ib_ucontext *ucontext;
struct file *filp;
+ struct ib_rdmacg_object cg_obj;
int ret;
if (out_len < sizeof resp)
@@ -335,13 +336,18 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
+ ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+ if (ret)
+ goto err;
+
ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
- goto err;
+ goto err_alloc;
}
ucontext->device = ib_dev;
+ ucontext->cg_obj = cg_obj;
INIT_LIST_HEAD(&ucontext->pd_list);
INIT_LIST_HEAD(&ucontext->mr_list);
INIT_LIST_HEAD(&ucontext->mw_list);
@@ -407,6 +413,9 @@ err_free:
put_pid(ucontext->tgid);
ib_dev->dealloc_ucontext(ucontext);
+err_alloc:
+ ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+
err:
mutex_unlock(&file->mutex);
return ret;
@@ -561,6 +570,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
return -ENOMEM;
init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret) {
+ kfree(uobj);
+ return ret;
+ }
+
down_write(&uobj->mutex);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
@@ -605,6 +621,7 @@ err_idr:
ib_dealloc_pd(pd);
err:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
put_uobj_write(uobj);
return ret;
}
@@ -637,6 +654,8 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
if (ret)
goto err_put;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
uobj->live = 0;
put_uobj_write(uobj);
@@ -1006,6 +1025,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_put;
}
}
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
@@ -1054,6 +1077,9 @@ err_unreg:
ib_dereg_mr(mr);
err_put:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err_free:
@@ -1178,6 +1204,8 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
mutex_lock(&file->mutex);
@@ -1226,6 +1254,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
@@ -1271,6 +1304,9 @@ err_unalloc:
uverbs_dealloc_mw(mw);
err_put:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err_free:
@@ -1306,6 +1342,8 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
mutex_lock(&file->mutex);
@@ -1405,6 +1443,11 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
+ ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
cq = ib_dev->create_cq(ib_dev, &attr,
file->ucontext, uhw);
if (IS_ERR(cq)) {
@@ -1452,6 +1495,10 @@ err_free:
ib_destroy_cq(cq);
err_file:
+ ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
if (ev_file)
ib_uverbs_release_ucq(file, ev_file, obj);
@@ -1732,6 +1779,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
mutex_lock(&file->mutex);
@@ -1891,7 +1940,8 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
- IB_QP_CREATE_SCATTER_FCS)) {
+ IB_QP_CREATE_SCATTER_FCS |
+ IB_QP_CREATE_CVLAN_STRIPPING)) {
ret = -EINVAL;
goto err_put;
}
@@ -1904,6 +1954,11 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_put;
}
+ ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_put;
+
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
@@ -1911,7 +1966,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_create;
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1992,6 +2047,10 @@ err_cb:
err_destroy:
ib_destroy_qp(qp);
+err_create:
+ ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
err_put:
if (xrcd)
put_xrcd_read(xrcd_uobj);
@@ -2328,94 +2387,88 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
}
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int modify_qp(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
{
- struct ib_uverbs_modify_qp cmd;
- struct ib_udata udata;
- struct ib_qp *qp;
- struct ib_qp_attr *attr;
- int ret;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
+ struct ib_qp_attr *attr;
+ struct ib_qp *qp;
+ int ret;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
}
- attr->qp_state = cmd.qp_state;
- attr->cur_qp_state = cmd.cur_qp_state;
- attr->path_mtu = cmd.path_mtu;
- attr->path_mig_state = cmd.path_mig_state;
- attr->qkey = cmd.qkey;
- attr->rq_psn = cmd.rq_psn;
- attr->sq_psn = cmd.sq_psn;
- attr->dest_qp_num = cmd.dest_qp_num;
- attr->qp_access_flags = cmd.qp_access_flags;
- attr->pkey_index = cmd.pkey_index;
- attr->alt_pkey_index = cmd.alt_pkey_index;
- attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
- attr->max_rd_atomic = cmd.max_rd_atomic;
- attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
- attr->min_rnr_timer = cmd.min_rnr_timer;
- attr->port_num = cmd.port_num;
- attr->timeout = cmd.timeout;
- attr->retry_cnt = cmd.retry_cnt;
- attr->rnr_retry = cmd.rnr_retry;
- attr->alt_port_num = cmd.alt_port_num;
- attr->alt_timeout = cmd.alt_timeout;
-
- memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
- attr->ah_attr.dlid = cmd.dest.dlid;
- attr->ah_attr.sl = cmd.dest.sl;
- attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd.dest.static_rate;
- attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd.dest.port_num;
-
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+ attr->qp_state = cmd->base.qp_state;
+ attr->cur_qp_state = cmd->base.cur_qp_state;
+ attr->path_mtu = cmd->base.path_mtu;
+ attr->path_mig_state = cmd->base.path_mig_state;
+ attr->qkey = cmd->base.qkey;
+ attr->rq_psn = cmd->base.rq_psn;
+ attr->sq_psn = cmd->base.sq_psn;
+ attr->dest_qp_num = cmd->base.dest_qp_num;
+ attr->qp_access_flags = cmd->base.qp_access_flags;
+ attr->pkey_index = cmd->base.pkey_index;
+ attr->alt_pkey_index = cmd->base.alt_pkey_index;
+ attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
+ attr->max_rd_atomic = cmd->base.max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd->base.min_rnr_timer;
+ attr->port_num = cmd->base.port_num;
+ attr->timeout = cmd->base.timeout;
+ attr->retry_cnt = cmd->base.retry_cnt;
+ attr->rnr_retry = cmd->base.rnr_retry;
+ attr->alt_port_num = cmd->base.alt_port_num;
+ attr->alt_timeout = cmd->base.alt_timeout;
+ attr->rate_limit = cmd->rate_limit;
+
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
+ attr->ah_attr.dlid = cmd->base.dest.dlid;
+ attr->ah_attr.sl = cmd->base.dest.sl;
+ attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd->base.dest.static_rate;
+ attr->ah_attr.ah_flags = cmd->base.dest.is_global ?
+ IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd->base.dest.port_num;
+
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd->base.alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ?
+ IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num;
if (qp->real_qp == qp) {
- ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
- if (ret)
- goto release_qp;
+ if (cmd->base.attr_mask & IB_QP_AV) {
+ ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+ if (ret)
+ goto release_qp;
+ }
ret = qp->device->modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask),
+ udata);
} else {
- ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+ ret = ib_modify_qp(qp, attr,
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask));
}
- if (ret)
- goto release_qp;
-
- ret = in_len;
-
release_qp:
put_qp_read(qp);
@@ -2425,6 +2478,68 @@ out:
return ret;
}
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_ex_modify_qp cmd = {};
+ struct ib_udata udata;
+ int ret;
+
+ if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
+ return -EFAULT;
+
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+ return -EOPNOTSUPP;
+
+ INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL,
+ in_len - sizeof(cmd.base), out_len);
+
+ ret = modify_qp(file, &cmd, &udata);
+ if (ret)
+ return ret;
+
+ return in_len;
+}
+
+int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_modify_qp cmd = {};
+ int ret;
+
+ /*
+ * Last bit is reserved for extending the attr_mask by
+ * using another field.
+ */
+ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
+
+ if (ucore->inlen < sizeof(cmd.base))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (ret)
+ return ret;
+
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+ return -EOPNOTSUPP;
+
+ if (ucore->inlen > sizeof(cmd)) {
+ if (ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
+ return -EOPNOTSUPP;
+ }
+
+ ret = modify_qp(file, &cmd, uhw);
+
+ return ret;
+}
+
ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
@@ -2462,6 +2577,8 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
if (obj->uxrcd)
atomic_dec(&obj->uxrcd->refcnt);
@@ -2875,6 +2992,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_ah *ah;
struct ib_ah_attr attr;
int ret;
+ struct ib_udata udata;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -2882,6 +3000,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long)cmd.response + sizeof(resp),
+ in_len - sizeof(cmd), out_len - sizeof(resp));
+
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
@@ -2908,12 +3030,21 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
- ah = ib_create_ah(pd, &attr);
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
+ ah = pd->device->create_ah(pd, &attr, &udata);
+
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
- goto err_put;
+ goto err_create;
}
+ ah->device = pd->device;
+ ah->pd = pd;
+ atomic_inc(&pd->usecnt);
ah->uobject = uobj;
uobj->object = ah;
@@ -2947,7 +3078,10 @@ err_copy:
err_destroy:
ib_destroy_ah(ah);
-err_put:
+err_create:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err:
@@ -2981,6 +3115,8 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
mutex_lock(&file->mutex);
@@ -3078,6 +3214,25 @@ out_put:
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (kern_spec->flow_tag.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_tag))
+ return -EINVAL;
+
+ ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+ ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
@@ -3102,8 +3257,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
ssize_t kern_filter_sz;
@@ -3124,8 +3279,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
kern_spec_val = (void *)kern_spec +
sizeof(struct ib_uverbs_flow_spec_hdr);
kern_spec_mask = kern_spec_val + kern_filter_sz;
+ if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
+ return -EINVAL;
- switch (ib_spec->type) {
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
actual_filter_sz = spec_filter_size(kern_spec_mask,
@@ -3175,12 +3332,39 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel);
+ memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz);
+
+ if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) ||
+ (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
return 0;
}
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+ return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ else
+ return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
@@ -3243,6 +3427,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+ sizeof(cmd.create_flags)))
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3398,7 +3585,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (!cmd.attr_mask)
return -EINVAL;
- if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@@ -3407,6 +3594,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.curr_wq_state = cmd.curr_wq_state;
wq_attr.wq_state = cmd.wq_state;
+ if (cmd.attr_mask & IB_WQ_FLAGS) {
+ wq_attr.flags = cmd.flags;
+ wq_attr.flags_mask = cmd.flags_mask;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
put_wq_read(wq);
return ret;
@@ -3740,12 +3931,17 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
+
+ err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (err)
+ goto err_free;
+
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
- goto err_free;
+ goto err_create;
}
- flow_id->qp = qp;
flow_id->uobject = uobj;
uobj->object = flow_id;
@@ -3777,6 +3973,8 @@ err_copy:
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
destroy_flow:
ib_destroy_flow(flow_id);
+err_create:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
err_free:
kfree(flow_attr);
err_put:
@@ -3816,8 +4014,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
flow_id = uobj->object;
ret = ib_destroy_flow(flow_id);
- if (!ret)
+ if (!ret) {
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
uobj->live = 0;
+ }
put_uobj_write(uobj);
@@ -3885,6 +4086,11 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_put_cq;
+
srq = pd->device->create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
@@ -3949,6 +4155,8 @@ err_destroy:
ib_destroy_srq(srq);
err_put:
+ ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
put_pd_read(pd);
err_put_cq:
@@ -4135,6 +4343,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
if (srq_type == IB_SRQT_XRC) {
us = container_of(obj, struct ib_usrq_object, uevent);
atomic_dec(&us->uxrcd->refcnt);
@@ -4242,6 +4452,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.max_wq_type_rq = attr.max_wq_type_rq;
resp.response_length += sizeof(resp.max_wq_type_rq);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+ goto end;
+
+ resp.raw_packet_caps = attr.raw_packet_caps;
+ resp.response_length += sizeof(resp.raw_packet_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 44b1104eb168..35c788a32e26 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -46,11 +46,12 @@
#include <linux/anon_inodes.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <rdma/ib.h>
#include "uverbs.h"
+#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -137,6 +138,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
+ [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -236,6 +238,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
ib_destroy_ah(ah);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -245,6 +249,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
uverbs_dealloc_mw(mw);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -253,6 +259,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
ib_destroy_flow(flow_id);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -265,6 +273,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
if (qp == qp->real_qp)
ib_uverbs_detach_umcast(qp, uqp);
ib_destroy_qp(qp);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
@@ -297,6 +307,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
ib_destroy_srq(srq);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_uevent(file, uevent);
kfree(uevent);
}
@@ -309,6 +321,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
ib_destroy_cq(cq);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_ucq(file, ev_file, ucq);
kfree(ucq);
}
@@ -318,6 +332,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -338,11 +354,16 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
ib_dealloc_pd(pd);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
put_pid(context->tgid);
+ ib_rdmacg_uncharge(&context->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+
return context->device->dealloc_ucontext(context);
}
@@ -746,8 +767,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
int srcu_key;
ssize_t ret;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (count < sizeof hdr)
return -EINVAL;
@@ -1170,7 +1194,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
- uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
+ uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 83687646da68..85ed5051fdfd 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -315,7 +315,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct ib_ah *ah;
- ah = pd->device->create_ah(pd, ah_attr);
+ ah = pd->device->create_ah(pd, ah_attr, NULL);
if (!IS_ERR(ah)) {
ah->device = pd->device;
@@ -328,7 +328,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
EXPORT_SYMBOL(ib_create_ah);
-static int ib_get_header_version(const union rdma_network_hdr *hdr)
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
struct iphdr ip4h_checked;
@@ -359,6 +359,7 @@ static int ib_get_header_version(const union rdma_network_hdr *hdr)
return 4;
return 6;
}
+EXPORT_SYMBOL(ib_get_rdma_header_version);
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
u8 port_num,
@@ -369,7 +370,7 @@ static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
if (rdma_protocol_ib(device, port_num))
return RDMA_NETWORK_IB;
- grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+ grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh);
if (grh_version == 4)
return RDMA_NETWORK_IPV4;
@@ -415,9 +416,9 @@ static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
&context, gid_index);
}
-static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
- enum rdma_network_type net_type,
- union ib_gid *sgid, union ib_gid *dgid)
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+ enum rdma_network_type net_type,
+ union ib_gid *sgid, union ib_gid *dgid)
{
struct sockaddr_in src_in;
struct sockaddr_in dst_in;
@@ -447,6 +448,7 @@ static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
return -EINVAL;
}
}
+EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
@@ -469,8 +471,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
net_type = ib_get_net_type_by_grh(device, port_num, grh);
gid_type = ib_network_to_gid_type(net_type);
}
- ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
- &sgid, &dgid);
+ ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ &sgid, &dgid);
if (ret)
return ret;
@@ -1014,6 +1016,7 @@ static const struct {
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
}
}
},
@@ -1047,6 +1050,7 @@ static const struct {
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
}
},
[IB_QPS_SQD] = {
@@ -1196,66 +1200,65 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
-int ib_resolve_eth_dmac(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_device *device,
+ struct ib_ah_attr *ah_attr)
{
int ret = 0;
- if (*qp_attr_mask & IB_QP_AV) {
- if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
- qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
- return -EINVAL;
-
- if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
- return 0;
-
- if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
- qp_attr->ah_attr.dmac);
- } else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
-
- ret = ib_query_gid(qp->device,
- qp_attr->ah_attr.port_num,
- qp_attr->ah_attr.grh.sgid_index,
- &sgid, &sgid_attr);
-
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- goto out;
- }
+ if (!rdma_is_port_valid(device, ah_attr->port_num))
+ return -EINVAL;
- ifindex = sgid_attr.ndev->ifindex;
+ if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+ return 0;
- ret = rdma_addr_find_l2_eth_by_grh(&sgid,
- &qp_attr->ah_attr.grh.dgid,
- qp_attr->ah_attr.dmac,
- NULL, &ifindex, &hop_limit);
+ if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
+ ah_attr->dmac);
+ } else {
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+ int ifindex;
+ int hop_limit;
+
+ ret = ib_query_gid(device,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index,
+ &sgid, &sgid_attr);
+
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ goto out;
+ }
- dev_put(sgid_attr.ndev);
+ ifindex = sgid_attr.ndev->ifindex;
- qp_attr->ah_attr.grh.hop_limit = hop_limit;
- }
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+ &ah_attr->grh.dgid,
+ ah_attr->dmac,
+ NULL, &ifindex, &hop_limit);
+
+ dev_put(sgid_attr.ndev);
+
+ ah_attr->grh.hop_limit = hop_limit;
}
out:
return ret;
}
EXPORT_SYMBOL(ib_resolve_eth_dmac);
-
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- int ret;
- ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
- if (ret)
- return ret;
+ if (qp_attr_mask & IB_QP_AV) {
+ int ret;
+
+ ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
+ if (ret)
+ return ret;
+ }
return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
@@ -1734,8 +1737,10 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
return ERR_PTR(-ENOSYS);
flow_id = qp->device->create_flow(qp, flow_attr, domain);
- if (!IS_ERR(flow_id))
+ if (!IS_ERR(flow_id)) {
atomic_inc(&qp->usecnt);
+ flow_id->qp = qp;
+ }
return flow_id;
}
EXPORT_SYMBOL(ib_create_flow);
@@ -1943,17 +1948,12 @@ static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
*/
static void __ib_drain_sq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
struct ib_send_wr swr = {}, *bad_swr;
int ret;
- if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
@@ -1970,7 +1970,11 @@ static void __ib_drain_sq(struct ib_qp *qp)
return;
}
- wait_for_completion(&sdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&sdrain.done);
}
/*
@@ -1978,17 +1982,12 @@ static void __ib_drain_sq(struct ib_qp *qp)
*/
static void __ib_drain_rq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->recv_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
rwr.wr_cqe = &rdrain.cqe;
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
@@ -2005,7 +2004,11 @@ static void __ib_drain_rq(struct ib_qp *qp)
return;
}
- wait_for_completion(&rdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&rdrain.done);
}
/**
@@ -2022,8 +2025,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
* ensure there is room in the CQ and SQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2051,8 +2053,7 @@ EXPORT_SYMBOL(ib_drain_sq);
* ensure there is room in the CQ and RQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2076,8 +2077,7 @@ EXPORT_SYMBOL(ib_drain_rq);
* ensure there is room in the CQ(s), SQ, and RQ for drain work requests
* and completions.
*
- * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQs using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.