summaryrefslogtreecommitdiff
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/addr.c276
-rw-r--r--drivers/infiniband/core/cm.c5
-rw-r--r--drivers/infiniband/core/cma.c131
-rw-r--r--drivers/infiniband/core/iwcm.c1
-rw-r--r--drivers/infiniband/core/mad.c25
-rw-r--r--drivers/infiniband/core/mad_rmpp.c2
-rw-r--r--drivers/infiniband/core/multicast.c1
-rw-r--r--drivers/infiniband/core/sa_query.c6
-rw-r--r--drivers/infiniband/core/sysfs.c3
-rw-r--r--drivers/infiniband/core/ucm.c77
-rw-r--r--drivers/infiniband/core/ucma.c58
-rw-r--r--drivers/infiniband/core/ud_header.c14
-rw-r--r--drivers/infiniband/core/umem.c3
-rw-r--r--drivers/infiniband/core/user_mad.c179
-rw-r--r--drivers/infiniband/core/uverbs.h13
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c28
-rw-r--r--drivers/infiniband/core/uverbs_main.c257
17 files changed, 558 insertions, 521 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index bd07803e9183..0b926e45afe2 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -35,8 +35,8 @@
#include <linux/mutex.h>
#include <linux/inetdevice.h>
+#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -92,22 +92,12 @@ EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
- switch (dev->type) {
- case ARPHRD_INFINIBAND:
- dev_addr->dev_type = RDMA_NODE_IB_CA;
- break;
- case ARPHRD_ETHER:
- dev_addr->dev_type = RDMA_NODE_RNIC;
- break;
- default:
- return -EADDRNOTAVAIL;
- }
-
+ dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
- dev_addr->src_dev = dev;
+ dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
@@ -117,6 +107,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
+ if (dev_addr->bound_dev_if) {
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_put(dev);
+ return ret;
+ }
+
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(&init_net,
@@ -131,6 +130,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
+ read_lock(&dev_base_lock);
for_each_netdev(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
@@ -139,6 +139,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
break;
}
}
+ read_unlock(&dev_base_lock);
break;
#endif
}
@@ -176,48 +177,9 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static void addr_send_arp(struct sockaddr *dst_in)
-{
- struct rtable *rt;
- struct flowi fl;
-
- memset(&fl, 0, sizeof fl);
-
- switch (dst_in->sa_family) {
- case AF_INET:
- fl.nl_u.ip4_u.daddr =
- ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
- if (ip_route_output_key(&init_net, &rt, &fl))
- return;
-
- neigh_event_send(rt->u.dst.neighbour, NULL);
- ip_rt_put(rt);
- break;
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6:
- {
- struct dst_entry *dst;
-
- fl.nl_u.ip6_u.daddr =
- ((struct sockaddr_in6 *) dst_in)->sin6_addr;
-
- dst = ip6_route_output(&init_net, NULL, &fl);
- if (!dst)
- return;
-
- neigh_event_send(dst->neighbour, NULL);
- dst_release(dst);
- break;
- }
-#endif
- }
-}
-
-static int addr4_resolve_remote(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
+static int addr4_resolve(struct sockaddr_in *src_in,
+ struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -229,10 +191,22 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
+ fl.oif = addr->bound_dev_if;
+
ret = ip_route_output_key(&init_net, &rt, &fl);
if (ret)
goto out;
+ src_in->sin_family = AF_INET;
+ src_in->sin_addr.s_addr = rt->rt_src;
+
+ if (rt->idev->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+
/* If the device does ARP internally, return 'done' */
if (rt->idev->dev->flags & IFF_NOARP) {
rdma_copy_addr(addr, rt->idev->dev, NULL);
@@ -240,21 +214,14 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
}
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
- if (!neigh) {
+ if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+ neigh_event_send(rt->u.dst.neighbour, NULL);
ret = -ENODATA;
+ if (neigh)
+ goto release;
goto put;
}
- if (!(neigh->nud_state & NUD_VALID)) {
- ret = -ENODATA;
- goto release;
- }
-
- if (!src_ip) {
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = rt->rt_src;
- }
-
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
release:
neigh_release(neigh);
@@ -265,52 +232,77 @@ out:
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
{
struct flowi fl;
struct neighbour *neigh;
struct dst_entry *dst;
- int ret = -ENODATA;
+ int ret;
memset(&fl, 0, sizeof fl);
- fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
- fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
+ ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
+ ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
+ fl.oif = addr->bound_dev_if;
dst = ip6_route_output(&init_net, NULL, &fl);
- if (!dst)
- return ret;
+ if ((ret = dst->error))
+ goto put;
+
+ if (ipv6_addr_any(&fl.fl6_src)) {
+ ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+ &fl.fl6_dst, 0, &fl.fl6_src);
+ if (ret)
+ goto put;
+
+ src_in->sin6_family = AF_INET6;
+ ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
+ }
+
+ if (dst->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+ /* If the device does ARP internally, return 'done' */
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
- } else {
- neigh = dst->neighbour;
- if (neigh && (neigh->nud_state & NUD_VALID))
- ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+ goto put;
+ }
+
+ neigh = dst->neighbour;
+ if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+ neigh_event_send(dst->neighbour, NULL);
+ ret = -ENODATA;
+ goto put;
}
+ ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
+put:
dst_release(dst);
return ret;
}
#else
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
-static int addr_resolve_remote(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
+static int addr_resolve(struct sockaddr *src_in,
+ struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr)
{
if (src_in->sa_family == AF_INET) {
- return addr4_resolve_remote((struct sockaddr_in *) src_in,
+ return addr4_resolve((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
- return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
+ return addr6_resolve((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
}
@@ -327,8 +319,7 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve_remote(src_in, dst_in,
- req->addr);
+ req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -352,82 +343,6 @@ static void process_req(struct work_struct *work)
}
}
-static int addr_resolve_local(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
-{
- struct net_device *dev;
- int ret;
-
- switch (dst_in->sa_family) {
- case AF_INET:
- {
- __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
- __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
- dev = ip_dev_find(&init_net, dst_ip);
- if (!dev)
- return -EADDRNOTAVAIL;
-
- if (ipv4_is_zeronet(src_ip)) {
- src_in->sa_family = dst_in->sa_family;
- ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv4_is_loopback(src_ip)) {
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip(src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
- dev_put(dev);
- break;
- }
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6:
- {
- struct in6_addr *a;
-
- for_each_netdev(&init_net, dev)
- if (ipv6_chk_addr(&init_net,
- &((struct sockaddr_in6 *) dst_in)->sin6_addr,
- dev, 1))
- break;
-
- if (!dev)
- return -EADDRNOTAVAIL;
-
- a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
-
- if (ipv6_addr_any(a)) {
- src_in->sa_family = dst_in->sa_family;
- ((struct sockaddr_in6 *) src_in)->sin6_addr =
- ((struct sockaddr_in6 *) dst_in)->sin6_addr;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv6_addr_loopback(a)) {
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip(src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
- break;
- }
-#endif
-
- default:
- ret = -EADDRNOTAVAIL;
- break;
- }
-
- return ret;
-}
-
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
@@ -443,22 +358,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
if (!req)
return -ENOMEM;
- if (src_addr)
- memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
- memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
+ src_in = (struct sockaddr *) &req->src_addr;
+ dst_in = (struct sockaddr *) &req->dst_addr;
+
+ if (src_addr) {
+ if (src_addr->sa_family != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ memcpy(src_in, src_addr, ip_addr_size(src_addr));
+ } else {
+ src_in->sa_family = dst_addr->sa_family;
+ }
+
+ memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
- src_in = (struct sockaddr *) &req->src_addr;
- dst_in = (struct sockaddr *) &req->dst_addr;
-
- req->status = addr_resolve_local(src_in, dst_in, addr);
- if (req->status == -EADDRNOTAVAIL)
- req->status = addr_resolve_remote(src_in, dst_in, addr);
-
+ req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -467,15 +388,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
- addr_send_arp(dst_in);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
- kfree(req);
- break;
+ goto err;
}
return ret;
+err:
+ kfree(req);
+ return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 5130fc55b8e2..ad63b79afac1 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -42,6 +42,7 @@
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
+#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
@@ -3597,7 +3598,7 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
atomic_long_read(&group->counter[cm_attr->index]));
}
-static struct sysfs_ops cm_counter_ops = {
+static const struct sysfs_ops cm_counter_ops = {
.show = cm_show_counter
};
@@ -3693,7 +3694,7 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
"%s", ib_device->name);
- if (!cm_dev->device) {
+ if (IS_ERR(cm_dev->device)) {
kfree(cm_dev);
return;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 075317884b53..6d777069d86d 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -40,6 +40,7 @@
#include <linux/random.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
+#include <linux/slab.h>
#include <net/tcp.h>
#include <net/ipv6.h>
@@ -330,17 +331,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
union ib_gid gid;
int ret = -ENODEV;
- switch (rdma_node_get_transport(dev_addr->dev_type)) {
- case RDMA_TRANSPORT_IB:
- ib_addr_get_sgid(dev_addr, &gid);
- break;
- case RDMA_TRANSPORT_IWARP:
- iw_addr_get_sgid(dev_addr, &gid);
- break;
- default:
- return -ENODEV;
- }
-
+ rdma_addr_get_sgid(dev_addr, &gid);
list_for_each_entry(cma_dev, &dev_list, list) {
ret = ib_find_cached_gid(cma_dev->device, &gid,
&id_priv->id.port_num, NULL);
@@ -1032,11 +1023,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
- ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
- ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr);
- if (ret)
- goto destroy_id;
+ if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+ rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
+ rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
+ ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
+ } else {
+ ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
+ &rt->addr.dev_addr);
+ if (ret)
+ goto destroy_id;
+ }
+ rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@@ -1071,10 +1068,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
- ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr);
- if (ret)
- goto err;
+ if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
+ ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
+ &id->route.addr.dev_addr);
+ if (ret)
+ goto err;
+ }
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@@ -1474,15 +1473,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
-{
- struct sockaddr_storage addr_in;
-
- memset(&addr_in, 0, sizeof addr_in);
- addr_in.ss_family = af;
- return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
-}
-
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
struct rdma_id_private *id_priv;
@@ -1490,7 +1480,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == CMA_IDLE) {
- ret = cma_bind_any(id, AF_INET);
+ ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
+ ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
if (ret)
return ret;
}
@@ -1565,8 +1556,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct sockaddr_in6 *sin6;
memset(&path_rec, 0, sizeof path_rec);
- ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
- ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+ rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+ rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
@@ -1693,6 +1684,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
}
memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
+ id->route.num_paths = num_paths;
return 0;
err:
cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
@@ -1781,7 +1773,11 @@ port_found:
if (ret)
goto out;
- ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ id_priv->id.route.addr.dev_addr.dev_type =
+ (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
+ ARPHRD_INFINIBAND : ARPHRD_ETHER;
+
+ rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
@@ -1839,7 +1835,7 @@ out:
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
- struct sockaddr_in *src_in, *dst_in;
+ struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
@@ -1853,14 +1849,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
goto err;
}
- ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
- ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+ rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
- src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
- dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
+ src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+ if (cma_zero_addr(src)) {
+ dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+ if ((src->sa_family = dst->sa_family) == AF_INET) {
+ ((struct sockaddr_in *) src)->sin_addr.s_addr =
+ ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+ } else {
+ ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
+ &((struct sockaddr_in6 *) dst)->sin6_addr);
+ }
}
work->id = id_priv;
@@ -1878,10 +1879,14 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
- if (src_addr && src_addr->sa_family)
- return rdma_bind_addr(id, src_addr);
- else
- return cma_bind_any(id, dst_addr->sa_family);
+ if (!src_addr || !src_addr->sa_family) {
+ src_addr = (struct sockaddr *) &id->route.addr.src_addr;
+ if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+ ((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
+ ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+ }
+ }
+ return rdma_bind_addr(id, src_addr);
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -2077,6 +2082,25 @@ static int cma_get_port(struct rdma_id_private *id_priv)
return ret;
}
+static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
+ struct sockaddr *addr)
+{
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct sockaddr_in6 *sin6;
+
+ if (addr->sa_family != AF_INET6)
+ return 0;
+
+ sin6 = (struct sockaddr_in6 *) addr;
+ if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
+ !sin6->sin6_scope_id)
+ return -EINVAL;
+
+ dev_addr->bound_dev_if = sin6->sin6_scope_id;
+#endif
+ return 0;
+}
+
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
@@ -2089,6 +2113,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
return -EINVAL;
+ ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+ if (ret)
+ goto err1;
+
if (!cma_any_addr(addr)) {
ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
if (ret)
@@ -2108,7 +2136,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (!cma_any_addr(addr)) {
+ if (id_priv->cma_dev) {
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
mutex_unlock(&lock);
@@ -2687,10 +2715,15 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6) &&
- ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+ ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else if ((addr->sa_family == AF_INET6)) {
+ ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ *mgid = *(union ib_gid *) (mc_map + 4);
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -2716,7 +2749,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
if (id_priv->id.ps == RDMA_PS_UDP)
rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
- ib_addr_get_sgid(dev_addr, &rec.port_gid);
+ rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
@@ -2815,7 +2848,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
dev_addr = &id_priv->id.route.addr.dev_addr;
- if ((dev_addr->src_dev == ndev) &&
+ if ((dev_addr->bound_dev_if == ndev->ifindex) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->name, &id_priv->id);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 0f89909abce9..bfead5bc25f6 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -44,6 +44,7 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
+#include <linux/slab.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 7522008fda86..1df1194aeba4 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -34,6 +34,7 @@
*
*/
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
@@ -1193,10 +1194,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
{
int i;
- for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
if ((*method)->agent[i]) {
printk(KERN_ERR PFX "Method %d already in use\n", i);
return -EINVAL;
@@ -1330,13 +1328,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
goto error3;
/* Finally, add in methods being registered */
- for (i = find_first_bit(mad_reg_req->method_mask,
- IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
(*method)->agent[i] = agent_priv;
- }
+
return 0;
error3:
@@ -1429,13 +1423,9 @@ check_in_use:
goto error4;
/* Finally, add in methods being registered */
- for (i = find_first_bit(mad_reg_req->method_mask,
- IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
(*method)->agent[i] = agent_priv;
- }
+
return 0;
error4:
@@ -2964,6 +2954,9 @@ static void ib_mad_remove_device(struct ib_device *device)
{
int i, num_ports, cur_port;
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
if (device->node_type == RDMA_NODE_IB_SWITCH) {
num_ports = 1;
cur_port = 0;
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 4e0f2829e0e5..f37878c9c06e 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -31,6 +31,8 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+
#include "mad_priv.h"
#include "mad_rmpp.h"
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 8d82ba171353..a519801dcfb7 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -34,6 +34,7 @@
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
+#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/random.h>
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 82543716d59e..7e1ffd8ccd5c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -604,6 +604,12 @@ retry:
return ret ? ret : id;
}
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+{
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_path);
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 158a214da2f7..f901957abc8b 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -79,7 +79,7 @@ static ssize_t port_attr_show(struct kobject *kobj,
return port_attr->show(p, port_attr, buf);
}
-static struct sysfs_ops port_sysfs_ops = {
+static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show
};
@@ -461,6 +461,7 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
element->attr.attr.mode = S_IRUGO;
element->attr.show = show;
element->index = i;
+ sysfs_attr_init(&element->attr.attr);
tab_attr[i] = &element->attr.attr;
}
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f504c9b00c1b..512b1c43460c 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -44,6 +44,7 @@
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/mutex.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
@@ -1215,15 +1216,18 @@ static void ib_ucm_release_dev(struct device *dev)
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
cdev_del(&ucm_dev->cdev);
- clear_bit(ucm_dev->devnum, dev_map);
+ if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
+ clear_bit(ucm_dev->devnum, dev_map);
+ else
+ clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map);
kfree(ucm_dev);
}
static const struct file_operations ucm_fops = {
- .owner = THIS_MODULE,
- .open = ib_ucm_open,
+ .owner = THIS_MODULE,
+ .open = ib_ucm_open,
.release = ib_ucm_close,
- .write = ib_ucm_write,
+ .write = ib_ucm_write,
.poll = ib_ucm_poll,
};
@@ -1237,8 +1241,32 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
+static int find_overflow_devnum(void)
+{
+ int ret;
+
+ if (!overflow_maj) {
+ ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
+ "infiniband_cm");
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't register dynamic device number\n");
+ return ret;
+ }
+ }
+
+ ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
+ if (ret >= IB_UCM_MAX_DEVICES)
+ return -1;
+
+ return ret;
+}
+
static void ib_ucm_add_one(struct ib_device *device)
{
+ int devnum;
+ dev_t base;
struct ib_ucm_device *ucm_dev;
if (!device->alloc_ucontext ||
@@ -1251,16 +1279,25 @@ static void ib_ucm_add_one(struct ib_device *device)
ucm_dev->ib_dev = device;
- ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
- goto err;
-
- set_bit(ucm_dev->devnum, dev_map);
+ devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+ if (devnum >= IB_UCM_MAX_DEVICES) {
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
+ goto err;
+
+ ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ ucm_dev->devnum = devnum;
+ base = devnum + IB_UCM_BASE_DEV;
+ set_bit(devnum, dev_map);
+ }
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
- if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+ if (cdev_add(&ucm_dev->cdev, base, 1))
goto err;
ucm_dev->dev.class = &cm_class;
@@ -1281,7 +1318,10 @@ err_dev:
device_unregister(&ucm_dev->dev);
err_cdev:
cdev_del(&ucm_dev->cdev);
- clear_bit(ucm_dev->devnum, dev_map);
+ if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
err:
kfree(ucm_dev);
return;
@@ -1297,11 +1337,8 @@ static void ib_ucm_remove_one(struct ib_device *device)
device_unregister(&ucm_dev->dev);
}
-static ssize_t show_abi_version(struct class *class, char *buf)
-{
- return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
-}
-static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+static CLASS_ATTR_STRING(abi_version, S_IRUGO,
+ __stringify(IB_USER_CM_ABI_VERSION));
static int __init ib_ucm_init(void)
{
@@ -1314,7 +1351,7 @@ static int __init ib_ucm_init(void)
goto error1;
}
- ret = class_create_file(&cm_class, &class_attr_abi_version);
+ ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
if (ret) {
printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
goto error2;
@@ -1328,7 +1365,7 @@ static int __init ib_ucm_init(void)
return 0;
error3:
- class_remove_file(&cm_class, &class_attr_abi_version);
+ class_remove_file(&cm_class, &class_attr_abi_version.attr);
error2:
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
error1:
@@ -1338,8 +1375,10 @@ error1:
static void __exit ib_ucm_cleanup(void)
{
ib_unregister_client(&ucm_client);
- class_remove_file(&cm_class, &class_attr_abi_version);
+ class_remove_file(&cm_class, &class_attr_abi_version.attr);
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
idr_destroy(&ctx_id_table);
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index bb96d3c4b0f4..46185084121e 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -39,10 +39,12 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
+#include <linux/slab.h>
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
+#include <rdma/rdma_cm_ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -562,10 +564,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
switch (route->num_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
- ib_addr_get_dgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].dgid);
- ib_addr_get_sgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].sgid);
+ rdma_addr_get_dgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].dgid);
+ rdma_addr_get_sgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
break;
case 2:
@@ -812,6 +814,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
return ret;
}
+static int ucma_set_ib_path(struct ucma_context *ctx,
+ struct ib_path_rec_data *path_data, size_t optlen)
+{
+ struct ib_sa_path_rec sa_path;
+ struct rdma_cm_event event;
+ int ret;
+
+ if (optlen % sizeof(*path_data))
+ return -EINVAL;
+
+ for (; optlen; optlen -= sizeof(*path_data), path_data++) {
+ if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
+ IB_PATH_BIDIRECTIONAL))
+ break;
+ }
+
+ if (!optlen)
+ return -EINVAL;
+
+ ib_sa_unpack_path(path_data->path_rec, &sa_path);
+ ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ if (ret)
+ return ret;
+
+ memset(&event, 0, sizeof event);
+ event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ return ucma_event_handler(ctx->cm_id, &event);
+}
+
+static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
+ void *optval, size_t optlen)
+{
+ int ret;
+
+ switch (optname) {
+ case RDMA_OPTION_IB_PATH:
+ ret = ucma_set_ib_path(ctx, optval, optlen);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ return ret;
+}
+
static int ucma_set_option_level(struct ucma_context *ctx, int level,
int optname, void *optval, size_t optlen)
{
@@ -821,6 +868,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
case RDMA_OPTION_ID:
ret = ucma_set_option_id(ctx, optname, optval, optlen);
break;
+ case RDMA_OPTION_IB:
+ ret = ucma_set_option_ib(ctx, optname, optval, optlen);
+ break;
default:
ret = -ENOSYS;
}
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 8ec7876bedcf..650b501eb142 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -181,6 +181,7 @@ static const struct ib_field deth_table[] = {
* ib_ud_header_init - Initialize UD header structure
* @payload_bytes:Length of packet payload
* @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @immediate_present: specify if immediate data should be used
* @header:Structure to initialize
*
* ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header,
@@ -191,21 +192,13 @@ static const struct ib_field deth_table[] = {
*/
void ib_ud_header_init(int payload_bytes,
int grh_present,
+ int immediate_present,
struct ib_ud_header *header)
{
- int header_len;
u16 packet_length;
memset(header, 0, sizeof *header);
- header_len =
- IB_LRH_BYTES +
- IB_BTH_BYTES +
- IB_DETH_BYTES;
- if (grh_present) {
- header_len += IB_GRH_BYTES;
- }
-
header->lrh.link_version = 0;
header->lrh.link_next_header =
grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
@@ -231,7 +224,8 @@ void ib_ud_header_init(int payload_bytes,
header->lrh.packet_length = cpu_to_be16(packet_length);
- if (header->immediate_present)
+ header->immediate_present = immediate_present;
+ if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
else
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 6f7c096abf13..415e186eee32 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -37,6 +37,7 @@
#include <linux/sched.h>
#include <linux/hugetlb.h>
#include <linux/dma-attrs.h>
+#include <linux/slab.h>
#include "uverbs.h"
@@ -136,7 +137,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
down_write(&current->mm->mmap_sem);
locked = npages + current->mm->locked_vm;
- lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 7de02969ed7d..e7db054fb1c8 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -46,6 +46,7 @@
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/semaphore.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
@@ -65,12 +66,9 @@ enum {
};
/*
- * Our lifetime rules for these structs are the following: each time a
- * device special file is opened, we look up the corresponding struct
- * ib_umad_port by minor in the umad_port[] table while holding the
- * port_lock. If this lookup succeeds, we take a reference on the
- * ib_umad_port's struct ib_umad_device while still holding the
- * port_lock; if the lookup fails, we fail the open(). We drop these
+ * Our lifetime rules for these structs are the following:
+ * device special file is opened, we take a reference on the
+ * ib_umad_port's struct ib_umad_device. We drop these
* references in the corresponding close().
*
* In addition to references coming from open character devices, there
@@ -78,19 +76,14 @@ enum {
* module's reference taken when allocating the ib_umad_device in
* ib_umad_add_one().
*
- * When destroying an ib_umad_device, we clear all of its
- * ib_umad_ports from umad_port[] while holding port_lock before
- * dropping the module's reference to the ib_umad_device. This is
- * always safe because any open() calls will either succeed and obtain
- * a reference before we clear the umad_port[] entries, or fail after
- * we clear the umad_port[] entries.
+ * When destroying an ib_umad_device, we drop the module's reference.
*/
struct ib_umad_port {
- struct cdev *cdev;
+ struct cdev cdev;
struct device *dev;
- struct cdev *sm_cdev;
+ struct cdev sm_cdev;
struct device *sm_dev;
struct semaphore sm_sem;
@@ -136,7 +129,6 @@ static struct class *umad_class;
static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
static DEFINE_SPINLOCK(port_lock);
-static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
@@ -496,8 +488,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
ah_attr.ah_flags = IB_AH_GRH;
memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
- ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
- ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
+ ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
+ ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
}
@@ -528,9 +520,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err_ah;
}
- packet->msg->ah = ah;
+ packet->msg->ah = ah;
packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
- packet->msg->retries = packet->mad.hdr.retries;
+ packet->msg->retries = packet->mad.hdr.retries;
packet->msg->context[0] = packet;
/* Copy MAD header. Any RMPP header is already in place. */
@@ -779,15 +771,11 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
/*
* ib_umad_open() does not need the BKL:
*
- * - umad_port[] accesses are protected by port_lock, the
- * ib_umad_port structures are properly reference counted, and
+ * - the ib_umad_port structures are properly reference counted, and
* everything else is purely local to the file being created, so
* races against other open calls are not a problem;
* - the ioctl method does not affect any global state outside of the
* file structure being operated on;
- * - the port is added to umad_port[] as the last part of module
- * initialization so the open method will either immediately run
- * -ENXIO, or all required initialization will be done.
*/
static int ib_umad_open(struct inode *inode, struct file *filp)
{
@@ -795,13 +783,10 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
struct ib_umad_file *file;
int ret = 0;
- spin_lock(&port_lock);
- port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
+ port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
if (port)
kref_get(&port->umad_dev->ref);
- spin_unlock(&port_lock);
-
- if (!port)
+ else
return -ENXIO;
mutex_lock(&port->file_mutex);
@@ -872,16 +857,16 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
}
static const struct file_operations umad_fops = {
- .owner = THIS_MODULE,
- .read = ib_umad_read,
- .write = ib_umad_write,
- .poll = ib_umad_poll,
+ .owner = THIS_MODULE,
+ .read = ib_umad_read,
+ .write = ib_umad_write,
+ .poll = ib_umad_poll,
.unlocked_ioctl = ib_umad_ioctl,
#ifdef CONFIG_COMPAT
- .compat_ioctl = ib_umad_compat_ioctl,
+ .compat_ioctl = ib_umad_compat_ioctl,
#endif
- .open = ib_umad_open,
- .release = ib_umad_close
+ .open = ib_umad_open,
+ .release = ib_umad_close
};
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@@ -892,13 +877,10 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
};
int ret;
- spin_lock(&port_lock);
- port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
+ port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
if (port)
kref_get(&port->umad_dev->ref);
- spin_unlock(&port_lock);
-
- if (!port)
+ else
return -ENXIO;
if (filp->f_flags & O_NONBLOCK) {
@@ -949,8 +931,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
}
static const struct file_operations umad_sm_fops = {
- .owner = THIS_MODULE,
- .open = ib_umad_sm_open,
+ .owner = THIS_MODULE,
+ .open = ib_umad_sm_open,
.release = ib_umad_sm_close
};
@@ -984,22 +966,54 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
-static ssize_t show_abi_version(struct class *class, char *buf)
+static CLASS_ATTR_STRING(abi_version, S_IRUGO,
+ __stringify(IB_USER_MAD_ABI_VERSION));
+
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
+static int find_overflow_devnum(void)
{
- return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+ int ret;
+
+ if (!overflow_maj) {
+ ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
+ "infiniband_mad");
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+ return ret;
+ }
+ }
+
+ ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
+ if (ret >= IB_UMAD_MAX_PORTS)
+ return -1;
+
+ return ret;
}
-static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_port *port)
{
+ int devnum;
+ dev_t base;
+
spin_lock(&port_lock);
- port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (port->dev_num >= IB_UMAD_MAX_PORTS) {
+ devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+ if (devnum >= IB_UMAD_MAX_PORTS) {
spin_unlock(&port_lock);
- return -1;
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
+ return -1;
+
+ spin_lock(&port_lock);
+ port->dev_num = devnum + IB_UMAD_MAX_PORTS;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ port->dev_num = devnum;
+ base = devnum + base_dev;
+ set_bit(devnum, dev_map);
}
- set_bit(port->dev_num, dev_map);
spin_unlock(&port_lock);
port->ib_dev = device;
@@ -1008,17 +1022,14 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
- port->cdev = cdev_alloc();
- if (!port->cdev)
- return -1;
- port->cdev->owner = THIS_MODULE;
- port->cdev->ops = &umad_fops;
- kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num);
- if (cdev_add(port->cdev, base_dev + port->dev_num, 1))
+ cdev_init(&port->cdev, &umad_fops);
+ port->cdev.owner = THIS_MODULE;
+ kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
+ if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
port->dev = device_create(umad_class, device->dma_device,
- port->cdev->dev, port,
+ port->cdev.dev, port,
"umad%d", port->dev_num);
if (IS_ERR(port->dev))
goto err_cdev;
@@ -1028,17 +1039,15 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->dev, &dev_attr_port))
goto err_dev;
- port->sm_cdev = cdev_alloc();
- if (!port->sm_cdev)
- goto err_dev;
- port->sm_cdev->owner = THIS_MODULE;
- port->sm_cdev->ops = &umad_sm_fops;
- kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num);
- if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
+ base += IB_UMAD_MAX_PORTS;
+ cdev_init(&port->sm_cdev, &umad_sm_fops);
+ port->sm_cdev.owner = THIS_MODULE;
+ kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
+ if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
port->sm_dev = device_create(umad_class, device->dma_device,
- port->sm_cdev->dev, port,
+ port->sm_cdev.dev, port,
"issm%d", port->dev_num);
if (IS_ERR(port->sm_dev))
goto err_sm_cdev;
@@ -1048,24 +1057,23 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->sm_dev, &dev_attr_port))
goto err_sm_dev;
- spin_lock(&port_lock);
- umad_port[port->dev_num] = port;
- spin_unlock(&port_lock);
-
return 0;
err_sm_dev:
- device_destroy(umad_class, port->sm_cdev->dev);
+ device_destroy(umad_class, port->sm_cdev.dev);
err_sm_cdev:
- cdev_del(port->sm_cdev);
+ cdev_del(&port->sm_cdev);
err_dev:
- device_destroy(umad_class, port->cdev->dev);
+ device_destroy(umad_class, port->cdev.dev);
err_cdev:
- cdev_del(port->cdev);
- clear_bit(port->dev_num, dev_map);
+ cdev_del(&port->cdev);
+ if (port->dev_num < IB_UMAD_MAX_PORTS)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
return -1;
}
@@ -1079,15 +1087,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
dev_set_drvdata(port->dev, NULL);
dev_set_drvdata(port->sm_dev, NULL);
- device_destroy(umad_class, port->cdev->dev);
- device_destroy(umad_class, port->sm_cdev->dev);
+ device_destroy(umad_class, port->cdev.dev);
+ device_destroy(umad_class, port->sm_cdev.dev);
- cdev_del(port->cdev);
- cdev_del(port->sm_cdev);
-
- spin_lock(&port_lock);
- umad_port[port->dev_num] = NULL;
- spin_unlock(&port_lock);
+ cdev_del(&port->cdev);
+ cdev_del(&port->sm_cdev);
mutex_lock(&port->file_mutex);
@@ -1106,7 +1110,10 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
mutex_unlock(&port->file_mutex);
- clear_bit(port->dev_num, dev_map);
+ if (port->dev_num < IB_UMAD_MAX_PORTS)
+ clear_bit(port->dev_num, dev_map);
+ else
+ clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
}
static void ib_umad_add_one(struct ib_device *device)
@@ -1185,7 +1192,7 @@ static int __init ib_umad_init(void)
goto out_chrdev;
}
- ret = class_create_file(umad_class, &class_attr_abi_version);
+ ret = class_create_file(umad_class, &class_attr_abi_version.attr);
if (ret) {
printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
goto out_class;
@@ -1214,6 +1221,8 @@ static void __exit ib_umad_cleanup(void)
ib_unregister_client(&umad_client);
class_destroy(umad_class);
unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
}
module_init(ib_umad_init);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index b3ea9587dc80..a078e5624d22 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -41,6 +41,7 @@
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/completion.h>
+#include <linux/cdev.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -69,23 +70,23 @@
struct ib_uverbs_device {
struct kref ref;
+ int num_comp_vectors;
struct completion comp;
- int devnum;
- struct cdev *cdev;
struct device *dev;
struct ib_device *ib_dev;
- int num_comp_vectors;
+ int devnum;
+ struct cdev cdev;
};
struct ib_uverbs_event_file {
struct kref ref;
+ int is_async;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
+ int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
- int is_async;
- int is_closed;
};
struct ib_uverbs_file {
@@ -145,7 +146,7 @@ extern struct idr ib_uverbs_srq_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- int is_async, int *fd);
+ int is_async);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 56feab6c251e..6fcfbeb24a23 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -35,6 +35,7 @@
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
@@ -285,7 +286,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
if (IS_ERR(ucontext)) {
- ret = PTR_ERR(file->ucontext);
+ ret = PTR_ERR(ucontext);
goto err;
}
@@ -301,10 +302,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
resp.num_comp_vectors = file->device->num_comp_vectors;
- filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
+ ret = get_unused_fd();
+ if (ret < 0)
+ goto err_free;
+ resp.async_fd = ret;
+
+ filp = ib_uverbs_alloc_event_file(file, 1);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
- goto err_free;
+ goto err_fd;
}
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -332,9 +338,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
return in_len;
err_file:
- put_unused_fd(resp.async_fd);
fput(filp);
+err_fd:
+ put_unused_fd(resp.async_fd);
+
err_free:
ibdev->dealloc_ucontext(ucontext);
@@ -715,6 +723,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
struct file *filp;
+ int ret;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -722,9 +731,16 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
- if (IS_ERR(filp))
+ ret = get_unused_fd();
+ if (ret < 0)
+ return ret;
+ resp.fd = ret;
+
+ filp = ib_uverbs_alloc_event_file(file, 0);
+ if (IS_ERR(filp)) {
+ put_unused_fd(resp.fd);
return PTR_ERR(filp);
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index aec0fbdfe7f0..fb3526254426 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -42,8 +42,9 @@
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
-#include <linux/mount.h>
#include <linux/cdev.h>
+#include <linux/anon_inodes.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
@@ -53,8 +54,6 @@ MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");
-#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
-
enum {
IB_UVERBS_MAJOR = 231,
IB_UVERBS_BASE_MINOR = 192,
@@ -75,44 +74,41 @@ DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
static DEFINE_SPINLOCK(map_lock);
-static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len) = {
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
- [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
- [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
- [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
- [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
- [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
- [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
+ [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
+ [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
+ [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
+ [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
+ [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
+ [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+ [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
+ [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
+ [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
+ [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
};
-static struct vfsmount *uverbs_event_mnt;
-
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
@@ -370,7 +366,7 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
static const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
+ .read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
.release = ib_uverbs_event_close,
.fasync = ib_uverbs_event_fasync
@@ -489,11 +485,10 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
}
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- int is_async, int *fd)
+ int is_async)
{
struct ib_uverbs_event_file *ev_file;
struct file *filp;
- int ret;
ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
if (!ev_file)
@@ -508,34 +503,12 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
ev_file->is_async = is_async;
ev_file->is_closed = 0;
- *fd = get_unused_fd();
- if (*fd < 0) {
- ret = *fd;
- goto err;
- }
-
- /*
- * fops_get() can't fail here, because we're coming from a
- * system call on a uverbs file, which will already have a
- * module reference.
- */
- filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root),
- FMODE_READ, fops_get(&uverbs_event_fops));
- if (!filp) {
- ret = -ENFILE;
- goto err_fd;
- }
-
- filp->private_data = ev_file;
+ filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
+ ev_file, O_RDONLY);
+ if (IS_ERR(filp))
+ kfree(ev_file);
return filp;
-
-err_fd:
- put_unused_fd(*fd);
-
-err:
- kfree(ev_file);
- return ERR_PTR(ret);
}
/*
@@ -612,14 +585,12 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
/*
* ib_uverbs_open() does not need the BKL:
*
- * - dev_table[] accesses are protected by map_lock, the
- * ib_uverbs_device structures are properly reference counted, and
+ * - the ib_uverbs_device structures are properly reference counted and
* everything else is purely local to the file being created, so
* races against other open calls are not a problem;
* - there is no ioctl method to race against;
- * - the device is added to dev_table[] as the last part of module
- * initialization, the open method will either immediately run
- * -ENXIO, or all required initialization will be done.
+ * - the open method will either immediately run -ENXIO, or all
+ * required initialization will be done.
*/
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
@@ -627,13 +598,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
struct ib_uverbs_file *file;
int ret;
- spin_lock(&map_lock);
- dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
+ dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
if (dev)
kref_get(&dev->ref);
- spin_unlock(&map_lock);
-
- if (!dev)
+ else
return -ENXIO;
if (!try_module_get(dev->ib_dev->owner)) {
@@ -680,17 +648,17 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
}
static const struct file_operations uverbs_fops = {
- .owner = THIS_MODULE,
- .write = ib_uverbs_write,
- .open = ib_uverbs_open,
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .open = ib_uverbs_open,
.release = ib_uverbs_close
};
static const struct file_operations uverbs_mmap_fops = {
- .owner = THIS_MODULE,
- .write = ib_uverbs_write,
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
.mmap = ib_uverbs_mmap,
- .open = ib_uverbs_open,
+ .open = ib_uverbs_open,
.release = ib_uverbs_close
};
@@ -724,14 +692,41 @@ static ssize_t show_dev_abi_version(struct device *device,
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
-static ssize_t show_abi_version(struct class *class, char *buf)
+static CLASS_ATTR_STRING(abi_version, S_IRUGO,
+ __stringify(IB_USER_VERBS_ABI_VERSION));
+
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
+
+/*
+ * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
+ * requesting a new major number and doubling the number of max devices we
+ * support. It's stupid, but simple.
+ */
+static int find_overflow_devnum(void)
{
- return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
+ int ret;
+
+ if (!overflow_maj) {
+ ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
+ "infiniband_verbs");
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
+ return ret;
+ }
+ }
+
+ ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
+ if (ret >= IB_UVERBS_MAX_DEVICES)
+ return -1;
+
+ return ret;
}
-static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static void ib_uverbs_add_one(struct ib_device *device)
{
+ int devnum;
+ dev_t base;
struct ib_uverbs_device *uverbs_dev;
if (!device->alloc_ucontext)
@@ -745,28 +740,36 @@ static void ib_uverbs_add_one(struct ib_device *device)
init_completion(&uverbs_dev->comp);
spin_lock(&map_lock);
- uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
+ devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+ if (devnum >= IB_UVERBS_MAX_DEVICES) {
spin_unlock(&map_lock);
- goto err;
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
+ goto err;
+
+ spin_lock(&map_lock);
+ uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ uverbs_dev->devnum = devnum;
+ base = devnum + IB_UVERBS_BASE_DEV;
+ set_bit(devnum, dev_map);
}
- set_bit(uverbs_dev->devnum, dev_map);
spin_unlock(&map_lock);
uverbs_dev->ib_dev = device;
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
- uverbs_dev->cdev = cdev_alloc();
- if (!uverbs_dev->cdev)
- goto err;
- uverbs_dev->cdev->owner = THIS_MODULE;
- uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum);
- if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ cdev_init(&uverbs_dev->cdev, NULL);
+ uverbs_dev->cdev.owner = THIS_MODULE;
+ uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+ kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
+ if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
- uverbs_dev->cdev->dev, uverbs_dev,
+ uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
goto err_cdev;
@@ -776,20 +779,19 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
- spin_lock(&map_lock);
- dev_table[uverbs_dev->devnum] = uverbs_dev;
- spin_unlock(&map_lock);
-
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
err_class:
- device_destroy(uverbs_class, uverbs_dev->cdev->dev);
+ device_destroy(uverbs_class, uverbs_dev->cdev.dev);
err_cdev:
- cdev_del(uverbs_dev->cdev);
- clear_bit(uverbs_dev->devnum, dev_map);
+ cdev_del(&uverbs_dev->cdev);
+ if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
err:
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
@@ -806,35 +808,19 @@ static void ib_uverbs_remove_one(struct ib_device *device)
return;
dev_set_drvdata(uverbs_dev->dev, NULL);
- device_destroy(uverbs_class, uverbs_dev->cdev->dev);
- cdev_del(uverbs_dev->cdev);
+ device_destroy(uverbs_class, uverbs_dev->cdev.dev);
+ cdev_del(&uverbs_dev->cdev);
- spin_lock(&map_lock);
- dev_table[uverbs_dev->devnum] = NULL;
- spin_unlock(&map_lock);
-
- clear_bit(uverbs_dev->devnum, dev_map);
+ if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
+ clear_bit(uverbs_dev->devnum, dev_map);
+ else
+ clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev);
}
-static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- struct vfsmount *mnt)
-{
- return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
- INFINIBANDEVENTFS_MAGIC, mnt);
-}
-
-static struct file_system_type uverbs_event_fs = {
- /* No owner field so module can be unloaded */
- .name = "infinibandeventfs",
- .get_sb = uverbs_event_get_sb,
- .kill_sb = kill_litter_super
-};
-
static int __init ib_uverbs_init(void)
{
int ret;
@@ -853,39 +839,20 @@ static int __init ib_uverbs_init(void)
goto out_chrdev;
}
- ret = class_create_file(uverbs_class, &class_attr_abi_version);
+ ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
goto out_class;
}
- ret = register_filesystem(&uverbs_event_fs);
- if (ret) {
- printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
- goto out_class;
- }
-
- uverbs_event_mnt = kern_mount(&uverbs_event_fs);
- if (IS_ERR(uverbs_event_mnt)) {
- ret = PTR_ERR(uverbs_event_mnt);
- printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
- goto out_fs;
- }
-
ret = ib_register_client(&uverbs_client);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't register client\n");
- goto out_mnt;
+ goto out_class;
}
return 0;
-out_mnt:
- mntput(uverbs_event_mnt);
-
-out_fs:
- unregister_filesystem(&uverbs_event_fs);
-
out_class:
class_destroy(uverbs_class);
@@ -899,10 +866,10 @@ out:
static void __exit ib_uverbs_cleanup(void)
{
ib_unregister_client(&uverbs_client);
- mntput(uverbs_event_mnt);
- unregister_filesystem(&uverbs_event_fs);
class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr);