diff options
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/addr.c | 276 | ||||
-rw-r--r-- | drivers/infiniband/core/cm.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/core/cma.c | 131 | ||||
-rw-r--r-- | drivers/infiniband/core/iwcm.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/core/mad.c | 25 | ||||
-rw-r--r-- | drivers/infiniband/core/mad_rmpp.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/multicast.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/core/sa_query.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/core/sysfs.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/core/ucm.c | 77 | ||||
-rw-r--r-- | drivers/infiniband/core/ucma.c | 58 | ||||
-rw-r--r-- | drivers/infiniband/core/ud_header.c | 14 | ||||
-rw-r--r-- | drivers/infiniband/core/umem.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/core/user_mad.c | 179 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs.h | 13 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 28 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 257 |
17 files changed, 558 insertions, 521 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index bd07803e9183..0b926e45afe2 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -35,8 +35,8 @@ #include <linux/mutex.h> #include <linux/inetdevice.h> +#include <linux/slab.h> #include <linux/workqueue.h> -#include <linux/if_arp.h> #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> @@ -92,22 +92,12 @@ EXPORT_SYMBOL(rdma_addr_unregister_client); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { - switch (dev->type) { - case ARPHRD_INFINIBAND: - dev_addr->dev_type = RDMA_NODE_IB_CA; - break; - case ARPHRD_ETHER: - dev_addr->dev_type = RDMA_NODE_RNIC; - break; - default: - return -EADDRNOTAVAIL; - } - + dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); - dev_addr->src_dev = dev; + dev_addr->bound_dev_if = dev->ifindex; return 0; } EXPORT_SYMBOL(rdma_copy_addr); @@ -117,6 +107,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) struct net_device *dev; int ret = -EADDRNOTAVAIL; + if (dev_addr->bound_dev_if) { + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (!dev) + return -ENODEV; + ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_put(dev); + return ret; + } + switch (addr->sa_family) { case AF_INET: dev = ip_dev_find(&init_net, @@ -131,6 +130,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: + read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) { if (ipv6_chk_addr(&init_net, &((struct sockaddr_in6 *) addr)->sin6_addr, @@ -139,6 +139,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) break; } } + read_unlock(&dev_base_lock); break; #endif } @@ -176,48 +177,9 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -static void addr_send_arp(struct sockaddr *dst_in) -{ - struct rtable *rt; - struct flowi fl; - - memset(&fl, 0, sizeof fl); - - switch (dst_in->sa_family) { - case AF_INET: - fl.nl_u.ip4_u.daddr = - ((struct sockaddr_in *) dst_in)->sin_addr.s_addr; - - if (ip_route_output_key(&init_net, &rt, &fl)) - return; - - neigh_event_send(rt->u.dst.neighbour, NULL); - ip_rt_put(rt); - break; - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - { - struct dst_entry *dst; - - fl.nl_u.ip6_u.daddr = - ((struct sockaddr_in6 *) dst_in)->sin6_addr; - - dst = ip6_route_output(&init_net, NULL, &fl); - if (!dst) - return; - - neigh_event_send(dst->neighbour, NULL); - dst_release(dst); - break; - } -#endif - } -} - -static int addr4_resolve_remote(struct sockaddr_in *src_in, - struct sockaddr_in *dst_in, - struct rdma_dev_addr *addr) +static int addr4_resolve(struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + struct rdma_dev_addr *addr) { __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; @@ -229,10 +191,22 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in, memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; + fl.oif = addr->bound_dev_if; + ret = ip_route_output_key(&init_net, &rt, &fl); if (ret) goto out; + src_in->sin_family = AF_INET; + src_in->sin_addr.s_addr = rt->rt_src; + + if (rt->idev->dev->flags & IFF_LOOPBACK) { + ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); + goto put; + } + /* If the device does ARP internally, return 'done' */ if (rt->idev->dev->flags & IFF_NOARP) { rdma_copy_addr(addr, rt->idev->dev, NULL); @@ -240,21 +214,14 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in, } neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); - if (!neigh) { + if (!neigh || !(neigh->nud_state & NUD_VALID)) { + neigh_event_send(rt->u.dst.neighbour, NULL); ret = -ENODATA; + if (neigh) + goto release; goto put; } - if (!(neigh->nud_state & NUD_VALID)) { - ret = -ENODATA; - goto release; - } - - if (!src_ip) { - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = rt->rt_src; - } - ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); release: neigh_release(neigh); @@ -265,52 +232,77 @@ out: } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static int addr6_resolve_remote(struct sockaddr_in6 *src_in, - struct sockaddr_in6 *dst_in, - struct rdma_dev_addr *addr) +static int addr6_resolve(struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, + struct rdma_dev_addr *addr) { struct flowi fl; struct neighbour *neigh; struct dst_entry *dst; - int ret = -ENODATA; + int ret; memset(&fl, 0, sizeof fl); - fl.nl_u.ip6_u.daddr = dst_in->sin6_addr; - fl.nl_u.ip6_u.saddr = src_in->sin6_addr; + ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); + ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); + fl.oif = addr->bound_dev_if; dst = ip6_route_output(&init_net, NULL, &fl); - if (!dst) - return ret; + if ((ret = dst->error)) + goto put; + + if (ipv6_addr_any(&fl.fl6_src)) { + ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, + &fl.fl6_dst, 0, &fl.fl6_src); + if (ret) + goto put; + + src_in->sin6_family = AF_INET6; + ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); + } + + if (dst->dev->flags & IFF_LOOPBACK) { + ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); + goto put; + } + /* If the device does ARP internally, return 'done' */ if (dst->dev->flags & IFF_NOARP) { ret = rdma_copy_addr(addr, dst->dev, NULL); - } else { - neigh = dst->neighbour; - if (neigh && (neigh->nud_state & NUD_VALID)) - ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); + goto put; + } + + neigh = dst->neighbour; + if (!neigh || !(neigh->nud_state & NUD_VALID)) { + neigh_event_send(dst->neighbour, NULL); + ret = -ENODATA; + goto put; } + ret = rdma_copy_addr(addr, dst->dev, neigh->ha); +put: dst_release(dst); return ret; } #else -static int addr6_resolve_remote(struct sockaddr_in6 *src_in, - struct sockaddr_in6 *dst_in, - struct rdma_dev_addr *addr) +static int addr6_resolve(struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, + struct rdma_dev_addr *addr) { return -EADDRNOTAVAIL; } #endif -static int addr_resolve_remote(struct sockaddr *src_in, - struct sockaddr *dst_in, - struct rdma_dev_addr *addr) +static int addr_resolve(struct sockaddr *src_in, + struct sockaddr *dst_in, + struct rdma_dev_addr *addr) { if (src_in->sa_family == AF_INET) { - return addr4_resolve_remote((struct sockaddr_in *) src_in, + return addr4_resolve((struct sockaddr_in *) src_in, (struct sockaddr_in *) dst_in, addr); } else - return addr6_resolve_remote((struct sockaddr_in6 *) src_in, + return addr6_resolve((struct sockaddr_in6 *) src_in, (struct sockaddr_in6 *) dst_in, addr); } @@ -327,8 +319,7 @@ static void process_req(struct work_struct *work) if (req->status == -ENODATA) { src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; - req->status = addr_resolve_remote(src_in, dst_in, - req->addr); + req->status = addr_resolve(src_in, dst_in, req->addr); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) @@ -352,82 +343,6 @@ static void process_req(struct work_struct *work) } } -static int addr_resolve_local(struct sockaddr *src_in, - struct sockaddr *dst_in, - struct rdma_dev_addr *addr) -{ - struct net_device *dev; - int ret; - - switch (dst_in->sa_family) { - case AF_INET: - { - __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr; - __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr; - - dev = ip_dev_find(&init_net, dst_ip); - if (!dev) - return -EADDRNOTAVAIL; - - if (ipv4_is_zeronet(src_ip)) { - src_in->sa_family = dst_in->sa_family; - ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip; - ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (ipv4_is_loopback(src_ip)) { - ret = rdma_translate_ip(dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } else { - ret = rdma_translate_ip(src_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } - dev_put(dev); - break; - } - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - { - struct in6_addr *a; - - for_each_netdev(&init_net, dev) - if (ipv6_chk_addr(&init_net, - &((struct sockaddr_in6 *) dst_in)->sin6_addr, - dev, 1)) - break; - - if (!dev) - return -EADDRNOTAVAIL; - - a = &((struct sockaddr_in6 *) src_in)->sin6_addr; - - if (ipv6_addr_any(a)) { - src_in->sa_family = dst_in->sa_family; - ((struct sockaddr_in6 *) src_in)->sin6_addr = - ((struct sockaddr_in6 *) dst_in)->sin6_addr; - ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (ipv6_addr_loopback(a)) { - ret = rdma_translate_ip(dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } else { - ret = rdma_translate_ip(src_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } - break; - } -#endif - - default: - ret = -EADDRNOTAVAIL; - break; - } - - return ret; -} - int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, @@ -443,22 +358,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client, if (!req) return -ENOMEM; - if (src_addr) - memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); - memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); + src_in = (struct sockaddr *) &req->src_addr; + dst_in = (struct sockaddr *) &req->dst_addr; + + if (src_addr) { + if (src_addr->sa_family != dst_addr->sa_family) { + ret = -EINVAL; + goto err; + } + + memcpy(src_in, src_addr, ip_addr_size(src_addr)); + } else { + src_in->sa_family = dst_addr->sa_family; + } + + memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; req->client = client; atomic_inc(&client->refcount); - src_in = (struct sockaddr *) &req->src_addr; - dst_in = (struct sockaddr *) &req->dst_addr; - - req->status = addr_resolve_local(src_in, dst_in, addr); - if (req->status == -EADDRNOTAVAIL) - req->status = addr_resolve_remote(src_in, dst_in, addr); - + req->status = addr_resolve(src_in, dst_in, addr); switch (req->status) { case 0: req->timeout = jiffies; @@ -467,15 +388,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client, case -ENODATA: req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; queue_req(req); - addr_send_arp(dst_in); break; default: ret = req->status; atomic_dec(&client->refcount); - kfree(req); - break; + goto err; } return ret; +err: + kfree(req); + return ret; } EXPORT_SYMBOL(rdma_resolve_ip); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5130fc55b8e2..ad63b79afac1 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -42,6 +42,7 @@ #include <linux/random.h> #include <linux/rbtree.h> #include <linux/spinlock.h> +#include <linux/slab.h> #include <linux/sysfs.h> #include <linux/workqueue.h> #include <linux/kdev_t.h> @@ -3597,7 +3598,7 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, atomic_long_read(&group->counter[cm_attr->index])); } -static struct sysfs_ops cm_counter_ops = { +static const struct sysfs_ops cm_counter_ops = { .show = cm_show_counter }; @@ -3693,7 +3694,7 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); - if (!cm_dev->device) { + if (IS_ERR(cm_dev->device)) { kfree(cm_dev); return; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 075317884b53..6d777069d86d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -40,6 +40,7 @@ #include <linux/random.h> #include <linux/idr.h> #include <linux/inetdevice.h> +#include <linux/slab.h> #include <net/tcp.h> #include <net/ipv6.h> @@ -330,17 +331,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) union ib_gid gid; int ret = -ENODEV; - switch (rdma_node_get_transport(dev_addr->dev_type)) { - case RDMA_TRANSPORT_IB: - ib_addr_get_sgid(dev_addr, &gid); - break; - case RDMA_TRANSPORT_IWARP: - iw_addr_get_sgid(dev_addr, &gid); - break; - default: - return -ENODEV; - } - + rdma_addr_get_sgid(dev_addr, &gid); list_for_each_entry(cma_dev, &dev_list, list) { ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); @@ -1032,11 +1023,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); - if (ret) - goto destroy_id; + if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { + rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey); + } else { + ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, + &rt->addr.dev_addr); + if (ret) + goto destroy_id; + } + rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -1071,10 +1068,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); - if (ret) - goto err; + if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { + ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, + &id->route.addr.dev_addr); + if (ret) + goto err; + } id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -1474,15 +1473,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af) -{ - struct sockaddr_storage addr_in; - - memset(&addr_in, 0, sizeof addr_in); - addr_in.ss_family = af; - return rdma_bind_addr(id, (struct sockaddr *) &addr_in); -} - int rdma_listen(struct rdma_cm_id *id, int backlog) { struct rdma_id_private *id_priv; @@ -1490,7 +1480,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == CMA_IDLE) { - ret = cma_bind_any(id, AF_INET); + ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; + ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); if (ret) return ret; } @@ -1565,8 +1556,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct sockaddr_in6 *sin6; memset(&path_rec, 0, sizeof path_rec); - ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); - ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); + rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); + rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; @@ -1693,6 +1684,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id, } memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths); + id->route.num_paths = num_paths; return 0; err: cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED); @@ -1781,7 +1773,11 @@ port_found: if (ret) goto out; - ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); + id_priv->id.route.addr.dev_addr.dev_type = + (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ? + ARPHRD_INFINIBAND : ARPHRD_ETHER; + + rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); @@ -1839,7 +1835,7 @@ out: static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src, *dst; union ib_gid gid; int ret; @@ -1853,14 +1849,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) goto err; } - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); - ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) { - src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; - dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr; + src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; + if (cma_zero_addr(src)) { + dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; + if ((src->sa_family = dst->sa_family) == AF_INET) { + ((struct sockaddr_in *) src)->sin_addr.s_addr = + ((struct sockaddr_in *) dst)->sin_addr.s_addr; + } else { + ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr, + &((struct sockaddr_in6 *) dst)->sin6_addr); + } } work->id = id_priv; @@ -1878,10 +1879,14 @@ err: static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr) { - if (src_addr && src_addr->sa_family) - return rdma_bind_addr(id, src_addr); - else - return cma_bind_any(id, dst_addr->sa_family); + if (!src_addr || !src_addr->sa_family) { + src_addr = (struct sockaddr *) &id->route.addr.src_addr; + if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) { + ((struct sockaddr_in6 *) src_addr)->sin6_scope_id = + ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; + } + } + return rdma_bind_addr(id, src_addr); } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, @@ -2077,6 +2082,25 @@ static int cma_get_port(struct rdma_id_private *id_priv) return ret; } +static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, + struct sockaddr *addr) +{ +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct sockaddr_in6 *sin6; + + if (addr->sa_family != AF_INET6) + return 0; + + sin6 = (struct sockaddr_in6 *) addr; + if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !sin6->sin6_scope_id) + return -EINVAL; + + dev_addr->bound_dev_if = sin6->sin6_scope_id; +#endif + return 0; +} + int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; @@ -2089,6 +2113,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) return -EINVAL; + ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + if (ret) + goto err1; + if (!cma_any_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); if (ret) @@ -2108,7 +2136,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return 0; err2: - if (!cma_any_addr(addr)) { + if (id_priv->cma_dev) { mutex_lock(&lock); cma_detach_from_dev(id_priv); mutex_unlock(&lock); @@ -2687,10 +2715,15 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); } else if ((addr->sa_family == AF_INET6) && - ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) == + ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else if ((addr->sa_family == AF_INET6)) { + ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); + if (id_priv->id.ps == RDMA_PS_UDP) + mc_map[7] = 0x01; /* Use RDMA CM signature */ + *mgid = *(union ib_gid *) (mc_map + 4); } else { ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) @@ -2716,7 +2749,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); if (id_priv->id.ps == RDMA_PS_UDP) rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); - ib_addr_get_sgid(dev_addr, &rec.port_gid); + rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; @@ -2815,7 +2848,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id dev_addr = &id_priv->id.route.addr.dev_addr; - if ((dev_addr->src_dev == ndev) && + if ((dev_addr->bound_dev_if == ndev->ifindex) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->name, &id_priv->id); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 0f89909abce9..bfead5bc25f6 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -44,6 +44,7 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/completion.h> +#include <linux/slab.h> #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 7522008fda86..1df1194aeba4 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -34,6 +34,7 @@ * */ #include <linux/dma-mapping.h> +#include <linux/slab.h> #include <rdma/ib_cache.h> #include "mad_priv.h" @@ -1193,10 +1194,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, { int i; - for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { if ((*method)->agent[i]) { printk(KERN_ERR PFX "Method %d already in use\n", i); return -EINVAL; @@ -1330,13 +1328,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, goto error3; /* Finally, add in methods being registered */ - for (i = find_first_bit(mad_reg_req->method_mask, - IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; - } + return 0; error3: @@ -1429,13 +1423,9 @@ check_in_use: goto error4; /* Finally, add in methods being registered */ - for (i = find_first_bit(mad_reg_req->method_mask, - IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; - } + return 0; error4: @@ -2964,6 +2954,9 @@ static void ib_mad_remove_device(struct ib_device *device) { int i, num_ports, cur_port; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + if (device->node_type == RDMA_NODE_IB_SWITCH) { num_ports = 1; cur_port = 0; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 4e0f2829e0e5..f37878c9c06e 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -31,6 +31,8 @@ * SOFTWARE. */ +#include <linux/slab.h> + #include "mad_priv.h" #include "mad_rmpp.h" diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 8d82ba171353..a519801dcfb7 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -34,6 +34,7 @@ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/interrupt.h> +#include <linux/slab.h> #include <linux/bitops.h> #include <linux/random.h> diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 82543716d59e..7e1ffd8ccd5c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -604,6 +604,12 @@ retry: return ret ? ret : id; } +void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) +{ + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); +} +EXPORT_SYMBOL(ib_sa_unpack_path); + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 158a214da2f7..f901957abc8b 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -79,7 +79,7 @@ static ssize_t port_attr_show(struct kobject *kobj, return port_attr->show(p, port_attr, buf); } -static struct sysfs_ops port_sysfs_ops = { +static const struct sysfs_ops port_sysfs_ops = { .show = port_attr_show }; @@ -461,6 +461,7 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *, element->attr.attr.mode = S_IRUGO; element->attr.show = show; element->index = i; + sysfs_attr_init(&element->attr.attr); tab_attr[i] = &element->attr.attr; } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index f504c9b00c1b..512b1c43460c 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -44,6 +44,7 @@ #include <linux/cdev.h> #include <linux/idr.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <asm/uaccess.h> @@ -1215,15 +1216,18 @@ static void ib_ucm_release_dev(struct device *dev) ucm_dev = container_of(dev, struct ib_ucm_device, dev); cdev_del(&ucm_dev->cdev); - clear_bit(ucm_dev->devnum, dev_map); + if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) + clear_bit(ucm_dev->devnum, dev_map); + else + clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map); kfree(ucm_dev); } static const struct file_operations ucm_fops = { - .owner = THIS_MODULE, - .open = ib_ucm_open, + .owner = THIS_MODULE, + .open = ib_ucm_open, .release = ib_ucm_close, - .write = ib_ucm_write, + .write = ib_ucm_write, .poll = ib_ucm_poll, }; @@ -1237,8 +1241,32 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static dev_t overflow_maj; +static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); +static int find_overflow_devnum(void) +{ + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, + "infiniband_cm"); + if (ret) { + printk(KERN_ERR "ucm: couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES); + if (ret >= IB_UCM_MAX_DEVICES) + return -1; + + return ret; +} + static void ib_ucm_add_one(struct ib_device *device) { + int devnum; + dev_t base; struct ib_ucm_device *ucm_dev; if (!device->alloc_ucontext || @@ -1251,16 +1279,25 @@ static void ib_ucm_add_one(struct ib_device *device) ucm_dev->ib_dev = device; - ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); - if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) - goto err; - - set_bit(ucm_dev->devnum, dev_map); + devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); + if (devnum >= IB_UCM_MAX_DEVICES) { + devnum = find_overflow_devnum(); + if (devnum < 0) + goto err; + + ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + ucm_dev->devnum = devnum; + base = devnum + IB_UCM_BASE_DEV; + set_bit(devnum, dev_map); + } cdev_init(&ucm_dev->cdev, &ucm_fops); ucm_dev->cdev.owner = THIS_MODULE; kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); - if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) + if (cdev_add(&ucm_dev->cdev, base, 1)) goto err; ucm_dev->dev.class = &cm_class; @@ -1281,7 +1318,10 @@ err_dev: device_unregister(&ucm_dev->dev); err_cdev: cdev_del(&ucm_dev->cdev); - clear_bit(ucm_dev->devnum, dev_map); + if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); err: kfree(ucm_dev); return; @@ -1297,11 +1337,8 @@ static void ib_ucm_remove_one(struct ib_device *device) device_unregister(&ucm_dev->dev); } -static ssize_t show_abi_version(struct class *class, char *buf) -{ - return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); -} -static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); +static CLASS_ATTR_STRING(abi_version, S_IRUGO, + __stringify(IB_USER_CM_ABI_VERSION)); static int __init ib_ucm_init(void) { @@ -1314,7 +1351,7 @@ static int __init ib_ucm_init(void) goto error1; } - ret = class_create_file(&cm_class, &class_attr_abi_version); + ret = class_create_file(&cm_class, &class_attr_abi_version.attr); if (ret) { printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); goto error2; @@ -1328,7 +1365,7 @@ static int __init ib_ucm_init(void) return 0; error3: - class_remove_file(&cm_class, &class_attr_abi_version); + class_remove_file(&cm_class, &class_attr_abi_version.attr); error2: unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); error1: @@ -1338,8 +1375,10 @@ error1: static void __exit ib_ucm_cleanup(void) { ib_unregister_client(&ucm_client); - class_remove_file(&cm_class, &class_attr_abi_version); + class_remove_file(&cm_class, &class_attr_abi_version.attr); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index bb96d3c4b0f4..46185084121e 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -39,10 +39,12 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/miscdevice.h> +#include <linux/slab.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> +#include <rdma/rdma_cm_ib.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -562,10 +564,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, switch (route->num_paths) { case 0: dev_addr = &route->addr.dev_addr; - ib_addr_get_dgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].dgid); - ib_addr_get_sgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].sgid); + rdma_addr_get_dgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].dgid); + rdma_addr_get_sgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); break; case 2: @@ -812,6 +814,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, return ret; } +static int ucma_set_ib_path(struct ucma_context *ctx, + struct ib_path_rec_data *path_data, size_t optlen) +{ + struct ib_sa_path_rec sa_path; + struct rdma_cm_event event; + int ret; + + if (optlen % sizeof(*path_data)) + return -EINVAL; + + for (; optlen; optlen -= sizeof(*path_data), path_data++) { + if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | + IB_PATH_BIDIRECTIONAL)) + break; + } + + if (!optlen) + return -EINVAL; + + ib_sa_unpack_path(path_data->path_rec, &sa_path); + ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); + if (ret) + return ret; + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + return ucma_event_handler(ctx->cm_id, &event); +} + +static int ucma_set_option_ib(struct ucma_context *ctx, int optname, + void *optval, size_t optlen) +{ + int ret; + + switch (optname) { + case RDMA_OPTION_IB_PATH: + ret = ucma_set_ib_path(ctx, optval, optlen); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + static int ucma_set_option_level(struct ucma_context *ctx, int level, int optname, void *optval, size_t optlen) { @@ -821,6 +868,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level, case RDMA_OPTION_ID: ret = ucma_set_option_id(ctx, optname, optval, optlen); break; + case RDMA_OPTION_IB: + ret = ucma_set_option_ib(ctx, optname, optval, optlen); + break; default: ret = -ENOSYS; } diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 8ec7876bedcf..650b501eb142 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -181,6 +181,7 @@ static const struct ib_field deth_table[] = { * ib_ud_header_init - Initialize UD header structure * @payload_bytes:Length of packet payload * @grh_present:GRH flag (if non-zero, GRH will be included) + * @immediate_present: specify if immediate data should be used * @header:Structure to initialize * * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header, @@ -191,21 +192,13 @@ static const struct ib_field deth_table[] = { */ void ib_ud_header_init(int payload_bytes, int grh_present, + int immediate_present, struct ib_ud_header *header) { - int header_len; u16 packet_length; memset(header, 0, sizeof *header); - header_len = - IB_LRH_BYTES + - IB_BTH_BYTES + - IB_DETH_BYTES; - if (grh_present) { - header_len += IB_GRH_BYTES; - } - header->lrh.link_version = 0; header->lrh.link_next_header = grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; @@ -231,7 +224,8 @@ void ib_ud_header_init(int payload_bytes, header->lrh.packet_length = cpu_to_be16(packet_length); - if (header->immediate_present) + header->immediate_present = immediate_present; + if (immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; else header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 6f7c096abf13..415e186eee32 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -37,6 +37,7 @@ #include <linux/sched.h> #include <linux/hugetlb.h> #include <linux/dma-attrs.h> +#include <linux/slab.h> #include "uverbs.h" @@ -136,7 +137,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, down_write(¤t->mm->mmap_sem); locked = npages + current->mm->locked_vm; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { ret = -ENOMEM; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 7de02969ed7d..e7db054fb1c8 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -46,6 +46,7 @@ #include <linux/compat.h> #include <linux/sched.h> #include <linux/semaphore.h> +#include <linux/slab.h> #include <asm/uaccess.h> @@ -65,12 +66,9 @@ enum { }; /* - * Our lifetime rules for these structs are the following: each time a - * device special file is opened, we look up the corresponding struct - * ib_umad_port by minor in the umad_port[] table while holding the - * port_lock. If this lookup succeeds, we take a reference on the - * ib_umad_port's struct ib_umad_device while still holding the - * port_lock; if the lookup fails, we fail the open(). We drop these + * Our lifetime rules for these structs are the following: + * device special file is opened, we take a reference on the + * ib_umad_port's struct ib_umad_device. We drop these * references in the corresponding close(). * * In addition to references coming from open character devices, there @@ -78,19 +76,14 @@ enum { * module's reference taken when allocating the ib_umad_device in * ib_umad_add_one(). * - * When destroying an ib_umad_device, we clear all of its - * ib_umad_ports from umad_port[] while holding port_lock before - * dropping the module's reference to the ib_umad_device. This is - * always safe because any open() calls will either succeed and obtain - * a reference before we clear the umad_port[] entries, or fail after - * we clear the umad_port[] entries. + * When destroying an ib_umad_device, we drop the module's reference. */ struct ib_umad_port { - struct cdev *cdev; + struct cdev cdev; struct device *dev; - struct cdev *sm_cdev; + struct cdev sm_cdev; struct device *sm_dev; struct semaphore sm_sem; @@ -136,7 +129,6 @@ static struct class *umad_class; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static DEFINE_SPINLOCK(port_lock); -static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); @@ -496,8 +488,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, ah_attr.ah_flags = IB_AH_GRH; memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; - ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); - ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; + ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); + ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } @@ -528,9 +520,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_ah; } - packet->msg->ah = ah; + packet->msg->ah = ah; packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; - packet->msg->retries = packet->mad.hdr.retries; + packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; /* Copy MAD header. Any RMPP header is already in place. */ @@ -779,15 +771,11 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, /* * ib_umad_open() does not need the BKL: * - * - umad_port[] accesses are protected by port_lock, the - * ib_umad_port structures are properly reference counted, and + * - the ib_umad_port structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - the ioctl method does not affect any global state outside of the * file structure being operated on; - * - the port is added to umad_port[] as the last part of module - * initialization so the open method will either immediately run - * -ENXIO, or all required initialization will be done. */ static int ib_umad_open(struct inode *inode, struct file *filp) { @@ -795,13 +783,10 @@ static int ib_umad_open(struct inode *inode, struct file *filp) struct ib_umad_file *file; int ret = 0; - spin_lock(&port_lock); - port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; + port = container_of(inode->i_cdev, struct ib_umad_port, cdev); if (port) kref_get(&port->umad_dev->ref); - spin_unlock(&port_lock); - - if (!port) + else return -ENXIO; mutex_lock(&port->file_mutex); @@ -872,16 +857,16 @@ static int ib_umad_close(struct inode *inode, struct file *filp) } static const struct file_operations umad_fops = { - .owner = THIS_MODULE, - .read = ib_umad_read, - .write = ib_umad_write, - .poll = ib_umad_poll, + .owner = THIS_MODULE, + .read = ib_umad_read, + .write = ib_umad_write, + .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = ib_umad_compat_ioctl, + .compat_ioctl = ib_umad_compat_ioctl, #endif - .open = ib_umad_open, - .release = ib_umad_close + .open = ib_umad_open, + .release = ib_umad_close }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) @@ -892,13 +877,10 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) }; int ret; - spin_lock(&port_lock); - port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; + port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); if (port) kref_get(&port->umad_dev->ref); - spin_unlock(&port_lock); - - if (!port) + else return -ENXIO; if (filp->f_flags & O_NONBLOCK) { @@ -949,8 +931,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) } static const struct file_operations umad_sm_fops = { - .owner = THIS_MODULE, - .open = ib_umad_sm_open, + .owner = THIS_MODULE, + .open = ib_umad_sm_open, .release = ib_umad_sm_close }; @@ -984,22 +966,54 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static ssize_t show_abi_version(struct class *class, char *buf) +static CLASS_ATTR_STRING(abi_version, S_IRUGO, + __stringify(IB_USER_MAD_ABI_VERSION)); + +static dev_t overflow_maj; +static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); +static int find_overflow_devnum(void) { - return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, + "infiniband_mad"); + if (ret) { + printk(KERN_ERR "user_mad: couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS); + if (ret >= IB_UMAD_MAX_PORTS) + return -1; + + return ret; } -static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { + int devnum; + dev_t base; + spin_lock(&port_lock); - port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (port->dev_num >= IB_UMAD_MAX_PORTS) { + devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (devnum >= IB_UMAD_MAX_PORTS) { spin_unlock(&port_lock); - return -1; + devnum = find_overflow_devnum(); + if (devnum < 0) + return -1; + + spin_lock(&port_lock); + port->dev_num = devnum + IB_UMAD_MAX_PORTS; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + port->dev_num = devnum; + base = devnum + base_dev; + set_bit(devnum, dev_map); } - set_bit(port->dev_num, dev_map); spin_unlock(&port_lock); port->ib_dev = device; @@ -1008,17 +1022,14 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); - port->cdev = cdev_alloc(); - if (!port->cdev) - return -1; - port->cdev->owner = THIS_MODULE; - port->cdev->ops = &umad_fops; - kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num); - if (cdev_add(port->cdev, base_dev + port->dev_num, 1)) + cdev_init(&port->cdev, &umad_fops); + port->cdev.owner = THIS_MODULE; + kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); + if (cdev_add(&port->cdev, base, 1)) goto err_cdev; port->dev = device_create(umad_class, device->dma_device, - port->cdev->dev, port, + port->cdev.dev, port, "umad%d", port->dev_num); if (IS_ERR(port->dev)) goto err_cdev; @@ -1028,17 +1039,15 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (device_create_file(port->dev, &dev_attr_port)) goto err_dev; - port->sm_cdev = cdev_alloc(); - if (!port->sm_cdev) - goto err_dev; - port->sm_cdev->owner = THIS_MODULE; - port->sm_cdev->ops = &umad_sm_fops; - kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num); - if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) + base += IB_UMAD_MAX_PORTS; + cdev_init(&port->sm_cdev, &umad_sm_fops); + port->sm_cdev.owner = THIS_MODULE; + kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); + if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; port->sm_dev = device_create(umad_class, device->dma_device, - port->sm_cdev->dev, port, + port->sm_cdev.dev, port, "issm%d", port->dev_num); if (IS_ERR(port->sm_dev)) goto err_sm_cdev; @@ -1048,24 +1057,23 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (device_create_file(port->sm_dev, &dev_attr_port)) goto err_sm_dev; - spin_lock(&port_lock); - umad_port[port->dev_num] = port; - spin_unlock(&port_lock); - return 0; err_sm_dev: - device_destroy(umad_class, port->sm_cdev->dev); + device_destroy(umad_class, port->sm_cdev.dev); err_sm_cdev: - cdev_del(port->sm_cdev); + cdev_del(&port->sm_cdev); err_dev: - device_destroy(umad_class, port->cdev->dev); + device_destroy(umad_class, port->cdev.dev); err_cdev: - cdev_del(port->cdev); - clear_bit(port->dev_num, dev_map); + cdev_del(&port->cdev); + if (port->dev_num < IB_UMAD_MAX_PORTS) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); return -1; } @@ -1079,15 +1087,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port) dev_set_drvdata(port->dev, NULL); dev_set_drvdata(port->sm_dev, NULL); - device_destroy(umad_class, port->cdev->dev); - device_destroy(umad_class, port->sm_cdev->dev); + device_destroy(umad_class, port->cdev.dev); + device_destroy(umad_class, port->sm_cdev.dev); - cdev_del(port->cdev); - cdev_del(port->sm_cdev); - - spin_lock(&port_lock); - umad_port[port->dev_num] = NULL; - spin_unlock(&port_lock); + cdev_del(&port->cdev); + cdev_del(&port->sm_cdev); mutex_lock(&port->file_mutex); @@ -1106,7 +1110,10 @@ static void ib_umad_kill_port(struct ib_umad_port *port) mutex_unlock(&port->file_mutex); - clear_bit(port->dev_num, dev_map); + if (port->dev_num < IB_UMAD_MAX_PORTS) + clear_bit(port->dev_num, dev_map); + else + clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map); } static void ib_umad_add_one(struct ib_device *device) @@ -1185,7 +1192,7 @@ static int __init ib_umad_init(void) goto out_chrdev; } - ret = class_create_file(umad_class, &class_attr_abi_version); + ret = class_create_file(umad_class, &class_attr_abi_version.attr); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); goto out_class; @@ -1214,6 +1221,8 @@ static void __exit ib_umad_cleanup(void) ib_unregister_client(&umad_client); class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); } module_init(ib_umad_init); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b3ea9587dc80..a078e5624d22 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -41,6 +41,7 @@ #include <linux/idr.h> #include <linux/mutex.h> #include <linux/completion.h> +#include <linux/cdev.h> #include <rdma/ib_verbs.h> #include <rdma/ib_umem.h> @@ -69,23 +70,23 @@ struct ib_uverbs_device { struct kref ref; + int num_comp_vectors; struct completion comp; - int devnum; - struct cdev *cdev; struct device *dev; struct ib_device *ib_dev; - int num_comp_vectors; + int devnum; + struct cdev cdev; }; struct ib_uverbs_event_file { struct kref ref; + int is_async; struct ib_uverbs_file *uverbs_file; spinlock_t lock; + int is_closed; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; struct list_head event_list; - int is_async; - int is_closed; }; struct ib_uverbs_file { @@ -145,7 +146,7 @@ extern struct idr ib_uverbs_srq_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - int is_async, int *fd); + int is_async); struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 56feab6c251e..6fcfbeb24a23 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -35,6 +35,7 @@ #include <linux/file.h> #include <linux/fs.h> +#include <linux/slab.h> #include <asm/uaccess.h> @@ -285,7 +286,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext = ibdev->alloc_ucontext(ibdev, &udata); if (IS_ERR(ucontext)) { - ret = PTR_ERR(file->ucontext); + ret = PTR_ERR(ucontext); goto err; } @@ -301,10 +302,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, resp.num_comp_vectors = file->device->num_comp_vectors; - filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); + ret = get_unused_fd(); + if (ret < 0) + goto err_free; + resp.async_fd = ret; + + filp = ib_uverbs_alloc_event_file(file, 1); if (IS_ERR(filp)) { ret = PTR_ERR(filp); - goto err_free; + goto err_fd; } if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -332,9 +338,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, return in_len; err_file: - put_unused_fd(resp.async_fd); fput(filp); +err_fd: + put_unused_fd(resp.async_fd); + err_free: ibdev->dealloc_ucontext(ucontext); @@ -715,6 +723,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; struct file *filp; + int ret; if (out_len < sizeof resp) return -ENOSPC; @@ -722,9 +731,16 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); - if (IS_ERR(filp)) + ret = get_unused_fd(); + if (ret < 0) + return ret; + resp.fd = ret; + + filp = ib_uverbs_alloc_event_file(file, 0); + if (IS_ERR(filp)) { + put_unused_fd(resp.fd); return PTR_ERR(filp); + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index aec0fbdfe7f0..fb3526254426 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -42,8 +42,9 @@ #include <linux/poll.h> #include <linux/sched.h> #include <linux/file.h> -#include <linux/mount.h> #include <linux/cdev.h> +#include <linux/anon_inodes.h> +#include <linux/slab.h> #include <asm/uaccess.h> @@ -53,8 +54,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); -#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */ - enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, @@ -75,44 +74,41 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); static DEFINE_SPINLOCK(map_lock); -static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, - [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, - [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, - [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, - [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, - [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, - [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, + [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, + [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, + [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, + [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, + [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, + [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; -static struct vfsmount *uverbs_event_mnt; - static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); @@ -370,7 +366,7 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp) static const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, - .read = ib_uverbs_event_read, + .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, .fasync = ib_uverbs_event_fasync @@ -489,11 +485,10 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, } struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - int is_async, int *fd) + int is_async) { struct ib_uverbs_event_file *ev_file; struct file *filp; - int ret; ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); if (!ev_file) @@ -508,34 +503,12 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, ev_file->is_async = is_async; ev_file->is_closed = 0; - *fd = get_unused_fd(); - if (*fd < 0) { - ret = *fd; - goto err; - } - - /* - * fops_get() can't fail here, because we're coming from a - * system call on a uverbs file, which will already have a - * module reference. - */ - filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root), - FMODE_READ, fops_get(&uverbs_event_fops)); - if (!filp) { - ret = -ENFILE; - goto err_fd; - } - - filp->private_data = ev_file; + filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops, + ev_file, O_RDONLY); + if (IS_ERR(filp)) + kfree(ev_file); return filp; - -err_fd: - put_unused_fd(*fd); - -err: - kfree(ev_file); - return ERR_PTR(ret); } /* @@ -612,14 +585,12 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) /* * ib_uverbs_open() does not need the BKL: * - * - dev_table[] accesses are protected by map_lock, the - * ib_uverbs_device structures are properly reference counted, and + * - the ib_uverbs_device structures are properly reference counted and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; - * - the device is added to dev_table[] as the last part of module - * initialization, the open method will either immediately run - * -ENXIO, or all required initialization will be done. + * - the open method will either immediately run -ENXIO, or all + * required initialization will be done. */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { @@ -627,13 +598,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) struct ib_uverbs_file *file; int ret; - spin_lock(&map_lock); - dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; + dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); if (dev) kref_get(&dev->ref); - spin_unlock(&map_lock); - - if (!dev) + else return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { @@ -680,17 +648,17 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) } static const struct file_operations uverbs_fops = { - .owner = THIS_MODULE, - .write = ib_uverbs_write, - .open = ib_uverbs_open, + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .open = ib_uverbs_open, .release = ib_uverbs_close }; static const struct file_operations uverbs_mmap_fops = { - .owner = THIS_MODULE, - .write = ib_uverbs_write, + .owner = THIS_MODULE, + .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, - .open = ib_uverbs_open, + .open = ib_uverbs_open, .release = ib_uverbs_close }; @@ -724,14 +692,41 @@ static ssize_t show_dev_abi_version(struct device *device, } static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); -static ssize_t show_abi_version(struct class *class, char *buf) +static CLASS_ATTR_STRING(abi_version, S_IRUGO, + __stringify(IB_USER_VERBS_ABI_VERSION)); + +static dev_t overflow_maj; +static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); + +/* + * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by + * requesting a new major number and doubling the number of max devices we + * support. It's stupid, but simple. + */ +static int find_overflow_devnum(void) { - return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, + "infiniband_verbs"); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); + if (ret >= IB_UVERBS_MAX_DEVICES) + return -1; + + return ret; } -static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static void ib_uverbs_add_one(struct ib_device *device) { + int devnum; + dev_t base; struct ib_uverbs_device *uverbs_dev; if (!device->alloc_ucontext) @@ -745,28 +740,36 @@ static void ib_uverbs_add_one(struct ib_device *device) init_completion(&uverbs_dev->comp); spin_lock(&map_lock); - uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); - if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { + devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); + if (devnum >= IB_UVERBS_MAX_DEVICES) { spin_unlock(&map_lock); - goto err; + devnum = find_overflow_devnum(); + if (devnum < 0) + goto err; + + spin_lock(&map_lock); + uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + uverbs_dev->devnum = devnum; + base = devnum + IB_UVERBS_BASE_DEV; + set_bit(devnum, dev_map); } - set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); uverbs_dev->ib_dev = device; uverbs_dev->num_comp_vectors = device->num_comp_vectors; - uverbs_dev->cdev = cdev_alloc(); - if (!uverbs_dev->cdev) - goto err; - uverbs_dev->cdev->owner = THIS_MODULE; - uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; - kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + cdev_init(&uverbs_dev->cdev, NULL); + uverbs_dev->cdev.owner = THIS_MODULE; + uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; + kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; uverbs_dev->dev = device_create(uverbs_class, device->dma_device, - uverbs_dev->cdev->dev, uverbs_dev, + uverbs_dev->cdev.dev, uverbs_dev, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; @@ -776,20 +779,19 @@ static void ib_uverbs_add_one(struct ib_device *device) if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; - spin_lock(&map_lock); - dev_table[uverbs_dev->devnum] = uverbs_dev; - spin_unlock(&map_lock); - ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: - device_destroy(uverbs_class, uverbs_dev->cdev->dev); + device_destroy(uverbs_class, uverbs_dev->cdev.dev); err_cdev: - cdev_del(uverbs_dev->cdev); - clear_bit(uverbs_dev->devnum, dev_map); + cdev_del(&uverbs_dev->cdev); + if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); err: kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); @@ -806,35 +808,19 @@ static void ib_uverbs_remove_one(struct ib_device *device) return; dev_set_drvdata(uverbs_dev->dev, NULL); - device_destroy(uverbs_class, uverbs_dev->cdev->dev); - cdev_del(uverbs_dev->cdev); + device_destroy(uverbs_class, uverbs_dev->cdev.dev); + cdev_del(&uverbs_dev->cdev); - spin_lock(&map_lock); - dev_table[uverbs_dev->devnum] = NULL; - spin_unlock(&map_lock); - - clear_bit(uverbs_dev->devnum, dev_map); + if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) + clear_bit(uverbs_dev->devnum, dev_map); + else + clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); kfree(uverbs_dev); } -static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "infinibandevent:", NULL, - INFINIBANDEVENTFS_MAGIC, mnt); -} - -static struct file_system_type uverbs_event_fs = { - /* No owner field so module can be unloaded */ - .name = "infinibandeventfs", - .get_sb = uverbs_event_get_sb, - .kill_sb = kill_litter_super -}; - static int __init ib_uverbs_init(void) { int ret; @@ -853,39 +839,20 @@ static int __init ib_uverbs_init(void) goto out_chrdev; } - ret = class_create_file(uverbs_class, &class_attr_abi_version); + ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; } - ret = register_filesystem(&uverbs_event_fs); - if (ret) { - printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n"); - goto out_class; - } - - uverbs_event_mnt = kern_mount(&uverbs_event_fs); - if (IS_ERR(uverbs_event_mnt)) { - ret = PTR_ERR(uverbs_event_mnt); - printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n"); - goto out_fs; - } - ret = ib_register_client(&uverbs_client); if (ret) { printk(KERN_ERR "user_verbs: couldn't register client\n"); - goto out_mnt; + goto out_class; } return 0; -out_mnt: - mntput(uverbs_event_mnt); - -out_fs: - unregister_filesystem(&uverbs_event_fs); - out_class: class_destroy(uverbs_class); @@ -899,10 +866,10 @@ out: static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); - mntput(uverbs_event_mnt); - unregister_filesystem(&uverbs_event_fs); class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mw_idr); |