diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-28 11:04:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-28 11:04:16 -0700 |
commit | 1cbe06c3cf542d48eb22180163e00f91760ef8cd (patch) | |
tree | ba093bf9e32790950b99bfec838a0354df5bf0dc /drivers/infiniband/core/sysfs.c | |
parent | ed2608faa0f701b1dbc65277a9e5c7ff7118bfd4 (diff) | |
parent | 7a226f9c32b0481b0744e2726cd7f8349b866af5 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull more rdma updates from Doug Ledford:
"This is the second group of code for the 4.7 merge window. It looks
large, but only in one sense. I'll get to that in a minute. The list
of changes here breaks down as follows:
- Dynamic counter infrastructure in the IB drivers
This is a sysfs based code to allow free form access to the
hardware counters RDMA devices might support so drivers don't need
to code this up repeatedly themselves
- SendOnlyFullMember multicast support
- IB router support
- A couple misc fixes
- The big item on the list: hfi1 driver updates, plus moving the hfi1
driver out of staging
There was a group of 15 patches in the hfi1 list that I thought I had
in the first pull request but they weren't. So that added to the
length of the hfi1 section here.
As far as these go, everything but the hfi1 is pretty straight
forward.
The hfi1 is, if you recall, the driver that Al had complaints about
how it used the write/writev interfaces in an overloaded fashion. The
write portion of their interface behaved like the write handler in the
IB stack proper and did bi-directional communications. The writev
interface, on the other hand, only accepts SDMA request structures.
The completions for those structures are sent back via an entirely
different event mechanism.
With the security patch, we put security checks on the write
interface, however, we also knew they would be going away soon. Now,
we've converted the write handler in the hfi1 driver to use ioctls
from the IB reserved magic area for its bidirectional communications.
With that change, Intel has addressed all of the items originally on
their TODO when they went into staging (as well as many items added to
the list later).
As such, I moved them out, and since they were the last item in the
staging/rdma directory, and I don't have immediate plans to use the
staging area again, I removed the staging/rdma area.
Because of the move out of staging, as well as a series of 5 patches
in the hfi1 driver that removed code people thought should be done in
a different way and was optional to begin with (a snoop debug
interface, an eeprom driver for an eeprom connected directory to their
hfi1 chip and not via an i2c bus, and a few other things like that),
the line count, especially the removal count, is high"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (56 commits)
staging/rdma: Remove the entire rdma subdirectory of staging
IB/core: Make device counter infrastructure dynamic
IB/hfi1: Fix pio map initialization
IB/hfi1: Correct 8051 link parameter settings
IB/hfi1: Update pkey table properly after link down or FM start
IB/rdamvt: Fix rdmavt s_ack_queue sizing
IB/rdmavt: Max atomic value should be a u8
IB/hfi1: Fix hard lockup due to not using save/restore spin lock
IB/hfi1: Add tracing support for send with invalidate opcode
IB/hfi1, qib: Add ieth to the packet header definitions
IB/hfi1: Move driver out of staging
IB/hfi1: Do not free hfi1 cdev parent structure early
IB/hfi1: Add trace message in user IOCTL handling
IB/hfi1: Remove write(), use ioctl() for user cmds
IB/hfi1: Add ioctl() interface for user commands
IB/hfi1: Remove unused user command
IB/hfi1: Remove snoop/diag interface
IB/hfi1: Remove EPROM functionality from data device
IB/hfi1: Remove UI char device
IB/hfi1: Remove multiple device cdev
...
Diffstat (limited to 'drivers/infiniband/core/sysfs.c')
-rw-r--r-- | drivers/infiniband/core/sysfs.c | 366 |
1 files changed, 242 insertions, 124 deletions
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 14606afbfaa8..5e573bb18660 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -56,8 +56,10 @@ struct ib_port { struct gid_attr_group *gid_attr_group; struct attribute_group gid_group; struct attribute_group pkey_group; - u8 port_num; struct attribute_group *pma_table; + struct attribute_group *hw_stats_ag; + struct rdma_hw_stats *hw_stats; + u8 port_num; }; struct port_attribute { @@ -80,6 +82,18 @@ struct port_table_attribute { __be16 attr_id; }; +struct hw_stats_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count); + int index; + u8 port_num; +}; + static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -733,6 +747,212 @@ static struct attribute_group *get_counter_table(struct ib_device *dev, return &pma_group; } +static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, + u8 port_num, int index) +{ + int ret; + + if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan)) + return 0; + ret = dev->get_hw_stats(dev, stats, port_num, index); + if (ret < 0) + return ret; + if (ret == stats->num_counters) + stats->timestamp = jiffies; + + return 0; +} + +static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf) +{ + return sprintf(buf, "%llu\n", stats->value[index]); +} + +static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct ib_device *dev; + struct ib_port *port; + struct hw_stats_attribute *hsa; + struct rdma_hw_stats *stats; + int ret; + + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + dev = container_of((struct device *)kobj, + struct ib_device, dev); + stats = dev->hw_stats; + } else { + port = container_of(kobj, struct ib_port, kobj); + dev = port->ibdev; + stats = port->hw_stats; + } + ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index); + if (ret) + return ret; + return print_hw_stat(stats, hsa->index, buf); +} + +static ssize_t show_stats_lifespan(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct hw_stats_attribute *hsa; + int msecs; + + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + struct ib_device *dev = container_of((struct device *)kobj, + struct ib_device, dev); + msecs = jiffies_to_msecs(dev->hw_stats->lifespan); + } else { + struct ib_port *p = container_of(kobj, struct ib_port, kobj); + msecs = jiffies_to_msecs(p->hw_stats->lifespan); + } + return sprintf(buf, "%d\n", msecs); +} + +static ssize_t set_stats_lifespan(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct hw_stats_attribute *hsa; + int msecs; + int jiffies; + int ret; + + ret = kstrtoint(buf, 10, &msecs); + if (ret) + return ret; + if (msecs < 0 || msecs > 10000) + return -EINVAL; + jiffies = msecs_to_jiffies(msecs); + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + struct ib_device *dev = container_of((struct device *)kobj, + struct ib_device, dev); + dev->hw_stats->lifespan = jiffies; + } else { + struct ib_port *p = container_of(kobj, struct ib_port, kobj); + p->hw_stats->lifespan = jiffies; + } + return count; +} + +static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group) +{ + struct attribute **attr; + + sysfs_remove_group(kobj, attr_group); + + for (attr = attr_group->attrs; *attr; attr++) + kfree(*attr); + kfree(attr_group); +} + +static struct attribute *alloc_hsa(int index, u8 port_num, const char *name) +{ + struct hw_stats_attribute *hsa; + + hsa = kmalloc(sizeof(*hsa), GFP_KERNEL); + if (!hsa) + return NULL; + + hsa->attr.name = (char *)name; + hsa->attr.mode = S_IRUGO; + hsa->show = show_hw_stats; + hsa->store = NULL; + hsa->index = index; + hsa->port_num = port_num; + + return &hsa->attr; +} + +static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num) +{ + struct hw_stats_attribute *hsa; + + hsa = kmalloc(sizeof(*hsa), GFP_KERNEL); + if (!hsa) + return NULL; + + hsa->attr.name = name; + hsa->attr.mode = S_IWUSR | S_IRUGO; + hsa->show = show_stats_lifespan; + hsa->store = set_stats_lifespan; + hsa->index = 0; + hsa->port_num = port_num; + + return &hsa->attr; +} + +static void setup_hw_stats(struct ib_device *device, struct ib_port *port, + u8 port_num) +{ + struct attribute_group *hsag = NULL; + struct rdma_hw_stats *stats; + int i = 0, ret; + + stats = device->alloc_hw_stats(device, port_num); + + if (!stats) + return; + + if (!stats->names || stats->num_counters <= 0) + goto err; + + hsag = kzalloc(sizeof(*hsag) + + // 1 extra for the lifespan config entry + sizeof(void *) * (stats->num_counters + 1), + GFP_KERNEL); + if (!hsag) + return; + + ret = device->get_hw_stats(device, stats, port_num, + stats->num_counters); + if (ret != stats->num_counters) + goto err; + + stats->timestamp = jiffies; + + hsag->name = "hw_counters"; + hsag->attrs = (void *)hsag + sizeof(*hsag); + + for (i = 0; i < stats->num_counters; i++) { + hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]); + if (!hsag->attrs[i]) + goto err; + } + + /* treat an error here as non-fatal */ + hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num); + + if (port) { + struct kobject *kobj = &port->kobj; + ret = sysfs_create_group(kobj, hsag); + if (ret) + goto err; + port->hw_stats_ag = hsag; + port->hw_stats = stats; + } else { + struct kobject *kobj = &device->dev.kobj; + ret = sysfs_create_group(kobj, hsag); + if (ret) + goto err; + device->hw_stats_ag = hsag; + device->hw_stats = stats; + } + + return; + +err: + kfree(stats); + for (; i >= 0; i--) + kfree(hsag->attrs[i]); + kfree(hsag); + return; +} + static int add_port(struct ib_device *device, int port_num, int (*port_callback)(struct ib_device *, u8, struct kobject *)) @@ -835,6 +1055,14 @@ static int add_port(struct ib_device *device, int port_num, goto err_remove_pkey; } + /* + * If port == 0, it means we have only one port and the parent + * device, not this port device, should be the holder of the + * hw_counters + */ + if (device->alloc_hw_stats && port_num) + setup_hw_stats(device, p, port_num); + list_add_tail(&p->kobj.entry, &device->port_list); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -972,120 +1200,6 @@ static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_desc }; -/* Show a given an attribute in the statistics group */ -static ssize_t show_protocol_stat(const struct device *device, - struct device_attribute *attr, char *buf, - unsigned offset) -{ - struct ib_device *dev = container_of(device, struct ib_device, dev); - union rdma_protocol_stats stats; - ssize_t ret; - - ret = dev->get_protocol_stats(dev, &stats); - if (ret) - return ret; - - return sprintf(buf, "%llu\n", - (unsigned long long) ((u64 *) &stats)[offset]); -} - -/* generate a read-only iwarp statistics attribute */ -#define IW_STATS_ENTRY(name) \ -static ssize_t show_##name(struct device *device, \ - struct device_attribute *attr, char *buf) \ -{ \ - return show_protocol_stat(device, attr, buf, \ - offsetof(struct iw_protocol_stats, name) / \ - sizeof (u64)); \ -} \ -static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) - -IW_STATS_ENTRY(ipInReceives); -IW_STATS_ENTRY(ipInHdrErrors); -IW_STATS_ENTRY(ipInTooBigErrors); -IW_STATS_ENTRY(ipInNoRoutes); -IW_STATS_ENTRY(ipInAddrErrors); -IW_STATS_ENTRY(ipInUnknownProtos); -IW_STATS_ENTRY(ipInTruncatedPkts); -IW_STATS_ENTRY(ipInDiscards); -IW_STATS_ENTRY(ipInDelivers); -IW_STATS_ENTRY(ipOutForwDatagrams); -IW_STATS_ENTRY(ipOutRequests); -IW_STATS_ENTRY(ipOutDiscards); -IW_STATS_ENTRY(ipOutNoRoutes); -IW_STATS_ENTRY(ipReasmTimeout); -IW_STATS_ENTRY(ipReasmReqds); -IW_STATS_ENTRY(ipReasmOKs); -IW_STATS_ENTRY(ipReasmFails); -IW_STATS_ENTRY(ipFragOKs); -IW_STATS_ENTRY(ipFragFails); -IW_STATS_ENTRY(ipFragCreates); -IW_STATS_ENTRY(ipInMcastPkts); -IW_STATS_ENTRY(ipOutMcastPkts); -IW_STATS_ENTRY(ipInBcastPkts); -IW_STATS_ENTRY(ipOutBcastPkts); -IW_STATS_ENTRY(tcpRtoAlgorithm); -IW_STATS_ENTRY(tcpRtoMin); -IW_STATS_ENTRY(tcpRtoMax); -IW_STATS_ENTRY(tcpMaxConn); -IW_STATS_ENTRY(tcpActiveOpens); -IW_STATS_ENTRY(tcpPassiveOpens); -IW_STATS_ENTRY(tcpAttemptFails); -IW_STATS_ENTRY(tcpEstabResets); -IW_STATS_ENTRY(tcpCurrEstab); -IW_STATS_ENTRY(tcpInSegs); -IW_STATS_ENTRY(tcpOutSegs); -IW_STATS_ENTRY(tcpRetransSegs); -IW_STATS_ENTRY(tcpInErrs); -IW_STATS_ENTRY(tcpOutRsts); - -static struct attribute *iw_proto_stats_attrs[] = { - &dev_attr_ipInReceives.attr, - &dev_attr_ipInHdrErrors.attr, - &dev_attr_ipInTooBigErrors.attr, - &dev_attr_ipInNoRoutes.attr, - &dev_attr_ipInAddrErrors.attr, - &dev_attr_ipInUnknownProtos.attr, - &dev_attr_ipInTruncatedPkts.attr, - &dev_attr_ipInDiscards.attr, - &dev_attr_ipInDelivers.attr, - &dev_attr_ipOutForwDatagrams.attr, - &dev_attr_ipOutRequests.attr, - &dev_attr_ipOutDiscards.attr, - &dev_attr_ipOutNoRoutes.attr, - &dev_attr_ipReasmTimeout.attr, - &dev_attr_ipReasmReqds.attr, - &dev_attr_ipReasmOKs.attr, - &dev_attr_ipReasmFails.attr, - &dev_attr_ipFragOKs.attr, - &dev_attr_ipFragFails.attr, - &dev_attr_ipFragCreates.attr, - &dev_attr_ipInMcastPkts.attr, - &dev_attr_ipOutMcastPkts.attr, - &dev_attr_ipInBcastPkts.attr, - &dev_attr_ipOutBcastPkts.attr, - &dev_attr_tcpRtoAlgorithm.attr, - &dev_attr_tcpRtoMin.attr, - &dev_attr_tcpRtoMax.attr, - &dev_attr_tcpMaxConn.attr, - &dev_attr_tcpActiveOpens.attr, - &dev_attr_tcpPassiveOpens.attr, - &dev_attr_tcpAttemptFails.attr, - &dev_attr_tcpEstabResets.attr, - &dev_attr_tcpCurrEstab.attr, - &dev_attr_tcpInSegs.attr, - &dev_attr_tcpOutSegs.attr, - &dev_attr_tcpRetransSegs.attr, - &dev_attr_tcpInErrs.attr, - &dev_attr_tcpOutRsts.attr, - NULL -}; - -static struct attribute_group iw_stats_group = { - .name = "proto_stats", - .attrs = iw_proto_stats_attrs, -}; - static void free_port_list_attributes(struct ib_device *device) { struct kobject *p, *t; @@ -1093,6 +1207,10 @@ static void free_port_list_attributes(struct ib_device *device) list_for_each_entry_safe(p, t, &device->port_list, entry) { struct ib_port *port = container_of(p, struct ib_port, kobj); list_del(&p->entry); + if (port->hw_stats) { + kfree(port->hw_stats); + free_hsag(&port->kobj, port->hw_stats_ag); + } sysfs_remove_group(p, port->pma_table); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); @@ -1149,11 +1267,8 @@ int ib_device_register_sysfs(struct ib_device *device, } } - if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) { - ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group); - if (ret) - goto err_put; - } + if (device->alloc_hw_stats) + setup_hw_stats(device, NULL, 0); return 0; @@ -1169,15 +1284,18 @@ err: void ib_device_unregister_sysfs(struct ib_device *device) { - /* Hold kobject until ib_dealloc_device() */ - struct kobject *kobj_dev = kobject_get(&device->dev.kobj); int i; - if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) - sysfs_remove_group(kobj_dev, &iw_stats_group); + /* Hold kobject until ib_dealloc_device() */ + kobject_get(&device->dev.kobj); free_port_list_attributes(device); + if (device->hw_stats) { + kfree(device->hw_stats); + free_hsag(&device->dev.kobj, device->hw_stats_ag); + } + for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) device_remove_file(&device->dev, ib_class_attributes[i]); |