From fb19d4ad3f9b1ba348bc0855e994805f6083dc7a Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:28 -0500 Subject: vbus: add a "vbus-proxy" bus model for vbus_driver objects This will generally be used for hypervisors to publish any host-side virtual devices up to a guest. The guest will have the opportunity to consume any devices present on the vbus-proxy as if they were platform devices, similar to existing buses like PCI. Signed-off-by: Gregory Haskins --- drivers/Makefile | 1 + drivers/vbus/Kconfig | 14 +++++ drivers/vbus/Makefile | 3 + drivers/vbus/bus-proxy.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 170 insertions(+) create mode 100644 drivers/vbus/Kconfig create mode 100644 drivers/vbus/Makefile create mode 100644 drivers/vbus/bus-proxy.c (limited to 'drivers') diff --git a/drivers/Makefile b/drivers/Makefile index 6ee53c7a57a1..f0d8b97909d3 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -111,3 +111,4 @@ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ obj-y += ieee802154/ +obj-y += vbus/ diff --git a/drivers/vbus/Kconfig b/drivers/vbus/Kconfig new file mode 100644 index 000000000000..e1939f5ae2f7 --- /dev/null +++ b/drivers/vbus/Kconfig @@ -0,0 +1,14 @@ +# +# Virtual-Bus (VBus) driver configuration +# + +config VBUS_PROXY + tristate "Virtual-Bus support" + select SHM_SIGNAL + default n + help + Adds support for a virtual-bus model drivers in a guest to connect + to host side virtual-bus resources. If you are using this kernel + in a virtualization solution which implements virtual-bus devices + on the backend, say Y. If unsure, say N. 
+ diff --git a/drivers/vbus/Makefile b/drivers/vbus/Makefile new file mode 100644 index 000000000000..a29a1e06c60b --- /dev/null +++ b/drivers/vbus/Makefile @@ -0,0 +1,3 @@ + +vbus-proxy-objs += bus-proxy.o +obj-$(CONFIG_VBUS_PROXY) += vbus-proxy.o diff --git a/drivers/vbus/bus-proxy.c b/drivers/vbus/bus-proxy.c new file mode 100644 index 000000000000..3177f9f60d7f --- /dev/null +++ b/drivers/vbus/bus-proxy.c @@ -0,0 +1,152 @@ +/* + * Copyright 2009 Novell. All Rights Reserved. + * + * Author: + * Gregory Haskins + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); + +#define VBUS_PROXY_NAME "vbus-proxy" + +static struct vbus_device_proxy *to_dev(struct device *_dev) +{ + return _dev ? 
container_of(_dev, struct vbus_device_proxy, dev) : NULL; +} + +static struct vbus_driver *to_drv(struct device_driver *_drv) +{ + return container_of(_drv, struct vbus_driver, drv); +} + +/* + * This function is invoked whenever a new driver and/or device is added + * to check if there is a match + */ +static int vbus_dev_proxy_match(struct device *_dev, struct device_driver *_drv) +{ + struct vbus_device_proxy *dev = to_dev(_dev); + struct vbus_driver *drv = to_drv(_drv); + + return !strcmp(dev->type, drv->type); +} + +/* + * This function is invoked after the bus infrastructure has already made a + * match. The device will contain a reference to the paired driver which + * we will extract. + */ +static int vbus_dev_proxy_probe(struct device *_dev) +{ + int ret = 0; + struct vbus_device_proxy *dev = to_dev(_dev); + struct vbus_driver *drv = to_drv(_dev->driver); + + if (drv->ops->probe) + ret = drv->ops->probe(dev); + + return ret; +} + +static struct bus_type vbus_proxy = { + .name = VBUS_PROXY_NAME, + .match = vbus_dev_proxy_match, +}; + +static struct device vbus_proxy_rootdev = { + .parent = NULL, + .init_name = VBUS_PROXY_NAME, +}; + +static int __init vbus_init(void) +{ + int ret; + + ret = bus_register(&vbus_proxy); + BUG_ON(ret < 0); + + ret = device_register(&vbus_proxy_rootdev); + BUG_ON(ret < 0); + + return 0; +} + +postcore_initcall(vbus_init); + +static void device_release(struct device *dev) +{ + struct vbus_device_proxy *_dev; + + _dev = container_of(dev, struct vbus_device_proxy, dev); + + _dev->ops->release(_dev); +} + +int vbus_device_proxy_register(struct vbus_device_proxy *new) +{ + new->dev.parent = &vbus_proxy_rootdev; + new->dev.bus = &vbus_proxy; + new->dev.release = &device_release; + + return device_register(&new->dev); +} +EXPORT_SYMBOL_GPL(vbus_device_proxy_register); + +void vbus_device_proxy_unregister(struct vbus_device_proxy *dev) +{ + device_unregister(&dev->dev); +} +EXPORT_SYMBOL_GPL(vbus_device_proxy_unregister); + +static int 
match_device_id(struct device *_dev, void *data) +{ + struct vbus_device_proxy *dev = to_dev(_dev); + u64 id = *(u64 *)data; + + return dev->id == id; +} + +struct vbus_device_proxy *vbus_device_proxy_find(u64 id) +{ + struct device *dev; + + dev = bus_find_device(&vbus_proxy, NULL, &id, &match_device_id); + + return to_dev(dev); +} +EXPORT_SYMBOL_GPL(vbus_device_proxy_find); + +int vbus_driver_register(struct vbus_driver *new) +{ + new->drv.bus = &vbus_proxy; + new->drv.name = new->type; + new->drv.owner = new->owner; + new->drv.probe = vbus_dev_proxy_probe; + + return driver_register(&new->drv); +} +EXPORT_SYMBOL_GPL(vbus_driver_register); + +void vbus_driver_unregister(struct vbus_driver *drv) +{ + driver_unregister(&drv->drv); +} +EXPORT_SYMBOL_GPL(vbus_driver_unregister); + -- cgit v1.2.3 From 684f844e40bf2809c3072946738d5d082543f57e Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:29 -0500 Subject: vbus-proxy: add a pci-to-vbus bridge This patch adds a pci-based bridge driver to interface between the a host VBUS and the guest's vbus-proxy bus model. It completes the guest side notion of a "vbus-connector", and requires a cooresponding host-side connector (in this case, the pci-bridge model) to comlete the connection. Signed-off-by: Gregory Haskins --- drivers/vbus/Kconfig | 10 + drivers/vbus/Makefile | 3 + drivers/vbus/pci-bridge.c | 954 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/Kbuild | 1 + include/linux/vbus_pci.h | 145 +++++++ 5 files changed, 1113 insertions(+) create mode 100644 drivers/vbus/pci-bridge.c create mode 100644 include/linux/vbus_pci.h (limited to 'drivers') diff --git a/drivers/vbus/Kconfig b/drivers/vbus/Kconfig index e1939f5ae2f7..87c545d38fbc 100644 --- a/drivers/vbus/Kconfig +++ b/drivers/vbus/Kconfig @@ -12,3 +12,13 @@ config VBUS_PROXY in a virtualization solution which implements virtual-bus devices on the backend, say Y. If unsure, say N. 
+config VBUS_PCIBRIDGE + tristate "PCI to Virtual-Bus bridge" + depends on PCI + depends on VBUS_PROXY + select IOQ + default n + help + Provides a way to bridge host side vbus devices via a PCI-BRIDGE + object. If you are running virtualization with vbus devices on the + host, and the vbus is exposed via PCI, say Y. Otherwise, say N. diff --git a/drivers/vbus/Makefile b/drivers/vbus/Makefile index a29a1e06c60b..944b7f1fec90 100644 --- a/drivers/vbus/Makefile +++ b/drivers/vbus/Makefile @@ -1,3 +1,6 @@ vbus-proxy-objs += bus-proxy.o obj-$(CONFIG_VBUS_PROXY) += vbus-proxy.o + +vbus-pcibridge-objs += pci-bridge.o +obj-$(CONFIG_VBUS_PCIBRIDGE) += vbus-pcibridge.o diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c new file mode 100644 index 000000000000..81f7cdd2167a --- /dev/null +++ b/drivers/vbus/pci-bridge.c @@ -0,0 +1,954 @@ +/* + * Copyright (C) 2009 Novell. All Rights Reserved. + * + * Author: + * Gregory Haskins + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); + +#define VBUS_PCI_NAME "pci-to-vbus-bridge" + +struct vbus_pci { + spinlock_t lock; + struct pci_dev *dev; + struct ioq eventq; + struct vbus_pci_event *ring; + struct vbus_pci_regs *regs; + struct vbus_pci_signals *signals; + int irq; + int enabled:1; + struct { + struct dentry *fs; + int events; + int qnotify; + int qinject; + int notify; + int inject; + int bridgecalls; + int buscalls; + } stats; +}; + +static struct vbus_pci vbus_pci; + +struct vbus_pci_device { + char type[VBUS_MAX_DEVTYPE_LEN]; + u64 handle; + struct list_head shms; + struct vbus_device_proxy vdev; + struct work_struct add; + struct work_struct drop; +}; + +DEFINE_PER_CPU(struct vbus_pci_fastcall_desc, vbus_pci_percpu_fastcall) +____cacheline_aligned; + +/* + * ------------------- + * common routines + * ------------------- + */ + +static int +vbus_pci_bridgecall(unsigned long nr, void *data, unsigned long len) +{ + struct vbus_pci_call_desc params = { + .vector = nr, + .len = len, + .datap = __pa(data), + }; + unsigned long flags; + int ret; + + spin_lock_irqsave(&vbus_pci.lock, flags); + + memcpy_toio(&vbus_pci.regs->bridgecall, ¶ms, sizeof(params)); + ret = ioread32(&vbus_pci.regs->bridgecall); + + spin_unlock_irqrestore(&vbus_pci.lock, flags); + + vbus_pci.stats.bridgecalls++; + + return ret; +} + +static int +vbus_pci_buscall(unsigned long nr, void *data, unsigned long len) +{ + struct vbus_pci_fastcall_desc *params; + int ret; + + preempt_disable(); + + params = &get_cpu_var(vbus_pci_percpu_fastcall); + + params->call.vector = nr; + params->call.len = len; + params->call.datap = __pa(data); + + iowrite32(smp_processor_id(), &vbus_pci.signals->fastcall); + + ret = params->result; + + preempt_enable(); + + vbus_pci.stats.buscalls++; + + return ret; +} + +struct vbus_pci_device * +to_dev(struct 
vbus_device_proxy *vdev) +{ + return container_of(vdev, struct vbus_pci_device, vdev); +} + +static void +_signal_init(struct shm_signal *signal, struct shm_signal_desc *desc, + struct shm_signal_ops *ops) +{ + desc->magic = SHM_SIGNAL_MAGIC; + desc->ver = SHM_SIGNAL_VER; + + shm_signal_init(signal, shm_locality_north, ops, desc); +} + +/* + * ------------------- + * _signal + * ------------------- + */ + +struct _signal { + struct vbus_pci *pcivbus; + struct shm_signal signal; + u32 handle; + struct rb_node node; + struct list_head list; + struct { + int notify; + int inject; + } stats; +}; + +static struct _signal * +to_signal(struct shm_signal *signal) +{ + return container_of(signal, struct _signal, signal); +} + +static int +_signal_inject(struct shm_signal *signal) +{ + struct _signal *_signal = to_signal(signal); + + vbus_pci.stats.inject++; + _signal->stats.inject++; + iowrite32(_signal->handle, &vbus_pci.signals->shmsignal); + + return 0; +} + +static void +_signal_release(struct shm_signal *signal) +{ + struct _signal *_signal = to_signal(signal); + + kfree(_signal); +} + +static struct shm_signal_ops _signal_ops = { + .inject = _signal_inject, + .release = _signal_release, +}; + +/* + * ------------------- + * vbus_device_proxy routines + * ------------------- + */ + +static int +vbus_pci_device_open(struct vbus_device_proxy *vdev, int version, int flags) +{ + struct vbus_pci_device *dev = to_dev(vdev); + struct vbus_pci_deviceopen params; + int ret; + + if (dev->handle) + return -EINVAL; + + params.devid = vdev->id; + params.version = version; + + ret = vbus_pci_buscall(VBUS_PCI_HC_DEVOPEN, + ¶ms, sizeof(params)); + if (ret < 0) + return ret; + + dev->handle = params.handle; + + return 0; +} + +static int +vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags) +{ + struct vbus_pci_device *dev = to_dev(vdev); + unsigned long iflags; + int ret; + + if (!dev->handle) + return -EINVAL; + + spin_lock_irqsave(&vbus_pci.lock, iflags); + + while 
(!list_empty(&dev->shms)) { + struct _signal *_signal; + + _signal = list_first_entry(&dev->shms, struct _signal, list); + + list_del(&_signal->list); + + spin_unlock_irqrestore(&vbus_pci.lock, iflags); + shm_signal_put(&_signal->signal); + spin_lock_irqsave(&vbus_pci.lock, iflags); + } + + spin_unlock_irqrestore(&vbus_pci.lock, iflags); + + /* + * The DEVICECLOSE will implicitly close all of the shm on the + * host-side, so there is no need to do an explicit per-shm + * hypercall + */ + ret = vbus_pci_buscall(VBUS_PCI_HC_DEVCLOSE, + &dev->handle, sizeof(dev->handle)); + + if (ret < 0) + printk(KERN_ERR "VBUS-PCI: Error closing device %s/%lld: %d\n", + vdev->type, vdev->id, ret); + + dev->handle = 0; + + return 0; +} + +static int +vbus_pci_device_shm(struct vbus_device_proxy *vdev, int id, int prio, + void *ptr, size_t len, + struct shm_signal_desc *sdesc, struct shm_signal **signal, + int flags) +{ + struct vbus_pci_device *dev = to_dev(vdev); + struct _signal *_signal = NULL; + struct vbus_pci_deviceshm params; + unsigned long iflags; + int ret; + + if (!dev->handle) + return -EINVAL; + + params.devh = dev->handle; + params.id = id; + params.flags = flags; + params.datap = (u64)__pa(ptr); + params.len = len; + + if (signal) { + /* + * The signal descriptor must be embedded within the + * provided ptr + */ + if (!sdesc + || (len < sizeof(*sdesc)) + || ((void *)sdesc < ptr) + || ((void *)sdesc > (ptr + len - sizeof(*sdesc)))) + return -EINVAL; + + _signal = kzalloc(sizeof(*_signal), GFP_KERNEL); + if (!_signal) + return -ENOMEM; + + _signal_init(&_signal->signal, sdesc, &_signal_ops); + + /* + * take another reference for the host. 
This is dropped + * by a SHMCLOSE event + */ + shm_signal_get(&_signal->signal); + + params.signal.offset = (u64)sdesc - (u64)ptr; + params.signal.prio = prio; + params.signal.cookie = (u64)_signal; + + } else + params.signal.offset = -1; /* yes, this is a u32, but its ok */ + + ret = vbus_pci_buscall(VBUS_PCI_HC_DEVSHM, + ¶ms, sizeof(params)); + if (ret < 0) { + if (_signal) { + /* + * We held two references above, so we need to drop + * both of them + */ + shm_signal_put(&_signal->signal); + shm_signal_put(&_signal->signal); + } + + return ret; + } + + if (signal) { + BUG_ON(ret < 0); + + _signal->handle = ret; + + spin_lock_irqsave(&vbus_pci.lock, iflags); + + list_add_tail(&_signal->list, &dev->shms); + + spin_unlock_irqrestore(&vbus_pci.lock, iflags); + + shm_signal_get(&_signal->signal); + *signal = &_signal->signal; + } + + return 0; +} + +static int +vbus_pci_device_call(struct vbus_device_proxy *vdev, u32 func, void *data, + size_t len, int flags) +{ + struct vbus_pci_device *dev = to_dev(vdev); + struct vbus_pci_devicecall params = { + .devh = dev->handle, + .func = func, + .datap = (u64)__pa(data), + .len = len, + .flags = flags, + }; + + if (!dev->handle) + return -EINVAL; + + return vbus_pci_buscall(VBUS_PCI_HC_DEVCALL, ¶ms, sizeof(params)); +} + +static void +vbus_pci_device_release(struct vbus_device_proxy *vdev) +{ + struct vbus_pci_device *_dev = to_dev(vdev); + + vbus_pci_device_close(vdev, 0); + + kfree(_dev); +} + +struct vbus_device_proxy_ops vbus_pci_device_ops = { + .open = vbus_pci_device_open, + .close = vbus_pci_device_close, + .shm = vbus_pci_device_shm, + .call = vbus_pci_device_call, + .release = vbus_pci_device_release, +}; + +/* + * ------------------- + * vbus events + * ------------------- + */ + +static void +deferred_devadd(struct work_struct *work) +{ + struct vbus_pci_device *new; + int ret; + + new = container_of(work, struct vbus_pci_device, add); + + ret = vbus_device_proxy_register(&new->vdev); + if (ret < 0) + panic("failed 
to register device %lld(%s): %d\n", + new->vdev.id, new->type, ret); +} + +static void +deferred_devdrop(struct work_struct *work) +{ + struct vbus_pci_device *dev; + + dev = container_of(work, struct vbus_pci_device, drop); + vbus_device_proxy_unregister(&dev->vdev); +} + +static void +event_devadd(struct vbus_pci_add_event *event) +{ + struct vbus_pci_device *new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) { + printk(KERN_ERR "VBUS_PCI: Out of memory on add_event\n"); + return; + } + + INIT_LIST_HEAD(&new->shms); + + memcpy(new->type, event->type, VBUS_MAX_DEVTYPE_LEN); + new->vdev.type = new->type; + new->vdev.id = event->id; + new->vdev.ops = &vbus_pci_device_ops; + + dev_set_name(&new->vdev.dev, "%lld", event->id); + + INIT_WORK(&new->add, deferred_devadd); + INIT_WORK(&new->drop, deferred_devdrop); + + schedule_work(&new->add); +} + +static void +event_devdrop(struct vbus_pci_handle_event *event) +{ + struct vbus_device_proxy *dev = vbus_device_proxy_find(event->handle); + + if (!dev) { + printk(KERN_WARNING "VBUS-PCI: devdrop failed: %lld\n", + event->handle); + return; + } + + schedule_work(&to_dev(dev)->drop); +} + +static void +event_shmsignal(struct vbus_pci_handle_event *event) +{ + struct _signal *_signal = (struct _signal *)event->handle; + + vbus_pci.stats.notify++; + _signal->stats.notify++; + _shm_signal_wakeup(&_signal->signal); +} + +static void +event_shmclose(struct vbus_pci_handle_event *event) +{ + struct _signal *_signal = (struct _signal *)event->handle; + + /* + * This reference was taken during the DEVICESHM call + */ + shm_signal_put(&_signal->signal); +} + +/* + * ------------------- + * eventq routines + * ------------------- + */ + +static struct ioq_notifier eventq_notifier; + +static int __init +eventq_init(int qlen) +{ + struct ioq_iterator iter; + int ret; + int i; + + vbus_pci.ring = kzalloc(sizeof(struct vbus_pci_event) * qlen, + GFP_KERNEL); + if (!vbus_pci.ring) + return -ENOMEM; + + /* + * We want to iterate on the 
"valid" index. By default the iterator + * will not "autoupdate" which means it will not hypercall the host + * with our changes. This is good, because we are really just + * initializing stuff here anyway. Note that you can always manually + * signal the host with ioq_signal() if the autoupdate feature is not + * used. + */ + ret = ioq_iter_init(&vbus_pci.eventq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + /* + * Seek to the tail of the valid index (which should be our first + * item since the queue is brand-new) + */ + ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty vbus_event and mark it + * valid + */ + for (i = 0; i < qlen; i++) { + struct vbus_pci_event *event = &vbus_pci.ring[i]; + size_t len = sizeof(*event); + struct ioq_ring_desc *desc = iter.desc; + + BUG_ON(iter.desc->valid); + + desc->cookie = (u64)event; + desc->ptr = (u64)__pa(event); + desc->len = len; /* total length */ + desc->valid = 1; + + /* + * This push operation will simultaneously advance the + * valid-tail index and increment our position in the queue + * by one. + */ + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + } + + vbus_pci.eventq.notifier = &eventq_notifier; + + /* + * And finally, ensure that we can receive notification + */ + ioq_notify_enable(&vbus_pci.eventq, 0); + + return 0; +} + +/* Invoked whenever the hypervisor ioq_signal()s our eventq */ +static void +eventq_wakeup(struct ioq_notifier *notifier) +{ + struct ioq_iterator iter; + int ret; + + /* We want to iterate on the head of the in-use index */ + ret = ioq_iter_init(&vbus_pci.eventq, &iter, ioq_idxtype_inuse, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * The EOM is indicated by finding a packet that is still owned by + * the south side. + * + * FIXME: This in theory could run indefinitely if the host keeps + * feeding us events since there is nothing like a NAPI budget. 
We + * might need to address that + */ + while (!iter.desc->sown) { + struct ioq_ring_desc *desc = iter.desc; + struct vbus_pci_event *event; + + event = (struct vbus_pci_event *)desc->cookie; + + switch (event->eventid) { + case VBUS_PCI_EVENT_DEVADD: + event_devadd(&event->data.add); + break; + case VBUS_PCI_EVENT_DEVDROP: + event_devdrop(&event->data.handle); + break; + case VBUS_PCI_EVENT_SHMSIGNAL: + event_shmsignal(&event->data.handle); + break; + case VBUS_PCI_EVENT_SHMCLOSE: + event_shmclose(&event->data.handle); + break; + default: + printk(KERN_WARNING "VBUS_PCI: Unexpected event %d\n", + event->eventid); + break; + }; + + memset(event, 0, sizeof(*event)); + + /* Advance the in-use head */ + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + + vbus_pci.stats.events++; + } + + /* And let the south side know that we changed the queue */ + ioq_signal(&vbus_pci.eventq, 0); +} + +static struct ioq_notifier eventq_notifier = { + .signal = &eventq_wakeup, +}; + +/* Injected whenever the host issues an ioq_signal() on the eventq */ +irqreturn_t +eventq_intr(int irq, void *dev) +{ + vbus_pci.stats.qnotify++; + _shm_signal_wakeup(vbus_pci.eventq.signal); + + return IRQ_HANDLED; +} + +/* + * ------------------- + */ + +static int +eventq_signal_inject(struct shm_signal *signal) +{ + vbus_pci.stats.qinject++; + + /* The eventq uses the special-case handle=0 */ + iowrite32(0, &vbus_pci.signals->eventq); + + return 0; +} + +static void +eventq_signal_release(struct shm_signal *signal) +{ + kfree(signal); +} + +static struct shm_signal_ops eventq_signal_ops = { + .inject = eventq_signal_inject, + .release = eventq_signal_release, +}; + +/* + * ------------------- + */ + +static void +eventq_ioq_release(struct ioq *ioq) +{ + /* released as part of the vbus_pci object */ +} + +static struct ioq_ops eventq_ioq_ops = { + .release = eventq_ioq_release, +}; + +/* + * ------------------- + */ + +static void +vbus_pci_release(void) +{ +#ifdef CONFIG_DEBUG_FS + if 
(vbus_pci.stats.fs) + debugfs_remove(vbus_pci.stats.fs); +#endif + + if (vbus_pci.irq > 0) + free_irq(vbus_pci.irq, NULL); + + if (vbus_pci.signals) + pci_iounmap(vbus_pci.dev, (void *)vbus_pci.signals); + + if (vbus_pci.regs) + pci_iounmap(vbus_pci.dev, (void *)vbus_pci.regs); + + pci_release_regions(vbus_pci.dev); + pci_disable_device(vbus_pci.dev); + + kfree(vbus_pci.eventq.head_desc); + kfree(vbus_pci.ring); + + vbus_pci.enabled = false; +} + +static int __init +vbus_pci_open(void) +{ + struct vbus_pci_bridge_negotiate params = { + .magic = VBUS_PCI_ABI_MAGIC, + .version = VBUS_PCI_HC_VERSION, + .capabilities = 0, + }; + + return vbus_pci_bridgecall(VBUS_PCI_BRIDGE_NEGOTIATE, + ¶ms, sizeof(params)); +} + +#define QLEN 1024 + +static int __init +vbus_pci_eventq_register(void) +{ + struct vbus_pci_busreg params = { + .count = 1, + .eventq = { + { + .count = QLEN, + .ring = (u64)__pa(vbus_pci.eventq.head_desc), + .data = (u64)__pa(vbus_pci.ring), + }, + }, + }; + + return vbus_pci_bridgecall(VBUS_PCI_BRIDGE_QREG, + ¶ms, sizeof(params)); +} + +static int __init +_ioq_init(size_t ringsize, struct ioq *ioq, struct ioq_ops *ops) +{ + struct shm_signal *signal = NULL; + struct ioq_ring_head *head = NULL; + size_t len = IOQ_HEAD_DESC_SIZE(ringsize); + + head = kzalloc(len, GFP_KERNEL | GFP_DMA); + if (!head) + return -ENOMEM; + + signal = kzalloc(sizeof(*signal), GFP_KERNEL); + if (!signal) { + kfree(head); + return -ENOMEM; + } + + head->magic = IOQ_RING_MAGIC; + head->ver = IOQ_RING_VER; + head->count = ringsize; + + _signal_init(signal, &head->signal, &eventq_signal_ops); + + ioq_init(ioq, ops, ioq_locality_north, head, signal, ringsize); + + return 0; +} + +#ifdef CONFIG_DEBUG_FS +static int _debugfs_seq_show(struct seq_file *m, void *p) +{ +#define P(F) \ + seq_printf(m, " .%-30s: %d\n", #F, (int)vbus_pci.stats.F) + + P(events); + P(qnotify); + P(qinject); + P(notify); + P(inject); + P(bridgecalls); + P(buscalls); + +#undef P + + return 0; +} + +static int 
_debugfs_fops_open(struct inode *inode, struct file *file) +{ + return single_open(file, _debugfs_seq_show, inode->i_private); +} + +static const struct file_operations stat_fops = { + .open = _debugfs_fops_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; +#endif + +static int __devinit +vbus_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int ret; + int cpu; + + if (vbus_pci.enabled) + return -EEXIST; /* we only support one bridge per kernel */ + + if (pdev->revision != VBUS_PCI_ABI_VERSION) { + printk(KERN_DEBUG "VBUS_PCI: expected ABI version %d, got %d\n", + VBUS_PCI_ABI_VERSION, + pdev->revision); + return -ENODEV; + } + + vbus_pci.dev = pdev; + + ret = pci_enable_device(pdev); + if (ret < 0) + return ret; + + ret = pci_request_regions(pdev, VBUS_PCI_NAME); + if (ret < 0) { + printk(KERN_ERR "VBUS_PCI: Could not init BARs: %d\n", ret); + goto out_fail; + } + + vbus_pci.regs = pci_iomap(pdev, 0, sizeof(struct vbus_pci_regs)); + if (!vbus_pci.regs) { + printk(KERN_ERR "VBUS_PCI: Could not map BARs\n"); + goto out_fail; + } + + vbus_pci.signals = pci_iomap(pdev, 1, sizeof(struct vbus_pci_signals)); + if (!vbus_pci.signals) { + printk(KERN_ERR "VBUS_PCI: Could not map BARs\n"); + goto out_fail; + } + + ret = vbus_pci_open(); + if (ret < 0) { + printk(KERN_DEBUG "VBUS_PCI: Could not register with host: %d\n", + ret); + goto out_fail; + } + + /* + * Allocate an IOQ to use for host-2-guest event notification + */ + ret = _ioq_init(QLEN, &vbus_pci.eventq, &eventq_ioq_ops); + if (ret < 0) { + printk(KERN_ERR "VBUS_PCI: Cound not init eventq: %d\n", ret); + goto out_fail; + } + + ret = eventq_init(QLEN); + if (ret < 0) { + printk(KERN_ERR "VBUS_PCI: Cound not setup ring: %d\n", ret); + goto out_fail; + } + + ret = pci_enable_msi(pdev); + if (ret < 0) { + printk(KERN_ERR "VBUS_PCI: Cound not enable MSI: %d\n", ret); + goto out_fail; + } + + vbus_pci.irq = pdev->irq; + + ret = 
request_irq(pdev->irq, eventq_intr, 0, "vbus", NULL); + if (ret < 0) { + printk(KERN_ERR "VBUS_PCI: Failed to register IRQ %d\n: %d", + pdev->irq, ret); + goto out_fail; + } + + /* + * Add one fastcall vector per cpu so that we can do lockless + * hypercalls + */ + for_each_possible_cpu(cpu) { + struct vbus_pci_fastcall_desc *desc = + &per_cpu(vbus_pci_percpu_fastcall, cpu); + struct vbus_pci_call_desc params = { + .vector = cpu, + .len = sizeof(*desc), + .datap = __pa(desc), + }; + + ret = vbus_pci_bridgecall(VBUS_PCI_BRIDGE_FASTCALL_ADD, + ¶ms, sizeof(params)); + if (ret < 0) { + printk(KERN_ERR \ + "VBUS_PCI: Failed to register cpu:%d\n: %d", + cpu, ret); + goto out_fail; + } + } + + /* + * Finally register our queue on the host to start receiving events + */ + ret = vbus_pci_eventq_register(); + if (ret < 0) { + printk(KERN_ERR "VBUS_PCI: Could not register with host: %d\n", + ret); + goto out_fail; + } + +#ifdef CONFIG_DEBUG_FS + vbus_pci.stats.fs = debugfs_create_file(VBUS_PCI_NAME, S_IRUGO, + NULL, NULL, &stat_fops); + if (IS_ERR(vbus_pci.stats.fs)) { + ret = PTR_ERR(vbus_pci.stats.fs); + printk(KERN_ERR "VBUS_PCI: error creating stats-fs: %d\n", ret); + goto out_fail; + } +#endif + + vbus_pci.enabled = true; + + printk(KERN_INFO "Virtual-Bus: Copyright (c) 2009, " \ + "Gregory Haskins \n"); + + return 0; + + out_fail: + vbus_pci_release(); + + return ret; +} + +static void __devexit +vbus_pci_remove(struct pci_dev *pdev) +{ + vbus_pci_release(); +} + +static DEFINE_PCI_DEVICE_TABLE(vbus_pci_tbl) = { + { PCI_DEVICE(0x11da, 0x2000) }, + { 0 }, +}; + +MODULE_DEVICE_TABLE(pci, vbus_pci_tbl); + +static struct pci_driver vbus_pci_driver = { + .name = VBUS_PCI_NAME, + .id_table = vbus_pci_tbl, + .probe = vbus_pci_probe, + .remove = vbus_pci_remove, +}; + +int __init +vbus_pci_init(void) +{ + memset(&vbus_pci, 0, sizeof(vbus_pci)); + spin_lock_init(&vbus_pci.lock); + + return pci_register_driver(&vbus_pci_driver); +} + +static void __exit +vbus_pci_exit(void) +{ + 
pci_unregister_driver(&vbus_pci_driver); +} + +module_init(vbus_pci_init); +module_exit(vbus_pci_exit); + diff --git a/include/linux/Kbuild b/include/linux/Kbuild index dbd2994a02e6..ca89bb4e7cb0 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -363,6 +363,7 @@ unifdef-y += uio.h unifdef-y += unistd.h unifdef-y += usbdevice_fs.h unifdef-y += utsname.h +unifdef-y += vbus_pci.h unifdef-y += videodev2.h unifdef-y += videodev.h unifdef-y += virtio_config.h diff --git a/include/linux/vbus_pci.h b/include/linux/vbus_pci.h new file mode 100644 index 000000000000..fe337590e644 --- /dev/null +++ b/include/linux/vbus_pci.h @@ -0,0 +1,145 @@ +/* + * Copyright 2009 Novell. All Rights Reserved. + * + * PCI to Virtual-Bus Bridge + * + * Author: + * Gregory Haskins + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef _LINUX_VBUS_PCI_H +#define _LINUX_VBUS_PCI_H + +#include +#include + +#define VBUS_PCI_ABI_MAGIC 0xbf53eef5 +#define VBUS_PCI_ABI_VERSION 2 +#define VBUS_PCI_HC_VERSION 1 + +enum { + VBUS_PCI_BRIDGE_NEGOTIATE, + VBUS_PCI_BRIDGE_QREG, + VBUS_PCI_BRIDGE_SLOWCALL, + VBUS_PCI_BRIDGE_FASTCALL_ADD, + VBUS_PCI_BRIDGE_FASTCALL_DROP, + + VBUS_PCI_BRIDGE_MAX, /* must be last */ +}; + +enum { + VBUS_PCI_HC_DEVOPEN, + VBUS_PCI_HC_DEVCLOSE, + VBUS_PCI_HC_DEVCALL, + VBUS_PCI_HC_DEVSHM, + + VBUS_PCI_HC_MAX, /* must be last */ +}; + +struct vbus_pci_bridge_negotiate { + __u32 magic; + __u32 version; + __u64 capabilities; +}; + +struct vbus_pci_deviceopen { + __u32 devid; + __u32 version; /* device ABI version */ + __u64 handle; /* return value for devh */ +}; + +struct vbus_pci_devicecall { + __u64 devh; /* device-handle (returned from DEVICEOPEN */ + __u32 func; + __u32 len; + __u32 flags; + __u64 datap; +}; + +struct vbus_pci_deviceshm { + __u64 devh; /* device-handle (returned from DEVICEOPEN */ + __u32 id; + __u32 len; + __u32 flags; + struct { + __u32 offset; + __u32 prio; + __u64 cookie; /* token to pass back when signaling client */ + } signal; + __u64 datap; +}; + +struct vbus_pci_call_desc { + __u32 vector; + __u32 len; + __u64 datap; +}; + +struct vbus_pci_fastcall_desc { + struct vbus_pci_call_desc call; + __u32 result; +}; + +struct vbus_pci_regs { + struct vbus_pci_call_desc bridgecall; + __u8 pad[48]; +}; + +struct vbus_pci_signals { + __u32 eventq; + __u32 fastcall; + __u32 shmsignal; + __u8 pad[20]; +}; + +struct vbus_pci_eventqreg { + __u32 count; + __u64 ring; + __u64 data; +}; + +struct vbus_pci_busreg { + __u32 count; /* supporting multiple queues allows for prio, etc */ + struct vbus_pci_eventqreg eventq[1]; +}; + +enum vbus_pci_eventid { + VBUS_PCI_EVENT_DEVADD, + VBUS_PCI_EVENT_DEVDROP, + VBUS_PCI_EVENT_SHMSIGNAL, + VBUS_PCI_EVENT_SHMCLOSE, +}; + +#define VBUS_MAX_DEVTYPE_LEN 128 + +struct vbus_pci_add_event { + __u64 id; + char 
type[VBUS_MAX_DEVTYPE_LEN]; +}; + +struct vbus_pci_handle_event { + __u64 handle; +}; + +struct vbus_pci_event { + __u32 eventid; + union { + struct vbus_pci_add_event add; + struct vbus_pci_handle_event handle; + } data; +}; + +#endif /* _LINUX_VBUS_PCI_H */ -- cgit v1.2.3 From 5ff775e4c3d6b0ead7d8784797123a2deed73364 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:29 -0500 Subject: ioq: add driver-side vbus helpers It will be a common pattern to map an IOQ over the VBUS shared-memory interfaces. Therefore, we provide a helper function to generalize the allocation and registration of an IOQ to make this use case simple and easy. Signed-off-by: Gregory Haskins --- drivers/vbus/bus-proxy.c | 64 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/vbus_driver.h | 7 +++++ 2 files changed, 71 insertions(+) (limited to 'drivers') diff --git a/drivers/vbus/bus-proxy.c b/drivers/vbus/bus-proxy.c index 3177f9f60d7f..88cd9048335f 100644 --- a/drivers/vbus/bus-proxy.c +++ b/drivers/vbus/bus-proxy.c @@ -150,3 +150,67 @@ void vbus_driver_unregister(struct vbus_driver *drv) } EXPORT_SYMBOL_GPL(vbus_driver_unregister); +/* + *--------------------------------- + * driver-side IOQ helper + *--------------------------------- + */ +static void +vbus_driver_ioq_release(struct ioq *ioq) +{ + kfree(ioq->head_desc); + kfree(ioq); +} + +static struct ioq_ops vbus_driver_ioq_ops = { + .release = vbus_driver_ioq_release, +}; + + +int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, int id, int prio, + size_t count, struct ioq **ioq) +{ + struct ioq *_ioq; + struct ioq_ring_head *head = NULL; + struct shm_signal *signal = NULL; + size_t len = IOQ_HEAD_DESC_SIZE(count); + int ret = -ENOMEM; + + _ioq = kzalloc(sizeof(*_ioq), GFP_KERNEL); + if (!_ioq) + goto error; + + head = kzalloc(len, GFP_KERNEL | GFP_DMA); + if (!head) + goto error; + + head->magic = IOQ_RING_MAGIC; + head->ver = IOQ_RING_VER; + head->count = count; + + ret = dev->ops->shm(dev, id, 
prio, head, len, + &head->signal, &signal, 0); + if (ret < 0) + goto error; + + ioq_init(_ioq, + &vbus_driver_ioq_ops, + ioq_locality_north, + head, + signal, + count); + + *ioq = _ioq; + + return 0; + + error: + kfree(_ioq); + kfree(head); + + if (signal) + shm_signal_put(signal); + + return ret; +} +EXPORT_SYMBOL_GPL(vbus_driver_ioq_alloc); diff --git a/include/linux/vbus_driver.h b/include/linux/vbus_driver.h index c53e13f17e23..9cfbf60df684 100644 --- a/include/linux/vbus_driver.h +++ b/include/linux/vbus_driver.h @@ -26,6 +26,7 @@ #include #include +#include struct vbus_device_proxy; struct vbus_driver; @@ -70,4 +71,10 @@ struct vbus_driver { int vbus_driver_register(struct vbus_driver *drv); void vbus_driver_unregister(struct vbus_driver *drv); +/* + * driver-side IOQ helper - allocates device-shm and maps an IOQ on it + */ +int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, int id, int prio, + size_t ringsize, struct ioq **ioq); + #endif /* _LINUX_VBUS_DRIVER_H */ -- cgit v1.2.3 From a3e709243fb69cc07a83c9b9421e9c9f28733840 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 7 Dec 2009 11:46:30 -0500 Subject: vbus-proxy also uses ioq, so it should select IOQ. When CONFIG_VBUS_PROXY=[ym] and VBUS_PCIBRIDGE=n: ERROR: "ioq_init" [drivers/vbus/vbus-proxy.ko] undefined! 
Signed-off-by: Randy Dunlap Signed-off-by: Gregory Haskins --- drivers/vbus/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/vbus/Kconfig b/drivers/vbus/Kconfig index 87c545d38fbc..08667aaed47e 100644 --- a/drivers/vbus/Kconfig +++ b/drivers/vbus/Kconfig @@ -5,6 +5,7 @@ config VBUS_PROXY tristate "Virtual-Bus support" select SHM_SIGNAL + select IOQ default n help Adds support for a virtual-bus model drivers in a guest to connect -- cgit v1.2.3 From d0f3212293ebdc5742c1fd8ca32434674f7d0d89 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Mon, 7 Dec 2009 11:46:30 -0500 Subject: vbus: Fix section mismatch warnings in pci-bridge.c The functions vbus_pci_open, _ioq_init, eventq_init, vbus_pci_eventq_register are annoted with __init and referenced from a function __devinit vbus_pci_probe, which causes a section mismatch.So annote those functions with __devinit. We were warned by the following warnings: LD drivers/vbus/vbus-pcibridge.o WARNING: drivers/vbus/vbus-pcibridge.o(.devinit.text+0x19e): Section mismatch in reference from the function vbus_pci_probe() to the function .init.text:vbus_pci_open() The function __devinit vbus_pci_probe() references a function __init vbus_pci_open(). If vbus_pci_open is only used by vbus_pci_probe then annotate vbus_pci_open with a matching annotation. WARNING: drivers/vbus/vbus-pcibridge.o(.devinit.text+0x1f4): Section mismatch in reference from the function vbus_pci_probe() to the function .init.text:_ioq_init() The function __devinit vbus_pci_probe() references a function __init _ioq_init(). If _ioq_init is only used by vbus_pci_probe then annotate _ioq_init with a matching annotation. WARNING: drivers/vbus/vbus-pcibridge.o(.devinit.text+0x240): Section mismatch in reference from the function vbus_pci_probe() to the function .init.text:eventq_init() The function __devinit vbus_pci_probe() references a function __init eventq_init(). 
If eventq_init is only used by vbus_pci_probe then annotate eventq_init with a matching annotation. WARNING: drivers/vbus/vbus-pcibridge.o(.devinit.text+0x45a): Section mismatch in reference from the function vbus_pci_probe() to the function .init.text:vbus_pci_eventq_register() The function __devinit vbus_pci_probe() references a function __init vbus_pci_eventq_register(). If vbus_pci_eventq_register is only used by vbus_pci_probe then annotate vbus_pci_eventq_register with a matching annotation. Signed-off-by: Rakib Mullick Signed-off-by: Gregory Haskins --- drivers/vbus/pci-bridge.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index 81f7cdd2167a..80718e693394 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -478,7 +478,7 @@ event_shmclose(struct vbus_pci_handle_event *event) static struct ioq_notifier eventq_notifier; -static int __init +static int __devinit eventq_init(int qlen) { struct ioq_iterator iter; @@ -687,7 +687,7 @@ vbus_pci_release(void) vbus_pci.enabled = false; } -static int __init +static int __devinit vbus_pci_open(void) { struct vbus_pci_bridge_negotiate params = { @@ -702,7 +702,7 @@ vbus_pci_open(void) #define QLEN 1024 -static int __init +static int __devinit vbus_pci_eventq_register(void) { struct vbus_pci_busreg params = { @@ -720,7 +720,7 @@ vbus_pci_eventq_register(void) ¶ms, sizeof(params)); } -static int __init +static int __devinit _ioq_init(size_t ringsize, struct ioq *ioq, struct ioq_ops *ops) { struct shm_signal *signal = NULL; -- cgit v1.2.3 From a04a45751c42339e5f3a59b6d4d07019346e2581 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:31 -0500 Subject: net: add vbus_enet driver A virtualized 802.x network device based on the VBUS interface. It can be used with any hypervisor/kernel that supports the virtual-ethernet/vbus protocol. 
Signed-off-by: Gregory Haskins Acked-by: David S. Miller --- drivers/net/Kconfig | 14 + drivers/net/Makefile | 1 + drivers/net/vbus-enet.c | 895 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/Kbuild | 1 + include/linux/venet.h | 84 +++++ 5 files changed, 995 insertions(+) create mode 100644 drivers/net/vbus-enet.c create mode 100644 include/linux/venet.h (limited to 'drivers') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index b2f71f79baaf..ee4130305638 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -3241,4 +3241,18 @@ config VMXNET3 To compile this driver as a module, choose M here: the module will be called vmxnet3. +config VBUS_ENET + tristate "VBUS Ethernet Driver" + default n + select VBUS_PROXY + help + A virtualized 802.x network device based on the VBUS + "virtual-ethernet" interface. It can be used with any + hypervisor/kernel that supports the vbus+venet protocol. + +config VBUS_ENET_DEBUG + bool "Enable Debugging" + depends on VBUS_ENET + default n + endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 246323d7f161..f3dabbcdac41 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -282,6 +282,7 @@ obj-$(CONFIG_FS_ENET) += fs_enet/ obj-$(CONFIG_NETXEN_NIC) += netxen/ obj-$(CONFIG_NIU) += niu.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o +obj-$(CONFIG_VBUS_ENET) += vbus-enet.o obj-$(CONFIG_SFC) += sfc/ obj-$(CONFIG_WIMAX) += wimax/ diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c new file mode 100644 index 000000000000..91c47a9bf417 --- /dev/null +++ b/drivers/net/vbus-enet.c @@ -0,0 +1,895 @@ +/* + * vbus_enet - A virtualized 802.x network device based on the VBUS interface + * + * Copyright (C) 2009 Novell, Gregory Haskins + * + * Derived from the SNULL example from the book "Linux Device Drivers" by + * Alessandro Rubini, Jonathan Corbet, and Greg Kroah-Hartman, published + * by O'Reilly & Associates. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("virtual-ethernet"); +MODULE_VERSION("1"); + +static int rx_ringlen = 256; +module_param(rx_ringlen, int, 0444); +static int tx_ringlen = 256; +module_param(tx_ringlen, int, 0444); +static int sg_enabled = 1; +module_param(sg_enabled, int, 0444); + +#define PDEBUG(_dev, fmt, args...) dev_dbg(&(_dev)->dev, fmt, ## args) + +struct vbus_enet_queue { + struct ioq *queue; + struct ioq_notifier notifier; +}; + +struct vbus_enet_priv { + spinlock_t lock; + struct net_device *dev; + struct vbus_device_proxy *vdev; + struct napi_struct napi; + struct vbus_enet_queue rxq; + struct vbus_enet_queue txq; + struct tasklet_struct txtask; + bool sg; +}; + +static void vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force); + +static struct vbus_enet_priv * +napi_to_priv(struct napi_struct *napi) +{ + return container_of(napi, struct vbus_enet_priv, napi); +} + +static int +queue_init(struct vbus_enet_priv *priv, + struct vbus_enet_queue *q, + int qid, + size_t ringsize, + void (*func)(struct ioq_notifier *)) +{ + struct vbus_device_proxy *dev = priv->vdev; + int ret; + + ret = vbus_driver_ioq_alloc(dev, qid, 0, ringsize, &q->queue); + if (ret < 0) + panic("ioq_alloc failed: %d\n", ret); + + if (func) { + q->notifier.signal = func; + q->queue->notifier = &q->notifier; + } + + return 0; +} + +static int +devcall(struct vbus_enet_priv *priv, u32 func, void *data, size_t len) +{ + struct vbus_device_proxy *dev = priv->vdev; + + return dev->ops->call(dev, func, data, len, 0); +} + +/* + * --------------- + * rx descriptors + * --------------- + */ + +static void +rxdesc_alloc(struct net_device *dev, struct ioq_ring_desc *desc, size_t len) +{ + struct sk_buff *skb; + + len += ETH_HLEN; + + skb 
= netdev_alloc_skb(dev, len + 2); + BUG_ON(!skb); + + skb_reserve(skb, NET_IP_ALIGN); /* align IP on 16B boundary */ + + desc->cookie = (u64)skb; + desc->ptr = (u64)__pa(skb->data); + desc->len = len; /* total length */ + desc->valid = 1; +} + +static void +rx_setup(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->rxq.queue; + struct ioq_iterator iter; + int ret; + + /* + * We want to iterate on the "valid" index. By default the iterator + * will not "autoupdate" which means it will not hypercall the host + * with our changes. This is good, because we are really just + * initializing stuff here anyway. Note that you can always manually + * signal the host with ioq_signal() if the autoupdate feature is not + * used. + */ + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); /* will never fail unless seriously broken */ + + /* + * Seek to the tail of the valid index (which should be our first + * item, since the queue is brand-new) + */ + ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty SKB and mark it valid + */ + while (!iter.desc->valid) { + rxdesc_alloc(priv->dev, iter.desc, priv->dev->mtu); + + /* + * This push operation will simultaneously advance the + * valid-head index and increment our position in the queue + * by one. 
+ */ + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + } +} + +static void +rx_teardown(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->rxq.queue; + struct ioq_iterator iter; + int ret; + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * free each valid descriptor + */ + while (iter.desc->valid) { + struct sk_buff *skb = (struct sk_buff *)iter.desc->cookie; + + iter.desc->valid = 0; + wmb(); + + iter.desc->ptr = 0; + iter.desc->cookie = 0; + + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + + dev_kfree_skb(skb); + } +} + +static int +tx_setup(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->txq.queue; + struct ioq_iterator iter; + int i; + int ret; + + if (!priv->sg) + /* + * There is nothing to do for a ring that is not using + * scatter-gather + */ + return 0; + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty SG descriptor + */ + for (i = 0; i < tx_ringlen; i++) { + struct venet_sg *vsg; + size_t iovlen = sizeof(struct venet_iov) * (MAX_SKB_FRAGS-1); + size_t len = sizeof(*vsg) + iovlen; + + vsg = kzalloc(len, GFP_KERNEL); + if (!vsg) + return -ENOMEM; + + iter.desc->cookie = (u64)vsg; + iter.desc->len = len; + iter.desc->ptr = (u64)__pa(vsg); + + ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); + BUG_ON(ret < 0); + } + + return 0; +} + +static void +tx_teardown(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->txq.queue; + struct ioq_iterator iter; + int ret; + + /* forcefully free all outstanding transmissions */ + vbus_enet_tx_reap(priv, 1); + + if (!priv->sg) + /* + * There is nothing else to do for a ring that is not using + * scatter-gather + */ + return; + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + /* seek to position 0 */ + ret 
= ioq_iter_seek(&iter, ioq_seek_set, 0, 0); + BUG_ON(ret < 0); + + /* + * free each valid descriptor + */ + while (iter.desc->cookie) { + struct venet_sg *vsg = (struct venet_sg *)iter.desc->cookie; + + iter.desc->valid = 0; + wmb(); + + iter.desc->ptr = 0; + iter.desc->cookie = 0; + + ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); + BUG_ON(ret < 0); + + kfree(vsg); + } +} + +/* + * Open and close + */ + +static int +vbus_enet_open(struct net_device *dev) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + int ret; + + ret = devcall(priv, VENET_FUNC_LINKUP, NULL, 0); + BUG_ON(ret < 0); + + napi_enable(&priv->napi); + + return 0; +} + +static int +vbus_enet_stop(struct net_device *dev) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + int ret; + + napi_disable(&priv->napi); + + ret = devcall(priv, VENET_FUNC_LINKDOWN, NULL, 0); + BUG_ON(ret < 0); + + return 0; +} + +/* + * Configuration changes (passed on by ifconfig) + */ +static int +vbus_enet_config(struct net_device *dev, struct ifmap *map) +{ + if (dev->flags & IFF_UP) /* can't act on a running interface */ + return -EBUSY; + + /* Don't allow changing the I/O address */ + if (map->base_addr != dev->base_addr) { + dev_warn(&dev->dev, "Can't change I/O address\n"); + return -EOPNOTSUPP; + } + + /* ignore other fields */ + return 0; +} + +static void +vbus_enet_schedule_rx(struct vbus_enet_priv *priv) +{ + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + if (napi_schedule_prep(&priv->napi)) { + /* Disable further interrupts */ + ioq_notify_disable(priv->rxq.queue, 0); + __napi_schedule(&priv->napi); + } + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int +vbus_enet_change_mtu(struct net_device *dev, int new_mtu) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + int ret; + + dev->mtu = new_mtu; + + /* + * FLUSHRX will cause the device to flush any outstanding + * RX buffers. 
They will appear to come in as 0 length + * packets which we can simply discard and replace with new_mtu + * buffers for the future. + */ + ret = devcall(priv, VENET_FUNC_FLUSHRX, NULL, 0); + BUG_ON(ret < 0); + + vbus_enet_schedule_rx(priv); + + return 0; +} + +/* + * The poll implementation. + */ +static int +vbus_enet_poll(struct napi_struct *napi, int budget) +{ + struct vbus_enet_priv *priv = napi_to_priv(napi); + int npackets = 0; + struct ioq_iterator iter; + int ret; + + PDEBUG(priv->dev, "polling...\n"); + + /* We want to iterate on the head of the in-use index */ + ret = ioq_iter_init(priv->rxq.queue, &iter, ioq_idxtype_inuse, + IOQ_ITER_AUTOUPDATE); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * We stop if we have met the quota or there are no more packets. + * The EOM is indicated by finding a packet that is still owned by + * the south side + */ + while ((npackets < budget) && (!iter.desc->sown)) { + struct sk_buff *skb = (struct sk_buff *)iter.desc->cookie; + + if (iter.desc->len) { + skb_put(skb, iter.desc->len); + + /* Maintain stats */ + npackets++; + priv->dev->stats.rx_packets++; + priv->dev->stats.rx_bytes += iter.desc->len; + + /* Pass the buffer up to the stack */ + skb->dev = priv->dev; + skb->protocol = eth_type_trans(skb, priv->dev); + netif_receive_skb(skb); + + mb(); + } else + /* + * the device may send a zero-length packet when its + * flushing references on the ring. 
We can just drop + * these on the floor + */ + dev_kfree_skb(skb); + + /* Grab a new buffer to put in the ring */ + rxdesc_alloc(priv->dev, iter.desc, priv->dev->mtu); + + /* Advance the in-use tail */ + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + } + + PDEBUG(priv->dev, "%d packets received\n", npackets); + + /* + * If we processed all packets, we're done; tell the kernel and + * reenable ints + */ + if (ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) { + napi_complete(napi); + ioq_notify_enable(priv->rxq.queue, 0); + ret = 0; + } else + /* We couldn't process everything. */ + ret = 1; + + return ret; +} + +/* + * Transmit a packet (called by the kernel) + */ +static int +vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + struct ioq_iterator iter; + int ret; + unsigned long flags; + + PDEBUG(priv->dev, "sending %d bytes\n", skb->len); + + spin_lock_irqsave(&priv->lock, flags); + + if (ioq_full(priv->txq.queue, ioq_idxtype_valid)) { + /* + * We must flow-control the kernel by disabling the + * queue + */ + spin_unlock_irqrestore(&priv->lock, flags); + netif_stop_queue(dev); + dev_err(&priv->dev->dev, "tx on full queue bug\n"); + return 1; + } + + /* + * We want to iterate on the tail of both the "inuse" and "valid" index + * so we specify the "both" index + */ + ret = ioq_iter_init(priv->txq.queue, &iter, ioq_idxtype_both, + IOQ_ITER_AUTOUPDATE); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0); + BUG_ON(ret < 0); + BUG_ON(iter.desc->sown); + + if (priv->sg) { + struct venet_sg *vsg = (struct venet_sg *)iter.desc->cookie; + struct scatterlist sgl[MAX_SKB_FRAGS+1]; + struct scatterlist *sg; + int count, maxcount = ARRAY_SIZE(sgl); + + sg_init_table(sgl, maxcount); + + memset(vsg, 0, sizeof(*vsg)); + + vsg->cookie = (u64)skb; + vsg->len = skb->len; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + vsg->flags |= VENET_SG_FLAG_NEEDS_CSUM; + vsg->csum.start = skb->csum_start - 
skb_headroom(skb); + vsg->csum.offset = skb->csum_offset; + } + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + vsg->flags |= VENET_SG_FLAG_GSO; + + vsg->gso.hdrlen = skb_transport_header(skb) - skb->data; + vsg->gso.size = sinfo->gso_size; + if (sinfo->gso_type & SKB_GSO_TCPV4) + vsg->gso.type = VENET_GSO_TYPE_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + vsg->gso.type = VENET_GSO_TYPE_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + vsg->gso.type = VENET_GSO_TYPE_UDP; + else + panic("Virtual-Ethernet: unknown GSO type " \ + "0x%x\n", sinfo->gso_type); + + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + vsg->flags |= VENET_SG_FLAG_ECN; + } + + count = skb_to_sgvec(skb, sgl, 0, skb->len); + + BUG_ON(count > maxcount); + + for (sg = &sgl[0]; sg; sg = sg_next(sg)) { + struct venet_iov *iov = &vsg->iov[vsg->count++]; + + iov->len = sg->length; + iov->ptr = (u64)sg_phys(sg); + } + + } else { + /* + * non scatter-gather mode: simply put the skb right onto the + * ring. 
+ */ + iter.desc->cookie = (u64)skb; + iter.desc->len = (u64)skb->len; + iter.desc->ptr = (u64)__pa(skb->data); + } + + iter.desc->valid = 1; + + priv->dev->stats.tx_packets++; + priv->dev->stats.tx_bytes += skb->len; + + /* + * This advances both indexes together implicitly, and then + * signals the south side to consume the packet + */ + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + + dev->trans_start = jiffies; /* save the timestamp */ + + if (ioq_full(priv->txq.queue, ioq_idxtype_valid)) { + /* + * If the queue is congested, we must flow-control the kernel + */ + PDEBUG(priv->dev, "backpressure tx queue\n"); + netif_stop_queue(dev); + } + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +/* + * reclaim any outstanding completed tx packets + * + * assumes priv->lock held + */ +static void +vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force) +{ + struct ioq_iterator iter; + int ret; + + /* + * We want to iterate on the head of the valid index, but we + * do not want the iter_pop (below) to flip the ownership, so + * we set the NOFLIPOWNER option + */ + ret = ioq_iter_init(priv->txq.queue, &iter, ioq_idxtype_valid, + IOQ_ITER_NOFLIPOWNER); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * We are done once we find the first packet either invalid or still + * owned by the south-side + */ + while (iter.desc->valid && (!iter.desc->sown || force)) { + struct sk_buff *skb; + + if (priv->sg) { + struct venet_sg *vsg; + + vsg = (struct venet_sg *)iter.desc->cookie; + skb = (struct sk_buff *)vsg->cookie; + + } else { + skb = (struct sk_buff *)iter.desc->cookie; + } + + PDEBUG(priv->dev, "completed sending %d bytes\n", skb->len); + + /* Reset the descriptor */ + iter.desc->valid = 0; + + dev_kfree_skb(skb); + + /* Advance the valid-index head */ + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + } + + /* + * If we were previously stopped due to flow control, restart the + * processing + */ + 
if (netif_queue_stopped(priv->dev) + && !ioq_full(priv->txq.queue, ioq_idxtype_valid)) { + PDEBUG(priv->dev, "re-enabling tx queue\n"); + netif_wake_queue(priv->dev); + } +} + +static void +vbus_enet_timeout(struct net_device *dev) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + unsigned long flags; + + dev_dbg(&dev->dev, "Transmit timeout\n"); + + spin_lock_irqsave(&priv->lock, flags); + vbus_enet_tx_reap(priv, 0); + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void +rx_isr(struct ioq_notifier *notifier) +{ + struct vbus_enet_priv *priv; + struct net_device *dev; + + priv = container_of(notifier, struct vbus_enet_priv, rxq.notifier); + dev = priv->dev; + + if (!ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) + vbus_enet_schedule_rx(priv); +} + +static void +deferred_tx_isr(unsigned long data) +{ + struct vbus_enet_priv *priv = (struct vbus_enet_priv *)data; + unsigned long flags; + + PDEBUG(priv->dev, "deferred_tx_isr\n"); + + spin_lock_irqsave(&priv->lock, flags); + vbus_enet_tx_reap(priv, 0); + spin_unlock_irqrestore(&priv->lock, flags); + + ioq_notify_enable(priv->txq.queue, 0); +} + +static void +tx_isr(struct ioq_notifier *notifier) +{ + struct vbus_enet_priv *priv; + + priv = container_of(notifier, struct vbus_enet_priv, txq.notifier); + + PDEBUG(priv->dev, "tx_isr\n"); + + ioq_notify_disable(priv->txq.queue, 0); + tasklet_schedule(&priv->txtask); +} + +static int +vbus_enet_negcap(struct vbus_enet_priv *priv) +{ + struct net_device *dev = priv->dev; + struct venet_capabilities caps; + int ret; + + memset(&caps, 0, sizeof(caps)); + + if (sg_enabled) { + caps.gid = VENET_CAP_GROUP_SG; + caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6 + |VENET_CAP_ECN); + /* note: exclude UFO for now due to stack bug */ + } + + ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps)); + if (ret < 0) + return ret; + + if (caps.bits & VENET_CAP_SG) { + priv->sg = true; + + dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM|NETIF_F_FRAGLIST; + + if 
(caps.bits & VENET_CAP_TSO4) + dev->features |= NETIF_F_TSO; + if (caps.bits & VENET_CAP_UFO) + dev->features |= NETIF_F_UFO; + if (caps.bits & VENET_CAP_TSO6) + dev->features |= NETIF_F_TSO6; + if (caps.bits & VENET_CAP_ECN) + dev->features |= NETIF_F_TSO_ECN; + } + + return 0; +} + +static int vbus_enet_set_tx_csum(struct net_device *dev, u32 data) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + + if (data && !priv->sg) + return -ENOSYS; + + return ethtool_op_set_tx_hw_csum(dev, data); +} + +static struct ethtool_ops vbus_enet_ethtool_ops = { + .set_tx_csum = vbus_enet_set_tx_csum, + .set_sg = ethtool_op_set_sg, + .set_tso = ethtool_op_set_tso, + .get_link = ethtool_op_get_link, +}; + +static const struct net_device_ops vbus_enet_netdev_ops = { + .ndo_open = vbus_enet_open, + .ndo_stop = vbus_enet_stop, + .ndo_set_config = vbus_enet_config, + .ndo_start_xmit = vbus_enet_tx_start, + .ndo_change_mtu = vbus_enet_change_mtu, + .ndo_tx_timeout = vbus_enet_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +/* + * This is called whenever a new vbus_device_proxy is added to the vbus + * with the matching VENET_ID + */ +static int +vbus_enet_probe(struct vbus_device_proxy *vdev) +{ + struct net_device *dev; + struct vbus_enet_priv *priv; + int ret; + + printk(KERN_INFO "VENET: Found new device at %lld\n", vdev->id); + + ret = vdev->ops->open(vdev, VENET_VERSION, 0); + if (ret < 0) + return ret; + + dev = alloc_etherdev(sizeof(struct vbus_enet_priv)); + if (!dev) + return -ENOMEM; + + priv = netdev_priv(dev); + + spin_lock_init(&priv->lock); + priv->dev = dev; + priv->vdev = vdev; + + ret = vbus_enet_negcap(priv); + if (ret < 0) { + printk(KERN_INFO "VENET: Error negotiating capabilities for " \ + "%lld\n", + priv->vdev->id); + goto out_free; + } + + tasklet_init(&priv->txtask, deferred_tx_isr, (unsigned long)priv); + + queue_init(priv, &priv->rxq, VENET_QUEUE_RX, rx_ringlen, rx_isr); + queue_init(priv, &priv->txq, 
VENET_QUEUE_TX, tx_ringlen, tx_isr); + + rx_setup(priv); + tx_setup(priv); + + ioq_notify_enable(priv->rxq.queue, 0); /* enable interrupts */ + ioq_notify_enable(priv->txq.queue, 0); + + dev->netdev_ops = &vbus_enet_netdev_ops; + dev->watchdog_timeo = 5 * HZ; + SET_ETHTOOL_OPS(dev, &vbus_enet_ethtool_ops); + SET_NETDEV_DEV(dev, &vdev->dev); + + netif_napi_add(dev, &priv->napi, vbus_enet_poll, 128); + + ret = devcall(priv, VENET_FUNC_MACQUERY, priv->dev->dev_addr, ETH_ALEN); + if (ret < 0) { + printk(KERN_INFO "VENET: Error obtaining MAC address for " \ + "%lld\n", + priv->vdev->id); + goto out_free; + } + + dev->features |= NETIF_F_HIGHDMA; + + ret = register_netdev(dev); + if (ret < 0) { + printk(KERN_INFO "VENET: error %i registering device \"%s\"\n", + ret, dev->name); + goto out_free; + } + + vdev->priv = priv; + + return 0; + + out_free: + free_netdev(dev); + + return ret; +} + +static int +vbus_enet_remove(struct vbus_device_proxy *vdev) +{ + struct vbus_enet_priv *priv = (struct vbus_enet_priv *)vdev->priv; + struct vbus_device_proxy *dev = priv->vdev; + + unregister_netdev(priv->dev); + napi_disable(&priv->napi); + + rx_teardown(priv); + ioq_put(priv->rxq.queue); + + tx_teardown(priv); + ioq_put(priv->txq.queue); + + dev->ops->close(dev, 0); + + free_netdev(priv->dev); + + return 0; +} + +/* + * Finally, the module stuff + */ + +static struct vbus_driver_ops vbus_enet_driver_ops = { + .probe = vbus_enet_probe, + .remove = vbus_enet_remove, +}; + +static struct vbus_driver vbus_enet_driver = { + .type = VENET_TYPE, + .owner = THIS_MODULE, + .ops = &vbus_enet_driver_ops, +}; + +static __init int +vbus_enet_init_module(void) +{ + printk(KERN_INFO "Virtual Ethernet: Copyright (C) 2009 Novell, Gregory Haskins\n"); + printk(KERN_DEBUG "VENET: Using %d/%d queue depth\n", + rx_ringlen, tx_ringlen); + return vbus_driver_register(&vbus_enet_driver); +} + +static __exit void +vbus_enet_cleanup(void) +{ + vbus_driver_unregister(&vbus_enet_driver); +} + 
+module_init(vbus_enet_init_module); +module_exit(vbus_enet_cleanup); diff --git a/include/linux/Kbuild b/include/linux/Kbuild index ca89bb4e7cb0..669fa0f92352 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -364,6 +364,7 @@ unifdef-y += unistd.h unifdef-y += usbdevice_fs.h unifdef-y += utsname.h unifdef-y += vbus_pci.h +unifdef-y += venet.h unifdef-y += videodev2.h unifdef-y += videodev.h unifdef-y += virtio_config.h diff --git a/include/linux/venet.h b/include/linux/venet.h new file mode 100644 index 000000000000..47ed37d13c53 --- /dev/null +++ b/include/linux/venet.h @@ -0,0 +1,84 @@ +/* + * Copyright 2009 Novell. All Rights Reserved. + * + * Virtual-Ethernet adapter + * + * Author: + * Gregory Haskins + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef _LINUX_VENET_H +#define _LINUX_VENET_H + +#include + +#define VENET_VERSION 1 + +#define VENET_TYPE "virtual-ethernet" + +#define VENET_QUEUE_RX 0 +#define VENET_QUEUE_TX 1 + +struct venet_capabilities { + __u32 gid; + __u32 bits; +}; + +#define VENET_CAP_GROUP_SG 0 + +/* CAPABILITIES-GROUP SG */ +#define VENET_CAP_SG (1 << 0) +#define VENET_CAP_TSO4 (1 << 1) +#define VENET_CAP_TSO6 (1 << 2) +#define VENET_CAP_ECN (1 << 3) +#define VENET_CAP_UFO (1 << 4) + +struct venet_iov { + __u32 len; + __u64 ptr; +}; + +#define VENET_SG_FLAG_NEEDS_CSUM (1 << 0) +#define VENET_SG_FLAG_GSO (1 << 1) +#define VENET_SG_FLAG_ECN (1 << 2) + +struct venet_sg { + __u64 cookie; + __u32 flags; + __u32 len; /* total length of all iovs */ + struct { + __u16 start; /* csum starting position */ + __u16 offset; /* offset to place csum */ + } csum; + struct { +#define VENET_GSO_TYPE_TCPV4 0 /* IPv4 TCP (TSO) */ +#define VENET_GSO_TYPE_UDP 1 /* IPv4 UDP (UFO) */ +#define VENET_GSO_TYPE_TCPV6 2 /* IPv6 TCP */ + __u8 type; + __u16 hdrlen; + __u16 size; + } gso; + __u32 count; /* nr of iovs */ + struct venet_iov iov[1]; +}; + +#define VENET_FUNC_LINKUP 0 +#define VENET_FUNC_LINKDOWN 1 +#define VENET_FUNC_MACQUERY 2 +#define VENET_FUNC_NEGCAP 3 /* negotiate capabilities */ +#define VENET_FUNC_FLUSHRX 4 + +#endif /* _LINUX_VENET_H */ -- cgit v1.2.3 From 8c87732d4a3df1378dbfb46a0756f25d084a2058 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:32 -0500 Subject: venet: fix gso.hdr_len to report correct length This seemed to have worked for TSO4/6 frames, but breaks for UFO. In either case, its just plain wrong, so lets get the header set properly. 
Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 91c47a9bf417..3d614446d071 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -512,7 +512,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) vsg->flags |= VENET_SG_FLAG_GSO; - vsg->gso.hdrlen = skb_transport_header(skb) - skb->data; + vsg->gso.hdrlen = skb_headlen(skb); vsg->gso.size = sinfo->gso_size; if (sinfo->gso_type & SKB_GSO_TCPV4) vsg->gso.type = VENET_GSO_TYPE_TCPV4; -- cgit v1.2.3 From 912574e364ce7fefad9f880fb8254ec58776ec1a Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:33 -0500 Subject: venet: add pre-mapped tx descriptor feature What: Pre-allocate and map our scatter-gather descriptors. Why: The host cannot directly access guest memory, and therefore any indirection adds additional overhead. We currently implement scattergather by pushing a pointer to the sg-descriptor, which points to the actual SKB. This means the host must take an extra read just to obtain the pointer to the SKB data. Therefore we introduce a new shared-memory region that consists of pre-allocated scattergather descriptors. The host may then decode a descriptor pointer as an offset to this pre-mapped region and save time/overhead. 
Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 62 ++++++++++++++++++++++++++++++++++++++++++------- include/linux/venet.h | 12 ++++++---- 2 files changed, 61 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 3d614446d071..b3e9695412e2 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -61,6 +61,10 @@ struct vbus_enet_priv { struct vbus_enet_queue txq; struct tasklet_struct txtask; bool sg; + struct { + bool enabled; + char *pool; + } pmtd; /* pre-mapped transmit descriptors */ }; static void vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force); @@ -201,7 +205,9 @@ rx_teardown(struct vbus_enet_priv *priv) static int tx_setup(struct vbus_enet_priv *priv) { - struct ioq *ioq = priv->txq.queue; + struct ioq *ioq = priv->txq.queue; + size_t iovlen = sizeof(struct venet_iov) * (MAX_SKB_FRAGS-1); + size_t len = sizeof(struct venet_sg) + iovlen; struct ioq_iterator iter; int i; int ret; @@ -213,6 +219,29 @@ tx_setup(struct vbus_enet_priv *priv) */ return 0; + /* pre-allocate our descriptor pool if pmtd is enabled */ + if (priv->pmtd.enabled) { + struct vbus_device_proxy *dev = priv->vdev; + size_t poollen = len * tx_ringlen; + char *pool; + int shmid; + + /* pmtdquery will return the shm-id to use for the pool */ + ret = devcall(priv, VENET_FUNC_PMTDQUERY, NULL, 0); + BUG_ON(ret < 0); + + shmid = ret; + + pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA); + if (!pool) + return -ENOMEM; + + priv->pmtd.pool = pool; + + ret = dev->ops->shm(dev, shmid, 0, pool, poollen, 0, NULL, 0); + BUG_ON(ret < 0); + } + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); BUG_ON(ret < 0); @@ -224,16 +253,22 @@ tx_setup(struct vbus_enet_priv *priv) */ for (i = 0; i < tx_ringlen; i++) { struct venet_sg *vsg; - size_t iovlen = sizeof(struct venet_iov) * (MAX_SKB_FRAGS-1); - size_t len = sizeof(*vsg) + iovlen; - vsg = kzalloc(len, GFP_KERNEL); - if (!vsg) - return -ENOMEM; + if 
(priv->pmtd.enabled) { + size_t offset = (i * len); + + vsg = (struct venet_sg *)&priv->pmtd.pool[offset]; + iter.desc->ptr = (u64)offset; + } else { + vsg = kzalloc(len, GFP_KERNEL); + if (!vsg) + return -ENOMEM; + + iter.desc->ptr = (u64)__pa(vsg); + } iter.desc->cookie = (u64)vsg; iter.desc->len = len; - iter.desc->ptr = (u64)__pa(vsg); ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); BUG_ON(ret < 0); @@ -259,6 +294,14 @@ tx_teardown(struct vbus_enet_priv *priv) */ return; + if (priv->pmtd.enabled) { + /* + * PMTD mode means we only need to free the pool + */ + kfree(priv->pmtd.pool); + return; + } + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); BUG_ON(ret < 0); @@ -705,7 +748,7 @@ vbus_enet_negcap(struct vbus_enet_priv *priv) if (sg_enabled) { caps.gid = VENET_CAP_GROUP_SG; caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6 - |VENET_CAP_ECN); + |VENET_CAP_ECN|VENET_CAP_PMTD); /* note: exclude UFO for now due to stack bug */ } @@ -726,6 +769,9 @@ vbus_enet_negcap(struct vbus_enet_priv *priv) dev->features |= NETIF_F_TSO6; if (caps.bits & VENET_CAP_ECN) dev->features |= NETIF_F_TSO_ECN; + + if (caps.bits & VENET_CAP_PMTD) + priv->pmtd.enabled = true; } return 0; diff --git a/include/linux/venet.h b/include/linux/venet.h index 47ed37d13c53..57aeddd316cb 100644 --- a/include/linux/venet.h +++ b/include/linux/venet.h @@ -45,6 +45,7 @@ struct venet_capabilities { #define VENET_CAP_TSO6 (1 << 2) #define VENET_CAP_ECN (1 << 3) #define VENET_CAP_UFO (1 << 4) +#define VENET_CAP_PMTD (1 << 5) /* pre-mapped tx desc */ struct venet_iov { __u32 len; @@ -75,10 +76,11 @@ struct venet_sg { struct venet_iov iov[1]; }; -#define VENET_FUNC_LINKUP 0 -#define VENET_FUNC_LINKDOWN 1 -#define VENET_FUNC_MACQUERY 2 -#define VENET_FUNC_NEGCAP 3 /* negotiate capabilities */ -#define VENET_FUNC_FLUSHRX 4 +#define VENET_FUNC_LINKUP 0 +#define VENET_FUNC_LINKDOWN 1 +#define VENET_FUNC_MACQUERY 2 +#define VENET_FUNC_NEGCAP 3 /* negotiate capabilities */ +#define 
VENET_FUNC_FLUSHRX 4 +#define VENET_FUNC_PMTDQUERY 5 #endif /* _LINUX_VENET_H */ -- cgit v1.2.3 From 548a237b47e4a73e12bbed78ac1fcaa7c2e50df2 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:34 -0500 Subject: venet: report actual used descriptor size This should reduce wasted effort copying parts of the descriptor which are not in use, since the descriptors are typically pre-allocated to their maximum size. Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 2 ++ include/linux/venet.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index b3e9695412e2..63237f327ac5 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -582,6 +582,8 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) iov->ptr = (u64)sg_phys(sg); } + iter.desc->len = (u64)VSG_DESC_SIZE(vsg->count); + } else { /* * non scatter-gather mode: simply put the skb right onto the diff --git a/include/linux/venet.h b/include/linux/venet.h index 57aeddd316cb..53b6958e95d6 100644 --- a/include/linux/venet.h +++ b/include/linux/venet.h @@ -76,6 +76,9 @@ struct venet_sg { struct venet_iov iov[1]; }; +#define VSG_DESC_SIZE(count) (sizeof(struct venet_sg) + \ + sizeof(struct venet_iov) * ((count) - 1)) + #define VENET_FUNC_LINKUP 0 #define VENET_FUNC_LINKDOWN 1 #define VENET_FUNC_MACQUERY 2 -- cgit v1.2.3 From 5d7bcfde3a9d25da1a95edb47232d4ad2cfd4fa0 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:34 -0500 Subject: venet: cache the ringlen values at init We want to prevent the condition where changes to the module-params could affect the run-time validity of the ringstate Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 63237f327ac5..fe9eeca2abaf 100644 --- a/drivers/net/vbus-enet.c +++ 
b/drivers/net/vbus-enet.c @@ -50,6 +50,7 @@ module_param(sg_enabled, int, 0444); struct vbus_enet_queue { struct ioq *queue; struct ioq_notifier notifier; + unsigned long count; }; struct vbus_enet_priv { @@ -94,6 +95,8 @@ queue_init(struct vbus_enet_priv *priv, q->queue->notifier = &q->notifier; } + q->count = ringsize; + return 0; } @@ -222,7 +225,7 @@ tx_setup(struct vbus_enet_priv *priv) /* pre-allocate our descriptor pool if pmtd is enabled */ if (priv->pmtd.enabled) { struct vbus_device_proxy *dev = priv->vdev; - size_t poollen = len * tx_ringlen; + size_t poollen = len * priv->txq.count; char *pool; int shmid; @@ -251,7 +254,7 @@ tx_setup(struct vbus_enet_priv *priv) /* * Now populate each descriptor with an empty SG descriptor */ - for (i = 0; i < tx_ringlen; i++) { + for (i = 0; i < priv->txq.count; i++) { struct venet_sg *vsg; if (priv->pmtd.enabled) { -- cgit v1.2.3 From cd37d9d80dc425cf23ac74d20ef15daa0bf0954d Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:34 -0500 Subject: venet: add eventq protocol This adds an event-channel for passing host->guest messages to the guest driver. We will use this later in the series for linkstate and asynchronous transmit-complete events. 
Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/venet.h | 28 ++++++- 2 files changed, 229 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index fe9eeca2abaf..5fccfd1cdd75 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -66,6 +66,14 @@ struct vbus_enet_priv { bool enabled; char *pool; } pmtd; /* pre-mapped transmit descriptors */ + struct { + bool enabled; + bool linkstate; + unsigned long evsize; + struct vbus_enet_queue veq; + struct tasklet_struct task; + char *pool; + } evq; }; static void vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force); @@ -331,6 +339,16 @@ tx_teardown(struct vbus_enet_priv *priv) } } +static void +evq_teardown(struct vbus_enet_priv *priv) +{ + if (!priv->evq.enabled) + return; + + ioq_put(priv->evq.veq.queue); + kfree(priv->evq.pool); +} + /* * Open and close */ @@ -741,8 +759,91 @@ tx_isr(struct ioq_notifier *notifier) tasklet_schedule(&priv->txtask); } +static void +evq_linkstate_event(struct vbus_enet_priv *priv, + struct venet_event_header *header) +{ + struct venet_event_linkstate *event = + (struct venet_event_linkstate *)header; + + switch (event->state) { + case 0: + netif_carrier_off(priv->dev); + break; + case 1: + netif_carrier_on(priv->dev); + break; + default: + break; + } +} + +static void +deferred_evq_isr(unsigned long data) +{ + struct vbus_enet_priv *priv = (struct vbus_enet_priv *)data; + int nevents = 0; + struct ioq_iterator iter; + int ret; + + PDEBUG(priv->dev, "evq: polling...\n"); + + /* We want to iterate on the head of the in-use index */ + ret = ioq_iter_init(priv->evq.veq.queue, &iter, ioq_idxtype_inuse, + IOQ_ITER_AUTOUPDATE); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * The EOM is indicated by finding a packet that is still owned by + * the south side + */ + while 
(!iter.desc->sown) { + struct venet_event_header *header; + + header = (struct venet_event_header *)iter.desc->cookie; + + switch (header->id) { + case VENET_EVENT_LINKSTATE: + evq_linkstate_event(priv, header); + break; + default: + panic("venet: unexpected event id:%d of size %d\n", + header->id, header->size); + break; + } + + memset((void *)iter.desc->cookie, 0, priv->evq.evsize); + + /* Advance the in-use tail */ + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + + nevents++; + } + + PDEBUG(priv->dev, "%d events received\n", nevents); + + ioq_notify_enable(priv->evq.veq.queue, 0); +} + +static void +evq_isr(struct ioq_notifier *notifier) +{ + struct vbus_enet_priv *priv; + + priv = container_of(notifier, struct vbus_enet_priv, evq.veq.notifier); + + PDEBUG(priv->dev, "evq_isr\n"); + + ioq_notify_disable(priv->evq.veq.queue, 0); + tasklet_schedule(&priv->evq.task); +} + static int -vbus_enet_negcap(struct vbus_enet_priv *priv) +vbus_enet_sg_negcap(struct vbus_enet_priv *priv) { struct net_device *dev = priv->dev; struct venet_capabilities caps; @@ -782,6 +883,103 @@ vbus_enet_negcap(struct vbus_enet_priv *priv) return 0; } +static int +vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) +{ + struct venet_capabilities caps; + int ret; + + memset(&caps, 0, sizeof(caps)); + + caps.gid = VENET_CAP_GROUP_EVENTQ; + caps.bits |= VENET_CAP_EVQ_LINKSTATE; + + ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps)); + if (ret < 0) + return ret; + + if (caps.bits) { + struct vbus_device_proxy *dev = priv->vdev; + struct venet_eventq_query query; + size_t poollen; + struct ioq_iterator iter; + char *pool; + int i; + + priv->evq.enabled = true; + + if (caps.bits & VENET_CAP_EVQ_LINKSTATE) { + /* + * We will assume there is no carrier until we get + * an event telling us otherwise + */ + netif_carrier_off(priv->dev); + priv->evq.linkstate = true; + } + + memset(&query, 0, sizeof(query)); + + ret = devcall(priv, VENET_FUNC_EVQQUERY, &query, 
sizeof(query)); + if (ret < 0) + return ret; + + priv->evq.evsize = query.evsize; + poollen = query.evsize * count; + + pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA); + if (!pool) + return -ENOMEM; + + priv->evq.pool = pool; + + ret = dev->ops->shm(dev, query.dpid, 0, + pool, poollen, 0, NULL, 0); + if (ret < 0) + return ret; + + queue_init(priv, &priv->evq.veq, query.qid, count, evq_isr); + + ret = ioq_iter_init(priv->evq.veq.queue, + &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0); + BUG_ON(ret < 0); + + /* Now populate each descriptor with an empty event */ + for (i = 0; i < count; i++) { + size_t offset = (i * query.evsize); + void *addr = &priv->evq.pool[offset]; + + iter.desc->ptr = (u64)offset; + iter.desc->cookie = (u64)addr; + iter.desc->len = query.evsize; + + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + } + + /* Finally, enable interrupts */ + tasklet_init(&priv->evq.task, deferred_evq_isr, + (unsigned long)priv); + ioq_notify_enable(priv->evq.veq.queue, 0); + } + + return 0; +} + +static int +vbus_enet_negcap(struct vbus_enet_priv *priv) +{ + int ret; + + ret = vbus_enet_sg_negcap(priv); + if (ret < 0) + return ret; + + return vbus_enet_evq_negcap(priv, tx_ringlen); +} + static int vbus_enet_set_tx_csum(struct net_device *dev, u32 data) { struct vbus_enet_priv *priv = netdev_priv(dev); @@ -905,6 +1103,9 @@ vbus_enet_remove(struct vbus_device_proxy *vdev) tx_teardown(priv); ioq_put(priv->txq.queue); + if (priv->evq.enabled) + evq_teardown(priv); + dev->ops->close(dev, 0); free_netdev(priv->dev); diff --git a/include/linux/venet.h b/include/linux/venet.h index 53b6958e95d6..16b0156e66a0 100644 --- a/include/linux/venet.h +++ b/include/linux/venet.h @@ -37,7 +37,8 @@ struct venet_capabilities { __u32 bits; }; -#define VENET_CAP_GROUP_SG 0 +#define VENET_CAP_GROUP_SG 0 +#define VENET_CAP_GROUP_EVENTQ 1 /* CAPABILITIES-GROUP SG */ #define VENET_CAP_SG (1 << 0) @@ -47,6 +48,9 @@ struct 
venet_capabilities { #define VENET_CAP_UFO (1 << 4) #define VENET_CAP_PMTD (1 << 5) /* pre-mapped tx desc */ +/* CAPABILITIES-GROUP EVENTQ */ +#define VENET_CAP_EVQ_LINKSTATE (1 << 0) + struct venet_iov { __u32 len; __u64 ptr; @@ -76,6 +80,27 @@ struct venet_sg { struct venet_iov iov[1]; }; +struct venet_eventq_query { + __u32 flags; + __u32 evsize; /* size of each event */ + __u32 dpid; /* descriptor pool-id */ + __u32 qid; + __u8 pad[16]; +}; + +#define VENET_EVENT_LINKSTATE 0 + +struct venet_event_header { + __u32 flags; + __u32 size; + __u32 id; +}; + +struct venet_event_linkstate { + struct venet_event_header header; + __u8 state; /* 0 = down, 1 = up */ +}; + #define VSG_DESC_SIZE(count) (sizeof(struct venet_sg) + \ sizeof(struct venet_iov) * ((count) - 1)) @@ -85,5 +110,6 @@ struct venet_sg { #define VENET_FUNC_NEGCAP 3 /* negotiate capabilities */ #define VENET_FUNC_FLUSHRX 4 #define VENET_FUNC_PMTDQUERY 5 +#define VENET_FUNC_EVQQUERY 6 #endif /* _LINUX_VENET_H */ -- cgit v1.2.3 From 20c06334909ea8e0d8d13480f4943b139299eca7 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:35 -0500 Subject: venet: use an skblist for outstanding descriptors This will be useful later in the series so that we can switch to an asynchronous model. 
Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 59 ++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 5fccfd1cdd75..30321692ff46 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -59,8 +59,11 @@ struct vbus_enet_priv { struct vbus_device_proxy *vdev; struct napi_struct napi; struct vbus_enet_queue rxq; - struct vbus_enet_queue txq; - struct tasklet_struct txtask; + struct { + struct vbus_enet_queue veq; + struct tasklet_struct task; + struct sk_buff_head outstanding; + } tx; bool sg; struct { bool enabled; @@ -76,7 +79,7 @@ struct vbus_enet_priv { } evq; }; -static void vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force); +static void vbus_enet_tx_reap(struct vbus_enet_priv *priv); static struct vbus_enet_priv * napi_to_priv(struct napi_struct *napi) @@ -216,7 +219,7 @@ rx_teardown(struct vbus_enet_priv *priv) static int tx_setup(struct vbus_enet_priv *priv) { - struct ioq *ioq = priv->txq.queue; + struct ioq *ioq = priv->tx.veq.queue; size_t iovlen = sizeof(struct venet_iov) * (MAX_SKB_FRAGS-1); size_t len = sizeof(struct venet_sg) + iovlen; struct ioq_iterator iter; @@ -233,7 +236,7 @@ tx_setup(struct vbus_enet_priv *priv) /* pre-allocate our descriptor pool if pmtd is enabled */ if (priv->pmtd.enabled) { struct vbus_device_proxy *dev = priv->vdev; - size_t poollen = len * priv->txq.count; + size_t poollen = len * priv->tx.veq.count; char *pool; int shmid; @@ -262,7 +265,7 @@ tx_setup(struct vbus_enet_priv *priv) /* * Now populate each descriptor with an empty SG descriptor */ - for (i = 0; i < priv->txq.count; i++) { + for (i = 0; i < priv->tx.veq.count; i++) { struct venet_sg *vsg; if (priv->pmtd.enabled) { @@ -291,12 +294,14 @@ tx_setup(struct vbus_enet_priv *priv) static void tx_teardown(struct vbus_enet_priv *priv) { - struct ioq *ioq = priv->txq.queue; + struct ioq *ioq = 
priv->tx.veq.queue; struct ioq_iterator iter; + struct sk_buff *skb; int ret; /* forcefully free all outstanding transmissions */ - vbus_enet_tx_reap(priv, 1); + while ((skb = __skb_dequeue(&priv->tx.outstanding))) + dev_kfree_skb(skb); if (!priv->sg) /* @@ -529,7 +534,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&priv->lock, flags); - if (ioq_full(priv->txq.queue, ioq_idxtype_valid)) { + if (ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) { /* * We must flow-control the kernel by disabling the * queue @@ -544,7 +549,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) * We want to iterate on the tail of both the "inuse" and "valid" index * so we specify the "both" index */ - ret = ioq_iter_init(priv->txq.queue, &iter, ioq_idxtype_both, + ret = ioq_iter_init(priv->tx.veq.queue, &iter, ioq_idxtype_both, IOQ_ITER_AUTOUPDATE); BUG_ON(ret < 0); @@ -620,6 +625,8 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) priv->dev->stats.tx_packets++; priv->dev->stats.tx_bytes += skb->len; + __skb_queue_tail(&priv->tx.outstanding, skb); + /* * This advances both indexes together implicitly, and then * signals the south side to consume the packet @@ -629,7 +636,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) dev->trans_start = jiffies; /* save the timestamp */ - if (ioq_full(priv->txq.queue, ioq_idxtype_valid)) { + if (ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) { /* * If the queue is congested, we must flow-control the kernel */ @@ -648,7 +655,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) * assumes priv->lock held */ static void -vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force) +vbus_enet_tx_reap(struct vbus_enet_priv *priv) { struct ioq_iterator iter; int ret; @@ -658,7 +665,7 @@ vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force) * do not want the iter_pop (below) to flip the ownership, so * we set the NOFLIPOWNER option */ - ret = 
ioq_iter_init(priv->txq.queue, &iter, ioq_idxtype_valid, + ret = ioq_iter_init(priv->tx.veq.queue, &iter, ioq_idxtype_valid, IOQ_ITER_NOFLIPOWNER); BUG_ON(ret < 0); @@ -669,7 +676,7 @@ vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force) * We are done once we find the first packet either invalid or still * owned by the south-side */ - while (iter.desc->valid && (!iter.desc->sown || force)) { + while (iter.desc->valid && !iter.desc->sown) { struct sk_buff *skb; if (priv->sg) { @@ -687,6 +694,7 @@ vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force) /* Reset the descriptor */ iter.desc->valid = 0; + __skb_unlink(skb, &priv->tx.outstanding); dev_kfree_skb(skb); /* Advance the valid-index head */ @@ -699,7 +707,7 @@ vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force) * processing */ if (netif_queue_stopped(priv->dev) - && !ioq_full(priv->txq.queue, ioq_idxtype_valid)) { + && !ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) { PDEBUG(priv->dev, "re-enabling tx queue\n"); netif_wake_queue(priv->dev); } @@ -714,7 +722,7 @@ vbus_enet_timeout(struct net_device *dev) dev_dbg(&dev->dev, "Transmit timeout\n"); spin_lock_irqsave(&priv->lock, flags); - vbus_enet_tx_reap(priv, 0); + vbus_enet_tx_reap(priv); spin_unlock_irqrestore(&priv->lock, flags); } @@ -740,10 +748,10 @@ deferred_tx_isr(unsigned long data) PDEBUG(priv->dev, "deferred_tx_isr\n"); spin_lock_irqsave(&priv->lock, flags); - vbus_enet_tx_reap(priv, 0); + vbus_enet_tx_reap(priv); spin_unlock_irqrestore(&priv->lock, flags); - ioq_notify_enable(priv->txq.queue, 0); + ioq_notify_enable(priv->tx.veq.queue, 0); } static void @@ -751,12 +759,12 @@ tx_isr(struct ioq_notifier *notifier) { struct vbus_enet_priv *priv; - priv = container_of(notifier, struct vbus_enet_priv, txq.notifier); + priv = container_of(notifier, struct vbus_enet_priv, tx.veq.notifier); PDEBUG(priv->dev, "tx_isr\n"); - ioq_notify_disable(priv->txq.queue, 0); - tasklet_schedule(&priv->txtask); + ioq_notify_disable(priv->tx.veq.queue, 0); + 
tasklet_schedule(&priv->tx.task); } static void @@ -1043,16 +1051,17 @@ vbus_enet_probe(struct vbus_device_proxy *vdev) goto out_free; } - tasklet_init(&priv->txtask, deferred_tx_isr, (unsigned long)priv); + tasklet_init(&priv->tx.task, deferred_tx_isr, (unsigned long)priv); + skb_queue_head_init(&priv->tx.outstanding); queue_init(priv, &priv->rxq, VENET_QUEUE_RX, rx_ringlen, rx_isr); - queue_init(priv, &priv->txq, VENET_QUEUE_TX, tx_ringlen, tx_isr); + queue_init(priv, &priv->tx.veq, VENET_QUEUE_TX, tx_ringlen, tx_isr); rx_setup(priv); tx_setup(priv); ioq_notify_enable(priv->rxq.queue, 0); /* enable interrupts */ - ioq_notify_enable(priv->txq.queue, 0); + ioq_notify_enable(priv->tx.veq.queue, 0); dev->netdev_ops = &vbus_enet_netdev_ops; dev->watchdog_timeo = 5 * HZ; @@ -1101,7 +1110,7 @@ vbus_enet_remove(struct vbus_device_proxy *vdev) ioq_put(priv->rxq.queue); tx_teardown(priv); - ioq_put(priv->txq.queue); + ioq_put(priv->tx.veq.queue); if (priv->evq.enabled) evq_teardown(priv); -- cgit v1.2.3 From f2121177b6a409f9c719342eb3d3e035a900890f Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:36 -0500 Subject: venet: add a tx-complete event for out-of-order support This paves the way for zero-copy support since we cannot predict the order in which paged-skbs may actually be consumed. 
Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 77 +++++++++++++++++++++++++++++++++++++++---------- include/linux/venet.h | 8 +++++ 2 files changed, 70 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 30321692ff46..e8a05537aa9b 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -72,6 +72,7 @@ struct vbus_enet_priv { struct { bool enabled; bool linkstate; + bool txc; unsigned long evsize; struct vbus_enet_queue veq; struct tasklet_struct task; @@ -649,6 +650,17 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) return 0; } +/* assumes priv->lock held */ +static void +vbus_enet_skb_complete(struct vbus_enet_priv *priv, struct sk_buff *skb) +{ + PDEBUG(priv->dev, "completed sending %d bytes\n", + skb->len); + + __skb_unlink(skb, &priv->tx.outstanding); + dev_kfree_skb(skb); +} + /* * reclaim any outstanding completed tx packets * @@ -677,26 +689,28 @@ vbus_enet_tx_reap(struct vbus_enet_priv *priv) * owned by the south-side */ while (iter.desc->valid && !iter.desc->sown) { - struct sk_buff *skb; - if (priv->sg) { - struct venet_sg *vsg; + if (!priv->evq.txc) { + struct sk_buff *skb; - vsg = (struct venet_sg *)iter.desc->cookie; - skb = (struct sk_buff *)vsg->cookie; + if (priv->sg) { + struct venet_sg *vsg; - } else { - skb = (struct sk_buff *)iter.desc->cookie; - } + vsg = (struct venet_sg *)iter.desc->cookie; + skb = (struct sk_buff *)vsg->cookie; + } else + skb = (struct sk_buff *)iter.desc->cookie; - PDEBUG(priv->dev, "completed sending %d bytes\n", skb->len); + /* + * If TXC is not enabled, we are required to free + * the buffer resources now + */ + vbus_enet_skb_complete(priv, skb); + } /* Reset the descriptor */ iter.desc->valid = 0; - __skb_unlink(skb, &priv->tx.outstanding); - dev_kfree_skb(skb); - /* Advance the valid-index head */ ret = ioq_iter_pop(&iter, 0); BUG_ON(ret < 0); @@ -786,6 +800,22 @@ evq_linkstate_event(struct 
vbus_enet_priv *priv, } } +static void +evq_txc_event(struct vbus_enet_priv *priv, + struct venet_event_header *header) +{ + struct venet_event_txc *event = + (struct venet_event_txc *)header; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + vbus_enet_tx_reap(priv); + vbus_enet_skb_complete(priv, (struct sk_buff *)event->cookie); + + spin_unlock_irqrestore(&priv->lock, flags); +} + static void deferred_evq_isr(unsigned long data) { @@ -817,6 +847,9 @@ deferred_evq_isr(unsigned long data) case VENET_EVENT_LINKSTATE: evq_linkstate_event(priv, header); break; + case VENET_EVENT_TXC: + evq_txc_event(priv, header); + break; default: panic("venet: unexpected event id:%d of size %d\n", header->id, header->size); @@ -901,6 +934,7 @@ vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) caps.gid = VENET_CAP_GROUP_EVENTQ; caps.bits |= VENET_CAP_EVQ_LINKSTATE; + caps.bits |= VENET_CAP_EVQ_TXC; ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps)); if (ret < 0) @@ -925,6 +959,9 @@ vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) priv->evq.linkstate = true; } + if (caps.bits & VENET_CAP_EVQ_TXC) + priv->evq.txc = true; + memset(&query, 0, sizeof(query)); ret = devcall(priv, VENET_FUNC_EVQQUERY, &query, sizeof(query)); @@ -1051,7 +1088,6 @@ vbus_enet_probe(struct vbus_device_proxy *vdev) goto out_free; } - tasklet_init(&priv->tx.task, deferred_tx_isr, (unsigned long)priv); skb_queue_head_init(&priv->tx.outstanding); queue_init(priv, &priv->rxq, VENET_QUEUE_RX, rx_ringlen, rx_isr); @@ -1060,8 +1096,19 @@ vbus_enet_probe(struct vbus_device_proxy *vdev) rx_setup(priv); tx_setup(priv); - ioq_notify_enable(priv->rxq.queue, 0); /* enable interrupts */ - ioq_notify_enable(priv->tx.veq.queue, 0); + ioq_notify_enable(priv->rxq.queue, 0); /* enable rx interrupts */ + + if (!priv->evq.txc) { + /* + * If the TXC feature is present, we will recieve our + * tx-complete notification via the event-channel. 
Therefore, + * we only enable txq interrupts if the TXC feature is not + * present. + */ + tasklet_init(&priv->tx.task, deferred_tx_isr, + (unsigned long)priv); + ioq_notify_enable(priv->tx.veq.queue, 0); + } dev->netdev_ops = &vbus_enet_netdev_ops; dev->watchdog_timeo = 5 * HZ; diff --git a/include/linux/venet.h b/include/linux/venet.h index 16b0156e66a0..b6bfd9135028 100644 --- a/include/linux/venet.h +++ b/include/linux/venet.h @@ -50,6 +50,7 @@ struct venet_capabilities { /* CAPABILITIES-GROUP EVENTQ */ #define VENET_CAP_EVQ_LINKSTATE (1 << 0) +#define VENET_CAP_EVQ_TXC (1 << 1) /* tx-complete */ struct venet_iov { __u32 len; @@ -89,6 +90,7 @@ struct venet_eventq_query { }; #define VENET_EVENT_LINKSTATE 0 +#define VENET_EVENT_TXC 1 struct venet_event_header { __u32 flags; @@ -101,6 +103,12 @@ struct venet_event_linkstate { __u8 state; /* 0 = down, 1 = up */ }; +struct venet_event_txc { + struct venet_event_header header; + __u32 txqid; + __u64 cookie; +}; + #define VSG_DESC_SIZE(count) (sizeof(struct venet_sg) + \ sizeof(struct venet_iov) * ((count) - 1)) -- cgit v1.2.3 From daa13c9c5df6b8f161281cffa0c0cb63c7e71012 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:36 -0500 Subject: venet: add Layer-4 Reassembler Offload (L4RO) support This is the converse to GSO. It lets us receive fully reassembled L4 frames from the host. This allows us to reduce the interrupt rate of the guest, take advantage of host-based hardware that does reassembly, and to skip the SAR overhead for localhost (host->guest, guest->guest) connectivity. We accomplish this by re-using the SG support from the transmit/GSO side and supplying a "page-queue" of free pages to use for when we need frames larger than MTU. 
Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 384 ++++++++++++++++++++++++++++++++++++++++++++---- include/linux/venet.h | 10 ++ 2 files changed, 365 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index e8a05537aa9b..6fe2241dddd6 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -47,6 +47,8 @@ module_param(sg_enabled, int, 0444); #define PDEBUG(_dev, fmt, args...) dev_dbg(&(_dev)->dev, fmt, ## args) +#define SG_DESC_SIZE VSG_DESC_SIZE(MAX_SKB_FRAGS) + struct vbus_enet_queue { struct ioq *queue; struct ioq_notifier notifier; @@ -78,6 +80,14 @@ struct vbus_enet_priv { struct tasklet_struct task; char *pool; } evq; + struct { + bool available; + char *pool; + struct vbus_enet_queue pageq; + } l4ro; + + struct sk_buff *(*import)(struct vbus_enet_priv *priv, + struct ioq_ring_desc *desc); }; static void vbus_enet_tx_reap(struct vbus_enet_priv *priv); @@ -127,29 +137,88 @@ devcall(struct vbus_enet_priv *priv, u32 func, void *data, size_t len) */ static void -rxdesc_alloc(struct net_device *dev, struct ioq_ring_desc *desc, size_t len) +rxdesc_alloc(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc, size_t len) { + struct net_device *dev = priv->dev; struct sk_buff *skb; len += ETH_HLEN; - skb = netdev_alloc_skb(dev, len + 2); + skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN); BUG_ON(!skb); skb_reserve(skb, NET_IP_ALIGN); /* align IP on 16B boundary */ - desc->cookie = (u64)skb; - desc->ptr = (u64)__pa(skb->data); - desc->len = len; /* total length */ + if (priv->l4ro.available) { + /* + * We will populate an SG descriptor initially with one + * IOV filled with an MTU SKB. 
If the packet needs to be + * larger than MTU, the host will grab pages out of the + * page-queue and populate additional IOVs + */ + struct venet_sg *vsg = (struct venet_sg *)desc->cookie; + struct venet_iov *iov = &vsg->iov[0]; + + memset(vsg, 0, SG_DESC_SIZE); + + vsg->cookie = (u64)skb; + vsg->count = 1; + + iov->ptr = (u64)__pa(skb->data); + iov->len = len; + } else { + desc->cookie = (u64)skb; + desc->ptr = (u64)__pa(skb->data); + desc->len = len; /* total length */ + } + desc->valid = 1; } +static void +rx_pageq_refill(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->l4ro.pageq.queue; + struct ioq_iterator iter; + int ret; + + if (ioq_full(ioq, ioq_idxtype_inuse)) + /* nothing to do if the pageq is already fully populated */ + return; + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0); + BUG_ON(ret < 0); /* will never fail unless seriously broken */ + + ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty page + */ + while (!iter.desc->sown) { + struct page *page; + + page = alloc_page(GFP_KERNEL); + BUG_ON(!page); + + iter.desc->cookie = (u64)page; + iter.desc->ptr = (u64)__pa(page_address(page)); + iter.desc->len = PAGE_SIZE; + + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + } + + ioq_signal(ioq, 0); +} + static void rx_setup(struct vbus_enet_priv *priv) { struct ioq *ioq = priv->rxq.queue; struct ioq_iterator iter; int ret; + int i = 0; /* * We want to iterate on the "valid" index. 
By default the iterator @@ -170,10 +239,19 @@ rx_setup(struct vbus_enet_priv *priv) BUG_ON(ret < 0); /* - * Now populate each descriptor with an empty SKB and mark it valid + * Now populate each descriptor with an empty buffer and mark it valid */ while (!iter.desc->valid) { - rxdesc_alloc(priv->dev, iter.desc, priv->dev->mtu); + if (priv->l4ro.available) { + size_t offset = (i * SG_DESC_SIZE); + void *addr = &priv->l4ro.pool[offset]; + + iter.desc->ptr = (u64)offset; + iter.desc->cookie = (u64)addr; + iter.desc->len = SG_DESC_SIZE; + } + + rxdesc_alloc(priv, iter.desc, priv->dev->mtu); /* * This push operation will simultaneously advance the @@ -182,11 +260,16 @@ rx_setup(struct vbus_enet_priv *priv) */ ret = ioq_iter_push(&iter, 0); BUG_ON(ret < 0); + + i++; } + + if (priv->l4ro.available) + rx_pageq_refill(priv); } static void -rx_teardown(struct vbus_enet_priv *priv) +rx_rxq_teardown(struct vbus_enet_priv *priv) { struct ioq *ioq = priv->rxq.queue; struct ioq_iterator iter; @@ -202,7 +285,25 @@ rx_teardown(struct vbus_enet_priv *priv) * free each valid descriptor */ while (iter.desc->valid) { - struct sk_buff *skb = (struct sk_buff *)iter.desc->cookie; + struct sk_buff *skb; + + if (priv->l4ro.available) { + struct venet_sg *vsg; + int i; + + vsg = (struct venet_sg *)iter.desc->cookie; + + /* skip i=0, since that is the skb->data IOV */ + for (i = 1; i < vsg->count; i++) { + struct venet_iov *iov = &vsg->iov[i]; + struct page *page = (struct page *)iov->ptr; + + put_page(page); + } + + skb = (struct sk_buff *)vsg->cookie; + } else + skb = (struct sk_buff *)iter.desc->cookie; iter.desc->valid = 0; wmb(); @@ -217,12 +318,54 @@ rx_teardown(struct vbus_enet_priv *priv) } } +static void +rx_l4ro_teardown(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->l4ro.pageq.queue; + struct ioq_iterator iter; + int ret; + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + 
+ /* + * free each valid descriptor + */ + while (iter.desc->sown) { + struct page *page = (struct page *)iter.desc->cookie; + + iter.desc->valid = 0; + wmb(); + + iter.desc->ptr = 0; + iter.desc->cookie = 0; + + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + + put_page(page); + } + + ioq_put(ioq); + kfree(priv->l4ro.pool); +} + +static void +rx_teardown(struct vbus_enet_priv *priv) +{ + rx_rxq_teardown(priv); + + if (priv->l4ro.available) + rx_l4ro_teardown(priv); +} + static int tx_setup(struct vbus_enet_priv *priv) { struct ioq *ioq = priv->tx.veq.queue; - size_t iovlen = sizeof(struct venet_iov) * (MAX_SKB_FRAGS-1); - size_t len = sizeof(struct venet_sg) + iovlen; struct ioq_iterator iter; int i; int ret; @@ -237,7 +380,7 @@ tx_setup(struct vbus_enet_priv *priv) /* pre-allocate our descriptor pool if pmtd is enabled */ if (priv->pmtd.enabled) { struct vbus_device_proxy *dev = priv->vdev; - size_t poollen = len * priv->tx.veq.count; + size_t poollen = SG_DESC_SIZE * priv->tx.veq.count; char *pool; int shmid; @@ -270,12 +413,12 @@ tx_setup(struct vbus_enet_priv *priv) struct venet_sg *vsg; if (priv->pmtd.enabled) { - size_t offset = (i * len); + size_t offset = (i * SG_DESC_SIZE); vsg = (struct venet_sg *)&priv->pmtd.pool[offset]; iter.desc->ptr = (u64)offset; } else { - vsg = kzalloc(len, GFP_KERNEL); + vsg = kzalloc(SG_DESC_SIZE, GFP_KERNEL); if (!vsg) return -ENOMEM; @@ -283,7 +426,7 @@ tx_setup(struct vbus_enet_priv *priv) } iter.desc->cookie = (u64)vsg; - iter.desc->len = len; + iter.desc->len = SG_DESC_SIZE; ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); BUG_ON(ret < 0); @@ -444,6 +587,120 @@ vbus_enet_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static struct sk_buff * +vbus_enet_l4ro_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) +{ + struct venet_sg *vsg = (struct venet_sg *)desc->cookie; + struct sk_buff *skb = (struct sk_buff *)vsg->cookie; + struct skb_shared_info *sinfo = skb_shinfo(skb); + int i; + + 
rx_pageq_refill(priv); + + if (!vsg->len) + /* + * the device may send a zero-length packet when its + * flushing references on the ring. We can just drop + * these on the floor + */ + goto fail; + + /* advance only by the linear portion in IOV[0] */ + skb_put(skb, vsg->iov[0].len); + + /* skip i=0, since that is the skb->data IOV */ + for (i = 1; i < vsg->count; i++) { + struct venet_iov *iov = &vsg->iov[i]; + struct page *page = (struct page *)iov->ptr; + skb_frag_t *f = &sinfo->frags[i-1]; + + f->page = page; + f->page_offset = 0; + f->size = iov->len; + + PDEBUG(priv->dev, "SG: Importing %d byte page[%i]\n", + f->size, i); + + skb->data_len += f->size; + skb->len += f->size; + skb->truesize += f->size; + sinfo->nr_frags++; + } + + if (vsg->flags & VENET_SG_FLAG_NEEDS_CSUM + && !skb_partial_csum_set(skb, vsg->csum.start, + vsg->csum.offset)) { + priv->dev->stats.rx_frame_errors++; + goto fail; + } + + if (vsg->flags & VENET_SG_FLAG_GSO) { + PDEBUG(priv->dev, "L4RO packet detected\n"); + + switch (vsg->gso.type) { + case VENET_GSO_TYPE_TCPV4: + sinfo->gso_type = SKB_GSO_TCPV4; + break; + case VENET_GSO_TYPE_TCPV6: + sinfo->gso_type = SKB_GSO_TCPV6; + break; + case VENET_GSO_TYPE_UDP: + sinfo->gso_type = SKB_GSO_UDP; + break; + default: + PDEBUG(priv->dev, "Illegal L4RO type: %d\n", + vsg->gso.type); + priv->dev->stats.rx_frame_errors++; + goto fail; + } + + if (vsg->flags & VENET_SG_FLAG_ECN) + sinfo->gso_type |= SKB_GSO_TCP_ECN; + + sinfo->gso_size = vsg->gso.size; + if (sinfo->gso_size == 0) { + PDEBUG(priv->dev, "Illegal L4RO size: %d\n", + vsg->gso.size); + priv->dev->stats.rx_frame_errors++; + goto fail; + } + + /* + * Header must be checked, and gso_segs + * computed. 
+ */ + sinfo->gso_type |= SKB_GSO_DODGY; + sinfo->gso_segs = 0; + } + + return skb; + +fail: + dev_kfree_skb(skb); + + return NULL; +} + +static struct sk_buff * +vbus_enet_flat_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) +{ + struct sk_buff *skb = (struct sk_buff *)desc->cookie; + + if (!desc->len) { + /* + * the device may send a zero-length packet when its + * flushing references on the ring. We can just drop + * these on the floor + */ + dev_kfree_skb(skb); + return NULL; + } + + skb_put(skb, desc->len); + + return skb; +} + /* * The poll implementation. */ @@ -471,15 +728,14 @@ vbus_enet_poll(struct napi_struct *napi, int budget) * the south side */ while ((npackets < budget) && (!iter.desc->sown)) { - struct sk_buff *skb = (struct sk_buff *)iter.desc->cookie; - - if (iter.desc->len) { - skb_put(skb, iter.desc->len); + struct sk_buff *skb; + skb = priv->import(priv, iter.desc); + if (skb) { /* Maintain stats */ npackets++; priv->dev->stats.rx_packets++; - priv->dev->stats.rx_bytes += iter.desc->len; + priv->dev->stats.rx_bytes += skb->len; /* Pass the buffer up to the stack */ skb->dev = priv->dev; @@ -487,16 +743,10 @@ vbus_enet_poll(struct napi_struct *napi, int budget) netif_receive_skb(skb); mb(); - } else - /* - * the device may send a zero-length packet when its - * flushing references on the ring. 
We can just drop - * these on the floor - */ - dev_kfree_skb(skb); + } /* Grab a new buffer to put in the ring */ - rxdesc_alloc(priv->dev, iter.desc, priv->dev->mtu); + rxdesc_alloc(priv, iter.desc, priv->dev->mtu); /* Advance the in-use tail */ ret = ioq_iter_pop(&iter, 0); @@ -1013,6 +1263,69 @@ vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) return 0; } +static int +vbus_enet_l4ro_negcap(struct vbus_enet_priv *priv, unsigned long count) +{ + struct venet_capabilities caps; + int ret; + + memset(&caps, 0, sizeof(caps)); + + caps.gid = VENET_CAP_GROUP_L4RO; + caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6 + |VENET_CAP_ECN); + + ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps)); + if (ret < 0) { + printk(KERN_ERR "Error negotiating L4RO: %d\n", ret); + return ret; + } + + if (caps.bits & VENET_CAP_SG) { + struct vbus_device_proxy *dev = priv->vdev; + size_t poollen = SG_DESC_SIZE * count; + struct venet_l4ro_query query; + char *pool; + + memset(&query, 0, sizeof(query)); + + ret = devcall(priv, VENET_FUNC_L4ROQUERY, &query, sizeof(query)); + if (ret < 0) { + printk(KERN_ERR "Error querying L4RO: %d\n", ret); + return ret; + } + + pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA); + if (!pool) + return -ENOMEM; + + /* + * pre-mapped descriptor pool + */ + ret = dev->ops->shm(dev, query.dpid, 0, + pool, poollen, 0, NULL, 0); + if (ret < 0) { + printk(KERN_ERR "Error registering L4RO pool: %d\n", + ret); + kfree(pool); + return ret; + } + + /* + * page-queue: contains a ring of arbitrary pages for + * consumption by the host for when the SG::IOV count exceeds + * one MTU frame. All we need to do is keep it populated + * with free pages. 
+ */ + queue_init(priv, &priv->l4ro.pageq, query.pqid, count, NULL); + + priv->l4ro.pool = pool; + priv->l4ro.available = true; + } + + return 0; +} + static int vbus_enet_negcap(struct vbus_enet_priv *priv) { @@ -1022,7 +1335,15 @@ vbus_enet_negcap(struct vbus_enet_priv *priv) if (ret < 0) return ret; - return vbus_enet_evq_negcap(priv, tx_ringlen); + ret = vbus_enet_evq_negcap(priv, tx_ringlen); + if (ret < 0) + return ret; + + ret = vbus_enet_l4ro_negcap(priv, rx_ringlen); + if (ret < 0) + return ret; + + return 0; } static int vbus_enet_set_tx_csum(struct net_device *dev, u32 data) @@ -1088,6 +1409,11 @@ vbus_enet_probe(struct vbus_device_proxy *vdev) goto out_free; } + if (priv->l4ro.available) + priv->import = &vbus_enet_l4ro_import; + else + priv->import = &vbus_enet_flat_import; + skb_queue_head_init(&priv->tx.outstanding); queue_init(priv, &priv->rxq, VENET_QUEUE_RX, rx_ringlen, rx_isr); diff --git a/include/linux/venet.h b/include/linux/venet.h index b6bfd9135028..0578d797c973 100644 --- a/include/linux/venet.h +++ b/include/linux/venet.h @@ -39,6 +39,7 @@ struct venet_capabilities { #define VENET_CAP_GROUP_SG 0 #define VENET_CAP_GROUP_EVENTQ 1 +#define VENET_CAP_GROUP_L4RO 2 /* layer-4 reassem offloading */ /* CAPABILITIES-GROUP SG */ #define VENET_CAP_SG (1 << 0) @@ -109,6 +110,14 @@ struct venet_event_txc { __u64 cookie; }; +struct venet_l4ro_query { + __u32 flags; + __u32 dpid; /* descriptor pool-id */ + __u32 pqid; /* page queue-id */ + __u8 pad[20]; +}; + + #define VSG_DESC_SIZE(count) (sizeof(struct venet_sg) + \ sizeof(struct venet_iov) * ((count) - 1)) @@ -119,5 +128,6 @@ struct venet_event_txc { #define VENET_FUNC_FLUSHRX 4 #define VENET_FUNC_PMTDQUERY 5 #define VENET_FUNC_EVQQUERY 6 +#define VENET_FUNC_L4ROQUERY 7 #endif /* _LINUX_VENET_H */ -- cgit v1.2.3 From 275732e4c0715ceed7b1d0af358b12ec6c7bc67e Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:37 -0500 Subject: vbus: allow shmsignals to be named This will allow 
the signals to be displayed to the end-user in some meaningful way later in the series, such as for statistics, etc. Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 43 ++++++++++++++++++++++++++++++++++--------- drivers/vbus/bus-proxy.c | 6 +++--- drivers/vbus/pci-bridge.c | 3 ++- include/linux/vbus_driver.h | 7 ++++--- 4 files changed, 43 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 6fe2241dddd6..9d4867452944 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -101,14 +101,20 @@ napi_to_priv(struct napi_struct *napi) static int queue_init(struct vbus_enet_priv *priv, struct vbus_enet_queue *q, + const char *name, int qid, size_t ringsize, void (*func)(struct ioq_notifier *)) { struct vbus_device_proxy *dev = priv->vdev; int ret; + char _name[64]; - ret = vbus_driver_ioq_alloc(dev, qid, 0, ringsize, &q->queue); + if (name) + snprintf(_name, sizeof(_name), "%s-%s", priv->dev->name, name); + + ret = vbus_driver_ioq_alloc(dev, name ? 
_name : NULL, qid, 0, + ringsize, &q->queue); if (ret < 0) panic("ioq_alloc failed: %d\n", ret); @@ -396,7 +402,8 @@ tx_setup(struct vbus_enet_priv *priv) priv->pmtd.pool = pool; - ret = dev->ops->shm(dev, shmid, 0, pool, poollen, 0, NULL, 0); + ret = dev->ops->shm(dev, NULL, shmid, 0, pool, poollen, + 0, NULL, 0); BUG_ON(ret < 0); } @@ -1227,12 +1234,13 @@ vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) priv->evq.pool = pool; - ret = dev->ops->shm(dev, query.dpid, 0, + ret = dev->ops->shm(dev, NULL, query.dpid, 0, pool, poollen, 0, NULL, 0); if (ret < 0) return ret; - queue_init(priv, &priv->evq.veq, query.qid, count, evq_isr); + queue_init(priv, &priv->evq.veq, "evq", + query.qid, count, evq_isr); ret = ioq_iter_init(priv->evq.veq.queue, &iter, ioq_idxtype_valid, 0); @@ -1302,7 +1310,7 @@ vbus_enet_l4ro_negcap(struct vbus_enet_priv *priv, unsigned long count) /* * pre-mapped descriptor pool */ - ret = dev->ops->shm(dev, query.dpid, 0, + ret = dev->ops->shm(dev, NULL, query.dpid, 0, pool, poollen, 0, NULL, 0); if (ret < 0) { printk(KERN_ERR "Error registering L4RO pool: %d\n", @@ -1317,7 +1325,8 @@ vbus_enet_l4ro_negcap(struct vbus_enet_priv *priv, unsigned long count) * one MTU frame. All we need to do is keep it populated * with free pages. 
*/ - queue_init(priv, &priv->l4ro.pageq, query.pqid, count, NULL); + queue_init(priv, &priv->l4ro.pageq, "pageq", query.pqid, + count, NULL); priv->l4ro.pool = pool; priv->l4ro.available = true; @@ -1395,6 +1404,16 @@ vbus_enet_probe(struct vbus_device_proxy *vdev) if (!dev) return -ENOMEM; + /* + * establish our device-name early so we can incorporate it into + * the signal-path names, etc + */ + rtnl_lock(); + + ret = dev_alloc_name(dev, dev->name); + if (ret < 0) + goto out_free; + priv = netdev_priv(dev); spin_lock_init(&priv->lock); @@ -1416,8 +1435,10 @@ vbus_enet_probe(struct vbus_device_proxy *vdev) skb_queue_head_init(&priv->tx.outstanding); - queue_init(priv, &priv->rxq, VENET_QUEUE_RX, rx_ringlen, rx_isr); - queue_init(priv, &priv->tx.veq, VENET_QUEUE_TX, tx_ringlen, tx_isr); + queue_init(priv, &priv->rxq, "rx", VENET_QUEUE_RX, rx_ringlen, + rx_isr); + queue_init(priv, &priv->tx.veq, "tx", VENET_QUEUE_TX, tx_ringlen, + tx_isr); rx_setup(priv); tx_setup(priv); @@ -1453,18 +1474,22 @@ vbus_enet_probe(struct vbus_device_proxy *vdev) dev->features |= NETIF_F_HIGHDMA; - ret = register_netdev(dev); + ret = register_netdevice(dev); if (ret < 0) { printk(KERN_INFO "VENET: error %i registering device \"%s\"\n", ret, dev->name); goto out_free; } + rtnl_unlock(); + vdev->priv = priv; return 0; out_free: + rtnl_unlock(); + free_netdev(dev); return ret; diff --git a/drivers/vbus/bus-proxy.c b/drivers/vbus/bus-proxy.c index 88cd9048335f..5d349427661a 100644 --- a/drivers/vbus/bus-proxy.c +++ b/drivers/vbus/bus-proxy.c @@ -167,8 +167,8 @@ static struct ioq_ops vbus_driver_ioq_ops = { }; -int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, int id, int prio, - size_t count, struct ioq **ioq) +int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, const char *name, + int id, int prio, size_t count, struct ioq **ioq) { struct ioq *_ioq; struct ioq_ring_head *head = NULL; @@ -188,7 +188,7 @@ int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, int id, int prio, 
head->ver = IOQ_RING_VER; head->count = count; - ret = dev->ops->shm(dev, id, prio, head, len, + ret = dev->ops->shm(dev, name, id, prio, head, len, &head->signal, &signal, 0); if (ret < 0) goto error; diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index 80718e693394..fa77318c2a04 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -262,7 +262,8 @@ vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags) } static int -vbus_pci_device_shm(struct vbus_device_proxy *vdev, int id, int prio, +vbus_pci_device_shm(struct vbus_device_proxy *vdev, const char *name, + int id, int prio, void *ptr, size_t len, struct shm_signal_desc *sdesc, struct shm_signal **signal, int flags) diff --git a/include/linux/vbus_driver.h b/include/linux/vbus_driver.h index 9cfbf60df684..2b1dac47f180 100644 --- a/include/linux/vbus_driver.h +++ b/include/linux/vbus_driver.h @@ -34,7 +34,8 @@ struct vbus_driver; struct vbus_device_proxy_ops { int (*open)(struct vbus_device_proxy *dev, int version, int flags); int (*close)(struct vbus_device_proxy *dev, int flags); - int (*shm)(struct vbus_device_proxy *dev, int id, int prio, + int (*shm)(struct vbus_device_proxy *dev, const char *name, + int id, int prio, void *ptr, size_t len, struct shm_signal_desc *sigdesc, struct shm_signal **signal, int flags); @@ -74,7 +75,7 @@ void vbus_driver_unregister(struct vbus_driver *drv); /* * driver-side IOQ helper - allocates device-shm and maps an IOQ on it */ -int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, int id, int prio, - size_t ringsize, struct ioq **ioq); +int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, const char *name, + int id, int prio, size_t ringsize, struct ioq **ioq); #endif /* _LINUX_VBUS_DRIVER_H */ -- cgit v1.2.3 From ab8ccab252121a22a74be5d1fef7882bc4b76a57 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:38 -0500 Subject: vbus: register shm-signal events as standard Linux IRQ vectors This will allow 
us to view the vector statistics in a uniform way, as well as pave the way for supporting irqbalance in the future. The VBUS PCI-BRIDGE will present itself as an irqchip device to the kernel proper. Each shm-signal that is created by a specific driver will show up as a "VBUS-edge" IRQ in /proc/interrupts. For instance, here is example output from running a venet device as "eth1" (see vectors 28-31): vbus-guest:/home/ghaskins # cat /proc/interrupts CPU0 0: 89 IO-APIC-edge timer 1: 6 IO-APIC-edge i8042 4: 1620 IO-APIC-edge serial 6: 2 IO-APIC-edge floppy 7: 0 IO-APIC-edge parport0 8: 0 IO-APIC-edge rtc0 9: 0 IO-APIC-fasteoi acpi 10: 0 IO-APIC-fasteoi virtio1 12: 90 IO-APIC-edge i8042 14: 3474 IO-APIC-edge ata_piix 15: 7778 IO-APIC-edge ata_piix 24: 185849 PCI-MSI-edge vbus 25: 0 PCI-MSI-edge virtio0-config 26: 210 PCI-MSI-edge virtio0-input 27: 28 PCI-MSI-edge virtio0-output 28: 167062 VBUS-edge eth1-evq 29: 0 VBUS-edge eth1-pageq 30: 161593 VBUS-edge eth1-rx 31: 0 VBUS-edge eth1-tx NMI: 0 Non-maskable interrupts LOC: 72010 Local timer interrupts SPU: 0 Spurious interrupts CNT: 0 Performance counter interrupts PND: 0 Performance pending work RES: 0 Rescheduling interrupts CAL: 0 Function call interrupts TLB: 0 TLB shootdowns TRM: 0 Thermal event interrupts THR: 0 Threshold APIC interrupts MCE: 0 Machine check exceptions MCP: 20 Machine check polls ERR: 0 MIS: 0 Signed-off-by: Gregory Haskins --- drivers/vbus/Kconfig | 4 +-- drivers/vbus/pci-bridge.c | 91 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 74 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/vbus/Kconfig b/drivers/vbus/Kconfig index 08667aaed47e..f51cba10913e 100644 --- a/drivers/vbus/Kconfig +++ b/drivers/vbus/Kconfig @@ -3,7 +3,7 @@ # config VBUS_PROXY - tristate "Virtual-Bus support" + bool "Virtual-Bus support" select SHM_SIGNAL select IOQ default n @@ -14,7 +14,7 @@ config VBUS_PROXY on the backend, say Y. If unsure, say N. 
config VBUS_PCIBRIDGE - tristate "PCI to Virtual-Bus bridge" + bool "PCI to Virtual-Bus bridge" depends on PCI depends on VBUS_PROXY select IOQ diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index fa77318c2a04..fcde49525886 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -147,15 +147,14 @@ _signal_init(struct shm_signal *signal, struct shm_signal_desc *desc, */ struct _signal { + char name[64]; struct vbus_pci *pcivbus; struct shm_signal signal; u32 handle; struct rb_node node; struct list_head list; - struct { - int notify; - int inject; - } stats; + int irq; + struct irq_desc *desc; }; static struct _signal * @@ -170,7 +169,6 @@ _signal_inject(struct shm_signal *signal) struct _signal *_signal = to_signal(signal); vbus_pci.stats.inject++; - _signal->stats.inject++; iowrite32(_signal->handle, &vbus_pci.signals->shmsignal); return 0; @@ -236,6 +234,7 @@ vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags) _signal = list_first_entry(&dev->shms, struct _signal, list); list_del(&_signal->list); + free_irq(_signal->irq, _signal); spin_unlock_irqrestore(&vbus_pci.lock, iflags); shm_signal_put(&_signal->signal); @@ -261,6 +260,27 @@ vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags) return 0; } +static void vbus_irq_chip_noop(unsigned int irq) +{ +} + +static struct irq_chip vbus_irq_chip = { + .name = "VBUS", + .mask = vbus_irq_chip_noop, + .unmask = vbus_irq_chip_noop, + .eoi = vbus_irq_chip_noop, +}; + +irqreturn_t +shm_signal_intr(int irq, void *dev) +{ + struct _signal *_signal = (struct _signal *)dev; + + _shm_signal_wakeup(&_signal->signal); + + return IRQ_HANDLED; +} + static int vbus_pci_device_shm(struct vbus_device_proxy *vdev, const char *name, int id, int prio, @@ -315,24 +335,45 @@ vbus_pci_device_shm(struct vbus_device_proxy *vdev, const char *name, ret = vbus_pci_buscall(VBUS_PCI_HC_DEVSHM, ¶ms, sizeof(params)); - if (ret < 0) { - if (_signal) { - /* - * We held two references above, 
so we need to drop - * both of them - */ - shm_signal_put(&_signal->signal); - shm_signal_put(&_signal->signal); - } - - return ret; - } + if (ret < 0) + goto fail; if (signal) { + int irq; + BUG_ON(ret < 0); _signal->handle = ret; + irq = create_irq(); + if (irq < 0) { + printk(KERN_ERR "Failed to create IRQ: %d\n", irq); + ret = -ENOSPC; + goto fail; + } + + _signal->irq = irq; + _signal->desc = irq_to_desc(irq); + + set_irq_chip_and_handler_name(irq, + &vbus_irq_chip, + handle_percpu_irq, + "edge"); + + if (!name) + snprintf(_signal->name, sizeof(_signal->name), + "dev%lld-id%d", vdev->id, id); + else + snprintf(_signal->name, sizeof(_signal->name), + "%s", name); + + ret = request_irq(irq, shm_signal_intr, 0, + _signal->name, _signal); + if (ret) { + printk(KERN_ERR "Failed to request irq: %d\n", irq); + goto fail; + } + spin_lock_irqsave(&vbus_pci.lock, iflags); list_add_tail(&_signal->list, &dev->shms); @@ -344,6 +385,18 @@ vbus_pci_device_shm(struct vbus_device_proxy *vdev, const char *name, } return 0; + +fail: + if (_signal) { + /* + * We held two references above, so we need to drop + * both of them + */ + shm_signal_put(&_signal->signal); + shm_signal_put(&_signal->signal); + } + + return ret; } static int @@ -454,10 +507,10 @@ static void event_shmsignal(struct vbus_pci_handle_event *event) { struct _signal *_signal = (struct _signal *)event->handle; + struct irq_desc *desc = _signal->desc; vbus_pci.stats.notify++; - _signal->stats.notify++; - _shm_signal_wakeup(&_signal->signal); + desc->handle_irq(_signal->irq, desc); } static void -- cgit v1.2.3 From 7bdb4fe8e3e4e56022976781c67461ec690fe6d9 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:38 -0500 Subject: net: fix vbus-enet Kconfig dependencies We currently select VBUS_PROXY when vbus-enet is enabled, which is the wrong direction. Not all platforms will define VBUS-PROXY, and venet depends on its inclusion. 
Therefore, let's fix vbus-enet to properly depend on the presence of VBUS_PROXY to get this right. Signed-off-by: Gregory Haskins --- drivers/net/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ee4130305638..d8571497d6fc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -3244,7 +3244,7 @@ config VMXNET3 config VBUS_ENET tristate "VBUS Ethernet Driver" default n - select VBUS_PROXY + depends on VBUS_PROXY help A virtualized 802.x network device based on the VBUS "virtual-ethernet" interface. It can be used with any -- cgit v1.2.3 From 6c574131cabe93d605057df10a4f8c80ab77f352 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:39 -0500 Subject: venet: fix locking issue with dev_kfree_skb() We currently hold the priv->lock with interrupts disabled while calling dev_kfree_skb(). lockdep indicated that this arrangement is problematic with higher stack components which handle the wmem facility. It is probably a bad idea to hold the lock/interrupts over the duration of this function independent of the lock-conflict issue, so let's rectify this. This new design switches to a finer-grained model, where we acquire/release the lock for each packet that we reap from the tx queue. This adds theoretical lock acquisition overhead, but gains the ability to release the skbs without holding a lock and while improving critical section granularity. 
Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 71 ++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 9d4867452944..228c366a50a7 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -883,7 +883,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) priv->dev->stats.tx_packets++; priv->dev->stats.tx_bytes += skb->len; - __skb_queue_tail(&priv->tx.outstanding, skb); + skb_queue_tail(&priv->tx.outstanding, skb); /* * This advances both indexes together implicitly, and then @@ -914,7 +914,7 @@ vbus_enet_skb_complete(struct vbus_enet_priv *priv, struct sk_buff *skb) PDEBUG(priv->dev, "completed sending %d bytes\n", skb->len); - __skb_unlink(skb, &priv->tx.outstanding); + skb_unlink(skb, &priv->tx.outstanding); dev_kfree_skb(skb); } @@ -923,12 +923,16 @@ vbus_enet_skb_complete(struct vbus_enet_priv *priv, struct sk_buff *skb) * * assumes priv->lock held */ -static void -vbus_enet_tx_reap(struct vbus_enet_priv *priv) +static struct sk_buff * +vbus_enet_tx_reap_one(struct vbus_enet_priv *priv) { + struct sk_buff *skb = NULL; struct ioq_iterator iter; + unsigned long flags; int ret; + spin_lock_irqsave(&priv->lock, flags); + /* * We want to iterate on the head of the valid index, but we * do not want the iter_pop (below) to flip the ownership, so @@ -941,29 +945,15 @@ vbus_enet_tx_reap(struct vbus_enet_priv *priv) ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); BUG_ON(ret < 0); - /* - * We are done once we find the first packet either invalid or still - * owned by the south-side - */ - while (iter.desc->valid && !iter.desc->sown) { - - if (!priv->evq.txc) { - struct sk_buff *skb; + if (iter.desc->valid && !iter.desc->sown) { - if (priv->sg) { - struct venet_sg *vsg; - - vsg = (struct venet_sg *)iter.desc->cookie; - skb = (struct sk_buff *)vsg->cookie; - } else - skb = (struct sk_buff 
*)iter.desc->cookie; + if (priv->sg) { + struct venet_sg *vsg; - /* - * If TXC is not enabled, we are required to free - * the buffer resources now - */ - vbus_enet_skb_complete(priv, skb); - } + vsg = (struct venet_sg *)iter.desc->cookie; + skb = (struct sk_buff *)vsg->cookie; + } else + skb = (struct sk_buff *)iter.desc->cookie; /* Reset the descriptor */ iter.desc->valid = 0; @@ -982,19 +972,35 @@ vbus_enet_tx_reap(struct vbus_enet_priv *priv) PDEBUG(priv->dev, "re-enabling tx queue\n"); netif_wake_queue(priv->dev); } + + spin_unlock_irqrestore(&priv->lock, flags); + + return skb; +} + +static void +vbus_enet_tx_reap(struct vbus_enet_priv *priv) +{ + struct sk_buff *skb; + + while ((skb = vbus_enet_tx_reap_one(priv))) { + if (!priv->evq.txc) + /* + * We are responsible for freeing the packet upon + * reap if TXC is not enabled + */ + vbus_enet_skb_complete(priv, skb); + } } static void vbus_enet_timeout(struct net_device *dev) { struct vbus_enet_priv *priv = netdev_priv(dev); - unsigned long flags; dev_dbg(&dev->dev, "Transmit timeout\n"); - spin_lock_irqsave(&priv->lock, flags); vbus_enet_tx_reap(priv); - spin_unlock_irqrestore(&priv->lock, flags); } static void @@ -1014,13 +1020,10 @@ static void deferred_tx_isr(unsigned long data) { struct vbus_enet_priv *priv = (struct vbus_enet_priv *)data; - unsigned long flags; PDEBUG(priv->dev, "deferred_tx_isr\n"); - spin_lock_irqsave(&priv->lock, flags); vbus_enet_tx_reap(priv); - spin_unlock_irqrestore(&priv->lock, flags); ioq_notify_enable(priv->tx.veq.queue, 0); } @@ -1063,14 +1066,10 @@ evq_txc_event(struct vbus_enet_priv *priv, { struct venet_event_txc *event = (struct venet_event_txc *)header; - unsigned long flags; - - spin_lock_irqsave(&priv->lock, flags); vbus_enet_tx_reap(priv); - vbus_enet_skb_complete(priv, (struct sk_buff *)event->cookie); - spin_unlock_irqrestore(&priv->lock, flags); + vbus_enet_skb_complete(priv, (struct sk_buff *)event->cookie); } static void -- cgit v1.2.3 From 
55a1f2aa6b86b47fea344e52252dfbc8ad84d569 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:40 -0500 Subject: vbus: fix kmalloc() from interrupt context to use GFP_ATOMIC DEVADD events currently perform a GFP_KERNEL allocation for the device object in interrupt context. This is technically illegal, although we have gotten away with it to date by sheer luck that the allocation never tried to swap or otherwise sleep. Let's fix this properly by making sure that we only allocate the space for the device object using GFP_KERNEL from process-context. We achieve this by generating a temporary GFP_ATOMIC relay for the event and deferring the actual device allocation/registration to process context. Signed-off-by: Gregory Haskins --- drivers/vbus/pci-bridge.c | 54 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index fcde49525886..c1af37cd30d1 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -63,7 +63,6 @@ struct vbus_pci_device { u64 handle; struct list_head shms; struct vbus_device_proxy vdev; - struct work_struct add; struct work_struct drop; }; @@ -442,18 +441,45 @@ struct vbus_device_proxy_ops vbus_pci_device_ops = { * ------------------- */ +struct deferred_devadd_event { + struct work_struct work; + struct vbus_pci_add_event event; +}; + +static void deferred_devdrop(struct work_struct *work); + static void deferred_devadd(struct work_struct *work) { + struct deferred_devadd_event *_event; struct vbus_pci_device *new; int ret; - new = container_of(work, struct vbus_pci_device, add); + _event = container_of(work, struct deferred_devadd_event, work); + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) { + printk(KERN_ERR "VBUS_PCI: Out of memory on add_event\n"); + return; + } + + INIT_LIST_HEAD(&new->shms); + + memcpy(new->type, _event->event.type, VBUS_MAX_DEVTYPE_LEN); + 
new->vdev.type = new->type; + new->vdev.id = _event->event.id; + new->vdev.ops = &vbus_pci_device_ops; + + dev_set_name(&new->vdev.dev, "%lld", _event->event.id); + + INIT_WORK(&new->drop, deferred_devdrop); ret = vbus_device_proxy_register(&new->vdev); if (ret < 0) panic("failed to register device %lld(%s): %d\n", new->vdev.id, new->type, ret); + + kfree(_event); } static void @@ -468,25 +494,19 @@ deferred_devdrop(struct work_struct *work) static void event_devadd(struct vbus_pci_add_event *event) { - struct vbus_pci_device *new = kzalloc(sizeof(*new), GFP_KERNEL); - if (!new) { - printk(KERN_ERR "VBUS_PCI: Out of memory on add_event\n"); + struct deferred_devadd_event *_event; + + _event = kzalloc(sizeof(*_event), GFP_ATOMIC); + if (!_event) { + printk(KERN_ERR \ + "VBUS_PCI: Out of ATOMIC memory on add_event\n"); return; } - INIT_LIST_HEAD(&new->shms); - - memcpy(new->type, event->type, VBUS_MAX_DEVTYPE_LEN); - new->vdev.type = new->type; - new->vdev.id = event->id; - new->vdev.ops = &vbus_pci_device_ops; - - dev_set_name(&new->vdev.dev, "%lld", event->id); - - INIT_WORK(&new->add, deferred_devadd); - INIT_WORK(&new->drop, deferred_devdrop); + INIT_WORK(&_event->work, deferred_devadd); + memcpy(&_event->event, event, sizeof(*event)); - schedule_work(&new->add); + schedule_work(&_event->work); } static void -- cgit v1.2.3 From 2cbb9463fe0f82cefb8f9e2042728b142f4204c0 Mon Sep 17 00:00:00 2001 From: Patrick Mullaney Date: Mon, 7 Dec 2009 11:46:40 -0500 Subject: vbus-enet: fix l4ro pool non-atomic allocations in softirq context The current code exhibits odd behavior in the guest when receiving L4RO packets. 
This was tracked down to the improper allocation of GFP_KERNEL memory from softirq context Signed-off-by: Patrick Mullaney Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 228c366a50a7..6aaee1c85745 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -182,11 +182,11 @@ rxdesc_alloc(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc, size_t len } static void -rx_pageq_refill(struct vbus_enet_priv *priv) +rx_pageq_refill(struct vbus_enet_priv *priv, gfp_t gfp_mask) { struct ioq *ioq = priv->l4ro.pageq.queue; struct ioq_iterator iter; - int ret; + int ret, added = 0; if (ioq_full(ioq, ioq_idxtype_inuse)) /* nothing to do if the pageq is already fully populated */ @@ -202,11 +202,14 @@ rx_pageq_refill(struct vbus_enet_priv *priv) * Now populate each descriptor with an empty page */ while (!iter.desc->sown) { - struct page *page; + struct page *page = NULL; + + page = alloc_page(gfp_mask); - page = alloc_page(GFP_KERNEL); - BUG_ON(!page); + if (!page) + break; + added = 1; iter.desc->cookie = (u64)page; iter.desc->ptr = (u64)__pa(page_address(page)); iter.desc->len = PAGE_SIZE; @@ -215,7 +218,8 @@ rx_pageq_refill(struct vbus_enet_priv *priv) BUG_ON(ret < 0); } - ioq_signal(ioq, 0); + if (added) + ioq_signal(ioq, 0); } static void @@ -271,7 +275,7 @@ rx_setup(struct vbus_enet_priv *priv) } if (priv->l4ro.available) - rx_pageq_refill(priv); + rx_pageq_refill(priv, GFP_KERNEL); } static void @@ -602,7 +606,7 @@ vbus_enet_l4ro_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) struct skb_shared_info *sinfo = skb_shinfo(skb); int i; - rx_pageq_refill(priv); + rx_pageq_refill(priv, GFP_ATOMIC); if (!vsg->len) /* -- cgit v1.2.3 From 8e53c9208c3668310123ca6c4f36caec6ec4c0da Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 7 Dec 2009 11:46:41 -0500 Subject: 
Eliminate all cast warnings in vbus-enet.c and pci-bridge.c. Signed-off-by: Randy Dunlap Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 52 +++++++++++++++++++++++------------------------ drivers/vbus/pci-bridge.c | 13 ++++++------ 2 files changed, 33 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 6aaee1c85745..29b388ffdcaa 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -162,18 +162,18 @@ rxdesc_alloc(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc, size_t len * larger than MTU, the host will grab pages out of the * page-queue and populate additional IOVs */ - struct venet_sg *vsg = (struct venet_sg *)desc->cookie; + struct venet_sg *vsg = (struct venet_sg *)(unsigned long)desc->cookie; struct venet_iov *iov = &vsg->iov[0]; memset(vsg, 0, SG_DESC_SIZE); - vsg->cookie = (u64)skb; + vsg->cookie = (u64)(unsigned long)skb; vsg->count = 1; iov->ptr = (u64)__pa(skb->data); iov->len = len; } else { - desc->cookie = (u64)skb; + desc->cookie = (u64)(unsigned long)skb; desc->ptr = (u64)__pa(skb->data); desc->len = len; /* total length */ } @@ -210,7 +210,7 @@ rx_pageq_refill(struct vbus_enet_priv *priv, gfp_t gfp_mask) break; added = 1; - iter.desc->cookie = (u64)page; + iter.desc->cookie = (u64)(unsigned long)page; iter.desc->ptr = (u64)__pa(page_address(page)); iter.desc->len = PAGE_SIZE; @@ -257,7 +257,7 @@ rx_setup(struct vbus_enet_priv *priv) void *addr = &priv->l4ro.pool[offset]; iter.desc->ptr = (u64)offset; - iter.desc->cookie = (u64)addr; + iter.desc->cookie = (u64)(unsigned long)addr; iter.desc->len = SG_DESC_SIZE; } @@ -301,19 +301,19 @@ rx_rxq_teardown(struct vbus_enet_priv *priv) struct venet_sg *vsg; int i; - vsg = (struct venet_sg *)iter.desc->cookie; + vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie; /* skip i=0, since that is the skb->data IOV */ for (i = 1; i < vsg->count; i++) { struct venet_iov *iov = &vsg->iov[i]; - struct 
page *page = (struct page *)iov->ptr; + struct page *page = (struct page *)(unsigned long)iov->ptr; put_page(page); } - skb = (struct sk_buff *)vsg->cookie; + skb = (struct sk_buff *)(unsigned long)vsg->cookie; } else - skb = (struct sk_buff *)iter.desc->cookie; + skb = (struct sk_buff *)(unsigned long)iter.desc->cookie; iter.desc->valid = 0; wmb(); @@ -345,7 +345,7 @@ rx_l4ro_teardown(struct vbus_enet_priv *priv) * free each valid descriptor */ while (iter.desc->sown) { - struct page *page = (struct page *)iter.desc->cookie; + struct page *page = (struct page *)(unsigned long)iter.desc->cookie; iter.desc->valid = 0; wmb(); @@ -436,7 +436,7 @@ tx_setup(struct vbus_enet_priv *priv) iter.desc->ptr = (u64)__pa(vsg); } - iter.desc->cookie = (u64)vsg; + iter.desc->cookie = (u64)(unsigned long)vsg; iter.desc->len = SG_DESC_SIZE; ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); @@ -484,7 +484,7 @@ tx_teardown(struct vbus_enet_priv *priv) * free each valid descriptor */ while (iter.desc->cookie) { - struct venet_sg *vsg = (struct venet_sg *)iter.desc->cookie; + struct venet_sg *vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie; iter.desc->valid = 0; wmb(); @@ -601,8 +601,8 @@ vbus_enet_change_mtu(struct net_device *dev, int new_mtu) static struct sk_buff * vbus_enet_l4ro_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) { - struct venet_sg *vsg = (struct venet_sg *)desc->cookie; - struct sk_buff *skb = (struct sk_buff *)vsg->cookie; + struct venet_sg *vsg = (struct venet_sg *)(unsigned long)desc->cookie; + struct sk_buff *skb = (struct sk_buff *)(unsigned long)vsg->cookie; struct skb_shared_info *sinfo = skb_shinfo(skb); int i; @@ -622,7 +622,7 @@ vbus_enet_l4ro_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) /* skip i=0, since that is the skb->data IOV */ for (i = 1; i < vsg->count; i++) { struct venet_iov *iov = &vsg->iov[i]; - struct page *page = (struct page *)iov->ptr; + struct page *page = (struct page *)(unsigned long)iov->ptr; 
skb_frag_t *f = &sinfo->frags[i-1]; f->page = page; @@ -695,7 +695,7 @@ fail: static struct sk_buff * vbus_enet_flat_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) { - struct sk_buff *skb = (struct sk_buff *)desc->cookie; + struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->cookie; if (!desc->len) { /* @@ -820,7 +820,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) BUG_ON(iter.desc->sown); if (priv->sg) { - struct venet_sg *vsg = (struct venet_sg *)iter.desc->cookie; + struct venet_sg *vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie; struct scatterlist sgl[MAX_SKB_FRAGS+1]; struct scatterlist *sg; int count, maxcount = ARRAY_SIZE(sgl); @@ -829,7 +829,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) memset(vsg, 0, sizeof(*vsg)); - vsg->cookie = (u64)skb; + vsg->cookie = (u64)(unsigned long)skb; vsg->len = skb->len; if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -877,7 +877,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) * non scatter-gather mode: simply put the skb right onto the * ring. 
*/ - iter.desc->cookie = (u64)skb; + iter.desc->cookie = (u64)(unsigned long)skb; iter.desc->len = (u64)skb->len; iter.desc->ptr = (u64)__pa(skb->data); } @@ -954,10 +954,10 @@ vbus_enet_tx_reap_one(struct vbus_enet_priv *priv) if (priv->sg) { struct venet_sg *vsg; - vsg = (struct venet_sg *)iter.desc->cookie; - skb = (struct sk_buff *)vsg->cookie; + vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie; + skb = (struct sk_buff *)(unsigned long)vsg->cookie; } else - skb = (struct sk_buff *)iter.desc->cookie; + skb = (struct sk_buff *)(unsigned long)iter.desc->cookie; /* Reset the descriptor */ iter.desc->valid = 0; @@ -1073,7 +1073,7 @@ evq_txc_event(struct vbus_enet_priv *priv, vbus_enet_tx_reap(priv); - vbus_enet_skb_complete(priv, (struct sk_buff *)event->cookie); + vbus_enet_skb_complete(priv, (struct sk_buff *)(unsigned long)event->cookie); } static void @@ -1101,7 +1101,7 @@ deferred_evq_isr(unsigned long data) while (!iter.desc->sown) { struct venet_event_header *header; - header = (struct venet_event_header *)iter.desc->cookie; + header = (struct venet_event_header *)(unsigned long)iter.desc->cookie; switch (header->id) { case VENET_EVENT_LINKSTATE: @@ -1116,7 +1116,7 @@ deferred_evq_isr(unsigned long data) break; } - memset((void *)iter.desc->cookie, 0, priv->evq.evsize); + memset((void *)(unsigned long)iter.desc->cookie, 0, priv->evq.evsize); /* Advance the in-use tail */ ret = ioq_iter_pop(&iter, 0); @@ -1258,7 +1258,7 @@ vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) void *addr = &priv->evq.pool[offset]; iter.desc->ptr = (u64)offset; - iter.desc->cookie = (u64)addr; + iter.desc->cookie = (u64)(unsigned long)addr; iter.desc->len = query.evsize; ret = ioq_iter_push(&iter, 0); diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index c1af37cd30d1..ade9f7b6dd34 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -325,9 +325,10 @@ vbus_pci_device_shm(struct vbus_device_proxy *vdev, const char 
*name, */ shm_signal_get(&_signal->signal); - params.signal.offset = (u64)sdesc - (u64)ptr; + params.signal.offset = (u64)(unsigned long)sdesc - + (u64)(unsigned long)ptr; params.signal.prio = prio; - params.signal.cookie = (u64)_signal; + params.signal.cookie = (u64)(unsigned long)_signal; } else params.signal.offset = -1; /* yes, this is a u32, but its ok */ @@ -526,7 +527,7 @@ event_devdrop(struct vbus_pci_handle_event *event) static void event_shmsignal(struct vbus_pci_handle_event *event) { - struct _signal *_signal = (struct _signal *)event->handle; + struct _signal *_signal = (struct _signal *)(unsigned long)event->handle; struct irq_desc *desc = _signal->desc; vbus_pci.stats.notify++; @@ -536,7 +537,7 @@ event_shmsignal(struct vbus_pci_handle_event *event) static void event_shmclose(struct vbus_pci_handle_event *event) { - struct _signal *_signal = (struct _signal *)event->handle; + struct _signal *_signal = (struct _signal *)(unsigned long)event->handle; /* * This reference was taken during the DEVICESHM call @@ -593,7 +594,7 @@ eventq_init(int qlen) BUG_ON(iter.desc->valid); - desc->cookie = (u64)event; + desc->cookie = (u64)(unsigned long)event; desc->ptr = (u64)__pa(event); desc->len = len; /* total length */ desc->valid = 1; @@ -643,7 +644,7 @@ eventq_wakeup(struct ioq_notifier *notifier) struct ioq_ring_desc *desc = iter.desc; struct vbus_pci_event *event; - event = (struct vbus_pci_event *)desc->cookie; + event = (struct vbus_pci_event *)(unsigned long)desc->cookie; switch (event->eventid) { case VBUS_PCI_EVENT_DEVADD: -- cgit v1.2.3 From 14a6625cff213cdc90d532672206a417ffe88456 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:43 -0500 Subject: fix irq resource leak Signed-off-by: Gregory Haskins --- drivers/vbus/pci-bridge.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index ade9f7b6dd34..5af0732eaaf9 100644 --- a/drivers/vbus/pci-bridge.c +++ 
b/drivers/vbus/pci-bridge.c @@ -234,6 +234,7 @@ vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags) list_del(&_signal->list); free_irq(_signal->irq, _signal); + destroy_irq(_signal->irq); spin_unlock_irqrestore(&vbus_pci.lock, iflags); shm_signal_put(&_signal->signal); -- cgit v1.2.3 From c26611d3e5894961a085e28225e7d3f43bbcaa6d Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:43 -0500 Subject: vbus: remove create_irq() references from the pcibridge We cannot rely on create_irq() being available in all circumstances, therefore lets remove it from the code for now until we can figure out a way to support it with config options. Signed-off-by: Gregory Haskins --- drivers/vbus/pci-bridge.c | 69 ++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 43 deletions(-) (limited to 'drivers') diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index 5af0732eaaf9..add1cc37e1f9 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -186,6 +186,8 @@ static struct shm_signal_ops _signal_ops = { .release = _signal_release, }; +static void shmsignal_disconnect(struct _signal *_signal); + /* * ------------------- * vbus_device_proxy routines @@ -233,8 +235,7 @@ vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags) _signal = list_first_entry(&dev->shms, struct _signal, list); list_del(&_signal->list); - free_irq(_signal->irq, _signal); - destroy_irq(_signal->irq); + shmsignal_disconnect(_signal); spin_unlock_irqrestore(&vbus_pci.lock, iflags); shm_signal_put(&_signal->signal); @@ -260,25 +261,30 @@ vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags) return 0; } -static void vbus_irq_chip_noop(unsigned int irq) +/* + * ------------------- + * shmsignal interrupt routines + * ------------------- + */ + +/* We abstract these routines so that we can drop in irqchip later */ + +static void +shmsignal_wakeup(struct _signal *_signal) { + 
_shm_signal_wakeup(&_signal->signal); } -static struct irq_chip vbus_irq_chip = { - .name = "VBUS", - .mask = vbus_irq_chip_noop, - .unmask = vbus_irq_chip_noop, - .eoi = vbus_irq_chip_noop, -}; - -irqreturn_t -shm_signal_intr(int irq, void *dev) +static int +shmsignal_connect(struct _signal *_signal) { - struct _signal *_signal = (struct _signal *)dev; + return 0; +} - _shm_signal_wakeup(&_signal->signal); +static void +shmsignal_disconnect(struct _signal *_signal) +{ - return IRQ_HANDLED; } static int @@ -340,45 +346,22 @@ vbus_pci_device_shm(struct vbus_device_proxy *vdev, const char *name, goto fail; if (signal) { - int irq; BUG_ON(ret < 0); _signal->handle = ret; - irq = create_irq(); - if (irq < 0) { - printk(KERN_ERR "Failed to create IRQ: %d\n", irq); - ret = -ENOSPC; - goto fail; - } - - _signal->irq = irq; - _signal->desc = irq_to_desc(irq); - - set_irq_chip_and_handler_name(irq, - &vbus_irq_chip, - handle_percpu_irq, - "edge"); - if (!name) snprintf(_signal->name, sizeof(_signal->name), - "dev%lld-id%d", vdev->id, id); + "dev%lld-id%d", vdev->id, id); else snprintf(_signal->name, sizeof(_signal->name), - "%s", name); + "%s", name); - ret = request_irq(irq, shm_signal_intr, 0, - _signal->name, _signal); - if (ret) { - printk(KERN_ERR "Failed to request irq: %d\n", irq); - goto fail; - } + shmsignal_connect(_signal); spin_lock_irqsave(&vbus_pci.lock, iflags); - list_add_tail(&_signal->list, &dev->shms); - spin_unlock_irqrestore(&vbus_pci.lock, iflags); shm_signal_get(&_signal->signal); @@ -529,10 +512,10 @@ static void event_shmsignal(struct vbus_pci_handle_event *event) { struct _signal *_signal = (struct _signal *)(unsigned long)event->handle; - struct irq_desc *desc = _signal->desc; vbus_pci.stats.notify++; - desc->handle_irq(_signal->irq, desc); + + shmsignal_wakeup(_signal); } static void -- cgit v1.2.3 From 80493c05d90cd6e3c16d87a81fae9313be1820a7 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:45 -0500 Subject: venet: add 
missing ethtool include It's sloppy to use the facility without officially including its header file. Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 29b388ffdcaa..25c6bea9616c 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 9af247272ce02bc67c3848c8fde3b54b3b120b38 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:45 -0500 Subject: vbus: add autoprobe capability to guest This enables the guest to automatically load the appropriate driver when vbus devices are detected. Signed-off-by: Gregory Haskins --- drivers/net/vbus-enet.c | 2 ++ drivers/vbus/bus-proxy.c | 33 ++++++++++++++++++++++++++++++++- include/linux/vbus_driver.h | 2 ++ 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 25c6bea9616c..4e9ecac13023 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -1556,3 +1556,5 @@ vbus_enet_cleanup(void) module_init(vbus_enet_init_module); module_exit(vbus_enet_cleanup); + +VBUS_DRIVER_AUTOPROBE(VENET_TYPE); diff --git a/drivers/vbus/bus-proxy.c b/drivers/vbus/bus-proxy.c index 5d349427661a..a318c6754b4b 100644 --- a/drivers/vbus/bus-proxy.c +++ b/drivers/vbus/bus-proxy.c @@ -48,6 +48,16 @@ static int vbus_dev_proxy_match(struct device *_dev, struct device_driver *_drv) return !strcmp(dev->type, drv->type); } +static int vbus_dev_proxy_uevent(struct device *_dev, struct kobj_uevent_env *env) +{ + struct vbus_device_proxy *dev = to_dev(_dev); + + if (add_uevent_var(env, "MODALIAS=vbus-proxy:%s", dev->type)) + return -ENOMEM; + + return 0; +} + /* * This function is invoked after the bus infrastructure has already made a * match. 
The device will contain a reference to the paired driver which @@ -68,6 +78,7 @@ static int vbus_dev_proxy_probe(struct device *_dev) static struct bus_type vbus_proxy = { .name = VBUS_PROXY_NAME, .match = vbus_dev_proxy_match, + .uevent = vbus_dev_proxy_uevent, }; static struct device vbus_proxy_rootdev = { @@ -99,18 +110,38 @@ static void device_release(struct device *dev) _dev->ops->release(_dev); } +static ssize_t _show_modalias(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "vbus-proxy:%s\n", to_dev(dev)->type); +} +static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, _show_modalias, NULL); + int vbus_device_proxy_register(struct vbus_device_proxy *new) { + int ret; + new->dev.parent = &vbus_proxy_rootdev; new->dev.bus = &vbus_proxy; new->dev.release = &device_release; - return device_register(&new->dev); + ret = device_register(&new->dev); + if (ret < 0) + return ret; + + ret = device_create_file(&new->dev, &dev_attr_modalias); + if (ret < 0) { + device_unregister(&new->dev); + return ret; + } + + return 0; } EXPORT_SYMBOL_GPL(vbus_device_proxy_register); void vbus_device_proxy_unregister(struct vbus_device_proxy *dev) { + device_remove_file(&dev->dev, &dev_attr_modalias); device_unregister(&dev->dev); } EXPORT_SYMBOL_GPL(vbus_device_proxy_unregister); diff --git a/include/linux/vbus_driver.h b/include/linux/vbus_driver.h index 2b1dac47f180..8a7acb1a7a05 100644 --- a/include/linux/vbus_driver.h +++ b/include/linux/vbus_driver.h @@ -78,4 +78,6 @@ void vbus_driver_unregister(struct vbus_driver *drv); int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, const char *name, int id, int prio, size_t ringsize, struct ioq **ioq); +#define VBUS_DRIVER_AUTOPROBE(name) MODULE_ALIAS("vbus-proxy:" name) + #endif /* _LINUX_VBUS_DRIVER_H */ -- cgit v1.2.3 From 46d77295685c0dd95c550a3f43cf807f0ce29c6c Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 7 Dec 2009 11:46:46 -0500 Subject: vbus: fix pcibridge busmaster 
support We should technically enable the busmaster bit, even though its not actually used on the host side Signed-off-by: Gregory Haskins --- drivers/vbus/pci-bridge.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index add1cc37e1f9..9e37df15d250 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -862,6 +862,8 @@ vbus_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret < 0) return ret; + pci_set_master(pdev); + ret = pci_request_regions(pdev, VBUS_PCI_NAME); if (ret < 0) { printk(KERN_ERR "VBUS_PCI: Could not init BARs: %d\n", ret); -- cgit v1.2.3 From 333a17e3d31647a806c1d93eda11b71e22e863c5 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Tue, 8 Dec 2009 16:12:28 -0500 Subject: vbus: pci-bridge: fix sparse warnings The sparse tool caught many missing static declarations. Add them. There are still many address space errors, but I'm unsure about the best way to fix these. Signed-off-by: Ira W. 
Snyder --- drivers/vbus/pci-bridge.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index 9e37df15d250..0f5f886b2faa 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -66,7 +66,7 @@ struct vbus_pci_device { struct work_struct drop; }; -DEFINE_PER_CPU(struct vbus_pci_fastcall_desc, vbus_pci_percpu_fastcall) +static DEFINE_PER_CPU(struct vbus_pci_fastcall_desc, vbus_pci_percpu_fastcall) ____cacheline_aligned; /* @@ -123,7 +123,7 @@ vbus_pci_buscall(unsigned long nr, void *data, unsigned long len) return ret; } -struct vbus_pci_device * +static struct vbus_pci_device * to_dev(struct vbus_device_proxy *vdev) { return container_of(vdev, struct vbus_pci_device, vdev); @@ -412,7 +412,7 @@ vbus_pci_device_release(struct vbus_device_proxy *vdev) kfree(_dev); } -struct vbus_device_proxy_ops vbus_pci_device_ops = { +static struct vbus_device_proxy_ops vbus_pci_device_ops = { .open = vbus_pci_device_open, .close = vbus_pci_device_close, .shm = vbus_pci_device_shm, @@ -667,7 +667,7 @@ static struct ioq_notifier eventq_notifier = { }; /* Injected whenever the host issues an ioq_signal() on the eventq */ -irqreturn_t +static irqreturn_t eventq_intr(int irq, void *dev) { vbus_pci.stats.qnotify++; @@ -995,7 +995,7 @@ static struct pci_driver vbus_pci_driver = { .remove = vbus_pci_remove, }; -int __init +static int __init vbus_pci_init(void) { memset(&vbus_pci, 0, sizeof(vbus_pci)); -- cgit v1.2.3 From 4a238e3979020ce259cdd994c81ffdbd35ed2ac3 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Tue, 8 Dec 2009 16:12:29 -0500 Subject: vbus-enet: fix sparse warnings Fix the following sparse warnings: drivers/net/vbus-enet.c:411:9: warning: Using plain integer as NULL pointer drivers/net/vbus-enet.c:1250:24: warning: Using plain integer as NULL pointer drivers/net/vbus-enet.c:1326:24: warning: Using plain integer as NULL pointer Signed-off-by: Ira W. 
Snyder --- drivers/net/vbus-enet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 4e9ecac13023..8232215e62b7 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -408,7 +408,7 @@ tx_setup(struct vbus_enet_priv *priv) priv->pmtd.pool = pool; ret = dev->ops->shm(dev, NULL, shmid, 0, pool, poollen, - 0, NULL, 0); + NULL, NULL, 0); BUG_ON(ret < 0); } @@ -1239,7 +1239,7 @@ vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) priv->evq.pool = pool; ret = dev->ops->shm(dev, NULL, query.dpid, 0, - pool, poollen, 0, NULL, 0); + pool, poollen, NULL, NULL, 0); if (ret < 0) return ret; @@ -1315,7 +1315,7 @@ vbus_enet_l4ro_negcap(struct vbus_enet_priv *priv, unsigned long count) * pre-mapped descriptor pool */ ret = dev->ops->shm(dev, NULL, query.dpid, 0, - pool, poollen, 0, NULL, 0); + pool, poollen, NULL, NULL, 0); if (ret < 0) { printk(KERN_ERR "Error registering L4RO pool: %d\n", ret); -- cgit v1.2.3 From d8544b0386b7d6a1f70fe3b7a9f71dfd4e2dafc0 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Tue, 8 Dec 2009 16:12:30 -0500 Subject: vbus: fix lots of sparse "dubious signed bitfield" warnings The sparse utility gave tons of warnings about signed bitfields. A simple inspection shows that they are all used as booleans, so convert them to the correct type. Signed-off-by: Ira W. 
Snyder --- drivers/vbus/pci-bridge.c | 2 +- include/linux/ioq.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index 0f5f886b2faa..078b8f435ab9 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -43,7 +43,7 @@ struct vbus_pci { struct vbus_pci_regs *regs; struct vbus_pci_signals *signals; int irq; - int enabled:1; + bool enabled; struct { struct dentry *fs; int events; diff --git a/include/linux/ioq.h b/include/linux/ioq.h index eba1021af1c7..f04dfb49f70a 100644 --- a/include/linux/ioq.h +++ b/include/linux/ioq.h @@ -112,9 +112,9 @@ struct ioq_iterator { struct ioq_ring_idx *idx; u32 pos; struct ioq_ring_desc *desc; - int update:1; - int dualidx:1; - int flipowner:1; + bool update; + bool dualidx; + bool flipowner; }; struct ioq_notifier { -- cgit v1.2.3 From e1077ef3b2751766c4437e2f974e3d7372742d0d Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Tue, 8 Dec 2009 16:12:32 -0500 Subject: ioq: clearly specify endianness The IOQ code uses structures which are designed to be shared between disparate systems, such as 32-bit and 64-bit, as well as Linux and Windows. Since IOQ is primarily intended to be used by qemu/kvm, which support virtual guests with a different CPU architecture than the host, clearly define the endianness for the shared structures. The endianness is defined to be little-endian, to avoid byte swapping in the most common case: x86. Note that the cookie member was not changed to have a fixed endianness. This is because it is only intended for use by one side of the IOQ. Signed-off-by: Ira W. 
Snyder --- drivers/net/vbus-enet.c | 30 +++++++++++++++--------------- drivers/vbus/bus-proxy.c | 2 +- drivers/vbus/pci-bridge.c | 6 +++--- include/linux/ioq.h | 18 +++++++++--------- lib/ioq.c | 28 ++++++++++++++++------------ 5 files changed, 44 insertions(+), 40 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c index 8232215e62b7..94b86d482cee 100644 --- a/drivers/net/vbus-enet.c +++ b/drivers/net/vbus-enet.c @@ -175,8 +175,8 @@ rxdesc_alloc(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc, size_t len iov->len = len; } else { desc->cookie = (u64)(unsigned long)skb; - desc->ptr = (u64)__pa(skb->data); - desc->len = len; /* total length */ + desc->ptr = cpu_to_le64(__pa(skb->data)); + desc->len = cpu_to_le64(len); /* total length */ } desc->valid = 1; @@ -212,8 +212,8 @@ rx_pageq_refill(struct vbus_enet_priv *priv, gfp_t gfp_mask) added = 1; iter.desc->cookie = (u64)(unsigned long)page; - iter.desc->ptr = (u64)__pa(page_address(page)); - iter.desc->len = PAGE_SIZE; + iter.desc->ptr = cpu_to_le64(__pa(page_address(page))); + iter.desc->len = cpu_to_le64(PAGE_SIZE); ret = ioq_iter_push(&iter, 0); BUG_ON(ret < 0); @@ -257,9 +257,9 @@ rx_setup(struct vbus_enet_priv *priv) size_t offset = (i * SG_DESC_SIZE); void *addr = &priv->l4ro.pool[offset]; - iter.desc->ptr = (u64)offset; + iter.desc->ptr = cpu_to_le64(offset); iter.desc->cookie = (u64)(unsigned long)addr; - iter.desc->len = SG_DESC_SIZE; + iter.desc->len = cpu_to_le64(SG_DESC_SIZE); } rxdesc_alloc(priv, iter.desc, priv->dev->mtu); @@ -428,17 +428,17 @@ tx_setup(struct vbus_enet_priv *priv) size_t offset = (i * SG_DESC_SIZE); vsg = (struct venet_sg *)&priv->pmtd.pool[offset]; - iter.desc->ptr = (u64)offset; + iter.desc->ptr = cpu_to_le64(offset); } else { vsg = kzalloc(SG_DESC_SIZE, GFP_KERNEL); if (!vsg) return -ENOMEM; - iter.desc->ptr = (u64)__pa(vsg); + iter.desc->ptr = cpu_to_le64(__pa(vsg)); } iter.desc->cookie = (u64)(unsigned long)vsg; - 
iter.desc->len = SG_DESC_SIZE; + iter.desc->len = cpu_to_le64(SG_DESC_SIZE); ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); BUG_ON(ret < 0); @@ -708,7 +708,7 @@ vbus_enet_flat_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) return NULL; } - skb_put(skb, desc->len); + skb_put(skb, le64_to_cpu(desc->len)); return skb; } @@ -871,7 +871,7 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) iov->ptr = (u64)sg_phys(sg); } - iter.desc->len = (u64)VSG_DESC_SIZE(vsg->count); + iter.desc->len = cpu_to_le64(VSG_DESC_SIZE(vsg->count)); } else { /* @@ -879,8 +879,8 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) * ring. */ iter.desc->cookie = (u64)(unsigned long)skb; - iter.desc->len = (u64)skb->len; - iter.desc->ptr = (u64)__pa(skb->data); + iter.desc->len = cpu_to_le64(skb->len); + iter.desc->ptr = cpu_to_le64(__pa(skb->data)); } iter.desc->valid = 1; @@ -1258,9 +1258,9 @@ vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) size_t offset = (i * query.evsize); void *addr = &priv->evq.pool[offset]; - iter.desc->ptr = (u64)offset; + iter.desc->ptr = cpu_to_le64(offset); iter.desc->cookie = (u64)(unsigned long)addr; - iter.desc->len = query.evsize; + iter.desc->len = cpu_to_le64(query.evsize); ret = ioq_iter_push(&iter, 0); BUG_ON(ret < 0); diff --git a/drivers/vbus/bus-proxy.c b/drivers/vbus/bus-proxy.c index a318c6754b4b..47928423a050 100644 --- a/drivers/vbus/bus-proxy.c +++ b/drivers/vbus/bus-proxy.c @@ -217,7 +217,7 @@ int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, const char *name, head->magic = IOQ_RING_MAGIC; head->ver = IOQ_RING_VER; - head->count = count; + head->count = cpu_to_le32(count); ret = dev->ops->shm(dev, name, id, prio, head, len, &head->signal, &signal, 0); diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c index 078b8f435ab9..0d513248dae6 100644 --- a/drivers/vbus/pci-bridge.c +++ b/drivers/vbus/pci-bridge.c @@ -579,8 +579,8 @@ eventq_init(int qlen) 
BUG_ON(iter.desc->valid); desc->cookie = (u64)(unsigned long)event; - desc->ptr = (u64)__pa(event); - desc->len = len; /* total length */ + desc->ptr = cpu_to_le64(__pa(event)); + desc->len = cpu_to_le64(len); /* total length */ desc->valid = 1; /* @@ -798,7 +798,7 @@ _ioq_init(size_t ringsize, struct ioq *ioq, struct ioq_ops *ops) head->magic = IOQ_RING_MAGIC; head->ver = IOQ_RING_VER; - head->count = ringsize; + head->count = cpu_to_le32(ringsize); _signal_init(signal, &head->signal, &eventq_signal_ops); diff --git a/include/linux/ioq.h b/include/linux/ioq.h index f04dfb49f70a..7c6d6cad83c7 100644 --- a/include/linux/ioq.h +++ b/include/linux/ioq.h @@ -52,18 +52,18 @@ */ struct ioq_ring_desc { __u64 cookie; /* for arbitrary use by north-side */ - __u64 ptr; - __u64 len; + __le64 ptr; + __le64 len; __u8 valid; __u8 sown; /* South owned = 1, North owned = 0 */ }; -#define IOQ_RING_MAGIC 0x47fa2fe4 -#define IOQ_RING_VER 4 +#define IOQ_RING_MAGIC cpu_to_le32(0x47fa2fe4) +#define IOQ_RING_VER cpu_to_le32(4) struct ioq_ring_idx { - __u32 head; /* 0 based index to head of ptr array */ - __u32 tail; /* 0 based index to tail of ptr array */ + __le32 head; /* 0 based index to head of ptr array */ + __le32 tail; /* 0 based index to tail of ptr array */ __u8 full; }; @@ -73,11 +73,11 @@ enum ioq_locality { }; struct ioq_ring_head { - __u32 magic; - __u32 ver; + __le32 magic; + __le32 ver; struct shm_signal_desc signal; struct ioq_ring_idx idx[2]; - __u32 count; + __le32 count; struct ioq_ring_desc ring[1]; /* "count" elements will be allocated */ }; diff --git a/lib/ioq.c b/lib/ioq.c index d5e57be674db..4027848d7436 100644 --- a/lib/ioq.c +++ b/lib/ioq.c @@ -71,10 +71,10 @@ int ioq_iter_seek(struct ioq_iterator *iter, enum ioq_seek_type type, pos = modulo_inc(iter->pos, iter->ioq->count); break; case ioq_seek_tail: - pos = idx->tail; + pos = le32_to_cpu(idx->tail); break; case ioq_seek_head: - pos = idx->head; + pos = le32_to_cpu(idx->head); break; case ioq_seek_set: if 
(offset >= iter->ioq->count) @@ -91,19 +91,23 @@ EXPORT_SYMBOL_GPL(ioq_iter_seek); static int ioq_ring_count(struct ioq_ring_idx *idx, int count) { - if (idx->full && (idx->head == idx->tail)) + u32 head = le32_to_cpu(idx->head); + u32 tail = le32_to_cpu(idx->tail); + + if (idx->full && (head == tail)) return count; - else if (idx->tail >= idx->head) - return idx->tail - idx->head; + else if (tail >= head) + return tail - head; else - return (idx->tail + count) - idx->head; + return (tail + count) - head; } static void idx_tail_push(struct ioq_ring_idx *idx, int count) { - u32 tail = modulo_inc(idx->tail, count); + u32 tail = modulo_inc(le32_to_cpu(idx->tail), count); + u32 head = le32_to_cpu(idx->head); - if (idx->head == tail) { + if (head == tail) { rmb(); /* @@ -116,7 +120,7 @@ static void idx_tail_push(struct ioq_ring_idx *idx, int count) wmb(); } - idx->tail = tail; + idx->tail = cpu_to_le32(tail); } int ioq_iter_push(struct ioq_iterator *iter, int flags) @@ -128,7 +132,7 @@ int ioq_iter_push(struct ioq_iterator *iter, int flags) /* * Its only valid to push if we are currently pointed at the tail */ - if (iter->pos != idx->tail || iter->desc->sown != iter->ioq->locale) + if (iter->pos != le32_to_cpu(idx->tail) || iter->desc->sown != iter->ioq->locale) return -EINVAL; idx_tail_push(idx, iter->ioq->count); @@ -167,10 +171,10 @@ int ioq_iter_pop(struct ioq_iterator *iter, int flags) /* * Its only valid to pop if we are currently pointed at the head */ - if (iter->pos != idx->head || iter->desc->sown != iter->ioq->locale) + if (iter->pos != le32_to_cpu(idx->head) || iter->desc->sown != iter->ioq->locale) return -EINVAL; - idx->head = modulo_inc(idx->head, iter->ioq->count); + idx->head = cpu_to_le32(modulo_inc(le32_to_cpu(idx->head), iter->ioq->count)); wmb(); /* head must be visible before full */ if (idx->full) { -- cgit v1.2.3