summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stephen Rothwell <sfr@canb.auug.org.au> 2010-02-04 16:46:11 +1100
committer Stephen Rothwell <sfr@canb.auug.org.au> 2010-02-04 16:46:11 +1100
commit f0c43568459b4935ef53a3ef3f6c35fccc547131 (patch)
tree 7bc5056289121c23716c972eabdcdfdfd4fadedb
parent e1211b782215664769539ce3173a6e686bff4741 (diff)
parent e1077ef3b2751766c4437e2f974e3d7372742d0d (diff)
Merge remote branch 'alacrity/linux-next'
Conflicts: include/linux/Kbuild lib/Kconfig
-rw-r--r-- MAINTAINERS | 25
-rw-r--r-- arch/x86/Kconfig | 2
-rw-r--r-- drivers/Makefile | 1
-rw-r--r-- drivers/net/Kconfig | 14
-rw-r--r-- drivers/net/Makefile | 1
-rw-r--r-- drivers/net/vbus-enet.c | 1560
-rw-r--r-- drivers/vbus/Kconfig | 25
-rw-r--r-- drivers/vbus/Makefile | 6
-rw-r--r-- drivers/vbus/bus-proxy.c | 247
-rw-r--r-- drivers/vbus/pci-bridge.c | 1015
-rw-r--r-- include/linux/Kbuild | 4
-rw-r--r-- include/linux/ioq.h | 414
-rw-r--r-- include/linux/shm_signal.h | 189
-rw-r--r-- include/linux/vbus_driver.h | 83
-rw-r--r-- include/linux/vbus_pci.h | 145
-rw-r--r-- include/linux/venet.h | 133
-rw-r--r-- lib/Kconfig | 21
-rw-r--r-- lib/Makefile | 2
-rw-r--r-- lib/ioq.c | 304
-rw-r--r-- lib/shm_signal.c | 196
20 files changed, 4387 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index ea2137221e9d..ee9007f64a8b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2919,6 +2919,12 @@ L: linux-mips@linux-mips.org
S: Maintained
F: drivers/serial/ioc3_serial.c
+IOQ LIBRARY
+M: Gregory Haskins <ghaskins@novell.com>
+S: Maintained
+F: include/linux/ioq.h
+F: lib/ioq.c
+
IP MASQUERADING
M: Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
S: Maintained
@@ -4922,6 +4928,12 @@ F: drivers/serial/serial_lh7a40x.c
F: drivers/usb/gadget/lh7a40*
F: drivers/usb/host/ohci-lh7a40*
+SHM-SIGNAL LIBRARY
+M: Gregory Haskins <ghaskins@novell.com>
+S: Maintained
+F: include/linux/shm_signal.h
+F: lib/shm_signal.c
+
SIMPLE FIRMWARE INTERFACE (SFI)
M: Len Brown <lenb@kernel.org>
L: sfi-devel@simplefirmware.org
@@ -5832,6 +5844,19 @@ S: Maintained
F: Documentation/fb/uvesafb.txt
F: drivers/video/uvesafb.*
+VBUS
+M: Gregory Haskins <ghaskins@novell.com>
+S: Maintained
+F: include/linux/vbus*
+F: drivers/vbus/*
+
+VBUS ETHERNET DRIVER
+M: Gregory Haskins <ghaskins@novell.com>
+S: Maintained
+W: http://developer.novell.com/wiki/index.php/AlacrityVM
+F: include/linux/venet.h
+F: drivers/net/vbus-enet.c
+
VFAT/FAT/MSDOS FILESYSTEM
M: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
S: Maintained
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb4092568f9e..15c1c095fc1a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2032,6 +2032,8 @@ source "drivers/pcmcia/Kconfig"
source "drivers/pci/hotplug/Kconfig"
+source "drivers/vbus/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 81e36596b1e9..0ee709f463ab 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -112,3 +112,4 @@ obj-$(CONFIG_VLYNQ) += vlynq/
obj-$(CONFIG_STAGING) += staging/
obj-y += platform/
obj-y += ieee802154/
+obj-y += vbus/
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d1039a16e9e6..b94e0392be2a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -3261,4 +3261,18 @@ config VMXNET3
To compile this driver as a module, choose M here: the
module will be called vmxnet3.
+config VBUS_ENET
+ tristate "VBUS Ethernet Driver"
+ default n
+ depends on VBUS_PROXY
+ help
+ A virtualized 802.x network device based on the VBUS
+ "virtual-ethernet" interface. It can be used with any
+ hypervisor/kernel that supports the vbus+venet protocol.
+
+config VBUS_ENET_DEBUG
+ bool "Enable Debugging"
+ depends on VBUS_ENET
+ default n
+
endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 0b763cbe9b1f..2ab2d62368bc 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -284,6 +284,7 @@ obj-$(CONFIG_FS_ENET) += fs_enet/
obj-$(CONFIG_NETXEN_NIC) += netxen/
obj-$(CONFIG_NIU) += niu.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
+obj-$(CONFIG_VBUS_ENET) += vbus-enet.o
obj-$(CONFIG_SFC) += sfc/
obj-$(CONFIG_WIMAX) += wimax/
diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c
new file mode 100644
index 000000000000..94b86d482cee
--- /dev/null
+++ b/drivers/net/vbus-enet.c
@@ -0,0 +1,1560 @@
+/*
+ * vbus_enet - A virtualized 802.x network device based on the VBUS interface
+ *
+ * Copyright (C) 2009 Novell, Gregory Haskins <ghaskins@novell.com>
+ *
+ * Derived from the SNULL example from the book "Linux Device Drivers" by
+ * Alessandro Rubini, Jonathan Corbet, and Greg Kroah-Hartman, published
+ * by O'Reilly & Associates.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <linux/ioq.h>
+#include <linux/vbus_driver.h>
+
+#include <linux/in6.h>
+#include <asm/checksum.h>
+
+#include <linux/venet.h>
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("virtual-ethernet");
+MODULE_VERSION("1");
+
+/*
+ * Module parameters.  All are read-only through sysfs (mode 0444) and
+ * therefore only take effect at module load time.
+ */
+static int rx_ringlen = 256;
+module_param(rx_ringlen, int, 0444);
+MODULE_PARM_DESC(rx_ringlen, "Receive ring length (default 256)");
+static int tx_ringlen = 256;
+module_param(tx_ringlen, int, 0444);
+MODULE_PARM_DESC(tx_ringlen, "Transmit ring length (default 256)");
+/* non-zero: vbus_enet_sg_negcap() asks the host for the SG capability group */
+static int sg_enabled = 1;
+module_param(sg_enabled, int, 0444);
+MODULE_PARM_DESC(sg_enabled,
+		 "Request scatter-gather capabilities from the host (default 1)");
+
+/* Debug print helper: forwards to dev_dbg() on the 'dev' member of _dev */
+#define PDEBUG(_dev, fmt, args...) dev_dbg(&(_dev)->dev, fmt, ## args)
+
+/*
+ * Byte size used for every scatter-gather descriptor in this driver, as
+ * computed by VSG_DESC_SIZE() for MAX_SKB_FRAGS (VSG_DESC_SIZE is defined
+ * outside this file).
+ */
+#define SG_DESC_SIZE VSG_DESC_SIZE(MAX_SKB_FRAGS)
+
+/* One ioq plus the notifier hooked to it and the ring size it was created with */
+struct vbus_enet_queue {
+ struct ioq *queue; /* filled in by vbus_driver_ioq_alloc() in queue_init() */
+ struct ioq_notifier notifier; /* .signal is the handler passed to queue_init() */
+ unsigned long count; /* ring size passed to queue_init() */
+};
+
+/* Per-interface driver state */
+struct vbus_enet_priv {
+ spinlock_t lock; /* taken irqsave in schedule_rx, tx_start and tx_reap_one */
+ struct net_device *dev; /* net_device used for stats, naming and queue control */
+ struct vbus_device_proxy *vdev; /* vbus proxy used by devcall() and for shm registration */
+ struct napi_struct napi; /* rx polling context, see vbus_enet_poll() */
+ struct vbus_enet_queue rxq; /* receive ring */
+ struct {
+ struct vbus_enet_queue veq; /* transmit ring */
+ struct tasklet_struct task; /* scheduled from tx_isr() */
+ struct sk_buff_head outstanding; /* skbs queued by tx_start until completed */
+ } tx;
+ bool sg; /* set when VENET_CAP_SG was granted (vbus_enet_sg_negcap()) */
+ struct {
+ bool enabled; /* set when VENET_CAP_PMTD was granted */
+ char *pool; /* tx descriptor pool allocated in tx_setup() */
+ } pmtd; /* pre-mapped transmit descriptors */
+ struct {
+ bool enabled; /* set in vbus_enet_evq_negcap() when any event cap is granted */
+ bool linkstate; /* VENET_CAP_EVQ_LINKSTATE granted */
+ bool txc; /* VENET_CAP_EVQ_TXC granted; tx_reap then leaves skb freeing to txc events */
+ unsigned long evsize; /* per-event size reported by VENET_FUNC_EVQQUERY */
+ struct vbus_enet_queue veq; /* event ring */
+ struct tasklet_struct task; /* scheduled from evq_isr() */
+ char *pool; /* event buffers, evsize bytes per ring entry */
+ } evq;
+ struct {
+ bool available; /* selects SG-style rx descriptors in the rx paths */
+ char *pool; /* SG descriptor pool, one SG_DESC_SIZE slot per rx descriptor */
+ struct vbus_enet_queue pageq; /* spare-page queue filled by rx_pageq_refill() */
+ } l4ro;
+
+ /* rx import hook, invoked once per completed descriptor by vbus_enet_poll() */
+ struct sk_buff *(*import)(struct vbus_enet_priv *priv,
+ struct ioq_ring_desc *desc);
+};
+
+static void vbus_enet_tx_reap(struct vbus_enet_priv *priv);
+
+/* Map an embedded napi_struct back to the vbus_enet_priv that contains it. */
+static struct vbus_enet_priv *
+napi_to_priv(struct napi_struct *napi)
+{
+	struct vbus_enet_priv *owner;
+
+	owner = container_of(napi, struct vbus_enet_priv, napi);
+
+	return owner;
+}
+
+/*
+ * Allocate the ioq identified by @qid on the device behind @priv and
+ * attach it to @q.
+ *
+ * @name:     optional suffix; when given the queue is labelled
+ *            "<netdev name>-<name>", otherwise no label is passed down
+ * @ringsize: number of ring entries, also recorded in q->count
+ * @func:     optional notifier callback hooked to the new queue
+ *
+ * Panics if the allocation fails, so the only value ever returned is 0.
+ */
+static int
+queue_init(struct vbus_enet_priv *priv,
+	   struct vbus_enet_queue *q,
+	   const char *name,
+	   int qid,
+	   size_t ringsize,
+	   void (*func)(struct ioq_notifier *))
+{
+	char label[64];
+	char *qname = NULL;
+	int rc;
+
+	if (name) {
+		snprintf(label, sizeof(label), "%s-%s", priv->dev->name, name);
+		qname = label;
+	}
+
+	rc = vbus_driver_ioq_alloc(priv->vdev, qname, qid, 0,
+				   ringsize, &q->queue);
+	if (rc < 0)
+		panic("ioq_alloc failed: %d\n", rc);
+
+	q->count = ringsize;
+
+	if (!func)
+		return 0;
+
+	q->notifier.signal = func;
+	q->queue->notifier = &q->notifier;
+
+	return 0;
+}
+
+/*
+ * Invoke device function @func through the proxy's ->call() op, passing
+ * @data/@len through unchanged and 0 as the final argument.  Returns
+ * whatever ->call() returns.
+ */
+static int
+devcall(struct vbus_enet_priv *priv, u32 func, void *data, size_t len)
+{
+	struct vbus_device_proxy *proxy = priv->vdev;
+	int rc = proxy->ops->call(proxy, func, data, len, 0);
+
+	return rc;
+}
+
+/*
+ * ---------------
+ * rx descriptors
+ * ---------------
+ */
+
+/*
+ * Attach a fresh receive skb to @desc and mark the descriptor valid.
+ *
+ * @len is the payload size; ETH_HLEN is added for the link header and
+ * NET_IP_ALIGN of headroom is reserved.  BUGs if the skb cannot be
+ * allocated.
+ */
+static void
+rxdesc_alloc(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc, size_t len)
+{
+	size_t buflen = len + ETH_HLEN;
+	struct sk_buff *skb;
+
+	skb = netdev_alloc_skb(priv->dev, buflen + NET_IP_ALIGN);
+	BUG_ON(!skb);
+
+	skb_reserve(skb, NET_IP_ALIGN); /* align IP on 16B boundary */
+
+	if (!priv->l4ro.available) {
+		/* flat mode: the ring descriptor points straight at the skb */
+		desc->cookie = (u64)(unsigned long)skb;
+		desc->ptr = cpu_to_le64(__pa(skb->data));
+		desc->len = cpu_to_le64(buflen); /* total length */
+	} else {
+		/*
+		 * L4RO mode: desc->cookie already points at an SG
+		 * descriptor.  Reset it and fill IOV[0] with the linear skb
+		 * buffer; larger packets get further IOVs from the page
+		 * queue.
+		 */
+		struct venet_sg *vsg = (struct venet_sg *)(unsigned long)desc->cookie;
+
+		memset(vsg, 0, SG_DESC_SIZE);
+
+		vsg->cookie = (u64)(unsigned long)skb;
+		vsg->count = 1;
+
+		vsg->iov[0].ptr = (u64)__pa(skb->data);
+		vsg->iov[0].len = buflen;
+	}
+
+	desc->valid = 1;
+}
+
+/*
+ * Top up the L4RO page queue with freshly allocated pages.
+ *
+ * Starting at the tail of the in-use index, every descriptor whose 'sown'
+ * flag is clear gets one page (allocated with @gfp_mask) and is pushed.
+ * Stops at the first allocation failure.  The queue is signalled once if
+ * at least one page was added.
+ */
+static void
+rx_pageq_refill(struct vbus_enet_priv *priv, gfp_t gfp_mask)
+{
+ struct ioq *ioq = priv->l4ro.pageq.queue;
+ struct ioq_iterator iter;
+ int ret, added = 0;
+
+ if (ioq_full(ioq, ioq_idxtype_inuse))
+ /* nothing to do if the pageq is already fully populated */
+ return;
+
+ ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0);
+ BUG_ON(ret < 0); /* will never fail unless seriously broken */
+
+ ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * Now populate each descriptor with an empty page
+ */
+ while (!iter.desc->sown) {
+ struct page *page = NULL;
+
+ page = alloc_page(gfp_mask);
+
+ if (!page)
+ break; /* partial refill is fine; pages added so far are still signalled */
+
+ added = 1;
+ iter.desc->cookie = (u64)(unsigned long)page; /* struct page, recovered at teardown */
+ iter.desc->ptr = cpu_to_le64(__pa(page_address(page)));
+ iter.desc->len = cpu_to_le64(PAGE_SIZE);
+
+ ret = ioq_iter_push(&iter, 0);
+ BUG_ON(ret < 0);
+ }
+
+ if (added)
+ ioq_signal(ioq, 0);
+}
+
+/*
+ * Prime the receive ring: give every not-yet-valid descriptor an empty
+ * MTU-sized buffer and, in L4RO mode, bind it to its slot in the
+ * pre-mapped SG descriptor pool.  In L4RO mode the page queue is filled
+ * afterwards.
+ */
+static void
+rx_setup(struct vbus_enet_priv *priv)
+{
+	struct ioq_iterator iter;
+	int idx;
+	int rc;
+
+	/*
+	 * Iterate on the "valid" index.  No flags are passed, so the
+	 * iterator does not run in autoupdate mode; this is only
+	 * initialization.
+	 */
+	rc = ioq_iter_init(priv->rxq.queue, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(rc < 0); /* will never fail unless seriously broken */
+
+	/* the tail of the valid index is the first item of a brand-new queue */
+	rc = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+	BUG_ON(rc < 0);
+
+	for (idx = 0; !iter.desc->valid; idx++) {
+		if (priv->l4ro.available) {
+			size_t offset = (idx * SG_DESC_SIZE);
+
+			/* ptr is an offset into the pool, cookie its address */
+			iter.desc->ptr = cpu_to_le64(offset);
+			iter.desc->cookie = (u64)(unsigned long)&priv->l4ro.pool[offset];
+			iter.desc->len = cpu_to_le64(SG_DESC_SIZE);
+		}
+
+		rxdesc_alloc(priv, iter.desc, priv->dev->mtu);
+
+		/* the push advances the valid-head index and our position */
+		rc = ioq_iter_push(&iter, 0);
+		BUG_ON(rc < 0);
+	}
+
+	if (priv->l4ro.available)
+		rx_pageq_refill(priv, GFP_KERNEL);
+}
+
+/*
+ * Walk the receive ring from the head of the valid index and free the skb
+ * attached to every descriptor that is still marked valid.  In L4RO mode
+ * one page reference is also dropped for each IOV beyond the first.
+ */
+static void
+rx_rxq_teardown(struct vbus_enet_priv *priv)
+{
+ struct ioq *ioq = priv->rxq.queue;
+ struct ioq_iterator iter;
+ int ret;
+
+ ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+ BUG_ON(ret < 0);
+
+ ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * free each valid descriptor
+ */
+ while (iter.desc->valid) {
+ struct sk_buff *skb;
+
+ if (priv->l4ro.available) {
+ struct venet_sg *vsg;
+ int i;
+
+ vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie;
+
+ /* skip i=0, since that is the skb->data IOV */
+ for (i = 1; i < vsg->count; i++) {
+ struct venet_iov *iov = &vsg->iov[i];
+ struct page *page = (struct page *)(unsigned long)iov->ptr;
+
+ put_page(page);
+ }
+
+ skb = (struct sk_buff *)(unsigned long)vsg->cookie;
+ } else
+ skb = (struct sk_buff *)(unsigned long)iter.desc->cookie;
+
+ /* clear 'valid' first; the barrier orders it before ptr/cookie are wiped */
+ iter.desc->valid = 0;
+ wmb();
+
+ iter.desc->ptr = 0;
+ iter.desc->cookie = 0;
+
+ ret = ioq_iter_pop(&iter, 0);
+ BUG_ON(ret < 0);
+
+ dev_kfree_skb(skb);
+ }
+}
+
+/*
+ * Release the pages still posted on the L4RO page queue, then drop the
+ * queue reference and free the L4RO descriptor pool.
+ */
+static void
+rx_l4ro_teardown(struct vbus_enet_priv *priv)
+{
+ struct ioq *ioq = priv->l4ro.pageq.queue;
+ struct ioq_iterator iter;
+ int ret;
+
+ ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0);
+ BUG_ON(ret < 0);
+
+ ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * release the page held by each descriptor whose 'sown' flag is set
+ */
+ while (iter.desc->sown) {
+ struct page *page = (struct page *)(unsigned long)iter.desc->cookie;
+
+ iter.desc->valid = 0;
+ wmb();
+
+ iter.desc->ptr = 0;
+ iter.desc->cookie = 0;
+
+ ret = ioq_iter_pop(&iter, 0);
+ BUG_ON(ret < 0);
+
+ put_page(page);
+ }
+
+ ioq_put(ioq);
+ kfree(priv->l4ro.pool);
+}
+
+/*
+ * Tear down the receive side: always the rx ring, and the L4RO page queue
+ * and pool as well when L4RO is available.
+ */
+static void
+rx_teardown(struct vbus_enet_priv *priv)
+{
+	bool l4ro = priv->l4ro.available;
+
+	rx_rxq_teardown(priv);
+
+	if (!l4ro)
+		return;
+
+	rx_l4ro_teardown(priv);
+}
+
+/*
+ * Prepare the transmit ring for scatter-gather operation.
+ *
+ * Nothing is done when SG was not negotiated.  Otherwise every ring entry
+ * gets an SG descriptor of SG_DESC_SIZE bytes: either a slot in one
+ * pre-mapped pool registered with the device (PMTD mode) or an individually
+ * allocated descriptor referenced by physical address.
+ *
+ * Returns 0 on success or -ENOMEM.  Device call failures BUG.
+ * NOTE(review): on -ENOMEM in the per-descriptor path, descriptors
+ * allocated by earlier iterations are not freed here — confirm the caller
+ * runs tx_teardown() on failure.
+ */
+static int
+tx_setup(struct vbus_enet_priv *priv)
+{
+ struct ioq *ioq = priv->tx.veq.queue;
+ struct ioq_iterator iter;
+ int i;
+ int ret;
+
+ if (!priv->sg)
+ /*
+ * There is nothing to do for a ring that is not using
+ * scatter-gather
+ */
+ return 0;
+
+ /* pre-allocate our descriptor pool if pmtd is enabled */
+ if (priv->pmtd.enabled) {
+ struct vbus_device_proxy *dev = priv->vdev;
+ size_t poollen = SG_DESC_SIZE * priv->tx.veq.count;
+ char *pool;
+ int shmid;
+
+ /* pmtdquery will return the shm-id to use for the pool */
+ ret = devcall(priv, VENET_FUNC_PMTDQUERY, NULL, 0);
+ BUG_ON(ret < 0);
+
+ shmid = ret;
+
+ pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA);
+ if (!pool)
+ return -ENOMEM;
+
+ priv->pmtd.pool = pool;
+
+ /* register the pool with the device under the returned shm-id */
+ ret = dev->ops->shm(dev, NULL, shmid, 0, pool, poollen,
+ NULL, NULL, 0);
+ BUG_ON(ret < 0);
+ }
+
+ ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+ BUG_ON(ret < 0);
+
+ ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * Now populate each descriptor with an empty SG descriptor
+ */
+ for (i = 0; i < priv->tx.veq.count; i++) {
+ struct venet_sg *vsg;
+
+ if (priv->pmtd.enabled) {
+ size_t offset = (i * SG_DESC_SIZE);
+
+ /* PMTD: ptr carries the offset into the pool, not an address */
+ vsg = (struct venet_sg *)&priv->pmtd.pool[offset];
+ iter.desc->ptr = cpu_to_le64(offset);
+ } else {
+ vsg = kzalloc(SG_DESC_SIZE, GFP_KERNEL);
+ if (!vsg)
+ return -ENOMEM;
+
+ iter.desc->ptr = cpu_to_le64(__pa(vsg));
+ }
+
+ iter.desc->cookie = (u64)(unsigned long)vsg;
+ iter.desc->len = cpu_to_le64(SG_DESC_SIZE);
+
+ /* plain seek, not a push: the entries are only prepared here */
+ ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
+ BUG_ON(ret < 0);
+ }
+
+ return 0;
+}
+
+/*
+ * Tear down the transmit side: free every skb still on the outstanding
+ * list, then release the SG descriptors set up by tx_setup() (the single
+ * pool in PMTD mode, otherwise each descriptor found through a non-zero
+ * ring cookie).
+ */
+static void
+tx_teardown(struct vbus_enet_priv *priv)
+{
+ struct ioq *ioq = priv->tx.veq.queue;
+ struct ioq_iterator iter;
+ struct sk_buff *skb;
+ int ret;
+
+ /* forcefully free all outstanding transmissions */
+ while ((skb = __skb_dequeue(&priv->tx.outstanding)))
+ dev_kfree_skb(skb);
+
+ if (!priv->sg)
+ /*
+ * There is nothing else to do for a ring that is not using
+ * scatter-gather
+ */
+ return;
+
+ if (priv->pmtd.enabled) {
+ /*
+ * PMTD mode means we only need to free the pool
+ */
+ kfree(priv->pmtd.pool);
+ return;
+ }
+
+ ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+ BUG_ON(ret < 0);
+
+ /* seek to position 0 */
+ ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * free the SG descriptor of each entry that still has a cookie;
+ * the walk ends at the first entry whose cookie is zero
+ */
+ while (iter.desc->cookie) {
+ struct venet_sg *vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie;
+
+ iter.desc->valid = 0;
+ wmb();
+
+ iter.desc->ptr = 0;
+ iter.desc->cookie = 0;
+
+ ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
+ BUG_ON(ret < 0);
+
+ kfree(vsg);
+ }
+}
+
+/*
+ * Drop the event queue reference and free its event pool.  Does nothing
+ * when the event queue was never enabled.
+ */
+static void
+evq_teardown(struct vbus_enet_priv *priv)
+{
+	if (priv->evq.enabled) {
+		ioq_put(priv->evq.veq.queue);
+		kfree(priv->evq.pool);
+	}
+}
+
+/*
+ * Open and close
+ */
+
+/*
+ * Bring the interface up: ask the device to raise the link, then enable
+ * NAPI polling.
+ *
+ * Returns 0 on success, or the negative error from the LINKUP device call.
+ * A host-side failure is reported to the caller instead of crashing the
+ * kernel; NAPI is left disabled in that case.
+ */
+static int
+vbus_enet_open(struct net_device *dev)
+{
+	struct vbus_enet_priv *priv = netdev_priv(dev);
+	int ret;
+
+	ret = devcall(priv, VENET_FUNC_LINKUP, NULL, 0);
+	if (ret < 0) {
+		dev_err(&dev->dev, "LINKUP request failed: %d\n", ret);
+		return ret;
+	}
+
+	napi_enable(&priv->napi);
+
+	return 0;
+}
+
+/*
+ * Take the interface down: stop NAPI polling first, then ask the device
+ * to drop the link.
+ *
+ * Returns 0 on success, or the negative error from the LINKDOWN device
+ * call.  NAPI is already disabled on either path, and a host-side failure
+ * is logged rather than crashing the kernel.
+ */
+static int
+vbus_enet_stop(struct net_device *dev)
+{
+	struct vbus_enet_priv *priv = netdev_priv(dev);
+	int ret;
+
+	napi_disable(&priv->napi);
+
+	ret = devcall(priv, VENET_FUNC_LINKDOWN, NULL, 0);
+	if (ret < 0) {
+		dev_err(&dev->dev, "LINKDOWN request failed: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Configuration changes (passed on by ifconfig)
+ *
+ * Returns -EBUSY while the interface is up, -EOPNOTSUPP on an attempt to
+ * change the I/O address, and 0 otherwise (all other fields are ignored).
+ */
+static int
+vbus_enet_config(struct net_device *dev, struct ifmap *map)
+{
+	bool running = dev->flags & IFF_UP;
+
+	/* can't act on a running interface */
+	if (running)
+		return -EBUSY;
+
+	/* the I/O address is fixed; anything else in the map is ignored */
+	if (map->base_addr == dev->base_addr)
+		return 0;
+
+	dev_warn(&dev->dev, "Can't change I/O address\n");
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Schedule NAPI rx polling.  If the NAPI context can be claimed, rx queue
+ * notifications are disabled before the poll is queued.  Runs under
+ * priv->lock with interrupts saved.
+ */
+static void
+vbus_enet_schedule_rx(struct vbus_enet_priv *priv)
+{
+	struct napi_struct *napi = &priv->napi;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&priv->lock, irqflags);
+
+	if (!napi_schedule_prep(napi))
+		goto unlock;
+
+	/* Disable further interrupts */
+	ioq_notify_disable(priv->rxq.queue, 0);
+	__napi_schedule(napi);
+
+unlock:
+	spin_unlock_irqrestore(&priv->lock, irqflags);
+}
+
+/*
+ * Change the interface MTU.
+ *
+ * Stores @new_mtu, asks the device to flush its outstanding rx buffers and
+ * schedules an rx poll so the flushed buffers are replaced with buffers
+ * sized for the new MTU.  Always returns 0; a failed FLUSHRX call BUGs.
+ *
+ * NOTE(review): @new_mtu is stored without any range check in this
+ * function — confirm whether bounds are enforced elsewhere.
+ */
+static int
+vbus_enet_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct vbus_enet_priv *priv = netdev_priv(dev);
+ int ret;
+
+ dev->mtu = new_mtu;
+
+ /*
+ * FLUSHRX will cause the device to flush any outstanding
+ * RX buffers. They will appear to come in as 0 length
+ * packets which we can simply discard and replace with new_mtu
+ * buffers for the future.
+ */
+ ret = devcall(priv, VENET_FUNC_FLUSHRX, NULL, 0);
+ BUG_ON(ret < 0);
+
+ vbus_enet_schedule_rx(priv);
+
+ return 0;
+}
+
+/*
+ * Rx import hook for L4RO (scatter-gather receive) mode.
+ *
+ * Turns the SG descriptor behind @desc into an skb: IOV[0] is the linear
+ * part, every further IOV becomes a page fragment.  Checksum and GSO
+ * metadata supplied in the descriptor is applied to the skb.
+ *
+ * Returns the skb, or NULL after freeing it when the descriptor is a
+ * zero-length flush marker or carries invalid checksum/GSO data.
+ *
+ * NOTE(review): vsg->count is not bounds-checked here before indexing
+ * sinfo->frags[] — presumably the device is trusted to stay within
+ * MAX_SKB_FRAGS; confirm.
+ */
+static struct sk_buff *
+vbus_enet_l4ro_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc)
+{
+ struct venet_sg *vsg = (struct venet_sg *)(unsigned long)desc->cookie;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long)vsg->cookie;
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+ int i;
+
+ /* called from the poll path, hence GFP_ATOMIC */
+ rx_pageq_refill(priv, GFP_ATOMIC);
+
+ if (!vsg->len)
+ /*
+ * the device may send a zero-length packet when its
+ * flushing references on the ring. We can just drop
+ * these on the floor
+ */
+ goto fail;
+
+ /* advance only by the linear portion in IOV[0] */
+ skb_put(skb, vsg->iov[0].len);
+
+ /* skip i=0, since that is the skb->data IOV */
+ for (i = 1; i < vsg->count; i++) {
+ struct venet_iov *iov = &vsg->iov[i];
+ struct page *page = (struct page *)(unsigned long)iov->ptr;
+ skb_frag_t *f = &sinfo->frags[i-1];
+
+ f->page = page;
+ f->page_offset = 0;
+ f->size = iov->len;
+
+ PDEBUG(priv->dev, "SG: Importing %d byte page[%i]\n",
+ f->size, i);
+
+ /* account the fragment in the skb's length bookkeeping */
+ skb->data_len += f->size;
+ skb->len += f->size;
+ skb->truesize += f->size;
+ sinfo->nr_frags++;
+ }
+
+ if (vsg->flags & VENET_SG_FLAG_NEEDS_CSUM
+ && !skb_partial_csum_set(skb, vsg->csum.start,
+ vsg->csum.offset)) {
+ priv->dev->stats.rx_frame_errors++;
+ goto fail;
+ }
+
+ if (vsg->flags & VENET_SG_FLAG_GSO) {
+ PDEBUG(priv->dev, "L4RO packet detected\n");
+
+ switch (vsg->gso.type) {
+ case VENET_GSO_TYPE_TCPV4:
+ sinfo->gso_type = SKB_GSO_TCPV4;
+ break;
+ case VENET_GSO_TYPE_TCPV6:
+ sinfo->gso_type = SKB_GSO_TCPV6;
+ break;
+ case VENET_GSO_TYPE_UDP:
+ sinfo->gso_type = SKB_GSO_UDP;
+ break;
+ default:
+ PDEBUG(priv->dev, "Illegal L4RO type: %d\n",
+ vsg->gso.type);
+ priv->dev->stats.rx_frame_errors++;
+ goto fail;
+ }
+
+ if (vsg->flags & VENET_SG_FLAG_ECN)
+ sinfo->gso_type |= SKB_GSO_TCP_ECN;
+
+ sinfo->gso_size = vsg->gso.size;
+ if (sinfo->gso_size == 0) {
+ PDEBUG(priv->dev, "Illegal L4RO size: %d\n",
+ vsg->gso.size);
+ priv->dev->stats.rx_frame_errors++;
+ goto fail;
+ }
+
+ /*
+ * Header must be checked, and gso_segs
+ * computed.
+ */
+ sinfo->gso_type |= SKB_GSO_DODGY;
+ sinfo->gso_segs = 0;
+ }
+
+ return skb;
+
+fail:
+ dev_kfree_skb(skb);
+
+ return NULL;
+}
+
+/*
+ * Rx import hook for flat (non scatter-gather) mode.
+ *
+ * The descriptor cookie is the skb itself and desc->len is the received
+ * length.  Returns the skb with its data length set, or NULL after freeing
+ * it when the length is zero (the device flushing its ring references).
+ */
+static struct sk_buff *
+vbus_enet_flat_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc)
+{
+	struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->cookie;
+
+	if (desc->len) {
+		skb_put(skb, le64_to_cpu(desc->len));
+		return skb;
+	}
+
+	/* zero-length flush marker: drop it on the floor */
+	dev_kfree_skb(skb);
+
+	return NULL;
+}
+
+/*
+ * The poll implementation.
+ *
+ * Consumes up to @budget completed rx descriptors from the head of the
+ * in-use index, hands each imported skb to the stack and re-arms the
+ * descriptor with a fresh buffer.  Descriptors for which the import hook
+ * returns NULL are re-armed without counting against the budget.
+ *
+ * Returns the number of packets delivered, which is what the NAPI core
+ * uses for its budget accounting.  Polling is completed and rx
+ * notifications re-enabled only when less than the full budget was used
+ * and the ring is empty; when the whole budget was consumed the NAPI state
+ * is left alone so the core polls again.
+ */
+static int
+vbus_enet_poll(struct napi_struct *napi, int budget)
+{
+	struct vbus_enet_priv *priv = napi_to_priv(napi);
+	int npackets = 0;
+	struct ioq_iterator iter;
+	int ret;
+
+	PDEBUG(priv->dev, "polling...\n");
+
+	/* We want to iterate on the head of the in-use index */
+	ret = ioq_iter_init(priv->rxq.queue, &iter, ioq_idxtype_inuse,
+			    IOQ_ITER_AUTOUPDATE);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * We stop if we have met the quota or there are no more packets.
+	 * The EOM is indicated by finding a packet that is still owned by
+	 * the south side
+	 */
+	while ((npackets < budget) && (!iter.desc->sown)) {
+		struct sk_buff *skb;
+
+		skb = priv->import(priv, iter.desc);
+		if (skb) {
+			/* Maintain stats */
+			npackets++;
+			priv->dev->stats.rx_packets++;
+			priv->dev->stats.rx_bytes += skb->len;
+
+			/* Pass the buffer up to the stack */
+			skb->dev = priv->dev;
+			skb->protocol = eth_type_trans(skb, priv->dev);
+			netif_receive_skb(skb);
+
+			mb();
+		}
+
+		/* Grab a new buffer to put in the ring */
+		rxdesc_alloc(priv, iter.desc, priv->dev->mtu);
+
+		/* Advance the in-use tail */
+		ret = ioq_iter_pop(&iter, 0);
+		BUG_ON(ret < 0);
+	}
+
+	PDEBUG(priv->dev, "%d packets received\n", npackets);
+
+	/*
+	 * If we processed all packets without exhausting the budget, we're
+	 * done; tell the kernel and reenable ints.  The NAPI state must not
+	 * be touched when the full budget was consumed.
+	 */
+	if (npackets < budget
+	    && ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) {
+		napi_complete(napi);
+		ioq_notify_enable(priv->rxq.queue, 0);
+	}
+
+	return npackets;
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ *
+ * Places @skb on the tail of the tx ring, either as a scatter-gather
+ * descriptor (with checksum/GSO metadata) or as a flat buffer, queues it
+ * on the outstanding list and pushes the descriptor to the device.  The
+ * queue is stopped when the ring becomes full.
+ *
+ * Returns NETDEV_TX_OK once the skb is owned by the driver, or
+ * NETDEV_TX_BUSY when the ring was already full.  In the busy case the skb
+ * is untouched and still owned by the caller, so it must be reported with
+ * NETDEV_TX_BUSY (not a bare 1) for the stack to requeue it.
+ */
+static int
+vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vbus_enet_priv *priv = netdev_priv(dev);
+	struct ioq_iterator iter;
+	int ret;
+	unsigned long flags;
+
+	PDEBUG(priv->dev, "sending %d bytes\n", skb->len);
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	if (ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) {
+		/*
+		 * We must flow-control the kernel by disabling the
+		 * queue
+		 */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		netif_stop_queue(dev);
+		dev_err(&priv->dev->dev, "tx on full queue bug\n");
+		return NETDEV_TX_BUSY;
+	}
+
+	/*
+	 * We want to iterate on the tail of both the "inuse" and "valid" index
+	 * so we specify the "both" index
+	 */
+	ret = ioq_iter_init(priv->tx.veq.queue, &iter, ioq_idxtype_both,
+			    IOQ_ITER_AUTOUPDATE);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+	BUG_ON(ret < 0);
+	BUG_ON(iter.desc->sown);
+
+	if (priv->sg) {
+		struct venet_sg *vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie;
+		struct scatterlist sgl[MAX_SKB_FRAGS+1];
+		struct scatterlist *sg;
+		int count, maxcount = ARRAY_SIZE(sgl);
+
+		sg_init_table(sgl, maxcount);
+
+		memset(vsg, 0, sizeof(*vsg));
+
+		vsg->cookie = (u64)(unsigned long)skb;
+		vsg->len = skb->len;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			vsg->flags |= VENET_SG_FLAG_NEEDS_CSUM;
+			vsg->csum.start = skb->csum_start - skb_headroom(skb);
+			vsg->csum.offset = skb->csum_offset;
+		}
+
+		if (skb_is_gso(skb)) {
+			struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+			vsg->flags |= VENET_SG_FLAG_GSO;
+
+			vsg->gso.hdrlen = skb_headlen(skb);
+			vsg->gso.size = sinfo->gso_size;
+			if (sinfo->gso_type & SKB_GSO_TCPV4)
+				vsg->gso.type = VENET_GSO_TYPE_TCPV4;
+			else if (sinfo->gso_type & SKB_GSO_TCPV6)
+				vsg->gso.type = VENET_GSO_TYPE_TCPV6;
+			else if (sinfo->gso_type & SKB_GSO_UDP)
+				vsg->gso.type = VENET_GSO_TYPE_UDP;
+			else
+				panic("Virtual-Ethernet: unknown GSO type " \
+				      "0x%x\n", sinfo->gso_type);
+
+			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+				vsg->flags |= VENET_SG_FLAG_ECN;
+		}
+
+		count = skb_to_sgvec(skb, sgl, 0, skb->len);
+
+		BUG_ON(count > maxcount);
+
+		/* one IOV per scatterlist entry, physical address + length */
+		for (sg = &sgl[0]; sg; sg = sg_next(sg)) {
+			struct venet_iov *iov = &vsg->iov[vsg->count++];
+
+			iov->len = sg->length;
+			iov->ptr = (u64)sg_phys(sg);
+		}
+
+		iter.desc->len = cpu_to_le64(VSG_DESC_SIZE(vsg->count));
+
+	} else {
+		/*
+		 * non scatter-gather mode: simply put the skb right onto the
+		 * ring.
+		 */
+		iter.desc->cookie = (u64)(unsigned long)skb;
+		iter.desc->len = cpu_to_le64(skb->len);
+		iter.desc->ptr = cpu_to_le64(__pa(skb->data));
+	}
+
+	iter.desc->valid = 1;
+
+	priv->dev->stats.tx_packets++;
+	priv->dev->stats.tx_bytes += skb->len;
+
+	skb_queue_tail(&priv->tx.outstanding, skb);
+
+	/*
+	 * This advances both indexes together implicitly, and then
+	 * signals the south side to consume the packet
+	 */
+	ret = ioq_iter_push(&iter, 0);
+	BUG_ON(ret < 0);
+
+	dev->trans_start = jiffies; /* save the timestamp */
+
+	if (ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) {
+		/*
+		 * If the queue is congested, we must flow-control the kernel
+		 */
+		PDEBUG(priv->dev, "backpressure tx queue\n");
+		netif_stop_queue(dev);
+	}
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * Finish a transmitted skb: unlink it from the outstanding list and free
+ * it.
+ *
+ * NOTE(review): the previous comment said priv->lock is held, but the
+ * callers in this file (vbus_enet_tx_reap() and evq_txc_event()) invoke it
+ * without taking priv->lock.
+ */
+static void
+vbus_enet_skb_complete(struct vbus_enet_priv *priv, struct sk_buff *skb)
+{
+ PDEBUG(priv->dev, "completed sending %d bytes\n",
+ skb->len);
+
+ skb_unlink(skb, &priv->tx.outstanding);
+ dev_kfree_skb(skb);
+}
+
+/*
+ * reclaim one outstanding completed tx packet
+ *
+ * Returns the skb of the descriptor at the head of the valid index if the
+ * device has handed it back (valid and not 'sown'), otherwise NULL.  Wakes
+ * the tx queue if it was stopped and the ring is no longer full.
+ *
+ * Takes priv->lock itself (irqsave), so callers must NOT hold it.
+ */
+static struct sk_buff *
+vbus_enet_tx_reap_one(struct vbus_enet_priv *priv)
+{
+ struct sk_buff *skb = NULL;
+ struct ioq_iterator iter;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ /*
+ * We want to iterate on the head of the valid index, but we
+ * do not want the iter_pop (below) to flip the ownership, so
+ * we set the NOFLIPOWNER option
+ */
+ ret = ioq_iter_init(priv->tx.veq.queue, &iter, ioq_idxtype_valid,
+ IOQ_ITER_NOFLIPOWNER);
+ BUG_ON(ret < 0);
+
+ ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+ BUG_ON(ret < 0);
+
+ if (iter.desc->valid && !iter.desc->sown) {
+
+ if (priv->sg) {
+ struct venet_sg *vsg;
+
+ /* SG mode: the skb pointer lives in the SG descriptor's cookie */
+ vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie;
+ skb = (struct sk_buff *)(unsigned long)vsg->cookie;
+ } else
+ skb = (struct sk_buff *)(unsigned long)iter.desc->cookie;
+
+ /* Reset the descriptor */
+ iter.desc->valid = 0;
+
+ /* Advance the valid-index head */
+ ret = ioq_iter_pop(&iter, 0);
+ BUG_ON(ret < 0);
+ }
+
+ /*
+ * If we were previously stopped due to flow control, restart the
+ * processing
+ */
+ if (netif_queue_stopped(priv->dev)
+ && !ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) {
+ PDEBUG(priv->dev, "re-enabling tx queue\n");
+ netif_wake_queue(priv->dev);
+ }
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return skb;
+}
+
+/*
+ * Reclaim every completed tx descriptor.  When tx-complete events (TXC)
+ * are not in use the reaped skb is freed here; with TXC enabled the skb is
+ * left for the event handler.
+ */
+static void
+vbus_enet_tx_reap(struct vbus_enet_priv *priv)
+{
+	for (;;) {
+		struct sk_buff *done = vbus_enet_tx_reap_one(priv);
+
+		if (!done)
+			break;
+
+		if (priv->evq.txc)
+			continue;
+
+		/* no TXC: freeing the packet upon reap is our job */
+		vbus_enet_skb_complete(priv, done);
+	}
+}
+
+/* Tx watchdog hook: log the timeout and reap whatever has completed. */
+static void
+vbus_enet_timeout(struct net_device *dev)
+{
+	dev_dbg(&dev->dev, "Transmit timeout\n");
+
+	vbus_enet_tx_reap(netdev_priv(dev));
+}
+
+/*
+ * Rx queue notifier: schedule NAPI polling when the in-use index of the
+ * rx ring is not empty.
+ */
+static void
+rx_isr(struct ioq_notifier *notifier)
+{
+	struct vbus_enet_priv *priv;
+
+	priv = container_of(notifier, struct vbus_enet_priv, rxq.notifier);
+
+	if (!ioq_empty(priv->rxq.queue, ioq_idxtype_inuse))
+		vbus_enet_schedule_rx(priv);
+}
+
+/*
+ * Tx tasklet body (@data is the vbus_enet_priv): reap completed
+ * transmissions, then re-enable tx queue notifications.
+ */
+static void
+deferred_tx_isr(unsigned long data)
+{
+	struct vbus_enet_priv *ctx = (struct vbus_enet_priv *)data;
+	struct ioq *txq = ctx->tx.veq.queue;
+
+	PDEBUG(ctx->dev, "deferred_tx_isr\n");
+
+	vbus_enet_tx_reap(ctx);
+
+	ioq_notify_enable(txq, 0);
+}
+
+/*
+ * Tx queue notifier: mask further tx notifications and defer the reaping
+ * to the tx tasklet.
+ */
+static void
+tx_isr(struct ioq_notifier *notifier)
+{
+	struct vbus_enet_priv *ctx =
+		container_of(notifier, struct vbus_enet_priv, tx.veq.notifier);
+
+	PDEBUG(ctx->dev, "tx_isr\n");
+
+	ioq_notify_disable(ctx->tx.veq.queue, 0);
+	tasklet_schedule(&ctx->tx.task);
+}
+
+/*
+ * Handle a link-state event: state 0 drops the carrier, state 1 raises
+ * it, any other value is ignored.
+ */
+static void
+evq_linkstate_event(struct vbus_enet_priv *priv,
+		    struct venet_event_header *header)
+{
+	struct venet_event_linkstate *ls =
+		(struct venet_event_linkstate *)header;
+
+	if (ls->state == 0)
+		netif_carrier_off(priv->dev);
+	else if (ls->state == 1)
+		netif_carrier_on(priv->dev);
+}
+
+/*
+ * Handle a tx-complete event: reap the tx ring, then complete the skb
+ * whose pointer the event carries in its cookie.
+ */
+static void
+evq_txc_event(struct vbus_enet_priv *priv,
+	      struct venet_event_header *header)
+{
+	struct venet_event_txc *txc = (struct venet_event_txc *)header;
+	struct sk_buff *skb;
+
+	vbus_enet_tx_reap(priv);
+
+	skb = (struct sk_buff *)(unsigned long)txc->cookie;
+	vbus_enet_skb_complete(priv, skb);
+}
+
+/*
+ * Event queue tasklet body (@data is the vbus_enet_priv).
+ *
+ * Drains the event ring from the head of the in-use index, dispatching
+ * each event by its header id, zeroing the event buffer and popping the
+ * descriptor.  An unknown event id panics.  Event queue notifications are
+ * re-enabled once the ring is drained.
+ */
+static void
+deferred_evq_isr(unsigned long data)
+{
+ struct vbus_enet_priv *priv = (struct vbus_enet_priv *)data;
+ int nevents = 0;
+ struct ioq_iterator iter;
+ int ret;
+
+ PDEBUG(priv->dev, "evq: polling...\n");
+
+ /* We want to iterate on the head of the in-use index */
+ ret = ioq_iter_init(priv->evq.veq.queue, &iter, ioq_idxtype_inuse,
+ IOQ_ITER_AUTOUPDATE);
+ BUG_ON(ret < 0);
+
+ ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * The EOM is indicated by finding a packet that is still owned by
+ * the south side
+ */
+ while (!iter.desc->sown) {
+ struct venet_event_header *header;
+
+ /* the cookie is the address of this entry's buffer in the event pool */
+ header = (struct venet_event_header *)(unsigned long)iter.desc->cookie;
+
+ switch (header->id) {
+ case VENET_EVENT_LINKSTATE:
+ evq_linkstate_event(priv, header);
+ break;
+ case VENET_EVENT_TXC:
+ evq_txc_event(priv, header);
+ break;
+ default:
+ panic("venet: unexpected event id:%d of size %d\n",
+ header->id, header->size);
+ break;
+ }
+
+ /* wipe the event buffer before the descriptor is popped */
+ memset((void *)(unsigned long)iter.desc->cookie, 0, priv->evq.evsize);
+
+ /* Advance the in-use tail */
+ ret = ioq_iter_pop(&iter, 0);
+ BUG_ON(ret < 0);
+
+ nevents++;
+ }
+
+ PDEBUG(priv->dev, "%d events received\n", nevents);
+
+ ioq_notify_enable(priv->evq.veq.queue, 0);
+}
+
+/*
+ * Event queue notifier: mask further event notifications and defer the
+ * processing to the event tasklet.
+ */
+static void
+evq_isr(struct ioq_notifier *notifier)
+{
+	struct vbus_enet_priv *ctx =
+		container_of(notifier, struct vbus_enet_priv, evq.veq.notifier);
+
+	PDEBUG(ctx->dev, "evq_isr\n");
+
+	ioq_notify_disable(ctx->evq.veq.queue, 0);
+	tasklet_schedule(&ctx->evq.task);
+}
+
+/*
+ * Negotiate the scatter-gather capability group with the device.
+ *
+ * When the sg_enabled module parameter is set, SG, TSO4, TSO6, ECN and
+ * PMTD are requested.  Whatever the device grants is turned into
+ * net_device feature flags and into priv->sg / priv->pmtd.enabled.
+ *
+ * Returns 0 on success or the negative error from the NEGCAP call.
+ */
+static int
+vbus_enet_sg_negcap(struct vbus_enet_priv *priv)
+{
+	struct net_device *ndev = priv->dev;
+	struct venet_capabilities caps;
+	int rc;
+
+	memset(&caps, 0, sizeof(caps));
+
+	if (sg_enabled) {
+		caps.gid = VENET_CAP_GROUP_SG;
+		/* note: exclude UFO for now due to stack bug */
+		caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6
+			      |VENET_CAP_ECN|VENET_CAP_PMTD);
+	}
+
+	rc = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps));
+	if (rc < 0)
+		return rc;
+
+	/* without SG none of the dependent offloads apply */
+	if (!(caps.bits & VENET_CAP_SG))
+		return 0;
+
+	priv->sg = true;
+
+	ndev->features |= NETIF_F_SG|NETIF_F_HW_CSUM|NETIF_F_FRAGLIST;
+
+	if (caps.bits & VENET_CAP_TSO4)
+		ndev->features |= NETIF_F_TSO;
+	if (caps.bits & VENET_CAP_UFO)
+		ndev->features |= NETIF_F_UFO;
+	if (caps.bits & VENET_CAP_TSO6)
+		ndev->features |= NETIF_F_TSO6;
+	if (caps.bits & VENET_CAP_ECN)
+		ndev->features |= NETIF_F_TSO_ECN;
+
+	if (caps.bits & VENET_CAP_PMTD)
+		priv->pmtd.enabled = true;
+
+	return 0;
+}
+
+/*
+ * Negotiate the event-queue capability group and, if the device grants
+ * any of it, set the event queue up.
+ *
+ * @count: number of entries in the event ring
+ *
+ * On success with at least one capability granted: the evq flags are set,
+ * an event pool of evsize * @count bytes is allocated and registered with
+ * the device, the event ring is created and populated with one pool slot
+ * per entry, and event notifications are enabled.
+ *
+ * Returns 0 on success (including "nothing granted"), or a negative error.
+ * On failure the event pool is freed and the evq flags are cleared again,
+ * so evq_teardown() and the tx reap path do not act on a half-initialized
+ * event queue.
+ */
+static int
+vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count)
+{
+	struct vbus_device_proxy *dev = priv->vdev;
+	struct venet_capabilities caps;
+	struct venet_eventq_query query;
+	struct ioq_iterator iter;
+	size_t poollen;
+	char *pool;
+	unsigned long i;
+	int ret;
+
+	memset(&caps, 0, sizeof(caps));
+
+	caps.gid = VENET_CAP_GROUP_EVENTQ;
+	caps.bits |= VENET_CAP_EVQ_LINKSTATE;
+	caps.bits |= VENET_CAP_EVQ_TXC;
+
+	ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps));
+	if (ret < 0)
+		return ret;
+
+	if (!caps.bits)
+		return 0;
+
+	priv->evq.enabled = true;
+
+	if (caps.bits & VENET_CAP_EVQ_LINKSTATE) {
+		/*
+		 * We will assume there is no carrier until we get
+		 * an event telling us otherwise
+		 */
+		netif_carrier_off(priv->dev);
+		priv->evq.linkstate = true;
+	}
+
+	if (caps.bits & VENET_CAP_EVQ_TXC)
+		priv->evq.txc = true;
+
+	memset(&query, 0, sizeof(query));
+
+	ret = devcall(priv, VENET_FUNC_EVQQUERY, &query, sizeof(query));
+	if (ret < 0)
+		goto out_disable;
+
+	priv->evq.evsize = query.evsize;
+	poollen = query.evsize * count;
+
+	pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA);
+	if (!pool) {
+		ret = -ENOMEM;
+		goto out_disable;
+	}
+
+	priv->evq.pool = pool;
+
+	/* register the pool with the device under the id it reported */
+	ret = dev->ops->shm(dev, NULL, query.dpid, 0,
+			    pool, poollen, NULL, NULL, 0);
+	if (ret < 0)
+		goto out_free;
+
+	/* queue_init() panics on allocation failure and otherwise returns 0 */
+	queue_init(priv, &priv->evq.veq, "evq",
+		   query.qid, count, evq_isr);
+
+	ret = ioq_iter_init(priv->evq.veq.queue,
+			    &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
+	BUG_ON(ret < 0);
+
+	/* Now populate each descriptor with an empty event */
+	for (i = 0; i < count; i++) {
+		size_t offset = (i * query.evsize);
+		void *addr = &priv->evq.pool[offset];
+
+		/* ptr is the offset into the pool, cookie its address */
+		iter.desc->ptr = cpu_to_le64(offset);
+		iter.desc->cookie = (u64)(unsigned long)addr;
+		iter.desc->len = cpu_to_le64(query.evsize);
+
+		ret = ioq_iter_push(&iter, 0);
+		BUG_ON(ret < 0);
+	}
+
+	/* Finally, enable interrupts */
+	tasklet_init(&priv->evq.task, deferred_evq_isr,
+		     (unsigned long)priv);
+	ioq_notify_enable(priv->evq.veq.queue, 0);
+
+	return 0;
+
+out_free:
+	kfree(pool);
+	priv->evq.pool = NULL;
+out_disable:
+	priv->evq.enabled = false;
+	priv->evq.linkstate = false;
+	priv->evq.txc = false;
+
+	return ret;
+}
+
+/*
+ * Negotiate the L4 receive-offload (L4RO) capability group.
+ *
+ * @priv:  per-device driver state
+ * @count: number of descriptors in the pre-mapped pool and page-queue
+ *
+ * If the host grants SG within this group, a descriptor pool is allocated
+ * and registered as shared memory, the page-queue is created, and L4RO is
+ * flagged as available.
+ *
+ * Returns 0 on success (including when L4RO is not granted), -ENOMEM if the
+ * pool cannot be allocated, or the negative error from the host.
+ */
+static int
+vbus_enet_l4ro_negcap(struct vbus_enet_priv *priv, unsigned long count)
+{
+	struct vbus_device_proxy *vdev = priv->vdev;
+	struct venet_capabilities caps;
+	struct venet_l4ro_query query;
+	size_t poollen;
+	char *pool;
+	int rc;
+
+	memset(&caps, 0, sizeof(caps));
+
+	caps.gid = VENET_CAP_GROUP_L4RO;
+	caps.bits |= VENET_CAP_SG | VENET_CAP_TSO4 | VENET_CAP_TSO6 |
+		     VENET_CAP_ECN;
+
+	rc = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps));
+	if (rc < 0) {
+		printk(KERN_ERR "Error negotiating L4RO: %d\n", rc);
+		return rc;
+	}
+
+	/* nothing to set up unless the host agreed to SG for this group */
+	if (!(caps.bits & VENET_CAP_SG))
+		return 0;
+
+	memset(&query, 0, sizeof(query));
+
+	rc = devcall(priv, VENET_FUNC_L4ROQUERY, &query, sizeof(query));
+	if (rc < 0) {
+		printk(KERN_ERR "Error querying L4RO: %d\n", rc);
+		return rc;
+	}
+
+	poollen = SG_DESC_SIZE * count;
+	pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA);
+	if (!pool)
+		return -ENOMEM;
+
+	/* pre-mapped descriptor pool */
+	rc = vdev->ops->shm(vdev, NULL, query.dpid, 0,
+			    pool, poollen, NULL, NULL, 0);
+	if (rc < 0) {
+		printk(KERN_ERR "Error registering L4RO pool: %d\n",
+		       rc);
+		kfree(pool);
+		return rc;
+	}
+
+	/*
+	 * page-queue: a ring of arbitrary pages the host consumes when the
+	 * SG::IOV count exceeds one MTU frame.  All we need to do is keep it
+	 * populated with free pages.
+	 */
+	queue_init(priv, &priv->l4ro.pageq, "pageq", query.pqid,
+		   count, NULL);
+
+	priv->l4ro.pool = pool;
+	priv->l4ro.available = true;
+
+	return 0;
+}
+
+/*
+ * Run every capability negotiation in order: SG, then the event queue
+ * (sized by tx_ringlen), then L4RO (sized by rx_ringlen).  Stops at the
+ * first failure and returns its negative error; returns 0 otherwise.
+ */
+static int
+vbus_enet_negcap(struct vbus_enet_priv *priv)
+{
+	int rc;
+
+	rc = vbus_enet_sg_negcap(priv);
+	if (rc < 0)
+		return rc;
+
+	rc = vbus_enet_evq_negcap(priv, tx_ringlen);
+	if (rc < 0)
+		return rc;
+
+	rc = vbus_enet_l4ro_negcap(priv, rx_ringlen);
+
+	return rc < 0 ? rc : 0;
+}
+
+/*
+ * ethtool hook for toggling tx checksum offload.  Enabling is refused with
+ * -ENOSYS unless SG was negotiated (priv->sg); otherwise the request is
+ * handed to ethtool_op_set_tx_hw_csum().
+ */
+static int vbus_enet_set_tx_csum(struct net_device *dev, u32 data)
+{
+	const struct vbus_enet_priv *priv = netdev_priv(dev);
+	bool enabling = data != 0;
+
+	if (enabling && !priv->sg)
+		return -ENOSYS;
+
+	return ethtool_op_set_tx_hw_csum(dev, data);
+}
+/*
+ * ethtool operations: tx-checksum changes are gated by
+ * vbus_enet_set_tx_csum(); the remaining entries use the stock
+ * ethtool_op_* helpers.
+ */
+static struct ethtool_ops vbus_enet_ethtool_ops = {
+ .set_tx_csum = vbus_enet_set_tx_csum,
+ .set_sg = ethtool_op_set_sg,
+ .set_tso = ethtool_op_set_tso,
+ .get_link = ethtool_op_get_link,
+};
+/*
+ * net_device callbacks, installed on the netdev by vbus_enet_probe().
+ * MAC address set/validate use the generic eth_* helpers.
+ */
+static const struct net_device_ops vbus_enet_netdev_ops = {
+ .ndo_open = vbus_enet_open,
+ .ndo_stop = vbus_enet_stop,
+ .ndo_set_config = vbus_enet_config,
+ .ndo_start_xmit = vbus_enet_tx_start,
+ .ndo_change_mtu = vbus_enet_change_mtu,
+ .ndo_tx_timeout = vbus_enet_timeout,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
+/*
+ * This is called whenever a new vbus_device_proxy is added to the vbus
+ * with the matching VENET_ID
+ *
+ * Opens the device, allocates the netdev, negotiates capabilities, creates
+ * the rx/tx queues, queries the MAC address and registers the netdev.
+ *
+ * Returns 0 on success or a negative errno.  Every failure after the
+ * device was opened closes it again, so the handle is not left dangling
+ * and a later probe can re-open the device.
+ *
+ * NOTE(review): queues and pools created before a late failure are still
+ * not unwound here; only the device handle and the netdev are released.
+ */
+static int
+vbus_enet_probe(struct vbus_device_proxy *vdev)
+{
+	struct net_device *dev;
+	struct vbus_enet_priv *priv;
+	int ret;
+
+	printk(KERN_INFO "VENET: Found new device at %lld\n", vdev->id);
+
+	ret = vdev->ops->open(vdev, VENET_VERSION, 0);
+	if (ret < 0)
+		return ret;
+
+	dev = alloc_etherdev(sizeof(struct vbus_enet_priv));
+	if (!dev) {
+		/* undo the open() above before bailing out */
+		vdev->ops->close(vdev, 0);
+		return -ENOMEM;
+	}
+
+	/*
+	 * establish our device-name early so we can incorporate it into
+	 * the signal-path names, etc
+	 */
+	rtnl_lock();
+
+	ret = dev_alloc_name(dev, dev->name);
+	if (ret < 0)
+		goto out_free;
+
+	priv = netdev_priv(dev);
+
+	spin_lock_init(&priv->lock);
+	priv->dev = dev;
+	priv->vdev = vdev;
+
+	ret = vbus_enet_negcap(priv);
+	if (ret < 0) {
+		printk(KERN_INFO "VENET: Error negotiating capabilities for "
+		       "%lld\n",
+		       priv->vdev->id);
+		goto out_free;
+	}
+
+	if (priv->l4ro.available)
+		priv->import = &vbus_enet_l4ro_import;
+	else
+		priv->import = &vbus_enet_flat_import;
+
+	skb_queue_head_init(&priv->tx.outstanding);
+
+	queue_init(priv, &priv->rxq, "rx", VENET_QUEUE_RX, rx_ringlen,
+		   rx_isr);
+	queue_init(priv, &priv->tx.veq, "tx", VENET_QUEUE_TX, tx_ringlen,
+		   tx_isr);
+
+	rx_setup(priv);
+	tx_setup(priv);
+
+	ioq_notify_enable(priv->rxq.queue, 0);  /* enable rx interrupts */
+
+	if (!priv->evq.txc) {
+		/*
+		 * If the TXC feature is present, we will receive our
+		 * tx-complete notification via the event-channel.  Therefore,
+		 * we only enable txq interrupts if the TXC feature is not
+		 * present.
+		 */
+		tasklet_init(&priv->tx.task, deferred_tx_isr,
+			     (unsigned long)priv);
+		ioq_notify_enable(priv->tx.veq.queue, 0);
+	}
+
+	dev->netdev_ops = &vbus_enet_netdev_ops;
+	dev->watchdog_timeo = 5 * HZ;
+	SET_ETHTOOL_OPS(dev, &vbus_enet_ethtool_ops);
+	SET_NETDEV_DEV(dev, &vdev->dev);
+
+	netif_napi_add(dev, &priv->napi, vbus_enet_poll, 128);
+
+	ret = devcall(priv, VENET_FUNC_MACQUERY, priv->dev->dev_addr, ETH_ALEN);
+	if (ret < 0) {
+		printk(KERN_INFO "VENET: Error obtaining MAC address for "
+		       "%lld\n",
+		       priv->vdev->id);
+		goto out_free;
+	}
+
+	dev->features |= NETIF_F_HIGHDMA;
+
+	ret = register_netdevice(dev);
+	if (ret < 0) {
+		printk(KERN_INFO "VENET: error %i registering device \"%s\"\n",
+		       ret, dev->name);
+		goto out_free;
+	}
+
+	rtnl_unlock();
+
+	vdev->priv = priv;
+
+	return 0;
+
+ out_free:
+	rtnl_unlock();
+
+	/*
+	 * Close the device before the netdev (and the priv embedded in it)
+	 * goes away, mirroring the order used by vbus_enet_remove().
+	 */
+	vdev->ops->close(vdev, 0);
+
+	free_netdev(dev);
+
+	return ret;
+}
+/*
+ * Undo vbus_enet_probe(): unregister the netdev, stop NAPI, tear down and
+ * release the rx and tx queues (plus the event queue when it was
+ * negotiated), close the vbus device, then free the netdev.
+ * Always returns 0.
+ *
+ * NOTE(review): nothing visible here releases the L4RO pool/page-queue set
+ * up by vbus_enet_l4ro_negcap() -- confirm rx_teardown() covers it.
+ */
+static int
+vbus_enet_remove(struct vbus_device_proxy *vdev)
+{
+ struct vbus_enet_priv *priv = (struct vbus_enet_priv *)vdev->priv;
+ struct vbus_device_proxy *dev = priv->vdev;
+
+ unregister_netdev(priv->dev);
+ napi_disable(&priv->napi);
+
+ rx_teardown(priv);
+ ioq_put(priv->rxq.queue);
+
+ tx_teardown(priv);
+ ioq_put(priv->tx.veq.queue);
+
+ if (priv->evq.enabled)
+ evq_teardown(priv);
+
+ dev->ops->close(dev, 0);
+
+ free_netdev(priv->dev); /* priv lives inside the netdev; do this last */
+
+ return 0;
+}
+
+/*
+ * Finally, the module stuff
+ *
+ * vbus_enet_driver describes this driver to the vbus core: it claims
+ * devices of type VENET_TYPE and its ops table supplies the probe and
+ * remove entry points.
+ */
+
+static struct vbus_driver_ops vbus_enet_driver_ops = {
+ .probe = vbus_enet_probe,
+ .remove = vbus_enet_remove,
+};
+
+static struct vbus_driver vbus_enet_driver = {
+ .type = VENET_TYPE,
+ .owner = THIS_MODULE,
+ .ops = &vbus_enet_driver_ops,
+};
+/*
+ * Module entry point: log a banner and the configured rx/tx ring lengths,
+ * then register the driver.  Returns vbus_driver_register()'s result.
+ */
+static __init int
+vbus_enet_init_module(void)
+{
+ printk(KERN_INFO "Virtual Ethernet: Copyright (C) 2009 Novell, Gregory Haskins\n");
+ printk(KERN_DEBUG "VENET: Using %d/%d queue depth\n",
+ rx_ringlen, tx_ringlen);
+ return vbus_driver_register(&vbus_enet_driver);
+}
+/*
+ * Module exit point: unregister the driver registered by
+ * vbus_enet_init_module().
+ */
+static __exit void
+vbus_enet_cleanup(void)
+{
+ vbus_driver_unregister(&vbus_enet_driver);
+}
+
+module_init(vbus_enet_init_module);
+module_exit(vbus_enet_cleanup);
+
+VBUS_DRIVER_AUTOPROBE(VENET_TYPE);
diff --git a/drivers/vbus/Kconfig b/drivers/vbus/Kconfig
new file mode 100644
index 000000000000..f51cba10913e
--- /dev/null
+++ b/drivers/vbus/Kconfig
@@ -0,0 +1,25 @@
+#
+# Virtual-Bus (VBus) driver configuration
+#
+
+config VBUS_PROXY
+ bool "Virtual-Bus support"
+ select SHM_SIGNAL
+ select IOQ
+ default n
+ help
+ Adds support for virtual-bus model drivers in a guest to connect
+ to host-side virtual-bus resources. If you are using this kernel
+ in a virtualization solution which implements virtual-bus devices
+ on the backend, say Y. If unsure, say N.
+
+config VBUS_PCIBRIDGE
+ bool "PCI to Virtual-Bus bridge"
+ depends on PCI
+ depends on VBUS_PROXY
+ select IOQ
+ default n
+ help
+ Provides a way to bridge host side vbus devices via a PCI-BRIDGE
+ object. If you are running virtualization with vbus devices on the
+ host, and the vbus is exposed via PCI, say Y. Otherwise, say N.
diff --git a/drivers/vbus/Makefile b/drivers/vbus/Makefile
new file mode 100644
index 000000000000..944b7f1fec90
--- /dev/null
+++ b/drivers/vbus/Makefile
@@ -0,0 +1,6 @@
+
+vbus-proxy-objs += bus-proxy.o
+obj-$(CONFIG_VBUS_PROXY) += vbus-proxy.o
+
+vbus-pcibridge-objs += pci-bridge.o
+obj-$(CONFIG_VBUS_PCIBRIDGE) += vbus-pcibridge.o
diff --git a/drivers/vbus/bus-proxy.c b/drivers/vbus/bus-proxy.c
new file mode 100644
index 000000000000..47928423a050
--- /dev/null
+++ b/drivers/vbus/bus-proxy.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/vbus_driver.h>
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+
+#define VBUS_PROXY_NAME "vbus-proxy"
+/*
+ * Map an embedded struct device back to its enclosing vbus_device_proxy.
+ * A NULL input yields NULL.
+ */
+static struct vbus_device_proxy *to_dev(struct device *_dev)
+{
+ return _dev ? container_of(_dev, struct vbus_device_proxy, dev) : NULL;
+}
+/*
+ * Map an embedded struct device_driver back to its enclosing vbus_driver.
+ * Unlike to_dev(), no NULL check is performed.
+ */
+static struct vbus_driver *to_drv(struct device_driver *_drv)
+{
+ return container_of(_drv, struct vbus_driver, drv);
+}
+
+/*
+ * Bus match callback, run whenever a driver or device is added: a device
+ * and a driver pair up when their type strings are identical.
+ * Returns non-zero on a match.
+ */
+static int vbus_dev_proxy_match(struct device *_dev, struct device_driver *_drv)
+{
+	const struct vbus_device_proxy *proxy = to_dev(_dev);
+	const struct vbus_driver *driver = to_drv(_drv);
+
+	return strcmp(proxy->type, driver->type) == 0;
+}
+
+/*
+ * Bus uevent callback: publish MODALIAS=vbus-proxy:<type> for the device.
+ * Returns 0, or -ENOMEM if the variable could not be added.
+ */
+static int vbus_dev_proxy_uevent(struct device *_dev, struct kobj_uevent_env *env)
+{
+	struct vbus_device_proxy *proxy = to_dev(_dev);
+	int failed;
+
+	failed = add_uevent_var(env, "MODALIAS=vbus-proxy:%s", proxy->type);
+
+	return failed ? -ENOMEM : 0;
+}
+
+/*
+ * Driver-core probe hook, called once the bus has matched a device to a
+ * driver.  The paired driver is taken from the device and its optional
+ * ->probe() operation is invoked; a driver without one succeeds trivially.
+ */
+static int vbus_dev_proxy_probe(struct device *_dev)
+{
+	struct vbus_driver *driver = to_drv(_dev->driver);
+	struct vbus_device_proxy *proxy = to_dev(_dev);
+
+	if (!driver->ops->probe)
+		return 0;
+
+	return driver->ops->probe(proxy);
+}
+/*
+ * The bus that every proxy device and vbus driver attaches to, and the
+ * root device used as the parent of every registered proxy (see
+ * vbus_device_proxy_register()).
+ */
+static struct bus_type vbus_proxy = {
+ .name = VBUS_PROXY_NAME,
+ .match = vbus_dev_proxy_match,
+ .uevent = vbus_dev_proxy_uevent,
+};
+
+static struct device vbus_proxy_rootdev = {
+ .parent = NULL,
+ .init_name = VBUS_PROXY_NAME,
+};
+/*
+ * Register the bus and its root device.  Runs as a postcore initcall; a
+ * failure of either registration is treated as fatal (BUG_ON).
+ */
+static int __init vbus_init(void)
+{
+ int ret;
+
+ ret = bus_register(&vbus_proxy);
+ BUG_ON(ret < 0);
+
+ ret = device_register(&vbus_proxy_rootdev);
+ BUG_ON(ret < 0);
+
+ return 0;
+}
+
+postcore_initcall(vbus_init);
+/*
+ * struct device release hook for proxies: hands the enclosing
+ * vbus_device_proxy to its own ops->release().
+ */
+static void device_release(struct device *dev)
+{
+ struct vbus_device_proxy *_dev;
+
+ _dev = container_of(dev, struct vbus_device_proxy, dev);
+
+ _dev->ops->release(_dev);
+}
+/*
+ * Read-only sysfs "modalias" attribute: prints "vbus-proxy:<type>", the
+ * same alias that vbus_dev_proxy_uevent() exports as MODALIAS.
+ */
+static ssize_t _show_modalias(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "vbus-proxy:%s\n", to_dev(dev)->type);
+}
+static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, _show_modalias, NULL);
+/*
+ * Register a new proxy device on the vbus-proxy bus.
+ *
+ * The device is parented to the bus root device, given this file's release
+ * hook, registered, and then gets its "modalias" attribute.  If creating
+ * the attribute fails the device is unregistered again.
+ *
+ * Returns 0 on success or the negative error from the driver core.
+ *
+ * NOTE(review): the driver-core documentation asks for put_device() after
+ * a failed device_register(); this path just returns -- confirm with the
+ * callers' expectations before changing it.
+ */
+int vbus_device_proxy_register(struct vbus_device_proxy *new)
+{
+ int ret;
+
+ new->dev.parent = &vbus_proxy_rootdev;
+ new->dev.bus = &vbus_proxy;
+ new->dev.release = &device_release;
+
+ ret = device_register(&new->dev);
+ if (ret < 0)
+ return ret;
+
+ ret = device_create_file(&new->dev, &dev_attr_modalias);
+ if (ret < 0) {
+ device_unregister(&new->dev);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vbus_device_proxy_register);
+/*
+ * Reverse of vbus_device_proxy_register(): remove the "modalias" attribute
+ * and unregister the device.
+ */
+void vbus_device_proxy_unregister(struct vbus_device_proxy *dev)
+{
+ device_remove_file(&dev->dev, &dev_attr_modalias);
+ device_unregister(&dev->dev);
+}
+EXPORT_SYMBOL_GPL(vbus_device_proxy_unregister);
+/*
+ * bus_find_device() predicate: @data points at a u64 id; returns non-zero
+ * when the proxy's id equals it.
+ */
+static int match_device_id(struct device *_dev, void *data)
+{
+ struct vbus_device_proxy *dev = to_dev(_dev);
+ u64 id = *(u64 *)data;
+
+ return dev->id == id;
+}
+/*
+ * Look up a registered proxy by its id.  Returns the proxy, or NULL when
+ * no device on the bus has that id.
+ *
+ * NOTE(review): bus_find_device() returns the device with a reference
+ * held; callers are presumably expected to drop it with put_device() --
+ * verify against the callers.
+ */
+struct vbus_device_proxy *vbus_device_proxy_find(u64 id)
+{
+ struct device *dev;
+
+ dev = bus_find_device(&vbus_proxy, NULL, &id, &match_device_id);
+
+ return to_dev(dev); /* to_dev() maps a NULL result to NULL */
+}
+EXPORT_SYMBOL_GPL(vbus_device_proxy_find);
+
+/*
+ * Driver-core remove hook: forward the unbind of @_dev to the vbus
+ * driver's optional ->remove() operation, mirroring
+ * vbus_dev_proxy_probe().  A driver without one succeeds trivially.
+ */
+static int vbus_dev_proxy_remove(struct device *_dev)
+{
+	struct vbus_device_proxy *dev = to_dev(_dev);
+	struct vbus_driver *drv = to_drv(_dev->driver);
+
+	if (drv->ops->remove)
+		return drv->ops->remove(dev);
+
+	return 0;
+}
+
+/*
+ * Register a vbus driver on the vbus-proxy bus.
+ *
+ * The embedded device_driver is named after the driver's type string and
+ * wired to this file's probe/remove trampolines, so that both
+ * vbus_driver_ops callbacks are reachable from the driver core.  Without
+ * the remove trampoline a driver's ->remove() would never run on unbind.
+ *
+ * Returns driver_register()'s result.
+ */
+int vbus_driver_register(struct vbus_driver *new)
+{
+	new->drv.bus = &vbus_proxy;
+	new->drv.name = new->type;
+	new->drv.owner = new->owner;
+	new->drv.probe = vbus_dev_proxy_probe;
+	new->drv.remove = vbus_dev_proxy_remove;
+
+	return driver_register(&new->drv);
+}
+EXPORT_SYMBOL_GPL(vbus_driver_register);
+/*
+ * Unregister a driver previously passed to vbus_driver_register().
+ */
+void vbus_driver_unregister(struct vbus_driver *drv)
+{
+ driver_unregister(&drv->drv);
+}
+EXPORT_SYMBOL_GPL(vbus_driver_unregister);
+
+/*
+ *---------------------------------
+ * driver-side IOQ helper
+ *---------------------------------
+ *
+ * vbus_driver_ioq_release() is the ioq ->release hook for queues built by
+ * vbus_driver_ioq_alloc(): it frees the ring header and then the ioq
+ * object itself.
+ */
+static void
+vbus_driver_ioq_release(struct ioq *ioq)
+{
+ kfree(ioq->head_desc);
+ kfree(ioq);
+}
+
+static struct ioq_ops vbus_driver_ioq_ops = {
+ .release = vbus_driver_ioq_release,
+};
+
+
+/*
+ * Allocate an IOQ and register its ring with a device as shared memory.
+ *
+ * @dev:   device proxy the queue is attached to
+ * @name:  name handed to the device's shm operation
+ * @id:    shm id handed to the device's shm operation
+ * @prio:  signal priority handed to the device's shm operation
+ * @count: number of ring descriptors
+ * @ioq:   receives the new queue on success
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the negative
+ * error from the shm registration.  On failure nothing is left allocated
+ * and *ioq is not written.
+ */
+int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, const char *name,
+			  int id, int prio, size_t count, struct ioq **ioq)
+{
+	size_t ringlen = IOQ_HEAD_DESC_SIZE(count);
+	struct shm_signal *sig = NULL;
+	struct ioq_ring_head *ring;
+	struct ioq *queue;
+	int rc;
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return -ENOMEM;
+
+	ring = kzalloc(ringlen, GFP_KERNEL | GFP_DMA);
+	if (!ring) {
+		kfree(queue);
+		return -ENOMEM;
+	}
+
+	ring->magic = IOQ_RING_MAGIC;
+	ring->ver = IOQ_RING_VER;
+	ring->count = cpu_to_le32(count);
+
+	/* the signal descriptor lives inside the ring header */
+	rc = dev->ops->shm(dev, name, id, prio, ring, ringlen,
+			   &ring->signal, &sig, 0);
+	if (rc < 0) {
+		kfree(queue);
+		kfree(ring);
+
+		if (sig)
+			shm_signal_put(sig);
+
+		return rc;
+	}
+
+	ioq_init(queue, &vbus_driver_ioq_ops, ioq_locality_north,
+		 ring, sig, count);
+
+	*ioq = queue;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vbus_driver_ioq_alloc);
diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c
new file mode 100644
index 000000000000..0d513248dae6
--- /dev/null
+++ b/drivers/vbus/pci-bridge.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (C) 2009 Novell. All Rights Reserved.
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/ioq.h>
+#include <linux/interrupt.h>
+#include <linux/vbus_driver.h>
+#include <linux/vbus_pci.h>
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
+
+#define VBUS_PCI_NAME "pci-to-vbus-bridge"
+/*
+ * State of the single PCI-to-vbus bridge instance supported per kernel.
+ */
+struct vbus_pci {
+ spinlock_t lock; /* taken around bridgecall register access and shms list updates */
+ struct pci_dev *dev;
+ struct ioq eventq; /* queue the host uses to deliver bridge events */
+ struct vbus_pci_event *ring; /* event buffers referenced by the eventq descriptors */
+ struct vbus_pci_regs *regs; /* mapping of BAR 0 */
+ struct vbus_pci_signals *signals; /* mapping of BAR 1 */
+ int irq;
+ bool enabled; /* checked by probe to refuse a second bridge */
+ struct {
+ struct dentry *fs;
+ int events;
+ int qnotify;
+ int qinject;
+ int notify;
+ int inject;
+ int bridgecalls;
+ int buscalls;
+ } stats; /* counters printed by the debugfs show routine */
+};
+
+static struct vbus_pci vbus_pci;
+/*
+ * One device discovered behind the bridge; wraps the generic proxy.
+ */
+struct vbus_pci_device {
+ char type[VBUS_MAX_DEVTYPE_LEN]; /* storage that vdev.type points at */
+ u64 handle; /* set by a successful open, 0 while closed */
+ struct list_head shms; /* struct _signal entries created via ->shm() */
+ struct vbus_device_proxy vdev;
+ struct work_struct drop; /* deferred unregistration, see deferred_devdrop() */
+};
+/* Per-cpu call descriptor filled in by vbus_pci_buscall() */
+static DEFINE_PER_CPU(struct vbus_pci_fastcall_desc, vbus_pci_percpu_fastcall)
+____cacheline_aligned;
+
+/*
+ * -------------------
+ * common routines
+ * -------------------
+ */
+/*
+ * Issue a call to the bridge through the "bridgecall" register.
+ *
+ * @nr:   call vector
+ * @data: argument buffer, handed over by physical address
+ * @len:  length of @data
+ *
+ * The descriptor is written to the register and the result is read back
+ * from the same register, both under vbus_pci.lock.  Returns that value.
+ */
+static int
+vbus_pci_bridgecall(unsigned long nr, void *data, unsigned long len)
+{
+ struct vbus_pci_call_desc params = {
+ .vector = nr,
+ .len = len,
+ .datap = __pa(data),
+ };
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&vbus_pci.lock, flags);
+
+ memcpy_toio(&vbus_pci.regs->bridgecall, &params, sizeof(params));
+ ret = ioread32(&vbus_pci.regs->bridgecall);
+
+ spin_unlock_irqrestore(&vbus_pci.lock, flags);
+
+ vbus_pci.stats.bridgecalls++; /* updated outside the lock */
+
+ return ret;
+}
+
+/*
+ * Issue a bus call using this cpu's fastcall descriptor.
+ *
+ * @nr:   call vector
+ * @data: argument buffer, handed over by physical address
+ * @len:  length of @data
+ *
+ * The per-cpu descriptor is filled in, the cpu number is written to the
+ * "fastcall" signal register, and the descriptor's result field is
+ * returned (presumably filled in by the host while the write is
+ * processed -- confirm against the host side).
+ *
+ * Preemption stays disabled from get_cpu_var() to put_cpu_var() so the
+ * descriptor and the cpu id written to the register refer to the same
+ * cpu.  get_cpu_var() already disables preemption, so no separate
+ * preempt_disable()/preempt_enable() pair is used; pairing it with a lone
+ * preempt_enable() would leave the preempt count unbalanced.
+ */
+static int
+vbus_pci_buscall(unsigned long nr, void *data, unsigned long len)
+{
+	struct vbus_pci_fastcall_desc *params;
+	int ret;
+
+	params = &get_cpu_var(vbus_pci_percpu_fastcall);
+
+	params->call.vector = nr;
+	params->call.len = len;
+	params->call.datap = __pa(data);
+
+	iowrite32(smp_processor_id(), &vbus_pci.signals->fastcall);
+
+	ret = params->result;
+
+	put_cpu_var(vbus_pci_percpu_fastcall);
+
+	vbus_pci.stats.buscalls++;
+
+	return ret;
+}
+/* Map a generic proxy back to the vbus_pci_device that embeds it */
+static struct vbus_pci_device *
+to_dev(struct vbus_device_proxy *vdev)
+{
+ return container_of(vdev, struct vbus_pci_device, vdev);
+}
+/*
+ * Stamp the shared descriptor with the shm-signal magic/version and
+ * initialise @signal on top of it with north locality and the given ops.
+ */
+static void
+_signal_init(struct shm_signal *signal, struct shm_signal_desc *desc,
+ struct shm_signal_ops *ops)
+{
+ desc->magic = SHM_SIGNAL_MAGIC;
+ desc->ver = SHM_SIGNAL_VER;
+
+ shm_signal_init(signal, shm_locality_north, ops, desc);
+}
+
+/*
+ * -------------------
+ * _signal
+ * -------------------
+ */
+/*
+ * Guest-side bookkeeping for one shm-signal registered with the host.
+ * The address of this object is passed to the host as the signal cookie.
+ */
+struct _signal {
+ char name[64];
+ struct vbus_pci *pcivbus;
+ struct shm_signal signal;
+ u32 handle; /* value returned by the DEVSHM call; written to the shmsignal register to inject */
+ struct rb_node node;
+ struct list_head list; /* link on vbus_pci_device.shms */
+ int irq;
+ struct irq_desc *desc;
+};
+/* Map an embedded shm_signal back to its enclosing struct _signal */
+static struct _signal *
+to_signal(struct shm_signal *signal)
+{
+ return container_of(signal, struct _signal, signal);
+}
+/*
+ * shm_signal ->inject hook: notify the host by writing the signal's handle
+ * to the "shmsignal" register.  Always returns 0.
+ */
+static int
+_signal_inject(struct shm_signal *signal)
+{
+ struct _signal *_signal = to_signal(signal);
+
+ vbus_pci.stats.inject++;
+ iowrite32(_signal->handle, &vbus_pci.signals->shmsignal);
+
+ return 0;
+}
+/* shm_signal ->release hook: free the enclosing struct _signal */
+static void
+_signal_release(struct shm_signal *signal)
+{
+ struct _signal *_signal = to_signal(signal);
+
+ kfree(_signal);
+}
+
+static struct shm_signal_ops _signal_ops = {
+ .inject = _signal_inject,
+ .release = _signal_release,
+};
+
+static void shmsignal_disconnect(struct _signal *_signal);
+
+/*
+ * -------------------
+ * vbus_device_proxy routines
+ * -------------------
+ */
+/*
+ * ->open() for PCI-bridged devices: ask the host to open the device with
+ * the requested ABI @version and remember the handle it hands back.
+ *
+ * Returns 0 on success, -EINVAL if the device already has a handle (is
+ * already open), or the negative error from the bus call.  @flags is not
+ * used.
+ */
+static int
+vbus_pci_device_open(struct vbus_device_proxy *vdev, int version, int flags)
+{
+ struct vbus_pci_device *dev = to_dev(vdev);
+ struct vbus_pci_deviceopen params;
+ int ret;
+
+ if (dev->handle)
+ return -EINVAL;
+
+ params.devid = vdev->id;
+ params.version = version;
+
+ ret = vbus_pci_buscall(VBUS_PCI_HC_DEVOPEN,
+ &params, sizeof(params));
+ if (ret < 0)
+ return ret;
+
+ dev->handle = params.handle; /* read back from the call buffer */
+
+ return 0;
+}
+/*
+ * ->close() for PCI-bridged devices: drop every shm-signal still linked on
+ * the device, tell the host to close the device, and clear the handle.
+ *
+ * Returns -EINVAL if the device is not open, 0 otherwise; a failing
+ * DEVCLOSE call is only logged.  @flags is not used.
+ */
+static int
+vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags)
+{
+ struct vbus_pci_device *dev = to_dev(vdev);
+ unsigned long iflags;
+ int ret;
+
+ if (!dev->handle)
+ return -EINVAL;
+
+ spin_lock_irqsave(&vbus_pci.lock, iflags);
+
+ while (!list_empty(&dev->shms)) {
+ struct _signal *_signal;
+
+ _signal = list_first_entry(&dev->shms, struct _signal, list);
+
+ list_del(&_signal->list);
+ shmsignal_disconnect(_signal);
+
+ /* the put is done with the lock dropped, then the list is re-examined */
+ spin_unlock_irqrestore(&vbus_pci.lock, iflags);
+ shm_signal_put(&_signal->signal);
+ spin_lock_irqsave(&vbus_pci.lock, iflags);
+ }
+
+ spin_unlock_irqrestore(&vbus_pci.lock, iflags);
+
+ /*
+ * The DEVICECLOSE will implicitly close all of the shm on the
+ * host-side, so there is no need to do an explicit per-shm
+ * hypercall
+ */
+ ret = vbus_pci_buscall(VBUS_PCI_HC_DEVCLOSE,
+ &dev->handle, sizeof(dev->handle));
+
+ if (ret < 0)
+ printk(KERN_ERR "VBUS-PCI: Error closing device %s/%lld: %d\n",
+ vdev->type, vdev->id, ret);
+
+ dev->handle = 0;
+
+ return 0;
+}
+
+/*
+ * -------------------
+ * shmsignal interrupt routines
+ * -------------------
+ */
+
+/*
+ * We abstract these routines so that we can drop in irqchip later.
+ *
+ * For now shmsignal_wakeup() simply forwards to _shm_signal_wakeup(),
+ * shmsignal_connect() does nothing and returns 0, and
+ * shmsignal_disconnect() does nothing.
+ */
+
+static void
+shmsignal_wakeup(struct _signal *_signal)
+{
+ _shm_signal_wakeup(&_signal->signal);
+}
+
+static int
+shmsignal_connect(struct _signal *_signal)
+{
+ return 0;
+}
+
+static void
+shmsignal_disconnect(struct _signal *_signal)
+{
+
+}
+/*
+ * ->shm() for PCI-bridged devices: register a region of guest memory with
+ * the host, optionally with a shm-signal attached.
+ *
+ * @vdev:   device the region belongs to (must be open)
+ * @name:   name for the signal; a "dev<id>-id<id>" name is generated if NULL
+ * @id:     shm id passed to the host
+ * @prio:   signal priority passed to the host
+ * @ptr:    start of the region, passed by physical address
+ * @len:    length of the region
+ * @sdesc:  signal descriptor; must lie entirely inside [@ptr, @ptr + @len)
+ *          when @signal is given
+ * @signal: if non-NULL, a signal is created and returned here
+ * @flags:  passed through to the host
+ *
+ * Reference counting when a signal is created: one reference from
+ * initialisation, one taken for the host (dropped by a SHMCLOSE event) and
+ * one taken on success before the signal is handed to the caller.  On a
+ * failed host call the first two are dropped again.
+ *
+ * Returns 0 on success, -EINVAL for a closed device or a misplaced
+ * descriptor, -ENOMEM, or the negative error from the bus call.
+ */
+static int
+vbus_pci_device_shm(struct vbus_device_proxy *vdev, const char *name,
+ int id, int prio,
+ void *ptr, size_t len,
+ struct shm_signal_desc *sdesc, struct shm_signal **signal,
+ int flags)
+{
+ struct vbus_pci_device *dev = to_dev(vdev);
+ struct _signal *_signal = NULL;
+ struct vbus_pci_deviceshm params;
+ unsigned long iflags;
+ int ret;
+
+ if (!dev->handle)
+ return -EINVAL;
+
+ params.devh = dev->handle;
+ params.id = id;
+ params.flags = flags;
+ params.datap = (u64)__pa(ptr);
+ params.len = len;
+
+ if (signal) {
+ /*
+ * The signal descriptor must be embedded within the
+ * provided ptr
+ */
+ if (!sdesc
+ || (len < sizeof(*sdesc))
+ || ((void *)sdesc < ptr)
+ || ((void *)sdesc > (ptr + len - sizeof(*sdesc))))
+ return -EINVAL;
+
+ _signal = kzalloc(sizeof(*_signal), GFP_KERNEL);
+ if (!_signal)
+ return -ENOMEM;
+
+ _signal_init(&_signal->signal, sdesc, &_signal_ops);
+
+ /*
+ * take another reference for the host. This is dropped
+ * by a SHMCLOSE event
+ */
+ shm_signal_get(&_signal->signal);
+
+ params.signal.offset = (u64)(unsigned long)sdesc -
+ (u64)(unsigned long)ptr;
+ params.signal.prio = prio;
+ params.signal.cookie = (u64)(unsigned long)_signal;
+
+ } else
+ params.signal.offset = -1; /* yes, this is a u32, but its ok */
+
+ ret = vbus_pci_buscall(VBUS_PCI_HC_DEVSHM,
+ &params, sizeof(params));
+ if (ret < 0)
+ goto fail;
+
+ if (signal) {
+
+ BUG_ON(ret < 0);
+
+ _signal->handle = ret; /* non-negative result is the signal handle */
+
+ if (!name)
+ snprintf(_signal->name, sizeof(_signal->name),
+ "dev%lld-id%d", vdev->id, id);
+ else
+ snprintf(_signal->name, sizeof(_signal->name),
+ "%s", name);
+
+ shmsignal_connect(_signal);
+
+ spin_lock_irqsave(&vbus_pci.lock, iflags);
+ list_add_tail(&_signal->list, &dev->shms);
+ spin_unlock_irqrestore(&vbus_pci.lock, iflags);
+
+ shm_signal_get(&_signal->signal);
+ *signal = &_signal->signal;
+ }
+
+ return 0;
+
+fail:
+ if (_signal) {
+ /*
+ * We held two references above, so we need to drop
+ * both of them
+ */
+ shm_signal_put(&_signal->signal);
+ shm_signal_put(&_signal->signal);
+ }
+
+ return ret;
+}
+
+/*
+ * ->call() for PCI-bridged devices: invoke function @func on an open
+ * device, passing @data (by physical address), @len and @flags to the
+ * host.
+ *
+ * Returns the bus call's result, or -EINVAL if the device is not open.
+ */
+static int
+vbus_pci_device_call(struct vbus_device_proxy *vdev, u32 func, void *data,
+		     size_t len, int flags)
+{
+	struct vbus_pci_device *pdev = to_dev(vdev);
+	struct vbus_pci_devicecall params;
+
+	if (!pdev->handle)
+		return -EINVAL;
+
+	memset(&params, 0, sizeof(params));
+	params.devh = pdev->handle;
+	params.func = func;
+	params.datap = (u64)__pa(data);
+	params.len = len;
+	params.flags = flags;
+
+	return vbus_pci_buscall(VBUS_PCI_HC_DEVCALL, &params, sizeof(params));
+}
+/*
+ * ->release() for PCI-bridged devices: close the device (the result is
+ * ignored, so an already-closed device is fine) and free the wrapper.
+ */
+static void
+vbus_pci_device_release(struct vbus_device_proxy *vdev)
+{
+ struct vbus_pci_device *_dev = to_dev(vdev);
+
+ vbus_pci_device_close(vdev, 0);
+
+ kfree(_dev);
+}
+/* Proxy operations installed on every device created by deferred_devadd() */
+static struct vbus_device_proxy_ops vbus_pci_device_ops = {
+ .open = vbus_pci_device_open,
+ .close = vbus_pci_device_close,
+ .shm = vbus_pci_device_shm,
+ .call = vbus_pci_device_call,
+ .release = vbus_pci_device_release,
+};
+
+/*
+ * -------------------
+ * vbus events
+ * -------------------
+ */
+/*
+ * A DEVADD event copied out of the event queue so that the device can be
+ * registered later from a work item (see event_devadd()).
+ */
+struct deferred_devadd_event {
+ struct work_struct work;
+ struct vbus_pci_add_event event;
+};
+
+static void deferred_devdrop(struct work_struct *work);
+
+/*
+ * Work handler for a queued DEVADD event: allocate a vbus_pci_device,
+ * fill it in from the saved event and register it on the vbus-proxy bus.
+ *
+ * The deferred event is owned by this handler and is freed on every path,
+ * including the out-of-memory one.  A registration failure is fatal
+ * (panic).
+ */
+static void
+deferred_devadd(struct work_struct *work)
+{
+	struct deferred_devadd_event *_event;
+	struct vbus_pci_device *new;
+	int ret;
+
+	_event = container_of(work, struct deferred_devadd_event, work);
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new) {
+		printk(KERN_ERR "VBUS_PCI: Out of memory on add_event\n");
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&new->shms);
+
+	/* vdev.type points into our own copy of the type string */
+	memcpy(new->type, _event->event.type, VBUS_MAX_DEVTYPE_LEN);
+	new->vdev.type = new->type;
+	new->vdev.id = _event->event.id;
+	new->vdev.ops = &vbus_pci_device_ops;
+
+	dev_set_name(&new->vdev.dev, "%lld", _event->event.id);
+
+	INIT_WORK(&new->drop, deferred_devdrop);
+
+	ret = vbus_device_proxy_register(&new->vdev);
+	if (ret < 0)
+		panic("failed to register device %lld(%s): %d\n",
+		      new->vdev.id, new->type, ret);
+
+out:
+	kfree(_event);
+}
+/*
+ * Work handler for a DEVDROP event: unregister the proxy that embeds the
+ * work item.
+ */
+static void
+deferred_devdrop(struct work_struct *work)
+{
+ struct vbus_pci_device *dev;
+
+ dev = container_of(work, struct vbus_pci_device, drop);
+ vbus_device_proxy_unregister(&dev->vdev);
+}
+
+/*
+ * Handle a DEVADD event: snapshot the event (atomic allocation) and
+ * schedule deferred_devadd() to do the actual registration.  If the
+ * snapshot cannot be allocated the event is logged and dropped.
+ */
+static void
+event_devadd(struct vbus_pci_add_event *event)
+{
+	struct deferred_devadd_event *deferred;
+
+	deferred = kzalloc(sizeof(*deferred), GFP_ATOMIC);
+	if (deferred) {
+		/* the queue slot is recycled by the caller, so keep a copy */
+		deferred->event = *event;
+		INIT_WORK(&deferred->work, deferred_devadd);
+
+		schedule_work(&deferred->work);
+		return;
+	}
+
+	printk(KERN_ERR
+	       "VBUS_PCI: Out of ATOMIC memory on add_event\n");
+}
+/*
+ * Handle a DEVDROP event: look the device up and schedule its deferred
+ * unregistration.  An unknown device is logged and ignored.
+ *
+ * The lookup matches on the proxy's id, so event->handle is presumably the
+ * device id here -- verify against the host ABI.
+ *
+ * NOTE(review): the lookup goes through bus_find_device(), which returns a
+ * referenced device; no matching put_device() is visible on this path.
+ */
+static void
+event_devdrop(struct vbus_pci_handle_event *event)
+{
+ struct vbus_device_proxy *dev = vbus_device_proxy_find(event->handle);
+
+ if (!dev) {
+ printk(KERN_WARNING "VBUS-PCI: devdrop failed: %lld\n",
+ event->handle);
+ return;
+ }
+
+ schedule_work(&to_dev(dev)->drop);
+}
+/*
+ * Handle a SHMSIGNAL event: the event handle is the cookie given to the
+ * host in vbus_pci_device_shm(), i.e. the address of our struct _signal.
+ */
+static void
+event_shmsignal(struct vbus_pci_handle_event *event)
+{
+ struct _signal *_signal = (struct _signal *)(unsigned long)event->handle;
+
+ vbus_pci.stats.notify++;
+
+ shmsignal_wakeup(_signal);
+}
+/*
+ * Handle a SHMCLOSE event: the event handle is the address of our
+ * struct _signal; drop the reference that was held on the host's behalf.
+ */
+static void
+event_shmclose(struct vbus_pci_handle_event *event)
+{
+ struct _signal *_signal = (struct _signal *)(unsigned long)event->handle;
+
+ /*
+ * This reference was taken during the DEVICESHM call
+ */
+ shm_signal_put(&_signal->signal);
+}
+
+/*
+ * -------------------
+ * eventq routines
+ * -------------------
+ */
+
+static struct ioq_notifier eventq_notifier;
+/*
+ * Allocate the event buffers and prime the event queue with them.
+ *
+ * @qlen: number of event buffers, one per queue descriptor
+ *
+ * Every descriptor is pointed at its own zeroed vbus_pci_event and marked
+ * valid, the notifier is attached, and notifications are enabled.
+ * Returns 0 on success or -ENOMEM; iterator failures are fatal (BUG_ON).
+ */
+static int __devinit
+eventq_init(int qlen)
+{
+ struct ioq_iterator iter;
+ int ret;
+ int i;
+
+ vbus_pci.ring = kzalloc(sizeof(struct vbus_pci_event) * qlen,
+ GFP_KERNEL);
+ if (!vbus_pci.ring)
+ return -ENOMEM;
+
+ /*
+ * We want to iterate on the "valid" index. By default the iterator
+ * will not "autoupdate" which means it will not hypercall the host
+ * with our changes. This is good, because we are really just
+ * initializing stuff here anyway. Note that you can always manually
+ * signal the host with ioq_signal() if the autoupdate feature is not
+ * used.
+ */
+ ret = ioq_iter_init(&vbus_pci.eventq, &iter, ioq_idxtype_valid, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * Seek to the tail of the valid index (which should be our first
+ * item since the queue is brand-new)
+ */
+ ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * Now populate each descriptor with an empty vbus_event and mark it
+ * valid
+ */
+ for (i = 0; i < qlen; i++) {
+ struct vbus_pci_event *event = &vbus_pci.ring[i];
+ size_t len = sizeof(*event);
+ struct ioq_ring_desc *desc = iter.desc;
+
+ BUG_ON(iter.desc->valid);
+
+ desc->cookie = (u64)(unsigned long)event; /* virtual address, read back in eventq_wakeup() */
+ desc->ptr = cpu_to_le64(__pa(event));
+ desc->len = cpu_to_le64(len); /* total length */
+ desc->valid = 1;
+
+ /*
+ * This push operation will simultaneously advance the
+ * valid-tail index and increment our position in the queue
+ * by one.
+ */
+ ret = ioq_iter_push(&iter, 0);
+ BUG_ON(ret < 0);
+ }
+
+ vbus_pci.eventq.notifier = &eventq_notifier;
+
+ /*
+ * And finally, ensure that we can receive notification
+ */
+ ioq_notify_enable(&vbus_pci.eventq, 0);
+
+ return 0;
+}
+
+/*
+ * Invoked whenever the hypervisor ioq_signal()s our eventq
+ *
+ * Drains the queue from the in-use head: each event is dispatched on its
+ * eventid, its buffer is zeroed, and the descriptor is popped.  When no
+ * more north-owned descriptors remain, the host is signalled once.
+ */
+static void
+eventq_wakeup(struct ioq_notifier *notifier)
+{
+ struct ioq_iterator iter;
+ int ret;
+
+ /* We want to iterate on the head of the in-use index */
+ ret = ioq_iter_init(&vbus_pci.eventq, &iter, ioq_idxtype_inuse, 0);
+ BUG_ON(ret < 0);
+
+ ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+ BUG_ON(ret < 0);
+
+ /*
+ * The EOM is indicated by finding a packet that is still owned by
+ * the south side.
+ *
+ * FIXME: This in theory could run indefinitely if the host keeps
+ * feeding us events since there is nothing like a NAPI budget. We
+ * might need to address that
+ */
+ while (!iter.desc->sown) {
+ struct ioq_ring_desc *desc = iter.desc;
+ struct vbus_pci_event *event;
+
+ /* cookie holds the buffer's virtual address, set in eventq_init() */
+ event = (struct vbus_pci_event *)(unsigned long)desc->cookie;
+
+ switch (event->eventid) {
+ case VBUS_PCI_EVENT_DEVADD:
+ event_devadd(&event->data.add);
+ break;
+ case VBUS_PCI_EVENT_DEVDROP:
+ event_devdrop(&event->data.handle);
+ break;
+ case VBUS_PCI_EVENT_SHMSIGNAL:
+ event_shmsignal(&event->data.handle);
+ break;
+ case VBUS_PCI_EVENT_SHMCLOSE:
+ event_shmclose(&event->data.handle);
+ break;
+ default:
+ printk(KERN_WARNING "VBUS_PCI: Unexpected event %d\n",
+ event->eventid);
+ break;
+ };
+
+ memset(event, 0, sizeof(*event));
+
+ /* Advance the in-use head */
+ ret = ioq_iter_pop(&iter, 0);
+ BUG_ON(ret < 0);
+
+ vbus_pci.stats.events++;
+ }
+
+ /* And let the south side know that we changed the queue */
+ ioq_signal(&vbus_pci.eventq, 0);
+}
+/* Notifier attached to the eventq by eventq_init() */
+static struct ioq_notifier eventq_notifier = {
+ .signal = &eventq_wakeup,
+};
+
+/*
+ * Injected whenever the host issues an ioq_signal() on the eventq
+ *
+ * Counts the notification and forwards it to the eventq's shm-signal.
+ * Always reports IRQ_HANDLED.
+ */
+static irqreturn_t
+eventq_intr(int irq, void *dev)
+{
+ vbus_pci.stats.qnotify++;
+ _shm_signal_wakeup(vbus_pci.eventq.signal);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * -------------------
+ */
+/*
+ * shm_signal ->inject hook for the eventq: notify the host by writing to
+ * the dedicated "eventq" signal register.  Always returns 0.
+ */
+static int
+eventq_signal_inject(struct shm_signal *signal)
+{
+ vbus_pci.stats.qinject++;
+
+ /* The eventq uses the special-case handle=0 */
+ iowrite32(0, &vbus_pci.signals->eventq);
+
+ return 0;
+}
+/* shm_signal ->release hook for the eventq: the signal was kzalloc'ed by _ioq_init() */
+static void
+eventq_signal_release(struct shm_signal *signal)
+{
+ kfree(signal);
+}
+/* Signal operations handed to _signal_init() when the eventq is built */
+static struct shm_signal_ops eventq_signal_ops = {
+ .inject = eventq_signal_inject,
+ .release = eventq_signal_release,
+};
+
+/*
+ * -------------------
+ */
+/*
+ * ioq ->release hook for the eventq.  Intentionally empty: the queue is
+ * embedded in vbus_pci and its ring header is freed by vbus_pci_release().
+ */
+static void
+eventq_ioq_release(struct ioq *ioq)
+{
+ /* released as part of the vbus_pci object */
+}
+
+static struct ioq_ops eventq_ioq_ops = {
+ .release = eventq_ioq_release,
+};
+
+/*
+ * -------------------
+ */
+/*
+ * Release the bridge's resources: the debugfs entry, the irq, both BAR
+ * mappings, the PCI regions and device, and the event queue's ring header
+ * and event buffers.  Finally marks the bridge as not enabled.
+ */
+static void
+vbus_pci_release(void)
+{
+#ifdef CONFIG_DEBUG_FS
+ if (vbus_pci.stats.fs)
+ debugfs_remove(vbus_pci.stats.fs);
+#endif
+
+ if (vbus_pci.irq > 0)
+ free_irq(vbus_pci.irq, NULL);
+
+ if (vbus_pci.signals)
+ pci_iounmap(vbus_pci.dev, (void *)vbus_pci.signals);
+
+ if (vbus_pci.regs)
+ pci_iounmap(vbus_pci.dev, (void *)vbus_pci.regs);
+
+ pci_release_regions(vbus_pci.dev);
+ pci_disable_device(vbus_pci.dev);
+
+ kfree(vbus_pci.eventq.head_desc);
+ kfree(vbus_pci.ring);
+
+ vbus_pci.enabled = false;
+}
+/*
+ * Negotiate with the bridge: send our ABI magic and hypercall version
+ * with no capabilities requested.  Returns the bridge call's result.
+ */
+static int __devinit
+vbus_pci_open(void)
+{
+ struct vbus_pci_bridge_negotiate params = {
+ .magic = VBUS_PCI_ABI_MAGIC,
+ .version = VBUS_PCI_HC_VERSION,
+ .capabilities = 0,
+ };
+
+ return vbus_pci_bridgecall(VBUS_PCI_BRIDGE_NEGOTIATE,
+ &params, sizeof(params));
+}
+/*
+ * QLEN is the number of eventq entries advertised to the host below.
+ * vbus_pci_eventq_register() registers that one event queue with the
+ * bridge, passing the physical addresses of the ring header and of the
+ * event buffers.  Returns the bridge call's result.
+ */
+#define QLEN 1024
+
+static int __devinit
+vbus_pci_eventq_register(void)
+{
+ struct vbus_pci_busreg params = {
+ .count = 1,
+ .eventq = {
+ {
+ .count = QLEN,
+ .ring = (u64)__pa(vbus_pci.eventq.head_desc),
+ .data = (u64)__pa(vbus_pci.ring),
+ },
+ },
+ };
+
+ return vbus_pci_bridgecall(VBUS_PCI_BRIDGE_QREG,
+ &params, sizeof(params));
+}
+
+/*
+ * Build an IOQ whose signal uses the eventq signal ops.
+ *
+ * @ringsize: number of ring descriptors
+ * @ioq:      caller-provided queue object to initialise
+ * @ops:      ioq operations to install
+ *
+ * Allocates the ring header and a shm_signal, stamps the header, binds the
+ * signal to the descriptor embedded in the header and initialises @ioq
+ * with north locality.  Returns 0 on success or -ENOMEM.
+ */
+static int __devinit
+_ioq_init(size_t ringsize, struct ioq *ioq, struct ioq_ops *ops)
+{
+	size_t ringlen = IOQ_HEAD_DESC_SIZE(ringsize);
+	struct ioq_ring_head *ring;
+	struct shm_signal *sig;
+
+	ring = kzalloc(ringlen, GFP_KERNEL | GFP_DMA);
+	if (!ring)
+		return -ENOMEM;
+
+	sig = kzalloc(sizeof(*sig), GFP_KERNEL);
+	if (!sig)
+		goto free_ring;
+
+	ring->magic = IOQ_RING_MAGIC;
+	ring->ver = IOQ_RING_VER;
+	ring->count = cpu_to_le32(ringsize);
+
+	_signal_init(sig, &ring->signal, &eventq_signal_ops);
+
+	ioq_init(ioq, ops, ioq_locality_north, ring, sig, ringsize);
+
+	return 0;
+
+free_ring:
+	kfree(ring);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * seq_file show callback for the debugfs stats file: prints one line per
+ * counter in vbus_pci.stats, labelled with the field name.
+ */
+static int _debugfs_seq_show(struct seq_file *m, void *p)
+{
+#define SHOW_STAT(field) \
+	seq_printf(m, " .%-30s: %d\n", #field, (int)vbus_pci.stats.field)
+
+	SHOW_STAT(events);
+	SHOW_STAT(qnotify);
+	SHOW_STAT(qinject);
+	SHOW_STAT(notify);
+	SHOW_STAT(inject);
+	SHOW_STAT(bridgecalls);
+	SHOW_STAT(buscalls);
+
+#undef SHOW_STAT
+
+	return 0;
+}
+
+/* open() for the stats file: bind _debugfs_seq_show() as a single-shot seq_file */
+static int _debugfs_fops_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, _debugfs_seq_show, priv);
+}
+
+/*
+ * File operations for the debugfs stats file created in vbus_pci_probe();
+ * built on the single_open()/seq_file helpers.
+ */
+static const struct file_operations stat_fops = {
+ .open = _debugfs_fops_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .owner = THIS_MODULE,
+};
+#endif
+
+/*
+ * PCI probe callback: bring up the (single) PCI-to-vbus bridge.
+ *
+ * Steps, in order: check the ABI revision, enable the device and claim
+ * its BARs, map BAR 0 (registers) and BAR 1 (signals), negotiate with the
+ * host, set up the event queue, enable MSI and install the interrupt
+ * handler, register one fastcall vector per possible cpu, register the
+ * event queue with the host and finally create the debugfs stats file.
+ *
+ * @pdev: the PCI device being probed
+ * @ent:  matching entry of vbus_pci_tbl (unused)
+ *
+ * Returns 0 on success or a negative errno.  Every failure path returns a
+ * non-zero error so the PCI core never binds a half-initialized bridge.
+ */
+static int __devinit
+vbus_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int ret;
+	int cpu;
+
+	if (vbus_pci.enabled)
+		return -EEXIST; /* we only support one bridge per kernel */
+
+	if (pdev->revision != VBUS_PCI_ABI_VERSION) {
+		printk(KERN_DEBUG "VBUS_PCI: expected ABI version %d, got %d\n",
+		       VBUS_PCI_ABI_VERSION,
+		       pdev->revision);
+		return -ENODEV;
+	}
+
+	vbus_pci.dev = pdev;
+
+	ret = pci_enable_device(pdev);
+	if (ret < 0)
+		return ret;
+
+	pci_set_master(pdev);
+
+	ret = pci_request_regions(pdev, VBUS_PCI_NAME);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not init BARs: %d\n", ret);
+		/*
+		 * Only the device has been enabled so far and the regions
+		 * are not ours to release, so unwind by hand instead of
+		 * going through vbus_pci_release().
+		 */
+		pci_disable_device(pdev);
+		return ret;
+	}
+
+	vbus_pci.regs = pci_iomap(pdev, 0, sizeof(struct vbus_pci_regs));
+	if (!vbus_pci.regs) {
+		printk(KERN_ERR "VBUS_PCI: Could not map BARs\n");
+		/* ret is still 0 from pci_request_regions(); set an error */
+		ret = -ENOMEM;
+		goto out_fail;
+	}
+
+	vbus_pci.signals = pci_iomap(pdev, 1, sizeof(struct vbus_pci_signals));
+	if (!vbus_pci.signals) {
+		printk(KERN_ERR "VBUS_PCI: Could not map BARs\n");
+		ret = -ENOMEM;
+		goto out_fail;
+	}
+
+	ret = vbus_pci_open();
+	if (ret < 0) {
+		printk(KERN_DEBUG "VBUS_PCI: Could not register with host: %d\n",
+		       ret);
+		goto out_fail;
+	}
+
+	/*
+	 * Allocate an IOQ to use for host-2-guest event notification
+	 */
+	ret = _ioq_init(QLEN, &vbus_pci.eventq, &eventq_ioq_ops);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not init eventq: %d\n", ret);
+		goto out_fail;
+	}
+
+	ret = eventq_init(QLEN);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not setup ring: %d\n", ret);
+		goto out_fail;
+	}
+
+	ret = pci_enable_msi(pdev);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not enable MSI: %d\n", ret);
+		goto out_fail;
+	}
+
+	vbus_pci.irq = pdev->irq;
+
+	ret = request_irq(pdev->irq, eventq_intr, 0, "vbus", NULL);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Failed to register IRQ %d: %d\n",
+		       pdev->irq, ret);
+		/*
+		 * The handler was never installed: forget the irq so that
+		 * vbus_pci_release() does not free_irq() it, and drop MSI
+		 * ourselves.
+		 */
+		vbus_pci.irq = 0;
+		pci_disable_msi(pdev);
+		goto out_fail;
+	}
+
+	/*
+	 * Add one fastcall vector per cpu so that we can do lockless
+	 * hypercalls
+	 */
+	for_each_possible_cpu(cpu) {
+		struct vbus_pci_fastcall_desc *desc =
+			&per_cpu(vbus_pci_percpu_fastcall, cpu);
+		struct vbus_pci_call_desc params = {
+			.vector = cpu,
+			.len = sizeof(*desc),
+			.datap = __pa(desc),
+		};
+
+		ret = vbus_pci_bridgecall(VBUS_PCI_BRIDGE_FASTCALL_ADD,
+					  &params, sizeof(params));
+		if (ret < 0) {
+			printk(KERN_ERR
+			       "VBUS_PCI: Failed to register cpu:%d: %d\n",
+			       cpu, ret);
+			goto out_fail;
+		}
+	}
+
+	/*
+	 * Finally register our queue on the host to start receiving events
+	 */
+	ret = vbus_pci_eventq_register();
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not register with host: %d\n",
+		       ret);
+		goto out_fail;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	vbus_pci.stats.fs = debugfs_create_file(VBUS_PCI_NAME, S_IRUGO,
+						NULL, NULL, &stat_fops);
+	if (IS_ERR(vbus_pci.stats.fs)) {
+		ret = PTR_ERR(vbus_pci.stats.fs);
+		/* do not leave an ERR_PTR behind for vbus_pci_release() */
+		vbus_pci.stats.fs = NULL;
+		printk(KERN_ERR "VBUS_PCI: error creating stats-fs: %d\n", ret);
+		goto out_fail;
+	}
+#endif
+
+	vbus_pci.enabled = true;
+
+	printk(KERN_INFO "Virtual-Bus: Copyright (c) 2009, " \
+	       "Gregory Haskins <ghaskins@novell.com>\n");
+
+	return 0;
+
+ out_fail:
+	vbus_pci_release();
+
+	return ret;
+}
+
+/* PCI remove callback: releases everything the probe acquired */
+static void __devexit vbus_pci_remove(struct pci_dev *pdev)
+{
+	vbus_pci_release();
+}
+
+/* PCI IDs claimed by this driver: vendor 0x11da, device 0x2000 */
+static DEFINE_PCI_DEVICE_TABLE(vbus_pci_tbl) = {
+ { PCI_DEVICE(0x11da, 0x2000) },
+ { 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, vbus_pci_tbl);
+
+/* Driver descriptor registered with the PCI core by vbus_pci_init() */
+static struct pci_driver vbus_pci_driver = {
+ .name = VBUS_PCI_NAME,
+ .id_table = vbus_pci_tbl,
+ .probe = vbus_pci_probe,
+ .remove = vbus_pci_remove,
+};
+
+/*
+ * Module entry point: start from a zeroed global bridge state, set up its
+ * lock and register the PCI driver.
+ *
+ * Returns the result of pci_register_driver().
+ */
+static int __init vbus_pci_init(void)
+{
+	int ret;
+
+	memset(&vbus_pci, 0, sizeof(vbus_pci));
+	spin_lock_init(&vbus_pci.lock);
+
+	ret = pci_register_driver(&vbus_pci_driver);
+
+	return ret;
+}
+
+/* Module exit point: unregister the PCI driver */
+static void __exit vbus_pci_exit(void)
+{
+	pci_unregister_driver(&vbus_pci_driver);
+}
+
+module_init(vbus_pci_init);
+module_exit(vbus_pci_exit);
+
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 493b1722001c..0f26191241d7 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -249,6 +249,7 @@ unifdef-y += in.h
unifdef-y += in6.h
unifdef-y += inotify.h
unifdef-y += input.h
+unifdef-y += ioq.h
unifdef-y += ip.h
unifdef-y += ipc.h
unifdef-y += ipmi.h
@@ -338,6 +339,7 @@ unifdef-y += serial_core.h
unifdef-y += serial.h
unifdef-y += serio.h
unifdef-y += shm.h
+unifdef-y += shm_signal.h
unifdef-y += signal.h
unifdef-y += smb_fs.h
unifdef-y += smb.h
@@ -363,6 +365,8 @@ unifdef-y += uio.h
unifdef-y += unistd.h
unifdef-y += usbdevice_fs.h
unifdef-y += utsname.h
+unifdef-y += vbus_pci.h
+unifdef-y += venet.h
unifdef-y += vhost.h
unifdef-y += videodev2.h
unifdef-y += videodev.h
diff --git a/include/linux/ioq.h b/include/linux/ioq.h
new file mode 100644
index 000000000000..7c6d6cad83c7
--- /dev/null
+++ b/include/linux/ioq.h
@@ -0,0 +1,414 @@
+/*
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * IOQ is a generic shared-memory, lockless queue mechanism. It can be used
+ * in a variety of ways, though its intended purpose is to become the
+ * asynchronous communication path for virtual-bus drivers.
+ *
+ * The following are a list of key design points:
+ *
+ * #) All shared-memory is always allocated on explicitly one side of the
+ * link. This typically would be the guest side in a VM/VMM scenario.
+ * #) Each IOQ has the concept of "north" and "south" locales, where
+ * north denotes the memory-owner side (e.g. guest).
+ * #) An IOQ is manipulated using an iterator idiom.
+ * #) Provides a bi-directional signaling/notification infrastructure on
+ * a per-queue basis, which includes an event mitigation strategy
+ * to reduce boundary switching.
+ * #) The signaling path is abstracted so that various technologies and
+ * topologies can define their own specific implementation while sharing
+ * the basic structures and code.
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LINUX_IOQ_H
+#define _LINUX_IOQ_H
+
+#include <linux/types.h>
+#include <linux/shm_signal.h>
+
+/*
+ *---------
+ * The following structures represent data that is shared across boundaries
+ * which may be quite disparate from one another (e.g. Windows vs Linux,
+ * 32 vs 64 bit, etc). Therefore, care has been taken to make sure they
+ * present data in a manner that is independent of the environment.
+ *-----------
+ */
+/*
+ * One slot of the shared descriptor ring (see ioq_ring_head.ring).
+ */
+struct ioq_ring_desc {
+ __u64 cookie; /* for arbitrary use by north-side */
+ __le64 ptr; /* little-endian; presumably the address of the described buffer - confirm */
+ __le64 len; /* little-endian; presumably the length of that buffer - confirm */
+ __u8 valid;
+ __u8 sown; /* South owned = 1, North owned = 0 */
+};
+
+/* Values for ioq_ring_head.magic and ioq_ring_head.ver, already in little-endian form */
+#define IOQ_RING_MAGIC cpu_to_le32(0x47fa2fe4)
+#define IOQ_RING_VER cpu_to_le32(4)
+
+/* Head/tail bookkeeping for one index of the ring; ioq_ring_head carries two of these */
+struct ioq_ring_idx {
+ __le32 head; /* 0 based index to head of ptr array */
+ __le32 tail; /* 0 based index to tail of ptr array */
+ __u8 full;
+};
+
+/*
+ * Which side of the link a struct ioq represents.  As described at the top
+ * of this file, north is the memory-owner side (e.g. the guest).
+ */
+enum ioq_locality {
+ ioq_locality_north,
+ ioq_locality_south,
+};
+
+/*
+ * Shared-memory header of an IOQ, with the descriptor ring as its trailing
+ * member.  Use IOQ_HEAD_DESC_SIZE() to size an allocation for a given
+ * number of descriptors.
+ */
+struct ioq_ring_head {
+ __le32 magic; /* IOQ_RING_MAGIC */
+ __le32 ver; /* IOQ_RING_VER */
+ struct shm_signal_desc signal;
+ struct ioq_ring_idx idx[2]; /* presumably one per index type (valid/inuse) - confirm */
+ __le32 count; /* number of descriptors, little-endian */
+ struct ioq_ring_desc ring[1]; /* "count" elements will be allocated */
+};
+
+/*
+ * Bytes needed for an ioq_ring_head carrying a ring of "count" descriptors.
+ * ioq_ring_head already embeds one descriptor, hence the "- 1".  "count"
+ * is parenthesized so that expression arguments (e.g. "n << 1") expand
+ * with the intended precedence.
+ */
+#define IOQ_HEAD_DESC_SIZE(count) \
+	(sizeof(struct ioq_ring_head) + \
+	 sizeof(struct ioq_ring_desc) * ((count) - 1))
+
+/* --- END SHARED STRUCTURES --- */
+
+#ifdef __KERNEL__
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+
+/*
+ * Selects which index of the ring an operation applies to; see the
+ * kernel-doc of ioq_count() and ioq_iter_init() for valid/inuse/both.
+ */
+enum ioq_idx_type {
+ ioq_idxtype_valid,
+ ioq_idxtype_inuse,
+ ioq_idxtype_both,
+ ioq_idxtype_invalid,
+};
+
+/* Seek modes accepted by ioq_iter_seek(); only ioq_seek_set uses the offset */
+enum ioq_seek_type {
+ ioq_seek_tail,
+ ioq_seek_next,
+ ioq_seek_head,
+ ioq_seek_set
+};
+
+/*
+ * Cursor for walking an IOQ; initialize with ioq_iter_init() before use.
+ */
+struct ioq_iterator {
+ struct ioq *ioq;
+ struct ioq_ring_idx *idx;
+ u32 pos;
+ struct ioq_ring_desc *desc;
+ bool update; /* presumably set by IOQ_ITER_AUTOUPDATE - confirm in lib/ioq.c */
+ bool dualidx; /* presumably set for ioq_idxtype_both - confirm in lib/ioq.c */
+ bool flipowner; /* presumably cleared by IOQ_ITER_NOFLIPOWNER - confirm in lib/ioq.c */
+};
+
+/* Callback object a client attaches to an IOQ (see struct ioq.notifier) */
+struct ioq_notifier {
+ void (*signal)(struct ioq_notifier *);
+};
+
+/* Implementation callbacks for an IOQ */
+struct ioq_ops {
+ void (*release)(struct ioq *ioq); /* called when the last reference is dropped via ioq_put() */
+};
+
+/*
+ * Kernel-side context for one IOQ.  Set up with ioq_init() and
+ * reference-counted through ioq_get()/ioq_put().
+ */
+struct ioq {
+ struct ioq_ops *ops;
+
+ struct kref kref; /* managed by ioq_get()/ioq_put() */
+ enum ioq_locality locale;
+ struct ioq_ring_head *head_desc; /* shared ring header */
+ struct ioq_ring_desc *ring;
+ struct shm_signal *signal; /* reference dropped when the ioq is released */
+ wait_queue_head_t wq;
+ struct ioq_notifier *notifier;
+ size_t count;
+ struct shm_signal_notifier shm_notifier;
+};
+
+/* Flag bits for ioq_iter_init(); see its kernel-doc (autoupdate, noflipowner) */
+#define IOQ_ITER_AUTOUPDATE (1 << 0)
+#define IOQ_ITER_NOFLIPOWNER (1 << 1)
+
+/**
+ * ioq_init() - initialize an IOQ
+ * @ioq: IOQ context
+ * @ops: implementation callbacks; ->release runs when the last
+ * reference is dropped with ioq_put()
+ * @locale: which side of the link this context represents
+ * @head: shared ring header backing this IOQ
+ * @signal: shm_signal used for notifications; ioq_put() drops a
+ * reference on it when the IOQ is released
+ * @count: number of descriptors in the ring
+ *
+ * Initializes IOQ context before first use
+ *
+ **/
+void ioq_init(struct ioq *ioq,
+ struct ioq_ops *ops,
+ enum ioq_locality locale,
+ struct ioq_ring_head *head,
+ struct shm_signal *signal,
+ size_t count);
+
+/**
+ * ioq_get() - acquire an IOQ context reference
+ * @ioq: IOQ context
+ *
+ **/
+static inline struct ioq *ioq_get(struct ioq *ioq)
+{
+	struct kref *ref = &ioq->kref;
+
+	/* take the reference, then hand the same pointer back for chaining */
+	kref_get(ref);
+	return ioq;
+}
+
+/*
+ * kref release function for an IOQ: drops the reference held on the
+ * shm_signal, then lets the implementation release the IOQ itself.
+ */
+static inline void _ioq_kref_release(struct kref *kref)
+{
+	struct ioq *q = container_of(kref, struct ioq, kref);
+
+	shm_signal_put(q->signal);
+	q->ops->release(q);
+}
+
+/**
+ * ioq_put() - release an IOQ context reference
+ * @ioq: IOQ context
+ *
+ **/
+static inline void ioq_put(struct ioq *ioq)
+{
+	struct kref *ref = &ioq->kref;
+
+	/* _ioq_kref_release() runs once the count reaches zero */
+	kref_put(ref, _ioq_kref_release);
+}
+
+/**
+ * ioq_notify_enable() - enables local notifications on an IOQ
+ * @ioq: IOQ context
+ * @flags: Reserved for future use, must be 0
+ *
+ * Enables/unmasks the registered ioq_notifier (if applicable) and waitq to
+ * receive wakeups whenever the remote side performs an ioq_signal() operation.
+ * A notification will be dispatched immediately if any pending signals have
+ * already been issued prior to invoking this call.
+ *
+ * This is synonymous with unmasking an interrupt.
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+static inline int ioq_notify_enable(struct ioq *ioq, int flags)
+{
+	struct shm_signal *sig = ioq->signal;
+
+	/* @flags is reserved and not forwarded; 0 is always passed down */
+	return shm_signal_enable(sig, 0);
+}
+
+/**
+ * ioq_notify_disable() - disable local notifications on an IOQ
+ * @ioq: IOQ context
+ * @flags: Reserved for future use, must be 0
+ *
+ * Disables/masks the registered ioq_notifier (if applicable) and waitq
+ * from receiving any further notifications. Any subsequent calls to
+ * ioq_signal() by the remote side will update the ring as dirty, but
+ * will not traverse the locale boundary and will not invoke the notifier
+ * callback or wakeup the waitq. Signals delivered while masked will
+ * be deferred until ioq_notify_enable() is invoked
+ *
+ * This is synonymous with masking an interrupt
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+static inline int ioq_notify_disable(struct ioq *ioq, int flags)
+{
+	struct shm_signal *sig = ioq->signal;
+
+	/* @flags is reserved and not forwarded; 0 is always passed down */
+	return shm_signal_disable(sig, 0);
+}
+
+/**
+ * ioq_signal() - notify the remote side about ring changes
+ * @ioq: IOQ context
+ * @flags: Reserved for future use, must be 0
+ *
+ * Marks the ring state as "dirty" and, if enabled, will traverse
+ * a locale boundary to invoke a remote notification. The remote
+ * side controls whether the notification should be delivered via
+ * the ioq_notify_enable/disable() interface.
+ *
+ * The specifics of how to traverse a locale boundary are abstracted
+ * by the shm_signal_ops->inject() callback of the IOQ's shm_signal and provided by a particular
+ * implementation. However, typically going north to south would be
+ * something like a syscall/hypercall, and going south to north would be
+ * something like a posix-signal/guest-interrupt.
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+static inline int ioq_signal(struct ioq *ioq, int flags)
+{
+	struct shm_signal *sig = ioq->signal;
+
+	/* @flags is reserved and not forwarded; 0 is always passed down */
+	return shm_signal_inject(sig, 0);
+}
+
+/**
+ * ioq_count() - counts the number of outstanding descriptors in an index
+ * @ioq: IOQ context
+ * @type: Specifies the index type
+ * (*) valid: the descriptor is valid. This is usually
+ * used to keep track of descriptors that may not
+ * be carrying a useful payload, but still need to
+ * be tracked carefully.
+ * (*) inuse: Descriptors that carry useful payload
+ *
+ * Returns:
+ * (*) >=0: # of descriptors outstanding in the index
+ * (*) <0 = ERRNO
+ *
+ **/
+int ioq_count(struct ioq *ioq, enum ioq_idx_type type);
+
+/**
+ * ioq_remain() - counts the number of remaining descriptors in an index
+ * @ioq: IOQ context
+ * @type: Specifies the index type
+ * (*) valid: the descriptor is valid. This is usually
+ * used to keep track of descriptors that may not
+ * be carrying a useful payload, but still need to
+ * be tracked carefully.
+ * (*) inuse: Descriptors that carry useful payload
+ *
+ * This is the converse of ioq_count(). This function returns the number
+ * of "free" descriptors left in a particular index
+ *
+ * Returns:
+ * (*) >=0: # of descriptors remaining in the index
+ * (*) <0 = ERRNO
+ *
+ **/
+int ioq_remain(struct ioq *ioq, enum ioq_idx_type type);
+
+/**
+ * ioq_size() - counts the maximum number of descriptors in a ring
+ * @ioq: IOQ context
+ *
+ * This function returns the maximum number of descriptors supported in
+ * a ring, regardless of their current state (free or inuse).
+ *
+ * Returns:
+ * (*) >=0: total # of descriptors in the ring
+ * (*) <0 = ERRNO
+ *
+ **/
+int ioq_size(struct ioq *ioq);
+
+/**
+ * ioq_full() - determines if a specific index is "full"
+ * @ioq: IOQ context
+ * @type: Specifies the index type
+ * (*) valid: the descriptor is valid. This is usually
+ * used to keep track of descriptors that may not
+ * be carrying a useful payload, but still need to
+ * be tracked carefully.
+ * (*) inuse: Descriptors that carry useful payload
+ *
+ * Returns:
+ * (*) 0: index is not full
+ * (*) 1: index is full
+ * (*) <0 = ERRNO
+ *
+ **/
+int ioq_full(struct ioq *ioq, enum ioq_idx_type type);
+
+/**
+ * ioq_empty() - determines if a specific index is "empty"
+ * @ioq: IOQ context
+ * @type: Specifies the index type
+ * (*) valid: the descriptor is valid. This is usually
+ * used to keep track of descriptors that may not
+ * be carrying a useful payload, but still need to
+ * be tracked carefully.
+ * (*) inuse: Descriptors that carry useful payload
+ *
+ * Returns:
+ * (*) 0: index is not empty
+ * (*) 1: index is empty
+ * (*) <0 = ERRNO
+ *
+ **/
+static inline int ioq_empty(struct ioq *ioq, enum ioq_idx_type type)
+{
+	/* 1 only when ioq_count() reports exactly zero descriptors */
+	return ioq_count(ioq, type) == 0;
+}
+
+/**
+ * ioq_iter_init() - initialize an iterator for IOQ descriptor traversal
+ * @ioq: IOQ context to iterate on
+ * @iter: Iterator context to init (usually from stack)
+ * @type: Specifies the index type to iterate against
+ * (*) valid: iterate against the "valid" index
+ * (*) inuse: iterate against the "inuse" index
+ * (*) both: iterate against both indexes simultaneously
+ * @flags: Bitfield with 0 or more bits set to alter behavior
+ * (*) autoupdate: automatically signal the remote side
+ * whenever the iterator pushes/pops to a new desc
+ * (*) noflipowner: do not flip the ownership bit during
+ * a push/pop operation
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+int ioq_iter_init(struct ioq *ioq, struct ioq_iterator *iter,
+ enum ioq_idx_type type, int flags);
+
+/**
+ * ioq_iter_seek() - seek to a specific location in the IOQ ring
+ * @iter: Iterator context (must be initialized with ioq_iter_init)
+ * @type: Specifies the type of seek operation
+ * (*) tail: seek to the absolute tail, offset is ignored
+ * (*) next: seek to the relative next, offset is ignored
+ * (*) head: seek to the absolute head, offset is ignored
+ * (*) set: seek to the absolute offset
+ * @offset: Offset for ioq_seek_set operations
+ * @flags: Reserved for future use, must be 0
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+int ioq_iter_seek(struct ioq_iterator *iter, enum ioq_seek_type type,
+ long offset, int flags);
+
+/**
+ * ioq_iter_push() - push the tail pointer forward
+ * @iter: Iterator context (must be initialized with ioq_iter_init)
+ * @flags: Reserved for future use, must be 0
+ *
+ * This function will simultaneously advance the tail ptr in the current
+ * index (valid/inuse, as specified in the ioq_iter_init) as well as
+ * perform a seek(next) operation. This effectively "pushes" a new pointer
+ * onto the tail of the index.
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+int ioq_iter_push(struct ioq_iterator *iter, int flags);
+
+/**
+ * ioq_iter_pop() - pop the head pointer from the ring
+ * @iter: Iterator context (must be initialized with ioq_iter_init)
+ * @flags: Reserved for future use, must be 0
+ *
+ * This function will simultaneously advance the head ptr in the current
+ * index (valid/inuse, as specified in the ioq_iter_init) as well as
+ * perform a seek(next) operation. This effectively "pops" a pointer
+ * from the head of the index.
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+int ioq_iter_pop(struct ioq_iterator *iter, int flags);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_IOQ_H */
diff --git a/include/linux/shm_signal.h b/include/linux/shm_signal.h
new file mode 100644
index 000000000000..b2efd72669fb
--- /dev/null
+++ b/include/linux/shm_signal.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LINUX_SHM_SIGNAL_H
+#define _LINUX_SHM_SIGNAL_H
+
+#include <linux/types.h>
+
+/*
+ *---------
+ * The following structures represent data that is shared across boundaries
+ * which may be quite disparate from one another (e.g. Windows vs Linux,
+ * 32 vs 64 bit, etc). Therefore, care has been taken to make sure they
+ * present data in a manner that is independent of the environment.
+ *-----------
+ */
+
+/* Little-endian values matching the magic and ver fields of struct shm_signal_desc */
+#define SHM_SIGNAL_MAGIC cpu_to_le32(0x58fa39df)
+#define SHM_SIGNAL_VER cpu_to_le32(1)
+
+/*
+ * Shared interrupt-style state for one side of the link; shm_signal_desc
+ * carries two of these.  Field meanings presumably follow the
+ * enable/disable/inject kernel-doc below - confirm in lib/shm_signal.c.
+ */
+struct shm_signal_irq {
+ __u8 enabled;
+ __u8 pending;
+ __u8 dirty;
+};
+
+/* Which side of the link a struct shm_signal represents */
+enum shm_signal_locality {
+ shm_locality_north,
+ shm_locality_south,
+};
+
+/* Shared-memory descriptor of a signal; embedded e.g. in struct ioq_ring_head */
+struct shm_signal_desc {
+ __le32 magic;
+ __le32 ver;
+ struct shm_signal_irq irq[2]; /* presumably indexed by enum shm_signal_locality - confirm */
+};
+
+/* --- END SHARED STRUCTURES --- */
+
+#ifdef __KERNEL__
+
+#include <linux/kref.h>
+#include <linux/interrupt.h>
+
+/* Callback object a client attaches to a shm_signal (see struct shm_signal.notifier) */
+struct shm_signal_notifier {
+ void (*signal)(struct shm_signal_notifier *);
+};
+
+struct shm_signal;
+
+/* Implementation callbacks for a shm_signal */
+struct shm_signal_ops {
+ int (*inject)(struct shm_signal *s); /* deliver a notification to the remote side */
+ void (*fault)(struct shm_signal *s, const char *fmt, ...); /* optional; SHM_SIGNAL_FAULT() panics when NULL */
+ void (*release)(struct shm_signal *s); /* presumably called from _shm_signal_release() - confirm */
+};
+
+enum {
+ shm_signal_in_wakeup,
+};
+
+/*
+ * Kernel-side context for one signal.  Set up with shm_signal_init() and
+ * reference-counted through shm_signal_get()/shm_signal_put().
+ */
+struct shm_signal {
+ struct kref kref; /* managed by shm_signal_get()/shm_signal_put() */
+ spinlock_t lock;
+ enum shm_signal_locality locale;
+ unsigned long flags; /* presumably a bitmask of the shm_signal_in_wakeup enum above - confirm */
+ struct shm_signal_ops *ops;
+ struct shm_signal_desc *desc; /* shared-memory descriptor */
+ struct shm_signal_notifier *notifier;
+ struct tasklet_struct deferred_notify;
+};
+
+/*
+ * Report a fault on signal "s": calls s->ops->fault() when the
+ * implementation provides one, otherwise panic()s with the same message.
+ * Note that "s" is evaluated more than once.
+ */
+#define SHM_SIGNAL_FAULT(s, fmt, args...) \
+ ((s)->ops->fault ? (s)->ops->fault((s), fmt, ## args) : panic(fmt, ## args))
+
+ /*
+ * These functions should only be used internally
+ */
+void _shm_signal_release(struct kref *kref);
+void _shm_signal_wakeup(struct shm_signal *s);
+
+/**
+ * shm_signal_init() - initialize an SHM_SIGNAL
+ * @s: SHM_SIGNAL context
+ *
+ * Initializes SHM_SIGNAL context before first use
+ *
+ **/
+void shm_signal_init(struct shm_signal *s, enum shm_signal_locality locale,
+ struct shm_signal_ops *ops, struct shm_signal_desc *desc);
+
+/**
+ * shm_signal_get() - acquire an SHM_SIGNAL context reference
+ * @s: SHM_SIGNAL context
+ *
+ **/
+static inline struct shm_signal *shm_signal_get(struct shm_signal *s)
+{
+	struct kref *ref = &s->kref;
+
+	/* take the reference, then hand the same pointer back for chaining */
+	kref_get(ref);
+	return s;
+}
+
+/**
+ * shm_signal_put() - release an SHM_SIGNAL context reference
+ * @s: SHM_SIGNAL context
+ *
+ **/
+static inline void shm_signal_put(struct shm_signal *s)
+{
+	struct kref *ref = &s->kref;
+
+	/* _shm_signal_release() runs once the count reaches zero */
+	kref_put(ref, _shm_signal_release);
+}
+
+/**
+ * shm_signal_enable() - enables local notifications on an SHM_SIGNAL
+ * @s: SHM_SIGNAL context
+ * @flags: Reserved for future use, must be 0
+ *
+ * Enables/unmasks the registered notifier (if applicable) to receive wakeups
+ * whenever the remote side performs an shm_signal_inject() operation. A notification
+ * will be dispatched immediately if any pending signals have already been
+ * issued prior to invoking this call.
+ *
+ * This is synonymous with unmasking an interrupt.
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+int shm_signal_enable(struct shm_signal *s, int flags);
+
+/**
+ * shm_signal_disable() - disable local notifications on an SHM_SIGNAL
+ * @s: SHM_SIGNAL context
+ * @flags: Reserved for future use, must be 0
+ *
+ * Disables/masks the registered shm_signal_notifier (if applicable) from
+ * receiving any further notifications. Any subsequent calls to shm_signal_inject()
+ * by the remote side will update the shm as dirty, but will not traverse the
+ * locale boundary and will not invoke the notifier callback. Signals
+ * delivered while masked will be deferred until shm_signal_enable() is
+ * invoked.
+ *
+ * This is synonymous with masking an interrupt
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+int shm_signal_disable(struct shm_signal *s, int flags);
+
+/**
+ * shm_signal_inject() - notify the remote side about shm changes
+ * @s: SHM_SIGNAL context
+ * @flags: Reserved for future use, must be 0
+ *
+ * Marks the shm state as "dirty" and, if enabled, will traverse
+ * a locale boundary to inject a remote notification. The remote
+ * side controls whether the notification should be delivered via
+ * the shm_signal_enable/disable() interface.
+ *
+ * The specifics of how to traverse a locale boundary are abstracted
+ * by the shm_signal_ops->inject() interface and provided by a particular
+ * implementation. However, typically going north to south would be
+ * something like a syscall/hypercall, and going south to north would be
+ * something like a posix-signal/guest-interrupt.
+ *
+ * Returns: success = 0, <0 = ERRNO
+ *
+ **/
+int shm_signal_inject(struct shm_signal *s, int flags);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SHM_SIGNAL_H */
diff --git a/include/linux/vbus_driver.h b/include/linux/vbus_driver.h
new file mode 100644
index 000000000000..8a7acb1a7a05
--- /dev/null
+++ b/include/linux/vbus_driver.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * Mediates access to a host VBUS from a guest kernel by providing a
+ * global view of all VBUS devices
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LINUX_VBUS_DRIVER_H
+#define _LINUX_VBUS_DRIVER_H
+
+#include <linux/device.h>
+#include <linux/shm_signal.h>
+#include <linux/ioq.h>
+
+struct vbus_device_proxy;
+struct vbus_driver;
+
+/*
+ * Operations a bus implementation provides for each device proxy.
+ * NOTE(review): the per-callback contracts are not visible in this header;
+ * presumably they map onto the VBUS_PCI_HC_DEV* hypercalls - confirm.
+ */
+struct vbus_device_proxy_ops {
+ int (*open)(struct vbus_device_proxy *dev, int version, int flags);
+ int (*close)(struct vbus_device_proxy *dev, int flags);
+ int (*shm)(struct vbus_device_proxy *dev, const char *name,
+ int id, int prio,
+ void *ptr, size_t len,
+ struct shm_signal_desc *sigdesc, struct shm_signal **signal,
+ int flags);
+ int (*call)(struct vbus_device_proxy *dev, u32 func,
+ void *data, size_t len, int flags);
+ void (*release)(struct vbus_device_proxy *dev);
+};
+
+/*
+ * Guest-side representation of one device on the vbus; registered with
+ * vbus_device_proxy_register() and looked up by id with
+ * vbus_device_proxy_find().
+ */
+struct vbus_device_proxy {
+ char *type;
+ u64 id;
+ void *priv; /* Used by drivers */
+ struct vbus_device_proxy_ops *ops;
+ struct device dev;
+};
+
+int vbus_device_proxy_register(struct vbus_device_proxy *dev);
+void vbus_device_proxy_unregister(struct vbus_device_proxy *dev);
+
+struct vbus_device_proxy *vbus_device_proxy_find(u64 id);
+
+/* Callbacks a vbus driver supplies in struct vbus_driver */
+struct vbus_driver_ops {
+ int (*probe)(struct vbus_device_proxy *dev);
+ int (*remove)(struct vbus_device_proxy *dev);
+};
+
+/* A vbus driver; register with vbus_driver_register() */
+struct vbus_driver {
+ char *type; /* presumably matched against vbus_device_proxy.type - confirm */
+ struct module *owner;
+ struct vbus_driver_ops *ops;
+ struct device_driver drv;
+};
+
+int vbus_driver_register(struct vbus_driver *drv);
+void vbus_driver_unregister(struct vbus_driver *drv);
+
+/*
+ * driver-side IOQ helper - allocates device-shm and maps an IOQ on it
+ */
+int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, const char *name,
+ int id, int prio, size_t ringsize, struct ioq **ioq);
+
+/*
+ * Emits MODULE_ALIAS("vbus-proxy:<name>").  "name" must be a string
+ * literal because it is joined by adjacent-literal concatenation.
+ */
+#define VBUS_DRIVER_AUTOPROBE(name) MODULE_ALIAS("vbus-proxy:" name)
+
+#endif /* _LINUX_VBUS_DRIVER_H */
diff --git a/include/linux/vbus_pci.h b/include/linux/vbus_pci.h
new file mode 100644
index 000000000000..fe337590e644
--- /dev/null
+++ b/include/linux/vbus_pci.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * PCI to Virtual-Bus Bridge
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LINUX_VBUS_PCI_H
+#define _LINUX_VBUS_PCI_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * MAGIC and HC_VERSION are sent to the host in struct
+ * vbus_pci_bridge_negotiate; ABI_VERSION is compared against the PCI
+ * revision of the bridge device at probe time.
+ */
+#define VBUS_PCI_ABI_MAGIC 0xbf53eef5
+#define VBUS_PCI_ABI_VERSION 2
+#define VBUS_PCI_HC_VERSION 1
+
+/* Function codes for bridge-level calls (first argument of vbus_pci_bridgecall()) */
+enum {
+ VBUS_PCI_BRIDGE_NEGOTIATE,
+ VBUS_PCI_BRIDGE_QREG,
+ VBUS_PCI_BRIDGE_SLOWCALL,
+ VBUS_PCI_BRIDGE_FASTCALL_ADD,
+ VBUS_PCI_BRIDGE_FASTCALL_DROP,
+
+ VBUS_PCI_BRIDGE_MAX, /* must be last */
+};
+
+enum {
+ VBUS_PCI_HC_DEVOPEN,
+ VBUS_PCI_HC_DEVCLOSE,
+ VBUS_PCI_HC_DEVCALL,
+ VBUS_PCI_HC_DEVSHM,
+
+ VBUS_PCI_HC_MAX, /* must be last */
+};
+
+/* Payload of the VBUS_PCI_BRIDGE_NEGOTIATE call */
+struct vbus_pci_bridge_negotiate {
+ __u32 magic; /* VBUS_PCI_ABI_MAGIC */
+ __u32 version; /* VBUS_PCI_HC_VERSION */
+ __u64 capabilities;
+};
+
+struct vbus_pci_deviceopen {
+ __u32 devid;
+ __u32 version; /* device ABI version */
+ __u64 handle; /* return value for devh */
+};
+
+/*
+ * Presumably the payload of VBUS_PCI_HC_DEVCALL - confirm against the
+ * bridge driver.
+ * NOTE(review): datap follows three __u32 fields, so its offset depends on
+ * whether the ABI aligns __u64 to 4 or 8 bytes; confirm both sides agree.
+ */
+struct vbus_pci_devicecall {
+ __u64 devh; /* device-handle (returned from DEVICEOPEN) */
+ __u32 func;
+ __u32 len;
+ __u32 flags;
+ __u64 datap;
+};
+
+/*
+ * Presumably the payload of VBUS_PCI_HC_DEVSHM - confirm against the
+ * bridge driver.
+ */
+struct vbus_pci_deviceshm {
+ __u64 devh; /* device-handle (returned from DEVICEOPEN) */
+ __u32 id;
+ __u32 len;
+ __u32 flags;
+ struct {
+ __u32 offset;
+ __u32 prio;
+ __u64 cookie; /* token to pass back when signaling client */
+ } signal;
+ __u64 datap;
+};
+
+/*
+ * Generic call descriptor: a vector number plus the length and address of
+ * its data.  The probe code fills datap with a physical address (__pa()).
+ */
+struct vbus_pci_call_desc {
+ __u32 vector;
+ __u32 len;
+ __u64 datap;
+};
+
+/* Per-cpu fastcall area registered with the host via VBUS_PCI_BRIDGE_FASTCALL_ADD */
+struct vbus_pci_fastcall_desc {
+ struct vbus_pci_call_desc call;
+ __u32 result;
+};
+
+/* Layout of PCI BAR 0 as mapped by the bridge driver's probe */
+struct vbus_pci_regs {
+ struct vbus_pci_call_desc bridgecall;
+ __u8 pad[48];
+};
+
+/*
+ * Layout of PCI BAR 1 as mapped by the bridge driver's probe.  The driver
+ * signals the eventq by writing 0 to the eventq register.
+ */
+struct vbus_pci_signals {
+ __u32 eventq;
+ __u32 fastcall;
+ __u32 shmsignal;
+ __u8 pad[20];
+};
+
+/*
+ * Describes one event queue to the host.  The bridge driver fills ring and
+ * data with physical addresses of the ring header and the event data.
+ * NOTE(review): a __u64 directly after a lone __u32 is laid out differently
+ * by ABIs that align __u64 to 4 vs 8 bytes; confirm both sides agree.
+ */
+struct vbus_pci_eventqreg {
+ __u32 count;
+ __u64 ring;
+ __u64 data;
+};
+
+/* Payload of the VBUS_PCI_BRIDGE_QREG call */
+struct vbus_pci_busreg {
+ __u32 count; /* supporting multiple queues allows for prio, etc */
+ struct vbus_pci_eventqreg eventq[1];
+};
+
+enum vbus_pci_eventid {
+ VBUS_PCI_EVENT_DEVADD,
+ VBUS_PCI_EVENT_DEVDROP,
+ VBUS_PCI_EVENT_SHMSIGNAL,
+ VBUS_PCI_EVENT_SHMCLOSE,
+};
+
+#define VBUS_MAX_DEVTYPE_LEN 128
+
+struct vbus_pci_add_event {
+ __u64 id;
+ char type[VBUS_MAX_DEVTYPE_LEN];
+};
+
+struct vbus_pci_handle_event {
+ __u64 handle;
+};
+
+struct vbus_pci_event {
+ __u32 eventid;
+ union {
+ struct vbus_pci_add_event add;
+ struct vbus_pci_handle_event handle;
+ } data;
+};
+
+#endif /* _LINUX_VBUS_PCI_H */
diff --git a/include/linux/venet.h b/include/linux/venet.h
new file mode 100644
index 000000000000..0578d797c973
--- /dev/null
+++ b/include/linux/venet.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * Virtual-Ethernet adapter
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LINUX_VENET_H
+#define _LINUX_VENET_H
+
+#include <linux/types.h>
+
+#define VENET_VERSION 1
+
+#define VENET_TYPE "virtual-ethernet"
+
+#define VENET_QUEUE_RX 0
+#define VENET_QUEUE_TX 1
+
+struct venet_capabilities {
+ __u32 gid;
+ __u32 bits;
+};
+
+#define VENET_CAP_GROUP_SG 0
+#define VENET_CAP_GROUP_EVENTQ 1
+#define VENET_CAP_GROUP_L4RO 2 /* layer-4 reassem offloading */
+
+/* CAPABILITIES-GROUP SG */
+#define VENET_CAP_SG (1 << 0)
+#define VENET_CAP_TSO4 (1 << 1)
+#define VENET_CAP_TSO6 (1 << 2)
+#define VENET_CAP_ECN (1 << 3)
+#define VENET_CAP_UFO (1 << 4)
+#define VENET_CAP_PMTD (1 << 5) /* pre-mapped tx desc */
+
+/* CAPABILITIES-GROUP EVENTQ */
+#define VENET_CAP_EVQ_LINKSTATE (1 << 0)
+#define VENET_CAP_EVQ_TXC (1 << 1) /* tx-complete */
+
+/*
+ * One scatter-gather element of a struct venet_sg.
+ * NOTE(review): ptr follows a lone __u32, so the struct's size and padding
+ * differ between ABIs that align __u64 to 4 vs 8 bytes; confirm both sides
+ * of the link agree.
+ */
+struct venet_iov {
+ __u32 len;
+ __u64 ptr;
+};
+
+#define VENET_SG_FLAG_NEEDS_CSUM (1 << 0)
+#define VENET_SG_FLAG_GSO (1 << 1)
+#define VENET_SG_FLAG_ECN (1 << 2)
+
+/*
+ * Scatter-gather descriptor with "count" trailing iovs.  Use
+ * VSG_DESC_SIZE() to size an allocation for a given iov count.
+ */
+struct venet_sg {
+ __u64 cookie;
+ __u32 flags; /* presumably VENET_SG_FLAG_* bits - confirm */
+ __u32 len; /* total length of all iovs */
+ struct {
+ __u16 start; /* csum starting position */
+ __u16 offset; /* offset to place csum */
+ } csum;
+ struct {
+#define VENET_GSO_TYPE_TCPV4 0 /* IPv4 TCP (TSO) */
+#define VENET_GSO_TYPE_UDP 1 /* IPv4 UDP (UFO) */
+#define VENET_GSO_TYPE_TCPV6 2 /* IPv6 TCP */
+ __u8 type; /* one of VENET_GSO_TYPE_* */
+ __u16 hdrlen;
+ __u16 size;
+ } gso;
+ __u32 count; /* nr of iovs */
+ struct venet_iov iov[1];
+};
+
+struct venet_eventq_query {
+ __u32 flags;
+ __u32 evsize; /* size of each event */
+ __u32 dpid; /* descriptor pool-id */
+ __u32 qid;
+ __u8 pad[16];
+};
+
+#define VENET_EVENT_LINKSTATE 0
+#define VENET_EVENT_TXC 1
+
+struct venet_event_header {
+ __u32 flags;
+ __u32 size;
+ __u32 id;
+};
+
+struct venet_event_linkstate {
+ struct venet_event_header header;
+ __u8 state; /* 0 = down, 1 = up */
+};
+
+struct venet_event_txc {
+ struct venet_event_header header;
+ __u32 txqid;
+ __u64 cookie;
+};
+
+struct venet_l4ro_query {
+ __u32 flags;
+ __u32 dpid; /* descriptor pool-id */
+ __u32 pqid; /* page queue-id */
+ __u8 pad[20];
+};
+
+
+/*
+ * Bytes needed for a venet_sg carrying "count" iovs; venet_sg already
+ * embeds one iov, hence the "- 1".
+ */
+#define VSG_DESC_SIZE(count) (sizeof(struct venet_sg) + \
+ sizeof(struct venet_iov) * ((count) - 1))
+
+#define VENET_FUNC_LINKUP 0
+#define VENET_FUNC_LINKDOWN 1
+#define VENET_FUNC_MACQUERY 2
+#define VENET_FUNC_NEGCAP 3 /* negotiate capabilities */
+#define VENET_FUNC_FLUSHRX 4
+#define VENET_FUNC_PMTDQUERY 5
+#define VENET_FUNC_EVQQUERY 6
+#define VENET_FUNC_L4ROQUERY 7
+
+#endif /* _LINUX_VENET_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 390e88c732d7..d8dc96d945f7 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -213,4 +213,25 @@ config GENERIC_ATOMIC64
config LRU_CACHE
tristate
+config SHM_SIGNAL
+ tristate "SHM Signal - Generic shared-memory signaling mechanism"
+ default n
+ help
+ Provides a shared-memory based signaling mechanism to indicate
+ memory-dirty notifications between two end-points.
+
+ If unsure, say N
+
+config IOQ
+ tristate "IO-Queue library - Generic shared-memory queue"
+ select SHM_SIGNAL
+ default n
+ help
+ IOQ is a generic shared-memory-queue mechanism that happens to be
+ friendly to virtualization boundaries. It can be used in a variety
+ of ways, though its intended purpose is to become a low-level
+ communication path for paravirtualized drivers.
+
+ If unsure, say N
+
endmenu
diff --git a/lib/Makefile b/lib/Makefile
index d9f1f812317b..091be3fedbbe 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
obj-$(CONFIG_SMP) += percpu_counter.o
obj-$(CONFIG_AUDIT_GENERIC) += audit.o
+obj-$(CONFIG_SHM_SIGNAL) += shm_signal.o
+obj-$(CONFIG_IOQ) += ioq.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
diff --git a/lib/ioq.c b/lib/ioq.c
new file mode 100644
index 000000000000..4027848d7436
--- /dev/null
+++ b/lib/ioq.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * See include/linux/ioq.h for documentation
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/ioq.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/*
+ * Point the iterator at ring slot @pos, refreshing both the cached position
+ * and the descriptor pointer.  @pos must lie inside the ring; anything else
+ * trips BUG_ON().  Always returns 0.
+ */
+static int ioq_iter_setpos(struct ioq_iterator *iter, u32 pos)
+{
+	struct ioq *q = iter->ioq;
+
+	BUG_ON(pos >= q->count);
+
+	iter->desc = &q->ring[pos];
+	iter->pos = pos;
+
+	return 0;
+}
+
+/*
+ * Step @val forward by one, wrapping to 0 once it reaches @mod - 1.
+ * @val must already be below @mod.
+ */
+static inline u32 modulo_inc(u32 val, u32 mod)
+{
+	BUG_ON(val >= mod);
+
+	return (val == (mod - 1)) ? 0 : val + 1;
+}
+
+/*
+ * An index is full when its full flag is set and head and tail coincide.
+ * Returns 1 if full, 0 otherwise.
+ */
+static inline int idx_full(struct ioq_ring_idx *idx)
+{
+	if (!idx->full)
+		return 0;
+
+	return idx->head == idx->tail;
+}
+
+/*
+ * ioq_iter_seek() - reposition an iterator within the ring
+ * @iter:   iterator to move
+ * @type:   ioq_seek_next advances one slot from the current position
+ *          (wrapping at the end of the ring); ioq_seek_tail and
+ *          ioq_seek_head jump to the tail/head of the iterator's index;
+ *          ioq_seek_set jumps to the absolute slot @offset
+ * @offset: slot number, only consulted for ioq_seek_set
+ * @flags:  not used by this function
+ *
+ * Returns 0 on success, or -EINVAL for an unknown @type or for an @offset
+ * that does not name a slot inside the ring.
+ */
+int ioq_iter_seek(struct ioq_iterator *iter, enum ioq_seek_type type,
+		  long offset, int flags)
+{
+	struct ioq_ring_idx *idx = iter->idx;
+	u32 pos;
+
+	switch (type) {
+	case ioq_seek_next:
+		pos = modulo_inc(iter->pos, iter->ioq->count);
+		break;
+	case ioq_seek_tail:
+		pos = le32_to_cpu(idx->tail);
+		break;
+	case ioq_seek_head:
+		pos = le32_to_cpu(idx->head);
+		break;
+	case ioq_seek_set:
+		/*
+		 * Reject negative offsets explicitly instead of relying on
+		 * the integer conversion in the comparison against count,
+		 * and report the same errno as the other failure path.
+		 */
+		if (offset < 0 || offset >= iter->ioq->count)
+			return -EINVAL;
+		pos = offset;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return ioq_iter_setpos(iter, pos);
+}
+EXPORT_SYMBOL_GPL(ioq_iter_seek);
+
+/*
+ * Number of occupied slots between head and tail of @idx for a ring of
+ * @count slots.  Coinciding head and tail means either completely full
+ * (full flag set) or empty.
+ */
+static int ioq_ring_count(struct ioq_ring_idx *idx, int count)
+{
+	u32 tail = le32_to_cpu(idx->tail);
+	u32 head = le32_to_cpu(idx->head);
+
+	if (head == tail)
+		return idx->full ? count : 0;
+
+	if (tail > head)
+		return tail - head;
+
+	/* tail has wrapped around behind head */
+	return (tail + count) - head;
+}
+
+/*
+ * Advance the tail of @idx by one slot, wrapping at @count.
+ *
+ * If the advanced tail lands on the current head, the full flag is set
+ * (followed by a write barrier) before the new tail value is stored.
+ */
+static void idx_tail_push(struct ioq_ring_idx *idx, int count)
+{
+ u32 tail = modulo_inc(le32_to_cpu(idx->tail), count);
+ u32 head = le32_to_cpu(idx->head);
+
+ if (head == tail) {
+ rmb();
+
+ /*
+ * Setting full here may look racy, but note that we haven't
+ * flipped the owner bit yet. So it is impossible for the
+ * remote locale to move head in such a way that this operation
+ * becomes invalid
+ */
+ idx->full = 1;
+ wmb();
+ }
+
+ idx->tail = cpu_to_le32(tail);
+}
+
+/*
+ * ioq_iter_push() - publish the descriptor under the iterator and advance
+ * @iter:  iterator, which must currently sit on the tail of its index
+ * @flags: only forwarded to ioq_iter_seek()
+ *
+ * Advances the tail of the iterator's index (and, for a dual-index
+ * iterator, the in-use index as well), optionally hands the descriptor to
+ * the other locale by flipping sown, moves the iterator to the next slot
+ * and, if the iterator was created with auto-update, signals the queue.
+ *
+ * Returns the result of the seek to the next slot, or -EINVAL if the
+ * iterator is not at the tail, the descriptor is not owned by the local
+ * side, or (dual-index only) the two tails disagree after the push.
+ */
+int ioq_iter_push(struct ioq_iterator *iter, int flags)
+{
+ struct ioq_ring_head *head_desc = iter->ioq->head_desc;
+ struct ioq_ring_idx *idx = iter->idx;
+ int ret;
+
+ /*
+ * It's only valid to push if we are currently pointed at the tail
+ * and the descriptor there is owned by our locale.  The ownership
+ * test dereferences iter->desc, but is only evaluated once the
+ * position test has passed.
+ */
+ if (iter->pos != le32_to_cpu(idx->tail) || iter->desc->sown != iter->ioq->locale)
+ return -EINVAL;
+
+ idx_tail_push(idx, iter->ioq->count);
+ if (iter->dualidx) {
+ /* keep the in-use index in lock-step with the valid index */
+ idx_tail_push(&head_desc->idx[ioq_idxtype_inuse],
+ iter->ioq->count);
+ if (head_desc->idx[ioq_idxtype_inuse].tail !=
+ head_desc->idx[ioq_idxtype_valid].tail) {
+ SHM_SIGNAL_FAULT(iter->ioq->signal,
+ "Tails not synchronized");
+ return -EINVAL;
+ }
+ }
+
+ wmb(); /* the index must be visible before the sown, or signal */
+
+ if (iter->flipowner) {
+ /* hand the descriptor over to the other locale */
+ iter->desc->sown = !iter->ioq->locale;
+ wmb(); /* sown must be visible before we signal */
+ }
+
+ ret = ioq_iter_seek(iter, ioq_seek_next, 0, flags);
+
+ if (iter->update)
+ ioq_signal(iter->ioq, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ioq_iter_push);
+
+/*
+ * ioq_iter_pop() - consume the descriptor under the iterator and advance
+ * @iter:  iterator, which must currently sit on the head of its index
+ * @flags: only forwarded to ioq_iter_seek()
+ *
+ * Advances the head of the iterator's index, clears the full flag if it was
+ * set, optionally hands the descriptor to the other locale by flipping
+ * sown, moves the iterator to the next slot and, if the iterator was
+ * created with auto-update, signals the queue.
+ *
+ * Returns the result of the seek to the next slot, or -EINVAL if the
+ * iterator is not at the head or the descriptor is not owned by the local
+ * side.
+ */
+int ioq_iter_pop(struct ioq_iterator *iter, int flags)
+{
+ struct ioq_ring_idx *idx = iter->idx;
+ int ret;
+
+ /*
+ * It's only valid to pop if we are currently pointed at the head
+ * and the descriptor there is owned by our locale.  The ownership
+ * test dereferences iter->desc, but is only evaluated once the
+ * position test has passed.
+ */
+ if (iter->pos != le32_to_cpu(idx->head) || iter->desc->sown != iter->ioq->locale)
+ return -EINVAL;
+
+ idx->head = cpu_to_le32(modulo_inc(le32_to_cpu(idx->head), iter->ioq->count));
+ wmb(); /* head must be visible before full */
+
+ if (idx->full) {
+ idx->full = 0;
+ wmb(); /* full must be visible before sown */
+ }
+
+ if (iter->flipowner) {
+ /* hand the descriptor over to the other locale */
+ iter->desc->sown = !iter->ioq->locale;
+ wmb(); /* sown must be visible before we signal */
+ }
+
+ ret = ioq_iter_seek(iter, ioq_seek_next, 0, flags);
+
+ if (iter->update)
+ ioq_signal(iter->ioq, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ioq_iter_pop);
+
+/*
+ * Map an index type onto the matching ring index inside the queue's head
+ * descriptor.  Only the valid and in-use types are accepted; any other
+ * value panics.
+ */
+static struct ioq_ring_idx *idxtype_to_idx(struct ioq *ioq,
+					   enum ioq_idx_type type)
+{
+	if (type != ioq_idxtype_valid && type != ioq_idxtype_inuse)
+		panic("IOQ: illegal index type: %d", type);
+
+	return &ioq->head_desc->idx[type];
+}
+
+/*
+ * ioq_iter_init() - prepare an iterator for use on @ioq
+ * @ioq:   queue the iterator walks
+ * @iter:  iterator to initialise
+ * @type:  index to operate on; ioq_idxtype_both selects the valid index for
+ *         normal processing and additionally marks the iterator dual-index
+ * @flags: IOQ_ITER_AUTOUPDATE enables signalling on push/pop,
+ *         IOQ_ITER_NOFLIPOWNER suppresses the ownership flip
+ *
+ * The iterator starts with no position (pos = -1, desc = NULL); it has to
+ * be seeked before use.  Always returns 0.
+ */
+int ioq_iter_init(struct ioq *ioq, struct ioq_iterator *iter,
+		  enum ioq_idx_type type, int flags)
+{
+	int both = (type == ioq_idxtype_both);
+
+	iter->ioq = ioq;
+	iter->pos = -1;
+	iter->desc = NULL;
+	iter->update = (flags & IOQ_ITER_AUTOUPDATE);
+	iter->flipowner = !(flags & IOQ_ITER_NOFLIPOWNER);
+	iter->dualidx = both ? 1 : 0;
+
+	/* a dual-index iterator does its normal processing on the valid index */
+	iter->idx = idxtype_to_idx(ioq, both ? ioq_idxtype_valid : type);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ioq_iter_init);
+
+/* Number of occupied slots on the index selected by @type. */
+int ioq_count(struct ioq *ioq, enum ioq_idx_type type)
+{
+	struct ioq_ring_idx *idx = idxtype_to_idx(ioq, type);
+
+	return ioq_ring_count(idx, ioq->count);
+}
+EXPORT_SYMBOL_GPL(ioq_count);
+
+/* Number of free slots left on the index selected by @type. */
+int ioq_remain(struct ioq *ioq, enum ioq_idx_type type)
+{
+	struct ioq_ring_idx *idx = idxtype_to_idx(ioq, type);
+	int used = ioq_ring_count(idx, ioq->count);
+
+	return ioq->count - used;
+}
+EXPORT_SYMBOL_GPL(ioq_remain);
+
+/* Total number of slots in the ring, as given to ioq_init(). */
+int ioq_size(struct ioq *ioq)
+{
+	int slots = ioq->count;
+
+	return slots;
+}
+EXPORT_SYMBOL_GPL(ioq_size);
+
+/* Non-zero when the index selected by @type has no free slot left. */
+int ioq_full(struct ioq *ioq, enum ioq_idx_type type)
+{
+	return idx_full(idxtype_to_idx(ioq, type));
+}
+EXPORT_SYMBOL_GPL(ioq_full);
+
+/*
+ * shm_signal notifier callback installed by ioq_init(): wakes any sleepers
+ * on the queue's wait-queue and then forwards the event to the queue's own
+ * notifier, if one is registered.
+ */
+static void ioq_shm_signal(struct shm_signal_notifier *notifier)
+{
+	struct ioq *q = container_of(notifier, struct ioq, shm_notifier);
+
+	if (waitqueue_active(&q->wq))
+		wake_up(&q->wq);
+
+	if (!q->notifier)
+		return;
+
+	q->notifier->signal(q->notifier);
+}
+
+/*
+ * ioq_init() - initialise a queue object around an existing ring
+ * @ioq:    queue to initialise; it is zeroed first
+ * @ops:    operations table stored in the queue
+ * @locale: which side of the ring this instance represents
+ * @head:   ring head descriptor; the ring itself starts at head->ring[0]
+ * @signal: shm_signal used by the queue; its notifier is pointed at the
+ *          queue so that incoming signals reach ioq_shm_signal()
+ * @count:  number of slots in the ring
+ */
+void ioq_init(struct ioq *ioq,
+	      struct ioq_ops *ops,
+	      enum ioq_locality locale,
+	      struct ioq_ring_head *head,
+	      struct shm_signal *signal,
+	      size_t count)
+{
+	memset(ioq, 0, sizeof(*ioq));
+
+	/* plain configuration taken straight from the arguments */
+	ioq->ops = ops;
+	ioq->locale = locale;
+	ioq->count = count;
+	ioq->signal = signal;
+	ioq->head_desc = head;
+	ioq->ring = &head->ring[0];
+
+	kref_init(&ioq->kref);
+	init_waitqueue_head(&ioq->wq);
+
+	/* route shm_signal notifications into this queue */
+	ioq->shm_notifier.signal = &ioq_shm_signal;
+	signal->notifier = &ioq->shm_notifier;
+}
+EXPORT_SYMBOL_GPL(ioq_init);
diff --git a/lib/shm_signal.c b/lib/shm_signal.c
new file mode 100644
index 000000000000..8d3e9b418a27
--- /dev/null
+++ b/lib/shm_signal.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * See include/linux/shm_signal.h for documentation
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/shm_signal.h>
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
+
+/*
+ * shm_signal_enable() - unmask notifications for the local side
+ * @s:     signal object
+ * @flags: not used by this function
+ *
+ * Sets the local irq's enabled flag under s->lock.  If the local irq is
+ * already dirty or pending and no wakeup is currently in progress, the
+ * deferred-notify tasklet is scheduled so the outstanding work is handled.
+ *
+ * Always returns 0.
+ */
+int shm_signal_enable(struct shm_signal *s, int flags)
+{
+ struct shm_signal_irq *irq = &s->desc->irq[s->locale];
+ unsigned long iflags;
+
+ spin_lock_irqsave(&s->lock, iflags);
+
+ irq->enabled = 1;
+ wmb();
+
+ /* skip the tasklet when _shm_signal_wakeup() is already running */
+ if ((irq->dirty || irq->pending)
+ && !test_bit(shm_signal_in_wakeup, &s->flags)) {
+ rmb();
+ tasklet_schedule(&s->deferred_notify);
+ }
+
+ spin_unlock_irqrestore(&s->lock, iflags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(shm_signal_enable);
+
+/*
+ * shm_signal_disable() - mask notifications for the local side
+ * @s:     signal object
+ * @flags: not used by this function
+ *
+ * Clears the local irq's enabled flag and issues a write barrier.
+ * Always returns 0.
+ */
+int shm_signal_disable(struct shm_signal *s, int flags)
+{
+	s->desc->irq[s->locale].enabled = 0;
+	wmb();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(shm_signal_disable);
+
+/*
+ * signaling protocol:
+ *
+ * each side of the shm_signal has an "irq" structure with the following
+ * fields:
+ *
+ * - enabled: controlled by shm_signal_enable/disable() to mask/unmask
+ * the notification locally
+ * - dirty: indicates if the shared-memory is dirty or clean. This
+ * is updated regardless of the enabled/pending state so that
+ * the state is always accurately tracked.
+ * - pending: indicates if a signal is pending to the remote locale.
+ * This allows us to determine if a remote-notification is
+ * already in flight to optimize spurious notifications away.
+ */
+/*
+ * shm_signal_inject() - mark the remote side dirty and notify it if needed
+ * @s:     signal object
+ * @flags: not used by this function
+ *
+ * Operates on the irq structure of the *other* locale: dirty is always set,
+ * and s->ops->inject() is called only when the remote side has
+ * notifications enabled and none is pending yet.
+ *
+ * Always returns 0.
+ */
+int shm_signal_inject(struct shm_signal *s, int flags)
+{
+ /* Load the irq structure from the other locale */
+ struct shm_signal_irq *irq = &s->desc->irq[!s->locale];
+
+ /*
+ * We always mark the remote side as dirty regardless of whether
+ * they need to be notified.
+ */
+ irq->dirty = 1;
+ wmb(); /* dirty must be visible before we test the pending state */
+
+ if (irq->enabled && !irq->pending) {
+ rmb();
+
+ /*
+ * If the remote side has enabled notifications, and we do
+ * not see a notification pending, we must inject a new one.
+ */
+ irq->pending = 1;
+ wmb(); /* make it visible before we do the injection */
+
+ s->ops->inject(s);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(shm_signal_inject);
+
+/*
+ * _shm_signal_wakeup() - process notifications for the local side
+ * @s: signal object
+ *
+ * While the local irq is enabled and either dirty or pending, repeatedly
+ * clears dirty and invokes s->notifier->signal() (if a notifier is set),
+ * then acknowledges the notification by clearing pending.
+ *
+ * s->lock is held throughout except around the notifier callback itself.
+ * The shm_signal_in_wakeup bit is set in s->flags for the duration of the
+ * call; shm_signal_enable() tests it before scheduling the tasklet.
+ */
+void _shm_signal_wakeup(struct shm_signal *s)
+{
+ struct shm_signal_irq *irq = &s->desc->irq[s->locale];
+ int dirty;
+ unsigned long flags;
+
+ spin_lock_irqsave(&s->lock, flags);
+
+ __set_bit(shm_signal_in_wakeup, &s->flags);
+
+ /*
+ * The outer loop protects against race conditions between
+ * irq->dirty and irq->pending updates
+ */
+ while (irq->enabled && (irq->dirty || irq->pending)) {
+
+ /*
+ * Run until we completely exhaust irq->dirty (it may
+ * be re-dirtied by the remote side while we are in the
+ * callback). We let "pending" remain untouched until we have
+ * processed them all so that the remote side knows we do not
+ * need a new notification (yet).
+ */
+ do {
+ irq->dirty = 0;
+ /* the unlock is an implicit wmb() for dirty = 0 */
+ spin_unlock_irqrestore(&s->lock, flags);
+
+ /* the callback runs without s->lock held */
+ if (s->notifier)
+ s->notifier->signal(s->notifier);
+
+ spin_lock_irqsave(&s->lock, flags);
+ dirty = irq->dirty;
+ rmb();
+
+ } while (irq->enabled && dirty);
+
+ barrier();
+
+ /*
+ * We can finally acknowledge the notification by clearing
+ * "pending" after all of the dirty memory has been processed
+ * Races against this clearing are handled by the outer loop.
+ * Subsequent iterations of this loop will execute with
+ * pending=0 potentially leading to future spurious
+ * notifications, but this is an acceptable tradeoff as this
+ * will be rare and harmless.
+ */
+ irq->pending = 0;
+ wmb();
+
+ }
+
+ __clear_bit(shm_signal_in_wakeup, &s->flags);
+ spin_unlock_irqrestore(&s->lock, flags);
+
+}
+EXPORT_SYMBOL_GPL(_shm_signal_wakeup);
+
+/*
+ * kref release callback: recover the shm_signal that embeds @kref and hand
+ * it to the release operation supplied by its owner.
+ */
+void _shm_signal_release(struct kref *kref)
+{
+	struct shm_signal *sig;
+
+	sig = container_of(kref, struct shm_signal, kref);
+	sig->ops->release(sig);
+}
+EXPORT_SYMBOL_GPL(_shm_signal_release);
+
+/*
+ * Tasklet handler: @data is the shm_signal registered in shm_signal_init();
+ * run the wakeup processing for it.
+ */
+static void
+deferred_notify(unsigned long data)
+{
+	_shm_signal_wakeup((struct shm_signal *)data);
+}
+
+/*
+ * shm_signal_init() - initialise a shm_signal object
+ * @s:      object to initialise; it is zeroed first
+ * @locale: which side of the shared descriptor this instance represents
+ * @ops:    operations table stored in the object
+ * @desc:   shared descriptor holding the per-locale irq state
+ *
+ * Also sets up the reference count, the lock and the deferred-notify
+ * tasklet, which calls deferred_notify() with @s as its argument.
+ */
+void shm_signal_init(struct shm_signal *s, enum shm_signal_locality locale,
+		     struct shm_signal_ops *ops, struct shm_signal_desc *desc)
+{
+	memset(s, 0, sizeof(*s));
+
+	s->desc = desc;
+	s->ops = ops;
+	s->locale = locale;
+
+	kref_init(&s->kref);
+	spin_lock_init(&s->lock);
+	tasklet_init(&s->deferred_notify, deferred_notify, (unsigned long)s);
+}
+EXPORT_SYMBOL_GPL(shm_signal_init);