author     Stephen Rothwell <sfr@canb.auug.org.au>    2010-04-15 13:43:06 +1000
committer  Stephen Rothwell <sfr@canb.auug.org.au>    2010-04-15 13:43:12 +1000
commit     ced19ef5bb95b825d0fd40b67178c1786557edb3 (patch)
tree       4046727f827550fd1dc31716432fe0959ff7b37b /drivers
parent     f574bb9fae7e5d7248252ec93ea85daaec745192 (diff)
parent     e1077ef3b2751766c4437e2f974e3d7372742d0d (diff)
Merge remote branch 'alacrity/linux-next'
Conflicts:
	include/linux/Kbuild
	lib/Kconfig
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Makefile           |    1
-rw-r--r--  drivers/net/Kconfig        |   14
-rw-r--r--  drivers/net/Makefile       |    1
-rw-r--r--  drivers/net/vbus-enet.c    | 1560
-rw-r--r--  drivers/vbus/Kconfig       |   25
-rw-r--r--  drivers/vbus/Makefile      |    6
-rw-r--r--  drivers/vbus/bus-proxy.c   |  248
-rw-r--r--  drivers/vbus/pci-bridge.c  | 1016

8 files changed, 2871 insertions(+), 0 deletions(-)
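Everything below leans on a single idiom: an IOQ is a shared-memory descriptor ring, and both vbus-enet.c and pci-bridge.c populate their rings the same way. They initialize an iterator on the "valid" index with autoupdate disabled, seek to the free slot, fill each descriptor, and push it. As a minimal sketch (not part of the patch), assuming only the <linux/ioq.h> API exactly as the code below uses it, with ring_fill_example() a hypothetical helper name:

/*
 * Sketch: the ring-population idiom used by rx_setup() and
 * eventq_init() below.  Not part of the patch.
 */
#include <linux/bug.h>
#include <linux/ioq.h>

static void ring_fill_example(struct ioq *ioq, u64 pa, u64 len)
{
	struct ioq_iterator iter;
	int ret;

	/* iterate the "valid" index; no AUTOUPDATE means no hypercall per push */
	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
	BUG_ON(ret < 0);

	/* on a brand-new ring, the first free slot is the valid-tail */
	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
	BUG_ON(ret < 0);

	while (!iter.desc->valid) {
		iter.desc->ptr   = cpu_to_le64(pa);
		iter.desc->len   = cpu_to_le64(len);
		iter.desc->valid = 1;

		/* advances the valid-tail and our ring position together */
		ret = ioq_iter_push(&iter, 0);
		BUG_ON(ret < 0);
	}

	/* one explicit signal replaces the per-descriptor hypercalls */
	ioq_signal(ioq, 0);
}

rx_setup() and eventq_init() seek to the tail as above; the event-queue loop in vbus_enet_evq_negcap() seeks to slot 0 with ioq_seek_set instead, and the teardown paths run the mirror image with ioq_iter_pop().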
diff --git a/drivers/Makefile b/drivers/Makefile index 34f1e1064dbc..19e82b9c91c2 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -113,3 +113,4 @@ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ obj-y += ieee802154/ +obj-y += vbus/ diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index dbd26f992158..18c0d7737911 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -3316,4 +3316,18 @@ config VMXNET3 To compile this driver as a module, choose M here: the module will be called vmxnet3. +config VBUS_ENET + tristate "VBUS Ethernet Driver" + default n + depends on VBUS_PROXY + help + A virtualized 802.x network device based on the VBUS + "virtual-ethernet" interface. It can be used with any + hypervisor/kernel that supports the vbus+venet protocol. + +config VBUS_ENET_DEBUG + bool "Enable Debugging" + depends on VBUS_ENET + default n + endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index ebf80b983063..432cae16c002 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -288,6 +288,7 @@ obj-$(CONFIG_FS_ENET) += fs_enet/ obj-$(CONFIG_NETXEN_NIC) += netxen/ obj-$(CONFIG_NIU) += niu.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o +obj-$(CONFIG_VBUS_ENET) += vbus-enet.o obj-$(CONFIG_SFC) += sfc/ obj-$(CONFIG_WIMAX) += wimax/ diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c new file mode 100644 index 000000000000..94b86d482cee --- /dev/null +++ b/drivers/net/vbus-enet.c @@ -0,0 +1,1560 @@ +/* + * vbus_enet - A virtualized 802.x network device based on the VBUS interface + * + * Copyright (C) 2009 Novell, Gregory Haskins <ghaskins@novell.com> + * + * Derived from the SNULL example from the book "Linux Device Drivers" by + * Alessandro Rubini, Jonathan Corbet, and Greg Kroah-Hartman, published + * by O'Reilly & Associates. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/interrupt.h> + +#include <linux/in.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/skbuff.h> +#include <linux/ioq.h> +#include <linux/vbus_driver.h> + +#include <linux/in6.h> +#include <asm/checksum.h> + +#include <linux/venet.h> + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("virtual-ethernet"); +MODULE_VERSION("1"); + +static int rx_ringlen = 256; +module_param(rx_ringlen, int, 0444); +static int tx_ringlen = 256; +module_param(tx_ringlen, int, 0444); +static int sg_enabled = 1; +module_param(sg_enabled, int, 0444); + +#define PDEBUG(_dev, fmt, args...) 
dev_dbg(&(_dev)->dev, fmt, ## args) + +#define SG_DESC_SIZE VSG_DESC_SIZE(MAX_SKB_FRAGS) + +struct vbus_enet_queue { + struct ioq *queue; + struct ioq_notifier notifier; + unsigned long count; +}; + +struct vbus_enet_priv { + spinlock_t lock; + struct net_device *dev; + struct vbus_device_proxy *vdev; + struct napi_struct napi; + struct vbus_enet_queue rxq; + struct { + struct vbus_enet_queue veq; + struct tasklet_struct task; + struct sk_buff_head outstanding; + } tx; + bool sg; + struct { + bool enabled; + char *pool; + } pmtd; /* pre-mapped transmit descriptors */ + struct { + bool enabled; + bool linkstate; + bool txc; + unsigned long evsize; + struct vbus_enet_queue veq; + struct tasklet_struct task; + char *pool; + } evq; + struct { + bool available; + char *pool; + struct vbus_enet_queue pageq; + } l4ro; + + struct sk_buff *(*import)(struct vbus_enet_priv *priv, + struct ioq_ring_desc *desc); +}; + +static void vbus_enet_tx_reap(struct vbus_enet_priv *priv); + +static struct vbus_enet_priv * +napi_to_priv(struct napi_struct *napi) +{ + return container_of(napi, struct vbus_enet_priv, napi); +} + +static int +queue_init(struct vbus_enet_priv *priv, + struct vbus_enet_queue *q, + const char *name, + int qid, + size_t ringsize, + void (*func)(struct ioq_notifier *)) +{ + struct vbus_device_proxy *dev = priv->vdev; + int ret; + char _name[64]; + + if (name) + snprintf(_name, sizeof(_name), "%s-%s", priv->dev->name, name); + + ret = vbus_driver_ioq_alloc(dev, name ? _name : NULL, qid, 0, + ringsize, &q->queue); + if (ret < 0) + panic("ioq_alloc failed: %d\n", ret); + + if (func) { + q->notifier.signal = func; + q->queue->notifier = &q->notifier; + } + + q->count = ringsize; + + return 0; +} + +static int +devcall(struct vbus_enet_priv *priv, u32 func, void *data, size_t len) +{ + struct vbus_device_proxy *dev = priv->vdev; + + return dev->ops->call(dev, func, data, len, 0); +} + +/* + * --------------- + * rx descriptors + * --------------- + */ + +static void +rxdesc_alloc(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc, size_t len) +{ + struct net_device *dev = priv->dev; + struct sk_buff *skb; + + len += ETH_HLEN; + + skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN); + BUG_ON(!skb); + + skb_reserve(skb, NET_IP_ALIGN); /* align IP on 16B boundary */ + + if (priv->l4ro.available) { + /* + * We will populate an SG descriptor initially with one + * IOV filled with an MTU SKB. 
If the packet needs to be + * larger than MTU, the host will grab pages out of the + * page-queue and populate additional IOVs + */ + struct venet_sg *vsg = (struct venet_sg *)(unsigned long)desc->cookie; + struct venet_iov *iov = &vsg->iov[0]; + + memset(vsg, 0, SG_DESC_SIZE); + + vsg->cookie = (u64)(unsigned long)skb; + vsg->count = 1; + + iov->ptr = (u64)__pa(skb->data); + iov->len = len; + } else { + desc->cookie = (u64)(unsigned long)skb; + desc->ptr = cpu_to_le64(__pa(skb->data)); + desc->len = cpu_to_le64(len); /* total length */ + } + + desc->valid = 1; +} + +static void +rx_pageq_refill(struct vbus_enet_priv *priv, gfp_t gfp_mask) +{ + struct ioq *ioq = priv->l4ro.pageq.queue; + struct ioq_iterator iter; + int ret, added = 0; + + if (ioq_full(ioq, ioq_idxtype_inuse)) + /* nothing to do if the pageq is already fully populated */ + return; + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0); + BUG_ON(ret < 0); /* will never fail unless seriously broken */ + + ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty page + */ + while (!iter.desc->sown) { + struct page *page = NULL; + + page = alloc_page(gfp_mask); + + if (!page) + break; + + added = 1; + iter.desc->cookie = (u64)(unsigned long)page; + iter.desc->ptr = cpu_to_le64(__pa(page_address(page))); + iter.desc->len = cpu_to_le64(PAGE_SIZE); + + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + } + + if (added) + ioq_signal(ioq, 0); +} + +static void +rx_setup(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->rxq.queue; + struct ioq_iterator iter; + int ret; + int i = 0; + + /* + * We want to iterate on the "valid" index. By default the iterator + * will not "autoupdate" which means it will not hypercall the host + * with our changes. This is good, because we are really just + * initializing stuff here anyway. Note that you can always manually + * signal the host with ioq_signal() if the autoupdate feature is not + * used. + */ + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); /* will never fail unless seriously broken */ + + /* + * Seek to the tail of the valid index (which should be our first + * item, since the queue is brand-new) + */ + ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty buffer and mark it valid + */ + while (!iter.desc->valid) { + if (priv->l4ro.available) { + size_t offset = (i * SG_DESC_SIZE); + void *addr = &priv->l4ro.pool[offset]; + + iter.desc->ptr = cpu_to_le64(offset); + iter.desc->cookie = (u64)(unsigned long)addr; + iter.desc->len = cpu_to_le64(SG_DESC_SIZE); + } + + rxdesc_alloc(priv, iter.desc, priv->dev->mtu); + + /* + * This push operation will simultaneously advance the + * valid-head index and increment our position in the queue + * by one. 
+ */ + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + + i++; + } + + if (priv->l4ro.available) + rx_pageq_refill(priv, GFP_KERNEL); +} + +static void +rx_rxq_teardown(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->rxq.queue; + struct ioq_iterator iter; + int ret; + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * free each valid descriptor + */ + while (iter.desc->valid) { + struct sk_buff *skb; + + if (priv->l4ro.available) { + struct venet_sg *vsg; + int i; + + vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie; + + /* skip i=0, since that is the skb->data IOV */ + for (i = 1; i < vsg->count; i++) { + struct venet_iov *iov = &vsg->iov[i]; + struct page *page = (struct page *)(unsigned long)iov->ptr; + + put_page(page); + } + + skb = (struct sk_buff *)(unsigned long)vsg->cookie; + } else + skb = (struct sk_buff *)(unsigned long)iter.desc->cookie; + + iter.desc->valid = 0; + wmb(); + + iter.desc->ptr = 0; + iter.desc->cookie = 0; + + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + + dev_kfree_skb(skb); + } +} + +static void +rx_l4ro_teardown(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->l4ro.pageq.queue; + struct ioq_iterator iter; + int ret; + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * free each valid descriptor + */ + while (iter.desc->sown) { + struct page *page = (struct page *)(unsigned long)iter.desc->cookie; + + iter.desc->valid = 0; + wmb(); + + iter.desc->ptr = 0; + iter.desc->cookie = 0; + + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + + put_page(page); + } + + ioq_put(ioq); + kfree(priv->l4ro.pool); +} + +static void +rx_teardown(struct vbus_enet_priv *priv) +{ + rx_rxq_teardown(priv); + + if (priv->l4ro.available) + rx_l4ro_teardown(priv); +} + +static int +tx_setup(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->tx.veq.queue; + struct ioq_iterator iter; + int i; + int ret; + + if (!priv->sg) + /* + * There is nothing to do for a ring that is not using + * scatter-gather + */ + return 0; + + /* pre-allocate our descriptor pool if pmtd is enabled */ + if (priv->pmtd.enabled) { + struct vbus_device_proxy *dev = priv->vdev; + size_t poollen = SG_DESC_SIZE * priv->tx.veq.count; + char *pool; + int shmid; + + /* pmtdquery will return the shm-id to use for the pool */ + ret = devcall(priv, VENET_FUNC_PMTDQUERY, NULL, 0); + BUG_ON(ret < 0); + + shmid = ret; + + pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA); + if (!pool) + return -ENOMEM; + + priv->pmtd.pool = pool; + + ret = dev->ops->shm(dev, NULL, shmid, 0, pool, poollen, + NULL, NULL, 0); + BUG_ON(ret < 0); + } + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty SG descriptor + */ + for (i = 0; i < priv->tx.veq.count; i++) { + struct venet_sg *vsg; + + if (priv->pmtd.enabled) { + size_t offset = (i * SG_DESC_SIZE); + + vsg = (struct venet_sg *)&priv->pmtd.pool[offset]; + iter.desc->ptr = cpu_to_le64(offset); + } else { + vsg = kzalloc(SG_DESC_SIZE, GFP_KERNEL); + if (!vsg) + return -ENOMEM; + + iter.desc->ptr = cpu_to_le64(__pa(vsg)); + } + + iter.desc->cookie = (u64)(unsigned long)vsg; + iter.desc->len = cpu_to_le64(SG_DESC_SIZE); + + ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); + BUG_ON(ret < 0); + } + 
+ return 0; +} + +static void +tx_teardown(struct vbus_enet_priv *priv) +{ + struct ioq *ioq = priv->tx.veq.queue; + struct ioq_iterator iter; + struct sk_buff *skb; + int ret; + + /* forcefully free all outstanding transmissions */ + while ((skb = __skb_dequeue(&priv->tx.outstanding))) + dev_kfree_skb(skb); + + if (!priv->sg) + /* + * There is nothing else to do for a ring that is not using + * scatter-gather + */ + return; + + if (priv->pmtd.enabled) { + /* + * PMTD mode means we only need to free the pool + */ + kfree(priv->pmtd.pool); + return; + } + + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + /* seek to position 0 */ + ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0); + BUG_ON(ret < 0); + + /* + * free each valid descriptor + */ + while (iter.desc->cookie) { + struct venet_sg *vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie; + + iter.desc->valid = 0; + wmb(); + + iter.desc->ptr = 0; + iter.desc->cookie = 0; + + ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); + BUG_ON(ret < 0); + + kfree(vsg); + } +} + +static void +evq_teardown(struct vbus_enet_priv *priv) +{ + if (!priv->evq.enabled) + return; + + ioq_put(priv->evq.veq.queue); + kfree(priv->evq.pool); +} + +/* + * Open and close + */ + +static int +vbus_enet_open(struct net_device *dev) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + int ret; + + ret = devcall(priv, VENET_FUNC_LINKUP, NULL, 0); + BUG_ON(ret < 0); + + napi_enable(&priv->napi); + + return 0; +} + +static int +vbus_enet_stop(struct net_device *dev) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + int ret; + + napi_disable(&priv->napi); + + ret = devcall(priv, VENET_FUNC_LINKDOWN, NULL, 0); + BUG_ON(ret < 0); + + return 0; +} + +/* + * Configuration changes (passed on by ifconfig) + */ +static int +vbus_enet_config(struct net_device *dev, struct ifmap *map) +{ + if (dev->flags & IFF_UP) /* can't act on a running interface */ + return -EBUSY; + + /* Don't allow changing the I/O address */ + if (map->base_addr != dev->base_addr) { + dev_warn(&dev->dev, "Can't change I/O address\n"); + return -EOPNOTSUPP; + } + + /* ignore other fields */ + return 0; +} + +static void +vbus_enet_schedule_rx(struct vbus_enet_priv *priv) +{ + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + if (napi_schedule_prep(&priv->napi)) { + /* Disable further interrupts */ + ioq_notify_disable(priv->rxq.queue, 0); + __napi_schedule(&priv->napi); + } + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int +vbus_enet_change_mtu(struct net_device *dev, int new_mtu) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + int ret; + + dev->mtu = new_mtu; + + /* + * FLUSHRX will cause the device to flush any outstanding + * RX buffers. They will appear to come in as 0 length + * packets which we can simply discard and replace with new_mtu + * buffers for the future. + */ + ret = devcall(priv, VENET_FUNC_FLUSHRX, NULL, 0); + BUG_ON(ret < 0); + + vbus_enet_schedule_rx(priv); + + return 0; +} + +static struct sk_buff * +vbus_enet_l4ro_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) +{ + struct venet_sg *vsg = (struct venet_sg *)(unsigned long)desc->cookie; + struct sk_buff *skb = (struct sk_buff *)(unsigned long)vsg->cookie; + struct skb_shared_info *sinfo = skb_shinfo(skb); + int i; + + rx_pageq_refill(priv, GFP_ATOMIC); + + if (!vsg->len) + /* + * the device may send a zero-length packet when its + * flushing references on the ring. 
We can just drop + * these on the floor + */ + goto fail; + + /* advance only by the linear portion in IOV[0] */ + skb_put(skb, vsg->iov[0].len); + + /* skip i=0, since that is the skb->data IOV */ + for (i = 1; i < vsg->count; i++) { + struct venet_iov *iov = &vsg->iov[i]; + struct page *page = (struct page *)(unsigned long)iov->ptr; + skb_frag_t *f = &sinfo->frags[i-1]; + + f->page = page; + f->page_offset = 0; + f->size = iov->len; + + PDEBUG(priv->dev, "SG: Importing %d byte page[%i]\n", + f->size, i); + + skb->data_len += f->size; + skb->len += f->size; + skb->truesize += f->size; + sinfo->nr_frags++; + } + + if (vsg->flags & VENET_SG_FLAG_NEEDS_CSUM + && !skb_partial_csum_set(skb, vsg->csum.start, + vsg->csum.offset)) { + priv->dev->stats.rx_frame_errors++; + goto fail; + } + + if (vsg->flags & VENET_SG_FLAG_GSO) { + PDEBUG(priv->dev, "L4RO packet detected\n"); + + switch (vsg->gso.type) { + case VENET_GSO_TYPE_TCPV4: + sinfo->gso_type = SKB_GSO_TCPV4; + break; + case VENET_GSO_TYPE_TCPV6: + sinfo->gso_type = SKB_GSO_TCPV6; + break; + case VENET_GSO_TYPE_UDP: + sinfo->gso_type = SKB_GSO_UDP; + break; + default: + PDEBUG(priv->dev, "Illegal L4RO type: %d\n", + vsg->gso.type); + priv->dev->stats.rx_frame_errors++; + goto fail; + } + + if (vsg->flags & VENET_SG_FLAG_ECN) + sinfo->gso_type |= SKB_GSO_TCP_ECN; + + sinfo->gso_size = vsg->gso.size; + if (sinfo->gso_size == 0) { + PDEBUG(priv->dev, "Illegal L4RO size: %d\n", + vsg->gso.size); + priv->dev->stats.rx_frame_errors++; + goto fail; + } + + /* + * Header must be checked, and gso_segs + * computed. + */ + sinfo->gso_type |= SKB_GSO_DODGY; + sinfo->gso_segs = 0; + } + + return skb; + +fail: + dev_kfree_skb(skb); + + return NULL; +} + +static struct sk_buff * +vbus_enet_flat_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc) +{ + struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->cookie; + + if (!desc->len) { + /* + * the device may send a zero-length packet when its + * flushing references on the ring. We can just drop + * these on the floor + */ + dev_kfree_skb(skb); + return NULL; + } + + skb_put(skb, le64_to_cpu(desc->len)); + + return skb; +} + +/* + * The poll implementation. + */ +static int +vbus_enet_poll(struct napi_struct *napi, int budget) +{ + struct vbus_enet_priv *priv = napi_to_priv(napi); + int npackets = 0; + struct ioq_iterator iter; + int ret; + + PDEBUG(priv->dev, "polling...\n"); + + /* We want to iterate on the head of the in-use index */ + ret = ioq_iter_init(priv->rxq.queue, &iter, ioq_idxtype_inuse, + IOQ_ITER_AUTOUPDATE); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * We stop if we have met the quota or there are no more packets. 
+ * The EOM is indicated by finding a packet that is still owned by + * the south side + */ + while ((npackets < budget) && (!iter.desc->sown)) { + struct sk_buff *skb; + + skb = priv->import(priv, iter.desc); + if (skb) { + /* Maintain stats */ + npackets++; + priv->dev->stats.rx_packets++; + priv->dev->stats.rx_bytes += skb->len; + + /* Pass the buffer up to the stack */ + skb->dev = priv->dev; + skb->protocol = eth_type_trans(skb, priv->dev); + netif_receive_skb(skb); + + mb(); + } + + /* Grab a new buffer to put in the ring */ + rxdesc_alloc(priv, iter.desc, priv->dev->mtu); + + /* Advance the in-use tail */ + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + } + + PDEBUG(priv->dev, "%d packets received\n", npackets); + + /* + * If we processed all packets, we're done; tell the kernel and + * reenable ints + */ + if (ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) { + napi_complete(napi); + ioq_notify_enable(priv->rxq.queue, 0); + ret = 0; + } else + /* We couldn't process everything. */ + ret = 1; + + return ret; +} + +/* + * Transmit a packet (called by the kernel) + */ +static int +vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + struct ioq_iterator iter; + int ret; + unsigned long flags; + + PDEBUG(priv->dev, "sending %d bytes\n", skb->len); + + spin_lock_irqsave(&priv->lock, flags); + + if (ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) { + /* + * We must flow-control the kernel by disabling the + * queue + */ + spin_unlock_irqrestore(&priv->lock, flags); + netif_stop_queue(dev); + dev_err(&priv->dev->dev, "tx on full queue bug\n"); + return 1; + } + + /* + * We want to iterate on the tail of both the "inuse" and "valid" index + * so we specify the "both" index + */ + ret = ioq_iter_init(priv->tx.veq.queue, &iter, ioq_idxtype_both, + IOQ_ITER_AUTOUPDATE); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0); + BUG_ON(ret < 0); + BUG_ON(iter.desc->sown); + + if (priv->sg) { + struct venet_sg *vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie; + struct scatterlist sgl[MAX_SKB_FRAGS+1]; + struct scatterlist *sg; + int count, maxcount = ARRAY_SIZE(sgl); + + sg_init_table(sgl, maxcount); + + memset(vsg, 0, sizeof(*vsg)); + + vsg->cookie = (u64)(unsigned long)skb; + vsg->len = skb->len; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + vsg->flags |= VENET_SG_FLAG_NEEDS_CSUM; + vsg->csum.start = skb->csum_start - skb_headroom(skb); + vsg->csum.offset = skb->csum_offset; + } + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + vsg->flags |= VENET_SG_FLAG_GSO; + + vsg->gso.hdrlen = skb_headlen(skb); + vsg->gso.size = sinfo->gso_size; + if (sinfo->gso_type & SKB_GSO_TCPV4) + vsg->gso.type = VENET_GSO_TYPE_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + vsg->gso.type = VENET_GSO_TYPE_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + vsg->gso.type = VENET_GSO_TYPE_UDP; + else + panic("Virtual-Ethernet: unknown GSO type " \ + "0x%x\n", sinfo->gso_type); + + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + vsg->flags |= VENET_SG_FLAG_ECN; + } + + count = skb_to_sgvec(skb, sgl, 0, skb->len); + + BUG_ON(count > maxcount); + + for (sg = &sgl[0]; sg; sg = sg_next(sg)) { + struct venet_iov *iov = &vsg->iov[vsg->count++]; + + iov->len = sg->length; + iov->ptr = (u64)sg_phys(sg); + } + + iter.desc->len = cpu_to_le64(VSG_DESC_SIZE(vsg->count)); + + } else { + /* + * non scatter-gather mode: simply put the skb right onto the + * ring. 
+ */ + iter.desc->cookie = (u64)(unsigned long)skb; + iter.desc->len = cpu_to_le64(skb->len); + iter.desc->ptr = cpu_to_le64(__pa(skb->data)); + } + + iter.desc->valid = 1; + + priv->dev->stats.tx_packets++; + priv->dev->stats.tx_bytes += skb->len; + + skb_queue_tail(&priv->tx.outstanding, skb); + + /* + * This advances both indexes together implicitly, and then + * signals the south side to consume the packet + */ + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + + dev->trans_start = jiffies; /* save the timestamp */ + + if (ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) { + /* + * If the queue is congested, we must flow-control the kernel + */ + PDEBUG(priv->dev, "backpressure tx queue\n"); + netif_stop_queue(dev); + } + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +/* assumes priv->lock held */ +static void +vbus_enet_skb_complete(struct vbus_enet_priv *priv, struct sk_buff *skb) +{ + PDEBUG(priv->dev, "completed sending %d bytes\n", + skb->len); + + skb_unlink(skb, &priv->tx.outstanding); + dev_kfree_skb(skb); +} + +/* + * reclaim any outstanding completed tx packets + * + * assumes priv->lock held + */ +static struct sk_buff * +vbus_enet_tx_reap_one(struct vbus_enet_priv *priv) +{ + struct sk_buff *skb = NULL; + struct ioq_iterator iter; + unsigned long flags; + int ret; + + spin_lock_irqsave(&priv->lock, flags); + + /* + * We want to iterate on the head of the valid index, but we + * do not want the iter_pop (below) to flip the ownership, so + * we set the NOFLIPOWNER option + */ + ret = ioq_iter_init(priv->tx.veq.queue, &iter, ioq_idxtype_valid, + IOQ_ITER_NOFLIPOWNER); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + if (iter.desc->valid && !iter.desc->sown) { + + if (priv->sg) { + struct venet_sg *vsg; + + vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie; + skb = (struct sk_buff *)(unsigned long)vsg->cookie; + } else + skb = (struct sk_buff *)(unsigned long)iter.desc->cookie; + + /* Reset the descriptor */ + iter.desc->valid = 0; + + /* Advance the valid-index head */ + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + } + + /* + * If we were previously stopped due to flow control, restart the + * processing + */ + if (netif_queue_stopped(priv->dev) + && !ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) { + PDEBUG(priv->dev, "re-enabling tx queue\n"); + netif_wake_queue(priv->dev); + } + + spin_unlock_irqrestore(&priv->lock, flags); + + return skb; +} + +static void +vbus_enet_tx_reap(struct vbus_enet_priv *priv) +{ + struct sk_buff *skb; + + while ((skb = vbus_enet_tx_reap_one(priv))) { + if (!priv->evq.txc) + /* + * We are responsible for freeing the packet upon + * reap if TXC is not enabled + */ + vbus_enet_skb_complete(priv, skb); + } +} + +static void +vbus_enet_timeout(struct net_device *dev) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + + dev_dbg(&dev->dev, "Transmit timeout\n"); + + vbus_enet_tx_reap(priv); +} + +static void +rx_isr(struct ioq_notifier *notifier) +{ + struct vbus_enet_priv *priv; + struct net_device *dev; + + priv = container_of(notifier, struct vbus_enet_priv, rxq.notifier); + dev = priv->dev; + + if (!ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) + vbus_enet_schedule_rx(priv); +} + +static void +deferred_tx_isr(unsigned long data) +{ + struct vbus_enet_priv *priv = (struct vbus_enet_priv *)data; + + PDEBUG(priv->dev, "deferred_tx_isr\n"); + + vbus_enet_tx_reap(priv); + + ioq_notify_enable(priv->tx.veq.queue, 0); +} + +static void +tx_isr(struct ioq_notifier 
*notifier) +{ + struct vbus_enet_priv *priv; + + priv = container_of(notifier, struct vbus_enet_priv, tx.veq.notifier); + + PDEBUG(priv->dev, "tx_isr\n"); + + ioq_notify_disable(priv->tx.veq.queue, 0); + tasklet_schedule(&priv->tx.task); +} + +static void +evq_linkstate_event(struct vbus_enet_priv *priv, + struct venet_event_header *header) +{ + struct venet_event_linkstate *event = + (struct venet_event_linkstate *)header; + + switch (event->state) { + case 0: + netif_carrier_off(priv->dev); + break; + case 1: + netif_carrier_on(priv->dev); + break; + default: + break; + } +} + +static void +evq_txc_event(struct vbus_enet_priv *priv, + struct venet_event_header *header) +{ + struct venet_event_txc *event = + (struct venet_event_txc *)header; + + vbus_enet_tx_reap(priv); + + vbus_enet_skb_complete(priv, (struct sk_buff *)(unsigned long)event->cookie); +} + +static void +deferred_evq_isr(unsigned long data) +{ + struct vbus_enet_priv *priv = (struct vbus_enet_priv *)data; + int nevents = 0; + struct ioq_iterator iter; + int ret; + + PDEBUG(priv->dev, "evq: polling...\n"); + + /* We want to iterate on the head of the in-use index */ + ret = ioq_iter_init(priv->evq.veq.queue, &iter, ioq_idxtype_inuse, + IOQ_ITER_AUTOUPDATE); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * The EOM is indicated by finding a packet that is still owned by + * the south side + */ + while (!iter.desc->sown) { + struct venet_event_header *header; + + header = (struct venet_event_header *)(unsigned long)iter.desc->cookie; + + switch (header->id) { + case VENET_EVENT_LINKSTATE: + evq_linkstate_event(priv, header); + break; + case VENET_EVENT_TXC: + evq_txc_event(priv, header); + break; + default: + panic("venet: unexpected event id:%d of size %d\n", + header->id, header->size); + break; + } + + memset((void *)(unsigned long)iter.desc->cookie, 0, priv->evq.evsize); + + /* Advance the in-use tail */ + ret = ioq_iter_pop(&iter, 0); + BUG_ON(ret < 0); + + nevents++; + } + + PDEBUG(priv->dev, "%d events received\n", nevents); + + ioq_notify_enable(priv->evq.veq.queue, 0); +} + +static void +evq_isr(struct ioq_notifier *notifier) +{ + struct vbus_enet_priv *priv; + + priv = container_of(notifier, struct vbus_enet_priv, evq.veq.notifier); + + PDEBUG(priv->dev, "evq_isr\n"); + + ioq_notify_disable(priv->evq.veq.queue, 0); + tasklet_schedule(&priv->evq.task); +} + +static int +vbus_enet_sg_negcap(struct vbus_enet_priv *priv) +{ + struct net_device *dev = priv->dev; + struct venet_capabilities caps; + int ret; + + memset(&caps, 0, sizeof(caps)); + + if (sg_enabled) { + caps.gid = VENET_CAP_GROUP_SG; + caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6 + |VENET_CAP_ECN|VENET_CAP_PMTD); + /* note: exclude UFO for now due to stack bug */ + } + + ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps)); + if (ret < 0) + return ret; + + if (caps.bits & VENET_CAP_SG) { + priv->sg = true; + + dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM|NETIF_F_FRAGLIST; + + if (caps.bits & VENET_CAP_TSO4) + dev->features |= NETIF_F_TSO; + if (caps.bits & VENET_CAP_UFO) + dev->features |= NETIF_F_UFO; + if (caps.bits & VENET_CAP_TSO6) + dev->features |= NETIF_F_TSO6; + if (caps.bits & VENET_CAP_ECN) + dev->features |= NETIF_F_TSO_ECN; + + if (caps.bits & VENET_CAP_PMTD) + priv->pmtd.enabled = true; + } + + return 0; +} + +static int +vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count) +{ + struct venet_capabilities caps; + int ret; + + memset(&caps, 0, sizeof(caps)); + + 
caps.gid = VENET_CAP_GROUP_EVENTQ; + caps.bits |= VENET_CAP_EVQ_LINKSTATE; + caps.bits |= VENET_CAP_EVQ_TXC; + + ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps)); + if (ret < 0) + return ret; + + if (caps.bits) { + struct vbus_device_proxy *dev = priv->vdev; + struct venet_eventq_query query; + size_t poollen; + struct ioq_iterator iter; + char *pool; + int i; + + priv->evq.enabled = true; + + if (caps.bits & VENET_CAP_EVQ_LINKSTATE) { + /* + * We will assume there is no carrier until we get + * an event telling us otherwise + */ + netif_carrier_off(priv->dev); + priv->evq.linkstate = true; + } + + if (caps.bits & VENET_CAP_EVQ_TXC) + priv->evq.txc = true; + + memset(&query, 0, sizeof(query)); + + ret = devcall(priv, VENET_FUNC_EVQQUERY, &query, sizeof(query)); + if (ret < 0) + return ret; + + priv->evq.evsize = query.evsize; + poollen = query.evsize * count; + + pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA); + if (!pool) + return -ENOMEM; + + priv->evq.pool = pool; + + ret = dev->ops->shm(dev, NULL, query.dpid, 0, + pool, poollen, NULL, NULL, 0); + if (ret < 0) + return ret; + + queue_init(priv, &priv->evq.veq, "evq", + query.qid, count, evq_isr); + + ret = ioq_iter_init(priv->evq.veq.queue, + &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0); + BUG_ON(ret < 0); + + /* Now populate each descriptor with an empty event */ + for (i = 0; i < count; i++) { + size_t offset = (i * query.evsize); + void *addr = &priv->evq.pool[offset]; + + iter.desc->ptr = cpu_to_le64(offset); + iter.desc->cookie = (u64)(unsigned long)addr; + iter.desc->len = cpu_to_le64(query.evsize); + + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + } + + /* Finally, enable interrupts */ + tasklet_init(&priv->evq.task, deferred_evq_isr, + (unsigned long)priv); + ioq_notify_enable(priv->evq.veq.queue, 0); + } + + return 0; +} + +static int +vbus_enet_l4ro_negcap(struct vbus_enet_priv *priv, unsigned long count) +{ + struct venet_capabilities caps; + int ret; + + memset(&caps, 0, sizeof(caps)); + + caps.gid = VENET_CAP_GROUP_L4RO; + caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6 + |VENET_CAP_ECN); + + ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps)); + if (ret < 0) { + printk(KERN_ERR "Error negotiating L4RO: %d\n", ret); + return ret; + } + + if (caps.bits & VENET_CAP_SG) { + struct vbus_device_proxy *dev = priv->vdev; + size_t poollen = SG_DESC_SIZE * count; + struct venet_l4ro_query query; + char *pool; + + memset(&query, 0, sizeof(query)); + + ret = devcall(priv, VENET_FUNC_L4ROQUERY, &query, sizeof(query)); + if (ret < 0) { + printk(KERN_ERR "Error querying L4RO: %d\n", ret); + return ret; + } + + pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA); + if (!pool) + return -ENOMEM; + + /* + * pre-mapped descriptor pool + */ + ret = dev->ops->shm(dev, NULL, query.dpid, 0, + pool, poollen, NULL, NULL, 0); + if (ret < 0) { + printk(KERN_ERR "Error registering L4RO pool: %d\n", + ret); + kfree(pool); + return ret; + } + + /* + * page-queue: contains a ring of arbitrary pages for + * consumption by the host for when the SG::IOV count exceeds + * one MTU frame. All we need to do is keep it populated + * with free pages. 
+ */ + queue_init(priv, &priv->l4ro.pageq, "pageq", query.pqid, + count, NULL); + + priv->l4ro.pool = pool; + priv->l4ro.available = true; + } + + return 0; +} + +static int +vbus_enet_negcap(struct vbus_enet_priv *priv) +{ + int ret; + + ret = vbus_enet_sg_negcap(priv); + if (ret < 0) + return ret; + + ret = vbus_enet_evq_negcap(priv, tx_ringlen); + if (ret < 0) + return ret; + + ret = vbus_enet_l4ro_negcap(priv, rx_ringlen); + if (ret < 0) + return ret; + + return 0; +} + +static int vbus_enet_set_tx_csum(struct net_device *dev, u32 data) +{ + struct vbus_enet_priv *priv = netdev_priv(dev); + + if (data && !priv->sg) + return -ENOSYS; + + return ethtool_op_set_tx_hw_csum(dev, data); +} + +static struct ethtool_ops vbus_enet_ethtool_ops = { + .set_tx_csum = vbus_enet_set_tx_csum, + .set_sg = ethtool_op_set_sg, + .set_tso = ethtool_op_set_tso, + .get_link = ethtool_op_get_link, +}; + +static const struct net_device_ops vbus_enet_netdev_ops = { + .ndo_open = vbus_enet_open, + .ndo_stop = vbus_enet_stop, + .ndo_set_config = vbus_enet_config, + .ndo_start_xmit = vbus_enet_tx_start, + .ndo_change_mtu = vbus_enet_change_mtu, + .ndo_tx_timeout = vbus_enet_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +/* + * This is called whenever a new vbus_device_proxy is added to the vbus + * with the matching VENET_ID + */ +static int +vbus_enet_probe(struct vbus_device_proxy *vdev) +{ + struct net_device *dev; + struct vbus_enet_priv *priv; + int ret; + + printk(KERN_INFO "VENET: Found new device at %lld\n", vdev->id); + + ret = vdev->ops->open(vdev, VENET_VERSION, 0); + if (ret < 0) + return ret; + + dev = alloc_etherdev(sizeof(struct vbus_enet_priv)); + if (!dev) + return -ENOMEM; + + /* + * establish our device-name early so we can incorporate it into + * the signal-path names, etc + */ + rtnl_lock(); + + ret = dev_alloc_name(dev, dev->name); + if (ret < 0) + goto out_free; + + priv = netdev_priv(dev); + + spin_lock_init(&priv->lock); + priv->dev = dev; + priv->vdev = vdev; + + ret = vbus_enet_negcap(priv); + if (ret < 0) { + printk(KERN_INFO "VENET: Error negotiating capabilities for " \ + "%lld\n", + priv->vdev->id); + goto out_free; + } + + if (priv->l4ro.available) + priv->import = &vbus_enet_l4ro_import; + else + priv->import = &vbus_enet_flat_import; + + skb_queue_head_init(&priv->tx.outstanding); + + queue_init(priv, &priv->rxq, "rx", VENET_QUEUE_RX, rx_ringlen, + rx_isr); + queue_init(priv, &priv->tx.veq, "tx", VENET_QUEUE_TX, tx_ringlen, + tx_isr); + + rx_setup(priv); + tx_setup(priv); + + ioq_notify_enable(priv->rxq.queue, 0); /* enable rx interrupts */ + + if (!priv->evq.txc) { + /* + * If the TXC feature is present, we will recieve our + * tx-complete notification via the event-channel. Therefore, + * we only enable txq interrupts if the TXC feature is not + * present. 
+ */ + tasklet_init(&priv->tx.task, deferred_tx_isr, + (unsigned long)priv); + ioq_notify_enable(priv->tx.veq.queue, 0); + } + + dev->netdev_ops = &vbus_enet_netdev_ops; + dev->watchdog_timeo = 5 * HZ; + SET_ETHTOOL_OPS(dev, &vbus_enet_ethtool_ops); + SET_NETDEV_DEV(dev, &vdev->dev); + + netif_napi_add(dev, &priv->napi, vbus_enet_poll, 128); + + ret = devcall(priv, VENET_FUNC_MACQUERY, priv->dev->dev_addr, ETH_ALEN); + if (ret < 0) { + printk(KERN_INFO "VENET: Error obtaining MAC address for " \ + "%lld\n", + priv->vdev->id); + goto out_free; + } + + dev->features |= NETIF_F_HIGHDMA; + + ret = register_netdevice(dev); + if (ret < 0) { + printk(KERN_INFO "VENET: error %i registering device \"%s\"\n", + ret, dev->name); + goto out_free; + } + + rtnl_unlock(); + + vdev->priv = priv; + + return 0; + + out_free: + rtnl_unlock(); + + free_netdev(dev); + + return ret; +} + +static int +vbus_enet_remove(struct vbus_device_proxy *vdev) +{ + struct vbus_enet_priv *priv = (struct vbus_enet_priv *)vdev->priv; + struct vbus_device_proxy *dev = priv->vdev; + + unregister_netdev(priv->dev); + napi_disable(&priv->napi); + + rx_teardown(priv); + ioq_put(priv->rxq.queue); + + tx_teardown(priv); + ioq_put(priv->tx.veq.queue); + + if (priv->evq.enabled) + evq_teardown(priv); + + dev->ops->close(dev, 0); + + free_netdev(priv->dev); + + return 0; +} + +/* + * Finally, the module stuff + */ + +static struct vbus_driver_ops vbus_enet_driver_ops = { + .probe = vbus_enet_probe, + .remove = vbus_enet_remove, +}; + +static struct vbus_driver vbus_enet_driver = { + .type = VENET_TYPE, + .owner = THIS_MODULE, + .ops = &vbus_enet_driver_ops, +}; + +static __init int +vbus_enet_init_module(void) +{ + printk(KERN_INFO "Virtual Ethernet: Copyright (C) 2009 Novell, Gregory Haskins\n"); + printk(KERN_DEBUG "VENET: Using %d/%d queue depth\n", + rx_ringlen, tx_ringlen); + return vbus_driver_register(&vbus_enet_driver); +} + +static __exit void +vbus_enet_cleanup(void) +{ + vbus_driver_unregister(&vbus_enet_driver); +} + +module_init(vbus_enet_init_module); +module_exit(vbus_enet_cleanup); + +VBUS_DRIVER_AUTOPROBE(VENET_TYPE); diff --git a/drivers/vbus/Kconfig b/drivers/vbus/Kconfig new file mode 100644 index 000000000000..f51cba10913e --- /dev/null +++ b/drivers/vbus/Kconfig @@ -0,0 +1,25 @@ +# +# Virtual-Bus (VBus) driver configuration +# + +config VBUS_PROXY + bool "Virtual-Bus support" + select SHM_SIGNAL + select IOQ + default n + help + Adds support for a virtual-bus model drivers in a guest to connect + to host side virtual-bus resources. If you are using this kernel + in a virtualization solution which implements virtual-bus devices + on the backend, say Y. If unsure, say N. + +config VBUS_PCIBRIDGE + bool "PCI to Virtual-Bus bridge" + depends on PCI + depends on VBUS_PROXY + select IOQ + default n + help + Provides a way to bridge host side vbus devices via a PCI-BRIDGE + object. If you are running virtualization with vbus devices on the + host, and the vbus is exposed via PCI, say Y. Otherwise, say N. 
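Taken together with the VBUS_ENET hunk added to drivers/net/Kconfig above, enabling the full guest-side stack comes down to a handful of options. A plausible .config fragment, assuming CONFIG_PCI is already enabled (SHM_SIGNAL and IOQ are pulled in automatically by the select statements in VBUS_PROXY):

CONFIG_VBUS_PROXY=y
CONFIG_VBUS_PCIBRIDGE=y
CONFIG_VBUS_ENET=m
# CONFIG_VBUS_ENET_DEBUG is not set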
diff --git a/drivers/vbus/Makefile b/drivers/vbus/Makefile new file mode 100644 index 000000000000..944b7f1fec90 --- /dev/null +++ b/drivers/vbus/Makefile @@ -0,0 +1,6 @@ + +vbus-proxy-objs += bus-proxy.o +obj-$(CONFIG_VBUS_PROXY) += vbus-proxy.o + +vbus-pcibridge-objs += pci-bridge.o +obj-$(CONFIG_VBUS_PCIBRIDGE) += vbus-pcibridge.o diff --git a/drivers/vbus/bus-proxy.c b/drivers/vbus/bus-proxy.c new file mode 100644 index 000000000000..ae11f679d34e --- /dev/null +++ b/drivers/vbus/bus-proxy.c @@ -0,0 +1,248 @@ +/* + * Copyright 2009 Novell. All Rights Reserved. + * + * Author: + * Gregory Haskins <ghaskins@novell.com> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vbus_driver.h> + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); + +#define VBUS_PROXY_NAME "vbus-proxy" + +static struct vbus_device_proxy *to_dev(struct device *_dev) +{ + return _dev ? container_of(_dev, struct vbus_device_proxy, dev) : NULL; +} + +static struct vbus_driver *to_drv(struct device_driver *_drv) +{ + return container_of(_drv, struct vbus_driver, drv); +} + +/* + * This function is invoked whenever a new driver and/or device is added + * to check if there is a match + */ +static int vbus_dev_proxy_match(struct device *_dev, struct device_driver *_drv) +{ + struct vbus_device_proxy *dev = to_dev(_dev); + struct vbus_driver *drv = to_drv(_drv); + + return !strcmp(dev->type, drv->type); +} + +static int vbus_dev_proxy_uevent(struct device *_dev, struct kobj_uevent_env *env) +{ + struct vbus_device_proxy *dev = to_dev(_dev); + + if (add_uevent_var(env, "MODALIAS=vbus-proxy:%s", dev->type)) + return -ENOMEM; + + return 0; +} + +/* + * This function is invoked after the bus infrastructure has already made a + * match. The device will contain a reference to the paired driver which + * we will extract. 
+ */ +static int vbus_dev_proxy_probe(struct device *_dev) +{ + int ret = 0; + struct vbus_device_proxy *dev = to_dev(_dev); + struct vbus_driver *drv = to_drv(_dev->driver); + + if (drv->ops->probe) + ret = drv->ops->probe(dev); + + return ret; +} + +static struct bus_type vbus_proxy = { + .name = VBUS_PROXY_NAME, + .match = vbus_dev_proxy_match, + .uevent = vbus_dev_proxy_uevent, +}; + +static struct device vbus_proxy_rootdev = { + .parent = NULL, + .init_name = VBUS_PROXY_NAME, +}; + +static int __init vbus_init(void) +{ + int ret; + + ret = bus_register(&vbus_proxy); + BUG_ON(ret < 0); + + ret = device_register(&vbus_proxy_rootdev); + BUG_ON(ret < 0); + + return 0; +} + +postcore_initcall(vbus_init); + +static void device_release(struct device *dev) +{ + struct vbus_device_proxy *_dev; + + _dev = container_of(dev, struct vbus_device_proxy, dev); + + _dev->ops->release(_dev); +} + +static ssize_t _show_modalias(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "vbus-proxy:%s\n", to_dev(dev)->type); +} +static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, _show_modalias, NULL); + +int vbus_device_proxy_register(struct vbus_device_proxy *new) +{ + int ret; + + new->dev.parent = &vbus_proxy_rootdev; + new->dev.bus = &vbus_proxy; + new->dev.release = &device_release; + + ret = device_register(&new->dev); + if (ret < 0) + return ret; + + ret = device_create_file(&new->dev, &dev_attr_modalias); + if (ret < 0) { + device_unregister(&new->dev); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(vbus_device_proxy_register); + +void vbus_device_proxy_unregister(struct vbus_device_proxy *dev) +{ + device_remove_file(&dev->dev, &dev_attr_modalias); + device_unregister(&dev->dev); +} +EXPORT_SYMBOL_GPL(vbus_device_proxy_unregister); + +static int match_device_id(struct device *_dev, void *data) +{ + struct vbus_device_proxy *dev = to_dev(_dev); + u64 id = *(u64 *)data; + + return dev->id == id; +} + +struct vbus_device_proxy *vbus_device_proxy_find(u64 id) +{ + struct device *dev; + + dev = bus_find_device(&vbus_proxy, NULL, &id, &match_device_id); + + return to_dev(dev); +} +EXPORT_SYMBOL_GPL(vbus_device_proxy_find); + +int vbus_driver_register(struct vbus_driver *new) +{ + new->drv.bus = &vbus_proxy; + new->drv.name = new->type; + new->drv.owner = new->owner; + new->drv.probe = vbus_dev_proxy_probe; + + return driver_register(&new->drv); +} +EXPORT_SYMBOL_GPL(vbus_driver_register); + +void vbus_driver_unregister(struct vbus_driver *drv) +{ + driver_unregister(&drv->drv); +} +EXPORT_SYMBOL_GPL(vbus_driver_unregister); + +/* + *--------------------------------- + * driver-side IOQ helper + *--------------------------------- + */ +static void +vbus_driver_ioq_release(struct ioq *ioq) +{ + kfree(ioq->head_desc); + kfree(ioq); +} + +static struct ioq_ops vbus_driver_ioq_ops = { + .release = vbus_driver_ioq_release, +}; + + +int vbus_driver_ioq_alloc(struct vbus_device_proxy *dev, const char *name, + int id, int prio, size_t count, struct ioq **ioq) +{ + struct ioq *_ioq; + struct ioq_ring_head *head = NULL; + struct shm_signal *signal = NULL; + size_t len = IOQ_HEAD_DESC_SIZE(count); + int ret = -ENOMEM; + + _ioq = kzalloc(sizeof(*_ioq), GFP_KERNEL); + if (!_ioq) + goto error; + + head = kzalloc(len, GFP_KERNEL | GFP_DMA); + if (!head) + goto error; + + head->magic = IOQ_RING_MAGIC; + head->ver = IOQ_RING_VER; + head->count = cpu_to_le32(count); + + ret = dev->ops->shm(dev, name, id, prio, head, len, + &head->signal, &signal, 0); + if (ret < 0) + goto 
error; + + ioq_init(_ioq, + &vbus_driver_ioq_ops, + ioq_locality_north, + head, + signal, + count); + + *ioq = _ioq; + + return 0; + + error: + kfree(_ioq); + kfree(head); + + if (signal) + shm_signal_put(signal); + + return ret; +} +EXPORT_SYMBOL_GPL(vbus_driver_ioq_alloc); diff --git a/drivers/vbus/pci-bridge.c b/drivers/vbus/pci-bridge.c new file mode 100644 index 000000000000..36de7c48891c --- /dev/null +++ b/drivers/vbus/pci-bridge.c @@ -0,0 +1,1016 @@ +/* + * Copyright (C) 2009 Novell. All Rights Reserved. + * + * Author: + * Gregory Haskins <ghaskins@novell.com> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/mm.h> +#include <linux/workqueue.h> +#include <linux/slab.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/ioq.h> +#include <linux/interrupt.h> +#include <linux/vbus_driver.h> +#include <linux/vbus_pci.h> + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); + +#define VBUS_PCI_NAME "pci-to-vbus-bridge" + +struct vbus_pci { + spinlock_t lock; + struct pci_dev *dev; + struct ioq eventq; + struct vbus_pci_event *ring; + struct vbus_pci_regs *regs; + struct vbus_pci_signals *signals; + int irq; + bool enabled; + struct { + struct dentry *fs; + int events; + int qnotify; + int qinject; + int notify; + int inject; + int bridgecalls; + int buscalls; + } stats; +}; + +static struct vbus_pci vbus_pci; + +struct vbus_pci_device { + char type[VBUS_MAX_DEVTYPE_LEN]; + u64 handle; + struct list_head shms; + struct vbus_device_proxy vdev; + struct work_struct drop; +}; + +static DEFINE_PER_CPU(struct vbus_pci_fastcall_desc, vbus_pci_percpu_fastcall) +____cacheline_aligned; + +/* + * ------------------- + * common routines + * ------------------- + */ + +static int +vbus_pci_bridgecall(unsigned long nr, void *data, unsigned long len) +{ + struct vbus_pci_call_desc params = { + .vector = nr, + .len = len, + .datap = __pa(data), + }; + unsigned long flags; + int ret; + + spin_lock_irqsave(&vbus_pci.lock, flags); + + memcpy_toio(&vbus_pci.regs->bridgecall, ¶ms, sizeof(params)); + ret = ioread32(&vbus_pci.regs->bridgecall); + + spin_unlock_irqrestore(&vbus_pci.lock, flags); + + vbus_pci.stats.bridgecalls++; + + return ret; +} + +static int +vbus_pci_buscall(unsigned long nr, void *data, unsigned long len) +{ + struct vbus_pci_fastcall_desc *params; + int ret; + + preempt_disable(); + + params = &get_cpu_var(vbus_pci_percpu_fastcall); + + params->call.vector = nr; + params->call.len = len; + params->call.datap = __pa(data); + + iowrite32(smp_processor_id(), &vbus_pci.signals->fastcall); + + ret = params->result; + + preempt_enable(); + + vbus_pci.stats.buscalls++; + + return ret; +} + +static struct vbus_pci_device * +to_dev(struct vbus_device_proxy *vdev) +{ + return container_of(vdev, struct vbus_pci_device, vdev); +} + +static void +_signal_init(struct 
shm_signal *signal, struct shm_signal_desc *desc, + struct shm_signal_ops *ops) +{ + desc->magic = SHM_SIGNAL_MAGIC; + desc->ver = SHM_SIGNAL_VER; + + shm_signal_init(signal, shm_locality_north, ops, desc); +} + +/* + * ------------------- + * _signal + * ------------------- + */ + +struct _signal { + char name[64]; + struct vbus_pci *pcivbus; + struct shm_signal signal; + u32 handle; + struct rb_node node; + struct list_head list; + int irq; + struct irq_desc *desc; +}; + +static struct _signal * +to_signal(struct shm_signal *signal) +{ + return container_of(signal, struct _signal, signal); +} + +static int +_signal_inject(struct shm_signal *signal) +{ + struct _signal *_signal = to_signal(signal); + + vbus_pci.stats.inject++; + iowrite32(_signal->handle, &vbus_pci.signals->shmsignal); + + return 0; +} + +static void +_signal_release(struct shm_signal *signal) +{ + struct _signal *_signal = to_signal(signal); + + kfree(_signal); +} + +static struct shm_signal_ops _signal_ops = { + .inject = _signal_inject, + .release = _signal_release, +}; + +static void shmsignal_disconnect(struct _signal *_signal); + +/* + * ------------------- + * vbus_device_proxy routines + * ------------------- + */ + +static int +vbus_pci_device_open(struct vbus_device_proxy *vdev, int version, int flags) +{ + struct vbus_pci_device *dev = to_dev(vdev); + struct vbus_pci_deviceopen params; + int ret; + + if (dev->handle) + return -EINVAL; + + params.devid = vdev->id; + params.version = version; + + ret = vbus_pci_buscall(VBUS_PCI_HC_DEVOPEN, + ¶ms, sizeof(params)); + if (ret < 0) + return ret; + + dev->handle = params.handle; + + return 0; +} + +static int +vbus_pci_device_close(struct vbus_device_proxy *vdev, int flags) +{ + struct vbus_pci_device *dev = to_dev(vdev); + unsigned long iflags; + int ret; + + if (!dev->handle) + return -EINVAL; + + spin_lock_irqsave(&vbus_pci.lock, iflags); + + while (!list_empty(&dev->shms)) { + struct _signal *_signal; + + _signal = list_first_entry(&dev->shms, struct _signal, list); + + list_del(&_signal->list); + shmsignal_disconnect(_signal); + + spin_unlock_irqrestore(&vbus_pci.lock, iflags); + shm_signal_put(&_signal->signal); + spin_lock_irqsave(&vbus_pci.lock, iflags); + } + + spin_unlock_irqrestore(&vbus_pci.lock, iflags); + + /* + * The DEVICECLOSE will implicitly close all of the shm on the + * host-side, so there is no need to do an explicit per-shm + * hypercall + */ + ret = vbus_pci_buscall(VBUS_PCI_HC_DEVCLOSE, + &dev->handle, sizeof(dev->handle)); + + if (ret < 0) + printk(KERN_ERR "VBUS-PCI: Error closing device %s/%lld: %d\n", + vdev->type, vdev->id, ret); + + dev->handle = 0; + + return 0; +} + +/* + * ------------------- + * shmsignal interrupt routines + * ------------------- + */ + +/* We abstract these routines so that we can drop in irqchip later */ + +static void +shmsignal_wakeup(struct _signal *_signal) +{ + _shm_signal_wakeup(&_signal->signal); +} + +static int +shmsignal_connect(struct _signal *_signal) +{ + return 0; +} + +static void +shmsignal_disconnect(struct _signal *_signal) +{ + +} + +static int +vbus_pci_device_shm(struct vbus_device_proxy *vdev, const char *name, + int id, int prio, + void *ptr, size_t len, + struct shm_signal_desc *sdesc, struct shm_signal **signal, + int flags) +{ + struct vbus_pci_device *dev = to_dev(vdev); + struct _signal *_signal = NULL; + struct vbus_pci_deviceshm params; + unsigned long iflags; + int ret; + + if (!dev->handle) + return -EINVAL; + + params.devh = dev->handle; + params.id = id; + params.flags = flags; + 
params.datap = (u64)__pa(ptr); + params.len = len; + + if (signal) { + /* + * The signal descriptor must be embedded within the + * provided ptr + */ + if (!sdesc + || (len < sizeof(*sdesc)) + || ((void *)sdesc < ptr) + || ((void *)sdesc > (ptr + len - sizeof(*sdesc)))) + return -EINVAL; + + _signal = kzalloc(sizeof(*_signal), GFP_KERNEL); + if (!_signal) + return -ENOMEM; + + _signal_init(&_signal->signal, sdesc, &_signal_ops); + + /* + * take another reference for the host. This is dropped + * by a SHMCLOSE event + */ + shm_signal_get(&_signal->signal); + + params.signal.offset = (u64)(unsigned long)sdesc - + (u64)(unsigned long)ptr; + params.signal.prio = prio; + params.signal.cookie = (u64)(unsigned long)_signal; + + } else + params.signal.offset = -1; /* yes, this is a u32, but its ok */ + + ret = vbus_pci_buscall(VBUS_PCI_HC_DEVSHM, + ¶ms, sizeof(params)); + if (ret < 0) + goto fail; + + if (signal) { + + BUG_ON(ret < 0); + + _signal->handle = ret; + + if (!name) + snprintf(_signal->name, sizeof(_signal->name), + "dev%lld-id%d", vdev->id, id); + else + snprintf(_signal->name, sizeof(_signal->name), + "%s", name); + + shmsignal_connect(_signal); + + spin_lock_irqsave(&vbus_pci.lock, iflags); + list_add_tail(&_signal->list, &dev->shms); + spin_unlock_irqrestore(&vbus_pci.lock, iflags); + + shm_signal_get(&_signal->signal); + *signal = &_signal->signal; + } + + return 0; + +fail: + if (_signal) { + /* + * We held two references above, so we need to drop + * both of them + */ + shm_signal_put(&_signal->signal); + shm_signal_put(&_signal->signal); + } + + return ret; +} + +static int +vbus_pci_device_call(struct vbus_device_proxy *vdev, u32 func, void *data, + size_t len, int flags) +{ + struct vbus_pci_device *dev = to_dev(vdev); + struct vbus_pci_devicecall params = { + .devh = dev->handle, + .func = func, + .datap = (u64)__pa(data), + .len = len, + .flags = flags, + }; + + if (!dev->handle) + return -EINVAL; + + return vbus_pci_buscall(VBUS_PCI_HC_DEVCALL, ¶ms, sizeof(params)); +} + +static void +vbus_pci_device_release(struct vbus_device_proxy *vdev) +{ + struct vbus_pci_device *_dev = to_dev(vdev); + + vbus_pci_device_close(vdev, 0); + + kfree(_dev); +} + +static struct vbus_device_proxy_ops vbus_pci_device_ops = { + .open = vbus_pci_device_open, + .close = vbus_pci_device_close, + .shm = vbus_pci_device_shm, + .call = vbus_pci_device_call, + .release = vbus_pci_device_release, +}; + +/* + * ------------------- + * vbus events + * ------------------- + */ + +struct deferred_devadd_event { + struct work_struct work; + struct vbus_pci_add_event event; +}; + +static void deferred_devdrop(struct work_struct *work); + +static void +deferred_devadd(struct work_struct *work) +{ + struct deferred_devadd_event *_event; + struct vbus_pci_device *new; + int ret; + + _event = container_of(work, struct deferred_devadd_event, work); + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) { + printk(KERN_ERR "VBUS_PCI: Out of memory on add_event\n"); + return; + } + + INIT_LIST_HEAD(&new->shms); + + memcpy(new->type, _event->event.type, VBUS_MAX_DEVTYPE_LEN); + new->vdev.type = new->type; + new->vdev.id = _event->event.id; + new->vdev.ops = &vbus_pci_device_ops; + + dev_set_name(&new->vdev.dev, "%lld", _event->event.id); + + INIT_WORK(&new->drop, deferred_devdrop); + + ret = vbus_device_proxy_register(&new->vdev); + if (ret < 0) + panic("failed to register device %lld(%s): %d\n", + new->vdev.id, new->type, ret); + + kfree(_event); +} + +static void +deferred_devdrop(struct work_struct *work) +{ + 
struct vbus_pci_device *dev; + + dev = container_of(work, struct vbus_pci_device, drop); + vbus_device_proxy_unregister(&dev->vdev); +} + +static void +event_devadd(struct vbus_pci_add_event *event) +{ + struct deferred_devadd_event *_event; + + _event = kzalloc(sizeof(*_event), GFP_ATOMIC); + if (!_event) { + printk(KERN_ERR \ + "VBUS_PCI: Out of ATOMIC memory on add_event\n"); + return; + } + + INIT_WORK(&_event->work, deferred_devadd); + memcpy(&_event->event, event, sizeof(*event)); + + schedule_work(&_event->work); +} + +static void +event_devdrop(struct vbus_pci_handle_event *event) +{ + struct vbus_device_proxy *dev = vbus_device_proxy_find(event->handle); + + if (!dev) { + printk(KERN_WARNING "VBUS-PCI: devdrop failed: %lld\n", + event->handle); + return; + } + + schedule_work(&to_dev(dev)->drop); +} + +static void +event_shmsignal(struct vbus_pci_handle_event *event) +{ + struct _signal *_signal = (struct _signal *)(unsigned long)event->handle; + + vbus_pci.stats.notify++; + + shmsignal_wakeup(_signal); +} + +static void +event_shmclose(struct vbus_pci_handle_event *event) +{ + struct _signal *_signal = (struct _signal *)(unsigned long)event->handle; + + /* + * This reference was taken during the DEVICESHM call + */ + shm_signal_put(&_signal->signal); +} + +/* + * ------------------- + * eventq routines + * ------------------- + */ + +static struct ioq_notifier eventq_notifier; + +static int __devinit +eventq_init(int qlen) +{ + struct ioq_iterator iter; + int ret; + int i; + + vbus_pci.ring = kzalloc(sizeof(struct vbus_pci_event) * qlen, + GFP_KERNEL); + if (!vbus_pci.ring) + return -ENOMEM; + + /* + * We want to iterate on the "valid" index. By default the iterator + * will not "autoupdate" which means it will not hypercall the host + * with our changes. This is good, because we are really just + * initializing stuff here anyway. Note that you can always manually + * signal the host with ioq_signal() if the autoupdate feature is not + * used. + */ + ret = ioq_iter_init(&vbus_pci.eventq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + /* + * Seek to the tail of the valid index (which should be our first + * item since the queue is brand-new) + */ + ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty vbus_event and mark it + * valid + */ + for (i = 0; i < qlen; i++) { + struct vbus_pci_event *event = &vbus_pci.ring[i]; + size_t len = sizeof(*event); + struct ioq_ring_desc *desc = iter.desc; + + BUG_ON(iter.desc->valid); + + desc->cookie = (u64)(unsigned long)event; + desc->ptr = cpu_to_le64(__pa(event)); + desc->len = cpu_to_le64(len); /* total length */ + desc->valid = 1; + + /* + * This push operation will simultaneously advance the + * valid-tail index and increment our position in the queue + * by one. 
+
+	vbus_pci.eventq.notifier = &eventq_notifier;
+
+	/*
+	 * And finally, ensure that we can receive notification
+	 */
+	ioq_notify_enable(&vbus_pci.eventq, 0);
+
+	return 0;
+}
+
+/* Invoked whenever the hypervisor ioq_signal()s our eventq */
+static void
+eventq_wakeup(struct ioq_notifier *notifier)
+{
+	struct ioq_iterator iter;
+	int ret;
+
+	/* We want to iterate on the head of the in-use index */
+	ret = ioq_iter_init(&vbus_pci.eventq, &iter, ioq_idxtype_inuse, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * The EOM is indicated by finding a packet that is still owned by
+	 * the south side.
+	 *
+	 * FIXME: This could in theory run indefinitely if the host keeps
+	 * feeding us events, since there is nothing like a NAPI budget.  We
+	 * might need to address that.
+	 */
+	while (!iter.desc->sown) {
+		struct ioq_ring_desc *desc = iter.desc;
+		struct vbus_pci_event *event;
+
+		event = (struct vbus_pci_event *)(unsigned long)desc->cookie;
+
+		switch (event->eventid) {
+		case VBUS_PCI_EVENT_DEVADD:
+			event_devadd(&event->data.add);
+			break;
+		case VBUS_PCI_EVENT_DEVDROP:
+			event_devdrop(&event->data.handle);
+			break;
+		case VBUS_PCI_EVENT_SHMSIGNAL:
+			event_shmsignal(&event->data.handle);
+			break;
+		case VBUS_PCI_EVENT_SHMCLOSE:
+			event_shmclose(&event->data.handle);
+			break;
+		default:
+			printk(KERN_WARNING "VBUS_PCI: Unexpected event %d\n",
+			       event->eventid);
+			break;
+		}
+
+		memset(event, 0, sizeof(*event));
+
+		/* Advance the in-use head */
+		ret = ioq_iter_pop(&iter, 0);
+		BUG_ON(ret < 0);
+
+		vbus_pci.stats.events++;
+	}
+
+	/* And let the south side know that we changed the queue */
+	ioq_signal(&vbus_pci.eventq, 0);
+}
+
+static struct ioq_notifier eventq_notifier = {
+	.signal = &eventq_wakeup,
+};
+
+/* The MSI the host injects whenever it issues an ioq_signal() on the eventq */
+static irqreturn_t
+eventq_intr(int irq, void *dev)
+{
+	vbus_pci.stats.qnotify++;
+	_shm_signal_wakeup(vbus_pci.eventq.signal);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * -------------------
+ */
+
+static int
+eventq_signal_inject(struct shm_signal *signal)
+{
+	vbus_pci.stats.qinject++;
+
+	/* The eventq uses the special-case handle=0 */
+	iowrite32(0, &vbus_pci.signals->eventq);
+
+	return 0;
+}
+
+static void
+eventq_signal_release(struct shm_signal *signal)
+{
+	kfree(signal);
+}
+
+static struct shm_signal_ops eventq_signal_ops = {
+	.inject  = eventq_signal_inject,
+	.release = eventq_signal_release,
+};
+
+/*
+ * -------------------
+ */
+
+static void
+eventq_ioq_release(struct ioq *ioq)
+{
+	/* released as part of the vbus_pci object */
+}
+
+static struct ioq_ops eventq_ioq_ops = {
+	.release = eventq_ioq_release,
+};
+
+/*
+ * -------------------
+ */
+
+static void
+vbus_pci_release(void)
+{
+#ifdef CONFIG_DEBUG_FS
+	if (vbus_pci.stats.fs)
+		debugfs_remove(vbus_pci.stats.fs);
+#endif
+
+	if (vbus_pci.irq > 0)
+		free_irq(vbus_pci.irq, NULL);
+
+	if (vbus_pci.signals)
+		pci_iounmap(vbus_pci.dev, (void *)vbus_pci.signals);
+
+	if (vbus_pci.regs)
+		pci_iounmap(vbus_pci.dev, (void *)vbus_pci.regs);
+
+	pci_release_regions(vbus_pci.dev);
+	pci_disable_device(vbus_pci.dev);
+
+	kfree(vbus_pci.eventq.head_desc);
+	kfree(vbus_pci.ring);
+
+	vbus_pci.enabled = false;
+}
+
+static int __devinit
+vbus_pci_open(void)
+{
+	struct vbus_pci_bridge_negotiate params = {
+		.magic        = VBUS_PCI_ABI_MAGIC,
+		.version      = VBUS_PCI_HC_VERSION,
+		.capabilities = 0,
+	};
+
+	return vbus_pci_bridgecall(VBUS_PCI_BRIDGE_NEGOTIATE,
+				   &params, sizeof(params));
+}
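+
+/*
+ * The two bridgecalls around this point form the bring-up handshake:
+ * NEGOTIATE (above) proposes our ABI magic/version and an empty
+ * capability mask, which the host either accepts or fails (aborting the
+ * probe), while QREG (below) hands the host the guest-physical addresses
+ * of the event ring so it can start queueing events to us.
+ */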
+
+#define QLEN 1024
+
+static int __devinit
+vbus_pci_eventq_register(void)
+{
+	struct vbus_pci_busreg params = {
+		.count = 1,
+		.eventq = {
+			{
+				.count = QLEN,
+				.ring  = (u64)__pa(vbus_pci.eventq.head_desc),
+				.data  = (u64)__pa(vbus_pci.ring),
+			},
+		},
+	};
+
+	return vbus_pci_bridgecall(VBUS_PCI_BRIDGE_QREG,
+				   &params, sizeof(params));
+}
+
+static int __devinit
+_ioq_init(size_t ringsize, struct ioq *ioq, struct ioq_ops *ops)
+{
+	struct shm_signal    *signal = NULL;
+	struct ioq_ring_head *head   = NULL;
+	size_t                len    = IOQ_HEAD_DESC_SIZE(ringsize);
+
+	head = kzalloc(len, GFP_KERNEL | GFP_DMA);
+	if (!head)
+		return -ENOMEM;
+
+	signal = kzalloc(sizeof(*signal), GFP_KERNEL);
+	if (!signal) {
+		kfree(head);
+		return -ENOMEM;
+	}
+
+	head->magic = IOQ_RING_MAGIC;
+	head->ver   = IOQ_RING_VER;
+	head->count = cpu_to_le32(ringsize);
+
+	_signal_init(signal, &head->signal, &eventq_signal_ops);
+
+	ioq_init(ioq, ops, ioq_locality_north, head, signal, ringsize);
+
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int _debugfs_seq_show(struct seq_file *m, void *p)
+{
+#define P(F) \
+	seq_printf(m, "  .%-30s: %d\n", #F, (int)vbus_pci.stats.F)
+
+	P(events);
+	P(qnotify);
+	P(qinject);
+	P(notify);
+	P(inject);
+	P(bridgecalls);
+	P(buscalls);
+
+#undef P
+
+	return 0;
+}
+
+static int _debugfs_fops_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, _debugfs_seq_show, inode->i_private);
+}
+
+static const struct file_operations stat_fops = {
+	.open    = _debugfs_fops_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+#endif
+
+static int __devinit
+vbus_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int ret;
+	int cpu;
+
+	if (vbus_pci.enabled)
+		return -EEXIST; /* we only support one bridge per kernel */
+
+	if (pdev->revision != VBUS_PCI_ABI_VERSION) {
+		printk(KERN_DEBUG "VBUS_PCI: expected ABI version %d, got %d\n",
+		       VBUS_PCI_ABI_VERSION,
+		       pdev->revision);
+		return -ENODEV;
+	}
+
+	vbus_pci.dev = pdev;
+
+	ret = pci_enable_device(pdev);
+	if (ret < 0)
+		return ret;
+
+	pci_set_master(pdev);
+
+	ret = pci_request_regions(pdev, VBUS_PCI_NAME);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not init BARs: %d\n", ret);
+		goto out_fail;
+	}
+
+	vbus_pci.regs = pci_iomap(pdev, 0, sizeof(struct vbus_pci_regs));
+	if (!vbus_pci.regs) {
+		printk(KERN_ERR "VBUS_PCI: Could not map BAR 0\n");
+		goto out_fail;
+	}
+
+	vbus_pci.signals = pci_iomap(pdev, 1, sizeof(struct vbus_pci_signals));
+	if (!vbus_pci.signals) {
+		printk(KERN_ERR "VBUS_PCI: Could not map BAR 1\n");
+		goto out_fail;
+	}
+
+	ret = vbus_pci_open();
+	if (ret < 0) {
+		printk(KERN_DEBUG "VBUS_PCI: Could not register with host: %d\n",
+		       ret);
+		goto out_fail;
+	}
+
+	/*
+	 * Allocate an IOQ to use for host-to-guest event notification
+	 */
+	ret = _ioq_init(QLEN, &vbus_pci.eventq, &eventq_ioq_ops);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not init eventq: %d\n", ret);
+		goto out_fail;
+	}
+
+	ret = eventq_init(QLEN);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not setup ring: %d\n", ret);
+		goto out_fail;
+	}
+
+	ret = pci_enable_msi(pdev);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not enable MSI: %d\n", ret);
+		goto out_fail;
+	}
+
+	vbus_pci.irq = pdev->irq;
+
+	ret = request_irq(pdev->irq, eventq_intr, 0, "vbus", NULL);
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Failed to register IRQ %d: %d\n",
+		       pdev->irq, ret);
+		goto out_fail;
+	}
+
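+	/*
+	 * The IRQ is live from here, but the host should not start
+	 * injecting eventq interrupts until the queue is registered by
+	 * vbus_pci_eventq_register() below, so the remaining setup does
+	 * not race eventq_wakeup().
+	 */
+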
+	/*
+	 * Add one fastcall vector per cpu so that we can do lockless
+	 * hypercalls
+	 */
+	for_each_possible_cpu(cpu) {
+		struct vbus_pci_fastcall_desc *desc =
+			&per_cpu(vbus_pci_percpu_fastcall, cpu);
+		struct vbus_pci_call_desc params = {
+			.vector = cpu,
+			.len    = sizeof(*desc),
+			.datap  = __pa(desc),
+		};
+
+		ret = vbus_pci_bridgecall(VBUS_PCI_BRIDGE_FASTCALL_ADD,
+					  &params, sizeof(params));
+		if (ret < 0) {
+			printk(KERN_ERR
+			       "VBUS_PCI: Failed to register cpu %d: %d\n",
+			       cpu, ret);
+			goto out_fail;
+		}
+	}
+
+	/*
+	 * Finally register our queue on the host to start receiving events
+	 */
+	ret = vbus_pci_eventq_register();
+	if (ret < 0) {
+		printk(KERN_ERR "VBUS_PCI: Could not register with host: %d\n",
+		       ret);
+		goto out_fail;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	vbus_pci.stats.fs = debugfs_create_file(VBUS_PCI_NAME, S_IRUGO,
+						NULL, NULL, &stat_fops);
+	if (IS_ERR(vbus_pci.stats.fs)) {
+		ret = PTR_ERR(vbus_pci.stats.fs);
+		printk(KERN_ERR "VBUS_PCI: error creating stats-fs: %d\n", ret);
+		goto out_fail;
+	}
+#endif
+
+	vbus_pci.enabled = true;
+
+	printk(KERN_INFO "Virtual-Bus: Copyright (c) 2009, "
+	       "Gregory Haskins <ghaskins@novell.com>\n");
+
+	return 0;
+
+ out_fail:
+	vbus_pci_release();
+
+	return ret;
+}
+
+static void __devexit
+vbus_pci_remove(struct pci_dev *pdev)
+{
+	vbus_pci_release();
+}
+
+static DEFINE_PCI_DEVICE_TABLE(vbus_pci_tbl) = {
+	{ PCI_DEVICE(0x11da, 0x2000) },
+	{ 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, vbus_pci_tbl);
+
+static struct pci_driver vbus_pci_driver = {
+	.name     = VBUS_PCI_NAME,
+	.id_table = vbus_pci_tbl,
+	.probe    = vbus_pci_probe,
+	.remove   = vbus_pci_remove,
+};
+
+static int __init
+vbus_pci_init(void)
+{
+	memset(&vbus_pci, 0, sizeof(vbus_pci));
+	spin_lock_init(&vbus_pci.lock);
+
+	return pci_register_driver(&vbus_pci_driver);
+}
+
+static void __exit
+vbus_pci_exit(void)
+{
+	pci_unregister_driver(&vbus_pci_driver);
+}
+
+module_init(vbus_pci_init);
+module_exit(vbus_pci_exit);
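+
+/*
+ * For reference, a proxy driver (e.g. vbus-enet, added elsewhere in this
+ * series) consumes the bridge purely through the vbus_device_proxy_ops
+ * registered above.  A minimal sketch -- the MY_* constants and the exact
+ * shm() argument order are illustrative, not taken from this patch:
+ *
+ *	struct shm_signal *signal;
+ *
+ *	vdev->ops->open(vdev, MY_ABI_VERSION, 0);
+ *	vdev->ops->shm(vdev, NULL, MY_QUEUE_ID, 0, ring, ring_len,
+ *		       ring_sdesc, &signal, 0);
+ *	vdev->ops->call(vdev, MY_FUNC_LINKUP, NULL, 0, 0);
+ *
+ * Each shm() registration lands in vbus_pci_device_shm(), which issues
+ * the DEVSHM buscall shown earlier and wires the returned handle to a
+ * struct _signal so that SHMSIGNAL events can wake the right consumer.
+ */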