/*
 * vbus_enet - A virtualized 802.x network device based on the VBUS interface
 *
 * Copyright (C) 2009 Novell, Gregory Haskins
 *
 * Derived from the SNULL example from the book "Linux Device Drivers" by
 * Alessandro Rubini, Jonathan Corbet, and Greg Kroah-Hartman, published
 * by O'Reilly & Associates.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/skbuff.h>
#include <linux/in6.h>
#include <linux/ioq.h>
#include <linux/vbus_driver.h>
#include <linux/venet.h>
#include <asm/checksum.h>

MODULE_AUTHOR("Gregory Haskins");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("virtual-ethernet");
MODULE_VERSION("1");

static int rx_ringlen = 256;
module_param(rx_ringlen, int, 0444);
static int tx_ringlen = 256;
module_param(tx_ringlen, int, 0444);
static int sg_enabled = 1;
module_param(sg_enabled, int, 0444);

#define PDEBUG(_dev, fmt, args...) dev_dbg(&(_dev)->dev, fmt, ## args)

#define SG_DESC_SIZE VSG_DESC_SIZE(MAX_SKB_FRAGS)

struct vbus_enet_queue {
	struct ioq            *queue;
	struct ioq_notifier    notifier;
	unsigned long          count;
};

struct vbus_enet_priv {
	spinlock_t                 lock;
	struct net_device         *dev;
	struct vbus_device_proxy  *vdev;
	struct napi_struct         napi;
	struct vbus_enet_queue     rxq;
	struct {
		struct vbus_enet_queue veq;
		struct tasklet_struct  task;
		struct sk_buff_head    outstanding;
	} tx;
	bool                       sg;
	struct {
		bool               enabled;
		char              *pool;
	} pmtd; /* pre-mapped transmit descriptors */
	struct {
		bool               enabled;
		bool               linkstate;
		bool               txc;
		unsigned long      evsize;
		struct vbus_enet_queue veq;
		struct tasklet_struct  task;
		char              *pool;
	} evq;
	struct {
		bool               available;
		char              *pool;
		struct vbus_enet_queue pageq;
	} l4ro;

	struct sk_buff *(*import)(struct vbus_enet_priv *priv,
				  struct ioq_ring_desc *desc);
};

static void vbus_enet_tx_reap(struct vbus_enet_priv *priv);

static struct vbus_enet_priv *
napi_to_priv(struct napi_struct *napi)
{
	return container_of(napi, struct vbus_enet_priv, napi);
}

static int
queue_init(struct vbus_enet_priv *priv,
	   struct vbus_enet_queue *q,
	   const char *name,
	   int qid,
	   size_t ringsize,
	   void (*func)(struct ioq_notifier *))
{
	struct vbus_device_proxy *dev = priv->vdev;
	int ret;
	char _name[64];

	if (name)
		snprintf(_name, sizeof(_name), "%s-%s", priv->dev->name, name);

	ret = vbus_driver_ioq_alloc(dev, name ? _name : NULL, qid, 0,
				    ringsize, &q->queue);
	if (ret < 0)
		panic("ioq_alloc failed: %d\n", ret);

	if (func) {
		q->notifier.signal = func;
		q->queue->notifier = &q->notifier;
	}

	q->count = ringsize;

	return 0;
}

static int
devcall(struct vbus_enet_priv *priv, u32 func, void *data, size_t len)
{
	struct vbus_device_proxy *dev = priv->vdev;

	return dev->ops->call(dev, func, data, len, 0);
}

/*
 * ---------------
 * rx descriptors
 * ---------------
 */

static void
rxdesc_alloc(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc,
	     size_t len)
{
	struct net_device *dev = priv->dev;
	struct sk_buff *skb;

	len += ETH_HLEN;

	skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
	BUG_ON(!skb);

	skb_reserve(skb, NET_IP_ALIGN); /* align IP on 16B boundary */

	if (priv->l4ro.available) {
		/*
		 * We will populate an SG descriptor initially with one
		 * IOV filled with an MTU SKB.
		 * If the packet needs to be larger than MTU, the host will
		 * grab pages out of the page-queue and populate additional
		 * IOVs.
		 */
		struct venet_sg *vsg = (struct venet_sg *)(unsigned long)desc->cookie;
		struct venet_iov *iov = &vsg->iov[0];

		memset(vsg, 0, SG_DESC_SIZE);

		vsg->cookie = (u64)(unsigned long)skb;
		vsg->count  = 1;

		iov->ptr = (u64)__pa(skb->data);
		iov->len = len;
	} else {
		desc->cookie = (u64)(unsigned long)skb;
		desc->ptr    = cpu_to_le64(__pa(skb->data));
		desc->len    = cpu_to_le64(len); /* total length */
	}

	desc->valid = 1;
}

static void
rx_pageq_refill(struct vbus_enet_priv *priv, gfp_t gfp_mask)
{
	struct ioq *ioq = priv->l4ro.pageq.queue;
	struct ioq_iterator iter;
	int ret, added = 0;

	if (ioq_full(ioq, ioq_idxtype_inuse))
		/* nothing to do if the pageq is already fully populated */
		return;

	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0);
	BUG_ON(ret < 0); /* will never fail unless seriously broken */

	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
	BUG_ON(ret < 0);

	/*
	 * Now populate each descriptor with an empty page
	 */
	while (!iter.desc->sown) {
		struct page *page = NULL;

		page = alloc_page(gfp_mask);
		if (!page)
			break;

		added = 1;

		iter.desc->cookie = (u64)(unsigned long)page;
		iter.desc->ptr    = cpu_to_le64(__pa(page_address(page)));
		iter.desc->len    = cpu_to_le64(PAGE_SIZE);

		ret = ioq_iter_push(&iter, 0);
		BUG_ON(ret < 0);
	}

	if (added)
		ioq_signal(ioq, 0);
}

static void
rx_setup(struct vbus_enet_priv *priv)
{
	struct ioq *ioq = priv->rxq.queue;
	struct ioq_iterator iter;
	int ret;
	int i = 0;

	/*
	 * We want to iterate on the "valid" index.  By default the iterator
	 * will not "autoupdate" which means it will not hypercall the host
	 * with our changes.  This is good, because we are really just
	 * initializing stuff here anyway.  Note that you can always manually
	 * signal the host with ioq_signal() if the autoupdate feature is not
	 * used.
	 */
	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
	BUG_ON(ret < 0); /* will never fail unless seriously broken */

	/*
	 * Seek to the tail of the valid index (which should be our first
	 * item, since the queue is brand-new)
	 */
	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
	BUG_ON(ret < 0);

	/*
	 * Now populate each descriptor with an empty buffer and mark it valid
	 */
	while (!iter.desc->valid) {
		if (priv->l4ro.available) {
			size_t offset = (i * SG_DESC_SIZE);
			void *addr = &priv->l4ro.pool[offset];

			iter.desc->ptr    = cpu_to_le64(offset);
			iter.desc->cookie = (u64)(unsigned long)addr;
			iter.desc->len    = cpu_to_le64(SG_DESC_SIZE);
		}

		rxdesc_alloc(priv, iter.desc, priv->dev->mtu);

		/*
		 * This push operation will simultaneously advance the
		 * valid-head index and increment our position in the queue
		 * by one.
		 */
		ret = ioq_iter_push(&iter, 0);
		BUG_ON(ret < 0);

		i++;
	}

	if (priv->l4ro.available)
		rx_pageq_refill(priv, GFP_KERNEL);
}

static void
rx_rxq_teardown(struct vbus_enet_priv *priv)
{
	struct ioq *ioq = priv->rxq.queue;
	struct ioq_iterator iter;
	int ret;

	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
	BUG_ON(ret < 0);

	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
	BUG_ON(ret < 0);

	/*
	 * free each valid descriptor
	 */
	while (iter.desc->valid) {
		struct sk_buff *skb;

		if (priv->l4ro.available) {
			struct venet_sg *vsg;
			int i;

			vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie;

			/* skip i=0, since that is the skb->data IOV */
			for (i = 1; i < vsg->count; i++) {
				struct venet_iov *iov = &vsg->iov[i];
				struct page *page = (struct page *)(unsigned long)iov->ptr;

				put_page(page);
			}

			skb = (struct sk_buff *)(unsigned long)vsg->cookie;
		} else
			skb = (struct sk_buff *)(unsigned long)iter.desc->cookie;

		iter.desc->valid = 0;
		wmb();

		iter.desc->ptr = 0;
		iter.desc->cookie = 0;

		ret = ioq_iter_pop(&iter, 0);
		BUG_ON(ret < 0);

		dev_kfree_skb(skb);
	}
}

static void
rx_l4ro_teardown(struct vbus_enet_priv *priv)
{
	struct ioq *ioq = priv->l4ro.pageq.queue;
	struct ioq_iterator iter;
	int ret;

	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0);
	BUG_ON(ret < 0);

	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
	BUG_ON(ret < 0);

	/*
	 * free each valid descriptor
	 */
	while (iter.desc->sown) {
		struct page *page = (struct page *)(unsigned long)iter.desc->cookie;

		iter.desc->valid = 0;
		wmb();

		iter.desc->ptr = 0;
		iter.desc->cookie = 0;

		ret = ioq_iter_pop(&iter, 0);
		BUG_ON(ret < 0);

		put_page(page);
	}

	ioq_put(ioq);
	kfree(priv->l4ro.pool);
}

static void
rx_teardown(struct vbus_enet_priv *priv)
{
	rx_rxq_teardown(priv);

	if (priv->l4ro.available)
		rx_l4ro_teardown(priv);
}

static int
tx_setup(struct vbus_enet_priv *priv)
{
	struct ioq *ioq = priv->tx.veq.queue;
	struct ioq_iterator iter;
	int i;
	int ret;

	if (!priv->sg)
		/*
		 * There is nothing to do for a ring that is not using
		 * scatter-gather
		 */
		return 0;

	/* pre-allocate our descriptor pool if pmtd is enabled */
	if (priv->pmtd.enabled) {
		struct vbus_device_proxy *dev = priv->vdev;
		size_t poollen = SG_DESC_SIZE * priv->tx.veq.count;
		char *pool;
		int shmid;

		/* pmtdquery will return the shm-id to use for the pool */
		ret = devcall(priv, VENET_FUNC_PMTDQUERY, NULL, 0);
		BUG_ON(ret < 0);

		shmid = ret;

		pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA);
		if (!pool)
			return -ENOMEM;

		priv->pmtd.pool = pool;

		ret = dev->ops->shm(dev, NULL, shmid, 0, pool, poollen,
				    NULL, NULL, 0);
		BUG_ON(ret < 0);
	}

	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
	BUG_ON(ret < 0);

	ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
	BUG_ON(ret < 0);

	/*
	 * Now populate each descriptor with an empty SG descriptor
	 */
	for (i = 0; i < priv->tx.veq.count; i++) {
		struct venet_sg *vsg;

		if (priv->pmtd.enabled) {
			size_t offset = (i * SG_DESC_SIZE);

			vsg = (struct venet_sg *)&priv->pmtd.pool[offset];
			iter.desc->ptr = cpu_to_le64(offset);
		} else {
			vsg = kzalloc(SG_DESC_SIZE, GFP_KERNEL);
			if (!vsg)
				return -ENOMEM;

			iter.desc->ptr = cpu_to_le64(__pa(vsg));
		}

		iter.desc->cookie = (u64)(unsigned long)vsg;
		iter.desc->len    = cpu_to_le64(SG_DESC_SIZE);

		ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
		BUG_ON(ret < 0);
	}

	return 0;
}

static void
tx_teardown(struct vbus_enet_priv *priv)
{
	struct ioq *ioq = priv->tx.veq.queue;
	struct ioq_iterator iter;
	struct sk_buff *skb;
	int ret;

	/* forcefully free all outstanding transmissions */
	while ((skb = __skb_dequeue(&priv->tx.outstanding)))
		dev_kfree_skb(skb);

	if (!priv->sg)
		/*
		 * There is nothing else to do
		 * for a ring that is not using scatter-gather
		 */
		return;

	if (priv->pmtd.enabled) {
		/*
		 * PMTD mode means we only need to free the pool
		 */
		kfree(priv->pmtd.pool);
		return;
	}

	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
	BUG_ON(ret < 0);

	/* seek to position 0 */
	ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
	BUG_ON(ret < 0);

	/*
	 * free each valid descriptor
	 */
	while (iter.desc->cookie) {
		struct venet_sg *vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie;

		iter.desc->valid = 0;
		wmb();

		iter.desc->ptr = 0;
		iter.desc->cookie = 0;

		ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
		BUG_ON(ret < 0);

		kfree(vsg);
	}
}

static void
evq_teardown(struct vbus_enet_priv *priv)
{
	if (!priv->evq.enabled)
		return;

	ioq_put(priv->evq.veq.queue);
	kfree(priv->evq.pool);
}

/*
 * Open and close
 */
static int
vbus_enet_open(struct net_device *dev)
{
	struct vbus_enet_priv *priv = netdev_priv(dev);
	int ret;

	ret = devcall(priv, VENET_FUNC_LINKUP, NULL, 0);
	BUG_ON(ret < 0);

	napi_enable(&priv->napi);

	return 0;
}

static int
vbus_enet_stop(struct net_device *dev)
{
	struct vbus_enet_priv *priv = netdev_priv(dev);
	int ret;

	napi_disable(&priv->napi);

	ret = devcall(priv, VENET_FUNC_LINKDOWN, NULL, 0);
	BUG_ON(ret < 0);

	return 0;
}

/*
 * Configuration changes (passed on by ifconfig)
 */
static int
vbus_enet_config(struct net_device *dev, struct ifmap *map)
{
	if (dev->flags & IFF_UP) /* can't act on a running interface */
		return -EBUSY;

	/* Don't allow changing the I/O address */
	if (map->base_addr != dev->base_addr) {
		dev_warn(&dev->dev, "Can't change I/O address\n");
		return -EOPNOTSUPP;
	}

	/* ignore other fields */
	return 0;
}

static void
vbus_enet_schedule_rx(struct vbus_enet_priv *priv)
{
	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);

	if (napi_schedule_prep(&priv->napi)) {
		/* Disable further interrupts */
		ioq_notify_disable(priv->rxq.queue, 0);
		__napi_schedule(&priv->napi);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
}

static int
vbus_enet_change_mtu(struct net_device *dev, int new_mtu)
{
	struct vbus_enet_priv *priv = netdev_priv(dev);
	int ret;

	dev->mtu = new_mtu;

	/*
	 * FLUSHRX will cause the device to flush any outstanding
	 * RX buffers.  They will appear to come in as 0 length
	 * packets which we can simply discard and replace with new_mtu
	 * buffers for the future.
	 */
	ret = devcall(priv, VENET_FUNC_FLUSHRX, NULL, 0);
	BUG_ON(ret < 0);

	vbus_enet_schedule_rx(priv);

	return 0;
}

static struct sk_buff *
vbus_enet_l4ro_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc)
{
	struct venet_sg *vsg = (struct venet_sg *)(unsigned long)desc->cookie;
	struct sk_buff *skb = (struct sk_buff *)(unsigned long)vsg->cookie;
	struct skb_shared_info *sinfo = skb_shinfo(skb);
	int i;

	rx_pageq_refill(priv, GFP_ATOMIC);

	if (!vsg->len)
		/*
		 * the device may send a zero-length packet when it is
		 * flushing references on the ring.
		 * We can just drop these on the floor.
		 */
		goto fail;

	/* advance only by the linear portion in IOV[0] */
	skb_put(skb, vsg->iov[0].len);

	/* skip i=0, since that is the skb->data IOV */
	for (i = 1; i < vsg->count; i++) {
		struct venet_iov *iov = &vsg->iov[i];
		struct page *page = (struct page *)(unsigned long)iov->ptr;
		skb_frag_t *f = &sinfo->frags[i-1];

		f->page        = page;
		f->page_offset = 0;
		f->size        = iov->len;

		PDEBUG(priv->dev, "SG: Importing %d byte page[%i]\n",
		       f->size, i);

		skb->data_len += f->size;
		skb->len      += f->size;
		skb->truesize += f->size;

		sinfo->nr_frags++;
	}

	if (vsg->flags & VENET_SG_FLAG_NEEDS_CSUM
	    && !skb_partial_csum_set(skb, vsg->csum.start,
				     vsg->csum.offset)) {
		priv->dev->stats.rx_frame_errors++;
		goto fail;
	}

	if (vsg->flags & VENET_SG_FLAG_GSO) {
		PDEBUG(priv->dev, "L4RO packet detected\n");

		switch (vsg->gso.type) {
		case VENET_GSO_TYPE_TCPV4:
			sinfo->gso_type = SKB_GSO_TCPV4;
			break;
		case VENET_GSO_TYPE_TCPV6:
			sinfo->gso_type = SKB_GSO_TCPV6;
			break;
		case VENET_GSO_TYPE_UDP:
			sinfo->gso_type = SKB_GSO_UDP;
			break;
		default:
			PDEBUG(priv->dev, "Illegal L4RO type: %d\n",
			       vsg->gso.type);
			priv->dev->stats.rx_frame_errors++;
			goto fail;
		}

		if (vsg->flags & VENET_SG_FLAG_ECN)
			sinfo->gso_type |= SKB_GSO_TCP_ECN;

		sinfo->gso_size = vsg->gso.size;
		if (sinfo->gso_size == 0) {
			PDEBUG(priv->dev, "Illegal L4RO size: %d\n",
			       vsg->gso.size);
			priv->dev->stats.rx_frame_errors++;
			goto fail;
		}

		/*
		 * Header must be checked, and gso_segs computed.
		 */
		sinfo->gso_type |= SKB_GSO_DODGY;
		sinfo->gso_segs = 0;
	}

	return skb;

fail:
	dev_kfree_skb(skb);

	return NULL;
}

static struct sk_buff *
vbus_enet_flat_import(struct vbus_enet_priv *priv, struct ioq_ring_desc *desc)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->cookie;

	if (!desc->len) {
		/*
		 * the device may send a zero-length packet when it is
		 * flushing references on the ring.  We can just drop
		 * these on the floor.
		 */
		dev_kfree_skb(skb);
		return NULL;
	}

	skb_put(skb, le64_to_cpu(desc->len));

	return skb;
}

/*
 * The poll implementation.
 */
static int
vbus_enet_poll(struct napi_struct *napi, int budget)
{
	struct vbus_enet_priv *priv = napi_to_priv(napi);
	int npackets = 0;
	struct ioq_iterator iter;
	int ret;

	PDEBUG(priv->dev, "polling...\n");

	/* We want to iterate on the head of the in-use index */
	ret = ioq_iter_init(priv->rxq.queue, &iter, ioq_idxtype_inuse,
			    IOQ_ITER_AUTOUPDATE);
	BUG_ON(ret < 0);

	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
	BUG_ON(ret < 0);

	/*
	 * We stop if we have met the quota or there are no more packets.
	 * The EOM is indicated by finding a packet that is still owned by
	 * the south side
	 */
	while ((npackets < budget) && (!iter.desc->sown)) {
		struct sk_buff *skb;

		skb = priv->import(priv, iter.desc);
		if (skb) {
			/* Maintain stats */
			npackets++;
			priv->dev->stats.rx_packets++;
			priv->dev->stats.rx_bytes += skb->len;

			/* Pass the buffer up to the stack */
			skb->dev      = priv->dev;
			skb->protocol = eth_type_trans(skb, priv->dev);
			netif_receive_skb(skb);

			mb();
		}

		/* Grab a new buffer to put in the ring */
		rxdesc_alloc(priv, iter.desc, priv->dev->mtu);

		/* Advance the in-use tail */
		ret = ioq_iter_pop(&iter, 0);
		BUG_ON(ret < 0);
	}

	PDEBUG(priv->dev, "%d packets received\n", npackets);

	/*
	 * If we processed all packets, we're done; tell the kernel and
	 * reenable ints
	 */
	if (ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) {
		napi_complete(napi);
		ioq_notify_enable(priv->rxq.queue, 0);
		ret = 0;
	} else
		/* We couldn't process everything. */
		ret = 1;

	return ret;
}

/*
 * Transmit a packet (called by the kernel)
 */
static int
vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev)
{
	struct vbus_enet_priv *priv = netdev_priv(dev);
	struct ioq_iterator iter;
	int ret;
	unsigned long flags;

	PDEBUG(priv->dev, "sending %d bytes\n", skb->len);

	spin_lock_irqsave(&priv->lock, flags);

	if (ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) {
		/*
		 * We must flow-control the kernel by disabling the queue
		 */
		spin_unlock_irqrestore(&priv->lock, flags);
		netif_stop_queue(dev);
		dev_err(&priv->dev->dev, "tx on full queue bug\n");
		return 1;
	}

	/*
	 * We want to iterate on the tail of both the "inuse" and "valid"
	 * index so we specify the "both" index
	 */
	ret = ioq_iter_init(priv->tx.veq.queue, &iter, ioq_idxtype_both,
			    IOQ_ITER_AUTOUPDATE);
	BUG_ON(ret < 0);

	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
	BUG_ON(ret < 0);
	BUG_ON(iter.desc->sown);

	if (priv->sg) {
		struct venet_sg *vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie;
		struct scatterlist sgl[MAX_SKB_FRAGS+1];
		struct scatterlist *sg;
		int count, maxcount = ARRAY_SIZE(sgl);

		sg_init_table(sgl, maxcount);

		memset(vsg, 0, sizeof(*vsg));

		vsg->cookie = (u64)(unsigned long)skb;
		vsg->len    = skb->len;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vsg->flags      |= VENET_SG_FLAG_NEEDS_CSUM;
			vsg->csum.start  = skb->csum_start - skb_headroom(skb);
			vsg->csum.offset = skb->csum_offset;
		}

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			vsg->flags |= VENET_SG_FLAG_GSO;

			vsg->gso.hdrlen = skb_headlen(skb);
			vsg->gso.size   = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vsg->gso.type = VENET_GSO_TYPE_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vsg->gso.type = VENET_GSO_TYPE_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vsg->gso.type = VENET_GSO_TYPE_UDP;
			else
				panic("Virtual-Ethernet: unknown GSO type 0x%x\n",
				      sinfo->gso_type);

			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vsg->flags |= VENET_SG_FLAG_ECN;
		}

		count = skb_to_sgvec(skb, sgl, 0, skb->len);
		BUG_ON(count > maxcount);

		for (sg = &sgl[0]; sg; sg = sg_next(sg)) {
			struct venet_iov *iov = &vsg->iov[vsg->count++];

			iov->len = sg->length;
			iov->ptr = (u64)sg_phys(sg);
		}

		iter.desc->len = cpu_to_le64(VSG_DESC_SIZE(vsg->count));

	} else {
		/*
		 * non scatter-gather mode: simply put the skb right onto
		 * the ring.
		 */
		iter.desc->cookie = (u64)(unsigned long)skb;
		iter.desc->len    = cpu_to_le64(skb->len);
		iter.desc->ptr    = cpu_to_le64(__pa(skb->data));
	}

	iter.desc->valid = 1;

	priv->dev->stats.tx_packets++;
	priv->dev->stats.tx_bytes += skb->len;

	skb_queue_tail(&priv->tx.outstanding, skb);

	/*
	 * This advances both indexes together implicitly, and then
	 * signals the south side to consume the packet
	 */
	ret = ioq_iter_push(&iter, 0);
	BUG_ON(ret < 0);

	dev->trans_start = jiffies; /* save the timestamp */

	if (ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) {
		/*
		 * If the queue is congested, we must flow-control the kernel
		 */
		PDEBUG(priv->dev, "backpressure tx queue\n");
		netif_stop_queue(dev);
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	return 0;
}

/* assumes priv->lock held */
static void
vbus_enet_skb_complete(struct vbus_enet_priv *priv, struct sk_buff *skb)
{
	PDEBUG(priv->dev, "completed sending %d bytes\n", skb->len);

	skb_unlink(skb, &priv->tx.outstanding);
	dev_kfree_skb(skb);
}

/*
 * reclaim any outstanding completed tx packets
 *
 * assumes priv->lock held
 */
static struct sk_buff *
vbus_enet_tx_reap_one(struct vbus_enet_priv *priv)
{
	struct sk_buff *skb = NULL;
	struct ioq_iterator iter;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&priv->lock, flags);

	/*
	 * We want to iterate on the head of the valid index, but we
	 * do not want the iter_pop (below) to flip the ownership, so
	 * we set the NOFLIPOWNER option
	 */
	ret = ioq_iter_init(priv->tx.veq.queue, &iter, ioq_idxtype_valid,
			    IOQ_ITER_NOFLIPOWNER);
	BUG_ON(ret < 0);

	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
	BUG_ON(ret < 0);

	if (iter.desc->valid && !iter.desc->sown) {

		if (priv->sg) {
			struct venet_sg *vsg;

			vsg = (struct venet_sg *)(unsigned long)iter.desc->cookie;
			skb = (struct sk_buff *)(unsigned long)vsg->cookie;
		} else
			skb = (struct sk_buff *)(unsigned long)iter.desc->cookie;

		/* Reset the descriptor */
		iter.desc->valid = 0;

		/* Advance the valid-index head */
		ret = ioq_iter_pop(&iter, 0);
		BUG_ON(ret < 0);
	}

	/*
	 * If we were previously stopped due to flow control, restart the
	 * processing
	 */
	if (netif_queue_stopped(priv->dev)
	    && !ioq_full(priv->tx.veq.queue, ioq_idxtype_valid)) {
		PDEBUG(priv->dev, "re-enabling tx queue\n");
		netif_wake_queue(priv->dev);
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	return skb;
}

static void
vbus_enet_tx_reap(struct vbus_enet_priv *priv)
{
	struct sk_buff *skb;

	while ((skb = vbus_enet_tx_reap_one(priv))) {
		if (!priv->evq.txc)
			/*
			 * We are responsible for freeing the packet upon
			 * reap if TXC is not enabled
			 */
			vbus_enet_skb_complete(priv, skb);
	}
}

static void
vbus_enet_timeout(struct net_device *dev)
{
	struct vbus_enet_priv *priv = netdev_priv(dev);

	dev_dbg(&dev->dev, "Transmit timeout\n");

	vbus_enet_tx_reap(priv);
}

static void
rx_isr(struct ioq_notifier *notifier)
{
	struct vbus_enet_priv *priv;
	struct net_device *dev;

	priv = container_of(notifier, struct vbus_enet_priv, rxq.notifier);
	dev = priv->dev;

	if (!ioq_empty(priv->rxq.queue, ioq_idxtype_inuse))
		vbus_enet_schedule_rx(priv);
}

static void
deferred_tx_isr(unsigned long data)
{
	struct vbus_enet_priv *priv = (struct vbus_enet_priv *)data;

	PDEBUG(priv->dev, "deferred_tx_isr\n");

	vbus_enet_tx_reap(priv);

	ioq_notify_enable(priv->tx.veq.queue, 0);
}

static void
tx_isr(struct ioq_notifier *notifier)
{
	struct vbus_enet_priv *priv;

	priv = container_of(notifier, struct vbus_enet_priv, tx.veq.notifier);

	PDEBUG(priv->dev, "tx_isr\n");

	ioq_notify_disable(priv->tx.veq.queue, 0);
	tasklet_schedule(&priv->tx.task);
}

static void
evq_linkstate_event(struct vbus_enet_priv *priv,
		    struct venet_event_header *header)
{
	struct venet_event_linkstate *event =
		(struct venet_event_linkstate *)header;

	switch (event->state) {
	case 0:
		netif_carrier_off(priv->dev);
		break;
	case 1:
		netif_carrier_on(priv->dev);
		break;
	default:
		break;
	}
}

static void
evq_txc_event(struct vbus_enet_priv *priv,
	      struct venet_event_header *header)
{
	struct venet_event_txc *event = (struct venet_event_txc *)header;

	vbus_enet_tx_reap(priv);

	vbus_enet_skb_complete(priv,
			       (struct sk_buff *)(unsigned long)event->cookie);
}

static void
deferred_evq_isr(unsigned long data)
{
	struct vbus_enet_priv *priv = (struct vbus_enet_priv *)data;
	int nevents = 0;
	struct ioq_iterator iter;
	int ret;

	PDEBUG(priv->dev, "evq: polling...\n");

	/* We want to iterate on the head of the in-use index */
	ret = ioq_iter_init(priv->evq.veq.queue, &iter, ioq_idxtype_inuse,
			    IOQ_ITER_AUTOUPDATE);
	BUG_ON(ret < 0);

	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
	BUG_ON(ret < 0);

	/*
	 * The EOM is indicated by finding a packet that is still owned by
	 * the south side
	 */
	while (!iter.desc->sown) {
		struct venet_event_header *header;

		header = (struct venet_event_header *)(unsigned long)iter.desc->cookie;

		switch (header->id) {
		case VENET_EVENT_LINKSTATE:
			evq_linkstate_event(priv, header);
			break;
		case VENET_EVENT_TXC:
			evq_txc_event(priv, header);
			break;
		default:
			panic("venet: unexpected event id:%d of size %d\n",
			      header->id, header->size);
			break;
		}

		memset((void *)(unsigned long)iter.desc->cookie, 0,
		       priv->evq.evsize);

		/* Advance the in-use tail */
		ret = ioq_iter_pop(&iter, 0);
		BUG_ON(ret < 0);

		nevents++;
	}

	PDEBUG(priv->dev, "%d events received\n", nevents);

	ioq_notify_enable(priv->evq.veq.queue, 0);
}

static void
evq_isr(struct ioq_notifier *notifier)
{
	struct vbus_enet_priv *priv;

	priv = container_of(notifier, struct vbus_enet_priv, evq.veq.notifier);

	PDEBUG(priv->dev, "evq_isr\n");

	ioq_notify_disable(priv->evq.veq.queue, 0);
	tasklet_schedule(&priv->evq.task);
}

static int
vbus_enet_sg_negcap(struct vbus_enet_priv *priv)
{
	struct net_device *dev = priv->dev;
	struct venet_capabilities caps;
	int ret;

	memset(&caps, 0, sizeof(caps));

	if (sg_enabled) {
		caps.gid = VENET_CAP_GROUP_SG;
		caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6
			      |VENET_CAP_ECN|VENET_CAP_PMTD);
		/* note: exclude UFO for now due to stack bug */
	}

	ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps));
	if (ret < 0)
		return ret;

	if (caps.bits & VENET_CAP_SG) {
		priv->sg = true;

		dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM|NETIF_F_FRAGLIST;

		if (caps.bits & VENET_CAP_TSO4)
			dev->features |= NETIF_F_TSO;
		if (caps.bits & VENET_CAP_UFO)
			dev->features |= NETIF_F_UFO;
		if (caps.bits & VENET_CAP_TSO6)
			dev->features |= NETIF_F_TSO6;
		if (caps.bits & VENET_CAP_ECN)
			dev->features |= NETIF_F_TSO_ECN;

		if (caps.bits & VENET_CAP_PMTD)
			priv->pmtd.enabled = true;
	}

	return 0;
}

static int
vbus_enet_evq_negcap(struct vbus_enet_priv *priv, unsigned long count)
{
	struct venet_capabilities caps;
	int ret;

	memset(&caps, 0, sizeof(caps));

	caps.gid = VENET_CAP_GROUP_EVENTQ;
	caps.bits |= VENET_CAP_EVQ_LINKSTATE;
	caps.bits |= VENET_CAP_EVQ_TXC;

	ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps));
	if (ret < 0)
		return ret;

	if (caps.bits) {
		struct vbus_device_proxy *dev = priv->vdev;
		struct venet_eventq_query query;
		size_t poollen;
		struct ioq_iterator iter;
		char *pool;
		int i;

		priv->evq.enabled = true;

		if (caps.bits & VENET_CAP_EVQ_LINKSTATE) {
			/*
			 * We will assume there is no carrier until we get
			 * an event telling us otherwise
			 */
			netif_carrier_off(priv->dev);
			priv->evq.linkstate = true;
		}

		if (caps.bits & VENET_CAP_EVQ_TXC)
			priv->evq.txc = true;

		memset(&query, 0, sizeof(query));

		ret = devcall(priv, VENET_FUNC_EVQQUERY, &query, sizeof(query));
		if (ret < 0)
			return ret;

		priv->evq.evsize = query.evsize;

		poollen = query.evsize * count;

		pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA);
		if (!pool)
			return -ENOMEM;

		priv->evq.pool = pool;

		ret = dev->ops->shm(dev, NULL, query.dpid, 0,
				    pool, poollen, NULL, NULL, 0);
		if (ret < 0)
			return ret;

		queue_init(priv, &priv->evq.veq, "evq",
			   query.qid, count, evq_isr);

		ret = ioq_iter_init(priv->evq.veq.queue,
				    &iter, ioq_idxtype_valid, 0);
		BUG_ON(ret < 0);

		ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
		BUG_ON(ret < 0);

		/* Now populate each descriptor with an empty event */
		for (i = 0; i < count; i++) {
			size_t offset = (i * query.evsize);
			void *addr = &priv->evq.pool[offset];

			iter.desc->ptr    = cpu_to_le64(offset);
			iter.desc->cookie = (u64)(unsigned long)addr;
			iter.desc->len    = cpu_to_le64(query.evsize);

			ret = ioq_iter_push(&iter, 0);
			BUG_ON(ret < 0);
		}

		/* Finally, enable interrupts */
		tasklet_init(&priv->evq.task, deferred_evq_isr,
			     (unsigned long)priv);
		ioq_notify_enable(priv->evq.veq.queue, 0);
	}

	return 0;
}

static int
vbus_enet_l4ro_negcap(struct vbus_enet_priv *priv, unsigned long count)
{
	struct venet_capabilities caps;
	int ret;

	memset(&caps, 0, sizeof(caps));

	caps.gid = VENET_CAP_GROUP_L4RO;
	caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6
		      |VENET_CAP_ECN);

	ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps));
	if (ret < 0) {
		printk(KERN_ERR "Error negotiating L4RO: %d\n", ret);
		return ret;
	}

	if (caps.bits & VENET_CAP_SG) {
		struct vbus_device_proxy *dev = priv->vdev;
		size_t poollen = SG_DESC_SIZE * count;
		struct venet_l4ro_query query;
		char *pool;

		memset(&query, 0, sizeof(query));

		ret = devcall(priv, VENET_FUNC_L4ROQUERY, &query, sizeof(query));
		if (ret < 0) {
			printk(KERN_ERR "Error querying L4RO: %d\n", ret);
			return ret;
		}

		pool = kzalloc(poollen, GFP_KERNEL | GFP_DMA);
		if (!pool)
			return -ENOMEM;

		/*
		 * pre-mapped descriptor pool
		 */
		ret = dev->ops->shm(dev, NULL, query.dpid, 0,
				    pool, poollen, NULL, NULL, 0);
		if (ret < 0) {
			printk(KERN_ERR "Error registering L4RO pool: %d\n",
			       ret);
			kfree(pool);
			return ret;
		}

		/*
		 * page-queue: contains a ring of arbitrary pages for
		 * consumption by the host for when the SG::IOV count exceeds
		 * one MTU frame.  All we need to do is keep it populated
		 * with free pages.
		 */
		queue_init(priv, &priv->l4ro.pageq, "pageq", query.pqid,
			   count, NULL);

		priv->l4ro.pool      = pool;
		priv->l4ro.available = true;
	}

	return 0;
}

static int
vbus_enet_negcap(struct vbus_enet_priv *priv)
{
	int ret;

	ret = vbus_enet_sg_negcap(priv);
	if (ret < 0)
		return ret;

	ret = vbus_enet_evq_negcap(priv, tx_ringlen);
	if (ret < 0)
		return ret;

	ret = vbus_enet_l4ro_negcap(priv, rx_ringlen);
	if (ret < 0)
		return ret;

	return 0;
}

static int vbus_enet_set_tx_csum(struct net_device *dev, u32 data)
{
	struct vbus_enet_priv *priv = netdev_priv(dev);

	if (data && !priv->sg)
		return -ENOSYS;

	return ethtool_op_set_tx_hw_csum(dev, data);
}

static struct ethtool_ops vbus_enet_ethtool_ops = {
	.set_tx_csum = vbus_enet_set_tx_csum,
	.set_sg      = ethtool_op_set_sg,
	.set_tso     = ethtool_op_set_tso,
	.get_link    = ethtool_op_get_link,
};

static const struct net_device_ops vbus_enet_netdev_ops = {
	.ndo_open            = vbus_enet_open,
	.ndo_stop            = vbus_enet_stop,
	.ndo_set_config      = vbus_enet_config,
	.ndo_start_xmit      = vbus_enet_tx_start,
	.ndo_change_mtu      = vbus_enet_change_mtu,
	.ndo_tx_timeout      = vbus_enet_timeout,
	.ndo_set_mac_address = eth_mac_addr,
	.ndo_validate_addr   = eth_validate_addr,
};

/*
 * This is called whenever a new vbus_device_proxy is added to the vbus
 * with the matching VENET_ID
 */
static int
vbus_enet_probe(struct vbus_device_proxy *vdev)
{
	struct net_device *dev;
	struct vbus_enet_priv *priv;
	int ret;

	printk(KERN_INFO "VENET: Found new device at %lld\n", vdev->id);

	ret = vdev->ops->open(vdev, VENET_VERSION, 0);
	if (ret < 0)
		return ret;

	dev = alloc_etherdev(sizeof(struct vbus_enet_priv));
	if (!dev)
		return -ENOMEM;

	/*
	 * establish our device-name early so we can incorporate it into
	 * the signal-path names, etc
	 */
	rtnl_lock();

	ret = dev_alloc_name(dev, dev->name);
	if (ret < 0)
		goto out_free;

	priv = netdev_priv(dev);

	spin_lock_init(&priv->lock);
	priv->dev  = dev;
	priv->vdev = vdev;

	ret = vbus_enet_negcap(priv);
	if (ret < 0) {
		printk(KERN_INFO "VENET: Error negotiating capabilities for "
		       "%lld\n", priv->vdev->id);
		goto out_free;
	}

	if (priv->l4ro.available)
		priv->import = &vbus_enet_l4ro_import;
	else
		priv->import = &vbus_enet_flat_import;

	skb_queue_head_init(&priv->tx.outstanding);

	queue_init(priv, &priv->rxq, "rx", VENET_QUEUE_RX, rx_ringlen,
		   rx_isr);
	queue_init(priv, &priv->tx.veq, "tx", VENET_QUEUE_TX, tx_ringlen,
		   tx_isr);

	rx_setup(priv);
	tx_setup(priv);

	ioq_notify_enable(priv->rxq.queue, 0); /* enable rx interrupts */

	if (!priv->evq.txc) {
		/*
		 * If the TXC feature is present, we will receive our
		 * tx-complete notification via the event-channel.  Therefore,
		 * we only enable txq interrupts if the TXC feature is not
		 * present.
		 */
		tasklet_init(&priv->tx.task, deferred_tx_isr,
			     (unsigned long)priv);
		ioq_notify_enable(priv->tx.veq.queue, 0);
	}

	dev->netdev_ops     = &vbus_enet_netdev_ops;
	dev->watchdog_timeo = 5 * HZ;
	SET_ETHTOOL_OPS(dev, &vbus_enet_ethtool_ops);
	SET_NETDEV_DEV(dev, &vdev->dev);

	netif_napi_add(dev, &priv->napi, vbus_enet_poll, 128);

	ret = devcall(priv, VENET_FUNC_MACQUERY, priv->dev->dev_addr,
		      ETH_ALEN);
	if (ret < 0) {
		printk(KERN_INFO "VENET: Error obtaining MAC address for "
		       "%lld\n", priv->vdev->id);
		goto out_free;
	}

	dev->features |= NETIF_F_HIGHDMA;

	ret = register_netdevice(dev);
	if (ret < 0) {
		printk(KERN_INFO "VENET: error %i registering device \"%s\"\n",
		       ret, dev->name);
		goto out_free;
	}

	rtnl_unlock();

	vdev->priv = priv;

	return 0;

out_free:
	rtnl_unlock();

	free_netdev(dev);

	return ret;
}

static int
vbus_enet_remove(struct vbus_device_proxy *vdev)
{
	struct vbus_enet_priv *priv = (struct vbus_enet_priv *)vdev->priv;
	struct vbus_device_proxy *dev = priv->vdev;

	unregister_netdev(priv->dev);
	napi_disable(&priv->napi);

	rx_teardown(priv);
	ioq_put(priv->rxq.queue);

	tx_teardown(priv);
	ioq_put(priv->tx.veq.queue);

	if (priv->evq.enabled)
		evq_teardown(priv);

	dev->ops->close(dev, 0);

	free_netdev(priv->dev);

	return 0;
}

/*
 * Finally, the module stuff
 */

static struct vbus_driver_ops vbus_enet_driver_ops = {
	.probe  = vbus_enet_probe,
	.remove = vbus_enet_remove,
};

static struct vbus_driver vbus_enet_driver = {
	.type  = VENET_TYPE,
	.owner = THIS_MODULE,
	.ops   = &vbus_enet_driver_ops,
};

static __init int
vbus_enet_init_module(void)
{
	printk(KERN_INFO "Virtual Ethernet: Copyright (C) 2009 Novell, Gregory Haskins\n");
	printk(KERN_DEBUG "VENET: Using %d/%d queue depth\n",
	       rx_ringlen, tx_ringlen);

	return vbus_driver_register(&vbus_enet_driver);
}

static __exit void
vbus_enet_cleanup(void)
{
	vbus_driver_unregister(&vbus_enet_driver);
}

module_init(vbus_enet_init_module);
module_exit(vbus_enet_cleanup);

VBUS_DRIVER_AUTOPROBE(VENET_TYPE);