From 9953ca6cb757fb317bb7cdd2fcbf9b88312e241b Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Tue, 27 May 2008 12:06:26 +0100 Subject: virtio: fix virtio_net xmit of freed skb bug On Mon, 2008-05-26 at 17:42 +1000, Rusty Russell wrote: > If we fail to transmit a packet, we assume the queue is full and put > the skb into last_xmit_skb. However, if more space frees up before we > xmit it, we loop, and the result can be transmitting the same skb twice. > > Fix is simple: set skb to NULL if we've used it in some way, and check > before sending. ... > diff -r 564237b31993 drivers/net/virtio_net.c > --- a/drivers/net/virtio_net.c Mon May 19 12:22:00 2008 +1000 > +++ b/drivers/net/virtio_net.c Mon May 19 12:24:58 2008 +1000 > @@ -287,21 +287,25 @@ again: > free_old_xmit_skbs(vi); > > /* If we has a buffer left over from last time, send it now. */ > - if (vi->last_xmit_skb) { > + if (unlikely(vi->last_xmit_skb)) { > if (xmit_skb(vi, vi->last_xmit_skb) != 0) { > /* Drop this skb: we only queue one. */ > vi->dev->stats.tx_dropped++; > kfree_skb(skb); > + skb = NULL; > goto stop_queue; > } > vi->last_xmit_skb = NULL; With this, may drop an skb and then later in the function discover that we could have sent it after all. Poor wee skb :) How about the incremental patch below? Cheers, Mark. Subject: [PATCH] virtio_net: Delay dropping tx skbs Currently we drop the skb in start_xmit() if we have a queued buffer and fail to transmit it. However, if we delay dropping it until we've stopped the queue and enabled the tx notification callback, then there is a chance space might become available for it. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c28d7cb2035b..06d5c43bb207 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -335,16 +335,11 @@ again: free_old_xmit_skbs(vi); /* If we has a buffer left over from last time, send it now. */ - if (unlikely(vi->last_xmit_skb)) { - if (xmit_skb(vi, vi->last_xmit_skb) != 0) { - /* Drop this skb: we only queue one. */ - vi->dev->stats.tx_dropped++; - kfree_skb(skb); - skb = NULL; - goto stop_queue; - } - vi->last_xmit_skb = NULL; - } + if (unlikely(vi->last_xmit_skb) && + xmit_skb(vi, vi->last_xmit_skb) != 0) + goto stop_queue; + + vi->last_xmit_skb = NULL; /* Put new one in send queue and do transmit */ if (likely(skb)) { @@ -370,6 +365,11 @@ stop_queue: netif_start_queue(dev); goto again; } + if (skb) { + /* Drop this skb: we only queue one. */ + vi->dev->stats.tx_dropped++; + kfree_skb(skb); + } goto done; } -- cgit v1.2.3 From a9ea3fc6f2654a7407864fec983d1671d775b5ee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2008 11:21:42 +0800 Subject: virtio net: Add ethtool ops for SG/GSO This patch adds some basic ethtool operations to virtio_net so I could test SG without GSO (which was really useful because TSO turned out to be buggy :) Signed-off-by: Rusty Russell (remove MTU setting) --- drivers/net/virtio_net.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 06d5c43bb207..ce37a7e9541c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -19,6 +19,7 @@ //#define DEBUG #include #include +#include #include #include #include @@ -408,6 +409,22 @@ static int virtnet_close(struct net_device *dev) return 0; } +static int virtnet_set_tx_csum(struct net_device *dev, u32 data) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct virtio_device *vdev = vi->vdev; + + if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) + return -ENOSYS; + + return ethtool_op_set_tx_hw_csum(dev, data); +} + +static struct ethtool_ops virtnet_ethtool_ops = { + .set_tx_csum = virtnet_set_tx_csum, + .set_sg = ethtool_op_set_sg, +}; + static int virtnet_probe(struct virtio_device *vdev) { int err; @@ -427,6 +444,7 @@ static int virtnet_probe(struct virtio_device *vdev) #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = virtnet_netpoll; #endif + SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops); SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ -- cgit v1.2.3 From 97402b96f87c6e32f75f1bffdd91a5ee144b679d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2008 11:24:27 +0800 Subject: virtio net: Allow receiving SG packets Finally this patch lets virtio_net receive GSO packets in addition to sending them. This can definitely be optimised for the non-GSO case. For comparison the Xen approach stores one page in each skb and uses subsequent skb's pages to construct an SG skb instead of preallocating the maximum amount of pages per skb. Signed-off-by: Rusty Russell (added feature bits) --- drivers/net/virtio_net.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ce37a7e9541c..0886b8a2d92d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -55,6 +55,9 @@ struct virtnet_info struct tasklet_struct tasklet; bool free_in_tasklet; + /* I like... big packets and I cannot lie! */ + bool big_packets; + /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; @@ -89,6 +92,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + int err; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -96,10 +100,14 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, goto drop; } len -= sizeof(struct virtio_net_hdr); - BUG_ON(len > MAX_PACKET_LEN); - - skb_trim(skb, len); + err = pskb_trim(skb, len); + if (err) { + pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); + dev->stats.rx_dropped++; + goto drop; + } + skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; @@ -161,7 +169,7 @@ static void try_fill_recv(struct virtnet_info *vi) { struct sk_buff *skb; struct scatterlist sg[2+MAX_SKB_FRAGS]; - int num, err; + int num, err, i; sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { @@ -171,6 +179,24 @@ static void try_fill_recv(struct virtnet_info *vi) skb_put(skb, MAX_PACKET_LEN); vnet_hdr_to_sg(sg, skb); + + if (vi->big_packets) { + for (i = 0; i < MAX_SKB_FRAGS; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + f->page = alloc_page(GFP_ATOMIC); + if (!f->page) + break; + + f->page_offset = 0; + f->size = PAGE_SIZE; + + skb->data_len += PAGE_SIZE; + skb->len += PAGE_SIZE; + + skb_shinfo(skb)->nr_frags++; + } + } + num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; skb_queue_head(&vi->recv, skb); @@ -485,6 +511,12 @@ static int virtnet_probe(struct virtio_device *vdev) * the timer. */ vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); + /* If we can receive ANY GSO packets, we must allocate large ones. */ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + vi->big_packets = true; + /* We expect two virtqueues, receive then send. */ vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); if (IS_ERR(vi->rvq)) { @@ -571,7 +603,9 @@ static unsigned int features[] = { VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ + VIRTIO_F_NOTIFY_ON_EMPTY, }; static struct virtio_driver virtio_net = { -- cgit v1.2.3 From fb6813f480806d62361719e84777c8e00d3e86a8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:01 -0500 Subject: virtio: Recycle unused recv buffer pages for large skbs in net driver If we hack the virtio_net driver to always allocate full-sized (64k+) skbuffs, the driver slows down (lguest numbers): Time to receive 1GB (small buffers): 10.85 seconds Time to receive 1GB (64k+ buffers): 24.75 seconds Of course, large buffers use up more space in the ring, so we increase that from 128 to 2048: Time to receive 1GB (64k+ buffers, 2k ring): 16.61 seconds If we recycle pages rather than using alloc_page/free_page: Time to receive 1GB (64k+ buffers, 2k ring, recycle pages): 10.81 seconds This demonstrates that with efficient allocation, we don't need to have a separate "small buffer" queue. Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0886b8a2d92d..0196a0df9021 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -61,6 +61,9 @@ struct virtnet_info /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; + + /* Chain pages by the private ptr. */ + struct page *pages; }; static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb) @@ -73,6 +76,23 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); } +static void give_a_page(struct virtnet_info *vi, struct page *page) +{ + page->private = (unsigned long)vi->pages; + vi->pages = page; +} + +static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask) +{ + struct page *p = vi->pages; + + if (p) + vi->pages = (struct page *)p->private; + else + p = alloc_page(gfp_mask); + return p; +} + static void skb_xmit_done(struct virtqueue *svq) { struct virtnet_info *vi = svq->vdev->priv; @@ -101,6 +121,15 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, } len -= sizeof(struct virtio_net_hdr); + if (len <= MAX_PACKET_LEN) { + unsigned int i; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page); + skb->data_len = 0; + skb_shinfo(skb)->nr_frags = 0; + } + err = pskb_trim(skb, len); if (err) { pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); @@ -183,7 +212,7 @@ static void try_fill_recv(struct virtnet_info *vi) if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { skb_frag_t *f = &skb_shinfo(skb)->frags[i]; - f->page = alloc_page(GFP_ATOMIC); + f->page = get_a_page(vi, GFP_ATOMIC); if (!f->page) break; @@ -506,6 +535,7 @@ static int virtnet_probe(struct virtio_device *vdev) vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; + vi->pages = NULL; /* If they give us a callback when all buffers are done, we don't need * the timer. */ @@ -591,6 +621,10 @@ static void virtnet_remove(struct virtio_device *vdev) vdev->config->del_vq(vi->svq); vdev->config->del_vq(vi->rvq); unregister_netdev(vi->dev); + + while (vi->pages) + __free_pages(get_a_page(vi, GFP_KERNEL), 0); + free_netdev(vi->dev); } -- cgit v1.2.3