From 7b21e34fd1c272e3a8c3846168f2f6287a4cd72b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Jan 2012 15:44:42 +1030 Subject: virtio: harsher barriers for rpmsg. We were cheating with our barriers; using the smp ones rather than the real device ones. That was fine, until rpmsg came along, which is used to talk to a real device (a non-SMP CPU). Unfortunately, just putting back the real barriers (reverting d57ed95d) causes a performance regression on virtio-pci. In particular, Amos reports netbench's TCP_RR over virtio_net CPU utilization increased up to 35% while throughput went down by up to 14%. By comparison, this branch is in the noise. Reference: https://lkml.org/lkml/2011/12/11/22 Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c7a2c208f6ea..50da92046092 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -28,17 +28,20 @@ #ifdef CONFIG_SMP /* Where possible, use SMP barriers which are more lightweight than mandatory * barriers, because mandatory barriers control MMIO effects on accesses - * through relaxed memory I/O windows (which virtio does not use). */ -#define virtio_mb() smp_mb() -#define virtio_rmb() smp_rmb() -#define virtio_wmb() smp_wmb() + * through relaxed memory I/O windows (which virtio-pci does not use). */ +#define virtio_mb(vq) \ + do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0) +#define virtio_rmb(vq) \ + do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) +#define virtio_wmb(vq) \ + do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) #else /* We must force memory ordering even if guest is UP since host could be * running on another CPU, but SMP barriers are defined to barrier() in that * configuration. So fall back to mandatory barriers instead. */ -#define virtio_mb() mb() -#define virtio_rmb() rmb() -#define virtio_wmb() wmb() +#define virtio_mb(vq) mb() +#define virtio_rmb(vq) rmb() +#define virtio_wmb(vq) wmb() #endif #ifdef DEBUG @@ -77,6 +80,9 @@ struct vring_virtqueue /* Actual memory layout for this queue */ struct vring vring; + /* Can we use weak barriers? */ + bool weak_barriers; + /* Other side has made a mess, don't try any more. */ bool broken; @@ -245,14 +251,14 @@ void virtqueue_kick(struct virtqueue *_vq) START_USE(vq); /* Descriptors and available array need to be set before we expose the * new available array entries. */ - virtio_wmb(); + virtio_wmb(vq); old = vq->vring.avail->idx; new = vq->vring.avail->idx = old + vq->num_added; vq->num_added = 0; /* Need to update avail index before checking if we should notify */ - virtio_mb(); + virtio_mb(vq); if (vq->event ? vring_need_event(vring_avail_event(&vq->vring), new, old) : @@ -314,7 +320,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) } /* Only get used array entries after they have been exposed by host. */ - virtio_rmb(); + virtio_rmb(vq); i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; @@ -337,7 +343,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) * the read in the next get_buf call. */ if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(); + virtio_mb(vq); } END_USE(vq); @@ -366,7 +372,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) * entry. Always do both to keep code simple. */ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(); + virtio_mb(vq); if (unlikely(more_used(vq))) { END_USE(vq); return false; @@ -393,7 +399,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) /* TODO: tune this threshold */ bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; vring_used_event(&vq->vring) = vq->last_used_idx + bufs; - virtio_mb(); + virtio_mb(vq); if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { END_USE(vq); return false; @@ -453,6 +459,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt); struct virtqueue *vring_new_virtqueue(unsigned int num, unsigned int vring_align, struct virtio_device *vdev, + bool weak_barriers, void *pages, void (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), @@ -476,6 +483,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vq->vq.vdev = vdev; vq->vq.name = name; vq->notify = notify; + vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; -- cgit v1.2.3 From 5dfc17628d57f9e62043ed0cba03a6e3eb019a78 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Jan 2012 15:44:42 +1030 Subject: virtio: document functions better. The old documentation is left over from when we used a structure with strategy pointers. And move the documentation to the C file as per kernel practice. Though I disagree... Signed-off-by: Rusty Russell Reviewed-by: Christoph Hellwig --- drivers/virtio/virtio_ring.c | 92 +++++++++++++++++++++++++++++++++++++++++++- include/linux/virtio.h | 47 ---------------------- 2 files changed, 91 insertions(+), 48 deletions(-) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 50da92046092..fe50486341a4 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -166,6 +166,23 @@ static int vring_add_indirect(struct vring_virtqueue *vq, return head; } +/** + * virtqueue_add_buf_gfp - expose buffer to other end + * @vq: the struct virtqueue we're talking about. + * @sg: the description of the buffer(s). + * @out_num: the number of sg readable by other side + * @in_num: the number of sg which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns remaining capacity of queue or a negative error + * (ie. ENOSPC). Note that it only really makes sense to treat all + * positive return values as "available": indirect buffers mean that + * we can put an entire sg[] array inside a single queue entry. + */ int virtqueue_add_buf_gfp(struct virtqueue *_vq, struct scatterlist sg[], unsigned int out, @@ -244,6 +261,16 @@ add_head: } EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); +/** + * virtqueue_kick - update after add_buf + * @vq: the struct virtqueue + * + * After one or more virtqueue_add_buf_gfp calls, invoke this to kick + * the other side. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ void virtqueue_kick(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -300,6 +327,22 @@ static inline bool more_used(const struct vring_virtqueue *vq) return vq->last_used_idx != vq->vring.used->idx; } +/** + * virtqueue_get_buf - get the next used buffer + * @vq: the struct virtqueue we're talking about. + * @len: the length written into the buffer + * + * If the driver wrote data into the buffer, @len will be set to the + * amount written. This means you don't need to clear the buffer + * beforehand to ensure there's no data leakage in the case of short + * writes. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns NULL if there are no used buffers, or the "data" token + * handed to virtqueue_add_buf_gfp(). + */ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -351,6 +394,15 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) } EXPORT_SYMBOL_GPL(virtqueue_get_buf); +/** + * virtqueue_disable_cb - disable callbacks + * @vq: the struct virtqueue we're talking about. + * + * Note that this is not necessarily synchronous, hence unreliable and only + * useful as an optimization. + * + * Unlike other operations, this need not be serialized. + */ void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -359,6 +411,17 @@ void virtqueue_disable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); +/** + * virtqueue_enable_cb - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns "false" if there are pending + * buffers in the queue, to detect a possible race between the driver + * checking for more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ bool virtqueue_enable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -383,6 +446,19 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +/** + * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -410,6 +486,14 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); +/** + * virtqueue_detach_unused_buf - detach first unused buffer + * @vq: the struct virtqueue we're talking about. + * + * Returns NULL or the "data" token handed to virtqueue_add_buf_gfp(). + * This is not valid on an active queue; it is useful only for device + * shutdown. + */ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -538,7 +622,13 @@ void vring_transport_features(struct virtio_device *vdev) } EXPORT_SYMBOL_GPL(vring_transport_features); -/* return the size of the vring within the virtqueue */ +/** + * virtqueue_get_vring_size - return the size of the virtqueue's vring + * @vq: the struct virtqueue containing the vring of interest. + * + * Returns the size of the vring. This is mainly used for boasting to + * userspace. Unlike other operations, this need not be serialized. + */ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) { diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4c069d8bd740..73ad7243128f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -25,53 +25,6 @@ struct virtqueue { void *priv; }; -/** - * operations for virtqueue - * virtqueue_add_buf: expose buffer to other end - * vq: the struct virtqueue we're talking about. - * sg: the description of the buffer(s). - * out_num: the number of sg readable by other side - * in_num: the number of sg which are writable (after readable ones) - * data: the token identifying the buffer. - * gfp: how to do memory allocations (if necessary). - * Returns remaining capacity of queue (sg segments) or a negative error. - * virtqueue_kick: update after add_buf - * vq: the struct virtqueue - * After one or more add_buf calls, invoke this to kick the other side. - * virtqueue_get_buf: get the next used buffer - * vq: the struct virtqueue we're talking about. - * len: the length written into the buffer - * Returns NULL or the "data" token handed to add_buf. - * virtqueue_disable_cb: disable callbacks - * vq: the struct virtqueue we're talking about. - * Note that this is not necessarily synchronous, hence unreliable and only - * useful as an optimization. - * virtqueue_enable_cb: restart callbacks after disable_cb. - * vq: the struct virtqueue we're talking about. - * This re-enables callbacks; it returns "false" if there are pending - * buffers in the queue, to detect a possible race between the driver - * checking for more work, and enabling callbacks. - * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. - * vq: the struct virtqueue we're talking about. - * This re-enables callbacks but hints to the other side to delay - * interrupts until most of the available buffers have been processed; - * it returns "false" if there are many pending buffers in the queue, - * to detect a possible race between the driver checking for more work, - * and enabling callbacks. - * virtqueue_detach_unused_buf: detach first unused buffer - * vq: the struct virtqueue we're talking about. - * Returns NULL or the "data" token handed to add_buf - * virtqueue_get_vring_size: return the size of the virtqueue's vring - * vq: the struct virtqueue containing the vring of interest. - * Returns the size of the vring. - * - * Locking rules are straightforward: the driver is responsible for - * locking. No two operations may be invoked simultaneously, with the exception - * of virtqueue_disable_cb. - * - * All operations can be called in any context. - */ - int virtqueue_add_buf_gfp(struct virtqueue *vq, struct scatterlist sg[], unsigned int out_num, -- cgit v1.2.3 From f96fde41f7f9af6cf20f6a1919f5d9670f84d574 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Jan 2012 15:44:42 +1030 Subject: virtio: rename virtqueue_add_buf_gfp to virtqueue_add_buf Remove wrapper functions. This makes the allocation type explicit in all callers; I used GPF_KERNEL where it seemed obvious, left it at GFP_ATOMIC otherwise. Signed-off-by: Rusty Russell Reviewed-by: Christoph Hellwig --- drivers/block/virtio_blk.c | 2 +- drivers/char/hw_random/virtio-rng.c | 2 +- drivers/char/virtio_console.c | 6 +++--- drivers/net/virtio_net.c | 12 ++++++------ drivers/virtio/virtio_balloon.c | 7 ++++--- drivers/virtio/virtio_ring.c | 22 +++++++++++----------- include/linux/virtio.h | 21 ++++++--------------- net/9p/trans_virtio.c | 6 ++++-- tools/virtio/linux/virtio.h | 21 ++++++--------------- tools/virtio/virtio_test.c | 3 ++- 10 files changed, 44 insertions(+), 58 deletions(-) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4d0b70adf5f7..a345e40e1bca 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -172,7 +172,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, } } - if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { + if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) { mempool_free(vbr, vblk->pool); return false; } diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index fd699ccecf5b..723725bbb96b 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -47,7 +47,7 @@ static void register_buffer(u8 *buf, size_t size) sg_init_one(&sg, buf, size); /* There should always be room for one buffer. */ - if (virtqueue_add_buf(vq, &sg, 0, 1, buf) < 0) + if (virtqueue_add_buf(vq, &sg, 0, 1, buf, GFP_KERNEL) < 0) BUG(); virtqueue_kick(vq); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 8e3c46d67cb3..d1ae1492ee78 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -392,7 +392,7 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf) sg_init_one(sg, buf->buf, buf->size); - ret = virtqueue_add_buf(vq, sg, 0, 1, buf); + ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC); virtqueue_kick(vq); return ret; } @@ -457,7 +457,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, vq = portdev->c_ovq; sg_init_one(sg, &cpkt, sizeof(cpkt)); - if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt) >= 0) { + if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) >= 0) { virtqueue_kick(vq); while (!virtqueue_get_buf(vq, &len)) cpu_relax(); @@ -506,7 +506,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, reclaim_consumed_buffers(port); sg_init_one(sg, in_buf, in_count); - ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf); + ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC); /* Tell Host to go! */ virtqueue_kick(out_vq); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 76fe14efb2b5..6345a52194f9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -370,7 +370,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp) skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len); - err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 2, skb, gfp); + err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb, gfp); if (err < 0) dev_kfree_skb(skb); @@ -415,8 +415,8 @@ static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp) /* chain first in list head */ first->private = (unsigned long)list; - err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2, - first, gfp); + err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2, + first, gfp); if (err < 0) give_pages(vi, first); @@ -434,7 +434,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp) sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE); - err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 1, page, gfp); + err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page, gfp); if (err < 0) give_pages(vi, page); @@ -609,7 +609,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1; return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg, - 0, skb); + 0, skb, GFP_ATOMIC); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -767,7 +767,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, sg_set_buf(&sg[i + 1], sg_virt(s), s->length); sg_set_buf(&sg[out + in - 1], &status, sizeof(status)); - BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi) < 0); + BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0); virtqueue_kick(vi->cvq); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index f64ff185b8b5..0a6425aadf95 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -88,7 +88,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) init_completion(&vb->acked); /* We should always be able to add one buffer to an empty queue. */ - if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0) + if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0) BUG(); virtqueue_kick(vq); @@ -220,7 +220,7 @@ static void stats_handle_request(struct virtio_balloon *vb) vq = vb->stats_vq; sg_init_one(&sg, vb->stats, sizeof(vb->stats)); - if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0) + if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0) BUG(); virtqueue_kick(vq); } @@ -313,7 +313,8 @@ static int virtballoon_probe(struct virtio_device *vdev) * use it to signal us later. */ sg_init_one(&sg, vb->stats, sizeof vb->stats); - if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0) + if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb, GFP_KERNEL) + < 0) BUG(); virtqueue_kick(vb->stats_vq); } diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fe50486341a4..6ea92a6d1134 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -167,7 +167,7 @@ static int vring_add_indirect(struct vring_virtqueue *vq, } /** - * virtqueue_add_buf_gfp - expose buffer to other end + * virtqueue_add_buf - expose buffer to other end * @vq: the struct virtqueue we're talking about. * @sg: the description of the buffer(s). * @out_num: the number of sg readable by other side @@ -183,12 +183,12 @@ static int vring_add_indirect(struct vring_virtqueue *vq, * positive return values as "available": indirect buffers mean that * we can put an entire sg[] array inside a single queue entry. */ -int virtqueue_add_buf_gfp(struct virtqueue *_vq, - struct scatterlist sg[], - unsigned int out, - unsigned int in, - void *data, - gfp_t gfp) +int virtqueue_add_buf(struct virtqueue *_vq, + struct scatterlist sg[], + unsigned int out, + unsigned int in, + void *data, + gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i, avail, uninitialized_var(prev); @@ -259,13 +259,13 @@ add_head: return vq->num_free; } -EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); +EXPORT_SYMBOL_GPL(virtqueue_add_buf); /** * virtqueue_kick - update after add_buf * @vq: the struct virtqueue * - * After one or more virtqueue_add_buf_gfp calls, invoke this to kick + * After one or more virtqueue_add_buf calls, invoke this to kick * the other side. * * Caller must ensure we don't call this with other virtqueue @@ -341,7 +341,7 @@ static inline bool more_used(const struct vring_virtqueue *vq) * operations at the same time (except where noted). * * Returns NULL if there are no used buffers, or the "data" token - * handed to virtqueue_add_buf_gfp(). + * handed to virtqueue_add_buf(). */ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) { @@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); * virtqueue_detach_unused_buf - detach first unused buffer * @vq: the struct virtqueue we're talking about. * - * Returns NULL or the "data" token handed to virtqueue_add_buf_gfp(). + * Returns NULL or the "data" token handed to virtqueue_add_buf(). * This is not valid on an active queue; it is useful only for device * shutdown. */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 73ad7243128f..ec1706e7df50 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -25,21 +25,12 @@ struct virtqueue { void *priv; }; -int virtqueue_add_buf_gfp(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data, - gfp_t gfp); - -static inline int virtqueue_add_buf(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data) -{ - return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC); -} +int virtqueue_add_buf(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data, + gfp_t gfp); void virtqueue_kick(struct virtqueue *vq); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 330421e54713..3d432068f627 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -272,7 +272,8 @@ req_retry: in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { chan->ring_bufs_avail = 0; @@ -414,7 +415,8 @@ req_retry_pinned: in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, in_pages, in_nr_pages, uidata, inlen); - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { chan->ring_bufs_avail = 0; diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 953db2abf6b9..b4fbc91c41b4 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -186,21 +186,12 @@ struct virtqueue { #endif /* Interfaces exported by virtio_ring. */ -int virtqueue_add_buf_gfp(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data, - gfp_t gfp); - -static inline int virtqueue_add_buf(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data) -{ - return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC); -} +int virtqueue_add_buf(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data, + gfp_t gfp); void virtqueue_kick(struct virtqueue *vq); diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 0740284396c1..6bf95f995364 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -161,7 +161,8 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) if (started < bufs) { sg_init_one(&sl, dev->buf, dev->buf_size); r = virtqueue_add_buf(vq->vq, &sl, 1, 0, - dev->buf + started); + dev->buf + started, + GFP_ATOMIC); if (likely(r >= 0)) { ++started; virtqueue_kick(vq->vq); -- cgit v1.2.3 From 41f0377f73039ca6fe97a469d1941a89cd9757f1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Jan 2012 15:44:43 +1030 Subject: virtio: support unlocked queue kick Based on patch by Christoph for virtio_blk speedup: Split virtqueue_kick to be able to do the actual notification outside the lock protecting the virtqueue. This patch was originally done by Stefan Hajnoczi, but I can't find the original one anymore and had to recreated it from memory. Pointers to the original or corrections for the commit message are welcome. Stefan's patch was here: https://github.com/stefanha/linux/commit/a6d06644e3a58e57a774e77d7dc34c4a5a2e7496 http://www.spinics.net/lists/linux-virtualization/msg14616.html Third time's the charm! Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 60 +++++++++++++++++++++++++++++++++++--------- include/linux/virtio.h | 4 +++ 2 files changed, 52 insertions(+), 12 deletions(-) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6ea92a6d1134..c56bbe799241 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -262,19 +262,22 @@ add_head: EXPORT_SYMBOL_GPL(virtqueue_add_buf); /** - * virtqueue_kick - update after add_buf + * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @vq: the struct virtqueue * - * After one or more virtqueue_add_buf calls, invoke this to kick - * the other side. + * Instead of virtqueue_kick(), you can do: + * if (virtqueue_kick_prepare(vq)) + * virtqueue_notify(vq); * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). + * This is sometimes useful because the virtqueue_kick_prepare() needs + * to be serialized, but the actual virtqueue_notify() call does not. */ -void virtqueue_kick(struct virtqueue *_vq) +bool virtqueue_kick_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 new, old; + bool needs_kick; + START_USE(vq); /* Descriptors and available array need to be set before we expose the * new available array entries. */ @@ -287,13 +290,46 @@ void virtqueue_kick(struct virtqueue *_vq) /* Need to update avail index before checking if we should notify */ virtio_mb(vq); - if (vq->event ? - vring_need_event(vring_avail_event(&vq->vring), new, old) : - !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) - /* Prod other side to tell it about changes. */ - vq->notify(&vq->vq); - + if (vq->event) { + needs_kick = vring_need_event(vring_avail_event(&vq->vring), + new, old); + } else { + needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY); + } END_USE(vq); + return needs_kick; +} +EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); + +/** + * virtqueue_notify - second half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * This does not need to be serialized. + */ +void virtqueue_notify(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + /* Prod other side to tell it about changes. */ + vq->notify(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_notify); + +/** + * virtqueue_kick - update after add_buf + * @vq: the struct virtqueue + * + * After one or more virtqueue_add_buf calls, invoke this to kick + * the other side. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +void virtqueue_kick(struct virtqueue *vq) +{ + if (virtqueue_kick_prepare(vq)) + virtqueue_notify(vq); } EXPORT_SYMBOL_GPL(virtqueue_kick); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ec1706e7df50..31fe3a62874b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -34,6 +34,10 @@ int virtqueue_add_buf(struct virtqueue *vq, void virtqueue_kick(struct virtqueue *vq); +bool virtqueue_kick_prepare(struct virtqueue *vq); + +void virtqueue_notify(struct virtqueue *vq); + void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); void virtqueue_disable_cb(struct virtqueue *vq); -- cgit v1.2.3 From 3b720b8c865098c49c1570b6b5c7832bcfa6e6c2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Jan 2012 15:44:43 +1030 Subject: virtio: avoid modulus operation. Since we know vq->vring.num is a power of 2, modulus is lazy (it's asserted in vring_new_virtqueue()). Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c56bbe799241..99dc9480f3fe 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -250,8 +250,8 @@ add_head: vq->data[head] = data; /* Put entry in available array (but don't update avail->idx until they - * do sync). FIXME: avoid modulus here? */ - avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; + * do sync). */ + avail = ((vq->vring.avail->idx + vq->num_added++) & (vq->vring.num-1)); vq->vring.avail->ring[avail] = head; pr_debug("Added buffer head %i to %p\n", head, vq); @@ -384,6 +384,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) struct vring_virtqueue *vq = to_vvq(_vq); void *ret; unsigned int i; + u16 last_used; START_USE(vq); @@ -401,8 +402,9 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) /* Only get used array entries after they have been exposed by host. */ virtio_rmb(vq); - i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; - *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; + last_used = (vq->last_used_idx & (vq->vring.num - 1)); + i = vq->vring.used->ring[last_used].id; + *len = vq->vring.used->ring[last_used].len; if (unlikely(i >= vq->vring.num)) { BAD_RING(vq, "id %u out of range\n", i); -- cgit v1.2.3 From ee7cd8981e15bcb365fc762afe3fc47b8242f630 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Jan 2012 15:44:43 +1030 Subject: virtio: expose added descriptors immediately. A virtio driver does virtqueue_add_buf() multiple times before finally calling virtqueue_kick(); previously we only exposed the added buffers in the virtqueue_kick() call. This means we don't need a memory barrier in virtqueue_add_buf(), but it reduces concurrency as the device (ie. host) can't see the buffers until the kick. In the unusual (but now possible) case where a driver does add_buf() and get_buf() without doing a kick, we do need to insert one before our counter wraps. Otherwise we could wrap num_added, and later on not realize that we have passed the marker where we should have kicked. Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 99dc9480f3fe..36bb6a613728 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -251,9 +251,20 @@ add_head: /* Put entry in available array (but don't update avail->idx until they * do sync). */ - avail = ((vq->vring.avail->idx + vq->num_added++) & (vq->vring.num-1)); + avail = (vq->vring.avail->idx & (vq->vring.num-1)); vq->vring.avail->ring[avail] = head; + /* Descriptors and available array need to be set before we expose the + * new available array entries. */ + virtio_wmb(vq); + vq->vring.avail->idx++; + vq->num_added++; + + /* This is very unlikely, but theoretically possible. Kick + * just in case. */ + if (unlikely(vq->num_added == (1 << 16) - 1)) + virtqueue_kick(_vq); + pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); @@ -283,13 +294,10 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq) * new available array entries. */ virtio_wmb(vq); - old = vq->vring.avail->idx; - new = vq->vring.avail->idx = old + vq->num_added; + old = vq->vring.avail->idx - vq->num_added; + new = vq->vring.avail->idx; vq->num_added = 0; - /* Need to update avail index before checking if we should notify */ - virtio_mb(vq); - if (vq->event) { needs_kick = vring_need_event(vring_avail_event(&vq->vring), new, old); -- cgit v1.2.3 From e93300b1afc7cd4fe1e741ceaf06714d060e88b8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Jan 2012 15:44:43 +1030 Subject: virtio: add debugging if driver doesn't kick. Under the existing #ifdef DEBUG, check that they don't have more than 1/10 of a second between an add_buf() and a virtqueue_notify()/virtqueue_kick_prepare() call. We could get false positives on a really busy system, but good for development. Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 36bb6a613728..79e1b292c030 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -22,6 +22,7 @@ #include #include #include +#include /* virtio guest is communicating with a virtual "device" that actually runs on * a host processor. Memory barriers are used to control SMP effects. */ @@ -108,6 +109,10 @@ struct vring_virtqueue #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; #endif /* Tokens for callbacks. */ @@ -198,6 +203,19 @@ int virtqueue_add_buf(struct virtqueue *_vq, BUG_ON(data == NULL); +#ifdef DEBUG + { + ktime_t now = ktime_get(); + + /* No kick or get, with .1 second between? Warn. */ + if (vq->last_add_time_valid) + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time)) + > 100); + vq->last_add_time = now; + vq->last_add_time_valid = true; + } +#endif + /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold */ if (vq->indirect && (out + in) > 1 && vq->num_free) { @@ -298,6 +316,14 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq) new = vq->vring.avail->idx; vq->num_added = 0; +#ifdef DEBUG + if (vq->last_add_time_valid) { + WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), + vq->last_add_time)) > 100); + } + vq->last_add_time_valid = false; +#endif + if (vq->event) { needs_kick = vring_need_event(vring_avail_event(&vq->vring), new, old); @@ -435,6 +461,10 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) virtio_mb(vq); } +#ifdef DEBUG + vq->last_add_time_valid = false; +#endif + END_USE(vq); return ret; } @@ -620,6 +650,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; + vq->last_add_time_valid = false; #endif vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); -- cgit v1.2.3 From 4dbc5d9f4f791df8a5879f4a655f517adc7f56d1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 20 Jan 2012 16:16:59 +0800 Subject: virtio: fix typos of memory barriers Note: this fixes a bug introduced recently in 7b21e34fd1c272e3a8c3846168f2f6287a4cd72b. Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 79e1b292c030..78428a863b46 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -35,7 +35,7 @@ #define virtio_rmb(vq) \ do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) #define virtio_wmb(vq) \ - do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) + do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0) #else /* We must force memory ordering even if guest is UP since host could be * running on another CPU, but SMP barriers are defined to barrier() in that -- cgit v1.2.3 From a72caae21803b74e04e2afda5e035f149d4ea118 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 20 Jan 2012 16:17:08 +0800 Subject: virtio: correct the memory barrier in virtqueue_kick_prepare() Use virtio_mb() to make sure the available index to be exposed before checking the the avail event. Otherwise we may get stale value of avail event in guest and never kick the host after. Note: this fixes a bug introduced by ee7cd8981e15bcb365fc762afe3fc47b8242f630. Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Rusty Russell Cc: stable@kernel.org --- drivers/virtio/virtio_ring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/virtio/virtio_ring.c') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 78428a863b46..5aa43c3392a2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -308,9 +308,9 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq) bool needs_kick; START_USE(vq); - /* Descriptors and available array need to be set before we expose the - * new available array entries. */ - virtio_wmb(vq); + /* We need to expose available array entries before checking avail + * event. */ + virtio_mb(vq); old = vq->vring.avail->idx - vq->num_added; new = vq->vring.avail->idx; -- cgit v1.2.3