diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-02-04 14:20:37 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-02-04 14:20:37 +1100 |
commit | 7ab06f14c76d9a4638df1c12f0c576b38d757b8f (patch) | |
tree | d7940475c52a7deff40f2499deb28eefa866407b | |
parent | 62e02dbb82baf1a56595188bf25b7b32b00adf34 (diff) | |
parent | e12c6060acad557bdb604412623bbb2521eb4f61 (diff) |
Merge branch 'quilt/rr'
Conflicts:
drivers/char/hvc_console.c
drivers/char/hvc_console.h
-rw-r--r-- | Documentation/cpu-hotplug.txt | 2 | ||||
-rw-r--r-- | Documentation/lguest/lguest.c | 1 | ||||
-rw-r--r-- | drivers/char/virtio_console.c | 664 | ||||
-rw-r--r-- | drivers/virtio/virtio_balloon.c | 108 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci.c | 2 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 29 | ||||
-rw-r--r-- | include/linux/gfp.h | 2 | ||||
-rw-r--r-- | include/linux/kernel.h | 82 | ||||
-rw-r--r-- | include/linux/kmemcheck.h | 2 | ||||
-rw-r--r-- | include/linux/virtio_balloon.h | 15 | ||||
-rw-r--r-- | include/linux/virtio_config.h | 5 | ||||
-rw-r--r-- | include/linux/virtio_console.h | 6 |
12 files changed, 716 insertions, 202 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index a99d7031cdf9..45d5a217484f 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -2,7 +2,7 @@ Maintainers: CPU Hotplug Core: - Rusty Russell <rusty@rustycorp.com.au> + Rusty Russell <rusty@rustcorp.com.au> Srivatsa Vaddagiri <vatsa@in.ibm.com> i386: Zwane Mwaikambo <zwane@arm.linux.org.uk> diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 42208511b5c0..3119f5db75bd 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -34,7 +34,6 @@ #include <sys/uio.h> #include <termios.h> #include <getopt.h> -#include <zlib.h> #include <assert.h> #include <sched.h> #include <limits.h> diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index a035ae39a359..5096d92f5b89 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1,18 +1,5 @@ -/*D:300 - * The Guest console driver - * - * Writing console drivers is one of the few remaining Dark Arts in Linux. - * Fortunately for us, the path of virtual consoles has been well-trodden by - * the PowerPC folks, who wrote "hvc_console.c" to generically support any - * virtual console. We use that infrastructure which only requires us to write - * the basic put_chars and get_chars functions and call the right register - * functions. - :*/ - -/*M:002 The console can be flooded: while the Guest is processing input the - * Host can send more. Buffering in the Host could alleviate this, but it is a - * difficult problem in general. :*/ -/* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation +/* + * Copyright (C) 2006, 2007, 2009 Rusty Russell, IBM Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,45 +17,289 @@ */ #include <linux/err.h> #include <linux/init.h> +#include <linux/list.h> +#include <linux/spinlock.h> #include <linux/virtio.h> #include <linux/virtio_console.h> #include "hvc_console.h" -/*D:340 These represent our input and output console queues, and the virtio - * operations for them. */ -static struct virtqueue *in_vq, *out_vq; -static struct virtio_device *vdev; +/* + * This is a global struct for storing common data for all the devices + * this driver handles. + * + * Mainly, it has a linked list for all the consoles in one place so + * that callbacks from hvc for get_chars(), put_chars() work properly + * across multiple devices and multiple ports per device. + */ +struct ports_driver_data { + /* + * This is used to keep track of the number of hvc consoles + * spawned by this driver. This number is given as the first + * argument to hvc_alloc(). To correctly map an initial + * console spawned via hvc_instantiate to the console being + * hooked up via hvc_alloc, we need to pass the same vtermno. + * + * We also just assume the first console being initialised was + * the first one that got used as the initial console. + */ + unsigned int next_vtermno; + + /* All the console devices handled by this driver */ + struct list_head consoles; +}; +static struct ports_driver_data pdrvdata; + +DEFINE_SPINLOCK(pdrvdata_lock); + +/* This struct holds information that's relevant only for console ports */ +struct console { + /* We'll place all consoles in a list in the pdrvdata struct */ + struct list_head list; + + /* The hvc device associated with this console port */ + struct hvc_struct *hvc; + + /* + * This number identifies the number that we used to register + * with hvc in hvc_instantiate() and hvc_alloc(); this is the + * number passed on by the hvc callbacks to us to + * differentiate between the other console ports handled by + * this driver + */ + u32 vtermno; +}; + +/* + * This is a per-device struct that stores data common to all the + * ports for that device (vdev->priv). + */ +struct ports_device { + /* Array of per-port IO virtqueues */ + struct virtqueue **in_vqs, **out_vqs; + + struct virtio_device *vdev; +}; + +struct port_buffer { + char *buf; + + /* size of the buffer in *buf above */ + size_t size; + + /* used length of the buffer */ + size_t len; + /* offset in the buf from which to consume data */ + size_t offset; +}; + +/* This struct holds the per-port data */ +struct port { + /* Pointer to the parent virtio_console device */ + struct ports_device *portdev; + + /* The current buffer from which data has to be fed to readers */ + struct port_buffer *inbuf; + + /* + * To protect the operations on the in_vq associated with this + * port. Has to be a spinlock because it can be called from + * interrupt context (get_char()). + */ + spinlock_t inbuf_lock; + + /* The IO vqs for this port */ + struct virtqueue *in_vq, *out_vq; + + /* + * The entries in this struct will be valid if this port is + * hooked up to an hvc console + */ + struct console cons; +}; + +/* This is the very early arch-specified put chars function. */ +static int (*early_put_chars)(u32, const char *, int); + +static struct port *find_port_by_vtermno(u32 vtermno) +{ + struct port *port; + struct console *cons; + unsigned long flags; + + spin_lock_irqsave(&pdrvdata_lock, flags); + list_for_each_entry(cons, &pdrvdata.consoles, list) { + if (cons->vtermno == vtermno) { + port = container_of(cons, struct port, cons); + goto out; + } + } + port = NULL; +out: + spin_unlock_irqrestore(&pdrvdata_lock, flags); + return port; +} + +static struct port *find_port_by_vq(struct ports_device *portdev, + struct virtqueue *vq) +{ + struct port *port; + struct console *cons; + unsigned long flags; + + spin_lock_irqsave(&pdrvdata_lock, flags); + list_for_each_entry(cons, &pdrvdata.consoles, list) { + port = container_of(cons, struct port, cons); + if (port->in_vq == vq || port->out_vq == vq) + goto out; + } + port = NULL; +out: + spin_unlock_irqrestore(&pdrvdata_lock, flags); + return port; +} + +static void free_buf(struct port_buffer *buf) +{ + kfree(buf->buf); + kfree(buf); +} + +static struct port_buffer *alloc_buf(size_t buf_size) +{ + struct port_buffer *buf; + + buf = kmalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + goto fail; + buf->buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf->buf) + goto free_buf; + buf->len = 0; + buf->offset = 0; + buf->size = buf_size; + return buf; + +free_buf: + kfree(buf); +fail: + return NULL; +} + +/* Callers should take appropriate locks */ +static void *get_inbuf(struct port *port) +{ + struct port_buffer *buf; + struct virtqueue *vq; + unsigned int len; + + vq = port->in_vq; + buf = vq->vq_ops->get_buf(vq, &len); + if (buf) { + buf->len = len; + buf->offset = 0; + } + return buf; +} + +/* + * Create a scatter-gather list representing our input buffer and put + * it in the queue. + * + * Callers should take appropriate locks. + */ +static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf) +{ + struct scatterlist sg[1]; + int ret; + + sg_init_one(sg, buf->buf, buf->size); + + ret = vq->vq_ops->add_buf(vq, sg, 0, 1, buf); + vq->vq_ops->kick(vq); + return ret; +} -/* This is our input buffer, and how much data is left in it. */ -static unsigned int in_len; -static char *in, *inbuf; +static bool port_has_data(struct port *port) +{ + unsigned long flags; + bool ret; -/* The operations for our console. */ -static struct hv_ops virtio_cons; + ret = false; + spin_lock_irqsave(&port->inbuf_lock, flags); + if (port->inbuf) + ret = true; + spin_unlock_irqrestore(&port->inbuf_lock, flags); -/* The hvc device */ -static struct hvc_struct *hvc; + return ret; +} -/*D:310 The put_chars() callback is pretty straightforward. +/* + * Give out the data that's requested from the buffer that we have + * queued up. + */ +static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count) +{ + struct port_buffer *buf; + unsigned long flags; + + if (!out_count || !port_has_data(port)) + return 0; + + buf = port->inbuf; + if (out_count > buf->len - buf->offset) + out_count = buf->len - buf->offset; + + memcpy(out_buf, buf->buf + buf->offset, out_count); + + /* Return the number of bytes actually copied */ + buf->offset += out_count; + + if (buf->offset == buf->len) { + /* + * We're done using all the data in this buffer. + * Re-queue so that the Host can send us more data. + */ + spin_lock_irqsave(&port->inbuf_lock, flags); + port->inbuf = NULL; + + if (add_inbuf(port->in_vq, buf) < 0) + dev_warn(&port->portdev->vdev->dev, "failed add_buf\n"); + + spin_unlock_irqrestore(&port->inbuf_lock, flags); + } + return out_count; +} + +/* + * The put_chars() callback is pretty straightforward. * - * We turn the characters into a scatter-gather list, add it to the output - * queue and then kick the Host. Then we sit here waiting for it to finish: - * inefficient in theory, but in practice implementations will do it - * immediately (lguest's Launcher does). */ + * We turn the characters into a scatter-gather list, add it to the + * output queue and then kick the Host. Then we sit here waiting for + * it to finish: inefficient in theory, but in practice + * implementations will do it immediately (lguest's Launcher does). + */ static int put_chars(u32 vtermno, const char *buf, int count) { struct scatterlist sg[1]; + struct port *port; + struct virtqueue *out_vq; unsigned int len; + port = find_port_by_vtermno(vtermno); + if (!port) + return 0; + + if (unlikely(early_put_chars)) + return early_put_chars(vtermno, buf, count); + + out_vq = port->out_vq; /* This is a convenient routine to initialize a single-elem sg list */ sg_init_one(sg, buf, count); - /* add_buf wants a token to identify this buffer: we hand it any - * non-NULL pointer, since there's only ever one buffer. */ - if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) >= 0) { + /* This shouldn't fail: if it does, we lose chars. */ + if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, port) >= 0) { /* Tell Host to go! */ out_vq->vq_ops->kick(out_vq); - /* Chill out until it's done with the buffer. */ while (!out_vq->vq_ops->get_buf(out_vq, &len)) cpu_relax(); } @@ -77,93 +308,60 @@ static int put_chars(u32 vtermno, const char *buf, int count) return count; } -/* Create a scatter-gather list representing our input buffer and put it in the - * queue. */ -static void add_inbuf(void) -{ - struct scatterlist sg[1]; - sg_init_one(sg, inbuf, PAGE_SIZE); - - /* We should always be able to add one buffer to an empty queue. */ - if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) < 0) - BUG(); - in_vq->vq_ops->kick(in_vq); -} - -/*D:350 get_chars() is the callback from the hvc_console infrastructure when - * an interrupt is received. +/* + * get_chars() is the callback from the hvc_console infrastructure + * when an interrupt is received. * - * Most of the code deals with the fact that the hvc_console() infrastructure - * only asks us for 16 bytes at a time. We keep in_offset and in_used fields - * for partially-filled buffers. */ + * We call out to fill_readbuf that gets us the required data from the + * buffers that are queued up. + */ static int get_chars(u32 vtermno, char *buf, int count) { - /* If we don't have an input queue yet, we can't get input. */ - BUG_ON(!in_vq); + struct port *port; - /* No buffer? Try to get one. */ - if (!in_len) { - in = in_vq->vq_ops->get_buf(in_vq, &in_len); - if (!in) - return 0; - } + port = find_port_by_vtermno(vtermno); + if (!port) + return 0; - /* You want more than we have to give? Well, try wanting less! */ - if (in_len < count) - count = in_len; - - /* Copy across to their buffer and increment offset. */ - memcpy(buf, in, count); - in += count; - in_len -= count; - - /* Finished? Re-register buffer so Host will use it again. */ - if (in_len == 0) - add_inbuf(); + /* If we don't have an input queue yet, we can't get input. */ + BUG_ON(!port->in_vq); - return count; + return fill_readbuf(port, buf, count); } -/*:*/ -/*D:320 Console drivers are initialized very early so boot messages can go out, - * so we do things slightly differently from the generic virtio initialization - * of the net and block drivers. - * - * At this stage, the console is output-only. It's too early to set up a - * virtqueue, so we let the drivers do some boutique early-output thing. */ -int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) -{ - virtio_cons.put_chars = put_chars; - return hvc_instantiate(0, 0, &virtio_cons); -} - -/* - * virtio console configuration. This supports: - * - console resize - */ -static void virtcons_apply_config(struct virtio_device *dev) +static void resize_console(struct port *port) { + struct virtio_device *vdev; struct winsize ws; - if (virtio_has_feature(dev, VIRTIO_CONSOLE_F_SIZE)) { - dev->config->get(dev, - offsetof(struct virtio_console_config, cols), - &ws.ws_col, sizeof(u16)); - dev->config->get(dev, - offsetof(struct virtio_console_config, rows), - &ws.ws_row, sizeof(u16)); - hvc_resize(hvc, ws); + vdev = port->portdev->vdev; + if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) { + vdev->config->get(vdev, + offsetof(struct virtio_console_config, cols), + &ws.ws_col, sizeof(u16)); + vdev->config->get(vdev, + offsetof(struct virtio_console_config, rows), + &ws.ws_row, sizeof(u16)); + hvc_resize(port->cons.hvc, ws); } } -/* - * we support only one console, the hvc struct is a global var - * We set the configuration at this point, since we now have a tty - */ +static void virtcons_apply_config(struct virtio_device *vdev) +{ + resize_console(find_port_by_vtermno(0)); +} + +/* We set the configuration at this point, since we now have a tty */ static int notifier_add_vio(struct hvc_struct *hp, int data) { + struct port *port; + + port = find_port_by_vtermno(hp->vtermno); + if (!port) + return -EINVAL; + hp->irq_requested = 1; - virtcons_apply_config(vdev); + resize_console(port); return 0; } @@ -175,73 +373,241 @@ static void notifier_del_vio(struct hvc_struct *hp, int data) static void hvc_handle_input(struct virtqueue *vq) { - if (hvc_poll(hvc)) + struct port *port; + unsigned long flags; + + port = find_port_by_vq(vq->vdev->priv, vq); + if (!port) + return; + + spin_lock_irqsave(&port->inbuf_lock, flags); + port->inbuf = get_inbuf(port); + spin_unlock_irqrestore(&port->inbuf_lock, flags); + + if (hvc_poll(port->cons.hvc)) hvc_kick(); } -/*D:370 Once we're further in boot, we get probed like any other virtio device. - * At this stage we set up the output virtqueue. - * - * To set up and manage our virtual console, we call hvc_alloc(). Since we - * never remove the console device we never need this pointer again. +/* The operations for the console. */ +static const struct hv_ops hv_ops = { + .get_chars = get_chars, + .put_chars = put_chars, + .notifier_add = notifier_add_vio, + .notifier_del = notifier_del_vio, + .notifier_hangup = notifier_del_vio, +}; + +/* + * Console drivers are initialized very early so boot messages can go + * out, so we do things slightly differently from the generic virtio + * initialization of the net and block drivers. * - * Finally we put our input buffer in the input queue, ready to receive. */ -static int __devinit virtcons_probe(struct virtio_device *dev) + * At this stage, the console is output-only. It's too early to set + * up a virtqueue, so we let the drivers do some boutique early-output + * thing. + */ +int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) +{ + early_put_chars = put_chars; + return hvc_instantiate(0, 0, &hv_ops); +} + +int __devinit init_port_console(struct port *port) +{ + int ret; + + /* + * The Host's telling us this port is a console port. Hook it + * up with an hvc console. + * + * To set up and manage our virtual console, we call + * hvc_alloc(). + * + * The first argument of hvc_alloc() is the virtual console + * number. The second argument is the parameter for the + * notification mechanism (like irq number). We currently + * leave this as zero, virtqueues have implicit notifications. + * + * The third argument is a "struct hv_ops" containing the + * put_chars() get_chars(), notifier_add() and notifier_del() + * pointers. The final argument is the output buffer size: we + * can do any size, so we put PAGE_SIZE here. + */ + port->cons.vtermno = pdrvdata.next_vtermno; + + port->cons.hvc = hvc_alloc(port->cons.vtermno, 0, &hv_ops, PAGE_SIZE); + if (IS_ERR(port->cons.hvc)) { + ret = PTR_ERR(port->cons.hvc); + port->cons.hvc = NULL; + return ret; + } + spin_lock_irq(&pdrvdata_lock); + pdrvdata.next_vtermno++; + list_add_tail(&port->cons.list, &pdrvdata.consoles); + spin_unlock_irq(&pdrvdata_lock); + + return 0; +} + +static int __devinit add_port(struct ports_device *portdev) { - vq_callback_t *callbacks[] = { hvc_handle_input, NULL}; - const char *names[] = { "input", "output" }; - struct virtqueue *vqs[2]; + struct port *port; + struct port_buffer *inbuf; int err; - vdev = dev; + port = kmalloc(sizeof(*port), GFP_KERNEL); + if (!port) { + err = -ENOMEM; + goto fail; + } + + port->portdev = portdev; + + port->inbuf = NULL; + + port->in_vq = portdev->in_vqs[0]; + port->out_vq = portdev->out_vqs[0]; - /* This is the scratch page we use to receive console input */ - inbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); + spin_lock_init(&port->inbuf_lock); + + inbuf = alloc_buf(PAGE_SIZE); if (!inbuf) { err = -ENOMEM; + goto free_port; + } + + /* Register the input buffer the first time. */ + add_inbuf(port->in_vq, inbuf); + + err = init_port_console(port); + if (err) + goto free_inbuf; + + return 0; + +free_inbuf: + free_buf(inbuf); +free_port: + kfree(port); +fail: + return err; +} + +static int init_vqs(struct ports_device *portdev) +{ + vq_callback_t **io_callbacks; + char **io_names; + struct virtqueue **vqs; + u32 nr_ports, nr_queues; + int err; + + /* We currently only have one port and two queues for that port */ + nr_ports = 1; + nr_queues = 2; + + vqs = kmalloc(nr_queues * sizeof(struct virtqueue *), GFP_KERNEL); + if (!vqs) { + err = -ENOMEM; goto fail; } + io_callbacks = kmalloc(nr_queues * sizeof(vq_callback_t *), GFP_KERNEL); + if (!io_callbacks) { + err = -ENOMEM; + goto free_vqs; + } + io_names = kmalloc(nr_queues * sizeof(char *), GFP_KERNEL); + if (!io_names) { + err = -ENOMEM; + goto free_callbacks; + } + portdev->in_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *), + GFP_KERNEL); + if (!portdev->in_vqs) { + err = -ENOMEM; + goto free_names; + } + portdev->out_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *), + GFP_KERNEL); + if (!portdev->out_vqs) { + err = -ENOMEM; + goto free_invqs; + } + + io_callbacks[0] = hvc_handle_input; + io_callbacks[1] = NULL; + io_names[0] = "input"; + io_names[1] = "output"; /* Find the queues. */ - /* FIXME: This is why we want to wean off hvc: we do nothing - * when input comes in. */ - err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names); + err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs, + io_callbacks, + (const char **)io_names); if (err) - goto free; + goto free_outvqs; - in_vq = vqs[0]; - out_vq = vqs[1]; + portdev->in_vqs[0] = vqs[0]; + portdev->out_vqs[0] = vqs[1]; - /* Start using the new console output. */ - virtio_cons.get_chars = get_chars; - virtio_cons.put_chars = put_chars; - virtio_cons.notifier_add = notifier_add_vio; - virtio_cons.notifier_del = notifier_del_vio; - virtio_cons.notifier_hangup = notifier_del_vio; - - /* The first argument of hvc_alloc() is the virtual console number, so - * we use zero. The second argument is the parameter for the - * notification mechanism (like irq number). We currently leave this - * as zero, virtqueues have implicit notifications. - * - * The third argument is a "struct hv_ops" containing the put_chars() - * get_chars(), notifier_add() and notifier_del() pointers. - * The final argument is the output buffer size: we can do any size, - * so we put PAGE_SIZE here. */ - hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); - if (IS_ERR(hvc)) { - err = PTR_ERR(hvc); - goto free_vqs; + kfree(io_callbacks); + kfree(io_names); + kfree(vqs); + + return 0; + +free_names: + kfree(io_names); +free_callbacks: + kfree(io_callbacks); +free_outvqs: + kfree(portdev->out_vqs); +free_invqs: + kfree(portdev->in_vqs); +free_vqs: + kfree(vqs); +fail: + return err; +} + +/* + * Once we're further in boot, we get probed like any other virtio + * device. + */ +static int __devinit virtcons_probe(struct virtio_device *vdev) +{ + struct ports_device *portdev; + int err; + + portdev = kmalloc(sizeof(*portdev), GFP_KERNEL); + if (!portdev) { + err = -ENOMEM; + goto fail; } - /* Register the input buffer the first time. */ - add_inbuf(); + /* Attach this portdev to this virtio_device, and vice-versa. */ + portdev->vdev = vdev; + vdev->priv = portdev; + + err = init_vqs(portdev); + if (err < 0) { + dev_err(&vdev->dev, "Error %d initializing vqs\n", err); + goto free; + } + + /* We only have one port. */ + err = add_port(portdev); + if (err) + goto free_vqs; + + /* Start using the new console output. */ + early_put_chars = NULL; return 0; free_vqs: vdev->config->del_vqs(vdev); + kfree(portdev->in_vqs); + kfree(portdev->out_vqs); free: - kfree(inbuf); + kfree(portdev); fail: return err; } @@ -267,6 +633,8 @@ static struct virtio_driver virtio_console = { static int __init init(void) { + INIT_LIST_HEAD(&pdrvdata.consoles); + return register_virtio_driver(&virtio_console); } module_init(init); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 505be88c82ae..3db3d242c3ee 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -28,7 +28,7 @@ struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; /* Where the ballooning thread waits for config to change. */ wait_queue_head_t config_change; @@ -49,6 +49,10 @@ struct virtio_balloon /* The array of pfns we tell the Host about. */ unsigned int num_pfns; u32 pfns[256]; + + /* Memory statistics */ + int need_stats_update; + struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; }; static struct virtio_device_id id_table[] = { @@ -154,6 +158,72 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) } } +static inline void update_stat(struct virtio_balloon *vb, int idx, + u16 tag, u64 val) +{ + BUG_ON(idx >= VIRTIO_BALLOON_S_NR); + vb->stats[idx].tag = tag; + vb->stats[idx].val = val; +} + +#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) + +static void update_balloon_stats(struct virtio_balloon *vb) +{ + unsigned long events[NR_VM_EVENT_ITEMS]; + struct sysinfo i; + int idx = 0; + + all_vm_events(events); + si_meminfo(&i); + + update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, + pages_to_bytes(events[PSWPIN])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, + pages_to_bytes(events[PSWPOUT])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, + pages_to_bytes(i.freeram)); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, + pages_to_bytes(i.totalram)); +} + +/* + * While most virtqueues communicate guest-initiated requests to the hypervisor, + * the stats queue operates in reverse. The driver initializes the virtqueue + * with a single buffer. From that point forward, all conversations consist of + * a hypervisor request (a call to this function) which directs us to refill + * the virtqueue with a fresh stats buffer. Since stats collection can sleep, + * we notify our kthread which does the actual work via stats_handle_request(). + */ +static void stats_request(struct virtqueue *vq) +{ + struct virtio_balloon *vb; + unsigned int len; + + vb = vq->vq_ops->get_buf(vq, &len); + if (!vb) + return; + vb->need_stats_update = 1; + wake_up(&vb->config_change); +} + +static void stats_handle_request(struct virtio_balloon *vb) +{ + struct virtqueue *vq; + struct scatterlist sg; + + vb->need_stats_update = 0; + update_balloon_stats(vb); + + vq = vb->stats_vq; + sg_init_one(&sg, vb->stats, sizeof(vb->stats)); + if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0) + BUG(); + vq->vq_ops->kick(vq); +} + static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -190,8 +260,11 @@ static int balloon(void *_vballoon) try_to_freeze(); wait_event_interruptible(vb->config_change, (diff = towards_target(vb)) != 0 + || vb->need_stats_update || kthread_should_stop() || freezing(current)); + if (vb->need_stats_update) + stats_handle_request(vb); if (diff > 0) fill_balloon(vb, diff); else if (diff < 0) @@ -204,10 +277,10 @@ static int balloon(void *_vballoon) static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; - struct virtqueue *vqs[2]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack }; - const char *names[] = { "inflate", "deflate" }; - int err; + struct virtqueue *vqs[3]; + vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; + const char *names[] = { "inflate", "deflate", "stats" }; + int err, nvqs; vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); if (!vb) { @@ -220,13 +293,29 @@ static int virtballoon_probe(struct virtio_device *vdev) init_waitqueue_head(&vb->config_change); vb->vdev = vdev; - /* We expect two virtqueues. */ - err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names); + /* We expect two virtqueues: inflate and deflate, + * and optionally stat. */ + nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; + err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names); if (err) goto out_free_vb; vb->inflate_vq = vqs[0]; vb->deflate_vq = vqs[1]; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + struct scatterlist sg; + vb->stats_vq = vqs[2]; + + /* + * Prime this virtqueue with one buffer so the hypervisor can + * use it to signal us later. + */ + sg_init_one(&sg, vb->stats, sizeof vb->stats); + if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq, + &sg, 1, 0, vb) < 0) + BUG(); + vb->stats_vq->vq_ops->kick(vb->stats_vq); + } vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { @@ -264,7 +353,10 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev) kfree(vb); } -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST }; +static unsigned int features[] = { + VIRTIO_BALLOON_F_MUST_TELL_HOST, + VIRTIO_BALLOON_F_STATS_VQ, +}; static struct virtio_driver virtio_balloon_driver = { .feature_table = features, diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 28d9cf7cf72f..1d5191fab62e 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -702,7 +702,7 @@ static struct pci_driver virtio_pci_driver = { .name = "virtio-pci", .id_table = virtio_pci_id_table, .probe = virtio_pci_probe, - .remove = virtio_pci_remove, + .remove = __devexit_p(virtio_pci_remove), #ifdef CONFIG_PM .suspend = virtio_pci_suspend, .resume = virtio_pci_resume, diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 71929ee00d69..782b7292a3d8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -21,6 +21,24 @@ #include <linux/virtio_config.h> #include <linux/device.h> +/* virtio guest is communicating with a virtual "device" that actually runs on + * a host processor. Memory barriers are used to control SMP effects. */ +#ifdef CONFIG_SMP +/* Where possible, use SMP barriers which are more lightweight than mandatory + * barriers, because mandatory barriers control MMIO effects on accesses + * through relaxed memory I/O windows (which virtio does not use). */ +#define virtio_mb() smp_mb() +#define virtio_rmb() smp_rmb() +#define virtio_wmb() smp_wmb() +#else +/* We must force memory ordering even if guest is UP since host could be + * running on another CPU, but SMP barriers are defined to barrier() in that + * configuration. So fall back to mandatory barriers instead. */ +#define virtio_mb() mb() +#define virtio_rmb() rmb() +#define virtio_wmb() wmb() +#endif + #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ #define BAD_RING(_vq, fmt, args...) \ @@ -36,10 +54,9 @@ panic("%s:in_use = %i\n", \ (_vq)->vq.name, (_vq)->in_use); \ (_vq)->in_use = __LINE__; \ - mb(); \ } while (0) #define END_USE(_vq) \ - do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0) + do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) #else #define BAD_RING(_vq, fmt, args...) \ do { \ @@ -221,13 +238,13 @@ static void vring_kick(struct virtqueue *_vq) START_USE(vq); /* Descriptors and available array need to be set before we expose the * new available array entries. */ - wmb(); + virtio_wmb(); vq->vring.avail->idx += vq->num_added; vq->num_added = 0; /* Need to update avail index before checking if we should notify */ - mb(); + virtio_mb(); if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) /* Prod other side to tell it about changes. */ @@ -286,7 +303,7 @@ static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) } /* Only get used array entries after they have been exposed by host. */ - rmb(); + virtio_rmb(); i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; @@ -324,7 +341,7 @@ static bool vring_enable_cb(struct virtqueue *_vq) /* We optimistically turn back on interrupts, then check if there was * more to do. */ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - mb(); + virtio_mb(); if (unlikely(more_used(vq))) { END_USE(vq); return false; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 557bdad320b6..f53e9b868c26 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -220,7 +220,7 @@ static inline enum zone_type gfp_zone(gfp_t flags) ((1 << ZONES_SHIFT) - 1); if (__builtin_constant_p(bit)) - MAYBE_BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); + BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); else { #ifdef CONFIG_DEBUG_VM BUG_ON((GFP_ZONE_BAD >> bit) & 1); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 328bca609b9b..b632226ddc2f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -702,38 +702,6 @@ static inline void ftrace_dump(void) { } struct sysinfo; extern int do_sysinfo(struct sysinfo *info); -#endif /* __KERNEL__ */ - -#ifndef __EXPORTED_HEADERS__ -#ifndef __KERNEL__ -#warning Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders -#endif /* __KERNEL__ */ -#endif /* __EXPORTED_HEADERS__ */ - -#define SI_LOAD_SHIFT 16 -struct sysinfo { - long uptime; /* Seconds since boot */ - unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ - unsigned long totalram; /* Total usable main memory size */ - unsigned long freeram; /* Available memory size */ - unsigned long sharedram; /* Amount of shared memory */ - unsigned long bufferram; /* Memory used by buffers */ - unsigned long totalswap; /* Total swap space size */ - unsigned long freeswap; /* swap space still available */ - unsigned short procs; /* Number of current processes */ - unsigned short pad; /* explicit padding for m68k */ - unsigned long totalhigh; /* Total high memory size */ - unsigned long freehigh; /* Available high memory size */ - unsigned int mem_unit; /* Memory unit size in bytes */ - char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ -}; - -/* Force a compilation error if condition is true */ -#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) - -/* Force a compilation error if condition is constant and true */ -#define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) - /* Force a compilation error if a constant expression is not a power of 2 */ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) @@ -745,6 +713,32 @@ struct sysinfo { #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @cond: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + * + * The implementation uses gcc's reluctance to create a negative array, but + * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments + * to inline functions). So as a fallback we use the optimizer; if it can't + * prove the condition is false, it will cause a link error on the undefined + * "__build_bug_on_failed". This error message can be harder to track down + * though, hence the two different methods. + */ +#ifndef __OPTIMIZE__ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int __build_bug_on_failed; +#define BUILD_BUG_ON(condition) \ + do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) __build_bug_on_failed = 1; \ + } while(0) +#endif + /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) @@ -760,4 +754,28 @@ struct sysinfo { # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif +#else /* __KERNEL__ */ +#ifndef __EXPORTED_HEADERS__ +#warning Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders +#endif /* __EXPORTED_HEADERS__ */ +#endif /* !__KERNEL__ */ + +#define SI_LOAD_SHIFT 16 +struct sysinfo { + long uptime; /* Seconds since boot */ + unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ + unsigned long totalram; /* Total usable main memory size */ + unsigned long freeram; /* Available memory size */ + unsigned long sharedram; /* Amount of shared memory */ + unsigned long bufferram; /* Memory used by buffers */ + unsigned long totalswap; /* Total swap space size */ + unsigned long freeswap; /* swap space still available */ + unsigned short procs; /* Number of current processes */ + unsigned short pad; /* explicit padding for m68k */ + unsigned long totalhigh; /* Total high memory size */ + unsigned long freehigh; /* Available high memory size */ + unsigned int mem_unit; /* Memory unit size in bytes */ + char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ +}; + #endif diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 08d7dc4ddf40..39f8453239f7 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -76,7 +76,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); \ _n = (long) &((ptr)->name##_end) \ - (long) &((ptr)->name##_begin); \ - MAYBE_BUILD_BUG_ON(_n < 0); \ + BUILD_BUG_ON(_n < 0); \ \ kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ } while (0) diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index 1418f048cb34..a50ecd1b81a2 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -7,6 +7,7 @@ /* The feature bitmap for virtio balloon */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -18,4 +19,18 @@ struct virtio_balloon_config /* Number of pages we've actually got in balloon. */ __le32 actual; }; + +#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ +#define VIRTIO_BALLOON_S_SWAP_OUT 1 /* Amount of memory swapped out */ +#define VIRTIO_BALLOON_S_MAJFLT 2 /* Number of major faults */ +#define VIRTIO_BALLOON_S_MINFLT 3 /* Number of minor faults */ +#define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */ +#define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ +#define VIRTIO_BALLOON_S_NR 6 + +struct virtio_balloon_stat { + u16 tag; + u64 val; +} __attribute__((packed)); + #endif /* _LINUX_VIRTIO_BALLOON_H */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 0093dd7c1d6f..800617b4ddd5 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -109,7 +109,10 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, unsigned int fbit) { /* Did you forget to fix assumptions on max features? */ - MAYBE_BUILD_BUG_ON(fbit >= 32); + if (__builtin_constant_p(fbit)) + BUILD_BUG_ON(fbit >= 32); + else + BUG_ON(fbit >= 32); if (fbit < VIRTIO_TRANSPORT_F_START) virtio_check_driver_offered_feature(vdev, fbit); diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index fe885174cc1f..9e0da40beae0 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -3,8 +3,10 @@ #include <linux/types.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> -/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. */ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. + */ /* Feature bits */ #define VIRTIO_CONSOLE_F_SIZE 0 /* Does host provide console size? */ |