diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/char/Kconfig | 8 | ||||
-rw-r--r-- | drivers/char/hvc_beat.c | 2 | ||||
-rw-r--r-- | drivers/char/hvc_console.c | 7 | ||||
-rw-r--r-- | drivers/char/hvc_console.h | 7 | ||||
-rw-r--r-- | drivers/char/hvc_iseries.c | 2 | ||||
-rw-r--r-- | drivers/char/hvc_iucv.c | 2 | ||||
-rw-r--r-- | drivers/char/hvc_rtas.c | 2 | ||||
-rw-r--r-- | drivers/char/hvc_udbg.c | 2 | ||||
-rw-r--r-- | drivers/char/hvc_vio.c | 2 | ||||
-rw-r--r-- | drivers/char/hvc_xen.c | 2 | ||||
-rw-r--r-- | drivers/char/virtio_console.c | 1559 | ||||
-rw-r--r-- | drivers/net/virtio_net.c | 427 | ||||
-rw-r--r-- | drivers/virtio/virtio_balloon.c | 108 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci.c | 2 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 25 |
15 files changed, 1810 insertions, 347 deletions
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index e023682be2c4..3141dd3b6e53 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -666,6 +666,14 @@ config VIRTIO_CONSOLE help Virtio console for use with lguest and other hypervisors. + Also serves as a general-purpose serial device for data + transfer between the guest and host. Character devices at + /dev/vportNpn will be created when corresponding ports are + found, where N is the device number and n is the port number + within that device. If specified by the host, a sysfs + attribute called 'name' will be populated with a name for + the port which can be used by udev scripts to create a + symlink to the device. config HVCS tristate "IBM Hypervisor Virtual Console Server support" diff --git a/drivers/char/hvc_beat.c b/drivers/char/hvc_beat.c index 0afc8b82212e..6913fc33270c 100644 --- a/drivers/char/hvc_beat.c +++ b/drivers/char/hvc_beat.c @@ -84,7 +84,7 @@ static int hvc_beat_put_chars(uint32_t vtermno, const char *buf, int cnt) return cnt; } -static struct hv_ops hvc_beat_get_put_ops = { +static const struct hv_ops hvc_beat_get_put_ops = { .get_chars = hvc_beat_get_chars, .put_chars = hvc_beat_put_chars, }; diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 416d3423150d..d8dac5820f0e 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -125,7 +125,7 @@ static struct hvc_struct *hvc_get_by_index(int index) * console interfaces but can still be used as a tty device. This has to be * static because kmalloc will not work during early console init. */ -static struct hv_ops *cons_ops[MAX_NR_HVC_CONSOLES]; +static const struct hv_ops *cons_ops[MAX_NR_HVC_CONSOLES]; static uint32_t vtermnos[MAX_NR_HVC_CONSOLES] = {[0 ... MAX_NR_HVC_CONSOLES - 1] = -1}; @@ -247,7 +247,7 @@ static void destroy_hvc_struct(struct kref *kref) * vty adapters do NOT get an hvc_instantiate() callback since they * appear after early console init. */ -int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops) +int hvc_instantiate(uint32_t vtermno, int index, const struct hv_ops *ops) { struct hvc_struct *hp; @@ -749,7 +749,8 @@ static const struct tty_operations hvc_ops = { }; struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data, - struct hv_ops *ops, int outbuf_size) + const struct hv_ops *ops, + int outbuf_size) { struct hvc_struct *hp; int i; diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 10950ca706d8..52ddf4d3716c 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -55,7 +55,7 @@ struct hvc_struct { int outbuf_size; int n_outbuf; uint32_t vtermno; - struct hv_ops *ops; + const struct hv_ops *ops; int irq_requested; int data; struct winsize ws; @@ -76,11 +76,12 @@ struct hv_ops { }; /* Register a vterm and a slot index for use as a console (console_init) */ -extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops); +extern int hvc_instantiate(uint32_t vtermno, int index, + const struct hv_ops *ops); /* register a vterm for hvc tty operation (module_init or hotplug add) */ extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data, - struct hv_ops *ops, int outbuf_size); + const struct hv_ops *ops, int outbuf_size); /* remove a vterm from hvc tty operation (module_exit or hotplug remove) */ extern int hvc_remove(struct hvc_struct *hp); diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c index 936d05bf37fa..fd0242676a2a 100644 --- a/drivers/char/hvc_iseries.c +++ b/drivers/char/hvc_iseries.c @@ -197,7 +197,7 @@ done: return sent; } -static struct hv_ops hvc_get_put_ops = { +static const struct hv_ops hvc_get_put_ops = { .get_chars = get_chars, .put_chars = put_chars, .notifier_add = notifier_add_irq, diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c index fe62bd0e17b7..21681a81cc35 100644 --- a/drivers/char/hvc_iucv.c +++ b/drivers/char/hvc_iucv.c @@ -922,7 +922,7 @@ static int hvc_iucv_pm_restore_thaw(struct device *dev) /* HVC operations */ -static struct hv_ops hvc_iucv_ops = { +static const struct hv_ops hvc_iucv_ops = { .get_chars = hvc_iucv_get_chars, .put_chars = hvc_iucv_put_chars, .notifier_add = hvc_iucv_notifier_add, diff --git a/drivers/char/hvc_rtas.c b/drivers/char/hvc_rtas.c index 88590d040046..61c4a61558d9 100644 --- a/drivers/char/hvc_rtas.c +++ b/drivers/char/hvc_rtas.c @@ -71,7 +71,7 @@ static int hvc_rtas_read_console(uint32_t vtermno, char *buf, int count) return i; } -static struct hv_ops hvc_rtas_get_put_ops = { +static const struct hv_ops hvc_rtas_get_put_ops = { .get_chars = hvc_rtas_read_console, .put_chars = hvc_rtas_write_console, }; diff --git a/drivers/char/hvc_udbg.c b/drivers/char/hvc_udbg.c index bd63ba878a56..b0957e61a7be 100644 --- a/drivers/char/hvc_udbg.c +++ b/drivers/char/hvc_udbg.c @@ -58,7 +58,7 @@ static int hvc_udbg_get(uint32_t vtermno, char *buf, int count) return i; } -static struct hv_ops hvc_udbg_ops = { +static const struct hv_ops hvc_udbg_ops = { .get_chars = hvc_udbg_get, .put_chars = hvc_udbg_put, }; diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 10be343d6ae7..27370e99c66f 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -77,7 +77,7 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count) return got; } -static struct hv_ops hvc_get_put_ops = { +static const struct hv_ops hvc_get_put_ops = { .get_chars = filtered_get_chars, .put_chars = hvc_put_chars, .notifier_add = notifier_add_irq, diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index b1a71638c772..60446f82a3fc 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -122,7 +122,7 @@ static int read_console(uint32_t vtermno, char *buf, int len) return recv; } -static struct hv_ops hvc_ops = { +static const struct hv_ops hvc_ops = { .get_chars = read_console, .put_chars = write_console, .notifier_add = notifier_add_irq, diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index a035ae39a359..166c5b2e20bb 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1,18 +1,6 @@ -/*D:300 - * The Guest console driver - * - * Writing console drivers is one of the few remaining Dark Arts in Linux. - * Fortunately for us, the path of virtual consoles has been well-trodden by - * the PowerPC folks, who wrote "hvc_console.c" to generically support any - * virtual console. We use that infrastructure which only requires us to write - * the basic put_chars and get_chars functions and call the right register - * functions. - :*/ - -/*M:002 The console can be flooded: while the Guest is processing input the - * Host can send more. Buffering in the Host could alleviate this, but it is a - * difficult problem in general. :*/ -/* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation +/* + * Copyright (C) 2006, 2007, 2009 Rusty Russell, IBM Corporation + * Copyright (C) 2009, Red Hat, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,142 +16,704 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/cdev.h> +#include <linux/debugfs.h> +#include <linux/device.h> #include <linux/err.h> +#include <linux/fs.h> #include <linux/init.h> +#include <linux/list.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/spinlock.h> #include <linux/virtio.h> #include <linux/virtio_console.h> +#include <linux/wait.h> +#include <linux/workqueue.h> #include "hvc_console.h" -/*D:340 These represent our input and output console queues, and the virtio - * operations for them. */ -static struct virtqueue *in_vq, *out_vq; -static struct virtio_device *vdev; +/* + * This is a global struct for storing common data for all the devices + * this driver handles. + * + * Mainly, it has a linked list for all the consoles in one place so + * that callbacks from hvc for get_chars(), put_chars() work properly + * across multiple devices and multiple ports per device. + */ +struct ports_driver_data { + /* Used for registering chardevs */ + struct class *class; + + /* Used for exporting per-port information to debugfs */ + struct dentry *debugfs_dir; + + /* Number of devices this driver is handling */ + unsigned int index; + + /* + * This is used to keep track of the number of hvc consoles + * spawned by this driver. This number is given as the first + * argument to hvc_alloc(). To correctly map an initial + * console spawned via hvc_instantiate to the console being + * hooked up via hvc_alloc, we need to pass the same vtermno. + * + * We also just assume the first console being initialised was + * the first one that got used as the initial console. + */ + unsigned int next_vtermno; + + /* All the console devices handled by this driver */ + struct list_head consoles; +}; +static struct ports_driver_data pdrvdata; + +DEFINE_SPINLOCK(pdrvdata_lock); + +/* This struct holds information that's relevant only for console ports */ +struct console { + /* We'll place all consoles in a list in the pdrvdata struct */ + struct list_head list; + + /* The hvc device associated with this console port */ + struct hvc_struct *hvc; + + /* + * This number identifies the number that we used to register + * with hvc in hvc_instantiate() and hvc_alloc(); this is the + * number passed on by the hvc callbacks to us to + * differentiate between the other console ports handled by + * this driver + */ + u32 vtermno; +}; + +struct port_buffer { + char *buf; + + /* size of the buffer in *buf above */ + size_t size; + + /* used length of the buffer */ + size_t len; + /* offset in the buf from which to consume data */ + size_t offset; +}; + +/* + * This is a per-device struct that stores data common to all the + * ports for that device (vdev->priv). + */ +struct ports_device { + /* + * Workqueue handlers where we process deferred work after + * notification + */ + struct work_struct control_work; + struct work_struct config_work; + + struct list_head ports; + + /* To protect the list of ports */ + spinlock_t ports_lock; + + /* To protect the vq operations for the control channel */ + spinlock_t cvq_lock; -/* This is our input buffer, and how much data is left in it. */ -static unsigned int in_len; -static char *in, *inbuf; + /* The current config space is stored here */ + struct virtio_console_config config; -/* The operations for our console. */ -static struct hv_ops virtio_cons; + /* The virtio device we're associated with */ + struct virtio_device *vdev; -/* The hvc device */ -static struct hvc_struct *hvc; + /* + * A couple of virtqueues for the control channel: one for + * guest->host transfers, one for host->guest transfers + */ + struct virtqueue *c_ivq, *c_ovq; -/*D:310 The put_chars() callback is pretty straightforward. + /* Array of per-port IO virtqueues */ + struct virtqueue **in_vqs, **out_vqs; + + /* The control messages to the Host are sent via this buffer */ + struct port_buffer *outbuf; + + /* Used for numbering devices for sysfs and debugfs */ + unsigned int drv_index; + + /* Major number for this device. Ports will be created as minors. */ + int chr_major; +}; + +/* This struct holds the per-port data */ +struct port { + /* Next port in the list, head is in the ports_device */ + struct list_head list; + + /* Pointer to the parent virtio_console device */ + struct ports_device *portdev; + + /* The current buffer from which data has to be fed to readers */ + struct port_buffer *inbuf; + + /* + * To protect the operations on the in_vq associated with this + * port. Has to be a spinlock because it can be called from + * interrupt context (get_char()). + */ + spinlock_t inbuf_lock; + + /* Buffer that's used to pass data from the guest to the host */ + struct port_buffer *outbuf; + + /* The IO vqs for this port */ + struct virtqueue *in_vq, *out_vq; + + /* File in the debugfs directory that exposes this port's information */ + struct dentry *debugfs_file; + + /* + * The entries in this struct will be valid if this port is + * hooked up to an hvc console + */ + struct console cons; + + /* Each port associates with a separate char device */ + struct cdev cdev; + struct device *dev; + + /* A waitqueue for poll() or blocking read operations */ + wait_queue_head_t waitqueue; + + /* The 'name' of the port that we expose via sysfs properties */ + char *name; + + /* The 'id' to identify the port with the Host */ + u32 id; + + /* Is the host device open */ + bool host_connected; + + /* We should allow only one process to open a port */ + bool guest_connected; + + /* Does the Host not want to accept more data currently? */ + bool host_throttled; +}; + +/* This is the very early arch-specified put chars function. */ +static int (*early_put_chars)(u32, const char *, int); + +static struct port *find_port_by_vtermno(u32 vtermno) +{ + struct port *port; + struct console *cons; + unsigned long flags; + + spin_lock_irqsave(&pdrvdata_lock, flags); + list_for_each_entry(cons, &pdrvdata.consoles, list) { + if (cons->vtermno == vtermno) { + port = container_of(cons, struct port, cons); + goto out; + } + } + port = NULL; +out: + spin_unlock_irqrestore(&pdrvdata_lock, flags); + return port; +} + +static struct port *find_port_by_id(struct ports_device *portdev, u32 id) +{ + struct port *port; + unsigned long flags; + + spin_lock_irqsave(&portdev->ports_lock, flags); + list_for_each_entry(port, &portdev->ports, list) + if (port->id == id) + goto out; + port = NULL; +out: + spin_unlock_irqrestore(&portdev->ports_lock, flags); + + return port; +} + +static struct port *find_port_by_vq(struct ports_device *portdev, + struct virtqueue *vq) +{ + struct port *port; + unsigned long flags; + + spin_lock_irqsave(&portdev->ports_lock, flags); + list_for_each_entry(port, &portdev->ports, list) + if (port->in_vq == vq || port->out_vq == vq) + goto out; + port = NULL; +out: + spin_unlock_irqrestore(&portdev->ports_lock, flags); + return port; +} + +static bool is_console_port(struct port *port) +{ + if (port->cons.hvc) + return true; + return false; +} + +static inline bool use_multiport(struct ports_device *portdev) +{ + /* + * This condition can be true when put_chars is called from + * early_init + */ + if (!portdev->vdev) + return 0; + return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT); +} + +static void free_buf(struct port_buffer *buf) +{ + kfree(buf->buf); + kfree(buf); +} + +static struct port_buffer *alloc_buf(size_t buf_size) +{ + struct port_buffer *buf; + + buf = kmalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + goto fail; + buf->buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf->buf) + goto free_buf; + buf->len = 0; + buf->offset = 0; + buf->size = buf_size; + return buf; + +free_buf: + kfree(buf); +fail: + return NULL; +} + +/* Callers should take appropriate locks */ +static void *get_inbuf(struct port *port) +{ + struct port_buffer *buf; + struct virtqueue *vq; + unsigned int len; + + vq = port->in_vq; + buf = vq->vq_ops->get_buf(vq, &len); + if (buf) { + buf->len = len; + buf->offset = 0; + } + return buf; +} + +/* + * Create a scatter-gather list representing our input buffer and put + * it in the queue. * - * We turn the characters into a scatter-gather list, add it to the output - * queue and then kick the Host. Then we sit here waiting for it to finish: - * inefficient in theory, but in practice implementations will do it - * immediately (lguest's Launcher does). */ -static int put_chars(u32 vtermno, const char *buf, int count) + * Callers should take appropriate locks. + */ +static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf) { struct scatterlist sg[1]; + int ret; + + sg_init_one(sg, buf->buf, buf->size); + + ret = vq->vq_ops->add_buf(vq, sg, 0, 1, buf); + vq->vq_ops->kick(vq); + return ret; +} + +/* Discard any unread data this port has. Callers lockers. */ +static void discard_port_data(struct port *port) +{ + struct port_buffer *buf; + struct virtqueue *vq; unsigned int len; - /* This is a convenient routine to initialize a single-elem sg list */ - sg_init_one(sg, buf, count); + vq = port->in_vq; + if (port->inbuf) + buf = port->inbuf; + else + buf = vq->vq_ops->get_buf(vq, &len); - /* add_buf wants a token to identify this buffer: we hand it any - * non-NULL pointer, since there's only ever one buffer. */ - if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) >= 0) { - /* Tell Host to go! */ - out_vq->vq_ops->kick(out_vq); - /* Chill out until it's done with the buffer. */ - while (!out_vq->vq_ops->get_buf(out_vq, &len)) - cpu_relax(); + if (!buf) + return; + + if (add_inbuf(vq, buf) < 0) { + buf->len = buf->offset = 0; + dev_warn(port->dev, "Error adding buffer back to vq\n"); + return; } - /* We're expected to return the amount of data we wrote: all of it. */ - return count; + port->inbuf = NULL; +} + +static bool port_has_data(struct port *port) +{ + unsigned long flags; + bool ret; + + ret = false; + spin_lock_irqsave(&port->inbuf_lock, flags); + if (port->inbuf) + ret = true; + spin_unlock_irqrestore(&port->inbuf_lock, flags); + + return ret; +} + +static ssize_t send_control_msg(struct port *port, unsigned int event, + unsigned int value) +{ + struct scatterlist sg[1]; + struct virtio_console_control cpkt; + struct virtqueue *vq; + struct port_buffer *outbuf; + int tmplen; + + if (!use_multiport(port->portdev)) + return 0; + + cpkt.id = port->id; + cpkt.event = event; + cpkt.value = value; + + vq = port->portdev->c_ovq; + outbuf = port->portdev->outbuf; + + memcpy(outbuf->buf, (void *)&cpkt, sizeof(cpkt)); + + sg_init_one(sg, outbuf->buf, sizeof(cpkt)); + if (vq->vq_ops->add_buf(vq, sg, 1, 0, outbuf) >= 0) { + vq->vq_ops->kick(vq); + while (!vq->vq_ops->get_buf(vq, &tmplen)) + cpu_relax(); + } + return 0; } -/* Create a scatter-gather list representing our input buffer and put it in the - * queue. */ -static void add_inbuf(void) +static ssize_t send_buf(struct port *port, const char *in_buf, size_t in_count, + bool from_user) { struct scatterlist sg[1]; - sg_init_one(sg, inbuf, PAGE_SIZE); + struct virtqueue *out_vq; + struct port_buffer *buf; + ssize_t ret; + unsigned int tmplen; + + out_vq = port->out_vq; + buf = port->outbuf; + + if (buf->len) { + /* + * Nonzero buf->len means we had queued a buffer + * earlier to the Host to be consumed. Get the buffer + * back for this write request; wait while the Host + * consumes it. + */ + while (!out_vq->vq_ops->get_buf(out_vq, &tmplen)) + cpu_relax(); + } + + if (in_count > buf->size) + in_count = buf->size; + + if (from_user) { + ret = copy_from_user(buf->buf, in_buf, in_count); + } else { + /* + * Since we're not sure when the host will actually + * consume the data and tell us about it, we have to + * copy the data here in case the caller frees the + * in_buf. + */ + memcpy(buf->buf, in_buf, in_count); + ret = 0; /* Emulate copy_from_user behaviour */ + } + buf->len = in_count - ret; + + sg_init_one(sg, buf->buf, buf->len); + ret = out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, buf); + + /* Tell Host to go! */ + out_vq->vq_ops->kick(out_vq); + + if (ret < 0) + buf->len = 0; - /* We should always be able to add one buffer to an empty queue. */ - if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) < 0) - BUG(); - in_vq->vq_ops->kick(in_vq); + /* We're expected to return the amount of data we wrote */ + return buf->len; } -/*D:350 get_chars() is the callback from the hvc_console infrastructure when - * an interrupt is received. - * - * Most of the code deals with the fact that the hvc_console() infrastructure - * only asks us for 16 bytes at a time. We keep in_offset and in_used fields - * for partially-filled buffers. */ -static int get_chars(u32 vtermno, char *buf, int count) +/* + * Give out the data that's requested from the buffer that we have + * queued up. + */ +static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count, + bool to_user) { - /* If we don't have an input queue yet, we can't get input. */ - BUG_ON(!in_vq); + struct port_buffer *buf; + ssize_t ret; + unsigned long flags; + + if (!out_count || !port_has_data(port)) + return 0; - /* No buffer? Try to get one. */ - if (!in_len) { - in = in_vq->vq_ops->get_buf(in_vq, &in_len); - if (!in) + buf = port->inbuf; + if (out_count > buf->len - buf->offset) + out_count = buf->len - buf->offset; + + if (to_user) { + ret = copy_to_user(out_buf, buf->buf + buf->offset, out_count); + } else { + memcpy(out_buf, buf->buf + buf->offset, out_count); + ret = 0; /* Emulate copy_to_user behaviour */ + } + + /* Return the number of bytes actually copied */ + ret = out_count - ret; + buf->offset += ret; + + if (buf->offset == buf->len) { + /* + * We're done using all the data in this buffer. + * Re-queue so that the Host can send us more data. + */ + spin_lock_irqsave(&port->inbuf_lock, flags); + port->inbuf = NULL; + + if (add_inbuf(port->in_vq, buf) < 0) + dev_warn(port->dev, "failed add_buf\n"); + + spin_unlock_irqrestore(&port->inbuf_lock, flags); + } + return ret; +} + +/* The condition that must be true for polling to end */ +static bool wait_is_over(struct port *port) +{ + return port_has_data(port) || !port->host_connected; +} + +static ssize_t port_fops_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct port *port; + ssize_t ret; + + port = filp->private_data; + + if (!port_has_data(port)) { + /* + * If nothing's connected on the host just return 0 in + * case of list_empty; this tells the userspace app + * that there's no connection + */ + if (!port->host_connected) return 0; + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(port->waitqueue, + wait_is_over(port)); + if (ret < 0) + return ret; } + /* + * We could've received a disconnection message while we were + * waiting for more data. + * + * This check is not clubbed in the if() statement above as we + * might receive some data as well as the host could get + * disconnected after we got woken up from our wait. So we + * really want to give off whatever data we have and only then + * check for host_connected. + */ + if (!port_has_data(port) && !port->host_connected) + return 0; + + return fill_readbuf(port, ubuf, count, true); +} - /* You want more than we have to give? Well, try wanting less! */ - if (in_len < count) - count = in_len; +static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *offp) +{ + struct port *port; - /* Copy across to their buffer and increment offset. */ - memcpy(buf, in, count); - in += count; - in_len -= count; + port = filp->private_data; - /* Finished? Re-register buffer so Host will use it again. */ - if (in_len == 0) - add_inbuf(); + if (port->host_throttled) + return -ENOSPC; - return count; + return send_buf(port, ubuf, count, true); } -/*:*/ -/*D:320 Console drivers are initialized very early so boot messages can go out, - * so we do things slightly differently from the generic virtio initialization - * of the net and block drivers. +static unsigned int port_fops_poll(struct file *filp, poll_table *wait) +{ + struct port *port; + unsigned int ret; + + port = filp->private_data; + poll_wait(filp, &port->waitqueue, wait); + + ret = 0; + if (port->inbuf) + ret |= POLLIN | POLLRDNORM; + if (port->host_connected && !port->host_throttled) + ret |= POLLOUT; + if (!port->host_connected) + ret |= POLLHUP; + + return ret; +} + +static int port_fops_release(struct inode *inode, struct file *filp) +{ + struct port *port; + + port = filp->private_data; + + /* Notify host of port being closed */ + send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0); + + spin_lock_irq(&port->inbuf_lock); + port->guest_connected = false; + + discard_port_data(port); + + spin_unlock_irq(&port->inbuf_lock); + + return 0; +} + +static int port_fops_open(struct inode *inode, struct file *filp) +{ + struct cdev *cdev = inode->i_cdev; + struct port *port; + + port = container_of(cdev, struct port, cdev); + filp->private_data = port; + + /* + * Don't allow opening of console port devices -- that's done + * via /dev/hvc + */ + if (is_console_port(port)) + return -ENXIO; + + /* Allow only one process to open a particular port at a time */ + spin_lock_irq(&port->inbuf_lock); + if (port->guest_connected) { + spin_unlock_irq(&port->inbuf_lock); + return -EMFILE; + } + + port->guest_connected = true; + spin_unlock_irq(&port->inbuf_lock); + + /* Notify host of port being opened */ + send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); + + return 0; +} + +/* + * The file operations that we support: programs in the guest can open + * a console device, read from it, write to it, poll for data and + * close it. The devices are at + * /dev/vport<device number>p<port number> + */ +static const struct file_operations port_fops = { + .owner = THIS_MODULE, + .open = port_fops_open, + .read = port_fops_read, + .write = port_fops_write, + .poll = port_fops_poll, + .release = port_fops_release, +}; + +/* + * The put_chars() callback is pretty straightforward. * - * At this stage, the console is output-only. It's too early to set up a - * virtqueue, so we let the drivers do some boutique early-output thing. */ -int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) + * We turn the characters into a scatter-gather list, add it to the + * output queue and then kick the Host. Then we sit here waiting for + * it to finish: inefficient in theory, but in practice + * implementations will do it immediately (lguest's Launcher does). + */ +static int put_chars(u32 vtermno, const char *buf, int count) { - virtio_cons.put_chars = put_chars; - return hvc_instantiate(0, 0, &virtio_cons); + struct port *port; + + port = find_port_by_vtermno(vtermno); + if (!port) + return 0; + + if (unlikely(early_put_chars)) + return early_put_chars(vtermno, buf, count); + + return send_buf(port, buf, count, false); } /* - * virtio console configuration. This supports: - * - console resize + * get_chars() is the callback from the hvc_console infrastructure + * when an interrupt is received. + * + * We call out to fill_readbuf that gets us the required data from the + * buffers that are queued up. */ -static void virtcons_apply_config(struct virtio_device *dev) +static int get_chars(u32 vtermno, char *buf, int count) { + struct port *port; + + port = find_port_by_vtermno(vtermno); + if (!port) + return 0; + + /* If we don't have an input queue yet, we can't get input. */ + BUG_ON(!port->in_vq); + + return fill_readbuf(port, buf, count, false); +} + +static void resize_console(struct port *port) +{ + struct virtio_device *vdev; struct winsize ws; - if (virtio_has_feature(dev, VIRTIO_CONSOLE_F_SIZE)) { - dev->config->get(dev, - offsetof(struct virtio_console_config, cols), - &ws.ws_col, sizeof(u16)); - dev->config->get(dev, - offsetof(struct virtio_console_config, rows), - &ws.ws_row, sizeof(u16)); - hvc_resize(hvc, ws); + vdev = port->portdev->vdev; + if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) { + vdev->config->get(vdev, + offsetof(struct virtio_console_config, cols), + &ws.ws_col, sizeof(u16)); + vdev->config->get(vdev, + offsetof(struct virtio_console_config, rows), + &ws.ws_row, sizeof(u16)); + hvc_resize(port->cons.hvc, ws); } } -/* - * we support only one console, the hvc struct is a global var - * We set the configuration at this point, since we now have a tty - */ +/* We set the configuration at this point, since we now have a tty */ static int notifier_add_vio(struct hvc_struct *hp, int data) { + struct port *port; + + port = find_port_by_vtermno(hp->vtermno); + if (!port) + return -EINVAL; + hp->irq_requested = 1; - virtcons_apply_config(vdev); + resize_console(port); return 0; } @@ -173,75 +723,775 @@ static void notifier_del_vio(struct hvc_struct *hp, int data) hp->irq_requested = 0; } -static void hvc_handle_input(struct virtqueue *vq) +/* The operations for console ports. */ +static const struct hv_ops hv_ops = { + .get_chars = get_chars, + .put_chars = put_chars, + .notifier_add = notifier_add_vio, + .notifier_del = notifier_del_vio, + .notifier_hangup = notifier_del_vio, +}; + +/* + * Console drivers are initialized very early so boot messages can go + * out, so we do things slightly differently from the generic virtio + * initialization of the net and block drivers. + * + * At this stage, the console is output-only. It's too early to set + * up a virtqueue, so we let the drivers do some boutique early-output + * thing. + */ +int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) +{ + early_put_chars = put_chars; + return hvc_instantiate(0, 0, &hv_ops); +} + +int init_port_console(struct port *port) +{ + int ret; + + /* + * The Host's telling us this port is a console port. Hook it + * up with an hvc console. + * + * To set up and manage our virtual console, we call + * hvc_alloc(). + * + * The first argument of hvc_alloc() is the virtual console + * number. The second argument is the parameter for the + * notification mechanism (like irq number). We currently + * leave this as zero, virtqueues have implicit notifications. + * + * The third argument is a "struct hv_ops" containing the + * put_chars() get_chars(), notifier_add() and notifier_del() + * pointers. The final argument is the output buffer size: we + * can do any size, so we put PAGE_SIZE here. + */ + port->cons.vtermno = pdrvdata.next_vtermno; + + port->cons.hvc = hvc_alloc(port->cons.vtermno, 0, &hv_ops, PAGE_SIZE); + if (IS_ERR(port->cons.hvc)) { + ret = PTR_ERR(port->cons.hvc); + dev_err(port->dev, + "error %d allocating hvc for port\n", ret); + port->cons.hvc = NULL; + return ret; + } + spin_lock_irq(&pdrvdata_lock); + pdrvdata.next_vtermno++; + list_add_tail(&port->cons.list, &pdrvdata.consoles); + spin_unlock_irq(&pdrvdata_lock); + port->guest_connected = true; + + /* Notify host of port being opened */ + send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 1); + + return 0; +} + +static ssize_t show_port_name(struct device *dev, + struct device_attribute *attr, char *buffer) { - if (hvc_poll(hvc)) + struct port *port; + + port = dev_get_drvdata(dev); + + return sprintf(buffer, "%s\n", port->name); +} + +static DEVICE_ATTR(name, S_IRUGO, show_port_name, NULL); + +static struct attribute *port_sysfs_entries[] = { + &dev_attr_name.attr, + NULL +}; + +static struct attribute_group port_attribute_group = { + .name = NULL, /* put in device directory */ + .attrs = port_sysfs_entries, +}; + +static int debugfs_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +static ssize_t debugfs_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct port *port; + char *buf; + ssize_t ret, out_offset, out_count; + + out_count = 1024; + buf = kmalloc(out_count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + port = filp->private_data; + out_offset = 0; + out_offset += snprintf(buf + out_offset, out_count, + "name: %s\n", port->name ? port->name : ""); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "guest_connected: %d\n", port->guest_connected); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "host_connected: %d\n", port->host_connected); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "host_throttled: %d\n", port->host_throttled); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "is_console: %s\n", + is_console_port(port) ? "yes" : "no"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "console_vtermno: %u\n", port->cons.vtermno); + + ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); + kfree(buf); + return ret; +} + +static const struct file_operations port_debugfs_ops = { + .owner = THIS_MODULE, + .open = debugfs_open, + .read = debugfs_read, +}; + +/* Remove all port-specific data. */ +static int remove_port(struct port *port) +{ + spin_lock_irq(&port->portdev->ports_lock); + list_del(&port->list); + spin_unlock_irq(&port->portdev->ports_lock); + + if (is_console_port(port)) { + spin_lock_irq(&pdrvdata_lock); + list_del(&port->cons.list); + spin_unlock_irq(&pdrvdata_lock); + hvc_remove(port->cons.hvc); + } + if (port->guest_connected) + send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0); + + while (port->in_vq->vq_ops->detach_unused_buf(port->in_vq)) + ; + + sysfs_remove_group(&port->dev->kobj, &port_attribute_group); + device_destroy(pdrvdata.class, port->dev->devt); + cdev_del(&port->cdev); + + discard_port_data(port); + free_buf(port->outbuf); + kfree(port->name); + + debugfs_remove(port->debugfs_file); + + kfree(port); + return 0; +} + +/* Any private messages that the Host and Guest want to share */ +static void handle_control_message(struct ports_device *portdev, + struct port_buffer *buf) +{ + struct virtio_console_control *cpkt; + struct port *port; + size_t name_size; + int err; + + cpkt = (struct virtio_console_control *)(buf->buf + buf->offset); + + port = find_port_by_id(portdev, cpkt->id); + if (!port) { + /* No valid header at start of buffer. Drop it. */ + dev_dbg(&portdev->vdev->dev, + "Invalid index %u in control packet\n", cpkt->id); + return; + } + + switch (cpkt->event) { + case VIRTIO_CONSOLE_CONSOLE_PORT: + if (!cpkt->value) + break; + if (is_console_port(port)) + break; + + init_port_console(port); + /* + * Could remove the port here in case init fails - but + * have to notify the host first. + */ + break; + case VIRTIO_CONSOLE_RESIZE: + if (!is_console_port(port)) + break; + port->cons.hvc->irq_requested = 1; + resize_console(port); + break; + case VIRTIO_CONSOLE_PORT_OPEN: + port->host_connected = cpkt->value; + wake_up_interruptible(&port->waitqueue); + break; + case VIRTIO_CONSOLE_PORT_NAME: + /* + * Skip the size of the header and the cpkt to get the size + * of the name that was sent + */ + name_size = buf->len - buf->offset - sizeof(*cpkt) + 1; + + port->name = kmalloc(name_size, GFP_KERNEL); + if (!port->name) { + dev_err(port->dev, + "Not enough space to store port name\n"); + break; + } + strncpy(port->name, buf->buf + buf->offset + sizeof(*cpkt), + name_size - 1); + port->name[name_size - 1] = 0; + + /* + * Since we only have one sysfs attribute, 'name', + * create it only if we have a name for the port. + */ + err = sysfs_create_group(&port->dev->kobj, + &port_attribute_group); + if (err) + dev_err(port->dev, + "Error %d creating sysfs device attributes\n", + err); + + break; + case VIRTIO_CONSOLE_THROTTLE_PORT: + /* + * Hosts can govern some policy to disallow rogue + * guest processes writing indefinitely to ports + * leading to OOM situations. If we receive this + * message here, it means the Host side of the port + * either reached its max. limit to cache data or + * signal to us that the host is ready to accept more + * data. + */ + port->host_throttled = cpkt->value; + break; + case VIRTIO_CONSOLE_PORT_REMOVE: + /* + * Hot unplug the port. We don't decrement nr_ports + * since we don't want to deal with extra complexities + * of using the lowest-available port id: We can just + * pick up the nr_ports number as the id and not have + * userspace send it to us. This helps us in two + * ways: + * + * - We don't need to have a 'port_id' field in the + * config space when a port is hot-added. This is a + * good thing as we might queue up multiple hotplug + * requests issued in our workqueue. + * + * - Another way to deal with this would have been to + * use a bitmap of the active ports and select the + * lowest non-active port from that map. That + * bloats the already tight config space and we + * would end up artificially limiting the + * max. number of ports to sizeof(bitmap). Right + * now we can support 2^32 ports (as the port id is + * stored in a u32 type). + * + */ + remove_port(port); + break; + } +} + +static void control_work_handler(struct work_struct *work) +{ + struct ports_device *portdev; + struct virtqueue *vq; + struct port_buffer *buf; + unsigned int len; + + portdev = container_of(work, struct ports_device, control_work); + vq = portdev->c_ivq; + + spin_lock(&portdev->cvq_lock); + while ((buf = vq->vq_ops->get_buf(vq, &len))) { + spin_unlock(&portdev->cvq_lock); + + buf->len = len; + buf->offset = 0; + + handle_control_message(portdev, buf); + + spin_lock(&portdev->cvq_lock); + if (add_inbuf(portdev->c_ivq, buf) < 0) { + dev_warn(&portdev->vdev->dev, + "Error adding buffer to queue\n"); + free_buf(buf); + } + } + spin_unlock(&portdev->cvq_lock); +} + +static void in_intr(struct virtqueue *vq) +{ + struct port *port; + unsigned long flags; + + port = find_port_by_vq(vq->vdev->priv, vq); + if (!port) + return; + + spin_lock_irqsave(&port->inbuf_lock, flags); + port->inbuf = get_inbuf(port); + + /* + * Don't queue up data when port is closed. This condition + * can be reached when a console port is not yet connected (no + * tty is spawned) and the host sends out data to console + * ports. For generic serial ports, the host won't + * (shouldn't) send data till the guest is connected. + */ + if (!port->guest_connected) + discard_port_data(port); + + spin_unlock_irqrestore(&port->inbuf_lock, flags); + + wake_up_interruptible(&port->waitqueue); + + if (is_console_port(port) && hvc_poll(port->cons.hvc)) hvc_kick(); } -/*D:370 Once we're further in boot, we get probed like any other virtio device. - * At this stage we set up the output virtqueue. - * - * To set up and manage our virtual console, we call hvc_alloc(). Since we - * never remove the console device we never need this pointer again. - * - * Finally we put our input buffer in the input queue, ready to receive. */ -static int __devinit virtcons_probe(struct virtio_device *dev) +static void control_intr(struct virtqueue *vq) { - vq_callback_t *callbacks[] = { hvc_handle_input, NULL}; - const char *names[] = { "input", "output" }; - struct virtqueue *vqs[2]; + struct ports_device *portdev; + + portdev = vq->vdev->priv; + schedule_work(&portdev->control_work); +} + +static void config_intr(struct virtio_device *vdev) +{ + struct ports_device *portdev; + + portdev = vdev->priv; + if (use_multiport(portdev)) { + /* Handle port hot-add */ + schedule_work(&portdev->config_work); + } + /* + * We'll use this way of resizing only for legacy support. + * For newer userspace (VIRTIO_CONSOLE_F_MULTPORT+), use + * control messages to indicate console size changes so that + * it can be done per-port + */ + resize_console(find_port_by_id(portdev, 0)); +} + +static void fill_queue(struct virtqueue *vq, spinlock_t *lock) +{ + struct port_buffer *buf; + int ret; + + do { + buf = alloc_buf(PAGE_SIZE); + if (!buf) + break; + + spin_lock_irq(lock); + ret = add_inbuf(vq, buf); + if (ret < 0) { + spin_unlock_irq(lock); + free_buf(buf); + break; + } + spin_unlock_irq(lock); + } while (ret > 0); +} + +static int add_port(struct ports_device *portdev, u32 id) +{ + char debugfs_name[16]; + struct port *port; + struct port_buffer *inbuf; + dev_t devt; int err; - vdev = dev; + port = kmalloc(sizeof(*port), GFP_KERNEL); + if (!port) { + err = -ENOMEM; + goto fail; + } + + port->portdev = portdev; + port->id = id; + + port->name = NULL; + port->inbuf = NULL; + port->cons.hvc = NULL; + + port->host_connected = port->guest_connected = false; + port->host_throttled = false; - /* This is the scratch page we use to receive console input */ - inbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); + port->in_vq = portdev->in_vqs[port->id]; + port->out_vq = portdev->out_vqs[port->id]; + + cdev_init(&port->cdev, &port_fops); + + devt = MKDEV(portdev->chr_major, id); + err = cdev_add(&port->cdev, devt, 1); + if (err < 0) { + dev_err(&port->portdev->vdev->dev, + "Error %d adding cdev for port %u\n", err, id); + goto free_port; + } + port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, + devt, port, "vport%up%u", + port->portdev->drv_index, id); + if (IS_ERR(port->dev)) { + err = PTR_ERR(port->dev); + dev_err(&port->portdev->vdev->dev, + "Error %d creating device for port %u\n", + err, id); + goto free_cdev; + } + + spin_lock_init(&port->inbuf_lock); + init_waitqueue_head(&port->waitqueue); + + inbuf = alloc_buf(PAGE_SIZE); if (!inbuf) { err = -ENOMEM; + goto free_device; + } + port->outbuf = alloc_buf(PAGE_SIZE); + if (!port->outbuf) { + err = -ENOMEM; + goto free_inbuf; + } + + /* Register the input buffer the first time. */ + add_inbuf(port->in_vq, inbuf); + + /* + * If we're not using multiport support, this has to be a console port + */ + if (!use_multiport(port->portdev)) { + err = init_port_console(port); + if (err) + goto free_outbuf; + } + + spin_lock_irq(&portdev->ports_lock); + list_add_tail(&port->list, &port->portdev->ports); + spin_unlock_irq(&portdev->ports_lock); + + /* + * Tell the Host we're set so that it can send us various + * configuration parameters for this port (eg, port name, + * caching, whether this is a console port, etc.) + */ + send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1); + + if (pdrvdata.debugfs_dir) { + /* + * Finally, create the debugfs file that we can use to + * inspect a port's state at any time + */ + sprintf(debugfs_name, "vport%up%u", + port->portdev->drv_index, id); + port->debugfs_file = debugfs_create_file(debugfs_name, 0444, + pdrvdata.debugfs_dir, + port, + &port_debugfs_ops); + } + return 0; + +free_outbuf: + free_buf(port->outbuf); +free_inbuf: + free_buf(inbuf); +free_device: + device_destroy(pdrvdata.class, port->dev->devt); +free_cdev: + cdev_del(&port->cdev); +free_port: + kfree(port); +fail: + return err; +} + +/* + * The workhandler for config-space updates. + * + * This is called when ports are hot-added. + */ +static void config_work_handler(struct work_struct *work) +{ + struct virtio_console_config virtconconf; + struct ports_device *portdev; + struct virtio_device *vdev; + int err; + + portdev = container_of(work, struct ports_device, config_work); + + vdev = portdev->vdev; + vdev->config->get(vdev, + offsetof(struct virtio_console_config, nr_ports), + &virtconconf.nr_ports, + sizeof(virtconconf.nr_ports)); + + if (portdev->config.nr_ports == virtconconf.nr_ports) { + /* + * Port 0 got hot-added. Since we already did all the + * other initialisation for it, just tell the Host + * that the port is ready if we find the port. In + * case the port was hot-removed earlier, we call + * add_port to add the port. + */ + struct port *port; + + port = find_port_by_id(portdev, 0); + if (!port) + add_port(portdev, 0); + else + send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1); + return; + } + if (virtconconf.nr_ports > portdev->config.max_nr_ports) { + dev_warn(&vdev->dev, + "More ports specified (%u) than allowed (%u)", + portdev->config.nr_ports + 1, + portdev->config.max_nr_ports); + return; + } + if (virtconconf.nr_ports < portdev->config.nr_ports) + return; + + /* Hot-add ports */ + while (virtconconf.nr_ports - portdev->config.nr_ports) { + err = add_port(portdev, portdev->config.nr_ports); + if (err) + break; + portdev->config.nr_ports++; + } +} + +static int init_vqs(struct ports_device *portdev) +{ + vq_callback_t **io_callbacks; + char **io_names; + struct virtqueue **vqs; + u32 i, j, nr_ports, nr_queues; + int err; + + nr_ports = portdev->config.max_nr_ports; + nr_queues = use_multiport(portdev) ? (nr_ports + 1) * 2 : 2; + + vqs = kmalloc(nr_queues * sizeof(struct virtqueue *), GFP_KERNEL); + if (!vqs) { + err = -ENOMEM; goto fail; } + io_callbacks = kmalloc(nr_queues * sizeof(vq_callback_t *), GFP_KERNEL); + if (!io_callbacks) { + err = -ENOMEM; + goto free_vqs; + } + io_names = kmalloc(nr_queues * sizeof(char *), GFP_KERNEL); + if (!io_names) { + err = -ENOMEM; + goto free_callbacks; + } + portdev->in_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *), + GFP_KERNEL); + if (!portdev->in_vqs) { + err = -ENOMEM; + goto free_names; + } + portdev->out_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *), + GFP_KERNEL); + if (!portdev->out_vqs) { + err = -ENOMEM; + goto free_invqs; + } + + /* + * For backward compat (newer host but older guest), the host + * spawns a console port first and also inits the vqs for port + * 0 before others. + */ + j = 0; + io_callbacks[j] = in_intr; + io_callbacks[j + 1] = NULL; + io_names[j] = "input"; + io_names[j + 1] = "output"; + j += 2; + if (use_multiport(portdev)) { + io_callbacks[j] = control_intr; + io_callbacks[j + 1] = NULL; + io_names[j] = "control-i"; + io_names[j + 1] = "control-o"; + + for (i = 1; i < nr_ports; i++) { + j += 2; + io_callbacks[j] = in_intr; + io_callbacks[j + 1] = NULL; + io_names[j] = "input"; + io_names[j + 1] = "output"; + } + } /* Find the queues. */ - /* FIXME: This is why we want to wean off hvc: we do nothing - * when input comes in. */ - err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names); + err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs, + io_callbacks, + (const char **)io_names); if (err) + goto free_outvqs; + + j = 0; + portdev->in_vqs[0] = vqs[0]; + portdev->out_vqs[0] = vqs[1]; + j += 2; + if (use_multiport(portdev)) { + portdev->c_ivq = vqs[j]; + portdev->c_ovq = vqs[j + 1]; + + for (i = 1; i < nr_ports; i++) { + j += 2; + portdev->in_vqs[i] = vqs[j]; + portdev->out_vqs[i] = vqs[j + 1]; + } + } + kfree(io_callbacks); + kfree(io_names); + kfree(vqs); + + return 0; + +free_names: + kfree(io_names); +free_callbacks: + kfree(io_callbacks); +free_outvqs: + kfree(portdev->out_vqs); +free_invqs: + kfree(portdev->in_vqs); +free_vqs: + kfree(vqs); +fail: + return err; +} + +static const struct file_operations portdev_fops = { + .owner = THIS_MODULE, +}; + +/* + * Once we're further in boot, we get probed like any other virtio + * device. + * + * If the host also supports multiple console ports, we check the + * config space to see how many ports the host has spawned. We + * initialize each port found. + */ +static int __devinit virtcons_probe(struct virtio_device *vdev) +{ + struct ports_device *portdev; + u32 i; + int err; + bool multiport; + + portdev = kmalloc(sizeof(*portdev), GFP_KERNEL); + if (!portdev) { + err = -ENOMEM; + goto fail; + } + + /* Attach this portdev to this virtio_device, and vice-versa. */ + portdev->vdev = vdev; + vdev->priv = portdev; + + spin_lock_irq(&pdrvdata_lock); + portdev->drv_index = pdrvdata.index++; + spin_unlock_irq(&pdrvdata_lock); + + portdev->chr_major = register_chrdev(0, "virtio-portsdev", + &portdev_fops); + if (portdev->chr_major < 0) { + dev_err(&vdev->dev, + "Error %d registering chrdev for device %u\n", + portdev->chr_major, portdev->drv_index); + err = portdev->chr_major; goto free; + } - in_vq = vqs[0]; - out_vq = vqs[1]; + multiport = false; + portdev->config.nr_ports = 1; + portdev->config.max_nr_ports = 1; + if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) { + multiport = true; + vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT; - /* Start using the new console output. */ - virtio_cons.get_chars = get_chars; - virtio_cons.put_chars = put_chars; - virtio_cons.notifier_add = notifier_add_vio; - virtio_cons.notifier_del = notifier_del_vio; - virtio_cons.notifier_hangup = notifier_del_vio; - - /* The first argument of hvc_alloc() is the virtual console number, so - * we use zero. The second argument is the parameter for the - * notification mechanism (like irq number). We currently leave this - * as zero, virtqueues have implicit notifications. - * - * The third argument is a "struct hv_ops" containing the put_chars() - * get_chars(), notifier_add() and notifier_del() pointers. - * The final argument is the output buffer size: we can do any size, - * so we put PAGE_SIZE here. */ - hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); - if (IS_ERR(hvc)) { - err = PTR_ERR(hvc); - goto free_vqs; + vdev->config->get(vdev, offsetof(struct virtio_console_config, + nr_ports), + &portdev->config.nr_ports, + sizeof(portdev->config.nr_ports)); + vdev->config->get(vdev, offsetof(struct virtio_console_config, + max_nr_ports), + &portdev->config.max_nr_ports, + sizeof(portdev->config.max_nr_ports)); + if (portdev->config.nr_ports > portdev->config.max_nr_ports) { + dev_warn(&vdev->dev, + "More ports (%u) specified than allowed (%u). Will init %u ports.", + portdev->config.nr_ports, + portdev->config.max_nr_ports, + portdev->config.max_nr_ports); + + portdev->config.nr_ports = portdev->config.max_nr_ports; + } } - /* Register the input buffer the first time. */ - add_inbuf(); + /* Let the Host know we support multiple ports.*/ + vdev->config->finalize_features(vdev); + + err = init_vqs(portdev); + if (err < 0) { + dev_err(&vdev->dev, "Error %d initializing vqs\n", err); + goto free_chrdev; + } + + spin_lock_init(&portdev->ports_lock); + INIT_LIST_HEAD(&portdev->ports); + + if (multiport) { + spin_lock_init(&portdev->cvq_lock); + INIT_WORK(&portdev->control_work, &control_work_handler); + INIT_WORK(&portdev->config_work, &config_work_handler); + + portdev->outbuf = alloc_buf(PAGE_SIZE); + if (!portdev->outbuf) { + err = -ENOMEM; + dev_err(&vdev->dev, "OOM for control outbuf\n"); + goto free_vqs; + } + fill_queue(portdev->c_ivq, &portdev->cvq_lock); + } + + for (i = 0; i < portdev->config.nr_ports; i++) + add_port(portdev, i); + + /* Start using the new console output. */ + early_put_chars = NULL; return 0; free_vqs: vdev->config->del_vqs(vdev); + kfree(portdev->in_vqs); + kfree(portdev->out_vqs); +free_chrdev: + unregister_chrdev(portdev->chr_major, "virtio-portsdev"); free: - kfree(inbuf); + kfree(portdev); fail: return err; } @@ -253,6 +1503,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_CONSOLE_F_SIZE, + VIRTIO_CONSOLE_F_MULTIPORT, }; static struct virtio_driver virtio_console = { @@ -262,11 +1513,27 @@ static struct virtio_driver virtio_console = { .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtcons_probe, - .config_changed = virtcons_apply_config, + .config_changed = config_intr, }; static int __init init(void) { + int err; + + pdrvdata.class = class_create(THIS_MODULE, "virtio-ports"); + if (IS_ERR(pdrvdata.class)) { + err = PTR_ERR(pdrvdata.class); + pr_err("Error %d creating virtio-ports class\n", err); + return err; + } + + pdrvdata.debugfs_dir = debugfs_create_dir("virtio-ports", NULL); + if (!pdrvdata.debugfs_dir) { + pr_warning("Error %ld creating debugfs dir for virtio-ports\n", + PTR_ERR(pdrvdata.debugfs_dir)); + } + INIT_LIST_HEAD(&pdrvdata.consoles); + return register_virtio_driver(&virtio_console); } module_init(init); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c708ecc3cb2e..72b3f212b613 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -56,8 +56,7 @@ struct virtnet_info /* Host will merge rx buffers for big packets (shake it! shake it!) */ bool mergeable_rx_bufs; - /* Receive & send queues. */ - struct sk_buff_head recv; + /* Send queue. */ struct sk_buff_head send; /* Work struct for refilling if we run low on memory. */ @@ -75,34 +74,44 @@ struct skb_vnet_hdr { unsigned int num_sg; }; +struct padded_vnet_hdr { + struct virtio_net_hdr hdr; + /* + * virtio_net_hdr should be in a separated sg buffer because of a + * QEMU bug, and data sg buffer shares same page with this header sg. + * This padding makes next sg 16 byte aligned after virtio_net_hdr. + */ + char padding[6]; +}; + static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) { return (struct skb_vnet_hdr *)skb->cb; } -static void give_a_page(struct virtnet_info *vi, struct page *page) -{ - page->private = (unsigned long)vi->pages; - vi->pages = page; -} - -static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb) +/* + * private is used to chain pages for big packets, put the whole + * most recent used list in the beginning for reuse + */ +static void give_pages(struct virtnet_info *vi, struct page *page) { - unsigned int i; + struct page *end; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - give_a_page(vi, skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags = 0; - skb->data_len = 0; + /* Find end of list, sew whole thing into vi->pages. */ + for (end = page; end->private; end = (struct page *)end->private); + end->private = (unsigned long)vi->pages; + vi->pages = page; } static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask) { struct page *p = vi->pages; - if (p) + if (p) { vi->pages = (struct page *)p->private; - else + /* clear private here, it is used to chain pages */ + p->private = 0; + } else p = alloc_page(gfp_mask); return p; } @@ -118,99 +127,142 @@ static void skb_xmit_done(struct virtqueue *svq) netif_wake_queue(vi->dev); } -static void receive_skb(struct net_device *dev, struct sk_buff *skb, - unsigned len) +static void set_skb_frag(struct sk_buff *skb, struct page *page, + unsigned int offset, unsigned int *len) { - struct virtnet_info *vi = netdev_priv(dev); - struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); - int err; - int i; - - if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { - pr_debug("%s: short packet %i\n", dev->name, len); - dev->stats.rx_length_errors++; - goto drop; - } + int i = skb_shinfo(skb)->nr_frags; + skb_frag_t *f; + + f = &skb_shinfo(skb)->frags[i]; + f->size = min((unsigned)PAGE_SIZE - offset, *len); + f->page_offset = offset; + f->page = page; + + skb->data_len += f->size; + skb->len += f->size; + skb_shinfo(skb)->nr_frags++; + *len -= f->size; +} - if (vi->mergeable_rx_bufs) { - unsigned int copy; - char *p = page_address(skb_shinfo(skb)->frags[0].page); +static struct sk_buff *page_to_skb(struct virtnet_info *vi, + struct page *page, unsigned int len) +{ + struct sk_buff *skb; + struct skb_vnet_hdr *hdr; + unsigned int copy, hdr_len, offset; + char *p; - if (len > PAGE_SIZE) - len = PAGE_SIZE; - len -= sizeof(struct virtio_net_hdr_mrg_rxbuf); + p = page_address(page); - memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr)); - p += sizeof(hdr->mhdr); + /* copy small packet so we can reuse these pages for small data */ + skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN); + if (unlikely(!skb)) + return NULL; - copy = len; - if (copy > skb_tailroom(skb)) - copy = skb_tailroom(skb); + hdr = skb_vnet_hdr(skb); - memcpy(skb_put(skb, copy), p, copy); + if (vi->mergeable_rx_bufs) { + hdr_len = sizeof hdr->mhdr; + offset = hdr_len; + } else { + hdr_len = sizeof hdr->hdr; + offset = sizeof(struct padded_vnet_hdr); + } - len -= copy; + memcpy(hdr, p, hdr_len); - if (!len) { - give_a_page(vi, skb_shinfo(skb)->frags[0].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[0].page_offset += - sizeof(hdr->mhdr) + copy; - skb_shinfo(skb)->frags[0].size = len; - skb->data_len += len; - skb->len += len; - } + len -= hdr_len; + p += offset; - while (--hdr->mhdr.num_buffers) { - struct sk_buff *nskb; + copy = len; + if (copy > skb_tailroom(skb)) + copy = skb_tailroom(skb); + memcpy(skb_put(skb, copy), p, copy); - i = skb_shinfo(skb)->nr_frags; - if (i >= MAX_SKB_FRAGS) { - pr_debug("%s: packet too long %d\n", dev->name, - len); - dev->stats.rx_length_errors++; - goto drop; - } + len -= copy; + offset += copy; - nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len); - if (!nskb) { - pr_debug("%s: rx error: %d buffers missing\n", - dev->name, hdr->mhdr.num_buffers); - dev->stats.rx_length_errors++; - goto drop; - } + while (len) { + set_skb_frag(skb, page, offset, &len); + page = (struct page *)page->private; + offset = 0; + } - __skb_unlink(nskb, &vi->recv); - vi->num--; + if (page) + give_pages(vi, page); - skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0]; - skb_shinfo(nskb)->nr_frags = 0; - kfree_skb(nskb); + return skb; +} - if (len > PAGE_SIZE) - len = PAGE_SIZE; +static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb) +{ + struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); + struct page *page; + int num_buf, i, len; + + num_buf = hdr->mhdr.num_buffers; + while (--num_buf) { + i = skb_shinfo(skb)->nr_frags; + if (i >= MAX_SKB_FRAGS) { + pr_debug("%s: packet too long\n", skb->dev->name); + skb->dev->stats.rx_length_errors++; + return -EINVAL; + } - skb_shinfo(skb)->frags[i].size = len; - skb_shinfo(skb)->nr_frags++; - skb->data_len += len; - skb->len += len; + page = vi->rvq->vq_ops->get_buf(vi->rvq, &len); + if (!page) { + pr_debug("%s: rx error: %d buffers missing\n", + skb->dev->name, hdr->mhdr.num_buffers); + skb->dev->stats.rx_length_errors++; + return -EINVAL; } - } else { - len -= sizeof(hdr->hdr); + if (len > PAGE_SIZE) + len = PAGE_SIZE; + + set_skb_frag(skb, page, 0, &len); + + --vi->num; + } + return 0; +} + +static void receive_buf(struct net_device *dev, void *buf, unsigned int len) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct sk_buff *skb; + struct page *page; + struct skb_vnet_hdr *hdr; - if (len <= MAX_PACKET_LEN) - trim_pages(vi, skb); + if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { + pr_debug("%s: short packet %i\n", dev->name, len); + dev->stats.rx_length_errors++; + if (vi->mergeable_rx_bufs || vi->big_packets) + give_pages(vi, buf); + else + dev_kfree_skb(buf); + return; + } - err = pskb_trim(skb, len); - if (err) { - pr_debug("%s: pskb_trim failed %i %d\n", dev->name, - len, err); + if (!vi->mergeable_rx_bufs && !vi->big_packets) { + skb = buf; + len -= sizeof(struct virtio_net_hdr); + skb_trim(skb, len); + } else { + page = buf; + skb = page_to_skb(vi, page, len); + if (unlikely(!skb)) { dev->stats.rx_dropped++; - goto drop; + give_pages(vi, page); + return; } + if (vi->mergeable_rx_bufs) + if (receive_mergeable(vi, skb)) { + dev_kfree_skb(skb); + return; + } } + hdr = skb_vnet_hdr(skb); skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; @@ -267,110 +319,119 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, frame_err: dev->stats.rx_frame_errors++; -drop: dev_kfree_skb(skb); } -static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) +static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp) { struct sk_buff *skb; - struct scatterlist sg[2+MAX_SKB_FRAGS]; - int num, err, i; - bool oom = false; - - sg_init_table(sg, 2+MAX_SKB_FRAGS); - do { - struct skb_vnet_hdr *hdr; + struct skb_vnet_hdr *hdr; + struct scatterlist sg[2]; + int err; - skb = netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN); - if (unlikely(!skb)) { - oom = true; - break; - } + skb = netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN); + if (unlikely(!skb)) + return -ENOMEM; - skb_put(skb, MAX_PACKET_LEN); + skb_put(skb, MAX_PACKET_LEN); - hdr = skb_vnet_hdr(skb); - sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); + hdr = skb_vnet_hdr(skb); + sg_set_buf(sg, &hdr->hdr, sizeof hdr->hdr); - if (vi->big_packets) { - for (i = 0; i < MAX_SKB_FRAGS; i++) { - skb_frag_t *f = &skb_shinfo(skb)->frags[i]; - f->page = get_a_page(vi, gfp); - if (!f->page) - break; + skb_to_sgvec(skb, sg + 1, 0, skb->len); - f->page_offset = 0; - f->size = PAGE_SIZE; + err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 2, skb); + if (err < 0) + dev_kfree_skb(skb); - skb->data_len += PAGE_SIZE; - skb->len += PAGE_SIZE; + return err; +} - skb_shinfo(skb)->nr_frags++; - } +static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp) +{ + struct scatterlist sg[MAX_SKB_FRAGS + 2]; + struct page *first, *list = NULL; + char *p; + int i, err, offset; + + /* page in sg[MAX_SKB_FRAGS + 1] is list tail */ + for (i = MAX_SKB_FRAGS + 1; i > 1; --i) { + first = get_a_page(vi, gfp); + if (!first) { + if (list) + give_pages(vi, list); + return -ENOMEM; } + sg_set_buf(&sg[i], page_address(first), PAGE_SIZE); - num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - skb_queue_head(&vi->recv, skb); + /* chain new page in list head to match sg */ + first->private = (unsigned long)list; + list = first; + } - err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb); - if (err < 0) { - skb_unlink(skb, &vi->recv); - trim_pages(vi, skb); - kfree_skb(skb); - break; - } - vi->num++; - } while (err >= num); - if (unlikely(vi->num > vi->max)) - vi->max = vi->num; - vi->rvq->vq_ops->kick(vi->rvq); - return !oom; + first = get_a_page(vi, gfp); + if (!first) { + give_pages(vi, list); + return -ENOMEM; + } + p = page_address(first); + + /* sg[0], sg[1] share the same page */ + /* a separated sg[0] for virtio_net_hdr only during to QEMU bug*/ + sg_set_buf(&sg[0], p, sizeof(struct virtio_net_hdr)); + + /* sg[1] for data packet, from offset */ + offset = sizeof(struct padded_vnet_hdr); + sg_set_buf(&sg[1], p + offset, PAGE_SIZE - offset); + + /* chain first in list head */ + first->private = (unsigned long)list; + err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, MAX_SKB_FRAGS + 2, + first); + if (err < 0) + give_pages(vi, first); + + return err; } -/* Returns false if we couldn't fill entirely (OOM). */ -static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) +static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp) { - struct sk_buff *skb; - struct scatterlist sg[1]; + struct page *page; + struct scatterlist sg; int err; - bool oom = false; - - if (!vi->mergeable_rx_bufs) - return try_fill_recv_maxbufs(vi, gfp); - do { - skb_frag_t *f; + page = get_a_page(vi, gfp); + if (!page) + return -ENOMEM; - skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN); - if (unlikely(!skb)) { - oom = true; - break; - } + sg_init_one(&sg, page_address(page), PAGE_SIZE); - f = &skb_shinfo(skb)->frags[0]; - f->page = get_a_page(vi, gfp); - if (!f->page) { - oom = true; - kfree_skb(skb); - break; - } + err = vi->rvq->vq_ops->add_buf(vi->rvq, &sg, 0, 1, page); + if (err < 0) + give_pages(vi, page); - f->page_offset = 0; - f->size = PAGE_SIZE; + return err; +} - skb_shinfo(skb)->nr_frags++; +/* Returns false if we couldn't fill entirely (OOM). */ +static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) +{ + int err; + bool oom = false; - sg_init_one(sg, page_address(f->page), PAGE_SIZE); - skb_queue_head(&vi->recv, skb); + do { + if (vi->mergeable_rx_bufs) + err = add_recvbuf_mergeable(vi, gfp); + else if (vi->big_packets) + err = add_recvbuf_big(vi, gfp); + else + err = add_recvbuf_small(vi, gfp); - err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb); if (err < 0) { - skb_unlink(skb, &vi->recv); - kfree_skb(skb); + oom = true; break; } - vi->num++; + ++vi->num; } while (err > 0); if (unlikely(vi->num > vi->max)) vi->max = vi->num; @@ -408,15 +469,14 @@ static void refill_work(struct work_struct *work) static int virtnet_poll(struct napi_struct *napi, int budget) { struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi); - struct sk_buff *skb = NULL; + void *buf; unsigned int len, received = 0; again: while (received < budget && - (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) { - __skb_unlink(skb, &vi->recv); - receive_skb(vi->dev, skb, len); - vi->num--; + (buf = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) { + receive_buf(vi->dev, buf, len); + --vi->num; received++; } @@ -496,9 +556,9 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) /* Encode metadata header at front. */ if (vi->mergeable_rx_bufs) - sg_set_buf(sg, &hdr->mhdr, sizeof(hdr->mhdr)); + sg_set_buf(sg, &hdr->mhdr, sizeof hdr->mhdr); else - sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); + sg_set_buf(sg, &hdr->hdr, sizeof hdr->hdr); hdr->num_sg = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; return vi->svq->vq_ops->add_buf(vi->svq, sg, hdr->num_sg, 0, skb); @@ -916,8 +976,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_HW_VLAN_FILTER; } - /* Initialize our empty receive and send queues. */ - skb_queue_head_init(&vi->recv); + /* Initialize our empty send queue. */ skb_queue_head_init(&vi->send); err = register_netdev(dev); @@ -952,25 +1011,35 @@ free: return err; } +static void free_unused_bufs(struct virtnet_info *vi) +{ + void *buf; + while (1) { + buf = vi->rvq->vq_ops->detach_unused_buf(vi->rvq); + if (!buf) + break; + if (vi->mergeable_rx_bufs || vi->big_packets) + give_pages(vi, buf); + else + dev_kfree_skb(buf); + --vi->num; + } + BUG_ON(vi->num != 0); +} + static void __devexit virtnet_remove(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - struct sk_buff *skb; /* Stop all the virtqueues. */ vdev->config->reset(vdev); - /* Free our skbs in send and recv queues, if any. */ - while ((skb = __skb_dequeue(&vi->recv)) != NULL) { - kfree_skb(skb); - vi->num--; - } + /* Free our skbs in send queue, if any. */ __skb_queue_purge(&vi->send); - BUG_ON(vi->num != 0); - unregister_netdev(vi->dev); cancel_delayed_work_sync(&vi->refill); + free_unused_bufs(vi); vdev->config->del_vqs(vi->vdev); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 505be88c82ae..3db3d242c3ee 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -28,7 +28,7 @@ struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; /* Where the ballooning thread waits for config to change. */ wait_queue_head_t config_change; @@ -49,6 +49,10 @@ struct virtio_balloon /* The array of pfns we tell the Host about. */ unsigned int num_pfns; u32 pfns[256]; + + /* Memory statistics */ + int need_stats_update; + struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; }; static struct virtio_device_id id_table[] = { @@ -154,6 +158,72 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) } } +static inline void update_stat(struct virtio_balloon *vb, int idx, + u16 tag, u64 val) +{ + BUG_ON(idx >= VIRTIO_BALLOON_S_NR); + vb->stats[idx].tag = tag; + vb->stats[idx].val = val; +} + +#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) + +static void update_balloon_stats(struct virtio_balloon *vb) +{ + unsigned long events[NR_VM_EVENT_ITEMS]; + struct sysinfo i; + int idx = 0; + + all_vm_events(events); + si_meminfo(&i); + + update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, + pages_to_bytes(events[PSWPIN])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, + pages_to_bytes(events[PSWPOUT])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, + pages_to_bytes(i.freeram)); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, + pages_to_bytes(i.totalram)); +} + +/* + * While most virtqueues communicate guest-initiated requests to the hypervisor, + * the stats queue operates in reverse. The driver initializes the virtqueue + * with a single buffer. From that point forward, all conversations consist of + * a hypervisor request (a call to this function) which directs us to refill + * the virtqueue with a fresh stats buffer. Since stats collection can sleep, + * we notify our kthread which does the actual work via stats_handle_request(). + */ +static void stats_request(struct virtqueue *vq) +{ + struct virtio_balloon *vb; + unsigned int len; + + vb = vq->vq_ops->get_buf(vq, &len); + if (!vb) + return; + vb->need_stats_update = 1; + wake_up(&vb->config_change); +} + +static void stats_handle_request(struct virtio_balloon *vb) +{ + struct virtqueue *vq; + struct scatterlist sg; + + vb->need_stats_update = 0; + update_balloon_stats(vb); + + vq = vb->stats_vq; + sg_init_one(&sg, vb->stats, sizeof(vb->stats)); + if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0) + BUG(); + vq->vq_ops->kick(vq); +} + static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -190,8 +260,11 @@ static int balloon(void *_vballoon) try_to_freeze(); wait_event_interruptible(vb->config_change, (diff = towards_target(vb)) != 0 + || vb->need_stats_update || kthread_should_stop() || freezing(current)); + if (vb->need_stats_update) + stats_handle_request(vb); if (diff > 0) fill_balloon(vb, diff); else if (diff < 0) @@ -204,10 +277,10 @@ static int balloon(void *_vballoon) static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; - struct virtqueue *vqs[2]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack }; - const char *names[] = { "inflate", "deflate" }; - int err; + struct virtqueue *vqs[3]; + vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; + const char *names[] = { "inflate", "deflate", "stats" }; + int err, nvqs; vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); if (!vb) { @@ -220,13 +293,29 @@ static int virtballoon_probe(struct virtio_device *vdev) init_waitqueue_head(&vb->config_change); vb->vdev = vdev; - /* We expect two virtqueues. */ - err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names); + /* We expect two virtqueues: inflate and deflate, + * and optionally stat. */ + nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; + err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names); if (err) goto out_free_vb; vb->inflate_vq = vqs[0]; vb->deflate_vq = vqs[1]; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + struct scatterlist sg; + vb->stats_vq = vqs[2]; + + /* + * Prime this virtqueue with one buffer so the hypervisor can + * use it to signal us later. + */ + sg_init_one(&sg, vb->stats, sizeof vb->stats); + if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq, + &sg, 1, 0, vb) < 0) + BUG(); + vb->stats_vq->vq_ops->kick(vb->stats_vq); + } vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { @@ -264,7 +353,10 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev) kfree(vb); } -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST }; +static unsigned int features[] = { + VIRTIO_BALLOON_F_MUST_TELL_HOST, + VIRTIO_BALLOON_F_STATS_VQ, +}; static struct virtio_driver virtio_balloon_driver = { .feature_table = features, diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 28d9cf7cf72f..1d5191fab62e 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -702,7 +702,7 @@ static struct pci_driver virtio_pci_driver = { .name = "virtio-pci", .id_table = virtio_pci_id_table, .probe = virtio_pci_probe, - .remove = virtio_pci_remove, + .remove = __devexit_p(virtio_pci_remove), #ifdef CONFIG_PM .suspend = virtio_pci_suspend, .resume = virtio_pci_resume, diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fbd2ecde93e4..71929ee00d69 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -334,6 +334,30 @@ static bool vring_enable_cb(struct virtqueue *_vq) return true; } +static void *vring_detach_unused_buf(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned int i; + void *buf; + + START_USE(vq); + + for (i = 0; i < vq->vring.num; i++) { + if (!vq->data[i]) + continue; + /* detach_buf clears data, so grab it now. */ + buf = vq->data[i]; + detach_buf(vq, i); + END_USE(vq); + return buf; + } + /* That should have freed everything. */ + BUG_ON(vq->num_free != vq->vring.num); + + END_USE(vq); + return NULL; +} + irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -360,6 +384,7 @@ static struct virtqueue_ops vring_vq_ops = { .kick = vring_kick, .disable_cb = vring_disable_cb, .enable_cb = vring_enable_cb, + .detach_unused_buf = vring_detach_unused_buf, }; struct virtqueue *vring_new_virtqueue(unsigned int num, |