author     Stephen Rothwell <sfr@canb.auug.org.au>  2010-01-25 12:29:15 +1100
committer  Stephen Rothwell <sfr@canb.auug.org.au>  2010-01-25 12:29:15 +1100
commit     9fbd689999fdb1ae07b5875acc8ff74689588a28 (patch)
tree       9df3da73d14d6eee15fb69923efeb13e5f9e30cc /drivers
parent     2c366ded0308dcb44ff737fce516ec2f87ac7c9d (diff)
parent     2d48aec1b18e1dd5f58c348ac25c86f6c37051b8 (diff)
Merge branch 'quilt/rr'
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/char/Kconfig             |    8
-rw-r--r--  drivers/char/hvc_beat.c          |    2
-rw-r--r--  drivers/char/hvc_console.c       |    7
-rw-r--r--  drivers/char/hvc_console.h       |    7
-rw-r--r--  drivers/char/hvc_iseries.c       |    2
-rw-r--r--  drivers/char/hvc_iucv.c          |    2
-rw-r--r--  drivers/char/hvc_rtas.c          |    2
-rw-r--r--  drivers/char/hvc_udbg.c          |    2
-rw-r--r--  drivers/char/hvc_vio.c           |    2
-rw-r--r--  drivers/char/hvc_xen.c           |    2
-rw-r--r--  drivers/char/virtio_console.c    | 1559
-rw-r--r--  drivers/net/virtio_net.c         |  427
-rw-r--r--  drivers/virtio/virtio_balloon.c  |  108
-rw-r--r--  drivers/virtio/virtio_pci.c      |    2
-rw-r--r--  drivers/virtio/virtio_ring.c     |   25
15 files changed, 1810 insertions(+), 347 deletions(-)
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index e023682be2c4..3141dd3b6e53 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -666,6 +666,14 @@ config VIRTIO_CONSOLE
help
Virtio console for use with lguest and other hypervisors.
+ Also serves as a general-purpose serial device for data
+ transfer between the guest and host. Character devices at
+ /dev/vportNpn will be created when corresponding ports are
+ found, where N is the device number and n is the port number
+ within that device. If specified by the host, a sysfs
+ attribute called 'name' will be populated with a name for
+ the port which can be used by udev scripts to create a
+ symlink to the device.
config HVCS
tristate "IBM Hypervisor Virtual Console Server support"
diff --git a/drivers/char/hvc_beat.c b/drivers/char/hvc_beat.c
index 0afc8b82212e..6913fc33270c 100644
--- a/drivers/char/hvc_beat.c
+++ b/drivers/char/hvc_beat.c
@@ -84,7 +84,7 @@ static int hvc_beat_put_chars(uint32_t vtermno, const char *buf, int cnt)
return cnt;
}
-static struct hv_ops hvc_beat_get_put_ops = {
+static const struct hv_ops hvc_beat_get_put_ops = {
.get_chars = hvc_beat_get_chars,
.put_chars = hvc_beat_put_chars,
};
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 416d3423150d..d8dac5820f0e 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -125,7 +125,7 @@ static struct hvc_struct *hvc_get_by_index(int index)
* console interfaces but can still be used as a tty device. This has to be
* static because kmalloc will not work during early console init.
*/
-static struct hv_ops *cons_ops[MAX_NR_HVC_CONSOLES];
+static const struct hv_ops *cons_ops[MAX_NR_HVC_CONSOLES];
static uint32_t vtermnos[MAX_NR_HVC_CONSOLES] =
{[0 ... MAX_NR_HVC_CONSOLES - 1] = -1};
@@ -247,7 +247,7 @@ static void destroy_hvc_struct(struct kref *kref)
* vty adapters do NOT get an hvc_instantiate() callback since they
* appear after early console init.
*/
-int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops)
+int hvc_instantiate(uint32_t vtermno, int index, const struct hv_ops *ops)
{
struct hvc_struct *hp;
@@ -749,7 +749,8 @@ static const struct tty_operations hvc_ops = {
};
struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data,
- struct hv_ops *ops, int outbuf_size)
+ const struct hv_ops *ops,
+ int outbuf_size)
{
struct hvc_struct *hp;
int i;
diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h
index 10950ca706d8..52ddf4d3716c 100644
--- a/drivers/char/hvc_console.h
+++ b/drivers/char/hvc_console.h
@@ -55,7 +55,7 @@ struct hvc_struct {
int outbuf_size;
int n_outbuf;
uint32_t vtermno;
- struct hv_ops *ops;
+ const struct hv_ops *ops;
int irq_requested;
int data;
struct winsize ws;
@@ -76,11 +76,12 @@ struct hv_ops {
};
/* Register a vterm and a slot index for use as a console (console_init) */
-extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
+extern int hvc_instantiate(uint32_t vtermno, int index,
+ const struct hv_ops *ops);
/* register a vterm for hvc tty operation (module_init or hotplug add) */
extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data,
- struct hv_ops *ops, int outbuf_size);
+ const struct hv_ops *ops, int outbuf_size);
/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */
extern int hvc_remove(struct hvc_struct *hp);
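For illustration, a minimal backend sketch against the constified interface declared above. It is not part of this patch; the demo_* names are hypothetical and modeled on the hvc_beat/hvc_rtas backends updated later in this diff.

#include <linux/err.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include "hvc_console.h"

/* Hypothetical backend callbacks; a real driver talks to its hypervisor here. */
static int demo_get_chars(uint32_t vtermno, char *buf, int count)
{
	return 0;			/* no input pending */
}

static int demo_put_chars(uint32_t vtermno, const char *buf, int count)
{
	return count;			/* pretend all bytes were consumed */
}

/* After this patch the ops table must be const to match the prototypes above. */
static const struct hv_ops demo_ops = {
	.get_chars = demo_get_chars,
	.put_chars = demo_put_chars,
};

static struct hvc_struct *demo_hvc;

static int __init demo_init(void)
{
	/* vtermno 0, no notification data; any output buffer size works
	 * (the in-tree drivers use PAGE_SIZE), 256 keeps the sketch simple. */
	demo_hvc = hvc_alloc(0, 0, &demo_ops, 256);
	return IS_ERR(demo_hvc) ? PTR_ERR(demo_hvc) : 0;
}
module_init(demo_init);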
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c
index 936d05bf37fa..fd0242676a2a 100644
--- a/drivers/char/hvc_iseries.c
+++ b/drivers/char/hvc_iseries.c
@@ -197,7 +197,7 @@ done:
return sent;
}
-static struct hv_ops hvc_get_put_ops = {
+static const struct hv_ops hvc_get_put_ops = {
.get_chars = get_chars,
.put_chars = put_chars,
.notifier_add = notifier_add_irq,
diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c
index fe62bd0e17b7..21681a81cc35 100644
--- a/drivers/char/hvc_iucv.c
+++ b/drivers/char/hvc_iucv.c
@@ -922,7 +922,7 @@ static int hvc_iucv_pm_restore_thaw(struct device *dev)
/* HVC operations */
-static struct hv_ops hvc_iucv_ops = {
+static const struct hv_ops hvc_iucv_ops = {
.get_chars = hvc_iucv_get_chars,
.put_chars = hvc_iucv_put_chars,
.notifier_add = hvc_iucv_notifier_add,
diff --git a/drivers/char/hvc_rtas.c b/drivers/char/hvc_rtas.c
index 88590d040046..61c4a61558d9 100644
--- a/drivers/char/hvc_rtas.c
+++ b/drivers/char/hvc_rtas.c
@@ -71,7 +71,7 @@ static int hvc_rtas_read_console(uint32_t vtermno, char *buf, int count)
return i;
}
-static struct hv_ops hvc_rtas_get_put_ops = {
+static const struct hv_ops hvc_rtas_get_put_ops = {
.get_chars = hvc_rtas_read_console,
.put_chars = hvc_rtas_write_console,
};
diff --git a/drivers/char/hvc_udbg.c b/drivers/char/hvc_udbg.c
index bd63ba878a56..b0957e61a7be 100644
--- a/drivers/char/hvc_udbg.c
+++ b/drivers/char/hvc_udbg.c
@@ -58,7 +58,7 @@ static int hvc_udbg_get(uint32_t vtermno, char *buf, int count)
return i;
}
-static struct hv_ops hvc_udbg_ops = {
+static const struct hv_ops hvc_udbg_ops = {
.get_chars = hvc_udbg_get,
.put_chars = hvc_udbg_put,
};
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c
index 10be343d6ae7..27370e99c66f 100644
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -77,7 +77,7 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count)
return got;
}
-static struct hv_ops hvc_get_put_ops = {
+static const struct hv_ops hvc_get_put_ops = {
.get_chars = filtered_get_chars,
.put_chars = hvc_put_chars,
.notifier_add = notifier_add_irq,
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index b1a71638c772..60446f82a3fc 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -122,7 +122,7 @@ static int read_console(uint32_t vtermno, char *buf, int len)
return recv;
}
-static struct hv_ops hvc_ops = {
+static const struct hv_ops hvc_ops = {
.get_chars = read_console,
.put_chars = write_console,
.notifier_add = notifier_add_irq,
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index a035ae39a359..166c5b2e20bb 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1,18 +1,6 @@
-/*D:300
- * The Guest console driver
- *
- * Writing console drivers is one of the few remaining Dark Arts in Linux.
- * Fortunately for us, the path of virtual consoles has been well-trodden by
- * the PowerPC folks, who wrote "hvc_console.c" to generically support any
- * virtual console. We use that infrastructure which only requires us to write
- * the basic put_chars and get_chars functions and call the right register
- * functions.
- :*/
-
-/*M:002 The console can be flooded: while the Guest is processing input the
- * Host can send more. Buffering in the Host could alleviate this, but it is a
- * difficult problem in general. :*/
-/* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
+/*
+ * Copyright (C) 2006, 2007, 2009 Rusty Russell, IBM Corporation
+ * Copyright (C) 2009, Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -28,142 +16,704 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/cdev.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
#include <linux/err.h>
+#include <linux/fs.h>
#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
#include <linux/virtio.h>
#include <linux/virtio_console.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
#include "hvc_console.h"
-/*D:340 These represent our input and output console queues, and the virtio
- * operations for them. */
-static struct virtqueue *in_vq, *out_vq;
-static struct virtio_device *vdev;
+/*
+ * This is a global struct for storing common data for all the devices
+ * this driver handles.
+ *
+ * Mainly, it has a linked list for all the consoles in one place so
+ * that callbacks from hvc for get_chars(), put_chars() work properly
+ * across multiple devices and multiple ports per device.
+ */
+struct ports_driver_data {
+ /* Used for registering chardevs */
+ struct class *class;
+
+ /* Used for exporting per-port information to debugfs */
+ struct dentry *debugfs_dir;
+
+ /* Number of devices this driver is handling */
+ unsigned int index;
+
+ /*
+ * This is used to keep track of the number of hvc consoles
+ * spawned by this driver. This number is given as the first
+ * argument to hvc_alloc(). To correctly map an initial
+ * console spawned via hvc_instantiate to the console being
+ * hooked up via hvc_alloc, we need to pass the same vtermno.
+ *
+ * We also just assume the first console being initialised was
+ * the first one that got used as the initial console.
+ */
+ unsigned int next_vtermno;
+
+ /* All the console devices handled by this driver */
+ struct list_head consoles;
+};
+static struct ports_driver_data pdrvdata;
+
+DEFINE_SPINLOCK(pdrvdata_lock);
+
+/* This struct holds information that's relevant only for console ports */
+struct console {
+ /* We'll place all consoles in a list in the pdrvdata struct */
+ struct list_head list;
+
+ /* The hvc device associated with this console port */
+ struct hvc_struct *hvc;
+
+ /*
+ * This number identifies the number that we used to register
+ * with hvc in hvc_instantiate() and hvc_alloc(); this is the
+ * number passed on by the hvc callbacks to us to
+ * differentiate between the other console ports handled by
+ * this driver
+ */
+ u32 vtermno;
+};
+
+struct port_buffer {
+ char *buf;
+
+ /* size of the buffer in *buf above */
+ size_t size;
+
+ /* used length of the buffer */
+ size_t len;
+ /* offset in the buf from which to consume data */
+ size_t offset;
+};
+
+/*
+ * This is a per-device struct that stores data common to all the
+ * ports for that device (vdev->priv).
+ */
+struct ports_device {
+ /*
+ * Workqueue handlers where we process deferred work after
+ * notification
+ */
+ struct work_struct control_work;
+ struct work_struct config_work;
+
+ struct list_head ports;
+
+ /* To protect the list of ports */
+ spinlock_t ports_lock;
+
+ /* To protect the vq operations for the control channel */
+ spinlock_t cvq_lock;
-/* This is our input buffer, and how much data is left in it. */
-static unsigned int in_len;
-static char *in, *inbuf;
+ /* The current config space is stored here */
+ struct virtio_console_config config;
-/* The operations for our console. */
-static struct hv_ops virtio_cons;
+ /* The virtio device we're associated with */
+ struct virtio_device *vdev;
-/* The hvc device */
-static struct hvc_struct *hvc;
+ /*
+ * A couple of virtqueues for the control channel: one for
+ * guest->host transfers, one for host->guest transfers
+ */
+ struct virtqueue *c_ivq, *c_ovq;
-/*D:310 The put_chars() callback is pretty straightforward.
+ /* Array of per-port IO virtqueues */
+ struct virtqueue **in_vqs, **out_vqs;
+
+ /* The control messages to the Host are sent via this buffer */
+ struct port_buffer *outbuf;
+
+ /* Used for numbering devices for sysfs and debugfs */
+ unsigned int drv_index;
+
+ /* Major number for this device. Ports will be created as minors. */
+ int chr_major;
+};
+
+/* This struct holds the per-port data */
+struct port {
+ /* Next port in the list, head is in the ports_device */
+ struct list_head list;
+
+ /* Pointer to the parent virtio_console device */
+ struct ports_device *portdev;
+
+ /* The current buffer from which data has to be fed to readers */
+ struct port_buffer *inbuf;
+
+ /*
+ * To protect the operations on the in_vq associated with this
+ * port. Has to be a spinlock because it can be called from
+ * interrupt context (get_char()).
+ */
+ spinlock_t inbuf_lock;
+
+ /* Buffer that's used to pass data from the guest to the host */
+ struct port_buffer *outbuf;
+
+ /* The IO vqs for this port */
+ struct virtqueue *in_vq, *out_vq;
+
+ /* File in the debugfs directory that exposes this port's information */
+ struct dentry *debugfs_file;
+
+ /*
+ * The entries in this struct will be valid if this port is
+ * hooked up to an hvc console
+ */
+ struct console cons;
+
+ /* Each port associates with a separate char device */
+ struct cdev cdev;
+ struct device *dev;
+
+ /* A waitqueue for poll() or blocking read operations */
+ wait_queue_head_t waitqueue;
+
+ /* The 'name' of the port that we expose via sysfs properties */
+ char *name;
+
+ /* The 'id' to identify the port with the Host */
+ u32 id;
+
+ /* Is the host device open */
+ bool host_connected;
+
+ /* We should allow only one process to open a port */
+ bool guest_connected;
+
+ /* Does the Host not want to accept more data currently? */
+ bool host_throttled;
+};
+
+/* This is the very early arch-specified put chars function. */
+static int (*early_put_chars)(u32, const char *, int);
+
+static struct port *find_port_by_vtermno(u32 vtermno)
+{
+ struct port *port;
+ struct console *cons;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pdrvdata_lock, flags);
+ list_for_each_entry(cons, &pdrvdata.consoles, list) {
+ if (cons->vtermno == vtermno) {
+ port = container_of(cons, struct port, cons);
+ goto out;
+ }
+ }
+ port = NULL;
+out:
+ spin_unlock_irqrestore(&pdrvdata_lock, flags);
+ return port;
+}
+
+static struct port *find_port_by_id(struct ports_device *portdev, u32 id)
+{
+ struct port *port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&portdev->ports_lock, flags);
+ list_for_each_entry(port, &portdev->ports, list)
+ if (port->id == id)
+ goto out;
+ port = NULL;
+out:
+ spin_unlock_irqrestore(&portdev->ports_lock, flags);
+
+ return port;
+}
+
+static struct port *find_port_by_vq(struct ports_device *portdev,
+ struct virtqueue *vq)
+{
+ struct port *port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&portdev->ports_lock, flags);
+ list_for_each_entry(port, &portdev->ports, list)
+ if (port->in_vq == vq || port->out_vq == vq)
+ goto out;
+ port = NULL;
+out:
+ spin_unlock_irqrestore(&portdev->ports_lock, flags);
+ return port;
+}
+
+static bool is_console_port(struct port *port)
+{
+ if (port->cons.hvc)
+ return true;
+ return false;
+}
+
+static inline bool use_multiport(struct ports_device *portdev)
+{
+ /*
+ * This condition can be true when put_chars is called from
+ * early_init
+ */
+ if (!portdev->vdev)
+ return 0;
+ return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
+}
+
+static void free_buf(struct port_buffer *buf)
+{
+ kfree(buf->buf);
+ kfree(buf);
+}
+
+static struct port_buffer *alloc_buf(size_t buf_size)
+{
+ struct port_buffer *buf;
+
+ buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ goto fail;
+ buf->buf = kzalloc(buf_size, GFP_KERNEL);
+ if (!buf->buf)
+ goto free_buf;
+ buf->len = 0;
+ buf->offset = 0;
+ buf->size = buf_size;
+ return buf;
+
+free_buf:
+ kfree(buf);
+fail:
+ return NULL;
+}
+
+/* Callers should take appropriate locks */
+static void *get_inbuf(struct port *port)
+{
+ struct port_buffer *buf;
+ struct virtqueue *vq;
+ unsigned int len;
+
+ vq = port->in_vq;
+ buf = vq->vq_ops->get_buf(vq, &len);
+ if (buf) {
+ buf->len = len;
+ buf->offset = 0;
+ }
+ return buf;
+}
+
+/*
+ * Create a scatter-gather list representing our input buffer and put
+ * it in the queue.
*
- * We turn the characters into a scatter-gather list, add it to the output
- * queue and then kick the Host. Then we sit here waiting for it to finish:
- * inefficient in theory, but in practice implementations will do it
- * immediately (lguest's Launcher does). */
-static int put_chars(u32 vtermno, const char *buf, int count)
+ * Callers should take appropriate locks.
+ */
+static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf)
{
struct scatterlist sg[1];
+ int ret;
+
+ sg_init_one(sg, buf->buf, buf->size);
+
+ ret = vq->vq_ops->add_buf(vq, sg, 0, 1, buf);
+ vq->vq_ops->kick(vq);
+ return ret;
+}
+
+/* Discard any unread data this port has. Caller should hold appropriate locks. */
+static void discard_port_data(struct port *port)
+{
+ struct port_buffer *buf;
+ struct virtqueue *vq;
unsigned int len;
- /* This is a convenient routine to initialize a single-elem sg list */
- sg_init_one(sg, buf, count);
+ vq = port->in_vq;
+ if (port->inbuf)
+ buf = port->inbuf;
+ else
+ buf = vq->vq_ops->get_buf(vq, &len);
- /* add_buf wants a token to identify this buffer: we hand it any
- * non-NULL pointer, since there's only ever one buffer. */
- if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) >= 0) {
- /* Tell Host to go! */
- out_vq->vq_ops->kick(out_vq);
- /* Chill out until it's done with the buffer. */
- while (!out_vq->vq_ops->get_buf(out_vq, &len))
- cpu_relax();
+ if (!buf)
+ return;
+
+ if (add_inbuf(vq, buf) < 0) {
+ buf->len = buf->offset = 0;
+ dev_warn(port->dev, "Error adding buffer back to vq\n");
+ return;
}
- /* We're expected to return the amount of data we wrote: all of it. */
- return count;
+ port->inbuf = NULL;
+}
+
+static bool port_has_data(struct port *port)
+{
+ unsigned long flags;
+ bool ret;
+
+ ret = false;
+ spin_lock_irqsave(&port->inbuf_lock, flags);
+ if (port->inbuf)
+ ret = true;
+ spin_unlock_irqrestore(&port->inbuf_lock, flags);
+
+ return ret;
+}
+
+static ssize_t send_control_msg(struct port *port, unsigned int event,
+ unsigned int value)
+{
+ struct scatterlist sg[1];
+ struct virtio_console_control cpkt;
+ struct virtqueue *vq;
+ struct port_buffer *outbuf;
+ int tmplen;
+
+ if (!use_multiport(port->portdev))
+ return 0;
+
+ cpkt.id = port->id;
+ cpkt.event = event;
+ cpkt.value = value;
+
+ vq = port->portdev->c_ovq;
+ outbuf = port->portdev->outbuf;
+
+ memcpy(outbuf->buf, (void *)&cpkt, sizeof(cpkt));
+
+ sg_init_one(sg, outbuf->buf, sizeof(cpkt));
+ if (vq->vq_ops->add_buf(vq, sg, 1, 0, outbuf) >= 0) {
+ vq->vq_ops->kick(vq);
+ while (!vq->vq_ops->get_buf(vq, &tmplen))
+ cpu_relax();
+ }
+ return 0;
}
-/* Create a scatter-gather list representing our input buffer and put it in the
- * queue. */
-static void add_inbuf(void)
+static ssize_t send_buf(struct port *port, const char *in_buf, size_t in_count,
+ bool from_user)
{
struct scatterlist sg[1];
- sg_init_one(sg, inbuf, PAGE_SIZE);
+ struct virtqueue *out_vq;
+ struct port_buffer *buf;
+ ssize_t ret;
+ unsigned int tmplen;
+
+ out_vq = port->out_vq;
+ buf = port->outbuf;
+
+ if (buf->len) {
+ /*
+ * Nonzero buf->len means we had queued a buffer
+ * earlier to the Host to be consumed. Get the buffer
+ * back for this write request; wait while the Host
+ * consumes it.
+ */
+ while (!out_vq->vq_ops->get_buf(out_vq, &tmplen))
+ cpu_relax();
+ }
+
+ if (in_count > buf->size)
+ in_count = buf->size;
+
+ if (from_user) {
+ ret = copy_from_user(buf->buf, in_buf, in_count);
+ } else {
+ /*
+ * Since we're not sure when the host will actually
+ * consume the data and tell us about it, we have to
+ * copy the data here in case the caller frees the
+ * in_buf.
+ */
+ memcpy(buf->buf, in_buf, in_count);
+ ret = 0; /* Emulate copy_from_user behaviour */
+ }
+ buf->len = in_count - ret;
+
+ sg_init_one(sg, buf->buf, buf->len);
+ ret = out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, buf);
+
+ /* Tell Host to go! */
+ out_vq->vq_ops->kick(out_vq);
+
+ if (ret < 0)
+ buf->len = 0;
- /* We should always be able to add one buffer to an empty queue. */
- if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) < 0)
- BUG();
- in_vq->vq_ops->kick(in_vq);
+ /* We're expected to return the amount of data we wrote */
+ return buf->len;
}
-/*D:350 get_chars() is the callback from the hvc_console infrastructure when
- * an interrupt is received.
- *
- * Most of the code deals with the fact that the hvc_console() infrastructure
- * only asks us for 16 bytes at a time. We keep in_offset and in_used fields
- * for partially-filled buffers. */
-static int get_chars(u32 vtermno, char *buf, int count)
+/*
+ * Give out the data that's requested from the buffer that we have
+ * queued up.
+ */
+static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count,
+ bool to_user)
{
- /* If we don't have an input queue yet, we can't get input. */
- BUG_ON(!in_vq);
+ struct port_buffer *buf;
+ ssize_t ret;
+ unsigned long flags;
+
+ if (!out_count || !port_has_data(port))
+ return 0;
- /* No buffer? Try to get one. */
- if (!in_len) {
- in = in_vq->vq_ops->get_buf(in_vq, &in_len);
- if (!in)
+ buf = port->inbuf;
+ if (out_count > buf->len - buf->offset)
+ out_count = buf->len - buf->offset;
+
+ if (to_user) {
+ ret = copy_to_user(out_buf, buf->buf + buf->offset, out_count);
+ } else {
+ memcpy(out_buf, buf->buf + buf->offset, out_count);
+ ret = 0; /* Emulate copy_to_user behaviour */
+ }
+
+ /* Return the number of bytes actually copied */
+ ret = out_count - ret;
+ buf->offset += ret;
+
+ if (buf->offset == buf->len) {
+ /*
+ * We're done using all the data in this buffer.
+ * Re-queue so that the Host can send us more data.
+ */
+ spin_lock_irqsave(&port->inbuf_lock, flags);
+ port->inbuf = NULL;
+
+ if (add_inbuf(port->in_vq, buf) < 0)
+ dev_warn(port->dev, "failed add_buf\n");
+
+ spin_unlock_irqrestore(&port->inbuf_lock, flags);
+ }
+ return ret;
+}
+
+/* The condition that must be true for polling to end */
+static bool wait_is_over(struct port *port)
+{
+ return port_has_data(port) || !port->host_connected;
+}
+
+static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct port *port;
+ ssize_t ret;
+
+ port = filp->private_data;
+
+ if (!port_has_data(port)) {
+ /*
+ * If nothing's connected on the host just return 0 in
+ * case of list_empty; this tells the userspace app
+ * that there's no connection
+ */
+ if (!port->host_connected)
return 0;
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ ret = wait_event_interruptible(port->waitqueue,
+ wait_is_over(port));
+ if (ret < 0)
+ return ret;
}
+ /*
+ * We could've received a disconnection message while we were
+ * waiting for more data.
+ *
+ * This check is not clubbed in the if() statement above as we
+ * might receive some data as well as the host could get
+ * disconnected after we got woken up from our wait. So we
+ * really want to give off whatever data we have and only then
+ * check for host_connected.
+ */
+ if (!port_has_data(port) && !port->host_connected)
+ return 0;
+
+ return fill_readbuf(port, ubuf, count, true);
+}
- /* You want more than we have to give? Well, try wanting less! */
- if (in_len < count)
- count = in_len;
+static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct port *port;
- /* Copy across to their buffer and increment offset. */
- memcpy(buf, in, count);
- in += count;
- in_len -= count;
+ port = filp->private_data;
- /* Finished? Re-register buffer so Host will use it again. */
- if (in_len == 0)
- add_inbuf();
+ if (port->host_throttled)
+ return -ENOSPC;
- return count;
+ return send_buf(port, ubuf, count, true);
}
-/*:*/
-/*D:320 Console drivers are initialized very early so boot messages can go out,
- * so we do things slightly differently from the generic virtio initialization
- * of the net and block drivers.
+static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
+{
+ struct port *port;
+ unsigned int ret;
+
+ port = filp->private_data;
+ poll_wait(filp, &port->waitqueue, wait);
+
+ ret = 0;
+ if (port->inbuf)
+ ret |= POLLIN | POLLRDNORM;
+ if (port->host_connected && !port->host_throttled)
+ ret |= POLLOUT;
+ if (!port->host_connected)
+ ret |= POLLHUP;
+
+ return ret;
+}
+
+static int port_fops_release(struct inode *inode, struct file *filp)
+{
+ struct port *port;
+
+ port = filp->private_data;
+
+ /* Notify host of port being closed */
+ send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0);
+
+ spin_lock_irq(&port->inbuf_lock);
+ port->guest_connected = false;
+
+ discard_port_data(port);
+
+ spin_unlock_irq(&port->inbuf_lock);
+
+ return 0;
+}
+
+static int port_fops_open(struct inode *inode, struct file *filp)
+{
+ struct cdev *cdev = inode->i_cdev;
+ struct port *port;
+
+ port = container_of(cdev, struct port, cdev);
+ filp->private_data = port;
+
+ /*
+ * Don't allow opening of console port devices -- that's done
+ * via /dev/hvc
+ */
+ if (is_console_port(port))
+ return -ENXIO;
+
+ /* Allow only one process to open a particular port at a time */
+ spin_lock_irq(&port->inbuf_lock);
+ if (port->guest_connected) {
+ spin_unlock_irq(&port->inbuf_lock);
+ return -EMFILE;
+ }
+
+ port->guest_connected = true;
+ spin_unlock_irq(&port->inbuf_lock);
+
+ /* Notify host of port being opened */
+ send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1);
+
+ return 0;
+}
+
+/*
+ * The file operations that we support: programs in the guest can open
+ * a console device, read from it, write to it, poll for data and
+ * close it. The devices are at
+ * /dev/vport<device number>p<port number>
+ */
+static const struct file_operations port_fops = {
+ .owner = THIS_MODULE,
+ .open = port_fops_open,
+ .read = port_fops_read,
+ .write = port_fops_write,
+ .poll = port_fops_poll,
+ .release = port_fops_release,
+};
+
+/*
+ * The put_chars() callback is pretty straightforward.
*
- * At this stage, the console is output-only. It's too early to set up a
- * virtqueue, so we let the drivers do some boutique early-output thing. */
-int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
+ * We turn the characters into a scatter-gather list, add it to the
+ * output queue and then kick the Host. Then we sit here waiting for
+ * it to finish: inefficient in theory, but in practice
+ * implementations will do it immediately (lguest's Launcher does).
+ */
+static int put_chars(u32 vtermno, const char *buf, int count)
{
- virtio_cons.put_chars = put_chars;
- return hvc_instantiate(0, 0, &virtio_cons);
+ struct port *port;
+
+ port = find_port_by_vtermno(vtermno);
+ if (!port)
+ return 0;
+
+ if (unlikely(early_put_chars))
+ return early_put_chars(vtermno, buf, count);
+
+ return send_buf(port, buf, count, false);
}
/*
- * virtio console configuration. This supports:
- * - console resize
+ * get_chars() is the callback from the hvc_console infrastructure
+ * when an interrupt is received.
+ *
+ * We call out to fill_readbuf that gets us the required data from the
+ * buffers that are queued up.
*/
-static void virtcons_apply_config(struct virtio_device *dev)
+static int get_chars(u32 vtermno, char *buf, int count)
{
+ struct port *port;
+
+ port = find_port_by_vtermno(vtermno);
+ if (!port)
+ return 0;
+
+ /* If we don't have an input queue yet, we can't get input. */
+ BUG_ON(!port->in_vq);
+
+ return fill_readbuf(port, buf, count, false);
+}
+
+static void resize_console(struct port *port)
+{
+ struct virtio_device *vdev;
struct winsize ws;
- if (virtio_has_feature(dev, VIRTIO_CONSOLE_F_SIZE)) {
- dev->config->get(dev,
- offsetof(struct virtio_console_config, cols),
- &ws.ws_col, sizeof(u16));
- dev->config->get(dev,
- offsetof(struct virtio_console_config, rows),
- &ws.ws_row, sizeof(u16));
- hvc_resize(hvc, ws);
+ vdev = port->portdev->vdev;
+ if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) {
+ vdev->config->get(vdev,
+ offsetof(struct virtio_console_config, cols),
+ &ws.ws_col, sizeof(u16));
+ vdev->config->get(vdev,
+ offsetof(struct virtio_console_config, rows),
+ &ws.ws_row, sizeof(u16));
+ hvc_resize(port->cons.hvc, ws);
}
}
-/*
- * we support only one console, the hvc struct is a global var
- * We set the configuration at this point, since we now have a tty
- */
+/* We set the configuration at this point, since we now have a tty */
static int notifier_add_vio(struct hvc_struct *hp, int data)
{
+ struct port *port;
+
+ port = find_port_by_vtermno(hp->vtermno);
+ if (!port)
+ return -EINVAL;
+
hp->irq_requested = 1;
- virtcons_apply_config(vdev);
+ resize_console(port);
return 0;
}
@@ -173,75 +723,775 @@ static void notifier_del_vio(struct hvc_struct *hp, int data)
hp->irq_requested = 0;
}
-static void hvc_handle_input(struct virtqueue *vq)
+/* The operations for console ports. */
+static const struct hv_ops hv_ops = {
+ .get_chars = get_chars,
+ .put_chars = put_chars,
+ .notifier_add = notifier_add_vio,
+ .notifier_del = notifier_del_vio,
+ .notifier_hangup = notifier_del_vio,
+};
+
+/*
+ * Console drivers are initialized very early so boot messages can go
+ * out, so we do things slightly differently from the generic virtio
+ * initialization of the net and block drivers.
+ *
+ * At this stage, the console is output-only. It's too early to set
+ * up a virtqueue, so we let the drivers do some boutique early-output
+ * thing.
+ */
+int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
+{
+ early_put_chars = put_chars;
+ return hvc_instantiate(0, 0, &hv_ops);
+}
+
+int init_port_console(struct port *port)
+{
+ int ret;
+
+ /*
+ * The Host's telling us this port is a console port. Hook it
+ * up with an hvc console.
+ *
+ * To set up and manage our virtual console, we call
+ * hvc_alloc().
+ *
+ * The first argument of hvc_alloc() is the virtual console
+ * number. The second argument is the parameter for the
+ * notification mechanism (like irq number). We currently
+ * leave this as zero, virtqueues have implicit notifications.
+ *
+ * The third argument is a "struct hv_ops" containing the
+ * put_chars(), get_chars(), notifier_add() and notifier_del()
+ * pointers. The final argument is the output buffer size: we
+ * can do any size, so we put PAGE_SIZE here.
+ */
+ port->cons.vtermno = pdrvdata.next_vtermno;
+
+ port->cons.hvc = hvc_alloc(port->cons.vtermno, 0, &hv_ops, PAGE_SIZE);
+ if (IS_ERR(port->cons.hvc)) {
+ ret = PTR_ERR(port->cons.hvc);
+ dev_err(port->dev,
+ "error %d allocating hvc for port\n", ret);
+ port->cons.hvc = NULL;
+ return ret;
+ }
+ spin_lock_irq(&pdrvdata_lock);
+ pdrvdata.next_vtermno++;
+ list_add_tail(&port->cons.list, &pdrvdata.consoles);
+ spin_unlock_irq(&pdrvdata_lock);
+ port->guest_connected = true;
+
+ /* Notify host of port being opened */
+ send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 1);
+
+ return 0;
+}
+
+static ssize_t show_port_name(struct device *dev,
+ struct device_attribute *attr, char *buffer)
{
- if (hvc_poll(hvc))
+ struct port *port;
+
+ port = dev_get_drvdata(dev);
+
+ return sprintf(buffer, "%s\n", port->name);
+}
+
+static DEVICE_ATTR(name, S_IRUGO, show_port_name, NULL);
+
+static struct attribute *port_sysfs_entries[] = {
+ &dev_attr_name.attr,
+ NULL
+};
+
+static struct attribute_group port_attribute_group = {
+ .name = NULL, /* put in device directory */
+ .attrs = port_sysfs_entries,
+};
+
+static int debugfs_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+ return 0;
+}
+
+static ssize_t debugfs_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct port *port;
+ char *buf;
+ ssize_t ret, out_offset, out_count;
+
+ out_count = 1024;
+ buf = kmalloc(out_count, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ port = filp->private_data;
+ out_offset = 0;
+ out_offset += snprintf(buf + out_offset, out_count,
+ "name: %s\n", port->name ? port->name : "");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "guest_connected: %d\n", port->guest_connected);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "host_connected: %d\n", port->host_connected);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "host_throttled: %d\n", port->host_throttled);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "is_console: %s\n",
+ is_console_port(port) ? "yes" : "no");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "console_vtermno: %u\n", port->cons.vtermno);
+
+ ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+ kfree(buf);
+ return ret;
+}
+
+static const struct file_operations port_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .open = debugfs_open,
+ .read = debugfs_read,
+};
+
+/* Remove all port-specific data. */
+static int remove_port(struct port *port)
+{
+ spin_lock_irq(&port->portdev->ports_lock);
+ list_del(&port->list);
+ spin_unlock_irq(&port->portdev->ports_lock);
+
+ if (is_console_port(port)) {
+ spin_lock_irq(&pdrvdata_lock);
+ list_del(&port->cons.list);
+ spin_unlock_irq(&pdrvdata_lock);
+ hvc_remove(port->cons.hvc);
+ }
+ if (port->guest_connected)
+ send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0);
+
+ while (port->in_vq->vq_ops->detach_unused_buf(port->in_vq))
+ ;
+
+ sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
+ device_destroy(pdrvdata.class, port->dev->devt);
+ cdev_del(&port->cdev);
+
+ discard_port_data(port);
+ free_buf(port->outbuf);
+ kfree(port->name);
+
+ debugfs_remove(port->debugfs_file);
+
+ kfree(port);
+ return 0;
+}
+
+/* Any private messages that the Host and Guest want to share */
+static void handle_control_message(struct ports_device *portdev,
+ struct port_buffer *buf)
+{
+ struct virtio_console_control *cpkt;
+ struct port *port;
+ size_t name_size;
+ int err;
+
+ cpkt = (struct virtio_console_control *)(buf->buf + buf->offset);
+
+ port = find_port_by_id(portdev, cpkt->id);
+ if (!port) {
+ /* No valid header at start of buffer. Drop it. */
+ dev_dbg(&portdev->vdev->dev,
+ "Invalid index %u in control packet\n", cpkt->id);
+ return;
+ }
+
+ switch (cpkt->event) {
+ case VIRTIO_CONSOLE_CONSOLE_PORT:
+ if (!cpkt->value)
+ break;
+ if (is_console_port(port))
+ break;
+
+ init_port_console(port);
+ /*
+ * Could remove the port here in case init fails - but
+ * have to notify the host first.
+ */
+ break;
+ case VIRTIO_CONSOLE_RESIZE:
+ if (!is_console_port(port))
+ break;
+ port->cons.hvc->irq_requested = 1;
+ resize_console(port);
+ break;
+ case VIRTIO_CONSOLE_PORT_OPEN:
+ port->host_connected = cpkt->value;
+ wake_up_interruptible(&port->waitqueue);
+ break;
+ case VIRTIO_CONSOLE_PORT_NAME:
+ /*
+ * Skip the size of the header and the cpkt to get the size
+ * of the name that was sent
+ */
+ name_size = buf->len - buf->offset - sizeof(*cpkt) + 1;
+
+ port->name = kmalloc(name_size, GFP_KERNEL);
+ if (!port->name) {
+ dev_err(port->dev,
+ "Not enough space to store port name\n");
+ break;
+ }
+ strncpy(port->name, buf->buf + buf->offset + sizeof(*cpkt),
+ name_size - 1);
+ port->name[name_size - 1] = 0;
+
+ /*
+ * Since we only have one sysfs attribute, 'name',
+ * create it only if we have a name for the port.
+ */
+ err = sysfs_create_group(&port->dev->kobj,
+ &port_attribute_group);
+ if (err)
+ dev_err(port->dev,
+ "Error %d creating sysfs device attributes\n",
+ err);
+
+ break;
+ case VIRTIO_CONSOLE_THROTTLE_PORT:
+ /*
+ * Hosts can govern some policy to disallow rogue
+ * guest processes writing indefinitely to ports
+ * leading to OOM situations. If we receive this
+ * message here, it means the Host side of the port
+ * has either reached its limit for cached data or is
+ * signalling to us that it is ready to accept more
+ * data.
+ */
+ port->host_throttled = cpkt->value;
+ break;
+ case VIRTIO_CONSOLE_PORT_REMOVE:
+ /*
+ * Hot unplug the port. We don't decrement nr_ports
+ * since we don't want to deal with extra complexities
+ * of using the lowest-available port id: We can just
+ * pick up the nr_ports number as the id and not have
+ * userspace send it to us. This helps us in two
+ * ways:
+ *
+ * - We don't need to have a 'port_id' field in the
+ * config space when a port is hot-added. This is a
+ * good thing as we might queue up multiple hotplug
+ * requests issued in our workqueue.
+ *
+ * - Another way to deal with this would have been to
+ * use a bitmap of the active ports and select the
+ * lowest non-active port from that map. That
+ * bloats the already tight config space and we
+ * would end up artificially limiting the
+ * max. number of ports to sizeof(bitmap). Right
+ * now we can support 2^32 ports (as the port id is
+ * stored in a u32 type).
+ *
+ */
+ remove_port(port);
+ break;
+ }
+}
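For reference, a sketch of the control packet layout this handler assumes. The authoritative definition lives in include/linux/virtio_console.h; the struct name and field types below are an assumption based on the cpkt->id/event/value accesses above.

/*
 * Control messages travel over the c_ivq/c_ovq pair as small packets
 * of roughly this shape (sketch only):
 */
struct virtio_console_control_sketch {
	u32 id;		/* port number this message is about */
	u16 event;	/* VIRTIO_CONSOLE_* event code */
	u16 value;	/* event argument, e.g. 1/0 for PORT_OPEN */
};

/*
 * Events handled above: CONSOLE_PORT (mark a port as a console),
 * RESIZE, PORT_OPEN, PORT_NAME, THROTTLE_PORT and PORT_REMOVE.
 * The guest side replies with PORT_READY and PORT_OPEN messages
 * via send_control_msg().
 */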
+
+static void control_work_handler(struct work_struct *work)
+{
+ struct ports_device *portdev;
+ struct virtqueue *vq;
+ struct port_buffer *buf;
+ unsigned int len;
+
+ portdev = container_of(work, struct ports_device, control_work);
+ vq = portdev->c_ivq;
+
+ spin_lock(&portdev->cvq_lock);
+ while ((buf = vq->vq_ops->get_buf(vq, &len))) {
+ spin_unlock(&portdev->cvq_lock);
+
+ buf->len = len;
+ buf->offset = 0;
+
+ handle_control_message(portdev, buf);
+
+ spin_lock(&portdev->cvq_lock);
+ if (add_inbuf(portdev->c_ivq, buf) < 0) {
+ dev_warn(&portdev->vdev->dev,
+ "Error adding buffer to queue\n");
+ free_buf(buf);
+ }
+ }
+ spin_unlock(&portdev->cvq_lock);
+}
+
+static void in_intr(struct virtqueue *vq)
+{
+ struct port *port;
+ unsigned long flags;
+
+ port = find_port_by_vq(vq->vdev->priv, vq);
+ if (!port)
+ return;
+
+ spin_lock_irqsave(&port->inbuf_lock, flags);
+ port->inbuf = get_inbuf(port);
+
+ /*
+ * Don't queue up data when port is closed. This condition
+ * can be reached when a console port is not yet connected (no
+ * tty is spawned) and the host sends out data to console
+ * ports. For generic serial ports, the host won't
+ * (shouldn't) send data till the guest is connected.
+ */
+ if (!port->guest_connected)
+ discard_port_data(port);
+
+ spin_unlock_irqrestore(&port->inbuf_lock, flags);
+
+ wake_up_interruptible(&port->waitqueue);
+
+ if (is_console_port(port) && hvc_poll(port->cons.hvc))
hvc_kick();
}
-/*D:370 Once we're further in boot, we get probed like any other virtio device.
- * At this stage we set up the output virtqueue.
- *
- * To set up and manage our virtual console, we call hvc_alloc(). Since we
- * never remove the console device we never need this pointer again.
- *
- * Finally we put our input buffer in the input queue, ready to receive. */
-static int __devinit virtcons_probe(struct virtio_device *dev)
+static void control_intr(struct virtqueue *vq)
{
- vq_callback_t *callbacks[] = { hvc_handle_input, NULL};
- const char *names[] = { "input", "output" };
- struct virtqueue *vqs[2];
+ struct ports_device *portdev;
+
+ portdev = vq->vdev->priv;
+ schedule_work(&portdev->control_work);
+}
+
+static void config_intr(struct virtio_device *vdev)
+{
+ struct ports_device *portdev;
+
+ portdev = vdev->priv;
+ if (use_multiport(portdev)) {
+ /* Handle port hot-add */
+ schedule_work(&portdev->config_work);
+ }
+ /*
+ * We'll use this way of resizing only for legacy support.
+ * For newer userspace (VIRTIO_CONSOLE_F_MULTIPORT+), use
+ * control messages to indicate console size changes so that
+ * it can be done per-port
+ */
+ resize_console(find_port_by_id(portdev, 0));
+}
+
+static void fill_queue(struct virtqueue *vq, spinlock_t *lock)
+{
+ struct port_buffer *buf;
+ int ret;
+
+ do {
+ buf = alloc_buf(PAGE_SIZE);
+ if (!buf)
+ break;
+
+ spin_lock_irq(lock);
+ ret = add_inbuf(vq, buf);
+ if (ret < 0) {
+ spin_unlock_irq(lock);
+ free_buf(buf);
+ break;
+ }
+ spin_unlock_irq(lock);
+ } while (ret > 0);
+}
+
+static int add_port(struct ports_device *portdev, u32 id)
+{
+ char debugfs_name[16];
+ struct port *port;
+ struct port_buffer *inbuf;
+ dev_t devt;
int err;
- vdev = dev;
+ port = kmalloc(sizeof(*port), GFP_KERNEL);
+ if (!port) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ port->portdev = portdev;
+ port->id = id;
+
+ port->name = NULL;
+ port->inbuf = NULL;
+ port->cons.hvc = NULL;
+
+ port->host_connected = port->guest_connected = false;
+ port->host_throttled = false;
- /* This is the scratch page we use to receive console input */
- inbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ port->in_vq = portdev->in_vqs[port->id];
+ port->out_vq = portdev->out_vqs[port->id];
+
+ cdev_init(&port->cdev, &port_fops);
+
+ devt = MKDEV(portdev->chr_major, id);
+ err = cdev_add(&port->cdev, devt, 1);
+ if (err < 0) {
+ dev_err(&port->portdev->vdev->dev,
+ "Error %d adding cdev for port %u\n", err, id);
+ goto free_port;
+ }
+ port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev,
+ devt, port, "vport%up%u",
+ port->portdev->drv_index, id);
+ if (IS_ERR(port->dev)) {
+ err = PTR_ERR(port->dev);
+ dev_err(&port->portdev->vdev->dev,
+ "Error %d creating device for port %u\n",
+ err, id);
+ goto free_cdev;
+ }
+
+ spin_lock_init(&port->inbuf_lock);
+ init_waitqueue_head(&port->waitqueue);
+
+ inbuf = alloc_buf(PAGE_SIZE);
if (!inbuf) {
err = -ENOMEM;
+ goto free_device;
+ }
+ port->outbuf = alloc_buf(PAGE_SIZE);
+ if (!port->outbuf) {
+ err = -ENOMEM;
+ goto free_inbuf;
+ }
+
+ /* Register the input buffer the first time. */
+ add_inbuf(port->in_vq, inbuf);
+
+ /*
+ * If we're not using multiport support, this has to be a console port
+ */
+ if (!use_multiport(port->portdev)) {
+ err = init_port_console(port);
+ if (err)
+ goto free_outbuf;
+ }
+
+ spin_lock_irq(&portdev->ports_lock);
+ list_add_tail(&port->list, &port->portdev->ports);
+ spin_unlock_irq(&portdev->ports_lock);
+
+ /*
+ * Tell the Host we're set so that it can send us various
+ * configuration parameters for this port (eg, port name,
+ * caching, whether this is a console port, etc.)
+ */
+ send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1);
+
+ if (pdrvdata.debugfs_dir) {
+ /*
+ * Finally, create the debugfs file that we can use to
+ * inspect a port's state at any time
+ */
+ sprintf(debugfs_name, "vport%up%u",
+ port->portdev->drv_index, id);
+ port->debugfs_file = debugfs_create_file(debugfs_name, 0444,
+ pdrvdata.debugfs_dir,
+ port,
+ &port_debugfs_ops);
+ }
+ return 0;
+
+free_outbuf:
+ free_buf(port->outbuf);
+free_inbuf:
+ free_buf(inbuf);
+free_device:
+ device_destroy(pdrvdata.class, port->dev->devt);
+free_cdev:
+ cdev_del(&port->cdev);
+free_port:
+ kfree(port);
+fail:
+ return err;
+}
+
+/*
+ * The workhandler for config-space updates.
+ *
+ * This is called when ports are hot-added.
+ */
+static void config_work_handler(struct work_struct *work)
+{
+ struct virtio_console_config virtconconf;
+ struct ports_device *portdev;
+ struct virtio_device *vdev;
+ int err;
+
+ portdev = container_of(work, struct ports_device, config_work);
+
+ vdev = portdev->vdev;
+ vdev->config->get(vdev,
+ offsetof(struct virtio_console_config, nr_ports),
+ &virtconconf.nr_ports,
+ sizeof(virtconconf.nr_ports));
+
+ if (portdev->config.nr_ports == virtconconf.nr_ports) {
+ /*
+ * Port 0 got hot-added. Since we already did all the
+ * other initialisation for it, just tell the Host
+ * that the port is ready if we find the port. In
+ * case the port was hot-removed earlier, we call
+ * add_port to add the port.
+ */
+ struct port *port;
+
+ port = find_port_by_id(portdev, 0);
+ if (!port)
+ add_port(portdev, 0);
+ else
+ send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1);
+ return;
+ }
+ if (virtconconf.nr_ports > portdev->config.max_nr_ports) {
+ dev_warn(&vdev->dev,
+ "More ports specified (%u) than allowed (%u)",
+ portdev->config.nr_ports + 1,
+ portdev->config.max_nr_ports);
+ return;
+ }
+ if (virtconconf.nr_ports < portdev->config.nr_ports)
+ return;
+
+ /* Hot-add ports */
+ while (virtconconf.nr_ports - portdev->config.nr_ports) {
+ err = add_port(portdev, portdev->config.nr_ports);
+ if (err)
+ break;
+ portdev->config.nr_ports++;
+ }
+}
+
+static int init_vqs(struct ports_device *portdev)
+{
+ vq_callback_t **io_callbacks;
+ char **io_names;
+ struct virtqueue **vqs;
+ u32 i, j, nr_ports, nr_queues;
+ int err;
+
+ nr_ports = portdev->config.max_nr_ports;
+ nr_queues = use_multiport(portdev) ? (nr_ports + 1) * 2 : 2;
+
+ vqs = kmalloc(nr_queues * sizeof(struct virtqueue *), GFP_KERNEL);
+ if (!vqs) {
+ err = -ENOMEM;
goto fail;
}
+ io_callbacks = kmalloc(nr_queues * sizeof(vq_callback_t *), GFP_KERNEL);
+ if (!io_callbacks) {
+ err = -ENOMEM;
+ goto free_vqs;
+ }
+ io_names = kmalloc(nr_queues * sizeof(char *), GFP_KERNEL);
+ if (!io_names) {
+ err = -ENOMEM;
+ goto free_callbacks;
+ }
+ portdev->in_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *),
+ GFP_KERNEL);
+ if (!portdev->in_vqs) {
+ err = -ENOMEM;
+ goto free_names;
+ }
+ portdev->out_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *),
+ GFP_KERNEL);
+ if (!portdev->out_vqs) {
+ err = -ENOMEM;
+ goto free_invqs;
+ }
+
+ /*
+ * For backward compat (newer host but older guest), the host
+ * spawns a console port first and also inits the vqs for port
+ * 0 before others.
+ */
+ j = 0;
+ io_callbacks[j] = in_intr;
+ io_callbacks[j + 1] = NULL;
+ io_names[j] = "input";
+ io_names[j + 1] = "output";
+ j += 2;
+ if (use_multiport(portdev)) {
+ io_callbacks[j] = control_intr;
+ io_callbacks[j + 1] = NULL;
+ io_names[j] = "control-i";
+ io_names[j + 1] = "control-o";
+
+ for (i = 1; i < nr_ports; i++) {
+ j += 2;
+ io_callbacks[j] = in_intr;
+ io_callbacks[j + 1] = NULL;
+ io_names[j] = "input";
+ io_names[j + 1] = "output";
+ }
+ }
/* Find the queues. */
- /* FIXME: This is why we want to wean off hvc: we do nothing
- * when input comes in. */
- err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+ err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs,
+ io_callbacks,
+ (const char **)io_names);
if (err)
+ goto free_outvqs;
+
+ j = 0;
+ portdev->in_vqs[0] = vqs[0];
+ portdev->out_vqs[0] = vqs[1];
+ j += 2;
+ if (use_multiport(portdev)) {
+ portdev->c_ivq = vqs[j];
+ portdev->c_ovq = vqs[j + 1];
+
+ for (i = 1; i < nr_ports; i++) {
+ j += 2;
+ portdev->in_vqs[i] = vqs[j];
+ portdev->out_vqs[i] = vqs[j + 1];
+ }
+ }
+ kfree(io_callbacks);
+ kfree(io_names);
+ kfree(vqs);
+
+ return 0;
+
+free_names:
+ kfree(io_names);
+free_callbacks:
+ kfree(io_callbacks);
+free_outvqs:
+ kfree(portdev->out_vqs);
+free_invqs:
+ kfree(portdev->in_vqs);
+free_vqs:
+ kfree(vqs);
+fail:
+ return err;
+}
+
+static const struct file_operations portdev_fops = {
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Once we're further in boot, we get probed like any other virtio
+ * device.
+ *
+ * If the host also supports multiple console ports, we check the
+ * config space to see how many ports the host has spawned. We
+ * initialize each port found.
+ */
+static int __devinit virtcons_probe(struct virtio_device *vdev)
+{
+ struct ports_device *portdev;
+ u32 i;
+ int err;
+ bool multiport;
+
+ portdev = kmalloc(sizeof(*portdev), GFP_KERNEL);
+ if (!portdev) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ /* Attach this portdev to this virtio_device, and vice-versa. */
+ portdev->vdev = vdev;
+ vdev->priv = portdev;
+
+ spin_lock_irq(&pdrvdata_lock);
+ portdev->drv_index = pdrvdata.index++;
+ spin_unlock_irq(&pdrvdata_lock);
+
+ portdev->chr_major = register_chrdev(0, "virtio-portsdev",
+ &portdev_fops);
+ if (portdev->chr_major < 0) {
+ dev_err(&vdev->dev,
+ "Error %d registering chrdev for device %u\n",
+ portdev->chr_major, portdev->drv_index);
+ err = portdev->chr_major;
goto free;
+ }
- in_vq = vqs[0];
- out_vq = vqs[1];
+ multiport = false;
+ portdev->config.nr_ports = 1;
+ portdev->config.max_nr_ports = 1;
+ if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) {
+ multiport = true;
+ vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT;
- /* Start using the new console output. */
- virtio_cons.get_chars = get_chars;
- virtio_cons.put_chars = put_chars;
- virtio_cons.notifier_add = notifier_add_vio;
- virtio_cons.notifier_del = notifier_del_vio;
- virtio_cons.notifier_hangup = notifier_del_vio;
-
- /* The first argument of hvc_alloc() is the virtual console number, so
- * we use zero. The second argument is the parameter for the
- * notification mechanism (like irq number). We currently leave this
- * as zero, virtqueues have implicit notifications.
- *
- * The third argument is a "struct hv_ops" containing the put_chars()
- * get_chars(), notifier_add() and notifier_del() pointers.
- * The final argument is the output buffer size: we can do any size,
- * so we put PAGE_SIZE here. */
- hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
- if (IS_ERR(hvc)) {
- err = PTR_ERR(hvc);
- goto free_vqs;
+ vdev->config->get(vdev, offsetof(struct virtio_console_config,
+ nr_ports),
+ &portdev->config.nr_ports,
+ sizeof(portdev->config.nr_ports));
+ vdev->config->get(vdev, offsetof(struct virtio_console_config,
+ max_nr_ports),
+ &portdev->config.max_nr_ports,
+ sizeof(portdev->config.max_nr_ports));
+ if (portdev->config.nr_ports > portdev->config.max_nr_ports) {
+ dev_warn(&vdev->dev,
+ "More ports (%u) specified than allowed (%u). Will init %u ports.",
+ portdev->config.nr_ports,
+ portdev->config.max_nr_ports,
+ portdev->config.max_nr_ports);
+
+ portdev->config.nr_ports = portdev->config.max_nr_ports;
+ }
}
- /* Register the input buffer the first time. */
- add_inbuf();
+ /* Let the Host know we support multiple ports. */
+ vdev->config->finalize_features(vdev);
+
+ err = init_vqs(portdev);
+ if (err < 0) {
+ dev_err(&vdev->dev, "Error %d initializing vqs\n", err);
+ goto free_chrdev;
+ }
+
+ spin_lock_init(&portdev->ports_lock);
+ INIT_LIST_HEAD(&portdev->ports);
+
+ if (multiport) {
+ spin_lock_init(&portdev->cvq_lock);
+ INIT_WORK(&portdev->control_work, &control_work_handler);
+ INIT_WORK(&portdev->config_work, &config_work_handler);
+
+ portdev->outbuf = alloc_buf(PAGE_SIZE);
+ if (!portdev->outbuf) {
+ err = -ENOMEM;
+ dev_err(&vdev->dev, "OOM for control outbuf\n");
+ goto free_vqs;
+ }
+ fill_queue(portdev->c_ivq, &portdev->cvq_lock);
+ }
+
+ for (i = 0; i < portdev->config.nr_ports; i++)
+ add_port(portdev, i);
+
+ /* Start using the new console output. */
+ early_put_chars = NULL;
return 0;
free_vqs:
vdev->config->del_vqs(vdev);
+ kfree(portdev->in_vqs);
+ kfree(portdev->out_vqs);
+free_chrdev:
+ unregister_chrdev(portdev->chr_major, "virtio-portsdev");
free:
- kfree(inbuf);
+ kfree(portdev);
fail:
return err;
}
@@ -253,6 +1503,7 @@ static struct virtio_device_id id_table[] = {
static unsigned int features[] = {
VIRTIO_CONSOLE_F_SIZE,
+ VIRTIO_CONSOLE_F_MULTIPORT,
};
static struct virtio_driver virtio_console = {
@@ -262,11 +1513,27 @@ static struct virtio_driver virtio_console = {
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtcons_probe,
- .config_changed = virtcons_apply_config,
+ .config_changed = config_intr,
};
static int __init init(void)
{
+ int err;
+
+ pdrvdata.class = class_create(THIS_MODULE, "virtio-ports");
+ if (IS_ERR(pdrvdata.class)) {
+ err = PTR_ERR(pdrvdata.class);
+ pr_err("Error %d creating virtio-ports class\n", err);
+ return err;
+ }
+
+ pdrvdata.debugfs_dir = debugfs_create_dir("virtio-ports", NULL);
+ if (!pdrvdata.debugfs_dir) {
+ pr_warning("Error %ld creating debugfs dir for virtio-ports\n",
+ PTR_ERR(pdrvdata.debugfs_dir));
+ }
+ INIT_LIST_HEAD(&pdrvdata.consoles);
+
return register_virtio_driver(&virtio_console);
}
module_init(init);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c708ecc3cb2e..72b3f212b613 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -56,8 +56,7 @@ struct virtnet_info
/* Host will merge rx buffers for big packets (shake it! shake it!) */
bool mergeable_rx_bufs;
- /* Receive & send queues. */
- struct sk_buff_head recv;
+ /* Send queue. */
struct sk_buff_head send;
/* Work struct for refilling if we run low on memory. */
@@ -75,34 +74,44 @@ struct skb_vnet_hdr {
unsigned int num_sg;
};
+struct padded_vnet_hdr {
+ struct virtio_net_hdr hdr;
+ /*
+ * virtio_net_hdr should be in a separate sg buffer because of a
+ * QEMU bug, and data sg buffer shares same page with this header sg.
+ * This padding makes next sg 16 byte aligned after virtio_net_hdr.
+ */
+ char padding[6];
+};
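The arithmetic behind the padding above: struct virtio_net_hdr is 10 bytes (two u8 fields and four u16 fields), so 6 bytes of padding make the whole header 16 bytes and leave the following data sg entry 16-byte aligned. A compile-time check making that explicit could look like the sketch below; the helper name is hypothetical and not part of the patch.

/* Sketch: assert the 10 + 6 = 16 byte layout assumed by padded_vnet_hdr. */
static inline void padded_vnet_hdr_check(void)
{
	BUILD_BUG_ON(sizeof(struct virtio_net_hdr) != 10);
	BUILD_BUG_ON(sizeof(struct padded_vnet_hdr) != 16);
}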
+
static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
{
return (struct skb_vnet_hdr *)skb->cb;
}
-static void give_a_page(struct virtnet_info *vi, struct page *page)
-{
- page->private = (unsigned long)vi->pages;
- vi->pages = page;
-}
-
-static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb)
+/*
+ * private is used to chain pages for big packets, put the whole
+ * most recent used list in the beginning for reuse
+ */
+static void give_pages(struct virtnet_info *vi, struct page *page)
{
- unsigned int i;
+ struct page *end;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- give_a_page(vi, skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->nr_frags = 0;
- skb->data_len = 0;
+ /* Find end of list, sew whole thing into vi->pages. */
+ for (end = page; end->private; end = (struct page *)end->private);
+ end->private = (unsigned long)vi->pages;
+ vi->pages = page;
}
static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
{
struct page *p = vi->pages;
- if (p)
+ if (p) {
vi->pages = (struct page *)p->private;
- else
+ /* clear private here, it is used to chain pages */
+ p->private = 0;
+ } else
p = alloc_page(gfp_mask);
return p;
}
@@ -118,99 +127,142 @@ static void skb_xmit_done(struct virtqueue *svq)
netif_wake_queue(vi->dev);
}
-static void receive_skb(struct net_device *dev, struct sk_buff *skb,
- unsigned len)
+static void set_skb_frag(struct sk_buff *skb, struct page *page,
+ unsigned int offset, unsigned int *len)
{
- struct virtnet_info *vi = netdev_priv(dev);
- struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
- int err;
- int i;
-
- if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
- pr_debug("%s: short packet %i\n", dev->name, len);
- dev->stats.rx_length_errors++;
- goto drop;
- }
+ int i = skb_shinfo(skb)->nr_frags;
+ skb_frag_t *f;
+
+ f = &skb_shinfo(skb)->frags[i];
+ f->size = min((unsigned)PAGE_SIZE - offset, *len);
+ f->page_offset = offset;
+ f->page = page;
+
+ skb->data_len += f->size;
+ skb->len += f->size;
+ skb_shinfo(skb)->nr_frags++;
+ *len -= f->size;
+}
- if (vi->mergeable_rx_bufs) {
- unsigned int copy;
- char *p = page_address(skb_shinfo(skb)->frags[0].page);
+static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+ struct page *page, unsigned int len)
+{
+ struct sk_buff *skb;
+ struct skb_vnet_hdr *hdr;
+ unsigned int copy, hdr_len, offset;
+ char *p;
- if (len > PAGE_SIZE)
- len = PAGE_SIZE;
- len -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ p = page_address(page);
- memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr));
- p += sizeof(hdr->mhdr);
+ /* copy small packet so we can reuse these pages for small data */
+ skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
+ if (unlikely(!skb))
+ return NULL;
- copy = len;
- if (copy > skb_tailroom(skb))
- copy = skb_tailroom(skb);
+ hdr = skb_vnet_hdr(skb);
- memcpy(skb_put(skb, copy), p, copy);
+ if (vi->mergeable_rx_bufs) {
+ hdr_len = sizeof hdr->mhdr;
+ offset = hdr_len;
+ } else {
+ hdr_len = sizeof hdr->hdr;
+ offset = sizeof(struct padded_vnet_hdr);
+ }
- len -= copy;
+ memcpy(hdr, p, hdr_len);
- if (!len) {
- give_a_page(vi, skb_shinfo(skb)->frags[0].page);
- skb_shinfo(skb)->nr_frags--;
- } else {
- skb_shinfo(skb)->frags[0].page_offset +=
- sizeof(hdr->mhdr) + copy;
- skb_shinfo(skb)->frags[0].size = len;
- skb->data_len += len;
- skb->len += len;
- }
+ len -= hdr_len;
+ p += offset;
- while (--hdr->mhdr.num_buffers) {
- struct sk_buff *nskb;
+ copy = len;
+ if (copy > skb_tailroom(skb))
+ copy = skb_tailroom(skb);
+ memcpy(skb_put(skb, copy), p, copy);
- i = skb_shinfo(skb)->nr_frags;
- if (i >= MAX_SKB_FRAGS) {
- pr_debug("%s: packet too long %d\n", dev->name,
- len);
- dev->stats.rx_length_errors++;
- goto drop;
- }
+ len -= copy;
+ offset += copy;
- nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
- if (!nskb) {
- pr_debug("%s: rx error: %d buffers missing\n",
- dev->name, hdr->mhdr.num_buffers);
- dev->stats.rx_length_errors++;
- goto drop;
- }
+ while (len) {
+ set_skb_frag(skb, page, offset, &len);
+ page = (struct page *)page->private;
+ offset = 0;
+ }
- __skb_unlink(nskb, &vi->recv);
- vi->num--;
+ if (page)
+ give_pages(vi, page);
- skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0];
- skb_shinfo(nskb)->nr_frags = 0;
- kfree_skb(nskb);
+ return skb;
+}
- if (len > PAGE_SIZE)
- len = PAGE_SIZE;
+static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
+{
+ struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+ struct page *page;
+ int num_buf, i, len;
+
+ num_buf = hdr->mhdr.num_buffers;
+ while (--num_buf) {
+ i = skb_shinfo(skb)->nr_frags;
+ if (i >= MAX_SKB_FRAGS) {
+ pr_debug("%s: packet too long\n", skb->dev->name);
+ skb->dev->stats.rx_length_errors++;
+ return -EINVAL;
+ }
- skb_shinfo(skb)->frags[i].size = len;
- skb_shinfo(skb)->nr_frags++;
- skb->data_len += len;
- skb->len += len;
+ page = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
+ if (!page) {
+ pr_debug("%s: rx error: %d buffers missing\n",
+ skb->dev->name, hdr->mhdr.num_buffers);
+ skb->dev->stats.rx_length_errors++;
+ return -EINVAL;
}
- } else {
- len -= sizeof(hdr->hdr);
+ if (len > PAGE_SIZE)
+ len = PAGE_SIZE;
+
+ set_skb_frag(skb, page, 0, &len);
+
+ --vi->num;
+ }
+ return 0;
+}
+
+static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct sk_buff *skb;
+ struct page *page;
+ struct skb_vnet_hdr *hdr;
- if (len <= MAX_PACKET_LEN)
- trim_pages(vi, skb);
+ if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
+ pr_debug("%s: short packet %i\n", dev->name, len);
+ dev->stats.rx_length_errors++;
+ if (vi->mergeable_rx_bufs || vi->big_packets)
+ give_pages(vi, buf);
+ else
+ dev_kfree_skb(buf);
+ return;
+ }
- err = pskb_trim(skb, len);
- if (err) {
- pr_debug("%s: pskb_trim failed %i %d\n", dev->name,
- len, err);
+ if (!vi->mergeable_rx_bufs && !vi->big_packets) {
+ skb = buf;
+ len -= sizeof(struct virtio_net_hdr);
+ skb_trim(skb, len);
+ } else {
+ page = buf;
+ skb = page_to_skb(vi, page, len);
+ if (unlikely(!skb)) {
dev->stats.rx_dropped++;
- goto drop;
+ give_pages(vi, page);
+ return;
}
+ if (vi->mergeable_rx_bufs)
+ if (receive_mergeable(vi, skb)) {
+ dev_kfree_skb(skb);
+ return;
+ }
}
+ hdr = skb_vnet_hdr(skb);
skb->truesize += skb->data_len;
dev->stats.rx_bytes += skb->len;
dev->stats.rx_packets++;
@@ -267,110 +319,119 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
frame_err:
dev->stats.rx_frame_errors++;
-drop:
dev_kfree_skb(skb);
}
-static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
{
struct sk_buff *skb;
- struct scatterlist sg[2+MAX_SKB_FRAGS];
- int num, err, i;
- bool oom = false;
-
- sg_init_table(sg, 2+MAX_SKB_FRAGS);
- do {
- struct skb_vnet_hdr *hdr;
+ struct skb_vnet_hdr *hdr;
+ struct scatterlist sg[2];
+ int err;
- skb = netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN);
- if (unlikely(!skb)) {
- oom = true;
- break;
- }
+ skb = netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN);
+ if (unlikely(!skb))
+ return -ENOMEM;
- skb_put(skb, MAX_PACKET_LEN);
+ skb_put(skb, MAX_PACKET_LEN);
- hdr = skb_vnet_hdr(skb);
- sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));
+ hdr = skb_vnet_hdr(skb);
+ sg_set_buf(sg, &hdr->hdr, sizeof hdr->hdr);
- if (vi->big_packets) {
- for (i = 0; i < MAX_SKB_FRAGS; i++) {
- skb_frag_t *f = &skb_shinfo(skb)->frags[i];
- f->page = get_a_page(vi, gfp);
- if (!f->page)
- break;
+ skb_to_sgvec(skb, sg + 1, 0, skb->len);
- f->page_offset = 0;
- f->size = PAGE_SIZE;
+ err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 2, skb);
+ if (err < 0)
+ dev_kfree_skb(skb);
- skb->data_len += PAGE_SIZE;
- skb->len += PAGE_SIZE;
+ return err;
+}
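
add_recvbuf_small() describes each receive buffer as a two-entry gather list: the virtio_net header first, the packet data second. writev(2) is a convenient userspace analogue for posting such a list; the header struct below is invented for the sketch and is not the real struct virtio_net_hdr:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

/* Hypothetical fixed-size metadata header, for illustration only. */
struct meta_hdr {
    unsigned char flags;
    unsigned char gso_type;
    unsigned short hdr_len;
};

int main(void)
{
    struct meta_hdr hdr = { 0, 0, 0 };
    char data[] = "packet payload";
    struct iovec sg[2];
    int fd = open("/dev/null", O_WRONLY);
    ssize_t n;

    if (fd < 0)
        return 1;

    /* sg[0] carries the header, sg[1] the packet data, in that order --
     * the same shape the driver hands to add_buf(). */
    sg[0].iov_base = &hdr;
    sg[0].iov_len  = sizeof(hdr);
    sg[1].iov_base = data;
    sg[1].iov_len  = strlen(data);

    n = writev(fd, sg, 2);      /* one gather op submits both pieces */
    printf("submitted %zd bytes (header + data)\n", n);
    close(fd);
    return 0;
}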
- skb_shinfo(skb)->nr_frags++;
- }
+static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
+{
+ struct scatterlist sg[MAX_SKB_FRAGS + 2];
+ struct page *first, *list = NULL;
+ char *p;
+ int i, err, offset;
+
+ /* page in sg[MAX_SKB_FRAGS + 1] is list tail */
+ for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
+ first = get_a_page(vi, gfp);
+ if (!first) {
+ if (list)
+ give_pages(vi, list);
+ return -ENOMEM;
}
+ sg_set_buf(&sg[i], page_address(first), PAGE_SIZE);
- num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
- skb_queue_head(&vi->recv, skb);
+ /* chain the new page at the list head to match the sg order */
+ first->private = (unsigned long)list;
+ list = first;
+ }
- err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
- if (err < 0) {
- skb_unlink(skb, &vi->recv);
- trim_pages(vi, skb);
- kfree_skb(skb);
- break;
- }
- vi->num++;
- } while (err >= num);
- if (unlikely(vi->num > vi->max))
- vi->max = vi->num;
- vi->rvq->vq_ops->kick(vi->rvq);
- return !oom;
+ first = get_a_page(vi, gfp);
+ if (!first) {
+ give_pages(vi, list);
+ return -ENOMEM;
+ }
+ p = page_address(first);
+
+ /* sg[0], sg[1] share the same page */
+ /* a separate sg[0] for the virtio_net_hdr only, due to a QEMU bug */
+ sg_set_buf(&sg[0], p, sizeof(struct virtio_net_hdr));
+
+ /* sg[1] for data packet, from offset */
+ offset = sizeof(struct padded_vnet_hdr);
+ sg_set_buf(&sg[1], p + offset, PAGE_SIZE - offset);
+
+ /* chain first in list head */
+ first->private = (unsigned long)list;
+ err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, MAX_SKB_FRAGS + 2,
+ first);
+ if (err < 0)
+ give_pages(vi, first);
+
+ return err;
}
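
The scatterlist layout built here is: sg[0] covers only the header, sg[1] covers the rest of the first page past the padded header, and each chained page fills one further entry. A self-contained sketch of that layout over a hypothetical page chain (the sizes are made up):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SZ 4096
#define HDR_SZ  10              /* pretend virtio_net_hdr size */
#define PAD_HDR 16              /* pretend padded header size */
#define NPAGES  4               /* first page + 3 chained pages */

struct page_buf {
    unsigned long private;      /* chains to the next page, as in the driver */
    char mem[PAGE_SZ];
};

struct sg_ent { void *ptr; size_t len; };

/* sg[0] = header only, sg[1] = rest of the first page past the padded
 * header, sg[2..] = one full entry per chained page. */
static int build_sg(struct sg_ent *sg, struct page_buf *first)
{
    struct page_buf *p = (struct page_buf *)first->private;
    int n = 0;

    sg[n].ptr = first->mem;
    sg[n].len = HDR_SZ;
    n++;
    sg[n].ptr = first->mem + PAD_HDR;
    sg[n].len = PAGE_SZ - PAD_HDR;
    n++;
    for (; p; p = (struct page_buf *)p->private, n++) {
        sg[n].ptr = p->mem;
        sg[n].len = PAGE_SZ;
    }
    return n;
}

int main(void)
{
    struct page_buf *list = NULL, *p;
    struct sg_ent sg[NPAGES + 1];
    int i, n;

    for (i = 0; i < NPAGES; i++) {      /* build a chain of pages */
        p = calloc(1, sizeof(*p));
        p->private = (unsigned long)list;
        list = p;
    }
    n = build_sg(sg, list);
    for (i = 0; i < n; i++)
        printf("sg[%d]: %zu bytes\n", i, sg[i].len);
    while (list) {                      /* free the chain */
        p = (struct page_buf *)list->private;
        free(list);
        list = p;
    }
    return 0;
}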
-/* Returns false if we couldn't fill entirely (OOM). */
-static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
{
- struct sk_buff *skb;
- struct scatterlist sg[1];
+ struct page *page;
+ struct scatterlist sg;
int err;
- bool oom = false;
-
- if (!vi->mergeable_rx_bufs)
- return try_fill_recv_maxbufs(vi, gfp);
- do {
- skb_frag_t *f;
+ page = get_a_page(vi, gfp);
+ if (!page)
+ return -ENOMEM;
- skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
- if (unlikely(!skb)) {
- oom = true;
- break;
- }
+ sg_init_one(&sg, page_address(page), PAGE_SIZE);
- f = &skb_shinfo(skb)->frags[0];
- f->page = get_a_page(vi, gfp);
- if (!f->page) {
- oom = true;
- kfree_skb(skb);
- break;
- }
+ err = vi->rvq->vq_ops->add_buf(vi->rvq, &sg, 0, 1, page);
+ if (err < 0)
+ give_pages(vi, page);
- f->page_offset = 0;
- f->size = PAGE_SIZE;
+ return err;
+}
- skb_shinfo(skb)->nr_frags++;
+/* Returns false if we couldn't fill entirely (OOM). */
+static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
+{
+ int err;
+ bool oom = false;
- sg_init_one(sg, page_address(f->page), PAGE_SIZE);
- skb_queue_head(&vi->recv, skb);
+ do {
+ if (vi->mergeable_rx_bufs)
+ err = add_recvbuf_mergeable(vi, gfp);
+ else if (vi->big_packets)
+ err = add_recvbuf_big(vi, gfp);
+ else
+ err = add_recvbuf_small(vi, gfp);
- err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
if (err < 0) {
- skb_unlink(skb, &vi->recv);
- kfree_skb(skb);
+ oom = true;
break;
}
- vi->num++;
+ ++vi->num;
} while (err > 0);
if (unlikely(vi->num > vi->max))
vi->max = vi->num;
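
The refill loop now just calls one of the three helpers until the ring reports no free slots (err == 0) or an allocation fails (err < 0), in which case the caller can fall back to the delayed refill work. A stripped-down sketch of that control flow against a fake ring:

#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE 8

static int ring_used;

/* Fake add_buf(): returns the free slots left after adding (0 when the
 * ring just became full), or a negative value if allocation failed. */
static int add_one_buf(bool simulate_oom)
{
    if (simulate_oom)
        return -1;                      /* pretend the page/skb allocation failed */
    ring_used++;
    return RING_SIZE - ring_used;
}

/* Mirrors try_fill_recv(): keep adding while err > 0, flag OOM on err < 0. */
static bool try_fill(bool simulate_oom)
{
    int err;
    bool oom = false;

    do {
        err = add_one_buf(simulate_oom);
        if (err < 0) {
            oom = true;                 /* caller reschedules the refill later */
            break;
        }
    } while (err > 0);
    return !oom;
}

int main(void)
{
    printf("filled: %s, used=%d/%d\n",
           try_fill(false) ? "yes" : "no", ring_used, RING_SIZE);
    return 0;
}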
@@ -408,15 +469,14 @@ static void refill_work(struct work_struct *work)
static int virtnet_poll(struct napi_struct *napi, int budget)
{
struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
- struct sk_buff *skb = NULL;
+ void *buf;
unsigned int len, received = 0;
again:
while (received < budget &&
- (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
- __skb_unlink(skb, &vi->recv);
- receive_skb(vi->dev, skb, len);
- vi->num--;
+ (buf = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
+ receive_buf(vi->dev, buf, len);
+ --vi->num;
received++;
}
@@ -496,9 +556,9 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
/* Encode metadata header at front. */
if (vi->mergeable_rx_bufs)
- sg_set_buf(sg, &hdr->mhdr, sizeof(hdr->mhdr));
+ sg_set_buf(sg, &hdr->mhdr, sizeof hdr->mhdr);
else
- sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));
+ sg_set_buf(sg, &hdr->hdr, sizeof hdr->hdr);
hdr->num_sg = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
return vi->svq->vq_ops->add_buf(vi->svq, sg, hdr->num_sg, 0, skb);
@@ -916,8 +976,7 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->features |= NETIF_F_HW_VLAN_FILTER;
}
- /* Initialize our empty receive and send queues. */
- skb_queue_head_init(&vi->recv);
+ /* Initialize our empty send queue. */
skb_queue_head_init(&vi->send);
err = register_netdev(dev);
@@ -952,25 +1011,35 @@ free:
return err;
}
+static void free_unused_bufs(struct virtnet_info *vi)
+{
+ void *buf;
+ while (1) {
+ buf = vi->rvq->vq_ops->detach_unused_buf(vi->rvq);
+ if (!buf)
+ break;
+ if (vi->mergeable_rx_bufs || vi->big_packets)
+ give_pages(vi, buf);
+ else
+ dev_kfree_skb(buf);
+ --vi->num;
+ }
+ BUG_ON(vi->num != 0);
+}
+
static void __devexit virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- struct sk_buff *skb;
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
- /* Free our skbs in send and recv queues, if any. */
- while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
- kfree_skb(skb);
- vi->num--;
- }
+ /* Free our skbs in send queue, if any. */
__skb_queue_purge(&vi->send);
- BUG_ON(vi->num != 0);
-
unregister_netdev(vi->dev);
cancel_delayed_work_sync(&vi->refill);
+ free_unused_bufs(vi);
vdev->config->del_vqs(vi->vdev);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 505be88c82ae..3db3d242c3ee 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -28,7 +28,7 @@
struct virtio_balloon
{
struct virtio_device *vdev;
- struct virtqueue *inflate_vq, *deflate_vq;
+ struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
/* Where the ballooning thread waits for config to change. */
wait_queue_head_t config_change;
@@ -49,6 +49,10 @@ struct virtio_balloon
/* The array of pfns we tell the Host about. */
unsigned int num_pfns;
u32 pfns[256];
+
+ /* Memory statistics */
+ int need_stats_update;
+ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
};
static struct virtio_device_id id_table[] = {
@@ -154,6 +158,72 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
}
}
+static inline void update_stat(struct virtio_balloon *vb, int idx,
+ u16 tag, u64 val)
+{
+ BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
+ vb->stats[idx].tag = tag;
+ vb->stats[idx].val = val;
+}
+
+#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
+
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+ unsigned long events[NR_VM_EVENT_ITEMS];
+ struct sysinfo i;
+ int idx = 0;
+
+ all_vm_events(events);
+ si_meminfo(&i);
+
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
+ pages_to_bytes(events[PSWPIN]));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
+ pages_to_bytes(events[PSWPOUT]));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
+ pages_to_bytes(i.freeram));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
+ pages_to_bytes(i.totalram));
+}
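
update_balloon_stats() packs VM event counters and meminfo into a tag/value array for the host. Roughly comparable numbers can be pulled in userspace with sysinfo(2); the tag values below are invented for the sketch (the real ones are the VIRTIO_BALLOON_S_* constants):

#include <stdio.h>
#include <sys/sysinfo.h>

/* Illustrative tag/value record, mirroring struct virtio_balloon_stat. */
struct stat_entry {
    unsigned short tag;
    unsigned long long val;
};

/* Made-up tag numbers, for the sketch only. */
enum { STAT_MEMFREE = 4, STAT_MEMTOT = 5 };

static int fill_stats(struct stat_entry *st)
{
    struct sysinfo si;
    int idx = 0;

    if (sysinfo(&si) != 0)
        return -1;

    /* sysinfo reports memory in units of mem_unit bytes. */
    st[idx].tag = STAT_MEMFREE;
    st[idx].val = (unsigned long long)si.freeram * si.mem_unit;
    idx++;
    st[idx].tag = STAT_MEMTOT;
    st[idx].val = (unsigned long long)si.totalram * si.mem_unit;
    idx++;
    return idx;
}

int main(void)
{
    struct stat_entry st[2];
    int i, n = fill_stats(st);

    for (i = 0; i < n; i++)
        printf("tag %u = %llu bytes\n", (unsigned)st[i].tag, st[i].val);
    return n < 0;
}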
+
+/*
+ * While most virtqueues communicate guest-initiated requests to the hypervisor,
+ * the stats queue operates in reverse. The driver initializes the virtqueue
+ * with a single buffer. From that point forward, all conversations consist of
+ * a hypervisor request (a call to this function) which directs us to refill
+ * the virtqueue with a fresh stats buffer. Since stats collection can sleep,
+ * we notify our kthread which does the actual work via stats_handle_request().
+ */
+static void stats_request(struct virtqueue *vq)
+{
+ struct virtio_balloon *vb;
+ unsigned int len;
+
+ vb = vq->vq_ops->get_buf(vq, &len);
+ if (!vb)
+ return;
+ vb->need_stats_update = 1;
+ wake_up(&vb->config_change);
+}
+
+static void stats_handle_request(struct virtio_balloon *vb)
+{
+ struct virtqueue *vq;
+ struct scatterlist sg;
+
+ vb->need_stats_update = 0;
+ update_balloon_stats(vb);
+
+ vq = vb->stats_vq;
+ sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+ if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
+ BUG();
+ vq->vq_ops->kick(vq);
+}
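
The comment above spells out the reversed protocol: the queue is primed with one buffer, the host consumes it to ask for fresh stats, and the virtqueue callback merely wakes the balloon thread, which does the sleeping work and re-posts the buffer. A schematic sketch of that split between a non-sleeping notifier and a sleeping worker, using POSIX threads and hypothetical names:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static int need_stats_update;   /* set from the "interrupt" side */
static int done;

/* Analogue of stats_request(): cheap, never sleeps, just wakes the worker. */
static void host_requests_stats(void)
{
    pthread_mutex_lock(&lock);
    need_stats_update = 1;
    pthread_cond_signal(&wake);
    pthread_mutex_unlock(&lock);
}

/* Analogue of the balloon thread + stats_handle_request(): may sleep. */
static void *worker(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    while (!done) {
        while (!need_stats_update && !done)
            pthread_cond_wait(&wake, &lock);
        if (need_stats_update) {
            need_stats_update = 0;
            pthread_mutex_unlock(&lock);
            printf("collecting stats and re-posting the buffer\n");
            pthread_mutex_lock(&lock);
        }
    }
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, worker, NULL);
    host_requests_stats();      /* pretend the host kicked the stats vq */
    sleep(1);

    pthread_mutex_lock(&lock);
    done = 1;
    pthread_cond_signal(&wake);
    pthread_mutex_unlock(&lock);
    pthread_join(t, NULL);
    return 0;
}

Only the flag set and wake-up happen in the callback; everything that can sleep runs in the thread.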
+
static void virtballoon_changed(struct virtio_device *vdev)
{
struct virtio_balloon *vb = vdev->priv;
@@ -190,8 +260,11 @@ static int balloon(void *_vballoon)
try_to_freeze();
wait_event_interruptible(vb->config_change,
(diff = towards_target(vb)) != 0
+ || vb->need_stats_update
|| kthread_should_stop()
|| freezing(current));
+ if (vb->need_stats_update)
+ stats_handle_request(vb);
if (diff > 0)
fill_balloon(vb, diff);
else if (diff < 0)
@@ -204,10 +277,10 @@ static int balloon(void *_vballoon)
static int virtballoon_probe(struct virtio_device *vdev)
{
struct virtio_balloon *vb;
- struct virtqueue *vqs[2];
- vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
- const char *names[] = { "inflate", "deflate" };
- int err;
+ struct virtqueue *vqs[3];
+ vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
+ const char *names[] = { "inflate", "deflate", "stats" };
+ int err, nvqs;
vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
if (!vb) {
@@ -220,13 +293,29 @@ static int virtballoon_probe(struct virtio_device *vdev)
init_waitqueue_head(&vb->config_change);
vb->vdev = vdev;
- /* We expect two virtqueues. */
- err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+ /* We expect two virtqueues: inflate and deflate,
+ * and optionally stats. */
+ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
+ err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
if (err)
goto out_free_vb;
vb->inflate_vq = vqs[0];
vb->deflate_vq = vqs[1];
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+ struct scatterlist sg;
+ vb->stats_vq = vqs[2];
+
+ /*
+ * Prime this virtqueue with one buffer so the hypervisor can
+ * use it to signal us later.
+ */
+ sg_init_one(&sg, vb->stats, sizeof vb->stats);
+ if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq,
+ &sg, 1, 0, vb) < 0)
+ BUG();
+ vb->stats_vq->vq_ops->kick(vb->stats_vq);
+ }
vb->thread = kthread_run(balloon, vb, "vballoon");
if (IS_ERR(vb->thread)) {
@@ -264,7 +353,10 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev)
kfree(vb);
}
-static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
+static unsigned int features[] = {
+ VIRTIO_BALLOON_F_MUST_TELL_HOST,
+ VIRTIO_BALLOON_F_STATS_VQ,
+};
static struct virtio_driver virtio_balloon_driver = {
.feature_table = features,
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 28d9cf7cf72f..1d5191fab62e 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -702,7 +702,7 @@ static struct pci_driver virtio_pci_driver = {
.name = "virtio-pci",
.id_table = virtio_pci_id_table,
.probe = virtio_pci_probe,
- .remove = virtio_pci_remove,
+ .remove = __devexit_p(virtio_pci_remove),
#ifdef CONFIG_PM
.suspend = virtio_pci_suspend,
.resume = virtio_pci_resume,
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index fbd2ecde93e4..71929ee00d69 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -334,6 +334,30 @@ static bool vring_enable_cb(struct virtqueue *_vq)
return true;
}
+static void *vring_detach_unused_buf(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ unsigned int i;
+ void *buf;
+
+ START_USE(vq);
+
+ for (i = 0; i < vq->vring.num; i++) {
+ if (!vq->data[i])
+ continue;
+ /* detach_buf clears data, so grab it now. */
+ buf = vq->data[i];
+ detach_buf(vq, i);
+ END_USE(vq);
+ return buf;
+ }
+ /* That should have freed everything. */
+ BUG_ON(vq->num_free != vq->vring.num);
+
+ END_USE(vq);
+ return NULL;
+}
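
vring_detach_unused_buf() scans the ring's data[] slots, returns the first cookie still attached, and asserts that the ring is fully free once none remain. The same scan over a plain array, as a compact sketch with a hypothetical slot layout:

#include <assert.h>
#include <stdio.h>

#define RING_NUM 4

/* One opaque cookie per ring slot; NULL means the slot is free. */
static void *data[RING_NUM];
static int num_free = RING_NUM;

static void detach_slot(int i)
{
    data[i] = NULL;
    num_free++;
}

/* Return one still-attached cookie, or NULL when the ring is empty. */
static void *detach_unused_buf(void)
{
    int i;

    for (i = 0; i < RING_NUM; i++) {
        void *buf;

        if (!data[i])
            continue;
        buf = data[i];                  /* grab it before detach clears the slot */
        detach_slot(i);
        return buf;
    }
    assert(num_free == RING_NUM);       /* nothing left: ring must be all free */
    return NULL;
}

int main(void)
{
    int a = 1, b = 2;
    void *buf;

    data[1] = &a; num_free--;
    data[3] = &b; num_free--;

    while ((buf = detach_unused_buf()) != NULL)
        printf("detached %d\n", *(int *)buf);
    return 0;
}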
+
irqreturn_t vring_interrupt(int irq, void *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
@@ -360,6 +384,7 @@ static struct virtqueue_ops vring_vq_ops = {
.kick = vring_kick,
.disable_cb = vring_disable_cb,
.enable_cb = vring_enable_cb,
+ .detach_unused_buf = vring_detach_unused_buf,
};
struct virtqueue *vring_new_virtqueue(unsigned int num,