summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2009-11-26 14:29:11 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2009-11-26 14:29:16 +1100
commit619dd65acb8fbb7bf42fc18071ec023a1b6cd95f (patch)
tree2ecbc87c7e1c9b6bbc91b08f7579ac5467def725 /drivers
parent8b1cb56e5b44609c8605949269c1faabe2983777 (diff)
parent59cc907eaaf64b29a8e6b44355b79d19f840d3ce (diff)
Merge branch 'quilt/rr'
Conflicts: arch/arm/kernel/vmlinux.lds.S
Diffstat (limited to 'drivers')
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/acpi/debug.c32
-rw-r--r--drivers/char/hvc_iucv.c9
-rw-r--r--drivers/char/ipmi/ipmi_watchdog.c42
-rw-r--r--drivers/ide/ide.c20
-rw-r--r--drivers/idle/i7300_idle.c15
-rw-r--r--drivers/input/misc/ati_remote2.c26
-rw-r--r--drivers/input/mouse/psmouse-base.c14
-rw-r--r--drivers/message/fusion/mptbase.c3
-rw-r--r--drivers/misc/lkdtm.c4
-rw-r--r--drivers/net/myri10ge/myri10ge.c54
-rw-r--r--drivers/net/tun.c101
-rw-r--r--drivers/net/wireless/libertas/if_sdio.c32
-rw-r--r--drivers/net/wireless/libertas/if_usb.c3
-rw-r--r--drivers/net/wireless/libertas_tf/if_usb.c3
-rw-r--r--drivers/scsi/bfa/bfad.c2
-rw-r--r--drivers/scsi/fcoe/fcoe.c2
-rw-r--r--drivers/staging/rtl8187se/r8180_core.c6
-rw-r--r--drivers/staging/rtl8192e/r8192E_core.c6
-rw-r--r--drivers/staging/rtl8192su/r8192U_core.c6
-rw-r--r--drivers/usb/atm/ueagle-atm.c2
-rw-r--r--drivers/vhost/Kconfig11
-rw-r--r--drivers/vhost/Makefile2
-rw-r--r--drivers/vhost/net.c648
-rw-r--r--drivers/vhost/vhost.c968
-rw-r--r--drivers/vhost/vhost.h159
-rw-r--r--drivers/video/uvesafb.c7
-rw-r--r--drivers/video/vt8623fb.c2
28 files changed, 2076 insertions, 104 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index 6ee53c7a57a1..81e36596b1e9 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_HID) += hid/
obj-$(CONFIG_PPC_PS3) += ps3/
obj-$(CONFIG_OF) += of/
obj-$(CONFIG_SSB) += ssb/
+obj-$(CONFIG_VHOST_NET) += vhost/
obj-$(CONFIG_VIRTIO) += virtio/
obj-$(CONFIG_VLYNQ) += vlynq/
obj-$(CONFIG_STAGING) += staging/
diff --git a/drivers/acpi/debug.c b/drivers/acpi/debug.c
index 8a690c3b8e23..941469801322 100644
--- a/drivers/acpi/debug.c
+++ b/drivers/acpi/debug.c
@@ -94,7 +94,8 @@ static const struct acpi_dlevel acpi_debug_levels[] = {
/* --------------------------------------------------------------------------
FS Interface (/sys)
-------------------------------------------------------------------------- */
-static int param_get_debug_layer(char *buffer, struct kernel_param *kp) {
+static int param_get_debug_layer(char *buffer, const struct kernel_param *kp)
+{
int result = 0;
int i;
@@ -116,7 +117,8 @@ static int param_get_debug_layer(char *buffer, struct kernel_param *kp) {
return result;
}
-static int param_get_debug_level(char *buffer, struct kernel_param *kp) {
+static int param_get_debug_level(char *buffer, const struct kernel_param *kp)
+{
int result = 0;
int i;
@@ -135,8 +137,18 @@ static int param_get_debug_level(char *buffer, struct kernel_param *kp) {
return result;
}
-module_param_call(debug_layer, param_set_uint, param_get_debug_layer, &acpi_dbg_layer, 0644);
-module_param_call(debug_level, param_set_uint, param_get_debug_level, &acpi_dbg_level, 0644);
+static struct kernel_param_ops acpi_debug_layer_ops = {
+ .set = param_set_uint,
+ .get = param_get_debug_layer,
+};
+
+static struct kernel_param_ops acpi_debug_level_ops = {
+ .set = param_set_uint,
+ .get = param_get_debug_level,
+};
+
+module_param_cb(debug_layer, &acpi_debug_layer_ops, &acpi_dbg_layer, 0644);
+module_param_cb(debug_level, &acpi_debug_level_ops, &acpi_dbg_level, 0644);
static char trace_method_name[6];
module_param_string(trace_method_name, trace_method_name, 6, 0644);
@@ -145,7 +157,7 @@ module_param(trace_debug_layer, uint, 0644);
static unsigned int trace_debug_level;
module_param(trace_debug_level, uint, 0644);
-static int param_set_trace_state(const char *val, struct kernel_param *kp)
+static int param_set_trace_state(const char *val, const struct kernel_param *kp)
{
int result = 0;
@@ -179,7 +191,7 @@ exit:
return result;
}
-static int param_get_trace_state(char *buffer, struct kernel_param *kp)
+static int param_get_trace_state(char *buffer, const struct kernel_param *kp)
{
if (!acpi_gbl_trace_method_name)
return sprintf(buffer, "disable");
@@ -192,8 +204,12 @@ static int param_get_trace_state(char *buffer, struct kernel_param *kp)
return 0;
}
-module_param_call(trace_state, param_set_trace_state, param_get_trace_state,
- NULL, 0644);
+static struct kernel_param_ops param_ops_trace_state = {
+ .set = param_set_trace_state,
+ .get = param_get_trace_state,
+};
+
+module_param_cb(trace_state, &param_ops_trace_state, NULL, 0644);
/* --------------------------------------------------------------------------
FS Interface (/proc)
diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c
index b8a5d654d3d0..448788fc2bf2 100644
--- a/drivers/char/hvc_iucv.c
+++ b/drivers/char/hvc_iucv.c
@@ -1146,7 +1146,7 @@ out_err:
* Note: If it is called early in the boot process, @val is stored and
* parsed later in hvc_iucv_init().
*/
-static int param_set_vmidfilter(const char *val, struct kernel_param *kp)
+static int param_set_vmidfilter(const char *val, const struct kernel_param *kp)
{
int rc;
@@ -1173,7 +1173,7 @@ static int param_set_vmidfilter(const char *val, struct kernel_param *kp)
* The function stores the filter as a comma-separated list of z/VM user IDs
* in @buffer. Typically, sysfs routines call this function for attr show.
*/
-static int param_get_vmidfilter(char *buffer, struct kernel_param *kp)
+static int param_get_vmidfilter(char *buffer, const struct kernel_param *kp)
{
int rc;
size_t index, len;
@@ -1200,6 +1200,11 @@ static int param_get_vmidfilter(char *buffer, struct kernel_param *kp)
#define param_check_vmidfilter(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_vmidfilter = {
+ .set = param_set_vmidfilter,
+ .get = param_get_vmidfilter,
+};
+
/**
* hvc_iucv_init() - z/VM IUCV HVC device driver initialization
*/
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index a4d57e31f713..45999d81004f 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -196,7 +196,7 @@ static void ipmi_unregister_watchdog(int ipmi_intf);
*/
static int start_now;
-static int set_param_int(const char *val, struct kernel_param *kp)
+static int set_param_timeout(const char *val, const struct kernel_param *kp)
{
char *endp;
int l;
@@ -215,10 +215,11 @@ static int set_param_int(const char *val, struct kernel_param *kp)
return rv;
}
-static int get_param_int(char *buffer, struct kernel_param *kp)
-{
- return sprintf(buffer, "%i", *((int *)kp->arg));
-}
+static struct kernel_param_ops param_ops_timeout = {
+ .set = set_param_timeout,
+ .get = param_get_int,
+};
+#define param_check_timeout param_check_int
typedef int (*action_fn)(const char *intval, char *outval);
@@ -227,7 +228,7 @@ static int preaction_op(const char *inval, char *outval);
static int preop_op(const char *inval, char *outval);
static void check_parms(void);
-static int set_param_str(const char *val, struct kernel_param *kp)
+static int set_param_str(const char *val, const struct kernel_param *kp)
{
action_fn fn = (action_fn) kp->arg;
int rv = 0;
@@ -251,7 +252,7 @@ static int set_param_str(const char *val, struct kernel_param *kp)
return rv;
}
-static int get_param_str(char *buffer, struct kernel_param *kp)
+static int get_param_str(char *buffer, const struct kernel_param *kp)
{
action_fn fn = (action_fn) kp->arg;
int rv;
@@ -263,7 +264,7 @@ static int get_param_str(char *buffer, struct kernel_param *kp)
}
-static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp)
+static int set_param_wdog_ifnum(const char *val, const struct kernel_param *kp)
{
int rv = param_set_int(val, kp);
if (rv)
@@ -276,27 +277,38 @@ static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp)
return 0;
}
-module_param_call(ifnum_to_use, set_param_wdog_ifnum, get_param_int,
- &ifnum_to_use, 0644);
+static struct kernel_param_ops param_ops_wdog_ifnum = {
+ .set = set_param_wdog_ifnum,
+ .get = param_get_int,
+};
+
+#define param_check_wdog_ifnum param_check_int
+
+static struct kernel_param_ops param_ops_str = {
+ .set = set_param_str,
+ .get = get_param_str,
+};
+
+module_param(ifnum_to_use, wdog_ifnum, 0644);
MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog "
"timer. Setting to -1 defaults to the first registered "
"interface");
-module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644);
+module_param(timeout, timeout, 0644);
MODULE_PARM_DESC(timeout, "Timeout value in seconds.");
-module_param_call(pretimeout, set_param_int, get_param_int, &pretimeout, 0644);
+module_param(pretimeout, timeout, 0644);
MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds.");
-module_param_call(action, set_param_str, get_param_str, action_op, 0644);
+module_param_cb(action, &param_ops_str, action_op, 0644);
MODULE_PARM_DESC(action, "Timeout action. One of: "
"reset, none, power_cycle, power_off.");
-module_param_call(preaction, set_param_str, get_param_str, preaction_op, 0644);
+module_param_cb(preaction, &param_ops_str, preaction_op, 0644);
MODULE_PARM_DESC(preaction, "Pretimeout action. One of: "
"pre_none, pre_smi, pre_nmi, pre_int.");
-module_param_call(preop, set_param_str, get_param_str, preop_op, 0644);
+module_param_cb(preop, &param_ops_str, preop_op, 0644);
MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: "
"preop_none, preop_panic, preop_give_data.");
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 16d056939f9f..2c27a526025f 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(ide_pci_clk);
module_param_named(pci_clock, ide_pci_clk, int, 0);
MODULE_PARM_DESC(pci_clock, "PCI bus clock frequency (in MHz)");
-static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp)
+static int ide_set_dev_param_mask(const char *s, const struct kernel_param *kp)
{
int a, b, i, j = 1;
unsigned int *dev_param_mask = (unsigned int *)kp->arg;
@@ -201,34 +201,40 @@ static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp)
return 0;
}
+static struct kernel_param_ops param_ops_ide_dev_mask = {
+ .set = ide_set_dev_param_mask
+};
+
+#define param_check_ide_dev_mask(name, p) param_check_uint(name, p)
+
static unsigned int ide_nodma;
-module_param_call(nodma, ide_set_dev_param_mask, NULL, &ide_nodma, 0);
+module_param_named(nodma, ide_nodma, ide_dev_mask, 0);
MODULE_PARM_DESC(nodma, "disallow DMA for a device");
static unsigned int ide_noflush;
-module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0);
+module_param_named(noflush, ide_noflush, ide_dev_mask, 0);
MODULE_PARM_DESC(noflush, "disable flush requests for a device");
static unsigned int ide_nohpa;
-module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0);
+module_param_named(nohpa, ide_nohpa, ide_dev_mask, 0);
MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device");
static unsigned int ide_noprobe;
-module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0);
+module_param_named(noprobe, ide_noprobe, ide_dev_mask, 0);
MODULE_PARM_DESC(noprobe, "skip probing for a device");
static unsigned int ide_nowerr;
-module_param_call(nowerr, ide_set_dev_param_mask, NULL, &ide_nowerr, 0);
+module_param_named(nowerr, ide_nowerr, ide_dev_mask, 0);
MODULE_PARM_DESC(nowerr, "ignore the ATA_DF bit for a device");
static unsigned int ide_cdroms;
-module_param_call(cdrom, ide_set_dev_param_mask, NULL, &ide_cdroms, 0);
+module_param_named(cdrom, ide_cdroms, ide_dev_mask, 0);
MODULE_PARM_DESC(cdrom, "force device as a CD-ROM");
struct chs_geom {
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index 1f20a042a4f5..dd253002cd50 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -81,7 +81,7 @@ static u8 i7300_idle_thrtctl_saved;
static u8 i7300_idle_thrtlow_saved;
static u32 i7300_idle_mc_saved;
-static cpumask_t idle_cpumask;
+static cpumask_var_t idle_cpumask;
static ktime_t start_ktime;
static unsigned long avg_idle_us;
@@ -459,9 +459,9 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
spin_lock_irqsave(&i7300_idle_lock, flags);
if (val == IDLE_START) {
- cpu_set(smp_processor_id(), idle_cpumask);
+ cpumask_set_cpu(smp_processor_id(), idle_cpumask);
- if (cpus_weight(idle_cpumask) != num_online_cpus())
+ if (cpumask_weight(idle_cpumask) != num_online_cpus())
goto end;
now_ktime = ktime_get();
@@ -478,8 +478,8 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
i7300_idle_ioat_start();
} else if (val == IDLE_END) {
- cpu_clear(smp_processor_id(), idle_cpumask);
- if (cpus_weight(idle_cpumask) == (num_online_cpus() - 1)) {
+ cpumask_clear_cpu(smp_processor_id(), idle_cpumask);
+ if (cpumask_weight(idle_cpumask) == (num_online_cpus() - 1)) {
/* First CPU coming out of idle */
u64 idle_duration_us;
@@ -553,7 +553,6 @@ struct debugfs_file_info {
static int __init i7300_idle_init(void)
{
spin_lock_init(&i7300_idle_lock);
- cpus_clear(idle_cpumask);
total_us = 0;
if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
@@ -565,6 +564,9 @@ static int __init i7300_idle_init(void)
if (i7300_idle_ioat_init())
return -ENODEV;
+ if (!zalloc_cpumask_var(&idle_cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
debugfs_dir = debugfs_create_dir("i7300_idle", NULL);
if (debugfs_dir) {
int i = 0;
@@ -589,6 +591,7 @@ static int __init i7300_idle_init(void)
static void __exit i7300_idle_exit(void)
{
idle_notifier_unregister(&i7300_idle_nb);
+ free_cpumask_var(idle_cpumask);
if (debugfs_dir) {
int i = 0;
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index 0501f0e65157..3665e3975158 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -37,7 +37,8 @@ enum {
};
static int ati_remote2_set_mask(const char *val,
- struct kernel_param *kp, unsigned int max)
+ const struct kernel_param *kp,
+ unsigned int max)
{
unsigned long mask;
int ret;
@@ -58,28 +59,31 @@ static int ati_remote2_set_mask(const char *val,
}
static int ati_remote2_set_channel_mask(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
pr_debug("%s()\n", __func__);
return ati_remote2_set_mask(val, kp, ATI_REMOTE2_MAX_CHANNEL_MASK);
}
-static int ati_remote2_get_channel_mask(char *buffer, struct kernel_param *kp)
+static int ati_remote2_get_channel_mask(char *buffer,
+ const struct kernel_param *kp)
{
pr_debug("%s()\n", __func__);
return sprintf(buffer, "0x%04x", *(unsigned int *)kp->arg);
}
-static int ati_remote2_set_mode_mask(const char *val, struct kernel_param *kp)
+static int ati_remote2_set_mode_mask(const char *val,
+ const struct kernel_param *kp)
{
pr_debug("%s()\n", __func__);
return ati_remote2_set_mask(val, kp, ATI_REMOTE2_MAX_MODE_MASK);
}
-static int ati_remote2_get_mode_mask(char *buffer, struct kernel_param *kp)
+static int ati_remote2_get_mode_mask(char *buffer,
+ const struct kernel_param *kp)
{
pr_debug("%s()\n", __func__);
@@ -88,15 +92,19 @@ static int ati_remote2_get_mode_mask(char *buffer, struct kernel_param *kp)
static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK;
#define param_check_channel_mask(name, p) __param_check(name, p, unsigned int)
-#define param_set_channel_mask ati_remote2_set_channel_mask
-#define param_get_channel_mask ati_remote2_get_channel_mask
+static struct kernel_param_ops param_ops_channel_mask = {
+ .set = ati_remote2_set_channel_mask,
+ .get = ati_remote2_get_channel_mask,
+};
module_param(channel_mask, channel_mask, 0644);
MODULE_PARM_DESC(channel_mask, "Bitmask of channels to accept <15:Channel16>...<1:Channel2><0:Channel1>");
static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK;
#define param_check_mode_mask(name, p) __param_check(name, p, unsigned int)
-#define param_set_mode_mask ati_remote2_set_mode_mask
-#define param_get_mode_mask ati_remote2_get_mode_mask
+static struct kernel_param_ops param_ops_mode_mask = {
+ .set = ati_remote2_set_mode_mask,
+ .get = ati_remote2_get_mode_mask,
+};
module_param(mode_mask, mode_mask, 0644);
MODULE_PARM_DESC(mode_mask, "Bitmask of modes to accept <4:PC><3:AUX4><2:AUX3><1:AUX2><0:AUX1>");
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 07c53798301a..e4570dbff543 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -39,11 +39,13 @@ MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
static unsigned int psmouse_max_proto = PSMOUSE_AUTO;
-static int psmouse_set_maxproto(const char *val, struct kernel_param *kp);
-static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp);
+static int psmouse_set_maxproto(const char *val, const struct kernel_param *);
+static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp);
+static struct kernel_param_ops param_ops_proto_abbrev = {
+ .set = psmouse_set_maxproto,
+ .get = psmouse_get_maxproto,
+};
#define param_check_proto_abbrev(name, p) __param_check(name, p, unsigned int)
-#define param_set_proto_abbrev psmouse_set_maxproto
-#define param_get_proto_abbrev psmouse_get_maxproto
module_param_named(proto, psmouse_max_proto, proto_abbrev, 0644);
MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, any). Useful for KVM switches.");
@@ -1652,7 +1654,7 @@ static ssize_t psmouse_attr_set_resolution(struct psmouse *psmouse, void *data,
}
-static int psmouse_set_maxproto(const char *val, struct kernel_param *kp)
+static int psmouse_set_maxproto(const char *val, const struct kernel_param *kp)
{
const struct psmouse_protocol *proto;
@@ -1669,7 +1671,7 @@ static int psmouse_set_maxproto(const char *val, struct kernel_param *kp)
return 0;
}
-static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp)
+static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp)
{
int type = *((unsigned int *)kp->arg);
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 610e914abe6c..2b6f750ee394 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -109,8 +109,7 @@ MODULE_PARM_DESC(mpt_debug_level, " debug level - refer to mptdebug.h \
int mpt_fwfault_debug;
EXPORT_SYMBOL(mpt_fwfault_debug);
-module_param_call(mpt_fwfault_debug, param_set_int, param_get_int,
- &mpt_fwfault_debug, 0600);
+module_param(mpt_fwfault_debug, int, 0600);
MODULE_PARM_DESC(mpt_fwfault_debug, "Enable detection of Firmware fault"
" and halt Firmware on fault - (default=0)");
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index 3648b23d5c92..42660e55119e 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -120,9 +120,9 @@ static int count = DEFAULT_COUNT;
module_param(recur_count, int, 0644);
MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\
"default is 10");
-module_param(cpoint_name, charp, 0644);
+module_param(cpoint_name, charp, 0444);
MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed");
-module_param(cpoint_type, charp, 0644);
+module_param(cpoint_type, charp, 0444);
MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\
"hitting the crash point");
module_param(cpoint_count, int, 0644);
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 85e1b6a3ac1b..926c7215c667 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -236,6 +236,7 @@ struct myri10ge_priv {
int watchdog_resets;
int watchdog_pause;
int pause;
+ bool fw_name_allocated;
char *fw_name;
char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE];
char *product_code_string;
@@ -268,6 +269,7 @@ MODULE_FIRMWARE("myri10ge_eth_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");
+/* Careful: must be accessed under kparam_block_sysfs_write */
static char *myri10ge_fw_name = NULL;
module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
@@ -373,6 +375,14 @@ static inline void put_be32(__be32 val, __be32 __iomem * p)
static struct net_device_stats *myri10ge_get_stats(struct net_device *dev);
+static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated)
+{
+ if (mgp->fw_name_allocated)
+ kfree(mgp->fw_name);
+ mgp->fw_name = name;
+ mgp->fw_name_allocated = allocated;
+}
+
static int
myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
struct myri10ge_cmd *data, int atomic)
@@ -744,7 +754,7 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt)
dev_warn(&mgp->pdev->dev, "via hotplug\n");
}
- mgp->fw_name = "adopted";
+ set_fw_name(mgp, "adopted", false);
mgp->tx_boundary = 2048;
myri10ge_dummy_rdma(mgp, 1);
status = myri10ge_get_firmware_capabilities(mgp);
@@ -3260,7 +3270,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
* load the optimized firmware (which assumes aligned PCIe
* completions) in order to see if it works on this host.
*/
- mgp->fw_name = myri10ge_fw_aligned;
+ set_fw_name(mgp, myri10ge_fw_aligned, false);
status = myri10ge_load_firmware(mgp, 1);
if (status != 0) {
goto abort;
@@ -3288,7 +3298,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
abort:
/* fall back to using the unaligned firmware */
mgp->tx_boundary = 2048;
- mgp->fw_name = myri10ge_fw_unaligned;
+ set_fw_name(mgp, myri10ge_fw_unaligned, false);
}
@@ -3311,7 +3321,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
dev_info(&mgp->pdev->dev, "PCIE x%d Link\n",
link_width);
mgp->tx_boundary = 4096;
- mgp->fw_name = myri10ge_fw_aligned;
+ set_fw_name(mgp, myri10ge_fw_aligned, false);
} else {
myri10ge_firmware_probe(mgp);
}
@@ -3320,22 +3330,29 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
dev_info(&mgp->pdev->dev,
"Assuming aligned completions (forced)\n");
mgp->tx_boundary = 4096;
- mgp->fw_name = myri10ge_fw_aligned;
+ set_fw_name(mgp, myri10ge_fw_aligned, false);
} else {
dev_info(&mgp->pdev->dev,
"Assuming unaligned completions (forced)\n");
mgp->tx_boundary = 2048;
- mgp->fw_name = myri10ge_fw_unaligned;
+ set_fw_name(mgp, myri10ge_fw_unaligned, false);
}
}
+
+ kparam_block_sysfs_write(myri10ge_fw_name);
if (myri10ge_fw_name != NULL) {
- overridden = 1;
- mgp->fw_name = myri10ge_fw_name;
+ char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL);
+ if (fw_name) {
+ overridden = 1;
+ set_fw_name(mgp, fw_name, true);
+ }
}
+ kparam_unblock_sysfs_write(myri10ge_fw_name);
+
if (mgp->board_number < MYRI10GE_MAX_BOARDS &&
myri10ge_fw_names[mgp->board_number] != NULL &&
strlen(myri10ge_fw_names[mgp->board_number])) {
- mgp->fw_name = myri10ge_fw_names[mgp->board_number];
+ set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false);
overridden = 1;
}
if (overridden)
@@ -3699,6 +3716,7 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
struct myri10ge_cmd cmd;
struct pci_dev *pdev = mgp->pdev;
char *old_fw;
+ bool old_allocated;
int i, status, ncpus, msix_cap;
mgp->num_slices = 1;
@@ -3711,17 +3729,23 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
/* try to load the slice aware rss firmware */
old_fw = mgp->fw_name;
+ old_allocated = mgp->fw_name_allocated;
+ /* don't free old_fw if we override it. */
+ mgp->fw_name_allocated = false;
+
if (myri10ge_fw_name != NULL) {
dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n",
myri10ge_fw_name);
- mgp->fw_name = myri10ge_fw_name;
+ set_fw_name(mgp, myri10ge_fw_name, false);
} else if (old_fw == myri10ge_fw_aligned)
- mgp->fw_name = myri10ge_fw_rss_aligned;
+ set_fw_name(mgp, myri10ge_fw_rss_aligned, false);
else
- mgp->fw_name = myri10ge_fw_rss_unaligned;
+ set_fw_name(mgp, myri10ge_fw_rss_unaligned, false);
status = myri10ge_load_firmware(mgp, 0);
if (status != 0) {
dev_info(&pdev->dev, "Rss firmware not found\n");
+ if (old_allocated)
+ kfree(old_fw);
return;
}
@@ -3787,6 +3811,8 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
mgp->num_slices);
if (status == 0) {
pci_disable_msix(pdev);
+ if (old_allocated)
+ kfree(old_fw);
return;
}
if (status > 0)
@@ -3803,7 +3829,7 @@ disable_msix:
abort_with_fw:
mgp->num_slices = 1;
- mgp->fw_name = old_fw;
+ set_fw_name(mgp, old_fw, old_allocated);
myri10ge_load_firmware(mgp, 0);
}
@@ -4033,6 +4059,7 @@ abort_with_enabled:
pci_disable_device(pdev);
abort_with_netdev:
+ set_fw_name(mgp, NULL, false);
free_netdev(netdev);
return status;
}
@@ -4077,6 +4104,7 @@ static void myri10ge_remove(struct pci_dev *pdev)
dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
mgp->cmd, mgp->cmd_bus);
+ set_fw_name(mgp, NULL, false);
free_netdev(netdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 01e99f22210e..9142bfd3bab7 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
err = 0;
tfile->tun = tun;
tun->tfile = tfile;
+ tun->socket.file = file;
dev_hold(tun->dev);
sock_hold(tun->socket.sk);
atomic_inc(&tfile->count);
@@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun)
/* Detach from net device */
netif_tx_lock_bh(tun->dev);
tun->tfile = NULL;
+ tun->socket.file = NULL;
netif_tx_unlock_bh(tun->dev);
/* Drop read queue */
@@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
/* Notify and wake up reader process */
if (tun->flags & TUN_FASYNC)
kill_fasync(&tun->fasync, SIGIO, POLL_IN);
- wake_up_interruptible(&tun->socket.wait);
+ wake_up_interruptible_poll(&tun->socket.wait, POLLIN |
+ POLLRDNORM | POLLRDBAND);
return NETDEV_TX_OK;
drop:
@@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
len = min_t(int, skb->len, len);
skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
- total += len;
+ total += skb->len;
tun->dev->stats.tx_packets++;
tun->dev->stats.tx_bytes += len;
@@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
return total;
}
-static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
- unsigned long count, loff_t pos)
+static ssize_t tun_do_read(struct tun_struct *tun,
+ struct kiocb *iocb, const struct iovec *iv,
+ ssize_t len, int noblock)
{
- struct file *file = iocb->ki_filp;
- struct tun_file *tfile = file->private_data;
- struct tun_struct *tun = __tun_get(tfile);
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
- ssize_t len, ret = 0;
-
- if (!tun)
- return -EBADFD;
+ ssize_t ret = 0;
DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
- len = iov_length(iv, count);
- if (len < 0) {
- ret = -EINVAL;
- goto out;
- }
-
add_wait_queue(&tun->socket.wait, &wait);
while (len) {
current->state = TASK_INTERRUPTIBLE;
/* Read frames from the queue */
if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
- if (file->f_flags & O_NONBLOCK) {
+ if (noblock) {
ret = -EAGAIN;
break;
}
@@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
current->state = TASK_RUNNING;
remove_wait_queue(&tun->socket.wait, &wait);
+ return ret;
+}
+
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct tun_file *tfile = file->private_data;
+ struct tun_struct *tun = __tun_get(tfile);
+ ssize_t len, ret;
+
+ if (!tun)
+ return -EBADFD;
+ len = iov_length(iv, count);
+ if (len < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK);
+ ret = min_t(ssize_t, ret, len);
out:
tun_put(tun);
return ret;
@@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk)
return;
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible_sync(sk->sk_sleep);
+ wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+ POLLWRNORM | POLLWRBAND);
tun = container_of(sk, struct tun_sock, sk)->tun;
kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
@@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk)
free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
}
+static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
+{
+ struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+ return tun_get_user(tun, m->msg_iov, total_len,
+ m->msg_flags & MSG_DONTWAIT);
+}
+
+static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len,
+ int flags)
+{
+ struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+ int ret;
+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+ return -EINVAL;
+ ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
+ flags & MSG_DONTWAIT);
+ if (ret > total_len) {
+ m->msg_flags |= MSG_TRUNC;
+ ret = flags & MSG_TRUNC ? ret : total_len;
+ }
+ return ret;
+}
+
+/* Ops structure to mimic raw sockets with tun */
+static const struct proto_ops tun_socket_ops = {
+ .sendmsg = tun_sendmsg,
+ .recvmsg = tun_recvmsg,
+};
+
static struct proto tun_proto = {
.name = "tun",
.owner = THIS_MODULE,
@@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
goto err_free_dev;
init_waitqueue_head(&tun->socket.wait);
+ tun->socket.ops = &tun_socket_ops;
sock_init_data(&tun->socket, sk);
sk->sk_write_space = tun_sock_write_space;
sk->sk_sndbuf = INT_MAX;
@@ -1525,6 +1571,23 @@ static void tun_cleanup(void)
rtnl_link_unregister(&tun_link_ops);
}
+/* Get an underlying socket object from tun file. Returns error unless file is
+ * attached to a device. The returned object works like a packet socket, it
+ * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
+ * holding a reference to the file for as long as the socket is in use. */
+struct socket *tun_get_socket(struct file *file)
+{
+ struct tun_struct *tun;
+ if (file->f_op != &tun_fops)
+ return ERR_PTR(-EINVAL);
+ tun = tun_get(file);
+ if (!tun)
+ return ERR_PTR(-EBADFD);
+ tun_put(tun);
+ return &tun->socket;
+}
+EXPORT_SYMBOL_GPL(tun_get_socket);
+
module_init(tun_init);
module_exit(tun_cleanup);
MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c
index 09fcfad742e7..71d27ccec630 100644
--- a/drivers/net/wireless/libertas/if_sdio.c
+++ b/drivers/net/wireless/libertas/if_sdio.c
@@ -122,6 +122,8 @@ struct if_sdio_card {
const char *helper;
const char *firmware;
+ bool helper_allocated;
+ bool firmware_allocated;
u8 buffer[65536];
@@ -1001,16 +1003,34 @@ static int if_sdio_probe(struct sdio_func *func,
card->helper = if_sdio_models[i].helper;
card->firmware = if_sdio_models[i].firmware;
+ kparam_block_sysfs_write(helper_name);
if (lbs_helper_name) {
+ char *helper = kstrdup(lbs_helper_name, GFP_KERNEL);
+ if (!helper) {
+ kparam_unblock_sysfs_write(helper_name);
+ ret = -ENOMEM;
+ goto free;
+ }
lbs_deb_sdio("overriding helper firmware: %s\n",
lbs_helper_name);
- card->helper = lbs_helper_name;
+ card->helper = helper;
+ card->helper_allocated = true;
}
+ kparam_unblock_sysfs_write(helper_name);
+ kparam_block_sysfs_write(fw_name);
if (lbs_fw_name) {
+ char *fw_name = kstrdup(lbs_fw_name, GFP_KERNEL);
+ if (!fw_name) {
+ kparam_unblock_sysfs_write(fw_name);
+ ret = -ENOMEM;
+ goto free;
+ }
lbs_deb_sdio("overriding firmware: %s\n", lbs_fw_name);
- card->firmware = lbs_fw_name;
+ card->firmware = fw_name;
+ card->firmware_allocated = true;
}
+ kparam_unblock_sysfs_write(fw_name);
sdio_claim_host(func);
@@ -1125,6 +1145,10 @@ free:
kfree(packet);
}
+ if (card->helper_allocated)
+ kfree(card->helper);
+ if (card->firmware_allocated)
+ kfree(card->firmware);
kfree(card);
goto out;
@@ -1175,6 +1199,10 @@ static void if_sdio_remove(struct sdio_func *func)
kfree(packet);
}
+ if (card->helper_allocated)
+ kfree(card->helper);
+ if (card->firmware_allocated)
+ kfree(card->firmware);
kfree(card);
lbs_deb_leave(LBS_DEB_SDIO);
diff --git a/drivers/net/wireless/libertas/if_usb.c b/drivers/net/wireless/libertas/if_usb.c
index 65e174595d12..a41007ebe1f3 100644
--- a/drivers/net/wireless/libertas/if_usb.c
+++ b/drivers/net/wireless/libertas/if_usb.c
@@ -290,10 +290,13 @@ static int if_usb_probe(struct usb_interface *intf,
}
/* Upload firmware */
+ kparam_block_sysfs_write(fw_name);
if (__if_usb_prog_firmware(cardp, lbs_fw_name, BOOT_CMD_FW_BY_USB)) {
+ kparam_unblock_sysfs_write(fw_name);
lbs_deb_usbd(&udev->dev, "FW upload failed\n");
goto err_prog_firmware;
}
+ kparam_unblock_sysfs_write(fw_name);
if (!(priv = lbs_add_card(cardp, &udev->dev)))
goto err_prog_firmware;
diff --git a/drivers/net/wireless/libertas_tf/if_usb.c b/drivers/net/wireless/libertas_tf/if_usb.c
index 3691c307e674..669a3de68508 100644
--- a/drivers/net/wireless/libertas_tf/if_usb.c
+++ b/drivers/net/wireless/libertas_tf/if_usb.c
@@ -650,12 +650,15 @@ static int if_usb_prog_firmware(struct if_usb_card *cardp)
static int reset_count = 10;
int ret = 0;
+ kparam_block_sysfs_write(fw_name);
ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev);
if (ret < 0) {
printk(KERN_INFO "libertastf: firmware %s not found\n",
lbtf_fw_name);
+ kparam_unblock_sysfs_write(fw_name);
goto done;
}
+ kparam_unblock_sysfs_write(fw_name);
if (check_fwfile_format(cardp->fw->data, cardp->fw->size))
goto release_fw;
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index b52b773d49d9..c7307167e33c 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -726,6 +726,7 @@ bfad_drv_init(struct bfad_s *bfad)
memset(&driver_info, 0, sizeof(driver_info));
strncpy(driver_info.version, BFAD_DRIVER_VERSION,
sizeof(driver_info.version) - 1);
+ __kernel_param_lock();
if (host_name)
strncpy(driver_info.host_machine_name, host_name,
sizeof(driver_info.host_machine_name) - 1);
@@ -735,6 +736,7 @@ bfad_drv_init(struct bfad_s *bfad)
if (os_patch)
strncpy(driver_info.host_os_patch, os_patch,
sizeof(driver_info.host_os_patch) - 1);
+ __kernel_param_unlock();
strncpy(driver_info.os_device_name, bfad->pci_name,
sizeof(driver_info.os_device_name - 1));
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 4a43b74c0d27..0b728aa885bb 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1227,7 +1227,7 @@ int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
"CPU.\n");
spin_unlock_bh(&fps->fcoe_rx_list.lock);
- cpu = first_cpu(cpu_online_map);
+ cpu = cpumask_first(cpu_online_mask);
fps = &per_cpu(fcoe_percpu, cpu);
spin_lock_bh(&fps->fcoe_rx_list.lock);
if (!fps->thread) {
diff --git a/drivers/staging/rtl8187se/r8180_core.c b/drivers/staging/rtl8187se/r8180_core.c
index 53e654d0d4fa..9ca3c6f84dc7 100644
--- a/drivers/staging/rtl8187se/r8180_core.c
+++ b/drivers/staging/rtl8187se/r8180_core.c
@@ -66,7 +66,7 @@ static struct pci_device_id rtl8180_pci_id_tbl[] __devinitdata = {
};
-static char* ifname = "wlan%d";
+static char ifname[IFNAMSIZ] = "wlan%d";
static int hwseqnum = 0;
static int hwwep = 0;
static int channels = 0x3fff;
@@ -79,7 +79,7 @@ MODULE_AUTHOR("Andrea Merello <andreamrl@tiscali.it>");
MODULE_DESCRIPTION("Linux driver for Realtek RTL8180 / RTL8185 WiFi cards");
-module_param(ifname, charp, S_IRUGO|S_IWUSR );
+module_param_string(ifname, ifname, sizef(ifname), S_IRUGO|S_IWUSR);
module_param(hwseqnum,int, S_IRUGO|S_IWUSR);
module_param(hwwep,int, S_IRUGO|S_IWUSR);
module_param(channels,int, S_IRUGO|S_IWUSR);
@@ -3918,7 +3918,7 @@ static int __devinit rtl8180_pci_probe(struct pci_dev *pdev,
if (dev_alloc_name(dev, ifname) < 0){
DMESG("Oops: devname already taken! Trying wlan%%d...\n");
- ifname = "wlan%d";
+ strcpy(ifname, "wlan%d");
dev_alloc_name(dev, ifname);
}
diff --git a/drivers/staging/rtl8192e/r8192E_core.c b/drivers/staging/rtl8192e/r8192E_core.c
index b0802a7aeb5f..c7f72ea54f08 100644
--- a/drivers/staging/rtl8192e/r8192E_core.c
+++ b/drivers/staging/rtl8192e/r8192E_core.c
@@ -115,7 +115,7 @@ static struct pci_device_id rtl8192_pci_id_tbl[] __devinitdata = {
{}
};
-static char* ifname = "wlan%d";
+static char ifname[IFNAMSIZ] = "wlan%d";
static int hwwep = 1; //default use hw. set 0 to use software security
static int channels = 0x3fff;
@@ -126,7 +126,7 @@ MODULE_DEVICE_TABLE(pci, rtl8192_pci_id_tbl);
MODULE_DESCRIPTION("Linux driver for Realtek RTL819x WiFi cards");
-module_param(ifname, charp, S_IRUGO|S_IWUSR );
+module_param_string(ifname, ifname, sizeof(ifname), S_IRUGO|S_IWUSR);
//module_param(hwseqnum,int, S_IRUGO|S_IWUSR);
module_param(hwwep,int, S_IRUGO|S_IWUSR);
module_param(channels,int, S_IRUGO|S_IWUSR);
@@ -5972,7 +5972,7 @@ static int __devinit rtl8192_pci_probe(struct pci_dev *pdev,
if (dev_alloc_name(dev, ifname) < 0){
RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n");
- ifname = "wlan%d";
+ strcpy(ifname, "wlan%d");
dev_alloc_name(dev, ifname);
}
diff --git a/drivers/staging/rtl8192su/r8192U_core.c b/drivers/staging/rtl8192su/r8192U_core.c
index 66274d7666ff..e24815f52142 100644
--- a/drivers/staging/rtl8192su/r8192U_core.c
+++ b/drivers/staging/rtl8192su/r8192U_core.c
@@ -136,13 +136,13 @@ MODULE_VERSION("V 1.1");
MODULE_DEVICE_TABLE(usb, rtl8192_usb_id_tbl);
MODULE_DESCRIPTION("Linux driver for Realtek RTL8192 USB WiFi cards");
-static char* ifname = "wlan%d";
+static char ifname[IFNAMSIZ] = "wlan%d";
static int hwwep = 1; //default use hw. set 0 to use software security
static int channels = 0x3fff;
-module_param(ifname, charp, S_IRUGO|S_IWUSR );
+module_param_string(ifname, ifname, sizef(ifname), S_IRUGO|S_IWUSR);
//module_param(hwseqnum,int, S_IRUGO|S_IWUSR);
module_param(hwwep,int, S_IRUGO|S_IWUSR);
module_param(channels,int, S_IRUGO|S_IWUSR);
@@ -7463,7 +7463,7 @@ static int __devinit rtl8192_usb_probe(struct usb_interface *intf,
if (dev_alloc_name(dev, ifname) < 0){
RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n");
- ifname = "wlan%d";
+ strcpy(ifname, "wlan%d");
dev_alloc_name(dev, ifname);
}
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index d171b563e94c..283a178997a5 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -1550,6 +1550,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver)
char file_arr[] = "CMVxy.bin";
char *file;
+ kparam_block_sysfs_write(cmv_file);
/* set proper name corresponding modem version and line type */
if (cmv_file[sc->modem_index] == NULL) {
if (UEA_CHIP_VERSION(sc) == ADI930)
@@ -1568,6 +1569,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver)
strlcat(cmv_name, file, UEA_FW_NAME_MAX);
if (ver == 2)
strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX);
+ kparam_unblock_sysfs_write(cmv_file);
}
static int request_cmvs_old(struct uea_softc *sc,
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
new file mode 100644
index 000000000000..9e9355367bb3
--- /dev/null
+++ b/drivers/vhost/Kconfig
@@ -0,0 +1,11 @@
+config VHOST_NET
+ tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)"
+ depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL
+ ---help---
+ This kernel module can be loaded in host kernel to accelerate
+ guest networking with virtio_net. Not to be confused with virtio_net
+ module itself which needs to be loaded in guest kernel.
+
+ To compile this driver as a module, choose M here: the module will
+ be called vhost_net.
+
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
new file mode 100644
index 000000000000..72dd02050bb9
--- /dev/null
+++ b/drivers/vhost/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_VHOST_NET) += vhost_net.o
+vhost_net-y := vhost.o net.o
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
new file mode 100644
index 000000000000..22d5fef472cc
--- /dev/null
+++ b/drivers/vhost/net.c
@@ -0,0 +1,648 @@
+/* Copyright (C) 2009 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * virtio-net server in host kernel.
+ */
+
+#include <linux/compat.h>
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <linux/mmu_context.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/rcupdate.h>
+#include <linux/file.h>
+
+#include <linux/net.h>
+#include <linux/if_packet.h>
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <net/sock.h>
+
+#include "vhost.h"
+
+/* Max number of bytes transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others. */
+#define VHOST_NET_WEIGHT 0x80000
+
+enum {
+ VHOST_NET_VQ_RX = 0,
+ VHOST_NET_VQ_TX = 1,
+ VHOST_NET_VQ_MAX = 2,
+};
+
+enum vhost_net_poll_state {
+ VHOST_NET_POLL_DISABLED = 0,
+ VHOST_NET_POLL_STARTED = 1,
+ VHOST_NET_POLL_STOPPED = 2,
+};
+
+struct vhost_net {
+ struct vhost_dev dev;
+ struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
+ struct vhost_poll poll[VHOST_NET_VQ_MAX];
+ /* Tells us whether we are polling a socket for TX.
+ * We only do this when socket buffer fills up.
+ * Protected by tx vq lock. */
+ enum vhost_net_poll_state tx_poll_state;
+};
+
+/* Pop first len bytes from iovec. Return number of segments used. */
+static int move_iovec_hdr(struct iovec *from, struct iovec *to,
+ size_t len, int iov_count)
+{
+ int seg = 0;
+ size_t size;
+ while (len && seg < iov_count) {
+ size = min(from->iov_len, len);
+ to->iov_base = from->iov_base;
+ to->iov_len = size;
+ from->iov_len -= size;
+ from->iov_base += size;
+ len -= size;
+ ++from;
+ ++to;
+ ++seg;
+ }
+ return seg;
+}
+
+/* Caller must have TX VQ lock */
+static void tx_poll_stop(struct vhost_net *net)
+{
+ if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
+ return;
+ vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
+ net->tx_poll_state = VHOST_NET_POLL_STOPPED;
+}
+
+/* Caller must have TX VQ lock */
+static void tx_poll_start(struct vhost_net *net, struct socket *sock)
+{
+ if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
+ return;
+ vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
+ net->tx_poll_state = VHOST_NET_POLL_STARTED;
+}
+
+/* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
+static void handle_tx(struct vhost_net *net)
+{
+ struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
+ unsigned head, out, in, s;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_iov = vq->iov,
+ .msg_flags = MSG_DONTWAIT,
+ };
+ size_t len, total_len = 0;
+ int err, wmem;
+ size_t hdr_size;
+ struct socket *sock = rcu_dereference(vq->private_data);
+ if (!sock)
+ return;
+
+ wmem = atomic_read(&sock->sk->sk_wmem_alloc);
+ if (wmem >= sock->sk->sk_sndbuf)
+ return;
+
+ use_mm(net->dev.mm);
+ mutex_lock(&vq->mutex);
+ vhost_disable_notify(vq);
+
+ if (wmem < sock->sk->sk_sndbuf * 2)
+ tx_poll_stop(net);
+ hdr_size = vq->hdr_size;
+
+ for (;;) {
+ head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ NULL, NULL);
+ /* Nothing new? Wait for eventfd to tell us they refilled. */
+ if (head == vq->num) {
+ wmem = atomic_read(&sock->sk->sk_wmem_alloc);
+ if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
+ tx_poll_start(net, sock);
+ set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+ break;
+ }
+ if (unlikely(vhost_enable_notify(vq))) {
+ vhost_disable_notify(vq);
+ continue;
+ }
+ break;
+ }
+ if (in) {
+ vq_err(vq, "Unexpected descriptor format for TX: "
+ "out %d, int %d\n", out, in);
+ break;
+ }
+ /* Skip header. TODO: support TSO. */
+ s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
+ msg.msg_iovlen = out;
+ len = iov_length(vq->iov, out);
+ /* Sanity check */
+ if (!len) {
+ vq_err(vq, "Unexpected header len for TX: "
+ "%zd expected %zd\n",
+ iov_length(vq->hdr, s), hdr_size);
+ break;
+ }
+ /* TODO: Check specific error and bomb out unless ENOBUFS? */
+ err = sock->ops->sendmsg(NULL, sock, &msg, len);
+ if (unlikely(err < 0)) {
+ vhost_discard_vq_desc(vq);
+ tx_poll_start(net, sock);
+ break;
+ }
+ if (err != len)
+ pr_err("Truncated TX packet: "
+ " len %d != %zd\n", err, len);
+ vhost_add_used_and_signal(&net->dev, vq, head, 0);
+ total_len += len;
+ if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+ }
+
+ mutex_unlock(&vq->mutex);
+ unuse_mm(net->dev.mm);
+}
+
+/* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
+static void handle_rx(struct vhost_net *net)
+{
+ struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
+ unsigned head, out, in, log, s;
+ struct vhost_log *vq_log;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_control = NULL, /* FIXME: get and handle RX aux data. */
+ .msg_controllen = 0,
+ .msg_iov = vq->iov,
+ .msg_flags = MSG_DONTWAIT,
+ };
+
+ struct virtio_net_hdr hdr = {
+ .flags = 0,
+ .gso_type = VIRTIO_NET_HDR_GSO_NONE
+ };
+
+ size_t len, total_len = 0;
+ int err;
+ size_t hdr_size;
+ struct socket *sock = rcu_dereference(vq->private_data);
+ if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
+ return;
+
+ use_mm(net->dev.mm);
+ mutex_lock(&vq->mutex);
+ vhost_disable_notify(vq);
+ hdr_size = vq->hdr_size;
+
+ vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
+ vq->log : NULL;
+
+ for (;;) {
+ head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ vq_log, &log);
+ /* OK, now we need to know about added descriptors. */
+ if (head == vq->num) {
+ if (unlikely(vhost_enable_notify(vq))) {
+ /* They have slipped one in as we were
+ * doing that: check again. */
+ vhost_disable_notify(vq);
+ continue;
+ }
+ /* Nothing new? Wait for eventfd to tell us
+ * they refilled. */
+ break;
+ }
+ /* We don't need to be notified again. */
+ if (out) {
+ vq_err(vq, "Unexpected descriptor format for RX: "
+ "out %d, int %d\n",
+ out, in);
+ break;
+ }
+ /* Skip header. TODO: support TSO/mergeable rx buffers. */
+ s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
+ msg.msg_iovlen = in;
+ len = iov_length(vq->iov, in);
+ /* Sanity check */
+ if (!len) {
+ vq_err(vq, "Unexpected header len for RX: "
+ "%zd expected %zd\n",
+ iov_length(vq->hdr, s), hdr_size);
+ break;
+ }
+ err = sock->ops->recvmsg(NULL, sock, &msg,
+ len, MSG_DONTWAIT | MSG_TRUNC);
+ /* TODO: Check specific error and bomb out unless EAGAIN? */
+ if (err < 0) {
+ vhost_discard_vq_desc(vq);
+ break;
+ }
+ /* TODO: Should check and handle checksum. */
+ if (err > len) {
+ pr_err("Discarded truncated rx packet: "
+ " len %d > %zd\n", err, len);
+ vhost_discard_vq_desc(vq);
+ continue;
+ }
+ len = err;
+ err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
+ if (err) {
+ vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
+ vq->iov->iov_base, err);
+ break;
+ }
+ len += hdr_size;
+ vhost_add_used_and_signal(&net->dev, vq, head, len);
+ if (unlikely(vq_log))
+ vhost_log_write(vq, vq_log, log, len);
+ total_len += len;
+ if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+ }
+
+ mutex_unlock(&vq->mutex);
+ unuse_mm(net->dev.mm);
+}
+
+static void handle_tx_kick(struct work_struct *work)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_net *net;
+ vq = container_of(work, struct vhost_virtqueue, poll.work);
+ net = container_of(vq->dev, struct vhost_net, dev);
+ handle_tx(net);
+}
+
+static void handle_rx_kick(struct work_struct *work)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_net *net;
+ vq = container_of(work, struct vhost_virtqueue, poll.work);
+ net = container_of(vq->dev, struct vhost_net, dev);
+ handle_rx(net);
+}
+
+static void handle_tx_net(struct work_struct *work)
+{
+ struct vhost_net *net;
+ net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work);
+ handle_tx(net);
+}
+
+static void handle_rx_net(struct work_struct *work)
+{
+ struct vhost_net *net;
+ net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work);
+ handle_rx(net);
+}
+
+static int vhost_net_open(struct inode *inode, struct file *f)
+{
+ struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+ int r;
+ if (!n)
+ return -ENOMEM;
+ f->private_data = n;
+ n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
+ n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
+ r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX);
+ if (r < 0) {
+ kfree(n);
+ return r;
+ }
+
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN);
+ n->tx_poll_state = VHOST_NET_POLL_DISABLED;
+ return 0;
+}
+
+static void vhost_net_disable_vq(struct vhost_net *n,
+ struct vhost_virtqueue *vq)
+{
+ if (!vq->private_data)
+ return;
+ if (vq == n->vqs + VHOST_NET_VQ_TX) {
+ tx_poll_stop(n);
+ n->tx_poll_state = VHOST_NET_POLL_DISABLED;
+ } else
+ vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
+}
+
+static void vhost_net_enable_vq(struct vhost_net *n,
+ struct vhost_virtqueue *vq)
+{
+ struct socket *sock = vq->private_data;
+ if (!sock)
+ return;
+ if (vq == n->vqs + VHOST_NET_VQ_TX) {
+ n->tx_poll_state = VHOST_NET_POLL_STOPPED;
+ tx_poll_start(n, sock);
+ } else
+ vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
+}
+
+static struct socket *vhost_net_stop_vq(struct vhost_net *n,
+ struct vhost_virtqueue *vq)
+{
+ struct socket *sock;
+
+ mutex_lock(&vq->mutex);
+ sock = vq->private_data;
+ vhost_net_disable_vq(n, vq);
+ rcu_assign_pointer(vq->private_data, NULL);
+ mutex_unlock(&vq->mutex);
+ return sock;
+}
+
+static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
+ struct socket **rx_sock)
+{
+ *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
+ *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
+}
+
+static void vhost_net_flush_vq(struct vhost_net *n, int index)
+{
+ vhost_poll_flush(n->poll + index);
+ vhost_poll_flush(&n->dev.vqs[index].poll);
+}
+
+static void vhost_net_flush(struct vhost_net *n)
+{
+ vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
+ vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
+}
+
+static int vhost_net_release(struct inode *inode, struct file *f)
+{
+ struct vhost_net *n = f->private_data;
+ struct socket *tx_sock;
+ struct socket *rx_sock;
+
+ vhost_net_stop(n, &tx_sock, &rx_sock);
+ vhost_net_flush(n);
+ vhost_dev_cleanup(&n->dev);
+ if (tx_sock)
+ fput(tx_sock->file);
+ if (rx_sock)
+ fput(rx_sock->file);
+ /* We do an extra flush before freeing memory,
+ * since jobs can re-queue themselves. */
+ vhost_net_flush(n);
+ kfree(n);
+ return 0;
+}
+
+static struct socket *get_raw_socket(int fd)
+{
+ struct {
+ struct sockaddr_ll sa;
+ char buf[MAX_ADDR_LEN];
+ } uaddr;
+ int uaddr_len = sizeof uaddr, r;
+ struct socket *sock = sockfd_lookup(fd, &r);
+ if (!sock)
+ return ERR_PTR(-ENOTSOCK);
+
+ /* Parameter checking */
+ if (sock->sk->sk_type != SOCK_RAW) {
+ r = -ESOCKTNOSUPPORT;
+ goto err;
+ }
+
+ r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
+ &uaddr_len, 0);
+ if (r)
+ goto err;
+
+ if (uaddr.sa.sll_family != AF_PACKET) {
+ r = -EPFNOSUPPORT;
+ goto err;
+ }
+ return sock;
+err:
+ fput(sock->file);
+ return ERR_PTR(r);
+}
+
+static struct socket *get_tun_socket(int fd)
+{
+ struct file *file = fget(fd);
+ struct socket *sock;
+ if (!file)
+ return ERR_PTR(-EBADF);
+ sock = tun_get_socket(file);
+ if (IS_ERR(sock))
+ fput(file);
+ return sock;
+}
+
+static struct socket *get_socket(int fd)
+{
+ struct socket *sock;
+ if (fd == -1)
+ return NULL;
+ sock = get_raw_socket(fd);
+ if (!IS_ERR(sock))
+ return sock;
+ sock = get_tun_socket(fd);
+ if (!IS_ERR(sock))
+ return sock;
+ return ERR_PTR(-ENOTSOCK);
+}
+
+static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
+{
+ struct socket *sock, *oldsock;
+ struct vhost_virtqueue *vq;
+ int r;
+
+ mutex_lock(&n->dev.mutex);
+ r = vhost_dev_check_owner(&n->dev);
+ if (r)
+ goto err;
+
+ if (index >= VHOST_NET_VQ_MAX) {
+ r = -ENOBUFS;
+ goto err;
+ }
+ vq = n->vqs + index;
+ mutex_lock(&vq->mutex);
+ sock = get_socket(fd);
+ if (IS_ERR(sock)) {
+ r = PTR_ERR(sock);
+ goto err;
+ }
+
+ /* start polling new socket */
+ oldsock = vq->private_data;
+ if (sock == oldsock)
+ goto done;
+
+ vhost_net_disable_vq(n, vq);
+ rcu_assign_pointer(vq->private_data, sock);
+ vhost_net_enable_vq(n, vq);
+ mutex_unlock(&vq->mutex);
+done:
+ mutex_unlock(&n->dev.mutex);
+ if (oldsock) {
+ vhost_net_flush_vq(n, index);
+ fput(oldsock->file);
+ }
+ return r;
+err:
+ mutex_unlock(&n->dev.mutex);
+ return r;
+}
+
+static long vhost_net_reset_owner(struct vhost_net *n)
+{
+ struct socket *tx_sock = NULL;
+ struct socket *rx_sock = NULL;
+ long err;
+ mutex_lock(&n->dev.mutex);
+ err = vhost_dev_check_owner(&n->dev);
+ if (err)
+ goto done;
+ vhost_net_stop(n, &tx_sock, &rx_sock);
+ vhost_net_flush(n);
+ err = vhost_dev_reset_owner(&n->dev);
+done:
+ mutex_unlock(&n->dev.mutex);
+ if (tx_sock)
+ fput(tx_sock->file);
+ if (rx_sock)
+ fput(rx_sock->file);
+ return err;
+}
+
+static void vhost_net_set_features(struct vhost_net *n, u64 features)
+{
+ size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ?
+ sizeof(struct virtio_net_hdr) : 0;
+ int i;
+ mutex_lock(&n->dev.mutex);
+ n->dev.acked_features = features;
+ smp_wmb();
+ for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+ mutex_lock(&n->vqs[i].mutex);
+ n->vqs[i].hdr_size = hdr_size;
+ mutex_unlock(&n->vqs[i].mutex);
+ }
+ mutex_unlock(&n->dev.mutex);
+ vhost_net_flush(n);
+}
+
+static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct vhost_net *n = f->private_data;
+ void __user *argp = (void __user *)arg;
+ u32 __user *featurep = argp;
+ struct vhost_vring_file backend;
+ u64 features;
+ int r;
+ switch (ioctl) {
+ case VHOST_NET_SET_BACKEND:
+ r = copy_from_user(&backend, argp, sizeof backend);
+ if (r < 0)
+ return r;
+ return vhost_net_set_backend(n, backend.index, backend.fd);
+ case VHOST_GET_FEATURES:
+ features = VHOST_FEATURES;
+ return put_user(features, featurep);
+ case VHOST_SET_FEATURES:
+ r = get_user(features, featurep);
+ /* No features for now */
+ if (r < 0)
+ return r;
+ if (features & ~VHOST_FEATURES)
+ return -EOPNOTSUPP;
+ vhost_net_set_features(n, features);
+ return 0;
+ case VHOST_RESET_OWNER:
+ return vhost_net_reset_owner(n);
+ default:
+ r = vhost_dev_ioctl(&n->dev, ioctl, arg);
+ vhost_net_flush(n);
+ return r;
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+const static struct file_operations vhost_net_fops = {
+ .owner = THIS_MODULE,
+ .release = vhost_net_release,
+ .unlocked_ioctl = vhost_net_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = vhost_net_compat_ioctl,
+#endif
+ .open = vhost_net_open,
+};
+
+static struct miscdevice vhost_net_misc = {
+ VHOST_NET_MINOR,
+ "vhost-net",
+ &vhost_net_fops,
+};
+
+int vhost_net_init(void)
+{
+ int r = vhost_init();
+ if (r)
+ goto err_init;
+ r = misc_register(&vhost_net_misc);
+ if (r)
+ goto err_reg;
+ return 0;
+err_reg:
+ vhost_cleanup();
+err_init:
+ return r;
+
+}
+module_init(vhost_net_init);
+
+void vhost_net_exit(void)
+{
+ misc_deregister(&vhost_net_misc);
+ vhost_cleanup();
+}
+module_exit(vhost_net_exit);
+
+MODULE_VERSION("0.0.1");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Michael S. Tsirkin");
+MODULE_DESCRIPTION("Host kernel accelerator for virtio net");
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
new file mode 100644
index 000000000000..e7b4dea515dc
--- /dev/null
+++ b/drivers/vhost/vhost.c
@@ -0,0 +1,968 @@
+/* Copyright (C) 2009 Red Hat, Inc.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Inspiration, some code, and most witty comments come from
+ * Documentation/lguest/lguest.c, by Rusty Russell
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Generic code for virtio server in host kernel.
+ */
+
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/rcupdate.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/highmem.h>
+
+#include <linux/net.h>
+#include <linux/if_packet.h>
+#include <linux/if_arp.h>
+
+#include <net/sock.h>
+
+#include "vhost.h"
+
+enum {
+ VHOST_MEMORY_MAX_NREGIONS = 64,
+ VHOST_MEMORY_F_LOG = 0x1,
+};
+
+static struct workqueue_struct *vhost_workqueue;
+
+static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
+ poll_table *pt)
+{
+ struct vhost_poll *poll;
+ poll = container_of(pt, struct vhost_poll, table);
+
+ poll->wqh = wqh;
+ add_wait_queue(wqh, &poll->wait);
+}
+
+static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct vhost_poll *poll;
+ poll = container_of(wait, struct vhost_poll, wait);
+ if (!((unsigned long)key & poll->mask))
+ return 0;
+
+ queue_work(vhost_workqueue, &poll->work);
+ return 0;
+}
+
+/* Init poll structure */
+void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
+ unsigned long mask)
+{
+ INIT_WORK(&poll->work, func);
+ init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
+ init_poll_funcptr(&poll->table, vhost_poll_func);
+ poll->mask = mask;
+}
+
+/* Start polling a file. We add ourselves to file's wait queue. The caller must
+ * keep a reference to a file until after vhost_poll_stop is called. */
+void vhost_poll_start(struct vhost_poll *poll, struct file *file)
+{
+ unsigned long mask;
+ mask = file->f_op->poll(file, &poll->table);
+ if (mask)
+ vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
+}
+
+/* Stop polling a file. After this function returns, it becomes safe to drop the
+ * file reference. You must also flush afterwards. */
+void vhost_poll_stop(struct vhost_poll *poll)
+{
+ remove_wait_queue(poll->wqh, &poll->wait);
+}
+
+/* Flush any work that has been scheduled. When calling this, don't hold any
+ * locks that are also used by the callback. */
+void vhost_poll_flush(struct vhost_poll *poll)
+{
+ flush_work(&poll->work);
+}
+
+void vhost_poll_queue(struct vhost_poll *poll)
+{
+ queue_work(vhost_workqueue, &poll->work);
+}
+
+static void vhost_vq_reset(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq)
+{
+ vq->num = 1;
+ vq->desc = NULL;
+ vq->avail = NULL;
+ vq->used = NULL;
+ vq->last_avail_idx = 0;
+ vq->avail_idx = 0;
+ vq->last_used_idx = 0;
+ vq->used_flags = 0;
+ vq->used_flags = 0;
+ vq->log_used = false;
+ vq->log_addr = -1ull;
+ vq->hdr_size = 0;
+ vq->private_data = NULL;
+ vq->log_base = NULL;
+ vq->error_ctx = NULL;
+ vq->error = NULL;
+ vq->kick = NULL;
+ vq->call_ctx = NULL;
+ vq->call = NULL;
+}
+
+long vhost_dev_init(struct vhost_dev *dev,
+ struct vhost_virtqueue *vqs, int nvqs)
+{
+ int i;
+ dev->vqs = vqs;
+ dev->nvqs = nvqs;
+ mutex_init(&dev->mutex);
+ dev->log_ctx = NULL;
+ dev->log_file = NULL;
+ dev->memory = NULL;
+ dev->mm = NULL;
+
+ for (i = 0; i < dev->nvqs; ++i) {
+ dev->vqs[i].dev = dev;
+ mutex_init(&dev->vqs[i].mutex);
+ vhost_vq_reset(dev, dev->vqs + i);
+ if (dev->vqs[i].handle_kick)
+ vhost_poll_init(&dev->vqs[i].poll,
+ dev->vqs[i].handle_kick,
+ POLLIN);
+ }
+ return 0;
+}
+
+/* Caller should have device mutex */
+long vhost_dev_check_owner(struct vhost_dev *dev)
+{
+ /* Are you the owner? If not, I don't think you mean to do that */
+ return dev->mm == current->mm ? 0 : -EPERM;
+}
+
+/* Caller should have device mutex */
+static long vhost_dev_set_owner(struct vhost_dev *dev)
+{
+ /* Is there an owner already? */
+ if (dev->mm)
+ return -EBUSY;
+ /* No owner, become one */
+ dev->mm = get_task_mm(current);
+ return 0;
+}
+
+/* Caller should have device mutex */
+long vhost_dev_reset_owner(struct vhost_dev *dev)
+{
+ struct vhost_memory *memory;
+
+ /* Restore memory to default 1:1 mapping. */
+ memory = kmalloc(offsetof(struct vhost_memory, regions) +
+ 2 * sizeof *memory->regions, GFP_KERNEL);
+ if (!memory)
+ return -ENOMEM;
+
+ vhost_dev_cleanup(dev);
+
+ memory->nregions = 2;
+ memory->regions[0].guest_phys_addr = 1;
+ memory->regions[0].userspace_addr = 1;
+ memory->regions[0].memory_size = ~0ULL;
+ memory->regions[1].guest_phys_addr = 0;
+ memory->regions[1].userspace_addr = 0;
+ memory->regions[1].memory_size = 1;
+ dev->memory = memory;
+ return 0;
+}
+
+/* Caller should have device mutex */
+void vhost_dev_cleanup(struct vhost_dev *dev)
+{
+ int i;
+ for (i = 0; i < dev->nvqs; ++i) {
+ if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
+ vhost_poll_stop(&dev->vqs[i].poll);
+ vhost_poll_flush(&dev->vqs[i].poll);
+ }
+ if (dev->vqs[i].error_ctx)
+ eventfd_ctx_put(dev->vqs[i].error_ctx);
+ if (dev->vqs[i].error)
+ fput(dev->vqs[i].error);
+ if (dev->vqs[i].kick)
+ fput(dev->vqs[i].kick);
+ if (dev->vqs[i].call_ctx)
+ eventfd_ctx_put(dev->vqs[i].call_ctx);
+ if (dev->vqs[i].call)
+ fput(dev->vqs[i].call);
+ vhost_vq_reset(dev, dev->vqs + i);
+ }
+ if (dev->log_ctx)
+ eventfd_ctx_put(dev->log_ctx);
+ dev->log_ctx = NULL;
+ if (dev->log_file)
+ fput(dev->log_file);
+ dev->log_file = NULL;
+ /* No one will access memory at this point */
+ kfree(dev->memory);
+ dev->memory = NULL;
+ if (dev->mm)
+ mmput(dev->mm);
+ dev->mm = NULL;
+}
+
+static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
+{
+ struct vhost_memory mem, *newmem, *oldmem;
+ unsigned long size = offsetof(struct vhost_memory, regions);
+ long r;
+ r = copy_from_user(&mem, m, size);
+ if (r)
+ return r;
+ if (mem.padding)
+ return -EOPNOTSUPP;
+ if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS)
+ return -E2BIG;
+ newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL);
+ if (!newmem)
+ return -ENOMEM;
+
+ memcpy(newmem, &mem, size);
+ r = copy_from_user(newmem->regions, m->regions,
+ mem.nregions * sizeof *m->regions);
+ if (r) {
+ kfree(newmem);
+ return r;
+ }
+ oldmem = d->memory;
+ rcu_assign_pointer(d->memory, newmem);
+ synchronize_rcu();
+ kfree(oldmem);
+ return 0;
+}
+
+static int init_used(struct vhost_virtqueue *vq,
+ struct vring_used __user *used)
+{
+ int r = put_user(vq->used_flags, &used->flags);
+ if (r)
+ return r;
+ return get_user(vq->last_used_idx, &used->idx);
+}
+
+static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
+{
+ struct file *eventfp, *filep = NULL,
+ *pollstart = NULL, *pollstop = NULL;
+ struct eventfd_ctx *ctx = NULL;
+ u32 __user *idxp = argp;
+ struct vhost_virtqueue *vq;
+ struct vhost_vring_state s;
+ struct vhost_vring_file f;
+ struct vhost_vring_addr a;
+ u32 idx;
+ long r;
+
+ r = get_user(idx, idxp);
+ if (r < 0)
+ return r;
+ if (idx > d->nvqs)
+ return -ENOBUFS;
+
+ vq = d->vqs + idx;
+
+ mutex_lock(&vq->mutex);
+
+ switch (ioctl) {
+ case VHOST_SET_VRING_NUM:
+ r = copy_from_user(&s, argp, sizeof s);
+ if (r < 0)
+ break;
+ if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) {
+ r = -EINVAL;
+ break;
+ }
+ vq->num = s.num;
+ break;
+ case VHOST_SET_VRING_BASE:
+ r = copy_from_user(&s, argp, sizeof s);
+ if (r < 0)
+ break;
+ if (s.num > 0xffff) {
+ r = -EINVAL;
+ break;
+ }
+ vq->last_avail_idx = s.num;
+ /* Forget the cached index value. */
+ vq->avail_idx = vq->last_avail_idx;
+ break;
+ case VHOST_GET_VRING_BASE:
+ s.index = idx;
+ s.num = vq->last_avail_idx;
+ r = copy_to_user(argp, &s, sizeof s);
+ break;
+ case VHOST_SET_VRING_ADDR:
+ r = copy_from_user(&a, argp, sizeof a);
+ if (r < 0)
+ break;
+ if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ /* For 32bit, verify that the top 32bits of the user
+ data are set to zero. */
+ if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
+ (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
+ (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) {
+ r = -EFAULT;
+ break;
+ }
+ if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) ||
+ (a.used_user_addr & (sizeof *vq->used->ring - 1)) ||
+ (a.log_guest_addr & (sizeof *vq->used->ring - 1))) {
+ r = -EINVAL;
+ break;
+ }
+ r = init_used(vq, (struct vring_used __user *)(unsigned long)
+ a.used_user_addr);
+ if (r)
+ break;
+ vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
+ vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
+ vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
+ vq->log_addr = a.log_guest_addr;
+ vq->used = (void __user *)(unsigned long)a.used_user_addr;
+ break;
+ case VHOST_SET_VRING_KICK:
+ r = copy_from_user(&f, argp, sizeof f);
+ if (r < 0)
+ break;
+ eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+ if (IS_ERR(eventfp))
+ return PTR_ERR(eventfp);
+ if (eventfp != vq->kick) {
+ pollstop = filep = vq->kick;
+ pollstart = vq->kick = eventfp;
+ } else
+ filep = eventfp;
+ break;
+ case VHOST_SET_VRING_CALL:
+ r = copy_from_user(&f, argp, sizeof f);
+ if (r < 0)
+ break;
+ eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+ if (IS_ERR(eventfp))
+ return PTR_ERR(eventfp);
+ if (eventfp != vq->call) {
+ filep = vq->call;
+ ctx = vq->call_ctx;
+ vq->call = eventfp;
+ vq->call_ctx = eventfp ?
+ eventfd_ctx_fileget(eventfp) : NULL;
+ } else
+ filep = eventfp;
+ break;
+ case VHOST_SET_VRING_ERR:
+ r = copy_from_user(&f, argp, sizeof f);
+ if (r < 0)
+ break;
+ eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+ if (IS_ERR(eventfp))
+ return PTR_ERR(eventfp);
+ if (eventfp != vq->error) {
+ filep = vq->error;
+ vq->error = eventfp;
+ ctx = vq->error_ctx;
+ vq->error_ctx = eventfp ?
+ eventfd_ctx_fileget(eventfp) : NULL;
+ } else
+ filep = eventfp;
+ break;
+ default:
+ r = -ENOIOCTLCMD;
+ }
+
+ if (pollstop && vq->handle_kick)
+ vhost_poll_stop(&vq->poll);
+
+ if (ctx)
+ eventfd_ctx_put(ctx);
+ if (filep)
+ fput(filep);
+
+ if (pollstart && vq->handle_kick)
+ vhost_poll_start(&vq->poll, vq->kick);
+
+ mutex_unlock(&vq->mutex);
+
+ if (pollstop && vq->handle_kick)
+ vhost_poll_flush(&vq->poll);
+ return r;
+}
+
+long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct file *eventfp, *filep = NULL;
+ struct eventfd_ctx *ctx = NULL;
+ u64 p;
+ long r;
+ int i, fd;
+
+ mutex_lock(&d->mutex);
+ /* If you are not the owner, you can become one */
+ if (ioctl == VHOST_SET_OWNER) {
+ r = vhost_dev_set_owner(d);
+ goto done;
+ }
+
+ /* You must be the owner to do anything else */
+ r = vhost_dev_check_owner(d);
+ if (r)
+ goto done;
+
+ switch (ioctl) {
+ case VHOST_SET_MEM_TABLE:
+ r = vhost_set_memory(d, argp);
+ break;
+ case VHOST_SET_LOG_BASE:
+ r = copy_from_user(&p, argp, sizeof p);
+ if (r < 0)
+ break;
+ if ((u64)(unsigned long)p != p) {
+ r = -EFAULT;
+ break;
+ }
+ for (i = 0; i < d->nvqs; ++i) {
+ mutex_lock(&d->vqs[i].mutex);
+ d->vqs[i].log_base = (void __user *)(unsigned long)p;
+ mutex_unlock(&d->vqs[i].mutex);
+ }
+ break;
+ case VHOST_SET_LOG_FD:
+ r = get_user(fd, (int __user *)argp);
+ if (r < 0)
+ break;
+ eventfp = fd == -1 ? NULL : eventfd_fget(fd);
+ if (IS_ERR(eventfp)) {
+ r = PTR_ERR(eventfp);
+ break;
+ }
+ if (eventfp != d->log_file) {
+ filep = d->log_file;
+ ctx = d->log_ctx;
+ d->log_ctx = eventfp ?
+ eventfd_ctx_fileget(eventfp) : NULL;
+ } else
+ filep = eventfp;
+ for (i = 0; i < d->nvqs; ++i) {
+ mutex_lock(&d->vqs[i].mutex);
+ d->vqs[i].log_ctx = d->log_ctx;
+ mutex_unlock(&d->vqs[i].mutex);
+ }
+ if (ctx)
+ eventfd_ctx_put(ctx);
+ if (filep)
+ fput(filep);
+ break;
+ default:
+ r = vhost_set_vring(d, ioctl, argp);
+ break;
+ }
+done:
+ mutex_unlock(&d->mutex);
+ return r;
+}
+
+static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
+ __u64 addr, __u32 len)
+{
+ struct vhost_memory_region *reg;
+ int i;
+ /* linear search is not brilliant, but we really have on the order of 6
+ * regions in practice */
+ for (i = 0; i < mem->nregions; ++i) {
+ reg = mem->regions + i;
+ if (reg->guest_phys_addr <= addr &&
+ reg->guest_phys_addr + reg->memory_size - 1 >= addr)
+ return reg;
+ }
+ return NULL;
+}
+
+/* TODO: This is really inefficient. We need something like get_user()
+ * (instruction directly accesses the data, with an exception table entry
+ * returning -EFAULT). See Documentation/x86/exception-tables.txt.
+ */
+static int set_bit_to_user(int nr, void __user *addr)
+{
+ unsigned long log = (unsigned long)addr;
+ struct page *page;
+ void *base;
+ int bit = nr + (log % PAGE_SIZE) * 8;
+ int r;
+ r = get_user_pages_fast(log, 1, 1, &page);
+ if (r)
+ return r;
+ base = kmap_atomic(page, KM_USER0);
+ set_bit(bit, base);
+ kunmap_atomic(base, KM_USER0);
+ set_page_dirty_lock(page);
+ put_page(page);
+ return 0;
+}
+
+static int log_write(void __user *log_base,
+ u64 write_address, u64 write_length)
+{
+ int r;
+ if (!write_length)
+ return 0;
+ write_address /= VHOST_PAGE_SIZE;
+ for (;;) {
+ u64 base = (u64)(unsigned long)log_base;
+ u64 log = base + write_address / 8;
+ int bit = write_address % 8;
+ if ((u64)(unsigned long)log != log)
+ return -EFAULT;
+ r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
+ if (r < 0)
+ return r;
+ if (write_length <= VHOST_PAGE_SIZE)
+ break;
+ write_length -= VHOST_PAGE_SIZE;
+ write_address += VHOST_PAGE_SIZE;
+ }
+ return r;
+}
+
+int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+ unsigned int log_num, u64 len)
+{
+ int i, r;
+
+ /* Make sure data written is seen before log. */
+ wmb();
+ for (i = 0; i < log_num; ++i) {
+ u64 l = min(log[i].len, len);
+ r = log_write(vq->log_base, log[i].addr, l);
+ if (r < 0)
+ return r;
+ len -= l;
+ if (!len)
+ return 0;
+ }
+ if (vq->log_ctx)
+ eventfd_signal(vq->log_ctx, 1);
+ /* Length written exceeds what we have stored. This is a bug. */
+ BUG();
+ return 0;
+}
+
+int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
+ struct iovec iov[], int iov_size)
+{
+ const struct vhost_memory_region *reg;
+ struct vhost_memory *mem;
+ struct iovec *_iov;
+ u64 s = 0;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ mem = rcu_dereference(dev->memory);
+ while ((u64)len > s) {
+ u64 size;
+ if (ret >= iov_size) {
+ ret = -ENOBUFS;
+ break;
+ }
+ reg = find_region(mem, addr, len);
+ if (!reg) {
+ ret = -EFAULT;
+ break;
+ }
+ _iov = iov + ret;
+ size = reg->memory_size - addr + reg->guest_phys_addr;
+ _iov->iov_len = min((u64)len, size);
+ _iov->iov_base = (void *)(unsigned long)
+ (reg->userspace_addr + addr - reg->guest_phys_addr);
+ s += size;
+ addr += size;
+ ++ret;
+ }
+
+ rcu_read_unlock();
+ return ret;
+}
+
+/* Each buffer in the virtqueues is actually a chain of descriptors. This
+ * function returns the next descriptor in the chain,
+ * or -1U if we're at the end. */
+static unsigned next_desc(struct vring_desc *desc)
+{
+ unsigned int next;
+
+ /* If this descriptor says it doesn't chain, we're done. */
+ if (!(desc->flags & VRING_DESC_F_NEXT))
+ return -1U;
+
+ /* Check they're not leading us off end of descriptors. */
+ next = desc->next;
+ /* Make sure compiler knows to grab that: we don't want it changing! */
+ /* We will use the result as an index in an array, so most
+ * architectures only need a compiler barrier here. */
+ read_barrier_depends();
+
+ return next;
+}
+
+static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num,
+ struct vring_desc *indirect)
+{
+ struct vring_desc desc;
+ unsigned int i = 0, count, found = 0;
+ int ret;
+
+ /* Sanity check */
+ if (indirect->len % sizeof desc) {
+ vq_err(vq, "Invalid length in indirect descriptor: "
+ "len 0x%llx not multiple of 0x%zx\n",
+ (unsigned long long)indirect->len,
+ sizeof desc);
+ return -EINVAL;
+ }
+
+ ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
+ ARRAY_SIZE(vq->indirect));
+ if (ret < 0) {
+ vq_err(vq, "Translation failure %d in indirect.\n", ret);
+ return ret;
+ }
+
+ /* We will use the result as an address to read from, so most
+ * architectures only need a compiler barrier here. */
+ read_barrier_depends();
+
+ count = indirect->len / sizeof desc;
+ /* Buffers are chained via a 16 bit next field, so
+ * we can have at most 2^16 of these. */
+ if (count > USHORT_MAX + 1) {
+ vq_err(vq, "Indirect buffer length too big: %d\n",
+ indirect->len);
+ return -E2BIG;
+ }
+
+ do {
+ unsigned iov_count = *in_num + *out_num;
+ if (++found > count) {
+ vq_err(vq, "Loop detected: last one at %u "
+ "indirect size %u\n",
+ i, count);
+ return -EINVAL;
+ }
+ if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect,
+ sizeof desc)) {
+ vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
+ i, (size_t)indirect->addr + i * sizeof desc);
+ return -EINVAL;
+ }
+ if (desc.flags & VRING_DESC_F_INDIRECT) {
+ vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
+ i, (size_t)indirect->addr + i * sizeof desc);
+ return -EINVAL;
+ }
+
+ ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+ iov_size - iov_count);
+ if (ret < 0) {
+ vq_err(vq, "Translation failure %d indirect idx %d\n",
+ ret, i);
+ return ret;
+ }
+ /* If this is an input descriptor, increment that count. */
+ if (desc.flags & VRING_DESC_F_WRITE) {
+ *in_num += ret;
+ if (unlikely(log)) {
+ log[*log_num].addr = desc.addr;
+ log[*log_num].len = desc.len;
+ ++*log_num;
+ }
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors. */
+ if (*in_num) {
+ vq_err(vq, "Indirect descriptor "
+ "has out after in: idx %d\n", i);
+ return -EINVAL;
+ }
+ *out_num += ret;
+ }
+ } while ((i = next_desc(&desc)) != -1);
+ return 0;
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which
+ * is never a valid descriptor number) if none was found. */
+unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
+{
+ struct vring_desc desc;
+ unsigned int i, head, found = 0;
+ u16 last_avail_idx;
+ int ret;
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ last_avail_idx = vq->last_avail_idx;
+ if (get_user(vq->avail_idx, &vq->avail->idx)) {
+ vq_err(vq, "Failed to access avail idx at %p\n",
+ &vq->avail->idx);
+ return vq->num;
+ }
+
+ if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) {
+ vq_err(vq, "Guest moved used index from %u to %u",
+ last_avail_idx, vq->avail_idx);
+ return vq->num;
+ }
+
+ /* If there's nothing new since last we looked, return invalid. */
+ if (vq->avail_idx == last_avail_idx)
+ return vq->num;
+
+ /* Only get avail ring entries after they have been exposed by guest. */
+ rmb();
+
+ /* Grab the next descriptor number they're advertising, and increment
+ * the index we've seen. */
+ if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) {
+ vq_err(vq, "Failed to read head: idx %d address %p\n",
+ last_avail_idx,
+ &vq->avail->ring[last_avail_idx % vq->num]);
+ return vq->num;
+ }
+
+ /* If their number is silly, that's an error. */
+ if (head >= vq->num) {
+ vq_err(vq, "Guest says index %u > %u is available",
+ head, vq->num);
+ return vq->num;
+ }
+
+ /* When we start there are none of either input nor output. */
+ *out_num = *in_num = 0;
+ if (unlikely(log))
+ *log_num = 0;
+
+ i = head;
+ do {
+ unsigned iov_count = *in_num + *out_num;
+ if (i >= vq->num) {
+ vq_err(vq, "Desc index is %u > %u, head = %u",
+ i, vq->num, head);
+ return vq->num;
+ }
+ if (++found > vq->num) {
+ vq_err(vq, "Loop detected: last one at %u "
+ "vq size %u head %u\n",
+ i, vq->num, head);
+ return vq->num;
+ }
+ ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
+ if (ret) {
+ vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+ i, vq->desc + i);
+ return vq->num;
+ }
+ if (desc.flags & VRING_DESC_F_INDIRECT) {
+ ret = get_indirect(dev, vq, iov, iov_size,
+ out_num, in_num,
+ log, log_num, &desc);
+ if (ret < 0) {
+ vq_err(vq, "Failure detected "
+ "in indirect descriptor at idx %d\n", i);
+ return vq->num;
+ }
+ continue;
+ }
+
+ ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+ iov_size - iov_count);
+ if (ret < 0) {
+ vq_err(vq, "Translation failure %d descriptor idx %d\n",
+ ret, i);
+ return vq->num;
+ }
+ if (desc.flags & VRING_DESC_F_WRITE) {
+ /* If this is an input descriptor,
+ * increment that count. */
+ *in_num += ret;
+ if (unlikely(log)) {
+ log[*log_num].addr = desc.addr;
+ log[*log_num].len = desc.len;
+ ++*log_num;
+ }
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors. */
+ if (*in_num) {
+ vq_err(vq, "Descriptor has out after in: "
+ "idx %d\n", i);
+ return vq->num;
+ }
+ *out_num += ret;
+ }
+ } while ((i = next_desc(&desc)) != -1);
+
+ /* On success, increment avail index. */
+ vq->last_avail_idx++;
+ return head;
+}
+
+/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+{
+ vq->last_avail_idx--;
+}
+
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
+{
+ struct vring_used_elem *used;
+
+ /* The virtqueue contains a ring of used buffers. Get a pointer to the
+ * next entry in that used ring. */
+ used = &vq->used->ring[vq->last_used_idx % vq->num];
+ if (put_user(head, &used->id)) {
+ vq_err(vq, "Failed to write used id");
+ return -EFAULT;
+ }
+ if (put_user(len, &used->len)) {
+ vq_err(vq, "Failed to write used len");
+ return -EFAULT;
+ }
+ /* Make sure buffer is written before we update index. */
+ wmb();
+ if (put_user(vq->last_used_idx + 1, &vq->used->idx)) {
+ vq_err(vq, "Failed to increment used idx");
+ return -EFAULT;
+ }
+ if (unlikely(vq->log_used)) {
+ /* Make sure data is seen before log. */
+ wmb();
+ log_write(vq->log_base, vq->log_addr + sizeof *vq->used->ring *
+ (vq->last_used_idx % vq->num),
+ sizeof *vq->used->ring);
+ log_write(vq->log_base, vq->log_addr, sizeof *vq->used->ring);
+ if (vq->log_ctx)
+ eventfd_signal(vq->log_ctx, 1);
+ }
+ vq->last_used_idx++;
+ return 0;
+}
+
+/* This actually signals the guest, using eventfd. */
+void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
+ __u16 flags = 0;
+ if (get_user(flags, &vq->avail->flags)) {
+ vq_err(vq, "Failed to get flags");
+ return;
+ }
+
+ /* If they don't want an interrupt, don't signal, unless empty. */
+ if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
+ (vq->avail_idx != vq->last_avail_idx ||
+ !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
+ return;
+
+ /* Signal the Guest tell them we used something up. */
+ if (vq->call_ctx)
+ eventfd_signal(vq->call_ctx, 1);
+}
+
+/* And here's the combo meal deal. Supersize me! */
+void vhost_add_used_and_signal(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq,
+ unsigned int head, int len)
+{
+ vhost_add_used(vq, head, len);
+ vhost_signal(dev, vq);
+}
+
+/* OK, now we need to know about added descriptors. */
+bool vhost_enable_notify(struct vhost_virtqueue *vq)
+{
+ u16 avail_idx;
+ int r;
+ if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
+ return false;
+ vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
+ r = put_user(vq->used_flags, &vq->used->flags);
+ if (r) {
+ vq_err(vq, "Failed to enable notification at %p: %d\n",
+ &vq->used->flags, r);
+ return false;
+ }
+ /* They could have slipped one in as we were doing that: make
+ * sure it's written, then check again. */
+ mb();
+ r = get_user(avail_idx, &vq->avail->idx);
+ if (r) {
+ vq_err(vq, "Failed to check avail idx at %p: %d\n",
+ &vq->avail->idx, r);
+ return false;
+ }
+
+ return avail_idx != vq->last_avail_idx;
+}
+
+/* We don't need to be notified again. */
+void vhost_disable_notify(struct vhost_virtqueue *vq)
+{
+ int r;
+ if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
+ return;
+ vq->used_flags |= VRING_USED_F_NO_NOTIFY;
+ r = put_user(vq->used_flags, &vq->used->flags);
+ if (r)
+ vq_err(vq, "Failed to enable notification at %p: %d\n",
+ &vq->used->flags, r);
+}
+
+int vhost_init(void)
+{
+ vhost_workqueue = create_singlethread_workqueue("vhost");
+ if (!vhost_workqueue)
+ return -ENOMEM;
+ return 0;
+}
+
+void vhost_cleanup(void)
+{
+ destroy_workqueue(vhost_workqueue);
+}
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
new file mode 100644
index 000000000000..d1f045376017
--- /dev/null
+++ b/drivers/vhost/vhost.h
@@ -0,0 +1,159 @@
+#ifndef _VHOST_H
+#define _VHOST_H
+
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/skbuff.h>
+#include <linux/uio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+struct vhost_device;
+
+enum {
+ /* Enough place for all fragments, head, and virtio net header. */
+ VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2,
+};
+
+/* Poll a file (eventfd or socket) */
+/* Note: there's nothing vhost specific about this structure. */
+struct vhost_poll {
+ poll_table table;
+ wait_queue_head_t *wqh;
+ wait_queue_t wait;
+ /* struct which will handle all actual work. */
+ struct work_struct work;
+ unsigned long mask;
+};
+
+void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
+ unsigned long mask);
+void vhost_poll_start(struct vhost_poll *poll, struct file *file);
+void vhost_poll_stop(struct vhost_poll *poll);
+void vhost_poll_flush(struct vhost_poll *poll);
+void vhost_poll_queue(struct vhost_poll *poll);
+
+struct vhost_log {
+ u64 addr;
+ u64 len;
+};
+
+/* The virtqueue structure describes a queue attached to a device. */
+struct vhost_virtqueue {
+ struct vhost_dev *dev;
+
+ /* The actual ring of buffers. */
+ struct mutex mutex;
+ unsigned int num;
+ struct vring_desc __user *desc;
+ struct vring_avail __user *avail;
+ struct vring_used __user *used;
+ struct file *kick;
+ struct file *call;
+ struct file *error;
+ struct eventfd_ctx *call_ctx;
+ struct eventfd_ctx *error_ctx;
+ struct eventfd_ctx *log_ctx;
+
+ struct vhost_poll poll;
+
+ /* The routine to call when the Guest pings us, or timeout. */
+ work_func_t handle_kick;
+
+ /* Last available index we saw. */
+ u16 last_avail_idx;
+
+ /* Caches available index value from user. */
+ u16 avail_idx;
+
+ /* Last index we used. */
+ u16 last_used_idx;
+
+ /* Used flags */
+ u16 used_flags;
+
+ /* Log writes to used structure. */
+ bool log_used;
+ u64 log_addr;
+
+ struct iovec indirect[VHOST_NET_MAX_SG];
+ struct iovec iov[VHOST_NET_MAX_SG];
+ struct iovec hdr[VHOST_NET_MAX_SG];
+ size_t hdr_size;
+ /* We use a kind of RCU to access private pointer.
+ * All readers access it from workqueue, which makes it possible to
+ * flush the workqueue instead of synchronize_rcu. Therefore readers do
+ * not need to call rcu_read_lock/rcu_read_unlock: the beginning of
+ * work item execution acts instead of rcu_read_lock() and the end of
+ * work item execution acts instead of rcu_read_lock().
+ * Writers use virtqueue mutex. */
+ void *private_data;
+ /* Log write descriptors */
+ void __user *log_base;
+ struct vhost_log log[VHOST_NET_MAX_SG];
+};
+
+struct vhost_dev {
+ /* Readers use RCU to access memory table pointer
+ * log base pointer and features.
+ * Writers use mutex below.*/
+ struct vhost_memory *memory;
+ struct mm_struct *mm;
+ struct mutex mutex;
+ unsigned acked_features;
+ struct vhost_virtqueue *vqs;
+ int nvqs;
+ struct file *log_file;
+ struct eventfd_ctx *log_ctx;
+};
+
+long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
+long vhost_dev_check_owner(struct vhost_dev *);
+long vhost_dev_reset_owner(struct vhost_dev *);
+void vhost_dev_cleanup(struct vhost_dev *);
+long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg);
+
+unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+ struct iovec iov[], unsigned int iov_count,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num);
+void vhost_discard_vq_desc(struct vhost_virtqueue *);
+
+int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
+void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
+void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
+ unsigned int head, int len);
+void vhost_disable_notify(struct vhost_virtqueue *);
+bool vhost_enable_notify(struct vhost_virtqueue *);
+
+int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+ unsigned int log_num, u64 len);
+
+int vhost_init(void);
+void vhost_cleanup(void);
+
+#define vq_err(vq, fmt, ...) do { \
+ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
+ if ((vq)->error_ctx) \
+ eventfd_signal((vq)->error_ctx, 1);\
+ } while (0)
+
+enum {
+ VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
+ (1 << VIRTIO_RING_F_INDIRECT_DESC) |
+ (1 << VHOST_F_LOG_ALL) |
+ (1 << VHOST_NET_F_VIRTIO_NET_HDR),
+};
+
+static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
+{
+ unsigned acked_features = rcu_dereference(dev->acked_features);
+ return acked_features & (1 << bit);
+}
+
+#endif
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 54fbb2995a5f..156c661b2912 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -1976,8 +1976,7 @@ static void __devexit uvesafb_exit(void)
module_exit(uvesafb_exit);
-#define param_get_scroll NULL
-static int param_set_scroll(const char *val, struct kernel_param *kp)
+static int param_set_scroll(const char *val, const struct kernel_param *kp)
{
ypan = 0;
@@ -1992,7 +1991,9 @@ static int param_set_scroll(const char *val, struct kernel_param *kp)
return 0;
}
-
+static struct kernel_param_ops param_ops_scroll = {
+ .set = param_set_scroll,
+};
#define param_check_scroll(name, p) __param_check(name, p, void)
module_param_named(scroll, ypan, scroll, 0);
diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c
index 3df17dc8c3d7..8bfa47d5a5c0 100644
--- a/drivers/video/vt8623fb.c
+++ b/drivers/video/vt8623fb.c
@@ -727,7 +727,9 @@ static int __devinit vt8623_pci_probe(struct pci_dev *dev, const struct pci_devi
/* Prepare startup mode */
+ kparam_block_sysfs_write(mode_option);
rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8);
+ kparam_unblock_sysfs_write(mode_option);
if (! ((rc == 1) || (rc == 2))) {
rc = -EINVAL;
dev_err(info->device, "mode %s not found\n", mode_option);