diff options
Diffstat (limited to 'drivers')
30 files changed, 2073 insertions, 106 deletions
diff --git a/drivers/Makefile b/drivers/Makefile index 6ee53c7a57a1..81e36596b1e9 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ +obj-$(CONFIG_VHOST_NET) += vhost/ obj-$(CONFIG_VIRTIO) += virtio/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/acpi/debug.c b/drivers/acpi/debug.c index 8a690c3b8e23..941469801322 100644 --- a/drivers/acpi/debug.c +++ b/drivers/acpi/debug.c @@ -94,7 +94,8 @@ static const struct acpi_dlevel acpi_debug_levels[] = { /* -------------------------------------------------------------------------- FS Interface (/sys) -------------------------------------------------------------------------- */ -static int param_get_debug_layer(char *buffer, struct kernel_param *kp) { +static int param_get_debug_layer(char *buffer, const struct kernel_param *kp) +{ int result = 0; int i; @@ -116,7 +117,8 @@ static int param_get_debug_layer(char *buffer, struct kernel_param *kp) { return result; } -static int param_get_debug_level(char *buffer, struct kernel_param *kp) { +static int param_get_debug_level(char *buffer, const struct kernel_param *kp) +{ int result = 0; int i; @@ -135,8 +137,18 @@ static int param_get_debug_level(char *buffer, struct kernel_param *kp) { return result; } -module_param_call(debug_layer, param_set_uint, param_get_debug_layer, &acpi_dbg_layer, 0644); -module_param_call(debug_level, param_set_uint, param_get_debug_level, &acpi_dbg_level, 0644); +static struct kernel_param_ops acpi_debug_layer_ops = { + .set = param_set_uint, + .get = param_get_debug_layer, +}; + +static struct kernel_param_ops acpi_debug_level_ops = { + .set = param_set_uint, + .get = param_get_debug_level, +}; + +module_param_cb(debug_layer, &acpi_debug_layer_ops, &acpi_dbg_layer, 0644); +module_param_cb(debug_level, &acpi_debug_level_ops, &acpi_dbg_level, 0644); static char trace_method_name[6]; module_param_string(trace_method_name, trace_method_name, 6, 0644); @@ -145,7 +157,7 @@ module_param(trace_debug_layer, uint, 0644); static unsigned int trace_debug_level; module_param(trace_debug_level, uint, 0644); -static int param_set_trace_state(const char *val, struct kernel_param *kp) +static int param_set_trace_state(const char *val, const struct kernel_param *kp) { int result = 0; @@ -179,7 +191,7 @@ exit: return result; } -static int param_get_trace_state(char *buffer, struct kernel_param *kp) +static int param_get_trace_state(char *buffer, const struct kernel_param *kp) { if (!acpi_gbl_trace_method_name) return sprintf(buffer, "disable"); @@ -192,8 +204,12 @@ static int param_get_trace_state(char *buffer, struct kernel_param *kp) return 0; } -module_param_call(trace_state, param_set_trace_state, param_get_trace_state, - NULL, 0644); +static struct kernel_param_ops param_ops_trace_state = { + .set = param_set_trace_state, + .get = param_get_trace_state, +}; + +module_param_cb(trace_state, ¶m_ops_trace_state, NULL, 0644); /* -------------------------------------------------------------------------- FS Interface (/proc) diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c index b8a5d654d3d0..448788fc2bf2 100644 --- a/drivers/char/hvc_iucv.c +++ b/drivers/char/hvc_iucv.c @@ -1146,7 +1146,7 @@ out_err: * Note: If it is called early in the boot process, @val is stored and * parsed later in hvc_iucv_init(). */ -static int param_set_vmidfilter(const char *val, struct kernel_param *kp) +static int param_set_vmidfilter(const char *val, const struct kernel_param *kp) { int rc; @@ -1173,7 +1173,7 @@ static int param_set_vmidfilter(const char *val, struct kernel_param *kp) * The function stores the filter as a comma-separated list of z/VM user IDs * in @buffer. Typically, sysfs routines call this function for attr show. */ -static int param_get_vmidfilter(char *buffer, struct kernel_param *kp) +static int param_get_vmidfilter(char *buffer, const struct kernel_param *kp) { int rc; size_t index, len; @@ -1200,6 +1200,11 @@ static int param_get_vmidfilter(char *buffer, struct kernel_param *kp) #define param_check_vmidfilter(name, p) __param_check(name, p, void) +static struct kernel_param_ops param_ops_vmidfilter = { + .set = param_set_vmidfilter, + .get = param_get_vmidfilter, +}; + /** * hvc_iucv_init() - z/VM IUCV HVC device driver initialization */ diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index a4d57e31f713..45999d81004f 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -196,7 +196,7 @@ static void ipmi_unregister_watchdog(int ipmi_intf); */ static int start_now; -static int set_param_int(const char *val, struct kernel_param *kp) +static int set_param_timeout(const char *val, const struct kernel_param *kp) { char *endp; int l; @@ -215,10 +215,11 @@ static int set_param_int(const char *val, struct kernel_param *kp) return rv; } -static int get_param_int(char *buffer, struct kernel_param *kp) -{ - return sprintf(buffer, "%i", *((int *)kp->arg)); -} +static struct kernel_param_ops param_ops_timeout = { + .set = set_param_timeout, + .get = param_get_int, +}; +#define param_check_timeout param_check_int typedef int (*action_fn)(const char *intval, char *outval); @@ -227,7 +228,7 @@ static int preaction_op(const char *inval, char *outval); static int preop_op(const char *inval, char *outval); static void check_parms(void); -static int set_param_str(const char *val, struct kernel_param *kp) +static int set_param_str(const char *val, const struct kernel_param *kp) { action_fn fn = (action_fn) kp->arg; int rv = 0; @@ -251,7 +252,7 @@ static int set_param_str(const char *val, struct kernel_param *kp) return rv; } -static int get_param_str(char *buffer, struct kernel_param *kp) +static int get_param_str(char *buffer, const struct kernel_param *kp) { action_fn fn = (action_fn) kp->arg; int rv; @@ -263,7 +264,7 @@ static int get_param_str(char *buffer, struct kernel_param *kp) } -static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp) +static int set_param_wdog_ifnum(const char *val, const struct kernel_param *kp) { int rv = param_set_int(val, kp); if (rv) @@ -276,27 +277,38 @@ static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp) return 0; } -module_param_call(ifnum_to_use, set_param_wdog_ifnum, get_param_int, - &ifnum_to_use, 0644); +static struct kernel_param_ops param_ops_wdog_ifnum = { + .set = set_param_wdog_ifnum, + .get = param_get_int, +}; + +#define param_check_wdog_ifnum param_check_int + +static struct kernel_param_ops param_ops_str = { + .set = set_param_str, + .get = get_param_str, +}; + +module_param(ifnum_to_use, wdog_ifnum, 0644); MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog " "timer. Setting to -1 defaults to the first registered " "interface"); -module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644); +module_param(timeout, timeout, 0644); MODULE_PARM_DESC(timeout, "Timeout value in seconds."); -module_param_call(pretimeout, set_param_int, get_param_int, &pretimeout, 0644); +module_param(pretimeout, timeout, 0644); MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds."); -module_param_call(action, set_param_str, get_param_str, action_op, 0644); +module_param_cb(action, ¶m_ops_str, action_op, 0644); MODULE_PARM_DESC(action, "Timeout action. One of: " "reset, none, power_cycle, power_off."); -module_param_call(preaction, set_param_str, get_param_str, preaction_op, 0644); +module_param_cb(preaction, ¶m_ops_str, preaction_op, 0644); MODULE_PARM_DESC(preaction, "Pretimeout action. One of: " "pre_none, pre_smi, pre_nmi, pre_int."); -module_param_call(preop, set_param_str, get_param_str, preop_op, 0644); +module_param_cb(preop, ¶m_ops_str, preop_op, 0644); MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: " "preop_none, preop_panic, preop_give_data."); diff --git a/drivers/dio/dio-driver.c b/drivers/dio/dio-driver.c index 9c0c9afcd0ac..a7b174ef4c85 100644 --- a/drivers/dio/dio-driver.c +++ b/drivers/dio/dio-driver.c @@ -140,5 +140,4 @@ postcore_initcall(dio_driver_init); EXPORT_SYMBOL(dio_match_device); EXPORT_SYMBOL(dio_register_driver); EXPORT_SYMBOL(dio_unregister_driver); -EXPORT_SYMBOL(dio_dev_driver); EXPORT_SYMBOL(dio_bus_type); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 16d056939f9f..2c27a526025f 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(ide_pci_clk); module_param_named(pci_clock, ide_pci_clk, int, 0); MODULE_PARM_DESC(pci_clock, "PCI bus clock frequency (in MHz)"); -static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp) +static int ide_set_dev_param_mask(const char *s, const struct kernel_param *kp) { int a, b, i, j = 1; unsigned int *dev_param_mask = (unsigned int *)kp->arg; @@ -201,34 +201,40 @@ static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp) return 0; } +static struct kernel_param_ops param_ops_ide_dev_mask = { + .set = ide_set_dev_param_mask +}; + +#define param_check_ide_dev_mask(name, p) param_check_uint(name, p) + static unsigned int ide_nodma; -module_param_call(nodma, ide_set_dev_param_mask, NULL, &ide_nodma, 0); +module_param_named(nodma, ide_nodma, ide_dev_mask, 0); MODULE_PARM_DESC(nodma, "disallow DMA for a device"); static unsigned int ide_noflush; -module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0); +module_param_named(noflush, ide_noflush, ide_dev_mask, 0); MODULE_PARM_DESC(noflush, "disable flush requests for a device"); static unsigned int ide_nohpa; -module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0); +module_param_named(nohpa, ide_nohpa, ide_dev_mask, 0); MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device"); static unsigned int ide_noprobe; -module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0); +module_param_named(noprobe, ide_noprobe, ide_dev_mask, 0); MODULE_PARM_DESC(noprobe, "skip probing for a device"); static unsigned int ide_nowerr; -module_param_call(nowerr, ide_set_dev_param_mask, NULL, &ide_nowerr, 0); +module_param_named(nowerr, ide_nowerr, ide_dev_mask, 0); MODULE_PARM_DESC(nowerr, "ignore the ATA_DF bit for a device"); static unsigned int ide_cdroms; -module_param_call(cdrom, ide_set_dev_param_mask, NULL, &ide_cdroms, 0); +module_param_named(cdrom, ide_cdroms, ide_dev_mask, 0); MODULE_PARM_DESC(cdrom, "force device as a CD-ROM"); struct chs_geom { diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index 1f20a042a4f5..dd253002cd50 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c @@ -81,7 +81,7 @@ static u8 i7300_idle_thrtctl_saved; static u8 i7300_idle_thrtlow_saved; static u32 i7300_idle_mc_saved; -static cpumask_t idle_cpumask; +static cpumask_var_t idle_cpumask; static ktime_t start_ktime; static unsigned long avg_idle_us; @@ -459,9 +459,9 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, spin_lock_irqsave(&i7300_idle_lock, flags); if (val == IDLE_START) { - cpu_set(smp_processor_id(), idle_cpumask); + cpumask_set_cpu(smp_processor_id(), idle_cpumask); - if (cpus_weight(idle_cpumask) != num_online_cpus()) + if (cpumask_weight(idle_cpumask) != num_online_cpus()) goto end; now_ktime = ktime_get(); @@ -478,8 +478,8 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, i7300_idle_ioat_start(); } else if (val == IDLE_END) { - cpu_clear(smp_processor_id(), idle_cpumask); - if (cpus_weight(idle_cpumask) == (num_online_cpus() - 1)) { + cpumask_clear_cpu(smp_processor_id(), idle_cpumask); + if (cpumask_weight(idle_cpumask) == (num_online_cpus() - 1)) { /* First CPU coming out of idle */ u64 idle_duration_us; @@ -553,7 +553,6 @@ struct debugfs_file_info { static int __init i7300_idle_init(void) { spin_lock_init(&i7300_idle_lock); - cpus_clear(idle_cpumask); total_us = 0; if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) @@ -565,6 +564,9 @@ static int __init i7300_idle_init(void) if (i7300_idle_ioat_init()) return -ENODEV; + if (!zalloc_cpumask_var(&idle_cpumask, GFP_KERNEL)) + return -ENOMEM; + debugfs_dir = debugfs_create_dir("i7300_idle", NULL); if (debugfs_dir) { int i = 0; @@ -589,6 +591,7 @@ static int __init i7300_idle_init(void) static void __exit i7300_idle_exit(void) { idle_notifier_unregister(&i7300_idle_nb); + free_cpumask_var(idle_cpumask); if (debugfs_dir) { int i = 0; diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c index 0501f0e65157..3665e3975158 100644 --- a/drivers/input/misc/ati_remote2.c +++ b/drivers/input/misc/ati_remote2.c @@ -37,7 +37,8 @@ enum { }; static int ati_remote2_set_mask(const char *val, - struct kernel_param *kp, unsigned int max) + const struct kernel_param *kp, + unsigned int max) { unsigned long mask; int ret; @@ -58,28 +59,31 @@ static int ati_remote2_set_mask(const char *val, } static int ati_remote2_set_channel_mask(const char *val, - struct kernel_param *kp) + const struct kernel_param *kp) { pr_debug("%s()\n", __func__); return ati_remote2_set_mask(val, kp, ATI_REMOTE2_MAX_CHANNEL_MASK); } -static int ati_remote2_get_channel_mask(char *buffer, struct kernel_param *kp) +static int ati_remote2_get_channel_mask(char *buffer, + const struct kernel_param *kp) { pr_debug("%s()\n", __func__); return sprintf(buffer, "0x%04x", *(unsigned int *)kp->arg); } -static int ati_remote2_set_mode_mask(const char *val, struct kernel_param *kp) +static int ati_remote2_set_mode_mask(const char *val, + const struct kernel_param *kp) { pr_debug("%s()\n", __func__); return ati_remote2_set_mask(val, kp, ATI_REMOTE2_MAX_MODE_MASK); } -static int ati_remote2_get_mode_mask(char *buffer, struct kernel_param *kp) +static int ati_remote2_get_mode_mask(char *buffer, + const struct kernel_param *kp) { pr_debug("%s()\n", __func__); @@ -88,15 +92,19 @@ static int ati_remote2_get_mode_mask(char *buffer, struct kernel_param *kp) static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK; #define param_check_channel_mask(name, p) __param_check(name, p, unsigned int) -#define param_set_channel_mask ati_remote2_set_channel_mask -#define param_get_channel_mask ati_remote2_get_channel_mask +static struct kernel_param_ops param_ops_channel_mask = { + .set = ati_remote2_set_channel_mask, + .get = ati_remote2_get_channel_mask, +}; module_param(channel_mask, channel_mask, 0644); MODULE_PARM_DESC(channel_mask, "Bitmask of channels to accept <15:Channel16>...<1:Channel2><0:Channel1>"); static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK; #define param_check_mode_mask(name, p) __param_check(name, p, unsigned int) -#define param_set_mode_mask ati_remote2_set_mode_mask -#define param_get_mode_mask ati_remote2_get_mode_mask +static struct kernel_param_ops param_ops_mode_mask = { + .set = ati_remote2_set_mode_mask, + .get = ati_remote2_get_mode_mask, +}; module_param(mode_mask, mode_mask, 0644); MODULE_PARM_DESC(mode_mask, "Bitmask of modes to accept <4:PC><3:AUX4><2:AUX3><1:AUX2><0:AUX1>"); diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 07c53798301a..e4570dbff543 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -39,11 +39,13 @@ MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static unsigned int psmouse_max_proto = PSMOUSE_AUTO; -static int psmouse_set_maxproto(const char *val, struct kernel_param *kp); -static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp); +static int psmouse_set_maxproto(const char *val, const struct kernel_param *); +static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp); +static struct kernel_param_ops param_ops_proto_abbrev = { + .set = psmouse_set_maxproto, + .get = psmouse_get_maxproto, +}; #define param_check_proto_abbrev(name, p) __param_check(name, p, unsigned int) -#define param_set_proto_abbrev psmouse_set_maxproto -#define param_get_proto_abbrev psmouse_get_maxproto module_param_named(proto, psmouse_max_proto, proto_abbrev, 0644); MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, any). Useful for KVM switches."); @@ -1652,7 +1654,7 @@ static ssize_t psmouse_attr_set_resolution(struct psmouse *psmouse, void *data, } -static int psmouse_set_maxproto(const char *val, struct kernel_param *kp) +static int psmouse_set_maxproto(const char *val, const struct kernel_param *kp) { const struct psmouse_protocol *proto; @@ -1669,7 +1671,7 @@ static int psmouse_set_maxproto(const char *val, struct kernel_param *kp) return 0; } -static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp) +static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp) { int type = *((unsigned int *)kp->arg); diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 610e914abe6c..2b6f750ee394 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -109,8 +109,7 @@ MODULE_PARM_DESC(mpt_debug_level, " debug level - refer to mptdebug.h \ int mpt_fwfault_debug; EXPORT_SYMBOL(mpt_fwfault_debug); -module_param_call(mpt_fwfault_debug, param_set_int, param_get_int, - &mpt_fwfault_debug, 0600); +module_param(mpt_fwfault_debug, int, 0600); MODULE_PARM_DESC(mpt_fwfault_debug, "Enable detection of Firmware fault" " and halt Firmware on fault - (default=0)"); diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 3648b23d5c92..42660e55119e 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -120,9 +120,9 @@ static int count = DEFAULT_COUNT; module_param(recur_count, int, 0644); MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\ "default is 10"); -module_param(cpoint_name, charp, 0644); +module_param(cpoint_name, charp, 0444); MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed"); -module_param(cpoint_type, charp, 0644); +module_param(cpoint_type, charp, 0444); MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\ "hitting the crash point"); module_param(cpoint_count, int, 0644); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 85e1b6a3ac1b..926c7215c667 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -236,6 +236,7 @@ struct myri10ge_priv { int watchdog_resets; int watchdog_pause; int pause; + bool fw_name_allocated; char *fw_name; char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE]; char *product_code_string; @@ -268,6 +269,7 @@ MODULE_FIRMWARE("myri10ge_eth_z8e.dat"); MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat"); MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat"); +/* Careful: must be accessed under kparam_block_sysfs_write */ static char *myri10ge_fw_name = NULL; module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name"); @@ -373,6 +375,14 @@ static inline void put_be32(__be32 val, __be32 __iomem * p) static struct net_device_stats *myri10ge_get_stats(struct net_device *dev); +static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated) +{ + if (mgp->fw_name_allocated) + kfree(mgp->fw_name); + mgp->fw_name = name; + mgp->fw_name_allocated = allocated; +} + static int myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd, struct myri10ge_cmd *data, int atomic) @@ -744,7 +754,7 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt) dev_warn(&mgp->pdev->dev, "via hotplug\n"); } - mgp->fw_name = "adopted"; + set_fw_name(mgp, "adopted", false); mgp->tx_boundary = 2048; myri10ge_dummy_rdma(mgp, 1); status = myri10ge_get_firmware_capabilities(mgp); @@ -3260,7 +3270,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) * load the optimized firmware (which assumes aligned PCIe * completions) in order to see if it works on this host. */ - mgp->fw_name = myri10ge_fw_aligned; + set_fw_name(mgp, myri10ge_fw_aligned, false); status = myri10ge_load_firmware(mgp, 1); if (status != 0) { goto abort; @@ -3288,7 +3298,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) abort: /* fall back to using the unaligned firmware */ mgp->tx_boundary = 2048; - mgp->fw_name = myri10ge_fw_unaligned; + set_fw_name(mgp, myri10ge_fw_unaligned, false); } @@ -3311,7 +3321,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) dev_info(&mgp->pdev->dev, "PCIE x%d Link\n", link_width); mgp->tx_boundary = 4096; - mgp->fw_name = myri10ge_fw_aligned; + set_fw_name(mgp, myri10ge_fw_aligned, false); } else { myri10ge_firmware_probe(mgp); } @@ -3320,22 +3330,29 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) dev_info(&mgp->pdev->dev, "Assuming aligned completions (forced)\n"); mgp->tx_boundary = 4096; - mgp->fw_name = myri10ge_fw_aligned; + set_fw_name(mgp, myri10ge_fw_aligned, false); } else { dev_info(&mgp->pdev->dev, "Assuming unaligned completions (forced)\n"); mgp->tx_boundary = 2048; - mgp->fw_name = myri10ge_fw_unaligned; + set_fw_name(mgp, myri10ge_fw_unaligned, false); } } + + kparam_block_sysfs_write(myri10ge_fw_name); if (myri10ge_fw_name != NULL) { - overridden = 1; - mgp->fw_name = myri10ge_fw_name; + char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL); + if (fw_name) { + overridden = 1; + set_fw_name(mgp, fw_name, true); + } } + kparam_unblock_sysfs_write(myri10ge_fw_name); + if (mgp->board_number < MYRI10GE_MAX_BOARDS && myri10ge_fw_names[mgp->board_number] != NULL && strlen(myri10ge_fw_names[mgp->board_number])) { - mgp->fw_name = myri10ge_fw_names[mgp->board_number]; + set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false); overridden = 1; } if (overridden) @@ -3699,6 +3716,7 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp) struct myri10ge_cmd cmd; struct pci_dev *pdev = mgp->pdev; char *old_fw; + bool old_allocated; int i, status, ncpus, msix_cap; mgp->num_slices = 1; @@ -3711,17 +3729,23 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp) /* try to load the slice aware rss firmware */ old_fw = mgp->fw_name; + old_allocated = mgp->fw_name_allocated; + /* don't free old_fw if we override it. */ + mgp->fw_name_allocated = false; + if (myri10ge_fw_name != NULL) { dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n", myri10ge_fw_name); - mgp->fw_name = myri10ge_fw_name; + set_fw_name(mgp, myri10ge_fw_name, false); } else if (old_fw == myri10ge_fw_aligned) - mgp->fw_name = myri10ge_fw_rss_aligned; + set_fw_name(mgp, myri10ge_fw_rss_aligned, false); else - mgp->fw_name = myri10ge_fw_rss_unaligned; + set_fw_name(mgp, myri10ge_fw_rss_unaligned, false); status = myri10ge_load_firmware(mgp, 0); if (status != 0) { dev_info(&pdev->dev, "Rss firmware not found\n"); + if (old_allocated) + kfree(old_fw); return; } @@ -3787,6 +3811,8 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp) mgp->num_slices); if (status == 0) { pci_disable_msix(pdev); + if (old_allocated) + kfree(old_fw); return; } if (status > 0) @@ -3803,7 +3829,7 @@ disable_msix: abort_with_fw: mgp->num_slices = 1; - mgp->fw_name = old_fw; + set_fw_name(mgp, old_fw, old_allocated); myri10ge_load_firmware(mgp, 0); } @@ -4033,6 +4059,7 @@ abort_with_enabled: pci_disable_device(pdev); abort_with_netdev: + set_fw_name(mgp, NULL, false); free_netdev(netdev); return status; } @@ -4077,6 +4104,7 @@ static void myri10ge_remove(struct pci_dev *pdev) dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), mgp->cmd, mgp->cmd_bus); + set_fw_name(mgp, NULL, false); free_netdev(netdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 01e99f22210e..9142bfd3bab7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file) err = 0; tfile->tun = tun; tun->tfile = tfile; + tun->socket.file = file; dev_hold(tun->dev); sock_hold(tun->socket.sk); atomic_inc(&tfile->count); @@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun) /* Detach from net device */ netif_tx_lock_bh(tun->dev); tun->tfile = NULL; + tun->socket.file = NULL; netif_tx_unlock_bh(tun->dev); /* Drop read queue */ @@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible(&tun->socket.wait); + wake_up_interruptible_poll(&tun->socket.wait, POLLIN | + POLLRDNORM | POLLRDBAND); return NETDEV_TX_OK; drop: @@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, len = min_t(int, skb->len, len); skb_copy_datagram_const_iovec(skb, 0, iv, total, len); - total += len; + total += skb->len; tun->dev->stats.tx_packets++; tun->dev->stats.tx_bytes += len; @@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, return total; } -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, - unsigned long count, loff_t pos) +static ssize_t tun_do_read(struct tun_struct *tun, + struct kiocb *iocb, const struct iovec *iv, + ssize_t len, int noblock) { - struct file *file = iocb->ki_filp; - struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; - ssize_t len, ret = 0; - - if (!tun) - return -EBADFD; + ssize_t ret = 0; DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); - len = iov_length(iv, count); - if (len < 0) { - ret = -EINVAL; - goto out; - } - add_wait_queue(&tun->socket.wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; /* Read frames from the queue */ if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { - if (file->f_flags & O_NONBLOCK) { + if (noblock) { ret = -EAGAIN; break; } @@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, current->state = TASK_RUNNING; remove_wait_queue(&tun->socket.wait, &wait); + return ret; +} + +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); + ssize_t len, ret; + + if (!tun) + return -EBADFD; + len = iov_length(iv, count); + if (len < 0) { + ret = -EINVAL; + goto out; + } + + ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); out: tun_put(tun); return ret; @@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk) return; if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + POLLWRNORM | POLLWRBAND); tun = container_of(sk, struct tun_sock, sk)->tun; kill_fasync(&tun->fasync, SIGIO, POLL_OUT); @@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk) free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev); } +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + return tun_get_user(tun, m->msg_iov, total_len, + m->msg_flags & MSG_DONTWAIT); +} + +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len, + int flags) +{ + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + int ret; + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) + return -EINVAL; + ret = tun_do_read(tun, iocb, m->msg_iov, total_len, + flags & MSG_DONTWAIT); + if (ret > total_len) { + m->msg_flags |= MSG_TRUNC; + ret = flags & MSG_TRUNC ? ret : total_len; + } + return ret; +} + +/* Ops structure to mimic raw sockets with tun */ +static const struct proto_ops tun_socket_ops = { + .sendmsg = tun_sendmsg, + .recvmsg = tun_recvmsg, +}; + static struct proto tun_proto = { .name = "tun", .owner = THIS_MODULE, @@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) goto err_free_dev; init_waitqueue_head(&tun->socket.wait); + tun->socket.ops = &tun_socket_ops; sock_init_data(&tun->socket, sk); sk->sk_write_space = tun_sock_write_space; sk->sk_sndbuf = INT_MAX; @@ -1525,6 +1571,23 @@ static void tun_cleanup(void) rtnl_link_unregister(&tun_link_ops); } +/* Get an underlying socket object from tun file. Returns error unless file is + * attached to a device. The returned object works like a packet socket, it + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for + * holding a reference to the file for as long as the socket is in use. */ +struct socket *tun_get_socket(struct file *file) +{ + struct tun_struct *tun; + if (file->f_op != &tun_fops) + return ERR_PTR(-EINVAL); + tun = tun_get(file); + if (!tun) + return ERR_PTR(-EBADFD); + tun_put(tun); + return &tun->socket; +} +EXPORT_SYMBOL_GPL(tun_get_socket); + module_init(tun_init); module_exit(tun_cleanup); MODULE_DESCRIPTION(DRV_DESCRIPTION); diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index 09fcfad742e7..71d27ccec630 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -122,6 +122,8 @@ struct if_sdio_card { const char *helper; const char *firmware; + bool helper_allocated; + bool firmware_allocated; u8 buffer[65536]; @@ -1001,16 +1003,34 @@ static int if_sdio_probe(struct sdio_func *func, card->helper = if_sdio_models[i].helper; card->firmware = if_sdio_models[i].firmware; + kparam_block_sysfs_write(helper_name); if (lbs_helper_name) { + char *helper = kstrdup(lbs_helper_name, GFP_KERNEL); + if (!helper) { + kparam_unblock_sysfs_write(helper_name); + ret = -ENOMEM; + goto free; + } lbs_deb_sdio("overriding helper firmware: %s\n", lbs_helper_name); - card->helper = lbs_helper_name; + card->helper = helper; + card->helper_allocated = true; } + kparam_unblock_sysfs_write(helper_name); + kparam_block_sysfs_write(fw_name); if (lbs_fw_name) { + char *fw_name = kstrdup(lbs_fw_name, GFP_KERNEL); + if (!fw_name) { + kparam_unblock_sysfs_write(fw_name); + ret = -ENOMEM; + goto free; + } lbs_deb_sdio("overriding firmware: %s\n", lbs_fw_name); - card->firmware = lbs_fw_name; + card->firmware = fw_name; + card->firmware_allocated = true; } + kparam_unblock_sysfs_write(fw_name); sdio_claim_host(func); @@ -1125,6 +1145,10 @@ free: kfree(packet); } + if (card->helper_allocated) + kfree(card->helper); + if (card->firmware_allocated) + kfree(card->firmware); kfree(card); goto out; @@ -1175,6 +1199,10 @@ static void if_sdio_remove(struct sdio_func *func) kfree(packet); } + if (card->helper_allocated) + kfree(card->helper); + if (card->firmware_allocated) + kfree(card->firmware); kfree(card); lbs_deb_leave(LBS_DEB_SDIO); diff --git a/drivers/net/wireless/libertas/if_usb.c b/drivers/net/wireless/libertas/if_usb.c index 65e174595d12..a41007ebe1f3 100644 --- a/drivers/net/wireless/libertas/if_usb.c +++ b/drivers/net/wireless/libertas/if_usb.c @@ -290,10 +290,13 @@ static int if_usb_probe(struct usb_interface *intf, } /* Upload firmware */ + kparam_block_sysfs_write(fw_name); if (__if_usb_prog_firmware(cardp, lbs_fw_name, BOOT_CMD_FW_BY_USB)) { + kparam_unblock_sysfs_write(fw_name); lbs_deb_usbd(&udev->dev, "FW upload failed\n"); goto err_prog_firmware; } + kparam_unblock_sysfs_write(fw_name); if (!(priv = lbs_add_card(cardp, &udev->dev))) goto err_prog_firmware; diff --git a/drivers/net/wireless/libertas_tf/if_usb.c b/drivers/net/wireless/libertas_tf/if_usb.c index 3691c307e674..669a3de68508 100644 --- a/drivers/net/wireless/libertas_tf/if_usb.c +++ b/drivers/net/wireless/libertas_tf/if_usb.c @@ -650,12 +650,15 @@ static int if_usb_prog_firmware(struct if_usb_card *cardp) static int reset_count = 10; int ret = 0; + kparam_block_sysfs_write(fw_name); ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev); if (ret < 0) { printk(KERN_INFO "libertastf: firmware %s not found\n", lbtf_fw_name); + kparam_unblock_sysfs_write(fw_name); goto done; } + kparam_unblock_sysfs_write(fw_name); if (check_fwfile_format(cardp->fw->data, cardp->fw->size)) goto release_fw; diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index b52b773d49d9..c7307167e33c 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -726,6 +726,7 @@ bfad_drv_init(struct bfad_s *bfad) memset(&driver_info, 0, sizeof(driver_info)); strncpy(driver_info.version, BFAD_DRIVER_VERSION, sizeof(driver_info.version) - 1); + __kernel_param_lock(); if (host_name) strncpy(driver_info.host_machine_name, host_name, sizeof(driver_info.host_machine_name) - 1); @@ -735,6 +736,7 @@ bfad_drv_init(struct bfad_s *bfad) if (os_patch) strncpy(driver_info.host_os_patch, os_patch, sizeof(driver_info.host_os_patch) - 1); + __kernel_param_unlock(); strncpy(driver_info.os_device_name, bfad->pci_name, sizeof(driver_info.os_device_name - 1)); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 4a43b74c0d27..0b728aa885bb 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1227,7 +1227,7 @@ int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, "CPU.\n"); spin_unlock_bh(&fps->fcoe_rx_list.lock); - cpu = first_cpu(cpu_online_map); + cpu = cpumask_first(cpu_online_mask); fps = &per_cpu(fcoe_percpu, cpu); spin_lock_bh(&fps->fcoe_rx_list.lock); if (!fps->thread) { diff --git a/drivers/staging/rtl8187se/r8180_core.c b/drivers/staging/rtl8187se/r8180_core.c index 53e654d0d4fa..9ca3c6f84dc7 100644 --- a/drivers/staging/rtl8187se/r8180_core.c +++ b/drivers/staging/rtl8187se/r8180_core.c @@ -66,7 +66,7 @@ static struct pci_device_id rtl8180_pci_id_tbl[] __devinitdata = { }; -static char* ifname = "wlan%d"; +static char ifname[IFNAMSIZ] = "wlan%d"; static int hwseqnum = 0; static int hwwep = 0; static int channels = 0x3fff; @@ -79,7 +79,7 @@ MODULE_AUTHOR("Andrea Merello <andreamrl@tiscali.it>"); MODULE_DESCRIPTION("Linux driver for Realtek RTL8180 / RTL8185 WiFi cards"); -module_param(ifname, charp, S_IRUGO|S_IWUSR ); +module_param_string(ifname, ifname, sizef(ifname), S_IRUGO|S_IWUSR); module_param(hwseqnum,int, S_IRUGO|S_IWUSR); module_param(hwwep,int, S_IRUGO|S_IWUSR); module_param(channels,int, S_IRUGO|S_IWUSR); @@ -3918,7 +3918,7 @@ static int __devinit rtl8180_pci_probe(struct pci_dev *pdev, if (dev_alloc_name(dev, ifname) < 0){ DMESG("Oops: devname already taken! Trying wlan%%d...\n"); - ifname = "wlan%d"; + strcpy(ifname, "wlan%d"); dev_alloc_name(dev, ifname); } diff --git a/drivers/staging/rtl8192e/r8192E_core.c b/drivers/staging/rtl8192e/r8192E_core.c index b0802a7aeb5f..8f4637ad466a 100644 --- a/drivers/staging/rtl8192e/r8192E_core.c +++ b/drivers/staging/rtl8192e/r8192E_core.c @@ -115,7 +115,7 @@ static struct pci_device_id rtl8192_pci_id_tbl[] __devinitdata = { {} }; -static char* ifname = "wlan%d"; +static char ifname[IFNAMSIZ] = "wlan%d"; static int hwwep = 1; //default use hw. set 0 to use software security static int channels = 0x3fff; @@ -126,7 +126,7 @@ MODULE_DEVICE_TABLE(pci, rtl8192_pci_id_tbl); MODULE_DESCRIPTION("Linux driver for Realtek RTL819x WiFi cards"); -module_param(ifname, charp, S_IRUGO|S_IWUSR ); +module_param_string(ifname, ifname, sizef(ifname), S_IRUGO|S_IWUSR); //module_param(hwseqnum,int, S_IRUGO|S_IWUSR); module_param(hwwep,int, S_IRUGO|S_IWUSR); module_param(channels,int, S_IRUGO|S_IWUSR); @@ -5972,7 +5972,7 @@ static int __devinit rtl8192_pci_probe(struct pci_dev *pdev, if (dev_alloc_name(dev, ifname) < 0){ RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n"); - ifname = "wlan%d"; + strcpt(ifname, "wlan%d"); dev_alloc_name(dev, ifname); } diff --git a/drivers/staging/rtl8192su/r8192U_core.c b/drivers/staging/rtl8192su/r8192U_core.c index 66274d7666ff..e24815f52142 100644 --- a/drivers/staging/rtl8192su/r8192U_core.c +++ b/drivers/staging/rtl8192su/r8192U_core.c @@ -136,13 +136,13 @@ MODULE_VERSION("V 1.1"); MODULE_DEVICE_TABLE(usb, rtl8192_usb_id_tbl); MODULE_DESCRIPTION("Linux driver for Realtek RTL8192 USB WiFi cards"); -static char* ifname = "wlan%d"; +static char ifname[IFNAMSIZ] = "wlan%d"; static int hwwep = 1; //default use hw. set 0 to use software security static int channels = 0x3fff; -module_param(ifname, charp, S_IRUGO|S_IWUSR ); +module_param_string(ifname, ifname, sizef(ifname), S_IRUGO|S_IWUSR); //module_param(hwseqnum,int, S_IRUGO|S_IWUSR); module_param(hwwep,int, S_IRUGO|S_IWUSR); module_param(channels,int, S_IRUGO|S_IWUSR); @@ -7463,7 +7463,7 @@ static int __devinit rtl8192_usb_probe(struct usb_interface *intf, if (dev_alloc_name(dev, ifname) < 0){ RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n"); - ifname = "wlan%d"; + strcpy(ifname, "wlan%d"); dev_alloc_name(dev, ifname); } diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index d171b563e94c..283a178997a5 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -1550,6 +1550,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver) char file_arr[] = "CMVxy.bin"; char *file; + kparam_block_sysfs_write(cmv_file); /* set proper name corresponding modem version and line type */ if (cmv_file[sc->modem_index] == NULL) { if (UEA_CHIP_VERSION(sc) == ADI930) @@ -1568,6 +1569,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver) strlcat(cmv_name, file, UEA_FW_NAME_MAX); if (ver == 2) strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX); + kparam_unblock_sysfs_write(cmv_file); } static int request_cmvs_old(struct uea_softc *sc, diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig new file mode 100644 index 000000000000..9e9355367bb3 --- /dev/null +++ b/drivers/vhost/Kconfig @@ -0,0 +1,11 @@ +config VHOST_NET + tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)" + depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL + ---help--- + This kernel module can be loaded in host kernel to accelerate + guest networking with virtio_net. Not to be confused with virtio_net + module itself which needs to be loaded in guest kernel. + + To compile this driver as a module, choose M here: the module will + be called vhost_net. + diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile new file mode 100644 index 000000000000..72dd02050bb9 --- /dev/null +++ b/drivers/vhost/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_VHOST_NET) += vhost_net.o +vhost_net-y := vhost.o net.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c new file mode 100644 index 000000000000..22d5fef472cc --- /dev/null +++ b/drivers/vhost/net.c @@ -0,0 +1,648 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * virtio-net server in host kernel. + */ + +#include <linux/compat.h> +#include <linux/eventfd.h> +#include <linux/vhost.h> +#include <linux/virtio_net.h> +#include <linux/mmu_context.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/rcupdate.h> +#include <linux/file.h> + +#include <linux/net.h> +#include <linux/if_packet.h> +#include <linux/if_arp.h> +#include <linux/if_tun.h> + +#include <net/sock.h> + +#include "vhost.h" + +/* Max number of bytes transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others. */ +#define VHOST_NET_WEIGHT 0x80000 + +enum { + VHOST_NET_VQ_RX = 0, + VHOST_NET_VQ_TX = 1, + VHOST_NET_VQ_MAX = 2, +}; + +enum vhost_net_poll_state { + VHOST_NET_POLL_DISABLED = 0, + VHOST_NET_POLL_STARTED = 1, + VHOST_NET_POLL_STOPPED = 2, +}; + +struct vhost_net { + struct vhost_dev dev; + struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; + struct vhost_poll poll[VHOST_NET_VQ_MAX]; + /* Tells us whether we are polling a socket for TX. + * We only do this when socket buffer fills up. + * Protected by tx vq lock. */ + enum vhost_net_poll_state tx_poll_state; +}; + +/* Pop first len bytes from iovec. Return number of segments used. */ +static int move_iovec_hdr(struct iovec *from, struct iovec *to, + size_t len, int iov_count) +{ + int seg = 0; + size_t size; + while (len && seg < iov_count) { + size = min(from->iov_len, len); + to->iov_base = from->iov_base; + to->iov_len = size; + from->iov_len -= size; + from->iov_base += size; + len -= size; + ++from; + ++to; + ++seg; + } + return seg; +} + +/* Caller must have TX VQ lock */ +static void tx_poll_stop(struct vhost_net *net) +{ + if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) + return; + vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); + net->tx_poll_state = VHOST_NET_POLL_STOPPED; +} + +/* Caller must have TX VQ lock */ +static void tx_poll_start(struct vhost_net *net, struct socket *sock) +{ + if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) + return; + vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); + net->tx_poll_state = VHOST_NET_POLL_STARTED; +} + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_tx(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; + unsigned head, out, in, s; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + size_t len, total_len = 0; + int err, wmem; + size_t hdr_size; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock) + return; + + wmem = atomic_read(&sock->sk->sk_wmem_alloc); + if (wmem >= sock->sk->sk_sndbuf) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + + if (wmem < sock->sk->sk_sndbuf * 2) + tx_poll_stop(net); + hdr_size = vq->hdr_size; + + for (;;) { + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + NULL, NULL); + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (head == vq->num) { + wmem = atomic_read(&sock->sk->sk_wmem_alloc); + if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { + tx_poll_start(net, sock); + set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + break; + } + if (unlikely(vhost_enable_notify(vq))) { + vhost_disable_notify(vq); + continue; + } + break; + } + if (in) { + vq_err(vq, "Unexpected descriptor format for TX: " + "out %d, int %d\n", out, in); + break; + } + /* Skip header. TODO: support TSO. */ + s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); + msg.msg_iovlen = out; + len = iov_length(vq->iov, out); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected header len for TX: " + "%zd expected %zd\n", + iov_length(vq->hdr, s), hdr_size); + break; + } + /* TODO: Check specific error and bomb out unless ENOBUFS? */ + err = sock->ops->sendmsg(NULL, sock, &msg, len); + if (unlikely(err < 0)) { + vhost_discard_vq_desc(vq); + tx_poll_start(net, sock); + break; + } + if (err != len) + pr_err("Truncated TX packet: " + " len %d != %zd\n", err, len); + vhost_add_used_and_signal(&net->dev, vq, head, 0); + total_len += len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_rx(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; + unsigned head, out, in, log, s; + struct vhost_log *vq_log; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, /* FIXME: get and handle RX aux data. */ + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + + struct virtio_net_hdr hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + + size_t len, total_len = 0; + int err; + size_t hdr_size; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + hdr_size = vq->hdr_size; + + vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? + vq->log : NULL; + + for (;;) { + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + vq_log, &log); + /* OK, now we need to know about added descriptors. */ + if (head == vq->num) { + if (unlikely(vhost_enable_notify(vq))) { + /* They have slipped one in as we were + * doing that: check again. */ + vhost_disable_notify(vq); + continue; + } + /* Nothing new? Wait for eventfd to tell us + * they refilled. */ + break; + } + /* We don't need to be notified again. */ + if (out) { + vq_err(vq, "Unexpected descriptor format for RX: " + "out %d, int %d\n", + out, in); + break; + } + /* Skip header. TODO: support TSO/mergeable rx buffers. */ + s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); + msg.msg_iovlen = in; + len = iov_length(vq->iov, in); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected header len for RX: " + "%zd expected %zd\n", + iov_length(vq->hdr, s), hdr_size); + break; + } + err = sock->ops->recvmsg(NULL, sock, &msg, + len, MSG_DONTWAIT | MSG_TRUNC); + /* TODO: Check specific error and bomb out unless EAGAIN? */ + if (err < 0) { + vhost_discard_vq_desc(vq); + break; + } + /* TODO: Should check and handle checksum. */ + if (err > len) { + pr_err("Discarded truncated rx packet: " + " len %d > %zd\n", err, len); + vhost_discard_vq_desc(vq); + continue; + } + len = err; + err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); + if (err) { + vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", + vq->iov->iov_base, err); + break; + } + len += hdr_size; + vhost_add_used_and_signal(&net->dev, vq, head, len); + if (unlikely(vq_log)) + vhost_log_write(vq, vq_log, log, len); + total_len += len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +static void handle_tx_kick(struct work_struct *work) +{ + struct vhost_virtqueue *vq; + struct vhost_net *net; + vq = container_of(work, struct vhost_virtqueue, poll.work); + net = container_of(vq->dev, struct vhost_net, dev); + handle_tx(net); +} + +static void handle_rx_kick(struct work_struct *work) +{ + struct vhost_virtqueue *vq; + struct vhost_net *net; + vq = container_of(work, struct vhost_virtqueue, poll.work); + net = container_of(vq->dev, struct vhost_net, dev); + handle_rx(net); +} + +static void handle_tx_net(struct work_struct *work) +{ + struct vhost_net *net; + net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work); + handle_tx(net); +} + +static void handle_rx_net(struct work_struct *work) +{ + struct vhost_net *net; + net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work); + handle_rx(net); +} + +static int vhost_net_open(struct inode *inode, struct file *f) +{ + struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); + int r; + if (!n) + return -ENOMEM; + f->private_data = n; + n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; + n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; + r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX); + if (r < 0) { + kfree(n); + return r; + } + + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT); + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN); + n->tx_poll_state = VHOST_NET_POLL_DISABLED; + return 0; +} + +static void vhost_net_disable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + if (!vq->private_data) + return; + if (vq == n->vqs + VHOST_NET_VQ_TX) { + tx_poll_stop(n); + n->tx_poll_state = VHOST_NET_POLL_DISABLED; + } else + vhost_poll_stop(n->poll + VHOST_NET_VQ_RX); +} + +static void vhost_net_enable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct socket *sock = vq->private_data; + if (!sock) + return; + if (vq == n->vqs + VHOST_NET_VQ_TX) { + n->tx_poll_state = VHOST_NET_POLL_STOPPED; + tx_poll_start(n, sock); + } else + vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file); +} + +static struct socket *vhost_net_stop_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct socket *sock; + + mutex_lock(&vq->mutex); + sock = vq->private_data; + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, NULL); + mutex_unlock(&vq->mutex); + return sock; +} + +static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, + struct socket **rx_sock) +{ + *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX); + *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX); +} + +static void vhost_net_flush_vq(struct vhost_net *n, int index) +{ + vhost_poll_flush(n->poll + index); + vhost_poll_flush(&n->dev.vqs[index].poll); +} + +static void vhost_net_flush(struct vhost_net *n) +{ + vhost_net_flush_vq(n, VHOST_NET_VQ_TX); + vhost_net_flush_vq(n, VHOST_NET_VQ_RX); +} + +static int vhost_net_release(struct inode *inode, struct file *f) +{ + struct vhost_net *n = f->private_data; + struct socket *tx_sock; + struct socket *rx_sock; + + vhost_net_stop(n, &tx_sock, &rx_sock); + vhost_net_flush(n); + vhost_dev_cleanup(&n->dev); + if (tx_sock) + fput(tx_sock->file); + if (rx_sock) + fput(rx_sock->file); + /* We do an extra flush before freeing memory, + * since jobs can re-queue themselves. */ + vhost_net_flush(n); + kfree(n); + return 0; +} + +static struct socket *get_raw_socket(int fd) +{ + struct { + struct sockaddr_ll sa; + char buf[MAX_ADDR_LEN]; + } uaddr; + int uaddr_len = sizeof uaddr, r; + struct socket *sock = sockfd_lookup(fd, &r); + if (!sock) + return ERR_PTR(-ENOTSOCK); + + /* Parameter checking */ + if (sock->sk->sk_type != SOCK_RAW) { + r = -ESOCKTNOSUPPORT; + goto err; + } + + r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, + &uaddr_len, 0); + if (r) + goto err; + + if (uaddr.sa.sll_family != AF_PACKET) { + r = -EPFNOSUPPORT; + goto err; + } + return sock; +err: + fput(sock->file); + return ERR_PTR(r); +} + +static struct socket *get_tun_socket(int fd) +{ + struct file *file = fget(fd); + struct socket *sock; + if (!file) + return ERR_PTR(-EBADF); + sock = tun_get_socket(file); + if (IS_ERR(sock)) + fput(file); + return sock; +} + +static struct socket *get_socket(int fd) +{ + struct socket *sock; + if (fd == -1) + return NULL; + sock = get_raw_socket(fd); + if (!IS_ERR(sock)) + return sock; + sock = get_tun_socket(fd); + if (!IS_ERR(sock)) + return sock; + return ERR_PTR(-ENOTSOCK); +} + +static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) +{ + struct socket *sock, *oldsock; + struct vhost_virtqueue *vq; + int r; + + mutex_lock(&n->dev.mutex); + r = vhost_dev_check_owner(&n->dev); + if (r) + goto err; + + if (index >= VHOST_NET_VQ_MAX) { + r = -ENOBUFS; + goto err; + } + vq = n->vqs + index; + mutex_lock(&vq->mutex); + sock = get_socket(fd); + if (IS_ERR(sock)) { + r = PTR_ERR(sock); + goto err; + } + + /* start polling new socket */ + oldsock = vq->private_data; + if (sock == oldsock) + goto done; + + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, sock); + vhost_net_enable_vq(n, vq); + mutex_unlock(&vq->mutex); +done: + mutex_unlock(&n->dev.mutex); + if (oldsock) { + vhost_net_flush_vq(n, index); + fput(oldsock->file); + } + return r; +err: + mutex_unlock(&n->dev.mutex); + return r; +} + +static long vhost_net_reset_owner(struct vhost_net *n) +{ + struct socket *tx_sock = NULL; + struct socket *rx_sock = NULL; + long err; + mutex_lock(&n->dev.mutex); + err = vhost_dev_check_owner(&n->dev); + if (err) + goto done; + vhost_net_stop(n, &tx_sock, &rx_sock); + vhost_net_flush(n); + err = vhost_dev_reset_owner(&n->dev); +done: + mutex_unlock(&n->dev.mutex); + if (tx_sock) + fput(tx_sock->file); + if (rx_sock) + fput(rx_sock->file); + return err; +} + +static void vhost_net_set_features(struct vhost_net *n, u64 features) +{ + size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ? + sizeof(struct virtio_net_hdr) : 0; + int i; + mutex_lock(&n->dev.mutex); + n->dev.acked_features = features; + smp_wmb(); + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { + mutex_lock(&n->vqs[i].mutex); + n->vqs[i].hdr_size = hdr_size; + mutex_unlock(&n->vqs[i].mutex); + } + mutex_unlock(&n->dev.mutex); + vhost_net_flush(n); +} + +static long vhost_net_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_net *n = f->private_data; + void __user *argp = (void __user *)arg; + u32 __user *featurep = argp; + struct vhost_vring_file backend; + u64 features; + int r; + switch (ioctl) { + case VHOST_NET_SET_BACKEND: + r = copy_from_user(&backend, argp, sizeof backend); + if (r < 0) + return r; + return vhost_net_set_backend(n, backend.index, backend.fd); + case VHOST_GET_FEATURES: + features = VHOST_FEATURES; + return put_user(features, featurep); + case VHOST_SET_FEATURES: + r = get_user(features, featurep); + /* No features for now */ + if (r < 0) + return r; + if (features & ~VHOST_FEATURES) + return -EOPNOTSUPP; + vhost_net_set_features(n, features); + return 0; + case VHOST_RESET_OWNER: + return vhost_net_reset_owner(n); + default: + r = vhost_dev_ioctl(&n->dev, ioctl, arg); + vhost_net_flush(n); + return r; + } +} + +#ifdef CONFIG_COMPAT +static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); +} +#endif + +const static struct file_operations vhost_net_fops = { + .owner = THIS_MODULE, + .release = vhost_net_release, + .unlocked_ioctl = vhost_net_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhost_net_compat_ioctl, +#endif + .open = vhost_net_open, +}; + +static struct miscdevice vhost_net_misc = { + VHOST_NET_MINOR, + "vhost-net", + &vhost_net_fops, +}; + +int vhost_net_init(void) +{ + int r = vhost_init(); + if (r) + goto err_init; + r = misc_register(&vhost_net_misc); + if (r) + goto err_reg; + return 0; +err_reg: + vhost_cleanup(); +err_init: + return r; + +} +module_init(vhost_net_init); + +void vhost_net_exit(void) +{ + misc_deregister(&vhost_net_misc); + vhost_cleanup(); +} +module_exit(vhost_net_exit); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael S. Tsirkin"); +MODULE_DESCRIPTION("Host kernel accelerator for virtio net"); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c new file mode 100644 index 000000000000..97233d5ad1b9 --- /dev/null +++ b/drivers/vhost/vhost.c @@ -0,0 +1,965 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Copyright (C) 2006 Rusty Russell IBM Corporation + * + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * Inspiration, some code, and most witty comments come from + * Documentation/lguest/lguest.c, by Rusty Russell + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Generic code for virtio server in host kernel. + */ + +#include <linux/eventfd.h> +#include <linux/vhost.h> +#include <linux/virtio_net.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/rcupdate.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/highmem.h> + +#include <linux/net.h> +#include <linux/if_packet.h> +#include <linux/if_arp.h> + +#include <net/sock.h> + +#include "vhost.h" + +enum { + VHOST_MEMORY_MAX_NREGIONS = 64, + VHOST_MEMORY_F_LOG = 0x1, +}; + +static struct workqueue_struct *vhost_workqueue; + +static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct vhost_poll *poll; + poll = container_of(pt, struct vhost_poll, table); + + poll->wqh = wqh; + add_wait_queue(wqh, &poll->wait); +} + +static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + struct vhost_poll *poll; + poll = container_of(wait, struct vhost_poll, wait); + if (!((unsigned long)key & poll->mask)) + return 0; + + queue_work(vhost_workqueue, &poll->work); + return 0; +} + +/* Init poll structure */ +void vhost_poll_init(struct vhost_poll *poll, work_func_t func, + unsigned long mask) +{ + INIT_WORK(&poll->work, func); + init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); + init_poll_funcptr(&poll->table, vhost_poll_func); + poll->mask = mask; +} + +/* Start polling a file. We add ourselves to file's wait queue. The caller must + * keep a reference to a file until after vhost_poll_stop is called. */ +void vhost_poll_start(struct vhost_poll *poll, struct file *file) +{ + unsigned long mask; + mask = file->f_op->poll(file, &poll->table); + if (mask) + vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); +} + +/* Stop polling a file. After this function returns, it becomes safe to drop the + * file reference. You must also flush afterwards. */ +void vhost_poll_stop(struct vhost_poll *poll) +{ + remove_wait_queue(poll->wqh, &poll->wait); +} + +/* Flush any work that has been scheduled. When calling this, don't hold any + * locks that are also used by the callback. */ +void vhost_poll_flush(struct vhost_poll *poll) +{ + flush_work(&poll->work); +} + +void vhost_poll_queue(struct vhost_poll *poll) +{ + queue_work(vhost_workqueue, &poll->work); +} + +static void vhost_vq_reset(struct vhost_dev *dev, + struct vhost_virtqueue *vq) +{ + vq->num = 1; + vq->desc = NULL; + vq->avail = NULL; + vq->used = NULL; + vq->last_avail_idx = 0; + vq->avail_idx = 0; + vq->last_used_idx = 0; + vq->used_flags = 0; + vq->used_flags = 0; + vq->log_used = false; + vq->log_addr = -1ull; + vq->hdr_size = 0; + vq->private_data = NULL; + vq->log_base = NULL; + vq->error_ctx = NULL; + vq->error = NULL; + vq->kick = NULL; + vq->call_ctx = NULL; + vq->call = NULL; +} + +long vhost_dev_init(struct vhost_dev *dev, + struct vhost_virtqueue *vqs, int nvqs) +{ + int i; + dev->vqs = vqs; + dev->nvqs = nvqs; + mutex_init(&dev->mutex); + dev->log_ctx = NULL; + dev->log_file = NULL; + dev->memory = NULL; + dev->mm = NULL; + + for (i = 0; i < dev->nvqs; ++i) { + dev->vqs[i].dev = dev; + mutex_init(&dev->vqs[i].mutex); + vhost_vq_reset(dev, dev->vqs + i); + if (dev->vqs[i].handle_kick) + vhost_poll_init(&dev->vqs[i].poll, + dev->vqs[i].handle_kick, + POLLIN); + } + return 0; +} + +/* Caller should have device mutex */ +long vhost_dev_check_owner(struct vhost_dev *dev) +{ + /* Are you the owner? If not, I don't think you mean to do that */ + return dev->mm == current->mm ? 0 : -EPERM; +} + +/* Caller should have device mutex */ +static long vhost_dev_set_owner(struct vhost_dev *dev) +{ + /* Is there an owner already? */ + if (dev->mm) + return -EBUSY; + /* No owner, become one */ + dev->mm = get_task_mm(current); + return 0; +} + +/* Caller should have device mutex */ +long vhost_dev_reset_owner(struct vhost_dev *dev) +{ + struct vhost_memory *memory; + + /* Restore memory to default 1:1 mapping. */ + memory = kmalloc(offsetof(struct vhost_memory, regions) + + 2 * sizeof *memory->regions, GFP_KERNEL); + if (!memory) + return -ENOMEM; + + vhost_dev_cleanup(dev); + + memory->nregions = 2; + memory->regions[0].guest_phys_addr = 1; + memory->regions[0].userspace_addr = 1; + memory->regions[0].memory_size = ~0ULL; + memory->regions[1].guest_phys_addr = 0; + memory->regions[1].userspace_addr = 0; + memory->regions[1].memory_size = 1; + dev->memory = memory; + return 0; +} + +/* Caller should have device mutex */ +void vhost_dev_cleanup(struct vhost_dev *dev) +{ + int i; + for (i = 0; i < dev->nvqs; ++i) { + if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { + vhost_poll_stop(&dev->vqs[i].poll); + vhost_poll_flush(&dev->vqs[i].poll); + } + if (dev->vqs[i].error_ctx) + eventfd_ctx_put(dev->vqs[i].error_ctx); + if (dev->vqs[i].error) + fput(dev->vqs[i].error); + if (dev->vqs[i].kick) + fput(dev->vqs[i].kick); + if (dev->vqs[i].call_ctx) + eventfd_ctx_put(dev->vqs[i].call_ctx); + if (dev->vqs[i].call) + fput(dev->vqs[i].call); + vhost_vq_reset(dev, dev->vqs + i); + } + if (dev->log_ctx) + eventfd_ctx_put(dev->log_ctx); + dev->log_ctx = NULL; + if (dev->log_file) + fput(dev->log_file); + dev->log_file = NULL; + /* No one will access memory at this point */ + kfree(dev->memory); + dev->memory = NULL; + if (dev->mm) + mmput(dev->mm); + dev->mm = NULL; +} + +static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) +{ + struct vhost_memory mem, *newmem, *oldmem; + unsigned long size = offsetof(struct vhost_memory, regions); + long r; + r = copy_from_user(&mem, m, size); + if (r) + return r; + if (mem.padding) + return -EOPNOTSUPP; + if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS) + return -E2BIG; + newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL); + if (!newmem) + return -ENOMEM; + + memcpy(newmem, &mem, size); + r = copy_from_user(newmem->regions, m->regions, + mem.nregions * sizeof *m->regions); + if (r) { + kfree(newmem); + return r; + } + oldmem = d->memory; + rcu_assign_pointer(d->memory, newmem); + synchronize_rcu(); + kfree(oldmem); + return 0; +} + +static int init_used(struct vhost_virtqueue *vq, + struct vring_used __user *used) +{ + int r = put_user(vq->used_flags, &used->flags); + if (r) + return r; + return get_user(vq->last_used_idx, &used->idx); +} + +static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) +{ + struct file *eventfp, *filep = NULL, + *pollstart = NULL, *pollstop = NULL; + struct eventfd_ctx *ctx = NULL; + u32 __user *idxp = argp; + struct vhost_virtqueue *vq; + struct vhost_vring_state s; + struct vhost_vring_file f; + struct vhost_vring_addr a; + u32 idx; + long r; + + r = get_user(idx, idxp); + if (r < 0) + return r; + if (idx > d->nvqs) + return -ENOBUFS; + + vq = d->vqs + idx; + + mutex_lock(&vq->mutex); + + switch (ioctl) { + case VHOST_SET_VRING_NUM: + r = copy_from_user(&s, argp, sizeof s); + if (r < 0) + break; + if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) { + r = -EINVAL; + break; + } + vq->num = s.num; + break; + case VHOST_SET_VRING_BASE: + r = copy_from_user(&s, argp, sizeof s); + if (r < 0) + break; + if (s.num > 0xffff) { + r = -EINVAL; + break; + } + vq->last_avail_idx = s.num; + /* Forget the cached index value. */ + vq->avail_idx = vq->last_avail_idx; + break; + case VHOST_GET_VRING_BASE: + s.index = idx; + s.num = vq->last_avail_idx; + r = copy_to_user(argp, &s, sizeof s); + break; + case VHOST_SET_VRING_ADDR: + r = copy_from_user(&a, argp, sizeof a); + if (r < 0) + break; + if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) { + r = -EOPNOTSUPP; + break; + } + if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || + (u64)(unsigned long)a.used_user_addr != a.used_user_addr || + (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) { + r = -EFAULT; + break; + } + if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) || + (a.used_user_addr & (sizeof *vq->used->ring - 1)) || + (a.log_guest_addr & (sizeof *vq->used->ring - 1))) { + r = -EINVAL; + break; + } + r = init_used(vq, (struct vring_used __user *)a.used_user_addr); + if (r) + break; + vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); + vq->desc = (void __user *)(unsigned long)a.desc_user_addr; + vq->avail = (void __user *)(unsigned long)a.avail_user_addr; + vq->log_addr = a.log_guest_addr; + vq->used = (void __user *)(unsigned long)a.used_user_addr; + break; + case VHOST_SET_VRING_KICK: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->kick) { + pollstop = filep = vq->kick; + pollstart = vq->kick = eventfp; + } else + filep = eventfp; + break; + case VHOST_SET_VRING_CALL: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->call) { + filep = vq->call; + ctx = vq->call_ctx; + vq->call = eventfp; + vq->call_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + break; + case VHOST_SET_VRING_ERR: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->error) { + filep = vq->error; + vq->error = eventfp; + ctx = vq->error_ctx; + vq->error_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + break; + default: + r = -ENOIOCTLCMD; + } + + if (pollstop && vq->handle_kick) + vhost_poll_stop(&vq->poll); + + if (ctx) + eventfd_ctx_put(ctx); + if (filep) + fput(filep); + + if (pollstart && vq->handle_kick) + vhost_poll_start(&vq->poll, vq->kick); + + mutex_unlock(&vq->mutex); + + if (pollstop && vq->handle_kick) + vhost_poll_flush(&vq->poll); + return r; +} + +long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct file *eventfp, *filep = NULL; + struct eventfd_ctx *ctx = NULL; + u64 p; + long r; + int i, fd; + + mutex_lock(&d->mutex); + /* If you are not the owner, you can become one */ + if (ioctl == VHOST_SET_OWNER) { + r = vhost_dev_set_owner(d); + goto done; + } + + /* You must be the owner to do anything else */ + r = vhost_dev_check_owner(d); + if (r) + goto done; + + switch (ioctl) { + case VHOST_SET_MEM_TABLE: + r = vhost_set_memory(d, argp); + break; + case VHOST_SET_LOG_BASE: + r = copy_from_user(&p, argp, sizeof p); + if (r < 0) + break; + if ((u64)(unsigned long)p != p) { + r = -EFAULT; + break; + } + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i].mutex); + d->vqs[i].log_base = (void __user *)(unsigned long)p; + mutex_unlock(&d->vqs[i].mutex); + } + break; + case VHOST_SET_LOG_FD: + r = get_user(fd, (int __user *)argp); + if (r < 0) + break; + eventfp = fd == -1 ? NULL : eventfd_fget(fd); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } + if (eventfp != d->log_file) { + filep = d->log_file; + ctx = d->log_ctx; + d->log_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i].mutex); + d->vqs[i].log_ctx = d->log_ctx; + mutex_unlock(&d->vqs[i].mutex); + } + if (ctx) + eventfd_ctx_put(ctx); + if (filep) + fput(filep); + break; + default: + r = vhost_set_vring(d, ioctl, argp); + break; + } +done: + mutex_unlock(&d->mutex); + return r; +} + +static const struct vhost_memory_region *find_region(struct vhost_memory *mem, + __u64 addr, __u32 len) +{ + struct vhost_memory_region *reg; + int i; + /* linear search is not brilliant, but we really have on the order of 6 + * regions in practice */ + for (i = 0; i < mem->nregions; ++i) { + reg = mem->regions + i; + if (reg->guest_phys_addr <= addr && + reg->guest_phys_addr + reg->memory_size - 1 >= addr) + return reg; + } + return NULL; +} + +/* TODO: This is really inefficient. We need something like get_user() + * (instruction directly accesses the data, with an exception table entry + * returning -EFAULT). See Documentation/x86/exception-tables.txt. + */ +static int set_bit_to_user(int nr, void __user *addr) +{ + unsigned long log = (unsigned long)addr; + struct page *page; + void *base; + int bit = nr + (log % PAGE_SIZE) * 8; + int r; + r = get_user_pages_fast(log, 1, 1, &page); + if (r) + return r; + base = kmap_atomic(page, KM_USER0); + set_bit(bit, base); + kunmap_atomic(base, KM_USER0); + set_page_dirty_lock(page); + put_page(page); + return 0; +} + +static int log_write(void __user *log_base, + u64 write_address, u64 write_length) +{ + int r; + if (!write_length) + return 0; + write_address /= VHOST_PAGE_SIZE; + for (;;) { + u64 base = (u64)(unsigned long)log_base; + u64 log = base + write_address / 8; + int bit = write_address % 8; + if ((u64)(unsigned long)log != log) + return -EFAULT; + r = set_bit_to_user(bit, (void __user *)(unsigned long)log); + if (r < 0) + return r; + if (write_length <= VHOST_PAGE_SIZE) + break; + write_length -= VHOST_PAGE_SIZE; + write_address += VHOST_PAGE_SIZE; + } + return r; +} + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len) +{ + int i, r; + + /* Make sure data written is seen before log. */ + wmb(); + for (i = 0; i < log_num; ++i) { + u64 l = min(log[i].len, len); + r = log_write(vq->log_base, log[i].addr, l); + if (r < 0) + return r; + len -= l; + if (!len) + return 0; + } + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + /* Length written exceeds what we have stored. This is a bug. */ + BUG(); + return 0; +} + +int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, + struct iovec iov[], int iov_size) +{ + const struct vhost_memory_region *reg; + struct vhost_memory *mem; + struct iovec *_iov; + u64 s = 0; + int ret = 0; + + rcu_read_lock(); + + mem = rcu_dereference(dev->memory); + while ((u64)len > s) { + u64 size; + if (ret >= iov_size) { + ret = -ENOBUFS; + break; + } + reg = find_region(mem, addr, len); + if (!reg) { + ret = -EFAULT; + break; + } + _iov = iov + ret; + size = reg->memory_size - addr + reg->guest_phys_addr; + _iov->iov_len = min((u64)len, size); + _iov->iov_base = (void *)(unsigned long) + (reg->userspace_addr + addr - reg->guest_phys_addr); + s += size; + addr += size; + ++ret; + } + + rcu_read_unlock(); + return ret; +} + +/* Each buffer in the virtqueues is actually a chain of descriptors. This + * function returns the next descriptor in the chain, + * or -1U if we're at the end. */ +static unsigned next_desc(struct vring_desc *desc) +{ + unsigned int next; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(desc->flags & VRING_DESC_F_NEXT)) + return -1U; + + /* Check they're not leading us off end of descriptors. */ + next = desc->next; + /* Make sure compiler knows to grab that: we don't want it changing! */ + /* We will use the result as an index in an array, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + return next; +} + +static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num, + struct vring_desc *indirect) +{ + struct vring_desc desc; + unsigned int i = 0, count, found = 0; + int ret; + + /* Sanity check */ + if (indirect->len % sizeof desc) { + vq_err(vq, "Invalid length in indirect descriptor: " + "len 0x%llx not multiple of 0x%zx\n", + (unsigned long long)indirect->len, + sizeof desc); + return -EINVAL; + } + + ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect, + ARRAY_SIZE(vq->indirect)); + if (ret < 0) { + vq_err(vq, "Translation failure %d in indirect.\n", ret); + return ret; + } + + /* We will use the result as an address to read from, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + count = indirect->len / sizeof desc; + /* Buffers are chained via a 16 bit next field, so + * we can have at most 2^16 of these. */ + if (count > USHORT_MAX + 1) { + vq_err(vq, "Indirect buffer length too big: %d\n", + indirect->len); + return -E2BIG; + } + + do { + unsigned iov_count = *in_num + *out_num; + if (++found > count) { + vq_err(vq, "Loop detected: last one at %u " + "indirect size %u\n", + i, count); + return -EINVAL; + } + if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect, + sizeof desc)) { + vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + if (desc.flags & VRING_DESC_F_INDIRECT) { + vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + + ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + iov_size - iov_count); + if (ret < 0) { + vq_err(vq, "Translation failure %d indirect idx %d\n", + ret, i); + return ret; + } + /* If this is an input descriptor, increment that count. */ + if (desc.flags & VRING_DESC_F_WRITE) { + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (*in_num) { + vq_err(vq, "Indirect descriptor " + "has out after in: idx %d\n", i); + return -EINVAL; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + return 0; +} + +/* This looks in the virtqueue and for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function returns the descriptor number found, or vq->num (which + * is never a valid descriptor number) if none was found. */ +unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) +{ + struct vring_desc desc; + unsigned int i, head, found = 0; + u16 last_avail_idx; + int ret; + + /* Check it isn't doing very strange things with descriptor numbers. */ + last_avail_idx = vq->last_avail_idx; + if (get_user(vq->avail_idx, &vq->avail->idx)) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return vq->num; + } + + if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return vq->num; + } + + /* If there's nothing new since last we looked, return invalid. */ + if (vq->avail_idx == last_avail_idx) + return vq->num; + + /* Only get avail ring entries after they have been exposed by guest. */ + rmb(); + + /* Grab the next descriptor number they're advertising, and increment + * the index we've seen. */ + if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) { + vq_err(vq, "Failed to read head: idx %d address %p\n", + last_avail_idx, + &vq->avail->ring[last_avail_idx % vq->num]); + return vq->num; + } + + /* If their number is silly, that's an error. */ + if (head >= vq->num) { + vq_err(vq, "Guest says index %u > %u is available", + head, vq->num); + return vq->num; + } + + /* When we start there are none of either input nor output. */ + *out_num = *in_num = 0; + if (unlikely(log)) + *log_num = 0; + + i = head; + do { + unsigned iov_count = *in_num + *out_num; + if (i >= vq->num) { + vq_err(vq, "Desc index is %u > %u, head = %u", + i, vq->num, head); + return vq->num; + } + if (++found > vq->num) { + vq_err(vq, "Loop detected: last one at %u " + "vq size %u head %u\n", + i, vq->num, head); + return vq->num; + } + ret = copy_from_user(&desc, vq->desc + i, sizeof desc); + if (ret) { + vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", + i, vq->desc + i); + return vq->num; + } + if (desc.flags & VRING_DESC_F_INDIRECT) { + ret = get_indirect(dev, vq, iov, iov_size, + out_num, in_num, + log, log_num, &desc); + if (ret < 0) { + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", i); + return vq->num; + } + continue; + } + + ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + iov_size - iov_count); + if (ret < 0) { + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); + return vq->num; + } + if (desc.flags & VRING_DESC_F_WRITE) { + /* If this is an input descriptor, + * increment that count. */ + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (*in_num) { + vq_err(vq, "Descriptor has out after in: " + "idx %d\n", i); + return vq->num; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + + /* On success, increment avail index. */ + vq->last_avail_idx++; + return head; +} + +/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ +void vhost_discard_vq_desc(struct vhost_virtqueue *vq) +{ + vq->last_avail_idx--; +} + +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using eventfd. */ +int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) +{ + struct vring_used_elem *used; + + /* The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. */ + used = &vq->used->ring[vq->last_used_idx % vq->num]; + if (put_user(head, &used->id)) { + vq_err(vq, "Failed to write used id"); + return -EFAULT; + } + if (put_user(len, &used->len)) { + vq_err(vq, "Failed to write used len"); + return -EFAULT; + } + /* Make sure buffer is written before we update index. */ + wmb(); + if (put_user(vq->last_used_idx + 1, &vq->used->idx)) { + vq_err(vq, "Failed to increment used idx"); + return -EFAULT; + } + if (unlikely(vq->log_used)) { + /* Make sure data is seen before log. */ + wmb(); + log_write(vq->log_base, vq->log_addr + sizeof *vq->used->ring * + (vq->last_used_idx % vq->num), + sizeof *vq->used->ring); + log_write(vq->log_base, vq->log_addr, sizeof *vq->used->ring); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } + vq->last_used_idx++; + return 0; +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __u16 flags = 0; + if (get_user(flags, &vq->avail->flags)) { + vq_err(vq, "Failed to get flags"); + return; + } + + /* If they don't want an interrupt, don't signal, unless empty. */ + if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && + (vq->avail_idx != vq->last_avail_idx || + !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) + return; + + /* Signal the Guest tell them we used something up. */ + if (vq->call_ctx) + eventfd_signal(vq->call_ctx, 1); +} + +/* And here's the combo meal deal. Supersize me! */ +void vhost_add_used_and_signal(struct vhost_dev *dev, + struct vhost_virtqueue *vq, + unsigned int head, int len) +{ + vhost_add_used(vq, head, len); + vhost_signal(dev, vq); +} + +/* OK, now we need to know about added descriptors. */ +bool vhost_enable_notify(struct vhost_virtqueue *vq) +{ + u16 avail_idx; + int r; + if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) + return false; + vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; + r = put_user(vq->used_flags, &vq->used->flags); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + /* They could have slipped one in as we were doing that: make + * sure it's written, then check again. */ + mb(); + r = get_user(avail_idx, &vq->avail->idx); + if (r) { + vq_err(vq, "Failed to check avail idx at %p: %d\n", + &vq->avail->idx, r); + return false; + } + + return avail_idx != vq->last_avail_idx; +} + +/* We don't need to be notified again. */ +void vhost_disable_notify(struct vhost_virtqueue *vq) +{ + int r; + if (vq->used_flags & VRING_USED_F_NO_NOTIFY) + return; + vq->used_flags |= VRING_USED_F_NO_NOTIFY; + r = put_user(vq->used_flags, &vq->used->flags); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); +} + +int vhost_init(void) +{ + vhost_workqueue = create_singlethread_workqueue("vhost"); + if (!vhost_workqueue) + return -ENOMEM; + return 0; +} + +void vhost_cleanup(void) +{ + destroy_workqueue(vhost_workqueue); +} diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h new file mode 100644 index 000000000000..d1f045376017 --- /dev/null +++ b/drivers/vhost/vhost.h @@ -0,0 +1,159 @@ +#ifndef _VHOST_H +#define _VHOST_H + +#include <linux/eventfd.h> +#include <linux/vhost.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/skbuff.h> +#include <linux/uio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> + +struct vhost_device; + +enum { + /* Enough place for all fragments, head, and virtio net header. */ + VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2, +}; + +/* Poll a file (eventfd or socket) */ +/* Note: there's nothing vhost specific about this structure. */ +struct vhost_poll { + poll_table table; + wait_queue_head_t *wqh; + wait_queue_t wait; + /* struct which will handle all actual work. */ + struct work_struct work; + unsigned long mask; +}; + +void vhost_poll_init(struct vhost_poll *poll, work_func_t func, + unsigned long mask); +void vhost_poll_start(struct vhost_poll *poll, struct file *file); +void vhost_poll_stop(struct vhost_poll *poll); +void vhost_poll_flush(struct vhost_poll *poll); +void vhost_poll_queue(struct vhost_poll *poll); + +struct vhost_log { + u64 addr; + u64 len; +}; + +/* The virtqueue structure describes a queue attached to a device. */ +struct vhost_virtqueue { + struct vhost_dev *dev; + + /* The actual ring of buffers. */ + struct mutex mutex; + unsigned int num; + struct vring_desc __user *desc; + struct vring_avail __user *avail; + struct vring_used __user *used; + struct file *kick; + struct file *call; + struct file *error; + struct eventfd_ctx *call_ctx; + struct eventfd_ctx *error_ctx; + struct eventfd_ctx *log_ctx; + + struct vhost_poll poll; + + /* The routine to call when the Guest pings us, or timeout. */ + work_func_t handle_kick; + + /* Last available index we saw. */ + u16 last_avail_idx; + + /* Caches available index value from user. */ + u16 avail_idx; + + /* Last index we used. */ + u16 last_used_idx; + + /* Used flags */ + u16 used_flags; + + /* Log writes to used structure. */ + bool log_used; + u64 log_addr; + + struct iovec indirect[VHOST_NET_MAX_SG]; + struct iovec iov[VHOST_NET_MAX_SG]; + struct iovec hdr[VHOST_NET_MAX_SG]; + size_t hdr_size; + /* We use a kind of RCU to access private pointer. + * All readers access it from workqueue, which makes it possible to + * flush the workqueue instead of synchronize_rcu. Therefore readers do + * not need to call rcu_read_lock/rcu_read_unlock: the beginning of + * work item execution acts instead of rcu_read_lock() and the end of + * work item execution acts instead of rcu_read_lock(). + * Writers use virtqueue mutex. */ + void *private_data; + /* Log write descriptors */ + void __user *log_base; + struct vhost_log log[VHOST_NET_MAX_SG]; +}; + +struct vhost_dev { + /* Readers use RCU to access memory table pointer + * log base pointer and features. + * Writers use mutex below.*/ + struct vhost_memory *memory; + struct mm_struct *mm; + struct mutex mutex; + unsigned acked_features; + struct vhost_virtqueue *vqs; + int nvqs; + struct file *log_file; + struct eventfd_ctx *log_ctx; +}; + +long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); +long vhost_dev_check_owner(struct vhost_dev *); +long vhost_dev_reset_owner(struct vhost_dev *); +void vhost_dev_cleanup(struct vhost_dev *); +long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg); + +unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *, + struct iovec iov[], unsigned int iov_count, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num); +void vhost_discard_vq_desc(struct vhost_virtqueue *); + +int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); +void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); +void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, + unsigned int head, int len); +void vhost_disable_notify(struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_virtqueue *); + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len); + +int vhost_init(void); +void vhost_cleanup(void); + +#define vq_err(vq, fmt, ...) do { \ + pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ + if ((vq)->error_ctx) \ + eventfd_signal((vq)->error_ctx, 1);\ + } while (0) + +enum { + VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1 << VIRTIO_RING_F_INDIRECT_DESC) | + (1 << VHOST_F_LOG_ALL) | + (1 << VHOST_NET_F_VIRTIO_NET_HDR), +}; + +static inline int vhost_has_feature(struct vhost_dev *dev, int bit) +{ + unsigned acked_features = rcu_dereference(dev->acked_features); + return acked_features & (1 << bit); +} + +#endif diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 54fbb2995a5f..156c661b2912 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -1976,8 +1976,7 @@ static void __devexit uvesafb_exit(void) module_exit(uvesafb_exit); -#define param_get_scroll NULL -static int param_set_scroll(const char *val, struct kernel_param *kp) +static int param_set_scroll(const char *val, const struct kernel_param *kp) { ypan = 0; @@ -1992,7 +1991,9 @@ static int param_set_scroll(const char *val, struct kernel_param *kp) return 0; } - +static struct kernel_param_ops param_ops_scroll = { + .set = param_set_scroll, +}; #define param_check_scroll(name, p) __param_check(name, p, void) module_param_named(scroll, ypan, scroll, 0); diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c index 3df17dc8c3d7..8bfa47d5a5c0 100644 --- a/drivers/video/vt8623fb.c +++ b/drivers/video/vt8623fb.c @@ -727,7 +727,9 @@ static int __devinit vt8623_pci_probe(struct pci_dev *dev, const struct pci_devi /* Prepare startup mode */ + kparam_block_sysfs_write(mode_option); rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8); + kparam_unblock_sysfs_write(mode_option); if (! ((rc == 1) || (rc == 2))) { rc = -EINVAL; dev_err(info->device, "mode %s not found\n", mode_option); diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c index e6c4390d8bd6..53180a37cc9a 100644 --- a/drivers/zorro/zorro-driver.c +++ b/drivers/zorro/zorro-driver.c @@ -156,5 +156,4 @@ postcore_initcall(zorro_driver_init); EXPORT_SYMBOL(zorro_match_device); EXPORT_SYMBOL(zorro_register_driver); EXPORT_SYMBOL(zorro_unregister_driver); -EXPORT_SYMBOL(zorro_dev_driver); EXPORT_SYMBOL(zorro_bus_type); |