diff options
79 files changed, 3344 insertions, 639 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 9d620c153b04..d041ae8a4ace 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -309,41 +309,26 @@ A: The following are what is required for CPU hotplug infrastructure to work Q: I need to ensure that a particular cpu is not removed when there is some work specific to this cpu is in progress. -A: First switch the current thread context to preferred cpu +A: There are two ways. If your code can be run in interrupt context, use + smp_call_function_single(), otherwise use work_on_cpu(). Note that + work_on_cpu() is slow, and can fail due to out of memory: int my_func_on_cpu(int cpu) { - cpumask_t saved_mask, new_mask = CPU_MASK_NONE; - int curr_cpu, err = 0; - - saved_mask = current->cpus_allowed; - cpu_set(cpu, new_mask); - err = set_cpus_allowed(current, new_mask); - - if (err) - return err; - - /* - * If we got scheduled out just after the return from - * set_cpus_allowed() before running the work, this ensures - * we stay locked. - */ - curr_cpu = get_cpu(); - - if (curr_cpu != cpu) { - err = -EAGAIN; - goto ret; - } else { - /* - * Do work : But cant sleep, since get_cpu() disables preempt - */ - } - ret: - put_cpu(); - set_cpus_allowed(current, saved_mask); - return err; - } - + int err; + get_online_cpus(); + if (!cpu_online(cpu)) + err = -EINVAL; + else +#if NEEDS_BLOCKING + err = work_on_cpu(cpu, __my_func_on_cpu, NULL); +#else + smp_call_function_single(cpu, __my_func_on_cpu, &err, + true); +#endif + put_online_cpus(); + return err; + } Q: How do we determine how many CPUs are available for hotplug. A: There is no clear spec defined way from ACPI that can give us that diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 098de5bce00a..9fdfbb969b12 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -34,7 +34,6 @@ #include <sys/uio.h> #include <termios.h> #include <getopt.h> -#include <zlib.h> #include <assert.h> #include <sched.h> #include <limits.h> diff --git a/MAINTAINERS b/MAINTAINERS index 98ba38360755..1cc3da5565d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5699,6 +5699,15 @@ S: Maintained F: Documentation/filesystems/vfat.txt F: fs/fat/ +VIRTIO HOST (VHOST) +M: "Michael S. Tsirkin" <mst@redhat.com> +L: kvm@vger.kernel.org +L: virtualization@lists.osdl.org +L: netdev@vger.kernel.org +S: Maintained +F: drivers/vhost/ +F: include/linux/vhost.h + VIA RHINE NETWORK DRIVER M: Roger Luethi <rl@hellgate.ch> S: Maintained @@ -679,6 +679,8 @@ libs-y := $(libs-y1) $(libs-y2) # +--< $(vmlinux-main) # | +--< driver/built-in.o mm/built-in.o + more # | +# +-< .tmp_exports-asm.o (see comments regarding modpost of vmlinux.o) +# | # +-< kallsyms.o (see description in CONFIG_KALLSYMS section) # # vmlinux version (uname -v) cannot be updated during normal @@ -742,7 +744,6 @@ define rule_vmlinux__ $(verify_kallsyms) endef - ifdef CONFIG_KALLSYMS # Generate section listing all symbols and add it into vmlinux $(kallsyms.o) # It's a three stage process: @@ -802,13 +803,13 @@ quiet_cmd_kallsyms = KSYM $@ $(call cmd,kallsyms) # .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version -.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE +.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) .tmp_exports-asm.o FORCE $(call if_changed_rule,ksym_ld) -.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE +.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_exports-asm.o .tmp_kallsyms1.o FORCE $(call if_changed,vmlinux__) -.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE +.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_exports-asm.o .tmp_kallsyms2.o FORCE $(call if_changed,vmlinux__) # Needs to visit scripts/ before $(KALLSYMS) can be used. @@ -839,8 +840,18 @@ define rule_vmlinux-modpost $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd endef +# The modpost of vmlinux.o above creates .tmp_exports-asm.S, a list of exported +# symbols sorted by name. This list is linked into vmlinux to replace the +# original unsorted exports. It allows symbols to be resolved efficiently +# when loading modules. +.tmp_exports-asm.S: vmlinux.o + +ifneq ($(CONFIG_SYMBOL_PREFIX),) +export AFLAGS_.tmp_exports-asm.o += -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX)) +endif + # vmlinux image - including updated kernel symbols -vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE +vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o .tmp_exports-asm.o $(kallsyms.o) FORCE ifdef CONFIG_HEADERS_CHECK $(Q)$(MAKE) -f $(srctree)/Makefile headers_check endif @@ -1144,7 +1155,8 @@ endif # CONFIG_MODULES # Directories & files removed with 'make clean' CLEAN_DIRS += $(MODVERDIR) CLEAN_FILES += vmlinux System.map \ - .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map + .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map \ + .tmp_exports-asm* # Directories & files removed with 'make mrproper' MRPROPER_DIRS += include/config usr/include include/generated diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 0e627705f746..8214bfebfaca 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -48,27 +48,7 @@ extern void __aeabi_uidivmod(void); extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); -extern void fp_enter(void); -/* - * This has a special calling convention; it doesn't - * modify any of the usual registers, except for LR. - */ -#define EXPORT_CRC_ALIAS(sym) __CRC_SYMBOL(sym, "") - -#define EXPORT_SYMBOL_ALIAS(sym,orig) \ - EXPORT_CRC_ALIAS(sym) \ - static const struct kernel_symbol __ksymtab_##sym \ - __used __attribute__((section("__ksymtab"))) = \ - { (unsigned long)&orig, #sym }; - -/* - * floating point math emulator support. - * These symbols will never change their calling convention... - */ -EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_enter); -EXPORT_SYMBOL_ALIAS(fp_printk,printk); -EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig); EXPORT_SYMBOL(__backtrace); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index aecf87dfbaec..ec511d4969f8 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -79,11 +79,11 @@ SECTIONS #endif } - /DISCARD/ : { /* Exit code and data */ - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) + /* + * unwind exit sections must be discarded before the rest of the + * unwind sections get included. + */ + /DISCARD/ : { *(.ARM.exidx.exit.text) *(.ARM.extab.exit.text) #ifndef CONFIG_HOTPLUG_CPU @@ -271,6 +271,9 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Default discards */ + DISCARDS } /* diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index ae6a60f10120..2180433213b7 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -5,6 +5,10 @@ mainmenu "Blackfin Kernel Configuration" +config SYMBOL_PREFIX + string + default "_" + config MMU def_bool n diff --git a/arch/blackfin/include/asm/module.h b/arch/blackfin/include/asm/module.h index 9c1cfffddd9b..4282b169ead9 100644 --- a/arch/blackfin/include/asm/module.h +++ b/arch/blackfin/include/asm/module.h @@ -7,8 +7,6 @@ #ifndef _ASM_BFIN_MODULE_H #define _ASM_BFIN_MODULE_H -#define MODULE_SYMBOL_PREFIX "_" - #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym #define Elf_Ehdr Elf32_Ehdr diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 10e12539000e..f39707c6590d 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -4,8 +4,6 @@ * Licensed under the GPL-2 or later */ -#define VMLINUX_SYMBOL(_sym_) _##_sym_ - #include <asm-generic/vmlinux.lds.h> #include <asm/mem_map.h> #include <asm/page.h> diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 9420648352b8..53cc669e6d59 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -10,6 +10,10 @@ config H8300 default y select HAVE_IDE +config SYMBOL_PREFIX + string + default "_" + config MMU bool default n diff --git a/arch/h8300/include/asm/module.h b/arch/h8300/include/asm/module.h index de23231f3196..8e46724b7c09 100644 --- a/arch/h8300/include/asm/module.h +++ b/arch/h8300/include/asm/module.h @@ -8,6 +8,4 @@ struct mod_arch_specific { }; #define Elf_Sym Elf32_Sym #define Elf_Ehdr Elf32_Ehdr -#define MODULE_SYMBOL_PREFIX "_" - #endif /* _ASM_H8/300_MODULE_H */ diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index b9e24907e6ea..03d356d96e5d 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -1,4 +1,3 @@ -#define VMLINUX_SYMBOL(_sym_) _##_sym_ #include <asm-generic/vmlinux.lds.h> #include <asm/page.h> diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 07115d6cd4ba..425451453e96 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -96,8 +96,6 @@ EXPORT_SYMBOL(copy_4K_page); EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(isa_mem_base); EXPORT_SYMBOL(pci_dram_offset); -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); #endif /* CONFIG_PCI */ EXPORT_SYMBOL(start_thread); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index b85374f7cf94..539e83f8e087 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1064,7 +1064,6 @@ int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask) return (device_mask & dma_addr_mask) == dma_addr_mask; } -EXPORT_SYMBOL(pci_dma_supported); void pci_resource_to_user(const struct pci_dev *pdev, int bar, const struct resource *rp, resource_size_t *start, diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c index 0f26066a08d9..372ad59c4cba 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -38,17 +38,5 @@ EXPORT_SYMBOL(sun4v_niagara_setperf); EXPORT_SYMBOL(sun4v_niagara2_getperf); EXPORT_SYMBOL(sun4v_niagara2_setperf); -#ifdef CONFIG_PCI -/* inline functions in asm/pci_64.h */ -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); -EXPORT_SYMBOL(pci_map_single); -EXPORT_SYMBOL(pci_unmap_single); -EXPORT_SYMBOL(pci_map_sg); -EXPORT_SYMBOL(pci_unmap_sg); -EXPORT_SYMBOL(pci_dma_sync_single_for_cpu); -EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu); -#endif - /* Exporting a symbol from /init/main.c */ EXPORT_SYMBOL(saved_command_line); diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 368219cc2366..3ceede02f426 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -186,7 +186,9 @@ static int hostaudio_open(struct inode *inode, struct file *file) int ret; #ifdef DEBUG + kparam_block_sysfs_write(dsp); printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp); + kparam_unblock_sysfs_write(dsp); #endif state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL); @@ -198,7 +200,9 @@ static int hostaudio_open(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) w = 1; + kparam_block_sysfs_write(dsp); ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); + kparam_unblock_sysfs_write(dsp); if (ret < 0) { kfree(state); return ret; @@ -254,11 +258,15 @@ static int hostmixer_open_mixdev(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) w = 1; + kparam_block_sysfs_write(mixer); ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); + kparam_unblock_sysfs_write(mixer); if (ret < 0) { + kparam_block_sysfs_write(dsp); printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', " "err = %d\n", dsp, -ret); + kparam_unblock_sysfs_write(dsp); kfree(state); return ret; } @@ -314,8 +322,10 @@ MODULE_LICENSE("GPL"); static int __init hostaudio_init_module(void) { + __kernel_param_lock(); printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", dsp, mixer); + __kernel_param_unlock(); module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); if (module_data.dev_audio < 0) { diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce3..505b2d6873a0 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -57,4 +57,6 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(init_level4_pgt); -EXPORT_SYMBOL(load_gs_index); +#ifndef CONFIG_PARAVIRT +EXPORT_SYMBOL(native_load_gs_index); +#endif diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4cd498332466..3c4d0109ad20 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -65,6 +65,7 @@ config KVM_AMD # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. +source drivers/vhost/Kconfig source drivers/lguest/Kconfig source drivers/virtio/Kconfig diff --git a/drivers/Makefile b/drivers/Makefile index 6ee53c7a57a1..81e36596b1e9 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ +obj-$(CONFIG_VHOST_NET) += vhost/ obj-$(CONFIG_VIRTIO) += virtio/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/acpi/debug.c b/drivers/acpi/debug.c index 8a690c3b8e23..941469801322 100644 --- a/drivers/acpi/debug.c +++ b/drivers/acpi/debug.c @@ -94,7 +94,8 @@ static const struct acpi_dlevel acpi_debug_levels[] = { /* -------------------------------------------------------------------------- FS Interface (/sys) -------------------------------------------------------------------------- */ -static int param_get_debug_layer(char *buffer, struct kernel_param *kp) { +static int param_get_debug_layer(char *buffer, const struct kernel_param *kp) +{ int result = 0; int i; @@ -116,7 +117,8 @@ static int param_get_debug_layer(char *buffer, struct kernel_param *kp) { return result; } -static int param_get_debug_level(char *buffer, struct kernel_param *kp) { +static int param_get_debug_level(char *buffer, const struct kernel_param *kp) +{ int result = 0; int i; @@ -135,8 +137,18 @@ static int param_get_debug_level(char *buffer, struct kernel_param *kp) { return result; } -module_param_call(debug_layer, param_set_uint, param_get_debug_layer, &acpi_dbg_layer, 0644); -module_param_call(debug_level, param_set_uint, param_get_debug_level, &acpi_dbg_level, 0644); +static struct kernel_param_ops acpi_debug_layer_ops = { + .set = param_set_uint, + .get = param_get_debug_layer, +}; + +static struct kernel_param_ops acpi_debug_level_ops = { + .set = param_set_uint, + .get = param_get_debug_level, +}; + +module_param_cb(debug_layer, &acpi_debug_layer_ops, &acpi_dbg_layer, 0644); +module_param_cb(debug_level, &acpi_debug_level_ops, &acpi_dbg_level, 0644); static char trace_method_name[6]; module_param_string(trace_method_name, trace_method_name, 6, 0644); @@ -145,7 +157,7 @@ module_param(trace_debug_layer, uint, 0644); static unsigned int trace_debug_level; module_param(trace_debug_level, uint, 0644); -static int param_set_trace_state(const char *val, struct kernel_param *kp) +static int param_set_trace_state(const char *val, const struct kernel_param *kp) { int result = 0; @@ -179,7 +191,7 @@ exit: return result; } -static int param_get_trace_state(char *buffer, struct kernel_param *kp) +static int param_get_trace_state(char *buffer, const struct kernel_param *kp) { if (!acpi_gbl_trace_method_name) return sprintf(buffer, "disable"); @@ -192,8 +204,12 @@ static int param_get_trace_state(char *buffer, struct kernel_param *kp) return 0; } -module_param_call(trace_state, param_set_trace_state, param_get_trace_state, - NULL, 0644); +static struct kernel_param_ops param_ops_trace_state = { + .set = param_set_trace_state, + .get = param_get_trace_state, +}; + +module_param_cb(trace_state, ¶m_ops_trace_state, NULL, 0644); /* -------------------------------------------------------------------------- FS Interface (/proc) diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c index b8a5d654d3d0..448788fc2bf2 100644 --- a/drivers/char/hvc_iucv.c +++ b/drivers/char/hvc_iucv.c @@ -1146,7 +1146,7 @@ out_err: * Note: If it is called early in the boot process, @val is stored and * parsed later in hvc_iucv_init(). */ -static int param_set_vmidfilter(const char *val, struct kernel_param *kp) +static int param_set_vmidfilter(const char *val, const struct kernel_param *kp) { int rc; @@ -1173,7 +1173,7 @@ static int param_set_vmidfilter(const char *val, struct kernel_param *kp) * The function stores the filter as a comma-separated list of z/VM user IDs * in @buffer. Typically, sysfs routines call this function for attr show. */ -static int param_get_vmidfilter(char *buffer, struct kernel_param *kp) +static int param_get_vmidfilter(char *buffer, const struct kernel_param *kp) { int rc; size_t index, len; @@ -1200,6 +1200,11 @@ static int param_get_vmidfilter(char *buffer, struct kernel_param *kp) #define param_check_vmidfilter(name, p) __param_check(name, p, void) +static struct kernel_param_ops param_ops_vmidfilter = { + .set = param_set_vmidfilter, + .get = param_get_vmidfilter, +}; + /** * hvc_iucv_init() - z/VM IUCV HVC device driver initialization */ diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index a4d57e31f713..45999d81004f 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -196,7 +196,7 @@ static void ipmi_unregister_watchdog(int ipmi_intf); */ static int start_now; -static int set_param_int(const char *val, struct kernel_param *kp) +static int set_param_timeout(const char *val, const struct kernel_param *kp) { char *endp; int l; @@ -215,10 +215,11 @@ static int set_param_int(const char *val, struct kernel_param *kp) return rv; } -static int get_param_int(char *buffer, struct kernel_param *kp) -{ - return sprintf(buffer, "%i", *((int *)kp->arg)); -} +static struct kernel_param_ops param_ops_timeout = { + .set = set_param_timeout, + .get = param_get_int, +}; +#define param_check_timeout param_check_int typedef int (*action_fn)(const char *intval, char *outval); @@ -227,7 +228,7 @@ static int preaction_op(const char *inval, char *outval); static int preop_op(const char *inval, char *outval); static void check_parms(void); -static int set_param_str(const char *val, struct kernel_param *kp) +static int set_param_str(const char *val, const struct kernel_param *kp) { action_fn fn = (action_fn) kp->arg; int rv = 0; @@ -251,7 +252,7 @@ static int set_param_str(const char *val, struct kernel_param *kp) return rv; } -static int get_param_str(char *buffer, struct kernel_param *kp) +static int get_param_str(char *buffer, const struct kernel_param *kp) { action_fn fn = (action_fn) kp->arg; int rv; @@ -263,7 +264,7 @@ static int get_param_str(char *buffer, struct kernel_param *kp) } -static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp) +static int set_param_wdog_ifnum(const char *val, const struct kernel_param *kp) { int rv = param_set_int(val, kp); if (rv) @@ -276,27 +277,38 @@ static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp) return 0; } -module_param_call(ifnum_to_use, set_param_wdog_ifnum, get_param_int, - &ifnum_to_use, 0644); +static struct kernel_param_ops param_ops_wdog_ifnum = { + .set = set_param_wdog_ifnum, + .get = param_get_int, +}; + +#define param_check_wdog_ifnum param_check_int + +static struct kernel_param_ops param_ops_str = { + .set = set_param_str, + .get = get_param_str, +}; + +module_param(ifnum_to_use, wdog_ifnum, 0644); MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog " "timer. Setting to -1 defaults to the first registered " "interface"); -module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644); +module_param(timeout, timeout, 0644); MODULE_PARM_DESC(timeout, "Timeout value in seconds."); -module_param_call(pretimeout, set_param_int, get_param_int, &pretimeout, 0644); +module_param(pretimeout, timeout, 0644); MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds."); -module_param_call(action, set_param_str, get_param_str, action_op, 0644); +module_param_cb(action, ¶m_ops_str, action_op, 0644); MODULE_PARM_DESC(action, "Timeout action. One of: " "reset, none, power_cycle, power_off."); -module_param_call(preaction, set_param_str, get_param_str, preaction_op, 0644); +module_param_cb(preaction, ¶m_ops_str, preaction_op, 0644); MODULE_PARM_DESC(preaction, "Pretimeout action. One of: " "pre_none, pre_smi, pre_nmi, pre_int."); -module_param_call(preop, set_param_str, get_param_str, preop_op, 0644); +module_param_cb(preop, ¶m_ops_str, preop_op, 0644); MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: " "preop_none, preop_panic, preop_give_data."); diff --git a/drivers/dio/dio-driver.c b/drivers/dio/dio-driver.c index 9c0c9afcd0ac..a7b174ef4c85 100644 --- a/drivers/dio/dio-driver.c +++ b/drivers/dio/dio-driver.c @@ -140,5 +140,4 @@ postcore_initcall(dio_driver_init); EXPORT_SYMBOL(dio_match_device); EXPORT_SYMBOL(dio_register_driver); EXPORT_SYMBOL(dio_unregister_driver); -EXPORT_SYMBOL(dio_dev_driver); EXPORT_SYMBOL(dio_bus_type); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 16d056939f9f..2c27a526025f 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(ide_pci_clk); module_param_named(pci_clock, ide_pci_clk, int, 0); MODULE_PARM_DESC(pci_clock, "PCI bus clock frequency (in MHz)"); -static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp) +static int ide_set_dev_param_mask(const char *s, const struct kernel_param *kp) { int a, b, i, j = 1; unsigned int *dev_param_mask = (unsigned int *)kp->arg; @@ -201,34 +201,40 @@ static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp) return 0; } +static struct kernel_param_ops param_ops_ide_dev_mask = { + .set = ide_set_dev_param_mask +}; + +#define param_check_ide_dev_mask(name, p) param_check_uint(name, p) + static unsigned int ide_nodma; -module_param_call(nodma, ide_set_dev_param_mask, NULL, &ide_nodma, 0); +module_param_named(nodma, ide_nodma, ide_dev_mask, 0); MODULE_PARM_DESC(nodma, "disallow DMA for a device"); static unsigned int ide_noflush; -module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0); +module_param_named(noflush, ide_noflush, ide_dev_mask, 0); MODULE_PARM_DESC(noflush, "disable flush requests for a device"); static unsigned int ide_nohpa; -module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0); +module_param_named(nohpa, ide_nohpa, ide_dev_mask, 0); MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device"); static unsigned int ide_noprobe; -module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0); +module_param_named(noprobe, ide_noprobe, ide_dev_mask, 0); MODULE_PARM_DESC(noprobe, "skip probing for a device"); static unsigned int ide_nowerr; -module_param_call(nowerr, ide_set_dev_param_mask, NULL, &ide_nowerr, 0); +module_param_named(nowerr, ide_nowerr, ide_dev_mask, 0); MODULE_PARM_DESC(nowerr, "ignore the ATA_DF bit for a device"); static unsigned int ide_cdroms; -module_param_call(cdrom, ide_set_dev_param_mask, NULL, &ide_cdroms, 0); +module_param_named(cdrom, ide_cdroms, ide_dev_mask, 0); MODULE_PARM_DESC(cdrom, "force device as a CD-ROM"); struct chs_geom { diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index 1f20a042a4f5..dd253002cd50 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c @@ -81,7 +81,7 @@ static u8 i7300_idle_thrtctl_saved; static u8 i7300_idle_thrtlow_saved; static u32 i7300_idle_mc_saved; -static cpumask_t idle_cpumask; +static cpumask_var_t idle_cpumask; static ktime_t start_ktime; static unsigned long avg_idle_us; @@ -459,9 +459,9 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, spin_lock_irqsave(&i7300_idle_lock, flags); if (val == IDLE_START) { - cpu_set(smp_processor_id(), idle_cpumask); + cpumask_set_cpu(smp_processor_id(), idle_cpumask); - if (cpus_weight(idle_cpumask) != num_online_cpus()) + if (cpumask_weight(idle_cpumask) != num_online_cpus()) goto end; now_ktime = ktime_get(); @@ -478,8 +478,8 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, i7300_idle_ioat_start(); } else if (val == IDLE_END) { - cpu_clear(smp_processor_id(), idle_cpumask); - if (cpus_weight(idle_cpumask) == (num_online_cpus() - 1)) { + cpumask_clear_cpu(smp_processor_id(), idle_cpumask); + if (cpumask_weight(idle_cpumask) == (num_online_cpus() - 1)) { /* First CPU coming out of idle */ u64 idle_duration_us; @@ -553,7 +553,6 @@ struct debugfs_file_info { static int __init i7300_idle_init(void) { spin_lock_init(&i7300_idle_lock); - cpus_clear(idle_cpumask); total_us = 0; if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) @@ -565,6 +564,9 @@ static int __init i7300_idle_init(void) if (i7300_idle_ioat_init()) return -ENODEV; + if (!zalloc_cpumask_var(&idle_cpumask, GFP_KERNEL)) + return -ENOMEM; + debugfs_dir = debugfs_create_dir("i7300_idle", NULL); if (debugfs_dir) { int i = 0; @@ -589,6 +591,7 @@ static int __init i7300_idle_init(void) static void __exit i7300_idle_exit(void) { idle_notifier_unregister(&i7300_idle_nb); + free_cpumask_var(idle_cpumask); if (debugfs_dir) { int i = 0; diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c index 0501f0e65157..3665e3975158 100644 --- a/drivers/input/misc/ati_remote2.c +++ b/drivers/input/misc/ati_remote2.c @@ -37,7 +37,8 @@ enum { }; static int ati_remote2_set_mask(const char *val, - struct kernel_param *kp, unsigned int max) + const struct kernel_param *kp, + unsigned int max) { unsigned long mask; int ret; @@ -58,28 +59,31 @@ static int ati_remote2_set_mask(const char *val, } static int ati_remote2_set_channel_mask(const char *val, - struct kernel_param *kp) + const struct kernel_param *kp) { pr_debug("%s()\n", __func__); return ati_remote2_set_mask(val, kp, ATI_REMOTE2_MAX_CHANNEL_MASK); } -static int ati_remote2_get_channel_mask(char *buffer, struct kernel_param *kp) +static int ati_remote2_get_channel_mask(char *buffer, + const struct kernel_param *kp) { pr_debug("%s()\n", __func__); return sprintf(buffer, "0x%04x", *(unsigned int *)kp->arg); } -static int ati_remote2_set_mode_mask(const char *val, struct kernel_param *kp) +static int ati_remote2_set_mode_mask(const char *val, + const struct kernel_param *kp) { pr_debug("%s()\n", __func__); return ati_remote2_set_mask(val, kp, ATI_REMOTE2_MAX_MODE_MASK); } -static int ati_remote2_get_mode_mask(char *buffer, struct kernel_param *kp) +static int ati_remote2_get_mode_mask(char *buffer, + const struct kernel_param *kp) { pr_debug("%s()\n", __func__); @@ -88,15 +92,19 @@ static int ati_remote2_get_mode_mask(char *buffer, struct kernel_param *kp) static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK; #define param_check_channel_mask(name, p) __param_check(name, p, unsigned int) -#define param_set_channel_mask ati_remote2_set_channel_mask -#define param_get_channel_mask ati_remote2_get_channel_mask +static struct kernel_param_ops param_ops_channel_mask = { + .set = ati_remote2_set_channel_mask, + .get = ati_remote2_get_channel_mask, +}; module_param(channel_mask, channel_mask, 0644); MODULE_PARM_DESC(channel_mask, "Bitmask of channels to accept <15:Channel16>...<1:Channel2><0:Channel1>"); static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK; #define param_check_mode_mask(name, p) __param_check(name, p, unsigned int) -#define param_set_mode_mask ati_remote2_set_mode_mask -#define param_get_mode_mask ati_remote2_get_mode_mask +static struct kernel_param_ops param_ops_mode_mask = { + .set = ati_remote2_set_mode_mask, + .get = ati_remote2_get_mode_mask, +}; module_param(mode_mask, mode_mask, 0644); MODULE_PARM_DESC(mode_mask, "Bitmask of modes to accept <4:PC><3:AUX4><2:AUX3><1:AUX2><0:AUX1>"); diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 07c53798301a..e4570dbff543 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -39,11 +39,13 @@ MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static unsigned int psmouse_max_proto = PSMOUSE_AUTO; -static int psmouse_set_maxproto(const char *val, struct kernel_param *kp); -static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp); +static int psmouse_set_maxproto(const char *val, const struct kernel_param *); +static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp); +static struct kernel_param_ops param_ops_proto_abbrev = { + .set = psmouse_set_maxproto, + .get = psmouse_get_maxproto, +}; #define param_check_proto_abbrev(name, p) __param_check(name, p, unsigned int) -#define param_set_proto_abbrev psmouse_set_maxproto -#define param_get_proto_abbrev psmouse_get_maxproto module_param_named(proto, psmouse_max_proto, proto_abbrev, 0644); MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, any). Useful for KVM switches."); @@ -1652,7 +1654,7 @@ static ssize_t psmouse_attr_set_resolution(struct psmouse *psmouse, void *data, } -static int psmouse_set_maxproto(const char *val, struct kernel_param *kp) +static int psmouse_set_maxproto(const char *val, const struct kernel_param *kp) { const struct psmouse_protocol *proto; @@ -1669,7 +1671,7 @@ static int psmouse_set_maxproto(const char *val, struct kernel_param *kp) return 0; } -static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp) +static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp) { int type = *((unsigned int *)kp->arg); diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 610e914abe6c..2b6f750ee394 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -109,8 +109,7 @@ MODULE_PARM_DESC(mpt_debug_level, " debug level - refer to mptdebug.h \ int mpt_fwfault_debug; EXPORT_SYMBOL(mpt_fwfault_debug); -module_param_call(mpt_fwfault_debug, param_set_int, param_get_int, - &mpt_fwfault_debug, 0600); +module_param(mpt_fwfault_debug, int, 0600); MODULE_PARM_DESC(mpt_fwfault_debug, "Enable detection of Firmware fault" " and halt Firmware on fault - (default=0)"); diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 3648b23d5c92..42660e55119e 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -120,9 +120,9 @@ static int count = DEFAULT_COUNT; module_param(recur_count, int, 0644); MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\ "default is 10"); -module_param(cpoint_name, charp, 0644); +module_param(cpoint_name, charp, 0444); MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed"); -module_param(cpoint_type, charp, 0644); +module_param(cpoint_type, charp, 0444); MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\ "hitting the crash point"); module_param(cpoint_count, int, 0644); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 85e1b6a3ac1b..926c7215c667 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -236,6 +236,7 @@ struct myri10ge_priv { int watchdog_resets; int watchdog_pause; int pause; + bool fw_name_allocated; char *fw_name; char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE]; char *product_code_string; @@ -268,6 +269,7 @@ MODULE_FIRMWARE("myri10ge_eth_z8e.dat"); MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat"); MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat"); +/* Careful: must be accessed under kparam_block_sysfs_write */ static char *myri10ge_fw_name = NULL; module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name"); @@ -373,6 +375,14 @@ static inline void put_be32(__be32 val, __be32 __iomem * p) static struct net_device_stats *myri10ge_get_stats(struct net_device *dev); +static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated) +{ + if (mgp->fw_name_allocated) + kfree(mgp->fw_name); + mgp->fw_name = name; + mgp->fw_name_allocated = allocated; +} + static int myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd, struct myri10ge_cmd *data, int atomic) @@ -744,7 +754,7 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt) dev_warn(&mgp->pdev->dev, "via hotplug\n"); } - mgp->fw_name = "adopted"; + set_fw_name(mgp, "adopted", false); mgp->tx_boundary = 2048; myri10ge_dummy_rdma(mgp, 1); status = myri10ge_get_firmware_capabilities(mgp); @@ -3260,7 +3270,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) * load the optimized firmware (which assumes aligned PCIe * completions) in order to see if it works on this host. */ - mgp->fw_name = myri10ge_fw_aligned; + set_fw_name(mgp, myri10ge_fw_aligned, false); status = myri10ge_load_firmware(mgp, 1); if (status != 0) { goto abort; @@ -3288,7 +3298,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) abort: /* fall back to using the unaligned firmware */ mgp->tx_boundary = 2048; - mgp->fw_name = myri10ge_fw_unaligned; + set_fw_name(mgp, myri10ge_fw_unaligned, false); } @@ -3311,7 +3321,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) dev_info(&mgp->pdev->dev, "PCIE x%d Link\n", link_width); mgp->tx_boundary = 4096; - mgp->fw_name = myri10ge_fw_aligned; + set_fw_name(mgp, myri10ge_fw_aligned, false); } else { myri10ge_firmware_probe(mgp); } @@ -3320,22 +3330,29 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) dev_info(&mgp->pdev->dev, "Assuming aligned completions (forced)\n"); mgp->tx_boundary = 4096; - mgp->fw_name = myri10ge_fw_aligned; + set_fw_name(mgp, myri10ge_fw_aligned, false); } else { dev_info(&mgp->pdev->dev, "Assuming unaligned completions (forced)\n"); mgp->tx_boundary = 2048; - mgp->fw_name = myri10ge_fw_unaligned; + set_fw_name(mgp, myri10ge_fw_unaligned, false); } } + + kparam_block_sysfs_write(myri10ge_fw_name); if (myri10ge_fw_name != NULL) { - overridden = 1; - mgp->fw_name = myri10ge_fw_name; + char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL); + if (fw_name) { + overridden = 1; + set_fw_name(mgp, fw_name, true); + } } + kparam_unblock_sysfs_write(myri10ge_fw_name); + if (mgp->board_number < MYRI10GE_MAX_BOARDS && myri10ge_fw_names[mgp->board_number] != NULL && strlen(myri10ge_fw_names[mgp->board_number])) { - mgp->fw_name = myri10ge_fw_names[mgp->board_number]; + set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false); overridden = 1; } if (overridden) @@ -3699,6 +3716,7 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp) struct myri10ge_cmd cmd; struct pci_dev *pdev = mgp->pdev; char *old_fw; + bool old_allocated; int i, status, ncpus, msix_cap; mgp->num_slices = 1; @@ -3711,17 +3729,23 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp) /* try to load the slice aware rss firmware */ old_fw = mgp->fw_name; + old_allocated = mgp->fw_name_allocated; + /* don't free old_fw if we override it. */ + mgp->fw_name_allocated = false; + if (myri10ge_fw_name != NULL) { dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n", myri10ge_fw_name); - mgp->fw_name = myri10ge_fw_name; + set_fw_name(mgp, myri10ge_fw_name, false); } else if (old_fw == myri10ge_fw_aligned) - mgp->fw_name = myri10ge_fw_rss_aligned; + set_fw_name(mgp, myri10ge_fw_rss_aligned, false); else - mgp->fw_name = myri10ge_fw_rss_unaligned; + set_fw_name(mgp, myri10ge_fw_rss_unaligned, false); status = myri10ge_load_firmware(mgp, 0); if (status != 0) { dev_info(&pdev->dev, "Rss firmware not found\n"); + if (old_allocated) + kfree(old_fw); return; } @@ -3787,6 +3811,8 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp) mgp->num_slices); if (status == 0) { pci_disable_msix(pdev); + if (old_allocated) + kfree(old_fw); return; } if (status > 0) @@ -3803,7 +3829,7 @@ disable_msix: abort_with_fw: mgp->num_slices = 1; - mgp->fw_name = old_fw; + set_fw_name(mgp, old_fw, old_allocated); myri10ge_load_firmware(mgp, 0); } @@ -4033,6 +4059,7 @@ abort_with_enabled: pci_disable_device(pdev); abort_with_netdev: + set_fw_name(mgp, NULL, false); free_netdev(netdev); return status; } @@ -4077,6 +4104,7 @@ static void myri10ge_remove(struct pci_dev *pdev) dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), mgp->cmd, mgp->cmd_bus); + set_fw_name(mgp, NULL, false); free_netdev(netdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 01e99f22210e..9142bfd3bab7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file) err = 0; tfile->tun = tun; tun->tfile = tfile; + tun->socket.file = file; dev_hold(tun->dev); sock_hold(tun->socket.sk); atomic_inc(&tfile->count); @@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun) /* Detach from net device */ netif_tx_lock_bh(tun->dev); tun->tfile = NULL; + tun->socket.file = NULL; netif_tx_unlock_bh(tun->dev); /* Drop read queue */ @@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible(&tun->socket.wait); + wake_up_interruptible_poll(&tun->socket.wait, POLLIN | + POLLRDNORM | POLLRDBAND); return NETDEV_TX_OK; drop: @@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, len = min_t(int, skb->len, len); skb_copy_datagram_const_iovec(skb, 0, iv, total, len); - total += len; + total += skb->len; tun->dev->stats.tx_packets++; tun->dev->stats.tx_bytes += len; @@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, return total; } -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, - unsigned long count, loff_t pos) +static ssize_t tun_do_read(struct tun_struct *tun, + struct kiocb *iocb, const struct iovec *iv, + ssize_t len, int noblock) { - struct file *file = iocb->ki_filp; - struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; - ssize_t len, ret = 0; - - if (!tun) - return -EBADFD; + ssize_t ret = 0; DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); - len = iov_length(iv, count); - if (len < 0) { - ret = -EINVAL; - goto out; - } - add_wait_queue(&tun->socket.wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; /* Read frames from the queue */ if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { - if (file->f_flags & O_NONBLOCK) { + if (noblock) { ret = -EAGAIN; break; } @@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, current->state = TASK_RUNNING; remove_wait_queue(&tun->socket.wait, &wait); + return ret; +} + +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); + ssize_t len, ret; + + if (!tun) + return -EBADFD; + len = iov_length(iv, count); + if (len < 0) { + ret = -EINVAL; + goto out; + } + + ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); out: tun_put(tun); return ret; @@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk) return; if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + POLLWRNORM | POLLWRBAND); tun = container_of(sk, struct tun_sock, sk)->tun; kill_fasync(&tun->fasync, SIGIO, POLL_OUT); @@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk) free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev); } +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + return tun_get_user(tun, m->msg_iov, total_len, + m->msg_flags & MSG_DONTWAIT); +} + +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len, + int flags) +{ + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + int ret; + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) + return -EINVAL; + ret = tun_do_read(tun, iocb, m->msg_iov, total_len, + flags & MSG_DONTWAIT); + if (ret > total_len) { + m->msg_flags |= MSG_TRUNC; + ret = flags & MSG_TRUNC ? ret : total_len; + } + return ret; +} + +/* Ops structure to mimic raw sockets with tun */ +static const struct proto_ops tun_socket_ops = { + .sendmsg = tun_sendmsg, + .recvmsg = tun_recvmsg, +}; + static struct proto tun_proto = { .name = "tun", .owner = THIS_MODULE, @@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) goto err_free_dev; init_waitqueue_head(&tun->socket.wait); + tun->socket.ops = &tun_socket_ops; sock_init_data(&tun->socket, sk); sk->sk_write_space = tun_sock_write_space; sk->sk_sndbuf = INT_MAX; @@ -1525,6 +1571,23 @@ static void tun_cleanup(void) rtnl_link_unregister(&tun_link_ops); } +/* Get an underlying socket object from tun file. Returns error unless file is + * attached to a device. The returned object works like a packet socket, it + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for + * holding a reference to the file for as long as the socket is in use. */ +struct socket *tun_get_socket(struct file *file) +{ + struct tun_struct *tun; + if (file->f_op != &tun_fops) + return ERR_PTR(-EINVAL); + tun = tun_get(file); + if (!tun) + return ERR_PTR(-EBADFD); + tun_put(tun); + return &tun->socket; +} +EXPORT_SYMBOL_GPL(tun_get_socket); + module_init(tun_init); module_exit(tun_cleanup); MODULE_DESCRIPTION(DRV_DESCRIPTION); diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index 09fcfad742e7..71d27ccec630 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -122,6 +122,8 @@ struct if_sdio_card { const char *helper; const char *firmware; + bool helper_allocated; + bool firmware_allocated; u8 buffer[65536]; @@ -1001,16 +1003,34 @@ static int if_sdio_probe(struct sdio_func *func, card->helper = if_sdio_models[i].helper; card->firmware = if_sdio_models[i].firmware; + kparam_block_sysfs_write(helper_name); if (lbs_helper_name) { + char *helper = kstrdup(lbs_helper_name, GFP_KERNEL); + if (!helper) { + kparam_unblock_sysfs_write(helper_name); + ret = -ENOMEM; + goto free; + } lbs_deb_sdio("overriding helper firmware: %s\n", lbs_helper_name); - card->helper = lbs_helper_name; + card->helper = helper; + card->helper_allocated = true; } + kparam_unblock_sysfs_write(helper_name); + kparam_block_sysfs_write(fw_name); if (lbs_fw_name) { + char *fw_name = kstrdup(lbs_fw_name, GFP_KERNEL); + if (!fw_name) { + kparam_unblock_sysfs_write(fw_name); + ret = -ENOMEM; + goto free; + } lbs_deb_sdio("overriding firmware: %s\n", lbs_fw_name); - card->firmware = lbs_fw_name; + card->firmware = fw_name; + card->firmware_allocated = true; } + kparam_unblock_sysfs_write(fw_name); sdio_claim_host(func); @@ -1125,6 +1145,10 @@ free: kfree(packet); } + if (card->helper_allocated) + kfree(card->helper); + if (card->firmware_allocated) + kfree(card->firmware); kfree(card); goto out; @@ -1175,6 +1199,10 @@ static void if_sdio_remove(struct sdio_func *func) kfree(packet); } + if (card->helper_allocated) + kfree(card->helper); + if (card->firmware_allocated) + kfree(card->firmware); kfree(card); lbs_deb_leave(LBS_DEB_SDIO); diff --git a/drivers/net/wireless/libertas/if_usb.c b/drivers/net/wireless/libertas/if_usb.c index 65e174595d12..a41007ebe1f3 100644 --- a/drivers/net/wireless/libertas/if_usb.c +++ b/drivers/net/wireless/libertas/if_usb.c @@ -290,10 +290,13 @@ static int if_usb_probe(struct usb_interface *intf, } /* Upload firmware */ + kparam_block_sysfs_write(fw_name); if (__if_usb_prog_firmware(cardp, lbs_fw_name, BOOT_CMD_FW_BY_USB)) { + kparam_unblock_sysfs_write(fw_name); lbs_deb_usbd(&udev->dev, "FW upload failed\n"); goto err_prog_firmware; } + kparam_unblock_sysfs_write(fw_name); if (!(priv = lbs_add_card(cardp, &udev->dev))) goto err_prog_firmware; diff --git a/drivers/net/wireless/libertas_tf/if_usb.c b/drivers/net/wireless/libertas_tf/if_usb.c index 3691c307e674..669a3de68508 100644 --- a/drivers/net/wireless/libertas_tf/if_usb.c +++ b/drivers/net/wireless/libertas_tf/if_usb.c @@ -650,12 +650,15 @@ static int if_usb_prog_firmware(struct if_usb_card *cardp) static int reset_count = 10; int ret = 0; + kparam_block_sysfs_write(fw_name); ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev); if (ret < 0) { printk(KERN_INFO "libertastf: firmware %s not found\n", lbtf_fw_name); + kparam_unblock_sysfs_write(fw_name); goto done; } + kparam_unblock_sysfs_write(fw_name); if (check_fwfile_format(cardp->fw->data, cardp->fw->size)) goto release_fw; diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index b52b773d49d9..c7307167e33c 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -726,6 +726,7 @@ bfad_drv_init(struct bfad_s *bfad) memset(&driver_info, 0, sizeof(driver_info)); strncpy(driver_info.version, BFAD_DRIVER_VERSION, sizeof(driver_info.version) - 1); + __kernel_param_lock(); if (host_name) strncpy(driver_info.host_machine_name, host_name, sizeof(driver_info.host_machine_name) - 1); @@ -735,6 +736,7 @@ bfad_drv_init(struct bfad_s *bfad) if (os_patch) strncpy(driver_info.host_os_patch, os_patch, sizeof(driver_info.host_os_patch) - 1); + __kernel_param_unlock(); strncpy(driver_info.os_device_name, bfad->pci_name, sizeof(driver_info.os_device_name - 1)); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 4a43b74c0d27..0b728aa885bb 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1227,7 +1227,7 @@ int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, "CPU.\n"); spin_unlock_bh(&fps->fcoe_rx_list.lock); - cpu = first_cpu(cpu_online_map); + cpu = cpumask_first(cpu_online_mask); fps = &per_cpu(fcoe_percpu, cpu); spin_lock_bh(&fps->fcoe_rx_list.lock); if (!fps->thread) { diff --git a/drivers/staging/rtl8187se/r8180_core.c b/drivers/staging/rtl8187se/r8180_core.c index 53e654d0d4fa..9ca3c6f84dc7 100644 --- a/drivers/staging/rtl8187se/r8180_core.c +++ b/drivers/staging/rtl8187se/r8180_core.c @@ -66,7 +66,7 @@ static struct pci_device_id rtl8180_pci_id_tbl[] __devinitdata = { }; -static char* ifname = "wlan%d"; +static char ifname[IFNAMSIZ] = "wlan%d"; static int hwseqnum = 0; static int hwwep = 0; static int channels = 0x3fff; @@ -79,7 +79,7 @@ MODULE_AUTHOR("Andrea Merello <andreamrl@tiscali.it>"); MODULE_DESCRIPTION("Linux driver for Realtek RTL8180 / RTL8185 WiFi cards"); -module_param(ifname, charp, S_IRUGO|S_IWUSR ); +module_param_string(ifname, ifname, sizef(ifname), S_IRUGO|S_IWUSR); module_param(hwseqnum,int, S_IRUGO|S_IWUSR); module_param(hwwep,int, S_IRUGO|S_IWUSR); module_param(channels,int, S_IRUGO|S_IWUSR); @@ -3918,7 +3918,7 @@ static int __devinit rtl8180_pci_probe(struct pci_dev *pdev, if (dev_alloc_name(dev, ifname) < 0){ DMESG("Oops: devname already taken! Trying wlan%%d...\n"); - ifname = "wlan%d"; + strcpy(ifname, "wlan%d"); dev_alloc_name(dev, ifname); } diff --git a/drivers/staging/rtl8192e/r8192E_core.c b/drivers/staging/rtl8192e/r8192E_core.c index b0802a7aeb5f..8f4637ad466a 100644 --- a/drivers/staging/rtl8192e/r8192E_core.c +++ b/drivers/staging/rtl8192e/r8192E_core.c @@ -115,7 +115,7 @@ static struct pci_device_id rtl8192_pci_id_tbl[] __devinitdata = { {} }; -static char* ifname = "wlan%d"; +static char ifname[IFNAMSIZ] = "wlan%d"; static int hwwep = 1; //default use hw. set 0 to use software security static int channels = 0x3fff; @@ -126,7 +126,7 @@ MODULE_DEVICE_TABLE(pci, rtl8192_pci_id_tbl); MODULE_DESCRIPTION("Linux driver for Realtek RTL819x WiFi cards"); -module_param(ifname, charp, S_IRUGO|S_IWUSR ); +module_param_string(ifname, ifname, sizef(ifname), S_IRUGO|S_IWUSR); //module_param(hwseqnum,int, S_IRUGO|S_IWUSR); module_param(hwwep,int, S_IRUGO|S_IWUSR); module_param(channels,int, S_IRUGO|S_IWUSR); @@ -5972,7 +5972,7 @@ static int __devinit rtl8192_pci_probe(struct pci_dev *pdev, if (dev_alloc_name(dev, ifname) < 0){ RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n"); - ifname = "wlan%d"; + strcpt(ifname, "wlan%d"); dev_alloc_name(dev, ifname); } diff --git a/drivers/staging/rtl8192su/r8192U_core.c b/drivers/staging/rtl8192su/r8192U_core.c index 66274d7666ff..e24815f52142 100644 --- a/drivers/staging/rtl8192su/r8192U_core.c +++ b/drivers/staging/rtl8192su/r8192U_core.c @@ -136,13 +136,13 @@ MODULE_VERSION("V 1.1"); MODULE_DEVICE_TABLE(usb, rtl8192_usb_id_tbl); MODULE_DESCRIPTION("Linux driver for Realtek RTL8192 USB WiFi cards"); -static char* ifname = "wlan%d"; +static char ifname[IFNAMSIZ] = "wlan%d"; static int hwwep = 1; //default use hw. set 0 to use software security static int channels = 0x3fff; -module_param(ifname, charp, S_IRUGO|S_IWUSR ); +module_param_string(ifname, ifname, sizef(ifname), S_IRUGO|S_IWUSR); //module_param(hwseqnum,int, S_IRUGO|S_IWUSR); module_param(hwwep,int, S_IRUGO|S_IWUSR); module_param(channels,int, S_IRUGO|S_IWUSR); @@ -7463,7 +7463,7 @@ static int __devinit rtl8192_usb_probe(struct usb_interface *intf, if (dev_alloc_name(dev, ifname) < 0){ RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n"); - ifname = "wlan%d"; + strcpy(ifname, "wlan%d"); dev_alloc_name(dev, ifname); } diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index d171b563e94c..283a178997a5 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -1550,6 +1550,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver) char file_arr[] = "CMVxy.bin"; char *file; + kparam_block_sysfs_write(cmv_file); /* set proper name corresponding modem version and line type */ if (cmv_file[sc->modem_index] == NULL) { if (UEA_CHIP_VERSION(sc) == ADI930) @@ -1568,6 +1569,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver) strlcat(cmv_name, file, UEA_FW_NAME_MAX); if (ver == 2) strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX); + kparam_unblock_sysfs_write(cmv_file); } static int request_cmvs_old(struct uea_softc *sc, diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig new file mode 100644 index 000000000000..9e9355367bb3 --- /dev/null +++ b/drivers/vhost/Kconfig @@ -0,0 +1,11 @@ +config VHOST_NET + tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)" + depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL + ---help--- + This kernel module can be loaded in host kernel to accelerate + guest networking with virtio_net. Not to be confused with virtio_net + module itself which needs to be loaded in guest kernel. + + To compile this driver as a module, choose M here: the module will + be called vhost_net. + diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile new file mode 100644 index 000000000000..72dd02050bb9 --- /dev/null +++ b/drivers/vhost/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_VHOST_NET) += vhost_net.o +vhost_net-y := vhost.o net.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c new file mode 100644 index 000000000000..22d5fef472cc --- /dev/null +++ b/drivers/vhost/net.c @@ -0,0 +1,648 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * virtio-net server in host kernel. + */ + +#include <linux/compat.h> +#include <linux/eventfd.h> +#include <linux/vhost.h> +#include <linux/virtio_net.h> +#include <linux/mmu_context.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/rcupdate.h> +#include <linux/file.h> + +#include <linux/net.h> +#include <linux/if_packet.h> +#include <linux/if_arp.h> +#include <linux/if_tun.h> + +#include <net/sock.h> + +#include "vhost.h" + +/* Max number of bytes transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others. */ +#define VHOST_NET_WEIGHT 0x80000 + +enum { + VHOST_NET_VQ_RX = 0, + VHOST_NET_VQ_TX = 1, + VHOST_NET_VQ_MAX = 2, +}; + +enum vhost_net_poll_state { + VHOST_NET_POLL_DISABLED = 0, + VHOST_NET_POLL_STARTED = 1, + VHOST_NET_POLL_STOPPED = 2, +}; + +struct vhost_net { + struct vhost_dev dev; + struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; + struct vhost_poll poll[VHOST_NET_VQ_MAX]; + /* Tells us whether we are polling a socket for TX. + * We only do this when socket buffer fills up. + * Protected by tx vq lock. */ + enum vhost_net_poll_state tx_poll_state; +}; + +/* Pop first len bytes from iovec. Return number of segments used. */ +static int move_iovec_hdr(struct iovec *from, struct iovec *to, + size_t len, int iov_count) +{ + int seg = 0; + size_t size; + while (len && seg < iov_count) { + size = min(from->iov_len, len); + to->iov_base = from->iov_base; + to->iov_len = size; + from->iov_len -= size; + from->iov_base += size; + len -= size; + ++from; + ++to; + ++seg; + } + return seg; +} + +/* Caller must have TX VQ lock */ +static void tx_poll_stop(struct vhost_net *net) +{ + if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) + return; + vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); + net->tx_poll_state = VHOST_NET_POLL_STOPPED; +} + +/* Caller must have TX VQ lock */ +static void tx_poll_start(struct vhost_net *net, struct socket *sock) +{ + if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) + return; + vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); + net->tx_poll_state = VHOST_NET_POLL_STARTED; +} + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_tx(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; + unsigned head, out, in, s; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + size_t len, total_len = 0; + int err, wmem; + size_t hdr_size; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock) + return; + + wmem = atomic_read(&sock->sk->sk_wmem_alloc); + if (wmem >= sock->sk->sk_sndbuf) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + + if (wmem < sock->sk->sk_sndbuf * 2) + tx_poll_stop(net); + hdr_size = vq->hdr_size; + + for (;;) { + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + NULL, NULL); + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (head == vq->num) { + wmem = atomic_read(&sock->sk->sk_wmem_alloc); + if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { + tx_poll_start(net, sock); + set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + break; + } + if (unlikely(vhost_enable_notify(vq))) { + vhost_disable_notify(vq); + continue; + } + break; + } + if (in) { + vq_err(vq, "Unexpected descriptor format for TX: " + "out %d, int %d\n", out, in); + break; + } + /* Skip header. TODO: support TSO. */ + s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); + msg.msg_iovlen = out; + len = iov_length(vq->iov, out); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected header len for TX: " + "%zd expected %zd\n", + iov_length(vq->hdr, s), hdr_size); + break; + } + /* TODO: Check specific error and bomb out unless ENOBUFS? */ + err = sock->ops->sendmsg(NULL, sock, &msg, len); + if (unlikely(err < 0)) { + vhost_discard_vq_desc(vq); + tx_poll_start(net, sock); + break; + } + if (err != len) + pr_err("Truncated TX packet: " + " len %d != %zd\n", err, len); + vhost_add_used_and_signal(&net->dev, vq, head, 0); + total_len += len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_rx(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; + unsigned head, out, in, log, s; + struct vhost_log *vq_log; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, /* FIXME: get and handle RX aux data. */ + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + + struct virtio_net_hdr hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + + size_t len, total_len = 0; + int err; + size_t hdr_size; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + hdr_size = vq->hdr_size; + + vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? + vq->log : NULL; + + for (;;) { + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + vq_log, &log); + /* OK, now we need to know about added descriptors. */ + if (head == vq->num) { + if (unlikely(vhost_enable_notify(vq))) { + /* They have slipped one in as we were + * doing that: check again. */ + vhost_disable_notify(vq); + continue; + } + /* Nothing new? Wait for eventfd to tell us + * they refilled. */ + break; + } + /* We don't need to be notified again. */ + if (out) { + vq_err(vq, "Unexpected descriptor format for RX: " + "out %d, int %d\n", + out, in); + break; + } + /* Skip header. TODO: support TSO/mergeable rx buffers. */ + s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); + msg.msg_iovlen = in; + len = iov_length(vq->iov, in); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected header len for RX: " + "%zd expected %zd\n", + iov_length(vq->hdr, s), hdr_size); + break; + } + err = sock->ops->recvmsg(NULL, sock, &msg, + len, MSG_DONTWAIT | MSG_TRUNC); + /* TODO: Check specific error and bomb out unless EAGAIN? */ + if (err < 0) { + vhost_discard_vq_desc(vq); + break; + } + /* TODO: Should check and handle checksum. */ + if (err > len) { + pr_err("Discarded truncated rx packet: " + " len %d > %zd\n", err, len); + vhost_discard_vq_desc(vq); + continue; + } + len = err; + err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); + if (err) { + vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", + vq->iov->iov_base, err); + break; + } + len += hdr_size; + vhost_add_used_and_signal(&net->dev, vq, head, len); + if (unlikely(vq_log)) + vhost_log_write(vq, vq_log, log, len); + total_len += len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +static void handle_tx_kick(struct work_struct *work) +{ + struct vhost_virtqueue *vq; + struct vhost_net *net; + vq = container_of(work, struct vhost_virtqueue, poll.work); + net = container_of(vq->dev, struct vhost_net, dev); + handle_tx(net); +} + +static void handle_rx_kick(struct work_struct *work) +{ + struct vhost_virtqueue *vq; + struct vhost_net *net; + vq = container_of(work, struct vhost_virtqueue, poll.work); + net = container_of(vq->dev, struct vhost_net, dev); + handle_rx(net); +} + +static void handle_tx_net(struct work_struct *work) +{ + struct vhost_net *net; + net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work); + handle_tx(net); +} + +static void handle_rx_net(struct work_struct *work) +{ + struct vhost_net *net; + net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work); + handle_rx(net); +} + +static int vhost_net_open(struct inode *inode, struct file *f) +{ + struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); + int r; + if (!n) + return -ENOMEM; + f->private_data = n; + n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; + n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; + r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX); + if (r < 0) { + kfree(n); + return r; + } + + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT); + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN); + n->tx_poll_state = VHOST_NET_POLL_DISABLED; + return 0; +} + +static void vhost_net_disable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + if (!vq->private_data) + return; + if (vq == n->vqs + VHOST_NET_VQ_TX) { + tx_poll_stop(n); + n->tx_poll_state = VHOST_NET_POLL_DISABLED; + } else + vhost_poll_stop(n->poll + VHOST_NET_VQ_RX); +} + +static void vhost_net_enable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct socket *sock = vq->private_data; + if (!sock) + return; + if (vq == n->vqs + VHOST_NET_VQ_TX) { + n->tx_poll_state = VHOST_NET_POLL_STOPPED; + tx_poll_start(n, sock); + } else + vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file); +} + +static struct socket *vhost_net_stop_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct socket *sock; + + mutex_lock(&vq->mutex); + sock = vq->private_data; + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, NULL); + mutex_unlock(&vq->mutex); + return sock; +} + +static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, + struct socket **rx_sock) +{ + *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX); + *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX); +} + +static void vhost_net_flush_vq(struct vhost_net *n, int index) +{ + vhost_poll_flush(n->poll + index); + vhost_poll_flush(&n->dev.vqs[index].poll); +} + +static void vhost_net_flush(struct vhost_net *n) +{ + vhost_net_flush_vq(n, VHOST_NET_VQ_TX); + vhost_net_flush_vq(n, VHOST_NET_VQ_RX); +} + +static int vhost_net_release(struct inode *inode, struct file *f) +{ + struct vhost_net *n = f->private_data; + struct socket *tx_sock; + struct socket *rx_sock; + + vhost_net_stop(n, &tx_sock, &rx_sock); + vhost_net_flush(n); + vhost_dev_cleanup(&n->dev); + if (tx_sock) + fput(tx_sock->file); + if (rx_sock) + fput(rx_sock->file); + /* We do an extra flush before freeing memory, + * since jobs can re-queue themselves. */ + vhost_net_flush(n); + kfree(n); + return 0; +} + +static struct socket *get_raw_socket(int fd) +{ + struct { + struct sockaddr_ll sa; + char buf[MAX_ADDR_LEN]; + } uaddr; + int uaddr_len = sizeof uaddr, r; + struct socket *sock = sockfd_lookup(fd, &r); + if (!sock) + return ERR_PTR(-ENOTSOCK); + + /* Parameter checking */ + if (sock->sk->sk_type != SOCK_RAW) { + r = -ESOCKTNOSUPPORT; + goto err; + } + + r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, + &uaddr_len, 0); + if (r) + goto err; + + if (uaddr.sa.sll_family != AF_PACKET) { + r = -EPFNOSUPPORT; + goto err; + } + return sock; +err: + fput(sock->file); + return ERR_PTR(r); +} + +static struct socket *get_tun_socket(int fd) +{ + struct file *file = fget(fd); + struct socket *sock; + if (!file) + return ERR_PTR(-EBADF); + sock = tun_get_socket(file); + if (IS_ERR(sock)) + fput(file); + return sock; +} + +static struct socket *get_socket(int fd) +{ + struct socket *sock; + if (fd == -1) + return NULL; + sock = get_raw_socket(fd); + if (!IS_ERR(sock)) + return sock; + sock = get_tun_socket(fd); + if (!IS_ERR(sock)) + return sock; + return ERR_PTR(-ENOTSOCK); +} + +static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) +{ + struct socket *sock, *oldsock; + struct vhost_virtqueue *vq; + int r; + + mutex_lock(&n->dev.mutex); + r = vhost_dev_check_owner(&n->dev); + if (r) + goto err; + + if (index >= VHOST_NET_VQ_MAX) { + r = -ENOBUFS; + goto err; + } + vq = n->vqs + index; + mutex_lock(&vq->mutex); + sock = get_socket(fd); + if (IS_ERR(sock)) { + r = PTR_ERR(sock); + goto err; + } + + /* start polling new socket */ + oldsock = vq->private_data; + if (sock == oldsock) + goto done; + + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, sock); + vhost_net_enable_vq(n, vq); + mutex_unlock(&vq->mutex); +done: + mutex_unlock(&n->dev.mutex); + if (oldsock) { + vhost_net_flush_vq(n, index); + fput(oldsock->file); + } + return r; +err: + mutex_unlock(&n->dev.mutex); + return r; +} + +static long vhost_net_reset_owner(struct vhost_net *n) +{ + struct socket *tx_sock = NULL; + struct socket *rx_sock = NULL; + long err; + mutex_lock(&n->dev.mutex); + err = vhost_dev_check_owner(&n->dev); + if (err) + goto done; + vhost_net_stop(n, &tx_sock, &rx_sock); + vhost_net_flush(n); + err = vhost_dev_reset_owner(&n->dev); +done: + mutex_unlock(&n->dev.mutex); + if (tx_sock) + fput(tx_sock->file); + if (rx_sock) + fput(rx_sock->file); + return err; +} + +static void vhost_net_set_features(struct vhost_net *n, u64 features) +{ + size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ? + sizeof(struct virtio_net_hdr) : 0; + int i; + mutex_lock(&n->dev.mutex); + n->dev.acked_features = features; + smp_wmb(); + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { + mutex_lock(&n->vqs[i].mutex); + n->vqs[i].hdr_size = hdr_size; + mutex_unlock(&n->vqs[i].mutex); + } + mutex_unlock(&n->dev.mutex); + vhost_net_flush(n); +} + +static long vhost_net_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_net *n = f->private_data; + void __user *argp = (void __user *)arg; + u32 __user *featurep = argp; + struct vhost_vring_file backend; + u64 features; + int r; + switch (ioctl) { + case VHOST_NET_SET_BACKEND: + r = copy_from_user(&backend, argp, sizeof backend); + if (r < 0) + return r; + return vhost_net_set_backend(n, backend.index, backend.fd); + case VHOST_GET_FEATURES: + features = VHOST_FEATURES; + return put_user(features, featurep); + case VHOST_SET_FEATURES: + r = get_user(features, featurep); + /* No features for now */ + if (r < 0) + return r; + if (features & ~VHOST_FEATURES) + return -EOPNOTSUPP; + vhost_net_set_features(n, features); + return 0; + case VHOST_RESET_OWNER: + return vhost_net_reset_owner(n); + default: + r = vhost_dev_ioctl(&n->dev, ioctl, arg); + vhost_net_flush(n); + return r; + } +} + +#ifdef CONFIG_COMPAT +static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); +} +#endif + +const static struct file_operations vhost_net_fops = { + .owner = THIS_MODULE, + .release = vhost_net_release, + .unlocked_ioctl = vhost_net_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhost_net_compat_ioctl, +#endif + .open = vhost_net_open, +}; + +static struct miscdevice vhost_net_misc = { + VHOST_NET_MINOR, + "vhost-net", + &vhost_net_fops, +}; + +int vhost_net_init(void) +{ + int r = vhost_init(); + if (r) + goto err_init; + r = misc_register(&vhost_net_misc); + if (r) + goto err_reg; + return 0; +err_reg: + vhost_cleanup(); +err_init: + return r; + +} +module_init(vhost_net_init); + +void vhost_net_exit(void) +{ + misc_deregister(&vhost_net_misc); + vhost_cleanup(); +} +module_exit(vhost_net_exit); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael S. Tsirkin"); +MODULE_DESCRIPTION("Host kernel accelerator for virtio net"); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c new file mode 100644 index 000000000000..97233d5ad1b9 --- /dev/null +++ b/drivers/vhost/vhost.c @@ -0,0 +1,965 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Copyright (C) 2006 Rusty Russell IBM Corporation + * + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * Inspiration, some code, and most witty comments come from + * Documentation/lguest/lguest.c, by Rusty Russell + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Generic code for virtio server in host kernel. + */ + +#include <linux/eventfd.h> +#include <linux/vhost.h> +#include <linux/virtio_net.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/rcupdate.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/highmem.h> + +#include <linux/net.h> +#include <linux/if_packet.h> +#include <linux/if_arp.h> + +#include <net/sock.h> + +#include "vhost.h" + +enum { + VHOST_MEMORY_MAX_NREGIONS = 64, + VHOST_MEMORY_F_LOG = 0x1, +}; + +static struct workqueue_struct *vhost_workqueue; + +static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct vhost_poll *poll; + poll = container_of(pt, struct vhost_poll, table); + + poll->wqh = wqh; + add_wait_queue(wqh, &poll->wait); +} + +static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + struct vhost_poll *poll; + poll = container_of(wait, struct vhost_poll, wait); + if (!((unsigned long)key & poll->mask)) + return 0; + + queue_work(vhost_workqueue, &poll->work); + return 0; +} + +/* Init poll structure */ +void vhost_poll_init(struct vhost_poll *poll, work_func_t func, + unsigned long mask) +{ + INIT_WORK(&poll->work, func); + init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); + init_poll_funcptr(&poll->table, vhost_poll_func); + poll->mask = mask; +} + +/* Start polling a file. We add ourselves to file's wait queue. The caller must + * keep a reference to a file until after vhost_poll_stop is called. */ +void vhost_poll_start(struct vhost_poll *poll, struct file *file) +{ + unsigned long mask; + mask = file->f_op->poll(file, &poll->table); + if (mask) + vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); +} + +/* Stop polling a file. After this function returns, it becomes safe to drop the + * file reference. You must also flush afterwards. */ +void vhost_poll_stop(struct vhost_poll *poll) +{ + remove_wait_queue(poll->wqh, &poll->wait); +} + +/* Flush any work that has been scheduled. When calling this, don't hold any + * locks that are also used by the callback. */ +void vhost_poll_flush(struct vhost_poll *poll) +{ + flush_work(&poll->work); +} + +void vhost_poll_queue(struct vhost_poll *poll) +{ + queue_work(vhost_workqueue, &poll->work); +} + +static void vhost_vq_reset(struct vhost_dev *dev, + struct vhost_virtqueue *vq) +{ + vq->num = 1; + vq->desc = NULL; + vq->avail = NULL; + vq->used = NULL; + vq->last_avail_idx = 0; + vq->avail_idx = 0; + vq->last_used_idx = 0; + vq->used_flags = 0; + vq->used_flags = 0; + vq->log_used = false; + vq->log_addr = -1ull; + vq->hdr_size = 0; + vq->private_data = NULL; + vq->log_base = NULL; + vq->error_ctx = NULL; + vq->error = NULL; + vq->kick = NULL; + vq->call_ctx = NULL; + vq->call = NULL; +} + +long vhost_dev_init(struct vhost_dev *dev, + struct vhost_virtqueue *vqs, int nvqs) +{ + int i; + dev->vqs = vqs; + dev->nvqs = nvqs; + mutex_init(&dev->mutex); + dev->log_ctx = NULL; + dev->log_file = NULL; + dev->memory = NULL; + dev->mm = NULL; + + for (i = 0; i < dev->nvqs; ++i) { + dev->vqs[i].dev = dev; + mutex_init(&dev->vqs[i].mutex); + vhost_vq_reset(dev, dev->vqs + i); + if (dev->vqs[i].handle_kick) + vhost_poll_init(&dev->vqs[i].poll, + dev->vqs[i].handle_kick, + POLLIN); + } + return 0; +} + +/* Caller should have device mutex */ +long vhost_dev_check_owner(struct vhost_dev *dev) +{ + /* Are you the owner? If not, I don't think you mean to do that */ + return dev->mm == current->mm ? 0 : -EPERM; +} + +/* Caller should have device mutex */ +static long vhost_dev_set_owner(struct vhost_dev *dev) +{ + /* Is there an owner already? */ + if (dev->mm) + return -EBUSY; + /* No owner, become one */ + dev->mm = get_task_mm(current); + return 0; +} + +/* Caller should have device mutex */ +long vhost_dev_reset_owner(struct vhost_dev *dev) +{ + struct vhost_memory *memory; + + /* Restore memory to default 1:1 mapping. */ + memory = kmalloc(offsetof(struct vhost_memory, regions) + + 2 * sizeof *memory->regions, GFP_KERNEL); + if (!memory) + return -ENOMEM; + + vhost_dev_cleanup(dev); + + memory->nregions = 2; + memory->regions[0].guest_phys_addr = 1; + memory->regions[0].userspace_addr = 1; + memory->regions[0].memory_size = ~0ULL; + memory->regions[1].guest_phys_addr = 0; + memory->regions[1].userspace_addr = 0; + memory->regions[1].memory_size = 1; + dev->memory = memory; + return 0; +} + +/* Caller should have device mutex */ +void vhost_dev_cleanup(struct vhost_dev *dev) +{ + int i; + for (i = 0; i < dev->nvqs; ++i) { + if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { + vhost_poll_stop(&dev->vqs[i].poll); + vhost_poll_flush(&dev->vqs[i].poll); + } + if (dev->vqs[i].error_ctx) + eventfd_ctx_put(dev->vqs[i].error_ctx); + if (dev->vqs[i].error) + fput(dev->vqs[i].error); + if (dev->vqs[i].kick) + fput(dev->vqs[i].kick); + if (dev->vqs[i].call_ctx) + eventfd_ctx_put(dev->vqs[i].call_ctx); + if (dev->vqs[i].call) + fput(dev->vqs[i].call); + vhost_vq_reset(dev, dev->vqs + i); + } + if (dev->log_ctx) + eventfd_ctx_put(dev->log_ctx); + dev->log_ctx = NULL; + if (dev->log_file) + fput(dev->log_file); + dev->log_file = NULL; + /* No one will access memory at this point */ + kfree(dev->memory); + dev->memory = NULL; + if (dev->mm) + mmput(dev->mm); + dev->mm = NULL; +} + +static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) +{ + struct vhost_memory mem, *newmem, *oldmem; + unsigned long size = offsetof(struct vhost_memory, regions); + long r; + r = copy_from_user(&mem, m, size); + if (r) + return r; + if (mem.padding) + return -EOPNOTSUPP; + if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS) + return -E2BIG; + newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL); + if (!newmem) + return -ENOMEM; + + memcpy(newmem, &mem, size); + r = copy_from_user(newmem->regions, m->regions, + mem.nregions * sizeof *m->regions); + if (r) { + kfree(newmem); + return r; + } + oldmem = d->memory; + rcu_assign_pointer(d->memory, newmem); + synchronize_rcu(); + kfree(oldmem); + return 0; +} + +static int init_used(struct vhost_virtqueue *vq, + struct vring_used __user *used) +{ + int r = put_user(vq->used_flags, &used->flags); + if (r) + return r; + return get_user(vq->last_used_idx, &used->idx); +} + +static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) +{ + struct file *eventfp, *filep = NULL, + *pollstart = NULL, *pollstop = NULL; + struct eventfd_ctx *ctx = NULL; + u32 __user *idxp = argp; + struct vhost_virtqueue *vq; + struct vhost_vring_state s; + struct vhost_vring_file f; + struct vhost_vring_addr a; + u32 idx; + long r; + + r = get_user(idx, idxp); + if (r < 0) + return r; + if (idx > d->nvqs) + return -ENOBUFS; + + vq = d->vqs + idx; + + mutex_lock(&vq->mutex); + + switch (ioctl) { + case VHOST_SET_VRING_NUM: + r = copy_from_user(&s, argp, sizeof s); + if (r < 0) + break; + if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) { + r = -EINVAL; + break; + } + vq->num = s.num; + break; + case VHOST_SET_VRING_BASE: + r = copy_from_user(&s, argp, sizeof s); + if (r < 0) + break; + if (s.num > 0xffff) { + r = -EINVAL; + break; + } + vq->last_avail_idx = s.num; + /* Forget the cached index value. */ + vq->avail_idx = vq->last_avail_idx; + break; + case VHOST_GET_VRING_BASE: + s.index = idx; + s.num = vq->last_avail_idx; + r = copy_to_user(argp, &s, sizeof s); + break; + case VHOST_SET_VRING_ADDR: + r = copy_from_user(&a, argp, sizeof a); + if (r < 0) + break; + if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) { + r = -EOPNOTSUPP; + break; + } + if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || + (u64)(unsigned long)a.used_user_addr != a.used_user_addr || + (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) { + r = -EFAULT; + break; + } + if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) || + (a.used_user_addr & (sizeof *vq->used->ring - 1)) || + (a.log_guest_addr & (sizeof *vq->used->ring - 1))) { + r = -EINVAL; + break; + } + r = init_used(vq, (struct vring_used __user *)a.used_user_addr); + if (r) + break; + vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); + vq->desc = (void __user *)(unsigned long)a.desc_user_addr; + vq->avail = (void __user *)(unsigned long)a.avail_user_addr; + vq->log_addr = a.log_guest_addr; + vq->used = (void __user *)(unsigned long)a.used_user_addr; + break; + case VHOST_SET_VRING_KICK: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->kick) { + pollstop = filep = vq->kick; + pollstart = vq->kick = eventfp; + } else + filep = eventfp; + break; + case VHOST_SET_VRING_CALL: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->call) { + filep = vq->call; + ctx = vq->call_ctx; + vq->call = eventfp; + vq->call_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + break; + case VHOST_SET_VRING_ERR: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->error) { + filep = vq->error; + vq->error = eventfp; + ctx = vq->error_ctx; + vq->error_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + break; + default: + r = -ENOIOCTLCMD; + } + + if (pollstop && vq->handle_kick) + vhost_poll_stop(&vq->poll); + + if (ctx) + eventfd_ctx_put(ctx); + if (filep) + fput(filep); + + if (pollstart && vq->handle_kick) + vhost_poll_start(&vq->poll, vq->kick); + + mutex_unlock(&vq->mutex); + + if (pollstop && vq->handle_kick) + vhost_poll_flush(&vq->poll); + return r; +} + +long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct file *eventfp, *filep = NULL; + struct eventfd_ctx *ctx = NULL; + u64 p; + long r; + int i, fd; + + mutex_lock(&d->mutex); + /* If you are not the owner, you can become one */ + if (ioctl == VHOST_SET_OWNER) { + r = vhost_dev_set_owner(d); + goto done; + } + + /* You must be the owner to do anything else */ + r = vhost_dev_check_owner(d); + if (r) + goto done; + + switch (ioctl) { + case VHOST_SET_MEM_TABLE: + r = vhost_set_memory(d, argp); + break; + case VHOST_SET_LOG_BASE: + r = copy_from_user(&p, argp, sizeof p); + if (r < 0) + break; + if ((u64)(unsigned long)p != p) { + r = -EFAULT; + break; + } + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i].mutex); + d->vqs[i].log_base = (void __user *)(unsigned long)p; + mutex_unlock(&d->vqs[i].mutex); + } + break; + case VHOST_SET_LOG_FD: + r = get_user(fd, (int __user *)argp); + if (r < 0) + break; + eventfp = fd == -1 ? NULL : eventfd_fget(fd); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } + if (eventfp != d->log_file) { + filep = d->log_file; + ctx = d->log_ctx; + d->log_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i].mutex); + d->vqs[i].log_ctx = d->log_ctx; + mutex_unlock(&d->vqs[i].mutex); + } + if (ctx) + eventfd_ctx_put(ctx); + if (filep) + fput(filep); + break; + default: + r = vhost_set_vring(d, ioctl, argp); + break; + } +done: + mutex_unlock(&d->mutex); + return r; +} + +static const struct vhost_memory_region *find_region(struct vhost_memory *mem, + __u64 addr, __u32 len) +{ + struct vhost_memory_region *reg; + int i; + /* linear search is not brilliant, but we really have on the order of 6 + * regions in practice */ + for (i = 0; i < mem->nregions; ++i) { + reg = mem->regions + i; + if (reg->guest_phys_addr <= addr && + reg->guest_phys_addr + reg->memory_size - 1 >= addr) + return reg; + } + return NULL; +} + +/* TODO: This is really inefficient. We need something like get_user() + * (instruction directly accesses the data, with an exception table entry + * returning -EFAULT). See Documentation/x86/exception-tables.txt. + */ +static int set_bit_to_user(int nr, void __user *addr) +{ + unsigned long log = (unsigned long)addr; + struct page *page; + void *base; + int bit = nr + (log % PAGE_SIZE) * 8; + int r; + r = get_user_pages_fast(log, 1, 1, &page); + if (r) + return r; + base = kmap_atomic(page, KM_USER0); + set_bit(bit, base); + kunmap_atomic(base, KM_USER0); + set_page_dirty_lock(page); + put_page(page); + return 0; +} + +static int log_write(void __user *log_base, + u64 write_address, u64 write_length) +{ + int r; + if (!write_length) + return 0; + write_address /= VHOST_PAGE_SIZE; + for (;;) { + u64 base = (u64)(unsigned long)log_base; + u64 log = base + write_address / 8; + int bit = write_address % 8; + if ((u64)(unsigned long)log != log) + return -EFAULT; + r = set_bit_to_user(bit, (void __user *)(unsigned long)log); + if (r < 0) + return r; + if (write_length <= VHOST_PAGE_SIZE) + break; + write_length -= VHOST_PAGE_SIZE; + write_address += VHOST_PAGE_SIZE; + } + return r; +} + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len) +{ + int i, r; + + /* Make sure data written is seen before log. */ + wmb(); + for (i = 0; i < log_num; ++i) { + u64 l = min(log[i].len, len); + r = log_write(vq->log_base, log[i].addr, l); + if (r < 0) + return r; + len -= l; + if (!len) + return 0; + } + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + /* Length written exceeds what we have stored. This is a bug. */ + BUG(); + return 0; +} + +int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, + struct iovec iov[], int iov_size) +{ + const struct vhost_memory_region *reg; + struct vhost_memory *mem; + struct iovec *_iov; + u64 s = 0; + int ret = 0; + + rcu_read_lock(); + + mem = rcu_dereference(dev->memory); + while ((u64)len > s) { + u64 size; + if (ret >= iov_size) { + ret = -ENOBUFS; + break; + } + reg = find_region(mem, addr, len); + if (!reg) { + ret = -EFAULT; + break; + } + _iov = iov + ret; + size = reg->memory_size - addr + reg->guest_phys_addr; + _iov->iov_len = min((u64)len, size); + _iov->iov_base = (void *)(unsigned long) + (reg->userspace_addr + addr - reg->guest_phys_addr); + s += size; + addr += size; + ++ret; + } + + rcu_read_unlock(); + return ret; +} + +/* Each buffer in the virtqueues is actually a chain of descriptors. This + * function returns the next descriptor in the chain, + * or -1U if we're at the end. */ +static unsigned next_desc(struct vring_desc *desc) +{ + unsigned int next; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(desc->flags & VRING_DESC_F_NEXT)) + return -1U; + + /* Check they're not leading us off end of descriptors. */ + next = desc->next; + /* Make sure compiler knows to grab that: we don't want it changing! */ + /* We will use the result as an index in an array, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + return next; +} + +static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num, + struct vring_desc *indirect) +{ + struct vring_desc desc; + unsigned int i = 0, count, found = 0; + int ret; + + /* Sanity check */ + if (indirect->len % sizeof desc) { + vq_err(vq, "Invalid length in indirect descriptor: " + "len 0x%llx not multiple of 0x%zx\n", + (unsigned long long)indirect->len, + sizeof desc); + return -EINVAL; + } + + ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect, + ARRAY_SIZE(vq->indirect)); + if (ret < 0) { + vq_err(vq, "Translation failure %d in indirect.\n", ret); + return ret; + } + + /* We will use the result as an address to read from, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + count = indirect->len / sizeof desc; + /* Buffers are chained via a 16 bit next field, so + * we can have at most 2^16 of these. */ + if (count > USHORT_MAX + 1) { + vq_err(vq, "Indirect buffer length too big: %d\n", + indirect->len); + return -E2BIG; + } + + do { + unsigned iov_count = *in_num + *out_num; + if (++found > count) { + vq_err(vq, "Loop detected: last one at %u " + "indirect size %u\n", + i, count); + return -EINVAL; + } + if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect, + sizeof desc)) { + vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + if (desc.flags & VRING_DESC_F_INDIRECT) { + vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + + ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + iov_size - iov_count); + if (ret < 0) { + vq_err(vq, "Translation failure %d indirect idx %d\n", + ret, i); + return ret; + } + /* If this is an input descriptor, increment that count. */ + if (desc.flags & VRING_DESC_F_WRITE) { + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (*in_num) { + vq_err(vq, "Indirect descriptor " + "has out after in: idx %d\n", i); + return -EINVAL; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + return 0; +} + +/* This looks in the virtqueue and for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function returns the descriptor number found, or vq->num (which + * is never a valid descriptor number) if none was found. */ +unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) +{ + struct vring_desc desc; + unsigned int i, head, found = 0; + u16 last_avail_idx; + int ret; + + /* Check it isn't doing very strange things with descriptor numbers. */ + last_avail_idx = vq->last_avail_idx; + if (get_user(vq->avail_idx, &vq->avail->idx)) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return vq->num; + } + + if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return vq->num; + } + + /* If there's nothing new since last we looked, return invalid. */ + if (vq->avail_idx == last_avail_idx) + return vq->num; + + /* Only get avail ring entries after they have been exposed by guest. */ + rmb(); + + /* Grab the next descriptor number they're advertising, and increment + * the index we've seen. */ + if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) { + vq_err(vq, "Failed to read head: idx %d address %p\n", + last_avail_idx, + &vq->avail->ring[last_avail_idx % vq->num]); + return vq->num; + } + + /* If their number is silly, that's an error. */ + if (head >= vq->num) { + vq_err(vq, "Guest says index %u > %u is available", + head, vq->num); + return vq->num; + } + + /* When we start there are none of either input nor output. */ + *out_num = *in_num = 0; + if (unlikely(log)) + *log_num = 0; + + i = head; + do { + unsigned iov_count = *in_num + *out_num; + if (i >= vq->num) { + vq_err(vq, "Desc index is %u > %u, head = %u", + i, vq->num, head); + return vq->num; + } + if (++found > vq->num) { + vq_err(vq, "Loop detected: last one at %u " + "vq size %u head %u\n", + i, vq->num, head); + return vq->num; + } + ret = copy_from_user(&desc, vq->desc + i, sizeof desc); + if (ret) { + vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", + i, vq->desc + i); + return vq->num; + } + if (desc.flags & VRING_DESC_F_INDIRECT) { + ret = get_indirect(dev, vq, iov, iov_size, + out_num, in_num, + log, log_num, &desc); + if (ret < 0) { + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", i); + return vq->num; + } + continue; + } + + ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + iov_size - iov_count); + if (ret < 0) { + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); + return vq->num; + } + if (desc.flags & VRING_DESC_F_WRITE) { + /* If this is an input descriptor, + * increment that count. */ + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (*in_num) { + vq_err(vq, "Descriptor has out after in: " + "idx %d\n", i); + return vq->num; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + + /* On success, increment avail index. */ + vq->last_avail_idx++; + return head; +} + +/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ +void vhost_discard_vq_desc(struct vhost_virtqueue *vq) +{ + vq->last_avail_idx--; +} + +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using eventfd. */ +int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) +{ + struct vring_used_elem *used; + + /* The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. */ + used = &vq->used->ring[vq->last_used_idx % vq->num]; + if (put_user(head, &used->id)) { + vq_err(vq, "Failed to write used id"); + return -EFAULT; + } + if (put_user(len, &used->len)) { + vq_err(vq, "Failed to write used len"); + return -EFAULT; + } + /* Make sure buffer is written before we update index. */ + wmb(); + if (put_user(vq->last_used_idx + 1, &vq->used->idx)) { + vq_err(vq, "Failed to increment used idx"); + return -EFAULT; + } + if (unlikely(vq->log_used)) { + /* Make sure data is seen before log. */ + wmb(); + log_write(vq->log_base, vq->log_addr + sizeof *vq->used->ring * + (vq->last_used_idx % vq->num), + sizeof *vq->used->ring); + log_write(vq->log_base, vq->log_addr, sizeof *vq->used->ring); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } + vq->last_used_idx++; + return 0; +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __u16 flags = 0; + if (get_user(flags, &vq->avail->flags)) { + vq_err(vq, "Failed to get flags"); + return; + } + + /* If they don't want an interrupt, don't signal, unless empty. */ + if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && + (vq->avail_idx != vq->last_avail_idx || + !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) + return; + + /* Signal the Guest tell them we used something up. */ + if (vq->call_ctx) + eventfd_signal(vq->call_ctx, 1); +} + +/* And here's the combo meal deal. Supersize me! */ +void vhost_add_used_and_signal(struct vhost_dev *dev, + struct vhost_virtqueue *vq, + unsigned int head, int len) +{ + vhost_add_used(vq, head, len); + vhost_signal(dev, vq); +} + +/* OK, now we need to know about added descriptors. */ +bool vhost_enable_notify(struct vhost_virtqueue *vq) +{ + u16 avail_idx; + int r; + if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) + return false; + vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; + r = put_user(vq->used_flags, &vq->used->flags); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + /* They could have slipped one in as we were doing that: make + * sure it's written, then check again. */ + mb(); + r = get_user(avail_idx, &vq->avail->idx); + if (r) { + vq_err(vq, "Failed to check avail idx at %p: %d\n", + &vq->avail->idx, r); + return false; + } + + return avail_idx != vq->last_avail_idx; +} + +/* We don't need to be notified again. */ +void vhost_disable_notify(struct vhost_virtqueue *vq) +{ + int r; + if (vq->used_flags & VRING_USED_F_NO_NOTIFY) + return; + vq->used_flags |= VRING_USED_F_NO_NOTIFY; + r = put_user(vq->used_flags, &vq->used->flags); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); +} + +int vhost_init(void) +{ + vhost_workqueue = create_singlethread_workqueue("vhost"); + if (!vhost_workqueue) + return -ENOMEM; + return 0; +} + +void vhost_cleanup(void) +{ + destroy_workqueue(vhost_workqueue); +} diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h new file mode 100644 index 000000000000..d1f045376017 --- /dev/null +++ b/drivers/vhost/vhost.h @@ -0,0 +1,159 @@ +#ifndef _VHOST_H +#define _VHOST_H + +#include <linux/eventfd.h> +#include <linux/vhost.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/skbuff.h> +#include <linux/uio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> + +struct vhost_device; + +enum { + /* Enough place for all fragments, head, and virtio net header. */ + VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2, +}; + +/* Poll a file (eventfd or socket) */ +/* Note: there's nothing vhost specific about this structure. */ +struct vhost_poll { + poll_table table; + wait_queue_head_t *wqh; + wait_queue_t wait; + /* struct which will handle all actual work. */ + struct work_struct work; + unsigned long mask; +}; + +void vhost_poll_init(struct vhost_poll *poll, work_func_t func, + unsigned long mask); +void vhost_poll_start(struct vhost_poll *poll, struct file *file); +void vhost_poll_stop(struct vhost_poll *poll); +void vhost_poll_flush(struct vhost_poll *poll); +void vhost_poll_queue(struct vhost_poll *poll); + +struct vhost_log { + u64 addr; + u64 len; +}; + +/* The virtqueue structure describes a queue attached to a device. */ +struct vhost_virtqueue { + struct vhost_dev *dev; + + /* The actual ring of buffers. */ + struct mutex mutex; + unsigned int num; + struct vring_desc __user *desc; + struct vring_avail __user *avail; + struct vring_used __user *used; + struct file *kick; + struct file *call; + struct file *error; + struct eventfd_ctx *call_ctx; + struct eventfd_ctx *error_ctx; + struct eventfd_ctx *log_ctx; + + struct vhost_poll poll; + + /* The routine to call when the Guest pings us, or timeout. */ + work_func_t handle_kick; + + /* Last available index we saw. */ + u16 last_avail_idx; + + /* Caches available index value from user. */ + u16 avail_idx; + + /* Last index we used. */ + u16 last_used_idx; + + /* Used flags */ + u16 used_flags; + + /* Log writes to used structure. */ + bool log_used; + u64 log_addr; + + struct iovec indirect[VHOST_NET_MAX_SG]; + struct iovec iov[VHOST_NET_MAX_SG]; + struct iovec hdr[VHOST_NET_MAX_SG]; + size_t hdr_size; + /* We use a kind of RCU to access private pointer. + * All readers access it from workqueue, which makes it possible to + * flush the workqueue instead of synchronize_rcu. Therefore readers do + * not need to call rcu_read_lock/rcu_read_unlock: the beginning of + * work item execution acts instead of rcu_read_lock() and the end of + * work item execution acts instead of rcu_read_lock(). + * Writers use virtqueue mutex. */ + void *private_data; + /* Log write descriptors */ + void __user *log_base; + struct vhost_log log[VHOST_NET_MAX_SG]; +}; + +struct vhost_dev { + /* Readers use RCU to access memory table pointer + * log base pointer and features. + * Writers use mutex below.*/ + struct vhost_memory *memory; + struct mm_struct *mm; + struct mutex mutex; + unsigned acked_features; + struct vhost_virtqueue *vqs; + int nvqs; + struct file *log_file; + struct eventfd_ctx *log_ctx; +}; + +long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); +long vhost_dev_check_owner(struct vhost_dev *); +long vhost_dev_reset_owner(struct vhost_dev *); +void vhost_dev_cleanup(struct vhost_dev *); +long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg); + +unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *, + struct iovec iov[], unsigned int iov_count, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num); +void vhost_discard_vq_desc(struct vhost_virtqueue *); + +int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); +void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); +void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, + unsigned int head, int len); +void vhost_disable_notify(struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_virtqueue *); + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len); + +int vhost_init(void); +void vhost_cleanup(void); + +#define vq_err(vq, fmt, ...) do { \ + pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ + if ((vq)->error_ctx) \ + eventfd_signal((vq)->error_ctx, 1);\ + } while (0) + +enum { + VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1 << VIRTIO_RING_F_INDIRECT_DESC) | + (1 << VHOST_F_LOG_ALL) | + (1 << VHOST_NET_F_VIRTIO_NET_HDR), +}; + +static inline int vhost_has_feature(struct vhost_dev *dev, int bit) +{ + unsigned acked_features = rcu_dereference(dev->acked_features); + return acked_features & (1 << bit); +} + +#endif diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 54fbb2995a5f..156c661b2912 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -1976,8 +1976,7 @@ static void __devexit uvesafb_exit(void) module_exit(uvesafb_exit); -#define param_get_scroll NULL -static int param_set_scroll(const char *val, struct kernel_param *kp) +static int param_set_scroll(const char *val, const struct kernel_param *kp) { ypan = 0; @@ -1992,7 +1991,9 @@ static int param_set_scroll(const char *val, struct kernel_param *kp) return 0; } - +static struct kernel_param_ops param_ops_scroll = { + .set = param_set_scroll, +}; #define param_check_scroll(name, p) __param_check(name, p, void) module_param_named(scroll, ypan, scroll, 0); diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c index 3df17dc8c3d7..8bfa47d5a5c0 100644 --- a/drivers/video/vt8623fb.c +++ b/drivers/video/vt8623fb.c @@ -727,7 +727,9 @@ static int __devinit vt8623_pci_probe(struct pci_dev *dev, const struct pci_devi /* Prepare startup mode */ + kparam_block_sysfs_write(mode_option); rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8); + kparam_unblock_sysfs_write(mode_option); if (! ((rc == 1) || (rc == 2))) { rc = -EINVAL; dev_err(info->device, "mode %s not found\n", mode_option); diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c index e6c4390d8bd6..53180a37cc9a 100644 --- a/drivers/zorro/zorro-driver.c +++ b/drivers/zorro/zorro-driver.c @@ -156,5 +156,4 @@ postcore_initcall(zorro_driver_init); EXPORT_SYMBOL(zorro_match_device); EXPORT_SYMBOL(zorro_register_driver); EXPORT_SYMBOL(zorro_unregister_driver); -EXPORT_SYMBOL(zorro_dev_driver); EXPORT_SYMBOL(zorro_bus_type); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 293fa0528a6e..52a7200233f0 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -45,7 +45,7 @@ unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; #define NFS_CALLBACK_MAXPORTNR (65535U) -static int param_set_portnr(const char *val, struct kernel_param *kp) +static int param_set_portnr(const char *val, const struct kernel_param *kp) { unsigned long num; int ret; @@ -58,11 +58,10 @@ static int param_set_portnr(const char *val, struct kernel_param *kp) *((unsigned int *)kp->arg) = num; return 0; } - -static int param_get_portnr(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} +static struct kernel_param_ops param_ops_portnr = { + .set = param_set_portnr, + .get = param_get_uint, +}; #define param_check_portnr(name, p) __param_check(name, p, unsigned int); module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b6e818f4b247..ea14edee36ae 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -52,8 +52,12 @@ #define LOAD_OFFSET 0 #endif -#ifndef VMLINUX_SYMBOL -#define VMLINUX_SYMBOL(_sym_) _sym_ +#ifndef SYMBOL_PREFIX +#define VMLINUX_SYMBOL(sym) sym +#else +#define PASTE2(x,y) x##y +#define PASTE(x,y) PASTE2(x,y) +#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym) #endif /* Align . to a 8 byte boundary equals to maximum function alignment. */ @@ -254,76 +258,76 @@ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab) = .; \ - *(__ksymtab) \ + *(__ksymtab_sorted) \ VMLINUX_SYMBOL(__stop___ksymtab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \ - *(__ksymtab_gpl) \ + *(__ksymtab_gpl_sorted) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \ - *(__ksymtab_unused) \ + *(__ksymtab_unused_sorted) \ VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \ - *(__ksymtab_unused_gpl) \ + *(__ksymtab_unused_gpl_sorted) \ VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \ - *(__ksymtab_gpl_future) \ + *(__ksymtab_gpl_future_sorted) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \ } \ \ /* Kernel symbol table: Normal symbols */ \ __kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab) = .; \ - *(__kcrctab) \ + *(__kcrctab_sorted) \ VMLINUX_SYMBOL(__stop___kcrctab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \ - *(__kcrctab_gpl) \ + *(__kcrctab_gpl_sorted) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \ - *(__kcrctab_unused) \ + *(__kcrctab_unused_sorted) \ VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \ - *(__kcrctab_unused_gpl) \ + *(__kcrctab_unused_gpl_sorted) \ VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \ - *(__kcrctab_gpl_future) \ + *(__kcrctab_gpl_future_sorted) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \ } \ \ /* Kernel symbol table: strings */ \ __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \ - *(__ksymtab_strings) \ + *(__ksymtab_strings_sorted) \ } \ \ /* __*init sections */ \ @@ -639,6 +643,23 @@ EXIT_DATA \ EXIT_CALL \ *(.discard) \ + \ + /* \ + * Discard the original unsorted symbol tables. \ + * In vmlinux they are replaced by sorted versions \ + * generated by modpost -x. \ + */ \ + *(__ksymtab) \ + *(__ksymtab_gpl) \ + *(__ksymtab_unused) \ + *(__ksymtab_unused_gpl) \ + *(__ksymtab_gpl_future) \ + *(__kcrctab) \ + *(__kcrctab_gpl) \ + *(__kcrctab_unused) \ + *(__kcrctab_unused_gpl) \ + *(__kcrctab_gpl_future) \ + *(__ksymtab_strings) \ } /** diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 1feed71551c9..e21019492453 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -361,6 +361,7 @@ unifdef-y += uio.h unifdef-y += unistd.h unifdef-y += usbdevice_fs.h unifdef-y += utsname.h +unifdef-y += vhost.h unifdef-y += videodev2.h unifdef-y += videodev.h unifdef-y += virtio_config.h diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h new file mode 100644 index 000000000000..90b1aa867224 --- /dev/null +++ b/include/linux/bsearch.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_BSEARCH_H +#define _LINUX_BSEARCH_H + +#include <linux/types.h> + +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)); + +#endif /* _LINUX_BSEARCH_H */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 557bdad320b6..f53e9b868c26 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -220,7 +220,7 @@ static inline enum zone_type gfp_zone(gfp_t flags) ((1 << ZONES_SHIFT) - 1); if (__builtin_constant_p(bit)) - MAYBE_BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); + BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); else { #ifdef CONFIG_DEBUG_VM BUG_ON((GFP_ZONE_BAD >> bit) & 1); diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 3f5fd523b49d..404abe00162c 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -86,4 +86,18 @@ struct tun_filter { __u8 addr[0][ETH_ALEN]; }; +#ifdef __KERNEL__ +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) +struct socket *tun_get_socket(struct file *); +#else +#include <linux/err.h> +#include <linux/errno.h> +struct file; +struct socket; +static inline struct socket *tun_get_socket(struct file *f) +{ + return ERR_PTR(-EINVAL); +} +#endif /* CONFIG_TUN */ +#endif /* __KERNEL__ */ #endif /* __IF_TUN_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f4e3184fa054..be3018d8a6fc 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -683,12 +683,6 @@ struct sysinfo { char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ }; -/* Force a compilation error if condition is true */ -#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) - -/* Force a compilation error if condition is constant and true */ -#define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) - /* Force a compilation error if condition is true, but also produce a result (of value 0 and type size_t), so the expression can be used e.g. in a structure initializer (or where-ever else comma expressions @@ -696,6 +690,32 @@ struct sysinfo { #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @cond: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + * + * The implementation uses gcc's reluctance to create a negative array, but + * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments + * to inline functions). So as a fallback we use the optimizer; if it can't + * prove the condition is false, it will cause a link error on the undefined + * "__build_bug_on_failed". This error message can be harder to track down + * though, hence the two different methods. + */ +#ifndef __OPTIMIZE__ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int __build_bug_on_failed; +#define BUILD_BUG_ON(condition) \ + do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) __build_bug_on_failed = 1; \ + } while(0) +#endif + /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index e880d4cf9e22..136cdcdf92ef 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -152,7 +152,7 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) \ _n = (long) &((ptr)->name##_end) \ - (long) &((ptr)->name##_begin); \ - MAYBE_BUILD_BUG_ON(_n < 0); \ + BUILD_BUG_ON(_n < 0); \ \ kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ } while (0) diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index adaf3c15e449..8b5f7cc0fba6 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -30,6 +30,7 @@ #define HPET_MINOR 228 #define FUSE_MINOR 229 #define KVM_MINOR 232 +#define VHOST_NET_MINOR 233 #define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/include/linux/mod_export.h b/include/linux/mod_export.h new file mode 100644 index 000000000000..394795e7f03d --- /dev/null +++ b/include/linux/mod_export.h @@ -0,0 +1,149 @@ +#ifndef LINUX_MOD_EXPORT_H +#define LINUX_MOD_EXPORT_H +/* + * Define EXPORT_SYMBOL() and friends for kernel modules. + * + * Under __GENKSYMS__ these definitions are skipped, making it possible to + * scan for EXPORT_SYMBOL() in preprocessed C files. + * + * Under __MODPOST_EXPORTS__ we skip C definitions and define __EXPORT_SYMBOL() + * in arch-independent assembly code. This makes it possible to construct + * sorted symbol tables. + */ + +#ifndef __GENKSYMS__ +#ifndef __MODPOST_EXPORTS__ + +#include <linux/compiler.h> + +/* Some toolchains use a `_' prefix for all user symbols. */ +#ifdef CONFIG_SYMBOL_PREFIX +#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#else +#define MODULE_SYMBOL_PREFIX "" +#endif + +#ifdef CONFIG_MODULES + +struct kernel_symbol { + unsigned long value; + const char *name; +}; + +#ifdef CONFIG_MODVERSIONS +/* Mark the CRC weak since genksyms apparently decides not to + * generate a checksums for some symbols */ +#define __CRC_SYMBOL(sym, sec) \ + extern void *__crc_##sym __attribute__((weak)); \ + static const unsigned long __kcrctab_##sym \ + __used \ + __attribute__((section("__kcrctab" sec), unused)) \ + = (unsigned long) &__crc_##sym; +#else +#define __CRC_SYMBOL(sym, sec) +#endif + +/* For every exported symbol, place a struct in the __ksymtab section */ +#define __EXPORT_SYMBOL(sym, sec) \ + extern typeof(sym) sym; \ + __CRC_SYMBOL(sym, sec) \ + static const char __kstrtab_##sym[] \ + __attribute__((section("__ksymtab_strings"), aligned(1))) \ + = MODULE_SYMBOL_PREFIX #sym; \ + static const struct kernel_symbol __ksymtab_##sym \ + __used \ + __attribute__((section("__ksymtab" sec), unused)) \ + = { (unsigned long)&sym, __kstrtab_##sym } + +#define EXPORT_SYMBOL(sym) \ + __EXPORT_SYMBOL(sym, "") + +#define EXPORT_SYMBOL_GPL(sym) \ + __EXPORT_SYMBOL(sym, "_gpl") + +#define EXPORT_SYMBOL_GPL_FUTURE(sym) \ + __EXPORT_SYMBOL(sym, "_gpl_future") + +#ifdef CONFIG_UNUSED_SYMBOLS +#define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused") +#define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl") +#else +#define EXPORT_UNUSED_SYMBOL(sym) +#define EXPORT_UNUSED_SYMBOL_GPL(sym) +#endif + +#else /* !CONFIG_MODULES */ + +#define EXPORT_SYMBOL(sym) +#define EXPORT_SYMBOL_GPL(sym) +#define EXPORT_SYMBOL_GPL_FUTURE(sym) +#define EXPORT_UNUSED_SYMBOL(sym) +#define EXPORT_UNUSED_SYMBOL_GPL(sym) + +#endif /* CONFIG_MODULES */ + +#else /* __MODPOST_EXPORTS__ */ +/* + * Here is the arch-independent assembly version, used in .tmp_exports-asm.S. + * + * We use CPP macros since they are more familiar than assembly macros. + * Note that CPP macros eat newlines, so each pseudo-instruction must be + * terminated by a semicolon. + */ + +#include <asm/bitsperlong.h> +#include <linux/stringify.h> + +#if BITS_PER_LONG == 64 +#define PTR .quad +#define ALGN .balign 8 +#else +#define PTR .long +#define ALGN .balign 4 +#endif + +/* build system gives us an unstringified version of CONFIG_SYMBOL_PREFIX */ +#ifndef SYMBOL_PREFIX +#define SYM(sym) sym +#else +#define PASTE2(x,y) x##y +#define PASTE(x,y) PASTE2(x,y) +#define SYM(sym) PASTE(SYMBOL_PREFIX, sym) +#endif + + +#ifdef CONFIG_MODVERSIONS +#define __CRC_SYMBOL(sym, crcsec) \ + .globl SYM(__crc_##sym); \ + .weak SYM(__crc_##sym); \ + .pushsection crcsec, "a"; \ + ALGN; \ + SYM(__kcrctab_##sym): \ + PTR SYM(__crc_##sym); \ + .popsection; +#else +#define __CRC_SYMBOL(sym, section) +#endif + +#define __EXPORT_SYMBOL(sym, sec, strsec, crcsec) \ + .globl SYM(sym); \ + \ + __CRC_SYMBOL(sym, crcsec) \ + \ + .pushsection strsec, "a"; \ + SYM(__kstrtab_##sym): \ + .asciz __stringify(SYM(sym)); \ + .popsection; \ + \ + .pushsection sec, "a"; \ + ALGN; \ + SYM(__ksymtab_##sym): \ + PTR SYM(sym); \ + PTR SYM(__kstrtab_##sym); \ + .popsection; + +#endif /* __MODPOST_EXPORTS__ */ + +#endif /* __GENKSYMS__ */ + +#endif /* LINUX_MOD_EXPORT_H */ diff --git a/include/linux/module.h b/include/linux/module.h index 482efc865acf..0bb0f7464ff0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,6 +15,7 @@ #include <linux/stringify.h> #include <linux/kobject.h> #include <linux/moduleparam.h> +#include <linux/mod_export.h> #include <linux/tracepoint.h> #include <asm/local.h> @@ -25,19 +26,8 @@ /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) -/* some toolchains uses a `_' prefix for all user symbols */ -#ifndef MODULE_SYMBOL_PREFIX -#define MODULE_SYMBOL_PREFIX "" -#endif - #define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN -struct kernel_symbol -{ - unsigned long value; - const char *name; -}; - struct modversion_info { unsigned long crc; @@ -178,51 +168,61 @@ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); #define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) -#ifndef __GENKSYMS__ -#ifdef CONFIG_MODVERSIONS -/* Mark the CRC weak since genksyms apparently decides not to - * generate a checksums for some symbols */ -#define __CRC_SYMBOL(sym, sec) \ - extern void *__crc_##sym __attribute__((weak)); \ - static const unsigned long __kcrctab_##sym \ - __used \ - __attribute__((section("__kcrctab" sec), unused)) \ - = (unsigned long) &__crc_##sym; -#else -#define __CRC_SYMBOL(sym, sec) -#endif +#ifdef CONFIG_UNUSED_SYMBOLS +enum export_type { + /* GPL-only exported symbols. */ + __EXPORT_FLAG_GPL_ONLY = 0x1, -/* For every exported symbol, place a struct in the __ksymtab section */ -#define __EXPORT_SYMBOL(sym, sec) \ - extern typeof(sym) sym; \ - __CRC_SYMBOL(sym, sec) \ - static const char __kstrtab_##sym[] \ - __attribute__((section("__ksymtab_strings"), aligned(1))) \ - = MODULE_SYMBOL_PREFIX #sym; \ - static const struct kernel_symbol __ksymtab_##sym \ - __used \ - __attribute__((section("__ksymtab" sec), unused)) \ - = { (unsigned long)&sym, __kstrtab_##sym } + /* unused exported symbols. */ + __EXPORT_FLAG_UNUSED = 0x2, -#define EXPORT_SYMBOL(sym) \ - __EXPORT_SYMBOL(sym, "") + /* exports that will be GPL-only in the near future. */ + __EXPORT_FLAG_GPL_FUTURE = 0x4, -#define EXPORT_SYMBOL_GPL(sym) \ - __EXPORT_SYMBOL(sym, "_gpl") + EXPORT_TYPE_PLAIN = 0x0, + EXPORT_TYPE_GPL = 0x1, + EXPORT_TYPE_UNUSED = 0x2, + EXPORT_TYPE_UNUSED_GPL = 0x3, + EXPORT_TYPE_GPL_FUTURE = 0x4, -#define EXPORT_SYMBOL_GPL_FUTURE(sym) \ - __EXPORT_SYMBOL(sym, "_gpl_future") + EXPORT_TYPE_MAX +}; +#else /* !CONFIG_UNUSED_EXPORTS */ +enum export_type { + __EXPORT_FLAG_UNUSED = 0x0, + __EXPORT_FLAG_GPL_ONLY = 0x1, + __EXPORT_FLAG_GPL_FUTURE = 0x2, -#ifdef CONFIG_UNUSED_SYMBOLS -#define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused") -#define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl") -#else -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) + EXPORT_TYPE_PLAIN = 0x0, + EXPORT_TYPE_GPL = 0x1, + EXPORT_TYPE_GPL_FUTURE = 0x2, + EXPORT_TYPE_MAX +}; #endif +static inline bool export_is_gpl_only(enum export_type type) +{ + return (type & __EXPORT_FLAG_GPL_ONLY); +} + +static inline bool export_is_unused(enum export_type type) +{ + return (type & __EXPORT_FLAG_UNUSED); +} + +static inline bool export_is_gpl_future(enum export_type type) +{ + return (type & __EXPORT_FLAG_GPL_FUTURE); +} + +struct ksymtab { + const struct kernel_symbol *syms; +#ifdef CONFIG_MODVERSIONS + const unsigned long *crcs; #endif + unsigned int num_syms; +}; enum module_state { @@ -249,36 +249,12 @@ struct module struct kobject *holders_dir; /* Exported symbols */ - const struct kernel_symbol *syms; - const unsigned long *crcs; - unsigned int num_syms; + struct ksymtab syms[EXPORT_TYPE_MAX]; /* Kernel parameters. */ struct kernel_param *kp; unsigned int num_kp; - /* GPL-only exported symbols. */ - unsigned int num_gpl_syms; - const struct kernel_symbol *gpl_syms; - const unsigned long *gpl_crcs; - -#ifdef CONFIG_UNUSED_SYMBOLS - /* unused exported symbols. */ - const struct kernel_symbol *unused_syms; - const unsigned long *unused_crcs; - unsigned int num_unused_syms; - - /* GPL-only, unused exported symbols. */ - unsigned int num_unused_gpl_syms; - const struct kernel_symbol *unused_gpl_syms; - const unsigned long *unused_gpl_crcs; -#endif - - /* symbols that will be GPL-only in the near future. */ - const struct kernel_symbol *gpl_future_syms; - const unsigned long *gpl_future_crcs; - unsigned int num_gpl_future_syms; - /* Exception table */ unsigned int num_exentries; struct exception_table_entry *extable; @@ -408,17 +384,6 @@ static inline int within_module_init(unsigned long addr, struct module *mod) /* Search for module by name: must hold module_mutex. */ struct module *find_module(const char *name); -struct symsearch { - const struct kernel_symbol *start, *stop; - const unsigned long *crcs; - enum { - NOT_GPL_ONLY, - GPL_ONLY, - WILL_BE_GPL_ONLY, - } licence; - bool unused; -}; - /* Search for an exported symbol by name. */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, @@ -426,9 +391,14 @@ const struct kernel_symbol *find_symbol(const char *name, bool gplok, bool warn); +typedef bool each_symbol_fn_t (enum export_type type, + const struct kernel_symbol *sym, + const unsigned long *crc, + struct module *owner, + void *data); + /* Walk the exported symbol table */ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data); +bool each_symbol(each_symbol_fn_t *fn, void *data); /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ @@ -541,11 +511,6 @@ extern void module_update_tracepoints(void); extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); #else /* !CONFIG_MODULES... */ -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) /* Given an address, look for it in the exception tables. */ static inline const struct exception_table_entry * diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 82a9124f7d75..893549c04265 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -31,20 +31,23 @@ static const char __module_cat(name,__LINE__)[] \ struct kernel_param; -/* Returns 0, or -errno. arg is in kp->arg. */ -typedef int (*param_set_fn)(const char *val, struct kernel_param *kp); -/* Returns length written or -errno. Buffer is 4k (ie. be short!) */ -typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); +struct kernel_param_ops { + /* Returns 0, or -errno. arg is in kp->arg. */ + int (*set)(const char *val, const struct kernel_param *kp); + /* Returns length written or -errno. Buffer is 4k (ie. be short!) */ + int (*get)(char *buffer, const struct kernel_param *kp); + /* Optional function to free kp->arg when module unloaded. */ + void (*free)(void *arg); +}; /* Flag bits for kernel_param.flags */ #define KPARAM_ISBOOL 2 struct kernel_param { const char *name; + const struct kernel_param_ops *ops; u16 perm; u16 flags; - param_set_fn set; - param_get_fn get; union { void *arg; const struct kparam_string *str; @@ -63,12 +66,67 @@ struct kparam_array { unsigned int max; unsigned int *num; - param_set_fn set; - param_get_fn get; + const struct kernel_param_ops *ops; unsigned int elemsize; void *elem; }; +/** + * module_param - typesafe helper for a module/cmdline parameter + * @value: the variable to alter, and exposed parameter name. + * @type: the type of the parameter + * @perm: visibility in sysfs. + * + * @value becomes the module parameter, or (prefixed by KBUILD_MODNAME and a + * ".") the kernel commandline parameter. Note that - is changed to _, so + * the user can use "foo-bar=1" even for variable "foo_bar". + * + * @perm is 0 if the the variable is not to appear in sysfs, or 0444 + * for world-readable, 0644 for root-writable, etc. Note that if it + * is writable, you may need to use kparam_block_sysfs_write() around + * accesses (esp. charp, which can be kfreed when it changes). + * + * The @type is simply pasted to refer to a param_ops_##type and a + * param_check_##type: for convenience many standard types are provided but + * you can create your own by defining those variables. + * + * Standard types are: + * byte, short, ushort, int, uint, long, ulong + * charp: a character pointer + * bool: a bool, values 0/1, y/n, Y/N. + * invbool: the above, only sense-reversed (N = true). + */ +#define module_param(name, type, perm) \ + module_param_named(name, name, type, perm) + +/** + * module_param_named - typesafe helper for a renamed module/cmdline parameter + * @name: a valid C identifier which is the parameter name. + * @value: the actual lvalue to alter. + * @type: the type of the parameter + * @perm: visibility in sysfs. + * + * Usually it's a good idea to have variable names and user-exposed names the + * same, but that's harder if the variable must be non-static or is inside a + * structure. This allows exposure under a different name. + */ +#define module_param_named(name, value, type, perm) \ + param_check_##type(name, &(value)); \ + module_param_cb(name, ¶m_ops_##type, &value, perm); \ + __MODULE_PARM_TYPE(name, #type) + +/** + * module_param_cb - general callback for a module/cmdline parameter + * @name: a valid C identifier which is the parameter name. + * @ops: the set & get operations for this parameter. + * @perm: visibility in sysfs. + * + * The ops can have NULL set or get functions. + */ +#define module_param_cb(name, ops, arg, perm) \ + __module_param_call(MODULE_PARAM_PREFIX, \ + name, ops, arg, __same_type(*(arg), bool), perm) + /* On alpha, ia64 and ppc64 relocations to global data cannot go into read-only sections (which is part of respective UNIX ABI on these platforms). So 'const' makes no sense and even causes compile failures @@ -80,10 +138,8 @@ struct kparam_array #endif /* This is the fundamental function for registering boot/module - parameters. perm sets the visibility in sysfs: 000 means it's - not there, read bits mean it's readable, write bits mean it's - writable. */ -#define __module_param_call(prefix, name, set, get, arg, isbool, perm) \ + parameters. */ +#define __module_param_call(prefix, name, ops, arg, isbool, perm) \ /* Default value instead of permissions? */ \ static int __param_perm_check_##name __attribute__((unused)) = \ BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)) \ @@ -92,31 +148,87 @@ struct kparam_array static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0, \ - set, get, { arg } } + = { __param_str_##name, ops, perm, isbool ? KPARAM_ISBOOL : 0, \ + { arg } } + +/* Obsolete - use module_param_cb() */ +#define module_param_call(name, set, get, arg, perm) \ + static struct kernel_param_ops __param_ops_##name = \ + { (void *)set, (void *)get }; \ + __module_param_call(MODULE_PARAM_PREFIX, \ + name, &__param_ops_##name, arg, \ + __same_type(*(arg), bool), \ + (perm) + sizeof(__check_old_set_param(set))*0) + +/* We don't get oldget: it's often a new-style param_get_uint, etc. */ +static inline int +__check_old_set_param(int (*oldset)(const char *, struct kernel_param *)) +{ + return 0; +} -#define module_param_call(name, set, get, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, \ - name, set, get, arg, \ - __same_type(*(arg), bool), perm) +/** + * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs. + * @name: the name of the parameter + * + * There's no point blocking write on a paramter that isn't writable via sysfs! + */ +#define kparam_block_sysfs_write(name) \ + do { \ + BUG_ON(!(__param_##name.perm & 0222)); \ + __kernel_param_lock(); \ + } while (0) -/* Helper functions: type is byte, short, ushort, int, uint, long, - ulong, charp, bool or invbool, or XXX if you define param_get_XXX, - param_set_XXX and param_check_XXX. */ -#define module_param_named(name, value, type, perm) \ - param_check_##type(name, &(value)); \ - module_param_call(name, param_set_##type, param_get_##type, &value, perm); \ - __MODULE_PARM_TYPE(name, #type) +/** + * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again. + * @name: the name of the parameter + */ +#define kparam_unblock_sysfs_write(name) \ + do { \ + BUG_ON(!(__param_##name.perm & 0222)); \ + __kernel_param_unlock(); \ + } while (0) -#define module_param(name, type, perm) \ - module_param_named(name, name, type, perm) +/** + * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs. + * @name: the name of the parameter + * + * This also blocks sysfs writes. + */ +#define kparam_block_sysfs_read(name) \ + do { \ + BUG_ON(!(__param_##name.perm & 0444)); \ + __kernel_param_lock(); \ + } while (0) + +/** + * kparam_unblock_sysfs_read - allows sysfs to read a parameter again. + * @name: the name of the parameter + */ +#define kparam_unblock_sysfs_read(name) \ + do { \ + BUG_ON(!(__param_##name.perm & 0444)); \ + __kernel_param_unlock(); \ + } while (0) + +#ifdef CONFIG_SYSFS +extern void __kernel_param_lock(void); +extern void __kernel_param_unlock(void); +#else +static inline void __kernel_param_lock(void) +{ +} +static inline void __kernel_param_unlock(void) +{ +} +#endif #ifndef MODULE /** * core_param - define a historical core kernel parameter. * @name: the name of the cmdline and sysfs parameter (often the same as var) * @var: the variable - * @type: the type (for param_set_##type and param_get_##type) + * @type: the type of the parameter * @perm: visibility in sysfs * * core_param is just like module_param(), but cannot be modular and @@ -126,23 +238,32 @@ struct kparam_array */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, param_set_##type, param_get_##type, \ + __module_param_call("", name, ¶m_ops_##type, \ &var, __same_type(var, bool), perm) #endif /* !MODULE */ -/* Actually copy string: maxlen param is usually sizeof(string). */ +/** + * module_param_string - a char array parameter + * @name: the name of the parameter + * @string: the string variable + * @len: the maximum length of the string, incl. terminator + * @perm: visibility in sysfs. + * + * This actually copies the string when it's set (unlike type charp). + * @len is usually just sizeof(string). + */ #define module_param_string(name, string, len, perm) \ static const struct kparam_string __param_string_##name \ = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ - param_set_copystring, param_get_string, \ + ¶m_ops_string, \ .str = &__param_string_##name, 0, perm); \ __MODULE_PARM_TYPE(name, "string") /* Called on module insert or kernel boot */ extern int parse_args(const char *name, char *args, - struct kernel_param *params, + const struct kernel_param *params, unsigned num, int (*unknown)(char *param, char *val)); @@ -162,41 +283,50 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type *__check_##name(void) { return(p); } -extern int param_set_byte(const char *val, struct kernel_param *kp); -extern int param_get_byte(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_byte; +extern int param_set_byte(const char *val, const struct kernel_param *kp); +extern int param_get_byte(char *buffer, const struct kernel_param *kp); #define param_check_byte(name, p) __param_check(name, p, unsigned char) -extern int param_set_short(const char *val, struct kernel_param *kp); -extern int param_get_short(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_short; +extern int param_set_short(const char *val, const struct kernel_param *kp); +extern int param_get_short(char *buffer, const struct kernel_param *kp); #define param_check_short(name, p) __param_check(name, p, short) -extern int param_set_ushort(const char *val, struct kernel_param *kp); -extern int param_get_ushort(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_ushort; +extern int param_set_ushort(const char *val, const struct kernel_param *kp); +extern int param_get_ushort(char *buffer, const struct kernel_param *kp); #define param_check_ushort(name, p) __param_check(name, p, unsigned short) -extern int param_set_int(const char *val, struct kernel_param *kp); -extern int param_get_int(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_int; +extern int param_set_int(const char *val, const struct kernel_param *kp); +extern int param_get_int(char *buffer, const struct kernel_param *kp); #define param_check_int(name, p) __param_check(name, p, int) -extern int param_set_uint(const char *val, struct kernel_param *kp); -extern int param_get_uint(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_uint; +extern int param_set_uint(const char *val, const struct kernel_param *kp); +extern int param_get_uint(char *buffer, const struct kernel_param *kp); #define param_check_uint(name, p) __param_check(name, p, unsigned int) -extern int param_set_long(const char *val, struct kernel_param *kp); -extern int param_get_long(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_long; +extern int param_set_long(const char *val, const struct kernel_param *kp); +extern int param_get_long(char *buffer, const struct kernel_param *kp); #define param_check_long(name, p) __param_check(name, p, long) -extern int param_set_ulong(const char *val, struct kernel_param *kp); -extern int param_get_ulong(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_ulong; +extern int param_set_ulong(const char *val, const struct kernel_param *kp); +extern int param_get_ulong(char *buffer, const struct kernel_param *kp); #define param_check_ulong(name, p) __param_check(name, p, unsigned long) -extern int param_set_charp(const char *val, struct kernel_param *kp); -extern int param_get_charp(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_charp; +extern int param_set_charp(const char *val, const struct kernel_param *kp); +extern int param_get_charp(char *buffer, const struct kernel_param *kp); #define param_check_charp(name, p) __param_check(name, p, char *) /* For historical reasons "bool" parameters can be (unsigned) "int". */ -extern int param_set_bool(const char *val, struct kernel_param *kp); -extern int param_get_bool(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_bool; +extern int param_set_bool(const char *val, const struct kernel_param *kp); +extern int param_get_bool(char *buffer, const struct kernel_param *kp); #define param_check_bool(name, p) \ static inline void __check_##name(void) \ { \ @@ -205,29 +335,53 @@ extern int param_get_bool(char *buffer, struct kernel_param *kp); !__same_type(*(p), int)); \ } -extern int param_set_invbool(const char *val, struct kernel_param *kp); -extern int param_get_invbool(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_invbool; +extern int param_set_invbool(const char *val, const struct kernel_param *kp); +extern int param_get_invbool(char *buffer, const struct kernel_param *kp); #define param_check_invbool(name, p) __param_check(name, p, bool) -/* Comma-separated array: *nump is set to number they actually specified. */ +/** + * module_param_array - a parameter which is an array of some type + * @name: the name of the array variable + * @type: the type, as per module_param() + * @nump: optional pointer filled in with the number written + * @perm: visibility in sysfs + * + * Input and output are as comma-separated values. Commas inside values + * don't work properly (eg. an array of charp). + * + * ARRAY_SIZE(@name) is used to determine the number of elements in the + * array, so the definition must be visible. + */ +#define module_param_array(name, type, nump, perm) \ + module_param_array_named(name, name, type, nump, perm) + +/** + * module_param_array_named - renamed parameter which is an array of some type + * @name: a valid C identifier which is the parameter name + * @array: the name of the array variable + * @type: the type, as per module_param() + * @nump: optional pointer filled in with the number written + * @perm: visibility in sysfs + * + * This exposes a different name than the actual variable name. See + * module_param_named() for why this might be necessary. + */ #define module_param_array_named(name, array, type, nump, perm) \ static const struct kparam_array __param_arr_##name \ - = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\ + = { ARRAY_SIZE(array), nump, ¶m_ops_##type, \ sizeof(array[0]), array }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ - param_array_set, param_array_get, \ + ¶m_array_ops, \ .arr = &__param_arr_##name, \ __same_type(array[0], bool), perm); \ __MODULE_PARM_TYPE(name, "array of " #type) -#define module_param_array(name, type, nump, perm) \ - module_param_array_named(name, name, type, nump, perm) - -extern int param_array_set(const char *val, struct kernel_param *kp); -extern int param_array_get(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_array_ops; -extern int param_set_copystring(const char *val, struct kernel_param *kp); -extern int param_get_string(char *buffer, struct kernel_param *kp); +extern struct kernel_param_ops param_ops_string; +extern int param_set_copystring(const char *val, const struct kernel_param *); +extern int param_get_string(char *buffer, const struct kernel_param *kp); /* for exporting parameters in /sys/parameters */ @@ -235,13 +389,13 @@ struct module; #if defined(CONFIG_SYSFS) && defined(CONFIG_MODULES) extern int module_param_sysfs_setup(struct module *mod, - struct kernel_param *kparam, + const struct kernel_param *kparam, unsigned int num_params); extern void module_param_sysfs_remove(struct module *mod); #else static inline int module_param_sysfs_setup(struct module *mod, - struct kernel_param *kparam, + const struct kernel_param *kparam, unsigned int num_params) { return 0; diff --git a/include/linux/vhost.h b/include/linux/vhost.h new file mode 100644 index 000000000000..e847f1e30756 --- /dev/null +++ b/include/linux/vhost.h @@ -0,0 +1,130 @@ +#ifndef _LINUX_VHOST_H +#define _LINUX_VHOST_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/ioctl.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + __u64 desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + __u64 used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + __u64 avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + __u64 log_guest_addr; +}; + +struct vhost_memory_region { + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; + __u64 flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + __u32 nregions; + __u32 padding; + struct vhost_memory_region regions[0]; +}; + +/* ioctls */ + +#define VHOST_VIRTIO 0xAF + +/* Features bitmask for forward compatibility. Transport bits are used for + * vhost specific features. */ +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) + +/* Set current process as the (exclusive) owner of this file descriptor. This + * must be called before any other vhost command. Further calls to + * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +/* Give up ownership, and reset the device to default values. + * Allows subsequent call to VHOST_OWNER_SET to succeed. */ +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) + +/* Set up/modify memory layout */ +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory) + +/* Write logging setup. */ +/* Memory writes can optionally be logged by setting bit at an offset + * (calculated from the physical address) from specified log base. + * The bit is set using an atomic 32 bit operation. */ +/* Set base address for logging. */ +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +/* Specify an eventfd file descriptor to signal on log write. */ +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + +/* Ring setup. */ +/* Set number of descriptors in ring. This parameter can not + * be modified while ring is running (bound to a device). */ +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +/* Set addresses for the ring. */ +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +/* Base value where queue looks for available descriptors */ +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Get accessor: reads index, writes value in num */ +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) + +/* The following ioctls use eventfd file descriptors to signal and poll + * for events. */ + +/* Set eventfd to poll for added buffers */ +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +/* Set eventfd to signal when buffers have beed used */ +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +/* Set eventfd to signal an error */ +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) + +/* VHOST_NET specific defines */ + +/* Attach virtio net ring to a raw socket, or tap device. + * The socket must be already bound to an ethernet device, this device will be + * used for transmit. Pass fd -1 to unbind from the socket and the transmit + * device. This can be used to stop the ring (e.g. for migration). */ +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +#endif diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 0093dd7c1d6f..800617b4ddd5 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -109,7 +109,10 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, unsigned int fbit) { /* Did you forget to fix assumptions on max features? */ - MAYBE_BUILD_BUG_ON(fbit >= 32); + if (__builtin_constant_p(fbit)) + BUILD_BUG_ON(fbit >= 32); + else + BUG_ON(fbit >= 32); if (fbit < VIRTIO_TRANSPORT_F_START) virtio_check_driver_offered_feature(vdev, fbit); diff --git a/init/Kconfig b/init/Kconfig index 9e03ef8b311e..d3a5db8fb011 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1118,6 +1118,11 @@ config BASE_SMALL default 0 if BASE_FULL default 1 if !BASE_FULL +config HAVE_SYMBOL_PREFIX + bool + help + Some arch toolchains use a `_' prefix for all user symbols. + menuconfig MODULES bool "Enable loadable module support" help diff --git a/init/main.c b/init/main.c index 5988debfc505..698066ff8762 100644 --- a/init/main.c +++ b/init/main.c @@ -187,11 +187,11 @@ static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; -extern struct obs_kernel_param __setup_start[], __setup_end[]; +extern const struct obs_kernel_param __setup_start[], __setup_end[]; static int __init obsolete_checksetup(char *line) { - struct obs_kernel_param *p; + const struct obs_kernel_param *p; int had_early_param = 0; p = __setup_start; @@ -446,7 +446,7 @@ static noinline void __init_refok rest_init(void) /* Check for early params. */ static int __init do_early_param(char *param, char *val) { - struct obs_kernel_param *p; + const struct obs_kernel_param *p; for (p = __setup_start; p < __setup_end; p++) { if ((p->early && strcmp(param, p->str) == 0) || @@ -522,7 +522,7 @@ static void __init mm_init(void) asmlinkage void __init start_kernel(void) { char * command_line; - extern struct kernel_param __start___param[], __stop___param[]; + extern const struct kernel_param __start___param[], __stop___param[]; smp_setup_processor_id(); diff --git a/kernel/module.c b/kernel/module.c index 8b7d8805819d..29432fd20340 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -55,6 +55,7 @@ #include <linux/async.h> #include <linux/percpu.h> #include <linux/kmemleak.h> +#include <linux/bsearch.h> #define CREATE_TRACE_POINTS #include <trace/events/module.h> @@ -192,25 +193,71 @@ extern const unsigned long __start___kcrctab_unused[]; extern const unsigned long __start___kcrctab_unused_gpl[]; #endif +static struct ksymtab ksymtab[EXPORT_TYPE_MAX]; + +#ifdef CONFIG_MODVERSIONS +#define init_one_ksymtab(ksymt, start, stop, crc_start) \ + do { \ + struct ksymtab *ks = (ksymt); \ + ks->syms = (start); \ + ks->num_syms = (stop) - ks->syms; \ + ks->crcs = (crc_start); \ + } while (0) +#else +#define init_one_ksymtab(ksymt, start, stop, crc_start) \ + do { \ + struct ksymtab *ks = (ksymt); \ + ks->syms = (start); \ + ks->num_syms = (stop) - ks->syms; \ + } while (0) +#endif + +static int __init init_ksymtab(void) +{ + init_one_ksymtab(&ksymtab[EXPORT_TYPE_PLAIN], + __start___ksymtab, __stop___ksymtab, + __start___kcrctab); + init_one_ksymtab(&ksymtab[EXPORT_TYPE_GPL], + __start___ksymtab_gpl, __stop___ksymtab_gpl, + __start___kcrctab_gpl); +#ifdef CONFIG_UNUSED_EXPORTS + init_one_ksymtab(&ksymtab[EXPORT_TYPE_UNUSED], + __start___ksymtab_unused, + __stop___ksymtab_unused, + __start___kcrctab_unused); + init_one_ksymtab(&ksymtab[EXPORT_TYPE_UNUSED_GPL], + __start___ksymtab_unused_gpl, + __stop___ksymtab_unused_gpl, + __start___kcrctab_unused_gpl); +#endif + init_one_ksymtab(&ksymtab[EXPORT_TYPE_GPL_FUTURE], + __start___ksymtab_gpl_future, + __stop___ksymtab_gpl_future, + __start___kcrctab_gpl_future); + + return 0; +} +pure_initcall(init_ksymtab); + #ifndef CONFIG_MODVERSIONS #define symversion(base, idx) NULL #else #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) #endif -static bool each_symbol_in_section(const struct symsearch *arr, - unsigned int arrsize, +static bool each_symbol_in_section(const struct ksymtab syms[EXPORT_TYPE_MAX], struct module *owner, - bool (*fn)(const struct symsearch *syms, - struct module *owner, - unsigned int symnum, void *data), + each_symbol_fn_t *fn, void *data) { - unsigned int i, j; + enum export_type type; + unsigned int i; - for (j = 0; j < arrsize; j++) { - for (i = 0; i < arr[j].stop - arr[j].start; i++) - if (fn(&arr[j], owner, i, data)) + for (type = 0; type < EXPORT_TYPE_MAX; type++) { + for (i = 0; i < syms[type].num_syms; i++) + if (fn(type, &syms[type].syms[i], + symversion(syms[type].crcs, i), + owner, data)) return true; } @@ -218,90 +265,113 @@ static bool each_symbol_in_section(const struct symsearch *arr, } /* Returns true as soon as fn returns true, otherwise false. */ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data) +bool each_symbol(each_symbol_fn_t *fn, void *data) { struct module *mod; - const struct symsearch arr[] = { - { __start___ksymtab, __stop___ksymtab, __start___kcrctab, - NOT_GPL_ONLY, false }, - { __start___ksymtab_gpl, __stop___ksymtab_gpl, - __start___kcrctab_gpl, - GPL_ONLY, false }, - { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future, - __start___kcrctab_gpl_future, - WILL_BE_GPL_ONLY, false }, -#ifdef CONFIG_UNUSED_SYMBOLS - { __start___ksymtab_unused, __stop___ksymtab_unused, - __start___kcrctab_unused, - NOT_GPL_ONLY, true }, - { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl, - __start___kcrctab_unused_gpl, - GPL_ONLY, true }, -#endif - }; - if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) + if (each_symbol_in_section(ksymtab, NULL, fn, data)) return true; list_for_each_entry_rcu(mod, &modules, list) { - struct symsearch arr[] = { - { mod->syms, mod->syms + mod->num_syms, mod->crcs, - NOT_GPL_ONLY, false }, - { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, - mod->gpl_crcs, - GPL_ONLY, false }, - { mod->gpl_future_syms, - mod->gpl_future_syms + mod->num_gpl_future_syms, - mod->gpl_future_crcs, - WILL_BE_GPL_ONLY, false }, -#ifdef CONFIG_UNUSED_SYMBOLS - { mod->unused_syms, - mod->unused_syms + mod->num_unused_syms, - mod->unused_crcs, - NOT_GPL_ONLY, true }, - { mod->unused_gpl_syms, - mod->unused_gpl_syms + mod->num_unused_gpl_syms, - mod->unused_gpl_crcs, - GPL_ONLY, true }, -#endif - }; - - if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data)) + if (each_symbol_in_section(mod->syms, mod, fn, data)) return true; } return false; } EXPORT_SYMBOL_GPL(each_symbol); -struct find_symbol_arg { - /* Input */ - const char *name; - bool gplok; - bool warn; +static int symbol_compare(const void *key, const void *elt) +{ + const char *str = key; + const struct kernel_symbol *sym = elt; + return strcmp(str, sym->name); +} - /* Output */ - struct module *owner; - const unsigned long *crc; +/* binary search on sorted symbols */ +static const struct kernel_symbol *find_symbol_in_kernel( + const char *name, + enum export_type *t, + const unsigned long **crc) +{ + struct kernel_symbol *sym; + enum export_type type; + unsigned int i; + + for (type = 0; type < ARRAY_SIZE(ksymtab); type++) { + sym = bsearch(name, ksymtab[type].syms, ksymtab[type].num_syms, + sizeof(struct kernel_symbol), symbol_compare); + if (sym) { + i = sym - ksymtab[type].syms; + *crc = symversion(ksymtab[type].crcs, i); + *t = type; + return sym; + } + } + + return NULL; +} + +/* linear search on unsorted symbols */ +static const struct kernel_symbol *find_symbol_in_module( + struct module *mod, + const char *name, + enum export_type *t, + const unsigned long **crc) +{ + struct ksymtab *symtab = mod->syms; const struct kernel_symbol *sym; -}; + enum export_type type; + unsigned int i; -static bool find_symbol_in_section(const struct symsearch *syms, - struct module *owner, - unsigned int symnum, void *data) + for (type = 0; type < EXPORT_TYPE_MAX; type++) { + for (i = 0; i < symtab[type].num_syms; i++) { + sym = &symtab[type].syms[i]; + + if (strcmp(sym->name, name) == 0) { + *crc = symversion(symtab[type].crcs, i); + *t = type; + return sym; + } + } + } + + return NULL; +} + +/* Find a symbol and return it, along with, (optional) crc and + * (optional) module which owns it */ +const struct kernel_symbol *find_symbol(const char *name, + struct module **owner, + const unsigned long **crc, + bool gplok, + bool warn) { - struct find_symbol_arg *fsa = data; + struct module *mod = NULL; + const struct kernel_symbol *sym; + enum export_type type; + const unsigned long *crc_value; - if (strcmp(syms->start[symnum].name, fsa->name) != 0) - return false; + sym = find_symbol_in_kernel(name, &type, &crc_value); + if (sym) + goto found; + + list_for_each_entry_rcu(mod, &modules, list) { + sym = find_symbol_in_module(mod, name, &type, &crc_value); + if (sym) + goto found; + } - if (!fsa->gplok) { - if (syms->licence == GPL_ONLY) - return false; - if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) { + DEBUGP("Failed to find symbol %s\n", name); + return NULL; + +found: + if (!gplok) { + if (export_is_gpl_only(type)) + return NULL; + if (export_is_gpl_future(type) && warn) { printk(KERN_WARNING "Symbol %s is being used " "by a non-GPL module, which will not " - "be allowed in the future\n", fsa->name); + "be allowed in the future\n", name); printk(KERN_WARNING "Please see the file " "Documentation/feature-removal-schedule.txt " "in the kernel source tree for more details.\n"); @@ -309,9 +379,9 @@ static bool find_symbol_in_section(const struct symsearch *syms, } #ifdef CONFIG_UNUSED_SYMBOLS - if (syms->unused && fsa->warn) { + if (export_is_unused(type) && warn) { printk(KERN_WARNING "Symbol %s is marked as UNUSED, " - "however this module is using it.\n", fsa->name); + "however this module is using it.\n", name); printk(KERN_WARNING "This symbol will go away in the future.\n"); printk(KERN_WARNING @@ -322,36 +392,11 @@ static bool find_symbol_in_section(const struct symsearch *syms, } #endif - fsa->owner = owner; - fsa->crc = symversion(syms->crcs, symnum); - fsa->sym = &syms->start[symnum]; - return true; -} - -/* Find a symbol and return it, along with, (optional) crc and - * (optional) module which owns it */ -const struct kernel_symbol *find_symbol(const char *name, - struct module **owner, - const unsigned long **crc, - bool gplok, - bool warn) -{ - struct find_symbol_arg fsa; - - fsa.name = name; - fsa.gplok = gplok; - fsa.warn = warn; - - if (each_symbol(find_symbol_in_section, &fsa)) { - if (owner) - *owner = fsa.owner; - if (crc) - *crc = fsa.crc; - return fsa.sym; - } - - DEBUGP("Failed to find symbol %s\n", name); - return NULL; + if (owner) + *owner = mod; + if (crc) + *crc = crc_value; + return sym; } EXPORT_SYMBOL_GPL(find_symbol); @@ -1030,6 +1075,16 @@ static int try_to_force_load(struct module *mod, const char *reason) } #ifdef CONFIG_MODVERSIONS +static const char *crc_section_names[] = { + [EXPORT_TYPE_PLAIN] = "__kcrctab", + [EXPORT_TYPE_GPL] = "__kcrctab_gpl", +#ifdef CONFIG_UNUSED_SYMBOLS + [EXPORT_TYPE_UNUSED] = "__kcrctab_unused", + [EXPORT_TYPE_UNUSED_GPL] = "__kcrctab_unused_gpl", +#endif + [EXPORT_TYPE_GPL_FUTURE] = "__kcrctab_gpl_future", +}; + static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, @@ -1564,23 +1619,13 @@ EXPORT_SYMBOL_GPL(__symbol_get); static int verify_export_symbols(struct module *mod) { unsigned int i; - struct module *owner; + enum export_type type; const struct kernel_symbol *s; - struct { - const struct kernel_symbol *sym; - unsigned int num; - } arr[] = { - { mod->syms, mod->num_syms }, - { mod->gpl_syms, mod->num_gpl_syms }, - { mod->gpl_future_syms, mod->num_gpl_future_syms }, -#ifdef CONFIG_UNUSED_SYMBOLS - { mod->unused_syms, mod->num_unused_syms }, - { mod->unused_gpl_syms, mod->num_unused_gpl_syms }, -#endif - }; + struct module *owner; - for (i = 0; i < ARRAY_SIZE(arr); i++) { - for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { + for (type = 0; type < EXPORT_TYPE_MAX; type++) { + for (i = 0; i < mod->syms[type].num_syms; i++) { + s = &mod->syms[type].syms[i]; if (find_symbol(s->name, &owner, NULL, true, false)) { printk(KERN_ERR "%s: exports duplicate symbol %s" @@ -1810,27 +1855,19 @@ static void free_modinfo(struct module *mod) #ifdef CONFIG_KALLSYMS -/* lookup symbol in given range of kernel_symbols */ -static const struct kernel_symbol *lookup_symbol(const char *name, - const struct kernel_symbol *start, - const struct kernel_symbol *stop) -{ - const struct kernel_symbol *ks = start; - for (; ks < stop; ks++) - if (strcmp(ks->name, name) == 0) - return ks; - return NULL; -} - static int is_exported(const char *name, unsigned long value, - const struct module *mod) + struct module *mod) { - const struct kernel_symbol *ks; - if (!mod) - ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); + const struct kernel_symbol *sym; + enum export_type type; + const unsigned long *crc; + + if (mod) + sym = find_symbol_in_module(mod, name, &type, &crc); else - ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); - return ks != NULL && ks->value == value; + sym = find_symbol_in_kernel(name, &type, &crc); + + return (sym && sym->value == value); } /* As per nm */ @@ -2066,6 +2103,16 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, } #endif +static const char *export_section_names[] = { + [EXPORT_TYPE_PLAIN] = "__ksymtab", + [EXPORT_TYPE_GPL] = "__ksymtab_gpl", +#ifdef CONFIG_UNUSED_SYMBOLS + [EXPORT_TYPE_UNUSED] = "__ksymtab_unused", + [EXPORT_TYPE_UNUSED_GPL] = "__ksymtab_unused_gpl", +#endif + [EXPORT_TYPE_GPL_FUTURE] = "__ksymtab_gpl_future", +}; + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static noinline struct module *load_module(void __user *umod, @@ -2080,6 +2127,7 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; + enum export_type export_type; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ @@ -2339,34 +2387,21 @@ static noinline struct module *load_module(void __user *umod, * find optional sections. */ mod->kp = section_objs(hdr, sechdrs, secstrings, "__param", sizeof(*mod->kp), &mod->num_kp); - mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab", - sizeof(*mod->syms), &mod->num_syms); - mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab"); - mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl", - sizeof(*mod->gpl_syms), - &mod->num_gpl_syms); - mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl"); - mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings, - "__ksymtab_gpl_future", - sizeof(*mod->gpl_future_syms), - &mod->num_gpl_future_syms); - mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_gpl_future"); -#ifdef CONFIG_UNUSED_SYMBOLS - mod->unused_syms = section_objs(hdr, sechdrs, secstrings, - "__ksymtab_unused", - sizeof(*mod->unused_syms), - &mod->num_unused_syms); - mod->unused_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_unused"); - mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings, - "__ksymtab_unused_gpl", - sizeof(*mod->unused_gpl_syms), - &mod->num_unused_gpl_syms); - mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_unused_gpl"); + export_type = 0; + for (; export_type < ARRAY_SIZE(export_section_names); export_type++) { + mod->syms[export_type].syms = + section_objs(hdr, sechdrs, secstrings, + export_section_names[export_type], + sizeof(struct kernel_symbol), + &mod->syms[export_type].num_syms); +#ifdef CONFIG_MODVERSIONS + mod->syms[export_type].crcs = + section_addr(hdr, sechdrs, secstrings, + crc_section_names[export_type]); #endif + } + #ifdef CONFIG_CONSTRUCTORS mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors", sizeof(*mod->ctors), &mod->num_ctors); @@ -2391,19 +2426,20 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif + #ifdef CONFIG_MODVERSIONS - if ((mod->num_syms && !mod->crcs) - || (mod->num_gpl_syms && !mod->gpl_crcs) - || (mod->num_gpl_future_syms && !mod->gpl_future_crcs) -#ifdef CONFIG_UNUSED_SYMBOLS - || (mod->num_unused_syms && !mod->unused_crcs) - || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) -#endif - ) { - err = try_to_force_load(mod, - "no versions for exported symbols"); - if (err) - goto cleanup; + export_type = 0; + for (; export_type < ARRAY_SIZE(mod->syms); export_type++) { + if (mod->syms[export_type].syms && + !mod->syms[export_type].crcs) { + err = try_to_force_load(mod, + "no versions for exported symbols"); + if (err) + goto cleanup; + + /* force load approved, don't check other sections */ + break; + } } #endif diff --git a/kernel/params.c b/kernel/params.c index d656c276508d..9ed6fcd84f2e 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -31,6 +31,42 @@ #define DEBUGP(fmt, a...) #endif +/* Protects all parameters, and incidentally kmalloced_param list. */ +static DEFINE_MUTEX(param_lock); + +/* This just allows us to keep track of which parameters are kmalloced. */ +struct kmalloced_param { + struct list_head list; + char val[]; +}; +static LIST_HEAD(kmalloced_params); + +static void *kmalloc_parameter(unsigned int size) +{ + struct kmalloced_param *p; + + p = kmalloc(sizeof(*p) + size, GFP_KERNEL); + if (!p) + return NULL; + + list_add(&p->list, &kmalloced_params); + return p->val; +} + +/* Does nothing if parameter wasn't kmalloced above. */ +static void maybe_kfree_parameter(void *param) +{ + struct kmalloced_param *p; + + list_for_each_entry(p, &kmalloced_params, list) { + if (p->val == param) { + list_del(&p->list); + kfree(p); + break; + } + } +} + static inline char dash2underscore(char c) { if (c == '-') @@ -49,18 +85,22 @@ static inline int parameq(const char *input, const char *paramname) static int parse_one(char *param, char *val, - struct kernel_param *params, + const struct kernel_param *params, unsigned num_params, int (*handle_unknown)(char *param, char *val)) { unsigned int i; + int err; /* Find parameter */ for (i = 0; i < num_params; i++) { if (parameq(param, params[i].name)) { DEBUGP("They are equal! Calling %p\n", - params[i].set); - return params[i].set(val, ¶ms[i]); + params[i].ops->set); + mutex_lock(¶m_lock); + err = params[i].ops->set(val, ¶ms[i]); + mutex_unlock(¶m_lock); + return err; } } @@ -130,7 +170,7 @@ static char *next_arg(char *args, char **param, char **val) /* Args looks like "foo=bar,bar2 baz=fuz wiz". */ int parse_args(const char *name, char *args, - struct kernel_param *params, + const struct kernel_param *params, unsigned num, int (*unknown)(char *param, char *val)) { @@ -179,7 +219,7 @@ int parse_args(const char *name, /* Lazy bastard, eh? */ #define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ - int param_set_##name(const char *val, struct kernel_param *kp) \ + int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ tmptype l; \ int ret; \ @@ -191,10 +231,18 @@ int parse_args(const char *name, *((type *)kp->arg) = l; \ return 0; \ } \ - int param_get_##name(char *buffer, struct kernel_param *kp) \ + int param_get_##name(char *buffer, const struct kernel_param *kp) \ { \ return sprintf(buffer, format, *((type *)kp->arg)); \ - } + } \ + struct kernel_param_ops param_ops_##name = { \ + .set = param_set_##name, \ + .get = param_get_##name, \ + }; \ + EXPORT_SYMBOL(param_set_##name); \ + EXPORT_SYMBOL(param_get_##name); \ + EXPORT_SYMBOL(param_ops_##name) + STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul); STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol); @@ -204,7 +252,7 @@ STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul); STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol); STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul); -int param_set_charp(const char *val, struct kernel_param *kp) +int param_set_charp(const char *val, const struct kernel_param *kp) { if (!val) { printk(KERN_ERR "%s: string parameter expected\n", @@ -218,25 +266,42 @@ int param_set_charp(const char *val, struct kernel_param *kp) return -ENOSPC; } - /* This is a hack. We can't need to strdup in early boot, and we + maybe_kfree_parameter(*(char **)kp->arg); + + /* This is a hack. We can't kmalloc in early boot, and we * don't need to; this mangled commandline is preserved. */ if (slab_is_available()) { - *(char **)kp->arg = kstrdup(val, GFP_KERNEL); + *(char **)kp->arg = kmalloc_parameter(strlen(val)+1); if (!*(char **)kp->arg) return -ENOMEM; + strcpy(*(char **)kp->arg, val); } else *(const char **)kp->arg = val; return 0; } +EXPORT_SYMBOL(param_set_charp); -int param_get_charp(char *buffer, struct kernel_param *kp) +int param_get_charp(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%s", *((char **)kp->arg)); } +EXPORT_SYMBOL(param_get_charp); + +static void param_free_charp(void *arg) +{ + maybe_kfree_parameter(*((char **)arg)); +} + +struct kernel_param_ops param_ops_charp = { + .set = param_set_charp, + .get = param_get_charp, + .free = param_free_charp, +}; +EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ -int param_set_bool(const char *val, struct kernel_param *kp) +int param_set_bool(const char *val, const struct kernel_param *kp) { bool v; @@ -261,8 +326,9 @@ int param_set_bool(const char *val, struct kernel_param *kp) *(int *)kp->arg = v; return 0; } +EXPORT_SYMBOL(param_set_bool); -int param_get_bool(char *buffer, struct kernel_param *kp) +int param_get_bool(char *buffer, const struct kernel_param *kp) { bool val; if (kp->flags & KPARAM_ISBOOL) @@ -273,9 +339,16 @@ int param_get_bool(char *buffer, struct kernel_param *kp) /* Y and N chosen as being relatively non-coder friendly */ return sprintf(buffer, "%c", val ? 'Y' : 'N'); } +EXPORT_SYMBOL(param_get_bool); + +struct kernel_param_ops param_ops_bool = { + .set = param_set_bool, + .get = param_get_bool, +}; +EXPORT_SYMBOL(param_ops_bool); /* This one must be bool. */ -int param_set_invbool(const char *val, struct kernel_param *kp) +int param_set_invbool(const char *val, const struct kernel_param *kp) { int ret; bool boolval; @@ -288,18 +361,26 @@ int param_set_invbool(const char *val, struct kernel_param *kp) *(bool *)kp->arg = !boolval; return ret; } +EXPORT_SYMBOL(param_set_invbool); -int param_get_invbool(char *buffer, struct kernel_param *kp) +int param_get_invbool(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y'); } +EXPORT_SYMBOL(param_get_invbool); + +struct kernel_param_ops param_ops_invbool = { + .set = param_set_invbool, + .get = param_get_invbool, +}; +EXPORT_SYMBOL(param_ops_invbool); /* We break the rule and mangle the string. */ static int param_array(const char *name, const char *val, unsigned int min, unsigned int max, void *elem, int elemsize, - int (*set)(const char *, struct kernel_param *kp), + int (*set)(const char *, const struct kernel_param *kp), u16 flags, unsigned int *num) { @@ -333,6 +414,7 @@ static int param_array(const char *name, /* nul-terminate and parse */ save = val[len]; ((char *)val)[len] = '\0'; + BUG_ON(!mutex_is_locked(¶m_lock)); ret = set(val, &kp); if (ret != 0) @@ -350,17 +432,17 @@ static int param_array(const char *name, return 0; } -int param_array_set(const char *val, struct kernel_param *kp) +static int param_array_set(const char *val, const struct kernel_param *kp) { const struct kparam_array *arr = kp->arr; unsigned int temp_num; return param_array(kp->name, val, 1, arr->max, arr->elem, - arr->elemsize, arr->set, kp->flags, + arr->elemsize, arr->ops->set, kp->flags, arr->num ?: &temp_num); } -int param_array_get(char *buffer, struct kernel_param *kp) +static int param_array_get(char *buffer, const struct kernel_param *kp) { int i, off, ret; const struct kparam_array *arr = kp->arr; @@ -371,7 +453,8 @@ int param_array_get(char *buffer, struct kernel_param *kp) if (i) buffer[off++] = ','; p.arg = arr->elem + arr->elemsize * i; - ret = arr->get(buffer + off, &p); + BUG_ON(!mutex_is_locked(¶m_lock)); + ret = arr->ops->get(buffer + off, &p); if (ret < 0) return ret; off += ret; @@ -380,7 +463,24 @@ int param_array_get(char *buffer, struct kernel_param *kp) return off; } -int param_set_copystring(const char *val, struct kernel_param *kp) +static void param_array_free(void *arg) +{ + unsigned int i; + const struct kparam_array *arr = arg; + + if (arr->ops->free) + for (i = 0; i < (arr->num ? *arr->num : arr->max); i++) + arr->ops->free(arr->elem + arr->elemsize * i); +} + +struct kernel_param_ops param_array_ops = { + .set = param_array_set, + .get = param_array_get, + .free = param_array_free, +}; +EXPORT_SYMBOL(param_array_ops); + +int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; @@ -396,12 +496,20 @@ int param_set_copystring(const char *val, struct kernel_param *kp) strcpy(kps->string, val); return 0; } +EXPORT_SYMBOL(param_set_copystring); -int param_get_string(char *buffer, struct kernel_param *kp) +int param_get_string(char *buffer, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; return strlcpy(buffer, kps->string, kps->maxlen); } +EXPORT_SYMBOL(param_get_string); + +struct kernel_param_ops param_ops_string = { + .set = param_set_copystring, + .get = param_get_string, +}; +EXPORT_SYMBOL(param_ops_string); /* sysfs output in /sys/modules/XYZ/parameters/ */ #define to_module_attr(n) container_of(n, struct module_attribute, attr); @@ -412,7 +520,7 @@ extern struct kernel_param __start___param[], __stop___param[]; struct param_attribute { struct module_attribute mattr; - struct kernel_param *param; + const struct kernel_param *param; }; struct module_param_attrs @@ -431,10 +539,12 @@ static ssize_t param_attr_show(struct module_attribute *mattr, int count; struct param_attribute *attribute = to_param_attr(mattr); - if (!attribute->param->get) + if (!attribute->param->ops->get) return -EPERM; - count = attribute->param->get(buf, attribute->param); + mutex_lock(¶m_lock); + count = attribute->param->ops->get(buf, attribute->param); + mutex_unlock(¶m_lock); if (count > 0) { strcat(buf, "\n"); ++count; @@ -450,10 +560,12 @@ static ssize_t param_attr_store(struct module_attribute *mattr, int err; struct param_attribute *attribute = to_param_attr(mattr); - if (!attribute->param->set) + if (!attribute->param->ops->set) return -EPERM; - err = attribute->param->set(buf, attribute->param); + mutex_lock(¶m_lock); + err = attribute->param->ops->set(buf, attribute->param); + mutex_unlock(¶m_lock); if (!err) return len; return err; @@ -467,6 +579,18 @@ static ssize_t param_attr_store(struct module_attribute *mattr, #endif #ifdef CONFIG_SYSFS +void __kernel_param_lock(void) +{ + mutex_lock(¶m_lock); +} +EXPORT_SYMBOL(__kernel_param_lock); + +void __kernel_param_unlock(void) +{ + mutex_unlock(¶m_lock); +} +EXPORT_SYMBOL(__kernel_param_unlock); + /* * add_sysfs_param - add a parameter to sysfs * @mk: struct module_kobject @@ -478,7 +602,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr, * if there's an error. */ static __modinit int add_sysfs_param(struct module_kobject *mk, - struct kernel_param *kp, + const struct kernel_param *kp, const char *name) { struct module_param_attrs *new; @@ -559,7 +683,7 @@ static void free_module_param_attrs(struct module_kobject *mk) * /sys/module/[mod->name]/parameters/ */ int module_param_sysfs_setup(struct module *mod, - struct kernel_param *kparam, + const struct kernel_param *kparam, unsigned int num_params) { int i, err; @@ -604,7 +728,11 @@ void module_param_sysfs_remove(struct module *mod) void destroy_params(const struct kernel_param *params, unsigned num) { - /* FIXME: This should free kmalloced charp parameters. It doesn't. */ + unsigned int i; + + for (i = 0; i < num; i++) + if (params[i].ops->free) + params[i].ops->free(params[i].arg); } static void __init kernel_add_sysfs_param(const char *name, @@ -770,28 +898,3 @@ static int __init param_sysfs_init(void) subsys_initcall(param_sysfs_init); #endif /* CONFIG_SYSFS */ - -EXPORT_SYMBOL(param_set_byte); -EXPORT_SYMBOL(param_get_byte); -EXPORT_SYMBOL(param_set_short); -EXPORT_SYMBOL(param_get_short); -EXPORT_SYMBOL(param_set_ushort); -EXPORT_SYMBOL(param_get_ushort); -EXPORT_SYMBOL(param_set_int); -EXPORT_SYMBOL(param_get_int); -EXPORT_SYMBOL(param_set_uint); -EXPORT_SYMBOL(param_get_uint); -EXPORT_SYMBOL(param_set_long); -EXPORT_SYMBOL(param_get_long); -EXPORT_SYMBOL(param_set_ulong); -EXPORT_SYMBOL(param_get_ulong); -EXPORT_SYMBOL(param_set_charp); -EXPORT_SYMBOL(param_get_charp); -EXPORT_SYMBOL(param_set_bool); -EXPORT_SYMBOL(param_get_bool); -EXPORT_SYMBOL(param_set_invbool); -EXPORT_SYMBOL(param_get_invbool); -EXPORT_SYMBOL(param_array_set); -EXPORT_SYMBOL(param_array_get); -EXPORT_SYMBOL(param_set_copystring); -EXPORT_SYMBOL(param_get_string); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7f29643c8985..8f1e5642fce7 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1583,7 +1583,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); - if (cpu < 0 || cpu > num_possible_cpus()) + if (cpu < 0 || cpu >= nr_cpu_ids) return ERR_PTR(-EINVAL); /* @@ -1591,7 +1591,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) * offline CPU and activate it when the CPU comes up, but * that's for later. */ - if (!cpu_isset(cpu, cpu_online_map)) + if (!cpu_online(cpu)) return ERR_PTR(-ENODEV); cpuctx = &per_cpu(perf_cpu_context, cpu); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 1b5b7aa2fdfd..971c40ae15fd 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -232,10 +232,10 @@ static void timer_list_show_tickdevices(struct seq_file *m) #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST print_tickdevice(m, tick_get_broadcast_device(), -1); SEQ_printf(m, "tick_broadcast_mask: %08lx\n", - tick_get_broadcast_mask()->bits[0]); + cpumask_bits(tick_get_broadcast_mask())[0]); #ifdef CONFIG_TICK_ONESHOT SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", - tick_get_broadcast_oneshot_mask()->bits[0]); + cpumask_bits(tick_get_broadcast_oneshot_mask())[0]); #endif SEQ_printf(m, "\n"); #endif diff --git a/lib/Makefile b/lib/Makefile index 2e78277eff9d..fb60af165607 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o + string_helpers.o gcd.o bsearch.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/bsearch.c b/lib/bsearch.c new file mode 100644 index 000000000000..29233eda58f3 --- /dev/null +++ b/lib/bsearch.c @@ -0,0 +1,53 @@ +/* + * A generic implementation of binary search for the Linux kernel + * + * Copyright (C) 2008-2009 Ksplice, Inc. + * Author: Tim Abbott <tabbott@ksplice.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2. + */ + +#include <linux/module.h> +#include <linux/bsearch.h> + +/* + * bsearch - binary search an array of elements + * @key: pointer to item being searched for + * @base: pointer to data to sort + * @num: number of elements + * @size: size of each element + * @cmp: pointer to comparison function + * + * This function does a binary search on the given array. The + * contents of the array should already be in ascending sorted order + * under the provided comparison function. + * + * Note that the key need not have the same type as the elements in + * the array, e.g. key could be a string and the comparison function + * could compare the string with the struct's name field. However, if + * the key and elements in the array are of the same type, you can use + * the same comparison function for both sort() and bsearch(). + */ +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)) +{ + size_t start = 0, end = num; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = cmp(key, base + mid * size); + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return (void *)base + mid * size; + } + + return NULL; +} +EXPORT_SYMBOL(bsearch); diff --git a/mm/mmu_context.c b/mm/mmu_context.c index ded9081f4021..0777654147c9 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/mmu_context.h> +#include <linux/module.h> #include <linux/sched.h> #include <asm/mmu_context.h> @@ -37,6 +38,7 @@ void use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); } +EXPORT_SYMBOL_GPL(use_mm); /* * unuse_mm @@ -56,3 +58,4 @@ void unuse_mm(struct mm_struct *mm) enter_lazy_tlb(mm, tsk); task_unlock(tsk); } +EXPORT_SYMBOL_GPL(unuse_mm); diff --git a/mm/slab.c b/mm/slab.c index 7dfa481c96ba..4c7b16f81828 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1120,7 +1120,7 @@ static void __cpuinit cpuup_canceled(long cpu) if (nc) free_block(cachep, nc->entry, nc->avail, node); - if (!cpus_empty(*mask)) { + if (!cpumask_empty(mask)) { spin_unlock_irq(&l3->list_lock); goto free_array_cache; } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b9007f80cb92..bcd6d0b9cb88 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -102,6 +102,7 @@ ieee80211_rate_control_ops_get(const char *name) struct rate_control_ops *ops; const char *alg_name; + kparam_block_sysfs_write(ieee80211_default_rc_algo); if (!name) alg_name = ieee80211_default_rc_algo; else @@ -119,6 +120,7 @@ ieee80211_rate_control_ops_get(const char *name) /* try built-in one if specific alg requested but not found */ if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT)) ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT); + kparam_unblock_sysfs_write(ieee80211_default_rc_algo); return ops; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37c5475ba258..de7ffe3b5953 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2619,7 +2619,8 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_bc_tcp_transport); } -static int param_set_uint_minmax(const char *val, struct kernel_param *kp, +static int param_set_uint_minmax(const char *val, + const struct kernel_param *kp, unsigned int min, unsigned int max) { unsigned long num; @@ -2634,34 +2635,37 @@ static int param_set_uint_minmax(const char *val, struct kernel_param *kp, return 0; } -static int param_set_portnr(const char *val, struct kernel_param *kp) +static int param_set_portnr(const char *val, const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, RPC_MIN_RESVPORT, RPC_MAX_RESVPORT); } -static int param_get_portnr(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} +static struct kernel_param_ops param_ops_portnr = { + .set = param_set_portnr, + .get = param_get_uint, +}; + #define param_check_portnr(name, p) \ __param_check(name, p, unsigned int); module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); -static int param_set_slot_table_size(const char *val, struct kernel_param *kp) +static int param_set_slot_table_size(const char *val, + const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, RPC_MIN_SLOT_TABLE, RPC_MAX_SLOT_TABLE); } -static int param_get_slot_table_size(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} +static struct kernel_param_ops param_ops_slot_table_size = { + .set = param_set_slot_table_size, + .get = param_get_uint, +}; + #define param_check_slot_table_size(name, p) \ __param_check(name, p, unsigned int); diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index d9f0cb837400..cd815ac2a50b 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -127,6 +127,11 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_GCOV)) endif +ifdef CONFIG_SYMBOL_PREFIX +_cpp_flags += -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX)) +endif + + # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 8f14c81abbc7..876a3c7c5dad 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -91,7 +91,7 @@ __modpost: $(modules:.ko=.o) FORCE $(call cmd,modpost) $(wildcard vmlinux) $(filter-out FORCE,$^) quiet_cmd_kernel-mod = MODPOST $@ - cmd_kernel-mod = $(modpost) $@ + cmd_kernel-mod = $(modpost) -x .tmp_exports-asm.S $@ vmlinux.o: FORCE $(call cmd,kernel-mod) diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index 11d69c35e5b4..ff954f8168c1 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -8,7 +8,7 @@ modpost-objs := modpost.o file2alias.o sumversion.o $(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h quiet_cmd_elfconfig = MKELF $@ - cmd_elfconfig = $(obj)/mk_elfconfig $(ARCH) < $< > $@ + cmd_elfconfig = $(obj)/mk_elfconfig < $< > $@ $(obj)/elfconfig.h: $(obj)/empty.o $(obj)/mk_elfconfig FORCE $(call if_changed,elfconfig) diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c index 6a96d47bd1e6..639bca7ba559 100644 --- a/scripts/mod/mk_elfconfig.c +++ b/scripts/mod/mk_elfconfig.c @@ -9,9 +9,6 @@ main(int argc, char **argv) unsigned char ei[EI_NIDENT]; union { short s; char c[2]; } endian_test; - if (argc != 2) { - fprintf(stderr, "Error: no arch\n"); - } if (fread(ei, 1, EI_NIDENT, stdin) != EI_NIDENT) { fprintf(stderr, "Error: input truncated\n"); return 1; @@ -55,12 +52,6 @@ main(int argc, char **argv) else exit(1); - if ((strcmp(argv[1], "h8300") == 0) - || (strcmp(argv[1], "blackfin") == 0)) - printf("#define MODULE_SYMBOL_PREFIX \"_\"\n"); - else - printf("#define MODULE_SYMBOL_PREFIX \"\"\n"); - return 0; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 801a16a17545..769ea9535c68 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -15,8 +15,17 @@ #include <stdio.h> #include <ctype.h> #include "modpost.h" +#include "../../include/generated/autoconf.h" #include "../../include/linux/license.h" +/* Some toolchains use a `_' prefix for all user symbols. */ +#ifdef CONFIG_SYMBOL_PREFIX +#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#else +#define MODULE_SYMBOL_PREFIX "" +#endif + + /* Are we using CONFIG_MODVERSIONS? */ int modversions = 0; /* Warn about undefined symbols? (do so if we have vmlinux) */ @@ -154,6 +163,7 @@ struct symbol { }; static struct symbol *symbolhash[SYMBOL_HASH_SIZE]; +unsigned int symbolcount; /* This is based on the hash agorithm from gdbm, via tdb */ static inline unsigned int tdb_hash(const char *name) @@ -191,6 +201,7 @@ static struct symbol *new_symbol(const char *name, struct module *module, unsigned int hash; struct symbol *new; + symbolcount++; hash = tdb_hash(name) % SYMBOL_HASH_SIZE; new = symbolhash[hash] = alloc_symbol(name, 0, symbolhash[hash]); new->module = module; @@ -951,6 +962,13 @@ static int section_mismatch(const char *fromsec, const char *tosec) * fromsec = .data* * atsym =__param* * + * Pattern 1a: + * module_param_call() ops can refer to __init set function if permissions=0 + * The pattern is identified by: + * tosec = .init.text + * fromsec = .data* + * atsym = __param_ops_* + * * Pattern 2: * Many drivers utilise a *driver container with references to * add, remove, probe functions etc. @@ -984,6 +1002,12 @@ static int secref_whitelist(const char *fromsec, const char *fromsym, (strncmp(fromsym, "__param", strlen("__param")) == 0)) return 0; + /* Check for pattern 1a */ + if (strcmp(tosec, ".init.text") == 0 && + match(fromsec, data_sections) && + (strncmp(fromsym, "__param_ops_", strlen("__param_ops_")) == 0)) + return 0; + /* Check for pattern 2 */ if (match(tosec, init_exit_sections) && match(fromsec, data_sections) && @@ -1976,6 +2000,58 @@ static void write_dump(const char *fname) write_if_changed(&buf, fname); } +static const char *section_names[] = { + [export_plain] = "", + [export_unused] = "_unused", + [export_gpl] = "_gpl", + [export_unused_gpl] = "_unused_gpl", + [export_gpl_future] = "_gpl_future", +}; + +static int compare_symbol_names(const void *a, const void *b) +{ + struct symbol *const *syma = a; + struct symbol *const *symb = b; + + return strcmp((*syma)->name, (*symb)->name); +} + +/* sort exported symbols and output using arch-independent assembly macros */ +static void write_exports(const char *fname) +{ + struct buffer buf = { }; + struct symbol *sym, **symbols; + int i, n; + + symbols = NOFAIL(malloc(sizeof(struct symbol *) * symbolcount)); + n = 0; + + for (i = 0; i < SYMBOL_HASH_SIZE; i++) { + for (sym = symbolhash[i]; sym; sym = sym->next) + symbols[n++] = sym; + } + + qsort(symbols, n, sizeof(struct symbol *), compare_symbol_names); + + buf_printf(&buf, "#define __MODPOST_EXPORTS__\n"); + buf_printf(&buf, "#include <linux/mod_export.h>\n"); + buf_printf(&buf, "\n"); + + for (i = 0; i < n; i++) { + sym = symbols[i]; + + buf_printf(&buf, "__EXPORT_SYMBOL(%s," + " __ksymtab%s_sorted," + " __ksymtab_strings_sorted," + " __kcrctab%s_sorted)\n", + sym->name, + section_names[sym->export], + section_names[sym->export]); + } + + write_if_changed(&buf, fname); +} + static void add_marker(struct module *mod, const char *name, const char *fmt) { char *line = NULL; @@ -2077,6 +2153,7 @@ int main(int argc, char **argv) struct buffer buf = { }; char *kernel_read = NULL, *module_read = NULL; char *dump_write = NULL; + char *exports_write = NULL; char *markers_read = NULL; char *markers_write = NULL; int opt; @@ -2084,7 +2161,7 @@ int main(int argc, char **argv) struct ext_sym_list *extsym_iter; struct ext_sym_list *extsym_start = NULL; - while ((opt = getopt(argc, argv, "i:I:e:cmsSo:awM:K:")) != -1) { + while ((opt = getopt(argc, argv, "i:I:e:cmsSo:awM:K:x:")) != -1) { switch (opt) { case 'i': kernel_read = optarg; @@ -2122,6 +2199,9 @@ int main(int argc, char **argv) case 'w': warn_unresolved = 1; break; + case 'x': + exports_write = optarg; + break; case 'M': markers_write = optarg; break; @@ -2182,6 +2262,9 @@ int main(int argc, char **argv) "'make CONFIG_DEBUG_SECTION_MISMATCH=y'\n", sec_mismatch_count); + if (exports_write) + write_exports(exports_write); + if (markers_read) read_markers(markers_read); |