-rw-r--r--  Documentation/cpu-hotplug.txt | 49
-rw-r--r--  Documentation/lguest/lguest.c | 1
-rw-r--r--  MAINTAINERS | 9
-rw-r--r--  Makefile | 24
-rw-r--r--  arch/arm/kernel/armksyms.c | 20
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S | 13
-rw-r--r--  arch/blackfin/Kconfig | 4
-rw-r--r--  arch/blackfin/include/asm/module.h | 2
-rw-r--r--  arch/blackfin/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/h8300/Kconfig | 4
-rw-r--r--  arch/h8300/include/asm/module.h | 2
-rw-r--r--  arch/h8300/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/powerpc/kernel/ppc_ksyms.c | 2
-rw-r--r--  arch/sparc/kernel/pci.c | 1
-rw-r--r--  arch/sparc/kernel/sparc_ksyms_64.c | 12
-rw-r--r--  arch/um/drivers/hostaudio_kern.c | 10
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 4
-rw-r--r--  arch/x86/kvm/Kconfig | 1
-rw-r--r--  drivers/Makefile | 1
-rw-r--r--  drivers/acpi/debug.c | 32
-rw-r--r--  drivers/char/hvc_iucv.c | 9
-rw-r--r--  drivers/char/ipmi/ipmi_watchdog.c | 42
-rw-r--r--  drivers/dio/dio-driver.c | 1
-rw-r--r--  drivers/ide/ide.c | 20
-rw-r--r--  drivers/idle/i7300_idle.c | 15
-rw-r--r--  drivers/input/misc/ati_remote2.c | 26
-rw-r--r--  drivers/input/mouse/psmouse-base.c | 14
-rw-r--r--  drivers/message/fusion/mptbase.c | 3
-rw-r--r--  drivers/misc/lkdtm.c | 4
-rw-r--r--  drivers/net/myri10ge/myri10ge.c | 54
-rw-r--r--  drivers/net/tun.c | 101
-rw-r--r--  drivers/net/wireless/libertas/if_sdio.c | 32
-rw-r--r--  drivers/net/wireless/libertas/if_usb.c | 3
-rw-r--r--  drivers/net/wireless/libertas_tf/if_usb.c | 3
-rw-r--r--  drivers/scsi/bfa/bfad.c | 2
-rw-r--r--  drivers/scsi/fcoe/fcoe.c | 2
-rw-r--r--  drivers/staging/rtl8187se/r8180_core.c | 6
-rw-r--r--  drivers/staging/rtl8192e/r8192E_core.c | 6
-rw-r--r--  drivers/staging/rtl8192su/r8192U_core.c | 6
-rw-r--r--  drivers/usb/atm/ueagle-atm.c | 2
-rw-r--r--  drivers/vhost/Kconfig | 11
-rw-r--r--  drivers/vhost/Makefile | 2
-rw-r--r--  drivers/vhost/net.c | 648
-rw-r--r--  drivers/vhost/vhost.c | 965
-rw-r--r--  drivers/vhost/vhost.h | 159
-rw-r--r--  drivers/video/uvesafb.c | 7
-rw-r--r--  drivers/video/vt8623fb.c | 2
-rw-r--r--  drivers/zorro/zorro-driver.c | 1
-rw-r--r--  fs/nfs/callback.c | 11
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 47
-rw-r--r--  include/linux/Kbuild | 1
-rw-r--r--  include/linux/bsearch.h | 9
-rw-r--r--  include/linux/gfp.h | 2
-rw-r--r--  include/linux/if_tun.h | 14
-rw-r--r--  include/linux/kernel.h | 32
-rw-r--r--  include/linux/kmemcheck.h | 2
-rw-r--r--  include/linux/miscdevice.h | 1
-rw-r--r--  include/linux/mod_export.h | 149
-rw-r--r--  include/linux/module.h | 145
-rw-r--r--  include/linux/moduleparam.h | 282
-rw-r--r--  include/linux/vhost.h | 130
-rw-r--r--  include/linux/virtio_config.h | 5
-rw-r--r--  init/Kconfig | 5
-rw-r--r--  init/main.c | 8
-rw-r--r--  kernel/module.c | 388
-rw-r--r--  kernel/params.c | 213
-rw-r--r--  kernel/perf_event.c | 4
-rw-r--r--  kernel/time/timer_list.c | 4
-rw-r--r--  lib/Makefile | 2
-rw-r--r--  lib/bsearch.c | 53
-rw-r--r--  mm/mmu_context.c | 3
-rw-r--r--  mm/slab.c | 2
-rw-r--r--  net/mac80211/rate.c | 2
-rw-r--r--  net/sunrpc/xprtsock.c | 26
-rw-r--r--  scripts/Makefile.lib | 5
-rw-r--r--  scripts/Makefile.modpost | 2
-rw-r--r--  scripts/mod/Makefile | 2
-rw-r--r--  scripts/mod/mk_elfconfig.c | 9
-rw-r--r--  scripts/mod/modpost.c | 85
79 files changed, 3344 insertions, 639 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 9d620c153b04..d041ae8a4ace 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -309,41 +309,26 @@ A: The following are what is required for CPU hotplug infrastructure to work
Q: I need to ensure that a particular cpu is not removed when there is some
work specific to this cpu is in progress.
-A: First switch the current thread context to preferred cpu
+A: There are two ways. If your code can be run in interrupt context, use
+ smp_call_function_single(), otherwise use work_on_cpu(). Note that
+ work_on_cpu() is slow, and can fail due to out of memory:
int my_func_on_cpu(int cpu)
{
- cpumask_t saved_mask, new_mask = CPU_MASK_NONE;
- int curr_cpu, err = 0;
-
- saved_mask = current->cpus_allowed;
- cpu_set(cpu, new_mask);
- err = set_cpus_allowed(current, new_mask);
-
- if (err)
- return err;
-
- /*
- * If we got scheduled out just after the return from
- * set_cpus_allowed() before running the work, this ensures
- * we stay locked.
- */
- curr_cpu = get_cpu();
-
- if (curr_cpu != cpu) {
- err = -EAGAIN;
- goto ret;
- } else {
- /*
- * Do work : But cant sleep, since get_cpu() disables preempt
- */
- }
- ret:
- put_cpu();
- set_cpus_allowed(current, saved_mask);
- return err;
- }
-
+ int err;
+ get_online_cpus();
+ if (!cpu_online(cpu))
+ err = -EINVAL;
+ else
+#if NEEDS_BLOCKING
+ err = work_on_cpu(cpu, __my_func_on_cpu, NULL);
+#else
+ smp_call_function_single(cpu, __my_func_on_cpu, &err,
+ true);
+#endif
+ put_online_cpus();
+ return err;
+ }
Q: How do we determine how many CPUs are available for hotplug.
A: There is no clear spec defined way from ACPI that can give us that
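For reference, the two helpers named in the updated answer expect differently shaped callbacks; a minimal sketch (the callback names below are illustrative, not part of the patch):

    /* work_on_cpu() callback: runs in process context, may sleep,
     * and returns its error code directly. */
    static long __my_func_on_cpu_blocking(void *unused)
    {
            /* ... do the per-cpu work ... */
            return 0;
    }

    /* smp_call_function_single() callback: runs with interrupts disabled
     * and must not sleep; errors are reported through the info pointer
     * (&err in the documentation example above). */
    static void __my_func_on_cpu_atomic(void *info)
    {
            int *err = info;

            /* ... do the per-cpu work ... */
            *err = 0;
    }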
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 098de5bce00a..9fdfbb969b12 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -34,7 +34,6 @@
#include <sys/uio.h>
#include <termios.h>
#include <getopt.h>
-#include <zlib.h>
#include <assert.h>
#include <sched.h>
#include <limits.h>
diff --git a/MAINTAINERS b/MAINTAINERS
index 98ba38360755..1cc3da5565d0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5699,6 +5699,15 @@ S: Maintained
F: Documentation/filesystems/vfat.txt
F: fs/fat/
+VIRTIO HOST (VHOST)
+M: "Michael S. Tsirkin" <mst@redhat.com>
+L: kvm@vger.kernel.org
+L: virtualization@lists.osdl.org
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/vhost/
+F: include/linux/vhost.h
+
VIA RHINE NETWORK DRIVER
M: Roger Luethi <rl@hellgate.ch>
S: Maintained
diff --git a/Makefile b/Makefile
index c61d4c88c1ad..2a65d8632e6f 100644
--- a/Makefile
+++ b/Makefile
@@ -679,6 +679,8 @@ libs-y := $(libs-y1) $(libs-y2)
# +--< $(vmlinux-main)
# | +--< driver/built-in.o mm/built-in.o + more
# |
+# +-< .tmp_exports-asm.o (see comments regarding modpost of vmlinux.o)
+# |
# +-< kallsyms.o (see description in CONFIG_KALLSYMS section)
#
# vmlinux version (uname -v) cannot be updated during normal
@@ -742,7 +744,6 @@ define rule_vmlinux__
$(verify_kallsyms)
endef
-
ifdef CONFIG_KALLSYMS
# Generate section listing all symbols and add it into vmlinux $(kallsyms.o)
# It's a three stage process:
@@ -802,13 +803,13 @@ quiet_cmd_kallsyms = KSYM $@
$(call cmd,kallsyms)
# .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version
-.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE
+.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) .tmp_exports-asm.o FORCE
$(call if_changed_rule,ksym_ld)
-.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE
+.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_exports-asm.o .tmp_kallsyms1.o FORCE
$(call if_changed,vmlinux__)
-.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE
+.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_exports-asm.o .tmp_kallsyms2.o FORCE
$(call if_changed,vmlinux__)
# Needs to visit scripts/ before $(KALLSYMS) can be used.
@@ -839,8 +840,18 @@ define rule_vmlinux-modpost
$(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd
endef
+# The modpost of vmlinux.o above creates .tmp_exports-asm.S, a list of exported
+# symbols sorted by name. This list is linked into vmlinux to replace the
+# original unsorted exports. It allows symbols to be resolved efficiently
+# when loading modules.
+.tmp_exports-asm.S: vmlinux.o
+
+ifneq ($(CONFIG_SYMBOL_PREFIX),)
+export AFLAGS_.tmp_exports-asm.o += -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX))
+endif
+
# vmlinux image - including updated kernel symbols
-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE
+vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o .tmp_exports-asm.o $(kallsyms.o) FORCE
ifdef CONFIG_HEADERS_CHECK
$(Q)$(MAKE) -f $(srctree)/Makefile headers_check
endif
@@ -1144,7 +1155,8 @@ endif # CONFIG_MODULES
# Directories & files removed with 'make clean'
CLEAN_DIRS += $(MODVERDIR)
CLEAN_FILES += vmlinux System.map \
- .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map
+ .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map \
+ .tmp_exports-asm*
# Directories & files removed with 'make mrproper'
MRPROPER_DIRS += include/config usr/include include/generated
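The sorted export list produced here is what makes a binary search over __ksymtab possible; lib/bsearch.c and include/linux/bsearch.h in the diffstat add the generic helper. A minimal sketch of that kind of lookup, assuming the { value, name } layout of struct kernel_symbol visible in the armksyms.c hunk below; this is illustrative, not the actual resolver changes in kernel/module.c:

    #include <linux/bsearch.h>
    #include <linux/module.h>
    #include <linux/string.h>

    static int cmp_name(const void *name, const void *sym)
    {
            return strcmp(name, ((const struct kernel_symbol *)sym)->name);
    }

    /* Binary-search a name-sorted [start, stop) slice of the export table
     * instead of walking it linearly. */
    static const struct kernel_symbol *
    lookup_exported_symbol(const char *name,
                           const struct kernel_symbol *start,
                           const struct kernel_symbol *stop)
    {
            return bsearch(name, start, stop - start, sizeof(*start),
                           cmp_name);
    }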
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 0e627705f746..8214bfebfaca 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -48,27 +48,7 @@ extern void __aeabi_uidivmod(void);
extern void __aeabi_ulcmp(void);
extern void fpundefinstr(void);
-extern void fp_enter(void);
-/*
- * This has a special calling convention; it doesn't
- * modify any of the usual registers, except for LR.
- */
-#define EXPORT_CRC_ALIAS(sym) __CRC_SYMBOL(sym, "")
-
-#define EXPORT_SYMBOL_ALIAS(sym,orig) \
- EXPORT_CRC_ALIAS(sym) \
- static const struct kernel_symbol __ksymtab_##sym \
- __used __attribute__((section("__ksymtab"))) = \
- { (unsigned long)&orig, #sym };
-
-/*
- * floating point math emulator support.
- * These symbols will never change their calling convention...
- */
-EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_enter);
-EXPORT_SYMBOL_ALIAS(fp_printk,printk);
-EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig);
EXPORT_SYMBOL(__backtrace);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index aecf87dfbaec..ec511d4969f8 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -79,11 +79,11 @@ SECTIONS
#endif
}
- /DISCARD/ : { /* Exit code and data */
- EXIT_TEXT
- EXIT_DATA
- *(.exitcall.exit)
- *(.discard)
+ /*
+ * unwind exit sections must be discarded before the rest of the
+ * unwind sections get included.
+ */
+ /DISCARD/ : {
*(.ARM.exidx.exit.text)
*(.ARM.extab.exit.text)
#ifndef CONFIG_HOTPLUG_CPU
@@ -271,6 +271,9 @@ SECTIONS
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
+
+ /* Default discards */
+ DISCARDS
}
/*
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index ae6a60f10120..2180433213b7 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -5,6 +5,10 @@
mainmenu "Blackfin Kernel Configuration"
+config SYMBOL_PREFIX
+ string
+ default "_"
+
config MMU
def_bool n
diff --git a/arch/blackfin/include/asm/module.h b/arch/blackfin/include/asm/module.h
index 9c1cfffddd9b..4282b169ead9 100644
--- a/arch/blackfin/include/asm/module.h
+++ b/arch/blackfin/include/asm/module.h
@@ -7,8 +7,6 @@
#ifndef _ASM_BFIN_MODULE_H
#define _ASM_BFIN_MODULE_H
-#define MODULE_SYMBOL_PREFIX "_"
-
#define Elf_Shdr Elf32_Shdr
#define Elf_Sym Elf32_Sym
#define Elf_Ehdr Elf32_Ehdr
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index 10e12539000e..f39707c6590d 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -4,8 +4,6 @@
* Licensed under the GPL-2 or later
*/
-#define VMLINUX_SYMBOL(_sym_) _##_sym_
-
#include <asm-generic/vmlinux.lds.h>
#include <asm/mem_map.h>
#include <asm/page.h>
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 9420648352b8..53cc669e6d59 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -10,6 +10,10 @@ config H8300
default y
select HAVE_IDE
+config SYMBOL_PREFIX
+ string
+ default "_"
+
config MMU
bool
default n
diff --git a/arch/h8300/include/asm/module.h b/arch/h8300/include/asm/module.h
index de23231f3196..8e46724b7c09 100644
--- a/arch/h8300/include/asm/module.h
+++ b/arch/h8300/include/asm/module.h
@@ -8,6 +8,4 @@ struct mod_arch_specific { };
#define Elf_Sym Elf32_Sym
#define Elf_Ehdr Elf32_Ehdr
-#define MODULE_SYMBOL_PREFIX "_"
-
#endif /* _ASM_H8/300_MODULE_H */
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
index b9e24907e6ea..03d356d96e5d 100644
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ b/arch/h8300/kernel/vmlinux.lds.S
@@ -1,4 +1,3 @@
-#define VMLINUX_SYMBOL(_sym_) _##_sym_
#include <asm-generic/vmlinux.lds.h>
#include <asm/page.h>
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 07115d6cd4ba..425451453e96 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -96,8 +96,6 @@ EXPORT_SYMBOL(copy_4K_page);
EXPORT_SYMBOL(isa_io_base);
EXPORT_SYMBOL(isa_mem_base);
EXPORT_SYMBOL(pci_dram_offset);
-EXPORT_SYMBOL(pci_alloc_consistent);
-EXPORT_SYMBOL(pci_free_consistent);
#endif /* CONFIG_PCI */
EXPORT_SYMBOL(start_thread);
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index b85374f7cf94..539e83f8e087 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -1064,7 +1064,6 @@ int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask)
return (device_mask & dma_addr_mask) == dma_addr_mask;
}
-EXPORT_SYMBOL(pci_dma_supported);
void pci_resource_to_user(const struct pci_dev *pdev, int bar,
const struct resource *rp, resource_size_t *start,
diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
index 0f26066a08d9..372ad59c4cba 100644
--- a/arch/sparc/kernel/sparc_ksyms_64.c
+++ b/arch/sparc/kernel/sparc_ksyms_64.c
@@ -38,17 +38,5 @@ EXPORT_SYMBOL(sun4v_niagara_setperf);
EXPORT_SYMBOL(sun4v_niagara2_getperf);
EXPORT_SYMBOL(sun4v_niagara2_setperf);
-#ifdef CONFIG_PCI
-/* inline functions in asm/pci_64.h */
-EXPORT_SYMBOL(pci_alloc_consistent);
-EXPORT_SYMBOL(pci_free_consistent);
-EXPORT_SYMBOL(pci_map_single);
-EXPORT_SYMBOL(pci_unmap_single);
-EXPORT_SYMBOL(pci_map_sg);
-EXPORT_SYMBOL(pci_unmap_sg);
-EXPORT_SYMBOL(pci_dma_sync_single_for_cpu);
-EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu);
-#endif
-
/* Exporting a symbol from /init/main.c */
EXPORT_SYMBOL(saved_command_line);
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 368219cc2366..3ceede02f426 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -186,7 +186,9 @@ static int hostaudio_open(struct inode *inode, struct file *file)
int ret;
#ifdef DEBUG
+ kparam_block_sysfs_write(dsp);
printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp);
+ kparam_unblock_sysfs_write(dsp);
#endif
state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL);
@@ -198,7 +200,9 @@ static int hostaudio_open(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE)
w = 1;
+ kparam_block_sysfs_write(dsp);
ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
+ kparam_unblock_sysfs_write(dsp);
if (ret < 0) {
kfree(state);
return ret;
@@ -254,11 +258,15 @@ static int hostmixer_open_mixdev(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE)
w = 1;
+ kparam_block_sysfs_write(mixer);
ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
+ kparam_unblock_sysfs_write(mixer);
if (ret < 0) {
+ kparam_block_sysfs_write(dsp);
printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', "
"err = %d\n", dsp, -ret);
+ kparam_unblock_sysfs_write(dsp);
kfree(state);
return ret;
}
@@ -314,8 +322,10 @@ MODULE_LICENSE("GPL");
static int __init hostaudio_init_module(void)
{
+ __kernel_param_lock();
printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n",
dsp, mixer);
+ __kernel_param_unlock();
module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
if (module_data.dev_audio < 0) {
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 3909e3ba5ce3..505b2d6873a0 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -57,4 +57,6 @@ EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(init_level4_pgt);
-EXPORT_SYMBOL(load_gs_index);
+#ifndef CONFIG_PARAVIRT
+EXPORT_SYMBOL(native_load_gs_index);
+#endif
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4cd498332466..3c4d0109ad20 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -65,6 +65,7 @@ config KVM_AMD
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
+source drivers/vhost/Kconfig
source drivers/lguest/Kconfig
source drivers/virtio/Kconfig
diff --git a/drivers/Makefile b/drivers/Makefile
index 6ee53c7a57a1..81e36596b1e9 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_HID) += hid/
obj-$(CONFIG_PPC_PS3) += ps3/
obj-$(CONFIG_OF) += of/
obj-$(CONFIG_SSB) += ssb/
+obj-$(CONFIG_VHOST_NET) += vhost/
obj-$(CONFIG_VIRTIO) += virtio/
obj-$(CONFIG_VLYNQ) += vlynq/
obj-$(CONFIG_STAGING) += staging/
diff --git a/drivers/acpi/debug.c b/drivers/acpi/debug.c
index 8a690c3b8e23..941469801322 100644
--- a/drivers/acpi/debug.c
+++ b/drivers/acpi/debug.c
@@ -94,7 +94,8 @@ static const struct acpi_dlevel acpi_debug_levels[] = {
/* --------------------------------------------------------------------------
FS Interface (/sys)
-------------------------------------------------------------------------- */
-static int param_get_debug_layer(char *buffer, struct kernel_param *kp) {
+static int param_get_debug_layer(char *buffer, const struct kernel_param *kp)
+{
int result = 0;
int i;
@@ -116,7 +117,8 @@ static int param_get_debug_layer(char *buffer, struct kernel_param *kp) {
return result;
}
-static int param_get_debug_level(char *buffer, struct kernel_param *kp) {
+static int param_get_debug_level(char *buffer, const struct kernel_param *kp)
+{
int result = 0;
int i;
@@ -135,8 +137,18 @@ static int param_get_debug_level(char *buffer, struct kernel_param *kp) {
return result;
}
-module_param_call(debug_layer, param_set_uint, param_get_debug_layer, &acpi_dbg_layer, 0644);
-module_param_call(debug_level, param_set_uint, param_get_debug_level, &acpi_dbg_level, 0644);
+static struct kernel_param_ops acpi_debug_layer_ops = {
+ .set = param_set_uint,
+ .get = param_get_debug_layer,
+};
+
+static struct kernel_param_ops acpi_debug_level_ops = {
+ .set = param_set_uint,
+ .get = param_get_debug_level,
+};
+
+module_param_cb(debug_layer, &acpi_debug_layer_ops, &acpi_dbg_layer, 0644);
+module_param_cb(debug_level, &acpi_debug_level_ops, &acpi_dbg_level, 0644);
static char trace_method_name[6];
module_param_string(trace_method_name, trace_method_name, 6, 0644);
@@ -145,7 +157,7 @@ module_param(trace_debug_layer, uint, 0644);
static unsigned int trace_debug_level;
module_param(trace_debug_level, uint, 0644);
-static int param_set_trace_state(const char *val, struct kernel_param *kp)
+static int param_set_trace_state(const char *val, const struct kernel_param *kp)
{
int result = 0;
@@ -179,7 +191,7 @@ exit:
return result;
}
-static int param_get_trace_state(char *buffer, struct kernel_param *kp)
+static int param_get_trace_state(char *buffer, const struct kernel_param *kp)
{
if (!acpi_gbl_trace_method_name)
return sprintf(buffer, "disable");
@@ -192,8 +204,12 @@ static int param_get_trace_state(char *buffer, struct kernel_param *kp)
return 0;
}
-module_param_call(trace_state, param_set_trace_state, param_get_trace_state,
- NULL, 0644);
+static struct kernel_param_ops param_ops_trace_state = {
+ .set = param_set_trace_state,
+ .get = param_get_trace_state,
+};
+
+module_param_cb(trace_state, &param_ops_trace_state, NULL, 0644);
/* --------------------------------------------------------------------------
FS Interface (/proc)
diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c
index b8a5d654d3d0..448788fc2bf2 100644
--- a/drivers/char/hvc_iucv.c
+++ b/drivers/char/hvc_iucv.c
@@ -1146,7 +1146,7 @@ out_err:
* Note: If it is called early in the boot process, @val is stored and
* parsed later in hvc_iucv_init().
*/
-static int param_set_vmidfilter(const char *val, struct kernel_param *kp)
+static int param_set_vmidfilter(const char *val, const struct kernel_param *kp)
{
int rc;
@@ -1173,7 +1173,7 @@ static int param_set_vmidfilter(const char *val, struct kernel_param *kp)
* The function stores the filter as a comma-separated list of z/VM user IDs
* in @buffer. Typically, sysfs routines call this function for attr show.
*/
-static int param_get_vmidfilter(char *buffer, struct kernel_param *kp)
+static int param_get_vmidfilter(char *buffer, const struct kernel_param *kp)
{
int rc;
size_t index, len;
@@ -1200,6 +1200,11 @@ static int param_get_vmidfilter(char *buffer, struct kernel_param *kp)
#define param_check_vmidfilter(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_vmidfilter = {
+ .set = param_set_vmidfilter,
+ .get = param_get_vmidfilter,
+};
+
/**
* hvc_iucv_init() - z/VM IUCV HVC device driver initialization
*/
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index a4d57e31f713..45999d81004f 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -196,7 +196,7 @@ static void ipmi_unregister_watchdog(int ipmi_intf);
*/
static int start_now;
-static int set_param_int(const char *val, struct kernel_param *kp)
+static int set_param_timeout(const char *val, const struct kernel_param *kp)
{
char *endp;
int l;
@@ -215,10 +215,11 @@ static int set_param_int(const char *val, struct kernel_param *kp)
return rv;
}
-static int get_param_int(char *buffer, struct kernel_param *kp)
-{
- return sprintf(buffer, "%i", *((int *)kp->arg));
-}
+static struct kernel_param_ops param_ops_timeout = {
+ .set = set_param_timeout,
+ .get = param_get_int,
+};
+#define param_check_timeout param_check_int
typedef int (*action_fn)(const char *intval, char *outval);
@@ -227,7 +228,7 @@ static int preaction_op(const char *inval, char *outval);
static int preop_op(const char *inval, char *outval);
static void check_parms(void);
-static int set_param_str(const char *val, struct kernel_param *kp)
+static int set_param_str(const char *val, const struct kernel_param *kp)
{
action_fn fn = (action_fn) kp->arg;
int rv = 0;
@@ -251,7 +252,7 @@ static int set_param_str(const char *val, struct kernel_param *kp)
return rv;
}
-static int get_param_str(char *buffer, struct kernel_param *kp)
+static int get_param_str(char *buffer, const struct kernel_param *kp)
{
action_fn fn = (action_fn) kp->arg;
int rv;
@@ -263,7 +264,7 @@ static int get_param_str(char *buffer, struct kernel_param *kp)
}
-static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp)
+static int set_param_wdog_ifnum(const char *val, const struct kernel_param *kp)
{
int rv = param_set_int(val, kp);
if (rv)
@@ -276,27 +277,38 @@ static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp)
return 0;
}
-module_param_call(ifnum_to_use, set_param_wdog_ifnum, get_param_int,
- &ifnum_to_use, 0644);
+static struct kernel_param_ops param_ops_wdog_ifnum = {
+ .set = set_param_wdog_ifnum,
+ .get = param_get_int,
+};
+
+#define param_check_wdog_ifnum param_check_int
+
+static struct kernel_param_ops param_ops_str = {
+ .set = set_param_str,
+ .get = get_param_str,
+};
+
+module_param(ifnum_to_use, wdog_ifnum, 0644);
MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog "
"timer. Setting to -1 defaults to the first registered "
"interface");
-module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644);
+module_param(timeout, timeout, 0644);
MODULE_PARM_DESC(timeout, "Timeout value in seconds.");
-module_param_call(pretimeout, set_param_int, get_param_int, &pretimeout, 0644);
+module_param(pretimeout, timeout, 0644);
MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds.");
-module_param_call(action, set_param_str, get_param_str, action_op, 0644);
+module_param_cb(action, &param_ops_str, action_op, 0644);
MODULE_PARM_DESC(action, "Timeout action. One of: "
"reset, none, power_cycle, power_off.");
-module_param_call(preaction, set_param_str, get_param_str, preaction_op, 0644);
+module_param_cb(preaction, &param_ops_str, preaction_op, 0644);
MODULE_PARM_DESC(preaction, "Pretimeout action. One of: "
"pre_none, pre_smi, pre_nmi, pre_int.");
-module_param_call(preop, set_param_str, get_param_str, preop_op, 0644);
+module_param_cb(preop, &param_ops_str, preop_op, 0644);
MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: "
"preop_none, preop_panic, preop_give_data.");
diff --git a/drivers/dio/dio-driver.c b/drivers/dio/dio-driver.c
index 9c0c9afcd0ac..a7b174ef4c85 100644
--- a/drivers/dio/dio-driver.c
+++ b/drivers/dio/dio-driver.c
@@ -140,5 +140,4 @@ postcore_initcall(dio_driver_init);
EXPORT_SYMBOL(dio_match_device);
EXPORT_SYMBOL(dio_register_driver);
EXPORT_SYMBOL(dio_unregister_driver);
-EXPORT_SYMBOL(dio_dev_driver);
EXPORT_SYMBOL(dio_bus_type);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 16d056939f9f..2c27a526025f 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(ide_pci_clk);
module_param_named(pci_clock, ide_pci_clk, int, 0);
MODULE_PARM_DESC(pci_clock, "PCI bus clock frequency (in MHz)");
-static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp)
+static int ide_set_dev_param_mask(const char *s, const struct kernel_param *kp)
{
int a, b, i, j = 1;
unsigned int *dev_param_mask = (unsigned int *)kp->arg;
@@ -201,34 +201,40 @@ static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp)
return 0;
}
+static struct kernel_param_ops param_ops_ide_dev_mask = {
+ .set = ide_set_dev_param_mask
+};
+
+#define param_check_ide_dev_mask(name, p) param_check_uint(name, p)
+
static unsigned int ide_nodma;
-module_param_call(nodma, ide_set_dev_param_mask, NULL, &ide_nodma, 0);
+module_param_named(nodma, ide_nodma, ide_dev_mask, 0);
MODULE_PARM_DESC(nodma, "disallow DMA for a device");
static unsigned int ide_noflush;
-module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0);
+module_param_named(noflush, ide_noflush, ide_dev_mask, 0);
MODULE_PARM_DESC(noflush, "disable flush requests for a device");
static unsigned int ide_nohpa;
-module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0);
+module_param_named(nohpa, ide_nohpa, ide_dev_mask, 0);
MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device");
static unsigned int ide_noprobe;
-module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0);
+module_param_named(noprobe, ide_noprobe, ide_dev_mask, 0);
MODULE_PARM_DESC(noprobe, "skip probing for a device");
static unsigned int ide_nowerr;
-module_param_call(nowerr, ide_set_dev_param_mask, NULL, &ide_nowerr, 0);
+module_param_named(nowerr, ide_nowerr, ide_dev_mask, 0);
MODULE_PARM_DESC(nowerr, "ignore the ATA_DF bit for a device");
static unsigned int ide_cdroms;
-module_param_call(cdrom, ide_set_dev_param_mask, NULL, &ide_cdroms, 0);
+module_param_named(cdrom, ide_cdroms, ide_dev_mask, 0);
MODULE_PARM_DESC(cdrom, "force device as a CD-ROM");
struct chs_geom {
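The ide.c conversion above (and the ipmi_watchdog.c one before it) follows the recipe the reworked moduleparam.h enables: define a struct kernel_param_ops named param_ops_<type> plus a param_check_<type>() macro, and plain module_param()/module_param_named() can then take that private <type>. A minimal sketch with made-up names (frob_mask/frob_devices are not part of the patch):

    #include <linux/kernel.h>
    #include <linux/moduleparam.h>

    /* accept only 16-bit mask values */
    static int frob_mask_set(const char *val, const struct kernel_param *kp)
    {
            unsigned long v;

            if (strict_strtoul(val, 0, &v) || v > 0xffff)
                    return -EINVAL;
            *(unsigned int *)kp->arg = v;
            return 0;
    }

    static struct kernel_param_ops param_ops_frob_mask = {
            .set = frob_mask_set,
            .get = param_get_uint,
    };
    #define param_check_frob_mask(name, p) param_check_uint(name, p)

    static unsigned int frob_devices;
    module_param_named(frob, frob_devices, frob_mask, 0644);
    MODULE_PARM_DESC(frob, "illustrative 16-bit device mask");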
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index 1f20a042a4f5..dd253002cd50 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -81,7 +81,7 @@ static u8 i7300_idle_thrtctl_saved;
static u8 i7300_idle_thrtlow_saved;
static u32 i7300_idle_mc_saved;
-static cpumask_t idle_cpumask;
+static cpumask_var_t idle_cpumask;
static ktime_t start_ktime;
static unsigned long avg_idle_us;
@@ -459,9 +459,9 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
spin_lock_irqsave(&i7300_idle_lock, flags);
if (val == IDLE_START) {
- cpu_set(smp_processor_id(), idle_cpumask);
+ cpumask_set_cpu(smp_processor_id(), idle_cpumask);
- if (cpus_weight(idle_cpumask) != num_online_cpus())
+ if (cpumask_weight(idle_cpumask) != num_online_cpus())
goto end;
now_ktime = ktime_get();
@@ -478,8 +478,8 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
i7300_idle_ioat_start();
} else if (val == IDLE_END) {
- cpu_clear(smp_processor_id(), idle_cpumask);
- if (cpus_weight(idle_cpumask) == (num_online_cpus() - 1)) {
+ cpumask_clear_cpu(smp_processor_id(), idle_cpumask);
+ if (cpumask_weight(idle_cpumask) == (num_online_cpus() - 1)) {
/* First CPU coming out of idle */
u64 idle_duration_us;
@@ -553,7 +553,6 @@ struct debugfs_file_info {
static int __init i7300_idle_init(void)
{
spin_lock_init(&i7300_idle_lock);
- cpus_clear(idle_cpumask);
total_us = 0;
if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
@@ -565,6 +564,9 @@ static int __init i7300_idle_init(void)
if (i7300_idle_ioat_init())
return -ENODEV;
+ if (!zalloc_cpumask_var(&idle_cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
debugfs_dir = debugfs_create_dir("i7300_idle", NULL);
if (debugfs_dir) {
int i = 0;
@@ -589,6 +591,7 @@ static int __init i7300_idle_init(void)
static void __exit i7300_idle_exit(void)
{
idle_notifier_unregister(&i7300_idle_nb);
+ free_cpumask_var(idle_cpumask);
if (debugfs_dir) {
int i = 0;
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index 0501f0e65157..3665e3975158 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -37,7 +37,8 @@ enum {
};
static int ati_remote2_set_mask(const char *val,
- struct kernel_param *kp, unsigned int max)
+ const struct kernel_param *kp,
+ unsigned int max)
{
unsigned long mask;
int ret;
@@ -58,28 +59,31 @@ static int ati_remote2_set_mask(const char *val,
}
static int ati_remote2_set_channel_mask(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
pr_debug("%s()\n", __func__);
return ati_remote2_set_mask(val, kp, ATI_REMOTE2_MAX_CHANNEL_MASK);
}
-static int ati_remote2_get_channel_mask(char *buffer, struct kernel_param *kp)
+static int ati_remote2_get_channel_mask(char *buffer,
+ const struct kernel_param *kp)
{
pr_debug("%s()\n", __func__);
return sprintf(buffer, "0x%04x", *(unsigned int *)kp->arg);
}
-static int ati_remote2_set_mode_mask(const char *val, struct kernel_param *kp)
+static int ati_remote2_set_mode_mask(const char *val,
+ const struct kernel_param *kp)
{
pr_debug("%s()\n", __func__);
return ati_remote2_set_mask(val, kp, ATI_REMOTE2_MAX_MODE_MASK);
}
-static int ati_remote2_get_mode_mask(char *buffer, struct kernel_param *kp)
+static int ati_remote2_get_mode_mask(char *buffer,
+ const struct kernel_param *kp)
{
pr_debug("%s()\n", __func__);
@@ -88,15 +92,19 @@ static int ati_remote2_get_mode_mask(char *buffer, struct kernel_param *kp)
static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK;
#define param_check_channel_mask(name, p) __param_check(name, p, unsigned int)
-#define param_set_channel_mask ati_remote2_set_channel_mask
-#define param_get_channel_mask ati_remote2_get_channel_mask
+static struct kernel_param_ops param_ops_channel_mask = {
+ .set = ati_remote2_set_channel_mask,
+ .get = ati_remote2_get_channel_mask,
+};
module_param(channel_mask, channel_mask, 0644);
MODULE_PARM_DESC(channel_mask, "Bitmask of channels to accept <15:Channel16>...<1:Channel2><0:Channel1>");
static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK;
#define param_check_mode_mask(name, p) __param_check(name, p, unsigned int)
-#define param_set_mode_mask ati_remote2_set_mode_mask
-#define param_get_mode_mask ati_remote2_get_mode_mask
+static struct kernel_param_ops param_ops_mode_mask = {
+ .set = ati_remote2_set_mode_mask,
+ .get = ati_remote2_get_mode_mask,
+};
module_param(mode_mask, mode_mask, 0644);
MODULE_PARM_DESC(mode_mask, "Bitmask of modes to accept <4:PC><3:AUX4><2:AUX3><1:AUX2><0:AUX1>");
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 07c53798301a..e4570dbff543 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -39,11 +39,13 @@ MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
static unsigned int psmouse_max_proto = PSMOUSE_AUTO;
-static int psmouse_set_maxproto(const char *val, struct kernel_param *kp);
-static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp);
+static int psmouse_set_maxproto(const char *val, const struct kernel_param *);
+static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp);
+static struct kernel_param_ops param_ops_proto_abbrev = {
+ .set = psmouse_set_maxproto,
+ .get = psmouse_get_maxproto,
+};
#define param_check_proto_abbrev(name, p) __param_check(name, p, unsigned int)
-#define param_set_proto_abbrev psmouse_set_maxproto
-#define param_get_proto_abbrev psmouse_get_maxproto
module_param_named(proto, psmouse_max_proto, proto_abbrev, 0644);
MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, any). Useful for KVM switches.");
@@ -1652,7 +1654,7 @@ static ssize_t psmouse_attr_set_resolution(struct psmouse *psmouse, void *data,
}
-static int psmouse_set_maxproto(const char *val, struct kernel_param *kp)
+static int psmouse_set_maxproto(const char *val, const struct kernel_param *kp)
{
const struct psmouse_protocol *proto;
@@ -1669,7 +1671,7 @@ static int psmouse_set_maxproto(const char *val, struct kernel_param *kp)
return 0;
}
-static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp)
+static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp)
{
int type = *((unsigned int *)kp->arg);
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 610e914abe6c..2b6f750ee394 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -109,8 +109,7 @@ MODULE_PARM_DESC(mpt_debug_level, " debug level - refer to mptdebug.h \
int mpt_fwfault_debug;
EXPORT_SYMBOL(mpt_fwfault_debug);
-module_param_call(mpt_fwfault_debug, param_set_int, param_get_int,
- &mpt_fwfault_debug, 0600);
+module_param(mpt_fwfault_debug, int, 0600);
MODULE_PARM_DESC(mpt_fwfault_debug, "Enable detection of Firmware fault"
" and halt Firmware on fault - (default=0)");
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index 3648b23d5c92..42660e55119e 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -120,9 +120,9 @@ static int count = DEFAULT_COUNT;
module_param(recur_count, int, 0644);
MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\
"default is 10");
-module_param(cpoint_name, charp, 0644);
+module_param(cpoint_name, charp, 0444);
MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed");
-module_param(cpoint_type, charp, 0644);
+module_param(cpoint_type, charp, 0444);
MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\
"hitting the crash point");
module_param(cpoint_count, int, 0644);
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 85e1b6a3ac1b..926c7215c667 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -236,6 +236,7 @@ struct myri10ge_priv {
int watchdog_resets;
int watchdog_pause;
int pause;
+ bool fw_name_allocated;
char *fw_name;
char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE];
char *product_code_string;
@@ -268,6 +269,7 @@ MODULE_FIRMWARE("myri10ge_eth_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");
+/* Careful: must be accessed under kparam_block_sysfs_write */
static char *myri10ge_fw_name = NULL;
module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
@@ -373,6 +375,14 @@ static inline void put_be32(__be32 val, __be32 __iomem * p)
static struct net_device_stats *myri10ge_get_stats(struct net_device *dev);
+static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated)
+{
+ if (mgp->fw_name_allocated)
+ kfree(mgp->fw_name);
+ mgp->fw_name = name;
+ mgp->fw_name_allocated = allocated;
+}
+
static int
myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
struct myri10ge_cmd *data, int atomic)
@@ -744,7 +754,7 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt)
dev_warn(&mgp->pdev->dev, "via hotplug\n");
}
- mgp->fw_name = "adopted";
+ set_fw_name(mgp, "adopted", false);
mgp->tx_boundary = 2048;
myri10ge_dummy_rdma(mgp, 1);
status = myri10ge_get_firmware_capabilities(mgp);
@@ -3260,7 +3270,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
* load the optimized firmware (which assumes aligned PCIe
* completions) in order to see if it works on this host.
*/
- mgp->fw_name = myri10ge_fw_aligned;
+ set_fw_name(mgp, myri10ge_fw_aligned, false);
status = myri10ge_load_firmware(mgp, 1);
if (status != 0) {
goto abort;
@@ -3288,7 +3298,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
abort:
/* fall back to using the unaligned firmware */
mgp->tx_boundary = 2048;
- mgp->fw_name = myri10ge_fw_unaligned;
+ set_fw_name(mgp, myri10ge_fw_unaligned, false);
}
@@ -3311,7 +3321,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
dev_info(&mgp->pdev->dev, "PCIE x%d Link\n",
link_width);
mgp->tx_boundary = 4096;
- mgp->fw_name = myri10ge_fw_aligned;
+ set_fw_name(mgp, myri10ge_fw_aligned, false);
} else {
myri10ge_firmware_probe(mgp);
}
@@ -3320,22 +3330,29 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
dev_info(&mgp->pdev->dev,
"Assuming aligned completions (forced)\n");
mgp->tx_boundary = 4096;
- mgp->fw_name = myri10ge_fw_aligned;
+ set_fw_name(mgp, myri10ge_fw_aligned, false);
} else {
dev_info(&mgp->pdev->dev,
"Assuming unaligned completions (forced)\n");
mgp->tx_boundary = 2048;
- mgp->fw_name = myri10ge_fw_unaligned;
+ set_fw_name(mgp, myri10ge_fw_unaligned, false);
}
}
+
+ kparam_block_sysfs_write(myri10ge_fw_name);
if (myri10ge_fw_name != NULL) {
- overridden = 1;
- mgp->fw_name = myri10ge_fw_name;
+ char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL);
+ if (fw_name) {
+ overridden = 1;
+ set_fw_name(mgp, fw_name, true);
+ }
}
+ kparam_unblock_sysfs_write(myri10ge_fw_name);
+
if (mgp->board_number < MYRI10GE_MAX_BOARDS &&
myri10ge_fw_names[mgp->board_number] != NULL &&
strlen(myri10ge_fw_names[mgp->board_number])) {
- mgp->fw_name = myri10ge_fw_names[mgp->board_number];
+ set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false);
overridden = 1;
}
if (overridden)
@@ -3699,6 +3716,7 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
struct myri10ge_cmd cmd;
struct pci_dev *pdev = mgp->pdev;
char *old_fw;
+ bool old_allocated;
int i, status, ncpus, msix_cap;
mgp->num_slices = 1;
@@ -3711,17 +3729,23 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
/* try to load the slice aware rss firmware */
old_fw = mgp->fw_name;
+ old_allocated = mgp->fw_name_allocated;
+ /* don't free old_fw if we override it. */
+ mgp->fw_name_allocated = false;
+
if (myri10ge_fw_name != NULL) {
dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n",
myri10ge_fw_name);
- mgp->fw_name = myri10ge_fw_name;
+ set_fw_name(mgp, myri10ge_fw_name, false);
} else if (old_fw == myri10ge_fw_aligned)
- mgp->fw_name = myri10ge_fw_rss_aligned;
+ set_fw_name(mgp, myri10ge_fw_rss_aligned, false);
else
- mgp->fw_name = myri10ge_fw_rss_unaligned;
+ set_fw_name(mgp, myri10ge_fw_rss_unaligned, false);
status = myri10ge_load_firmware(mgp, 0);
if (status != 0) {
dev_info(&pdev->dev, "Rss firmware not found\n");
+ if (old_allocated)
+ kfree(old_fw);
return;
}
@@ -3787,6 +3811,8 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
mgp->num_slices);
if (status == 0) {
pci_disable_msix(pdev);
+ if (old_allocated)
+ kfree(old_fw);
return;
}
if (status > 0)
@@ -3803,7 +3829,7 @@ disable_msix:
abort_with_fw:
mgp->num_slices = 1;
- mgp->fw_name = old_fw;
+ set_fw_name(mgp, old_fw, old_allocated);
myri10ge_load_firmware(mgp, 0);
}
@@ -4033,6 +4059,7 @@ abort_with_enabled:
pci_disable_device(pdev);
abort_with_netdev:
+ set_fw_name(mgp, NULL, false);
free_netdev(netdev);
return status;
}
@@ -4077,6 +4104,7 @@ static void myri10ge_remove(struct pci_dev *pdev)
dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
mgp->cmd, mgp->cmd_bus);
+ set_fw_name(mgp, NULL, false);
free_netdev(netdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 01e99f22210e..9142bfd3bab7 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
err = 0;
tfile->tun = tun;
tun->tfile = tfile;
+ tun->socket.file = file;
dev_hold(tun->dev);
sock_hold(tun->socket.sk);
atomic_inc(&tfile->count);
@@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun)
/* Detach from net device */
netif_tx_lock_bh(tun->dev);
tun->tfile = NULL;
+ tun->socket.file = NULL;
netif_tx_unlock_bh(tun->dev);
/* Drop read queue */
@@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
/* Notify and wake up reader process */
if (tun->flags & TUN_FASYNC)
kill_fasync(&tun->fasync, SIGIO, POLL_IN);
- wake_up_interruptible(&tun->socket.wait);
+ wake_up_interruptible_poll(&tun->socket.wait, POLLIN |
+ POLLRDNORM | POLLRDBAND);
return NETDEV_TX_OK;
drop:
@@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
len = min_t(int, skb->len, len);
skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
- total += len;
+ total += skb->len;
tun->dev->stats.tx_packets++;
tun->dev->stats.tx_bytes += len;
@@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
return total;
}
-static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
- unsigned long count, loff_t pos)
+static ssize_t tun_do_read(struct tun_struct *tun,
+ struct kiocb *iocb, const struct iovec *iv,
+ ssize_t len, int noblock)
{
- struct file *file = iocb->ki_filp;
- struct tun_file *tfile = file->private_data;
- struct tun_struct *tun = __tun_get(tfile);
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
- ssize_t len, ret = 0;
-
- if (!tun)
- return -EBADFD;
+ ssize_t ret = 0;
DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
- len = iov_length(iv, count);
- if (len < 0) {
- ret = -EINVAL;
- goto out;
- }
-
add_wait_queue(&tun->socket.wait, &wait);
while (len) {
current->state = TASK_INTERRUPTIBLE;
/* Read frames from the queue */
if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
- if (file->f_flags & O_NONBLOCK) {
+ if (noblock) {
ret = -EAGAIN;
break;
}
@@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
current->state = TASK_RUNNING;
remove_wait_queue(&tun->socket.wait, &wait);
+ return ret;
+}
+
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct tun_file *tfile = file->private_data;
+ struct tun_struct *tun = __tun_get(tfile);
+ ssize_t len, ret;
+
+ if (!tun)
+ return -EBADFD;
+ len = iov_length(iv, count);
+ if (len < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK);
+ ret = min_t(ssize_t, ret, len);
out:
tun_put(tun);
return ret;
@@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk)
return;
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible_sync(sk->sk_sleep);
+ wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+ POLLWRNORM | POLLWRBAND);
tun = container_of(sk, struct tun_sock, sk)->tun;
kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
@@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk)
free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
}
+static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
+{
+ struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+ return tun_get_user(tun, m->msg_iov, total_len,
+ m->msg_flags & MSG_DONTWAIT);
+}
+
+static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len,
+ int flags)
+{
+ struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+ int ret;
+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+ return -EINVAL;
+ ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
+ flags & MSG_DONTWAIT);
+ if (ret > total_len) {
+ m->msg_flags |= MSG_TRUNC;
+ ret = flags & MSG_TRUNC ? ret : total_len;
+ }
+ return ret;
+}
+
+/* Ops structure to mimic raw sockets with tun */
+static const struct proto_ops tun_socket_ops = {
+ .sendmsg = tun_sendmsg,
+ .recvmsg = tun_recvmsg,
+};
+
static struct proto tun_proto = {
.name = "tun",
.owner = THIS_MODULE,
@@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
goto err_free_dev;
init_waitqueue_head(&tun->socket.wait);
+ tun->socket.ops = &tun_socket_ops;
sock_init_data(&tun->socket, sk);
sk->sk_write_space = tun_sock_write_space;
sk->sk_sndbuf = INT_MAX;
@@ -1525,6 +1571,23 @@ static void tun_cleanup(void)
rtnl_link_unregister(&tun_link_ops);
}
+/* Get an underlying socket object from tun file. Returns error unless file is
+ * attached to a device. The returned object works like a packet socket, it
+ * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
+ * holding a reference to the file for as long as the socket is in use. */
+struct socket *tun_get_socket(struct file *file)
+{
+ struct tun_struct *tun;
+ if (file->f_op != &tun_fops)
+ return ERR_PTR(-EINVAL);
+ tun = tun_get(file);
+ if (!tun)
+ return ERR_PTR(-EBADFD);
+ tun_put(tun);
+ return &tun->socket;
+}
+EXPORT_SYMBOL_GPL(tun_get_socket);
+
module_init(tun_init);
module_exit(tun_cleanup);
MODULE_DESCRIPTION(DRV_DESCRIPTION);
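The new tun_get_socket() export is what lets another kernel module (vhost-net, added later in this series) drive a tap device through the socket layer. A sketch of a typical caller, per the comment above; the helper name is illustrative and error handling is minimal:

    #include <linux/err.h>
    #include <linux/file.h>
    #include <linux/if_tun.h>
    #include <linux/net.h>

    static struct socket *sock_from_tun_fd(int fd)
    {
            struct file *file = fget(fd);
            struct socket *sock;

            if (!file)
                    return ERR_PTR(-EBADF);
            sock = tun_get_socket(file);
            if (IS_ERR(sock))
                    fput(file);     /* not a tun file, or not attached */
            return sock;
    }

The returned socket is then driven through sock->ops->sendmsg()/recvmsg(), which is exactly what the tun_sendmsg()/tun_recvmsg() ops added above implement.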
diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c
index 09fcfad742e7..71d27ccec630 100644
--- a/drivers/net/wireless/libertas/if_sdio.c
+++ b/drivers/net/wireless/libertas/if_sdio.c
@@ -122,6 +122,8 @@ struct if_sdio_card {
const char *helper;
const char *firmware;
+ bool helper_allocated;
+ bool firmware_allocated;
u8 buffer[65536];
@@ -1001,16 +1003,34 @@ static int if_sdio_probe(struct sdio_func *func,
card->helper = if_sdio_models[i].helper;
card->firmware = if_sdio_models[i].firmware;
+ kparam_block_sysfs_write(helper_name);
if (lbs_helper_name) {
+ char *helper = kstrdup(lbs_helper_name, GFP_KERNEL);
+ if (!helper) {
+ kparam_unblock_sysfs_write(helper_name);
+ ret = -ENOMEM;
+ goto free;
+ }
lbs_deb_sdio("overriding helper firmware: %s\n",
lbs_helper_name);
- card->helper = lbs_helper_name;
+ card->helper = helper;
+ card->helper_allocated = true;
}
+ kparam_unblock_sysfs_write(helper_name);
+ kparam_block_sysfs_write(fw_name);
if (lbs_fw_name) {
+ char *fw_name = kstrdup(lbs_fw_name, GFP_KERNEL);
+ if (!fw_name) {
+ kparam_unblock_sysfs_write(fw_name);
+ ret = -ENOMEM;
+ goto free;
+ }
lbs_deb_sdio("overriding firmware: %s\n", lbs_fw_name);
- card->firmware = lbs_fw_name;
+ card->firmware = fw_name;
+ card->firmware_allocated = true;
}
+ kparam_unblock_sysfs_write(fw_name);
sdio_claim_host(func);
@@ -1125,6 +1145,10 @@ free:
kfree(packet);
}
+ if (card->helper_allocated)
+ kfree(card->helper);
+ if (card->firmware_allocated)
+ kfree(card->firmware);
kfree(card);
goto out;
@@ -1175,6 +1199,10 @@ static void if_sdio_remove(struct sdio_func *func)
kfree(packet);
}
+ if (card->helper_allocated)
+ kfree(card->helper);
+ if (card->firmware_allocated)
+ kfree(card->firmware);
kfree(card);
lbs_deb_leave(LBS_DEB_SDIO);
diff --git a/drivers/net/wireless/libertas/if_usb.c b/drivers/net/wireless/libertas/if_usb.c
index 65e174595d12..a41007ebe1f3 100644
--- a/drivers/net/wireless/libertas/if_usb.c
+++ b/drivers/net/wireless/libertas/if_usb.c
@@ -290,10 +290,13 @@ static int if_usb_probe(struct usb_interface *intf,
}
/* Upload firmware */
+ kparam_block_sysfs_write(fw_name);
if (__if_usb_prog_firmware(cardp, lbs_fw_name, BOOT_CMD_FW_BY_USB)) {
+ kparam_unblock_sysfs_write(fw_name);
lbs_deb_usbd(&udev->dev, "FW upload failed\n");
goto err_prog_firmware;
}
+ kparam_unblock_sysfs_write(fw_name);
if (!(priv = lbs_add_card(cardp, &udev->dev)))
goto err_prog_firmware;
diff --git a/drivers/net/wireless/libertas_tf/if_usb.c b/drivers/net/wireless/libertas_tf/if_usb.c
index 3691c307e674..669a3de68508 100644
--- a/drivers/net/wireless/libertas_tf/if_usb.c
+++ b/drivers/net/wireless/libertas_tf/if_usb.c
@@ -650,12 +650,15 @@ static int if_usb_prog_firmware(struct if_usb_card *cardp)
static int reset_count = 10;
int ret = 0;
+ kparam_block_sysfs_write(fw_name);
ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev);
if (ret < 0) {
printk(KERN_INFO "libertastf: firmware %s not found\n",
lbtf_fw_name);
+ kparam_unblock_sysfs_write(fw_name);
goto done;
}
+ kparam_unblock_sysfs_write(fw_name);
if (check_fwfile_format(cardp->fw->data, cardp->fw->size))
goto release_fw;
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index b52b773d49d9..c7307167e33c 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -726,6 +726,7 @@ bfad_drv_init(struct bfad_s *bfad)
memset(&driver_info, 0, sizeof(driver_info));
strncpy(driver_info.version, BFAD_DRIVER_VERSION,
sizeof(driver_info.version) - 1);
+ __kernel_param_lock();
if (host_name)
strncpy(driver_info.host_machine_name, host_name,
sizeof(driver_info.host_machine_name) - 1);
@@ -735,6 +736,7 @@ bfad_drv_init(struct bfad_s *bfad)
if (os_patch)
strncpy(driver_info.host_os_patch, os_patch,
sizeof(driver_info.host_os_patch) - 1);
+ __kernel_param_unlock();
strncpy(driver_info.os_device_name, bfad->pci_name,
sizeof(driver_info.os_device_name - 1));
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 4a43b74c0d27..0b728aa885bb 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1227,7 +1227,7 @@ int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
"CPU.\n");
spin_unlock_bh(&fps->fcoe_rx_list.lock);
- cpu = first_cpu(cpu_online_map);
+ cpu = cpumask_first(cpu_online_mask);
fps = &per_cpu(fcoe_percpu, cpu);
spin_lock_bh(&fps->fcoe_rx_list.lock);
if (!fps->thread) {
diff --git a/drivers/staging/rtl8187se/r8180_core.c b/drivers/staging/rtl8187se/r8180_core.c
index 53e654d0d4fa..9ca3c6f84dc7 100644
--- a/drivers/staging/rtl8187se/r8180_core.c
+++ b/drivers/staging/rtl8187se/r8180_core.c
@@ -66,7 +66,7 @@ static struct pci_device_id rtl8180_pci_id_tbl[] __devinitdata = {
};
-static char* ifname = "wlan%d";
+static char ifname[IFNAMSIZ] = "wlan%d";
static int hwseqnum = 0;
static int hwwep = 0;
static int channels = 0x3fff;
@@ -79,7 +79,7 @@ MODULE_AUTHOR("Andrea Merello <andreamrl@tiscali.it>");
MODULE_DESCRIPTION("Linux driver for Realtek RTL8180 / RTL8185 WiFi cards");
-module_param(ifname, charp, S_IRUGO|S_IWUSR );
+module_param_string(ifname, ifname, sizeof(ifname), S_IRUGO|S_IWUSR);
module_param(hwseqnum,int, S_IRUGO|S_IWUSR);
module_param(hwwep,int, S_IRUGO|S_IWUSR);
module_param(channels,int, S_IRUGO|S_IWUSR);
@@ -3918,7 +3918,7 @@ static int __devinit rtl8180_pci_probe(struct pci_dev *pdev,
if (dev_alloc_name(dev, ifname) < 0){
DMESG("Oops: devname already taken! Trying wlan%%d...\n");
- ifname = "wlan%d";
+ strcpy(ifname, "wlan%d");
dev_alloc_name(dev, ifname);
}
diff --git a/drivers/staging/rtl8192e/r8192E_core.c b/drivers/staging/rtl8192e/r8192E_core.c
index b0802a7aeb5f..8f4637ad466a 100644
--- a/drivers/staging/rtl8192e/r8192E_core.c
+++ b/drivers/staging/rtl8192e/r8192E_core.c
@@ -115,7 +115,7 @@ static struct pci_device_id rtl8192_pci_id_tbl[] __devinitdata = {
{}
};
-static char* ifname = "wlan%d";
+static char ifname[IFNAMSIZ] = "wlan%d";
static int hwwep = 1; //default use hw. set 0 to use software security
static int channels = 0x3fff;
@@ -126,7 +126,7 @@ MODULE_DEVICE_TABLE(pci, rtl8192_pci_id_tbl);
MODULE_DESCRIPTION("Linux driver for Realtek RTL819x WiFi cards");
-module_param(ifname, charp, S_IRUGO|S_IWUSR );
+module_param_string(ifname, ifname, sizeof(ifname), S_IRUGO|S_IWUSR);
//module_param(hwseqnum,int, S_IRUGO|S_IWUSR);
module_param(hwwep,int, S_IRUGO|S_IWUSR);
module_param(channels,int, S_IRUGO|S_IWUSR);
@@ -5972,7 +5972,7 @@ static int __devinit rtl8192_pci_probe(struct pci_dev *pdev,
if (dev_alloc_name(dev, ifname) < 0){
RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n");
- ifname = "wlan%d";
+ strcpy(ifname, "wlan%d");
dev_alloc_name(dev, ifname);
}
diff --git a/drivers/staging/rtl8192su/r8192U_core.c b/drivers/staging/rtl8192su/r8192U_core.c
index 66274d7666ff..e24815f52142 100644
--- a/drivers/staging/rtl8192su/r8192U_core.c
+++ b/drivers/staging/rtl8192su/r8192U_core.c
@@ -136,13 +136,13 @@ MODULE_VERSION("V 1.1");
MODULE_DEVICE_TABLE(usb, rtl8192_usb_id_tbl);
MODULE_DESCRIPTION("Linux driver for Realtek RTL8192 USB WiFi cards");
-static char* ifname = "wlan%d";
+static char ifname[IFNAMSIZ] = "wlan%d";
static int hwwep = 1; //default use hw. set 0 to use software security
static int channels = 0x3fff;
-module_param(ifname, charp, S_IRUGO|S_IWUSR );
+module_param_string(ifname, ifname, sizeof(ifname), S_IRUGO|S_IWUSR);
//module_param(hwseqnum,int, S_IRUGO|S_IWUSR);
module_param(hwwep,int, S_IRUGO|S_IWUSR);
module_param(channels,int, S_IRUGO|S_IWUSR);
@@ -7463,7 +7463,7 @@ static int __devinit rtl8192_usb_probe(struct usb_interface *intf,
if (dev_alloc_name(dev, ifname) < 0){
RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n");
- ifname = "wlan%d";
+ strcpy(ifname, "wlan%d");
dev_alloc_name(dev, ifname);
}
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index d171b563e94c..283a178997a5 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -1550,6 +1550,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver)
char file_arr[] = "CMVxy.bin";
char *file;
+ kparam_block_sysfs_write(cmv_file);
/* set proper name corresponding modem version and line type */
if (cmv_file[sc->modem_index] == NULL) {
if (UEA_CHIP_VERSION(sc) == ADI930)
@@ -1568,6 +1569,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver)
strlcat(cmv_name, file, UEA_FW_NAME_MAX);
if (ver == 2)
strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX);
+ kparam_unblock_sysfs_write(cmv_file);
}
static int request_cmvs_old(struct uea_softc *sc,
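ueagle-atm.c, the libertas drivers, myri10ge and hostaudio all rely on the same new guard: kparam_block_sysfs_write() pins a writable string parameter so a concurrent sysfs write cannot replace (and free) it while it is being read. A minimal sketch, assuming a charp parameter actually named fw_name:

    #include <linux/kernel.h>
    #include <linux/moduleparam.h>

    static char *fw_name;
    module_param(fw_name, charp, 0644);

    static void report_fw_name(void)
    {
            /* the helpers take the parameter name, not the variable's address */
            kparam_block_sysfs_write(fw_name);
            if (fw_name)
                    pr_info("using firmware %s\n", fw_name);
            kparam_unblock_sysfs_write(fw_name);
    }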
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
new file mode 100644
index 000000000000..9e9355367bb3
--- /dev/null
+++ b/drivers/vhost/Kconfig
@@ -0,0 +1,11 @@
+config VHOST_NET
+ tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)"
+ depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL
+ ---help---
+ This kernel module can be loaded in host kernel to accelerate
+ guest networking with virtio_net. Not to be confused with virtio_net
+ module itself which needs to be loaded in guest kernel.
+
+ To compile this driver as a module, choose M here: the module will
+ be called vhost_net.
+
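For context, userspace drives the new device roughly as sketched below, using ioctls declared in include/linux/vhost.h from this series; the full memory-table and vring setup is elided, and the /dev/vhost-net node name assumes the usual naming for the misc device:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/vhost.h>

    static int setup_vhost_net(int tap_fd)
    {
            int vhost_fd = open("/dev/vhost-net", O_RDWR);
            struct vhost_vring_file backend = { .index = 0, .fd = tap_fd };

            if (vhost_fd < 0)
                    return -1;
            ioctl(vhost_fd, VHOST_SET_OWNER);   /* bind the device to this process's mm */
            /* ... VHOST_SET_MEM_TABLE and VHOST_SET_VRING_* setup elided ... */
            ioctl(vhost_fd, VHOST_NET_SET_BACKEND, &backend);   /* attach tap fd to vq 0 (RX); repeat for TX */
            return vhost_fd;
    }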
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
new file mode 100644
index 000000000000..72dd02050bb9
--- /dev/null
+++ b/drivers/vhost/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_VHOST_NET) += vhost_net.o
+vhost_net-y := vhost.o net.o
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
new file mode 100644
index 000000000000..22d5fef472cc
--- /dev/null
+++ b/drivers/vhost/net.c
@@ -0,0 +1,648 @@
+/* Copyright (C) 2009 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * virtio-net server in host kernel.
+ */
+
+#include <linux/compat.h>
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <linux/mmu_context.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/rcupdate.h>
+#include <linux/file.h>
+
+#include <linux/net.h>
+#include <linux/if_packet.h>
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <net/sock.h>
+
+#include "vhost.h"
+
+/* Max number of bytes transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others. */
+#define VHOST_NET_WEIGHT 0x80000
+
+enum {
+ VHOST_NET_VQ_RX = 0,
+ VHOST_NET_VQ_TX = 1,
+ VHOST_NET_VQ_MAX = 2,
+};
+
+enum vhost_net_poll_state {
+ VHOST_NET_POLL_DISABLED = 0,
+ VHOST_NET_POLL_STARTED = 1,
+ VHOST_NET_POLL_STOPPED = 2,
+};
+
+struct vhost_net {
+ struct vhost_dev dev;
+ struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
+ struct vhost_poll poll[VHOST_NET_VQ_MAX];
+ /* Tells us whether we are polling a socket for TX.
+ * We only do this when socket buffer fills up.
+ * Protected by tx vq lock. */
+ enum vhost_net_poll_state tx_poll_state;
+};
+
+/* Pop first len bytes from iovec. Return number of segments used. */
+static int move_iovec_hdr(struct iovec *from, struct iovec *to,
+ size_t len, int iov_count)
+{
+ int seg = 0;
+ size_t size;
+ while (len && seg < iov_count) {
+ size = min(from->iov_len, len);
+ to->iov_base = from->iov_base;
+ to->iov_len = size;
+ from->iov_len -= size;
+ from->iov_base += size;
+ len -= size;
+ ++from;
+ ++to;
+ ++seg;
+ }
+ return seg;
+}
+
+/* Caller must have TX VQ lock */
+static void tx_poll_stop(struct vhost_net *net)
+{
+ if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
+ return;
+ vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
+ net->tx_poll_state = VHOST_NET_POLL_STOPPED;
+}
+
+/* Caller must have TX VQ lock */
+static void tx_poll_start(struct vhost_net *net, struct socket *sock)
+{
+ if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
+ return;
+ vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
+ net->tx_poll_state = VHOST_NET_POLL_STARTED;
+}
+
+/* Expects to always be run from workqueue - which acts as a
+ * read-side critical section for our kind of RCU. */
+static void handle_tx(struct vhost_net *net)
+{
+ struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
+ unsigned head, out, in, s;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_iov = vq->iov,
+ .msg_flags = MSG_DONTWAIT,
+ };
+ size_t len, total_len = 0;
+ int err, wmem;
+ size_t hdr_size;
+ struct socket *sock = rcu_dereference(vq->private_data);
+ if (!sock)
+ return;
+
+ wmem = atomic_read(&sock->sk->sk_wmem_alloc);
+ if (wmem >= sock->sk->sk_sndbuf)
+ return;
+
+ use_mm(net->dev.mm);
+ mutex_lock(&vq->mutex);
+ vhost_disable_notify(vq);
+
+ if (wmem < sock->sk->sk_sndbuf * 2)
+ tx_poll_stop(net);
+ hdr_size = vq->hdr_size;
+
+ for (;;) {
+ head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ NULL, NULL);
+ /* Nothing new? Wait for eventfd to tell us they refilled. */
+ if (head == vq->num) {
+ wmem = atomic_read(&sock->sk->sk_wmem_alloc);
+ if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
+ tx_poll_start(net, sock);
+ set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+ break;
+ }
+ if (unlikely(vhost_enable_notify(vq))) {
+ vhost_disable_notify(vq);
+ continue;
+ }
+ break;
+ }
+ if (in) {
+ vq_err(vq, "Unexpected descriptor format for TX: "
+ "out %d, int %d\n", out, in);
+ break;
+ }
+ /* Skip header. TODO: support TSO. */
+ s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
+ msg.msg_iovlen = out;
+ len = iov_length(vq->iov, out);
+ /* Sanity check */
+ if (!len) {
+ vq_err(vq, "Unexpected header len for TX: "
+ "%zd expected %zd\n",
+ iov_length(vq->hdr, s), hdr_size);
+ break;
+ }
+ /* TODO: Check specific error and bomb out unless ENOBUFS? */
+ err = sock->ops->sendmsg(NULL, sock, &msg, len);
+ if (unlikely(err < 0)) {
+ vhost_discard_vq_desc(vq);
+ tx_poll_start(net, sock);
+ break;
+ }
+ if (err != len)
+ pr_err("Truncated TX packet: "
+ " len %d != %zd\n", err, len);
+ vhost_add_used_and_signal(&net->dev, vq, head, 0);
+ total_len += len;
+ if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+ }
+
+ mutex_unlock(&vq->mutex);
+ unuse_mm(net->dev.mm);
+}
+
+/* Expects to always be run from workqueue - which acts as a
+ * read-side critical section for our kind of RCU. */
+static void handle_rx(struct vhost_net *net)
+{
+ struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
+ unsigned head, out, in, log, s;
+ struct vhost_log *vq_log;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_control = NULL, /* FIXME: get and handle RX aux data. */
+ .msg_controllen = 0,
+ .msg_iov = vq->iov,
+ .msg_flags = MSG_DONTWAIT,
+ };
+
+ struct virtio_net_hdr hdr = {
+ .flags = 0,
+ .gso_type = VIRTIO_NET_HDR_GSO_NONE
+ };
+
+ size_t len, total_len = 0;
+ int err;
+ size_t hdr_size;
+ struct socket *sock = rcu_dereference(vq->private_data);
+ if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
+ return;
+
+ use_mm(net->dev.mm);
+ mutex_lock(&vq->mutex);
+ vhost_disable_notify(vq);
+ hdr_size = vq->hdr_size;
+
+ vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
+ vq->log : NULL;
+
+ for (;;) {
+ head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ vq_log, &log);
+ /* OK, now we need to know about added descriptors. */
+ if (head == vq->num) {
+ if (unlikely(vhost_enable_notify(vq))) {
+ /* They have slipped one in as we were
+ * doing that: check again. */
+ vhost_disable_notify(vq);
+ continue;
+ }
+ /* Nothing new? Wait for eventfd to tell us
+ * they refilled. */
+ break;
+ }
+ /* We don't need to be notified again. */
+ if (out) {
+ vq_err(vq, "Unexpected descriptor format for RX: "
+ "out %d, int %d\n",
+ out, in);
+ break;
+ }
+ /* Skip header. TODO: support TSO/mergeable rx buffers. */
+ s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
+ msg.msg_iovlen = in;
+ len = iov_length(vq->iov, in);
+ /* Sanity check */
+ if (!len) {
+ vq_err(vq, "Unexpected header len for RX: "
+ "%zd expected %zd\n",
+ iov_length(vq->hdr, s), hdr_size);
+ break;
+ }
+ err = sock->ops->recvmsg(NULL, sock, &msg,
+ len, MSG_DONTWAIT | MSG_TRUNC);
+ /* TODO: Check specific error and bomb out unless EAGAIN? */
+ if (err < 0) {
+ vhost_discard_vq_desc(vq);
+ break;
+ }
+ /* TODO: Should check and handle checksum. */
+ if (err > len) {
+ pr_err("Discarded truncated rx packet: "
+ " len %d > %zd\n", err, len);
+ vhost_discard_vq_desc(vq);
+ continue;
+ }
+ len = err;
+ err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
+ if (err) {
+ vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
+ vq->iov->iov_base, err);
+ break;
+ }
+ len += hdr_size;
+ vhost_add_used_and_signal(&net->dev, vq, head, len);
+ if (unlikely(vq_log))
+ vhost_log_write(vq, vq_log, log, len);
+ total_len += len;
+ if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+ }
+
+ mutex_unlock(&vq->mutex);
+ unuse_mm(net->dev.mm);
+}
+
+static void handle_tx_kick(struct work_struct *work)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_net *net;
+ vq = container_of(work, struct vhost_virtqueue, poll.work);
+ net = container_of(vq->dev, struct vhost_net, dev);
+ handle_tx(net);
+}
+
+static void handle_rx_kick(struct work_struct *work)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_net *net;
+ vq = container_of(work, struct vhost_virtqueue, poll.work);
+ net = container_of(vq->dev, struct vhost_net, dev);
+ handle_rx(net);
+}
+
+static void handle_tx_net(struct work_struct *work)
+{
+ struct vhost_net *net;
+ net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work);
+ handle_tx(net);
+}
+
+static void handle_rx_net(struct work_struct *work)
+{
+ struct vhost_net *net;
+ net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work);
+ handle_rx(net);
+}
+
+static int vhost_net_open(struct inode *inode, struct file *f)
+{
+ struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+ int r;
+ if (!n)
+ return -ENOMEM;
+ f->private_data = n;
+ n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
+ n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
+ r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX);
+ if (r < 0) {
+ kfree(n);
+ return r;
+ }
+
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN);
+ n->tx_poll_state = VHOST_NET_POLL_DISABLED;
+ return 0;
+}
+
+static void vhost_net_disable_vq(struct vhost_net *n,
+ struct vhost_virtqueue *vq)
+{
+ if (!vq->private_data)
+ return;
+ if (vq == n->vqs + VHOST_NET_VQ_TX) {
+ tx_poll_stop(n);
+ n->tx_poll_state = VHOST_NET_POLL_DISABLED;
+ } else
+ vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
+}
+
+static void vhost_net_enable_vq(struct vhost_net *n,
+ struct vhost_virtqueue *vq)
+{
+ struct socket *sock = vq->private_data;
+ if (!sock)
+ return;
+ if (vq == n->vqs + VHOST_NET_VQ_TX) {
+ n->tx_poll_state = VHOST_NET_POLL_STOPPED;
+ tx_poll_start(n, sock);
+ } else
+ vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
+}
+
+static struct socket *vhost_net_stop_vq(struct vhost_net *n,
+ struct vhost_virtqueue *vq)
+{
+ struct socket *sock;
+
+ mutex_lock(&vq->mutex);
+ sock = vq->private_data;
+ vhost_net_disable_vq(n, vq);
+ rcu_assign_pointer(vq->private_data, NULL);
+ mutex_unlock(&vq->mutex);
+ return sock;
+}
+
+static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
+ struct socket **rx_sock)
+{
+ *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
+ *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
+}
+
+static void vhost_net_flush_vq(struct vhost_net *n, int index)
+{
+ vhost_poll_flush(n->poll + index);
+ vhost_poll_flush(&n->dev.vqs[index].poll);
+}
+
+static void vhost_net_flush(struct vhost_net *n)
+{
+ vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
+ vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
+}
+
+static int vhost_net_release(struct inode *inode, struct file *f)
+{
+ struct vhost_net *n = f->private_data;
+ struct socket *tx_sock;
+ struct socket *rx_sock;
+
+ vhost_net_stop(n, &tx_sock, &rx_sock);
+ vhost_net_flush(n);
+ vhost_dev_cleanup(&n->dev);
+ if (tx_sock)
+ fput(tx_sock->file);
+ if (rx_sock)
+ fput(rx_sock->file);
+ /* We do an extra flush before freeing memory,
+ * since jobs can re-queue themselves. */
+ vhost_net_flush(n);
+ kfree(n);
+ return 0;
+}
+
+static struct socket *get_raw_socket(int fd)
+{
+ struct {
+ struct sockaddr_ll sa;
+ char buf[MAX_ADDR_LEN];
+ } uaddr;
+ int uaddr_len = sizeof uaddr, r;
+ struct socket *sock = sockfd_lookup(fd, &r);
+ if (!sock)
+ return ERR_PTR(-ENOTSOCK);
+
+ /* Parameter checking */
+ if (sock->sk->sk_type != SOCK_RAW) {
+ r = -ESOCKTNOSUPPORT;
+ goto err;
+ }
+
+ r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
+ &uaddr_len, 0);
+ if (r)
+ goto err;
+
+ if (uaddr.sa.sll_family != AF_PACKET) {
+ r = -EPFNOSUPPORT;
+ goto err;
+ }
+ return sock;
+err:
+ fput(sock->file);
+ return ERR_PTR(r);
+}
+
+static struct socket *get_tun_socket(int fd)
+{
+ struct file *file = fget(fd);
+ struct socket *sock;
+ if (!file)
+ return ERR_PTR(-EBADF);
+ sock = tun_get_socket(file);
+ if (IS_ERR(sock))
+ fput(file);
+ return sock;
+}
+
+static struct socket *get_socket(int fd)
+{
+ struct socket *sock;
+ if (fd == -1)
+ return NULL;
+ sock = get_raw_socket(fd);
+ if (!IS_ERR(sock))
+ return sock;
+ sock = get_tun_socket(fd);
+ if (!IS_ERR(sock))
+ return sock;
+ return ERR_PTR(-ENOTSOCK);
+}
+
+static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
+{
+ struct socket *sock, *oldsock;
+ struct vhost_virtqueue *vq;
+ int r;
+
+ mutex_lock(&n->dev.mutex);
+ r = vhost_dev_check_owner(&n->dev);
+ if (r)
+ goto err;
+
+ if (index >= VHOST_NET_VQ_MAX) {
+ r = -ENOBUFS;
+ goto err;
+ }
+ vq = n->vqs + index;
+ mutex_lock(&vq->mutex);
+ sock = get_socket(fd);
+ if (IS_ERR(sock)) {
+ r = PTR_ERR(sock);
+ goto err;
+ }
+
+ /* start polling new socket */
+ oldsock = vq->private_data;
+ if (sock == oldsock)
+ goto done;
+
+ vhost_net_disable_vq(n, vq);
+ rcu_assign_pointer(vq->private_data, sock);
+ vhost_net_enable_vq(n, vq);
+ mutex_unlock(&vq->mutex);
+done:
+ mutex_unlock(&n->dev.mutex);
+ if (oldsock) {
+ vhost_net_flush_vq(n, index);
+ fput(oldsock->file);
+ }
+ return r;
+err:
+ mutex_unlock(&n->dev.mutex);
+ return r;
+}
+
+static long vhost_net_reset_owner(struct vhost_net *n)
+{
+ struct socket *tx_sock = NULL;
+ struct socket *rx_sock = NULL;
+ long err;
+ mutex_lock(&n->dev.mutex);
+ err = vhost_dev_check_owner(&n->dev);
+ if (err)
+ goto done;
+ vhost_net_stop(n, &tx_sock, &rx_sock);
+ vhost_net_flush(n);
+ err = vhost_dev_reset_owner(&n->dev);
+done:
+ mutex_unlock(&n->dev.mutex);
+ if (tx_sock)
+ fput(tx_sock->file);
+ if (rx_sock)
+ fput(rx_sock->file);
+ return err;
+}
+
+static void vhost_net_set_features(struct vhost_net *n, u64 features)
+{
+ size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ?
+ sizeof(struct virtio_net_hdr) : 0;
+ int i;
+ mutex_lock(&n->dev.mutex);
+ n->dev.acked_features = features;
+ smp_wmb();
+ for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+ mutex_lock(&n->vqs[i].mutex);
+ n->vqs[i].hdr_size = hdr_size;
+ mutex_unlock(&n->vqs[i].mutex);
+ }
+ mutex_unlock(&n->dev.mutex);
+ vhost_net_flush(n);
+}
+
+static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct vhost_net *n = f->private_data;
+ void __user *argp = (void __user *)arg;
+ u32 __user *featurep = argp;
+ struct vhost_vring_file backend;
+ u64 features;
+ int r;
+ switch (ioctl) {
+ case VHOST_NET_SET_BACKEND:
+ r = copy_from_user(&backend, argp, sizeof backend);
+ if (r < 0)
+ return r;
+ return vhost_net_set_backend(n, backend.index, backend.fd);
+ case VHOST_GET_FEATURES:
+ features = VHOST_FEATURES;
+ return put_user(features, featurep);
+ case VHOST_SET_FEATURES:
+ r = get_user(features, featurep);
+ /* No features for now */
+ if (r < 0)
+ return r;
+ if (features & ~VHOST_FEATURES)
+ return -EOPNOTSUPP;
+ vhost_net_set_features(n, features);
+ return 0;
+ case VHOST_RESET_OWNER:
+ return vhost_net_reset_owner(n);
+ default:
+ r = vhost_dev_ioctl(&n->dev, ioctl, arg);
+ vhost_net_flush(n);
+ return r;
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations vhost_net_fops = {
+ .owner = THIS_MODULE,
+ .release = vhost_net_release,
+ .unlocked_ioctl = vhost_net_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = vhost_net_compat_ioctl,
+#endif
+ .open = vhost_net_open,
+};
+
+static struct miscdevice vhost_net_misc = {
+ VHOST_NET_MINOR,
+ "vhost-net",
+ &vhost_net_fops,
+};
+
+int vhost_net_init(void)
+{
+ int r = vhost_init();
+ if (r)
+ goto err_init;
+ r = misc_register(&vhost_net_misc);
+ if (r)
+ goto err_reg;
+ return 0;
+err_reg:
+ vhost_cleanup();
+err_init:
+ return r;
+
+}
+module_init(vhost_net_init);
+
+void vhost_net_exit(void)
+{
+ misc_deregister(&vhost_net_misc);
+ vhost_cleanup();
+}
+module_exit(vhost_net_exit);
+
+MODULE_VERSION("0.0.1");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Michael S. Tsirkin");
+MODULE_DESCRIPTION("Host kernel accelerator for virtio net");
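
An aside on the data path above (illustration only, not part of the patch): move_iovec_hdr() is what splits the virtio_net_hdr off the front of the guest's scatter-gather list before the payload is handed to sendmsg()/recvmsg(). The standalone userspace re-implementation below shows its effect on a two-segment iovec; it mirrors the kernel function except for the explicit cast in the pointer arithmetic.

#include <stdio.h>
#include <sys/uio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static int move_iovec_hdr(struct iovec *from, struct iovec *to,
                          size_t len, int iov_count)
{
        int seg = 0;
        size_t size;

        while (len && seg < iov_count) {
                size = MIN(from->iov_len, len);
                to->iov_base = from->iov_base;
                to->iov_len = size;
                from->iov_len -= size;
                from->iov_base = (char *)from->iov_base + size;
                len -= size;
                ++from;
                ++to;
                ++seg;
        }
        return seg;
}

int main(void)
{
        char buf[64];
        struct iovec iov[2] = {
                { .iov_base = buf,     .iov_len = 4  },  /* start of header */
                { .iov_base = buf + 4, .iov_len = 60 },  /* header tail + payload */
        };
        struct iovec hdr[2];
        /* Pop a 10-byte header: consumes iov[0] and 6 bytes of iov[1]. */
        int segs = move_iovec_hdr(iov, hdr, 10, 2);

        printf("header uses %d segments; payload starts at offset %zu, %zu bytes left\n",
               segs, (size_t)((char *)iov[1].iov_base - buf), iov[1].iov_len);
        return 0;
}
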
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
new file mode 100644
index 000000000000..97233d5ad1b9
--- /dev/null
+++ b/drivers/vhost/vhost.c
@@ -0,0 +1,965 @@
+/* Copyright (C) 2009 Red Hat, Inc.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Inspiration, some code, and most witty comments come from
+ * Documentation/lguest/lguest.c, by Rusty Russell
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Generic code for virtio server in host kernel.
+ */
+
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/rcupdate.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/highmem.h>
+
+#include <linux/net.h>
+#include <linux/if_packet.h>
+#include <linux/if_arp.h>
+
+#include <net/sock.h>
+
+#include "vhost.h"
+
+enum {
+ VHOST_MEMORY_MAX_NREGIONS = 64,
+ VHOST_MEMORY_F_LOG = 0x1,
+};
+
+static struct workqueue_struct *vhost_workqueue;
+
+static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
+ poll_table *pt)
+{
+ struct vhost_poll *poll;
+ poll = container_of(pt, struct vhost_poll, table);
+
+ poll->wqh = wqh;
+ add_wait_queue(wqh, &poll->wait);
+}
+
+static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct vhost_poll *poll;
+ poll = container_of(wait, struct vhost_poll, wait);
+ if (!((unsigned long)key & poll->mask))
+ return 0;
+
+ queue_work(vhost_workqueue, &poll->work);
+ return 0;
+}
+
+/* Init poll structure */
+void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
+ unsigned long mask)
+{
+ INIT_WORK(&poll->work, func);
+ init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
+ init_poll_funcptr(&poll->table, vhost_poll_func);
+ poll->mask = mask;
+}
+
+/* Start polling a file. We add ourselves to the file's wait queue. The caller must
+ * keep a reference to the file until after vhost_poll_stop is called. */
+void vhost_poll_start(struct vhost_poll *poll, struct file *file)
+{
+ unsigned long mask;
+ mask = file->f_op->poll(file, &poll->table);
+ if (mask)
+ vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
+}
+
+/* Stop polling a file. After this function returns, it becomes safe to drop the
+ * file reference. You must also flush afterwards. */
+void vhost_poll_stop(struct vhost_poll *poll)
+{
+ remove_wait_queue(poll->wqh, &poll->wait);
+}
+
+/* Flush any work that has been scheduled. When calling this, don't hold any
+ * locks that are also used by the callback. */
+void vhost_poll_flush(struct vhost_poll *poll)
+{
+ flush_work(&poll->work);
+}
+
+void vhost_poll_queue(struct vhost_poll *poll)
+{
+ queue_work(vhost_workqueue, &poll->work);
+}
+
+static void vhost_vq_reset(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq)
+{
+ vq->num = 1;
+ vq->desc = NULL;
+ vq->avail = NULL;
+ vq->used = NULL;
+ vq->last_avail_idx = 0;
+ vq->avail_idx = 0;
+ vq->last_used_idx = 0;
+ vq->used_flags = 0;
+ vq->log_used = false;
+ vq->log_addr = -1ull;
+ vq->hdr_size = 0;
+ vq->private_data = NULL;
+ vq->log_base = NULL;
+ vq->error_ctx = NULL;
+ vq->error = NULL;
+ vq->kick = NULL;
+ vq->call_ctx = NULL;
+ vq->call = NULL;
+}
+
+long vhost_dev_init(struct vhost_dev *dev,
+ struct vhost_virtqueue *vqs, int nvqs)
+{
+ int i;
+ dev->vqs = vqs;
+ dev->nvqs = nvqs;
+ mutex_init(&dev->mutex);
+ dev->log_ctx = NULL;
+ dev->log_file = NULL;
+ dev->memory = NULL;
+ dev->mm = NULL;
+
+ for (i = 0; i < dev->nvqs; ++i) {
+ dev->vqs[i].dev = dev;
+ mutex_init(&dev->vqs[i].mutex);
+ vhost_vq_reset(dev, dev->vqs + i);
+ if (dev->vqs[i].handle_kick)
+ vhost_poll_init(&dev->vqs[i].poll,
+ dev->vqs[i].handle_kick,
+ POLLIN);
+ }
+ return 0;
+}
+
+/* Caller should have device mutex */
+long vhost_dev_check_owner(struct vhost_dev *dev)
+{
+ /* Are you the owner? If not, I don't think you mean to do that */
+ return dev->mm == current->mm ? 0 : -EPERM;
+}
+
+/* Caller should have device mutex */
+static long vhost_dev_set_owner(struct vhost_dev *dev)
+{
+ /* Is there an owner already? */
+ if (dev->mm)
+ return -EBUSY;
+ /* No owner, become one */
+ dev->mm = get_task_mm(current);
+ return 0;
+}
+
+/* Caller should have device mutex */
+long vhost_dev_reset_owner(struct vhost_dev *dev)
+{
+ struct vhost_memory *memory;
+
+ /* Restore memory to default 1:1 mapping. */
+ memory = kmalloc(offsetof(struct vhost_memory, regions) +
+ 2 * sizeof *memory->regions, GFP_KERNEL);
+ if (!memory)
+ return -ENOMEM;
+
+ vhost_dev_cleanup(dev);
+
+ memory->nregions = 2;
+ memory->regions[0].guest_phys_addr = 1;
+ memory->regions[0].userspace_addr = 1;
+ memory->regions[0].memory_size = ~0ULL;
+ memory->regions[1].guest_phys_addr = 0;
+ memory->regions[1].userspace_addr = 0;
+ memory->regions[1].memory_size = 1;
+ dev->memory = memory;
+ return 0;
+}
+
+/* Caller should have device mutex */
+void vhost_dev_cleanup(struct vhost_dev *dev)
+{
+ int i;
+ for (i = 0; i < dev->nvqs; ++i) {
+ if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
+ vhost_poll_stop(&dev->vqs[i].poll);
+ vhost_poll_flush(&dev->vqs[i].poll);
+ }
+ if (dev->vqs[i].error_ctx)
+ eventfd_ctx_put(dev->vqs[i].error_ctx);
+ if (dev->vqs[i].error)
+ fput(dev->vqs[i].error);
+ if (dev->vqs[i].kick)
+ fput(dev->vqs[i].kick);
+ if (dev->vqs[i].call_ctx)
+ eventfd_ctx_put(dev->vqs[i].call_ctx);
+ if (dev->vqs[i].call)
+ fput(dev->vqs[i].call);
+ vhost_vq_reset(dev, dev->vqs + i);
+ }
+ if (dev->log_ctx)
+ eventfd_ctx_put(dev->log_ctx);
+ dev->log_ctx = NULL;
+ if (dev->log_file)
+ fput(dev->log_file);
+ dev->log_file = NULL;
+ /* No one will access memory at this point */
+ kfree(dev->memory);
+ dev->memory = NULL;
+ if (dev->mm)
+ mmput(dev->mm);
+ dev->mm = NULL;
+}
+
+static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
+{
+ struct vhost_memory mem, *newmem, *oldmem;
+ unsigned long size = offsetof(struct vhost_memory, regions);
+ long r;
+ r = copy_from_user(&mem, m, size);
+ if (r)
+ return r;
+ if (mem.padding)
+ return -EOPNOTSUPP;
+ if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS)
+ return -E2BIG;
+ newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL);
+ if (!newmem)
+ return -ENOMEM;
+
+ memcpy(newmem, &mem, size);
+ r = copy_from_user(newmem->regions, m->regions,
+ mem.nregions * sizeof *m->regions);
+ if (r) {
+ kfree(newmem);
+ return r;
+ }
+ oldmem = d->memory;
+ rcu_assign_pointer(d->memory, newmem);
+ synchronize_rcu();
+ kfree(oldmem);
+ return 0;
+}
+
+static int init_used(struct vhost_virtqueue *vq,
+ struct vring_used __user *used)
+{
+ int r = put_user(vq->used_flags, &used->flags);
+ if (r)
+ return r;
+ return get_user(vq->last_used_idx, &used->idx);
+}
+
+static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
+{
+ struct file *eventfp, *filep = NULL,
+ *pollstart = NULL, *pollstop = NULL;
+ struct eventfd_ctx *ctx = NULL;
+ u32 __user *idxp = argp;
+ struct vhost_virtqueue *vq;
+ struct vhost_vring_state s;
+ struct vhost_vring_file f;
+ struct vhost_vring_addr a;
+ u32 idx;
+ long r;
+
+ r = get_user(idx, idxp);
+ if (r < 0)
+ return r;
+ if (idx > d->nvqs)
+ return -ENOBUFS;
+
+ vq = d->vqs + idx;
+
+ mutex_lock(&vq->mutex);
+
+ switch (ioctl) {
+ case VHOST_SET_VRING_NUM:
+ r = copy_from_user(&s, argp, sizeof s);
+ if (r < 0)
+ break;
+ if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) {
+ r = -EINVAL;
+ break;
+ }
+ vq->num = s.num;
+ break;
+ case VHOST_SET_VRING_BASE:
+ r = copy_from_user(&s, argp, sizeof s);
+ if (r < 0)
+ break;
+ if (s.num > 0xffff) {
+ r = -EINVAL;
+ break;
+ }
+ vq->last_avail_idx = s.num;
+ /* Forget the cached index value. */
+ vq->avail_idx = vq->last_avail_idx;
+ break;
+ case VHOST_GET_VRING_BASE:
+ s.index = idx;
+ s.num = vq->last_avail_idx;
+ r = copy_to_user(argp, &s, sizeof s);
+ break;
+ case VHOST_SET_VRING_ADDR:
+ r = copy_from_user(&a, argp, sizeof a);
+ if (r < 0)
+ break;
+ if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
+ (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
+ (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) {
+ r = -EFAULT;
+ break;
+ }
+ if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) ||
+ (a.used_user_addr & (sizeof *vq->used->ring - 1)) ||
+ (a.log_guest_addr & (sizeof *vq->used->ring - 1))) {
+ r = -EINVAL;
+ break;
+ }
+ r = init_used(vq, (struct vring_used __user *)a.used_user_addr);
+ if (r)
+ break;
+ vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
+ vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
+ vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
+ vq->log_addr = a.log_guest_addr;
+ vq->used = (void __user *)(unsigned long)a.used_user_addr;
+ break;
+ case VHOST_SET_VRING_KICK:
+ r = copy_from_user(&f, argp, sizeof f);
+ if (r < 0)
+ break;
+ eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+ if (IS_ERR(eventfp))
+ return PTR_ERR(eventfp);
+ if (eventfp != vq->kick) {
+ pollstop = filep = vq->kick;
+ pollstart = vq->kick = eventfp;
+ } else
+ filep = eventfp;
+ break;
+ case VHOST_SET_VRING_CALL:
+ r = copy_from_user(&f, argp, sizeof f);
+ if (r < 0)
+ break;
+ eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+ if (IS_ERR(eventfp))
+ return PTR_ERR(eventfp);
+ if (eventfp != vq->call) {
+ filep = vq->call;
+ ctx = vq->call_ctx;
+ vq->call = eventfp;
+ vq->call_ctx = eventfp ?
+ eventfd_ctx_fileget(eventfp) : NULL;
+ } else
+ filep = eventfp;
+ break;
+ case VHOST_SET_VRING_ERR:
+ r = copy_from_user(&f, argp, sizeof f);
+ if (r < 0)
+ break;
+ eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+ if (IS_ERR(eventfp))
+ return PTR_ERR(eventfp);
+ if (eventfp != vq->error) {
+ filep = vq->error;
+ vq->error = eventfp;
+ ctx = vq->error_ctx;
+ vq->error_ctx = eventfp ?
+ eventfd_ctx_fileget(eventfp) : NULL;
+ } else
+ filep = eventfp;
+ break;
+ default:
+ r = -ENOIOCTLCMD;
+ }
+
+ if (pollstop && vq->handle_kick)
+ vhost_poll_stop(&vq->poll);
+
+ if (ctx)
+ eventfd_ctx_put(ctx);
+ if (filep)
+ fput(filep);
+
+ if (pollstart && vq->handle_kick)
+ vhost_poll_start(&vq->poll, vq->kick);
+
+ mutex_unlock(&vq->mutex);
+
+ if (pollstop && vq->handle_kick)
+ vhost_poll_flush(&vq->poll);
+ return r;
+}
+
+long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct file *eventfp, *filep = NULL;
+ struct eventfd_ctx *ctx = NULL;
+ u64 p;
+ long r;
+ int i, fd;
+
+ mutex_lock(&d->mutex);
+ /* If you are not the owner, you can become one */
+ if (ioctl == VHOST_SET_OWNER) {
+ r = vhost_dev_set_owner(d);
+ goto done;
+ }
+
+ /* You must be the owner to do anything else */
+ r = vhost_dev_check_owner(d);
+ if (r)
+ goto done;
+
+ switch (ioctl) {
+ case VHOST_SET_MEM_TABLE:
+ r = vhost_set_memory(d, argp);
+ break;
+ case VHOST_SET_LOG_BASE:
+ r = copy_from_user(&p, argp, sizeof p);
+ if (r < 0)
+ break;
+ if ((u64)(unsigned long)p != p) {
+ r = -EFAULT;
+ break;
+ }
+ for (i = 0; i < d->nvqs; ++i) {
+ mutex_lock(&d->vqs[i].mutex);
+ d->vqs[i].log_base = (void __user *)(unsigned long)p;
+ mutex_unlock(&d->vqs[i].mutex);
+ }
+ break;
+ case VHOST_SET_LOG_FD:
+ r = get_user(fd, (int __user *)argp);
+ if (r < 0)
+ break;
+ eventfp = fd == -1 ? NULL : eventfd_fget(fd);
+ if (IS_ERR(eventfp)) {
+ r = PTR_ERR(eventfp);
+ break;
+ }
+ if (eventfp != d->log_file) {
+ filep = d->log_file;
+ ctx = d->log_ctx;
+ d->log_ctx = eventfp ?
+ eventfd_ctx_fileget(eventfp) : NULL;
+ } else
+ filep = eventfp;
+ for (i = 0; i < d->nvqs; ++i) {
+ mutex_lock(&d->vqs[i].mutex);
+ d->vqs[i].log_ctx = d->log_ctx;
+ mutex_unlock(&d->vqs[i].mutex);
+ }
+ if (ctx)
+ eventfd_ctx_put(ctx);
+ if (filep)
+ fput(filep);
+ break;
+ default:
+ r = vhost_set_vring(d, ioctl, argp);
+ break;
+ }
+done:
+ mutex_unlock(&d->mutex);
+ return r;
+}
+
+static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
+ __u64 addr, __u32 len)
+{
+ struct vhost_memory_region *reg;
+ int i;
+ /* linear search is not brilliant, but we really have on the order of 6
+ * regions in practice */
+ for (i = 0; i < mem->nregions; ++i) {
+ reg = mem->regions + i;
+ if (reg->guest_phys_addr <= addr &&
+ reg->guest_phys_addr + reg->memory_size - 1 >= addr)
+ return reg;
+ }
+ return NULL;
+}
+
+/* TODO: This is really inefficient. We need something like get_user()
+ * (instruction directly accesses the data, with an exception table entry
+ * returning -EFAULT). See Documentation/x86/exception-tables.txt.
+ */
+static int set_bit_to_user(int nr, void __user *addr)
+{
+ unsigned long log = (unsigned long)addr;
+ struct page *page;
+ void *base;
+ int bit = nr + (log % PAGE_SIZE) * 8;
+ int r;
+ r = get_user_pages_fast(log, 1, 1, &page);
+ if (r != 1)
+ return r < 0 ? r : -EFAULT;
+ base = kmap_atomic(page, KM_USER0);
+ set_bit(bit, base);
+ kunmap_atomic(base, KM_USER0);
+ set_page_dirty_lock(page);
+ put_page(page);
+ return 0;
+}
+
+static int log_write(void __user *log_base,
+ u64 write_address, u64 write_length)
+{
+ int r;
+ if (!write_length)
+ return 0;
+ write_address /= VHOST_PAGE_SIZE;
+ for (;;) {
+ u64 base = (u64)(unsigned long)log_base;
+ u64 log = base + write_address / 8;
+ int bit = write_address % 8;
+ if ((u64)(unsigned long)log != log)
+ return -EFAULT;
+ r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
+ if (r < 0)
+ return r;
+ if (write_length <= VHOST_PAGE_SIZE)
+ break;
+ write_length -= VHOST_PAGE_SIZE;
+ write_address += 1; /* write_address now counts pages, not bytes */
+ }
+ return r;
+}
+
+int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+ unsigned int log_num, u64 len)
+{
+ int i, r;
+
+ /* Make sure data written is seen before log. */
+ wmb();
+ for (i = 0; i < log_num; ++i) {
+ u64 l = min(log[i].len, len);
+ r = log_write(vq->log_base, log[i].addr, l);
+ if (r < 0)
+ return r;
+ len -= l;
+ if (!len)
+ return 0;
+ }
+ if (vq->log_ctx)
+ eventfd_signal(vq->log_ctx, 1);
+ /* Length written exceeds what we have stored. This is a bug. */
+ BUG();
+ return 0;
+}
+
+int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
+ struct iovec iov[], int iov_size)
+{
+ const struct vhost_memory_region *reg;
+ struct vhost_memory *mem;
+ struct iovec *_iov;
+ u64 s = 0;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ mem = rcu_dereference(dev->memory);
+ while ((u64)len > s) {
+ u64 size;
+ if (ret >= iov_size) {
+ ret = -ENOBUFS;
+ break;
+ }
+ reg = find_region(mem, addr, len);
+ if (!reg) {
+ ret = -EFAULT;
+ break;
+ }
+ _iov = iov + ret;
+ size = reg->memory_size - addr + reg->guest_phys_addr;
+ _iov->iov_len = min((u64)len, size);
+ _iov->iov_base = (void *)(unsigned long)
+ (reg->userspace_addr + addr - reg->guest_phys_addr);
+ s += size;
+ addr += size;
+ ++ret;
+ }
+
+ rcu_read_unlock();
+ return ret;
+}
+
+/* Each buffer in the virtqueues is actually a chain of descriptors. This
+ * function returns the next descriptor in the chain,
+ * or -1U if we're at the end. */
+static unsigned next_desc(struct vring_desc *desc)
+{
+ unsigned int next;
+
+ /* If this descriptor says it doesn't chain, we're done. */
+ if (!(desc->flags & VRING_DESC_F_NEXT))
+ return -1U;
+
+ /* Check they're not leading us off the end of the descriptors. */
+ next = desc->next;
+ /* Make sure compiler knows to grab that: we don't want it changing! */
+ /* We will use the result as an index in an array, so most
+ * architectures only need a compiler barrier here. */
+ read_barrier_depends();
+
+ return next;
+}
+
+static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num,
+ struct vring_desc *indirect)
+{
+ struct vring_desc desc;
+ unsigned int i = 0, count, found = 0;
+ int ret;
+
+ /* Sanity check */
+ if (indirect->len % sizeof desc) {
+ vq_err(vq, "Invalid length in indirect descriptor: "
+ "len 0x%llx not multiple of 0x%zx\n",
+ (unsigned long long)indirect->len,
+ sizeof desc);
+ return -EINVAL;
+ }
+
+ ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
+ ARRAY_SIZE(vq->indirect));
+ if (ret < 0) {
+ vq_err(vq, "Translation failure %d in indirect.\n", ret);
+ return ret;
+ }
+
+ /* We will use the result as an address to read from, so most
+ * architectures only need a compiler barrier here. */
+ read_barrier_depends();
+
+ count = indirect->len / sizeof desc;
+ /* Buffers are chained via a 16 bit next field, so
+ * we can have at most 2^16 of these. */
+ if (count > USHORT_MAX + 1) {
+ vq_err(vq, "Indirect buffer length too big: %d\n",
+ indirect->len);
+ return -E2BIG;
+ }
+
+ do {
+ unsigned iov_count = *in_num + *out_num;
+ if (++found > count) {
+ vq_err(vq, "Loop detected: last one at %u "
+ "indirect size %u\n",
+ i, count);
+ return -EINVAL;
+ }
+ if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect,
+ sizeof desc)) {
+ vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
+ i, (size_t)indirect->addr + i * sizeof desc);
+ return -EINVAL;
+ }
+ if (desc.flags & VRING_DESC_F_INDIRECT) {
+ vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
+ i, (size_t)indirect->addr + i * sizeof desc);
+ return -EINVAL;
+ }
+
+ ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+ iov_size - iov_count);
+ if (ret < 0) {
+ vq_err(vq, "Translation failure %d indirect idx %d\n",
+ ret, i);
+ return ret;
+ }
+ /* If this is an input descriptor, increment that count. */
+ if (desc.flags & VRING_DESC_F_WRITE) {
+ *in_num += ret;
+ if (unlikely(log)) {
+ log[*log_num].addr = desc.addr;
+ log[*log_num].len = desc.len;
+ ++*log_num;
+ }
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors. */
+ if (*in_num) {
+ vq_err(vq, "Indirect descriptor "
+ "has out after in: idx %d\n", i);
+ return -EINVAL;
+ }
+ *out_num += ret;
+ }
+ } while ((i = next_desc(&desc)) != -1);
+ return 0;
+}
+
+/* This looks in the virtqueue for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which
+ * is never a valid descriptor number) if none was found. */
+unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
+{
+ struct vring_desc desc;
+ unsigned int i, head, found = 0;
+ u16 last_avail_idx;
+ int ret;
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ last_avail_idx = vq->last_avail_idx;
+ if (get_user(vq->avail_idx, &vq->avail->idx)) {
+ vq_err(vq, "Failed to access avail idx at %p\n",
+ &vq->avail->idx);
+ return vq->num;
+ }
+
+ if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) {
+ vq_err(vq, "Guest moved used index from %u to %u",
+ last_avail_idx, vq->avail_idx);
+ return vq->num;
+ }
+
+ /* If there's nothing new since last we looked, return invalid. */
+ if (vq->avail_idx == last_avail_idx)
+ return vq->num;
+
+ /* Only get avail ring entries after they have been exposed by guest. */
+ rmb();
+
+ /* Grab the next descriptor number they're advertising, and increment
+ * the index we've seen. */
+ if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) {
+ vq_err(vq, "Failed to read head: idx %d address %p\n",
+ last_avail_idx,
+ &vq->avail->ring[last_avail_idx % vq->num]);
+ return vq->num;
+ }
+
+ /* If their number is silly, that's an error. */
+ if (head >= vq->num) {
+ vq_err(vq, "Guest says index %u > %u is available",
+ head, vq->num);
+ return vq->num;
+ }
+
+ /* When we start there are neither input nor output descriptors. */
+ *out_num = *in_num = 0;
+ if (unlikely(log))
+ *log_num = 0;
+
+ i = head;
+ do {
+ unsigned iov_count = *in_num + *out_num;
+ if (i >= vq->num) {
+ vq_err(vq, "Desc index is %u > %u, head = %u",
+ i, vq->num, head);
+ return vq->num;
+ }
+ if (++found > vq->num) {
+ vq_err(vq, "Loop detected: last one at %u "
+ "vq size %u head %u\n",
+ i, vq->num, head);
+ return vq->num;
+ }
+ ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
+ if (ret) {
+ vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+ i, vq->desc + i);
+ return vq->num;
+ }
+ if (desc.flags & VRING_DESC_F_INDIRECT) {
+ ret = get_indirect(dev, vq, iov, iov_size,
+ out_num, in_num,
+ log, log_num, &desc);
+ if (ret < 0) {
+ vq_err(vq, "Failure detected "
+ "in indirect descriptor at idx %d\n", i);
+ return vq->num;
+ }
+ continue;
+ }
+
+ ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+ iov_size - iov_count);
+ if (ret < 0) {
+ vq_err(vq, "Translation failure %d descriptor idx %d\n",
+ ret, i);
+ return vq->num;
+ }
+ if (desc.flags & VRING_DESC_F_WRITE) {
+ /* If this is an input descriptor,
+ * increment that count. */
+ *in_num += ret;
+ if (unlikely(log)) {
+ log[*log_num].addr = desc.addr;
+ log[*log_num].len = desc.len;
+ ++*log_num;
+ }
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors. */
+ if (*in_num) {
+ vq_err(vq, "Descriptor has out after in: "
+ "idx %d\n", i);
+ return vq->num;
+ }
+ *out_num += ret;
+ }
+ } while ((i = next_desc(&desc)) != -1);
+
+ /* On success, increment avail index. */
+ vq->last_avail_idx++;
+ return head;
+}
+
+/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+{
+ vq->last_avail_idx--;
+}
+
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
+{
+ struct vring_used_elem *used;
+
+ /* The virtqueue contains a ring of used buffers. Get a pointer to the
+ * next entry in that used ring. */
+ used = &vq->used->ring[vq->last_used_idx % vq->num];
+ if (put_user(head, &used->id)) {
+ vq_err(vq, "Failed to write used id");
+ return -EFAULT;
+ }
+ if (put_user(len, &used->len)) {
+ vq_err(vq, "Failed to write used len");
+ return -EFAULT;
+ }
+ /* Make sure buffer is written before we update index. */
+ wmb();
+ if (put_user(vq->last_used_idx + 1, &vq->used->idx)) {
+ vq_err(vq, "Failed to increment used idx");
+ return -EFAULT;
+ }
+ if (unlikely(vq->log_used)) {
+ /* Make sure data is seen before log. */
+ wmb();
+ log_write(vq->log_base, vq->log_addr + sizeof *vq->used->ring *
+ (vq->last_used_idx % vq->num),
+ sizeof *vq->used->ring);
+ log_write(vq->log_base, vq->log_addr, sizeof *vq->used->ring);
+ if (vq->log_ctx)
+ eventfd_signal(vq->log_ctx, 1);
+ }
+ vq->last_used_idx++;
+ return 0;
+}
+
+/* This actually signals the guest, using eventfd. */
+void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
+ __u16 flags = 0;
+ if (get_user(flags, &vq->avail->flags)) {
+ vq_err(vq, "Failed to get flags");
+ return;
+ }
+
+ /* If they don't want an interrupt, don't signal, unless empty. */
+ if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
+ (vq->avail_idx != vq->last_avail_idx ||
+ !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
+ return;
+
+ /* Signal the Guest to tell them we used something up. */
+ if (vq->call_ctx)
+ eventfd_signal(vq->call_ctx, 1);
+}
+
+/* And here's the combo meal deal. Supersize me! */
+void vhost_add_used_and_signal(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq,
+ unsigned int head, int len)
+{
+ vhost_add_used(vq, head, len);
+ vhost_signal(dev, vq);
+}
+
+/* OK, now we need to know about added descriptors. */
+bool vhost_enable_notify(struct vhost_virtqueue *vq)
+{
+ u16 avail_idx;
+ int r;
+ if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
+ return false;
+ vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
+ r = put_user(vq->used_flags, &vq->used->flags);
+ if (r) {
+ vq_err(vq, "Failed to enable notification at %p: %d\n",
+ &vq->used->flags, r);
+ return false;
+ }
+ /* They could have slipped one in as we were doing that: make
+ * sure it's written, then check again. */
+ mb();
+ r = get_user(avail_idx, &vq->avail->idx);
+ if (r) {
+ vq_err(vq, "Failed to check avail idx at %p: %d\n",
+ &vq->avail->idx, r);
+ return false;
+ }
+
+ return avail_idx != vq->last_avail_idx;
+}
+
+/* We don't need to be notified again. */
+void vhost_disable_notify(struct vhost_virtqueue *vq)
+{
+ int r;
+ if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
+ return;
+ vq->used_flags |= VRING_USED_F_NO_NOTIFY;
+ r = put_user(vq->used_flags, &vq->used->flags);
+ if (r)
+ vq_err(vq, "Failed to enable notification at %p: %d\n",
+ &vq->used->flags, r);
+}
+
+int vhost_init(void)
+{
+ vhost_workqueue = create_singlethread_workqueue("vhost");
+ if (!vhost_workqueue)
+ return -ENOMEM;
+ return 0;
+}
+
+void vhost_cleanup(void)
+{
+ destroy_workqueue(vhost_workqueue);
+}
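
A note on the dirty-log layout consumed by log_write() above: the log is a userspace bitmap with one bit per VHOST_PAGE_SIZE page of guest memory, so page number P is recorded in bit P % 8 of byte P / 8 of the log. The sketch below is an illustration only; it assumes VHOST_PAGE_SIZE is 0x1000 as defined in the new <linux/vhost.h> header, and mirrors the loop structure of log_write().

#include <stdint.h>
#include <stdio.h>

#define VHOST_PAGE_SIZE 0x1000ULL

static void show_dirty_bits(uint64_t write_address, uint64_t write_length)
{
        uint64_t page = write_address / VHOST_PAGE_SIZE;

        while (write_length) {
                /* One bit per page: byte index = page / 8, bit = page % 8. */
                printf("guest page %#llx -> log byte %#llx, bit %llu\n",
                       (unsigned long long)page,
                       (unsigned long long)(page / 8),
                       (unsigned long long)(page % 8));
                if (write_length <= VHOST_PAGE_SIZE)
                        break;
                write_length -= VHOST_PAGE_SIZE;
                page++;
        }
}

int main(void)
{
        /* A write of 0x2100 bytes starting in the middle of page 0x10. */
        show_dirty_bits(0x10800, 0x2100);
        return 0;
}
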
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
new file mode 100644
index 000000000000..d1f045376017
--- /dev/null
+++ b/drivers/vhost/vhost.h
@@ -0,0 +1,159 @@
+#ifndef _VHOST_H
+#define _VHOST_H
+
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/skbuff.h>
+#include <linux/uio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+struct vhost_device;
+
+enum {
+ /* Enough room for all fragments, the head, and the virtio net header. */
+ VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2,
+};
+
+/* Poll a file (eventfd or socket) */
+/* Note: there's nothing vhost specific about this structure. */
+struct vhost_poll {
+ poll_table table;
+ wait_queue_head_t *wqh;
+ wait_queue_t wait;
+ /* struct which will handle all actual work. */
+ struct work_struct work;
+ unsigned long mask;
+};
+
+void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
+ unsigned long mask);
+void vhost_poll_start(struct vhost_poll *poll, struct file *file);
+void vhost_poll_stop(struct vhost_poll *poll);
+void vhost_poll_flush(struct vhost_poll *poll);
+void vhost_poll_queue(struct vhost_poll *poll);
+
+struct vhost_log {
+ u64 addr;
+ u64 len;
+};
+
+/* The virtqueue structure describes a queue attached to a device. */
+struct vhost_virtqueue {
+ struct vhost_dev *dev;
+
+ /* The actual ring of buffers. */
+ struct mutex mutex;
+ unsigned int num;
+ struct vring_desc __user *desc;
+ struct vring_avail __user *avail;
+ struct vring_used __user *used;
+ struct file *kick;
+ struct file *call;
+ struct file *error;
+ struct eventfd_ctx *call_ctx;
+ struct eventfd_ctx *error_ctx;
+ struct eventfd_ctx *log_ctx;
+
+ struct vhost_poll poll;
+
+ /* The routine to call when the Guest pings us, or when we time out. */
+ work_func_t handle_kick;
+
+ /* Last available index we saw. */
+ u16 last_avail_idx;
+
+ /* Caches available index value from user. */
+ u16 avail_idx;
+
+ /* Last index we used. */
+ u16 last_used_idx;
+
+ /* Used flags */
+ u16 used_flags;
+
+ /* Log writes to used structure. */
+ bool log_used;
+ u64 log_addr;
+
+ struct iovec indirect[VHOST_NET_MAX_SG];
+ struct iovec iov[VHOST_NET_MAX_SG];
+ struct iovec hdr[VHOST_NET_MAX_SG];
+ size_t hdr_size;
+ /* We use a kind of RCU to access private pointer.
+ * All readers access it from workqueue, which makes it possible to
+ * flush the workqueue instead of synchronize_rcu. Therefore readers do
+ * not need to call rcu_read_lock/rcu_read_unlock: the beginning of
+ * work item execution acts instead of rcu_read_lock() and the end of
+ * work item execution acts instead of rcu_read_unlock().
+ * Writers use virtqueue mutex. */
+ void *private_data;
+ /* Log write descriptors */
+ void __user *log_base;
+ struct vhost_log log[VHOST_NET_MAX_SG];
+};
+
+struct vhost_dev {
+ /* Readers use RCU to access the memory table pointer,
+ * the log base pointer and the features.
+ * Writers use the mutex below. */
+ struct vhost_memory *memory;
+ struct mm_struct *mm;
+ struct mutex mutex;
+ unsigned acked_features;
+ struct vhost_virtqueue *vqs;
+ int nvqs;
+ struct file *log_file;
+ struct eventfd_ctx *log_ctx;
+};
+
+long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
+long vhost_dev_check_owner(struct vhost_dev *);
+long vhost_dev_reset_owner(struct vhost_dev *);
+void vhost_dev_cleanup(struct vhost_dev *);
+long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg);
+
+unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+ struct iovec iov[], unsigned int iov_count,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num);
+void vhost_discard_vq_desc(struct vhost_virtqueue *);
+
+int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
+void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
+void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
+ unsigned int head, int len);
+void vhost_disable_notify(struct vhost_virtqueue *);
+bool vhost_enable_notify(struct vhost_virtqueue *);
+
+int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+ unsigned int log_num, u64 len);
+
+int vhost_init(void);
+void vhost_cleanup(void);
+
+#define vq_err(vq, fmt, ...) do { \
+ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
+ if ((vq)->error_ctx) \
+ eventfd_signal((vq)->error_ctx, 1);\
+ } while (0)
+
+enum {
+ VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
+ (1 << VIRTIO_RING_F_INDIRECT_DESC) |
+ (1 << VHOST_F_LOG_ALL) |
+ (1 << VHOST_NET_F_VIRTIO_NET_HDR),
+};
+
+static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
+{
+ unsigned acked_features = rcu_dereference(dev->acked_features);
+ return acked_features & (1 << bit);
+}
+
+#endif
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 54fbb2995a5f..156c661b2912 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -1976,8 +1976,7 @@ static void __devexit uvesafb_exit(void)
module_exit(uvesafb_exit);
-#define param_get_scroll NULL
-static int param_set_scroll(const char *val, struct kernel_param *kp)
+static int param_set_scroll(const char *val, const struct kernel_param *kp)
{
ypan = 0;
@@ -1992,7 +1991,9 @@ static int param_set_scroll(const char *val, struct kernel_param *kp)
return 0;
}
-
+static struct kernel_param_ops param_ops_scroll = {
+ .set = param_set_scroll,
+};
#define param_check_scroll(name, p) __param_check(name, p, void)
module_param_named(scroll, ypan, scroll, 0);
diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c
index 3df17dc8c3d7..8bfa47d5a5c0 100644
--- a/drivers/video/vt8623fb.c
+++ b/drivers/video/vt8623fb.c
@@ -727,7 +727,9 @@ static int __devinit vt8623_pci_probe(struct pci_dev *dev, const struct pci_devi
/* Prepare startup mode */
+ kparam_block_sysfs_write(mode_option);
rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8);
+ kparam_unblock_sysfs_write(mode_option);
if (! ((rc == 1) || (rc == 2))) {
rc = -EINVAL;
dev_err(info->device, "mode %s not found\n", mode_option);
diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c
index e6c4390d8bd6..53180a37cc9a 100644
--- a/drivers/zorro/zorro-driver.c
+++ b/drivers/zorro/zorro-driver.c
@@ -156,5 +156,4 @@ postcore_initcall(zorro_driver_init);
EXPORT_SYMBOL(zorro_match_device);
EXPORT_SYMBOL(zorro_register_driver);
EXPORT_SYMBOL(zorro_unregister_driver);
-EXPORT_SYMBOL(zorro_dev_driver);
EXPORT_SYMBOL(zorro_bus_type);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 293fa0528a6e..52a7200233f0 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -45,7 +45,7 @@ unsigned short nfs_callback_tcpport;
unsigned short nfs_callback_tcpport6;
#define NFS_CALLBACK_MAXPORTNR (65535U)
-static int param_set_portnr(const char *val, struct kernel_param *kp)
+static int param_set_portnr(const char *val, const struct kernel_param *kp)
{
unsigned long num;
int ret;
@@ -58,11 +58,10 @@ static int param_set_portnr(const char *val, struct kernel_param *kp)
*((unsigned int *)kp->arg) = num;
return 0;
}
-
-static int param_get_portnr(char *buffer, struct kernel_param *kp)
-{
- return param_get_uint(buffer, kp);
-}
+static struct kernel_param_ops param_ops_portnr = {
+ .set = param_set_portnr,
+ .get = param_get_uint,
+};
#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
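
Both conversions above follow the same recipe under the reworked moduleparam API: supply a struct kernel_param_ops, add a param_check_<type>() macro for type checking, and refer to the new type name from module_param_named(). A hedged sketch of the pattern with hypothetical names (mydrv, threshold); stock helpers such as param_get_uint are reused where possible.

#include <linux/kernel.h>
#include <linux/moduleparam.h>

static unsigned int mydrv_threshold = 16;

static int param_set_threshold(const char *val, const struct kernel_param *kp)
{
        unsigned long num;
        int ret = strict_strtoul(val, 0, &num);

        if (ret || num > 255)
                return -EINVAL;
        *(unsigned int *)kp->arg = num;
        return 0;
}

static struct kernel_param_ops param_ops_threshold = {
        .set = param_set_threshold,
        .get = param_get_uint,          /* reuse the stock getter */
};
#define param_check_threshold(name, p) __param_check(name, p, unsigned int)

module_param_named(threshold, mydrv_threshold, threshold, 0644);
MODULE_PARM_DESC(threshold, "example threshold, 0-255");
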
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b6e818f4b247..ea14edee36ae 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -52,8 +52,12 @@
#define LOAD_OFFSET 0
#endif
-#ifndef VMLINUX_SYMBOL
-#define VMLINUX_SYMBOL(_sym_) _sym_
+#ifndef SYMBOL_PREFIX
+#define VMLINUX_SYMBOL(sym) sym
+#else
+#define PASTE2(x,y) x##y
+#define PASTE(x,y) PASTE2(x,y)
+#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)
#endif
/* Align . to a 8 byte boundary equals to maximum function alignment. */
@@ -254,76 +258,76 @@
/* Kernel symbol table: Normal symbols */ \
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab) = .; \
- *(__ksymtab) \
+ *(__ksymtab_sorted) \
VMLINUX_SYMBOL(__stop___ksymtab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \
- *(__ksymtab_gpl) \
+ *(__ksymtab_gpl_sorted) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \
- *(__ksymtab_unused) \
+ *(__ksymtab_unused_sorted) \
VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \
- *(__ksymtab_unused_gpl) \
+ *(__ksymtab_unused_gpl_sorted) \
VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \
- *(__ksymtab_gpl_future) \
+ *(__ksymtab_gpl_future_sorted) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \
} \
\
/* Kernel symbol table: Normal symbols */ \
__kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab) = .; \
- *(__kcrctab) \
+ *(__kcrctab_sorted) \
VMLINUX_SYMBOL(__stop___kcrctab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \
- *(__kcrctab_gpl) \
+ *(__kcrctab_gpl_sorted) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \
- *(__kcrctab_unused) \
+ *(__kcrctab_unused_sorted) \
VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \
- *(__kcrctab_unused_gpl) \
+ *(__kcrctab_unused_gpl_sorted) \
VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \
- *(__kcrctab_gpl_future) \
+ *(__kcrctab_gpl_future_sorted) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
} \
\
/* Kernel symbol table: strings */ \
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
- *(__ksymtab_strings) \
+ *(__ksymtab_strings_sorted) \
} \
\
/* __*init sections */ \
@@ -639,6 +643,23 @@
EXIT_DATA \
EXIT_CALL \
*(.discard) \
+ \
+ /* \
+ * Discard the original unsorted symbol tables. \
+ * In vmlinux they are replaced by sorted versions \
+ * generated by modpost -x. \
+ */ \
+ *(__ksymtab) \
+ *(__ksymtab_gpl) \
+ *(__ksymtab_unused) \
+ *(__ksymtab_unused_gpl) \
+ *(__ksymtab_gpl_future) \
+ *(__kcrctab) \
+ *(__kcrctab_gpl) \
+ *(__kcrctab_unused) \
+ *(__kcrctab_unused_gpl) \
+ *(__kcrctab_gpl_future) \
+ *(__ksymtab_strings) \
}
/**
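
The reason only the *_sorted sections survive into vmlinux is that a name-sorted __ksymtab can be binary-searched instead of scanned linearly when symbols are resolved. Below is a sketch of the lookup this enables (an illustration only; the actual kernel/module.c changes belong to the same series and are not reproduced here). The section markers come from the linker script above and struct kernel_symbol from the new <linux/mod_export.h>.

#include <linux/types.h>
#include <linux/string.h>
#include <linux/mod_export.h>

extern const struct kernel_symbol __start___ksymtab[];
extern const struct kernel_symbol __stop___ksymtab[];

static const struct kernel_symbol *find_exported_symbol(const char *name)
{
        const struct kernel_symbol *base = __start___ksymtab;
        size_t num = __stop___ksymtab - __start___ksymtab;

        while (num) {
                const struct kernel_symbol *mid = base + num / 2;
                int cmp = strcmp(name, mid->name);

                if (cmp == 0)
                        return mid;
                if (cmp > 0) {
                        base = mid + 1;
                        num -= num / 2 + 1;
                } else {
                        num /= 2;
                }
        }
        return NULL;
}
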
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 1feed71551c9..e21019492453 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -361,6 +361,7 @@ unifdef-y += uio.h
unifdef-y += unistd.h
unifdef-y += usbdevice_fs.h
unifdef-y += utsname.h
+unifdef-y += vhost.h
unifdef-y += videodev2.h
unifdef-y += videodev.h
unifdef-y += virtio_config.h
diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h
new file mode 100644
index 000000000000..90b1aa867224
--- /dev/null
+++ b/include/linux/bsearch.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_BSEARCH_H
+#define _LINUX_BSEARCH_H
+
+#include <linux/types.h>
+
+void *bsearch(const void *key, const void *base, size_t num, size_t size,
+ int (*cmp)(const void *key, const void *elt));
+
+#endif /* _LINUX_BSEARCH_H */
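
A usage illustration of the comparison-function contract (hypothetical table and field names): cmp receives the search key as its first argument and a table element as its second, and the table must already be sorted in the order cmp implies.

#include <linux/bsearch.h>

struct freq_step {
        unsigned int khz;               /* sort key, ascending */
        unsigned int voltage_mv;
};

static int cmp_khz(const void *key, const void *elt)
{
        unsigned int khz = *(const unsigned int *)key;
        const struct freq_step *step = elt;

        if (khz < step->khz)
                return -1;
        if (khz > step->khz)
                return 1;
        return 0;
}

static const struct freq_step *find_step(const struct freq_step *table,
                                         size_t num, unsigned int khz)
{
        return bsearch(&khz, table, num, sizeof(*table), cmp_khz);
}
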
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 557bdad320b6..f53e9b868c26 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -220,7 +220,7 @@ static inline enum zone_type gfp_zone(gfp_t flags)
((1 << ZONES_SHIFT) - 1);
if (__builtin_constant_p(bit))
- MAYBE_BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+ BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
else {
#ifdef CONFIG_DEBUG_VM
BUG_ON((GFP_ZONE_BAD >> bit) & 1);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3f5fd523b49d..404abe00162c 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -86,4 +86,18 @@ struct tun_filter {
__u8 addr[0][ETH_ALEN];
};
+#ifdef __KERNEL__
+#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
+struct socket *tun_get_socket(struct file *);
+#else
+#include <linux/err.h>
+#include <linux/errno.h>
+struct file;
+struct socket;
+static inline struct socket *tun_get_socket(struct file *f)
+{
+ return ERR_PTR(-EINVAL);
+}
+#endif /* CONFIG_TUN */
+#endif /* __KERNEL__ */
#endif /* __IF_TUN_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f4e3184fa054..be3018d8a6fc 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -683,12 +683,6 @@ struct sysinfo {
char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */
};
-/* Force a compilation error if condition is true */
-#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
-
-/* Force a compilation error if condition is constant and true */
-#define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
-
/* Force a compilation error if condition is true, but also produce a
result (of value 0 and type size_t), so the expression can be used
e.g. in a structure initializer (or where-ever else comma expressions
@@ -696,6 +690,32 @@ struct sysinfo {
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @cond: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ *
+ * The implementation uses gcc's reluctance to create a negative-sized array, but
+ * gcc (as of 4.4) only emits that error for obvious cases (e.g. not arguments
+ * to inline functions). So as a fallback we use the optimizer; if it can't
+ * prove the condition is false, it will cause a link error on the undefined
+ * "__build_bug_on_failed". This error message can be harder to track down
+ * though, hence the two different methods.
+ */
+#ifndef __OPTIMIZE__
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+extern int __build_bug_on_failed;
+#define BUILD_BUG_ON(condition) \
+ do { \
+ ((void)sizeof(char[1 - 2*!!(condition)])); \
+ if (condition) __build_bug_on_failed = 1; \
+ } while(0)
+#endif
+
/* Trap pasters of __FUNCTION__ at compile-time */
#define __FUNCTION__ (__func__)
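
To make the new BUILD_BUG_ON() documentation concrete, here is a generic illustration (not taken from this patch) of the kind of compile-time invariant it is meant to guard:

#include <linux/kernel.h>
#include <linux/types.h>

struct wire_header {
        __u8  type;
        __u8  flags;
        __u16 len;
} __attribute__((packed));

static inline void wire_header_check(void)
{
        /* Evaluated at compile time; emits no code when the size is right. */
        BUILD_BUG_ON(sizeof(struct wire_header) != 4);
}
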
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index e880d4cf9e22..136cdcdf92ef 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -152,7 +152,7 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
\
_n = (long) &((ptr)->name##_end) \
- (long) &((ptr)->name##_begin); \
- MAYBE_BUILD_BUG_ON(_n < 0); \
+ BUILD_BUG_ON(_n < 0); \
\
kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \
} while (0)
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index adaf3c15e449..8b5f7cc0fba6 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -30,6 +30,7 @@
#define HPET_MINOR 228
#define FUSE_MINOR 229
#define KVM_MINOR 232
+#define VHOST_NET_MINOR 233
#define MISC_DYNAMIC_MINOR 255
struct device;
diff --git a/include/linux/mod_export.h b/include/linux/mod_export.h
new file mode 100644
index 000000000000..394795e7f03d
--- /dev/null
+++ b/include/linux/mod_export.h
@@ -0,0 +1,149 @@
+#ifndef LINUX_MOD_EXPORT_H
+#define LINUX_MOD_EXPORT_H
+/*
+ * Define EXPORT_SYMBOL() and friends for kernel modules.
+ *
+ * Under __GENKSYMS__ these definitions are skipped, making it possible to
+ * scan for EXPORT_SYMBOL() in preprocessed C files.
+ *
+ * Under __MODPOST_EXPORTS__ we skip C definitions and define __EXPORT_SYMBOL()
+ * in arch-independent assembly code. This makes it possible to construct
+ * sorted symbol tables.
+ */
+
+#ifndef __GENKSYMS__
+#ifndef __MODPOST_EXPORTS__
+
+#include <linux/compiler.h>
+
+/* Some toolchains use a `_' prefix for all user symbols. */
+#ifdef CONFIG_SYMBOL_PREFIX
+#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
+#define MODULE_SYMBOL_PREFIX ""
+#endif
+
+#ifdef CONFIG_MODULES
+
+struct kernel_symbol {
+ unsigned long value;
+ const char *name;
+};
+
+#ifdef CONFIG_MODVERSIONS
+/* Mark the CRC weak since genksyms apparently decides not to
+ * generate a checksum for some symbols */
+#define __CRC_SYMBOL(sym, sec) \
+ extern void *__crc_##sym __attribute__((weak)); \
+ static const unsigned long __kcrctab_##sym \
+ __used \
+ __attribute__((section("__kcrctab" sec), unused)) \
+ = (unsigned long) &__crc_##sym;
+#else
+#define __CRC_SYMBOL(sym, sec)
+#endif
+
+/* For every exported symbol, place a struct in the __ksymtab section */
+#define __EXPORT_SYMBOL(sym, sec) \
+ extern typeof(sym) sym; \
+ __CRC_SYMBOL(sym, sec) \
+ static const char __kstrtab_##sym[] \
+ __attribute__((section("__ksymtab_strings"), aligned(1))) \
+ = MODULE_SYMBOL_PREFIX #sym; \
+ static const struct kernel_symbol __ksymtab_##sym \
+ __used \
+ __attribute__((section("__ksymtab" sec), unused)) \
+ = { (unsigned long)&sym, __kstrtab_##sym }
+
+#define EXPORT_SYMBOL(sym) \
+ __EXPORT_SYMBOL(sym, "")
+
+#define EXPORT_SYMBOL_GPL(sym) \
+ __EXPORT_SYMBOL(sym, "_gpl")
+
+#define EXPORT_SYMBOL_GPL_FUTURE(sym) \
+ __EXPORT_SYMBOL(sym, "_gpl_future")
+
+#ifdef CONFIG_UNUSED_SYMBOLS
+#define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused")
+#define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl")
+#else
+#define EXPORT_UNUSED_SYMBOL(sym)
+#define EXPORT_UNUSED_SYMBOL_GPL(sym)
+#endif
+
+#else /* !CONFIG_MODULES */
+
+#define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_GPL(sym)
+#define EXPORT_SYMBOL_GPL_FUTURE(sym)
+#define EXPORT_UNUSED_SYMBOL(sym)
+#define EXPORT_UNUSED_SYMBOL_GPL(sym)
+
+#endif /* CONFIG_MODULES */
+
+#else /* __MODPOST_EXPORTS__ */
+/*
+ * Here is the arch-independent assembly version, used in .tmp_exports-asm.S.
+ *
+ * We use CPP macros since they are more familiar than assembly macros.
+ * Note that CPP macros eat newlines, so each pseudo-instruction must be
+ * terminated by a semicolon.
+ */
+
+#include <asm/bitsperlong.h>
+#include <linux/stringify.h>
+
+#if BITS_PER_LONG == 64
+#define PTR .quad
+#define ALGN .balign 8
+#else
+#define PTR .long
+#define ALGN .balign 4
+#endif
+
+/* build system gives us an unstringified version of CONFIG_SYMBOL_PREFIX */
+#ifndef SYMBOL_PREFIX
+#define SYM(sym) sym
+#else
+#define PASTE2(x,y) x##y
+#define PASTE(x,y) PASTE2(x,y)
+#define SYM(sym) PASTE(SYMBOL_PREFIX, sym)
+#endif
+
+
+#ifdef CONFIG_MODVERSIONS
+#define __CRC_SYMBOL(sym, crcsec) \
+ .globl SYM(__crc_##sym); \
+ .weak SYM(__crc_##sym); \
+ .pushsection crcsec, "a"; \
+ ALGN; \
+ SYM(__kcrctab_##sym): \
+ PTR SYM(__crc_##sym); \
+ .popsection;
+#else
+#define __CRC_SYMBOL(sym, section)
+#endif
+
+#define __EXPORT_SYMBOL(sym, sec, strsec, crcsec) \
+ .globl SYM(sym); \
+ \
+ __CRC_SYMBOL(sym, crcsec) \
+ \
+ .pushsection strsec, "a"; \
+ SYM(__kstrtab_##sym): \
+ .asciz __stringify(SYM(sym)); \
+ .popsection; \
+ \
+ .pushsection sec, "a"; \
+ ALGN; \
+ SYM(__ksymtab_##sym): \
+ PTR SYM(sym); \
+ PTR SYM(__kstrtab_##sym); \
+ .popsection;
+
+#endif /* __MODPOST_EXPORTS__ */
+
+#endif /* __GENKSYMS__ */
+
+#endif /* LINUX_MOD_EXPORT_H */
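
For orientation, a hedged sketch of a hypothetical module using the macros this header provides (linux/module.h now pulls the header in, and under __MODPOST_EXPORTS__ modpost emits the equivalent assembly so the resulting __ksymtab sections can be sorted):

    #include <linux/module.h>       /* includes linux/mod_export.h */

    /* Hypothetical helper that other modules may link against. */
    int demo_add(int a, int b)
    {
            return a + b;
    }
    EXPORT_SYMBOL_GPL(demo_add);    /* __ksymtab_gpl (+ __kcrctab_gpl with MODVERSIONS) */

    char demo_banner[] = "demo";
    EXPORT_SYMBOL(demo_banner);     /* plain export: __ksymtab */

    MODULE_LICENSE("GPL");
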
diff --git a/include/linux/module.h b/include/linux/module.h
index 482efc865acf..0bb0f7464ff0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -15,6 +15,7 @@
#include <linux/stringify.h>
#include <linux/kobject.h>
#include <linux/moduleparam.h>
+#include <linux/mod_export.h>
#include <linux/tracepoint.h>
#include <asm/local.h>
@@ -25,19 +26,8 @@
/* Not Yet Implemented */
#define MODULE_SUPPORTED_DEVICE(name)
-/* some toolchains uses a `_' prefix for all user symbols */
-#ifndef MODULE_SYMBOL_PREFIX
-#define MODULE_SYMBOL_PREFIX ""
-#endif
-
#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN
-struct kernel_symbol
-{
- unsigned long value;
- const char *name;
-};
-
struct modversion_info
{
unsigned long crc;
@@ -178,51 +168,61 @@ void *__symbol_get(const char *symbol);
void *__symbol_get_gpl(const char *symbol);
#define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x)))
-#ifndef __GENKSYMS__
-#ifdef CONFIG_MODVERSIONS
-/* Mark the CRC weak since genksyms apparently decides not to
- * generate a checksums for some symbols */
-#define __CRC_SYMBOL(sym, sec) \
- extern void *__crc_##sym __attribute__((weak)); \
- static const unsigned long __kcrctab_##sym \
- __used \
- __attribute__((section("__kcrctab" sec), unused)) \
- = (unsigned long) &__crc_##sym;
-#else
-#define __CRC_SYMBOL(sym, sec)
-#endif
+#ifdef CONFIG_UNUSED_SYMBOLS
+enum export_type {
+ /* GPL-only exported symbols. */
+ __EXPORT_FLAG_GPL_ONLY = 0x1,
-/* For every exported symbol, place a struct in the __ksymtab section */
-#define __EXPORT_SYMBOL(sym, sec) \
- extern typeof(sym) sym; \
- __CRC_SYMBOL(sym, sec) \
- static const char __kstrtab_##sym[] \
- __attribute__((section("__ksymtab_strings"), aligned(1))) \
- = MODULE_SYMBOL_PREFIX #sym; \
- static const struct kernel_symbol __ksymtab_##sym \
- __used \
- __attribute__((section("__ksymtab" sec), unused)) \
- = { (unsigned long)&sym, __kstrtab_##sym }
+ /* unused exported symbols. */
+ __EXPORT_FLAG_UNUSED = 0x2,
-#define EXPORT_SYMBOL(sym) \
- __EXPORT_SYMBOL(sym, "")
+ /* exports that will be GPL-only in the near future. */
+ __EXPORT_FLAG_GPL_FUTURE = 0x4,
-#define EXPORT_SYMBOL_GPL(sym) \
- __EXPORT_SYMBOL(sym, "_gpl")
+ EXPORT_TYPE_PLAIN = 0x0,
+ EXPORT_TYPE_GPL = 0x1,
+ EXPORT_TYPE_UNUSED = 0x2,
+ EXPORT_TYPE_UNUSED_GPL = 0x3,
+ EXPORT_TYPE_GPL_FUTURE = 0x4,
-#define EXPORT_SYMBOL_GPL_FUTURE(sym) \
- __EXPORT_SYMBOL(sym, "_gpl_future")
+ EXPORT_TYPE_MAX
+};
+#else /* !CONFIG_UNUSED_SYMBOLS */
+enum export_type {
+ __EXPORT_FLAG_UNUSED = 0x0,
+ __EXPORT_FLAG_GPL_ONLY = 0x1,
+ __EXPORT_FLAG_GPL_FUTURE = 0x2,
-#ifdef CONFIG_UNUSED_SYMBOLS
-#define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused")
-#define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl")
-#else
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
+ EXPORT_TYPE_PLAIN = 0x0,
+ EXPORT_TYPE_GPL = 0x1,
+ EXPORT_TYPE_GPL_FUTURE = 0x2,
+ EXPORT_TYPE_MAX
+};
#endif
+static inline bool export_is_gpl_only(enum export_type type)
+{
+ return (type & __EXPORT_FLAG_GPL_ONLY);
+}
+
+static inline bool export_is_unused(enum export_type type)
+{
+ return (type & __EXPORT_FLAG_UNUSED);
+}
+
+static inline bool export_is_gpl_future(enum export_type type)
+{
+ return (type & __EXPORT_FLAG_GPL_FUTURE);
+}
+
+struct ksymtab {
+ const struct kernel_symbol *syms;
+#ifdef CONFIG_MODVERSIONS
+ const unsigned long *crcs;
#endif
+ unsigned int num_syms;
+};
enum module_state
{
@@ -249,36 +249,12 @@ struct module
struct kobject *holders_dir;
/* Exported symbols */
- const struct kernel_symbol *syms;
- const unsigned long *crcs;
- unsigned int num_syms;
+ struct ksymtab syms[EXPORT_TYPE_MAX];
/* Kernel parameters. */
struct kernel_param *kp;
unsigned int num_kp;
- /* GPL-only exported symbols. */
- unsigned int num_gpl_syms;
- const struct kernel_symbol *gpl_syms;
- const unsigned long *gpl_crcs;
-
-#ifdef CONFIG_UNUSED_SYMBOLS
- /* unused exported symbols. */
- const struct kernel_symbol *unused_syms;
- const unsigned long *unused_crcs;
- unsigned int num_unused_syms;
-
- /* GPL-only, unused exported symbols. */
- unsigned int num_unused_gpl_syms;
- const struct kernel_symbol *unused_gpl_syms;
- const unsigned long *unused_gpl_crcs;
-#endif
-
- /* symbols that will be GPL-only in the near future. */
- const struct kernel_symbol *gpl_future_syms;
- const unsigned long *gpl_future_crcs;
- unsigned int num_gpl_future_syms;
-
/* Exception table */
unsigned int num_exentries;
struct exception_table_entry *extable;
@@ -408,17 +384,6 @@ static inline int within_module_init(unsigned long addr, struct module *mod)
/* Search for module by name: must hold module_mutex. */
struct module *find_module(const char *name);
-struct symsearch {
- const struct kernel_symbol *start, *stop;
- const unsigned long *crcs;
- enum {
- NOT_GPL_ONLY,
- GPL_ONLY,
- WILL_BE_GPL_ONLY,
- } licence;
- bool unused;
-};
-
/* Search for an exported symbol by name. */
const struct kernel_symbol *find_symbol(const char *name,
struct module **owner,
@@ -426,9 +391,14 @@ const struct kernel_symbol *find_symbol(const char *name,
bool gplok,
bool warn);
+typedef bool each_symbol_fn_t (enum export_type type,
+ const struct kernel_symbol *sym,
+ const unsigned long *crc,
+ struct module *owner,
+ void *data);
+
/* Walk the exported symbol table */
-bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner,
- unsigned int symnum, void *data), void *data);
+bool each_symbol(each_symbol_fn_t *fn, void *data);
/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
symnum out of range. */
@@ -541,11 +511,6 @@ extern void module_update_tracepoints(void);
extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
#else /* !CONFIG_MODULES... */
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
/* Given an address, look for it in the exception tables. */
static inline const struct exception_table_entry *
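
A hedged sketch of a caller of the reworked each_symbol() interface, using the export_is_*() helpers above; the names are hypothetical, and the locking the in-kernel callers rely on (module_mutex or disabled preemption around the walk) is omitted:

    #include <linux/module.h>

    /* Count exported symbols that are GPL-only now or will be soon. */
    static bool count_gpl_only(enum export_type type,
                               const struct kernel_symbol *sym,
                               const unsigned long *crc,
                               struct module *owner,
                               void *data)
    {
            unsigned int *count = data;

            if (export_is_gpl_only(type) || export_is_gpl_future(type))
                    (*count)++;

            return false;   /* returning true would stop the walk early */
    }

    static unsigned int demo_count_gpl_exports(void)
    {
            unsigned int count = 0;

            each_symbol(count_gpl_only, &count);
            return count;
    }
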
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 82a9124f7d75..893549c04265 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -31,20 +31,23 @@ static const char __module_cat(name,__LINE__)[] \
struct kernel_param;
-/* Returns 0, or -errno. arg is in kp->arg. */
-typedef int (*param_set_fn)(const char *val, struct kernel_param *kp);
-/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
-typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp);
+struct kernel_param_ops {
+ /* Returns 0, or -errno. arg is in kp->arg. */
+ int (*set)(const char *val, const struct kernel_param *kp);
+ /* Returns length written or -errno. Buffer is 4k (ie. be short!) */
+ int (*get)(char *buffer, const struct kernel_param *kp);
+ /* Optional function to free kp->arg when module unloaded. */
+ void (*free)(void *arg);
+};
/* Flag bits for kernel_param.flags */
#define KPARAM_ISBOOL 2
struct kernel_param {
const char *name;
+ const struct kernel_param_ops *ops;
u16 perm;
u16 flags;
- param_set_fn set;
- param_get_fn get;
union {
void *arg;
const struct kparam_string *str;
@@ -63,12 +66,67 @@ struct kparam_array
{
unsigned int max;
unsigned int *num;
- param_set_fn set;
- param_get_fn get;
+ const struct kernel_param_ops *ops;
unsigned int elemsize;
void *elem;
};
+/**
+ * module_param - typesafe helper for a module/cmdline parameter
+ * @value: the variable to alter, and exposed parameter name.
+ * @type: the type of the parameter
+ * @perm: visibility in sysfs.
+ *
+ * @value becomes the module parameter, or (prefixed by KBUILD_MODNAME and a
+ * ".") the kernel commandline parameter. Note that - is changed to _, so
+ * the user can use "foo-bar=1" even for variable "foo_bar".
+ *
+ * @perm is 0 if the variable is not to appear in sysfs, or 0444
+ * for world-readable, 0644 for root-writable, etc. Note that if it
+ * is writable, you may need to use kparam_block_sysfs_write() around
+ * accesses (esp. charp, which can be kfreed when it changes).
+ *
+ * The @type is simply pasted to refer to a param_ops_##type and a
+ * param_check_##type: for convenience many standard types are provided but
+ * you can create your own by defining those variables.
+ *
+ * Standard types are:
+ * byte, short, ushort, int, uint, long, ulong
+ * charp: a character pointer
+ * bool: a bool, values 0/1, y/n, Y/N.
+ * invbool: the above, only sense-reversed (N = true).
+ */
+#define module_param(name, type, perm) \
+ module_param_named(name, name, type, perm)
+
+/**
+ * module_param_named - typesafe helper for a renamed module/cmdline parameter
+ * @name: a valid C identifier which is the parameter name.
+ * @value: the actual lvalue to alter.
+ * @type: the type of the parameter
+ * @perm: visibility in sysfs.
+ *
+ * Usually it's a good idea to have variable names and user-exposed names the
+ * same, but that's harder if the variable must be non-static or is inside a
+ * structure. This allows exposure under a different name.
+ */
+#define module_param_named(name, value, type, perm) \
+ param_check_##type(name, &(value)); \
+ module_param_cb(name, &param_ops_##type, &value, perm); \
+ __MODULE_PARM_TYPE(name, #type)
+
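A short, hypothetical example of the two macros documented above as they would appear in a driver (the "demo." command-line prefix comes from KBUILD_MODNAME):

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    static int debug;                       /* demo.debug=1 on the command line */
    module_param(debug, int, 0644);
    MODULE_PARM_DESC(debug, "Enable debug output");

    static char *mode = "auto";             /* exposed as "operating_mode" */
    module_param_named(operating_mode, mode, charp, 0444);
    MODULE_PARM_DESC(operating_mode, "auto, manual or off");

    MODULE_LICENSE("GPL");
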
+/**
+ * module_param_cb - general callback for a module/cmdline parameter
+ * @name: a valid C identifier which is the parameter name.
+ * @ops: the set & get operations for this parameter.
+ * @arg: the argument passed to @ops (typically a pointer to the variable).
+ * @perm: visibility in sysfs.
+ *
+ * The ops can have NULL set or get functions.
+ */
+#define module_param_cb(name, ops, arg, perm) \
+ __module_param_call(MODULE_PARAM_PREFIX, \
+ name, ops, arg, __same_type(*(arg), bool), perm)
+
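A hedged sketch of module_param_cb() with a custom kernel_param_ops, e.g. to range-check a value in set() while reusing the stock getter; every name here is made up:

    #include <linux/kernel.h>
    #include <linux/log2.h>
    #include <linux/module.h>
    #include <linux/moduleparam.h>

    static unsigned int ring_size = 256;

    static int set_ring_size(const char *val, const struct kernel_param *kp)
    {
            unsigned long n;

            if (strict_strtoul(val, 0, &n) || n < 16 || n > 4096 ||
                !is_power_of_2(n))
                    return -EINVAL;
            *(unsigned int *)kp->arg = n;
            return 0;
    }

    static struct kernel_param_ops ring_size_ops = {
            .set = set_ring_size,
            .get = param_get_uint,          /* reuse the stock getter */
    };

    module_param_cb(ring_size, &ring_size_ops, &ring_size, 0644);
    MODULE_PARM_DESC(ring_size, "Ring size (power of two, 16..4096)");
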
/* On alpha, ia64 and ppc64 relocations to global data cannot go into
read-only sections (which is part of respective UNIX ABI on these
platforms). So 'const' makes no sense and even causes compile failures
@@ -80,10 +138,8 @@ struct kparam_array
#endif
/* This is the fundamental function for registering boot/module
- parameters. perm sets the visibility in sysfs: 000 means it's
- not there, read bits mean it's readable, write bits mean it's
- writable. */
-#define __module_param_call(prefix, name, set, get, arg, isbool, perm) \
+ parameters. */
+#define __module_param_call(prefix, name, ops, arg, isbool, perm) \
/* Default value instead of permissions? */ \
static int __param_perm_check_##name __attribute__((unused)) = \
BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)) \
@@ -92,31 +148,87 @@ struct kparam_array
static struct kernel_param __moduleparam_const __param_##name \
__used \
__attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
- = { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0, \
- set, get, { arg } }
+ = { __param_str_##name, ops, perm, isbool ? KPARAM_ISBOOL : 0, \
+ { arg } }
+
+/* Obsolete - use module_param_cb() */
+#define module_param_call(name, set, get, arg, perm) \
+ static struct kernel_param_ops __param_ops_##name = \
+ { (void *)set, (void *)get }; \
+ __module_param_call(MODULE_PARAM_PREFIX, \
+ name, &__param_ops_##name, arg, \
+ __same_type(*(arg), bool), \
+ (perm) + sizeof(__check_old_set_param(set))*0)
+
+/* We don't get oldget: it's often a new-style param_get_uint, etc. */
+static inline int
+__check_old_set_param(int (*oldset)(const char *, struct kernel_param *))
+{
+ return 0;
+}
-#define module_param_call(name, set, get, arg, perm) \
- __module_param_call(MODULE_PARAM_PREFIX, \
- name, set, get, arg, \
- __same_type(*(arg), bool), perm)
+/**
+ * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs.
+ * @name: the name of the parameter
+ *
+ * There's no point blocking write on a parameter that isn't writable via sysfs!
+ */
+#define kparam_block_sysfs_write(name) \
+ do { \
+ BUG_ON(!(__param_##name.perm & 0222)); \
+ __kernel_param_lock(); \
+ } while (0)
-/* Helper functions: type is byte, short, ushort, int, uint, long,
- ulong, charp, bool or invbool, or XXX if you define param_get_XXX,
- param_set_XXX and param_check_XXX. */
-#define module_param_named(name, value, type, perm) \
- param_check_##type(name, &(value)); \
- module_param_call(name, param_set_##type, param_get_##type, &value, perm); \
- __MODULE_PARM_TYPE(name, #type)
+/**
+ * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again.
+ * @name: the name of the parameter
+ */
+#define kparam_unblock_sysfs_write(name) \
+ do { \
+ BUG_ON(!(__param_##name.perm & 0222)); \
+ __kernel_param_unlock(); \
+ } while (0)
-#define module_param(name, type, perm) \
- module_param_named(name, name, type, perm)
+/**
+ * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs.
+ * @name: the name of the parameter
+ *
+ * This also blocks sysfs writes.
+ */
+#define kparam_block_sysfs_read(name) \
+ do { \
+ BUG_ON(!(__param_##name.perm & 0444)); \
+ __kernel_param_lock(); \
+ } while (0)
+
+/**
+ * kparam_unblock_sysfs_read - allows sysfs to read a parameter again.
+ * @name: the name of the parameter
+ */
+#define kparam_unblock_sysfs_read(name) \
+ do { \
+ BUG_ON(!(__param_##name.perm & 0444)); \
+ __kernel_param_unlock(); \
+ } while (0)
+
+#ifdef CONFIG_SYSFS
+extern void __kernel_param_lock(void);
+extern void __kernel_param_unlock(void);
+#else
+static inline void __kernel_param_lock(void)
+{
+}
+static inline void __kernel_param_unlock(void)
+{
+}
+#endif
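
A hedged sketch of why a writable charp parameter needs these helpers: the backing string may be freed and replaced by a concurrent sysfs write, so readers wrap the dereference in the block/unblock pair (hypothetical names):

    #include <linux/module.h>
    #include <linux/moduleparam.h>
    #include <linux/string.h>

    static char *devname = "eth0";
    module_param(devname, charp, 0644);     /* writable: racy without locking */

    static bool demo_matches(const char *name)
    {
            bool ret;

            /*
             * Hold the param lock across the dereference so a concurrent
             * sysfs write cannot kfree the string underneath us.
             */
            kparam_block_sysfs_write(devname);
            ret = strcmp(name, devname) == 0;
            kparam_unblock_sysfs_write(devname);

            return ret;
    }
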
#ifndef MODULE
/**
* core_param - define a historical core kernel parameter.
* @name: the name of the cmdline and sysfs parameter (often the same as var)
* @var: the variable
- * @type: the type (for param_set_##type and param_get_##type)
+ * @type: the type of the parameter
* @perm: visibility in sysfs
*
* core_param is just like module_param(), but cannot be modular and
@@ -126,23 +238,32 @@ struct kparam_array
*/
#define core_param(name, var, type, perm) \
param_check_##type(name, &(var)); \
- __module_param_call("", name, param_set_##type, param_get_##type, \
+ __module_param_call("", name, &param_ops_##type, \
&var, __same_type(var, bool), perm)
#endif /* !MODULE */
-/* Actually copy string: maxlen param is usually sizeof(string). */
+/**
+ * module_param_string - a char array parameter
+ * @name: the name of the parameter
+ * @string: the string variable
+ * @len: the maximum length of the string, incl. terminator
+ * @perm: visibility in sysfs.
+ *
+ * This actually copies the string when it's set (unlike type charp).
+ * @len is usually just sizeof(string).
+ */
#define module_param_string(name, string, len, perm) \
static const struct kparam_string __param_string_##name \
= { len, string }; \
__module_param_call(MODULE_PARAM_PREFIX, name, \
- param_set_copystring, param_get_string, \
+ &param_ops_string, \
.str = &__param_string_##name, 0, perm); \
__MODULE_PARM_TYPE(name, "string")
/* Called on module insert or kernel boot */
extern int parse_args(const char *name,
char *args,
- struct kernel_param *params,
+ const struct kernel_param *params,
unsigned num,
int (*unknown)(char *param, char *val));
@@ -162,41 +283,50 @@ static inline void destroy_params(const struct kernel_param *params,
#define __param_check(name, p, type) \
static inline type *__check_##name(void) { return(p); }
-extern int param_set_byte(const char *val, struct kernel_param *kp);
-extern int param_get_byte(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_byte;
+extern int param_set_byte(const char *val, const struct kernel_param *kp);
+extern int param_get_byte(char *buffer, const struct kernel_param *kp);
#define param_check_byte(name, p) __param_check(name, p, unsigned char)
-extern int param_set_short(const char *val, struct kernel_param *kp);
-extern int param_get_short(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_short;
+extern int param_set_short(const char *val, const struct kernel_param *kp);
+extern int param_get_short(char *buffer, const struct kernel_param *kp);
#define param_check_short(name, p) __param_check(name, p, short)
-extern int param_set_ushort(const char *val, struct kernel_param *kp);
-extern int param_get_ushort(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_ushort;
+extern int param_set_ushort(const char *val, const struct kernel_param *kp);
+extern int param_get_ushort(char *buffer, const struct kernel_param *kp);
#define param_check_ushort(name, p) __param_check(name, p, unsigned short)
-extern int param_set_int(const char *val, struct kernel_param *kp);
-extern int param_get_int(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_int;
+extern int param_set_int(const char *val, const struct kernel_param *kp);
+extern int param_get_int(char *buffer, const struct kernel_param *kp);
#define param_check_int(name, p) __param_check(name, p, int)
-extern int param_set_uint(const char *val, struct kernel_param *kp);
-extern int param_get_uint(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_uint;
+extern int param_set_uint(const char *val, const struct kernel_param *kp);
+extern int param_get_uint(char *buffer, const struct kernel_param *kp);
#define param_check_uint(name, p) __param_check(name, p, unsigned int)
-extern int param_set_long(const char *val, struct kernel_param *kp);
-extern int param_get_long(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_long;
+extern int param_set_long(const char *val, const struct kernel_param *kp);
+extern int param_get_long(char *buffer, const struct kernel_param *kp);
#define param_check_long(name, p) __param_check(name, p, long)
-extern int param_set_ulong(const char *val, struct kernel_param *kp);
-extern int param_get_ulong(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_ulong;
+extern int param_set_ulong(const char *val, const struct kernel_param *kp);
+extern int param_get_ulong(char *buffer, const struct kernel_param *kp);
#define param_check_ulong(name, p) __param_check(name, p, unsigned long)
-extern int param_set_charp(const char *val, struct kernel_param *kp);
-extern int param_get_charp(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_charp;
+extern int param_set_charp(const char *val, const struct kernel_param *kp);
+extern int param_get_charp(char *buffer, const struct kernel_param *kp);
#define param_check_charp(name, p) __param_check(name, p, char *)
/* For historical reasons "bool" parameters can be (unsigned) "int". */
-extern int param_set_bool(const char *val, struct kernel_param *kp);
-extern int param_get_bool(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_bool;
+extern int param_set_bool(const char *val, const struct kernel_param *kp);
+extern int param_get_bool(char *buffer, const struct kernel_param *kp);
#define param_check_bool(name, p) \
static inline void __check_##name(void) \
{ \
@@ -205,29 +335,53 @@ extern int param_get_bool(char *buffer, struct kernel_param *kp);
!__same_type(*(p), int)); \
}
-extern int param_set_invbool(const char *val, struct kernel_param *kp);
-extern int param_get_invbool(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_invbool;
+extern int param_set_invbool(const char *val, const struct kernel_param *kp);
+extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
#define param_check_invbool(name, p) __param_check(name, p, bool)
-/* Comma-separated array: *nump is set to number they actually specified. */
+/**
+ * module_param_array - a parameter which is an array of some type
+ * @name: the name of the array variable
+ * @type: the type, as per module_param()
+ * @nump: optional pointer filled in with the number written
+ * @perm: visibility in sysfs
+ *
+ * Input and output are as comma-separated values. Commas inside values
+ * don't work properly (eg. an array of charp).
+ *
+ * ARRAY_SIZE(@name) is used to determine the number of elements in the
+ * array, so the definition must be visible.
+ */
+#define module_param_array(name, type, nump, perm) \
+ module_param_array_named(name, name, type, nump, perm)
+
+/**
+ * module_param_array_named - renamed parameter which is an array of some type
+ * @name: a valid C identifier which is the parameter name
+ * @array: the name of the array variable
+ * @type: the type, as per module_param()
+ * @nump: optional pointer filled in with the number written
+ * @perm: visibility in sysfs
+ *
+ * This exposes a different name than the actual variable name. See
+ * module_param_named() for why this might be necessary.
+ */
#define module_param_array_named(name, array, type, nump, perm) \
static const struct kparam_array __param_arr_##name \
- = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\
+ = { ARRAY_SIZE(array), nump, &param_ops_##type, \
sizeof(array[0]), array }; \
__module_param_call(MODULE_PARAM_PREFIX, name, \
- param_array_set, param_array_get, \
+ &param_array_ops, \
.arr = &__param_arr_##name, \
__same_type(array[0], bool), perm); \
__MODULE_PARM_TYPE(name, "array of " #type)
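
A brief, hypothetical example of an array parameter as documented above; loading with "demo.irq=5,9,11" would fill the first three slots and set irq_count to 3:

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    static int irq[4] = { -1, -1, -1, -1 };
    static unsigned int irq_count;
    module_param_array(irq, int, &irq_count, 0444);
    MODULE_PARM_DESC(irq, "IRQ lines (comma-separated, up to 4)");
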
-#define module_param_array(name, type, nump, perm) \
- module_param_array_named(name, name, type, nump, perm)
-
-extern int param_array_set(const char *val, struct kernel_param *kp);
-extern int param_array_get(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_array_ops;
-extern int param_set_copystring(const char *val, struct kernel_param *kp);
-extern int param_get_string(char *buffer, struct kernel_param *kp);
+extern struct kernel_param_ops param_ops_string;
+extern int param_set_copystring(const char *val, const struct kernel_param *);
+extern int param_get_string(char *buffer, const struct kernel_param *kp);
/* for exporting parameters in /sys/parameters */
@@ -235,13 +389,13 @@ struct module;
#if defined(CONFIG_SYSFS) && defined(CONFIG_MODULES)
extern int module_param_sysfs_setup(struct module *mod,
- struct kernel_param *kparam,
+ const struct kernel_param *kparam,
unsigned int num_params);
extern void module_param_sysfs_remove(struct module *mod);
#else
static inline int module_param_sysfs_setup(struct module *mod,
- struct kernel_param *kparam,
+ const struct kernel_param *kparam,
unsigned int num_params)
{
return 0;
diff --git a/include/linux/vhost.h b/include/linux/vhost.h
new file mode 100644
index 000000000000..e847f1e30756
--- /dev/null
+++ b/include/linux/vhost.h
@@ -0,0 +1,130 @@
+#ifndef _LINUX_VHOST_H
+#define _LINUX_VHOST_H
+/* Userspace interface for in-kernel virtio accelerators. */
+
+/* vhost is used to reduce the number of system calls involved in virtio.
+ *
+ * Existing virtio net code is used in the guest without modification.
+ *
+ * This header includes interface used by userspace hypervisor for
+ * device configuration.
+ */
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/ioctl.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+struct vhost_vring_state {
+ unsigned int index;
+ unsigned int num;
+};
+
+struct vhost_vring_file {
+ unsigned int index;
+ int fd; /* Pass -1 to unbind from file. */
+
+};
+
+struct vhost_vring_addr {
+ unsigned int index;
+ /* Option flags. */
+ unsigned int flags;
+ /* Flag values: */
+ /* Whether log address is valid. If set enables logging. */
+#define VHOST_VRING_F_LOG 0
+
+ /* Start of array of descriptors (virtually contiguous) */
+ __u64 desc_user_addr;
+ /* Used structure address. Must be 32 bit aligned */
+ __u64 used_user_addr;
+ /* Available structure address. Must be 16 bit aligned */
+ __u64 avail_user_addr;
+ /* Logging support. */
+ /* Log writes to used structure, at offset calculated from specified
+ * address. Address must be 32 bit aligned. */
+ __u64 log_guest_addr;
+};
+
+struct vhost_memory_region {
+ __u64 guest_phys_addr;
+ __u64 memory_size; /* bytes */
+ __u64 userspace_addr;
+ __u64 flags_padding; /* No flags are currently specified. */
+};
+
+/* All region addresses and sizes must be 4K aligned. */
+#define VHOST_PAGE_SIZE 0x1000
+
+struct vhost_memory {
+ __u32 nregions;
+ __u32 padding;
+ struct vhost_memory_region regions[0];
+};
+
+/* ioctls */
+
+#define VHOST_VIRTIO 0xAF
+
+/* Features bitmask for forward compatibility. Transport bits are used for
+ * vhost specific features. */
+#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
+
+/* Set current process as the (exclusive) owner of this file descriptor. This
+ * must be called before any other vhost command. Further calls to
+ * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */
+#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
+/* Give up ownership, and reset the device to default values.
+ * Allows a subsequent call to VHOST_SET_OWNER to succeed. */
+#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
+
+/* Set up/modify memory layout */
+#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory)
+
+/* Write logging setup. */
+/* Memory writes can optionally be logged by setting a bit at an offset
+ * (calculated from the physical address) from specified log base.
+ * The bit is set using an atomic 32 bit operation. */
+/* Set base address for logging. */
+#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
+/* Specify an eventfd file descriptor to signal on log write. */
+#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+
+/* Ring setup. */
+/* Set number of descriptors in ring. This parameter can not
+ * be modified while ring is running (bound to a device). */
+#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
+/* Set addresses for the ring. */
+#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
+/* Base value where queue looks for available descriptors */
+#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+/* Get accessor: reads index, writes value in num */
+#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+
+/* The following ioctls use eventfd file descriptors to signal and poll
+ * for events. */
+
+/* Set eventfd to poll for added buffers */
+#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
+/* Set eventfd to signal when buffers have been used */
+#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
+/* Set eventfd to signal an error */
+#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+
+/* VHOST_NET specific defines */
+
+/* Attach virtio net ring to a raw socket, or tap device.
+ * The socket must already be bound to an ethernet device; this device will be
+ * used for transmit. Pass fd -1 to unbind from the socket and the transmit
+ * device. This can be used to stop the ring (e.g. for migration). */
+#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
+
+/* Feature bits */
+/* Log all write descriptors. Can be changed while device is active. */
+#define VHOST_F_LOG_ALL 26
+/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
+#define VHOST_NET_F_VIRTIO_NET_HDR 27
+
+#endif
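
For context, a hedged userspace sketch (not part of the patch) of the intended ioctl ordering: take ownership, negotiate features, register memory, then size a vring. The /dev/vhost-net path, the availability of this header to userspace, and the abbreviated error handling are assumptions:

    #include <fcntl.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/vhost.h>

    /* 1 MiB of "guest" memory; regions must be 4K aligned (VHOST_PAGE_SIZE). */
    static unsigned char guest_mem[1 << 20] __attribute__((aligned(4096)));

    int main(void)
    {
            /* Assumed device node for the vhost-net misc device (minor 233). */
            int vhost = open("/dev/vhost-net", O_RDWR);
            __u64 features;
            struct vhost_vring_state num = { .index = 0, .num = 256 };
            struct vhost_memory *mem;

            if (vhost < 0 || ioctl(vhost, VHOST_SET_OWNER) < 0)
                    return 1;

            /* Feature negotiation: read what the kernel offers, ack a subset. */
            if (ioctl(vhost, VHOST_GET_FEATURES, &features) < 0)
                    return 1;
            features &= ~(1ULL << VHOST_F_LOG_ALL);  /* e.g. no dirty logging */
            ioctl(vhost, VHOST_SET_FEATURES, &features);

            /* Describe one memory region covering guest_mem. */
            mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
            mem->nregions = 1;
            mem->regions[0].guest_phys_addr = 0;
            mem->regions[0].memory_size = sizeof(guest_mem);
            mem->regions[0].userspace_addr = (unsigned long)guest_mem;
            ioctl(vhost, VHOST_SET_MEM_TABLE, mem);

            /* Size vring 0; SET_VRING_ADDR, SET_VRING_KICK/CALL etc. follow. */
            ioctl(vhost, VHOST_SET_VRING_NUM, &num);

            close(vhost);
            free(mem);
            return 0;
    }
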
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 0093dd7c1d6f..800617b4ddd5 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -109,7 +109,10 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
unsigned int fbit)
{
/* Did you forget to fix assumptions on max features? */
- MAYBE_BUILD_BUG_ON(fbit >= 32);
+ if (__builtin_constant_p(fbit))
+ BUILD_BUG_ON(fbit >= 32);
+ else
+ BUG_ON(fbit >= 32);
if (fbit < VIRTIO_TRANSPORT_F_START)
virtio_check_driver_offered_feature(vdev, fbit);
diff --git a/init/Kconfig b/init/Kconfig
index 9e03ef8b311e..d3a5db8fb011 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1118,6 +1118,11 @@ config BASE_SMALL
default 0 if BASE_FULL
default 1 if !BASE_FULL
+config HAVE_SYMBOL_PREFIX
+ bool
+ help
+ Some arch toolchains use a `_' prefix for all user symbols.
+
menuconfig MODULES
bool "Enable loadable module support"
help
diff --git a/init/main.c b/init/main.c
index 5988debfc505..698066ff8762 100644
--- a/init/main.c
+++ b/init/main.c
@@ -187,11 +187,11 @@ static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
-extern struct obs_kernel_param __setup_start[], __setup_end[];
+extern const struct obs_kernel_param __setup_start[], __setup_end[];
static int __init obsolete_checksetup(char *line)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
int had_early_param = 0;
p = __setup_start;
@@ -446,7 +446,7 @@ static noinline void __init_refok rest_init(void)
/* Check for early params. */
static int __init do_early_param(char *param, char *val)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
for (p = __setup_start; p < __setup_end; p++) {
if ((p->early && strcmp(param, p->str) == 0) ||
@@ -522,7 +522,7 @@ static void __init mm_init(void)
asmlinkage void __init start_kernel(void)
{
char * command_line;
- extern struct kernel_param __start___param[], __stop___param[];
+ extern const struct kernel_param __start___param[], __stop___param[];
smp_setup_processor_id();
diff --git a/kernel/module.c b/kernel/module.c
index 8b7d8805819d..29432fd20340 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -55,6 +55,7 @@
#include <linux/async.h>
#include <linux/percpu.h>
#include <linux/kmemleak.h>
+#include <linux/bsearch.h>
#define CREATE_TRACE_POINTS
#include <trace/events/module.h>
@@ -192,25 +193,71 @@ extern const unsigned long __start___kcrctab_unused[];
extern const unsigned long __start___kcrctab_unused_gpl[];
#endif
+static struct ksymtab ksymtab[EXPORT_TYPE_MAX];
+
+#ifdef CONFIG_MODVERSIONS
+#define init_one_ksymtab(ksymt, start, stop, crc_start) \
+ do { \
+ struct ksymtab *ks = (ksymt); \
+ ks->syms = (start); \
+ ks->num_syms = (stop) - ks->syms; \
+ ks->crcs = (crc_start); \
+ } while (0)
+#else
+#define init_one_ksymtab(ksymt, start, stop, crc_start) \
+ do { \
+ struct ksymtab *ks = (ksymt); \
+ ks->syms = (start); \
+ ks->num_syms = (stop) - ks->syms; \
+ } while (0)
+#endif
+
+static int __init init_ksymtab(void)
+{
+ init_one_ksymtab(&ksymtab[EXPORT_TYPE_PLAIN],
+ __start___ksymtab, __stop___ksymtab,
+ __start___kcrctab);
+ init_one_ksymtab(&ksymtab[EXPORT_TYPE_GPL],
+ __start___ksymtab_gpl, __stop___ksymtab_gpl,
+ __start___kcrctab_gpl);
+#ifdef CONFIG_UNUSED_SYMBOLS
+ init_one_ksymtab(&ksymtab[EXPORT_TYPE_UNUSED],
+ __start___ksymtab_unused,
+ __stop___ksymtab_unused,
+ __start___kcrctab_unused);
+ init_one_ksymtab(&ksymtab[EXPORT_TYPE_UNUSED_GPL],
+ __start___ksymtab_unused_gpl,
+ __stop___ksymtab_unused_gpl,
+ __start___kcrctab_unused_gpl);
+#endif
+ init_one_ksymtab(&ksymtab[EXPORT_TYPE_GPL_FUTURE],
+ __start___ksymtab_gpl_future,
+ __stop___ksymtab_gpl_future,
+ __start___kcrctab_gpl_future);
+
+ return 0;
+}
+pure_initcall(init_ksymtab);
+
#ifndef CONFIG_MODVERSIONS
#define symversion(base, idx) NULL
#else
#define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL)
#endif
-static bool each_symbol_in_section(const struct symsearch *arr,
- unsigned int arrsize,
+static bool each_symbol_in_section(const struct ksymtab syms[EXPORT_TYPE_MAX],
struct module *owner,
- bool (*fn)(const struct symsearch *syms,
- struct module *owner,
- unsigned int symnum, void *data),
+ each_symbol_fn_t *fn,
void *data)
{
- unsigned int i, j;
+ enum export_type type;
+ unsigned int i;
- for (j = 0; j < arrsize; j++) {
- for (i = 0; i < arr[j].stop - arr[j].start; i++)
- if (fn(&arr[j], owner, i, data))
+ for (type = 0; type < EXPORT_TYPE_MAX; type++) {
+ for (i = 0; i < syms[type].num_syms; i++)
+ if (fn(type, &syms[type].syms[i],
+ symversion(syms[type].crcs, i),
+ owner, data))
return true;
}
@@ -218,90 +265,113 @@ static bool each_symbol_in_section(const struct symsearch *arr,
}
/* Returns true as soon as fn returns true, otherwise false. */
-bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner,
- unsigned int symnum, void *data), void *data)
+bool each_symbol(each_symbol_fn_t *fn, void *data)
{
struct module *mod;
- const struct symsearch arr[] = {
- { __start___ksymtab, __stop___ksymtab, __start___kcrctab,
- NOT_GPL_ONLY, false },
- { __start___ksymtab_gpl, __stop___ksymtab_gpl,
- __start___kcrctab_gpl,
- GPL_ONLY, false },
- { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
- __start___kcrctab_gpl_future,
- WILL_BE_GPL_ONLY, false },
-#ifdef CONFIG_UNUSED_SYMBOLS
- { __start___ksymtab_unused, __stop___ksymtab_unused,
- __start___kcrctab_unused,
- NOT_GPL_ONLY, true },
- { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
- __start___kcrctab_unused_gpl,
- GPL_ONLY, true },
-#endif
- };
- if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
+ if (each_symbol_in_section(ksymtab, NULL, fn, data))
return true;
list_for_each_entry_rcu(mod, &modules, list) {
- struct symsearch arr[] = {
- { mod->syms, mod->syms + mod->num_syms, mod->crcs,
- NOT_GPL_ONLY, false },
- { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
- mod->gpl_crcs,
- GPL_ONLY, false },
- { mod->gpl_future_syms,
- mod->gpl_future_syms + mod->num_gpl_future_syms,
- mod->gpl_future_crcs,
- WILL_BE_GPL_ONLY, false },
-#ifdef CONFIG_UNUSED_SYMBOLS
- { mod->unused_syms,
- mod->unused_syms + mod->num_unused_syms,
- mod->unused_crcs,
- NOT_GPL_ONLY, true },
- { mod->unused_gpl_syms,
- mod->unused_gpl_syms + mod->num_unused_gpl_syms,
- mod->unused_gpl_crcs,
- GPL_ONLY, true },
-#endif
- };
-
- if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
+ if (each_symbol_in_section(mod->syms, mod, fn, data))
return true;
}
return false;
}
EXPORT_SYMBOL_GPL(each_symbol);
-struct find_symbol_arg {
- /* Input */
- const char *name;
- bool gplok;
- bool warn;
+static int symbol_compare(const void *key, const void *elt)
+{
+ const char *str = key;
+ const struct kernel_symbol *sym = elt;
+ return strcmp(str, sym->name);
+}
- /* Output */
- struct module *owner;
- const unsigned long *crc;
+/* binary search on sorted symbols */
+static const struct kernel_symbol *find_symbol_in_kernel(
+ const char *name,
+ enum export_type *t,
+ const unsigned long **crc)
+{
+ struct kernel_symbol *sym;
+ enum export_type type;
+ unsigned int i;
+
+ for (type = 0; type < ARRAY_SIZE(ksymtab); type++) {
+ sym = bsearch(name, ksymtab[type].syms, ksymtab[type].num_syms,
+ sizeof(struct kernel_symbol), symbol_compare);
+ if (sym) {
+ i = sym - ksymtab[type].syms;
+ *crc = symversion(ksymtab[type].crcs, i);
+ *t = type;
+ return sym;
+ }
+ }
+
+ return NULL;
+}
+
+/* linear search on unsorted symbols */
+static const struct kernel_symbol *find_symbol_in_module(
+ struct module *mod,
+ const char *name,
+ enum export_type *t,
+ const unsigned long **crc)
+{
+ struct ksymtab *symtab = mod->syms;
const struct kernel_symbol *sym;
-};
+ enum export_type type;
+ unsigned int i;
-static bool find_symbol_in_section(const struct symsearch *syms,
- struct module *owner,
- unsigned int symnum, void *data)
+ for (type = 0; type < EXPORT_TYPE_MAX; type++) {
+ for (i = 0; i < symtab[type].num_syms; i++) {
+ sym = &symtab[type].syms[i];
+
+ if (strcmp(sym->name, name) == 0) {
+ *crc = symversion(symtab[type].crcs, i);
+ *t = type;
+ return sym;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/* Find a symbol and return it, along with (optional) crc and
+ * (optional) module which owns it */
+const struct kernel_symbol *find_symbol(const char *name,
+ struct module **owner,
+ const unsigned long **crc,
+ bool gplok,
+ bool warn)
{
- struct find_symbol_arg *fsa = data;
+ struct module *mod = NULL;
+ const struct kernel_symbol *sym;
+ enum export_type type;
+ const unsigned long *crc_value;
- if (strcmp(syms->start[symnum].name, fsa->name) != 0)
- return false;
+ sym = find_symbol_in_kernel(name, &type, &crc_value);
+ if (sym)
+ goto found;
+
+ list_for_each_entry_rcu(mod, &modules, list) {
+ sym = find_symbol_in_module(mod, name, &type, &crc_value);
+ if (sym)
+ goto found;
+ }
- if (!fsa->gplok) {
- if (syms->licence == GPL_ONLY)
- return false;
- if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) {
+ DEBUGP("Failed to find symbol %s\n", name);
+ return NULL;
+
+found:
+ if (!gplok) {
+ if (export_is_gpl_only(type))
+ return NULL;
+ if (export_is_gpl_future(type) && warn) {
printk(KERN_WARNING "Symbol %s is being used "
"by a non-GPL module, which will not "
- "be allowed in the future\n", fsa->name);
+ "be allowed in the future\n", name);
printk(KERN_WARNING "Please see the file "
"Documentation/feature-removal-schedule.txt "
"in the kernel source tree for more details.\n");
@@ -309,9 +379,9 @@ static bool find_symbol_in_section(const struct symsearch *syms,
}
#ifdef CONFIG_UNUSED_SYMBOLS
- if (syms->unused && fsa->warn) {
+ if (export_is_unused(type) && warn) {
printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
- "however this module is using it.\n", fsa->name);
+ "however this module is using it.\n", name);
printk(KERN_WARNING
"This symbol will go away in the future.\n");
printk(KERN_WARNING
@@ -322,36 +392,11 @@ static bool find_symbol_in_section(const struct symsearch *syms,
}
#endif
- fsa->owner = owner;
- fsa->crc = symversion(syms->crcs, symnum);
- fsa->sym = &syms->start[symnum];
- return true;
-}
-
-/* Find a symbol and return it, along with, (optional) crc and
- * (optional) module which owns it */
-const struct kernel_symbol *find_symbol(const char *name,
- struct module **owner,
- const unsigned long **crc,
- bool gplok,
- bool warn)
-{
- struct find_symbol_arg fsa;
-
- fsa.name = name;
- fsa.gplok = gplok;
- fsa.warn = warn;
-
- if (each_symbol(find_symbol_in_section, &fsa)) {
- if (owner)
- *owner = fsa.owner;
- if (crc)
- *crc = fsa.crc;
- return fsa.sym;
- }
-
- DEBUGP("Failed to find symbol %s\n", name);
- return NULL;
+ if (owner)
+ *owner = mod;
+ if (crc)
+ *crc = crc_value;
+ return sym;
}
EXPORT_SYMBOL_GPL(find_symbol);
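
The lookup above works because the built-in export tables are now emitted sorted (the mod_export.h/modpost half of this series). For reference, a self-contained sketch of the same pattern with a hypothetical table, assuming the bsearch(key, base, num, size, cmp) signature used in find_symbol_in_kernel():

    #include <linux/bsearch.h>
    #include <linux/kernel.h>
    #include <linux/string.h>

    /* Hypothetical table, kept sorted by name like the export sections. */
    struct demo_sym {
            const char *name;
            unsigned long addr;
    };

    static const struct demo_sym demo_tab[] = {
            { "alpha", 0x1000 },
            { "bravo", 0x2000 },
            { "delta", 0x3000 },
    };

    static int demo_cmp(const void *key, const void *elt)
    {
            const struct demo_sym *sym = elt;

            return strcmp(key, sym->name);
    }

    static unsigned long demo_lookup(const char *name)
    {
            const struct demo_sym *sym;

            sym = bsearch(name, demo_tab, ARRAY_SIZE(demo_tab),
                          sizeof(demo_tab[0]), demo_cmp);
            return sym ? sym->addr : 0;
    }
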
@@ -1030,6 +1075,16 @@ static int try_to_force_load(struct module *mod, const char *reason)
}
#ifdef CONFIG_MODVERSIONS
+static const char *crc_section_names[] = {
+ [EXPORT_TYPE_PLAIN] = "__kcrctab",
+ [EXPORT_TYPE_GPL] = "__kcrctab_gpl",
+#ifdef CONFIG_UNUSED_SYMBOLS
+ [EXPORT_TYPE_UNUSED] = "__kcrctab_unused",
+ [EXPORT_TYPE_UNUSED_GPL] = "__kcrctab_unused_gpl",
+#endif
+ [EXPORT_TYPE_GPL_FUTURE] = "__kcrctab_gpl_future",
+};
+
static int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
@@ -1564,23 +1619,13 @@ EXPORT_SYMBOL_GPL(__symbol_get);
static int verify_export_symbols(struct module *mod)
{
unsigned int i;
- struct module *owner;
+ enum export_type type;
const struct kernel_symbol *s;
- struct {
- const struct kernel_symbol *sym;
- unsigned int num;
- } arr[] = {
- { mod->syms, mod->num_syms },
- { mod->gpl_syms, mod->num_gpl_syms },
- { mod->gpl_future_syms, mod->num_gpl_future_syms },
-#ifdef CONFIG_UNUSED_SYMBOLS
- { mod->unused_syms, mod->num_unused_syms },
- { mod->unused_gpl_syms, mod->num_unused_gpl_syms },
-#endif
- };
+ struct module *owner;
- for (i = 0; i < ARRAY_SIZE(arr); i++) {
- for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
+ for (type = 0; type < EXPORT_TYPE_MAX; type++) {
+ for (i = 0; i < mod->syms[type].num_syms; i++) {
+ s = &mod->syms[type].syms[i];
if (find_symbol(s->name, &owner, NULL, true, false)) {
printk(KERN_ERR
"%s: exports duplicate symbol %s"
@@ -1810,27 +1855,19 @@ static void free_modinfo(struct module *mod)
#ifdef CONFIG_KALLSYMS
-/* lookup symbol in given range of kernel_symbols */
-static const struct kernel_symbol *lookup_symbol(const char *name,
- const struct kernel_symbol *start,
- const struct kernel_symbol *stop)
-{
- const struct kernel_symbol *ks = start;
- for (; ks < stop; ks++)
- if (strcmp(ks->name, name) == 0)
- return ks;
- return NULL;
-}
-
static int is_exported(const char *name, unsigned long value,
- const struct module *mod)
+ struct module *mod)
{
- const struct kernel_symbol *ks;
- if (!mod)
- ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
+ const struct kernel_symbol *sym;
+ enum export_type type;
+ const unsigned long *crc;
+
+ if (mod)
+ sym = find_symbol_in_module(mod, name, &type, &crc);
else
- ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
- return ks != NULL && ks->value == value;
+ sym = find_symbol_in_kernel(name, &type, &crc);
+
+ return (sym && sym->value == value);
}
/* As per nm */
@@ -2066,6 +2103,16 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
}
#endif
+static const char *export_section_names[] = {
+ [EXPORT_TYPE_PLAIN] = "__ksymtab",
+ [EXPORT_TYPE_GPL] = "__ksymtab_gpl",
+#ifdef CONFIG_UNUSED_SYMBOLS
+ [EXPORT_TYPE_UNUSED] = "__ksymtab_unused",
+ [EXPORT_TYPE_UNUSED_GPL] = "__ksymtab_unused_gpl",
+#endif
+ [EXPORT_TYPE_GPL_FUTURE] = "__ksymtab_gpl_future",
+};
+
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static noinline struct module *load_module(void __user *umod,
@@ -2080,6 +2127,7 @@ static noinline struct module *load_module(void __user *umod,
unsigned int symindex = 0;
unsigned int strindex = 0;
unsigned int modindex, versindex, infoindex, pcpuindex;
+ enum export_type export_type;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2339,34 +2387,21 @@ static noinline struct module *load_module(void __user *umod,
* find optional sections. */
mod->kp = section_objs(hdr, sechdrs, secstrings, "__param",
sizeof(*mod->kp), &mod->num_kp);
- mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
- sizeof(*mod->syms), &mod->num_syms);
- mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
- mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
- sizeof(*mod->gpl_syms),
- &mod->num_gpl_syms);
- mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
- mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
- "__ksymtab_gpl_future",
- sizeof(*mod->gpl_future_syms),
- &mod->num_gpl_future_syms);
- mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
- "__kcrctab_gpl_future");
-#ifdef CONFIG_UNUSED_SYMBOLS
- mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
- "__ksymtab_unused",
- sizeof(*mod->unused_syms),
- &mod->num_unused_syms);
- mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
- "__kcrctab_unused");
- mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
- "__ksymtab_unused_gpl",
- sizeof(*mod->unused_gpl_syms),
- &mod->num_unused_gpl_syms);
- mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
- "__kcrctab_unused_gpl");
+ export_type = 0;
+ for (; export_type < ARRAY_SIZE(export_section_names); export_type++) {
+ mod->syms[export_type].syms =
+ section_objs(hdr, sechdrs, secstrings,
+ export_section_names[export_type],
+ sizeof(struct kernel_symbol),
+ &mod->syms[export_type].num_syms);
+#ifdef CONFIG_MODVERSIONS
+ mod->syms[export_type].crcs =
+ section_addr(hdr, sechdrs, secstrings,
+ crc_section_names[export_type]);
#endif
+ }
+
#ifdef CONFIG_CONSTRUCTORS
mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors",
sizeof(*mod->ctors), &mod->num_ctors);
@@ -2391,19 +2426,20 @@ static noinline struct module *load_module(void __user *umod,
sizeof(*mod->ftrace_callsites),
&mod->num_ftrace_callsites);
#endif
+
#ifdef CONFIG_MODVERSIONS
- if ((mod->num_syms && !mod->crcs)
- || (mod->num_gpl_syms && !mod->gpl_crcs)
- || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
-#ifdef CONFIG_UNUSED_SYMBOLS
- || (mod->num_unused_syms && !mod->unused_crcs)
- || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
-#endif
- ) {
- err = try_to_force_load(mod,
- "no versions for exported symbols");
- if (err)
- goto cleanup;
+ export_type = 0;
+ for (; export_type < ARRAY_SIZE(mod->syms); export_type++) {
+ if (mod->syms[export_type].syms &&
+ !mod->syms[export_type].crcs) {
+ err = try_to_force_load(mod,
+ "no versions for exported symbols");
+ if (err)
+ goto cleanup;
+
+ /* force load approved, don't check other sections */
+ break;
+ }
}
#endif
diff --git a/kernel/params.c b/kernel/params.c
index d656c276508d..9ed6fcd84f2e 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -31,6 +31,42 @@
#define DEBUGP(fmt, a...)
#endif
+/* Protects all parameters, and incidentally kmalloced_param list. */
+static DEFINE_MUTEX(param_lock);
+
+/* This just allows us to keep track of which parameters are kmalloced. */
+struct kmalloced_param {
+ struct list_head list;
+ char val[];
+};
+static LIST_HEAD(kmalloced_params);
+
+static void *kmalloc_parameter(unsigned int size)
+{
+ struct kmalloced_param *p;
+
+ p = kmalloc(sizeof(*p) + size, GFP_KERNEL);
+ if (!p)
+ return NULL;
+
+ list_add(&p->list, &kmalloced_params);
+ return p->val;
+}
+
+/* Does nothing if parameter wasn't kmalloced above. */
+static void maybe_kfree_parameter(void *param)
+{
+ struct kmalloced_param *p;
+
+ list_for_each_entry(p, &kmalloced_params, list) {
+ if (p->val == param) {
+ list_del(&p->list);
+ kfree(p);
+ break;
+ }
+ }
+}
+
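The kmalloc_parameter()/maybe_kfree_parameter() pair above is a small ownership-tracking idiom: each allocation is prefixed with a list_head so the free path can distinguish strings it allocated from pointers into the boot command line. A generic, hedged sketch of the same idiom (hypothetical names; the locking the real code gets from param_lock is omitted):

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    struct tracked_buf {
            struct list_head list;
            char data[];            /* returned to the caller */
    };

    static LIST_HEAD(tracked_bufs);

    static char *tracked_strdup(const char *s)
    {
            struct tracked_buf *b;

            b = kmalloc(sizeof(*b) + strlen(s) + 1, GFP_KERNEL);
            if (!b)
                    return NULL;
            strcpy(b->data, s);
            list_add(&b->list, &tracked_bufs);
            return b->data;
    }

    /* Frees @p only if tracked_strdup() handed it out; others are left alone. */
    static void tracked_free(const char *p)
    {
            struct tracked_buf *b;

            list_for_each_entry(b, &tracked_bufs, list) {
                    if (b->data == p) {
                            list_del(&b->list);
                            kfree(b);
                            return;
                    }
            }
    }
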
static inline char dash2underscore(char c)
{
if (c == '-')
@@ -49,18 +85,22 @@ static inline int parameq(const char *input, const char *paramname)
static int parse_one(char *param,
char *val,
- struct kernel_param *params,
+ const struct kernel_param *params,
unsigned num_params,
int (*handle_unknown)(char *param, char *val))
{
unsigned int i;
+ int err;
/* Find parameter */
for (i = 0; i < num_params; i++) {
if (parameq(param, params[i].name)) {
DEBUGP("They are equal! Calling %p\n",
- params[i].set);
- return params[i].set(val, &params[i]);
+ params[i].ops->set);
+ mutex_lock(&param_lock);
+ err = params[i].ops->set(val, &params[i]);
+ mutex_unlock(&param_lock);
+ return err;
}
}
@@ -130,7 +170,7 @@ static char *next_arg(char *args, char **param, char **val)
/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
int parse_args(const char *name,
char *args,
- struct kernel_param *params,
+ const struct kernel_param *params,
unsigned num,
int (*unknown)(char *param, char *val))
{
@@ -179,7 +219,7 @@ int parse_args(const char *name,
/* Lazy bastard, eh? */
#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \
- int param_set_##name(const char *val, struct kernel_param *kp) \
+ int param_set_##name(const char *val, const struct kernel_param *kp) \
{ \
tmptype l; \
int ret; \
@@ -191,10 +231,18 @@ int parse_args(const char *name,
*((type *)kp->arg) = l; \
return 0; \
} \
- int param_get_##name(char *buffer, struct kernel_param *kp) \
+ int param_get_##name(char *buffer, const struct kernel_param *kp) \
{ \
return sprintf(buffer, format, *((type *)kp->arg)); \
- }
+ } \
+ struct kernel_param_ops param_ops_##name = { \
+ .set = param_set_##name, \
+ .get = param_get_##name, \
+ }; \
+ EXPORT_SYMBOL(param_set_##name); \
+ EXPORT_SYMBOL(param_get_##name); \
+ EXPORT_SYMBOL(param_ops_##name)
+
STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol);
@@ -204,7 +252,7 @@ STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol);
STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul);
-int param_set_charp(const char *val, struct kernel_param *kp)
+int param_set_charp(const char *val, const struct kernel_param *kp)
{
if (!val) {
printk(KERN_ERR "%s: string parameter expected\n",
@@ -218,25 +266,42 @@ int param_set_charp(const char *val, struct kernel_param *kp)
return -ENOSPC;
}
- /* This is a hack. We can't need to strdup in early boot, and we
+ maybe_kfree_parameter(*(char **)kp->arg);
+
+ /* This is a hack. We can't kmalloc in early boot, and we
* don't need to; this mangled commandline is preserved. */
if (slab_is_available()) {
- *(char **)kp->arg = kstrdup(val, GFP_KERNEL);
+ *(char **)kp->arg = kmalloc_parameter(strlen(val)+1);
if (!*(char **)kp->arg)
return -ENOMEM;
+ strcpy(*(char **)kp->arg, val);
} else
*(const char **)kp->arg = val;
return 0;
}
+EXPORT_SYMBOL(param_set_charp);
-int param_get_charp(char *buffer, struct kernel_param *kp)
+int param_get_charp(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "%s", *((char **)kp->arg));
}
+EXPORT_SYMBOL(param_get_charp);
+
+static void param_free_charp(void *arg)
+{
+ maybe_kfree_parameter(*((char **)arg));
+}
+
+struct kernel_param_ops param_ops_charp = {
+ .set = param_set_charp,
+ .get = param_get_charp,
+ .free = param_free_charp,
+};
+EXPORT_SYMBOL(param_ops_charp);
/* Actually could be a bool or an int, for historical reasons. */
-int param_set_bool(const char *val, struct kernel_param *kp)
+int param_set_bool(const char *val, const struct kernel_param *kp)
{
bool v;
@@ -261,8 +326,9 @@ int param_set_bool(const char *val, struct kernel_param *kp)
*(int *)kp->arg = v;
return 0;
}
+EXPORT_SYMBOL(param_set_bool);
-int param_get_bool(char *buffer, struct kernel_param *kp)
+int param_get_bool(char *buffer, const struct kernel_param *kp)
{
bool val;
if (kp->flags & KPARAM_ISBOOL)
@@ -273,9 +339,16 @@ int param_get_bool(char *buffer, struct kernel_param *kp)
/* Y and N chosen as being relatively non-coder friendly */
return sprintf(buffer, "%c", val ? 'Y' : 'N');
}
+EXPORT_SYMBOL(param_get_bool);
+
+struct kernel_param_ops param_ops_bool = {
+ .set = param_set_bool,
+ .get = param_get_bool,
+};
+EXPORT_SYMBOL(param_ops_bool);
/* This one must be bool. */
-int param_set_invbool(const char *val, struct kernel_param *kp)
+int param_set_invbool(const char *val, const struct kernel_param *kp)
{
int ret;
bool boolval;
@@ -288,18 +361,26 @@ int param_set_invbool(const char *val, struct kernel_param *kp)
*(bool *)kp->arg = !boolval;
return ret;
}
+EXPORT_SYMBOL(param_set_invbool);
-int param_get_invbool(char *buffer, struct kernel_param *kp)
+int param_get_invbool(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y');
}
+EXPORT_SYMBOL(param_get_invbool);
+
+struct kernel_param_ops param_ops_invbool = {
+ .set = param_set_invbool,
+ .get = param_get_invbool,
+};
+EXPORT_SYMBOL(param_ops_invbool);
/* We break the rule and mangle the string. */
static int param_array(const char *name,
const char *val,
unsigned int min, unsigned int max,
void *elem, int elemsize,
- int (*set)(const char *, struct kernel_param *kp),
+ int (*set)(const char *, const struct kernel_param *kp),
u16 flags,
unsigned int *num)
{
@@ -333,6 +414,7 @@ static int param_array(const char *name,
/* nul-terminate and parse */
save = val[len];
((char *)val)[len] = '\0';
+ BUG_ON(!mutex_is_locked(&param_lock));
ret = set(val, &kp);
if (ret != 0)
@@ -350,17 +432,17 @@ static int param_array(const char *name,
return 0;
}
-int param_array_set(const char *val, struct kernel_param *kp)
+static int param_array_set(const char *val, const struct kernel_param *kp)
{
const struct kparam_array *arr = kp->arr;
unsigned int temp_num;
return param_array(kp->name, val, 1, arr->max, arr->elem,
- arr->elemsize, arr->set, kp->flags,
+ arr->elemsize, arr->ops->set, kp->flags,
arr->num ?: &temp_num);
}
-int param_array_get(char *buffer, struct kernel_param *kp)
+static int param_array_get(char *buffer, const struct kernel_param *kp)
{
int i, off, ret;
const struct kparam_array *arr = kp->arr;
@@ -371,7 +453,8 @@ int param_array_get(char *buffer, struct kernel_param *kp)
if (i)
buffer[off++] = ',';
p.arg = arr->elem + arr->elemsize * i;
- ret = arr->get(buffer + off, &p);
+ BUG_ON(!mutex_is_locked(&param_lock));
+ ret = arr->ops->get(buffer + off, &p);
if (ret < 0)
return ret;
off += ret;
@@ -380,7 +463,24 @@ int param_array_get(char *buffer, struct kernel_param *kp)
return off;
}
-int param_set_copystring(const char *val, struct kernel_param *kp)
+static void param_array_free(void *arg)
+{
+ unsigned int i;
+ const struct kparam_array *arr = arg;
+
+ if (arr->ops->free)
+ for (i = 0; i < (arr->num ? *arr->num : arr->max); i++)
+ arr->ops->free(arr->elem + arr->elemsize * i);
+}
+
+struct kernel_param_ops param_array_ops = {
+ .set = param_array_set,
+ .get = param_array_get,
+ .free = param_array_free,
+};
+EXPORT_SYMBOL(param_array_ops);
+
+int param_set_copystring(const char *val, const struct kernel_param *kp)
{
const struct kparam_string *kps = kp->str;
@@ -396,12 +496,20 @@ int param_set_copystring(const char *val, struct kernel_param *kp)
strcpy(kps->string, val);
return 0;
}
+EXPORT_SYMBOL(param_set_copystring);
-int param_get_string(char *buffer, struct kernel_param *kp)
+int param_get_string(char *buffer, const struct kernel_param *kp)
{
const struct kparam_string *kps = kp->str;
return strlcpy(buffer, kps->string, kps->maxlen);
}
+EXPORT_SYMBOL(param_get_string);
+
+struct kernel_param_ops param_ops_string = {
+ .set = param_set_copystring,
+ .get = param_get_string,
+};
+EXPORT_SYMBOL(param_ops_string);
/* sysfs output in /sys/modules/XYZ/parameters/ */
#define to_module_attr(n) container_of(n, struct module_attribute, attr);
@@ -412,7 +520,7 @@ extern struct kernel_param __start___param[], __stop___param[];
struct param_attribute
{
struct module_attribute mattr;
- struct kernel_param *param;
+ const struct kernel_param *param;
};
struct module_param_attrs
@@ -431,10 +539,12 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
int count;
struct param_attribute *attribute = to_param_attr(mattr);
- if (!attribute->param->get)
+ if (!attribute->param->ops->get)
return -EPERM;
- count = attribute->param->get(buf, attribute->param);
+ mutex_lock(&param_lock);
+ count = attribute->param->ops->get(buf, attribute->param);
+ mutex_unlock(&param_lock);
if (count > 0) {
strcat(buf, "\n");
++count;
@@ -450,10 +560,12 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
int err;
struct param_attribute *attribute = to_param_attr(mattr);
- if (!attribute->param->set)
+ if (!attribute->param->ops->set)
return -EPERM;
- err = attribute->param->set(buf, attribute->param);
+ mutex_lock(&param_lock);
+ err = attribute->param->ops->set(buf, attribute->param);
+ mutex_unlock(&param_lock);
if (!err)
return len;
return err;
@@ -467,6 +579,18 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
#endif
#ifdef CONFIG_SYSFS
+void __kernel_param_lock(void)
+{
+ mutex_lock(&param_lock);
+}
+EXPORT_SYMBOL(__kernel_param_lock);
+
+void __kernel_param_unlock(void)
+{
+ mutex_unlock(&param_lock);
+}
+EXPORT_SYMBOL(__kernel_param_unlock);
+
/*
* add_sysfs_param - add a parameter to sysfs
* @mk: struct module_kobject
@@ -478,7 +602,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
* if there's an error.
*/
static __modinit int add_sysfs_param(struct module_kobject *mk,
- struct kernel_param *kp,
+ const struct kernel_param *kp,
const char *name)
{
struct module_param_attrs *new;
@@ -559,7 +683,7 @@ static void free_module_param_attrs(struct module_kobject *mk)
* /sys/module/[mod->name]/parameters/
*/
int module_param_sysfs_setup(struct module *mod,
- struct kernel_param *kparam,
+ const struct kernel_param *kparam,
unsigned int num_params)
{
int i, err;
@@ -604,7 +728,11 @@ void module_param_sysfs_remove(struct module *mod)
void destroy_params(const struct kernel_param *params, unsigned num)
{
- /* FIXME: This should free kmalloced charp parameters. It doesn't. */
+ unsigned int i;
+
+ for (i = 0; i < num; i++)
+ if (params[i].ops->free)
+ params[i].ops->free(params[i].arg);
}
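
destroy_params() now gives each parameter's ops->free() hook a chance to release dynamically allocated values on module unload, resolving the old FIXME about leaked kmalloc'ed charp strings. A sketch of an ops table with a free hook, assuming a set handler that kstrdup()s its input; all names here are illustrative:

#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/string.h>

static int name_set(const char *val, const struct kernel_param *kp)
{
	char *s = kstrdup(val, GFP_KERNEL);

	if (!s)
		return -ENOMEM;
	kfree(*(char **)kp->arg);	/* drop any previous value */
	*(char **)kp->arg = s;
	return 0;
}

static void name_free(void *arg)
{
	kfree(*(char **)arg);		/* invoked from destroy_params() */
}

static struct kernel_param_ops name_ops = {
	.set  = name_set,
	.get  = param_get_charp,
	.free = name_free,
};
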
static void __init kernel_add_sysfs_param(const char *name,
@@ -770,28 +898,3 @@ static int __init param_sysfs_init(void)
subsys_initcall(param_sysfs_init);
#endif /* CONFIG_SYSFS */
-
-EXPORT_SYMBOL(param_set_byte);
-EXPORT_SYMBOL(param_get_byte);
-EXPORT_SYMBOL(param_set_short);
-EXPORT_SYMBOL(param_get_short);
-EXPORT_SYMBOL(param_set_ushort);
-EXPORT_SYMBOL(param_get_ushort);
-EXPORT_SYMBOL(param_set_int);
-EXPORT_SYMBOL(param_get_int);
-EXPORT_SYMBOL(param_set_uint);
-EXPORT_SYMBOL(param_get_uint);
-EXPORT_SYMBOL(param_set_long);
-EXPORT_SYMBOL(param_get_long);
-EXPORT_SYMBOL(param_set_ulong);
-EXPORT_SYMBOL(param_get_ulong);
-EXPORT_SYMBOL(param_set_charp);
-EXPORT_SYMBOL(param_get_charp);
-EXPORT_SYMBOL(param_set_bool);
-EXPORT_SYMBOL(param_get_bool);
-EXPORT_SYMBOL(param_set_invbool);
-EXPORT_SYMBOL(param_get_invbool);
-EXPORT_SYMBOL(param_array_set);
-EXPORT_SYMBOL(param_array_get);
-EXPORT_SYMBOL(param_set_copystring);
-EXPORT_SYMBOL(param_get_string);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643c8985..8f1e5642fce7 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1583,7 +1583,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
- if (cpu < 0 || cpu > num_possible_cpus())
+ if (cpu < 0 || cpu >= nr_cpu_ids)
return ERR_PTR(-EINVAL);
/*
@@ -1591,7 +1591,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
* offline CPU and activate it when the CPU comes up, but
* that's for later.
*/
- if (!cpu_isset(cpu, cpu_online_map))
+ if (!cpu_online(cpu))
return ERR_PTR(-ENODEV);
cpuctx = &per_cpu(perf_cpu_context, cpu);
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 1b5b7aa2fdfd..971c40ae15fd 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -232,10 +232,10 @@ static void timer_list_show_tickdevices(struct seq_file *m)
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device(), -1);
SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
- tick_get_broadcast_mask()->bits[0]);
+ cpumask_bits(tick_get_broadcast_mask())[0]);
#ifdef CONFIG_TICK_ONESHOT
SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
- tick_get_broadcast_oneshot_mask()->bits[0]);
+ cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
#endif
SEQ_printf(m, "\n");
#endif
diff --git a/lib/Makefile b/lib/Makefile
index 2e78277eff9d..fb60af165607 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
- string_helpers.o gcd.o
+ string_helpers.o gcd.o bsearch.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/bsearch.c b/lib/bsearch.c
new file mode 100644
index 000000000000..29233eda58f3
--- /dev/null
+++ b/lib/bsearch.c
@@ -0,0 +1,53 @@
+/*
+ * A generic implementation of binary search for the Linux kernel
+ *
+ * Copyright (C) 2008-2009 Ksplice, Inc.
+ * Author: Tim Abbott <tabbott@ksplice.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2.
+ */
+
+#include <linux/module.h>
+#include <linux/bsearch.h>
+
+/*
+ * bsearch - binary search an array of elements
+ * @key: pointer to item being searched for
+ * @base: pointer to the first element to search
+ * @num: number of elements
+ * @size: size of each element
+ * @cmp: pointer to comparison function
+ *
+ * This function does a binary search on the given array. The
+ * contents of the array should already be in ascending sorted order
+ * under the provided comparison function.
+ *
+ * Note that the key need not have the same type as the elements in
+ * the array, e.g. key could be a string and the comparison function
+ * could compare the string with the struct's name field. However, if
+ * the key and elements in the array are of the same type, you can use
+ * the same comparison function for both sort() and bsearch().
+ */
+void *bsearch(const void *key, const void *base, size_t num, size_t size,
+ int (*cmp)(const void *key, const void *elt))
+{
+ size_t start = 0, end = num;
+ int result;
+
+ while (start < end) {
+ size_t mid = start + (end - start) / 2;
+
+ result = cmp(key, base + mid * size);
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return (void *)base + mid * size;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(bsearch);
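
A short usage sketch for the new helper: looking up an entry in a name-sorted table, with the key being a bare string rather than a full table element (the asymmetry the comment above allows). The table, struct and function names are illustrative:

#include <linux/bsearch.h>
#include <linux/kernel.h>
#include <linux/string.h>

struct symbol_entry {
	const char *name;
	unsigned long addr;
};

/* Must already be sorted by name in ascending order. */
static const struct symbol_entry table[] = {
	{ "alpha", 0x1000 },
	{ "beta",  0x2000 },
	{ "gamma", 0x3000 },
};

static int cmp_name(const void *key, const void *elt)
{
	return strcmp(key, ((const struct symbol_entry *)elt)->name);
}

static const struct symbol_entry *lookup(const char *name)
{
	return bsearch(name, table, ARRAY_SIZE(table),
		       sizeof(table[0]), cmp_name);
}
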
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index ded9081f4021..0777654147c9 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/mmu_context.h>
+#include <linux/module.h>
#include <linux/sched.h>
#include <asm/mmu_context.h>
@@ -37,6 +38,7 @@ void use_mm(struct mm_struct *mm)
if (active_mm != mm)
mmdrop(active_mm);
}
+EXPORT_SYMBOL_GPL(use_mm);
/*
* unuse_mm
@@ -56,3 +58,4 @@ void unuse_mm(struct mm_struct *mm)
enter_lazy_tlb(mm, tsk);
task_unlock(tsk);
}
+EXPORT_SYMBOL_GPL(unuse_mm);
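
use_mm()/unuse_mm() are exported (GPL-only) so that a module, namely the new vhost driver in this series, can have a kernel thread temporarily adopt a user process's address space and then use the ordinary copy_from_user()/copy_to_user() helpers on it. A minimal sketch of that pattern, not vhost's actual code; the caller is assumed to already hold a reference on mm (e.g. taken via get_task_mm()):

#include <linux/mmu_context.h>
#include <linux/sched.h>
#include <linux/uaccess.h>

static int copy_from_owner(struct mm_struct *mm,
			   void *dst, void __user *src, size_t len)
{
	int ret;

	use_mm(mm);			/* switch this kthread onto mm */
	ret = copy_from_user(dst, src, len) ? -EFAULT : 0;
	unuse_mm(mm);			/* drop back to lazy TLB, no mm */

	return ret;
}
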
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481c96ba..4c7b16f81828 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1120,7 +1120,7 @@ static void __cpuinit cpuup_canceled(long cpu)
if (nc)
free_block(cachep, nc->entry, nc->avail, node);
- if (!cpus_empty(*mask)) {
+ if (!cpumask_empty(mask)) {
spin_unlock_irq(&l3->list_lock);
goto free_array_cache;
}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index b9007f80cb92..bcd6d0b9cb88 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -102,6 +102,7 @@ ieee80211_rate_control_ops_get(const char *name)
struct rate_control_ops *ops;
const char *alg_name;
+ kparam_block_sysfs_write(ieee80211_default_rc_algo);
if (!name)
alg_name = ieee80211_default_rc_algo;
else
@@ -119,6 +120,7 @@ ieee80211_rate_control_ops_get(const char *name)
/* try built-in one if specific alg requested but not found */
if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
+ kparam_unblock_sysfs_write(ieee80211_default_rc_algo);
return ops;
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 37c5475ba258..de7ffe3b5953 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2619,7 +2619,8 @@ void cleanup_socket_xprt(void)
xprt_unregister_transport(&xs_bc_tcp_transport);
}
-static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
+static int param_set_uint_minmax(const char *val,
+ const struct kernel_param *kp,
unsigned int min, unsigned int max)
{
unsigned long num;
@@ -2634,34 +2635,37 @@ static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
return 0;
}
-static int param_set_portnr(const char *val, struct kernel_param *kp)
+static int param_set_portnr(const char *val, const struct kernel_param *kp)
{
return param_set_uint_minmax(val, kp,
RPC_MIN_RESVPORT,
RPC_MAX_RESVPORT);
}
-static int param_get_portnr(char *buffer, struct kernel_param *kp)
-{
- return param_get_uint(buffer, kp);
-}
+static struct kernel_param_ops param_ops_portnr = {
+ .set = param_set_portnr,
+ .get = param_get_uint,
+};
+
#define param_check_portnr(name, p) \
__param_check(name, p, unsigned int);
module_param_named(min_resvport, xprt_min_resvport, portnr, 0644);
module_param_named(max_resvport, xprt_max_resvport, portnr, 0644);
-static int param_set_slot_table_size(const char *val, struct kernel_param *kp)
+static int param_set_slot_table_size(const char *val,
+ const struct kernel_param *kp)
{
return param_set_uint_minmax(val, kp,
RPC_MIN_SLOT_TABLE,
RPC_MAX_SLOT_TABLE);
}
-static int param_get_slot_table_size(char *buffer, struct kernel_param *kp)
-{
- return param_get_uint(buffer, kp);
-}
+static struct kernel_param_ops param_ops_slot_table_size = {
+ .set = param_set_slot_table_size,
+ .get = param_get_uint,
+};
+
#define param_check_slot_table_size(name, p) \
__param_check(name, p, unsigned int);
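
Dropping the trivial param_get_portnr()/param_get_slot_table_size() wrappers works because module_param_named() now resolves its type argument by token pasting: the "portnr" type above is served by param_ops_portnr and type-checked by param_check_portnr purely by name. A sketch of the same convention for a hypothetical bounded "percent" type in some other module; none of these names exist in the patch:

#include <linux/kernel.h>
#include <linux/moduleparam.h>

static int param_set_percent(const char *val, const struct kernel_param *kp)
{
	unsigned long v;

	if (strict_strtoul(val, 0, &v) || v > 100)
		return -EINVAL;
	*(unsigned int *)kp->arg = v;
	return 0;
}

static struct kernel_param_ops param_ops_percent = {
	.set = param_set_percent,
	.get = param_get_uint,
};
#define param_check_percent(name, p) __param_check(name, p, unsigned int)

static unsigned int brightness = 50;
module_param(brightness, percent, 0644);	/* resolves to param_ops_percent */
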
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index d9f0cb837400..cd815ac2a50b 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -127,6 +127,11 @@ _c_flags += $(if $(patsubst n%,, \
$(CFLAGS_GCOV))
endif
+ifdef CONFIG_SYMBOL_PREFIX
+_cpp_flags += -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX))
+endif
+
+
# If building the kernel in a separate objtree expand all occurrences
# of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 8f14c81abbc7..876a3c7c5dad 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -91,7 +91,7 @@ __modpost: $(modules:.ko=.o) FORCE
$(call cmd,modpost) $(wildcard vmlinux) $(filter-out FORCE,$^)
quiet_cmd_kernel-mod = MODPOST $@
- cmd_kernel-mod = $(modpost) $@
+ cmd_kernel-mod = $(modpost) -x .tmp_exports-asm.S $@
vmlinux.o: FORCE
$(call cmd,kernel-mod)
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index 11d69c35e5b4..ff954f8168c1 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -8,7 +8,7 @@ modpost-objs := modpost.o file2alias.o sumversion.o
$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
quiet_cmd_elfconfig = MKELF $@
- cmd_elfconfig = $(obj)/mk_elfconfig $(ARCH) < $< > $@
+ cmd_elfconfig = $(obj)/mk_elfconfig < $< > $@
$(obj)/elfconfig.h: $(obj)/empty.o $(obj)/mk_elfconfig FORCE
$(call if_changed,elfconfig)
diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c
index 6a96d47bd1e6..639bca7ba559 100644
--- a/scripts/mod/mk_elfconfig.c
+++ b/scripts/mod/mk_elfconfig.c
@@ -9,9 +9,6 @@ main(int argc, char **argv)
unsigned char ei[EI_NIDENT];
union { short s; char c[2]; } endian_test;
- if (argc != 2) {
- fprintf(stderr, "Error: no arch\n");
- }
if (fread(ei, 1, EI_NIDENT, stdin) != EI_NIDENT) {
fprintf(stderr, "Error: input truncated\n");
return 1;
@@ -55,12 +52,6 @@ main(int argc, char **argv)
else
exit(1);
- if ((strcmp(argv[1], "h8300") == 0)
- || (strcmp(argv[1], "blackfin") == 0))
- printf("#define MODULE_SYMBOL_PREFIX \"_\"\n");
- else
- printf("#define MODULE_SYMBOL_PREFIX \"\"\n");
-
return 0;
}
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 801a16a17545..769ea9535c68 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -15,8 +15,17 @@
#include <stdio.h>
#include <ctype.h>
#include "modpost.h"
+#include "../../include/generated/autoconf.h"
#include "../../include/linux/license.h"
+/* Some toolchains use a `_' prefix for all user symbols. */
+#ifdef CONFIG_SYMBOL_PREFIX
+#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
+#define MODULE_SYMBOL_PREFIX ""
+#endif
+
+
/* Are we using CONFIG_MODVERSIONS? */
int modversions = 0;
/* Warn about undefined symbols? (do so if we have vmlinux) */
@@ -154,6 +163,7 @@ struct symbol {
};
static struct symbol *symbolhash[SYMBOL_HASH_SIZE];
+unsigned int symbolcount;
/* This is based on the hash algorithm from gdbm, via tdb */
static inline unsigned int tdb_hash(const char *name)
@@ -191,6 +201,7 @@ static struct symbol *new_symbol(const char *name, struct module *module,
unsigned int hash;
struct symbol *new;
+ symbolcount++;
hash = tdb_hash(name) % SYMBOL_HASH_SIZE;
new = symbolhash[hash] = alloc_symbol(name, 0, symbolhash[hash]);
new->module = module;
@@ -951,6 +962,13 @@ static int section_mismatch(const char *fromsec, const char *tosec)
* fromsec = .data*
* atsym =__param*
*
+ * Pattern 1a:
+ * module_param_call() ops can refer to __init set function if permissions=0
+ * The pattern is identified by:
+ * tosec = .init.text
+ * fromsec = .data*
+ * atsym = __param_ops_*
+ *
* Pattern 2:
* Many drivers utilise a *driver container with references to
* add, remove, probe functions etc.
@@ -984,6 +1002,12 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
(strncmp(fromsym, "__param", strlen("__param")) == 0))
return 0;
+ /* Check for pattern 1a */
+ if (strcmp(tosec, ".init.text") == 0 &&
+ match(fromsec, data_sections) &&
+ (strncmp(fromsym, "__param_ops_", strlen("__param_ops_")) == 0))
+ return 0;
+
/* Check for pattern 2 */
if (match(tosec, init_exit_sections) &&
match(fromsec, data_sections) &&
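
Pattern 1a is needed because module_param_call() now emits a static __param_ops_<name> structure in .data; when a boot-only parameter has an __init set routine and permissions of 0 (no sysfs entry, so the setter can never run after init), the resulting .data to .init.text reference is harmless and is whitelisted above. An illustrative example of such a parameter (all names hypothetical):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>

static unsigned long boot_debug_mask;

/* Boot-only: __init set routine, perm 0, never reachable via sysfs. */
static int __init setup_debug_mask(const char *val, const struct kernel_param *kp)
{
	return strict_strtoul(val, 0, &boot_debug_mask);
}

module_param_call(debug_mask, setup_debug_mask, NULL, NULL, 0);
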
@@ -1976,6 +2000,58 @@ static void write_dump(const char *fname)
write_if_changed(&buf, fname);
}
+static const char *section_names[] = {
+ [export_plain] = "",
+ [export_unused] = "_unused",
+ [export_gpl] = "_gpl",
+ [export_unused_gpl] = "_unused_gpl",
+ [export_gpl_future] = "_gpl_future",
+};
+
+static int compare_symbol_names(const void *a, const void *b)
+{
+ struct symbol *const *syma = a;
+ struct symbol *const *symb = b;
+
+ return strcmp((*syma)->name, (*symb)->name);
+}
+
+/* sort exported symbols and output using arch-independent assembly macros */
+static void write_exports(const char *fname)
+{
+ struct buffer buf = { };
+ struct symbol *sym, **symbols;
+ int i, n;
+
+ symbols = NOFAIL(malloc(sizeof(struct symbol *) * symbolcount));
+ n = 0;
+
+ for (i = 0; i < SYMBOL_HASH_SIZE; i++) {
+ for (sym = symbolhash[i]; sym; sym = sym->next)
+ symbols[n++] = sym;
+ }
+
+ qsort(symbols, n, sizeof(struct symbol *), compare_symbol_names);
+
+ buf_printf(&buf, "#define __MODPOST_EXPORTS__\n");
+ buf_printf(&buf, "#include <linux/mod_export.h>\n");
+ buf_printf(&buf, "\n");
+
+ for (i = 0; i < n; i++) {
+ sym = symbols[i];
+
+ buf_printf(&buf, "__EXPORT_SYMBOL(%s,"
+ " __ksymtab%s_sorted,"
+ " __ksymtab_strings_sorted,"
+ " __kcrctab%s_sorted)\n",
+ sym->name,
+ section_names[sym->export],
+ section_names[sym->export]);
+ }
+
+ write_if_changed(&buf, fname);
+}
+
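
write_exports() produces the .tmp_exports-asm.S source requested by the new "modpost -x" option in the Makefile.modpost hunk above: one __EXPORT_SYMBOL() line per exported symbol, sorted by name so the resulting __ksymtab*_sorted tables can be binary-searched at module load time. For two hypothetical exports, one plain and one GPL-only, the generated file would look roughly like this:

#define __MODPOST_EXPORTS__
#include <linux/mod_export.h>

__EXPORT_SYMBOL(bar_init, __ksymtab_gpl_sorted, __ksymtab_strings_sorted, __kcrctab_gpl_sorted)
__EXPORT_SYMBOL(foo_read, __ksymtab_sorted, __ksymtab_strings_sorted, __kcrctab_sorted)
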
static void add_marker(struct module *mod, const char *name, const char *fmt)
{
char *line = NULL;
@@ -2077,6 +2153,7 @@ int main(int argc, char **argv)
struct buffer buf = { };
char *kernel_read = NULL, *module_read = NULL;
char *dump_write = NULL;
+ char *exports_write = NULL;
char *markers_read = NULL;
char *markers_write = NULL;
int opt;
@@ -2084,7 +2161,7 @@ int main(int argc, char **argv)
struct ext_sym_list *extsym_iter;
struct ext_sym_list *extsym_start = NULL;
- while ((opt = getopt(argc, argv, "i:I:e:cmsSo:awM:K:")) != -1) {
+ while ((opt = getopt(argc, argv, "i:I:e:cmsSo:awM:K:x:")) != -1) {
switch (opt) {
case 'i':
kernel_read = optarg;
@@ -2122,6 +2199,9 @@ int main(int argc, char **argv)
case 'w':
warn_unresolved = 1;
break;
+ case 'x':
+ exports_write = optarg;
+ break;
case 'M':
markers_write = optarg;
break;
@@ -2182,6 +2262,9 @@ int main(int argc, char **argv)
"'make CONFIG_DEBUG_SECTION_MISMATCH=y'\n",
sec_mismatch_count);
+ if (exports_write)
+ write_exports(exports_write);
+
if (markers_read)
read_markers(markers_read);