From ca4787b779dd698a2a33a328aa5fa90a3e954077 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 5 Jan 2009 08:40:10 -0600 Subject: kernel/module.c: compare symbol values when marking symbols as exported in /proc/kallsyms. When there are two symbols in a module with the same name, one of which is exported, both will be marked as exported in /proc/kallsyms. There aren't any instances of this in the current kernel, but it is easy to construct a simple module with two compilation units that exhibits the problem. $ objdump -j .text -t testmod.ko | grep foo 00000000 l F .text 00000032 foo 00000080 g F .text 00000001 foo $ sudo insmod testmod.ko $ grep "T foo" /proc/kallsyms c28e8000 T foo [testmod] c28e8080 T foo [testmod] Fix this by comparing the symbol values once we've found the exported symbol table entry matching the symbol name. Tested using Ksplice: $ ksplice-create --patch=this_commit.patch --id=bar . $ sudo ksplice-apply ksplice-bar.tar.gz Done! $ grep "T foo" /proc/kallsyms c28e8080 T foo [testmod] Signed-off-by: Tim Abbott Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell --- kernel/module.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index dd2a54155b54..895c5675edb7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1725,15 +1725,15 @@ static const struct kernel_symbol *lookup_symbol(const char *name, return NULL; } -static int is_exported(const char *name, const struct module *mod) +static int is_exported(const char *name, unsigned long value, + const struct module *mod) { - if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) - return 1; + const struct kernel_symbol *ks; + if (!mod) + ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); else - if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) - return 1; - else - return 0; + ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); + return ks != NULL && ks->value == value; } /* As per nm */ @@ -2504,7 +2504,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); - *exported = is_exported(name, mod); + *exported = is_exported(name, *value, mod); preempt_enable(); return 0; } -- cgit v1.2.3 From d1e99d7ae4e6bbd1ebb5e81ecd3af2b8793efee0 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 8 Dec 2008 14:26:29 +0800 Subject: module: fix warning of unused function when !CONFIG_PROC_FS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this warning: kernel/module.c:824: warning: ‘print_unload_info’ defined but not used print_unload_info() just was used when CONFIG_PROC_FS was defined. This patch mark print_unload_info() inline to solve the problem. Signed-off-by: Jianjun Kong Signed-off-by: Rusty Russell CC: Ingo Molnar CC: Américo Wang --- kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 895c5675edb7..d3d254571bda 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -820,7 +820,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) return ret; } -static void print_unload_info(struct seq_file *m, struct module *mod) +static inline void print_unload_info(struct seq_file *m, struct module *mod) { struct module_use *use; int printed_something = 0; @@ -893,7 +893,7 @@ void module_put(struct module *module) EXPORT_SYMBOL(module_put); #else /* !CONFIG_MODULE_UNLOAD */ -static void print_unload_info(struct seq_file *m, struct module *mod) +static inline void print_unload_info(struct seq_file *m, struct module *mod) { /* We don't know the usage count, or what modules are using. */ seq_printf(m, " - -"); -- cgit v1.2.3 From 088af9a6e05d51e7c3dc85d45d8b7a52c3ee08d7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 31 Dec 2008 12:31:18 +0100 Subject: module: fix module loading failure of large kernel modules for parisc When creating the final layout of a kernel module in memory, allow the module loader to reserve some additional memory in front of a given section. This is currently only needed for the parisc port which needs to put the stub entries there to fulfill the 17/22bit PCREL relocations with large kernel modules like xfs. Signed-off-by: Helge Deller Signed-off-by: Rusty Russell (renamed fn) --- include/linux/moduleloader.h | 3 +++ kernel/module.c | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index eb1033957486..c1f40c2f7ffb 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -13,6 +13,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, char *secstrings, struct module *mod); +/* Additional bytes needed by arch in front of individual sections */ +unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section); + /* Allocator used for allocating struct module, core sections and init sections. Returns NULL on failure. */ void *module_alloc(unsigned long size); diff --git a/kernel/module.c b/kernel/module.c index d3d254571bda..4299aefc20b8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1578,11 +1578,21 @@ static int simplify_symbols(Elf_Shdr *sechdrs, return ret; } +/* Additional bytes needed by arch in front of individual sections */ +unsigned int __weak arch_mod_section_prepend(struct module *mod, + unsigned int section) +{ + /* default implementation just returns zero */ + return 0; +} + /* Update size with this section: return offset. */ -static long get_offset(unsigned int *size, Elf_Shdr *sechdr) +static long get_offset(struct module *mod, unsigned int *size, + Elf_Shdr *sechdr, unsigned int section) { long ret; + *size += arch_mod_section_prepend(mod, section); ret = ALIGN(*size, sechdr->sh_addralign ?: 1); *size = ret + sechdr->sh_size; return ret; @@ -1622,7 +1632,7 @@ static void layout_sections(struct module *mod, || strncmp(secstrings + s->sh_name, ".init", 5) == 0) continue; - s->sh_entsize = get_offset(&mod->core_size, s); + s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", secstrings + s->sh_name); } if (m == 0) @@ -1640,7 +1650,7 @@ static void layout_sections(struct module *mod, || strncmp(secstrings + s->sh_name, ".init", 5) != 0) continue; - s->sh_entsize = (get_offset(&mod->init_size, s) + s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | INIT_OFFSET_MASK); DEBUGP("\t%s\n", secstrings + s->sh_name); } -- cgit v1.2.3 From 9e01892c4234070bbcf3a9f582514c8b91464375 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Dec 2008 12:36:31 +0100 Subject: module: convert to stop_machine_create/destroy. The module code relies on a non-failing stop_machine call. So we create the kstop threads in advance and with that make sure the call won't fail. Signed-off-by: Heiko Carstens Signed-off-by: Rusty Russell --- kernel/module.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 4299aefc20b8..f47cce910f25 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -757,8 +757,16 @@ sys_delete_module(const char __user *name_user, unsigned int flags) return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - if (mutex_lock_interruptible(&module_mutex) != 0) - return -EINTR; + /* Create stop_machine threads since free_module relies on + * a non-failing stop_machine call. */ + ret = stop_machine_create(); + if (ret) + return ret; + + if (mutex_lock_interruptible(&module_mutex) != 0) { + ret = -EINTR; + goto out_stop; + } mod = find_module(name); if (!mod) { @@ -817,6 +825,8 @@ sys_delete_module(const char __user *name_user, unsigned int flags) out: mutex_unlock(&module_mutex); +out_stop: + stop_machine_destroy(); return ret; } @@ -1875,6 +1885,13 @@ static noinline struct module *load_module(void __user *umod, /* vmalloc barfs on "unusual" numbers. Check here */ if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) return ERR_PTR(-ENOMEM); + + /* Create stop_machine threads since the error path relies on + * a non-failing stop_machine call. */ + err = stop_machine_create(); + if (err) + goto free_hdr; + if (copy_from_user(hdr, umod, len) != 0) { err = -EFAULT; goto free_hdr; @@ -2258,6 +2275,7 @@ static noinline struct module *load_module(void __user *umod, /* Get rid of temporary copy */ vfree(hdr); + stop_machine_destroy(); /* Done! */ return mod; @@ -2280,6 +2298,7 @@ static noinline struct module *load_module(void __user *umod, kfree(args); free_hdr: vfree(hdr); + stop_machine_destroy(); return ERR_PTR(err); truncated: -- cgit v1.2.3 From f1883f86dea84fe47a71a39fc1afccc005915ed8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Jan 2009 14:40:45 -0800 Subject: Remove remaining unwinder code Signed-off-by: Alexey Dobriyan Cc: Gabor Gombas Cc: Jan Beulich Cc: Andi Kleen Cc: Ingo Molnar , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/unwind.h | 13 --------- arch/x86/kernel/traps.c | 2 -- include/linux/module.h | 3 -- include/linux/unwind.h | 68 ------------------------------------------- init/main.c | 3 -- kernel/module.c | 15 ---------- lib/fault-inject.c | 1 - 7 files changed, 105 deletions(-) delete mode 100644 arch/x86/include/asm/unwind.h delete mode 100644 include/linux/unwind.h (limited to 'kernel/module.c') diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h deleted file mode 100644 index 8b064bd9c553..000000000000 --- a/arch/x86/include/asm/unwind.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _ASM_X86_UNWIND_H -#define _ASM_X86_UNWIND_H - -#define UNW_PC(frame) ((void)(frame), 0UL) -#define UNW_SP(frame) ((void)(frame), 0UL) -#define UNW_FP(frame) ((void)(frame), 0UL) - -static inline int arch_unw_user_mode(const void *info) -{ - return 0; -} - -#endif /* _ASM_X86_UNWIND_H */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ce6650eb64e9..c9a666cdd3db 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -51,7 +50,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/module.h b/include/linux/module.h index 3bfed013350b..03cb93d1865a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -294,9 +294,6 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; - /* The handle returned from unwind_add_table. */ - void *unwind_info; - /* Arch-specific module values */ struct mod_arch_specific arch; diff --git a/include/linux/unwind.h b/include/linux/unwind.h deleted file mode 100644 index 7760860fa170..000000000000 --- a/include/linux/unwind.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _LINUX_UNWIND_H -#define _LINUX_UNWIND_H - -/* - * Copyright (C) 2002-2006 Novell, Inc. - * Jan Beulich - * This code is released under version 2 of the GNU GPL. - * - * A simple API for unwinding kernel stacks. This is used for - * debugging and error reporting purposes. The kernel doesn't need - * full-blown stack unwinding with all the bells and whistles, so there - * is not much point in implementing the full Dwarf2 unwind API. - */ - -struct module; - -struct unwind_frame_info {}; - -static inline void unwind_init(void) {} -static inline void unwind_setup(void) {} - -#ifdef CONFIG_MODULES - -static inline void *unwind_add_table(struct module *mod, - const void *table_start, - unsigned long table_size) -{ - return NULL; -} - -static inline void unwind_remove_table(void *handle, int init_only) -{ -} - -#endif - -static inline int unwind_init_frame_info(struct unwind_frame_info *info, - struct task_struct *tsk, - const struct pt_regs *regs) -{ - return -ENOSYS; -} - -static inline int unwind_init_blocked(struct unwind_frame_info *info, - struct task_struct *tsk) -{ - return -ENOSYS; -} - -static inline int unwind_init_running(struct unwind_frame_info *info, - asmlinkage int (*cb)(struct unwind_frame_info *, - void *arg), - void *arg) -{ - return -ENOSYS; -} - -static inline int unwind(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -static inline int unwind_to_user(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -#endif /* _LINUX_UNWIND_H */ diff --git a/init/main.c b/init/main.c index 90926dadc20d..e119dd28dd7d 100644 --- a/init/main.c +++ b/init/main.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -537,7 +536,6 @@ asmlinkage void __init start_kernel(void) * Need to run as early as possible, to initialize the * lockdep hash: */ - unwind_init(); lockdep_init(); debug_objects_early_init(); cgroup_init_early(); @@ -559,7 +557,6 @@ asmlinkage void __init start_kernel(void) setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); - unwind_setup(); setup_per_cpu_areas(); setup_nr_cpu_ids(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ diff --git a/kernel/module.c b/kernel/module.c index f47cce910f25..34b56cf06615 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -1449,8 +1448,6 @@ static void free_module(struct module *mod) remove_sect_attrs(mod); mod_kobject_remove(mod); - unwind_remove_table(mod->unwind_info, 0); - /* Arch-specific cleanup. */ module_arch_cleanup(mod); @@ -1867,7 +1864,6 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; - unsigned int unwindex = 0; unsigned int num_kp, num_mcount; struct kernel_param *kp; struct module *mod; @@ -1957,9 +1953,6 @@ static noinline struct module *load_module(void __user *umod, versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); -#ifdef ARCH_UNWIND_SECTION_NAME - unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); -#endif /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -1969,8 +1962,6 @@ static noinline struct module *load_module(void __user *umod, sechdrs[symindex].sh_flags |= SHF_ALLOC; sechdrs[strindex].sh_flags |= SHF_ALLOC; #endif - if (unwindex) - sechdrs[unwindex].sh_flags |= SHF_ALLOC; /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2267,11 +2258,6 @@ static noinline struct module *load_module(void __user *umod, add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); - /* Size of section 0 is 0, so this works well if no unwind info. */ - mod->unwind_info = unwind_add_table(mod, - (void *)sechdrs[unwindex].sh_addr, - sechdrs[unwindex].sh_size); - /* Get rid of temporary copy */ vfree(hdr); @@ -2370,7 +2356,6 @@ sys_init_module(void __user *umod, mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); - unwind_remove_table(mod->unwind_info, 1); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; diff --git a/lib/fault-inject.c b/lib/fault-inject.c index a50a311554cc..f97af55bdd96 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From a06f6211ef9b1785922f9d0e8766d63ac4e66de1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:49 -0800 Subject: module: add within_module_core() and within_module_init() This series of patches allows kprobes to probe module's __init and __exit functions. This means, you can probe driver initialization and terminating. Currently, kprobes can't probe __init function because these functions are freed after module initialization. And it also can't probe module __exit functions because kprobe increments reference count of target module and user can't unload it. this means __exit functions never be called unless removing probes from the module. To solve both cases, this series of patches introduces GONE flag and sets it when the target code is freed(for this purpose, kprobes hooks MODULE_STATE_* events). This also removes refcount incrementing for allowing user to unload target module. Users can check which probes are GONE by debugfs interface. For taking timing of freeing module's .init text, these also include a patch which adds module's notifier of MODULE_STATE_LIVE event. This patch: Add within_module_core() and within_module_init() for checking whether an address is in the module .init.text section or .text section, and replace within() local inline functions in kernel/module.c with them. kprobes uses these functions to check where the kprobe is inserted. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 12 ++++++++++++ kernel/module.c | 16 ++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 03cb93d1865a..4f7ea12463d3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -365,6 +365,18 @@ struct module *module_text_address(unsigned long addr); struct module *__module_text_address(unsigned long addr); int is_module_address(unsigned long addr); +static inline int within_module_core(unsigned long addr, struct module *mod) +{ + return (unsigned long)mod->module_core <= addr && + addr < (unsigned long)mod->module_core + mod->core_size; +} + +static inline int within_module_init(unsigned long addr, struct module *mod) +{ + return (unsigned long)mod->module_init <= addr && + addr < (unsigned long)mod->module_init + mod->init_size; +} + /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, diff --git a/kernel/module.c b/kernel/module.c index 34b56cf06615..cc79c942c572 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2390,7 +2390,7 @@ static const char *get_ksymbol(struct module *mod, unsigned long nextval; /* At worse, next value is at end of module */ - if (within(addr, mod->module_init, mod->init_size)) + if (within_module_init(addr, mod)) nextval = (unsigned long)mod->module_init+mod->init_text_size; else nextval = (unsigned long)mod->module_core+mod->core_text_size; @@ -2438,8 +2438,8 @@ const char *module_address_lookup(unsigned long addr, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) - || within(addr, mod->module_core, mod->core_size)) { + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { if (modname) *modname = mod->name; ret = get_ksymbol(mod, addr, size, offset); @@ -2461,8 +2461,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) || - within(addr, mod->module_core, mod->core_size)) { + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, NULL, NULL); @@ -2485,8 +2485,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) || - within(addr, mod->module_core, mod->core_size)) { + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, size, offset); @@ -2705,7 +2705,7 @@ int is_module_address(unsigned long addr) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { - if (within(addr, mod->module_core, mod->core_size)) { + if (within_module_core(addr, mod)) { preempt_enable(); return 1; } -- cgit v1.2.3 From 0deddf436a37c18ceb26c6e3b632fb9b5f58a0c1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:54 -0800 Subject: module: add MODULE_STATE_LIVE notify Add a module notifier call which notifies that the state of a module changes from MODULE_STATE_COMING to MODULE_STATE_LIVE. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index cc79c942c572..496dcb57b608 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2352,6 +2352,8 @@ sys_init_module(void __user *umod, /* Now it's a first class citizen! Wake up anyone waiting for it. */ mod->state = MODULE_STATE_LIVE; wake_up(&module_wq); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_LIVE, mod); mutex_lock(&module_mutex); /* Drop initial reference. */ -- cgit v1.2.3 From 22a9d645677feefd402befd02edd59b122289ef1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 7 Jan 2009 08:45:46 -0800 Subject: async: Asynchronous function calls to speed up kernel boot Right now, most of the kernel boot is strictly synchronous, such that various hardware delays are done sequentially. In order to make the kernel boot faster, this patch introduces infrastructure to allow doing some of the initialization steps asynchronously, which will hide significant portions of the hardware delays in practice. In order to not change device order and other similar observables, this patch does NOT do full parallel initialization. Rather, it operates more in the way an out of order CPU does; the work may be done out of order and asynchronous, but the observable effects (instruction retiring for the CPU) are still done in the original sequence. Signed-off-by: Arjan van de Ven --- include/linux/async.h | 25 ++++ init/do_mounts.c | 2 + init/main.c | 5 +- kernel/Makefile | 3 +- kernel/async.c | 321 +++++++++++++++++++++++++++++++++++++++++++++++++ kernel/irq/autoprobe.c | 5 + kernel/module.c | 2 + 7 files changed, 361 insertions(+), 2 deletions(-) create mode 100644 include/linux/async.h create mode 100644 kernel/async.c (limited to 'kernel/module.c') diff --git a/include/linux/async.h b/include/linux/async.h new file mode 100644 index 000000000000..c4ecacd0b327 --- /dev/null +++ b/include/linux/async.h @@ -0,0 +1,25 @@ +/* + * async.h: Asynchronous function calls for boot performance + * + * (C) Copyright 2009 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include + +typedef u64 async_cookie_t; +typedef void (async_func_ptr) (void *data, async_cookie_t cookie); + +extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); +extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list); +extern void async_synchronize_full(void); +extern void async_synchronize_full_special(struct list_head *list); +extern void async_synchronize_cookie(async_cookie_t cookie); +extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list); + diff --git a/init/do_mounts.c b/init/do_mounts.c index 5efca73b39f9..708105e163df 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -372,6 +373,7 @@ void __init prepare_namespace(void) /* wait for the known devices to complete their probing */ while (driver_probe_done() != 0) msleep(100); + async_synchronize_full(); md_run_setup(); diff --git a/init/main.c b/init/main.c index b5a892c68375..f66715d8a853 100644 --- a/init/main.c +++ b/init/main.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -684,7 +685,7 @@ asmlinkage void __init start_kernel(void) rest_init(); } -static int initcall_debug; +int initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); int do_one_initcall(initcall_t fn) @@ -785,6 +786,8 @@ static void run_init_process(char *init_filename) */ static noinline int init_post(void) { + /* need to finish all async __init code before freeing the memory */ + async_synchronize_full(); free_initmem(); unlock_kernel(); mark_rodata_ro(); diff --git a/kernel/Makefile b/kernel/Makefile index e1c5bf3365c0..2921d90ce32f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o + notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ + async.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/async.c b/kernel/async.c new file mode 100644 index 000000000000..afaa8a653d5a --- /dev/null +++ b/kernel/async.c @@ -0,0 +1,321 @@ +/* + * async.c: Asynchronous function calls for boot performance + * + * (C) Copyright 2009 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + + +/* + +Goals and Theory of Operation + +The primary goal of this feature is to reduce the kernel boot time, +by doing various independent hardware delays and discovery operations +decoupled and not strictly serialized. + +More specifically, the asynchronous function call concept allows +certain operations (primarily during system boot) to happen +asynchronously, out of order, while these operations still +have their externally visible parts happen sequentially and in-order. +(not unlike how out-of-order CPUs retire their instructions in order) + +Key to the asynchronous function call implementation is the concept of +a "sequence cookie" (which, although it has an abstracted type, can be +thought of as a monotonically incrementing number). + +The async core will assign each scheduled event such a sequence cookie and +pass this to the called functions. + +The asynchronously called function should before doing a globally visible +operation, such as registering device numbers, call the +async_synchronize_cookie() function and pass in its own cookie. The +async_synchronize_cookie() function will make sure that all asynchronous +operations that were scheduled prior to the operation corresponding with the +cookie have completed. + +Subsystem/driver initialization code that scheduled asynchronous probe +functions, but which shares global resources with other drivers/subsystems +that do not use the asynchronous call feature, need to do a full +synchronization with the async_synchronize_full() function, before returning +from their init function. This is to maintain strict ordering between the +asynchronous and synchronous parts of the kernel. + +*/ + +#include +#include +#include +#include +#include +#include +#include + +static async_cookie_t next_cookie = 1; + +#define MAX_THREADS 256 +#define MAX_WORK 32768 + +static LIST_HEAD(async_pending); +static LIST_HEAD(async_running); +static DEFINE_SPINLOCK(async_lock); + +struct async_entry { + struct list_head list; + async_cookie_t cookie; + async_func_ptr *func; + void *data; + struct list_head *running; +}; + +static DECLARE_WAIT_QUEUE_HEAD(async_done); +static DECLARE_WAIT_QUEUE_HEAD(async_new); + +static atomic_t entry_count; +static atomic_t thread_count; + +extern int initcall_debug; + + +/* + * MUST be called with the lock held! + */ +static async_cookie_t __lowest_in_progress(struct list_head *running) +{ + struct async_entry *entry; + if (!list_empty(&async_pending)) { + entry = list_first_entry(&async_pending, + struct async_entry, list); + return entry->cookie; + } else if (!list_empty(running)) { + entry = list_first_entry(running, + struct async_entry, list); + return entry->cookie; + } else { + /* nothing in progress... next_cookie is "infinity" */ + return next_cookie; + } + +} +/* + * pick the first pending entry and run it + */ +static void run_one_entry(void) +{ + unsigned long flags; + struct async_entry *entry; + ktime_t calltime, delta, rettime; + + /* 1) pick one task from the pending queue */ + + spin_lock_irqsave(&async_lock, flags); + if (list_empty(&async_pending)) + goto out; + entry = list_first_entry(&async_pending, struct async_entry, list); + + /* 2) move it to the running queue */ + list_del(&entry->list); + list_add_tail(&entry->list, &async_running); + spin_unlock_irqrestore(&async_lock, flags); + + /* 3) run it (and print duration)*/ + if (initcall_debug) { + printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current)); + calltime = ktime_get(); + } + entry->func(entry->data, entry->cookie); + if (initcall_debug) { + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie, + entry->func, ktime_to_ns(delta) >> 10); + } + + /* 4) remove it from the running queue */ + spin_lock_irqsave(&async_lock, flags); + list_del(&entry->list); + + /* 5) free the entry */ + kfree(entry); + atomic_dec(&entry_count); + + spin_unlock_irqrestore(&async_lock, flags); + + /* 6) wake up any waiters. */ + wake_up(&async_done); + return; + +out: + spin_unlock_irqrestore(&async_lock, flags); +} + + +static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running) +{ + struct async_entry *entry; + unsigned long flags; + async_cookie_t newcookie; + + + /* allow irq-off callers */ + entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC); + + /* + * If we're out of memory or if there's too much work + * pending already, we execute synchronously. + */ + if (!entry || atomic_read(&entry_count) > MAX_WORK) { + kfree(entry); + spin_lock_irqsave(&async_lock, flags); + newcookie = next_cookie++; + spin_unlock_irqrestore(&async_lock, flags); + + /* low on memory.. run synchronously */ + ptr(data, newcookie); + return newcookie; + } + entry->func = ptr; + entry->data = data; + entry->running = running; + + spin_lock_irqsave(&async_lock, flags); + newcookie = entry->cookie = next_cookie++; + list_add_tail(&entry->list, &async_pending); + atomic_inc(&entry_count); + spin_unlock_irqrestore(&async_lock, flags); + wake_up(&async_new); + return newcookie; +} + +async_cookie_t async_schedule(async_func_ptr *ptr, void *data) +{ + return __async_schedule(ptr, data, &async_pending); +} +EXPORT_SYMBOL_GPL(async_schedule); + +async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running) +{ + return __async_schedule(ptr, data, running); +} +EXPORT_SYMBOL_GPL(async_schedule_special); + +void async_synchronize_full(void) +{ + async_synchronize_cookie(next_cookie); +} +EXPORT_SYMBOL_GPL(async_synchronize_full); + +void async_synchronize_full_special(struct list_head *list) +{ + async_synchronize_cookie_special(next_cookie, list); +} +EXPORT_SYMBOL_GPL(async_synchronize_full_special); + +void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running) +{ + ktime_t starttime, delta, endtime; + + if (initcall_debug) { + printk("async_waiting @ %i\n", task_pid_nr(current)); + starttime = ktime_get(); + } + + wait_event(async_done, __lowest_in_progress(running) >= cookie); + + if (initcall_debug) { + endtime = ktime_get(); + delta = ktime_sub(endtime, starttime); + + printk("async_continuing @ %i after %lli usec\n", + task_pid_nr(current), ktime_to_ns(delta) >> 10); + } +} +EXPORT_SYMBOL_GPL(async_synchronize_cookie_special); + +void async_synchronize_cookie(async_cookie_t cookie) +{ + async_synchronize_cookie_special(cookie, &async_running); +} +EXPORT_SYMBOL_GPL(async_synchronize_cookie); + + +static int async_thread(void *unused) +{ + DECLARE_WAITQUEUE(wq, current); + add_wait_queue(&async_new, &wq); + + while (!kthread_should_stop()) { + int ret = HZ; + set_current_state(TASK_INTERRUPTIBLE); + /* + * check the list head without lock.. false positives + * are dealt with inside run_one_entry() while holding + * the lock. + */ + rmb(); + if (!list_empty(&async_pending)) + run_one_entry(); + else + ret = schedule_timeout(HZ); + + if (ret == 0) { + /* + * we timed out, this means we as thread are redundant. + * we sign off and die, but we to avoid any races there + * is a last-straw check to see if work snuck in. + */ + atomic_dec(&thread_count); + wmb(); /* manager must see our departure first */ + if (list_empty(&async_pending)) + break; + /* + * woops work came in between us timing out and us + * signing off; we need to stay alive and keep working. + */ + atomic_inc(&thread_count); + } + } + remove_wait_queue(&async_new, &wq); + + return 0; +} + +static int async_manager_thread(void *unused) +{ + DECLARE_WAITQUEUE(wq, current); + add_wait_queue(&async_new, &wq); + + while (!kthread_should_stop()) { + int tc, ec; + + set_current_state(TASK_INTERRUPTIBLE); + + tc = atomic_read(&thread_count); + rmb(); + ec = atomic_read(&entry_count); + + while (tc < ec && tc < MAX_THREADS) { + kthread_run(async_thread, NULL, "async/%i", tc); + atomic_inc(&thread_count); + tc++; + } + + schedule(); + } + remove_wait_queue(&async_new, &wq); + + return 0; +} + +static int __init async_init(void) +{ + kthread_run(async_manager_thread, NULL, "async/mgr"); + return 0; +} + +core_initcall(async_init); diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index cc0f7321b8ce..1de9700f416e 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "internals.h" @@ -34,6 +35,10 @@ unsigned long probe_irq_on(void) unsigned int status; int i; + /* + * quiesce the kernel, or at least the asynchronous portion + */ + async_synchronize_full(); mutex_lock(&probing_active); /* * something may have generated an irq long ago and we want to diff --git a/kernel/module.c b/kernel/module.c index 496dcb57b608..c9332c90d5a0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -50,6 +50,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -816,6 +817,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) mod->exit(); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); + async_synchronize_full(); mutex_lock(&module_mutex); /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); -- cgit v1.2.3