From 6a4c264313c4ae32dc53821a9c57e0dc9696fb81 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:21:23 +0930 Subject: module: rename KERNEL_PARAM_FL_NOARG to avoid confusion Make it clear this is about kernel_param_ops, not kernel_param (which will soon have a flags field of its own). No functional changes. Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 03214bd288e9..8a0dc91eddbc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -135,7 +135,7 @@ static int param_set_bool_enable_only(const char *val, } static const struct kernel_param_ops param_ops_bool_enable_only = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool_enable_only, .get = param_get_bool, }; -- cgit v1.2.3 From 6c34f1f5424395994c125f8c68bed395920ecc58 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 16 Sep 2014 22:37:18 +0100 Subject: aarch64: filter $x from kallsyms Similar to ARM, AArch64 is generating $x and $d syms... which isn't terribly helpful when looking at %pF output and the like. Filter those out in kallsyms, modpost and when looking at module symbols. Seems simplest since none of these check EM_ARM anyway, to just add it to the strchr used, rather than trying to make things overly complicated. initcall_debug improves: dmesg_before.txt: initcall $x+0x0/0x154 [sg] returned 0 after 26331 usecs dmesg_after.txt: initcall init_sg+0x0/0x154 [sg] returned 0 after 15461 usecs Signed-off-by: Kyle McMartin Acked-by: Rusty Russell Signed-off-by: Catalin Marinas --- kernel/module.c | 2 +- scripts/kallsyms.c | 2 +- scripts/mod/modpost.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 03214bd288e9..3d52936031cc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3388,7 +3388,7 @@ static inline int is_arm_mapping_symbol(const char *str) { if (str[0] == '.' && str[1] == 'L') return true; - return str[0] == '$' && strchr("atd", str[1]) + return str[0] == '$' && strchr("axtd", str[1]) && (str[2] == '\0' || str[2] == '.'); } diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index dc7aa45e80ce..c6d33bd15b04 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -84,7 +84,7 @@ static void usage(void) */ static inline int is_arm_mapping_symbol(const char *str) { - return str[0] == '$' && strchr("atd", str[1]) + return str[0] == '$' && strchr("axtd", str[1]) && (str[2] == '\0' || str[2] == '.'); } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 091d90573b63..3017ec20e9f8 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1146,7 +1146,7 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, static inline int is_arm_mapping_symbol(const char *str) { - return str[0] == '$' && strchr("atd", str[1]) + return str[0] == '$' && strchr("axtd", str[1]) && (str[2] == '\0' || str[2] == '.'); } -- cgit v1.2.3 From d3051b489aa81ca9ba62af366149ef42b8dae97c Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 14 Oct 2014 02:51:39 +1030 Subject: modules, lock around setting of MODULE_STATE_UNFORMED A panic was seen in the following sitation. There are two threads running on the system. The first thread is a system monitoring thread that is reading /proc/modules. The second thread is loading and unloading a module (in this example I'm using my simple dummy-module.ko). Note, in the "real world" this occurred with the qlogic driver module. When doing this, the following panic occurred: ------------[ cut here ]------------ kernel BUG at kernel/module.c:3739! invalid opcode: 0000 [#1] SMP Modules linked in: binfmt_misc sg nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache intel_powerclamp coretemp kvm_intel kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel lrw igb gf128mul glue_helper iTCO_wdt iTCO_vendor_support ablk_helper ptp sb_edac cryptd pps_core edac_core shpchp i2c_i801 pcspkr wmi lpc_ich ioatdma mfd_core dca ipmi_si nfsd ipmi_msghandler auth_rpcgss nfs_acl lockd sunrpc xfs libcrc32c sr_mod cdrom sd_mod crc_t10dif crct10dif_common mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit drm_kms_helper ttm isci drm libsas ahci libahci scsi_transport_sas libata i2c_core dm_mirror dm_region_hash dm_log dm_mod [last unloaded: dummy_module] CPU: 37 PID: 186343 Comm: cat Tainted: GF O-------------- 3.10.0+ #7 Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013 task: ffff8807fd2d8000 ti: ffff88080fa7c000 task.ti: ffff88080fa7c000 RIP: 0010:[] [] module_flags+0xb5/0xc0 RSP: 0018:ffff88080fa7fe18 EFLAGS: 00010246 RAX: 0000000000000003 RBX: ffffffffa03b5200 RCX: 0000000000000000 RDX: 0000000000001000 RSI: ffff88080fa7fe38 RDI: ffffffffa03b5000 RBP: ffff88080fa7fe28 R08: 0000000000000010 R09: 0000000000000000 R10: 0000000000000000 R11: 000000000000000f R12: ffffffffa03b5000 R13: ffffffffa03b5008 R14: ffffffffa03b5200 R15: ffffffffa03b5000 FS: 00007f6ae57ef740(0000) GS:ffff88101e7a0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000404f70 CR3: 0000000ffed48000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffffa03b5200 ffff8810101e4800 ffff88080fa7fe70 ffffffff810d666c ffff88081e807300 000000002e0f2fbf 0000000000000000 ffff88100f257b00 ffffffffa03b5008 ffff88080fa7ff48 ffff8810101e4800 ffff88080fa7fee0 Call Trace: [] m_show+0x19c/0x1e0 [] seq_read+0x16e/0x3b0 [] proc_reg_read+0x3d/0x80 [] vfs_read+0x9c/0x170 [] SyS_read+0x58/0xb0 [] system_call_fastpath+0x16/0x1b Code: 48 63 c2 83 c2 01 c6 04 03 29 48 63 d2 eb d9 0f 1f 80 00 00 00 00 48 63 d2 c6 04 13 2d 41 8b 0c 24 8d 50 02 83 f9 01 75 b2 eb cb <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 RIP [] module_flags+0xb5/0xc0 RSP Consider the two processes running on the system. CPU 0 (/proc/modules reader) CPU 1 (loading/unloading module) CPU 0 opens /proc/modules, and starts displaying data for each module by traversing the modules list via fs/seq_file.c:seq_open() and fs/seq_file.c:seq_read(). For each module in the modules list, seq_read does op->start() <-- this is a pointer to m_start() op->show() <- this is a pointer to m_show() op->stop() <-- this is a pointer to m_stop() The m_start(), m_show(), and m_stop() module functions are defined in kernel/module.c. The m_start() and m_stop() functions acquire and release the module_mutex respectively. ie) When reading /proc/modules, the module_mutex is acquired and released for each module. m_show() is called with the module_mutex held. It accesses the module struct data and attempts to write out module data. It is in this code path that the above BUG_ON() warning is encountered, specifically m_show() calls static char *module_flags(struct module *mod, char *buf) { int bx = 0; BUG_ON(mod->state == MODULE_STATE_UNFORMED); ... The other thread, CPU 1, in unloading the module calls the syscall delete_module() defined in kernel/module.c. The module_mutex is acquired for a short time, and then released. free_module() is called without the module_mutex. free_module() then sets mod->state = MODULE_STATE_UNFORMED, also without the module_mutex. Some additional code is called and then the module_mutex is reacquired to remove the module from the modules list: /* Now we can delete it from the lists */ mutex_lock(&module_mutex); stop_machine(__unlink_module, mod, NULL); mutex_unlock(&module_mutex); This is the sequence of events that leads to the panic. CPU 1 is removing dummy_module via delete_module(). It acquires the module_mutex, and then releases it. CPU 1 has NOT set dummy_module->state to MODULE_STATE_UNFORMED yet. CPU 0, which is reading the /proc/modules, acquires the module_mutex and acquires a pointer to the dummy_module which is still in the modules list. CPU 0 calls m_show for dummy_module. The check in m_show() for MODULE_STATE_UNFORMED passed for dummy_module even though it is being torn down. Meanwhile CPU 1, which has been continuing to remove dummy_module without holding the module_mutex, now calls free_module() and sets dummy_module->state to MODULE_STATE_UNFORMED. CPU 0 now calls module_flags() with dummy_module and ... static char *module_flags(struct module *mod, char *buf) { int bx = 0; BUG_ON(mod->state == MODULE_STATE_UNFORMED); and BOOM. Acquire and release the module_mutex lock around the setting of MODULE_STATE_UNFORMED in the teardown path, which should resolve the problem. Testing: In the unpatched kernel I can panic the system within 1 minute by doing while (true) do insmod dummy_module.ko; rmmod dummy_module.ko; done and while (true) do cat /proc/modules; done in separate terminals. In the patched kernel I was able to run just over one hour without seeing any issues. I also verified the output of panic via sysrq-c and the output of /proc/modules looks correct for all three states for the dummy_module. dummy_module 12661 0 - Unloading 0xffffffffa03a5000 (OE-) dummy_module 12661 0 - Live 0xffffffffa03bb000 (OE) dummy_module 14015 1 - Loading 0xffffffffa03a5000 (OE+) Signed-off-by: Prarit Bhargava Reviewed-by: Oleg Nesterov Signed-off-by: Rusty Russell Cc: stable@kernel.org --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 8a0dc91eddbc..138b83e31bd5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1842,7 +1842,9 @@ static void free_module(struct module *mod) /* We leave it in list to prevent duplicate loads, but make sure * that noone uses it while it's being deconstructed. */ + mutex_lock(&module_mutex); mod->state = MODULE_STATE_UNFORMED; + mutex_unlock(&module_mutex); /* Remove dynamic debug info */ ddebug_remove_module(mod->name); -- cgit v1.2.3 From 3c9b2c3d64a49f264422d7743599cf7f6535972d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Sep 2014 10:18:53 +0200 Subject: sched, modules: Fix nested sleep in add_unformed_module() This is a genuine bug in add_unformed_module(), we cannot use blocking primitives inside a wait loop. So rewrite the wait_event_interruptible() usage to use the fresh wait_woken() stuff. Reported-by: Fengguang Wu Signed-off-by: Peter Zijlstra (Intel) Cc: tglx@linutronix.de Cc: ilya.dryomov@inktank.com Cc: umgwanakikbuti@gmail.com Cc: Rusty Russell Cc: oleg@redhat.com Cc: Linus Torvalds Cc: Andrew Morton Cc: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20140924082242.458562904@infradead.org [ So this is probably complex to backport and the race wasn't reported AFAIK, so not marked for -stable. ] Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- kernel/module.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 88cec1ddb1e3..e52a8739361a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3096,6 +3096,32 @@ static int may_init_module(void) return 0; } +/* + * Can't use wait_event_interruptible() because our condition + * 'finished_loading()' contains a blocking primitive itself (mutex_lock). + */ +static int wait_finished_loading(struct module *mod) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; + + add_wait_queue(&module_wq, &wait); + for (;;) { + if (finished_loading(mod->name)) + break; + + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + } + remove_wait_queue(&module_wq, &wait); + + return ret; +} + /* * We try to place it in the list now to make sure it's unique before * we dedicate too many resources. In particular, temporary percpu @@ -3116,8 +3142,8 @@ again: || old->state == MODULE_STATE_UNFORMED) { /* Wait in case it fails to load. */ mutex_unlock(&module_mutex); - err = wait_event_interruptible(module_wq, - finished_loading(mod->name)); + + err = wait_finished_loading(mod); if (err) goto out_unlocked; goto again; -- cgit v1.2.3 From 4f48795b6154852d07d971e402c35ecc460ddcb6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:26:29 +1030 Subject: module: Wait for RCU synchronizing before releasing a module Wait for RCU synchronizing on failure path of module loading before releasing struct module, because the memory of mod->list can still be accessed by list walkers (e.g. kallsyms). Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 88cec1ddb1e3..331b03f6b411 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3326,6 +3326,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); wake_up_all(&module_wq); + /* Wait for RCU synchronizing before releasing mod->list. */ + synchronize_rcu(); mutex_unlock(&module_mutex); free_module: module_deallocate(mod, info); -- cgit v1.2.3 From 461e34aed0550fee706a9a28fb453830b5079ea0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:27:29 +1030 Subject: module: Unlink module with RCU synchronizing instead of stop_machine Unlink module from module list with RCU synchronizing instead of using stop_machine(). Since module list is already protected by rcu, we don't need stop_machine() anymore. Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/module.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 331b03f6b411..bed608b8c8a6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1697,18 +1697,6 @@ static void mod_sysfs_teardown(struct module *mod) mod_sysfs_fini(mod); } -/* - * unlink the module with the whole machine is stopped with interrupts off - * - this defends against kallsyms not taking locks - */ -static int __unlink_module(void *_mod) -{ - struct module *mod = _mod; - list_del(&mod->list); - module_bug_cleanup(mod); - return 0; -} - #ifdef CONFIG_DEBUG_SET_MODULE_RONX /* * LKM RO/NX protection: protect module's text/ro-data @@ -1860,7 +1848,11 @@ static void free_module(struct module *mod) /* Now we can delete it from the lists */ mutex_lock(&module_mutex); - stop_machine(__unlink_module, mod, NULL); + /* Unlink carefully: kallsyms could be walking list. */ + list_del_rcu(&mod->list); + /* Wait for RCU synchronizing before releasing mod->list. */ + synchronize_rcu(); + module_bug_cleanup(mod); mutex_unlock(&module_mutex); /* This may be NULL, but that's OK */ -- cgit v1.2.3 From 0286b5ea125e58b4797747f688949c05394412e8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:28:29 +1030 Subject: lib/bug: Use RCU list ops for module_bug_list Actually since module_bug_list should be used in BUG context, we may not need this. But for someone who want to use this from normal context, this makes module_bug_list an RCU list. Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/module.c | 5 +++-- lib/bug.c | 20 ++++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index bed608b8c8a6..d596a306b0a1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1850,9 +1850,10 @@ static void free_module(struct module *mod) mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); - /* Wait for RCU synchronizing before releasing mod->list. */ - synchronize_rcu(); + /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); + /* Wait for RCU synchronizing before releasing mod->list and buglist. */ + synchronize_rcu(); mutex_unlock(&module_mutex); /* This may be NULL, but that's OK */ diff --git a/lib/bug.c b/lib/bug.c index d1d7c7878900..0c3bd9552b6f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -64,16 +64,22 @@ static LIST_HEAD(module_bug_list); static const struct bug_entry *module_find_bug(unsigned long bugaddr) { struct module *mod; + const struct bug_entry *bug = NULL; - list_for_each_entry(mod, &module_bug_list, bug_list) { - const struct bug_entry *bug = mod->bug_table; + rcu_read_lock(); + list_for_each_entry_rcu(mod, &module_bug_list, bug_list) { unsigned i; + bug = mod->bug_table; for (i = 0; i < mod->num_bugs; ++i, ++bug) if (bugaddr == bug_addr(bug)) - return bug; + goto out; } - return NULL; + bug = NULL; +out: + rcu_read_unlock(); + + return bug; } void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, @@ -99,13 +105,15 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * Strictly speaking this should have a spinlock to protect against * traversals, but since we only traverse on BUG()s, a spinlock * could potentially lead to deadlock and thus be counter-productive. + * Thus, this uses RCU to safely manipulate the bug list, since BUG + * must run in non-interruptive state. */ - list_add(&mod->bug_list, &module_bug_list); + list_add_rcu(&mod->bug_list, &module_bug_list); } void module_bug_cleanup(struct module *mod) { - list_del(&mod->bug_list); + list_del_rcu(&mod->bug_list); } #else -- cgit v1.2.3 From 2f35c41f58a978dfa44ffa102249d556caa99eeb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:29:29 +1030 Subject: module: Replace module_ref with atomic_t refcnt Replace module_ref per-cpu complex reference counter with an atomic_t simple refcnt. This is for code simplification. Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- include/linux/module.h | 16 +--------------- include/trace/events/module.h | 2 +- kernel/module.c | 39 +++++---------------------------------- 3 files changed, 7 insertions(+), 50 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 71f282a4e307..ebfb0e153c6a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -210,20 +210,6 @@ enum module_state { MODULE_STATE_UNFORMED, /* Still setting it up. */ }; -/** - * struct module_ref - per cpu module reference counts - * @incs: number of module get on this cpu - * @decs: number of module put on this cpu - * - * We force an alignment on 8 or 16 bytes, so that alloc_percpu() - * put @incs/@decs in same cache line, with no extra memory cost, - * since alloc_percpu() is fine grained. - */ -struct module_ref { - unsigned long incs; - unsigned long decs; -} __attribute((aligned(2 * sizeof(unsigned long)))); - struct module { enum module_state state; @@ -367,7 +353,7 @@ struct module { /* Destruction function. */ void (*exit)(void); - struct module_ref __percpu *refptr; + atomic_t refcnt; #endif #ifdef CONFIG_CONSTRUCTORS diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 7c5cbfe3fc49..81c4c183d348 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -80,7 +80,7 @@ DECLARE_EVENT_CLASS(module_refcnt, TP_fast_assign( __entry->ip = ip; - __entry->refcnt = __this_cpu_read(mod->refptr->incs) - __this_cpu_read(mod->refptr->decs); + __entry->refcnt = atomic_read(&mod->refcnt); __assign_str(name, mod->name); ), diff --git a/kernel/module.c b/kernel/module.c index d596a306b0a1..b1d485df5ac1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -631,15 +631,11 @@ EXPORT_TRACEPOINT_SYMBOL(module_get); /* Init the unload section of the module. */ static int module_unload_init(struct module *mod) { - mod->refptr = alloc_percpu(struct module_ref); - if (!mod->refptr) - return -ENOMEM; - INIT_LIST_HEAD(&mod->source_list); INIT_LIST_HEAD(&mod->target_list); /* Hold reference count during initialization. */ - raw_cpu_write(mod->refptr->incs, 1); + atomic_set(&mod->refcnt, 1); return 0; } @@ -721,8 +717,6 @@ static void module_unload_free(struct module *mod) kfree(use); } mutex_unlock(&module_mutex); - - free_percpu(mod->refptr); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -772,28 +766,7 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned long module_refcount(struct module *mod) { - unsigned long incs = 0, decs = 0; - int cpu; - - for_each_possible_cpu(cpu) - decs += per_cpu_ptr(mod->refptr, cpu)->decs; - /* - * ensure the incs are added up after the decs. - * module_put ensures incs are visible before decs with smp_wmb. - * - * This 2-count scheme avoids the situation where the refcount - * for CPU0 is read, then CPU0 increments the module refcount, - * then CPU1 drops that refcount, then the refcount for CPU1 is - * read. We would record a decrement but not its corresponding - * increment so we would see a low count (disaster). - * - * Rare situation? But module_refcount can be preempted, and we - * might be tallying up 4096+ CPUs. So it is not impossible. - */ - smp_rmb(); - for_each_possible_cpu(cpu) - incs += per_cpu_ptr(mod->refptr, cpu)->incs; - return incs - decs; + return (unsigned long)atomic_read(&mod->refcnt); } EXPORT_SYMBOL(module_refcount); @@ -935,7 +908,7 @@ void __module_get(struct module *module) { if (module) { preempt_disable(); - __this_cpu_inc(module->refptr->incs); + atomic_inc(&module->refcnt); trace_module_get(module, _RET_IP_); preempt_enable(); } @@ -950,7 +923,7 @@ bool try_module_get(struct module *module) preempt_disable(); if (likely(module_is_live(module))) { - __this_cpu_inc(module->refptr->incs); + atomic_inc(&module->refcnt); trace_module_get(module, _RET_IP_); } else ret = false; @@ -965,9 +938,7 @@ void module_put(struct module *module) { if (module) { preempt_disable(); - smp_wmb(); /* see comment in module_refcount */ - __this_cpu_inc(module->refptr->decs); - + atomic_dec(&module->refcnt); trace_module_put(module, _RET_IP_); preempt_enable(); } -- cgit v1.2.3 From e513cc1c07e2ab93a4514eec9833e031df3e30bb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:30:29 +1030 Subject: module: Remove stop_machine from module unloading Remove stop_machine from module unloading by adding new reference counting algorithm. This atomic refcounter works like a semaphore, it can get (be incremented) only when the counter is not 0. When loading a module, kmodule subsystem sets the counter MODULE_REF_BASE (= 1). And when unloading the module, it subtracts MODULE_REF_BASE from the counter. If no one refers the module, the refcounter becomes 0 and we can remove the module safely. If someone referes it, we try to recover the counter by adding MODULE_REF_BASE unless the counter becomes 0, because the referrer can put the module right before recovering. If the recovering is failed, we can get the 0 refcount and it never be incremented again, it can be removed safely too. Note that __module_get() forcibly gets the module refcounter, users should use try_module_get() instead of that. Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/module.c | 67 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 28 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index b1d485df5ac1..e772595d73db 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -98,7 +97,7 @@ * 1) List of modules (also safely readable with preempt_disable), * 2) module_use links, * 3) module_addr_min/module_addr_max. - * (delete uses stop_machine/add uses RCU list operations). */ + * (delete and add uses RCU list operations). */ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); @@ -628,14 +627,23 @@ static char last_unloaded_module[MODULE_NAME_LEN+1]; EXPORT_TRACEPOINT_SYMBOL(module_get); +/* MODULE_REF_BASE is the base reference count by kmodule loader. */ +#define MODULE_REF_BASE 1 + /* Init the unload section of the module. */ static int module_unload_init(struct module *mod) { + /* + * Initialize reference counter to MODULE_REF_BASE. + * refcnt == 0 means module is going. + */ + atomic_set(&mod->refcnt, MODULE_REF_BASE); + INIT_LIST_HEAD(&mod->source_list); INIT_LIST_HEAD(&mod->target_list); /* Hold reference count during initialization. */ - atomic_set(&mod->refcnt, 1); + atomic_inc(&mod->refcnt); return 0; } @@ -734,39 +742,39 @@ static inline int try_force_unload(unsigned int flags) } #endif /* CONFIG_MODULE_FORCE_UNLOAD */ -struct stopref +/* Try to release refcount of module, 0 means success. */ +static int try_release_module_ref(struct module *mod) { - struct module *mod; - int flags; - int *forced; -}; + int ret; -/* Whole machine is stopped with interrupts off when this runs. */ -static int __try_stop_module(void *_sref) -{ - struct stopref *sref = _sref; + /* Try to decrement refcnt which we set at loading */ + ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt); + BUG_ON(ret < 0); + if (ret) + /* Someone can put this right now, recover with checking */ + ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0); + + return ret; +} +static int try_stop_module(struct module *mod, int flags, int *forced) +{ /* If it's not unused, quit unless we're forcing. */ - if (module_refcount(sref->mod) != 0) { - if (!(*sref->forced = try_force_unload(sref->flags))) + if (try_release_module_ref(mod) != 0) { + *forced = try_force_unload(flags); + if (!(*forced)) return -EWOULDBLOCK; } /* Mark it as dying. */ - sref->mod->state = MODULE_STATE_GOING; - return 0; -} - -static int try_stop_module(struct module *mod, int flags, int *forced) -{ - struct stopref sref = { mod, flags, forced }; + mod->state = MODULE_STATE_GOING; - return stop_machine(__try_stop_module, &sref, NULL); + return 0; } unsigned long module_refcount(struct module *mod) { - return (unsigned long)atomic_read(&mod->refcnt); + return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE; } EXPORT_SYMBOL(module_refcount); @@ -921,11 +929,11 @@ bool try_module_get(struct module *module) if (module) { preempt_disable(); - - if (likely(module_is_live(module))) { - atomic_inc(&module->refcnt); + /* Note: here, we can fail to get a reference */ + if (likely(module_is_live(module) && + atomic_inc_not_zero(&module->refcnt) != 0)) trace_module_get(module, _RET_IP_); - } else + else ret = false; preempt_enable(); @@ -936,9 +944,12 @@ EXPORT_SYMBOL(try_module_get); void module_put(struct module *module) { + int ret; + if (module) { preempt_disable(); - atomic_dec(&module->refcnt); + ret = atomic_dec_if_positive(&module->refcnt); + WARN_ON(ret < 0); /* Failed to put refcount */ trace_module_put(module, _RET_IP_); preempt_enable(); } -- cgit v1.2.3 From 6da0b565150b32318757062bc75834113f0508d6 Mon Sep 17 00:00:00 2001 From: Ionut Alexa Date: Mon, 10 Nov 2014 09:31:29 +1030 Subject: kernel:module Fix coding style errors and warnings. Fixed codin style errors and warnings. Changes printk with print_debug/warn. Changed seq_printf to seq_puts. Signed-off-by: Ionut Alexa Signed-off-by: Rusty Russell (removed bogus KERN_DEFAULT conversion) --- kernel/module.c | 53 ++++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 25 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e772595d73db..381105b2aaae 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -157,13 +157,13 @@ static BLOCKING_NOTIFIER_HEAD(module_notify_list); * Protected by module_mutex. */ static unsigned long module_addr_min = -1UL, module_addr_max = 0; -int register_module_notifier(struct notifier_block * nb) +int register_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&module_notify_list, nb); } EXPORT_SYMBOL(register_module_notifier); -int unregister_module_notifier(struct notifier_block * nb) +int unregister_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&module_notify_list, nb); } @@ -858,8 +858,10 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) seq_printf(m, " %lu ", module_refcount(mod)); - /* Always include a trailing , so userspace can differentiate - between this and the old multi-field proc format. */ + /* + * Always include a trailing , so userspace can differentiate + * between this and the old multi-field proc format. + */ list_for_each_entry(use, &mod->source_list, source_list) { printed_something = 1; seq_printf(m, "%s,", use->source->name); @@ -867,11 +869,11 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) if (mod->init != NULL && mod->exit == NULL) { printed_something = 1; - seq_printf(m, "[permanent],"); + seq_puts(m, "[permanent],"); } if (!printed_something) - seq_printf(m, "-"); + seq_puts(m, "-"); } void __symbol_put(const char *symbol) @@ -960,7 +962,7 @@ EXPORT_SYMBOL(module_put); static inline void print_unload_info(struct seq_file *m, struct module *mod) { /* We don't know the usage count, or what modules are using. */ - seq_printf(m, " - -"); + seq_puts(m, " - -"); } static inline void module_unload_free(struct module *mod) @@ -1113,7 +1115,7 @@ static unsigned long maybe_relocated(unsigned long crc, static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, - struct module *mod, + struct module *mod, const unsigned long *crc, const struct module *crc_owner) { @@ -1147,7 +1149,7 @@ static int check_version(Elf_Shdr *sechdrs, return 0; bad_version: - printk("%s: disagrees about version of symbol %s\n", + pr_warn("%s: disagrees about version of symbol %s\n", mod->name, symname); return 0; } @@ -1182,7 +1184,7 @@ static inline int same_magic(const char *amagic, const char *bmagic, static inline int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, - struct module *mod, + struct module *mod, const unsigned long *crc, const struct module *crc_owner) { @@ -1270,15 +1272,13 @@ static inline bool sect_empty(const Elf_Shdr *sect) return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; } -struct module_sect_attr -{ +struct module_sect_attr { struct module_attribute mattr; char *name; unsigned long address; }; -struct module_sect_attrs -{ +struct module_sect_attrs { struct attribute_group grp; unsigned int nsections; struct module_sect_attr attrs[0]; @@ -1532,7 +1532,8 @@ static int module_add_modinfo_attrs(struct module *mod) (attr->test && attr->test(mod))) { memcpy(temp_attr, attr, sizeof(*temp_attr)); sysfs_attr_init(&temp_attr->attr); - error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); + error = sysfs_create_file(&mod->mkobj.kobj, + &temp_attr->attr); ++temp_attr; } } @@ -1548,7 +1549,7 @@ static void module_remove_modinfo_attrs(struct module *mod) /* pick a field to test for end of list */ if (!attr->attr.name) break; - sysfs_remove_file(&mod->mkobj.kobj,&attr->attr); + sysfs_remove_file(&mod->mkobj.kobj, &attr->attr); if (attr->free) attr->free(mod); } @@ -1930,7 +1931,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) /* We compiled with -fno-common. These are not supposed to happen. */ pr_debug("Common symbol: %s\n", name); - printk("%s: please compile with -fno-common\n", + pr_warn("%s: please compile with -fno-common\n", mod->name); ret = -ENOEXEC; break; @@ -2234,7 +2235,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, - unsigned int shnum) + unsigned int shnum) { const Elf_Shdr *sec; @@ -2710,7 +2711,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) * This shouldn't happen with same compiler and binutils * building all parts of the module. */ - printk(KERN_WARNING "%s: has both .ctors and .init_array.\n", + pr_warn("%s: has both .ctors and .init_array.\n", mod->name); return -EINVAL; } @@ -2998,8 +2999,10 @@ static int do_init_module(struct module *mod) if (mod->init != NULL) ret = do_one_initcall(mod->init); if (ret < 0) { - /* Init routine failed: abort. Try to protect us from - buggy refcounters. */ + /* + * Init routine failed: abort. Try to protect us from + * buggy refcounters. + */ mod->state = MODULE_STATE_GOING; synchronize_sched(); module_put(mod); @@ -3151,7 +3154,7 @@ out: static int unknown_module_param_cb(char *param, char *val, const char *modname) { - /* Check for magic 'dyndbg' arg */ + /* Check for magic 'dyndbg' arg */ int ret = ddebug_dyndbg_module_param_cb(param, val, modname); if (ret != 0) pr_warn("%s: unknown parameter '%s' ignored\n", modname, param); @@ -3636,8 +3639,8 @@ static int m_show(struct seq_file *m, void *p) /* Informative for users. */ seq_printf(m, " %s", - mod->state == MODULE_STATE_GOING ? "Unloading": - mod->state == MODULE_STATE_COMING ? "Loading": + mod->state == MODULE_STATE_GOING ? "Unloading" : + mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ seq_printf(m, " 0x%pK", mod->module_core); @@ -3646,7 +3649,7 @@ static int m_show(struct seq_file *m, void *p) if (mod->taints) seq_printf(m, " %s", module_flags(mod, buf)); - seq_printf(m, "\n"); + seq_puts(m, "\n"); return 0; } -- cgit v1.2.3