From 98a79d6a50181ca1ecf7400eda01d5dc1bc0dbf0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 13 Dec 2008 21:19:41 +1030 Subject: cpumask: centralize cpu_online_map and cpu_possible_map Impact: cleanup Each SMP arch defines these themselves. Move them to a central location. Twists: 1) Some archs (m32, parisc, s390) set possible_map to all 1, so we add a CONFIG_INIT_ALL_POSSIBLE for this rather than break them. 2) mips and sparc32 '#define cpu_possible_map phys_cpu_present_map'. Those archs simply have phys_cpu_present_map replaced everywhere. 3) Alpha defined cpu_possible_map to cpu_present_map; this is tricky so I just manipulate them both in sync. 4) IA64, cris and m32r have gratuitous 'extern cpumask_t cpu_possible_map' declarations. Signed-off-by: Rusty Russell Reviewed-by: Grant Grundler Tested-by: Tony Luck Acked-by: Ingo Molnar Cc: Mike Travis Cc: ink@jurassic.park.msu.ru Cc: rmk@arm.linux.org.uk Cc: starvik@axis.com Cc: tony.luck@intel.com Cc: takata@linux-m32r.org Cc: ralf@linux-mips.org Cc: grundler@parisc-linux.org Cc: paulus@samba.org Cc: schwidefsky@de.ibm.com Cc: lethal@linux-sh.org Cc: wli@holomorphy.com Cc: davem@davemloft.net Cc: jdike@addtoit.com Cc: mingo@redhat.com --- kernel/cpu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 8ea32e8d68b0..bae131a1211b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -24,19 +24,20 @@ cpumask_t cpu_present_map __read_mostly; EXPORT_SYMBOL(cpu_present_map); -#ifndef CONFIG_SMP - /* * Represents all cpu's that are currently online. */ -cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); +#ifdef CONFIG_INIT_ALL_POSSIBLE cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; +#else +cpumask_t cpu_possible_map __read_mostly; +#endif EXPORT_SYMBOL(cpu_possible_map); -#else /* CONFIG_SMP */ - +#ifdef CONFIG_SMP /* Serializes the updates to cpu_online_map, cpu_present_map */ static DEFINE_MUTEX(cpu_add_remove_lock); -- cgit v1.2.3 From b3199c025d1646e25e7d1d640dd605db251dccf8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:14 +1030 Subject: cpumask: switch over to cpu_online/possible/active/present_mask: core Impact: cleanup This implements the obsolescent cpu_online_map in terms of cpu_online_mask, rather than the other way around. Same for the other maps. The documentation comments are also updated to refer to _mask rather than _map. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- include/linux/cpumask.h | 75 +++++++++++++++++++------------------------------ kernel/cpu.c | 49 +++++++++++++++----------------- 2 files changed, 52 insertions(+), 72 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b5ad19a6f43f..db2341beca45 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -416,65 +416,54 @@ int __next_cpu_nr(int n, const cpumask_t *srcp); /* * The following particular system cpumasks and operations manage - * possible, present, active and online cpus. Each of them is a fixed size - * bitmap of size NR_CPUS. + * possible, present, active and online cpus. * - * #ifdef CONFIG_HOTPLUG_CPU - * cpu_possible_map - has bit 'cpu' set iff cpu is populatable - * cpu_present_map - has bit 'cpu' set iff cpu is populated - * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler - * cpu_active_map - has bit 'cpu' set iff cpu available to migration - * #else - * cpu_possible_map - has bit 'cpu' set iff cpu is populated - * cpu_present_map - copy of cpu_possible_map - * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler - * #endif + * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable + * cpu_present_mask - has bit 'cpu' set iff cpu is populated + * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler + * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * - * In either case, NR_CPUS is fixed at compile time, as the static - * size of these bitmaps. The cpu_possible_map is fixed at boot - * time, as the set of CPU id's that it is possible might ever - * be plugged in at anytime during the life of that system boot. - * The cpu_present_map is dynamic(*), representing which CPUs - * are currently plugged in. And cpu_online_map is the dynamic - * subset of cpu_present_map, indicating those CPUs available - * for scheduling. + * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * - * If HOTPLUG is enabled, then cpu_possible_map is forced to have + * The cpu_possible_mask is fixed at boot time, as the set of CPU id's + * that it is possible might ever be plugged in at anytime during the + * life of that system boot. The cpu_present_mask is dynamic(*), + * representing which CPUs are currently plugged in. And + * cpu_online_mask is the dynamic subset of cpu_present_mask, + * indicating those CPUs available for scheduling. + * + * If HOTPLUG is enabled, then cpu_possible_mask is forced to have * all NR_CPUS bits set, otherwise it is just the set of CPUs that * ACPI reports present at boot. * - * If HOTPLUG is enabled, then cpu_present_map varies dynamically, + * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise - * cpu_present_map is just a copy of cpu_possible_map. + * cpu_present_mask is just a copy of cpu_possible_mask. * - * (*) Well, cpu_present_map is dynamic in the hotplug case. If not - * hotplug, it's a copy of cpu_possible_map, hence fixed at boot. + * (*) Well, cpu_present_mask is dynamic in the hotplug case. If not + * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP - * cpu_{online,possible,present}_maps are placebos. Changing them + * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() * and cpu_*() macros in the UP case. This ugliness is a UP * optimization - don't waste any instructions or memory references * asking if you're online or how many CPUs there are if there is * only one CPU. - * 2) Most SMP arch's #define some of these maps to be some - * other map specific to that arch. Therefore, the following - * must be #define macros, not inlines. To see why, examine - * the assembly code produced by the following. Note that - * set1() writes phys_x_map, but set2() writes x_map: - * int x_map, phys_x_map; - * #define set1(a) x_map = a - * inline void set2(int a) { x_map = a; } - * #define x_map phys_x_map - * main(){ set1(3); set2(5); } */ -extern cpumask_t cpu_possible_map; -extern cpumask_t cpu_online_map; -extern cpumask_t cpu_present_map; -extern cpumask_t cpu_active_map; +extern const struct cpumask *const cpu_possible_mask; +extern const struct cpumask *const cpu_online_mask; +extern const struct cpumask *const cpu_present_mask; +extern const struct cpumask *const cpu_active_mask; + +/* These strip const, as traditionally they weren't const. */ +#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) +#define cpu_online_map (*(cpumask_t *)cpu_online_mask) +#define cpu_present_map (*(cpumask_t *)cpu_present_mask) +#define cpu_active_map (*(cpumask_t *)cpu_active_mask) #if NR_CPUS > 1 #define num_online_cpus() cpus_weight_nr(cpu_online_map) @@ -1058,12 +1047,6 @@ static inline void free_bootmem_cpumask_var(cpumask_var_t mask) } #endif /* CONFIG_CPUMASK_OFFSTACK */ -/* The pointer versions of the maps, these will become the primary versions. */ -#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map) -#define cpu_online_mask ((const struct cpumask *)&cpu_online_map) -#define cpu_present_mask ((const struct cpumask *)&cpu_present_map) -#define cpu_active_mask ((const struct cpumask *)&cpu_active_map) - /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); diff --git a/kernel/cpu.c b/kernel/cpu.c index bae131a1211b..3ddc509b19c5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -15,30 +15,8 @@ #include #include -/* - * Represents all cpu's present in the system - * In systems capable of hotplug, this map could dynamically grow - * as new cpu's are detected in the system via any platform specific - * method, such as ACPI for e.g. - */ -cpumask_t cpu_present_map __read_mostly; -EXPORT_SYMBOL(cpu_present_map); - -/* - * Represents all cpu's that are currently online. - */ -cpumask_t cpu_online_map __read_mostly; -EXPORT_SYMBOL(cpu_online_map); - -#ifdef CONFIG_INIT_ALL_POSSIBLE -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; -#else -cpumask_t cpu_possible_map __read_mostly; -#endif -EXPORT_SYMBOL(cpu_possible_map); - #ifdef CONFIG_SMP -/* Serializes the updates to cpu_online_map, cpu_present_map */ +/* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); @@ -65,8 +43,6 @@ void __init cpu_hotplug_init(void) cpu_hotplug.refcount = 0; } -cpumask_t cpu_active_map; - #ifdef CONFIG_HOTPLUG_CPU void get_online_cpus(void) @@ -97,7 +73,7 @@ EXPORT_SYMBOL_GPL(put_online_cpus); /* * The following two API's must be used when attempting - * to serialize the updates to cpu_online_map, cpu_present_map. + * to serialize the updates to cpu_online_mask, cpu_present_mask. */ void cpu_maps_update_begin(void) { @@ -503,3 +479,24 @@ EXPORT_SYMBOL_GPL(cpu_bit_bitmap); const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; EXPORT_SYMBOL(cpu_all_bits); + +#ifdef CONFIG_INIT_ALL_POSSIBLE +static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly + = CPU_BITS_ALL; +#else +static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly; +#endif +const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits); +EXPORT_SYMBOL(cpu_possible_mask); + +static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits); +EXPORT_SYMBOL(cpu_online_mask); + +static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits); +EXPORT_SYMBOL(cpu_present_mask); + +static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits); +EXPORT_SYMBOL(cpu_active_mask); -- cgit v1.2.3 From 3fa41520696fec2815e2d88fbcccdda77ba4d693 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:16 +1030 Subject: cpumask: make set_cpu_*/init_cpu_* out-of-line They're only for use in boot/cpu hotplug code anyway, and this avoids the use of deprecated cpu_*_map. Stephen Rothwell points out that gcc 4.2.4 (on powerpc at least) didn't like the cast away of const anyway: include/linux/cpumask.h: In function 'set_cpu_possible': include/linux/cpumask.h:1052: warning: passing argument 2 of 'cpumask_set_cpu' discards qualifiers from pointer target type So this kills two birds with one stone. Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 53 +++++++------------------------------------------ kernel/cpu.c | 47 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 46 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index e62a67156c53..7c178a6baae3 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1057,50 +1057,11 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) /* Wrappers for arch boot code to manipulate normally-constant masks */ -static inline void set_cpu_possible(unsigned int cpu, bool possible) -{ - if (possible) - cpumask_set_cpu(cpu, &cpu_possible_map); - else - cpumask_clear_cpu(cpu, &cpu_possible_map); -} - -static inline void set_cpu_present(unsigned int cpu, bool present) -{ - if (present) - cpumask_set_cpu(cpu, &cpu_present_map); - else - cpumask_clear_cpu(cpu, &cpu_present_map); -} - -static inline void set_cpu_online(unsigned int cpu, bool online) -{ - if (online) - cpumask_set_cpu(cpu, &cpu_online_map); - else - cpumask_clear_cpu(cpu, &cpu_online_map); -} - -static inline void set_cpu_active(unsigned int cpu, bool active) -{ - if (active) - cpumask_set_cpu(cpu, &cpu_active_map); - else - cpumask_clear_cpu(cpu, &cpu_active_map); -} - -static inline void init_cpu_present(const struct cpumask *src) -{ - cpumask_copy(&cpu_present_map, src); -} - -static inline void init_cpu_possible(const struct cpumask *src) -{ - cpumask_copy(&cpu_possible_map, src); -} - -static inline void init_cpu_online(const struct cpumask *src) -{ - cpumask_copy(&cpu_online_map, src); -} +void set_cpu_possible(unsigned int cpu, bool possible); +void set_cpu_present(unsigned int cpu, bool present); +void set_cpu_online(unsigned int cpu, bool online); +void set_cpu_active(unsigned int cpu, bool active); +void init_cpu_present(const struct cpumask *src); +void init_cpu_possible(const struct cpumask *src); +void init_cpu_online(const struct cpumask *src); #endif /* __LINUX_CPUMASK_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 3ddc509b19c5..2c9f78f3a2fc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -500,3 +500,50 @@ EXPORT_SYMBOL(cpu_present_mask); static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly; const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits); EXPORT_SYMBOL(cpu_active_mask); + +void set_cpu_possible(unsigned int cpu, bool possible) +{ + if (possible) + cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits)); +} + +void set_cpu_present(unsigned int cpu, bool present) +{ + if (present) + cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits)); +} + +void set_cpu_online(unsigned int cpu, bool online) +{ + if (online) + cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); +} + +void set_cpu_active(unsigned int cpu, bool active) +{ + if (active) + cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits)); +} + +void init_cpu_present(const struct cpumask *src) +{ + cpumask_copy(to_cpumask(cpu_present_bits), src); +} + +void init_cpu_possible(const struct cpumask *src) +{ + cpumask_copy(to_cpumask(cpu_possible_bits), src); +} + +void init_cpu_online(const struct cpumask *src) +{ + cpumask_copy(to_cpumask(cpu_online_bits), src); +} -- cgit v1.2.3 From e0b582ec56f1a1d8b30ebf340a7b91fb09f26c8c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:28 +1030 Subject: cpumask: convert kernel/cpu.c Impact: Reduce kernel stack and memory usage, use new cpumask API. Use cpumask_var_t for take_cpu_down() stack var, and frozen_cpus. Note that notify_cpu_starting() can be called before core_initcall allocates frozen_cpus, but the NULL check is optimized out by gcc for the CONFIG_CPUMASK_OFFSTACK=n case. Signed-off-by: Rusty Russell --- kernel/cpu.c | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 2c9f78f3a2fc..47fff3b63cbf 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -194,7 +194,7 @@ static int __ref take_cpu_down(void *_param) static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) { int err, nr_calls = 0; - cpumask_t old_allowed, tmp; + cpumask_var_t old_allowed; void *hcpu = (void *)(long)cpu; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; struct take_cpu_down_param tcd_param = { @@ -208,6 +208,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) if (!cpu_online(cpu)) return -EINVAL; + if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL)) + return -ENOMEM; + cpu_hotplug_begin(); err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); @@ -222,13 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) } /* Ensure that we are not runnable on dying cpu */ - old_allowed = current->cpus_allowed; - cpus_setall(tmp); - cpu_clear(cpu, tmp); - set_cpus_allowed_ptr(current, &tmp); - tmp = cpumask_of_cpu(cpu); + cpumask_copy(old_allowed, ¤t->cpus_allowed); + set_cpus_allowed_ptr(current, + cpumask_of(cpumask_any_but(cpu_online_mask, cpu))); - err = __stop_machine(take_cpu_down, &tcd_param, &tmp); + err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, @@ -254,7 +255,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) check_for_tasks(cpu); out_allowed: - set_cpus_allowed_ptr(current, &old_allowed); + set_cpus_allowed_ptr(current, old_allowed); out_release: cpu_hotplug_done(); if (!err) { @@ -262,6 +263,7 @@ out_release: hcpu) == NOTIFY_BAD) BUG(); } + free_cpumask_var(old_allowed); return err; } @@ -280,7 +282,7 @@ int __ref cpu_down(unsigned int cpu) /* * Make sure the all cpus did the reschedule and are not - * using stale version of the cpu_active_map. + * using stale version of the cpu_active_mask. * This is not strictly necessary becuase stop_machine() * that we run down the line already provides the required * synchronization. But it's really a side effect and we do not @@ -344,7 +346,7 @@ out_notify: int __cpuinit cpu_up(unsigned int cpu) { int err = 0; - if (!cpu_isset(cpu, cpu_possible_map)) { + if (!cpu_possible(cpu)) { printk(KERN_ERR "can't online cpu %d because it is not " "configured as may-hotadd at boot time\n", cpu); #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) @@ -369,25 +371,25 @@ out: } #ifdef CONFIG_PM_SLEEP_SMP -static cpumask_t frozen_cpus; +static cpumask_var_t frozen_cpus; int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; cpu_maps_update_begin(); - first_cpu = first_cpu(cpu_online_map); + first_cpu = cpumask_first(cpu_online_mask); /* We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ - cpus_clear(frozen_cpus); + cpumask_clear(frozen_cpus); printk("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { if (cpu == first_cpu) continue; error = _cpu_down(cpu, 1); if (!error) { - cpu_set(cpu, frozen_cpus); + cpumask_set_cpu(cpu, frozen_cpus); printk("CPU%d is down\n", cpu); } else { printk(KERN_ERR "Error taking CPU%d down: %d\n", @@ -413,11 +415,11 @@ void __ref enable_nonboot_cpus(void) /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); cpu_hotplug_disabled = 0; - if (cpus_empty(frozen_cpus)) + if (cpumask_empty(frozen_cpus)) goto out; printk("Enabling non-boot CPUs ...\n"); - for_each_cpu_mask_nr(cpu, frozen_cpus) { + for_each_cpu(cpu, frozen_cpus) { error = _cpu_up(cpu, 1); if (!error) { printk("CPU%d is up\n", cpu); @@ -425,10 +427,18 @@ void __ref enable_nonboot_cpus(void) } printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); } - cpus_clear(frozen_cpus); + cpumask_clear(frozen_cpus); out: cpu_maps_update_done(); } + +static int alloc_frozen_cpus(void) +{ + if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO)) + return -ENOMEM; + return 0; +} +core_initcall(alloc_frozen_cpus); #endif /* CONFIG_PM_SLEEP_SMP */ /** @@ -444,7 +454,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu) unsigned long val = CPU_STARTING; #ifdef CONFIG_PM_SLEEP_SMP - if (cpu_isset(cpu, frozen_cpus)) + if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus)) val = CPU_STARTING_FROZEN; #endif /* CONFIG_PM_SLEEP_SMP */ raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu); @@ -456,7 +466,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu) * cpu_bit_bitmap[] is a special, "compressed" data structure that * represents all NR_CPUS bits binary values of 1< Date: Mon, 22 Dec 2008 12:36:30 +0100 Subject: stop_machine: introduce stop_machine_create/destroy. Introduce stop_machine_create/destroy. With this interface subsystems that need a non-failing stop_machine environment can create the stop_machine machine threads before actually calling stop_machine. When the threads aren't needed anymore they can be killed with stop_machine_destroy again. When stop_machine gets called and the threads aren't present they will be created and destroyed automatically. This restores the old behaviour of stop_machine. This patch also converts cpu hotplug to the new interface since it is special: cpu_down calls __stop_machine instead of stop_machine. However the kstop threads will only be created when stop_machine gets called. Changing the code so that the threads would be created automatically on __stop_machine is currently not possible: when __stop_machine gets called we hold cpu_add_remove_lock, which is the same lock that create_rt_workqueue would take. So the workqueue needs to be created before the cpu hotplug code locks cpu_add_remove_lock. Signed-off-by: Heiko Carstens Signed-off-by: Rusty Russell --- include/linux/stop_machine.h | 22 ++++++++++++++++++ kernel/cpu.c | 6 ++++- kernel/stop_machine.c | 55 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 72 insertions(+), 11 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 74d59a641362..baba3a23a814 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -35,6 +35,24 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); * won't come or go while it's being called. Used by hotplug cpu. */ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); + +/** + * stop_machine_create: create all stop_machine threads + * + * Description: This causes all stop_machine threads to be created before + * stop_machine actually gets called. This can be used by subsystems that + * need a non failing stop_machine infrastructure. + */ +int stop_machine_create(void); + +/** + * stop_machine_destroy: destroy all stop_machine threads + * + * Description: This causes all stop_machine threads which were created with + * stop_machine_create to be destroyed again. + */ +void stop_machine_destroy(void); + #else static inline int stop_machine(int (*fn)(void *), void *data, @@ -46,5 +64,9 @@ static inline int stop_machine(int (*fn)(void *), void *data, local_irq_enable(); return ret; } + +static inline int stop_machine_create(void) { return 0; } +static inline void stop_machine_destroy(void) { } + #endif /* CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 47fff3b63cbf..30e74dd6d01b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -269,8 +269,11 @@ out_release: int __ref cpu_down(unsigned int cpu) { - int err = 0; + int err; + err = stop_machine_create(); + if (err) + return err; cpu_maps_update_begin(); if (cpu_hotplug_disabled) { @@ -297,6 +300,7 @@ int __ref cpu_down(unsigned int cpu) out: cpu_maps_update_done(); + stop_machine_destroy(); return err; } EXPORT_SYMBOL(cpu_down); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 286c41722e8c..0cd415ee62a2 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -38,7 +38,10 @@ struct stop_machine_data { static unsigned int num_threads; static atomic_t thread_ack; static DEFINE_MUTEX(lock); - +/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ +static DEFINE_MUTEX(setup_lock); +/* Users of stop_machine. */ +static int refcount; static struct workqueue_struct *stop_machine_wq; static struct stop_machine_data active, idle; static const cpumask_t *active_cpus; @@ -109,6 +112,43 @@ static int chill(void *unused) return 0; } +int stop_machine_create(void) +{ + mutex_lock(&setup_lock); + if (refcount) + goto done; + stop_machine_wq = create_rt_workqueue("kstop"); + if (!stop_machine_wq) + goto err_out; + stop_machine_work = alloc_percpu(struct work_struct); + if (!stop_machine_work) + goto err_out; +done: + refcount++; + mutex_unlock(&setup_lock); + return 0; + +err_out: + if (stop_machine_wq) + destroy_workqueue(stop_machine_wq); + mutex_unlock(&setup_lock); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(stop_machine_create); + +void stop_machine_destroy(void) +{ + mutex_lock(&setup_lock); + refcount--; + if (refcount) + goto done; + destroy_workqueue(stop_machine_wq); + free_percpu(stop_machine_work); +done: + mutex_unlock(&setup_lock); +} +EXPORT_SYMBOL_GPL(stop_machine_destroy); + int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { struct work_struct *sm_work; @@ -146,19 +186,14 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { int ret; + ret = stop_machine_create(); + if (ret) + return ret; /* No CPUs can come up or down during this. */ get_online_cpus(); ret = __stop_machine(fn, data, cpus); put_online_cpus(); - + stop_machine_destroy(); return ret; } EXPORT_SYMBOL_GPL(stop_machine); - -static int __init stop_machine_init(void) -{ - stop_machine_wq = create_rt_workqueue("kstop"); - stop_machine_work = alloc_percpu(struct work_struct); - return 0; -} -core_initcall(stop_machine_init); -- cgit v1.2.3 From a0e280e0f33f6c859a235fb69a875ed8f3420388 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 7 Jan 2009 16:19:46 +0100 Subject: stop_machine/cpu hotplug: fix disable_nonboot_cpus disable_nonboot_cpus calls _cpu_down. But _cpu_down requires that the caller already created the stop_machine workqueue (like cpu_down does). Otherwise a call to stop_machine will lead to accesses to random memory regions. When introducing this new interface (9ea09af3bd3090e8349ca2899ca2011bd94cda85 "stop_machine: introduce stop_machine_create/destroy") I missed the second call site of _cpu_down. So add the missing stop_machine_create/destroy calls to disable_nonboot_cpus as well. Fixes suspend-to-ram/disk and also this bug: [ 286.547348] BUG: unable to handle kernel paging request at 6b6b6b6b [ 286.548940] IP: [] __stop_machine+0x88/0xe3 [ 286.550598] Oops: 0002 [#1] SMP [ 286.560580] Pid: 3273, comm: halt Not tainted (2.6.28-06127-g238c6d5 [ 286.560580] EIP: is at __stop_machine+0x88/0xe3 [ 286.560580] Process halt (pid: 3273, ti=f1a28000 task=f4530f30 [ 286.560580] Call Trace: [ 286.560580] [] ? _cpu_down+0x10f/0x234 [ 286.560580] [] ? disable_nonboot_cpus+0x58/0xdc [ 286.560580] [] ? kernel_poweroff+0x22/0x39 [ 286.560580] [] ? sys_reboot+0xde/0x14c [ 286.560580] [] ? complete_signal+0x179/0x191 [ 286.560580] [] ? send_signal+0x1cc/0x1e1 [ 286.560580] [] ? _spin_unlock_irqrestore+0x2d/0x3c [ 286.560580] [] ? group_send_signal_info+0x58/0x61 [ 286.560580] [] ? kill_pid_info+0x30/0x3a [ 286.560580] [] ? sys_kill+0x75/0x13a [ 286.560580] [] ? mntput_no_expire+ox1f/0x101 [ 286.560580] [] ? dput+0x1e/0x105 [ 286.560580] [] ? __fput+0x150/0x158 [ 286.560580] [] ? audit_syscall_entry+0x137/0x159 [ 286.560580] [] ? sysenter_do_call+0x12/0x34 Reported-and-tested-by: "Justin P. Mattock" Reviewed-by: Pekka Enberg Signed-off-by: Heiko Carstens Tested-by: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/cpu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 30e74dd6d01b..79e40f00dcb8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -379,8 +379,11 @@ static cpumask_var_t frozen_cpus; int disable_nonboot_cpus(void) { - int cpu, first_cpu, error = 0; + int cpu, first_cpu, error; + error = stop_machine_create(); + if (error) + return error; cpu_maps_update_begin(); first_cpu = cpumask_first(cpu_online_mask); /* We take down all of the non-boot CPUs in one shot to avoid races @@ -409,6 +412,7 @@ int disable_nonboot_cpus(void) printk(KERN_ERR "Non-boot CPUs are not disabled\n"); } cpu_maps_update_done(); + stop_machine_destroy(); return error; } -- cgit v1.2.3