author     Ingo Molnar <mingo@kernel.org>	2017-05-07 19:59:58 +0200
committer  Ingo Molnar <mingo@kernel.org>	2017-05-07 19:59:58 +0200
commit     879aa8b5f43c33d4d4dfa76786f33a1fe6609e07
tree       4980587038679120831f3a9cb111809879092147 /kernel
parent     e829b72758b4c37e207844d9931ee0159bc5d202
parent     74b3980ba87e6bdb78694600e234f91fef592dbd
Merge branch 'smp/hotplug'
Diffstat (limited to 'kernel')

 kernel/cpu.c                | 173
 kernel/events/core.c        |  56
 kernel/jump_label.c         |  57
 kernel/kprobes.c            |  25
 kernel/padata.c             |  43
 kernel/stop_machine.c       |   7
 kernel/trace/trace_events.c |   8
 kernel/tracepoint.c         |   4

8 files changed, 207 insertions, 166 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9ae6fbe5b5cf..983163ef36ee 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -27,6 +27,7 @@
 #include <linux/smpboot.h>
 #include <linux/relay.h>
 #include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -196,121 +197,41 @@ void cpu_maps_update_done(void)
 	mutex_unlock(&cpu_add_remove_lock);
 }
 
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
  */
 static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-	struct task_struct *active_writer;
-	/* wait queue to wake up the active_writer */
-	wait_queue_head_t wq;
-	/* verifies that no writer will get active while readers are active */
-	struct mutex lock;
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	.dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
-				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
-
+DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
 
 void get_online_cpus(void)
 {
-	might_sleep();
-	if (cpu_hotplug.active_writer == current)
-		return;
-	cpuhp_lock_acquire_read();
-	mutex_lock(&cpu_hotplug.lock);
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
+	percpu_down_read(&cpu_hotplug_lock);
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
 void put_online_cpus(void)
 {
-	int refcount;
-
-	if (cpu_hotplug.active_writer == current)
-		return;
-
-	refcount = atomic_dec_return(&cpu_hotplug.refcount);
-	if (WARN_ON(refcount < 0)) /* try to fix things up */
-		atomic_inc(&cpu_hotplug.refcount);
-
-	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-		wake_up(&cpu_hotplug.wq);
-
-	cpuhp_lock_release();
-
+	percpu_up_read(&cpu_hotplug_lock);
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
 void cpu_hotplug_begin(void)
 {
-	DEFINE_WAIT(wait);
-
-	cpu_hotplug.active_writer = current;
-	cpuhp_lock_acquire();
-
-	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (likely(!atomic_read(&cpu_hotplug.refcount)))
-			break;
-		mutex_unlock(&cpu_hotplug.lock);
-		schedule();
-	}
-	finish_wait(&cpu_hotplug.wq, &wait);
+	percpu_down_write(&cpu_hotplug_lock);
 }
 
 void cpu_hotplug_done(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
-	cpuhp_lock_release();
+	percpu_up_write(&cpu_hotplug_lock);
+}
+
+void lockdep_assert_hotplug_held(void)
+{
+	percpu_rwsem_assert_held(&cpu_hotplug_lock);
 }
 
 /*
@@ -344,8 +265,6 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif	/* CONFIG_HOTPLUG_CPU */
 
-/* Notifier wrappers for transitioning to state machine */
-
 static int bringup_wait_for_ap(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -701,7 +620,7 @@ static int takedown_cpu(unsigned int cpu)
 	/*
 	 * So now all preempt/rcu users must observe !cpu_active().
 	 */
-	err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
+	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
 	if (err) {
 		/* CPU refused to die */
 		irq_unlock_sparse();
@@ -1413,8 +1332,9 @@ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
 	}
 }
 
-int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
-			       bool invoke)
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+					  struct hlist_node *node,
+					  bool invoke)
 {
 	struct cpuhp_step *sp;
 	int cpu;
@@ -1424,7 +1344,6 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
 	if (sp->multi_instance == false)
 		return -EINVAL;
 
-	get_online_cpus();
 	mutex_lock(&cpuhp_state_mutex);
 
 	if (!invoke || !sp->startup.multi)
@@ -1453,13 +1372,23 @@ add_node:
 	hlist_add_head(node, &sp->list);
 unlock:
 	mutex_unlock(&cpuhp_state_mutex);
+	return ret;
+}
+
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+			       bool invoke)
+{
+	int ret;
+
+	get_online_cpus();
+	ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
 	put_online_cpus();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
 
 /**
- * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
  * @state:		The state to setup
  * @invoke:		If true, the startup function is invoked for cpus where
  *			cpu state >= @state
@@ -1468,25 +1397,27 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance)
  * @startup:		startup callback function
  * @teardown:		teardown callback function
  * @multi_instance:	State is set up for multiple instances which get
  *			added afterwards.
  *
+ * The caller needs to hold get_online_cpus() while calling this function.
  * Returns:
  *   On success:
  *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
  *      0 for all other states
  *   On failure: proper (negative) error code
  */
-int __cpuhp_setup_state(enum cpuhp_state state,
-			const char *name, bool invoke,
-			int (*startup)(unsigned int cpu),
-			int (*teardown)(unsigned int cpu),
-			bool multi_instance)
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+				   const char *name, bool invoke,
+				   int (*startup)(unsigned int cpu),
+				   int (*teardown)(unsigned int cpu),
+				   bool multi_instance)
 {
 	int cpu, ret = 0;
 	bool dynstate;
 
+	lockdep_assert_hotplug_held();
+
 	if (cpuhp_cb_check(state) || !name)
 		return -EINVAL;
 
-	get_online_cpus();
 	mutex_lock(&cpuhp_state_mutex);
 
 	ret = cpuhp_store_callbacks(state, name, startup, teardown,
@@ -1522,7 +1453,6 @@ int __cpuhp_setup_state(enum cpuhp_state state,
 	}
 out:
 	mutex_unlock(&cpuhp_state_mutex);
-	put_online_cpus();
 	/*
 	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
 	 * dynamically allocated state in case of success.
@@ -1531,6 +1461,22 @@ out:
 		return state;
 	return ret;
 }
+EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
+
+int __cpuhp_setup_state(enum cpuhp_state state,
+			const char *name, bool invoke,
+			int (*startup)(unsigned int cpu),
+			int (*teardown)(unsigned int cpu),
+			bool multi_instance)
+{
+	int ret;
+
+	get_online_cpus();
+	ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
+					     teardown, multi_instance);
+	put_online_cpus();
+	return ret;
+}
 EXPORT_SYMBOL(__cpuhp_setup_state);
 
 int __cpuhp_state_remove_instance(enum cpuhp_state state,
@@ -1572,22 +1518,22 @@ remove:
 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
 
 /**
- * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
  * @state:	The state to remove
  * @invoke:	If true, the teardown function is invoked for cpus where
  *		cpu state >= @state
  *
+ * The caller needs to hold get_online_cpus() while calling this function.
  * The teardown callback is currently not allowed to fail. Think
  * about module removal!
  */
-void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
 {
 	struct cpuhp_step *sp = cpuhp_get_step(state);
 	int cpu;
 
 	BUG_ON(cpuhp_cb_check(state));
-
-	get_online_cpus();
+	lockdep_assert_hotplug_held();
 
 	mutex_lock(&cpuhp_state_mutex);
 	if (sp->multi_instance) {
@@ -1615,6 +1561,13 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
 remove:
 	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
 	mutex_unlock(&cpuhp_state_mutex);
+}
+EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+	get_online_cpus();
+	__cpuhp_remove_state_cpuslocked(state, invoke);
 	put_online_cpus();
 }
 EXPORT_SYMBOL(__cpuhp_remove_state);
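The kernel/cpu.c changes above set the pattern the rest of this merge follows: get_online_cpus()/put_online_cpus() become percpu-rwsem read lock/unlock operations, cpu_hotplug_begin()/cpu_hotplug_done() take the writer side, and APIs grow *_cpuslocked variants for callers that already hold the lock. A minimal sketch of that convention for a hypothetical subsystem (the my_subsys_* names are illustrative, not part of this commit; only the locking calls are real):

/* Sketch only: my_subsys_* is hypothetical, the locking calls are real. */
#include <linux/cpu.h>

/* Inner worker: the caller must already hold cpu_hotplug_lock. */
static int my_subsys_update_cpuslocked(int val)
{
	lockdep_assert_hotplug_held();	/* helper introduced above */

	/* ... update per-CPU state; no CPU can come or go here ... */
	return 0;
}

/* Outer wrapper for callers that do not hold the hotplug lock. */
static int my_subsys_update(int val)
{
	int ret;

	get_online_cpus();
	ret = my_subsys_update_cpuslocked(val);
	put_online_cpus();

	return ret;
}

Callers that already hold the hotplug lock, such as hotplug state-machine callbacks running under cpu_hotplug_begin(), call the _cpuslocked variant directly; re-acquiring cpu_hotplug_lock from such a context would deadlock, which is exactly the recursion the old active_writer special case papered over.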
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..13f5b942580b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4021,10 +4021,12 @@ static void unaccount_event(struct perf_event *event)
 
 static void perf_sched_delayed(struct work_struct *work)
 {
+	get_online_cpus();
 	mutex_lock(&perf_sched_mutex);
 	if (atomic_dec_and_test(&perf_sched_count))
-		static_branch_disable(&perf_sched_events);
+		static_branch_disable_cpuslocked(&perf_sched_events);
 	mutex_unlock(&perf_sched_mutex);
+	put_online_cpus();
 }
 
 /*
@@ -4340,7 +4342,15 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-	perf_event_release_kernel(file->private_data);
+	/*
+	 * The error exit path of sys_perf_event_open() might have released
+	 * the event already and cleared file->private_data.
+	 */
+	if (file->private_data) {
+		get_online_cpus();
+		perf_event_release_kernel(file->private_data);
+		put_online_cpus();
+	}
 	return 0;
 }
 
@@ -7724,7 +7734,6 @@ static int swevent_hlist_get(void)
 {
 	int err, cpu, failed_cpu;
 
-	get_online_cpus();
 	for_each_possible_cpu(cpu) {
 		err = swevent_hlist_get_cpu(cpu);
 		if (err) {
@@ -7732,8 +7741,6 @@ static int swevent_hlist_get(void)
 			goto fail;
 		}
 	}
-	put_online_cpus();
-
 	return 0;
 fail:
 	for_each_possible_cpu(cpu) {
@@ -7741,8 +7748,6 @@ fail:
 			break;
 		swevent_hlist_put_cpu(cpu);
 	}
-
-	put_online_cpus();
 	return err;
 }
 
@@ -7754,7 +7759,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 
 	WARN_ON(event->parent);
 
-	static_key_slow_dec(&perf_swevent_enabled[event_id]);
+	static_key_slow_dec_cpuslocked(&perf_swevent_enabled[event_id]);
 	swevent_hlist_put();
 }
 
@@ -7790,7 +7795,7 @@ static int perf_swevent_init(struct perf_event *event)
 		if (err)
 			return err;
 
-		static_key_slow_inc(&perf_swevent_enabled[event_id]);
+		static_key_slow_inc_cpuslocked(&perf_swevent_enabled[event_id]);
 		event->destroy = sw_perf_event_destroy;
 	}
 
@@ -9299,7 +9304,7 @@ static void account_event(struct perf_event *event)
 
 	mutex_lock(&perf_sched_mutex);
 	if (!atomic_read(&perf_sched_count)) {
-		static_branch_enable(&perf_sched_events);
+		static_key_slow_inc_cpuslocked(&perf_sched_events.key);
 		/*
 		 * Guarantee that all CPUs observe they key change and
 		 * call the perf scheduling hooks before proceeding to
@@ -9882,12 +9887,10 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_task;
 	}
 
-	get_online_cpus();
-
 	if (task) {
 		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
 		if (err)
-			goto err_cpus;
+			goto err_task;
 
 		/*
 		 * Reuse ptrace permission checks for now.
@@ -9905,11 +9908,13 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (flags & PERF_FLAG_PID_CGROUP)
 		cgroup_fd = pid;
 
+	get_online_cpus();
+
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
 				 NULL, NULL, cgroup_fd);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
-		goto err_cred;
+		goto err_cpus;
 	}
 
 	if (is_sampling_event(event)) {
@@ -10157,13 +10162,13 @@ SYSCALL_DEFINE5(perf_event_open,
 		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 
+	put_online_cpus();
+
 	if (task) {
 		mutex_unlock(&task->signal->cred_guard_mutex);
 		put_task_struct(task);
 	}
 
-	put_online_cpus();
-
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -10183,6 +10188,12 @@ err_locked:
 		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 /* err_file: */
+	/*
+	 * Release the event manually to avoid hotplug lock recursion in
+	 * perf_release().
+	 */
+	event_file->private_data = NULL;
+	perf_event_release_kernel(event);
 	fput(event_file);
 err_context:
 	perf_unpin_context(ctx);
@@ -10194,11 +10205,11 @@ err_alloc:
 	 */
 	if (!event_file)
 		free_event(event);
+err_cpus:
+	put_online_cpus();
 err_cred:
 	if (task)
 		mutex_unlock(&task->signal->cred_guard_mutex);
-err_cpus:
-	put_online_cpus();
 err_task:
 	if (task)
 		put_task_struct(task);
@@ -10226,10 +10237,10 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	struct perf_event *event;
 	int err;
 
+	get_online_cpus();
 	/*
 	 * Get the target context (task or percpu):
 	 */
-
 	event = perf_event_alloc(attr, cpu, task, NULL, NULL,
 				 overflow_handler, context, -1);
 	if (IS_ERR(event)) {
@@ -10261,7 +10272,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	perf_install_in_context(ctx, event, cpu);
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
-
+	put_online_cpus();
 	return event;
 
 err_unlock:
@@ -10271,6 +10282,7 @@ err_unlock:
 err_free:
 	free_event(event);
 err:
+	put_online_cpus();
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
@@ -10504,8 +10516,10 @@ void perf_event_exit_task(struct task_struct *child)
 	}
 	mutex_unlock(&child->perf_event_mutex);
 
+	get_online_cpus();
 	for_each_task_context_nr(ctxn)
 		perf_event_exit_task_context(child, ctxn);
+	put_online_cpus();
 
 	/*
 	 * The perf_event_exit_task_context calls perf_event_task
@@ -10550,6 +10564,7 @@ void perf_event_free_task(struct task_struct *task)
 	struct perf_event *event, *tmp;
 	int ctxn;
 
+	get_online_cpus();
 	for_each_task_context_nr(ctxn) {
 		ctx = task->perf_event_ctxp[ctxn];
 		if (!ctx)
@@ -10574,6 +10589,7 @@ void perf_event_free_task(struct task_struct *task)
 		mutex_unlock(&ctx->mutex);
 		put_ctx(ctx);
 	}
+	put_online_cpus();
 }
 
 void perf_event_delayed_put(struct task_struct *task)
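The perf hunks also illustrate the lock-ordering rule this series enforces: cpu_hotplug_lock is always taken before subsystem mutexes such as perf_sched_mutex, and static branches are flipped through the _cpuslocked helpers once inside. A hedged sketch of the perf_sched_delayed() shape, with hypothetical my_feature_* names standing in for the perf symbols:

#include <linux/atomic.h>
#include <linux/cpu.h>
#include <linux/jump_label.h>
#include <linux/mutex.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);
static DEFINE_MUTEX(my_feature_mutex);
static atomic_t my_feature_count = ATOMIC_INIT(0);

/* Drop a reference taken by a matching my_feature_get() (not shown). */
static void my_feature_put(void)
{
	/* Hotplug lock first, subsystem mutex second: same order everywhere. */
	get_online_cpus();
	mutex_lock(&my_feature_mutex);
	if (atomic_dec_and_test(&my_feature_count))
		static_branch_disable_cpuslocked(&my_feature_key);
	mutex_unlock(&my_feature_mutex);
	put_online_cpus();
}

Taking the hotplug lock explicitly and then calling the _cpuslocked variant, rather than letting static_branch_disable() lock internally, keeps the ordering against the subsystem mutex consistent on every path.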
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 6c9cb208ac48..6343f4c7e27f 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -15,6 +15,7 @@
 #include <linux/static_key.h>
 #include <linux/jump_label_ratelimit.h>
 #include <linux/bug.h>
+#include <linux/cpu.h>
 
 #ifdef HAVE_JUMP_LABEL
 
@@ -89,6 +90,16 @@ void static_key_enable(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_enable);
 
+void static_key_enable_cpuslocked(struct static_key *key)
+{
+	int count = static_key_count(key);
+
+	WARN_ON_ONCE(count < 0 || count > 1);
+
+	if (!count)
+		static_key_slow_inc_cpuslocked(key);
+}
+
 void static_key_disable(struct static_key *key)
 {
 	int count = static_key_count(key);
@@ -100,7 +111,17 @@ void static_key_disable(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_disable);
 
-void static_key_slow_inc(struct static_key *key)
+void static_key_disable_cpuslocked(struct static_key *key)
+{
+	int count = static_key_count(key);
+
+	WARN_ON_ONCE(count < 0 || count > 1);
+
+	if (count)
+		static_key_slow_dec_cpuslocked(key);
+}
+
+void __static_key_slow_inc(struct static_key *key)
 {
 	int v, v1;
 
@@ -124,6 +145,12 @@ void static_key_slow_inc(struct static_key *key)
 		return;
 	}
 
+	/*
+	 * A number of architectures need to synchronize I$ across
+	 * the all CPUs, for that to be serialized against CPU hot-plug
+	 * we need to avoid CPUs coming online.
+	 */
+	lockdep_assert_hotplug_held();
 	jump_label_lock();
 	if (atomic_read(&key->enabled) == 0) {
 		atomic_set(&key->enabled, -1);
@@ -134,8 +161,21 @@ void static_key_slow_inc(struct static_key *key)
 	}
 	jump_label_unlock();
 }
+
+void static_key_slow_inc(struct static_key *key)
+{
+	get_online_cpus();
+	__static_key_slow_inc(key);
+	put_online_cpus();
+}
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
 
+void static_key_slow_inc_cpuslocked(struct static_key *key)
+{
+	__static_key_slow_inc(key);
+}
+EXPORT_SYMBOL_GPL(static_key_slow_inc_cpuslocked);
+
 static void __static_key_slow_dec(struct static_key *key,
 		unsigned long rate_limit, struct delayed_work *work)
 {
@@ -152,6 +192,7 @@ static void __static_key_slow_dec(struct static_key *key,
 		return;
 	}
 
+	lockdep_assert_hotplug_held();
 	if (rate_limit) {
 		atomic_inc(&key->enabled);
 		schedule_delayed_work(work, rate_limit);
@@ -165,16 +206,26 @@ static void jump_label_update_timeout(struct work_struct *work)
 {
 	struct static_key_deferred *key =
 		container_of(work, struct static_key_deferred, work.work);
+	get_online_cpus();
 	__static_key_slow_dec(&key->key, 0, NULL);
+	put_online_cpus();
 }
 
 void static_key_slow_dec(struct static_key *key)
 {
 	STATIC_KEY_CHECK_USE();
+	get_online_cpus();
 	__static_key_slow_dec(key, 0, NULL);
+	put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec);
 
+void static_key_slow_dec_cpuslocked(struct static_key *key)
+{
+	STATIC_KEY_CHECK_USE();
+	__static_key_slow_dec(key, 0, NULL);
+}
+
 void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
 	STATIC_KEY_CHECK_USE();
@@ -592,6 +643,10 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
 
 	switch (val) {
 	case MODULE_STATE_COMING:
+		/*
+		 * XXX do we need get_online_cpus() ? the module isn't
+		 * executable yet, so nothing should be looking at our code.
+		 */
 		jump_label_lock();
 		ret = jump_label_add_module(mod);
 		if (ret) {
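The static_key_slow_inc_cpuslocked()/static_key_slow_dec_cpuslocked() entry points added above exist for contexts that already hold cpu_hotplug_lock, most notably CPU-hotplug callbacks themselves, where the plain variants would acquire the lock recursively. A sketch under that assumption (my_* names are hypothetical; registering the callback via cpuhp_setup_state() is not shown):

#include <linux/cpu.h>
#include <linux/jump_label.h>

static struct static_key my_key = STATIC_KEY_INIT_FALSE;

/* Hotplug state-machine callback: cpu_hotplug_lock is already held. */
static int my_cpu_online(unsigned int cpu)
{
	/* static_key_slow_inc() would take cpu_hotplug_lock again. */
	static_key_slow_inc_cpuslocked(&my_key);
	return 0;
}

/* Ordinary process context: the plain variant locks internally. */
static void my_feature_enable(void)
{
	static_key_slow_inc(&my_key);
}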
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d733479a10ee..c7ea9960433a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -498,14 +498,13 @@ static void do_optimize_kprobes(void)
 	 * This combination can cause a deadlock (cpu-hotplug try to lock
 	 * text_mutex but stop_machine can not be done because online_cpus
 	 * has been changed)
-	 * To avoid this deadlock, we need to call get_online_cpus()
+	 * To avoid this deadlock, caller must have locked cpu hotplug
 	 * for preventing cpu-hotplug outside of text_mutex locking.
 	 */
-	get_online_cpus();
+	lockdep_assert_hotplug_held();
 	mutex_lock(&text_mutex);
 	arch_optimize_kprobes(&optimizing_list);
 	mutex_unlock(&text_mutex);
-	put_online_cpus();
 }
 
 /*
@@ -521,7 +520,7 @@ static void do_unoptimize_kprobes(void)
 		return;
 
 	/* Ditto to do_optimize_kprobes */
-	get_online_cpus();
+	lockdep_assert_hotplug_held();
 	mutex_lock(&text_mutex);
 	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
 	/* Loop free_list for disarming */
@@ -540,7 +539,6 @@ static void do_unoptimize_kprobes(void)
 		list_del_init(&op->list);
 	}
 	mutex_unlock(&text_mutex);
-	put_online_cpus();
 }
 
 /* Reclaim all kprobes on the free_list */
@@ -564,6 +562,7 @@ static void kick_kprobe_optimizer(void)
 /* Kprobe jump optimizer */
 static void kprobe_optimizer(struct work_struct *work)
 {
+	get_online_cpus();
 	mutex_lock(&kprobe_mutex);
 	/* Lock modules while optimizing kprobes */
 	mutex_lock(&module_mutex);
@@ -595,6 +594,7 @@ static void kprobe_optimizer(struct work_struct *work)
 	/* Step 5: Kick optimizer again if needed */
 	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
 		kick_kprobe_optimizer();
+	put_online_cpus();
 }
 
 /* Wait for completing optimization and unoptimization */
@@ -653,9 +653,7 @@ static void optimize_kprobe(struct kprobe *p)
 /* Short cut to direct unoptimizing */
 static void force_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-	get_online_cpus();
 	arch_unoptimize_kprobe(op);
-	put_online_cpus();
 	if (kprobe_disabled(&op->kp))
 		arch_disarm_kprobe(&op->kp);
 }
@@ -817,6 +815,7 @@ static void optimize_all_kprobes(void)
 	struct kprobe *p;
 	unsigned int i;
 
+	get_online_cpus();
 	mutex_lock(&kprobe_mutex);
 	/* If optimization is already allowed, just return */
 	if (kprobes_allow_optimization)
@@ -832,6 +831,7 @@ static void optimize_all_kprobes(void)
 	printk(KERN_INFO "Kprobes globally optimized\n");
 out:
 	mutex_unlock(&kprobe_mutex);
+	put_online_cpus();
 }
 
 static void unoptimize_all_kprobes(void)
@@ -840,6 +840,7 @@ static void unoptimize_all_kprobes(void)
 	struct kprobe *p;
 	unsigned int i;
 
+	get_online_cpus();
 	mutex_lock(&kprobe_mutex);
 	/* If optimization is already prohibited, just return */
 	if (!kprobes_allow_optimization) {
@@ -860,6 +861,7 @@ static void unoptimize_all_kprobes(void)
 	/* Wait for unoptimizing completion */
 	wait_for_kprobe_optimizer();
 	printk(KERN_INFO "Kprobes globally unoptimized\n");
+	put_online_cpus();
 }
 
 static DEFINE_MUTEX(kprobe_sysctl_mutex);
@@ -1297,10 +1299,11 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 	/* For preparing optimization, jump_label_text_reserved() is called */
 	jump_label_lock();
 	/*
-	 * Get online CPUs to avoid text_mutex deadlock.with stop machine,
-	 * which is invoked by unoptimize_kprobe() in add_new_kprobe()
+	 * Caller must have locked CPUs to avoid text_mutex deadlock.with stop
+	 * machine, which is invoked by unoptimize_kprobe() in
+	 * add_new_kprobe()
 	 */
-	get_online_cpus();
+	lockdep_assert_hotplug_held();
 	mutex_lock(&text_mutex);
 
 	if (!kprobe_aggrprobe(orig_p)) {
@@ -1542,6 +1545,7 @@ int register_kprobe(struct kprobe *p)
 	if (ret)
 		return ret;
 
+	get_online_cpus();
 	mutex_lock(&kprobe_mutex);
 
 	old_p = get_kprobe(p->addr);
@@ -1569,6 +1573,7 @@ int register_kprobe(struct kprobe *p)
 
 out:
 	mutex_unlock(&kprobe_mutex);
+	put_online_cpus();
 
 	if (probed_mod)
 		module_put(probed_mod);
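In kernel/kprobes.c the hotplug lock moves out of the inner helpers, which now merely assert it with lockdep_assert_hotplug_held(), and into the entry points: register_kprobe(), kprobe_optimizer() and the optimize/unoptimize-all paths take it once, ahead of kprobe_mutex and text_mutex. Nothing changes for kprobe users; a routine registration still looks like this sketch (the probed symbol is only an example):

#include <linux/kprobes.h>
#include <linux/module.h>

static struct kprobe my_kp = {
	.symbol_name = "do_sys_open",	/* example target */
};

static int __init my_probe_init(void)
{
	/*
	 * register_kprobe() now takes get_online_cpus() internally (see
	 * above), so it must not be called with the hotplug lock held.
	 */
	return register_kprobe(&my_kp);
}

static void __exit my_probe_exit(void)
{
	unregister_kprobe(&my_kp);
}

module_init(my_probe_init);
module_exit(my_probe_exit);
MODULE_LICENSE("GPL");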
diff --git a/kernel/padata.c b/kernel/padata.c
index 3202aa17492c..2d5a70f1aaab 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -939,29 +939,18 @@ static struct kobj_type padata_attr_type = {
 };
 
 /**
- * padata_alloc_possible - Allocate and initialize padata instance.
- *                         Use the cpu_possible_mask for serial and
- *                         parallel workers.
- *
- * @wq: workqueue to use for the allocated padata instance
- */
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
-{
-	return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
-}
-EXPORT_SYMBOL(padata_alloc_possible);
-
-/**
  * padata_alloc - allocate and initialize a padata instance and specify
  *                cpumasks for serial and parallel workers.
  *
  * @wq: workqueue to use for the allocated padata instance
  * @pcpumask: cpumask that will be used for padata parallelization
  * @cbcpumask: cpumask that will be used for padata serialization
+ *
+ * Must be called from a get_online_cpus() protected region
  */
-struct padata_instance *padata_alloc(struct workqueue_struct *wq,
-				     const struct cpumask *pcpumask,
-				     const struct cpumask *cbcpumask)
+static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+					    const struct cpumask *pcpumask,
+					    const struct cpumask *cbcpumask)
 {
 	struct padata_instance *pinst;
 	struct parallel_data *pd = NULL;
@@ -970,7 +959,6 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
 	if (!pinst)
 		goto err;
 
-	get_online_cpus();
 	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
 		goto err_free_inst;
 	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
@@ -994,14 +982,12 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
 
 	pinst->flags = 0;
 
-	put_online_cpus();
-
 	BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
 	kobject_init(&pinst->kobj, &padata_attr_type);
 	mutex_init(&pinst->lock);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	cpuhp_state_add_instance_nocalls(hp_online, &pinst->node);
+	cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
 #endif
 
 	return pinst;
@@ -1010,12 +996,27 @@ err_free_masks:
 	free_cpumask_var(pinst->cpumask.cbcpu);
 err_free_inst:
 	kfree(pinst);
-	put_online_cpus();
 err:
 	return NULL;
 }
 
 /**
+ * padata_alloc_possible - Allocate and initialize padata instance.
+ *                         Use the cpu_possible_mask for serial and
+ *                         parallel workers.
+ *
+ * @wq: workqueue to use for the allocated padata instance
+ *
+ * Must be called from a get_online_cpus() protected region
+ */
+struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
+{
+	lockdep_assert_hotplug_held();
+	return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+}
+EXPORT_SYMBOL(padata_alloc_possible);
+
+/**
  * padata_free - free a padata instance
  *
  * @padata_inst: padata instance to free
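padata_alloc() loses its internal get_online_cpus() pair and becomes static; padata_alloc_possible() is now the only exported constructor and, per the new kerneldoc, must be called from a get_online_cpus() protected region. A sketch of the adjusted caller (my_wq and my_pinst are hypothetical):

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/padata.h>
#include <linux/workqueue.h>

static struct padata_instance *my_pinst;

static int my_padata_setup(struct workqueue_struct *my_wq)
{
	get_online_cpus();	/* required by the new calling convention */
	my_pinst = padata_alloc_possible(my_wq);
	put_online_cpus();

	return my_pinst ? 0 : -ENOMEM;
}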
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 1eb82661ecdb..2390340c4a2d 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -552,7 +552,8 @@ static int __init cpu_stop_init(void)
 }
 early_initcall(cpu_stop_init);
 
-static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+			    const struct cpumask *cpus)
 {
 	struct multi_stop_data msdata = {
 		.fn = fn,
@@ -561,6 +562,8 @@ static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cp
 		.active_cpus = cpus,
 	};
 
+	lockdep_assert_hotplug_held();
+
 	if (!stop_machine_initialized) {
 		/*
 		 * Handle the case where stop_machine() is called
@@ -591,7 +594,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
-	ret = __stop_machine(fn, data, cpus);
+	ret = stop_machine_cpuslocked(fn, data, cpus);
 	put_online_cpus();
 	return ret;
 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 93116549a284..4330b1da1a39 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -326,6 +326,7 @@ void trace_event_enable_cmd_record(bool enable)
 	struct trace_event_file *file;
 	struct trace_array *tr;
 
+	get_online_cpus();
 	mutex_lock(&event_mutex);
 
 	do_for_each_event_file(tr, file) {
@@ -341,6 +342,7 @@ void trace_event_enable_cmd_record(bool enable)
 		}
 	} while_for_each_event_file();
 	mutex_unlock(&event_mutex);
+	put_online_cpus();
 }
 
 static int __ftrace_event_enable_disable(struct trace_event_file *file,
@@ -747,9 +749,11 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
 {
 	int ret;
 
+	get_online_cpus();
 	mutex_lock(&event_mutex);
 	ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
 	mutex_unlock(&event_mutex);
+	put_online_cpus();
 
 	return ret;
 }
@@ -1039,11 +1043,13 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	case 0:
 	case 1:
 		ret = -ENODEV;
+		get_online_cpus();
 		mutex_lock(&event_mutex);
 		file = event_file_data(filp);
 		if (likely(file))
 			ret = ftrace_event_enable_disable(file, val);
 		mutex_unlock(&event_mutex);
+		put_online_cpus();
 		break;
 
 	default:
@@ -2902,6 +2908,7 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
 
 int event_trace_del_tracer(struct trace_array *tr)
 {
+	get_online_cpus();
 	mutex_lock(&event_mutex);
 
 	/* Disable any event triggers and associated soft-disabled events */
@@ -2924,6 +2931,7 @@ int event_trace_del_tracer(struct trace_array *tr)
 	tr->event_dir = NULL;
 
 	mutex_unlock(&event_mutex);
+	put_online_cpus();
 
 	return 0;
 }
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 685c50ae6300..10ce9103712b 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -220,7 +220,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
 	 */
 	rcu_assign_pointer(tp->funcs, tp_funcs);
 	if (!static_key_enabled(&tp->key))
-		static_key_slow_inc(&tp->key);
+		static_key_slow_inc_cpuslocked(&tp->key);
 	release_probes(old);
 	return 0;
 }
@@ -250,7 +250,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
 			tp->unregfunc();
 
 		if (static_key_enabled(&tp->key))
-			static_key_slow_dec(&tp->key);
+			static_key_slow_dec_cpuslocked(&tp->key);
 	}
 	rcu_assign_pointer(tp->funcs, tp_funcs);
 	release_probes(old);
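Finally, kernel/stop_machine.c splits the same way: the formerly static __stop_machine() is exported as stop_machine_cpuslocked() for callers that already hold the hotplug lock, such as takedown_cpu() in the kernel/cpu.c hunk at the top, while plain stop_machine() keeps taking the lock itself. A sketch under that assumption (my_* names hypothetical):

#include <linux/cpu.h>
#include <linux/stop_machine.h>

static int my_text_patch(void *data)
{
	/* Runs while every online CPU spins in its stopper thread. */
	return 0;
}

/* Called from a path that already holds cpu_hotplug_lock. */
static int my_patch_cpuslocked(void)
{
	/* Plain stop_machine() would re-take cpu_hotplug_lock here. */
	return stop_machine_cpuslocked(my_text_patch, NULL, NULL);
}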