author	Stephen Rothwell <sfr@canb.auug.org.au>	2009-12-01 17:42:30 +1100
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2009-12-01 17:42:30 +1100
commit	e43ffc4278c8dc4d5d30f4a621c4e47d88ba8fb7 (patch)
tree	57a2199320c1841a74d68235706f238bed4496ec
parent	1be2457c3b29d52141535b8524db52577a261270 (diff)
parent	9fe828124d21c282fe5eeb4096ebf351a436a18f (diff)

Merge remote branch 'workqueues/for-next'

Conflicts:
	init/Kconfig
	kernel/sched.c
-rw-r--r--	arch/ia64/kvm/Kconfig		1
-rw-r--r--	arch/powerpc/kvm/Kconfig	1
-rw-r--r--	arch/s390/kvm/Kconfig		1
-rw-r--r--	arch/x86/kernel/smpboot.c	4
-rw-r--r--	arch/x86/kvm/Kconfig		1
-rw-r--r--	drivers/acpi/osl.c		41
-rw-r--r--	include/linux/kvm_host.h	4
-rw-r--r--	include/linux/preempt.h		43
-rw-r--r--	include/linux/sched.h		59
-rw-r--r--	include/linux/stop_machine.h	6
-rw-r--r--	include/linux/workqueue.h	96
-rw-r--r--	init/Kconfig			3
-rw-r--r--	init/main.c			2
-rw-r--r--	kernel/sched.c			334
-rw-r--r--	kernel/stop_machine.c		151
-rw-r--r--	kernel/trace/Kconfig		4
-rw-r--r--	kernel/workqueue.c		412
-rw-r--r--	lib/Kconfig.debug		8
-rw-r--r--	virt/kvm/kvm_main.c		28
19 files changed, 795 insertions(+), 404 deletions(-)
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index ef3e7be29caf..a9e2b9c60d65 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM
depends on HAVE_KVM && MODULES && EXPERIMENTAL
# for device assignment:
depends on PCI
- select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_IRQCHIP
select KVM_APIC_ARCHITECTURE
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 07703f72330e..38818c08a33c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -18,7 +18,6 @@ if VIRTUALIZATION
config KVM
bool
- select PREEMPT_NOTIFIERS
select ANON_INODES
config KVM_BOOK3S_64_HANDLER
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 6ee55ae84ce2..f9b46b07a0a1 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -18,7 +18,6 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM && EXPERIMENTAL
- select PREEMPT_NOTIFIERS
select ANON_INODES
---help---
Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 565ebc65920e..ba43dfed353d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -687,7 +687,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- INIT_WORK(&c_idle.work, do_fork_idle);
+ INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
@@ -713,6 +713,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
if (IS_ERR(c_idle.idle)) {
printk("failed fork for CPU %d\n", cpu);
+ destroy_work_on_stack(&c_idle.work);
return PTR_ERR(c_idle.idle);
}
@@ -831,6 +832,7 @@ do_rest:
smpboot_restore_warm_reset_vector();
}
+ destroy_work_on_stack(&c_idle.work);
return boot_error;
}
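
The hunks above follow the on-stack work pattern this merge introduces: initialize with INIT_WORK_ON_STACK() so debugobjects knows the item lives on the stack, and pair every exit path with destroy_work_on_stack(). A minimal stand-alone sketch of that pattern (stack_work_* names are illustrative, not part of this merge):

#include <linux/workqueue.h>
#include <linux/completion.h>

struct stack_work_ctx {
	struct work_struct work;
	struct completion done;
};

static void stack_work_fn(struct work_struct *work)
{
	struct stack_work_ctx *ctx =
		container_of(work, struct stack_work_ctx, work);

	/* ... do the deferred job ... */
	complete(&ctx->done);
}

static void run_work_on_stack(void)
{
	struct stack_work_ctx ctx = {
		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
	};

	INIT_WORK_ON_STACK(&ctx.work, stack_work_fn);
	schedule_work(&ctx.work);
	wait_for_completion(&ctx.done);
	destroy_work_on_stack(&ctx.work);	/* required on every path */
}
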
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3c4d0109ad20..f5a136a65638 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM
depends on HAVE_KVM
# for device assignment:
depends on PCI
- select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
select HAVE_KVM_IRQCHIP
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 02e8464e480f..93f664733d3f 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -191,36 +191,11 @@ acpi_status __init acpi_os_initialize(void)
return AE_OK;
}
-static void bind_to_cpu0(struct work_struct *work)
-{
- set_cpus_allowed_ptr(current, cpumask_of(0));
- kfree(work);
-}
-
-static void bind_workqueue(struct workqueue_struct *wq)
-{
- struct work_struct *work;
-
- work = kzalloc(sizeof(struct work_struct), GFP_KERNEL);
- INIT_WORK(work, bind_to_cpu0);
- queue_work(wq, work);
-}
-
acpi_status acpi_os_initialize1(void)
{
- /*
- * On some machines, a software-initiated SMI causes corruption unless
- * the SMI runs on CPU 0. An SMI can be initiated by any AML, but
- * typically it's done in GPE-related methods that are run via
- * workqueues, so we can avoid the known corruption cases by binding
- * the workqueues to CPU 0.
- */
- kacpid_wq = create_singlethread_workqueue("kacpid");
- bind_workqueue(kacpid_wq);
- kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify");
- bind_workqueue(kacpi_notify_wq);
- kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug");
- bind_workqueue(kacpi_hotplug_wq);
+ kacpid_wq = create_workqueue("kacpid");
+ kacpi_notify_wq = create_workqueue("kacpi_notify");
+ kacpi_hotplug_wq = create_workqueue("kacpi_hotplug");
BUG_ON(!kacpid_wq);
BUG_ON(!kacpi_notify_wq);
BUG_ON(!kacpi_hotplug_wq);
@@ -759,7 +734,15 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
(type == OSL_NOTIFY_HANDLER ? kacpi_notify_wq : kacpid_wq);
dpc->wait = hp ? 1 : 0;
INIT_WORK(&dpc->work, acpi_os_execute_deferred);
- ret = queue_work(queue, &dpc->work);
+
+ /*
+ * On some machines, a software-initiated SMI causes corruption unless
+ * the SMI runs on CPU 0. An SMI can be initiated by any AML, but
+ * typically it's done in GPE-related methods that are run via
+ * workqueues, so we can avoid the known corruption cases by always
+ * queueing on CPU 0.
+ */
+ ret = queue_work_on(0, queue, &dpc->work);
if (!ret) {
printk(KERN_ERR PREFIX
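
A hedged sketch of the pattern the ACPI change switches to: instead of rebinding a dedicated workqueue's thread to CPU 0, each item is queued explicitly on CPU 0 with queue_work_on(). The my_* names below are illustrative, not part of this merge:

#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;
static struct work_struct my_work;

static void my_work_fn(struct work_struct *work)
{
	/* runs on CPU 0, where a software-initiated SMI is safe */
}

static int __init my_init(void)
{
	my_wq = create_workqueue("my_wq");
	if (!my_wq)
		return -ENOMEM;
	INIT_WORK(&my_work, my_work_fn);
	queue_work_on(0, my_wq, &my_work);
	return 0;
}
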
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bd5a616d9373..45b631e9dc3b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -74,9 +74,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
struct kvm_vcpu {
struct kvm *kvm;
-#ifdef CONFIG_PREEMPT_NOTIFIERS
- struct preempt_notifier preempt_notifier;
-#endif
+ struct sched_notifier sched_notifier;
int vcpu_id;
struct mutex mutex;
int cpu;
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 72b1a10a59b6..538c675b979d 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -93,47 +93,4 @@ do { \
#endif
-#ifdef CONFIG_PREEMPT_NOTIFIERS
-
-struct preempt_notifier;
-
-/**
- * preempt_ops - notifiers called when a task is preempted and rescheduled
- * @sched_in: we're about to be rescheduled:
- * notifier: struct preempt_notifier for the task being scheduled
- * cpu: cpu we're scheduled on
- * @sched_out: we've just been preempted
- * notifier: struct preempt_notifier for the task being preempted
- * next: the task that's kicking us out
- */
-struct preempt_ops {
- void (*sched_in)(struct preempt_notifier *notifier, int cpu);
- void (*sched_out)(struct preempt_notifier *notifier,
- struct task_struct *next);
-};
-
-/**
- * preempt_notifier - key for installing preemption notifiers
- * @link: internal use
- * @ops: defines the notifier functions to be called
- *
- * Usually used in conjunction with container_of().
- */
-struct preempt_notifier {
- struct hlist_node link;
- struct preempt_ops *ops;
-};
-
-void preempt_notifier_register(struct preempt_notifier *notifier);
-void preempt_notifier_unregister(struct preempt_notifier *notifier);
-
-static inline void preempt_notifier_init(struct preempt_notifier *notifier,
- struct preempt_ops *ops)
-{
- INIT_HLIST_NODE(&notifier->link);
- notifier->ops = ops;
-}
-
-#endif
-
#endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1cddd4230868..e3c45ab0ad04 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1214,6 +1214,50 @@ struct sched_rt_entity {
#endif
};
+struct sched_notifier;
+
+/**
+ * sched_notifier_ops - notifiers called for scheduling events
+ * @wakeup: we're waking up
+ * notifier: struct sched_notifier for the task being woken up
+ * @sleep: we're going to bed
+ * notifier: struct sched_notifier for the task sleeping
+ * @in: we're now running on the cpu
+ * notifier: struct sched_notifier for the task being scheduled in
+ * prev: the task which ran before us
+ * @out: we're leaving the cpu
+ * notifier: struct sched_notifier for the task being scheduled out
+ * next: the task which will run after us
+ */
+struct sched_notifier_ops {
+ void (*wakeup)(struct sched_notifier *notifier);
+ void (*sleep)(struct sched_notifier *notifier);
+ void (*in)(struct sched_notifier *notifier, struct task_struct *prev);
+ void (*out)(struct sched_notifier *notifier, struct task_struct *next);
+};
+
+/**
+ * sched_notifier - key for installing scheduler notifiers
+ * @link: internal use
+ * @ops: defines the notifier functions to be called
+ *
+ * Usually used in conjunction with container_of().
+ */
+struct sched_notifier {
+ struct hlist_node link;
+ struct sched_notifier_ops *ops;
+};
+
+void sched_notifier_register(struct sched_notifier *notifier);
+void sched_notifier_unregister(struct sched_notifier *notifier);
+
+static inline void sched_notifier_init(struct sched_notifier *notifier,
+ struct sched_notifier_ops *ops)
+{
+ INIT_HLIST_NODE(&notifier->link);
+ notifier->ops = ops;
+}
+
struct rcu_node;
struct task_struct {
@@ -1237,10 +1281,8 @@ struct task_struct {
struct sched_entity se;
struct sched_rt_entity rt;
-#ifdef CONFIG_PREEMPT_NOTIFIERS
- /* list of struct preempt_notifier: */
- struct hlist_head preempt_notifiers;
-#endif
+ /* list of struct sched_notifier: */
+ struct hlist_head sched_notifiers;
/*
* fpu_counter contains the number of consecutive context switches
@@ -1811,6 +1853,8 @@ static inline void rcu_copy_process(struct task_struct *p)
#ifdef CONFIG_SMP
extern int set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask);
+extern int force_cpus_allowed(struct task_struct *p,
+ const struct cpumask *new_mask);
#else
static inline int set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask)
@@ -1819,6 +1863,11 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
return -EINVAL;
return 0;
}
+static inline int force_cpus_allowed(struct task_struct *p,
+ const struct cpumask *new_mask)
+{
+ return set_cpus_allowed_ptr(p, new_mask);
+}
#endif
#ifndef CONFIG_CPUMASK_OFFSTACK
@@ -2017,6 +2066,8 @@ extern void release_uids(struct user_namespace *ns);
extern void do_timer(unsigned long ticks);
+extern bool try_to_wake_up_local(struct task_struct *p, unsigned int state,
+ int wake_flags);
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk,
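
An illustrative sketch of the new sched_notifier API added above (my_* names are hypothetical). It mirrors how KVM uses the API later in this merge: embed a struct sched_notifier, recover the container in the callbacks, and register/unregister around the section that needs notifications — registration always affects current:

#include <linux/sched.h>

struct my_ctx {
	struct sched_notifier notifier;
	/* ... per-task state ... */
};

static void my_sched_in(struct sched_notifier *sn, struct task_struct *prev)
{
	struct my_ctx *ctx = container_of(sn, struct my_ctx, notifier);
	/* we are running on this cpu again */
}

static void my_sched_out(struct sched_notifier *sn, struct task_struct *next)
{
	struct my_ctx *ctx = container_of(sn, struct my_ctx, notifier);
	/* about to lose the cpu */
}

static struct sched_notifier_ops my_ops = {
	.in	= my_sched_in,
	.out	= my_sched_out,
};

static void my_enter(struct my_ctx *ctx)
{
	sched_notifier_init(&ctx->notifier, &my_ops);
	sched_notifier_register(&ctx->notifier);	/* affects current */
}

static void my_exit(struct my_ctx *ctx)
{
	sched_notifier_unregister(&ctx->notifier);
}
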
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index baba3a23a814..2d32e061bdc4 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -53,6 +53,11 @@ int stop_machine_create(void);
*/
void stop_machine_destroy(void);
+/**
+ * init_stop_machine: initialize stop_machine during boot
+ */
+void init_stop_machine(void);
+
#else
static inline int stop_machine(int (*fn)(void *), void *data,
@@ -67,6 +72,7 @@ static inline int stop_machine(int (*fn)(void *), void *data,
static inline int stop_machine_create(void) { return 0; }
static inline void stop_machine_destroy(void) { }
+static inline void init_stop_machine(void) { }
#endif /* CONFIG_SMP */
#endif /* _LINUX_STOP_MACHINE */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index cf24c20de9e4..8e689d1f91d8 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -22,11 +22,26 @@ typedef void (*work_func_t)(struct work_struct *work);
*/
#define work_data_bits(work) ((unsigned long *)(&(work)->data))
+enum {
+ WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
+ WORK_STRUCT_STATIC_BIT = 1, /* static initializer (debugobjects) */
+
+ WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
+ WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT,
+
+ /*
+ * Reserve 3 bits off of the cwq pointer. This is enough and
+ * provides acceptable alignment on both 32 and 64-bit
+ * machines.
+ */
+ WORK_STRUCT_FLAG_BITS = 3,
+
+ WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1,
+ WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
+};
+
struct work_struct {
atomic_long_t data;
-#define WORK_STRUCT_PENDING 0 /* T if work item pending execution */
-#define WORK_STRUCT_FLAG_MASK (3UL)
-#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
struct list_head entry;
work_func_t func;
#ifdef CONFIG_LOCKDEP
@@ -35,6 +50,7 @@ struct work_struct {
};
#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0)
+#define WORK_DATA_STATIC_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_STATIC)
struct delayed_work {
struct work_struct work;
@@ -63,7 +79,7 @@ struct execute_work {
#endif
#define __WORK_INITIALIZER(n, f) { \
- .data = WORK_DATA_INIT(), \
+ .data = WORK_DATA_STATIC_INIT(), \
.entry = { &(n).entry, &(n).entry }, \
.func = (f), \
__WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
@@ -91,6 +107,19 @@ struct execute_work {
#define PREPARE_DELAYED_WORK(_work, _func) \
PREPARE_WORK(&(_work)->work, (_func))
+#ifdef CONFIG_DEBUG_OBJECTS_WORK
+extern void __init_work(struct work_struct *work, int onstack);
+extern void destroy_work_on_stack(struct work_struct *work);
+static inline bool work_static(struct work_struct *work)
+{
+ return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
+}
+#else
+static inline void __init_work(struct work_struct *work, int onstack) { }
+static inline void destroy_work_on_stack(struct work_struct *work) { }
+static inline bool work_static(struct work_struct *work) { return false; }
+#endif
+
/*
* initialize all of a work item in one go
*
@@ -99,24 +128,36 @@ struct execute_work {
* to generate better code.
*/
#ifdef CONFIG_LOCKDEP
-#define INIT_WORK(_work, _func) \
+#define __INIT_WORK(_work, _func, _onstack) \
do { \
static struct lock_class_key __key; \
\
+ __init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
INIT_LIST_HEAD(&(_work)->entry); \
PREPARE_WORK((_work), (_func)); \
} while (0)
#else
-#define INIT_WORK(_work, _func) \
+#define __INIT_WORK(_work, _func, _onstack) \
do { \
+ __init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
INIT_LIST_HEAD(&(_work)->entry); \
PREPARE_WORK((_work), (_func)); \
} while (0)
#endif
+#define INIT_WORK(_work, _func) \
+ do { \
+ __INIT_WORK((_work), (_func), 0); \
+ } while (0)
+
+#define INIT_WORK_ON_STACK(_work, _func) \
+ do { \
+ __INIT_WORK((_work), (_func), 1); \
+ } while (0)
+
#define INIT_DELAYED_WORK(_work, _func) \
do { \
INIT_WORK(&(_work)->work, (_func)); \
@@ -125,28 +166,22 @@ struct execute_work {
#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \
do { \
- INIT_WORK(&(_work)->work, (_func)); \
+ INIT_WORK_ON_STACK(&(_work)->work, (_func)); \
init_timer_on_stack(&(_work)->timer); \
} while (0)
-#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \
+#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \
do { \
INIT_WORK(&(_work)->work, (_func)); \
init_timer_deferrable(&(_work)->timer); \
} while (0)
-#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \
- do { \
- INIT_WORK(&(_work)->work, (_func)); \
- init_timer_on_stack(&(_work)->timer); \
- } while (0)
-
/**
* work_pending - Find out whether a work item is currently pending
* @work: The work item in question
*/
#define work_pending(work) \
- test_bit(WORK_STRUCT_PENDING, work_data_bits(work))
+ test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
/**
* delayed_work_pending - Find out whether a delayable work item is currently
@@ -161,16 +196,19 @@ struct execute_work {
* @work: The work item in question
*/
#define work_clear_pending(work) \
- clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
+ clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
+enum {
+ WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */
+ WQ_SINGLE_THREAD = 1 << 1, /* no per-cpu worker */
+};
extern struct workqueue_struct *
-__create_workqueue_key(const char *name, int singlethread,
- int freezeable, int rt, struct lock_class_key *key,
- const char *lock_name);
+__create_workqueue_key(const char *name, unsigned int flags,
+ struct lock_class_key *key, const char *lock_name);
#ifdef CONFIG_LOCKDEP
-#define __create_workqueue(name, singlethread, freezeable, rt) \
+#define __create_workqueue(name, flags) \
({ \
static struct lock_class_key __key; \
const char *__lock_name; \
@@ -180,20 +218,20 @@ __create_workqueue_key(const char *name, int singlethread,
else \
__lock_name = #name; \
\
- __create_workqueue_key((name), (singlethread), \
- (freezeable), (rt), &__key, \
+ __create_workqueue_key((name), (flags), &__key, \
__lock_name); \
})
#else
-#define __create_workqueue(name, singlethread, freezeable, rt) \
- __create_workqueue_key((name), (singlethread), (freezeable), (rt), \
- NULL, NULL)
+#define __create_workqueue(name, flags) \
+ __create_workqueue_key((name), (flags), NULL, NULL)
#endif
-#define create_workqueue(name) __create_workqueue((name), 0, 0, 0)
-#define create_rt_workqueue(name) __create_workqueue((name), 0, 0, 1)
-#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0)
-#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0)
+#define create_workqueue(name) \
+ __create_workqueue((name), 0)
+#define create_freezeable_workqueue(name) \
+ __create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD)
+#define create_singlethread_workqueue(name) \
+ __create_workqueue((name), WQ_SINGLE_THREAD)
extern void destroy_workqueue(struct workqueue_struct *wq);
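
A short sketch of what the new WORK_STRUCT_* enum buys: because cwqs are aligned to 1 << WORK_STRUCT_FLAG_BITS, the bottom three bits of work->data are free for PENDING/STATIC flags while the rest holds the cwq pointer. This mirrors the private get_wq_data()/set_wq_data() helpers in kernel/workqueue.c below; the example_* names are illustrative:

static inline void *example_unpack_cwq(struct work_struct *work)
{
	/* mask off the flag bits to recover the cwq pointer */
	return (void *)(atomic_long_read(&work->data) &
			WORK_STRUCT_WQ_DATA_MASK);
}

static inline bool example_is_pending(struct work_struct *work)
{
	return test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work));
}
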
diff --git a/init/Kconfig b/init/Kconfig
index 94a8e10d6290..1b532005aede 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1235,7 +1235,4 @@ config STOP_MACHINE
source "block/Kconfig"
-config PREEMPT_NOTIFIERS
- bool
-
source "kernel/Kconfig.locks"
diff --git a/init/main.c b/init/main.c
index 698066ff8762..43807f7dcf5f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -34,6 +34,7 @@
#include <linux/security.h>
#include <linux/smp.h>
#include <linux/workqueue.h>
+#include <linux/stop_machine.h>
#include <linux/profile.h>
#include <linux/rcupdate.h>
#include <linux/moduleparam.h>
@@ -779,6 +780,7 @@ static void __init do_initcalls(void)
static void __init do_basic_setup(void)
{
init_workqueues();
+ init_stop_machine();
cpuset_init_smp();
usermodehelper_init();
init_tmpfs();
diff --git a/kernel/sched.c b/kernel/sched.c
index dba459d08916..be7592902610 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1389,6 +1389,16 @@ static const u32 prio_to_wmult[40] = {
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
+#define fire_sched_notifier(p, callback, args...) do { \
+ struct task_struct *__p = (p); \
+ struct sched_notifier *__sn; \
+ struct hlist_node *__pos; \
+ \
+ hlist_for_each_entry(__sn, __pos, &__p->sched_notifiers, link) \
+ if (__sn->ops->callback) \
+ __sn->ops->callback(__sn , ##args); \
+} while (0)
+
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
/*
@@ -2097,6 +2107,7 @@ struct migration_req {
struct task_struct *task;
int dest_cpu;
+ bool force;
struct completion done;
};
@@ -2105,8 +2116,8 @@ struct migration_req {
* The task's runqueue lock must be held.
* Returns true if you have to wait for migration thread.
*/
-static int
-migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
+static int migrate_task(struct task_struct *p, int dest_cpu,
+ struct migration_req *req, bool force)
{
struct rq *rq = task_rq(p);
@@ -2123,6 +2134,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
init_completion(&req->done);
req->task = p;
req->dest_cpu = dest_cpu;
+ req->force = force;
list_add(&req->list, &rq->migration_queue);
return 1;
@@ -2323,11 +2335,73 @@ void task_oncpu_function_call(struct task_struct *p,
preempt_enable();
}
-/***
+static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
+ bool is_sync, bool is_migrate, bool is_local)
+{
+ schedstat_inc(p, se.nr_wakeups);
+ if (is_sync)
+ schedstat_inc(p, se.nr_wakeups_sync);
+ if (is_migrate)
+ schedstat_inc(p, se.nr_wakeups_migrate);
+ if (is_local)
+ schedstat_inc(p, se.nr_wakeups_local);
+ else
+ schedstat_inc(p, se.nr_wakeups_remote);
+
+ activate_task(rq, p, 1);
+
+ /*
+ * Only attribute actual wakeups done by this task.
+ */
+ if (!in_interrupt()) {
+ struct sched_entity *se = &current->se;
+ u64 sample = se->sum_exec_runtime;
+
+ if (se->last_wakeup)
+ sample -= se->last_wakeup;
+ else
+ sample -= se->start_runtime;
+ update_avg(&se->avg_wakeup, sample);
+
+ se->last_wakeup = se->sum_exec_runtime;
+ }
+}
+
+static inline void ttwu_woken_up(struct task_struct *p, struct rq *rq,
+ int wake_flags, bool success)
+{
+ trace_sched_wakeup(rq, p, success);
+ check_preempt_curr(rq, p, wake_flags);
+
+ p->state = TASK_RUNNING;
+#ifdef CONFIG_SMP
+ if (p->sched_class->task_wake_up)
+ p->sched_class->task_wake_up(rq, p);
+
+ if (unlikely(rq->idle_stamp)) {
+ u64 delta = rq->clock - rq->idle_stamp;
+ u64 max = 2*sysctl_sched_migration_cost;
+
+ if (delta > max)
+ rq->avg_idle = max;
+ else
+ update_avg(&rq->avg_idle, delta);
+ rq->idle_stamp = 0;
+ }
+#endif
+ /*
+ * Wake up is complete, fire wake up notifier. This allows
+ * try_to_wake_up_local() to be called from wake up notifiers.
+ */
+ if (success)
+ fire_sched_notifier(p, wakeup);
+}
+
+/**
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
* @state: the mask of task states that can be woken
- * @sync: do a synchronous wakeup?
+ * @wake_flags: wake modifier flags (WF_*)
*
* Put it on the run-queue if it's not already there. The "current"
* thread is always on the run-queue (except when the actual
@@ -2335,7 +2409,8 @@ void task_oncpu_function_call(struct task_struct *p,
* the simpler "current->state = TASK_RUNNING" to mark yourself
* runnable without the overhead of this.
*
- * returns failure only if the task is already active.
+ * Returns %true if @p was woken up, %false if it was already running
+ * or @state didn't match @p's state.
*/
static int try_to_wake_up(struct task_struct *p, unsigned int state,
int wake_flags)
@@ -2406,57 +2481,61 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
out_activate:
#endif /* CONFIG_SMP */
- schedstat_inc(p, se.nr_wakeups);
- if (wake_flags & WF_SYNC)
- schedstat_inc(p, se.nr_wakeups_sync);
- if (orig_cpu != cpu)
- schedstat_inc(p, se.nr_wakeups_migrate);
- if (cpu == this_cpu)
- schedstat_inc(p, se.nr_wakeups_local);
- else
- schedstat_inc(p, se.nr_wakeups_remote);
- activate_task(rq, p, 1);
+ ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
+ cpu == this_cpu);
success = 1;
+out_running:
+ ttwu_woken_up(p, rq, wake_flags, success);
+out:
+ task_rq_unlock(rq, &flags);
+ put_cpu();
- /*
- * Only attribute actual wakeups done by this task.
- */
- if (!in_interrupt()) {
- struct sched_entity *se = &current->se;
- u64 sample = se->sum_exec_runtime;
-
- if (se->last_wakeup)
- sample -= se->last_wakeup;
- else
- sample -= se->start_runtime;
- update_avg(&se->avg_wakeup, sample);
+ return success;
+}
- se->last_wakeup = se->sum_exec_runtime;
- }
+/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the to-be-woken-up thread
+ * @state: the mask of task states that can be woken
+ * @wake_flags: wake modifier flags (WF_*)
+ *
+ * Put @p on the run-queue if it's not already there. The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and @p is
+ * not the current task. this_rq() stays locked over invocation.
+ *
+ * This function can be called from wakeup and sleep scheduler
+ * notifiers. Be careful not to create deep recursion by chaining
+ * wakeup notifiers.
+ *
+ * Returns %true if @p was woken up, %false if it was already running
+ * or @state didn't match @p's state.
+ */
+bool try_to_wake_up_local(struct task_struct *p, unsigned int state,
+ int wake_flags)
+{
+ struct rq *rq = task_rq(p);
+ bool success = false;
-out_running:
- trace_sched_wakeup(rq, p, success);
- check_preempt_curr(rq, p, wake_flags);
+ BUG_ON(rq != this_rq());
+ BUG_ON(p == current);
+ lockdep_assert_held(&rq->lock);
- p->state = TASK_RUNNING;
-#ifdef CONFIG_SMP
- if (p->sched_class->task_wake_up)
- p->sched_class->task_wake_up(rq, p);
+ if (!sched_feat(SYNC_WAKEUPS))
+ wake_flags &= ~WF_SYNC;
- if (unlikely(rq->idle_stamp)) {
- u64 delta = rq->clock - rq->idle_stamp;
- u64 max = 2*sysctl_sched_migration_cost;
+ if (!(p->state & state))
+ return false;
- if (delta > max)
- rq->avg_idle = max;
- else
- update_avg(&rq->avg_idle, delta);
- rq->idle_stamp = 0;
+ if (!p->se.on_rq) {
+ if (likely(!task_running(rq, p))) {
+ schedstat_inc(rq, ttwu_count);
+ schedstat_inc(rq, ttwu_local);
+ }
+ ttwu_activate(p, rq, wake_flags & WF_SYNC, false, true);
+ success = true;
}
-#endif
-out:
- task_rq_unlock(rq, &flags);
- put_cpu();
+
+ ttwu_woken_up(p, rq, wake_flags, success);
return success;
}
@@ -2538,10 +2617,7 @@ static void __sched_fork(struct task_struct *p)
INIT_LIST_HEAD(&p->rt.run_list);
p->se.on_rq = 0;
INIT_LIST_HEAD(&p->se.group_node);
-
-#ifdef CONFIG_PREEMPT_NOTIFIERS
- INIT_HLIST_HEAD(&p->preempt_notifiers);
-#endif
+ INIT_HLIST_HEAD(&p->sched_notifiers);
/*
* We mark the process as running here, but have not actually
@@ -2651,63 +2727,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
task_rq_unlock(rq, &flags);
}
-#ifdef CONFIG_PREEMPT_NOTIFIERS
-
/**
- * preempt_notifier_register - tell me when current is being preempted & rescheduled
+ * sched_notifier_register - register scheduler notifier
* @notifier: notifier struct to register
*/
-void preempt_notifier_register(struct preempt_notifier *notifier)
+void sched_notifier_register(struct sched_notifier *notifier)
{
- hlist_add_head(&notifier->link, &current->preempt_notifiers);
+ hlist_add_head(&notifier->link, &current->sched_notifiers);
}
-EXPORT_SYMBOL_GPL(preempt_notifier_register);
+EXPORT_SYMBOL_GPL(sched_notifier_register);
/**
- * preempt_notifier_unregister - no longer interested in preemption notifications
+ * sched_notifier_unregister - unregister scheduler notifier
* @notifier: notifier struct to unregister
*
- * This is safe to call from within a preemption notifier.
+ * This is safe to call from within a scheduler notifier.
*/
-void preempt_notifier_unregister(struct preempt_notifier *notifier)
+void sched_notifier_unregister(struct sched_notifier *notifier)
{
hlist_del(&notifier->link);
}
-EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
-
-static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
-{
- struct preempt_notifier *notifier;
- struct hlist_node *node;
-
- hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
- notifier->ops->sched_in(notifier, raw_smp_processor_id());
-}
-
-static void
-fire_sched_out_preempt_notifiers(struct task_struct *curr,
- struct task_struct *next)
-{
- struct preempt_notifier *notifier;
- struct hlist_node *node;
-
- hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
- notifier->ops->sched_out(notifier, next);
-}
-
-#else /* !CONFIG_PREEMPT_NOTIFIERS */
-
-static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
-{
-}
-
-static void
-fire_sched_out_preempt_notifiers(struct task_struct *curr,
- struct task_struct *next)
-{
-}
-
-#endif /* CONFIG_PREEMPT_NOTIFIERS */
+EXPORT_SYMBOL_GPL(sched_notifier_unregister);
/**
* prepare_task_switch - prepare to switch tasks
@@ -2726,7 +2766,7 @@ static inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
- fire_sched_out_preempt_notifiers(prev, next);
+ fire_sched_notifier(current, out, next);
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
}
@@ -2768,7 +2808,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
prev_state = prev->state;
finish_arch_switch(prev);
perf_event_task_sched_in(current, cpu_of(rq));
- fire_sched_in_preempt_notifiers(current);
+ fire_sched_notifier(current, in, prev);
finish_lock_switch(rq, prev);
if (mm)
@@ -3133,7 +3173,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
goto out;
/* force the process onto the specified CPU */
- if (migrate_task(p, dest_cpu, &req)) {
+ if (migrate_task(p, dest_cpu, &req, false)) {
/* Need to wait for migration thread (might exit: take ref). */
struct task_struct *mt = rq->migration_thread;
@@ -5446,10 +5486,17 @@ need_resched_nonpreemptible:
clear_tsk_need_resched(prev);
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
- if (unlikely(signal_pending_state(prev->state, prev)))
+ if (unlikely(signal_pending_state(prev->state, prev))) {
prev->state = TASK_RUNNING;
- else
+ } else {
+ /*
+ * Fire sleep notifier before changing any scheduler
+ * state. This allows try_to_wake_up_local() to be
+ * called from sleep notifiers.
+ */
+ fire_sched_notifier(prev, sleep);
deactivate_task(rq, prev, 1);
+ }
switch_count = &prev->nvcsw;
}
@@ -7039,34 +7086,19 @@ static inline void sched_init_granularity(void)
* 7) we wake up and the migration is done.
*/
-/*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
- */
-int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+static inline int __set_cpus_allowed(struct task_struct *p,
+ const struct cpumask *new_mask,
+ struct rq *rq, unsigned long *flags,
+ bool force)
{
struct migration_req req;
- unsigned long flags;
- struct rq *rq;
int ret = 0;
- rq = task_rq_lock(p, &flags);
if (!cpumask_intersects(new_mask, cpu_online_mask)) {
ret = -EINVAL;
goto out;
}
- if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
- !cpumask_equal(&p->cpus_allowed, new_mask))) {
- ret = -EINVAL;
- goto out;
- }
-
if (p->sched_class->set_cpus_allowed)
p->sched_class->set_cpus_allowed(p, new_mask);
else {
@@ -7078,12 +7110,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
- if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
+ if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req,
+ force)) {
/* Need help from migration thread: drop lock and wait. */
struct task_struct *mt = rq->migration_thread;
get_task_struct(mt);
- task_rq_unlock(rq, &flags);
+ task_rq_unlock(rq, flags);
wake_up_process(rq->migration_thread);
put_task_struct(mt);
wait_for_completion(&req.done);
@@ -7091,13 +7124,54 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
return 0;
}
out:
- task_rq_unlock(rq, &flags);
+ task_rq_unlock(rq, flags);
return ret;
}
+
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+ unsigned long flags;
+ struct rq *rq;
+
+ rq = task_rq_lock(p, &flags);
+
+ if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
+ !cpumask_equal(&p->cpus_allowed, new_mask))) {
+ task_rq_unlock(rq, &flags);
+ return -EINVAL;
+ }
+
+ return __set_cpus_allowed(p, new_mask, rq, &flags, false);
+}
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
/*
+ * Similar to set_cpus_allowed_ptr() but bypasses PF_THREAD_BOUND
+ * check and ignores cpu_active() status as long as the cpu is online.
+ * The caller is responsible for guaranteeing that the destination
+ * cpus don't go down until this function finishes and in general
+ * ensuring things don't go bonkers.
+ */
+int force_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+ unsigned long flags;
+ struct rq *rq;
+
+ rq = task_rq_lock(p, &flags);
+ return __set_cpus_allowed(p, new_mask, rq, &flags, true);
+}
+
+/*
* Move (not current) task off this cpu, onto dest cpu. We're doing
* this because either it can't run here any more (set_cpus_allowed()
* away from this CPU, or CPU going down), or because we're
@@ -7108,12 +7182,13 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
*
* Returns non-zero if task was successfully migrated.
*/
-static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu,
+ bool force)
{
struct rq *rq_dest, *rq_src;
int ret = 0, on_rq;
- if (unlikely(!cpu_active(dest_cpu)))
+ if (!force && unlikely(!cpu_active(dest_cpu)))
return ret;
rq_src = cpu_rq(src_cpu);
@@ -7192,7 +7267,8 @@ static int migration_thread(void *data)
if (req->task != NULL) {
spin_unlock(&rq->lock);
- __migrate_task(req->task, cpu, req->dest_cpu);
+ __migrate_task(req->task, cpu, req->dest_cpu,
+ req->force);
} else if (likely(cpu == (badcpu = smp_processor_id()))) {
req->dest_cpu = RCU_MIGRATION_GOT_QS;
spin_unlock(&rq->lock);
@@ -7217,7 +7293,7 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
int ret;
local_irq_disable();
- ret = __migrate_task(p, src_cpu, dest_cpu);
+ ret = __migrate_task(p, src_cpu, dest_cpu, false);
local_irq_enable();
return ret;
}
@@ -9569,9 +9645,7 @@ void __init sched_init(void)
set_load_weight(&init_task);
-#ifdef CONFIG_PREEMPT_NOTIFIERS
- INIT_HLIST_HEAD(&init_task.preempt_notifiers);
-#endif
+ INIT_HLIST_HEAD(&init_task.sched_notifiers);
#ifdef CONFIG_SMP
open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
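
A hedged sketch of what the sleep notifier plus try_to_wake_up_local() enable, per the comments above (my_* names are hypothetical, not from this merge): when a task blocks inside schedule(), its sleep notifier fires with this_rq() still locked and may wake a helper task bound to the same cpu without dropping the lock:

#include <linux/sched.h>

struct my_ctx {
	struct sched_notifier notifier;
	struct task_struct *helper;	/* must be bound to this cpu */
};

static void my_sleep(struct sched_notifier *sn)
{
	struct my_ctx *ctx = container_of(sn, struct my_ctx, notifier);

	/*
	 * Fired from schedule() with this_rq() locked, before the
	 * sleeping task is deactivated, so waking a cpu-local helper
	 * here is the intended use.
	 */
	if (ctx->helper)
		try_to_wake_up_local(ctx->helper, TASK_NORMAL, 0);
}
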
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 912823e2a11b..671a4ac00fba 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -25,6 +25,8 @@ enum stopmachine_state {
STOPMACHINE_RUN,
/* Exit */
STOPMACHINE_EXIT,
+ /* Done */
+ STOPMACHINE_DONE,
};
static enum stopmachine_state state;
@@ -42,10 +44,9 @@ static DEFINE_MUTEX(lock);
static DEFINE_MUTEX(setup_lock);
/* Users of stop_machine. */
static int refcount;
-static struct workqueue_struct *stop_machine_wq;
+static struct task_struct **stop_machine_threads;
static struct stop_machine_data active, idle;
static const struct cpumask *active_cpus;
-static void *stop_machine_work;
static void set_state(enum stopmachine_state newstate)
{
@@ -63,14 +64,31 @@ static void ack_state(void)
}
/* This is the actual function which stops the CPU. It runs
- * in the context of a dedicated stopmachine workqueue. */
-static void stop_cpu(struct work_struct *unused)
+ * on dedicated per-cpu kthreads. */
+static int stop_cpu(void *unused)
{
enum stopmachine_state curstate = STOPMACHINE_NONE;
- struct stop_machine_data *smdata = &idle;
+ struct stop_machine_data *smdata;
int cpu = smp_processor_id();
int err;
+repeat:
+ /* Wait for __stop_machine() to initiate */
+ while (true) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ /* <- kthread_stop() and __stop_machine()::smp_wmb() */
+ if (kthread_should_stop()) {
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+ if (state == STOPMACHINE_PREPARE)
+ break;
+ schedule();
+ }
+ smp_rmb(); /* <- __stop_machine()::set_state() */
+
+ /* Okay, let's go */
+ smdata = &idle;
if (!active_cpus) {
if (cpu == cpumask_first(cpu_online_mask))
smdata = &active;
@@ -104,6 +122,7 @@ static void stop_cpu(struct work_struct *unused)
} while (curstate != STOPMACHINE_EXIT);
local_irq_enable();
+ goto repeat;
}
/* Callback for CPUs which aren't supposed to do anything. */
@@ -112,46 +131,122 @@ static int chill(void *unused)
return 0;
}
+static int create_stop_machine_thread(unsigned int cpu)
+{
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+ struct task_struct **pp = per_cpu_ptr(stop_machine_threads, cpu);
+ struct task_struct *p;
+
+ if (*pp)
+ return -EBUSY;
+
+ p = kthread_create(stop_cpu, NULL, "kstop/%u", cpu);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+ *pp = p;
+ return 0;
+}
+
+/* Should be called with cpu hotplug disabled and setup_lock held */
+static void kill_stop_machine_threads(void)
+{
+ unsigned int cpu;
+
+ if (!stop_machine_threads)
+ return;
+
+ for_each_online_cpu(cpu) {
+ struct task_struct *p = *per_cpu_ptr(stop_machine_threads, cpu);
+ if (p)
+ kthread_stop(p);
+ }
+ free_percpu(stop_machine_threads);
+ stop_machine_threads = NULL;
+}
+
int stop_machine_create(void)
{
+ unsigned int cpu;
+
+ get_online_cpus();
mutex_lock(&setup_lock);
if (refcount)
goto done;
- stop_machine_wq = create_rt_workqueue("kstop");
- if (!stop_machine_wq)
- goto err_out;
- stop_machine_work = alloc_percpu(struct work_struct);
- if (!stop_machine_work)
+
+ stop_machine_threads = alloc_percpu(struct task_struct *);
+ if (!stop_machine_threads)
goto err_out;
+
+ /*
+ * cpu hotplug is disabled, create only for online cpus,
+ * cpu_callback() will handle cpu hot [un]plugs.
+ */
+ for_each_online_cpu(cpu) {
+ if (create_stop_machine_thread(cpu))
+ goto err_out;
+ kthread_bind(*per_cpu_ptr(stop_machine_threads, cpu), cpu);
+ }
done:
refcount++;
mutex_unlock(&setup_lock);
+ put_online_cpus();
return 0;
err_out:
- if (stop_machine_wq)
- destroy_workqueue(stop_machine_wq);
+ kill_stop_machine_threads();
mutex_unlock(&setup_lock);
+ put_online_cpus();
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(stop_machine_create);
void stop_machine_destroy(void)
{
+ get_online_cpus();
mutex_lock(&setup_lock);
- refcount--;
- if (refcount)
- goto done;
- destroy_workqueue(stop_machine_wq);
- free_percpu(stop_machine_work);
-done:
+ if (!--refcount)
+ kill_stop_machine_threads();
mutex_unlock(&setup_lock);
+ put_online_cpus();
}
EXPORT_SYMBOL_GPL(stop_machine_destroy);
+static int __cpuinit stop_machine_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ struct task_struct **pp = per_cpu_ptr(stop_machine_threads, cpu);
+
+ /* Hotplug exclusion is enough, no need to worry about setup_lock */
+ if (!stop_machine_threads)
+ return NOTIFY_OK;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ if (create_stop_machine_thread(cpu)) {
+ printk(KERN_ERR "failed to create stop machine "
+ "thread for %u\n", cpu);
+ return NOTIFY_BAD;
+ }
+ break;
+
+ case CPU_ONLINE:
+ kthread_bind(*pp, cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_POST_DEAD:
+ kthread_stop(*pp);
+ *pp = NULL;
+ break;
+ }
+ return NOTIFY_OK;
+}
+
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
- struct work_struct *sm_work;
int i, ret;
/* Set up initial state. */
@@ -164,19 +259,18 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
idle.fn = chill;
idle.data = NULL;
- set_state(STOPMACHINE_PREPARE);
+ set_state(STOPMACHINE_PREPARE); /* -> stop_cpu()::smp_rmb() */
+ smp_wmb(); /* -> stop_cpu()::set_current_state() */
/* Schedule the stop_cpu work on all cpus: hold this CPU so one
* doesn't hit this CPU until we're ready. */
get_cpu();
- for_each_online_cpu(i) {
- sm_work = per_cpu_ptr(stop_machine_work, i);
- INIT_WORK(sm_work, stop_cpu);
- queue_work_on(i, stop_machine_wq, sm_work);
- }
+ for_each_online_cpu(i)
+ wake_up_process(*per_cpu_ptr(stop_machine_threads, i));
/* This will release the thread on our CPU. */
put_cpu();
- flush_workqueue(stop_machine_wq);
+ while (state < STOPMACHINE_DONE)
+ yield();
ret = active.fnret;
mutex_unlock(&lock);
return ret;
@@ -197,3 +291,8 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
+
+void __init init_stop_machine(void)
+{
+ hotcpu_notifier(stop_machine_cpu_callback, 0);
+}
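
stop_machine now uses a generic per-cpu RT kthread pattern: create, raise to SCHED_FIFO, bind to the cpu, park until woken, and stop via kthread_stop(). A stand-alone sketch of that pattern (my_* names are illustrative; error handling trimmed):

#include <linux/kthread.h>
#include <linux/sched.h>

static int my_cpu_fn(void *unused)
{
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		/* check for stop after setting state to avoid a race */
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}
		schedule();
		/* woken up: do the per-cpu service work */
	}
	return 0;
}

static struct task_struct *my_create_bound_thread(unsigned int cpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	struct task_struct *p;

	p = kthread_create(my_cpu_fn, NULL, "my_kthread/%u", cpu);
	if (IS_ERR(p))
		return p;
	sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
	kthread_bind(p, cpu);	/* only valid while cpu is online */
	return p;
}
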
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d006554888dc..de0bfeb5e95f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -414,7 +414,9 @@ config KMEMTRACE
If unsure, say N.
config WORKQUEUE_TRACER
- bool "Trace workqueues"
+# Temporarily disabled during workqueue reimplementation
+# bool "Trace workqueues"
+ def_bool n
select GENERIC_TRACER
help
The workqueue tracer provides some statistical information
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 67e526b6ae81..5392939cb1e7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,12 +33,22 @@
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/workqueue.h>
+
+/*
+ * Structure fields follow one of the following exclusion rules.
+ *
+ * I: Set during initialization and read-only afterwards.
+ *
+ * L: cwq->lock protected. Access with cwq->lock held.
+ *
+ * W: workqueue_lock protected.
+ */
/*
* The per-CPU workqueue (if single thread, we always use the first
- * possible cpu).
+ * possible cpu). The lower WORK_STRUCT_FLAG_BITS of
+ * work_struct->data are used for flags and thus cwqs need to be
+ * aligned at two's power of the number of flag bits.
*/
struct cpu_workqueue_struct {
@@ -48,26 +58,134 @@ struct cpu_workqueue_struct {
wait_queue_head_t more_work;
struct work_struct *current_work;
- struct workqueue_struct *wq;
- struct task_struct *thread;
-} ____cacheline_aligned;
+ struct workqueue_struct *wq; /* I: the owning workqueue */
+ struct task_struct *thread;
+} __attribute__((aligned(1 << WORK_STRUCT_FLAG_BITS)));
/*
* The externally visible workqueue abstraction is an array of
* per-CPU workqueues:
*/
struct workqueue_struct {
- struct cpu_workqueue_struct *cpu_wq;
- struct list_head list;
- const char *name;
- int singlethread;
- int freezeable; /* Freeze threads during suspend */
- int rt;
+ unsigned int flags; /* I: WQ_* flags */
+ struct cpu_workqueue_struct *cpu_wq; /* I: cwq's */
+ struct list_head list; /* W: list of all workqueues */
+ const char *name; /* I: workqueue name */
#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
+ struct lockdep_map lockdep_map;
#endif
};
+#ifdef CONFIG_DEBUG_OBJECTS_WORK
+
+static struct debug_obj_descr work_debug_descr;
+
+/*
+ * fixup_init is called when:
+ * - an active object is initialized
+ */
+static int work_fixup_init(void *addr, enum debug_obj_state state)
+{
+ struct work_struct *work = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_ACTIVE:
+ cancel_work_sync(work);
+ debug_object_init(work, &work_debug_descr);
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * fixup_activate is called when:
+ * - an active object is activated
+ * - an unknown object is activated (might be a statically initialized object)
+ */
+static int work_fixup_activate(void *addr, enum debug_obj_state state)
+{
+ struct work_struct *work = addr;
+
+ switch (state) {
+
+ case ODEBUG_STATE_NOTAVAILABLE:
+ /*
+ * This is not really a fixup. The work struct was
+ * statically initialized. We just make sure that it
+ * is tracked in the object tracker.
+ */
+ if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
+ debug_object_init(work, &work_debug_descr);
+ debug_object_activate(work, &work_debug_descr);
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return 0;
+
+ case ODEBUG_STATE_ACTIVE:
+ WARN_ON(1);
+
+ default:
+ return 0;
+ }
+}
+
+/*
+ * fixup_free is called when:
+ * - an active object is freed
+ */
+static int work_fixup_free(void *addr, enum debug_obj_state state)
+{
+ struct work_struct *work = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_ACTIVE:
+ cancel_work_sync(work);
+ debug_object_free(work, &work_debug_descr);
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static struct debug_obj_descr work_debug_descr = {
+ .name = "work_struct",
+ .fixup_init = work_fixup_init,
+ .fixup_activate = work_fixup_activate,
+ .fixup_free = work_fixup_free,
+};
+
+static inline void debug_work_activate(struct work_struct *work)
+{
+ debug_object_activate(work, &work_debug_descr);
+}
+
+static inline void debug_work_deactivate(struct work_struct *work)
+{
+ debug_object_deactivate(work, &work_debug_descr);
+}
+
+void __init_work(struct work_struct *work, int onstack)
+{
+ if (onstack)
+ debug_object_init_on_stack(work, &work_debug_descr);
+ else
+ debug_object_init(work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(__init_work);
+
+void destroy_work_on_stack(struct work_struct *work)
+{
+ debug_object_free(work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_work_on_stack);
+
+#else
+static inline void debug_work_activate(struct work_struct *work) { }
+static inline void debug_work_deactivate(struct work_struct *work) { }
+#endif
+
/* Serializes the accesses to the list of workqueues. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);
@@ -84,9 +202,9 @@ static const struct cpumask *cpu_singlethread_map __read_mostly;
static cpumask_var_t cpu_populated_map __read_mostly;
/* If it's single threaded, it isn't in the list of workqueues. */
-static inline int is_wq_single_threaded(struct workqueue_struct *wq)
+static inline bool is_wq_single_threaded(struct workqueue_struct *wq)
{
- return wq->singlethread;
+ return wq->flags & WQ_SINGLE_THREAD;
}
static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
@@ -95,8 +213,8 @@ static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
? cpu_singlethread_map : cpu_populated_map;
}
-static
-struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
+static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
+ struct workqueue_struct *wq)
{
if (unlikely(is_wq_single_threaded(wq)))
cpu = singlethread_cpu;
@@ -108,45 +226,61 @@ struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
* - Must *only* be called if the pending flag is set
*/
static inline void set_wq_data(struct work_struct *work,
- struct cpu_workqueue_struct *cwq)
+ struct cpu_workqueue_struct *cwq,
+ unsigned long extra_flags)
{
- unsigned long new;
-
BUG_ON(!work_pending(work));
- new = (unsigned long) cwq | (1UL << WORK_STRUCT_PENDING);
- new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
- atomic_long_set(&work->data, new);
+ atomic_long_set(&work->data, (unsigned long)cwq |
+ (work_static(work) ? WORK_STRUCT_STATIC : 0) |
+ WORK_STRUCT_PENDING | extra_flags);
}
-static inline
-struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
+static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
{
- return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
+ return (void *)(atomic_long_read(&work->data) &
+ WORK_STRUCT_WQ_DATA_MASK);
}
+/**
+ * insert_work - insert a work into cwq
+ * @cwq: cwq @work belongs to
+ * @work: work to insert
+ * @head: insertion point
+ * @extra_flags: extra WORK_STRUCT_* flags to set
+ *
+ * Insert @work into @cwq after @head.
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock).
+ */
static void insert_work(struct cpu_workqueue_struct *cwq,
- struct work_struct *work, struct list_head *head)
+ struct work_struct *work, struct list_head *head,
+ unsigned int extra_flags)
{
- trace_workqueue_insertion(cwq->thread, work);
+ /* we own @work, set data and link */
+ set_wq_data(work, cwq, extra_flags);
- set_wq_data(work, cwq);
/*
* Ensure that we get the right work->data if we see the
* result of list_add() below, see try_to_grab_pending().
*/
smp_wmb();
+
list_add_tail(&work->entry, head);
wake_up(&cwq->more_work);
}
-static void __queue_work(struct cpu_workqueue_struct *cwq,
+static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{
+ struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
unsigned long flags;
+ debug_work_activate(work);
spin_lock_irqsave(&cwq->lock, flags);
- insert_work(cwq, work, &cwq->worklist);
+ BUG_ON(!list_empty(&work->entry));
+ insert_work(cwq, work, &cwq->worklist, 0);
spin_unlock_irqrestore(&cwq->lock, flags);
}
@@ -187,9 +321,8 @@ queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
int ret = 0;
- if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
- BUG_ON(!list_empty(&work->entry));
- __queue_work(wq_per_cpu(wq, cpu), work);
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ __queue_work(cpu, wq, work);
ret = 1;
}
return ret;
@@ -200,9 +333,8 @@ static void delayed_work_timer_fn(unsigned long __data)
{
struct delayed_work *dwork = (struct delayed_work *)__data;
struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
- struct workqueue_struct *wq = cwq->wq;
- __queue_work(wq_per_cpu(wq, smp_processor_id()), &dwork->work);
+ __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
}
/**
@@ -239,14 +371,14 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;
- if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
BUG_ON(timer_pending(timer));
BUG_ON(!list_empty(&work->entry));
timer_stats_timer_set_start_info(&dwork->timer);
/* This stores cwq for the moment, for the timer_fn */
- set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
+ set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0);
timer->expires = jiffies + delay;
timer->data = (unsigned long)dwork;
timer->function = delayed_work_timer_fn;
@@ -261,60 +393,88 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
+/**
+ * process_one_work - process single work
+ * @cwq: cwq to process work for
+ * @work: work to process
+ *
+ * Process @work. This function contains all the logic necessary to
+ * process a single work including synchronization against and
+ * interaction with other workers on the same cpu, queueing and
+ * flushing. As long as context requirement is met, any worker can
+ * call this function to process a work.
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock) which is released and regrabbed.
+ */
+static void process_one_work(struct cpu_workqueue_struct *cwq,
+ struct work_struct *work)
+{
+ work_func_t f = work->func;
+#ifdef CONFIG_LOCKDEP
+ /*
+ * It is permissible to free the struct work_struct from
+ * inside the function that is called from it, this we need to
+ * take into account for lockdep too. To avoid bogus "held
+ * lock freed" warnings as well as problems when looking into
+ * work->lockdep_map, make a copy and use that here.
+ */
+ struct lockdep_map lockdep_map = work->lockdep_map;
+#endif
+ /* claim and process */
+ debug_work_deactivate(work);
+ cwq->current_work = work;
+ list_del_init(&work->entry);
+
+ spin_unlock_irq(&cwq->lock);
+
+ BUG_ON(get_wq_data(work) != cwq);
+ work_clear_pending(work);
+ lock_map_acquire(&cwq->wq->lockdep_map);
+ lock_map_acquire(&lockdep_map);
+ f(work);
+ lock_map_release(&lockdep_map);
+ lock_map_release(&cwq->wq->lockdep_map);
+
+ if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
+ printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
+ "%s/0x%08x/%d\n",
+ current->comm, preempt_count(), task_pid_nr(current));
+ printk(KERN_ERR " last function: ");
+ print_symbol("%s\n", (unsigned long)f);
+ debug_show_held_locks(current);
+ dump_stack();
+ }
+
+ spin_lock_irq(&cwq->lock);
+
+ /* we're done with it, release */
+ cwq->current_work = NULL;
+}
+
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
spin_lock_irq(&cwq->lock);
while (!list_empty(&cwq->worklist)) {
struct work_struct *work = list_entry(cwq->worklist.next,
struct work_struct, entry);
- work_func_t f = work->func;
-#ifdef CONFIG_LOCKDEP
- /*
- * It is permissible to free the struct work_struct
- * from inside the function that is called from it,
- * this we need to take into account for lockdep too.
- * To avoid bogus "held lock freed" warnings as well
- * as problems when looking into work->lockdep_map,
- * make a copy and use that here.
- */
- struct lockdep_map lockdep_map = work->lockdep_map;
-#endif
- trace_workqueue_execution(cwq->thread, work);
- cwq->current_work = work;
- list_del_init(cwq->worklist.next);
- spin_unlock_irq(&cwq->lock);
-
- BUG_ON(get_wq_data(work) != cwq);
- work_clear_pending(work);
- lock_map_acquire(&cwq->wq->lockdep_map);
- lock_map_acquire(&lockdep_map);
- f(work);
- lock_map_release(&lockdep_map);
- lock_map_release(&cwq->wq->lockdep_map);
-
- if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
- printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
- "%s/0x%08x/%d\n",
- current->comm, preempt_count(),
- task_pid_nr(current));
- printk(KERN_ERR " last function: ");
- print_symbol("%s\n", (unsigned long)f);
- debug_show_held_locks(current);
- dump_stack();
- }
-
- spin_lock_irq(&cwq->lock);
- cwq->current_work = NULL;
+ process_one_work(cwq, work);
}
spin_unlock_irq(&cwq->lock);
}
+/**
+ * worker_thread - the worker thread function
+ * @__cwq: cwq to serve
+ *
+ * The cwq worker thread function.
+ */
static int worker_thread(void *__cwq)
{
struct cpu_workqueue_struct *cwq = __cwq;
DEFINE_WAIT(wait);
- if (cwq->wq->freezeable)
+ if (cwq->wq->flags & WQ_FREEZEABLE)
set_freezable();
for (;;) {
@@ -347,15 +507,32 @@ static void wq_barrier_func(struct work_struct *work)
complete(&barr->done);
}
+/**
+ * insert_wq_barrier - insert a barrier work
+ * @cwq: cwq to insert barrier into
+ * @barr: wq_barrier to insert
+ * @head: insertion point
+ *
+ * Insert barrier @barr into @cwq before @head.
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock).
+ */
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
struct wq_barrier *barr, struct list_head *head)
{
- INIT_WORK(&barr->work, wq_barrier_func);
- __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
-
+ /*
+ * debugobject calls are safe here even with cwq->lock locked
+ * as we know for sure that this will not trigger any of the
+ * checks and call back into the fixup functions where we
+ * might deadlock.
+ */
+ INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
+ __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
init_completion(&barr->done);
- insert_work(cwq, &barr->work, head);
+ debug_work_activate(&barr->work);
+ insert_work(cwq, &barr->work, head, 0);
}
static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -372,8 +549,10 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
}
spin_unlock_irq(&cwq->lock);
- if (active)
+ if (active) {
wait_for_completion(&barr.done);
+ destroy_work_on_stack(&barr.work);
+ }
return active;
}
@@ -387,9 +566,6 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
*
* We sleep until all works which were queued on entry have been handled,
* but we are not livelocked by new incoming ones.
- *
- * This function used to run the workqueues itself. Now we just wait for the
- * helper threads to do it.
*/
void flush_workqueue(struct workqueue_struct *wq)
{
@@ -428,7 +604,6 @@ int flush_work(struct work_struct *work)
lock_map_acquire(&cwq->wq->lockdep_map);
lock_map_release(&cwq->wq->lockdep_map);
- prev = NULL;
spin_lock_irq(&cwq->lock);
if (!list_empty(&work->entry)) {
/*
@@ -437,21 +612,22 @@ int flush_work(struct work_struct *work)
*/
smp_rmb();
if (unlikely(cwq != get_wq_data(work)))
- goto out;
+ goto already_gone;
prev = &work->entry;
} else {
if (cwq->current_work != work)
- goto out;
+ goto already_gone;
prev = &cwq->worklist;
}
insert_wq_barrier(cwq, &barr, prev->next);
-out:
- spin_unlock_irq(&cwq->lock);
- if (!prev)
- return 0;
+ spin_unlock_irq(&cwq->lock);
wait_for_completion(&barr.done);
+ destroy_work_on_stack(&barr.work);
return 1;
+already_gone:
+ spin_unlock_irq(&cwq->lock);
+ return 0;
}
EXPORT_SYMBOL_GPL(flush_work);
@@ -464,7 +640,7 @@ static int try_to_grab_pending(struct work_struct *work)
struct cpu_workqueue_struct *cwq;
int ret = -1;
- if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
return 0;
/*
@@ -485,6 +661,7 @@ static int try_to_grab_pending(struct work_struct *work)
*/
smp_rmb();
if (cwq == get_wq_data(work)) {
+ debug_work_deactivate(work);
list_del_init(&work->entry);
ret = 1;
}
@@ -507,8 +684,10 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
}
spin_unlock_irq(&cwq->lock);
- if (unlikely(running))
+ if (unlikely(running)) {
wait_for_completion(&barr.done);
+ destroy_work_on_stack(&barr.work);
+ }
}
static void wait_on_work(struct work_struct *work)
@@ -531,7 +710,7 @@ static void wait_on_work(struct work_struct *work)
cpu_map = wq_cpu_map(wq);
for_each_cpu(cpu, cpu_map)
- wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+ wait_on_cpu_work(get_cwq(cpu, wq), work);
}
static int __cancel_work_timer(struct work_struct *work,
@@ -648,9 +827,7 @@ EXPORT_SYMBOL(schedule_delayed_work);
void flush_delayed_work(struct delayed_work *dwork)
{
if (del_timer_sync(&dwork->timer)) {
- struct cpu_workqueue_struct *cwq;
- cwq = wq_per_cpu(keventd_wq, get_cpu());
- __queue_work(cwq, &dwork->work);
+ __queue_work(get_cpu(), keventd_wq, &dwork->work);
put_cpu();
}
flush_work(&dwork->work);
@@ -788,7 +965,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
- struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
struct workqueue_struct *wq = cwq->wq;
const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
struct task_struct *p;
@@ -804,12 +980,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
*/
if (IS_ERR(p))
return PTR_ERR(p);
- if (cwq->wq->rt)
- sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
cwq->thread = p;
- trace_workqueue_creation(cwq->thread, cpu);
-
return 0;
}
@@ -825,9 +997,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
}
struct workqueue_struct *__create_workqueue_key(const char *name,
- int singlethread,
- int freezeable,
- int rt,
+ unsigned int flags,
struct lock_class_key *key,
const char *lock_name)
{
@@ -837,22 +1007,18 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
wq = kzalloc(sizeof(*wq), GFP_KERNEL);
if (!wq)
- return NULL;
+ goto err;
wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
- if (!wq->cpu_wq) {
- kfree(wq);
- return NULL;
- }
+ if (!wq->cpu_wq)
+ goto err;
+ wq->flags = flags;
wq->name = name;
lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
- wq->singlethread = singlethread;
- wq->freezeable = freezeable;
- wq->rt = rt;
INIT_LIST_HEAD(&wq->list);
- if (singlethread) {
+ if (flags & WQ_SINGLE_THREAD) {
cwq = init_cpu_workqueue(wq, singlethread_cpu);
err = create_workqueue_thread(cwq, singlethread_cpu);
start_workqueue_thread(cwq, -1);
@@ -888,6 +1054,12 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
wq = NULL;
}
return wq;
+err:
+ if (wq) {
+ free_percpu(wq->cpu_wq);
+ kfree(wq);
+ }
+ return NULL;
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);
@@ -914,7 +1086,6 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
* checks list_empty(), and a "normal" queue_work() can't use
* a dead CPU.
*/
- trace_workqueue_destruction(cwq->thread);
kthread_stop(cwq->thread);
cwq->thread = NULL;
}
@@ -1043,6 +1214,15 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
void __init init_workqueues(void)
{
+ /*
+ * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
+ * Make sure that the alignment isn't lower than that of
+ * unsigned long long in case this code survives for longer
+ * than twenty years. :-P
+ */
+ BUILD_BUG_ON(__alignof__(struct cpu_workqueue_struct) <
+ __alignof__(unsigned long long));
+
alloc_cpumask_var(&cpu_populated_map, GFP_KERNEL);
cpumask_copy(cpu_populated_map, cpu_online_mask);
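
A usage sketch for the delayed-work paths touched above (my_* names are illustrative, not part of this merge): queue with a timeout, then either flush — which, per flush_delayed_work(), fires a pending timer early instead of waiting for it — or cancel:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct delayed_work my_dwork;

static void my_dwork_fn(struct work_struct *work)
{
	/* runs in keventd context roughly HZ ticks after queueing */
}

static int __init my_setup(void)
{
	INIT_DELAYED_WORK(&my_dwork, my_dwork_fn);
	schedule_delayed_work(&my_dwork, HZ);
	return 0;
}

static void my_teardown(void)
{
	flush_delayed_work(&my_dwork);	/* or cancel_delayed_work_sync() */
}
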
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a24efcc3e6f0..62a33f3a2b49 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -298,6 +298,14 @@ config DEBUG_OBJECTS_TIMERS
timer routines to track the life time of timer objects and
validate the timer operations.
+config DEBUG_OBJECTS_WORK
+ bool "Debug work objects"
+ depends on DEBUG_OBJECTS
+ help
+ If you say Y here, additional code will be inserted into the
+ work queue routines to track the life time of work objects and
+ validate the work operations.
+
config DEBUG_OBJECTS_ENABLE_DEFAULT
int "debug_objects bootup default value (0-1)"
range 0 1
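
A hedged illustration of the bug class DEBUG_OBJECTS_WORK is meant to catch — the my_* code below is deliberately wrong to show what the fixup hooks in kernel/workqueue.c above flag at runtime:

#include <linux/workqueue.h>
#include <linux/slab.h>

static void my_work_fn(struct work_struct *work)
{
	/* never runs: the item is freed while still pending */
}

static void my_buggy_free(struct workqueue_struct *wq)
{
	struct work_struct *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return;
	INIT_WORK(work, my_work_fn);
	queue_work(wq, work);
	kfree(work);	/* BUG: still pending -- with DEBUG_OBJECTS_WORK,
			 * work_fixup_free() cancels the work and warns
			 * instead of leaving a dangling list entry */
}
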
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e1f2bf8d7b1e..9ebb284403f9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -77,7 +77,7 @@ static atomic_t hardware_enable_failed;
struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
-static __read_mostly struct preempt_ops kvm_preempt_ops;
+static __read_mostly struct sched_notifier_ops kvm_sched_notifier_ops;
struct dentry *kvm_debugfs_dir;
@@ -109,7 +109,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
mutex_lock(&vcpu->mutex);
cpu = get_cpu();
- preempt_notifier_register(&vcpu->preempt_notifier);
+ sched_notifier_register(&vcpu->sched_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
}
@@ -118,7 +118,7 @@ void vcpu_put(struct kvm_vcpu *vcpu)
{
preempt_disable();
kvm_arch_vcpu_put(vcpu);
- preempt_notifier_unregister(&vcpu->preempt_notifier);
+ sched_notifier_unregister(&vcpu->sched_notifier);
preempt_enable();
mutex_unlock(&vcpu->mutex);
}
@@ -1192,7 +1192,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
if (IS_ERR(vcpu))
return PTR_ERR(vcpu);
- preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
+ sched_notifier_init(&vcpu->sched_notifier, &kvm_sched_notifier_ops);
r = kvm_arch_vcpu_setup(vcpu);
if (r)
@@ -2026,23 +2026,21 @@ static struct sys_device kvm_sysdev = {
struct page *bad_page;
pfn_t bad_pfn;
-static inline
-struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
+static inline struct kvm_vcpu *sched_notifier_to_vcpu(struct sched_notifier *sn)
{
- return container_of(pn, struct kvm_vcpu, preempt_notifier);
+ return container_of(sn, struct kvm_vcpu, sched_notifier);
}
-static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
+static void kvm_sched_in(struct sched_notifier *sn, struct task_struct *prev)
{
- struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+ struct kvm_vcpu *vcpu = sched_notifier_to_vcpu(sn);
- kvm_arch_vcpu_load(vcpu, cpu);
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
}
-static void kvm_sched_out(struct preempt_notifier *pn,
- struct task_struct *next)
+static void kvm_sched_out(struct sched_notifier *sn, struct task_struct *next)
{
- struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+ struct kvm_vcpu *vcpu = sched_notifier_to_vcpu(sn);
kvm_arch_vcpu_put(vcpu);
}
@@ -2115,8 +2113,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
goto out_free;
}
- kvm_preempt_ops.sched_in = kvm_sched_in;
- kvm_preempt_ops.sched_out = kvm_sched_out;
+ kvm_sched_notifier_ops.in = kvm_sched_in;
+ kvm_sched_notifier_ops.out = kvm_sched_out;
kvm_init_debug();