Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 334
1 file changed, 204 insertions, 130 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index dba459d08916..be7592902610 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1389,6 +1389,16 @@ static const u32 prio_to_wmult[40] = {
  /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
 };
 
+#define fire_sched_notifier(p, callback, args...) do {			\
+	struct task_struct *__p = (p);					\
+	struct sched_notifier *__sn;					\
+	struct hlist_node *__pos;					\
+									\
+	hlist_for_each_entry(__sn, __pos, &__p->sched_notifiers, link)	\
+		if (__sn->ops->callback)				\
+			__sn->ops->callback(__sn , ##args);		\
+} while (0)
+
 static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
 
 /*
@@ -2097,6 +2107,7 @@ struct migration_req {
 
 	struct task_struct *task;
 	int dest_cpu;
+	bool force;
 
 	struct completion done;
 };
@@ -2105,8 +2116,8 @@ struct migration_req {
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static int
-migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
+static int migrate_task(struct task_struct *p, int dest_cpu,
+			struct migration_req *req, bool force)
 {
 	struct rq *rq = task_rq(p);
 
@@ -2123,6 +2134,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 	init_completion(&req->done);
 	req->task = p;
 	req->dest_cpu = dest_cpu;
+	req->force = force;
 	list_add(&req->list, &rq->migration_queue);
 
 	return 1;
@@ -2323,11 +2335,73 @@ void task_oncpu_function_call(struct task_struct *p,
 	preempt_enable();
 }
 
-/***
+static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
+				 bool is_sync, bool is_migrate, bool is_local)
+{
+	schedstat_inc(p, se.nr_wakeups);
+	if (is_sync)
+		schedstat_inc(p, se.nr_wakeups_sync);
+	if (is_migrate)
+		schedstat_inc(p, se.nr_wakeups_migrate);
+	if (is_local)
+		schedstat_inc(p, se.nr_wakeups_local);
+	else
+		schedstat_inc(p, se.nr_wakeups_remote);
+
+	activate_task(rq, p, 1);
+
+	/*
+	 * Only attribute actual wakeups done by this task.
+	 */
+	if (!in_interrupt()) {
+		struct sched_entity *se = &current->se;
+		u64 sample = se->sum_exec_runtime;
+
+		if (se->last_wakeup)
+			sample -= se->last_wakeup;
+		else
+			sample -= se->start_runtime;
+		update_avg(&se->avg_wakeup, sample);
+
+		se->last_wakeup = se->sum_exec_runtime;
+	}
+}
+
+static inline void ttwu_woken_up(struct task_struct *p, struct rq *rq,
+				 int wake_flags, bool success)
+{
+	trace_sched_wakeup(rq, p, success);
+	check_preempt_curr(rq, p, wake_flags);
+
+	p->state = TASK_RUNNING;
+#ifdef CONFIG_SMP
+	if (p->sched_class->task_wake_up)
+		p->sched_class->task_wake_up(rq, p);
+
+	if (unlikely(rq->idle_stamp)) {
+		u64 delta = rq->clock - rq->idle_stamp;
+		u64 max = 2*sysctl_sched_migration_cost;
+
+		if (delta > max)
+			rq->avg_idle = max;
+		else
+			update_avg(&rq->avg_idle, delta);
+		rq->idle_stamp = 0;
+	}
+#endif
+	/*
+	 * Wake up is complete, fire wake up notifier. This allows
+	 * try_to_wake_up_local() to be called from wake up notifiers.
+	 */
+	if (success)
+		fire_sched_notifier(p, wakeup);
+}
+
+/**
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
  * @state: the mask of task states that can be woken
- * @sync: do a synchronous wakeup?
+ * @wake_flags: wake modifier flags (WF_*)
  *
  * Put it on the run-queue if it's not already there. The "current"
  * thread is always on the run-queue (except when the actual
@@ -2335,7 +2409,8 @@ void task_oncpu_function_call(struct task_struct *p,
  * the simpler "current->state = TASK_RUNNING" to mark yourself
  * runnable without the overhead of this.
  *
- * returns failure only if the task is already active.
+ * Returns %true if @p was woken up, %false if it was already running
+ * or @state didn't match @p's state.
  */
 static int try_to_wake_up(struct task_struct *p, unsigned int state,
 			  int wake_flags)
@@ -2406,57 +2481,61 @@
 
 out_activate:
 #endif /* CONFIG_SMP */
-	schedstat_inc(p, se.nr_wakeups);
-	if (wake_flags & WF_SYNC)
-		schedstat_inc(p, se.nr_wakeups_sync);
-	if (orig_cpu != cpu)
-		schedstat_inc(p, se.nr_wakeups_migrate);
-	if (cpu == this_cpu)
-		schedstat_inc(p, se.nr_wakeups_local);
-	else
-		schedstat_inc(p, se.nr_wakeups_remote);
-	activate_task(rq, p, 1);
+	ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
+		      cpu == this_cpu);
 	success = 1;
+out_running:
+	ttwu_woken_up(p, rq, wake_flags, success);
+out:
+	task_rq_unlock(rq, &flags);
+	put_cpu();
 
-	/*
-	 * Only attribute actual wakeups done by this task.
-	 */
-	if (!in_interrupt()) {
-		struct sched_entity *se = &current->se;
-		u64 sample = se->sum_exec_runtime;
-
-		if (se->last_wakeup)
-			sample -= se->last_wakeup;
-		else
-			sample -= se->start_runtime;
-		update_avg(&se->avg_wakeup, sample);
+	return success;
+}
 
-		se->last_wakeup = se->sum_exec_runtime;
-	}
+/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the to-be-woken-up thread
+ * @state: the mask of task states that can be woken
+ * @wake_flags: wake modifier flags (WF_*)
+ *
+ * Put @p on the run-queue if it's not already there. The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and @p is
+ * not the current task. this_rq() stays locked over invocation.
+ *
+ * This function can be called from wakeup and sleep scheduler
+ * notifiers. Be careful not to create deep recursion by chaining
+ * wakeup notifiers.
+ *
+ * Returns %true if @p was woken up, %false if it was already running
+ * or @state didn't match @p's state.
+ */
+bool try_to_wake_up_local(struct task_struct *p, unsigned int state,
+			  int wake_flags)
+{
+	struct rq *rq = task_rq(p);
+	bool success = false;
 
-out_running:
-	trace_sched_wakeup(rq, p, success);
-	check_preempt_curr(rq, p, wake_flags);
+	BUG_ON(rq != this_rq());
+	BUG_ON(p == current);
+	lockdep_assert_held(&rq->lock);
 
-	p->state = TASK_RUNNING;
-#ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (!sched_feat(SYNC_WAKEUPS))
+		wake_flags &= ~WF_SYNC;
 
-	if (unlikely(rq->idle_stamp)) {
-		u64 delta = rq->clock - rq->idle_stamp;
-		u64 max = 2*sysctl_sched_migration_cost;
+	if (!(p->state & state))
+		return false;
 
-		if (delta > max)
-			rq->avg_idle = max;
-		else
-			update_avg(&rq->avg_idle, delta);
-		rq->idle_stamp = 0;
+	if (!p->se.on_rq) {
+		if (likely(!task_running(rq, p))) {
+			schedstat_inc(rq, ttwu_count);
+			schedstat_inc(rq, ttwu_local);
+		}
+		ttwu_activate(p, rq, wake_flags & WF_SYNC, false, true);
+		success = true;
 	}
-#endif
-out:
-	task_rq_unlock(rq, &flags);
-	put_cpu();
+
+	ttwu_woken_up(p, rq, wake_flags, success);
 
 	return success;
 }
@@ -2538,10 +2617,7 @@ static void __sched_fork(struct task_struct *p)
 	INIT_LIST_HEAD(&p->rt.run_list);
 	p->se.on_rq = 0;
 	INIT_LIST_HEAD(&p->se.group_node);
-
-#ifdef CONFIG_PREEMPT_NOTIFIERS
-	INIT_HLIST_HEAD(&p->preempt_notifiers);
-#endif
+	INIT_HLIST_HEAD(&p->sched_notifiers);
 
 	/*
 	 * We mark the process as running here, but have not actually
@@ -2651,63 +2727,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	task_rq_unlock(rq, &flags);
 }
 
-#ifdef CONFIG_PREEMPT_NOTIFIERS
-
 /**
- * preempt_notifier_register - tell me when current is being preempted & rescheduled
+ * sched_notifier_register - register scheduler notifier
  * @notifier: notifier struct to register
  */
-void preempt_notifier_register(struct preempt_notifier *notifier)
+void sched_notifier_register(struct sched_notifier *notifier)
 {
-	hlist_add_head(&notifier->link, &current->preempt_notifiers);
+	hlist_add_head(&notifier->link, &current->sched_notifiers);
 }
-EXPORT_SYMBOL_GPL(preempt_notifier_register);
+EXPORT_SYMBOL_GPL(sched_notifier_register);
 
 /**
- * preempt_notifier_unregister - no longer interested in preemption notifications
+ * sched_notifier_unregister - unregister scheduler notifier
  * @notifier: notifier struct to unregister
  *
- * This is safe to call from within a preemption notifier.
+ * This is safe to call from within a scheduler notifier.
  */
-void preempt_notifier_unregister(struct preempt_notifier *notifier)
+void sched_notifier_unregister(struct sched_notifier *notifier)
 {
 	hlist_del(&notifier->link);
 }
-EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
-
-static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
-{
-	struct preempt_notifier *notifier;
-	struct hlist_node *node;
-
-	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
-		notifier->ops->sched_in(notifier, raw_smp_processor_id());
-}
-
-static void
-fire_sched_out_preempt_notifiers(struct task_struct *curr,
-				 struct task_struct *next)
-{
-	struct preempt_notifier *notifier;
-	struct hlist_node *node;
-
-	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
-		notifier->ops->sched_out(notifier, next);
-}
-
-#else /* !CONFIG_PREEMPT_NOTIFIERS */
-
-static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
-{
-}
-
-static void
-fire_sched_out_preempt_notifiers(struct task_struct *curr,
-				 struct task_struct *next)
-{
-}
-
-#endif /* CONFIG_PREEMPT_NOTIFIERS */
+EXPORT_SYMBOL_GPL(sched_notifier_unregister);
 
 /**
  * prepare_task_switch - prepare to switch tasks
@@ -2726,7 +2766,7 @@ static inline void
 prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
-	fire_sched_out_preempt_notifiers(prev, next);
+	fire_sched_notifier(current, out, next);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -2768,7 +2808,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	perf_event_task_sched_in(current, cpu_of(rq));
-	fire_sched_in_preempt_notifiers(current);
+	fire_sched_notifier(current, in, prev);
 	finish_lock_switch(rq, prev);
 
 	if (mm)
@@ -3133,7 +3173,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 		goto out;
 
 	/* force the process onto the specified CPU */
-	if (migrate_task(p, dest_cpu, &req)) {
+	if (migrate_task(p, dest_cpu, &req, false)) {
 		/* Need to wait for migration thread (might exit: take ref). */
 		struct task_struct *mt = rq->migration_thread;
 
@@ -5446,10 +5486,17 @@ need_resched_nonpreemptible:
 	clear_tsk_need_resched(prev);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-		if (unlikely(signal_pending_state(prev->state, prev)))
+		if (unlikely(signal_pending_state(prev->state, prev))) {
 			prev->state = TASK_RUNNING;
-		else
+		} else {
+			/*
+			 * Fire sleep notifier before changing any scheduler
+			 * state. This allows try_to_wake_up_local() to be
+			 * called from sleep notifiers.
+			 */
+			fire_sched_notifier(prev, sleep);
 			deactivate_task(rq, prev, 1);
+		}
 		switch_count = &prev->nvcsw;
 	}
 
@@ -7039,34 +7086,19 @@ static inline void sched_init_granularity(void)
  * 7) we wake up and the migration is done.
  */
 
-/*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
- */
-int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+static inline int __set_cpus_allowed(struct task_struct *p,
+				     const struct cpumask *new_mask,
+				     struct rq *rq, unsigned long *flags,
+				     bool force)
 {
 	struct migration_req req;
-	unsigned long flags;
-	struct rq *rq;
 	int ret = 0;
 
-	rq = task_rq_lock(p, &flags);
 	if (!cpumask_intersects(new_mask, cpu_online_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
-		     !cpumask_equal(&p->cpus_allowed, new_mask))) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	if (p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 	else {
@@ -7078,12 +7110,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
+	if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req,
+			 force)) {
 		/* Need help from migration thread: drop lock and wait. */
 		struct task_struct *mt = rq->migration_thread;
 
 		get_task_struct(mt);
-		task_rq_unlock(rq, &flags);
+		task_rq_unlock(rq, flags);
 		wake_up_process(rq->migration_thread);
 		put_task_struct(mt);
 		wait_for_completion(&req.done);
@@ -7091,13 +7124,54 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 		return 0;
 	}
 out:
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, flags);
 
 	return ret;
 }
+
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+	unsigned long flags;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &flags);
+
+	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
+		     !cpumask_equal(&p->cpus_allowed, new_mask))) {
+		task_rq_unlock(rq, &flags);
+		return -EINVAL;
+	}
+
+	return __set_cpus_allowed(p, new_mask, rq, &flags, false);
+}
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
 /*
+ * Similar to set_cpus_allowed_ptr() but bypasses PF_THREAD_BOUND
+ * check and ignores cpu_active() status as long as the cpu is online.
+ * The caller is responsible for guaranteeing that the destination
+ * cpus don't go down until this function finishes and in general
+ * ensuring things don't go bonkers.
+ */
+int force_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+	unsigned long flags;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &flags);
+	return __set_cpus_allowed(p, new_mask, rq, &flags, true);
+}
+
+/*
  * Move (not current) task off this cpu, onto dest cpu. We're doing
  * this because either it can't run here any more (set_cpus_allowed()
  * away from this CPU, or CPU going down), or because we're
@@ -7108,12 +7182,13 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
  *
  * Returns non-zero if task was successfully migrated.
  */
-static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu,
+			  bool force)
 {
 	struct rq *rq_dest, *rq_src;
 	int ret = 0, on_rq;
 
-	if (unlikely(!cpu_active(dest_cpu)))
+	if (!force && unlikely(!cpu_active(dest_cpu)))
 		return ret;
 
 	rq_src = cpu_rq(src_cpu);
@@ -7192,7 +7267,8 @@ static int migration_thread(void *data)
 
 		if (req->task != NULL) {
 			spin_unlock(&rq->lock);
-			__migrate_task(req->task, cpu, req->dest_cpu);
+			__migrate_task(req->task, cpu, req->dest_cpu,
+				       req->force);
 		} else if (likely(cpu == (badcpu = smp_processor_id()))) {
 			req->dest_cpu = RCU_MIGRATION_GOT_QS;
 			spin_unlock(&rq->lock);
@@ -7217,7 +7293,7 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 	int ret;
 
 	local_irq_disable();
-	ret = __migrate_task(p, src_cpu, dest_cpu);
+	ret = __migrate_task(p, src_cpu, dest_cpu, false);
 	local_irq_enable();
 	return ret;
 }
@@ -9569,9 +9645,7 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
-#ifdef CONFIG_PREEMPT_NOTIFIERS
-	INIT_HLIST_HEAD(&init_task.preempt_notifiers);
-#endif
+	INIT_HLIST_HEAD(&init_task.sched_notifiers);
 
 #ifdef CONFIG_SMP
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
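
The hunks above replace the CONFIG_PREEMPT_NOTIFIERS machinery with always-built scheduler notifiers that fire on four events: in (switch-in), out (switch-out), wakeup and sleep. The struct sched_notifier and its ops definitions live in the include/linux/sched.h half of the series and are not part of this kernel/sched.c diff, so the following is only a minimal sketch of a hypothetical user; the ops-struct name (sched_notifier_ops), the member names and the callback signatures are inferred from the fire_sched_notifier() call sites, and my_tracker/my_tracker_ops are illustrative, not part of the patch.

/* Hypothetical sched_notifier user; layouts inferred from this diff. */
#include <linux/kernel.h>
#include <linux/sched.h>

struct my_tracker {
	struct sched_notifier	notifier;
	unsigned long		nr_sleeps;
};

/* sleep callback: fired from schedule() just before deactivate_task() */
static void my_tracker_sleep(struct sched_notifier *sn)
{
	struct my_tracker *t = container_of(sn, struct my_tracker, notifier);

	t->nr_sleeps++;
}

static struct sched_notifier_ops my_tracker_ops = {
	/* callbacks left NULL are simply skipped by fire_sched_notifier() */
	.sleep	= my_tracker_sleep,
};

/* must run in the context of the task being watched */
static void my_tracker_attach(struct my_tracker *t)
{
	t->notifier.ops = &my_tracker_ops;
	t->nr_sleeps = 0;
	sched_notifier_register(&t->notifier);	/* adds to current->sched_notifiers */
}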
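try_to_wake_up_local() is what makes the sleep notifier useful: it wakes a task that is bound to the same runqueue while this_rq()->lock is already held, which an ordinary try_to_wake_up() cannot do from that context. Continuing the same hypothetical setup as above, a sleep notifier that kicks a per-CPU helper kthread might look roughly like this; my_pool and rescuer are assumed names, and the rescuer must already be bound to this CPU and must not be the current task, per the BUG_ON checks in try_to_wake_up_local().

/* Hypothetical sleep notifier that wakes a CPU-bound helper kthread. */
struct my_pool {
	struct sched_notifier	notifier;
	struct task_struct	*rescuer;	/* kthread bound to this CPU */
};

static void my_pool_sleep(struct sched_notifier *sn)
{
	struct my_pool *pool = container_of(sn, struct my_pool, notifier);

	/*
	 * schedule() fires the sleep notifier with this_rq()->lock held and
	 * before deactivate_task(), so waking another task bound to this
	 * runqueue is legal here.
	 */
	try_to_wake_up_local(pool->rescuer, TASK_NORMAL, 0);
}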
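The force flag threaded through migrate_task() and __migrate_task() exists to back force_cpus_allowed(), which differs from set_cpus_allowed_ptr() in the two ways visible above: it skips the PF_THREAD_BOUND check and only requires the destination CPU to be online rather than active. A caller that coordinates with CPU hotplug itself could use it roughly as sketched below; rebind_bound_kthread() and target_cpu are illustrative assumptions, and the force_cpus_allowed() declaration would come from the header side of the series.

/* Hypothetical caller that rebinds a PF_THREAD_BOUND kthread. */
static int rebind_bound_kthread(struct task_struct *kthread, int target_cpu)
{
	if (!cpu_online(target_cpu))
		return -ENODEV;

	/*
	 * The caller must guarantee target_cpu stays online until the
	 * migration completes; force_cpus_allowed() only checks
	 * cpu_online_mask, not cpu_active_mask.
	 */
	return force_cpus_allowed(kthread, cpumask_of(target_cpu));
}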