Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--  kernel/sched/sched.h | 78
1 file changed, 59 insertions, 19 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c7cf4cc57cdd..c8512a9fb022 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -572,7 +572,7 @@ extern void sched_online_group(struct task_group *tg,
 extern void sched_destroy_group(struct task_group *tg);
 extern void sched_release_group(struct task_group *tg);
 
-extern void sched_move_task(struct task_struct *tsk);
+extern void sched_move_task(struct task_struct *tsk, bool for_autogroup);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
@@ -759,6 +759,7 @@ enum scx_rq_flags {
 	SCX_RQ_BAL_PENDING	= 1 << 2, /* balance hasn't run yet */
 	SCX_RQ_BAL_KEEP		= 1 << 3, /* balance decided to keep current */
 	SCX_RQ_BYPASSING	= 1 << 4,
+	SCX_RQ_CLK_VALID	= 1 << 5, /* RQ clock is fresh and valid */
 
 	SCX_RQ_IN_WAKEUP	= 1 << 16,
 	SCX_RQ_IN_BALANCE	= 1 << 17,
@@ -771,9 +772,10 @@ struct scx_rq {
 	unsigned long		ops_qseq;
 	u64			extra_enq_flags;	/* see move_task_to_local_dsq() */
 	u32			nr_running;
-	u32			flags;
 	u32			cpuperf_target;		/* [0, SCHED_CAPACITY_SCALE] */
 	bool			cpu_released;
+	u32			flags;
+	u64			clock;			/* current per-rq clock -- see scx_bpf_now() */
 	cpumask_var_t		cpus_to_kick;
 	cpumask_var_t		cpus_to_kick_if_idle;
 	cpumask_var_t		cpus_to_preempt;
@@ -1722,6 +1724,38 @@ struct rq_flags {
 
 extern struct balance_callback balance_push_callback;
 
+#ifdef CONFIG_SCHED_CLASS_EXT
+extern const struct sched_class ext_sched_class;
+
+DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled);	/* SCX BPF scheduler loaded */
+DECLARE_STATIC_KEY_FALSE(__scx_switched_all);	/* all fair class tasks on SCX */
+
+#define scx_enabled()		static_branch_unlikely(&__scx_ops_enabled)
+#define scx_switched_all()	static_branch_unlikely(&__scx_switched_all)
+
+static inline void scx_rq_clock_update(struct rq *rq, u64 clock)
+{
+	if (!scx_enabled())
+		return;
+	WRITE_ONCE(rq->scx.clock, clock);
+	smp_store_release(&rq->scx.flags, rq->scx.flags | SCX_RQ_CLK_VALID);
+}
+
+static inline void scx_rq_clock_invalidate(struct rq *rq)
+{
+	if (!scx_enabled())
+		return;
+	WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
+}
+
+#else /* !CONFIG_SCHED_CLASS_EXT */
+#define scx_enabled()		false
+#define scx_switched_all()	false
+
+static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
+static inline void scx_rq_clock_invalidate(struct rq *rq) {}
+#endif /* !CONFIG_SCHED_CLASS_EXT */
+
 /*
  * Lockdep annotation that avoids accidental unlocks; it's like a
  * sticky/continuous lockdep_assert_held().
@@ -1751,7 +1785,7 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
 	if (rq->clock_update_flags > RQCF_ACT_SKIP)
 		rf->clock_update_flags = RQCF_UPDATED;
 #endif
-
+	scx_rq_clock_invalidate(rq);
 	lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
 }
 
@@ -2510,19 +2544,6 @@ extern const struct sched_class rt_sched_class;
 extern const struct sched_class fair_sched_class;
 extern const struct sched_class idle_sched_class;
 
-#ifdef CONFIG_SCHED_CLASS_EXT
-extern const struct sched_class ext_sched_class;
-
-DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled);	/* SCX BPF scheduler loaded */
-DECLARE_STATIC_KEY_FALSE(__scx_switched_all);	/* all fair class tasks on SCX */
-
-#define scx_enabled()		static_branch_unlikely(&__scx_ops_enabled)
-#define scx_switched_all()	static_branch_unlikely(&__scx_switched_all)
-#else /* !CONFIG_SCHED_CLASS_EXT */
-#define scx_enabled()		false
-#define scx_switched_all()	false
-#endif /* !CONFIG_SCHED_CLASS_EXT */
-
 /*
  * Iterate only active classes. SCX can take over all fair tasks or be
  * completely disabled. If the former, skip fair. If the latter, skip SCX.
@@ -3677,10 +3698,28 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
 {
 	struct cpumask *cidmask = mm_cidmask(mm);
 	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
-	int cid = __this_cpu_read(pcpu_cid->recent_cid);
+	int cid, max_nr_cid, allowed_max_nr_cid;
 
+	/*
+	 * After shrinking the number of threads or reducing the number
+	 * of allowed cpus, reduce the value of max_nr_cid so expansion
+	 * of cid allocation will preserve cache locality if the number
+	 * of threads or allowed cpus increase again.
+	 */
+	max_nr_cid = atomic_read(&mm->max_nr_cid);
+	while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed),
+					   atomic_read(&mm->mm_users))),
+	       max_nr_cid > allowed_max_nr_cid) {
+		/* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */
+		if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) {
+			max_nr_cid = allowed_max_nr_cid;
+			break;
+		}
+	}
 	/* Try to re-use recent cid. This improves cache locality. */
-	if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask))
+	cid = __this_cpu_read(pcpu_cid->recent_cid);
+	if (!mm_cid_is_unset(cid) && cid < max_nr_cid &&
+	    !cpumask_test_and_set_cpu(cid, cidmask))
 		return cid;
 	/*
 	 * Expand cid allocation if the maximum number of concurrency
@@ -3688,8 +3727,9 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
 	 * and number of threads. Expanding cid allocation as much as
 	 * possible improves cache locality.
 	 */
-	cid = atomic_read(&mm->max_nr_cid);
+	cid = max_nr_cid;
 	while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) {
+		/* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */
 		if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1))
 			continue;
 		if (!cpumask_test_and_set_cpu(cid, cidmask))
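The SCX_RQ_CLK_VALID hunks above publish a per-rq clock by writing rq->scx.clock with WRITE_ONCE() and then setting the validity bit with smp_store_release(), while rq_unpin_lock() clears the bit so a stale clock is never trusted. The following is a minimal userspace C11 sketch of that publish/consume pattern only, not the kernel implementation: struct model_rq, model_now() and the clock_gettime() fallback are illustrative assumptions standing in for struct scx_rq, scx_bpf_now() and a freshly updated rq clock.

/*
 * Userspace model (illustrative, not kernel code) of the SCX_RQ_CLK_VALID
 * pattern: the updater stores the clock, then sets the validity bit with
 * release semantics; a reader checks the bit with acquire semantics before
 * trusting the cached clock, and falls back to a fresh timestamp otherwise.
 */
#define _POSIX_C_SOURCE 200809L
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define RQ_CLK_VALID (1u << 5)	/* mirrors SCX_RQ_CLK_VALID */

struct model_rq {
	_Atomic uint64_t clock;	/* cached per-rq clock */
	_Atomic uint32_t flags;	/* validity bit lives here */
};

static uint64_t fresh_clock(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Analogous to scx_rq_clock_update(): publish the clock, then mark it valid. */
static void model_clock_update(struct model_rq *rq, uint64_t clk)
{
	atomic_store_explicit(&rq->clock, clk, memory_order_relaxed);
	atomic_fetch_or_explicit(&rq->flags, RQ_CLK_VALID, memory_order_release);
}

/* Analogous to scx_rq_clock_invalidate(): clear the validity bit. */
static void model_clock_invalidate(struct model_rq *rq)
{
	atomic_fetch_and_explicit(&rq->flags, ~RQ_CLK_VALID, memory_order_relaxed);
}

/* Reader: use the cached clock only while the validity bit is set. */
static uint64_t model_now(struct model_rq *rq)
{
	if (atomic_load_explicit(&rq->flags, memory_order_acquire) & RQ_CLK_VALID)
		return atomic_load_explicit(&rq->clock, memory_order_relaxed);
	return fresh_clock();
}

int main(void)
{
	struct model_rq rq = { 0 };

	model_clock_update(&rq, fresh_clock());
	printf("cached:   %llu\n", (unsigned long long)model_now(&rq));
	model_clock_invalidate(&rq);
	printf("fallback: %llu\n", (unsigned long long)model_now(&rq));
	return 0;
}

The release store on the flags word orders the clock write before the validity bit becomes visible, so a reader that observes the bit with an acquire load also observes the matching clock value; in the kernel the updater additionally runs under the rq lock, which the model does not attempt to reproduce.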
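The __mm_cid_try_get() hunks clamp mm->max_nr_cid down to min(mm->nr_cpus_allowed, mm->mm_users) before reusing or expanding a concurrency ID, so the cid range stays compact after a process drops threads or shrinks its CPU affinity. Below is a minimal userspace C11 sketch of that compare-exchange clamp; clamp_max_nr_cid() and its parameters are illustrative assumptions, and the allowed bound is computed once rather than re-evaluated on every retry as the kernel loop does.

/*
 * Userspace sketch (illustrative names, not kernel API) of the max_nr_cid
 * clamping loop: lower the bound to min(nr_cpus_allowed, mm_users) with a
 * compare-exchange so later cid allocation stays dense and cache-local.
 */
#include <stdatomic.h>
#include <stdio.h>

/* Clamp *max_nr_cid down to the allowed maximum; return the bound in effect. */
static int clamp_max_nr_cid(_Atomic int *max_nr_cid, int nr_cpus_allowed, int mm_users)
{
	int allowed_max = nr_cpus_allowed < mm_users ? nr_cpus_allowed : mm_users;
	int cur = atomic_load_explicit(max_nr_cid, memory_order_relaxed);

	while (cur > allowed_max) {
		/* On failure, compare_exchange reloads the current value into cur. */
		if (atomic_compare_exchange_weak(max_nr_cid, &cur, allowed_max))
			return allowed_max;
	}
	return cur;
}

int main(void)
{
	_Atomic int max_nr_cid = 16;

	/* A process that shrank from 16 threads to 4 on an 8-CPU affinity mask. */
	printf("bound after shrink: %d\n", clamp_max_nr_cid(&max_nr_cid, 8, 4));
	/* Growing again starts expansion from the compacted bound. */
	printf("bound when stable:  %d\n", clamp_max_nr_cid(&max_nr_cid, 8, 8));
	return 0;
}

Expansion in the second hunk then starts from this compacted bound instead of the old atomic_read(&mm->max_nr_cid), which is what keeps newly allocated cids dense when the thread count or affinity grows again.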