path: root/kernel/sched/sched.h
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--  kernel/sched/sched.h | 78
 1 file changed, 59 insertions(+), 19 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c7cf4cc57cdd..c8512a9fb022 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -572,7 +572,7 @@ extern void sched_online_group(struct task_group *tg,
extern void sched_destroy_group(struct task_group *tg);
extern void sched_release_group(struct task_group *tg);
-extern void sched_move_task(struct task_struct *tsk);
+extern void sched_move_task(struct task_struct *tsk, bool for_autogroup);
#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
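
The extra for_autogroup argument lets sched_move_task() distinguish an autogroup move (same cgroup, only the autogroup task_group changes) from a genuine cgroup migration, so cgroup-scoped side effects only run for the latter. Below is an illustrative caller-side sketch, not a quote of this commit; the call sites shown are assumptions based on the usual autogroup and cgroup-attach paths.

/* Illustrative sketch -- assumed call sites, not part of this diff. */

/* kernel/sched/autogroup.c: task leaves its autogroup; cgroup is unchanged */
void sched_autogroup_exit_task(struct task_struct *p)
{
	/* for_autogroup=true: skip cgroup-migration side effects */
	sched_move_task(p, true);
}

/* kernel/sched/core.c: a real cgroup migration via cgroup attach */
static void cpu_cgroup_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *css;

	cgroup_taskset_for_each(task, css, tset)
		sched_move_task(task, false);	/* for_autogroup=false */
}
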
@@ -759,6 +759,7 @@ enum scx_rq_flags {
SCX_RQ_BAL_PENDING = 1 << 2, /* balance hasn't run yet */
SCX_RQ_BAL_KEEP = 1 << 3, /* balance decided to keep current */
SCX_RQ_BYPASSING = 1 << 4,
+ SCX_RQ_CLK_VALID = 1 << 5, /* RQ clock is fresh and valid */
SCX_RQ_IN_WAKEUP = 1 << 16,
SCX_RQ_IN_BALANCE = 1 << 17,
@@ -771,9 +772,10 @@ struct scx_rq {
unsigned long ops_qseq;
u64 extra_enq_flags; /* see move_task_to_local_dsq() */
u32 nr_running;
- u32 flags;
u32 cpuperf_target; /* [0, SCHED_CAPACITY_SCALE] */
bool cpu_released;
+ u32 flags;
+ u64 clock; /* current per-rq clock -- see scx_bpf_now() */
cpumask_var_t cpus_to_kick;
cpumask_var_t cpus_to_kick_if_idle;
cpumask_var_t cpus_to_preempt;
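
Together, the new SCX_RQ_CLK_VALID flag and the per-rq clock field let a BPF scheduler read a recent rq clock without forcing a fresh clock update. A minimal consumer-side sketch follows, assuming the scx_bpf_now() kfunc named in the comment; the real implementation lives in kernel/sched/ext.c and may differ (e.g. monotonicity handling), so treat this as a reading of the intended protocol, with scx_rq_clock_read() as a hypothetical helper name.

/* Minimal reader sketch, paired with the release in scx_rq_clock_update(). */
static u64 scx_rq_clock_read(struct rq *rq)
{
	u64 clock;

	/*
	 * The acquire pairs with smp_store_release() in scx_rq_clock_update():
	 * if SCX_RQ_CLK_VALID is observed, the clock store is visible too.
	 */
	if (smp_load_acquire(&rq->scx.flags) & SCX_RQ_CLK_VALID)
		clock = READ_ONCE(rq->scx.clock);
	else
		clock = sched_clock_cpu(cpu_of(rq));	/* stale: fall back */

	return clock;
}
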
@@ -1722,6 +1724,38 @@ struct rq_flags {
extern struct balance_callback balance_push_callback;
+#ifdef CONFIG_SCHED_CLASS_EXT
+extern const struct sched_class ext_sched_class;
+
+DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */
+DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */
+
+#define scx_enabled() static_branch_unlikely(&__scx_ops_enabled)
+#define scx_switched_all() static_branch_unlikely(&__scx_switched_all)
+
+static inline void scx_rq_clock_update(struct rq *rq, u64 clock)
+{
+ if (!scx_enabled())
+ return;
+ WRITE_ONCE(rq->scx.clock, clock);
+ smp_store_release(&rq->scx.flags, rq->scx.flags | SCX_RQ_CLK_VALID);
+}
+
+static inline void scx_rq_clock_invalidate(struct rq *rq)
+{
+ if (!scx_enabled())
+ return;
+ WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
+}
+
+#else /* !CONFIG_SCHED_CLASS_EXT */
+#define scx_enabled() false
+#define scx_switched_all() false
+
+static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
+static inline void scx_rq_clock_invalidate(struct rq *rq) {}
+#endif /* !CONFIG_SCHED_CLASS_EXT */
+
/*
* Lockdep annotation that avoids accidental unlocks; it's like a
* sticky/continuous lockdep_assert_held().
@@ -1751,7 +1785,7 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
if (rq->clock_update_flags > RQCF_ACT_SKIP)
rf->clock_update_flags = RQCF_UPDATED;
#endif
-
+ scx_rq_clock_invalidate(rq);
lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
}
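
The invalidate call added to rq_unpin_lock() above is one half of the protocol: once the rq lock is about to be released, the cached clock can no longer be trusted. The producer half is expected to sit in update_rq_clock(), publishing each freshly computed value. The condensed sketch below shows that pairing; it is an assumption about the core.c side of this series (the real function also handles clock_update_flags) and is not part of this header diff.

/* Condensed sketch of the producer side (assumed update_rq_clock() flow). */
void update_rq_clock(struct rq *rq)
{
	s64 delta;

	lockdep_assert_rq_held(rq);

	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
	if (delta < 0)
		return;
	rq->clock += delta;

	/* Publish the fresh value; readers see it via SCX_RQ_CLK_VALID. */
	scx_rq_clock_update(rq, rq->clock);

	update_rq_clock_task(rq, delta);
}
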
@@ -2510,19 +2544,6 @@ extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;
-#ifdef CONFIG_SCHED_CLASS_EXT
-extern const struct sched_class ext_sched_class;
-
-DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */
-DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */
-
-#define scx_enabled() static_branch_unlikely(&__scx_ops_enabled)
-#define scx_switched_all() static_branch_unlikely(&__scx_switched_all)
-#else /* !CONFIG_SCHED_CLASS_EXT */
-#define scx_enabled() false
-#define scx_switched_all() false
-#endif /* !CONFIG_SCHED_CLASS_EXT */
-
/*
* Iterate only active classes. SCX can take over all fair tasks or be
* completely disabled. If the former, skip fair. If the latter, skip SCX.
@@ -3677,10 +3698,28 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
{
struct cpumask *cidmask = mm_cidmask(mm);
struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
- int cid = __this_cpu_read(pcpu_cid->recent_cid);
+ int cid, max_nr_cid, allowed_max_nr_cid;
+ /*
+ * After shrinking the number of threads or reducing the number
+ * of allowed cpus, reduce the value of max_nr_cid so expansion
+ * of cid allocation will preserve cache locality if the number
+ * of threads or allowed cpus increase again.
+ */
+ max_nr_cid = atomic_read(&mm->max_nr_cid);
+ while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed),
+ atomic_read(&mm->mm_users))),
+ max_nr_cid > allowed_max_nr_cid) {
+ /* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */
+ if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) {
+ max_nr_cid = allowed_max_nr_cid;
+ break;
+ }
+ }
/* Try to re-use recent cid. This improves cache locality. */
- if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask))
+ cid = __this_cpu_read(pcpu_cid->recent_cid);
+ if (!mm_cid_is_unset(cid) && cid < max_nr_cid &&
+ !cpumask_test_and_set_cpu(cid, cidmask))
return cid;
/*
* Expand cid allocation if the maximum number of concurrency
@@ -3688,8 +3727,9 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
* and number of threads. Expanding cid allocation as much as
* possible improves cache locality.
*/
- cid = atomic_read(&mm->max_nr_cid);
+ cid = max_nr_cid;
while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) {
+ /* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */
if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1))
continue;
if (!cpumask_test_and_set_cpu(cid, cidmask))
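
Both loops above lean on the atomic_try_cmpxchg() convention that a failed compare-and-exchange reloads the current value of mm->max_nr_cid into the local variable, so no explicit re-read is needed before retrying. The standalone C11 snippet below illustrates that clamp-down pattern in isolation; it uses plain stdatomic primitives and toy names, not kernel code.

#include <stdatomic.h>
#include <stdio.h>

/*
 * Clamp an atomic counter down to 'limit', tolerating concurrent updates.
 * Mirrors the max_nr_cid shrink loop: on CAS failure, 'cur' is refreshed
 * with the latest value, so the loop simply re-tests the condition.
 */
static void clamp_atomic_max(atomic_int *max, int limit)
{
	int cur = atomic_load(max);

	while (cur > limit) {
		if (atomic_compare_exchange_weak(max, &cur, limit))
			break;	/* we installed the clamped value */
		/* CAS failed: 'cur' now holds the current value; re-test. */
	}
}

int main(void)
{
	atomic_int max_nr_cid = 8;

	/* e.g. the process dropped to 3 threads / allowed CPUs */
	clamp_atomic_max(&max_nr_cid, 3);
	printf("max_nr_cid = %d\n", atomic_load(&max_nr_cid));
	return 0;
}
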