summary | refs | log | tree | commit | diff
path: root/drivers
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-02-20 17:41:08 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-02-20 17:41:08 -0800
commit: 1f2d9ffc7a5f916935749ffc6e93fb33bfe94d2f (patch)
tree: a5dabaa924d50867cbe347e20a7643b2850f11c0 /drivers
parent: a2f0e7eee1344eb9f91b22bc72d9eb0a52b849c9 (diff)
parent: 7c4a5b89a0b5a57a64b601775b296abf77a9fe97 (diff)
Merge tag 'sched-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:

 - Improve the scalability of the CFS bandwidth unthrottling logic with large number of CPUs.
 - Fix & rework various cpuidle routines, simplify interaction with the generic scheduler code. Add __cpuidle methods as noinstr to objtool's noinstr detection and fix boatloads of cpuidle bugs & quirks.
 - Add new ABI: introduce MEMBARRIER_CMD_GET_REGISTRATIONS, to query previously issued registrations.
 - Limit scheduler slice duration to the sysctl_sched_latency period, to improve scheduling granularity with a large number of SCHED_IDLE tasks.
 - Debuggability enhancement on sys_exit(): warn about disabled IRQs, but also enable them to prevent a cascade of followup problems and repeat warnings.
 - Fix the rescheduling logic in prio_changed_dl().
 - Micro-optimize cpufreq and sched-util methods.
 - Micro-optimize ttwu_runnable()
 - Micro-optimize the idle-scanning in update_numa_stats(), select_idle_capacity() and steal_cookie_task().
 - Update the RSEQ code & self-tests
 - Constify various scheduler methods
 - Remove unused methods
 - Refine __init tags
 - Documentation updates
 - Misc other cleanups, fixes

* tag 'sched-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (110 commits)
  sched/rt: pick_next_rt_entity(): check list_entry
  sched/deadline: Add more reschedule cases to prio_changed_dl()
  sched/fair: sanitize vruntime of entity being placed
  sched/fair: Remove capacity inversion detection
  sched/fair: unlink misfit task from cpu overutilized
  objtool: mem*() are not uaccess safe
  cpuidle: Fix poll_idle() noinstr annotation
  sched/clock: Make local_clock() noinstr
  sched/clock/x86: Mark sched_clock() noinstr
  x86/pvclock: Improve atomic update of last_value in pvclock_clocksource_read()
  x86/atomics: Always inline arch_atomic64*()
  cpuidle: tracing, preempt: Squash _rcuidle tracing
  cpuidle: tracing: Warn about !rcu_is_watching()
  cpuidle: lib/bug: Disable rcu_is_watching() during WARN/BUG
  cpuidle: drivers: firmware: psci: Dont instrument suspend code
  KVM: selftests: Fix build of rseq test
  exit: Detect and fix irq disabled state in oops
  cpuidle, arm64: Fix the ARM64 cpuidle logic
  cpuidle: mvebu: Fix duplicate flags assignment
  sched/fair: Limit sched slice duration
  ...
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/acpi/processor_idle.c | 28
-rw-r--r-- drivers/base/power/runtime.c | 24
-rw-r--r-- drivers/clk/clk.c | 8
-rw-r--r-- drivers/cpuidle/cpuidle-arm.c | 4
-rw-r--r-- drivers/cpuidle/cpuidle-big_little.c | 12
-rw-r--r-- drivers/cpuidle/cpuidle-mvebu-v7.c | 15
-rw-r--r-- drivers/cpuidle/cpuidle-psci.c | 22
-rw-r--r-- drivers/cpuidle/cpuidle-qcom-spm.c | 4
-rw-r--r-- drivers/cpuidle/cpuidle-riscv-sbi.c | 19
-rw-r--r-- drivers/cpuidle/cpuidle-tegra.c | 31
-rw-r--r-- drivers/cpuidle/cpuidle.c | 72
-rw-r--r-- drivers/cpuidle/dt_idle_states.c | 2
-rw-r--r-- drivers/cpuidle/poll_state.c | 8
-rw-r--r-- drivers/firmware/psci/psci.c | 42
-rw-r--r-- drivers/idle/intel_idle.c | 19
-rw-r--r-- drivers/perf/arm_pmu.c | 11
-rw-r--r-- drivers/perf/riscv_pmu_sbi.c | 8
17 files changed, 193 insertions, 136 deletions
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 7bf882fcd64b..7f77710c86fc 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -109,8 +109,8 @@ static const struct dmi_system_id processor_power_dmi_table[] = {
static void __cpuidle acpi_safe_halt(void)
{
if (!tif_need_resched()) {
- safe_halt();
- local_irq_disable();
+ raw_safe_halt();
+ raw_local_irq_disable();
}
}
@@ -523,8 +523,11 @@ static int acpi_idle_bm_check(void)
return bm_status;
}
-static void wait_for_freeze(void)
+static __cpuidle void io_idle(unsigned long addr)
{
+ /* IO port based C-state */
+ inb(addr);
+
#ifdef CONFIG_X86
/* No delay is needed if we are in guest */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@@ -569,9 +572,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
} else if (cx->entry_method == ACPI_CSTATE_HALT) {
acpi_safe_halt();
} else {
- /* IO port based C-state */
- inb(cx->address);
- wait_for_freeze();
+ io_idle(cx->address);
}
perf_lopwr_cb(false);
@@ -593,8 +594,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
if (cx->entry_method == ACPI_CSTATE_HALT)
safe_halt();
else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
- inb(cx->address);
- wait_for_freeze();
+ io_idle(cx->address);
} else
return -ENODEV;
@@ -607,7 +607,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
return 0;
}
-static bool acpi_idle_fallback_to_c1(struct acpi_processor *pr)
+static __always_inline bool acpi_idle_fallback_to_c1(struct acpi_processor *pr)
{
return IS_ENABLED(CONFIG_HOTPLUG_CPU) && !pr->flags.has_cst &&
!(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED);
@@ -642,6 +642,8 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
*/
bool dis_bm = pr->flags.bm_control;
+ instrumentation_begin();
+
/* If we can skip BM, demote to a safe state. */
if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
dis_bm = false;
@@ -663,11 +665,11 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
raw_spin_unlock(&c3_lock);
}
- ct_idle_enter();
+ ct_cpuidle_enter();
acpi_idle_do_entry(cx);
- ct_idle_exit();
+ ct_cpuidle_exit();
/* Re-enable bus master arbitration */
if (dis_bm) {
@@ -677,6 +679,8 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
raw_spin_unlock(&c3_lock);
}
+ instrumentation_end();
+
return index;
}
@@ -1219,6 +1223,8 @@ static int acpi_processor_setup_lpi_states(struct acpi_processor *pr)
state->target_residency = lpi->min_residency;
if (lpi->arch_flags)
state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+ if (i != 0 && lpi->entry_method == ACPI_CSTATE_FFH)
+ state->flags |= CPUIDLE_FLAG_RCU_IDLE;
state->enter = acpi_idle_lpi_enter;
drv->safe_state_index = i;
}
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 50e726b6c2cf..98f7b3d7d669 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -468,7 +468,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
int (*callback)(struct device *);
int retval;
- trace_rpm_idle_rcuidle(dev, rpmflags);
+ trace_rpm_idle(dev, rpmflags);
retval = rpm_check_suspend_allowed(dev);
if (retval < 0)
; /* Conditions are wrong. */
@@ -508,7 +508,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
dev->power.request_pending = true;
queue_work(pm_wq, &dev->power.work);
}
- trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0);
+ trace_rpm_return_int(dev, _THIS_IP_, 0);
return 0;
}
@@ -530,7 +530,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
wake_up_all(&dev->power.wait_queue);
out:
- trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
+ trace_rpm_return_int(dev, _THIS_IP_, retval);
return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
}
@@ -562,7 +562,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
struct device *parent = NULL;
int retval;
- trace_rpm_suspend_rcuidle(dev, rpmflags);
+ trace_rpm_suspend(dev, rpmflags);
repeat:
retval = rpm_check_suspend_allowed(dev);
@@ -713,7 +713,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
}
out:
- trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
+ trace_rpm_return_int(dev, _THIS_IP_, retval);
return retval;
@@ -765,7 +765,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
struct device *parent = NULL;
int retval = 0;
- trace_rpm_resume_rcuidle(dev, rpmflags);
+ trace_rpm_resume(dev, rpmflags);
repeat:
if (dev->power.runtime_error) {
@@ -935,7 +935,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
spin_lock_irq(&dev->power.lock);
}
- trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
+ trace_rpm_return_int(dev, _THIS_IP_, retval);
return retval;
}
@@ -1091,7 +1091,7 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
if (retval < 0) {
return retval;
} else if (retval > 0) {
- trace_rpm_usage_rcuidle(dev, rpmflags);
+ trace_rpm_usage(dev, rpmflags);
return 0;
}
}
@@ -1129,7 +1129,7 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
if (retval < 0) {
return retval;
} else if (retval > 0) {
- trace_rpm_usage_rcuidle(dev, rpmflags);
+ trace_rpm_usage(dev, rpmflags);
return 0;
}
}
@@ -1212,7 +1212,7 @@ int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count)
} else {
retval = atomic_inc_not_zero(&dev->power.usage_count);
}
- trace_rpm_usage_rcuidle(dev, 0);
+ trace_rpm_usage(dev, 0);
spin_unlock_irqrestore(&dev->power.lock, flags);
return retval;
@@ -1576,7 +1576,7 @@ void pm_runtime_allow(struct device *dev)
if (ret == 0)
rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
else if (ret > 0)
- trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
+ trace_rpm_usage(dev, RPM_AUTO | RPM_ASYNC);
out:
spin_unlock_irq(&dev->power.lock);
@@ -1646,7 +1646,7 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
atomic_inc(&dev->power.usage_count);
rpm_resume(dev, 0);
} else {
- trace_rpm_usage_rcuidle(dev, 0);
+ trace_rpm_usage(dev, 0);
}
}
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index e62552a75f08..f6d7c6a9a654 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1055,12 +1055,12 @@ static void clk_core_disable(struct clk_core *core)
if (--core->enable_count > 0)
return;
- trace_clk_disable_rcuidle(core);
+ trace_clk_disable(core);
if (core->ops->disable)
core->ops->disable(core->hw);
- trace_clk_disable_complete_rcuidle(core);
+ trace_clk_disable_complete(core);
clk_core_disable(core->parent);
}
@@ -1114,12 +1114,12 @@ static int clk_core_enable(struct clk_core *core)
if (ret)
return ret;
- trace_clk_enable_rcuidle(core);
+ trace_clk_enable(core);
if (core->ops->enable)
ret = core->ops->enable(core->hw);
- trace_clk_enable_complete_rcuidle(core);
+ trace_clk_enable_complete(core);
if (ret) {
clk_core_disable(core->parent);
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 8c758920d699..7cfb980a357d 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -31,8 +31,8 @@
* Called from the CPUidle framework to program the device to the
* specified target state selected by the governor.
*/
-static int arm_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int arm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
/*
* Pass idle state index to arm_cpuidle_suspend which in turn
diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c
index abe51185f243..74972deda0ea 100644
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -64,7 +64,8 @@ static struct cpuidle_driver bl_idle_little_driver = {
.enter = bl_enter_powerdown,
.exit_latency = 700,
.target_residency = 2500,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C1",
.desc = "ARM little-cluster power down",
},
@@ -85,7 +86,8 @@ static struct cpuidle_driver bl_idle_big_driver = {
.enter = bl_enter_powerdown,
.exit_latency = 500,
.target_residency = 2000,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C1",
.desc = "ARM big-cluster power down",
},
@@ -120,15 +122,17 @@ static int notrace bl_powerdown_finisher(unsigned long arg)
* Called from the CPUidle framework to program the device to the
* specified target state selected by the governor.
*/
-static int bl_enter_powerdown(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
cpu_pm_enter();
+ ct_cpuidle_enter();
cpu_suspend(0, bl_powerdown_finisher);
/* signals the MCPM core that CPU is out of low power state */
mcpm_cpu_powered_up();
+ ct_cpuidle_exit();
cpu_pm_exit();
diff --git a/drivers/cpuidle/cpuidle-mvebu-v7.c b/drivers/cpuidle/cpuidle-mvebu-v7.c
index 01a856971f05..563dba609b98 100644
--- a/drivers/cpuidle/cpuidle-mvebu-v7.c
+++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
@@ -25,9 +25,9 @@
static int (*mvebu_v7_cpu_suspend)(int);
-static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv,
- int index)
+static __cpuidle int mvebu_v7_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
int ret;
bool deepidle = false;
@@ -36,7 +36,10 @@ static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
deepidle = true;
+ ct_cpuidle_enter();
ret = mvebu_v7_cpu_suspend(deepidle);
+ ct_cpuidle_exit();
+
cpu_pm_exit();
if (ret)
@@ -53,6 +56,7 @@ static struct cpuidle_driver armadaxp_idle_driver = {
.exit_latency = 100,
.power_usage = 50,
.target_residency = 1000,
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.name = "MV CPU IDLE",
.desc = "CPU power down",
},
@@ -61,7 +65,7 @@ static struct cpuidle_driver armadaxp_idle_driver = {
.exit_latency = 1000,
.power_usage = 5,
.target_residency = 10000,
- .flags = MVEBU_V7_FLAG_DEEP_IDLE,
+ .flags = MVEBU_V7_FLAG_DEEP_IDLE | CPUIDLE_FLAG_RCU_IDLE,
.name = "MV CPU DEEP IDLE",
.desc = "CPU and L2 Fabric power down",
},
@@ -76,7 +80,7 @@ static struct cpuidle_driver armada370_idle_driver = {
.exit_latency = 100,
.power_usage = 5,
.target_residency = 1000,
- .flags = MVEBU_V7_FLAG_DEEP_IDLE,
+ .flags = MVEBU_V7_FLAG_DEEP_IDLE | CPUIDLE_FLAG_RCU_IDLE,
.name = "Deep Idle",
.desc = "CPU and L2 Fabric power down",
},
@@ -91,6 +95,7 @@ static struct cpuidle_driver armada38x_idle_driver = {
.exit_latency = 10,
.power_usage = 5,
.target_residency = 100,
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.name = "Idle",
.desc = "CPU and SCU power down",
},
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index 57bc3e3ae391..312a34ef28dc 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -49,14 +49,9 @@ static inline u32 psci_get_domain_state(void)
return __this_cpu_read(domain_state);
}
-static inline int psci_enter_state(int idx, u32 state)
-{
- return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state);
-}
-
-static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx,
- bool s2idle)
+static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx,
+ bool s2idle)
{
struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
u32 *states = data->psci_states;
@@ -69,12 +64,10 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
return -1;
/* Do runtime PM to manage a hierarchical CPU toplogy. */
- ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_suspend(pd_dev);
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_irq_exit_irqson();
state = psci_get_domain_state();
if (!state)
@@ -82,12 +75,10 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
- ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_resume(pd_dev);
else
pm_runtime_get_sync(pd_dev);
- ct_irq_exit_irqson();
cpu_pm_exit();
@@ -192,12 +183,12 @@ static void psci_idle_init_cpuhp(void)
pr_warn("Failed %d while setup cpuhp state\n", err);
}
-static int psci_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int psci_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
u32 *state = __this_cpu_read(psci_cpuidle_data.psci_states);
- return psci_enter_state(idx, state[idx]);
+ return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, idx, state[idx]);
}
static const struct of_device_id psci_idle_state_match[] = {
@@ -240,6 +231,7 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv,
* of a shared state for the domain, assumes the domain states are all
* deeper states.
*/
+ drv->states[state_count - 1].flags |= CPUIDLE_FLAG_RCU_IDLE;
drv->states[state_count - 1].enter = psci_enter_domain_idle_state;
drv->states[state_count - 1].enter_s2idle = psci_enter_s2idle_domain_idle_state;
psci_cpuidle_use_cpuhp = true;
diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c
index beedf22cbe78..326bca154ac7 100644
--- a/drivers/cpuidle/cpuidle-qcom-spm.c
+++ b/drivers/cpuidle/cpuidle-qcom-spm.c
@@ -58,8 +58,8 @@ static int qcom_cpu_spc(struct spm_driver_data *drv)
return ret;
}
-static int spm_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int spm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
struct cpuidle_qcom_spm_data *data = container_of(drv, struct cpuidle_qcom_spm_data,
cpuidle_driver);
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index 05fe2902df9a..be383f4b6855 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -93,8 +93,8 @@ static int sbi_suspend(u32 state)
return sbi_suspend_finisher(state, 0, 0);
}
-static int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
u32 *states = __this_cpu_read(sbi_cpuidle_data.states);
u32 state = states[idx];
@@ -106,9 +106,9 @@ static int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
idx, state);
}
-static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx,
- bool s2idle)
+static __cpuidle int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx,
+ bool s2idle)
{
struct sbi_cpuidle_data *data = this_cpu_ptr(&sbi_cpuidle_data);
u32 *states = data->states;
@@ -121,12 +121,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
return -1;
/* Do runtime PM to manage a hierarchical CPU toplogy. */
- ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_suspend(pd_dev);
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_irq_exit_irqson();
+
+ ct_cpuidle_enter();
if (sbi_is_domain_state_available())
state = sbi_get_domain_state();
@@ -135,12 +135,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
ret = sbi_suspend(state) ? -1 : idx;
- ct_irq_enter_irqson();
+ ct_cpuidle_exit();
+
if (s2idle)
dev_pm_genpd_resume(pd_dev);
else
pm_runtime_get_sync(pd_dev);
- ct_irq_exit_irqson();
cpu_pm_exit();
@@ -251,6 +251,7 @@ static int sbi_dt_cpu_init_topology(struct cpuidle_driver *drv,
* of a shared state for the domain, assumes the domain states are all
* deeper states.
*/
+ drv->states[state_count - 1].flags |= CPUIDLE_FLAG_RCU_IDLE;
drv->states[state_count - 1].enter = sbi_enter_domain_idle_state;
drv->states[state_count - 1].enter_s2idle =
sbi_enter_s2idle_domain_idle_state;
diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c
index 9845629aeb6d..b203a93deac5 100644
--- a/drivers/cpuidle/cpuidle-tegra.c
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -160,8 +160,8 @@ static int tegra_cpuidle_coupled_barrier(struct cpuidle_device *dev)
return 0;
}
-static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
- int index, unsigned int cpu)
+static __cpuidle int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
+ int index, unsigned int cpu)
{
int err;
@@ -180,9 +180,11 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
}
local_fiq_disable();
- RCU_NONIDLE(tegra_pm_set_cpu_in_lp2());
+ tegra_pm_set_cpu_in_lp2();
cpu_pm_enter();
+ ct_cpuidle_enter();
+
switch (index) {
case TEGRA_C7:
err = tegra_cpuidle_c7_enter();
@@ -197,8 +199,10 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
break;
}
+ ct_cpuidle_exit();
+
cpu_pm_exit();
- RCU_NONIDLE(tegra_pm_clear_cpu_in_lp2());
+ tegra_pm_clear_cpu_in_lp2();
local_fiq_enable();
return err ?: index;
@@ -222,10 +226,11 @@ static int tegra_cpuidle_adjust_state_index(int index, unsigned int cpu)
return index;
}
-static int tegra_cpuidle_enter(struct cpuidle_device *dev,
- struct cpuidle_driver *drv,
- int index)
+static __cpuidle int tegra_cpuidle_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
+ bool do_rcu = drv->states[index].flags & CPUIDLE_FLAG_RCU_IDLE;
unsigned int cpu = cpu_logical_map(dev->cpu);
int ret;
@@ -233,9 +238,13 @@ static int tegra_cpuidle_enter(struct cpuidle_device *dev,
if (dev->states_usage[index].disable)
return -1;
- if (index == TEGRA_C1)
+ if (index == TEGRA_C1) {
+ if (do_rcu)
+ ct_cpuidle_enter();
ret = arm_cpuidle_simple_enter(dev, drv, index);
- else
+ if (do_rcu)
+ ct_cpuidle_exit();
+ } else
ret = tegra_cpuidle_state_enter(dev, index, cpu);
if (ret < 0) {
@@ -285,7 +294,8 @@ static struct cpuidle_driver tegra_idle_driver = {
.exit_latency = 2000,
.target_residency = 2200,
.power_usage = 100,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C7",
.desc = "CPU core powered off",
},
@@ -295,6 +305,7 @@ static struct cpuidle_driver tegra_idle_driver = {
.target_residency = 10000,
.power_usage = 0,
.flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE |
CPUIDLE_FLAG_COUPLED,
.name = "CC6",
.desc = "CPU cluster powered off",
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 6eceb1988243..0b00f21cefe3 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
@@ -136,11 +137,13 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
}
#ifdef CONFIG_SUSPEND
-static void enter_s2idle_proper(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, int index)
+static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index)
{
- ktime_t time_start, time_end;
struct cpuidle_state *target_state = &drv->states[index];
+ ktime_t time_start, time_end;
+
+ instrumentation_begin();
time_start = ns_to_ktime(local_clock());
@@ -151,13 +154,18 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
* suspended is generally unsafe.
*/
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_enter();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_exit();
+ raw_local_irq_disable();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
+ ct_cpuidle_exit();
+ }
tick_unfreeze();
start_critical_timings();
@@ -165,6 +173,7 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start);
dev->states_usage[index].s2idle_usage++;
+ instrumentation_end();
}
/**
@@ -199,8 +208,9 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* @drv: cpuidle driver for this cpu
* @index: index into the states table in @drv of the state to enter
*/
-int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
- int index)
+noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
int entered_state;
@@ -208,6 +218,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
ktime_t time_start, time_end;
+ instrumentation_begin();
+
/*
* Tell the time framework to switch to a broadcast timer because our
* local timer will be shut down. If a local timer is used from another
@@ -234,11 +246,33 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
time_start = ns_to_ktime(local_clock());
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_enter();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
+
+ /*
+ * NOTE!!
+ *
+ * For cpuidle_state::enter() methods that do *NOT* set
+ * CPUIDLE_FLAG_RCU_IDLE RCU will be disabled here and these functions
+ * must be marked either noinstr or __cpuidle.
+ *
+ * For cpuidle_state::enter() methods that *DO* set
+ * CPUIDLE_FLAG_RCU_IDLE this isn't required, but they must mark the
+ * function calling ct_cpuidle_enter() as noinstr/__cpuidle and all
+ * functions called within the RCU-idle region.
+ */
entered_state = target_state->enter(dev, drv, index);
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_exit();
+
+ if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
+ raw_local_irq_disable();
+
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
+ ct_cpuidle_exit();
+ }
start_critical_timings();
sched_clock_idle_wakeup_event();
@@ -248,12 +282,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
/* The cpu is no longer idle or about to enter idle. */
sched_idle_set_state(NULL);
- if (broadcast) {
- if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
-
+ if (broadcast)
tick_broadcast_exit();
- }
if (!cpuidle_state_is_coupled(drv, index))
local_irq_enable();
@@ -305,6 +335,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
dev->states_usage[index].rejected++;
}
+ instrumentation_end();
+
return entered_state;
}
@@ -394,7 +426,7 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
* @dev: the cpuidle device
*
*/
-u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+__cpuidle u64 cpuidle_poll_time(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
int i;
diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c
index 7ca3d7d9b5ea..02aa0b39af9d 100644
--- a/drivers/cpuidle/dt_idle_states.c
+++ b/drivers/cpuidle/dt_idle_states.c
@@ -77,7 +77,7 @@ static int init_state_node(struct cpuidle_state *idle_state,
if (err)
desc = state_node->name;
- idle_state->flags = 0;
+ idle_state->flags = CPUIDLE_FLAG_RCU_IDLE;
if (of_property_read_bool(state_node, "local-timer-stop"))
idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
/*
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index f7e83613ae94..bdcfeaecd228 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -13,11 +13,13 @@
static int __cpuidle poll_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- u64 time_start = local_clock();
+ u64 time_start;
+
+ time_start = local_clock();
dev->poll_time_limit = false;
- local_irq_enable();
+ raw_local_irq_enable();
if (!current_set_polling_and_test()) {
unsigned int loop_count = 0;
u64 limit;
@@ -36,6 +38,8 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
}
}
}
+ raw_local_irq_disable();
+
current_clr_polling();
return index;
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index 447ee4ea5c90..29619f49873a 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -108,9 +108,10 @@ bool psci_power_state_is_valid(u32 state)
return !(state & ~valid_mask);
}
-static unsigned long __invoke_psci_fn_hvc(unsigned long function_id,
- unsigned long arg0, unsigned long arg1,
- unsigned long arg2)
+static __always_inline unsigned long
+__invoke_psci_fn_hvc(unsigned long function_id,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2)
{
struct arm_smccc_res res;
@@ -118,9 +119,10 @@ static unsigned long __invoke_psci_fn_hvc(unsigned long function_id,
return res.a0;
}
-static unsigned long __invoke_psci_fn_smc(unsigned long function_id,
- unsigned long arg0, unsigned long arg1,
- unsigned long arg2)
+static __always_inline unsigned long
+__invoke_psci_fn_smc(unsigned long function_id,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2)
{
struct arm_smccc_res res;
@@ -128,7 +130,7 @@ static unsigned long __invoke_psci_fn_smc(unsigned long function_id,
return res.a0;
}
-static int psci_to_linux_errno(int errno)
+static __always_inline int psci_to_linux_errno(int errno)
{
switch (errno) {
case PSCI_RET_SUCCESS:
@@ -169,7 +171,8 @@ int psci_set_osi_mode(bool enable)
return psci_to_linux_errno(err);
}
-static int __psci_cpu_suspend(u32 fn, u32 state, unsigned long entry_point)
+static __always_inline int
+__psci_cpu_suspend(u32 fn, u32 state, unsigned long entry_point)
{
int err;
@@ -177,13 +180,15 @@ static int __psci_cpu_suspend(u32 fn, u32 state, unsigned long entry_point)
return psci_to_linux_errno(err);
}
-static int psci_0_1_cpu_suspend(u32 state, unsigned long entry_point)
+static __always_inline int
+psci_0_1_cpu_suspend(u32 state, unsigned long entry_point)
{
return __psci_cpu_suspend(psci_0_1_function_ids.cpu_suspend,
state, entry_point);
}
-static int psci_0_2_cpu_suspend(u32 state, unsigned long entry_point)
+static __always_inline int
+psci_0_2_cpu_suspend(u32 state, unsigned long entry_point)
{
return __psci_cpu_suspend(PSCI_FN_NATIVE(0_2, CPU_SUSPEND),
state, entry_point);
@@ -450,10 +455,12 @@ late_initcall(psci_debugfs_init)
#endif
#ifdef CONFIG_CPU_IDLE
-static int psci_suspend_finisher(unsigned long state)
+static noinstr int psci_suspend_finisher(unsigned long state)
{
u32 power_state = state;
- phys_addr_t pa_cpu_resume = __pa_symbol(cpu_resume);
+ phys_addr_t pa_cpu_resume;
+
+ pa_cpu_resume = __pa_symbol_nodebug((unsigned long)cpu_resume);
return psci_ops.cpu_suspend(power_state, pa_cpu_resume);
}
@@ -465,11 +472,22 @@ int psci_cpu_suspend_enter(u32 state)
if (!psci_power_state_loses_context(state)) {
struct arm_cpuidle_irq_context context;
+ ct_cpuidle_enter();
arm_cpuidle_save_irq_context(&context);
ret = psci_ops.cpu_suspend(state, 0);
arm_cpuidle_restore_irq_context(&context);
+ ct_cpuidle_exit();
} else {
+ /*
+ * ARM64 cpu_suspend() wants to do ct_cpuidle_*() itself.
+ */
+ if (!IS_ENABLED(CONFIG_ARM64))
+ ct_cpuidle_enter();
+
ret = cpu_suspend(state, psci_suspend_finisher);
+
+ if (!IS_ENABLED(CONFIG_ARM64))
+ ct_cpuidle_exit();
}
return ret;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cfeb24d40d37..e2d64a8f9422 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -168,13 +168,7 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
raw_local_irq_enable();
ret = __intel_idle(dev, drv, index);
-
- /*
- * The lockdep hardirqs state may be changed to 'on' with timer
- * tick interrupt followed by __do_softirq(). Use local_irq_disable()
- * to keep the hardirqs state correct.
- */
- local_irq_disable();
+ raw_local_irq_disable();
return ret;
}
@@ -187,12 +181,12 @@ static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
int ret;
if (smt_active)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
ret = __intel_idle(dev, drv, index);
if (smt_active)
- wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
return ret;
}
@@ -1843,6 +1837,9 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
return true;
}
+static bool force_irq_on __read_mostly;
+module_param(force_irq_on, bool, 0444);
+
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
int cstate;
@@ -1895,8 +1892,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
- if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
+ if ((cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) || force_irq_on) {
+ printk("intel_idle: forced intel_idle_irq for state %d\n", cstate);
drv->states[drv->state_count].enter = intel_idle_irq;
+ }
if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 40f70f83daba..15bd1e34a88e 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -752,17 +752,8 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
case CPU_PM_ENTER_FAILED:
/*
* Restore and enable the counter.
- * armpmu_start() indirectly calls
- *
- * perf_event_update_userpage()
- *
- * that requires RCU read locking to be functional,
- * wrap the call within RCU_NONIDLE to make the
- * RCU subsystem aware this cpu is not idle from
- * an RCU perspective for the armpmu_start() call
- * duration.
*/
- RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD));
+ armpmu_start(event, PERF_EF_RELOAD);
break;
default:
break;
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index f6507efe2a58..7b2288d4b1ec 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -771,14 +771,8 @@ static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
case CPU_PM_ENTER_FAILED:
/*
* Restore and enable the counter.
- *
- * Requires RCU read locking to be functional,
- * wrap the call within RCU_NONIDLE to make the
- * RCU subsystem aware this cpu is not idle from
- * an RCU perspective for the riscv_pmu_start() call
- * duration.
*/
- RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD));
+ riscv_pmu_start(event, PERF_EF_RELOAD);
break;
default:
break;