From 6e9643a864aa4d532b0d467bacc18a15adf5ca82 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 23 Oct 2020 17:58:39 +0800 Subject: cpufreq: e_powersaver: remove unreachable break A 'break' following a 'return' statement is pointless, so remove it. Signed-off-by: Zhang Qilong Acked-by: Viresh Kumar [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/e_powersaver.c | 1 - drivers/cpufreq/longhaul.c | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index 776a58bab0ff..ab93bce8ae77 100644 --- a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -223,7 +223,6 @@ static int eps_cpu_init(struct cpufreq_policy *policy) case EPS_BRAND_C3: pr_cont("C3\n"); return -ENODEV; - break; } /* Enable Enhanced PowerSaver */ rdmsrl(MSR_IA32_MISC_ENABLE, val); diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 123fb006810d..182a4dbca095 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -593,7 +593,6 @@ static void longhaul_setup_voltagescaling(void) break; default: return; - break; } if (min_vid_speed >= highest_speed) return; -- cgit v1.2.3 From db865272d9c4687520dc29f77e701a1b2669872f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:15:56 +0200 Subject: cpufreq: Avoid configuring old governors as default with intel_pstate Commit 33aa46f252c7 ("cpufreq: intel_pstate: Use passive mode by default without HWP") was meant to cause intel_pstate to be used in the passive mode with the schedutil governor on top of it, but it missed the case in which either "ondemand" or "conservative" was selected as the default governor in the existing kernel config, in which case the previous old governor configuration would be used, causing the default legacy governor to be used on top of intel_pstate instead of schedutil. 
Address this by preventing "ondemand" and "conservative" from being configured as the default cpufreq governor in the case when schedutil is the default choice for the default governor setting. [Note that the default cpufreq governor can still be set via the kernel command line if need be and that choice is not limited, so if anyone really wants to use one of the legacy governors by default, it can be achieved this way.] Fixes: 33aa46f252c7 ("cpufreq: intel_pstate: Use passive mode by default without HWP") Reported-by: Julia Lawall Cc: 5.8+ # 5.8+ Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 2c7171e0b001..85de313ddec2 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -71,6 +71,7 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE config CPU_FREQ_DEFAULT_GOV_ONDEMAND bool "ondemand" + depends on !(X86_INTEL_PSTATE && SMP) select CPU_FREQ_GOV_ONDEMAND select CPU_FREQ_GOV_PERFORMANCE help @@ -83,6 +84,7 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE bool "conservative" + depends on !(X86_INTEL_PSTATE && SMP) select CPU_FREQ_GOV_CONSERVATIVE select CPU_FREQ_GOV_PERFORMANCE help -- cgit v1.2.3 From 1c534352f47fd83eb08075ac2474f707e74bf7f7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:19 +0200 Subject: cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS driver flag Generally, a cpufreq driver may need to update some internal upper and lower frequency boundaries on policy max and min changes, respectively, but currently this does not work if the target frequency does not change along with the policy limit. 
Namely, if the target frequency does not change along with the policy min or max, the "target_freq == policy->cur" check in __cpufreq_driver_target() prevents driver callbacks from being invoked and they do not even have a chance to update the corresponding internal boundary. This particularly affects the "powersave" and "performance" governors that always set the target frequency to one of the policy limits and it never changes when the other limit is updated. To allow cpufreq drivers that need to update internal frequency boundaries on policy limit changes to avoid this issue, introduce a new driver flag, CPUFREQ_NEED_UPDATE_LIMITS, that (when set) will neutralize the check mentioned above. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 3 ++- include/linux/cpufreq.h | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f4b60663efe6..ea58337fb65f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2187,7 +2187,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, * exactly same freq is called again and so we can save on few function * calls. 
*/ - if (target_freq == policy->cur) + if (target_freq == policy->cur && + !(cpufreq_driver->flags & CPUFREQ_NEED_UPDATE_LIMITS)) return 0; /* Save last value to restore later on errors */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index fa37b1c66443..038ed83aab41 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -298,7 +298,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) struct cpufreq_driver { char name[CPUFREQ_NAME_LEN]; - u8 flags; + u16 flags; void *driver_data; /* needed by all drivers */ @@ -422,6 +422,14 @@ struct cpufreq_driver { */ #define CPUFREQ_IS_COOLING_DEV BIT(7) +/* + * Set by drivers that need to update internal upper and lower boundaries along + * with the target frequency and so the core and governors should also invoke + * the driver if the target frequency does not change, but the policy min or max + * may have changed. + */ +#define CPUFREQ_NEED_UPDATE_LIMITS BIT(8) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit v1.2.3 From e0be38ed4ab413ddd492118cf146369b86ee0ab5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:32 +0200 Subject: cpufreq: intel_pstate: Avoid missing HWP max updates in passive mode If the cpufreq policy max limit is changed when intel_pstate operates in the passive mode with HWP enabled and the "powersave" governor is used on top of it, the HWP max limit is not updated as appropriate. Namely, in the "powersave" governor case, the target P-state is always equal to the policy min limit, so if the latter does not change, intel_cpufreq_adjust_hwp() is not invoked to update the HWP Request MSR due to the "target_pstate != old_pstate" check in intel_cpufreq_update_pstate(), so the HWP max limit is not updated as a result. 
Also, if the CPUFREQ_NEED_UPDATE_LIMITS flag is not set for the driver and the target frequency does not change along with the policy max limit, the "target_freq == policy->cur" check in __cpufreq_driver_target() prevents the driver's ->target() callback from being invoked at all, so the HWP max limit is not updated. To prevent that occurring, set the CPUFREQ_NEED_UPDATE_LIMITS flag in the intel_cpufreq driver structure if HWP is enabled and modify intel_cpufreq_update_pstate() to do the "target_pstate != old_pstate" check only in the non-HWP case and let intel_cpufreq_adjust_hwp() always run in the HWP case (it will update HWP Request only if the cached value of the register is different from the new one including the limits, so if neither the target P-state value nor the max limit changes, the register write will still be avoided). Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled") Reported-by: Zhang Rui Cc: 5.9+ # 5.9+: 1c534352f47f cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS ... Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Tested-by: Zhang Rui --- drivers/cpufreq/intel_pstate.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3c1455518738..b7a9779250aa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2568,14 +2568,12 @@ static int intel_cpufreq_update_pstate(struct cpudata *cpu, int target_pstate, int old_pstate = cpu->pstate.current_pstate; target_pstate = intel_pstate_prepare_request(cpu, target_pstate); - if (target_pstate != old_pstate) { + if (hwp_active) { + intel_cpufreq_adjust_hwp(cpu, target_pstate, fast_switch); + cpu->pstate.current_pstate = target_pstate; + } else if (target_pstate != old_pstate) { + intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch); cpu->pstate.current_pstate = target_pstate; - if (hwp_active) - intel_cpufreq_adjust_hwp(cpu, target_pstate, - fast_switch); - else - intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, - fast_switch); } intel_cpufreq_trace(cpu, fast_switch ? INTEL_PSTATE_TRACE_FAST_SWITCH : @@ -3032,6 +3030,7 @@ static int __init intel_pstate_init(void) hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; intel_cpufreq.attr = hwp_cpufreq_attrs; + intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS; if (!default_driver) default_driver = &intel_pstate; -- cgit v1.2.3 From 00d4394792418f8fe968f0cb22557053c6310010 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 27 Oct 2020 11:59:34 -0700 Subject: cpufreq: speedstep: remove unneeded semicolon A semicolon is not needed after a switch statement. Signed-off-by: Tom Rix Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/speedstep-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index a13a2d1e444e..0b66df4ed513 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -240,7 +240,7 @@ unsigned int speedstep_get_frequency(enum speedstep_processor processor) return pentium3_get_frequency(processor); default: return 0; - }; + } return 0; } EXPORT_SYMBOL_GPL(speedstep_get_frequency); -- cgit v1.2.3 From a62f68f5ca53ab61cba2f0a410d0add7a6d54a52 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:46 +0200 Subject: cpufreq: Introduce cpufreq_driver_test_flags() Add a helper function to test the flags of the cpufreq driver in use against a given flags mask. In particular, this will be needed to test the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag in the schedutil governor. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 12 ++++++++++++ include/linux/cpufreq.h | 1 + 2 files changed, 13 insertions(+) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ea58337fb65f..336b5e94cbc8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1907,6 +1907,18 @@ void cpufreq_resume(void) } } +/** + * cpufreq_driver_test_flags - Test cpufreq driver's flags against given ones. + * @flags: Flags to test against the current cpufreq driver's flags. + * + * Assumes that the driver is there, so callers must ensure that this is the + * case. 
+ */ +bool cpufreq_driver_test_flags(u16 flags) +{ + return !!(cpufreq_driver->flags & flags); +} + /** * cpufreq_get_current_driver - return current driver's name * diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 038ed83aab41..1eaa04f1bae6 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -433,6 +433,7 @@ struct cpufreq_driver { int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); +bool cpufreq_driver_test_flags(u16 flags); const char *cpufreq_get_current_driver(void); void *cpufreq_get_driver_data(void); -- cgit v1.2.3 From c250d50fe2ce627ca9805d9c8ac11cbbf922a4a6 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 5 Nov 2020 12:50:01 +0000 Subject: PM: EM: Add a flag indicating units of power values in Energy Model There are different platforms and devices which might use different scale for the power values. Kernel sub-systems might need to check if all Energy Model (EM) devices are using the same scale. Address that issue and store the information inside EM for each device. Thanks to that they can be easily compared and proper action triggered. Suggested-by: Daniel Lezcano Reviewed-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/scmi-cpufreq.c | 3 ++- drivers/opp/of.c | 2 +- include/linux/energy_model.h | 9 +++++++-- kernel/power/energy_model.c | 24 +++++++++++++++++++++++- 4 files changed, 33 insertions(+), 5 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index e855e8612a67..3714a4cd07fa 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -188,7 +188,8 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = handle->perf_ops->fast_switch_possible(handle, cpu_dev); - em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus); + em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, + false); return 0; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 9faeb83e4b32..16f39e2127a5 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1335,7 +1335,7 @@ int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus) goto failed; } - ret = em_dev_register_perf_domain(dev, nr_opp, &em_cb, cpus); + ret = em_dev_register_perf_domain(dev, nr_opp, &em_cb, cpus, true); if (ret) goto failed; diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index b67a51c574b9..3a33c738d876 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -29,6 +29,8 @@ struct em_perf_state { * em_perf_domain - Performance domain * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states + * @milliwatts: Flag indicating the power values are in milli-Watts + * or some other scale. * @cpus: Cpumask covering the CPUs of the domain. 
It's here * for performance reasons to avoid potential cache * misses during energy calculations in the scheduler @@ -43,6 +45,7 @@ struct em_perf_state { struct em_perf_domain { struct em_perf_state *table; int nr_perf_states; + int milliwatts; unsigned long cpus[]; }; @@ -79,7 +82,8 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span); + struct em_data_callback *cb, cpumask_t *span, + bool milliwatts); void em_dev_unregister_perf_domain(struct device *dev); /** @@ -186,7 +190,8 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span) + struct em_data_callback *cb, cpumask_t *span, + bool milliwatts) { return -EINVAL; } diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index c1ff7fa030ab..efe2a595988e 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -52,6 +52,17 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); +static int em_debug_units_show(struct seq_file *s, void *unused) +{ + struct em_perf_domain *pd = s->private; + char *units = pd->milliwatts ? 
"milliWatts" : "bogoWatts"; + + seq_printf(s, "%s\n", units); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(em_debug_units); + static void em_debug_create_pd(struct device *dev) { struct dentry *d; @@ -64,6 +75,8 @@ static void em_debug_create_pd(struct device *dev) debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus, &em_debug_cpus_fops); + debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops); + /* Create a sub-directory for each performance state */ for (i = 0; i < dev->em_pd->nr_perf_states; i++) em_debug_create_ps(&dev->em_pd->table[i], d); @@ -250,17 +263,24 @@ EXPORT_SYMBOL_GPL(em_cpu_get); * @cpus : Pointer to cpumask_t, which in case of a CPU device is * obligatory. It can be taken from i.e. 'policy->cpus'. For other * type of devices this should be set to NULL. + * @milliwatts : Flag indicating that the power values are in milliWatts or + * in some other scale. It must be set properly. * * Create Energy Model tables for a performance domain using the callbacks * defined in cb. * + * The @milliwatts is important to set with correct value. Some kernel + * sub-systems might rely on this flag and check if all devices in the EM are + * using the same scale. + * * If multiple clients register the same performance domain, all but the first * registration will be ignored. 
* * Return 0 on success */ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *cpus) + struct em_data_callback *cb, cpumask_t *cpus, + bool milliwatts) { unsigned long cap, prev_cap = 0; int cpu, ret; @@ -313,6 +333,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, if (ret) goto unlock; + dev->em_pd->milliwatts = milliwatts; + em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); -- cgit v1.2.3 From f9b0498d29404f230894490d622e57e481c7d45a Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 24 Nov 2020 10:43:46 +0000 Subject: cpufreq: arm_scmi: Discover the power scale in performance protocol Add mechanism to discover the power scale present in the performance protocol for all domains. Provide this information to Energy Model, which then can be checked in other frameworks, e.g. thermal. Suggested-by: Morten Rasmussen Signed-off-by: Lukasz Luba Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 3714a4cd07fa..144afd1265c1 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -125,6 +125,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) struct scmi_data *priv; struct cpufreq_frequency_table *freq_table; struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power); + bool power_scale_mw; cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { @@ -188,8 +189,9 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = handle->perf_ops->fast_switch_possible(handle, cpu_dev); + power_scale_mw = handle->perf_ops->power_scale_mw_get(handle); em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, - false); + power_scale_mw); return 0; -- cgit v1.2.3