Diffstat (limited to 'drivers')
28 files changed, 931 insertions(+), 291 deletions(-)
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 0f17b1c32718..02d83c807271 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) } /** + * cppc_get_epp_perf - Get the epp register value. + * @cpunum: CPU from which to get epp preference value. + * @epp_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_epp_perf(int cpunum, u64 *epp_perf) +{ + return cppc_get_perf(cpunum, ENERGY_PERF, epp_perf); +} +EXPORT_SYMBOL_GPL(cppc_get_epp_perf); + +/** * cppc_get_perf_caps - Get a CPU's performance capabilities. * @cpunum: CPU from which to get capabilities info. * @perf_caps: ptr to cppc_perf_caps. See cppc_acpi.h @@ -1365,6 +1378,60 @@ out_err: } EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs); +/* + * Set Energy Performance Preference Register value through + * Performance Controls Interface + */ +int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) +{ + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cpc_register_resource *epp_set_reg; + struct cpc_register_resource *auto_sel_reg; + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -ENODEV; + } + + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; + epp_set_reg = &cpc_desc->cpc_regs[ENERGY_PERF]; + + if (CPC_IN_PCC(epp_set_reg) || CPC_IN_PCC(auto_sel_reg)) { + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu); + return -ENODEV; + } + + if (CPC_SUPPORTED(auto_sel_reg)) { + ret = cpc_write(cpu, auto_sel_reg, enable); + if (ret) + return ret; + } + + if (CPC_SUPPORTED(epp_set_reg)) { + ret = cpc_write(cpu, epp_set_reg, perf_ctrls->energy_perf); + if (ret) + return ret; + } + + pcc_ss_data = pcc_data[pcc_ss_id]; + + down_write(&pcc_ss_data->pcc_lock); + /* after writing CPC, transfer the ownership of PCC to platform */ + ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); + up_write(&pcc_ss_data->pcc_lock); + } else { + ret = -ENOTSUPP; + pr_debug("_CPC in PCC is not supported\n"); + } + + return ret; +} +EXPORT_SYMBOL_GPL(cppc_set_epp_perf); + /** * cppc_set_enable - Set to enable CPPC on the processor by writing the * Continuous Performance Control package EnableRegister field. diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 967bcf9d415e..6097644ebdc5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -220,13 +220,10 @@ static void genpd_debug_add(struct generic_pm_domain *genpd); static void genpd_debug_remove(struct generic_pm_domain *genpd) { - struct dentry *d; - if (!genpd_debugfs_dir) return; - d = debugfs_lookup(genpd->name, genpd_debugfs_dir); - debugfs_remove(d); + debugfs_lookup_and_remove(genpd->name, genpd_debugfs_dir); } static void genpd_update_accounting(struct generic_pm_domain *genpd) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 98f7b3d7d669..4545669cb973 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1864,6 +1864,10 @@ static bool pm_runtime_need_not_resume(struct device *dev) * sure the device is put into low power state and it should only be used during * system-wide PM transitions to sleep states. It assumes that the analogous * pm_runtime_force_resume() will be used to resume the device. 
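As a usage illustration of the cppc_get_epp_perf()/cppc_set_epp_perf() helpers introduced above, a minimal sketch of a driver-side caller follows; example_update_epp() is hypothetical and only the two signatures added by this patch are assumed.

/* Hypothetical caller of the new CPPC EPP helpers. */
static int example_update_epp(int cpu, u64 new_epp)
{
	struct cppc_perf_ctrls perf_ctrls = {};
	u64 cur_epp;
	int ret;

	ret = cppc_get_epp_perf(cpu, &cur_epp);
	if (ret)
		return ret;

	if (cur_epp == new_epp)
		return 0;

	perf_ctrls.energy_perf = new_epp;
	/* enable == true also writes AUTO_SEL_ENABLE when supported */
	return cppc_set_epp_perf(cpu, &perf_ctrls, true);
}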
+ * + * Do not use with DPM_FLAG_SMART_SUSPEND as this can lead to an inconsistent + * state where this function has called the ->runtime_suspend callback but the + * PM core marks the driver as runtime active. */ int pm_runtime_force_suspend(struct device *dev) { diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 8466f78651fc..2c839bd2b051 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -3,7 +3,6 @@ menu "CPU Frequency scaling" config CPU_FREQ bool "CPU Frequency scaling" - select SRCU help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. This is a nice method to save power, because @@ -271,15 +270,6 @@ config LOONGSON2_CPUFREQ Loongson2F and its successors support this feature. If in doubt, say N. - -config LOONGSON1_CPUFREQ - tristate "Loongson1 CPUFreq Driver" - depends on LOONGSON1_LS1B - help - This option adds a CPUFreq driver for loongson1 processors which - support software configurable cpu frequency. - - If in doubt, say N. endif if SPARC64 diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index a19842fbd521..ef8510774913 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -104,7 +104,6 @@ obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o -obj-$(CONFIG_LOONGSON1_CPUFREQ) += loongson1-cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o obj-$(CONFIG_SPARC_US3_CPUFREQ) += sparc-us3-cpufreq.o diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index c17bd845f5fc..45c88894fd8e 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -59,8 +59,171 @@ * we disable it by default to go acpi-cpufreq on these processors and add a * module parameter to be able to enable it manually for debugging. */ +static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; -static int cppc_load __initdata; +static struct cpufreq_driver amd_pstate_epp_driver; +static int cppc_state = AMD_PSTATE_DISABLE; +struct kobject *amd_pstate_kobj; + +/* + * AMD Energy Preference Performance (EPP) + * The EPP is used in the CCLK DPM controller to drive + * the frequency that a core is going to operate during + * short periods of activity. 
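For context on the pm_runtime_force_suspend() note above: the helper is meant to back a driver's system sleep callbacks directly, as in the generic sketch below (example_runtime_suspend/example_runtime_resume are hypothetical). Per the new kernel-doc, such a driver must not also set DPM_FLAG_SMART_SUSPEND.

/* Generic sketch: system sleep routed through the force helpers.
 * Do not combine this wiring with DPM_FLAG_SMART_SUSPEND. */
static const struct dev_pm_ops example_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
				pm_runtime_force_resume)
	SET_RUNTIME_PM_OPS(example_runtime_suspend,
			   example_runtime_resume, NULL)
};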
EPP values will be utilized for + * different OS profiles (balanced, performance, power savings) + * display strings corresponding to EPP index in the + * energy_perf_strings[] + * index String + *------------------------------------- + * 0 default + * 1 performance + * 2 balance_performance + * 3 balance_power + * 4 power + */ +enum energy_perf_value_index { + EPP_INDEX_DEFAULT = 0, + EPP_INDEX_PERFORMANCE, + EPP_INDEX_BALANCE_PERFORMANCE, + EPP_INDEX_BALANCE_POWERSAVE, + EPP_INDEX_POWERSAVE, +}; + +static const char * const energy_perf_strings[] = { + [EPP_INDEX_DEFAULT] = "default", + [EPP_INDEX_PERFORMANCE] = "performance", + [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance", + [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power", + [EPP_INDEX_POWERSAVE] = "power", + NULL +}; + +static unsigned int epp_values[] = { + [EPP_INDEX_DEFAULT] = 0, + [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE, + [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE, + [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE, + [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE, + }; + +static inline int get_mode_idx_from_str(const char *str, size_t size) +{ + int i; + + for (i=0; i < AMD_PSTATE_MAX; i++) { + if (!strncmp(str, amd_pstate_mode_string[i], size)) + return i; + } + return -EINVAL; +} + +static DEFINE_MUTEX(amd_pstate_limits_lock); +static DEFINE_MUTEX(amd_pstate_driver_lock); + +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +{ + u64 epp; + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (!cppc_req_cached) { + epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, + &cppc_req_cached); + if (epp) + return epp; + } + epp = (cppc_req_cached >> 24) & 0xFF; + } else { + ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return -EIO; + } + } + + return (s16)(epp & 0xff); +} + +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) +{ + s16 epp; + int index = -EINVAL; + + epp = amd_pstate_get_epp(cpudata, 0); + if (epp < 0) + return epp; + + switch (epp) { + case AMD_CPPC_EPP_PERFORMANCE: + index = EPP_INDEX_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_PERFORMANCE: + index = EPP_INDEX_BALANCE_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_POWERSAVE: + index = EPP_INDEX_BALANCE_POWERSAVE; + break; + case AMD_CPPC_EPP_POWERSAVE: + index = EPP_INDEX_POWERSAVE; + break; + default: + break; + } + + return index; +} + +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + int ret; + struct cppc_perf_ctrls perf_ctrls; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 value = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + if (!ret) + cpudata->epp_cached = epp; + } else { + perf_ctrls.energy_perf = epp; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + pr_debug("failed to set energy perf value (%d)\n", ret); + return ret; + } + cpudata->epp_cached = epp; + } + + return ret; +} + +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, + int pref_index) +{ + int epp = -EINVAL; + int ret; + + if (!pref_index) { + pr_debug("EPP pref_index is invalid\n"); + return -EINVAL; + } + + if (epp == -EINVAL) + epp = epp_values[pref_index]; + + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("EPP cannot be set under 
performance policy\n"); + return -EBUSY; + } + + ret = amd_pstate_set_epp(cpudata, epp); + + return ret; +} static inline int pstate_enable(bool enable) { @@ -70,11 +233,21 @@ static inline int pstate_enable(bool enable) static int cppc_enable(bool enable) { int cpu, ret = 0; + struct cppc_perf_ctrls perf_ctrls; for_each_present_cpu(cpu) { ret = cppc_set_enable(cpu, enable); if (ret) return ret; + + /* Enable autonomous mode for EPP */ + if (cppc_state == AMD_PSTATE_ACTIVE) { + /* Set desired perf as zero to allow EPP firmware control */ + perf_ctrls.desired_perf = 0; + ret = cppc_set_perf(cpu, &perf_ctrls); + if (ret) + return ret; + } } return ret; @@ -418,7 +591,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) return; cpudata->boost_supported = true; - amd_pstate_driver.boost_enabled = true; + current_pstate_driver->boost_enabled = true; } static void amd_perf_ctl_reset(unsigned int cpu) @@ -501,6 +674,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) policy->driver_data = cpudata; amd_pstate_boost_init(cpudata); + if (!current_pstate_driver->adjust_perf) + current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; return 0; @@ -561,7 +736,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, if (max_freq < 0) return max_freq; - return sprintf(&buf[0], "%u\n", max_freq); + return sysfs_emit(buf, "%u\n", max_freq); } static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, @@ -574,7 +749,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli if (freq < 0) return freq; - return sprintf(&buf[0], "%u\n", freq); + return sysfs_emit(buf, "%u\n", freq); } /* @@ -589,13 +764,151 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, perf = READ_ONCE(cpudata->highest_perf); - return sprintf(&buf[0], "%u\n", perf); + return sysfs_emit(buf, "%u\n", perf); +} + +static ssize_t show_energy_performance_available_preferences( + struct cpufreq_policy *policy, char *buf) +{ + int i = 0; + int offset = 0; + + while (energy_perf_strings[i] != NULL) + offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]); + + sysfs_emit_at(buf, offset, "\n"); + + return offset; +} + +static ssize_t store_energy_performance_preference( + struct cpufreq_policy *policy, const char *buf, size_t count) +{ + struct amd_cpudata *cpudata = policy->driver_data; + char str_preference[21]; + ssize_t ret; + + ret = sscanf(buf, "%20s", str_preference); + if (ret != 1) + return -EINVAL; + + ret = match_string(energy_perf_strings, -1, str_preference); + if (ret < 0) + return -EINVAL; + + mutex_lock(&amd_pstate_limits_lock); + ret = amd_pstate_set_energy_pref_index(cpudata, ret); + mutex_unlock(&amd_pstate_limits_lock); + + return ret ?: count; +} + +static ssize_t show_energy_performance_preference( + struct cpufreq_policy *policy, char *buf) +{ + struct amd_cpudata *cpudata = policy->driver_data; + int preference; + + preference = amd_pstate_get_energy_pref_index(cpudata); + if (preference < 0) + return preference; + + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); +} + +static ssize_t amd_pstate_show_status(char *buf) +{ + if (!current_pstate_driver) + return sysfs_emit(buf, "disable\n"); + + return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); +} + +static void amd_pstate_driver_cleanup(void) +{ + current_pstate_driver = NULL; +} + +static int amd_pstate_update_status(const char *buf, size_t size) +{ + int ret = 0; + int mode_idx; + + if (size > 7 
|| size < 6) + return -EINVAL; + mode_idx = get_mode_idx_from_str(buf, size); + + switch(mode_idx) { + case AMD_PSTATE_DISABLE: + if (!current_pstate_driver) + return -EINVAL; + if (cppc_state == AMD_PSTATE_ACTIVE) + return -EBUSY; + cpufreq_unregister_driver(current_pstate_driver); + amd_pstate_driver_cleanup(); + break; + case AMD_PSTATE_PASSIVE: + if (current_pstate_driver) { + if (current_pstate_driver == &amd_pstate_driver) + return 0; + cpufreq_unregister_driver(current_pstate_driver); + cppc_state = AMD_PSTATE_PASSIVE; + current_pstate_driver = &amd_pstate_driver; + } + + ret = cpufreq_register_driver(current_pstate_driver); + break; + case AMD_PSTATE_ACTIVE: + if (current_pstate_driver) { + if (current_pstate_driver == &amd_pstate_epp_driver) + return 0; + cpufreq_unregister_driver(current_pstate_driver); + current_pstate_driver = &amd_pstate_epp_driver; + cppc_state = AMD_PSTATE_ACTIVE; + } + + ret = cpufreq_register_driver(current_pstate_driver); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static ssize_t show_status(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + ssize_t ret; + + mutex_lock(&amd_pstate_driver_lock); + ret = amd_pstate_show_status(buf); + mutex_unlock(&amd_pstate_driver_lock); + + return ret; +} + +static ssize_t store_status(struct kobject *a, struct kobj_attribute *b, + const char *buf, size_t count) +{ + char *p = memchr(buf, '\n', count); + int ret; + + mutex_lock(&amd_pstate_driver_lock); + ret = amd_pstate_update_status(buf, p ? p - buf : count); + mutex_unlock(&amd_pstate_driver_lock); + + return ret < 0 ? ret : count; } cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_rw(energy_performance_preference); +cpufreq_freq_attr_ro(energy_performance_available_preferences); +define_one_global_rw(status); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, @@ -604,6 +917,313 @@ static struct freq_attr *amd_pstate_attr[] = { NULL, }; +static struct freq_attr *amd_pstate_epp_attr[] = { + &amd_pstate_max_freq, + &amd_pstate_lowest_nonlinear_freq, + &amd_pstate_highest_perf, + &energy_performance_preference, + &energy_performance_available_preferences, + NULL, +}; + +static struct attribute *pstate_global_attributes[] = { + &status.attr, + NULL +}; + +static const struct attribute_group amd_pstate_global_attr_group = { + .attrs = pstate_global_attributes, +}; + +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) +{ + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + struct amd_cpudata *cpudata; + struct device *dev; + u64 value; + + /* + * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, + * which is ideal for initialization process. 
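amd_pstate_get_epp() and amd_pstate_set_epp() above treat bits 31:24 of MSR_AMD_CPPC_REQ as the EPP field (hence the GENMASK_ULL(31, 24) mask). A standalone model of that read-modify-write in plain C:

#include <stdint.h>
#include <stdio.h>

/* Model of the EPP field in MSR_AMD_CPPC_REQ (bits 31:24). */
static uint64_t req_set_epp(uint64_t req, uint8_t epp)
{
	req &= ~(0xFFULL << 24);
	req |= (uint64_t)epp << 24;
	return req;
}

static uint8_t req_get_epp(uint64_t req)
{
	return (req >> 24) & 0xFF;
}

int main(void)
{
	/* 0xBF is AMD_CPPC_EPP_BALANCE_POWERSAVE in msr-index.h */
	uint64_t req = req_set_epp(0, 0xBF);

	printf("epp = 0x%02x\n", req_get_epp(req));
	return 0;
}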
+ */ + amd_perf_ctl_reset(policy->cpu); + dev = get_cpu_device(policy->cpu); + if (!dev) + return -ENODEV; + + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); + if (!cpudata) + return -ENOMEM; + + cpudata->cpu = policy->cpu; + cpudata->epp_policy = 0; + + ret = amd_pstate_init_perf(cpudata); + if (ret) + goto free_cpudata1; + + min_freq = amd_get_min_freq(cpudata); + max_freq = amd_get_max_freq(cpudata); + nominal_freq = amd_get_nominal_freq(cpudata); + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", + min_freq, max_freq); + ret = -EINVAL; + goto free_cpudata1; + } + + policy->cpuinfo.min_freq = min_freq; + policy->cpuinfo.max_freq = max_freq; + /* It will be updated by governor */ + policy->cur = policy->cpuinfo.min_freq; + + /* Initial processor data capability frequencies */ + cpudata->max_freq = max_freq; + cpudata->min_freq = min_freq; + cpudata->nominal_freq = nominal_freq; + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; + + policy->driver_data = cpudata; + + cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); + + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; + + /* + * Set the policy to powersave to provide a valid fallback value in case + * the default cpufreq governor is neither powersave nor performance. + */ + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + policy->fast_switch_possible = true; + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); + if (ret) + return ret; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value); + if (ret) + return ret; + WRITE_ONCE(cpudata->cppc_cap1_cached, value); + } + amd_pstate_boost_init(cpudata); + + return 0; + +free_cpudata1: + kfree(cpudata); + return ret; +} + +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) +{ + pr_debug("CPU %d exiting\n", policy->cpu); + policy->fast_switch_possible = false; + return 0; +} + +static void amd_pstate_epp_init(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + u32 max_perf, min_perf; + u64 value; + s16 epp; + + max_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); + + value = READ_ONCE(cpudata->cppc_req_cached); + + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + min_perf = max_perf; + + /* Initial min/max values for CPPC Performance Controls Register */ + value &= ~AMD_CPPC_MIN_PERF(~0L); + value |= AMD_CPPC_MIN_PERF(min_perf); + + value &= ~AMD_CPPC_MAX_PERF(~0L); + value |= AMD_CPPC_MAX_PERF(max_perf); + + /* CPPC EPP feature require to set zero to the desire perf bit */ + value &= ~AMD_CPPC_DES_PERF(~0L); + value |= AMD_CPPC_DES_PERF(0); + + if (cpudata->epp_policy == cpudata->policy) + goto skip_epp; + + cpudata->epp_policy = cpudata->policy; + + /* Get BIOS pre-defined epp value */ + epp = amd_pstate_get_epp(cpudata, value); + if (epp < 0) { + /** + * This return value can only be negative for shared_memory + * systems where EPP register read/write not supported. 
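amd_pstate_epp_init() above rewrites three fields of the cached request value. Going by the AMD_CPPC_*_PERF() macros, max perf sits in bits 7:0, min perf in bits 15:8, desired perf in bits 23:16 and EPP in bits 31:24; desired perf is forced to zero so the firmware governs within [min, max]. A standalone sketch of the packing (pack_cppc_req() is illustrative, not kernel API):

#include <stdint.h>

/* Illustrative packing of MSR_AMD_CPPC_REQ:
 * max 7:0, min 15:8, desired 23:16, epp 31:24. */
static uint64_t pack_cppc_req(uint8_t min_perf, uint8_t max_perf, uint8_t epp)
{
	uint64_t value = 0;

	value |= (uint64_t)max_perf;
	value |= (uint64_t)min_perf << 8;
	/* desired perf stays 0: EPP/autonomous mode requires it */
	value |= (uint64_t)epp << 24;
	return value;
}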
+ */ + goto skip_epp; + } + + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + epp = 0; + + /* Set initial EPP value */ + if (boot_cpu_has(X86_FEATURE_CPPC)) { + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + } + + WRITE_ONCE(cpudata->cppc_req_cached, value); + amd_pstate_set_epp(cpudata, epp); +skip_epp: + cpufreq_cpu_put(policy); +} + +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + if (!policy->cpuinfo.max_freq) + return -ENODEV; + + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", + policy->cpuinfo.max_freq, policy->max); + + cpudata->policy = policy->policy; + + amd_pstate_epp_init(policy->cpu); + + return 0; +} + +static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) +{ + struct cppc_perf_ctrls perf_ctrls; + u64 value, max_perf; + int ret; + + ret = amd_pstate_enable(true); + if (ret) + pr_err("failed to enable amd pstate during resume, return %d\n", ret); + + value = READ_ONCE(cpudata->cppc_req_cached); + max_perf = READ_ONCE(cpudata->highest_perf); + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + } else { + perf_ctrls.max_perf = max_perf; + perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); + cppc_set_perf(cpudata->cpu, &perf_ctrls); + } +} + +static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); + + if (cppc_state == AMD_PSTATE_ACTIVE) { + amd_pstate_epp_reenable(cpudata); + cpudata->suspended = false; + } + + return 0; +} + +static void amd_pstate_epp_offline(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + struct cppc_perf_ctrls perf_ctrls; + int min_perf; + u64 value; + + min_perf = READ_ONCE(cpudata->lowest_perf); + value = READ_ONCE(cpudata->cppc_req_cached); + + mutex_lock(&amd_pstate_limits_lock); + if (boot_cpu_has(X86_FEATURE_CPPC)) { + cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; + + /* Set max perf same as min perf */ + value &= ~AMD_CPPC_MAX_PERF(~0L); + value |= AMD_CPPC_MAX_PERF(min_perf); + value &= ~AMD_CPPC_MIN_PERF(~0L); + value |= AMD_CPPC_MIN_PERF(min_perf); + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + } else { + perf_ctrls.desired_perf = 0; + perf_ctrls.max_perf = min_perf; + perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); + cppc_set_perf(cpudata->cpu, &perf_ctrls); + } + mutex_unlock(&amd_pstate_limits_lock); +} + +static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu); + + if (cpudata->suspended) + return 0; + + if (cppc_state == AMD_PSTATE_ACTIVE) + amd_pstate_epp_offline(policy); + + return 0; +} + +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) +{ + cpufreq_verify_within_cpu_limits(policy); + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min); + return 0; +} + +static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + int ret; + + /* avoid suspending when EPP is not enabled */ + if (cppc_state != AMD_PSTATE_ACTIVE) + return 0; + + /* set this flag to avoid setting core offline*/ + cpudata->suspended = true; + + /* disable CPPC in lowlevel firmware */ + ret = amd_pstate_enable(false); + if (ret) + pr_err("failed to 
suspend, return %d\n", ret); + + return 0; +} + +static int amd_pstate_epp_resume(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + if (cpudata->suspended) { + mutex_lock(&amd_pstate_limits_lock); + + /* enable amd pstate from suspend state*/ + amd_pstate_epp_reenable(cpudata); + + mutex_unlock(&amd_pstate_limits_lock); + + cpudata->suspended = false; + } + + return 0; +} + static struct cpufreq_driver amd_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, .verify = amd_pstate_verify, @@ -617,6 +1237,20 @@ static struct cpufreq_driver amd_pstate_driver = { .attr = amd_pstate_attr, }; +static struct cpufreq_driver amd_pstate_epp_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = amd_pstate_epp_verify_policy, + .setpolicy = amd_pstate_epp_set_policy, + .init = amd_pstate_epp_cpu_init, + .exit = amd_pstate_epp_cpu_exit, + .offline = amd_pstate_epp_cpu_offline, + .online = amd_pstate_epp_cpu_online, + .suspend = amd_pstate_epp_suspend, + .resume = amd_pstate_epp_resume, + .name = "amd_pstate_epp", + .attr = amd_pstate_epp_attr, +}; + static int __init amd_pstate_init(void) { int ret; @@ -626,10 +1260,10 @@ static int __init amd_pstate_init(void) /* * by default the pstate driver is disabled to load * enable the amd_pstate passive mode driver explicitly - * with amd_pstate=passive in kernel command line + * with amd_pstate=passive or other modes in kernel command line */ - if (!cppc_load) { - pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n"); + if (cppc_state == AMD_PSTATE_DISABLE) { + pr_debug("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; } @@ -645,7 +1279,8 @@ static int __init amd_pstate_init(void) /* capability check */ if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; + if (cppc_state == AMD_PSTATE_PASSIVE) + current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; } else { pr_debug("AMD CPPC shared memory based functionality is supported\n"); static_call_update(amd_pstate_enable, cppc_enable); @@ -656,31 +1291,63 @@ static int __init amd_pstate_init(void) /* enable amd pstate feature */ ret = amd_pstate_enable(true); if (ret) { - pr_err("failed to enable amd-pstate with return %d\n", ret); + pr_err("failed to enable with return %d\n", ret); return ret; } - ret = cpufreq_register_driver(&amd_pstate_driver); + ret = cpufreq_register_driver(current_pstate_driver); if (ret) - pr_err("failed to register amd_pstate_driver with return %d\n", - ret); + pr_err("failed to register with return %d\n", ret); + + amd_pstate_kobj = kobject_create_and_add("amd_pstate", &cpu_subsys.dev_root->kobj); + if (!amd_pstate_kobj) { + ret = -EINVAL; + pr_err("global sysfs registration failed.\n"); + goto kobject_free; + } + + ret = sysfs_create_group(amd_pstate_kobj, &amd_pstate_global_attr_group); + if (ret) { + pr_err("sysfs attribute export failed with error %d.\n", ret); + goto global_attr_free; + } return ret; + +global_attr_free: + kobject_put(amd_pstate_kobj); +kobject_free: + cpufreq_unregister_driver(current_pstate_driver); + return ret; } device_initcall(amd_pstate_init); static int __init amd_pstate_param(char *str) { + size_t size; + int mode_idx; + if (!str) return -EINVAL; - if (!strcmp(str, "disable")) { - cppc_load = 0; - pr_info("driver is explicitly disabled\n"); - } else if (!strcmp(str, "passive")) - cppc_load = 1; + size = strlen(str); + 
mode_idx = get_mode_idx_from_str(str, size); - return 0; + if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { + cppc_state = mode_idx; + if (cppc_state == AMD_PSTATE_DISABLE) + pr_info("driver is explicitly disabled\n"); + + if (cppc_state == AMD_PSTATE_ACTIVE) + current_pstate_driver = &amd_pstate_epp_driver; + + if (cppc_state == AMD_PSTATE_PASSIVE) + current_pstate_driver = &amd_pstate_driver; + + return 0; + } + + return -EINVAL; } early_param("amd_pstate", amd_pstate_param); diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 4153150e20db..ffea6402189d 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -751,10 +751,7 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev) static int brcm_avs_cpufreq_remove(struct platform_device *pdev) { - int ret; - - ret = cpufreq_unregister_driver(&brcm_avs_driver); - WARN_ON(ret); + cpufreq_unregister_driver(&brcm_avs_driver); brcm_avs_prepare_uninit(pdev); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7e56a42750ea..6d8fd3b8dcb5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -993,7 +993,7 @@ static const struct sysfs_ops sysfs_ops = { .store = store, }; -static struct kobj_type ktype_cpufreq = { +static const struct kobj_type ktype_cpufreq = { .sysfs_ops = &sysfs_ops, .default_groups = cpufreq_groups, .release = cpufreq_sysfs_release, @@ -2904,12 +2904,12 @@ EXPORT_SYMBOL_GPL(cpufreq_register_driver); * Returns zero if successful, and -EINVAL if the cpufreq_driver is * currently not initialised. */ -int cpufreq_unregister_driver(struct cpufreq_driver *driver) +void cpufreq_unregister_driver(struct cpufreq_driver *driver) { unsigned long flags; - if (!cpufreq_driver || (driver != cpufreq_driver)) - return -EINVAL; + if (WARN_ON(!cpufreq_driver || (driver != cpufreq_driver))) + return; pr_debug("unregistering driver %s\n", driver->name); @@ -2926,8 +2926,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) write_unlock_irqrestore(&cpufreq_driver_lock, flags); cpus_read_unlock(); - - return 0; } EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 9e97f60f8199..ebb3a8102681 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -133,12 +133,14 @@ static int __init davinci_cpufreq_probe(struct platform_device *pdev) static int __exit davinci_cpufreq_remove(struct platform_device *pdev) { + cpufreq_unregister_driver(&davinci_driver); + clk_put(cpufreq.armclk); if (cpufreq.asyncclk) clk_put(cpufreq.asyncclk); - return cpufreq_unregister_driver(&davinci_driver); + return 0; } static struct platform_driver davinci_cpufreq_driver = { diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c deleted file mode 100644 index fb72d709db56..000000000000 --- a/drivers/cpufreq/loongson1-cpufreq.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * CPU Frequency Scaling for Loongson 1 SoC - * - * Copyright (C) 2014-2016 Zhang, Keguang <keguang.zhang@gmail.com> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- */ - -#include <linux/clk.h> -#include <linux/clk-provider.h> -#include <linux/cpu.h> -#include <linux/cpufreq.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/module.h> -#include <linux/platform_device.h> -#include <linux/slab.h> - -#include <cpufreq.h> -#include <loongson1.h> - -struct ls1x_cpufreq { - struct device *dev; - struct clk *clk; /* CPU clk */ - struct clk *mux_clk; /* MUX of CPU clk */ - struct clk *pll_clk; /* PLL clk */ - struct clk *osc_clk; /* OSC clk */ - unsigned int max_freq; - unsigned int min_freq; -}; - -static struct ls1x_cpufreq *cpufreq; - -static int ls1x_cpufreq_notifier(struct notifier_block *nb, - unsigned long val, void *data) -{ - if (val == CPUFREQ_POSTCHANGE) - current_cpu_data.udelay_val = loops_per_jiffy; - - return NOTIFY_OK; -} - -static struct notifier_block ls1x_cpufreq_notifier_block = { - .notifier_call = ls1x_cpufreq_notifier -}; - -static int ls1x_cpufreq_target(struct cpufreq_policy *policy, - unsigned int index) -{ - struct device *cpu_dev = get_cpu_device(policy->cpu); - unsigned int old_freq, new_freq; - - old_freq = policy->cur; - new_freq = policy->freq_table[index].frequency; - - /* - * The procedure of reconfiguring CPU clk is as below. - * - * - Reparent CPU clk to OSC clk - * - Reset CPU clock (very important) - * - Reconfigure CPU DIV - * - Reparent CPU clk back to CPU DIV clk - */ - - clk_set_parent(policy->clk, cpufreq->osc_clk); - __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) | RST_CPU_EN | RST_CPU, - LS1X_CLK_PLL_DIV); - __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) & ~(RST_CPU_EN | RST_CPU), - LS1X_CLK_PLL_DIV); - clk_set_rate(cpufreq->mux_clk, new_freq * 1000); - clk_set_parent(policy->clk, cpufreq->mux_clk); - dev_dbg(cpu_dev, "%u KHz --> %u KHz\n", old_freq, new_freq); - - return 0; -} - -static int ls1x_cpufreq_init(struct cpufreq_policy *policy) -{ - struct device *cpu_dev = get_cpu_device(policy->cpu); - struct cpufreq_frequency_table *freq_tbl; - unsigned int pll_freq, freq; - int steps, i; - - pll_freq = clk_get_rate(cpufreq->pll_clk) / 1000; - - steps = 1 << DIV_CPU_WIDTH; - freq_tbl = kcalloc(steps, sizeof(*freq_tbl), GFP_KERNEL); - if (!freq_tbl) - return -ENOMEM; - - for (i = 0; i < (steps - 1); i++) { - freq = pll_freq / (i + 1); - if ((freq < cpufreq->min_freq) || (freq > cpufreq->max_freq)) - freq_tbl[i].frequency = CPUFREQ_ENTRY_INVALID; - else - freq_tbl[i].frequency = freq; - dev_dbg(cpu_dev, - "cpufreq table: index %d: frequency %d\n", i, - freq_tbl[i].frequency); - } - freq_tbl[i].frequency = CPUFREQ_TABLE_END; - - policy->clk = cpufreq->clk; - cpufreq_generic_init(policy, freq_tbl, 0); - - return 0; -} - -static int ls1x_cpufreq_exit(struct cpufreq_policy *policy) -{ - kfree(policy->freq_table); - return 0; -} - -static struct cpufreq_driver ls1x_cpufreq_driver = { - .name = "cpufreq-ls1x", - .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = cpufreq_generic_frequency_table_verify, - .target_index = ls1x_cpufreq_target, - .get = cpufreq_generic_get, - .init = ls1x_cpufreq_init, - .exit = ls1x_cpufreq_exit, - .attr = cpufreq_generic_attr, -}; - -static int ls1x_cpufreq_remove(struct platform_device *pdev) -{ - cpufreq_unregister_notifier(&ls1x_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - cpufreq_unregister_driver(&ls1x_cpufreq_driver); - - return 0; -} - -static int ls1x_cpufreq_probe(struct platform_device *pdev) -{ - struct plat_ls1x_cpufreq *pdata = dev_get_platdata(&pdev->dev); - struct clk *clk; - int ret; - - if (!pdata || !pdata->clk_name || !pdata->osc_clk_name) { - 
dev_err(&pdev->dev, "platform data missing\n"); - return -EINVAL; - } - - cpufreq = - devm_kzalloc(&pdev->dev, sizeof(struct ls1x_cpufreq), GFP_KERNEL); - if (!cpufreq) - return -ENOMEM; - - cpufreq->dev = &pdev->dev; - - clk = devm_clk_get(&pdev->dev, pdata->clk_name); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get %s clock\n", - pdata->clk_name); - return PTR_ERR(clk); - } - cpufreq->clk = clk; - - clk = clk_get_parent(clk); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get parent of %s clock\n", - __clk_get_name(cpufreq->clk)); - return PTR_ERR(clk); - } - cpufreq->mux_clk = clk; - - clk = clk_get_parent(clk); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get parent of %s clock\n", - __clk_get_name(cpufreq->mux_clk)); - return PTR_ERR(clk); - } - cpufreq->pll_clk = clk; - - clk = devm_clk_get(&pdev->dev, pdata->osc_clk_name); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get %s clock\n", - pdata->osc_clk_name); - return PTR_ERR(clk); - } - cpufreq->osc_clk = clk; - - cpufreq->max_freq = pdata->max_freq; - cpufreq->min_freq = pdata->min_freq; - - ret = cpufreq_register_driver(&ls1x_cpufreq_driver); - if (ret) { - dev_err(&pdev->dev, - "failed to register CPUFreq driver: %d\n", ret); - return ret; - } - - ret = cpufreq_register_notifier(&ls1x_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - - if (ret) { - dev_err(&pdev->dev, - "failed to register CPUFreq notifier: %d\n",ret); - cpufreq_unregister_driver(&ls1x_cpufreq_driver); - } - - return ret; -} - -static struct platform_driver ls1x_cpufreq_platdrv = { - .probe = ls1x_cpufreq_probe, - .remove = ls1x_cpufreq_remove, - .driver = { - .name = "ls1x-cpufreq", - }, -}; - -module_platform_driver(ls1x_cpufreq_platdrv); - -MODULE_ALIAS("platform:ls1x-cpufreq"); -MODULE_AUTHOR("Kelvin Cheung <keguang.zhang@gmail.com>"); -MODULE_DESCRIPTION("Loongson1 CPUFreq driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index f80339779084..b22f5cc8a463 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -317,13 +317,16 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev) { - return cpufreq_unregister_driver(&cpufreq_mtk_hw_driver); + cpufreq_unregister_driver(&cpufreq_mtk_hw_driver); + + return 0; } static const struct of_device_id mtk_cpufreq_hw_match[] = { { .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_offsets }, {} }; +MODULE_DEVICE_TABLE(of, mtk_cpufreq_hw_match); static struct platform_driver mtk_cpufreq_hw_driver = { .probe = mtk_cpufreq_hw_driver_probe, diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 1b50df06c6bc..81649a1969b6 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -184,7 +184,9 @@ static int omap_cpufreq_probe(struct platform_device *pdev) static int omap_cpufreq_remove(struct platform_device *pdev) { - return cpufreq_unregister_driver(&omap_driver); + cpufreq_unregister_driver(&omap_driver); + + return 0; } static struct platform_driver omap_cpufreq_platdrv = { diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index d3f55ca06ed3..2f581d2d617d 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -770,7 +770,9 @@ of_exit: static int qcom_cpufreq_hw_driver_remove(struct platform_device *pdev) { - return 
cpufreq_unregister_driver(&cpufreq_qcom_hw_driver); + cpufreq_unregister_driver(&cpufreq_qcom_hw_driver); + + return 0; } static struct platform_driver qcom_cpufreq_hw_driver = { diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index 4596c3e323aa..5890e25d7f77 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -411,7 +411,8 @@ static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy, static struct cpufreq_driver tegra194_cpufreq_driver = { .name = "tegra194", - .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, .target_index = tegra194_cpufreq_set_target, .get = tegra194_get_speed, diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index ff71dd662880..cac5997dca50 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -74,6 +74,7 @@ endmenu config HALTPOLL_CPUIDLE tristate "Halt poll cpuidle driver" depends on X86 && KVM_GUEST + select CPU_IDLE_GOV_HALTPOLL default y help This option enables halt poll cpuidle driver, which allows to poll diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index 747aa537389b..a1ee475d180d 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -24,6 +24,14 @@ config ARM_PSCI_CPUIDLE It provides an idle driver that is capable of detecting and managing idle states through the PSCI firmware interface. + The driver has limitations when used with PREEMPT_RT: + - If the idle states are described with the non-hierarchical layout, + all idle states are still available. + + - If the idle states are described with the hierarchical layout, + only the idle states defined per CPU are available, but not the ones + being shared among a group of CPUs (aka cluster idle states). + config ARM_PSCI_CPUIDLE_DOMAIN bool "PSCI CPU idle Domain" depends on ARM_PSCI_CPUIDLE @@ -102,6 +110,7 @@ config ARM_MVEBU_V7_CPUIDLE config ARM_TEGRA_CPUIDLE bool "CPU Idle Driver for NVIDIA Tegra SoCs" depends on (ARCH_TEGRA || COMPILE_TEST) && !ARM64 && MMU + depends on ARCH_SUSPEND_POSSIBLE select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP select ARM_CPU_SUSPEND help @@ -110,6 +119,7 @@ config ARM_TEGRA_CPUIDLE config ARM_QCOM_SPM_CPUIDLE bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)" depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU + depends on ARCH_SUSPEND_POSSIBLE select ARM_CPU_SUSPEND select CPU_IDLE_MULTIPLE_DRIVERS select DT_IDLE_STATES diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 3a39a7f48b77..e66df22f9695 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -32,7 +32,7 @@ static int default_enter_idle(struct cpuidle_device *dev, local_irq_enable(); return index; } - default_idle(); + arch_cpu_idle(); return index; } diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index c80cf9ddabd8..6ad2954948a5 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -64,8 +64,11 @@ static int psci_pd_init(struct device_node *np, bool use_osi) pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN; - /* Allow power off when OSI has been successfully enabled. */ - if (use_osi) + /* + * Allow power off when OSI has been successfully enabled. + * PREEMPT_RT is not yet ready to enter domain idle states. 
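With cpufreq_unregister_driver() now returning void (and warning internally on a mismatched driver), the platform remove callbacks converted above (davinci, mediatek, omap, qcom) all reduce to the same pattern; example_cpufreq_driver below is hypothetical:

/* Remove-callback pattern after the void conversion: the unregister
 * call can no longer fail, so the callback simply returns 0. */
static int example_cpufreq_remove(struct platform_device *pdev)
{
	cpufreq_unregister_driver(&example_cpufreq_driver);

	return 0;
}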
+ */ + if (use_osi && !IS_ENABLED(CONFIG_PREEMPT_RT)) pd->power_off = psci_pd_power_off; else pd->flags |= GENPD_FLAG_ALWAYS_ON; diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 312a34ef28dc..6de027f9f6f5 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -222,6 +222,9 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv, if (!psci_has_osi_support()) return 0; + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return 0; + data->dev = psci_dt_attach_cpu(cpu); if (IS_ERR_OR_NULL(data->dev)) return PTR_ERR_OR_ZERO(data->dev); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index f70aa17e2a8e..d9cda7f6ccb9 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -183,11 +183,15 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) s->target_residency_ns = s->target_residency * NSEC_PER_USEC; else if (s->target_residency_ns < 0) s->target_residency_ns = 0; + else + s->target_residency = div_u64(s->target_residency_ns, NSEC_PER_USEC); if (s->exit_latency > 0) s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; else if (s->exit_latency_ns < 0) s->exit_latency_ns = 0; + else + s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC); } } diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index d9262db79cae..987fc5f3997d 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -2,8 +2,13 @@ /* * Timer events oriented CPU idle governor * + * TEO governor: * Copyright (C) 2018 - 2021 Intel Corporation * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + * + * Util-awareness mechanism: + * Copyright (C) 2022 Arm Ltd. + * Author: Kajetan Puchalski <kajetan.puchalski@arm.com> */ /** @@ -99,15 +104,56 @@ * select the given idle state instead of the candidate one. * * 3. By default, select the candidate state. + * + * Util-awareness mechanism: + * + * The idea behind the util-awareness extension is that there are two distinct + * scenarios for the CPU which should result in two different approaches to idle + * state selection - utilized and not utilized. + * + * In this case, 'utilized' means that the average runqueue util of the CPU is + * above a certain threshold. + * + * When the CPU is utilized while going into idle, more likely than not it will + * be woken up to do more work soon and so a shallower idle state should be + * selected to minimise latency and maximise performance. When the CPU is not + * being utilized, the usual metrics-based approach to selecting the deepest + * available idle state should be preferred to take advantage of the power + * saving. + * + * In order to achieve this, the governor uses a utilization threshold. + * The threshold is computed per-CPU as a percentage of the CPU's capacity + * by bit shifting the capacity value. Based on testing, the shift of 6 (~1.56%) + * seems to be getting the best results. + * + * Before selecting the next idle state, the governor compares the current CPU + * util to the precomputed util threshold. If it's below, it defaults to the + * TEO metrics mechanism. If it's above, the closest shallower idle state will + * be selected instead, as long as is not a polling state. 
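The threshold arithmetic described in the util-awareness comment above is a single right shift of the CPU capacity; for the default shift of 6 and the usual capacity scale this works out as follows:

#include <stdio.h>

#define UTIL_THRESHOLD_SHIFT 6

int main(void)
{
	/* arch_scale_cpu_capacity() reports 1024 for a full-capacity CPU */
	unsigned long capacity = 1024;
	unsigned long threshold = capacity >> UTIL_THRESHOLD_SHIFT;

	/* 1024 >> 6 == 16, i.e. ~1.56% of capacity */
	printf("util threshold = %lu\n", threshold);
	return 0;
}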
*/ #include <linux/cpuidle.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/sched.h> #include <linux/sched/clock.h> +#include <linux/sched/topology.h> #include <linux/tick.h> /* + * The number of bits to shift the CPU's capacity by in order to determine + * the utilized threshold. + * + * 6 was chosen based on testing as the number that achieved the best balance + * of power and performance on average. + * + * The resulting threshold is high enough to not be triggered by background + * noise and low enough to react quickly when activity starts to ramp up. + */ +#define UTIL_THRESHOLD_SHIFT 6 + + +/* * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value * is used for decreasing metrics on a regular basis. */ @@ -137,9 +183,11 @@ struct teo_bin { * @time_span_ns: Time between idle state selection and post-wakeup update. * @sleep_length_ns: Time till the closest timer event (at the selection time). * @state_bins: Idle state data bins for this CPU. - * @total: Grand total of the "intercepts" and "hits" mertics for all bins. + * @total: Grand total of the "intercepts" and "hits" metrics for all bins. * @next_recent_idx: Index of the next @recent_idx entry to update. * @recent_idx: Indices of bins corresponding to recent "intercepts". + * @util_threshold: Threshold above which the CPU is considered utilized + * @utilized: Whether the last sleep on the CPU happened while utilized */ struct teo_cpu { s64 time_span_ns; @@ -148,11 +196,30 @@ struct teo_cpu { unsigned int total; int next_recent_idx; int recent_idx[NR_RECENT]; + unsigned long util_threshold; + bool utilized; }; static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); /** + * teo_cpu_is_utilized - Check if the CPU's util is above the threshold + * @cpu: Target CPU + * @cpu_data: Governor CPU data for the target CPU + */ +#ifdef CONFIG_SMP +static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data) +{ + return sched_cpu_util(cpu) > cpu_data->util_threshold; +} +#else +static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data) +{ + return false; +} +#endif + +/** * teo_update - Update CPU metrics after wakeup. * @drv: cpuidle driver containing state data. * @dev: Target CPU. @@ -258,15 +325,17 @@ static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv) * @dev: Target CPU. * @state_idx: Index of the capping idle state. * @duration_ns: Idle duration value to match. + * @no_poll: Don't consider polling states. */ static int teo_find_shallower_state(struct cpuidle_driver *drv, struct cpuidle_device *dev, int state_idx, - s64 duration_ns) + s64 duration_ns, bool no_poll) { int i; for (i = state_idx - 1; i >= 0; i--) { - if (dev->states_usage[i].disable) + if (dev->states_usage[i].disable || + (no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING)) continue; state_idx = i; @@ -321,6 +390,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto end; } + cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); + /* + * If the CPU is being utilized over the threshold and there are only 2 + * states to choose from, the metrics need not be considered, so choose + * the shallowest non-polling state and exit. 
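teo_find_shallower_state() gains a no_poll flag above; a standalone model of the scan it performs, stepping down from the capping state, skipping disabled (and, when asked, polling) states, and stopping once the target residency fits:

#include <stdbool.h>

struct model_state {
	bool disabled;
	bool polling;
	long long target_residency_ns;
};

/* Standalone model of teo_find_shallower_state(). */
static int find_shallower(const struct model_state *states, int state_idx,
			  long long duration_ns, bool no_poll)
{
	int i;

	for (i = state_idx - 1; i >= 0; i--) {
		if (states[i].disabled || (no_poll && states[i].polling))
			continue;

		state_idx = i;
		if (states[i].target_residency_ns <= duration_ns)
			break;
	}
	return state_idx;
}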
+ */ + if (drv->state_count < 3 && cpu_data->utilized) { + for (i = 0; i < drv->state_count; ++i) { + if (!dev->states_usage[i].disable && + !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) { + idx = i; + goto end; + } + } + } + /* * Find the deepest idle state whose target residency does not exceed * the current sleep length and the deepest idle state not deeper than @@ -452,6 +537,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx > constraint_idx) idx = constraint_idx; + /* + * If the CPU is being utilized over the threshold, choose a shallower + * non-polling state to improve latency + */ + if (cpu_data->utilized) + idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true); + end: /* * Don't stop the tick if the selected state is a polling one or if the @@ -469,7 +561,7 @@ end: */ if (idx > idx0 && drv->states[idx].target_residency_ns > delta_tick) - idx = teo_find_shallower_state(drv, dev, idx, delta_tick); + idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false); } return idx; @@ -508,9 +600,11 @@ static int teo_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); + unsigned long max_capacity = arch_scale_cpu_capacity(dev->cpu); int i; memset(cpu_data, 0, sizeof(*cpu_data)); + cpu_data->util_threshold = max_capacity >> UTIL_THRESHOLD_SHIFT; for (i = 0; i < NR_RECENT; i++) cpu_data->recent_idx[i] = -1; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 2b496a53cbca..48948b171749 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -200,7 +200,7 @@ static void cpuidle_sysfs_release(struct kobject *kobj) complete(&kdev->kobj_unregister); } -static struct kobj_type ktype_cpuidle = { +static const struct kobj_type ktype_cpuidle = { .sysfs_ops = &cpuidle_sysfs_ops, .release = cpuidle_sysfs_release, }; @@ -447,7 +447,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj) complete(&state_obj->kobj_unregister); } -static struct kobj_type ktype_state_cpuidle = { +static const struct kobj_type ktype_state_cpuidle = { .sysfs_ops = &cpuidle_state_sysfs_ops, .default_groups = cpuidle_state_default_groups, .release = cpuidle_state_sysfs_release, @@ -594,7 +594,7 @@ static struct attribute *cpuidle_driver_default_attrs[] = { }; ATTRIBUTE_GROUPS(cpuidle_driver_default); -static struct kobj_type ktype_driver_cpuidle = { +static const struct kobj_type ktype_driver_cpuidle = { .sysfs_ops = &cpuidle_driver_sysfs_ops, .default_groups = cpuidle_driver_default_groups, .release = cpuidle_driver_sysfs_release, diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index e2d64a8f9422..938c17f25d94 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1424,6 +1424,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), @@ -1859,6 +1860,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) skx_idle_state_table_update(); break; case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: spr_idle_state_table_update(); break; case 
INTEL_FAM6_ALDERLAKE: diff --git a/drivers/opp/Kconfig b/drivers/opp/Kconfig index e8ce47b32735..d7c649a1a981 100644 --- a/drivers/opp/Kconfig +++ b/drivers/opp/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config PM_OPP bool - select SRCU help SOCs have a standard set of tuples consisting of frequency and voltage pairs that the device will support per voltage domain. This diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 96a30a032c5f..2c7fb683441e 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -235,7 +235,7 @@ static void opp_migrate_dentry(struct opp_device *opp_dev, dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir, opp_table->dentry_name); - if (!dentry) { + if (IS_ERR(dentry)) { dev_err(dev, "%s: Failed to rename link from: %s to %s\n", __func__, dev_name(opp_dev->dev), dev_name(dev)); return; diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index fe86a09e3b67..c03b5402c03b 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -155,10 +155,12 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev, unsigned int run_duration_us, unsigned int idle_duration_us) { - if (run_duration_us && idle_duration_us) { + if (run_duration_us + idle_duration_us) { WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); } + if (!run_duration_us) + pr_debug("CPU is forced to 100 percent idle\n"); } /** @@ -201,7 +203,7 @@ int idle_inject_start(struct idle_inject_device *ii_dev) unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); - if (!idle_duration_us || !run_duration_us) + if (!(idle_duration_us + run_duration_us)) return -EINVAL; pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 26d00b1853b4..8970c7b80884 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -999,7 +999,15 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, do_div(value, rp->time_unit); y = ilog2(value); - f = div64_u64(4 * (value - (1 << y)), 1 << y); + + /* + * The target hardware field is 7 bits wide, so return all ones + * if the exponent is too large. 
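The RAPL time window is encoded in a 7-bit field, a 5-bit exponent Y plus a 2-bit fraction F representing 2^Y * (1 + F/4) time units, which is why an oversized exponent must return all ones (0x7f). A standalone model of the fixed encode path (assumes value >= 1):

#include <stdint.h>
#include <stdio.h>

/* Model of rapl_compute_time_window_core()'s encode path after the fix:
 * value is the window in time units; the result is (F << 5) | Y, or all
 * ones (0x7f) when Y would overflow its 5 bits. */
static uint64_t encode_time_window(uint64_t value)
{
	uint64_t y = 63 - __builtin_clzll(value);	/* ilog2(value) */
	uint64_t f;

	if (y > 0x1f)
		return 0x7f;

	f = (4 * (value - (1ULL << y))) / (1ULL << y);
	return (y & 0x1f) | ((f & 0x3) << 5);
}

int main(void)
{
	/* 40 time units: y = 5, f = 1, since 2^5 * (1 + 1/4) = 40 */
	printf("0x%02llx\n", (unsigned long long)encode_time_window(40));
	return 0;
}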
+ */ + if (y > 0x1f) + return 0x7f; + + f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y); value = (y & 0x1f) | ((f & 0x3) << 5); } return value; @@ -1113,7 +1121,10 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 1f968353d479..e180dee0f83d 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -530,9 +530,6 @@ struct powercap_zone *powercap_register_zone( power_zone->name = kstrdup(name, GFP_KERNEL); if (!power_zone->name) goto err_name_alloc; - dev_set_name(&power_zone->dev, "%s:%x", - dev_name(power_zone->dev.parent), - power_zone->id); power_zone->constraints = kcalloc(nr_constraints, sizeof(*power_zone->constraints), GFP_KERNEL); @@ -555,9 +552,16 @@ struct powercap_zone *powercap_register_zone( power_zone->dev_attr_groups[0] = &power_zone->dev_zone_attr_group; power_zone->dev_attr_groups[1] = NULL; power_zone->dev.groups = power_zone->dev_attr_groups; + dev_set_name(&power_zone->dev, "%s:%x", + dev_name(power_zone->dev.parent), + power_zone->id); result = device_register(&power_zone->dev); - if (result) - goto err_dev_ret; + if (result) { + put_device(&power_zone->dev); + mutex_unlock(&control_type->lock); + + return ERR_PTR(result); + } control_type->nr_zones++; mutex_unlock(&control_type->lock); |
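The powercap_sys.c change above follows the driver-core rule that once device registration has been attempted, failure must be unwound with put_device() so the release callback frees the object, never with a direct kfree(). A generic sketch of the pattern (example_zone and its release are hypothetical, not the powercap types):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/slab.h>

struct example_zone {
	struct device dev;
	int id;
};

static void example_zone_release(struct device *dev)
{
	kfree(container_of(dev, struct example_zone, dev));
}

static struct example_zone *example_zone_register(struct device *parent, int id)
{
	struct example_zone *zone;
	int ret;

	zone = kzalloc(sizeof(*zone), GFP_KERNEL);
	if (!zone)
		return ERR_PTR(-ENOMEM);

	zone->id = id;
	zone->dev.parent = parent;
	zone->dev.release = example_zone_release;
	dev_set_name(&zone->dev, "%s:%x", dev_name(parent), zone->id);

	ret = device_register(&zone->dev);
	if (ret) {
		/* frees zone through example_zone_release() */
		put_device(&zone->dev);
		return ERR_PTR(ret);
	}

	return zone;
}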