From 70b7ace397e9a5f4437fef40e8bdbab87997bb2f Mon Sep 17 00:00:00 2001 From: Naga Chumbalkar Date: Mon, 29 Jun 2009 19:53:41 +0000 Subject: [CPUFREQ] update Doc for cpuinfo_cur_freq and scaling_cur_freq I think the way "cpuinfo_cur_freq" and "scaling_cur_freq" are defined under ./Documentation/cpu-freq/user-guide.txt can be enhanced. Currently, they are both defined the same way: "Current speed/frequency of the CPU, in KHz". Below is a patch that distinguishes one from the other. Regards, - naga - ----------------------------------------- Update description for "cpuinfo_cur_freq" and "scaling_cur_freq". Some of the wording is drawn from comments found in ./drivers/cpufreq/cpufreq.c: cpufreq_out_of_sync(): * @old_freq: CPU frequency the kernel thinks the CPU runs at * @new_freq: CPU frequency the CPU actually runs at Signed-off-by: Naga Chumbalkar Signed-off-by: Dave Jones --- Documentation/cpu-freq/user-guide.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 5d5f5fadd1c2..2a5b850847c0 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -176,7 +176,9 @@ scaling_governor, and by "echoing" the name of another work on some specific architectures or processors. -cpuinfo_cur_freq : Current speed of the CPU, in KHz. +cpuinfo_cur_freq : Current frequency of the CPU as obtained from + the hardware, in KHz. This is the frequency + the CPU actually runs at. scaling_available_frequencies : List of available frequencies, in KHz. @@ -196,7 +198,10 @@ related_cpus : List of CPUs that need some sort of frequency scaling_driver : Hardware driver for cpufreq. -scaling_cur_freq : Current frequency of the CPU, in KHz. +scaling_cur_freq : Current frequency of the CPU as determined by + the governor and cpufreq core, in KHz. This is + the frequency the kernel thinks the CPU runs + at. 
If you have selected the "userspace" governor which allows you to set the CPU operating frequency to a specific value, you can read out -- cgit v1.2.3 From 6ffaaa012021823a38f3da3e24bd9f6d25a5b694 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 16:28:05 -0400 Subject: [CPUFREQ] Reduce scope of cpu_sys_dev in cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index fd69086d08d5..d5241b64807c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -773,7 +773,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) struct cpufreq_policy new_policy; struct cpufreq_policy *policy; struct freq_attr **drv_attr; - struct sys_device *cpu_sys_dev; unsigned long flags; unsigned int j; @@ -936,6 +935,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) /* symlink affected CPUs */ for_each_cpu(j, policy->cpus) { struct cpufreq_policy *managed_policy; + struct sys_device *cpu_sys_dev; if (j == cpu) continue; -- cgit v1.2.3 From 9a9527227bb763a57a7770e5159eb67481c89194 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 16:30:03 -0400 Subject: [CPUFREQ] cleanup up -ENOMEM handling in cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d5241b64807c..e4b4ab4b8a17 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -798,19 +798,16 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) goto module_out; } + ret = -ENOMEM; policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); - if (!policy) { - ret = -ENOMEM; + if (!policy) goto nomem_out; - } - if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { - ret = -ENOMEM; + + if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) goto err_free_policy; - } - if 
(!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { - ret = -ENOMEM; + + if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) goto err_free_cpumask; - } policy->cpu = cpu; cpumask_copy(policy->cpus, cpumask_of(cpu)); -- cgit v1.2.3 From aa28df7c1b3c701bed813b5cfa3d2744d05e7ab0 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 17:35:39 -0400 Subject: [CPUFREQ] Factor out symlink creation from cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 51 ++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e4b4ab4b8a17..ade15879e725 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -756,6 +756,34 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; +/* symlink affected CPUs */ +int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) +{ + unsigned int j; + int ret = 0; + + for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + struct sys_device *cpu_sys_dev; + + if (j == cpu) + continue; + if (!cpu_online(j)) + continue; + + dprintk("CPU %u already managed, adding link\n", j); + managed_policy = cpufreq_cpu_get(cpu); + cpu_sys_dev = get_cpu_sysdev(j); + ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, + "cpufreq"); + if (ret) { + cpufreq_cpu_put(managed_policy); + return ret; + } + } + return ret; +} + /** * cpufreq_add_dev - add a CPU device @@ -929,26 +957,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) } spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - /* symlink affected CPUs */ - for_each_cpu(j, policy->cpus) { - struct cpufreq_policy *managed_policy; - struct sys_device *cpu_sys_dev; - - if (j == cpu) - continue; - if (!cpu_online(j)) - continue; - - dprintk("CPU %u already managed, adding link\n", j); - managed_policy = cpufreq_cpu_get(cpu); - cpu_sys_dev = get_cpu_sysdev(j); - ret = 
sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, - "cpufreq"); - if (ret) { - cpufreq_cpu_put(managed_policy); - goto err_out_unregister; - } - } + ret = cpufreq_add_dev_symlink(cpu, policy->cpus, policy); + if (ret) + goto err_out_unregister; policy->governor = NULL; /* to assure that the starting sequence is * run in cpufreq_set_policy */ -- cgit v1.2.3 From 90b92316a175af17bdebc9f2aa170f51e22be2c7 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 18:05:42 -0400 Subject: [CPUFREQ] Factor out interface creation from cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 89 +++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ade15879e725..5d8109d52f64 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -784,6 +784,57 @@ int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) return ret; } +int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, + struct sys_device *sys_dev) +{ + struct freq_attr **drv_attr; + unsigned long flags; + int ret = 0; + unsigned int j; + + /* prepare interface data */ + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + &sys_dev->kobj, "cpufreq"); + if (ret) + return ret; + + /* set up files for this cpu device */ + drv_attr = cpufreq_driver->attr; + while ((drv_attr) && (*drv_attr)) { + ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); + if (ret) + goto err_out_kobj_put; + drv_attr++; + } + if (cpufreq_driver->get) { + ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); + if (ret) + goto err_out_kobj_put; + } + if (cpufreq_driver->target) { + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); + if (ret) + goto err_out_kobj_put; + } + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + for_each_cpu(j, policy->cpus) { + if (!cpu_online(j)) + continue; + 
per_cpu(cpufreq_cpu_data, j) = policy; + per_cpu(policy_cpu, j) = policy->cpu; + } + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + ret = cpufreq_add_dev_symlink(cpu, policy); + return ret; + +err_out_kobj_put: + kobject_put(&policy->kobj); + wait_for_completion(&policy->kobj_unregister); + return ret; +} + /** * cpufreq_add_dev - add a CPU device @@ -800,7 +851,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) int ret = 0; struct cpufreq_policy new_policy; struct cpufreq_policy *policy; - struct freq_attr **drv_attr; unsigned long flags; unsigned int j; @@ -923,41 +973,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) #endif memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); - /* prepare interface data */ - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, - "cpufreq"); - if (ret) - goto out_driver_exit; - - /* set up files for this cpu device */ - drv_attr = cpufreq_driver->attr; - while ((drv_attr) && (*drv_attr)) { - ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); - if (ret) - goto err_out_kobj_put; - drv_attr++; - } - if (cpufreq_driver->get) { - ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); - if (ret) - goto err_out_kobj_put; - } - if (cpufreq_driver->target) { - ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); - if (ret) - goto err_out_kobj_put; - } - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->cpus) { - if (!cpu_online(j)) - continue; - per_cpu(cpufreq_cpu_data, j) = policy; - per_cpu(policy_cpu, j) = policy->cpu; - } - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - ret = cpufreq_add_dev_symlink(cpu, policy->cpus, policy); + ret = cpufreq_add_dev_interface(cpu, policy, sys_dev); if (ret) goto err_out_unregister; @@ -990,7 +1006,6 @@ err_out_unregister: per_cpu(cpufreq_cpu_data, j) = NULL; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); -err_out_kobj_put: kobject_put(&policy->kobj); 
wait_for_completion(&policy->kobj_unregister); -- cgit v1.2.3 From 332ed7dd4b64e57c3cabe3127a0bd4a416bf4f10 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 18:48:47 -0400 Subject: [CPUFREQ] Factor out policy setting from cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 166 +++++++++++++++++++++++++--------------------- 1 file changed, 90 insertions(+), 76 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5d8109d52f64..7d8a7b11980b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -756,6 +756,75 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; + +int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, + struct sys_device *sys_dev) +{ + int ret = 0; +#ifdef CONFIG_SMP + unsigned long flags; + unsigned int j; + +#ifdef CONFIG_HOTPLUG_CPU + if (per_cpu(cpufreq_cpu_governor, cpu)) { + policy->governor = per_cpu(cpufreq_cpu_governor, cpu); + dprintk("Restoring governor %s for cpu %d\n", + policy->governor->name, cpu); + } +#endif + + for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + + if (cpu == j) + continue; + + /* Check for existing affected CPUs. + * They may not be aware of it due to CPU Hotplug. 
+ * cpufreq_cpu_put is called when the device is removed + * in __cpufreq_remove_dev() + */ + managed_policy = cpufreq_cpu_get(j); + if (unlikely(managed_policy)) { + + /* Set proper policy_cpu */ + unlock_policy_rwsem_write(cpu); + per_cpu(policy_cpu, cpu) = managed_policy->cpu; + + if (lock_policy_rwsem_write(cpu) < 0) { + /* Should not go through policy unlock path */ + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + cpufreq_cpu_put(managed_policy); + return -EBUSY; + } + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + cpumask_copy(managed_policy->cpus, policy->cpus); + per_cpu(cpufreq_cpu_data, cpu) = managed_policy; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + dprintk("CPU already managed, adding link\n"); + ret = sysfs_create_link(&sys_dev->kobj, + &managed_policy->kobj, + "cpufreq"); + if (ret) + cpufreq_cpu_put(managed_policy); + /* + * Success. We only needed to be added to the mask. + * Call driver->exit() because only the cpu parent of + * the kobj needed to call init(). 
+ */ + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + return ret; + } + } +#endif + return ret; +} + + /* symlink affected CPUs */ int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) { @@ -787,6 +856,7 @@ int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, struct sys_device *sys_dev) { + struct cpufreq_policy new_policy; struct freq_attr **drv_attr; unsigned long flags; int ret = 0; @@ -827,6 +897,23 @@ int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, spin_unlock_irqrestore(&cpufreq_driver_lock, flags); ret = cpufreq_add_dev_symlink(cpu, policy); + if (ret) + goto err_out_kobj_put; + + memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + /* assure that the starting sequence is run in __cpufreq_set_policy */ + policy->governor = NULL; + + /* set default policy */ + ret = __cpufreq_set_policy(policy, &new_policy); + policy->user_policy.policy = policy->policy; + policy->user_policy.governor = policy->governor; + + if (ret) { + dprintk("setting policy failed\n"); + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + } return ret; err_out_kobj_put: @@ -849,7 +936,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) { unsigned int cpu = sys_dev->id; int ret = 0; - struct cpufreq_policy new_policy; struct cpufreq_policy *policy; unsigned long flags; unsigned int j; @@ -914,82 +1000,14 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); -#ifdef CONFIG_SMP - -#ifdef CONFIG_HOTPLUG_CPU - if (per_cpu(cpufreq_cpu_governor, cpu)) { - policy->governor = per_cpu(cpufreq_cpu_governor, cpu); - dprintk("Restoring governor %s for cpu %d\n", - policy->governor->name, cpu); - } -#endif - - for_each_cpu(j, policy->cpus) { - struct cpufreq_policy *managed_policy; - - if (cpu == j) - continue; - - /* Check 
for existing affected CPUs. - * They may not be aware of it due to CPU Hotplug. - * cpufreq_cpu_put is called when the device is removed - * in __cpufreq_remove_dev() - */ - managed_policy = cpufreq_cpu_get(j); - if (unlikely(managed_policy)) { - - /* Set proper policy_cpu */ - unlock_policy_rwsem_write(cpu); - per_cpu(policy_cpu, cpu) = managed_policy->cpu; - - if (lock_policy_rwsem_write(cpu) < 0) { - /* Should not go through policy unlock path */ - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - ret = -EBUSY; - cpufreq_cpu_put(managed_policy); - goto err_free_cpumask; - } - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - cpumask_copy(managed_policy->cpus, policy->cpus); - per_cpu(cpufreq_cpu_data, cpu) = managed_policy; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - dprintk("CPU already managed, adding link\n"); - ret = sysfs_create_link(&sys_dev->kobj, - &managed_policy->kobj, - "cpufreq"); - if (ret) - cpufreq_cpu_put(managed_policy); - /* - * Success. We only needed to be added to the mask. - * Call driver->exit() because only the cpu parent of - * the kobj needed to call init(). 
- */ - goto out_driver_exit; /* call driver->exit() */ - } - } -#endif - memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + ret = cpufreq_add_dev_policy(cpu, policy, sys_dev); + if (ret) + goto err_unlock_policy; ret = cpufreq_add_dev_interface(cpu, policy, sys_dev); if (ret) goto err_out_unregister; - policy->governor = NULL; /* to assure that the starting sequence is - * run in cpufreq_set_policy */ - - /* set default policy */ - ret = __cpufreq_set_policy(policy, &new_policy); - policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; - - if (ret) { - dprintk("setting policy failed\n"); - goto err_out_unregister; - } - unlock_policy_rwsem_write(cpu); kobject_uevent(&policy->kobj, KOBJ_ADD); @@ -1009,10 +1027,6 @@ err_out_unregister: kobject_put(&policy->kobj); wait_for_completion(&policy->kobj_unregister); -out_driver_exit: - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - err_unlock_policy: unlock_policy_rwsem_write(cpu); err_free_cpumask: -- cgit v1.2.3 From 9147d2452311a45924d49a787ac8d835e646c8ff Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:03 +0200 Subject: [CPUFREQ] Bail out of cpufreq_add_dev if the link for a managed CPU got created Doing: echo 0 >cpu1/online echo 1 >cpu1/online on a managed CPU will result in: Jul 22 15:15:37 linux kernel: [ 80.013864] WARNING: at fs/sysfs/dir.c:487 sysfs_add_one+0xcf/0xe6() Jul 22 15:15:37 linux kernel: [ 80.013866] Hardware name: To Be Filled By O.E.M. Jul 22 15:15:37 linux kernel: [ 80.013868] sysfs: cannot create duplicate filename '/devices/system/cpu/cpu1/cpufreq' Jul 22 15:15:37 linux kernel: [ 80.013870] Modules linked in: powernow_k8 Jul 22 15:15:37 linux kernel: [ 80.013874] Pid: 5750, comm: bash Not tainted 2.6.31-rc2 #40 Jul 22 15:15:37 linux kernel: [ 80.013876] Call Trace: Jul 22 15:15:37 linux kernel: [ 80.013879] [] ? 
sysfs_add_one+0xcf/0xe6 Jul 22 15:15:37 linux kernel: [ 80.013884] [] warn_slowpath_common+0x77/0xa4 Jul 22 15:15:37 linux kernel: [ 80.013888] [] warn_slowpath_fmt+0x3c/0x3e Jul 22 15:15:37 linux kernel: [ 80.013891] [] sysfs_add_one+0xcf/0xe6 Jul 22 15:15:37 linux kernel: [ 80.013894] [] create_dir+0x58/0x87 Jul 22 15:15:37 linux kernel: [ 80.013898] [] sysfs_create_dir+0x38/0x4f Jul 22 15:15:37 linux kernel: [ 80.013902] [] kobject_add_internal+0x11f/0x1de Jul 22 15:15:37 linux kernel: [ 80.013905] [] kobject_add_varg+0x41/0x4e Jul 22 15:15:37 linux kernel: [ 80.013908] [] kobject_init_and_add+0x4c/0x57 Jul 22 15:15:37 linux kernel: [ 80.013913] [] ? mark_lock+0x22/0x228 Jul 22 15:15:37 linux kernel: [ 80.013918] [] cpufreq_add_dev_interface+0x40/0x1e4 ... This bug slipped in by git commit: 150b06f7f223cfd0f808737a5243cceca8ea47fa When splitting up cpufreq_add_dev, the whole cpufreq_add_dev function is not left anymore, only cpufreq_add_dev_policy. This patch should reconstruct the identical functionality again as it was before the split. 
CC: Venkatesh Pallipadi Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7d8a7b11980b..cd1c3532f02c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -756,7 +756,12 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; - +/* + * Returns: + * Negative: Failure + * 0: Success + * Positive: When we have a managed CPU and the sysfs got symlinked + */ int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, struct sys_device *sys_dev) { @@ -817,7 +822,11 @@ int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, */ if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - return ret; + + if (!ret) + return 1; + else + return ret; } } #endif @@ -1001,8 +1010,13 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) CPUFREQ_START, policy); ret = cpufreq_add_dev_policy(cpu, policy, sys_dev); - if (ret) + if (ret) { + if (ret > 0) + /* This is a managed cpu, symlink created, + exit with 0 */ + ret = 0; goto err_unlock_policy; + } ret = cpufreq_add_dev_interface(cpu, policy, sys_dev); if (ret) -- cgit v1.2.3 From 4438a02d6fcba5328028b6e3e70fed24ac3969b2 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:04 +0200 Subject: [CPUFREQ] Cleanup governor struct, remove userspace specific set_speed functions The store/show _setspeed functions in the general cpufreq governor struct are only used by the userspace governor. 
- Remove them from the struct - Move the sysfs set_speed store/show functions from cpufreq.c to cpufreq_userspace.c - Create and destroy /sys/../cpuX/cpufreq/scaling_setspeed inside the userspace governor instead of always setting it up in the cpufreq core This will result in a slightly modified sysfs behavior: Before, the scaling_setspeed always existed in the /sys/../cpuX/cpufreq directory. If the userspace governor was not active, reads resulted in output and writes failed with -EINVAL. Goal: Cleanup interface, make the governors core independent. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 30 ------------------------------ drivers/cpufreq/cpufreq_userspace.c | 34 ++++++++++++++++++++++++++++++++-- include/linux/cpufreq.h | 4 ---- 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cd1c3532f02c..b8b4807f23b2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -619,32 +619,6 @@ static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) return show_cpus(policy->cpus, buf); } -static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, - const char *buf, size_t count) -{ - unsigned int freq = 0; - unsigned int ret; - - if (!policy->governor || !policy->governor->store_setspeed) - return -EINVAL; - - ret = sscanf(buf, "%u", &freq); - if (ret != 1) - return -EINVAL; - - policy->governor->store_setspeed(policy, freq); - - return count; -} - -static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) -{ - if (!policy->governor || !policy->governor->show_setspeed) - return sprintf(buf, "\n"); - - return policy->governor->show_setspeed(policy, buf); -} - #define define_one_ro(_name) \ static struct freq_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) @@ -669,7 +643,6 @@ define_one_ro(affected_cpus); define_one_rw(scaling_min_freq); define_one_rw(scaling_max_freq); 
define_one_rw(scaling_governor); -define_one_rw(scaling_setspeed); static struct attribute *default_attrs[] = { &cpuinfo_min_freq.attr, @@ -682,7 +655,6 @@ static struct attribute *default_attrs[] = { &scaling_governor.attr, &scaling_driver.attr, &scaling_available_governors.attr, - &scaling_setspeed.attr, NULL }; @@ -1658,7 +1630,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, return ret; } - int cpufreq_register_governor(struct cpufreq_governor *governor) { int err; @@ -1693,7 +1664,6 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor) EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); - /********************************************************************* * POLICY INTERFACE * *********************************************************************/ diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 66d2d1d6c80f..b41f1a69df84 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -40,6 +40,33 @@ static int cpus_using_userspace_governor; #define dprintk(msg...) 
\ cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg) +static ssize_t show_speed(struct cpufreq_policy *policy, char *buf); +static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq); + +static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + unsigned int freq = 0; + unsigned int ret; + + ret = sscanf(buf, "%u", &freq); + if (ret != 1) + return -EINVAL; + + cpufreq_set(policy, freq); + + return count; +} + +static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) +{ + return show_speed(policy, buf); +} + +static struct freq_attr scaling_setspeed = + __ATTR(scaling_setspeed, 0644, show_scaling_setspeed, + store_scaling_setspeed); + /* keep track of frequency transitions */ static int userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -120,6 +147,10 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, if (!cpu_online(cpu)) return -EINVAL; BUG_ON(!policy->cur); + rc = sysfs_create_file(&policy->kobj, &scaling_setspeed.attr); + if (rc) + return rc; + mutex_lock(&userspace_mutex); if (cpus_using_userspace_governor == 0) { @@ -144,6 +175,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, mutex_unlock(&userspace_mutex); break; case CPUFREQ_GOV_STOP: + sysfs_remove_file(&policy->kobj, &scaling_setspeed.attr); mutex_lock(&userspace_mutex); cpus_using_userspace_governor--; if (cpus_using_userspace_governor == 0) { @@ -193,8 +225,6 @@ static struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .governor = cpufreq_governor_userspace, - .store_setspeed = cpufreq_set, - .show_setspeed = show_speed, .owner = THIS_MODULE, }; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 161042746afc..642141aa0e7f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -166,10 +166,6 @@ struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; int (*governor) (struct cpufreq_policy 
*policy, unsigned int event); - ssize_t (*show_setspeed) (struct cpufreq_policy *policy, - char *buf); - int (*store_setspeed) (struct cpufreq_policy *policy, - unsigned int freq); unsigned int max_transition_latency; /* HW must be able to switch to next freq faster than this value in nano secs or we will fallback to performance governor */ -- cgit v1.2.3 From 96b5230382442cdcdb826aeb661cbfcb00999219 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:05 +0200 Subject: [CPUFREQ] Introduce global, not per core: /sys/devices/system/cpu/cpufreq Currently everything in the cpufreq layer is per core based. This does not reflect reality, for example ondemand and conservative governors have global sysfs variables. Introduce a global cpufreq directory and add the kobject to the governor struct, so that governors can easily access it. The directory is initialized in the cpufreq_core_init initcall and thus will always be created if cpufreq is compiled in, even if no cpufreq driver is active later. 
Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 9 ++++++++- include/linux/cpufreq.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b8b4807f23b2..56b338c25716 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -658,6 +658,9 @@ static struct attribute *default_attrs[] = { NULL }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -1986,7 +1989,11 @@ static int __init cpufreq_core_init(void) per_cpu(policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } + + cpufreq_global_kobject = kobject_create_and_add("cpufreq", + &cpu_sysdev_class.kset.kobj); + BUG_ON(!cpufreq_global_kobject); + return 0; } - core_initcall(cpufreq_core_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 642141aa0e7f..336017aed597 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,6 +65,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, struct cpufreq_governor; +/* /sys/devices/system/cpu/cpufreq: entry point for global variables */ +extern struct kobject *cpufreq_global_kobject; + #define CPUFREQ_ETERNAL (-1) struct cpufreq_cpuinfo { unsigned int max_freq; @@ -270,6 +273,13 @@ struct freq_attr { ssize_t (*store)(struct cpufreq_policy *, const char *, size_t count); }; +struct global_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *a, struct attribute *b, + const char *c, size_t count); +}; /********************************************************************* * CPUFREQ 2.6. 
INTERFACE * -- cgit v1.2.3 From f71421ab7b558e747001631ca232795b1d3a10bb Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:06 +0200 Subject: [CPUFREQ] ondemand - Use global sysfs dir for tuning settings Ondemand has only global variables for userspace tunings via sysfs. But they were exposed per CPU which wrongly implies to the user that his settings are applied per cpu. Also locking sysfs against concurrent access won't be necessary anymore after deprecation time. This means the ondemand config dir is moved: /sys/devices/system/cpu/cpu*/cpufreq/ondemand -> /sys/devices/system/cpu/cpufreq/ondemand The old files will still exist, but reading or writing to them will result in one (printk_once) deprecation msg to syslog per file. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 139 ++++++++++++++++++++++++++++++------- 1 file changed, 113 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index d6ba14276bb1..1a3e5c7252ff 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -55,6 +55,18 @@ static unsigned int min_sampling_rate; #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static +#endif +struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; /* Sampling types */ enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; @@ -206,20 +218,23 @@ static void ondemand_powersave_bias_init(void) } /************************** sysfs interface ************************/ -static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) + +static ssize_t show_sampling_rate_max(struct 
kobject *kobj, + struct attribute *attr, char *buf) { printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " "sysfs file is deprecated - used by: %s\n", current->comm); return sprintf(buf, "%u\n", -1U); } -static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) { return sprintf(buf, "%u\n", min_sampling_rate); } #define define_one_ro(_name) \ -static struct freq_attr _name = \ +static struct global_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) define_one_ro(sampling_rate_max); @@ -228,7 +243,7 @@ define_one_ro(sampling_rate_min); /* cpufreq_ondemand Governor Tunables */ #define show_one(file_name, object) \ static ssize_t show_##file_name \ -(struct cpufreq_policy *unused, char *buf) \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ } @@ -237,8 +252,38 @@ show_one(up_threshold, up_threshold); show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); -static ssize_t store_sampling_rate(struct cpufreq_policy *unused, - const char *buf, size_t count) +/*** delete after deprecation time ***/ + +#define DEPRECATION_MSG(file_name) \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " #file_name "\n"); + +#define show_one_old(file_name) \ +static ssize_t show_##file_name##_old \ +(struct cpufreq_policy *unused, char *buf) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return show_##file_name(NULL, NULL, buf); \ +} +show_one_old(sampling_rate); +show_one_old(up_threshold); +show_one_old(ignore_nice_load); +show_one_old(powersave_bias); +show_one_old(sampling_rate_min); +show_one_old(sampling_rate_max); + +#define define_one_ro_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0444, show_##_name##_old, NULL) + 
+define_one_ro_old(sampling_rate_min_old, sampling_rate_min); +define_one_ro_old(sampling_rate_max_old, sampling_rate_max); + +/*** delete after deprecation time ***/ + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -253,8 +298,8 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, return count; } -static ssize_t store_up_threshold(struct cpufreq_policy *unused, - const char *buf, size_t count) +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -272,8 +317,8 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, - const char *buf, size_t count) +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -309,8 +354,8 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, return count; } -static ssize_t store_powersave_bias(struct cpufreq_policy *unused, - const char *buf, size_t count) +static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -331,7 +376,7 @@ static ssize_t store_powersave_bias(struct cpufreq_policy *unused, } #define define_one_rw(_name) \ -static struct freq_attr _name = \ +static struct global_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); @@ -354,6 +399,47 @@ static struct attribute_group dbs_attr_group = { .name = "ondemand", }; +/*** delete after deprecation time ***/ + +#define write_one_old(file_name) \ +static ssize_t store_##file_name##_old \ +(struct cpufreq_policy *unused, const char *buf, size_t count) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " 
#file_name "\n"); \ + return store_##file_name(NULL, NULL, buf, count); \ +} +write_one_old(sampling_rate); +write_one_old(up_threshold); +write_one_old(ignore_nice_load); +write_one_old(powersave_bias); + +#define define_one_rw_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old) + +define_one_rw_old(sampling_rate_old, sampling_rate); +define_one_rw_old(up_threshold_old, up_threshold); +define_one_rw_old(ignore_nice_load_old, ignore_nice_load); +define_one_rw_old(powersave_bias_old, powersave_bias); + +static struct attribute *dbs_attributes_old[] = { + &sampling_rate_max_old.attr, + &sampling_rate_min_old.attr, + &sampling_rate_old.attr, + &up_threshold_old.attr, + &ignore_nice_load_old.attr, + &powersave_bias_old.attr, + NULL +}; + +static struct attribute_group dbs_attr_group_old = { + .attrs = dbs_attributes_old, + .name = "ondemand", +}; + +/*** delete after deprecation time ***/ + /************************** sysfs end ************************/ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) @@ -544,7 +630,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_mutex); - rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); + rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old); if (rc) { mutex_unlock(&dbs_mutex); return rc; @@ -565,13 +651,20 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, } this_dbs_info->cpu = cpu; ondemand_powersave_bias_init_cpu(cpu); - mutex_init(&this_dbs_info->timer_mutex); /* * Start the timerschedule work, when this governor * is used for first time */ if (dbs_enable == 1) { unsigned int latency; + + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + /* policy latency is in nS. 
Convert it to uS first */ latency = policy->cpuinfo.transition_latency / 1000; if (latency == 0) @@ -585,6 +678,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, } mutex_unlock(&dbs_mutex); + mutex_init(&this_dbs_info->timer_mutex); dbs_timer_init(this_dbs_info); break; @@ -592,10 +686,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_timer_exit(this_dbs_info); mutex_lock(&dbs_mutex); - sysfs_remove_group(&policy->kobj, &dbs_attr_group); + sysfs_remove_group(&policy->kobj, &dbs_attr_group_old); mutex_destroy(&this_dbs_info->timer_mutex); dbs_enable--; mutex_unlock(&dbs_mutex); + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); break; @@ -613,16 +710,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif -struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { int err; -- cgit v1.2.3 From 15fe1c0411f557a90d8e2eafe6889170a932c699 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:07 +0200 Subject: [CPUFREQ] Only allow machine wide governor switching The old cpu/cpu*/cpufreq/scaling_{governor,driver} will still be usable for some time. A deprecation message is printed once if one of the deprecated sysfs files is used: CPUFREQ: Per core cpufreq sysfs interface is deprecated - show_scaling_available_governors The new interface is placed statically (will never be removed) into the global /sys/devices/system/cpu/cpufreq dir. I found one inconsistency/bug while doing sysfs stress tests: cat cpu/cpu*/cpufreq/scaling_governor could (very rarely) show different governors, while only one should be active. 
I expect that to solve this, a global mutex between online/offlining (CPU_DOWN_PREPARE_FROZEN notifier event) and store_scaling_governor, around the while loop, is needed. Not sure whether this works out, but seeing different governors could happen before and should not be severe. Working around this can get ugly. As soon as the cpu* scaling_governor files have vanished this can of course not happen. Another "ugliness" which can be ripped out after deprecation time is to detect the store_governor access in the generic store() function and not do the rwsem_write locking. If this is not done a deadlock will happen between some kind of generic sysfs lock with the cpu online file and the rwsem lock: INFO: task switch_governor:5934 blocked for more than 120 seconds. 2 locks held by switch_governor/5934: Jul 23 17:14:22 linux kernel: [ 481.152132] #0: (&buffer->mutex){+.+.+.}, at: [] sysfs_write_file+0x38/0x119 Jul 23 17:14:22 linux kernel: [ 481.152138] #1: (&per_cpu(cpu_policy_rwsem, cpu)){+++++.}, at: [] lock_policy_rwsem_write+0x48/0x79 4 locks held by offline_cpus.sh/6665: Jul 23 17:14:22 linux kernel: [ 481.152244] #0: (&buffer->mutex){+.+.+.}, at: [] sysfs_write_file+0x38/0x119 Jul 23 17:14:22 linux kernel: [ 481.152249] #1: (cpu_add_remove_lock){+.+.+.}, at: [] cpu_down+0x28/0x82 Jul 23 17:14:22 linux kernel: [ 481.152254] #2: (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x22/0x50 Jul 23 17:14:22 linux kernel: [ 481.152259] #3: (&per_cpu(cpu_policy_rwsem, cpu)){+++++.}, at: [] lock_policy_rwsem_write+0x48/0x79 CC: Venkatesh Pallipadi Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 229 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 176 insertions(+), 53 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 56b338c25716..0d55280ebf16 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -39,10 +39,6 @@ */ static struct cpufreq_driver *cpufreq_driver; static
DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); -#ifdef CONFIG_HOTPLUG_CPU -/* This one keeps track of the previously set governor of a removed CPU */ -static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor); -#endif static DEFINE_SPINLOCK(cpufreq_driver_lock); /* @@ -128,6 +124,10 @@ static int __init init_cpufreq_transition_notifier_list(void) } pure_initcall(init_cpufreq_transition_notifier_list); +static struct cpufreq_governor *cpufreq_current_governor = + CPUFREQ_DEFAULT_GOVERNOR; +static int current_driver_policy; + static LIST_HEAD(cpufreq_governor_list); static DEFINE_MUTEX(cpufreq_governor_mutex); @@ -503,48 +503,99 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, /** * show_scaling_governor - show the current policy for the specified CPU */ -static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) + +static ssize_t show_scaling_governor(struct kobject *kobj, + struct attribute *attr, char *buf) { - if (policy->policy == CPUFREQ_POLICY_POWERSAVE) - return sprintf(buf, "powersave\n"); - else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) - return sprintf(buf, "performance\n"); - else if (policy->governor) + if (current_driver_policy) { + if (current_driver_policy == CPUFREQ_POLICY_POWERSAVE) + return sprintf(buf, "powersave\n"); + else if (current_driver_policy == CPUFREQ_POLICY_PERFORMANCE) + return sprintf(buf, "performance\n"); + } else if (cpufreq_current_governor) return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", - policy->governor->name); + cpufreq_current_governor->name); return -EINVAL; } - /** * store_scaling_governor - store policy for the specified CPU */ -static ssize_t store_scaling_governor(struct cpufreq_policy *policy, - const char *buf, size_t count) +static ssize_t store_scaling_governor(struct kobject *kobj, + struct attribute *attr, const char *buf, + size_t count) { unsigned int ret = -EINVAL; + int i, j; char str_governor[16]; + struct cpufreq_policy *old_policy; 
struct cpufreq_policy new_policy; - - ret = cpufreq_get_policy(&new_policy, policy->cpu); - if (ret) - return ret; + struct cpufreq_governor *old_gov = cpufreq_current_governor; + int old_driver_policy = current_driver_policy; + struct cpumask already_handled; + int is_handled = 0; + cpus_clear(already_handled); ret = sscanf(buf, "%15s", str_governor); if (ret != 1) return -EINVAL; - if (cpufreq_parse_governor(str_governor, &new_policy.policy, - &new_policy.governor)) + if (cpufreq_parse_governor(str_governor, &current_driver_policy, + &cpufreq_current_governor)) return -EINVAL; - /* Do not use cpufreq_set_policy here or the user_policy.max - will be wrongly overridden */ - ret = __cpufreq_set_policy(policy, &new_policy); + if (old_gov == cpufreq_current_governor && + old_driver_policy == current_driver_policy) + return count; - policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; + for_each_present_cpu(i) { + ret = cpufreq_get_policy(&new_policy, i); + if (ret) { + ret = 0; + continue; + } + + /* Do not set managed policies/cpus twice */ + for_each_cpu(j, new_policy.cpus) { + if (cpu_isset(j, already_handled)) + is_handled = 1; + } + if (is_handled) { + dprintk("CPU %d already handled\n", i); + is_handled = 0; + continue; + } + ret = unlikely(lock_policy_rwsem_write(i)); + if (ret) + continue; + + /* race can happen with offlining, handle it */ + if (cpu_is_offline(i)) { + unlock_policy_rwsem_write(i); + continue; + } + + old_policy = cpufreq_cpu_get(i); + if (!old_policy) { + unlock_policy_rwsem_write(i); + continue; + } + + new_policy.governor = cpufreq_current_governor; + new_policy.policy = current_driver_policy; + ret = __cpufreq_set_policy(old_policy, &new_policy); + cpufreq_cpu_put(old_policy); + WARN_ON(ret); + if (ret) { + unlock_policy_rwsem_write(i); + continue; + } + old_policy->user_policy.governor = cpufreq_current_governor; + old_policy->user_policy.policy = current_driver_policy; + unlock_policy_rwsem_write(i);
+ cpu_set(new_policy.cpu, already_handled); + } if (ret) return ret; else @@ -554,7 +605,8 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, /** * show_scaling_driver - show the cpufreq driver currently loaded */ -static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) +static ssize_t show_scaling_driver(struct kobject *kobj, + struct attribute *attr, char *buf) { return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); } @@ -562,7 +614,8 @@ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) /** * show_scaling_available_governors - show the available CPUfreq governors */ -static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, +static ssize_t show_scaling_available_governors(struct kobject *kobj, + struct attribute *attr, char *buf) { ssize_t i = 0; @@ -584,6 +637,61 @@ out: return i; } +/*** delete after deprecation time ***/ + +#define DEPRECATION_MSG(file_name) \ + printk_once(KERN_INFO "CPUFREQ: Per core cpufreq sysfs " \ + "interface is deprecated - " #file_name "\n"); + +static ssize_t show_scaling_governor_old(struct cpufreq_policy *policy, + char *buf) +{ + DEPRECATION_MSG(show_scaling_governor); + return show_scaling_governor(NULL, NULL, buf); +}; + +static ssize_t store_scaling_governor_old(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + int ret; + + DEPRECATION_MSG(store_scaling_governor); + cpufreq_cpu_put(policy); + ret = store_scaling_governor(NULL, NULL, buf, count); + cpufreq_cpu_get(policy->cpu); + return ret; +} + +static ssize_t show_scaling_driver_old(struct cpufreq_policy *policy, + char *buf) +{ + DEPRECATION_MSG(show_scaling_driver); + return show_scaling_driver(NULL, NULL, buf); +} + +static ssize_t show_scaling_available_governors_old(struct cpufreq_policy *p, + char *buf) +{ + DEPRECATION_MSG(show_scaling_available_governors); + return show_scaling_available_governors(NULL, NULL, buf); +} + +#define 
define_one_ro_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0444, show_##_name##_old, NULL) + +#define define_one_rw_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old) + +define_one_ro_old(scaling_available_governors_old, + scaling_available_governors); +define_one_ro_old(scaling_driver_old, scaling_driver); +define_one_rw_old(scaling_governor_old, scaling_governor); + +/*** delete after deprecation time ***/ + + static ssize_t show_cpus(const struct cpumask *mask, char *buf) { ssize_t i = 0; @@ -635,14 +743,11 @@ define_one_ro0400(cpuinfo_cur_freq); define_one_ro(cpuinfo_min_freq); define_one_ro(cpuinfo_max_freq); define_one_ro(cpuinfo_transition_latency); -define_one_ro(scaling_available_governors); -define_one_ro(scaling_driver); define_one_ro(scaling_cur_freq); define_one_ro(related_cpus); define_one_ro(affected_cpus); define_one_rw(scaling_min_freq); define_one_rw(scaling_max_freq); -define_one_rw(scaling_governor); static struct attribute *default_attrs[] = { &cpuinfo_min_freq.attr, @@ -652,12 +757,24 @@ static struct attribute *default_attrs[] = { &scaling_max_freq.attr, &affected_cpus.attr, &related_cpus.attr, - &scaling_governor.attr, - &scaling_driver.attr, - &scaling_available_governors.attr, + &scaling_governor_old.attr, + &scaling_driver_old.attr, + &scaling_available_governors_old.attr, NULL }; +#define define_one_global_ro(_name) \ +static struct global_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define define_one_global_rw(_name) \ +static struct global_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +define_one_global_ro(scaling_available_governors); +define_one_global_ro(scaling_driver); +define_one_global_rw(scaling_governor); + struct kobject *cpufreq_global_kobject; EXPORT_SYMBOL(cpufreq_global_kobject); @@ -694,19 +811,33 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct cpufreq_policy 
*policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; + /* + * TBD: remove scal_gov and rest after per cpu governor + * deprecation time + */ + int scal_gov = 0; policy = cpufreq_cpu_get(policy->cpu); if (!policy) goto no_policy; - if (lock_policy_rwsem_write(policy->cpu) < 0) - goto fail; + if (!strncmp(attr->name, "scaling_governor", 16)) { + printk(KERN_INFO "Scaling gov found\n"); + scal_gov = 1; + } + + if (!scal_gov) { + if (lock_policy_rwsem_write(policy->cpu) < 0) + goto fail; + } if (fattr->store) ret = fattr->store(policy, buf, count); else ret = -EIO; - unlock_policy_rwsem_write(policy->cpu); + if (!scal_gov) + unlock_policy_rwsem_write(policy->cpu); + scal_gov = 0; fail: cpufreq_cpu_put(policy); no_policy: @@ -745,14 +876,6 @@ int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, unsigned long flags; unsigned int j; -#ifdef CONFIG_HOTPLUG_CPU - if (per_cpu(cpufreq_cpu_governor, cpu)) { - policy->governor = per_cpu(cpufreq_cpu_governor, cpu); - dprintk("Restoring governor %s for cpu %d\n", - policy->governor->name, cpu); - } -#endif - for_each_cpu(j, policy->cpus) { struct cpufreq_policy *managed_policy; @@ -969,7 +1092,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) INIT_WORK(&policy->update, handle_update); /* Set governor before ->init, so that driver could check it */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->governor = cpufreq_current_governor; /* call driver. From then on the cpufreq must be able * to accept all calls to ->verify and ->setpolicy for this CPU */ @@ -1080,10 +1203,6 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) #ifdef CONFIG_SMP -#ifdef CONFIG_HOTPLUG_CPU - per_cpu(cpufreq_cpu_governor, cpu) = data->governor; -#endif - /* if we have other CPUs still registered, we need to unlink them, * or else wait_for_completion below will lock up. 
Clean the * per_cpu(cpufreq_cpu_data) while holding the lock, and remove @@ -1104,9 +1223,6 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) if (j == cpu) continue; dprintk("removing link for cpu %u\n", j); -#ifdef CONFIG_HOTPLUG_CPU - per_cpu(cpufreq_cpu_governor, j) = data->governor; -#endif cpu_sys_dev = get_cpu_sysdev(j); sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq"); cpufreq_cpu_put(data); @@ -1992,7 +2108,14 @@ static int __init cpufreq_core_init(void) cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_sysdev_class.kset.kobj); - BUG_ON(!cpufreq_global_kobject); + WARN_ON(!cpufreq_global_kobject); + + WARN_ON(sysfs_create_file(cpufreq_global_kobject, + &scaling_governor.attr)); + WARN_ON(sysfs_create_file(cpufreq_global_kobject, + &scaling_driver.attr)); + WARN_ON(sysfs_create_file(cpufreq_global_kobject, + &scaling_available_governors.attr)); return 0; } -- cgit v1.2.3 From 5141022ced0ae12f2c8828785c92d29909d7bd17 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 8 Jun 2009 13:17:31 -0400 Subject: [CPUFREQ] remove rwsem lock from CPUFREQ_GOV_STOP call (second call site) remove rwsem lock from CPUFREQ_GOV_STOP call (second call site) commit 42a06f2166f2f6f7bf04f32b4e823eacdceafdc9 Missed a call site for CPUFREQ_GOV_STOP to remove the rwlock taken around the teardown. To make a long story short, the rwlock write-lock causes a circular dependency with cancel_delayed_work_sync(), because the timer handler takes the read lock. Note that all callers to __cpufreq_set_policy are taking the rwsem. All sysfs callers (writers) hold the write rwsem at the earliest sysfs calling stage. However, the rwlock write-lock is not needed upon governor stop. Signed-off-by: Mathieu Desnoyers Acked-by: Venkatesh Pallipadi CC: rjw@sisk.pl CC: mingo@elte.hu CC: Shaohua Li CC: Pekka Enberg CC: Dave Young CC: "Rafael J. 
Wysocki" CC: Rusty Russell CC: trenn@suse.de CC: sven.wegener@stealer.net CC: cpufreq@vger.kernel.org Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0d55280ebf16..19230221b030 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -57,6 +57,8 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock); * are concerned with are online after they get the lock. * - Governor routines that can be called in cpufreq hotplug path should not * take this sem as top level hotplug notifier handler takes this. + * - Lock should not be held across + * __cpufreq_governor(data, CPUFREQ_GOV_STOP); */ static DEFINE_PER_CPU(int, policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); @@ -1874,8 +1876,17 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, dprintk("governor switch\n"); /* end old governor */ - if (data->governor) + if (data->governor) { + /* + * Need to release the rwsem around governor + * stop due to lock dependency between + * cancel_delayed_work_sync and the read lock + * taken in the delayed work handler. + */ + unlock_policy_rwsem_write(data->cpu); __cpufreq_governor(data, CPUFREQ_GOV_STOP); + lock_policy_rwsem_write(data->cpu); + } /* start new governor */ data->governor = policy->governor; -- cgit v1.2.3