From a3c98b8b2ede1f4230f49f9af7135cd902e71e83 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Sun, 2 Oct 2011 00:19:15 +0200 Subject: PM: Introduce devfreq: generic DVFS framework with device-specific OPPs With OPPs, a device may have multiple operable frequency and voltage sets. However, there can be multiple possible operable sets and a system will need to choose one from them. In order to reduce the power consumption (by reducing frequency and voltage) without affecting the performance too much, a Dynamic Voltage and Frequency Scaling (DVFS) scheme may be used. This patch introduces the DVFS capability to non-CPU devices with OPPs. DVFS is a techique whereby the frequency and supplied voltage of a device is adjusted on-the-fly. DVFS usually sets the frequency as low as possible with given conditions (such as QoS assurance) and adjusts voltage according to the chosen frequency in order to reduce power consumption and heat dissipation. The generic DVFS for devices, devfreq, may appear quite similar with /drivers/cpufreq. However, cpufreq does not allow to have multiple devices registered and is not suitable to have multiple heterogenous devices with different (but simple) governors. Normally, DVFS mechanism controls frequency based on the demand for the device, and then, chooses voltage based on the chosen frequency. devfreq also controls the frequency based on the governor's frequency recommendation and let OPP pick up the pair of frequency and voltage based on the recommended frequency. Then, the chosen OPP is passed to device driver's "target" callback. When PM QoS is going to be used with the devfreq device, the device driver should enable OPPs that are appropriate with the current PM QoS requests. In order to do so, the device driver may call opp_enable and opp_disable at the notifier callback of PM QoS so that PM QoS's update_target() call enables the appropriate OPPs. Note that at least one of OPPs should be enabled at any time; be careful when there is a transition. Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Reviewed-by: Mike Turquette Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/devfreq.h | 203 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 include/linux/devfreq.h (limited to 'include/linux/devfreq.h') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h new file mode 100644 index 000000000000..b3be3d3cbaa7 --- /dev/null +++ b/include/linux/devfreq.h @@ -0,0 +1,203 @@ +/* + * devfreq: Generic Dynamic Voltage and Frequency Scaling (DVFS) Framework + * for Non-CPU Devices. + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_DEVFREQ_H__ +#define __LINUX_DEVFREQ_H__ + +#include +#include +#include + +#define DEVFREQ_NAME_LEN 16 + +struct devfreq; + +/** + * struct devfreq_dev_status - Data given from devfreq user device to + * governors. Represents the performance + * statistics. + * @total_time The total time represented by this instance of + * devfreq_dev_status + * @busy_time The time that the device was working among the + * total_time. + * @current_frequency The operating frequency. + * @private_data An entry not specified by the devfreq framework. + * A device and a specific governor may have their + * own protocol with private_data. However, because + * this is governor-specific, a governor using this + * will be only compatible with devices aware of it. + */ +struct devfreq_dev_status { + /* both since the last measure */ + unsigned long total_time; + unsigned long busy_time; + unsigned long current_frequency; + void *private_date; +}; + +/** + * struct devfreq_dev_profile - Devfreq's user device profile + * @initial_freq The operating frequency when devfreq_add_device() is + * called. + * @polling_ms The polling interval in ms. 0 disables polling. + * @target The device should set its operating frequency at + * freq or lowest-upper-than-freq value. If freq is + * higher than any operable frequency, set maximum. + * Before returning, target function should set + * freq at the current frequency. + * @get_dev_status The device should provide the current performance + * status to devfreq, which is used by governors. + * @exit An optional callback that is called when devfreq + * is removing the devfreq object due to error or + * from devfreq_remove_device() call. If the user + * has registered devfreq->nb at a notifier-head, + * this is the time to unregister it. + */ +struct devfreq_dev_profile { + unsigned long initial_freq; + unsigned int polling_ms; + + int (*target)(struct device *dev, unsigned long *freq); + int (*get_dev_status)(struct device *dev, + struct devfreq_dev_status *stat); + void (*exit)(struct device *dev); +}; + +/** + * struct devfreq_governor - Devfreq policy governor + * @name Governor's name + * @get_target_freq Returns desired operating frequency for the device. + * Basically, get_target_freq will run + * devfreq_dev_profile.get_dev_status() to get the + * status of the device (load = busy_time / total_time). + * If no_central_polling is set, this callback is called + * only with update_devfreq() notified by OPP. + * @init Called when the devfreq is being attached to a device + * @exit Called when the devfreq is being removed from a + * device. Governor should stop any internal routines + * before return because related data may be + * freed after exit(). + * @no_central_polling Do not use devfreq's central polling mechanism. + * When this is set, devfreq will not call + * get_target_freq with devfreq_monitor(). However, + * devfreq will call get_target_freq with + * devfreq_update() notified by OPP framework. + * + * Note that the callbacks are called with devfreq->lock locked by devfreq. + */ +struct devfreq_governor { + const char name[DEVFREQ_NAME_LEN]; + int (*get_target_freq)(struct devfreq *this, unsigned long *freq); + int (*init)(struct devfreq *this); + void (*exit)(struct devfreq *this); + const bool no_central_polling; +}; + +/** + * struct devfreq - Device devfreq structure + * @node list node - contains the devices with devfreq that have been + * registered. + * @lock a mutex to protect accessing devfreq. + * @dev device registered by devfreq class. dev.parent is the device + * using devfreq. + * @profile device-specific devfreq profile + * @governor method how to choose frequency based on the usage. + * @nb notifier block used to notify devfreq object that it should + * reevaluate operable frequencies. Devfreq users may use + * devfreq.nb to the corresponding register notifier call chain. + * @polling_jiffies interval in jiffies. + * @previous_freq previously configured frequency value. + * @next_polling the number of remaining jiffies to poll with + * "devfreq_monitor" executions to reevaluate + * frequency/voltage of the device. Set by + * profile's polling_ms interval. + * @data Private data of the governor. The devfreq framework does not + * touch this. + * @being_removed a flag to mark that this object is being removed in + * order to prevent trying to remove the object multiple times. + * + * This structure stores the devfreq information for a give device. + * + * Note that when a governor accesses entries in struct devfreq in its + * functions except for the context of callbacks defined in struct + * devfreq_governor, the governor should protect its access with the + * struct mutex lock in struct devfreq. A governor may use this mutex + * to protect its own private data in void *data as well. + */ +struct devfreq { + struct list_head node; + + struct mutex lock; + struct device dev; + struct devfreq_dev_profile *profile; + const struct devfreq_governor *governor; + struct notifier_block nb; + + unsigned long polling_jiffies; + unsigned long previous_freq; + unsigned int next_polling; + + void *data; /* private data for governors */ + + bool being_removed; +}; + +#if defined(CONFIG_PM_DEVFREQ) +extern struct devfreq *devfreq_add_device(struct device *dev, + struct devfreq_dev_profile *profile, + const struct devfreq_governor *governor, + void *data); +extern int devfreq_remove_device(struct devfreq *devfreq); + +/* Helper functions for devfreq user device driver with OPP. */ +extern struct opp *devfreq_recommended_opp(struct device *dev, + unsigned long *freq); +extern int devfreq_register_opp_notifier(struct device *dev, + struct devfreq *devfreq); +extern int devfreq_unregister_opp_notifier(struct device *dev, + struct devfreq *devfreq); + +#else /* !CONFIG_PM_DEVFREQ */ +static struct devfreq *devfreq_add_device(struct device *dev, + struct devfreq_dev_profile *profile, + struct devfreq_governor *governor, + void *data); +{ + return NULL; +} + +static int devfreq_remove_device(struct devfreq *devfreq); +{ + return 0; +} + +static struct opp *devfreq_recommended_opp(struct device *dev, + unsigned long *freq) +{ + return -EINVAL; +} + +static int devfreq_register_opp_notifier(struct device *dev, + struct devfreq *devfreq) +{ + return -EINVAL; +} + +static int devfreq_unregister_opp_notifier(struct device *dev, + struct devfreq *devfreq) +{ + return -EINVAL; +} + +#endif /* CONFIG_PM_DEVFREQ */ + +#endif /* __LINUX_DEVFREQ_H__ */ -- cgit v1.2.3 From ce26c5bb9569d8b826f01b8620fc16d8da6821e9 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Sun, 2 Oct 2011 00:19:34 +0200 Subject: PM / devfreq: Add basic governors Four cpufreq-like governors are provided as examples. powersave: use the lowest frequency possible. The user (device) should set the polling_ms as 0 because polling is useless for this governor. performance: use the highest freqeuncy possible. The user (device) should set the polling_ms as 0 because polling is useless for this governor. userspace: use the user specified frequency stored at devfreq.user_set_freq. With sysfs support in the following patch, a user may set the value with the sysfs interface. simple_ondemand: simplified version of cpufreq's ondemand governor. When a user updates OPP entries (enable/disable/add), OPP framework automatically notifies devfreq to update operating frequency accordingly. Thus, devfreq users (device drivers) do not need to update devfreq manually with OPP entry updates or set polling_ms for powersave , performance, userspace, or any other "static" governors. Note that these are given only as basic examples for governors and any devices with devfreq may implement their own governors with the drivers and use them. Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Reviewed-by: Mike Turquette Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-class-devfreq | 8 ++ drivers/devfreq/Kconfig | 36 ++++++++ drivers/devfreq/Makefile | 4 + drivers/devfreq/governor_performance.c | 29 +++++++ drivers/devfreq/governor_powersave.c | 29 +++++++ drivers/devfreq/governor_simpleondemand.c | 88 +++++++++++++++++++ drivers/devfreq/governor_userspace.c | 116 ++++++++++++++++++++++++++ include/linux/devfreq.h | 35 ++++++++ 8 files changed, 345 insertions(+) create mode 100644 drivers/devfreq/governor_performance.c create mode 100644 drivers/devfreq/governor_powersave.c create mode 100644 drivers/devfreq/governor_simpleondemand.c create mode 100644 drivers/devfreq/governor_userspace.c (limited to 'include/linux/devfreq.h') diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 0ec855f479ce..23d78b5aab11 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -42,3 +42,11 @@ Description: devfreq-provided central polling (/sys/class/devfreq/.../central_polling is 0), this value may be useless. + +What: /sys/class/devfreq/.../userspace/set_freq +Date: September 2011 +Contact: MyungJoo Ham +Description: + The /sys/class/devfreq/.../userspace/set_freq shows and + sets the requested frequency for the devfreq object if + userspace governor is in effect. diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 1fb42de4f420..643b055ed3cd 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -34,6 +34,42 @@ menuconfig PM_DEVFREQ if PM_DEVFREQ +comment "DEVFREQ Governors" + +config DEVFREQ_GOV_SIMPLE_ONDEMAND + bool "Simple Ondemand" + help + Chooses frequency based on the recent load on the device. Works + similar as ONDEMAND governor of CPUFREQ does. A device with + Simple-Ondemand should be able to provide busy/total counter + values that imply the usage rate. A device may provide tuned + values to the governor with data field at devfreq_add_device(). + +config DEVFREQ_GOV_PERFORMANCE + bool "Performance" + help + Sets the frequency at the maximum available frequency. + This governor always returns UINT_MAX as frequency so that + the DEVFREQ framework returns the highest frequency available + at any time. + +config DEVFREQ_GOV_POWERSAVE + bool "Powersave" + help + Sets the frequency at the minimum available frequency. + This governor always returns 0 as frequency so that + the DEVFREQ framework returns the lowest frequency available + at any time. + +config DEVFREQ_GOV_USERSPACE + bool "Userspace" + help + Sets the frequency at the user specified one. + This governor returns the user configured frequency if there + has been an input to /sys/devices/.../power/devfreq_set_freq. + Otherwise, the governor does not change the frequnecy + given at the initialization. + comment "DEVFREQ Drivers" endif # PM_DEVFREQ diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index 168934a12b38..4564a89e970a 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -1 +1,5 @@ obj-$(CONFIG_PM_DEVFREQ) += devfreq.o +obj-$(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) += governor_simpleondemand.o +obj-$(CONFIG_DEVFREQ_GOV_PERFORMANCE) += governor_performance.o +obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE) += governor_powersave.o +obj-$(CONFIG_DEVFREQ_GOV_USERSPACE) += governor_userspace.o diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c new file mode 100644 index 000000000000..c0596b291761 --- /dev/null +++ b/drivers/devfreq/governor_performance.c @@ -0,0 +1,29 @@ +/* + * linux/drivers/devfreq/governor_performance.c + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +static int devfreq_performance_func(struct devfreq *df, + unsigned long *freq) +{ + /* + * target callback should be able to get floor value as + * said in devfreq.h + */ + *freq = UINT_MAX; + return 0; +} + +const struct devfreq_governor devfreq_performance = { + .name = "performance", + .get_target_freq = devfreq_performance_func, + .no_central_polling = true, +}; diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c new file mode 100644 index 000000000000..2483a85a266f --- /dev/null +++ b/drivers/devfreq/governor_powersave.c @@ -0,0 +1,29 @@ +/* + * linux/drivers/devfreq/governor_powersave.c + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +static int devfreq_powersave_func(struct devfreq *df, + unsigned long *freq) +{ + /* + * target callback should be able to get ceiling value as + * said in devfreq.h + */ + *freq = 0; + return 0; +} + +const struct devfreq_governor devfreq_powersave = { + .name = "powersave", + .get_target_freq = devfreq_powersave_func, + .no_central_polling = true, +}; diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c new file mode 100644 index 000000000000..efad8dcf9028 --- /dev/null +++ b/drivers/devfreq/governor_simpleondemand.c @@ -0,0 +1,88 @@ +/* + * linux/drivers/devfreq/governor_simpleondemand.c + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +/* Default constants for DevFreq-Simple-Ondemand (DFSO) */ +#define DFSO_UPTHRESHOLD (90) +#define DFSO_DOWNDIFFERENCTIAL (5) +static int devfreq_simple_ondemand_func(struct devfreq *df, + unsigned long *freq) +{ + struct devfreq_dev_status stat; + int err = df->profile->get_dev_status(df->dev.parent, &stat); + unsigned long long a, b; + unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD; + unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL; + struct devfreq_simple_ondemand_data *data = df->data; + + if (err) + return err; + + if (data) { + if (data->upthreshold) + dfso_upthreshold = data->upthreshold; + if (data->downdifferential) + dfso_downdifferential = data->downdifferential; + } + if (dfso_upthreshold > 100 || + dfso_upthreshold < dfso_downdifferential) + return -EINVAL; + + /* Assume MAX if it is going to be divided by zero */ + if (stat.total_time == 0) { + *freq = UINT_MAX; + return 0; + } + + /* Prevent overflow */ + if (stat.busy_time >= (1 << 24) || stat.total_time >= (1 << 24)) { + stat.busy_time >>= 7; + stat.total_time >>= 7; + } + + /* Set MAX if it's busy enough */ + if (stat.busy_time * 100 > + stat.total_time * dfso_upthreshold) { + *freq = UINT_MAX; + return 0; + } + + /* Set MAX if we do not know the initial frequency */ + if (stat.current_frequency == 0) { + *freq = UINT_MAX; + return 0; + } + + /* Keep the current frequency */ + if (stat.busy_time * 100 > + stat.total_time * (dfso_upthreshold - dfso_downdifferential)) { + *freq = stat.current_frequency; + return 0; + } + + /* Set the desired frequency based on the load */ + a = stat.busy_time; + a *= stat.current_frequency; + b = div_u64(a, stat.total_time); + b *= 100; + b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2)); + *freq = (unsigned long) b; + + return 0; +} + +const struct devfreq_governor devfreq_simple_ondemand = { + .name = "simple_ondemand", + .get_target_freq = devfreq_simple_ondemand_func, +}; diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c new file mode 100644 index 000000000000..4f8b563da782 --- /dev/null +++ b/drivers/devfreq/governor_userspace.c @@ -0,0 +1,116 @@ +/* + * linux/drivers/devfreq/governor_simpleondemand.c + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include "governor.h" + +struct userspace_data { + unsigned long user_frequency; + bool valid; +}; + +static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq) +{ + struct userspace_data *data = df->data; + + if (!data->valid) + *freq = df->previous_freq; /* No user freq specified yet */ + else + *freq = data->user_frequency; + return 0; +} + +static ssize_t store_freq(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct devfreq *devfreq = to_devfreq(dev); + struct userspace_data *data; + unsigned long wanted; + int err = 0; + + + mutex_lock(&devfreq->lock); + data = devfreq->data; + + sscanf(buf, "%lu", &wanted); + data->user_frequency = wanted; + data->valid = true; + err = update_devfreq(devfreq); + if (err == 0) + err = count; + mutex_unlock(&devfreq->lock); + return err; +} + +static ssize_t show_freq(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct devfreq *devfreq = to_devfreq(dev); + struct userspace_data *data; + int err = 0; + + mutex_lock(&devfreq->lock); + data = devfreq->data; + + if (data->valid) + err = sprintf(buf, "%lu\n", data->user_frequency); + else + err = sprintf(buf, "undefined\n"); + mutex_unlock(&devfreq->lock); + return err; +} + +static DEVICE_ATTR(set_freq, 0644, show_freq, store_freq); +static struct attribute *dev_entries[] = { + &dev_attr_set_freq.attr, + NULL, +}; +static struct attribute_group dev_attr_group = { + .name = "userspace", + .attrs = dev_entries, +}; + +static int userspace_init(struct devfreq *devfreq) +{ + int err = 0; + struct userspace_data *data = kzalloc(sizeof(struct userspace_data), + GFP_KERNEL); + + if (!data) { + err = -ENOMEM; + goto out; + } + data->valid = false; + devfreq->data = data; + + err = sysfs_create_group(&devfreq->dev.kobj, &dev_attr_group); +out: + return err; +} + +static void userspace_exit(struct devfreq *devfreq) +{ + sysfs_remove_group(&devfreq->dev.kobj, &dev_attr_group); + kfree(devfreq->data); + devfreq->data = NULL; +} + +const struct devfreq_governor devfreq_userspace = { + .name = "userspace", + .get_target_freq = devfreq_userspace_func, + .init = userspace_init, + .exit = userspace_exit, + .no_central_polling = true, +}; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index b3be3d3cbaa7..afb94583960c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -166,6 +166,36 @@ extern int devfreq_register_opp_notifier(struct device *dev, extern int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq); +#ifdef CONFIG_DEVFREQ_GOV_POWERSAVE +extern const struct devfreq_governor devfreq_powersave; +#endif +#ifdef CONFIG_DEVFREQ_GOV_PERFORMANCE +extern const struct devfreq_governor devfreq_performance; +#endif +#ifdef CONFIG_DEVFREQ_GOV_USERSPACE +extern const struct devfreq_governor devfreq_userspace; +#endif +#ifdef CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND +extern const struct devfreq_governor devfreq_simple_ondemand; +/** + * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq + * and devfreq_add_device + * @ upthreshold If the load is over this value, the frequency jumps. + * Specify 0 to use the default. Valid value = 0 to 100. + * @ downdifferential If the load is under upthreshold - downdifferential, + * the governor may consider slowing the frequency down. + * Specify 0 to use the default. Valid value = 0 to 100. + * downdifferential < upthreshold must hold. + * + * If the fed devfreq_simple_ondemand_data pointer is NULL to the governor, + * the governor uses the default values. + */ +struct devfreq_simple_ondemand_data { + unsigned int upthreshold; + unsigned int downdifferential; +}; +#endif + #else /* !CONFIG_PM_DEVFREQ */ static struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, @@ -198,6 +228,11 @@ static int devfreq_unregister_opp_notifier(struct device *dev, return -EINVAL; } +#define devfreq_powersave NULL +#define devfreq_performance NULL +#define devfreq_userspace NULL +#define devfreq_simple_ondemand NULL + #endif /* CONFIG_PM_DEVFREQ */ #endif /* __LINUX_DEVFREQ_H__ */ -- cgit v1.2.3 From 1a51cfdc4516a6e1f2c1f8a579630a027c30331a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 7 Nov 2011 23:54:53 +0100 Subject: PM / devfreq: fix private_data The "private_date" field in struct devfreq_dev_status almost certainly wants to be "private_data"; since there are no in-tree users of this functionality, now seems like an easy time to make the fix. Signed-off-by: Jonathan Corbet Signed-off-by: Rafael J. Wysocki --- include/linux/devfreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/devfreq.h') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index afb94583960c..98ce8124b1cc 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -41,7 +41,7 @@ struct devfreq_dev_status { unsigned long total_time; unsigned long busy_time; unsigned long current_frequency; - void *private_date; + void *private_data; }; /** -- cgit v1.2.3