summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/controllers/cpuacct.txt32
-rw-r--r--Documentation/scheduler/sched-arch.txt4
-rw-r--r--arch/ia64/Kconfig2
-rw-r--r--arch/m32r/Kconfig2
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--include/asm-m32r/system.h2
-rw-r--r--include/linux/rcuclassic.h2
-rw-r--r--include/linux/sched.h2
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/rcuclassic.c4
-rw-r--r--kernel/sched.c257
-rw-r--r--kernel/sched_debug.c51
-rw-r--r--kernel/sched_rt.c7
15 files changed, 190 insertions, 186 deletions
diff --git a/Documentation/controllers/cpuacct.txt b/Documentation/controllers/cpuacct.txt
new file mode 100644
index 000000000000..bb775fbe43d7
--- /dev/null
+++ b/Documentation/controllers/cpuacct.txt
@@ -0,0 +1,32 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mkdir /cgroups
+# mount -t cgroup -ocpuacct none /cgroups
+
+With the above step, the initial or the parent accounting group
+becomes visible at /cgroups. At bootup, this group includes all the
+tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
+/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
+this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /cgroups.
+
+# cd /cgroups
+# mkdir g1
+# echo $$ > g1
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/cgroups/cpuacct.usage also.
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index 941615a9769b..d43dbcbd163b 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -8,7 +8,7 @@ Context switch
By default, the switch_to arch function is called with the runqueue
locked. This is usually not a problem unless switch_to may need to
take the runqueue lock. This is usually due to a wake up operation in
-the context switch. See include/asm-ia64/system.h for an example.
+the context switch. See arch/ia64/include/asm/system.h for an example.
To request the scheduler call switch_to with the runqueue unlocked,
you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
@@ -23,7 +23,7 @@ disabled. Interrupts may be enabled over the call if it is likely to
introduce a significant interrupt latency by adding the line
`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
unlocked context switches. This define also implies
-`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an
+`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
example.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6bd91ed7cd03..7fa8f615ba6e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -99,7 +99,7 @@ config GENERIC_IOMAP
bool
default y
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
bool
default y
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index dbaed4a63815..29047d5c259a 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY
bool
default y
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
bool
default y
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f4af967a6b30..a5255e7c79e0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE
bool
default y
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
bool
default y
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 525c13a4de93..adb23ea1c1ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,7 +141,7 @@ config GENERIC_NVRAM
bool
default y if PPC32
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
bool
default y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7fc2107af9b9..007b2bc5ca47 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -362,10 +362,10 @@ config X86_RDC321X
as R-8610-(G).
If you don't have one of these chips, you should say N here.
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
def_bool y
prompt "Single-depth WCHAN output"
- depends on X86_32
+ depends on X86
help
Calculate simpler /proc/<PID>/wchan values. If this option
is disabled then wchan values will recurse back to the
diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h
index 70a57c8c002b..c980f5ba8de7 100644
--- a/include/asm-m32r/system.h
+++ b/include/asm-m32r/system.h
@@ -23,7 +23,7 @@
*/
#if defined(CONFIG_FRAME_POINTER) || \
- !defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER)
+ !defined(CONFIG_SCHED_OMIT_FRAME_POINTER)
#define M32R_PUSH_FP " push fp\n"
#define M32R_POP_FP " pop fp\n"
#else
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 5f89b62e6983..301dda829e37 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -41,7 +41,7 @@
#include <linux/seqlock.h>
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */
#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bee1e93c95ad..4ce5c603c51a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -259,8 +259,6 @@ static inline int select_nohz_load_balancer(int cpu)
}
#endif
-extern unsigned long rt_needs_cpu(int cpu);
-
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
diff --git a/kernel/Makefile b/kernel/Makefile
index 03a45e7e87b7..010ccb311166 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -19,7 +19,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
-CFLAGS_REMOVE_sched.o = -pg
endif
ifdef CONFIG_FUNCTION_RET_TRACER
CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address()
@@ -94,7 +93,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
-ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
+ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
# me. I suspect most platforms don't need this, but until we know that for sure
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 37f72e551542..e503a002f330 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
/* OK, time to rat on our buddy... */
- printk(KERN_ERR "RCU detected CPU stalls:");
+ printk(KERN_ERR "INFO: RCU detected CPU stalls:");
for_each_possible_cpu(cpu) {
if (cpu_isset(cpu, rcp->cpumask))
printk(" %d", cpu);
@@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp)
{
unsigned long flags;
- printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+ printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
smp_processor_id(), jiffies,
jiffies - rcp->gp_start);
dump_stack();
diff --git a/kernel/sched.c b/kernel/sched.c
index b50324f3579f..2075054cafdf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -709,45 +709,18 @@ static __read_mostly char *sched_feat_names[] = {
#undef SCHED_FEAT
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->i_private;
- return 0;
-}
-
-static ssize_t
-sched_feat_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int sched_feat_show(struct seq_file *m, void *v)
{
- char *buf;
- int r = 0;
- int len = 0;
int i;
for (i = 0; sched_feat_names[i]; i++) {
- len += strlen(sched_feat_names[i]);
- len += 4;
+ if (!(sysctl_sched_features & (1UL << i)))
+ seq_puts(m, "NO_");
+ seq_printf(m, "%s ", sched_feat_names[i]);
}
+ seq_puts(m, "\n");
- buf = kmalloc(len + 2, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- for (i = 0; sched_feat_names[i]; i++) {
- if (sysctl_sched_features & (1UL << i))
- r += sprintf(buf + r, "%s ", sched_feat_names[i]);
- else
- r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
- }
-
- r += sprintf(buf + r, "\n");
- WARN_ON(r >= len + 2);
-
- r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
- kfree(buf);
-
- return r;
+ return 0;
}
static ssize_t
@@ -792,10 +765,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
return cnt;
}
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_feat_show, NULL);
+}
+
static struct file_operations sched_feat_fops = {
- .open = sched_feat_open,
- .read = sched_feat_read,
- .write = sched_feat_write,
+ .open = sched_feat_open,
+ .write = sched_feat_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
};
static __init int sched_init_debug(void)
@@ -1480,27 +1460,13 @@ static void
update_group_shares_cpu(struct task_group *tg, int cpu,
unsigned long sd_shares, unsigned long sd_rq_weight)
{
- int boost = 0;
unsigned long shares;
unsigned long rq_weight;
if (!tg->se[cpu])
return;
- rq_weight = tg->cfs_rq[cpu]->load.weight;
-
- /*
- * If there are currently no tasks on the cpu pretend there is one of
- * average load so that when a new task gets to run here it will not
- * get delayed by group starvation.
- */
- if (!rq_weight) {
- boost = 1;
- rq_weight = NICE_0_LOAD;
- }
-
- if (unlikely(rq_weight > sd_rq_weight))
- rq_weight = sd_rq_weight;
+ rq_weight = tg->cfs_rq[cpu]->rq_weight;
/*
* \Sum shares * rq_weight
@@ -1508,7 +1474,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
* \Sum rq_weight
*
*/
- shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+ shares = (sd_shares * rq_weight) / sd_rq_weight;
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1517,11 +1483,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
unsigned long flags;
spin_lock_irqsave(&rq->lock, flags);
- /*
- * record the actual number of shares, not the boosted amount.
- */
- tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
- tg->cfs_rq[cpu]->rq_weight = rq_weight;
+ tg->cfs_rq[cpu]->shares = shares;
__set_se_shares(tg->se[cpu], shares);
spin_unlock_irqrestore(&rq->lock, flags);
@@ -1535,13 +1497,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
*/
static int tg_shares_up(struct task_group *tg, void *data)
{
- unsigned long rq_weight = 0;
+ unsigned long weight, rq_weight = 0;
unsigned long shares = 0;
struct sched_domain *sd = data;
int i;
for_each_cpu_mask(i, sd->span) {
- rq_weight += tg->cfs_rq[i]->load.weight;
+ /*
+ * If there are currently no tasks on the cpu pretend there
+ * is one of average load so that when a new task gets to
+ * run here it will not get delayed by group starvation.
+ */
+ weight = tg->cfs_rq[i]->load.weight;
+ if (!weight)
+ weight = NICE_0_LOAD;
+
+ tg->cfs_rq[i]->rq_weight = weight;
+ rq_weight += weight;
shares += tg->cfs_rq[i]->shares;
}
@@ -1551,9 +1523,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
shares = tg->shares;
- if (!rq_weight)
- rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
for_each_cpu_mask(i, sd->span)
update_group_shares_cpu(tg, i, shares, rq_weight);
@@ -1618,6 +1587,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
#endif
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(this_rq->lock)
+ __acquires(busiest->lock)
+ __acquires(this_rq->lock)
+{
+ int ret = 0;
+
+ if (unlikely(!irqs_disabled())) {
+ /* printk() doesn't work good under rq->lock */
+ spin_unlock(&this_rq->lock);
+ BUG_ON(1);
+ }
+ if (unlikely(!spin_trylock(&busiest->lock))) {
+ if (busiest < this_rq) {
+ spin_unlock(&this_rq->lock);
+ spin_lock(&busiest->lock);
+ spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+ ret = 1;
+ } else
+ spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+ }
+ return ret;
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(busiest->lock)
+{
+ spin_unlock(&busiest->lock);
+ lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -2818,40 +2820,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
}
/*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(this_rq->lock)
- __acquires(busiest->lock)
- __acquires(this_rq->lock)
-{
- int ret = 0;
-
- if (unlikely(!irqs_disabled())) {
- /* printk() doesn't work good under rq->lock */
- spin_unlock(&this_rq->lock);
- BUG_ON(1);
- }
- if (unlikely(!spin_trylock(&busiest->lock))) {
- if (busiest < this_rq) {
- spin_unlock(&this_rq->lock);
- spin_lock(&busiest->lock);
- spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
- ret = 1;
- } else
- spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
- }
- return ret;
-}
-
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(busiest->lock)
-{
- spin_unlock(&busiest->lock);
- lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
-}
-
-/*
* If dest_cpu is allowed for this process, migrate the task to it.
* This is accomplished by forcing the cpu_allowed mask to only
* allow dest_cpu, which will force the cpu onto dest_cpu. Then
@@ -6132,7 +6100,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
/*
* Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
*/
static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
@@ -6642,28 +6609,6 @@ early_initcall(migration_init);
#ifdef CONFIG_SCHED_DEBUG
-static inline const char *sd_level_to_string(enum sched_domain_level lvl)
-{
- switch (lvl) {
- case SD_LV_NONE:
- return "NONE";
- case SD_LV_SIBLING:
- return "SIBLING";
- case SD_LV_MC:
- return "MC";
- case SD_LV_CPU:
- return "CPU";
- case SD_LV_NODE:
- return "NODE";
- case SD_LV_ALLNODES:
- return "ALLNODES";
- case SD_LV_MAX:
- return "MAX";
-
- }
- return "MAX";
-}
-
static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
cpumask_t *groupmask)
{
@@ -6683,8 +6628,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
return -1;
}
- printk(KERN_CONT "span %s level %s\n",
- str, sd_level_to_string(sd->level));
+ printk(KERN_CONT "span %s level %s\n", str, sd->name);
if (!cpu_isset(cpu, sd->span)) {
printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -7340,13 +7284,21 @@ struct allmasks {
};
#if NR_CPUS > 128
-#define SCHED_CPUMASK_ALLOC 1
-#define SCHED_CPUMASK_FREE(v) kfree(v)
-#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
+#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{
+ *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+}
+static inline void sched_cpumask_free(struct allmasks *masks)
+{
+ kfree(masks);
+}
#else
-#define SCHED_CPUMASK_ALLOC 0
-#define SCHED_CPUMASK_FREE(v)
-#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
+#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{ }
+static inline void sched_cpumask_free(struct allmasks *masks)
+{ }
#endif
#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
@@ -7422,9 +7374,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
return -ENOMEM;
}
-#if SCHED_CPUMASK_ALLOC
/* get space for all scratch cpumask variables */
- allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+ sched_cpumask_alloc(&allmasks);
if (!allmasks) {
printk(KERN_WARNING "Cannot alloc cpumask array\n");
kfree(rd);
@@ -7433,7 +7384,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
#endif
return -ENOMEM;
}
-#endif
+
tmpmask = (cpumask_t *)allmasks;
@@ -7687,13 +7638,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
cpu_attach_domain(sd, rd, i);
}
- SCHED_CPUMASK_FREE((void *)allmasks);
+ sched_cpumask_free(allmasks);
return 0;
#ifdef CONFIG_NUMA
error:
free_sched_groups(cpu_map, tmpmask);
- SCHED_CPUMASK_FREE((void *)allmasks);
+ sched_cpumask_free(allmasks);
kfree(rd);
return -ENOMEM;
#endif
@@ -7757,8 +7708,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
cpumask_t tmpmask;
int i;
- unregister_sched_domain_sysctl();
-
for_each_cpu_mask_nr(i, *cpu_map)
cpu_attach_domain(NULL, &def_root_domain, i);
synchronize_sched();
@@ -7836,7 +7785,7 @@ match1:
ndoms_cur = 0;
doms_new = &fallback_doms;
cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
- dattr_new = NULL;
+ WARN_ON_ONCE(dattr_new);
}
/* Build new domains */
@@ -8496,7 +8445,7 @@ static
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
struct cfs_rq *cfs_rq;
- struct sched_entity *se, *parent_se;
+ struct sched_entity *se;
struct rq *rq;
int i;
@@ -8512,18 +8461,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
for_each_possible_cpu(i) {
rq = cpu_rq(i);
- cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+ GFP_KERNEL, cpu_to_node(i));
if (!cfs_rq)
goto err;
- se = kmalloc_node(sizeof(struct sched_entity),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ se = kzalloc_node(sizeof(struct sched_entity),
+ GFP_KERNEL, cpu_to_node(i));
if (!se)
goto err;
- parent_se = parent ? parent->se[i] : NULL;
- init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+ init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
}
return 1;
@@ -8584,7 +8532,7 @@ static
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
struct rt_rq *rt_rq;
- struct sched_rt_entity *rt_se, *parent_se;
+ struct sched_rt_entity *rt_se;
struct rq *rq;
int i;
@@ -8601,18 +8549,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
for_each_possible_cpu(i) {
rq = cpu_rq(i);
- rt_rq = kmalloc_node(sizeof(struct rt_rq),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ rt_rq = kzalloc_node(sizeof(struct rt_rq),
+ GFP_KERNEL, cpu_to_node(i));
if (!rt_rq)
goto err;
- rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+ GFP_KERNEL, cpu_to_node(i));
if (!rt_se)
goto err;
- parent_se = parent ? parent->rt_se[i] : NULL;
- init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+ init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
}
return 1;
@@ -9255,11 +9202,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
* (balbir@in.ibm.com).
*/
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};
struct cgroup_subsys cpuacct_subsys;
@@ -9293,6 +9241,9 @@ static struct cgroup_subsys_state *cpuacct_create(
return ERR_PTR(-ENOMEM);
}
+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}
@@ -9372,14 +9323,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;
if (!cpuacct_subsys.active)
return;
+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
+ for (; ca; ca = ca->parent) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 26ed8e3d1c15..baf2f17af462 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -53,6 +53,40 @@ static unsigned long nsec_low(unsigned long long nsec)
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void print_cfs_group_stats(struct seq_file *m, int cpu,
+ struct task_group *tg)
+{
+ struct sched_entity *se = tg->se[cpu];
+ if (!se)
+ return;
+
+#define P(F) \
+ SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
+#define PN(F) \
+ SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+
+ PN(se->exec_start);
+ PN(se->vruntime);
+ PN(se->sum_exec_runtime);
+#ifdef CONFIG_SCHEDSTATS
+ PN(se->wait_start);
+ PN(se->sleep_start);
+ PN(se->block_start);
+ PN(se->sleep_max);
+ PN(se->block_max);
+ PN(se->exec_max);
+ PN(se->slice_max);
+ PN(se->wait_max);
+ PN(se->wait_sum);
+ P(se->wait_count);
+#endif
+ P(se->load.weight);
+#undef PN
+#undef P
+}
+#endif
+
static void
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
@@ -121,14 +155,9 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
char path[128] = "";
- struct cgroup *cgroup = NULL;
struct task_group *tg = cfs_rq->tg;
- if (tg)
- cgroup = tg->css.cgroup;
-
- if (cgroup)
- cgroup_path(cgroup, path, sizeof(path));
+ cgroup_path(tg->css.cgroup, path, sizeof(path));
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
#else
@@ -168,6 +197,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#ifdef CONFIG_SMP
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
#endif
+ print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}
@@ -175,14 +205,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
char path[128] = "";
- struct cgroup *cgroup = NULL;
struct task_group *tg = rt_rq->tg;
- if (tg)
- cgroup = tg->css.cgroup;
-
- if (cgroup)
- cgroup_path(cgroup, path, sizeof(path));
+ cgroup_path(tg->css.cgroup, path, sizeof(path));
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
#else
@@ -272,7 +297,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
+ SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n",
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d9ba9d5f99d6..587a16e2a8f5 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -537,13 +537,13 @@ static void update_curr_rt(struct rq *rq)
for_each_sched_rt_entity(rt_se) {
rt_rq = rt_rq_of_se(rt_se);
- spin_lock(&rt_rq->rt_runtime_lock);
if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
+ spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_time += delta_exec;
if (sched_rt_runtime_exceeded(rt_rq))
resched_task(curr);
+ spin_unlock(&rt_rq->rt_runtime_lock);
}
- spin_unlock(&rt_rq->rt_runtime_lock);
}
}
@@ -909,9 +909,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
/* Only try algorithms three times */
#define RT_MAX_TRIES 3
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest);
-
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)