summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2011-02-25 16:09:57 +0100
committer Ingo Molnar <mingo@elte.hu> 2011-02-25 16:09:57 +0100
commit 3afb7d0ea558187da4ae8b2def2d4f2c6e796cb1 (patch)
tree 96428676757d7a7c662c46c8045303a2b56381bf /kernel
parent 19334cf0a35d84a8997bfa6e09a6623303289c6e (diff)
parent 7e9498705e810404ecf29bb2d6fa632b9484c609 (diff)
Merge branch 'sched/core' into auto-latest
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/sched.c 2
-rw-r--r-- kernel/sched_autogroup.c 15
-rw-r--r-- kernel/sched_autogroup.h 5
-rw-r--r-- kernel/sched_fair.c 63
-rw-r--r-- kernel/sysctl.c 2
5 files changed, 51 insertions, 36 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 850bdb53589a..36e936da9969 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3666,6 +3666,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
__account_system_time(p, cputime, cputime_scaled, target_cputime64);
}
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* Account a tick to a process and cpustat
@@ -3732,6 +3733,7 @@ static void irqtime_account_idle_ticks(int ticks) {}
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq) {}
#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
/*
* Account for involuntary wait time.
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 9fb656283157..5946ac515602 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -12,7 +12,6 @@ static atomic_t autogroup_seq_nr;
static void __init autogroup_init(struct task_struct *init_task)
{
autogroup_default.tg = &root_task_group;
- root_task_group.autogroup = &autogroup_default;
kref_init(&autogroup_default.kref);
init_rwsem(&autogroup_default.lock);
init_task->signal->autogroup = &autogroup_default;
@@ -130,7 +129,7 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
static inline bool task_group_is_autogroup(struct task_group *tg)
{
- return tg != &root_task_group && tg->autogroup;
+ return !!tg->autogroup;
}
static inline struct task_group *
@@ -161,11 +160,15 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
p->signal->autogroup = autogroup_kref_get(ag);
+ if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
+ goto out;
+
t = p;
do {
sched_move_task(t);
} while_each_thread(p, t);
+out:
unlock_task_sighand(p, &flags);
autogroup_kref_put(prev);
}
@@ -247,10 +250,14 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
struct autogroup *ag = autogroup_task_get(p);
+ if (!task_group_is_autogroup(ag->tg))
+ goto out;
+
down_read(&ag->lock);
seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
up_read(&ag->lock);
+out:
autogroup_kref_put(ag);
}
#endif /* CONFIG_PROC_FS */
@@ -258,9 +265,7 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
#ifdef CONFIG_SCHED_DEBUG
static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
- int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
-
- if (!enabled || !tg->autogroup)
+ if (!task_group_is_autogroup(tg))
return 0;
return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 7b859ffe5dad..05577055cfca 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -1,6 +1,11 @@
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup {
+ /*
+ * The reference count doesn't say how many threads are attached to
+ * this autogroup right now. It only stands for the number of tasks
+ * that could use this autogroup.
+ */
struct kref kref;
struct task_group *tg;
struct rw_semaphore lock;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index d384e739ea95..3a88dee165c0 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2743,7 +2743,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
/*
* Consider the group unbalanced when the imbalance is larger
- * than the average weight of two tasks.
+ * than the average weight of a task.
*
* APZ: with cgroup the avg task weight can vary wildly and
* might not be a suitable number - should we keep a
@@ -2753,7 +2753,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
if (sgs->sum_nr_running)
avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
- if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1)
+ if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
sgs->group_imb = 1;
sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
@@ -3113,19 +3113,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
*/
update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);
- /* Cases where imbalance does not exist from POV of this_cpu */
- /* 1) this_cpu is not the appropriate cpu to perform load balancing
- * at this level.
- * 2) There is no busy sibling group to pull from.
- * 3) This group is the busiest group.
- * 4) This group is more busy than the avg busieness at this
- * sched_domain.
- * 5) The imbalance is within the specified limit.
- *
- * Note: when doing newidle balance, if the local group has excess
- * capacity (i.e. nr_running < group_capacity) and the busiest group
- * does not have any capacity, we force a load balance to pull tasks
- * to the local group. In this case, we skip past checks 3, 4 and 5.
+ /*
+ * this_cpu is not the appropriate cpu to perform load balancing at
+ * this level.
*/
if (!(*balance))
goto ret;
@@ -3134,41 +3124,55 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
check_asym_packing(sd, &sds, this_cpu, imbalance))
return sds.busiest;
+ /* There is no busy sibling group to pull tasks from */
if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;
- /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
+ /*
+ * If the busiest group is imbalanced the below checks don't
+ * work because they assume all things are equal, which typically
+ * isn't true due to cpus_allowed constraints and the like.
+ */
+ if (sds.group_imb)
+ goto force_balance;
+
+ /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
!sds.busiest_has_capacity)
goto force_balance;
+ /*
+ * If the local group is more busy than the selected busiest group
+ * don't try and pull any tasks.
+ */
if (sds.this_load >= sds.max_load)
goto out_balanced;
+ /*
+ * Don't pull any tasks if this group is already above the domain
+ * average load.
+ */
sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
-
if (sds.this_load >= sds.avg_load)
goto out_balanced;
- /*
- * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
- * And to check for busy balance use !idle_cpu instead of
- * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
- * even when they are idle.
- */
- if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
- if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
- goto out_balanced;
- } else {
+ if (idle == CPU_IDLE) {
/*
* This cpu is idle. If the busiest group load doesn't
* have more tasks than the number of available cpu's and
* there is no imbalance between this and busiest group
* wrt to idle cpu's, it is balanced.
*/
- if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
+ if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
sds.busiest_nr_running <= sds.busiest_group_weight)
goto out_balanced;
+ } else {
+ /*
+ * In the CPU_NEWLY_IDLE and CPU_NOT_IDLE cases, use
+ * imbalance_pct to be conservative.
+ */
+ if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+ goto out_balanced;
}
force_balance:
@@ -3862,8 +3866,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
if (load_balance(cpu, rq, sd, idle, &balance)) {
/*
* We've pulled tasks over so either we're no
- * longer idle, or one of our SMT siblings is
- * not idle.
+ * longer idle.
*/
idle = CPU_NOT_IDLE;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 40eb4bf48ca4..f76aeae389ce 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -367,7 +367,7 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_sched_autogroup_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},