diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-02-25 16:09:57 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-02-25 16:09:57 +0100 |
commit | 3afb7d0ea558187da4ae8b2def2d4f2c6e796cb1 (patch) | |
tree | 96428676757d7a7c662c46c8045303a2b56381bf /kernel | |
parent | 19334cf0a35d84a8997bfa6e09a6623303289c6e (diff) | |
parent | 7e9498705e810404ecf29bb2d6fa632b9484c609 (diff) |
Merge branch 'sched/core' into auto-latest
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 2 |
-rw-r--r-- | kernel/sched_autogroup.c | 15 |
-rw-r--r-- | kernel/sched_autogroup.h | 5 |
-rw-r--r-- | kernel/sched_fair.c | 63 |
-rw-r--r-- | kernel/sysctl.c | 2 |
5 files changed, 51 insertions, 36 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 850bdb53589a..36e936da9969 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3666,6 +3666,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, __account_system_time(p, cputime, cputime_scaled, target_cputime64); } +#ifndef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING /* * Account a tick to a process and cpustat @@ -3732,6 +3733,7 @@ static void irqtime_account_idle_ticks(int ticks) {} static void irqtime_account_process_tick(struct task_struct *p, int user_tick, struct rq *rq) {} #endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ /* * Account for involuntary wait time. diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 9fb656283157..5946ac515602 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c @@ -12,7 +12,6 @@ static atomic_t autogroup_seq_nr; static void __init autogroup_init(struct task_struct *init_task) { autogroup_default.tg = &root_task_group; - root_task_group.autogroup = &autogroup_default; kref_init(&autogroup_default.kref); init_rwsem(&autogroup_default.lock); init_task->signal->autogroup = &autogroup_default; @@ -130,7 +129,7 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg) static inline bool task_group_is_autogroup(struct task_group *tg) { - return tg != &root_task_group && tg->autogroup; + return !!tg->autogroup; } static inline struct task_group * @@ -161,11 +160,15 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) p->signal->autogroup = autogroup_kref_get(ag); + if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled)) + goto out; + t = p; do { sched_move_task(t); } while_each_thread(p, t); +out: unlock_task_sighand(p, &flags); autogroup_kref_put(prev); } @@ -247,10 +250,14 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) { struct autogroup *ag = autogroup_task_get(p); + if (!task_group_is_autogroup(ag->tg)) + goto out; + down_read(&ag->lock); seq_printf(m, 
"/autogroup-%ld nice %d\n", ag->id, ag->nice); up_read(&ag->lock); +out: autogroup_kref_put(ag); } #endif /* CONFIG_PROC_FS */ @@ -258,9 +265,7 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) #ifdef CONFIG_SCHED_DEBUG static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) { - int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); - - if (!enabled || !tg->autogroup) + if (!task_group_is_autogroup(tg)) return 0; return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h index 7b859ffe5dad..05577055cfca 100644 --- a/kernel/sched_autogroup.h +++ b/kernel/sched_autogroup.h @@ -1,6 +1,11 @@ #ifdef CONFIG_SCHED_AUTOGROUP struct autogroup { + /* + * reference doesn't mean how many thread attach to this + * autogroup now. It just stands for the number of task + * could use this autogroup. + */ struct kref kref; struct task_group *tg; struct rw_semaphore lock; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index d384e739ea95..3a88dee165c0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -2743,7 +2743,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, /* * Consider the group unbalanced when the imbalance is larger - * than the average weight of two tasks. + * than the average weight of a task. 
* * APZ: with cgroup the avg task weight can vary wildly and * might not be a suitable number - should we keep a @@ -2753,7 +2753,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, if (sgs->sum_nr_running) avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; - if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1) + if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) sgs->group_imb = 1; sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); @@ -3113,19 +3113,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds); - /* Cases where imbalance does not exist from POV of this_cpu */ - /* 1) this_cpu is not the appropriate cpu to perform load balancing - * at this level. - * 2) There is no busy sibling group to pull from. - * 3) This group is the busiest group. - * 4) This group is more busy than the avg busieness at this - * sched_domain. - * 5) The imbalance is within the specified limit. - * - * Note: when doing newidle balance, if the local group has excess - * capacity (i.e. nr_running < group_capacity) and the busiest group - * does not have any capacity, we force a load balance to pull tasks - * to the local group. In this case, we skip past checks 3, 4 and 5. + /* + * this_cpu is not the appropriate cpu to perform load balancing at + * this level. 
*/ if (!(*balance)) goto ret; @@ -3134,41 +3124,55 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, check_asym_packing(sd, &sds, this_cpu, imbalance)) return sds.busiest; + /* There is no busy sibling group to pull tasks from */ if (!sds.busiest || sds.busiest_nr_running == 0) goto out_balanced; - /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ + /* + * If the busiest group is imbalanced the below checks don't + * work because they assumes all things are equal, which typically + * isn't true due to cpus_allowed constraints and the like. + */ + if (sds.group_imb) + goto force_balance; + + /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && !sds.busiest_has_capacity) goto force_balance; + /* + * If the local group is more busy than the selected busiest group + * don't try and pull any tasks. + */ if (sds.this_load >= sds.max_load) goto out_balanced; + /* + * Don't pull any tasks if this group is already above the domain + * average load. + */ sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; - if (sds.this_load >= sds.avg_load) goto out_balanced; - /* - * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. - * And to check for busy balance use !idle_cpu instead of - * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE - * even when they are idle. - */ - if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { - if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) - goto out_balanced; - } else { + if (idle == CPU_IDLE) { /* * This cpu is idle. If the busiest group load doesn't * have more tasks than the number of available cpu's and * there is no imbalance between this and busiest group * wrt to idle cpu's, it is balanced. 
*/ - if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && + if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && sds.busiest_nr_running <= sds.busiest_group_weight) goto out_balanced; + } else { + /* + * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use + * imbalance_pct to be conservative. + */ + if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) + goto out_balanced; } force_balance: @@ -3862,8 +3866,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) if (load_balance(cpu, rq, sd, idle, &balance)) { /* * We've pulled tasks over so either we're no - * longer idle, or one of our SMT siblings is - * not idle. + * longer idle. */ idle = CPU_NOT_IDLE; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 40eb4bf48ca4..f76aeae389ce 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -367,7 +367,7 @@ static struct ctl_table kern_table[] = { .data = &sysctl_sched_autogroup_enabled, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, |