author    Stephen Rothwell <sfr@canb.auug.org.au>  2013-09-19 11:19:01 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au>  2013-09-19 11:19:01 +1000
commit    72d421bfbeef39471d2bfda727567a5a84c71633 (patch)
tree      e2a8c3ca4a04cbc72f90132488187b4e9544ba96
parent    ffa0c722edfe3b85a03e5a861a5861c5c6e493f9 (diff)
parent    d7b0a1b0b9b1cec1a841eaccfefe6532837609ab (diff)
Merge remote-tracking branch 'idle/next'
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h   |   2
-rw-r--r--  drivers/idle/intel_idle.c               |  38
-rw-r--r--  tools/power/x86/turbostat/turbostat.c   | 155
3 files changed, 153 insertions, 42 deletions
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb0465090ae5..940ed3fd889a 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -147,6 +147,8 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_CORE_C1_RES 0x00000660
+
#define MSR_AMD64_MC0_MASK 0xc0010044
#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index fa6964d8681a..d8c99e67d973 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1,7 +1,7 @@
/*
* intel_idle.c - native hardware idle loop for modern Intel processors
*
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2013, Intel Corporation.
* Len Brown <len.brown@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -329,6 +329,36 @@ static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = {
{
.enter = NULL }
};
+static struct cpuidle_state avn_cstates[CPUIDLE_STATE_MAX] = {
+ {
+ .name = "C1-AVN",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle },
+ {
+ .name = "C1E-AVN",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 5,
+ .target_residency = 10,
+ .enter = &intel_idle },
+ {
+ .name = "C6NS-AVN", /* No Cache Shrink */
+ .desc = "MWAIT 0x51",
+ .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 15,
+ .target_residency = 45,
+ .enter = &intel_idle },
+ {
+ .name = "C6FS-AVN", /* Full Cache shrink */
+ .desc = "MWAIT 0x52",
+ .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 150, /* fake penalty added due to cold cache */
+ .target_residency = 100000, /* fake penalty added due to cold cache */
+ .enter = &intel_idle },
+};
/**
* intel_idle
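
The new avn_cstates[] table above describes the C-states exposed on Avoton (the Silvermont-based Atom C2000); each entry folds the MWAIT sub-state hint named in .desc into .flags via MWAIT2flg(). Note that the C1E-AVN entry passes MWAIT2flg(0x00) even though its .desc reads "MWAIT 0x01", which looks worth double-checking. Below is a self-contained sketch of the hint packing; the 24-bit shift is an assumption for illustration, not copied from the tree, so consult MWAIT2flg()/flg2MWAIT() in drivers/idle/intel_idle.c for the authoritative definitions.

/* Standalone sketch of MWAIT-hint packing as used by the avn_cstates table.
 * Shift width is assumed; the driver's MWAIT2flg()/flg2MWAIT() are authoritative. */
#include <stdio.h>

#define MWAIT2flg(eax)   (((eax) & 0xFF) << 24)   /* pack hint into the high byte of .flags */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) /* recover the hint when entering the state */

int main(void)
{
	unsigned int c6fs_flags = MWAIT2flg(0x52);   /* C6FS-AVN, full cache shrink */

	printf("C6FS-AVN mwait hint: 0x%02x\n", flg2MWAIT(c6fs_flags)); /* prints 0x52 */
	return 0;
}
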
@@ -462,6 +492,11 @@ static const struct idle_cpu idle_cpu_hsw = {
.disable_promotion_to_c1e = true,
};
+static const struct idle_cpu idle_cpu_avn = {
+ .state_table = avn_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
#define ICPU(model, cpu) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
@@ -483,6 +518,7 @@ static const struct x86_cpu_id intel_idle_ids[] = {
ICPU(0x3f, idle_cpu_hsw),
ICPU(0x45, idle_cpu_hsw),
ICPU(0x46, idle_cpu_hsw),
+ ICPU(0x4D, idle_cpu_avn),
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
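
With the ICPU(0x4D, idle_cpu_avn) entry added above, the driver's x86_cpu_id table now matches family-6 model 0x4D and carries a pointer to idle_cpu_avn in its driver data, which in turn selects avn_cstates. A minimal sketch of that model-based dispatch follows; the struct and function names here are invented for illustration, not taken from the kernel.

/* Sketch only: mimics the ICPU()/x86 cpu-id match that picks the AVN state table. */
#include <stdio.h>

struct cpu_id { unsigned int family, model; const char *state_table; };

static const struct cpu_id ids[] = {
	{ 6, 0x3f, "hsw_cstates" },
	{ 6, 0x4d, "avn_cstates" },	/* new Avoton entry */
	{ 0, 0, NULL },
};

static const char *match_state_table(unsigned int family, unsigned int model)
{
	for (const struct cpu_id *id = ids; id->state_table; id++)
		if (id->family == family && id->model == model)
			return id->state_table;
	return "none";
}

int main(void)
{
	printf("model 0x4d -> %s\n", match_state_table(6, 0x4d));	/* avn_cstates */
	return 0;
}
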
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index fe702076ca46..2bb8bf506681 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2,7 +2,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel turbo-capable processors.
*
- * Copyright (c) 2012 Intel Corporation.
+ * Copyright (c) 2013 Intel Corporation.
* Len Brown <len.brown@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -47,6 +47,8 @@ unsigned int skip_c1;
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
unsigned int do_c8_c9_c10;
+unsigned int do_slm_cstates;
+unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int units = 1000000000; /* Ghz etc */
@@ -81,6 +83,8 @@ double rapl_joule_counter_range;
#define RAPL_DRAM (1 << 3)
#define RAPL_PKG_PERF_STATUS (1 << 4)
#define RAPL_DRAM_PERF_STATUS (1 << 5)
+#define RAPL_PKG_POWER_INFO (1 << 6)
+#define RAPL_CORE_POLICY (1 << 7)
#define TJMAX_DEFAULT 100
#define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -96,7 +100,7 @@ struct thread_data {
unsigned long long tsc;
unsigned long long aperf;
unsigned long long mperf;
- unsigned long long c1; /* derived */
+ unsigned long long c1;
unsigned long long extra_msr64;
unsigned long long extra_delta64;
unsigned long long extra_msr32;
@@ -266,7 +270,7 @@ void print_header(void)
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
if (do_nhm_cstates)
outp += sprintf(outp, " %%c1");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%c3");
if (do_nhm_cstates)
outp += sprintf(outp, " %%c6");
@@ -280,9 +284,9 @@ void print_header(void)
if (do_snb_cstates)
outp += sprintf(outp, " %%pc2");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%pc3");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%pc6");
if (do_snb_cstates)
outp += sprintf(outp, " %%pc7");
@@ -480,7 +484,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
if (do_nhm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
@@ -499,9 +503,9 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (do_snb_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
if (do_snb_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
@@ -648,17 +652,24 @@ delta_thread(struct thread_data *new, struct thread_data *old,
}
- /*
- * As counter collection is not atomic,
- * it is possible for mperf's non-halted cycles + idle states
- * to exceed TSC's all cycles: show c1 = 0% in that case.
- */
- if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
- old->c1 = 0;
- else {
- /* normal case, derive c1 */
- old->c1 = old->tsc - old->mperf - core_delta->c3
+ if (use_c1_residency_msr) {
+ /*
+ * Some models have a dedicated C1 residency MSR,
+ * which should be more accurate than the derivation below.
+ */
+ } else {
+ /*
+ * As counter collection is not atomic,
+ * it is possible for mperf's non-halted cycles + idle states
+ * to exceed TSC's all cycles: show c1 = 0% in that case.
+ */
+ if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+ old->c1 = 0;
+ else {
+ /* normal case, derive c1 */
+ old->c1 = old->tsc - old->mperf - core_delta->c3
				- core_delta->c6 - core_delta->c7;
+ }
}
if (old->mperf == 0) {
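
The restructured block above keeps the old C1 derivation for CPUs without a dedicated residency MSR: whatever portion of the TSC interval is neither non-halted (mperf) nor accounted to C3/C6/C7 is attributed to C1, clamped to zero when the non-atomic snapshots overshoot. A standalone sketch of that arithmetic, with invented sample numbers:

#include <stdio.h>

/* Derive C1 residency the way delta_thread() does when no MSR_CORE_C1_RES
 * is available; the values in main() are made up for illustration. */
static unsigned long long derive_c1(unsigned long long tsc, unsigned long long mperf,
				    unsigned long long c3, unsigned long long c6,
				    unsigned long long c7)
{
	if (mperf + c3 + c6 + c7 > tsc)	/* counter snapshots are not atomic */
		return 0;
	return tsc - mperf - c3 - c6 - c7;
}

int main(void)
{
	unsigned long long tsc = 1000000, mperf = 400000;
	unsigned long long c3 = 100000, c6 = 300000, c7 = 0;

	/* 20% of the interval ends up attributed to C1 here */
	printf("c1 = %llu cycles (%.1f%%)\n",
	       derive_c1(tsc, mperf, c3, c6, c7),
	       100.0 * derive_c1(tsc, mperf, c3, c6, c7) / tsc);
	return 0;
}
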
@@ -872,13 +883,21 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
return -5;
+ if (use_c1_residency_msr) {
+ if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+ return -6;
+ }
+
/* collect core counters only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
+ }
+
+ if (do_nhm_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
}
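
The MSR_CORE_C1_RES read added above goes through turbostat's get_msr() helper, which boils down to a pread() on the msr character device at an offset equal to the MSR address. A self-contained sketch of that access pattern (requires the msr driver loaded and root; the MSR address comes from the msr-index.h hunk above):

#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

#define MSR_CORE_C1_RES 0x660	/* per-core C1 residency, as added in msr-index.h */

/* Read one MSR on one CPU via /dev/cpu/<cpu>/msr (same pattern as get_msr()). */
static int read_msr(int cpu, off_t msr_addr, uint64_t *val)
{
	char path[64];
	int fd;
	ssize_t n;

	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	n = pread(fd, val, sizeof(*val), msr_addr);
	close(fd);
	return n == sizeof(*val) ? 0 : -1;
}

int main(void)
{
	uint64_t c1;

	if (read_msr(0, MSR_CORE_C1_RES, &c1) == 0)
		printf("cpu0 MSR_CORE_C1_RES: %llu\n", (unsigned long long)c1);
	else
		perror("read_msr");
	return 0;
}
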
@@ -898,7 +917,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates) {
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
return -9;
if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
@@ -1046,25 +1065,28 @@ print_nhm_turbo_ratio_limits:
switch(msr & 0x7) {
case 0:
- fprintf(stderr, "pc0");
+ fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
break;
case 1:
- fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
+ fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
break;
case 2:
- fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
+ fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
break;
case 3:
- fprintf(stderr, "pc6");
+ fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
break;
case 4:
- fprintf(stderr, "pc7");
+ fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
break;
case 5:
- fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
+ fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
+ break;
+ case 6:
+ fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
break;
case 7:
- fprintf(stderr, "unlimited");
+ fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
break;
default:
fprintf(stderr, "invalid");
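
The SLM additions above deepen the nested ternaries that decode the 3-bit package C-state limit field. An equivalent table-driven decode (a sketch, not a proposed change; the strings are copied from the cases above) reads more directly:

#include <stdio.h>

/* Same decode of (msr & 0x7) as the switch above, table-driven. */
static const char *pkg_cstate_limit_name(unsigned int limit,
					 int do_slm_cstates, int do_snb_cstates)
{
	static const char * const slm[8] = {
		"no pkg states", "no pkg states", "invalid", "invalid",
		"pc4", "invalid", "pc6", "pc7",
	};
	static const char * const snb[8] = {
		"pc0", "pc2", "pc6-noret", "pc6", "pc7", "pc7s", "invalid", "unlimited",
	};
	static const char * const nhm[8] = {
		"pc0", "pc0", "pc3", "pc6", "pc7", "invalid", "invalid", "unlimited",
	};

	limit &= 0x7;
	if (do_slm_cstates)
		return slm[limit];
	return do_snb_cstates ? snb[limit] : nhm[limit];
}

int main(void)
{
	/* SLM with limit field 6 -> "pc6", SNB with 7 -> "unlimited" */
	printf("%s %s\n", pkg_cstate_limit_name(6, 1, 0), pkg_cstate_limit_name(7, 0, 1));
	return 0;
}
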
@@ -1460,6 +1482,7 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
case 0x3F: /* HSW */
case 0x45: /* HSW */
case 0x46: /* HSW */
+ case 0x4D: /* AVN */
return 1;
case 0x2E: /* Nehalem-EX Xeon - Beckton */
case 0x2F: /* Westmere-EX Xeon - Eagleton */
@@ -1555,11 +1578,14 @@ void rapl_probe(unsigned int family, unsigned int model)
case 0x3F: /* HSW */
case 0x45: /* HSW */
case 0x46: /* HSW */
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
break;
case 0x2D:
case 0x3E:
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ break;
+ case 0x4D: /* AVN */
+ do_rapl = RAPL_PKG | RAPL_CORES ;
break;
default:
return;
@@ -1573,17 +1599,18 @@ void rapl_probe(unsigned int family, unsigned int model)
rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
- /* get TDP to determine energy counter range */
- if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
- return;
+ if (do_rapl & RAPL_PKG_POWER_INFO) {
+ /* get TDP to determine energy counter range */
+ if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
+ return;
- tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+ tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
- rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
-
- if (verbose)
- fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
+ rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
+ if (verbose)
+ fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
+ }
return;
}
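
The newly guarded block above derives the RAPL scaling factors from MSR_RAPL_POWER_UNIT and, only when the package advertises RAPL_PKG_POWER_INFO, how long the 32-bit energy counter takes to wrap at sustained TDP. A worked example of that unit math; the raw MSR value and the 95 W TDP are invented, typical-looking numbers, not read from hardware:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t msr = 0x000a1003;	/* hypothetical MSR_RAPL_POWER_UNIT value */
	double power_units  = 1.0 / (1 << (msr & 0xF));		/* 1/8 W     */
	double energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));	/* 1/65536 J */
	double time_units   = 1.0 / (1 << (msr >> 16 & 0xF));	/* 1/1024 s  */
	double tdp = 95.0;	/* would come from MSR_PKG_POWER_INFO when present */

	/* time for the 32-bit energy counter to wrap at sustained TDP */
	double joule_counter_range = 0xFFFFFFFF * energy_units / tdp;

	printf("units: %g W, %g J, %g s\n", power_units, energy_units, time_units);
	printf("joule counter range: %.0f sec at %.0f W\n", joule_counter_range, tdp);
	return 0;
}
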
@@ -1702,7 +1729,8 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
}
- if (do_rapl & RAPL_PKG) {
+ if (do_rapl & RAPL_PKG_POWER_INFO) {
+
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
return -5;
@@ -1714,6 +1742,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+ }
+ if (do_rapl & RAPL_PKG) {
+
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
return -9;
@@ -1749,12 +1780,16 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
- if (do_rapl & RAPL_CORES) {
+ if (do_rapl & RAPL_CORE_POLICY) {
if (verbose) {
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7;
fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+ }
+ }
+ if (do_rapl & RAPL_CORES) {
+ if (verbose) {
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
@@ -1813,10 +1848,47 @@ int has_c8_c9_c10(unsigned int family, unsigned int model)
}
+int is_slm(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+ switch (model) {
+ case 0x4D: /* AVN */
+ return 1;
+ }
+ return 0;
+}
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
+
+double slm_bclk(void)
+{
+ unsigned long long msr = 3;
+ unsigned int i;
+ double freq;
+
+ if (get_msr(0, MSR_FSB_FREQ, &msr))
+ fprintf(stderr, "SLM BCLK: unknown\n");
+
+ i = msr & 0xf;
+ if (i >= SLM_BCLK_FREQS) {
+ fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
+ msr = 3;
+ }
+ freq = slm_freq_table[i];
+
+ fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
+
+ return freq;
+}
+
double discover_bclk(unsigned int family, unsigned int model)
{
if (is_snb(family, model))
return 100.00;
+ else if (is_slm(family, model))
+ return slm_bclk();
else
return 133.33;
}
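
One detail worth flagging in slm_bclk() above: when the MSR_FSB_FREQ field is out of range, the code resets msr to 3 but still indexes slm_freq_table[] with the original out-of-range i, and a failed get_msr() only warns before indexing. A bounds-safe variant of the lookup (a sketch, not the committed code) would clamp the index itself:

#include <stdio.h>

#define SLM_BCLK_FREQS 5
static const double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };

/* Sketch of a bounds-safe SLM BCLK lookup; the committed code resets msr, not i. */
static double slm_bclk_from_msr(unsigned long long msr)
{
	unsigned int i = msr & 0xf;

	if (i >= SLM_BCLK_FREQS) {
		fprintf(stderr, "SLM BCLK[%u] invalid, using entry 3\n", i);
		i = 3;	/* same 116.7 MHz default the original initialises msr with */
	}
	return slm_freq_table[i];
}

int main(void)
{
	printf("BCLK: %.1f MHz\n", slm_bclk_from_msr(0x0));	/* 83.3 */
	printf("BCLK: %.1f MHz\n", slm_bclk_from_msr(0x7));	/* clamped to 116.7 */
	return 0;
}
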
@@ -1873,7 +1945,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
cpu, msr, target_c_local);
- if (target_c_local < 85 || target_c_local > 120)
+ if (target_c_local < 85 || target_c_local > 127)
goto guess;
tcc_activation_temp = target_c_local;
@@ -1970,6 +2042,7 @@ void check_cpuid()
do_smi = do_nhm_cstates;
do_snb_cstates = is_snb(family, model);
do_c8_c9_c10 = has_c8_c9_c10(family, model);
+ do_slm_cstates = is_slm(family, model);
bclk = discover_bclk(family, model);
do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
@@ -2331,7 +2404,7 @@ int main(int argc, char **argv)
cmdline(argc, argv);
if (verbose)
- fprintf(stderr, "turbostat v3.4 April 17, 2013"
+ fprintf(stderr, "turbostat v3.5 April 26, 2013"
" - Len Brown <lenb@kernel.org>\n");
turbostat_init();