From ed2f752e0e0a21d941ca0ee539ef3d4cd576bc5e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 20 Oct 2023 18:19:20 +0200 Subject: x86/percpu: Introduce const-qualified const_pcpu_hot to micro-optimize code generation Some variables in pcpu_hot, currently current_task and top_of_stack, are actually per-thread variables implemented as per-CPU variables and thus stable for the duration of the respective task. There is already an attempt to eliminate redundant reads from these variables using the this_cpu_read_stable() asm macro, which hides the dependency on the read memory address. However, the compiler has limited ability to eliminate asm common subexpressions, so this approach results in only limited success. The solution is to allow more aggressive elimination by aliasing pcpu_hot into a const-qualified const_pcpu_hot, and to read stable per-CPU variables from this constant copy. The current per-CPU infrastructure does not support reads from const-qualified variables. However, when the compiler supports segment qualifiers, it is possible to declare the const-aliased variable in the relevant named address space. The compiler considers access to the variable, declared in this way, as a read from a constant location, and will optimize reads from the variable accordingly. By implementing the const-qualified const_pcpu_hot, the compiler can eliminate redundant reads from the constant variables, reducing the number of loads from current_task from 3766 to 3217 on a test build, a 14.6% reduction. The reduction of loads translates to the following code savings: text data bss dec hex filename 25477353 4389456 808452 30675261 1d4113d vmlinux-old.o 25476074 4389440 808452 30673966 1d40c2e vmlinux-new.o representing a code size reduction of 1279 bytes. [ mingo: Updated the changelog, EXPORT(const_pcpu_hot). ] Co-developed-by: Nadav Amit Signed-off-by: Nadav Amit Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231020162004.135244-1-ubizjak@gmail.com --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 382d4e6b848d..4cc0ab0dfbb5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2051,6 +2051,7 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { .top_of_stack = TOP_OF_INIT_STACK, }; EXPORT_PER_CPU_SYMBOL(pcpu_hot); +EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, -- cgit v1.2.3 From acaa4b5c4c854b5009f4d4a5395b2609ad0f4937 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 25 Jan 2024 22:11:02 -0600 Subject: x86/speculation: Do not enable Automatic IBRS if SEV-SNP is enabled Without SEV-SNP, Automatic IBRS protects only the kernel. But when SEV-SNP is enabled, the Automatic IBRS protection umbrella widens to all host-side code, including userspace. This protection comes at a cost: reduced userspace indirect branch performance. To avoid this performance loss, don't use Automatic IBRS on SEV-SNP hosts and fall back to retpolines instead. [ mdr: squash in changes from review discussion.
] Signed-off-by: Kim Phillips Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov (AMD) Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20240126041126.1927228-3-michael.roth@amd.com --- arch/x86/kernel/cpu/common.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0b97bcde70c6..9e35e276c55a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1355,8 +1355,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) /* * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature * flag and protect from vendor-specific bugs via the whitelist. + * + * Don't use AutoIBRS when SNP is enabled because it degrades host + * userspace indirect branch performance. */ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { + if ((ia32_cap & ARCH_CAP_IBRS_ALL) || + (cpu_has(c, X86_FEATURE_AUTOIBRS) && + !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && !(ia32_cap & ARCH_CAP_PBRSB_NO)) -- cgit v1.2.3 From ff45746fbf005f96e42bea466698e3fdbf926013 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Tue, 5 Dec 2023 02:50:00 -0800 Subject: x86/cpu: Add X86_CR4_FRED macro Add X86_CR4_FRED macro for the FRED bit in %cr4. This bit must not be changed after initialization, so add it to the pinned CR4 bits. Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Xin Li Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Shan Kang Link: https://lore.kernel.org/r/20231205105030.8698-12-xin3.li@intel.com --- arch/x86/include/uapi/asm/processor-flags.h | 7 +++++++ arch/x86/kernel/cpu/common.c | 5 ++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index d898432947ff..f1a4adc78272 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -139,6 +139,13 @@ #define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */ #define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT) +#ifdef __x86_64__ +#define X86_CR4_FRED_BIT 32 /* enable FRED kernel entry */ +#define X86_CR4_FRED _BITUL(X86_CR4_FRED_BIT) +#else +#define X86_CR4_FRED (0) +#endif + /* * x86-64 Task Priority Register, CR8 */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0b97bcde70c6..c3a175770df0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -382,9 +382,8 @@ out: } /* These bits should not change their value after CPU init is finished. 
*/ -static const unsigned long cr4_pinned_mask = - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | - X86_CR4_FSGSBASE | X86_CR4_CET; +static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | + X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; -- cgit v1.2.3 From 530dce278afffd8084af9a23493532912cdbe98a Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Dec 2023 02:50:22 -0800 Subject: x86/syscall: Split IDT syscall setup code into idt_syscall_init() Because FRED uses the ring 3 FRED entrypoint for SYSCALL and SYSENTER, and ERETU is the only legitimate instruction to return to ring 3, there is no need to set up the SYSCALL and SYSENTER MSRs for FRED, except the IA32_STAR MSR. Split the IDT syscall setup code into idt_syscall_init() to make it easy to skip syscall setup code when FRED is enabled. Suggested-by: Thomas Gleixner Signed-off-by: Xin Li Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Shan Kang Link: https://lore.kernel.org/r/20231205105030.8698-34-xin3.li@intel.com --- arch/x86/kernel/cpu/common.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c3a175770df0..4f5e4aa35e5a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2066,10 +2066,8 @@ static void wrmsrl_cstar(unsigned long val) wrmsrl(MSR_CSTAR, val); } -/* May not be marked __init: used by software suspend */ -void syscall_init(void) +static inline void idt_syscall_init(void) { - wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); if (ia32_enabled()) { @@ -2103,6 +2101,15 @@ void syscall_init(void) X86_EFLAGS_AC|X86_EFLAGS_ID); } +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* The default user and kernel segments */ + wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); + + idt_syscall_init(); +} + #else /* CONFIG_X86_64 */ #ifdef CONFIG_STACKPROTECTOR -- cgit v1.2.3 From 208d8c79fd0f155bce1b23d8d78926653f7603b7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Tue, 5 Dec 2023 02:50:24 -0800 Subject: x86/fred: Invoke FRED initialization code to enable FRED Let cpu_init_exception_handling() call cpu_init_fred_exceptions() to initialize FRED. However, if FRED is unavailable or disabled, fall back to setting up the TSS IST entries and initializing the IDT. Co-developed-by: Xin Li Signed-off-by: H.
Peter Anvin (Intel) Signed-off-by: Xin Li Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Shan Kang Link: https://lore.kernel.org/r/20231205105030.8698-36-xin3.li@intel.com --- arch/x86/kernel/cpu/common.c | 22 +++++++++++++++++----- arch/x86/kernel/irqinit.c | 7 ++++++- arch/x86/kernel/traps.c | 5 ++++- 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4f5e4aa35e5a..cf82e3181f7a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -2107,7 +2108,15 @@ void syscall_init(void) /* The default user and kernel segments */ wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - idt_syscall_init(); + /* + * Except the IA32_STAR MSR, there is NO need to setup SYSCALL and + * SYSENTER MSRs for FRED, because FRED uses the ring 3 FRED + * entrypoint for SYSCALL and SYSENTER, and ERETU is the only legit + * instruction to return to ring 3 (both sysexit and sysret cause + * #UD when FRED is enabled). + */ + if (!cpu_feature_enabled(X86_FEATURE_FRED)) + idt_syscall_init(); } #else /* CONFIG_X86_64 */ @@ -2213,8 +2222,9 @@ void cpu_init_exception_handling(void) /* paranoid_entry() gets the CPU number from the GDT */ setup_getcpu(cpu); - /* IST vectors need TSS to be set up. */ - tss_setup_ist(tss); + /* For IDT mode, IST vectors need to be set in TSS. */ + if (!cpu_feature_enabled(X86_FEATURE_FRED)) + tss_setup_ist(tss); tss_setup_io_bitmap(tss); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); @@ -2223,8 +2233,10 @@ void cpu_init_exception_handling(void) /* GHCB needs to be setup to handle #VC. */ setup_ghcb(); - /* Finally load the IDT */ - load_current_idt(); + if (cpu_feature_enabled(X86_FEATURE_FRED)) + cpu_init_fred_exceptions(); + else + load_current_idt(); } /* diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index c683666876f1..f79c5edc0b89 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -28,6 +28,7 @@ #include #include #include +#include #include /* @@ -96,7 +97,11 @@ void __init native_init_IRQ(void) /* Execute any quirks before the call gates are initialised: */ x86_init.irqs.pre_vector_init(); - idt_setup_apic_and_irq_gates(); + if (cpu_feature_enabled(X86_FEATURE_FRED)) + fred_complete_exception_setup(); + else + idt_setup_apic_and_irq_gates(); + lapic_assign_system_vectors(); if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1b19a170f5e3..6cb31df3d5ff 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1438,7 +1438,10 @@ void __init trap_init(void) /* Initialize TSS before setting up traps so ISTs work */ cpu_init_exception_handling(); + /* Setup traps as cpu_init() might #GP */ - idt_setup_traps(); + if (!cpu_feature_enabled(X86_FEATURE_FRED)) + idt_setup_traps(); + cpu_init(); } -- cgit v1.2.3 From ebdb20361059b3c4fd7b23cfa10c28e798b7a3d2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:01 +0100 Subject: x86/cpu: Provide cpu_init/parse_topology() Topology evaluation is a complete disaster and impenetrable mess. It's scattered all over the place with some vendor implementations doing early evaluation and some not. 
The most horrific part is the permanent overwriting of smp_num_siblings and __max_die_per_package, instead of establishing them once on the boot CPU and validating the result on the APs. The goals are: - One topology evaluation entry point - Proper sharing of pointlessly duplicated code - Proper structuring of the evaluation logic and preferences - Evaluating important system-wide information only once on the boot CPU - Making the 0xb/0x1f leaf parsing less convoluted and actually fixing the shortcomings of leaf 0x1f evaluation. Start to consolidate the topology evaluation code by providing the entry points for the early boot CPU evaluation and for the final parsing on the boot CPU and the APs. Move the trivial pieces into that new code: - The initialization of cpuinfo_x86::topo - The evaluation of CPUID leaf 1, which presets topo::initial_apicid - topo::apicid is set to topo::initial_apicid when invoked from early boot. When invoked for the final evaluation on the boot CPU, it reads the actual APIC ID, which makes apic_get_initial_apicid() obsolete once everything is converted over. Provide a temporary helper function topo_is_converted() which shields off the not yet converted CPU vendors from invoking code which would break them. This shielding covers all vendor CPUs which support SMP, but not the historical pure UP ones as they only need the topology info init and eventually the initial APIC initialization. Provide two new members in cpuinfo_x86::topo to store the maximum number of SMT siblings and the number of dies per package and add them to the debugfs readout. These two members will be used to populate this information on the boot CPU and to validate the APs against it. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240212153624.581436579@linutronix.de --- arch/x86/include/asm/topology.h | 19 ++++ arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/common.c | 24 ++--- arch/x86/kernel/cpu/cpu.h | 6 ++ arch/x86/kernel/cpu/debugfs.c | 38 +++++++ arch/x86/kernel/cpu/topology.h | 36 +++++++ arch/x86/kernel/cpu/topology_common.c | 188 ++++++++++++++++++++++++++++++++++ 7 files changed, 296 insertions(+), 18 deletions(-) create mode 100644 arch/x86/kernel/cpu/topology.h create mode 100644 arch/x86/kernel/cpu/topology_common.c (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 5f87f6b9b09e..fa5d803ed7e2 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -102,6 +102,25 @@ static inline void setup_node_to_cpumask_map(void) { } #include +/* Topology information */ +enum x86_topology_domains { + TOPO_SMT_DOMAIN, + TOPO_CORE_DOMAIN, + TOPO_MODULE_DOMAIN, + TOPO_TILE_DOMAIN, + TOPO_DIE_DOMAIN, + TOPO_DIEGRP_DOMAIN, + TOPO_PKG_DOMAIN, + TOPO_MAX_DOMAIN, +}; + +struct x86_topology_system { + unsigned int dom_shifts[TOPO_MAX_DOMAIN]; + unsigned int dom_size[TOPO_MAX_DOMAIN]; +}; + +extern struct x86_topology_system x86_topo_system; + extern const struct cpumask *cpu_coregroup_mask(int cpu); extern const struct cpumask *cpu_clustergroup_mask(int cpu); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 93eabf544031..b1c655e8689d 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -17,7 +17,8 @@
KMSAN_SANITIZE_common.o := n # As above, instrumenting secondary CPU boot code causes boot hangs. KCSAN_SANITIZE_common.o := n -obj-y := cacheinfo.o scattered.o topology.o +obj-y := cacheinfo.o scattered.o +obj-y += topology_common.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0b97bcde70c6..d674e7bdd6e4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1591,6 +1591,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_CPUID); cpu_parse_early_param(); + cpu_init_topology(c); + if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -1601,6 +1603,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_bsp_init(c); } else { setup_clear_cpu_cap(X86_FEATURE_CPUID); + cpu_init_topology(c); } get_cpu_address_sizes(c); @@ -1748,18 +1751,6 @@ static void generic_identify(struct cpuinfo_x86 *c) get_cpu_address_sizes(c); - if (c->cpuid_level >= 0x00000001) { - c->topo.initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_SMP - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); -# else - c->topo.apicid = c->topo.initial_apicid; -# endif -#endif - c->topo.pkg_id = c->topo.initial_apicid; - } - get_model_name(c); /* Default name */ /* @@ -1818,9 +1809,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; c->x86_coreid_bits = 0; - c->topo.cu_id = 0xff; - c->topo.llc_id = BAD_APICID; - c->topo.l2c_id = BAD_APICID; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; @@ -1839,6 +1827,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) generic_identify(c); + cpu_parse_topology(c); + if (this_cpu->c_identify) this_cpu->c_identify(c); @@ -1846,10 +1836,10 @@ static void identify_cpu(struct cpuinfo_x86 *c) apply_forced_caps(c); #ifdef CONFIG_X86_64 - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); + if (!topo_is_converted(c)) + c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); #endif - /* * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and * Hygon will clear it in ->c_init() below. 
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 885281ae79a5..2a446ecb2cc0 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -2,6 +2,11 @@ #ifndef ARCH_X86_CPU_H #define ARCH_X86_CPU_H +#include +#include + +#include "topology.h" + /* attempt to consolidate cpu attributes */ struct cpu_dev { const char *c_vendor; @@ -96,4 +101,5 @@ static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) mode == SPECTRE_V2_EIBRS_RETPOLINE || mode == SPECTRE_V2_EIBRS_LFENCE; } + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 0c179d684b3b..be8ca21ecd6a 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -5,6 +5,8 @@ #include #include +#include "cpu.h" + static int cpu_debug_show(struct seq_file *m, void *p) { unsigned long cpu = (unsigned long)m->private; @@ -42,12 +44,48 @@ static const struct file_operations dfs_cpu_ops = { .release = single_release, }; +static int dom_debug_show(struct seq_file *m, void *p) +{ + static const char *domain_names[TOPO_MAX_DOMAIN] = { + [TOPO_SMT_DOMAIN] = "Thread", + [TOPO_CORE_DOMAIN] = "Core", + [TOPO_MODULE_DOMAIN] = "Module", + [TOPO_TILE_DOMAIN] = "Tile", + [TOPO_DIE_DOMAIN] = "Die", + [TOPO_DIEGRP_DOMAIN] = "DieGrp", + [TOPO_PKG_DOMAIN] = "Package", + }; + unsigned int dom, nthreads = 1; + + for (dom = 0; dom < TOPO_MAX_DOMAIN; dom++) { + nthreads *= x86_topo_system.dom_size[dom]; + seq_printf(m, "domain: %-10s shift: %u dom_size: %5u max_threads: %5u\n", + domain_names[dom], x86_topo_system.dom_shifts[dom], + x86_topo_system.dom_size[dom], nthreads); + } + return 0; +} + +static int dom_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, dom_debug_show, inode->i_private); +} + +static const struct file_operations dfs_dom_ops = { + .open = dom_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static __init int cpu_init_debugfs(void) { struct dentry *dir, *base = debugfs_create_dir("topo", arch_debugfs_dir); unsigned long id; char name[24]; + debugfs_create_file("domains", 0444, base, NULL, &dfs_dom_ops); + dir = debugfs_create_dir("cpus", base); for_each_possible_cpu(id) { sprintf(name, "%lu", id); diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h new file mode 100644 index 000000000000..99c94c519b5d --- /dev/null +++ b/arch/x86/kernel/cpu/topology.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_X86_TOPOLOGY_H +#define ARCH_X86_TOPOLOGY_H + +struct topo_scan { + struct cpuinfo_x86 *c; + unsigned int dom_shifts[TOPO_MAX_DOMAIN]; + unsigned int dom_ncpus[TOPO_MAX_DOMAIN]; +}; + +bool topo_is_converted(struct cpuinfo_x86 *c); +void cpu_init_topology(struct cpuinfo_x86 *c); +void cpu_parse_topology(struct cpuinfo_x86 *c); +void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, + unsigned int shift, unsigned int ncpus); + +static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom) +{ + if (dom == TOPO_SMT_DOMAIN) + return apicid; + return apicid >> x86_topo_system.dom_shifts[dom - 1]; +} + +static inline u32 topo_relative_domain_id(u32 apicid, enum x86_topology_domains dom) +{ + if (dom != TOPO_SMT_DOMAIN) + apicid >>= x86_topo_system.dom_shifts[dom - 1]; + return apicid & (x86_topo_system.dom_size[dom] - 1); +} + +static inline u32 topo_domain_mask(enum x86_topology_domains dom) +{ + return (1U << x86_topo_system.dom_shifts[dom]) - 1; +} + +#endif /* 
ARCH_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c new file mode 100644 index 000000000000..873d7c3fe7ac --- /dev/null +++ b/arch/x86/kernel/cpu/topology_common.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include + +#include +#include +#include + +#include "cpu.h" + +struct x86_topology_system x86_topo_system __ro_after_init; + +void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, + unsigned int shift, unsigned int ncpus) +{ + tscan->dom_shifts[dom] = shift; + tscan->dom_ncpus[dom] = ncpus; + + /* Propagate to the upper levels */ + for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { + tscan->dom_shifts[dom] = tscan->dom_shifts[dom - 1]; + tscan->dom_ncpus[dom] = tscan->dom_ncpus[dom - 1]; + } +} + +bool topo_is_converted(struct cpuinfo_x86 *c) +{ + /* Temporary until everything is converted over. */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_INTEL: + case X86_VENDOR_HYGON: + case X86_VENDOR_ZHAOXIN: + return false; + default: + /* Let all UP systems use the below */ + return true; + } +} + +static bool fake_topology(struct topo_scan *tscan) +{ + /* + * Preset the CORE level shift for CPUID less systems and XEN_PV, + * which has useless CPUID information. + */ + topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 1, 1); + + return tscan->c->cpuid_level < 1 || xen_pv_domain(); +} + +static void parse_topology(struct topo_scan *tscan, bool early) +{ + const struct cpuinfo_topology topo_defaults = { + .cu_id = 0xff, + .llc_id = BAD_APICID, + .l2c_id = BAD_APICID, + }; + struct cpuinfo_x86 *c = tscan->c; + struct { + u32 unused0 : 16, + nproc : 8, + apicid : 8; + } ebx; + + c->topo = topo_defaults; + + if (fake_topology(tscan)) + return; + + /* Preset Initial APIC ID from CPUID leaf 1 */ + cpuid_leaf_reg(1, CPUID_EBX, &ebx); + c->topo.initial_apicid = ebx.apicid; + + /* + * The initial invocation from early_identify_cpu() happens before + * the APIC is mapped or X2APIC enabled. For establishing the + * topology, that's not required. Use the initial APIC ID. + */ + if (early) + c->topo.apicid = c->topo.initial_apicid; + else + c->topo.apicid = read_apic_id(); + + /* The above is sufficient for UP */ + if (!IS_ENABLED(CONFIG_SMP)) + return; +} + +static void topo_set_ids(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u32 apicid = c->topo.apicid; + + c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); + c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); + + /* Package relative core ID */ + c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> + x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; +} + +static void topo_set_max_cores(struct topo_scan *tscan) +{ + /* + * Bug compatible for now. This is broken on hybrid systems: + * 8 cores SMT + 8 cores w/o SMT + * tscan.dom_ncpus[TOPO_DIEGRP_DOMAIN] = 24; 24 / 2 = 12 !! + * + * Cannot be fixed without further topology enumeration changes. 
+ */ + tscan->c->x86_max_cores = tscan->dom_ncpus[TOPO_DIEGRP_DOMAIN] >> + x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; +} + +void cpu_parse_topology(struct cpuinfo_x86 *c) +{ + unsigned int dom, cpu = smp_processor_id(); + struct topo_scan tscan = { .c = c, }; + + parse_topology(&tscan, false); + + if (!topo_is_converted(c)) + return; + + for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { + if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) + continue; + pr_err(FW_BUG "CPU%d: Topology domain %u shift %u != %u\n", cpu, dom, + tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); + } + + /* Bug compatible with the existing parsers */ + if (tscan.dom_ncpus[TOPO_SMT_DOMAIN] > smp_num_siblings) { + if (system_state == SYSTEM_BOOTING) { + pr_warn_once("CPU%d: SMT detected and enabled late\n", cpu); + smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; + } else { + pr_warn_once("CPU%d: SMT detected after init. Too late!\n", cpu); + } + } + + topo_set_ids(&tscan); + topo_set_max_cores(&tscan); +} + +void __init cpu_init_topology(struct cpuinfo_x86 *c) +{ + struct topo_scan tscan = { .c = c, }; + unsigned int dom, sft; + + parse_topology(&tscan, true); + + if (!topo_is_converted(c)) + return; + + /* Copy the shift values and calculate the unit sizes. */ + memcpy(x86_topo_system.dom_shifts, tscan.dom_shifts, sizeof(x86_topo_system.dom_shifts)); + + dom = TOPO_SMT_DOMAIN; + x86_topo_system.dom_size[dom] = 1U << x86_topo_system.dom_shifts[dom]; + + for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { + sft = x86_topo_system.dom_shifts[dom] - x86_topo_system.dom_shifts[dom - 1]; + x86_topo_system.dom_size[dom] = 1U << sft; + } + + topo_set_ids(&tscan); + topo_set_max_cores(&tscan); + + /* + * Bug compatible with the existing code. If the boot CPU does not + * have SMT this ends up with one sibling. This needs way deeper + * changes further down the road to get it right during early boot. + */ + smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; + + /* + * Neither it's clear whether there are as many dies as the APIC + * space indicating die level is. But assume that the actual number + * of CPUs gives a proper indication for now to stay bug compatible. + */ + __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] / + tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1]; +} -- cgit v1.2.3 From bda74aae20086c044b31ca0dcdab7deaaf23d0e8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:03 +0100 Subject: x86/cpu: Add legacy topology parser The legacy topology detection via CPUID leaf 4, which provides the number of cores in the package, and CPUID leaf 1, which provides the number of logical CPUs when FEATURE_HT is enabled and the CMP_LEGACY feature is not set, is shared by Intel, Centaur and Zhaoxin CPUs. Lift the code from common.c without the early detection hack and provide it as a common fallback mechanism. Will be utilized in later changes.
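For illustration, the derivation this parser performs can be sketched in standalone userspace C. This is a hedged sketch, not kernel code: it assumes an Intel-style CPU implementing CPUID leaves 1 and 4, and count_order() stands in for the kernel's get_count_order():

/*
 * Userspace sketch of the legacy SMT/core shift derivation.
 * Illustrative only; build with: cc -o legacy legacy.c
 */
#include <cpuid.h>
#include <stdio.h>

/* Stand-in for the kernel's get_count_order(): smallest n with 2^n >= val */
static unsigned int count_order(unsigned int val)
{
	unsigned int order = 0;

	while ((1U << order) < val)
		order++;
	return order;
}

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int ncores = 1, nproc, core_shift, smt_shift = 0;

	/* CPUID(4, 0): EAX[31:26] = cores per package - 1, valid if EAX[4:0] != 0 */
	if (__get_cpuid_count(4, 0, &eax, &ebx, &ecx, &edx) && (eax & 0x1f))
		ncores = (eax >> 26) + 1;

	/* CPUID(1): EBX[23:16] = number of logical processors */
	__get_cpuid(1, &eax, &ebx, &ecx, &edx);
	nproc = (ebx >> 16) & 0xff;

	core_shift = count_order(ncores);
	if (count_order(nproc) > core_shift)
		smt_shift = count_order(nproc) - core_shift;

	/* As in parse_legacy(): the core level counts threads, so fold SMT in */
	printf("smt_shift=%u core_shift=%u threads/pkg=%u\n",
	       smt_shift, core_shift + smt_shift, ncores << smt_shift);
	return 0;
}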
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.644448852@linutronix.de --- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/topology.h | 3 +++ arch/x86/kernel/cpu/topology_common.c | 46 ++++++++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d674e7bdd6e4..004ae9cfcb63 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -892,6 +892,9 @@ void detect_ht(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP int index_msb, core_bits; + if (topo_is_converted(c)) + return; + if (detect_ht_early(c) < 0) return; diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 99c94c519b5d..934a100b9fe6 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -6,6 +6,9 @@ struct topo_scan { struct cpuinfo_x86 *c; unsigned int dom_shifts[TOPO_MAX_DOMAIN]; unsigned int dom_ncpus[TOPO_MAX_DOMAIN]; + + /* Legacy CPUID[1]:EBX[23:16] number of logical processors */ + unsigned int ebx1_nproc_shift; }; bool topo_is_converted(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 873d7c3fe7ac..b0ff1fc84a13 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -24,6 +24,48 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, } } +static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c) +{ + struct { + u32 cache_type : 5, + unused : 21, + ncores : 6; + } eax; + + if (c->cpuid_level < 4) + return 1; + + cpuid_subleaf_reg(4, 0, CPUID_EAX, &eax); + if (!eax.cache_type) + return 1; + + return eax.ncores + 1; +} + +static void __maybe_unused parse_legacy(struct topo_scan *tscan) +{ + unsigned int cores, core_shift, smt_shift = 0; + struct cpuinfo_x86 *c = tscan->c; + + cores = parse_num_cores_legacy(c); + core_shift = get_count_order(cores); + + if (cpu_has(c, X86_FEATURE_HT)) { + if (!WARN_ON_ONCE(tscan->ebx1_nproc_shift < core_shift)) + smt_shift = tscan->ebx1_nproc_shift - core_shift; + /* + * The parser expects leaf 0xb/0x1f format, which means + * the number of logical processors at core level is + * counting threads. + */ + core_shift += smt_shift; + cores <<= smt_shift; + } + + topology_set_dom(tscan, TOPO_SMT_DOMAIN, smt_shift, 1U << smt_shift); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, core_shift, cores); +} + bool topo_is_converted(struct cpuinfo_x86 *c) { /* Temporary until everything is converted over. */ @@ -47,7 +89,7 @@ static bool fake_topology(struct topo_scan *tscan) * which has useless CPUID information. 
*/ topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); - topology_set_dom(tscan, TOPO_CORE_DOMAIN, 1, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 0, 1); return tscan->c->cpuid_level < 1 || xen_pv_domain(); } @@ -88,6 +130,8 @@ static void parse_topology(struct topo_scan *tscan, bool early) /* The above is sufficient for UP */ if (!IS_ENABLED(CONFIG_SMP)) return; + + tscan->ebx1_nproc_shift = get_count_order(ebx.nproc); } static void topo_set_ids(struct topo_scan *tscan) -- cgit v1.2.3 From 92853a7774f942e3692dbd83bace82333a2b47bd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:05 +0100 Subject: x86/cpu: Move __max_die_per_package to common.c In preparation for a complete replacement of the topology leaf 0xb/0x1f evaluation, move __max_die_per_package into the common code. Will be removed once everything is converted over. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.768188958@linutronix.de --- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/topology.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 004ae9cfcb63..5d7911f6f3a9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -76,6 +76,9 @@ u32 elf_hwcap2 __read_mostly; int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); +unsigned int __max_die_per_package __read_mostly = 1; +EXPORT_SYMBOL(__max_die_per_package); + static struct ppin_info { int feature; int msr_ppin_ctl; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index dc136703566f..208e17a77226 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -25,9 +25,6 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -unsigned int __max_die_per_package __read_mostly = 1; -EXPORT_SYMBOL(__max_die_per_package); - #ifdef CONFIG_SMP /* * Check if given CPUID extended topology "leaf" is implemented -- cgit v1.2.3 From 22d63660c35eb751c63a709bf901a64c1726592a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:08 +0100 Subject: x86/cpu: Use common topology code for Intel Intel CPUs use either topology leaf 0xb/0x1f evaluation or the legacy SMP/HT evaluation based on CPUID leaf 0x1/0x4. Move it over to the consolidated topology code and remove the random topology hacks which are sprinkled into the Intel and the common code. No functional change intended.
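For reference, the extended topology evaluation that cpu_parse_topology_ext() prefers walks CPUID leaf 0x1f (falling back to 0xb) one subleaf at a time. A hedged userspace sketch, assuming a CPU that implements one of these leaves; the kernel additionally maps the level types onto the x86_topology_domains enum:

/*
 * Userspace sketch of extended topology enumeration via CPUID 0x1f/0xb.
 * Illustrative only, not kernel code.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int leaf = 0x1f, subleaf, eax, ebx, ecx, edx;

	/* Leaf 0x1f is only usable when subleaf 0 reports a non-zero EBX */
	if (!__get_cpuid_count(0x1f, 0, &eax, &ebx, &ecx, &edx) || !ebx)
		leaf = 0xb;

	for (subleaf = 0; ; subleaf++) {
		unsigned int type, shift;

		if (!__get_cpuid_count(leaf, subleaf, &eax, &ebx, &ecx, &edx))
			break;
		type = (ecx >> 8) & 0xff;	/* 0 terminates the enumeration */
		if (!type)
			break;
		shift = eax & 0x1f;		/* APIC ID shift above this level */
		printf("subleaf %u: type=%u shift=%u logical_cpus=%u x2apic_id=%#x\n",
		       subleaf, type, shift, ebx & 0xffff, edx);
	}
	return 0;
}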
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.893644349@linutronix.de --- arch/x86/kernel/cpu/common.c | 65 ----------------------------------- arch/x86/kernel/cpu/cpu.h | 4 --- arch/x86/kernel/cpu/intel.c | 25 -------------- arch/x86/kernel/cpu/topology.c | 22 ------------ arch/x86/kernel/cpu/topology_common.c | 5 ++- 5 files changed, 4 insertions(+), 117 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5d7911f6f3a9..f83dd86802ac 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -793,19 +793,6 @@ static void get_model_name(struct cpuinfo_x86 *c) *(s + 1) = '\0'; } -void detect_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - c->x86_max_cores = 1; - if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) - return; - - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - c->x86_max_cores = (eax >> 26) + 1; -} - void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -867,54 +854,6 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } -int detect_ht_early(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - u32 eax, ebx, ecx, edx; - - if (!cpu_has(c, X86_FEATURE_HT)) - return -1; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return -1; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return -1; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - if (smp_num_siblings == 1) - pr_info_once("CPU0: Hyper-Threading is disabled\n"); -#endif - return 0; -} - -void detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - int index_msb, core_bits; - - if (topo_is_converted(c)) - return; - - if (detect_ht_early(c) < 0) - return; - - index_msb = get_count_order(smp_num_siblings); - c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - - c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif -} - static void get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ -1899,10 +1838,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - x86_init_rdrand(c); setup_pku(c); setup_cet(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2a446ecb2cc0..5a790f12dc28 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -76,11 +76,7 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); -extern void detect_num_cpu_cores(struct cpuinfo_x86 *c); -extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); -extern int detect_ht_early(struct cpuinfo_x86 *c); -extern void detect_ht(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a927a8fc9624..1bb37a69d837 100644 --- 
a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -315,13 +315,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) } check_memory_type_self_snoop_errata(c); - - /* - * Get the number of SMT siblings early from the extended topology - * leaf, if available. Otherwise try the legacy SMT detection. - */ - if (detect_extended_topology_early(c) < 0) - detect_ht_early(c); } static void bsp_init_intel(struct cpuinfo_x86 *c) @@ -603,24 +596,6 @@ static void init_intel(struct cpuinfo_x86 *c) intel_workarounds(c); - /* - * Detect the extended topology information if available. This - * will reinitialise the initial_apicid which will be used - * in init_intel_cacheinfo() - */ - detect_extended_topology(c); - - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - detect_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - init_intel_cacheinfo(c); if (c->cpuid_level > 9) { diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 208e17a77226..2b68f26a99a4 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -59,28 +59,6 @@ static int detect_extended_topology_leaf(struct cpuinfo_x86 *c) } #endif -int detect_extended_topology_early(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx; - int leaf; - - leaf = detect_extended_topology_leaf(c); - if (leaf < 0) - return -1; - - set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); - - cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - /* - * initial apic id, which also represents 32-bit extended x2apic id. - */ - c->topo.initial_apicid = edx; - smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); -#endif - return 0; -} - /* * Check for extended topology enumeration cpuid leaf, and if it * exists, use it for populating initial_apicid and cpu topology diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index bcaaeecaf1a6..ef99499c9bbe 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -71,7 +71,6 @@ bool topo_is_converted(struct cpuinfo_x86 *c) /* Temporary until everything is converted over. */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - case X86_VENDOR_INTEL: case X86_VENDOR_HYGON: return false; default: @@ -136,6 +135,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) case X86_VENDOR_ZHAOXIN: parse_legacy(tscan); break; + case X86_VENDOR_INTEL: + if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) + parse_legacy(tscan); + break; } } -- cgit v1.2.3 From 3279081dd0cb6bc13ffd5ee0e5cb11cfeae2c625 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:15 +0100 Subject: x86/cpu: Use common topology code for HYGON Switch it over to use the consolidated topology evaluation and remove the temporary safeguards which are no longer needed. No functional change intended.
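As background, the consolidated AMD/Hygon path derives its IDs from CPUID leaf 0x8000001e when TOPOEXT is available. A minimal userspace sketch of the fields involved, assuming a TOPOEXT-capable CPU; the bit layout mirrors the reads in the code removed below:

/*
 * Userspace sketch of the CPUID 0x8000001e fields consumed by the
 * AMD/Hygon topology parser. Illustrative only, not kernel code.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x8000001e, &eax, &ebx, &ecx, &edx))
		return 1;		/* leaf not implemented */

	printf("extended APIC ID: %#x\n", eax);
	printf("core id:          %u\n", ebx & 0xff);
	printf("threads per core: %u\n", ((ebx >> 8) & 0xff) + 1);
	printf("node id:          %u\n", ecx & 0xff);
	printf("nodes per pkg:    %u\n", ((ecx >> 8) & 0x7) + 1);
	return 0;
}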
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.207750409@linutronix.de --- arch/x86/kernel/cpu/common.c | 5 -- arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/hygon.c | 129 ---------------------------------- arch/x86/kernel/cpu/topology.h | 1 - arch/x86/kernel/cpu/topology_common.c | 22 ++---- 5 files changed, 4 insertions(+), 154 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f83dd86802ac..d72787d1b8cb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1780,11 +1780,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Clear/Set all flags overridden by options, after probe */ apply_forced_caps(c); -#ifdef CONFIG_X86_64 - if (!topo_is_converted(c)) - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); -#endif - /* * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and * Hygon will clear it in ->c_init() below. diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 83f3163bec26..ea9e07d57c8d 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -76,7 +76,6 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); -extern int detect_extended_topology(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index f0cd95502faa..c5191b06f9f2 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -18,14 +18,6 @@ #include "cpu.h" -#define APICID_SOCKET_ID_BIT 6 - -/* - * nodes_per_socket: Stores the number of nodes per socket. - * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8] - */ -static u32 nodes_per_socket = 1; - #ifdef CONFIG_NUMA /* * To workaround broken NUMA config. Read the comment in @@ -49,80 +41,6 @@ static int nearby_node(int apicid) } #endif -static void hygon_get_topology_early(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_TOPOEXT)) - smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; -} - -/* - * Fixup core topology information for - * (1) Hygon multi-node processors - * Assumption: Number of cores in each internal node is the same. - * (2) Hygon processors supporting compute units - */ -static void hygon_get_topology(struct cpuinfo_x86 *c) -{ - /* get information required for multi-node processors */ - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - int err; - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - - c->topo.die_id = ecx & 0xff; - - c->topo.core_id = ebx & 0xff; - - if (smp_num_siblings > 1) - c->x86_max_cores /= smp_num_siblings; - - /* - * In case leaf B is available, use it to derive - * topology information. - */ - err = detect_extended_topology(c); - if (!err) - c->x86_coreid_bits = get_count_order(c->x86_max_cores); - - /* - * Socket ID is ApicId[6] for the processors with model <= 0x3 - * when running on host. 
- */ - if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3) - c->topo.pkg_id = c->topo.apicid >> APICID_SOCKET_ID_BIT; - - cacheinfo_hygon_init_llc_id(c); - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - c->topo.die_id = value & 7; - c->topo.llc_id = c->topo.die_id; - } else - return; - - if (nodes_per_socket > 1) - set_cpu_cap(c, X86_FEATURE_AMD_DCM); -} - -/* - * On Hygon setup the lower bits of the APIC id distinguish the cores. - * Assumes number of cores is a power of two. - */ -static void hygon_detect_cmp(struct cpuinfo_x86 *c) -{ - unsigned int bits; - - bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ - c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->topo.pkg_id = c->topo.initial_apicid >> bits; - /* Use package ID also for last level cache */ - c->topo.llc_id = c->topo.die_id = c->topo.pkg_id; -} - static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA @@ -173,32 +91,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void early_init_hygon_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - static void bsp_init_hygon(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -212,18 +104,6 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_MWAITX)) use_mwaitx_delay(); - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 ecx; - - ecx = cpuid_ecx(0x8000001e); - __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; - } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; - } - if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && !boot_cpu_has(X86_FEATURE_VIRT_SSBD)) { /* @@ -242,8 +122,6 @@ static void early_init_hygon(struct cpuinfo_x86 *c) { u32 dummy; - early_init_hygon_mc(c); - set_cpu_cap(c, X86_FEATURE_K8); rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); @@ -284,8 +162,6 @@ static void early_init_hygon(struct cpuinfo_x86 *c) * we can set it unconditionally. 
*/ set_cpu_cap(c, X86_FEATURE_VMMCALL); - - hygon_get_topology_early(c); } static void init_hygon(struct cpuinfo_x86 *c) @@ -302,9 +178,6 @@ static void init_hygon(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - /* get apicid instead of initial apic id from cpuid */ - c->topo.apicid = read_apic_id(); - /* * XXX someone from Hygon needs to confirm this DTRT * @@ -316,8 +189,6 @@ static void init_hygon(struct cpuinfo_x86 *c) cpu_detect_cache_sizes(c); - hygon_detect_cmp(c); - hygon_get_topology(c); srat_detect_node(c); init_hygon_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 7eead546c20e..2a3c838b6044 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -15,7 +15,6 @@ struct topo_scan { u16 amd_node_id; }; -bool topo_is_converted(struct cpuinfo_x86 *c); void cpu_init_topology(struct cpuinfo_x86 *c); void cpu_parse_topology(struct cpuinfo_x86 *c); void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 3c69229ba154..ab944d6f973f 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -68,18 +68,6 @@ static void parse_legacy(struct topo_scan *tscan) topology_set_dom(tscan, TOPO_CORE_DOMAIN, core_shift, cores); } -bool topo_is_converted(struct cpuinfo_x86 *c) -{ - /* Temporary until everything is converted over. */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_HYGON: - return false; - default: - /* Let all UP systems use the below */ - return true; - } -} - static bool fake_topology(struct topo_scan *tscan) { /* @@ -144,6 +132,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) parse_legacy(tscan); break; + case X86_VENDOR_HYGON: + if (IS_ENABLED(CONFIG_CPU_SUP_HYGON)) + cpu_parse_topology_amd(tscan); + break; } } @@ -187,9 +179,6 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, false); - if (!topo_is_converted(c)) - return; - for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) continue; @@ -218,9 +207,6 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, true); - if (!topo_is_converted(c)) - return; - /* Copy the shift values and calculate the unit sizes. */ memcpy(x86_topo_system.dom_shifts, tscan.dom_shifts, sizeof(x86_topo_system.dom_shifts)); -- cgit v1.2.3 From fab75e790f00dca592d9a934d9f1237b81093b99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:21 +0100 Subject: x86/cpu: Remove x86_coreid_bits No more users. 
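The field could go because APIC ID slicing is now expressed through the per-domain shifts. A toy sketch contrasting the removed x86_coreid_bits decomposition with the domain-shift one; all shift values below are invented for illustration:

/*
 * Toy contrast of the removed x86_coreid_bits slicing with the new
 * domain-shift decomposition. Invented values; not kernel code.
 */
#include <stdio.h>

int main(void)
{
	unsigned int apicid = 0x1b;		/* hypothetical APIC ID */
	unsigned int coreid_bits = 4;		/* old: core+SMT bits below the package */
	unsigned int smt_shift = 1, pkg_shift = 4;	/* new: per-domain shifts */

	/* Old scheme: on SMT parts the "core id" was effectively the
	 * thread index within the package */
	printf("old: core=%u pkg=%u\n",
	       apicid & ((1U << coreid_bits) - 1), apicid >> coreid_bits);

	/* New scheme, as in topo_set_ids(): mask to the package, drop SMT bits */
	printf("new: core=%u smt=%u pkg=%u\n",
	       (apicid & ((1U << pkg_shift) - 1)) >> smt_shift,
	       apicid & ((1U << smt_shift) - 1),
	       apicid >> pkg_shift);
	return 0;
}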
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.455839743@linutronix.de --- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/common.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4fbeffd678e2..de1648ee2b9e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -122,8 +122,6 @@ struct cpuinfo_x86 { #endif __u8 x86_virt_bits; __u8 x86_phys_bits; - /* CPUID returned core id bits: */ - __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d72787d1b8cb..33064e661a2d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1753,7 +1753,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; - c->x86_coreid_bits = 0; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; -- cgit v1.2.3 From 52128a7a21f79d5d0be62f10cb0b73d115ab492e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:52 +0100 Subject: x86/cpu/topology: Make the APIC mismatch warnings complete Detect all possible combinations of mismatch right in the CPUID evaluation code. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154638.867699078@linutronix.de --- arch/x86/include/asm/apic.h | 5 ++--- arch/x86/kernel/cpu/common.c | 15 ++------------- arch/x86/kernel/cpu/topology_common.c | 12 ++++++++++++ 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 80d6383b825d..6c5cffceaa14 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -46,16 +46,15 @@ extern void x86_32_probe_apic(void); static inline void x86_32_probe_apic(void) { } #endif -#ifdef CONFIG_X86_LOCAL_APIC +extern u32 cpuid_to_apicid[]; +#ifdef CONFIG_X86_LOCAL_APIC extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern bool apic_is_disabled; extern unsigned int lapic_timer_period; -extern u32 cpuid_to_apicid[]; - extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { APIC_PIC, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 33064e661a2d..b221e144c9fc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1716,22 +1716,11 @@ static void generic_identify(struct cpuinfo_x86 *c) #endif } -/* - * Validate that ACPI/mptables have the same information about the - * effective APIC id and update the package map. - */ -static void validate_apic_and_package_id(struct cpuinfo_x86 *c) +static void update_package_map(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned int cpu = smp_processor_id(); - u32 apicid; - apicid = apic->cpu_present_to_apicid(cpu); - - if (apicid != c->topo.apicid) { - pr_err(FW_BUG "CPU%u: APIC id mismatch. 
Firmware: %x APIC: %x\n", - cpu, apicid, c->topo.initial_apicid); - } BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu)); BUG_ON(topology_update_die_map(c->topo.die_id, cpu)); #else @@ -1923,7 +1912,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_32 enable_sep_cpu(); #endif - validate_apic_and_package_id(c); + update_package_map(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); if (boot_cpu_has_bug(X86_BUG_GDS)) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index afea34d59598..3876a3342fe9 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -177,6 +177,18 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, false); + if (IS_ENABLED(CONFIG_X86_LOCAL_APIC)) { + if (c->topo.initial_apicid != c->topo.apicid) { + pr_err(FW_BUG "CPU%4u: APIC ID mismatch. CPUID: 0x%04x APIC: 0x%04x\n", + cpu, c->topo.initial_apicid, c->topo.apicid); + } + + if (c->topo.apicid != cpuid_to_apicid[cpu]) { + pr_err(FW_BUG "CPU%4u: APIC ID mismatch. Firmware: 0x%04x APIC: 0x%04x\n", + cpu, cpuid_to_apicid[cpu], c->topo.apicid); + } + } + for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) continue; -- cgit v1.2.3 From 090610ba704a66d7a58919be3bad195f24499ecb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:03 +0100 Subject: x86/cpu/topology: Use topology bitmaps for sizing Now that all possible APIC IDs are tracked in the topology bitmaps, it's trivial to retrieve the real information from there. This gets rid of the guesstimates for the maximum number of packages and dies per package, as the actual numbers can be determined before a single AP has been brought up. The number of SMT threads can now be determined correctly from the bitmaps in all situations. Up to now, a system which has SMT disabled in the BIOS would still claim that it is SMT capable, because the lowest APIC ID bit is reserved for that and CPUID leaf 0xb/0x1f still enumerates the SMT domain accordingly. By calculating the bitmap weights of the SMT and the CORE domain and setting them in relation, the SMT-disabled-in-BIOS case correctly reports that the system is not SMT capable. It also handles the situation correctly when a hybrid system's boot CPU does not have SMT, as it takes the SMT capability of the APs fully into account.
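The sizing arithmetic this enables is subtle in one spot: dies per package can be computed as an order delta of the bitmap weights, while threads per core cannot. A small sketch with invented weights, matching the comment added to topology_init_possible_cpus() below:

/*
 * Sketch of the weight-based sizing with invented bitmap weights.
 * Mirrors the arithmetic added below; not kernel code.
 */
#include <stdio.h>

/* Stand-in for the kernel's get_count_order(): smallest n with 2^n >= val */
static unsigned int count_order(unsigned int val)
{
	unsigned int order = 0;

	while ((1U << order) < val)
		order++;
	return order;
}

int main(void)
{
	unsigned int pkgs = 2, dies = 4;	/* hypothetical domain weights */
	unsigned int cores = 6, threads = 8;	/* e.g. hybrid: 2 SMT + 4 non-SMT cores */

	/* Dies nest in APIC ID bit fields, so an order delta is exact */
	printf("dies per package: %u\n",
	       1U << (count_order(dies) - count_order(pkgs)));

	/* order(8) == order(6) == 3: the order delta would claim 1 sibling... */
	printf("order delta:      %u\n",
	       1U << (count_order(threads) - count_order(cores)));

	/* ...while the rounded-up division gives the correct 2 */
	printf("DIV_ROUND_UP:     %u\n", (threads + cores - 1) / cores);
	return 0;
}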
Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.681709880@linutronix.de --- arch/x86/include/asm/smp.h | 3 +-- arch/x86/include/asm/topology.h | 23 ++++++++++++----------- arch/x86/kernel/cpu/common.c | 9 ++++++--- arch/x86/kernel/cpu/debugfs.c | 2 +- arch/x86/kernel/cpu/topology.c | 20 +++++++++++++++++++- arch/x86/kernel/cpu/topology_common.c | 24 ------------------------ arch/x86/kernel/smpboot.c | 16 ---------------- arch/x86/xen/smp.c | 2 -- 8 files changed, 39 insertions(+), 60 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f1510d6f4e84..5318470cea02 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -8,7 +8,7 @@ #include #include -extern int smp_num_siblings; +extern unsigned int smp_num_siblings; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); @@ -109,7 +109,6 @@ void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); void smp_prepare_cpus_common(void); void native_smp_prepare_cpus(unsigned int max_cpus); -void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_kick_ap(unsigned int cpu, struct task_struct *tidle); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 3e11a5a3b830..94ef1a620234 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,7 +143,18 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id) -extern unsigned int __max_die_per_package; +extern unsigned int __max_dies_per_package; +extern unsigned int __max_logical_packages; + +static inline unsigned int topology_max_packages(void) +{ + return __max_logical_packages; +} + +static inline unsigned int topology_max_die_per_package(void) +{ + return __max_dies_per_package; +} #ifdef CONFIG_SMP #define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id) @@ -152,14 +163,6 @@ extern unsigned int __max_die_per_package; #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -extern unsigned int __max_logical_packages; -#define topology_max_packages() (__max_logical_packages) - -static inline int topology_max_die_per_package(void) -{ - return __max_die_per_package; -} - extern int __max_smt_threads; static inline int topology_max_smt_threads(void) @@ -193,13 +196,11 @@ static inline bool topology_is_primary_thread(unsigned int cpu) } #else /* CONFIG_SMP */ -#define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } -static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline unsigned int topology_amd_nodes_per_pkg(void) { return 0; }; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b221e144c9fc..4babe3ca4f67 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ 
-73,11 +73,14 @@ u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ -int smp_num_siblings = 1; +unsigned int smp_num_siblings __ro_after_init = 1; EXPORT_SYMBOL(smp_num_siblings); -unsigned int __max_die_per_package __read_mostly = 1; -EXPORT_SYMBOL(__max_die_per_package); +unsigned int __max_dies_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__max_dies_per_package); + +unsigned int __max_logical_packages __ro_after_init = 1; +EXPORT_SYMBOL(__max_logical_packages); static struct ppin_info { int feature; diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 86de544cdb14..543efc487206 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -29,7 +29,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); seq_printf(m, "max_cores: %u\n", c->x86_max_cores); - seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package); + seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); return 0; } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 07124da9df71..630ebe503ae3 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -348,8 +348,8 @@ void __init topology_init_possible_cpus(void) { unsigned int assigned = topo_info.nr_assigned_cpus; unsigned int disabled = topo_info.nr_disabled_cpus; + unsigned int cnta, cntb, cpu, allowed = 1; unsigned int total = assigned + disabled; - unsigned int cpu, allowed = 1; u32 apicid; if (!restrict_to_up()) { @@ -372,6 +372,24 @@ void __init topology_init_possible_cpus(void) total_cpus = allowed; set_nr_cpu_ids(allowed); + cnta = domain_weight(TOPO_PKG_DOMAIN); + cntb = domain_weight(TOPO_DIE_DOMAIN); + __max_logical_packages = cnta; + __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta)); + + pr_info("Max. logical packages: %3u\n", cnta); + pr_info("Max. logical dies: %3u\n", cntb); + pr_info("Max. dies per package: %3u\n", __max_dies_per_package); + + cnta = domain_weight(TOPO_CORE_DOMAIN); + cntb = domain_weight(TOPO_SMT_DOMAIN); + /* + * Can't use order delta here as order(cnta) can be equal + * order(cntb) even if cnta != cntb. + */ + smp_num_siblings = DIV_ROUND_UP(cntb, cnta); + pr_info("Max. threads per core: %3u\n", smp_num_siblings); + pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index b0b68c867aaf..0276978bc272 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -196,16 +196,6 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); } - /* Bug compatible with the existing parsers */ - if (tscan.dom_ncpus[TOPO_SMT_DOMAIN] > smp_num_siblings) { - if (system_state == SYSTEM_BOOTING) { - pr_warn_once("CPU%d: SMT detected and enabled late\n", cpu); - smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; - } else { - pr_warn_once("CPU%d: SMT detected after init. 
Too late!\n", cpu); - } - } - topo_set_ids(&tscan); topo_set_max_cores(&tscan); } @@ -231,20 +221,6 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) topo_set_ids(&tscan); topo_set_max_cores(&tscan); - /* - * Bug compatible with the existing code. If the boot CPU does not - * have SMT this ends up with one sibling. This needs way deeper - * changes further down the road to get it right during early boot. - */ - smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; - - /* - * Neither it's clear whether there are as many dies as the APIC - * space indicating die level is. But assume that the actual number - * of CPUs gives a proper indication for now to stay bug compatible. - */ - __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] / - tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1]; /* * AMD systems have Nodes per package which cannot be mapped to * APIC ID. diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7f85f174690f..93470ebe2831 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -139,8 +139,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = { .phys_die_id = U32_MAX, }; -unsigned int __max_logical_packages __read_mostly; -EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; static unsigned int logical_die __read_mostly; @@ -1267,24 +1265,10 @@ void __init native_smp_prepare_boot_cpu(void) native_pv_lock_init(); } -void __init calculate_max_logical_packages(void) -{ - int ncpus; - - /* - * Today neither Intel nor AMD support heterogeneous systems so - * extrapolate the boot cpu's data to all packages. - */ - ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); - __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); - pr_info("Max logical packages: %u\n", __max_logical_packages); -} - void __init native_smp_cpus_done(unsigned int max_cpus) { pr_debug("Boot done\n"); - calculate_max_logical_packages(); build_sched_topology(); nmi_selftest(); impress_friends(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4b0d6fff88de..114b362cb7af 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -123,8 +123,6 @@ void __init xen_smp_cpus_done(unsigned int max_cpus) { if (xen_hvm_domain()) native_smp_cpus_done(max_cpus); - else - calculate_max_logical_packages(); } void xen_smp_send_reschedule(int cpu) -- cgit v1.2.3 From 380414be78bf8dbe1c3ed98feb75e2579c4a1bae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:09 +0100 Subject: x86/cpu/topology: Use topology logical mapping mechanism Replace the logical package and die management functionality and retrieve the logical IDs from the topology bitmaps. 
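The idea behind the bitmap lookup can be pictured with a minimal C sketch. The names here (sketch_logical_id(), domain_map) are illustrative only, not the series' exact code; it assumes a per-domain bitmap with one bit set per registered APIC ID, like the apic_maps[] maintained elsewhere in this series:

    #include <linux/bitmap.h>

    /*
     * The logical ID of a unit is the number of units of the same
     * topology domain enumerated below its APIC ID.
     */
    static unsigned int sketch_logical_id(const unsigned long *domain_map,
                                          unsigned int apicid)
    {
            /* Count the set bits in [0, apicid) */
            return bitmap_weight(domain_map, apicid);
    }

Mapping a physical package ID back to a logical one then only requires shifting the package ID into APIC ID position first, which is what the new inline topology_phys_to_logical_pkg() in the hunk below does.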
Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.901865302@linutronix.de --- arch/x86/include/asm/topology.h | 15 +++-- arch/x86/kernel/cpu/common.c | 13 ---- arch/x86/kernel/cpu/topology_common.c | 4 ++ arch/x86/kernel/smpboot.c | 111 +--------------------------------- 4 files changed, 12 insertions(+), 131 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index bdd6a984b22c..5ce06f37a3f7 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -172,6 +172,13 @@ static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) + +static inline int topology_phys_to_logical_pkg(unsigned int pkg) +{ + return topology_get_logical_id(pkg << x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN], + TOPO_PKG_DOMAIN); +} + extern int __max_smt_threads; static inline int topology_max_smt_threads(void) @@ -181,10 +188,6 @@ static inline int topology_max_smt_threads(void) #include -int topology_update_package_map(unsigned int apicid, unsigned int cpu); -int topology_update_die_map(unsigned int dieid, unsigned int cpu); -int topology_phys_to_logical_pkg(unsigned int pkg); - extern unsigned int __amd_nodes_per_pkg; static inline unsigned int topology_amd_nodes_per_pkg(void) @@ -205,10 +208,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu) } #else /* CONFIG_SMP */ -static inline int -topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } -static inline int -topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4babe3ca4f67..58f9cc732d0d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1719,18 +1719,6 @@ static void generic_identify(struct cpuinfo_x86 *c) #endif } -static void update_package_map(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int cpu = smp_processor_id(); - - BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu)); - BUG_ON(topology_update_die_map(c->topo.die_id, cpu)); -#else - c->topo.logical_pkg_id = 0; -#endif -} - /* * This does the hard work of actually picking apart the CPU stuff... 
*/ @@ -1915,7 +1903,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_32 enable_sep_cpu(); #endif - update_package_map(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); if (boot_cpu_has_bug(X86_BUG_GDS)) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 0276978bc272..c21a3871a870 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -10,6 +10,7 @@ #include "cpu.h" struct x86_topology_system x86_topo_system __ro_after_init; +EXPORT_SYMBOL_GPL(x86_topo_system); unsigned int __amd_nodes_per_pkg __ro_after_init; EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); @@ -147,6 +148,9 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); + c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); + c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 93470ebe2831..9ade68572dd8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -125,23 +125,6 @@ struct mwait_cpu_dead { */ static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); -/* Logical package management. */ -struct logical_maps { - u32 phys_pkg_id; - u32 phys_die_id; - u32 logical_pkg_id; - u32 logical_die_id; -}; - -/* Temporary workaround until the full topology mechanics is in place */ -static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = { - .phys_pkg_id = U32_MAX, - .phys_die_id = U32_MAX, -}; - -static unsigned int logical_packages __read_mostly; -static unsigned int logical_die __read_mostly; - /* Maximum number of SMT threads on any online core */ int __read_mostly __max_smt_threads = 1; @@ -334,103 +317,11 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -/** - * topology_phys_to_logical_pkg - Map a physical package id to a logical - * @phys_pkg: The physical package id to map - * - * Returns logical package id or -1 if not found - */ -int topology_phys_to_logical_pkg(unsigned int phys_pkg) -{ - int cpu; - - for_each_possible_cpu(cpu) { - if (per_cpu(logical_maps.phys_pkg_id, cpu) == phys_pkg) - return per_cpu(logical_maps.logical_pkg_id, cpu); - } - return -1; -} -EXPORT_SYMBOL(topology_phys_to_logical_pkg); - -/** - * topology_phys_to_logical_die - Map a physical die id to logical - * @die_id: The physical die id to map - * @cur_cpu: The CPU for which the mapping is done - * - * Returns logical die id or -1 if not found - */ -static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) -{ - int cpu, proc_id = cpu_data(cur_cpu).topo.pkg_id; - - for_each_possible_cpu(cpu) { - if (per_cpu(logical_maps.phys_pkg_id, cpu) == proc_id && - per_cpu(logical_maps.phys_die_id, cpu) == die_id) - return per_cpu(logical_maps.logical_die_id, cpu); - } - return -1; -} - -/** - * topology_update_package_map - Update the physical to logical package map - * @pkg: The physical package id as retrieved via CPUID - * @cpu: The cpu for which this is updated - */ -int topology_update_package_map(unsigned int pkg, unsigned int cpu) -{ - int new; - - /* Already available somewhere? 
*/ - new = topology_phys_to_logical_pkg(pkg); - if (new >= 0) - goto found; - - new = logical_packages++; - if (new != pkg) { - pr_info("CPU %u Converting physical %u to logical package %u\n", - cpu, pkg, new); - } -found: - per_cpu(logical_maps.phys_pkg_id, cpu) = pkg; - per_cpu(logical_maps.logical_pkg_id, cpu) = new; - cpu_data(cpu).topo.logical_pkg_id = new; - return 0; -} -/** - * topology_update_die_map - Update the physical to logical die map - * @die: The die id as retrieved via CPUID - * @cpu: The cpu for which this is updated - */ -int topology_update_die_map(unsigned int die, unsigned int cpu) -{ - int new; - - /* Already available somewhere? */ - new = topology_phys_to_logical_die(die, cpu); - if (new >= 0) - goto found; - - new = logical_die++; - if (new != die) { - pr_info("CPU %u Converting physical %u to logical die %u\n", - cpu, die, new); - } -found: - per_cpu(logical_maps.phys_die_id, cpu) = die; - per_cpu(logical_maps.logical_die_id, cpu) = new; - cpu_data(cpu).topo.logical_die_id = new; - return 0; -} - static void __init smp_store_boot_cpu_info(void) { - int id = 0; /* CPU 0 */ - struct cpuinfo_x86 *c = &cpu_data(id); + struct cpuinfo_x86 *c = &cpu_data(0); *c = boot_cpu_data; - c->cpu_index = id; - topology_update_package_map(c->topo.pkg_id, id); - topology_update_die_map(c->topo.die_id, id); c->initialized = true; } -- cgit v1.2.3 From 8078f4d6102f9370b3b7436d25717735d21f5c09 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:12 +0100 Subject: x86/cpu/topology: Rename smp_num_siblings It's really a non-intuitive name. Rename it to __max_threads_per_core which is obvious. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.011307973@linutronix.de --- arch/x86/include/asm/perf_event_p4.h | 4 ++-- arch/x86/include/asm/smp.h | 2 -- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/common.c | 6 +++--- arch/x86/kernel/cpu/debugfs.c | 2 +- arch/x86/kernel/cpu/mce/inject.c | 2 +- arch/x86/kernel/cpu/topology.c | 6 +++--- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 9 files changed, 13 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 94de1a05aeba..d65e338b6a5f 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config) static inline int p4_ht_active(void) { #ifdef CONFIG_SMP - return smp_num_siblings > 1; + return __max_threads_per_core > 1; #endif return 0; } @@ -189,7 +189,7 @@ static inline int p4_ht_active(void) static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) + if (__max_threads_per_core == 2) return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 5318470cea02..54d6d71e0eca 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -8,8 +8,6 @@ #include #include -extern unsigned int smp_num_siblings; - DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 5ce06f37a3f7..f9eb7a7831f0 100644 --- a/arch/x86/include/asm/topology.h +++ 
b/arch/x86/include/asm/topology.h @@ -145,6 +145,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); extern unsigned int __max_dies_per_package; extern unsigned int __max_logical_packages; +extern unsigned int __max_threads_per_core; static inline unsigned int topology_max_packages(void) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 58f9cc732d0d..cb22cb8d983b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -73,8 +73,8 @@ u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ -unsigned int smp_num_siblings __ro_after_init = 1; -EXPORT_SYMBOL(smp_num_siblings); +unsigned int __max_threads_per_core __ro_after_init = 1; +EXPORT_SYMBOL(__max_threads_per_core); unsigned int __max_dies_per_package __ro_after_init = 1; EXPORT_SYMBOL(__max_dies_per_package); @@ -2251,7 +2251,7 @@ void __init arch_cpu_finalize_init(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings); + cpu_smt_set_num_threads(__max_threads_per_core, __max_threads_per_core); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 543efc487206..f40f3eecebc6 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -30,7 +30,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); seq_printf(m, "max_cores: %u\n", c->x86_max_cores); seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); - seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); + seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core); return 0; } diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 2b290452e194..1e327881073f 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -433,7 +433,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * smp_num_siblings) / topology_amd_nodes_per_pkg(); + cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 7db9df50ada8..b078facce865 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -76,7 +76,7 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) #ifdef CONFIG_SMP static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { - if (!(apicid & (smp_num_siblings - 1))) + if (!(apicid & (__max_threads_per_core - 1))) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } #else @@ -429,8 +429,8 @@ void __init topology_init_possible_cpus(void) * Can't use order delta here as order(cnta) can be equal * order(cntb) even if cnta != cntb. */ - smp_num_siblings = DIV_ROUND_UP(cntb, cnta); - pr_info("Max. threads per core: %3u\n", smp_num_siblings); + __max_threads_per_core = DIV_ROUND_UP(cntb, cnta); + pr_info("Max. 
threads per core: %3u\n", __max_threads_per_core); pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ab49ade31b0d..6121c2b42ecf 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -936,7 +936,7 @@ static __cpuidle void mwait_idle(void) void select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) + if (boot_option_idle_override == IDLE_POLL && __max_threads_per_core > 1) pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); #endif if (x86_idle_set() || boot_option_idle_override == IDLE_POLL) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9ade68572dd8..35c272cb45fb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -563,7 +563,7 @@ static void __init build_sched_topology(void) void set_cpu_sibling_map(int cpu) { - bool has_smt = smp_num_siblings > 1; + bool has_smt = __max_threads_per_core > 1; bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; -- cgit v1.2.3 From fd43b8ae76e903c76f14d06eb939449bcc3f614f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:14 +0100 Subject: x86/cpu/topology: Provide __num_[cores|threads]_per_package Expose properly accounted information and accessors so the fiddling with other topology variables can be replaced. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.120958987@linutronix.de --- arch/x86/include/asm/topology.h | 12 ++++++++++++ arch/x86/kernel/cpu/common.c | 6 ++++++ arch/x86/kernel/cpu/topology.c | 8 +++++++- 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 6a71794bd4e2..76b1d87f1531 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -146,6 +146,8 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); extern unsigned int __max_dies_per_package; extern unsigned int __max_logical_packages; extern unsigned int __max_threads_per_core; +extern unsigned int __num_threads_per_package; +extern unsigned int __num_cores_per_package; static inline unsigned int topology_max_packages(void) { @@ -157,6 +159,16 @@ static inline unsigned int topology_max_dies_per_package(void) return __max_dies_per_package; } +static inline unsigned int topology_num_cores_per_package(void) +{ + return __num_cores_per_package; +} + +static inline unsigned int topology_num_threads_per_package(void) +{ + return __num_threads_per_package; +} + #ifdef CONFIG_X86_LOCAL_APIC int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); #else diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb22cb8d983b..c9a1014386a9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -82,6 +82,12 @@ EXPORT_SYMBOL(__max_dies_per_package); unsigned int __max_logical_packages __ro_after_init = 1; EXPORT_SYMBOL(__max_logical_packages); +unsigned int __num_cores_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__num_cores_per_package); + +unsigned int __num_threads_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__num_threads_per_package); + static struct ppin_info { int feature; int 
msr_ppin_ctl; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b078facce865..41dd8e0eb511 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -392,7 +392,7 @@ void __init topology_init_possible_cpus(void) unsigned int disabled = topo_info.nr_disabled_cpus; unsigned int cnta, cntb, cpu, allowed = 1; unsigned int total = assigned + disabled; - u32 apicid; + u32 apicid, firstid; if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { @@ -432,6 +432,12 @@ void __init topology_init_possible_cpus(void) __max_threads_per_core = DIV_ROUND_UP(cntb, cnta); pr_info("Max. threads per core: %3u\n", __max_threads_per_core); + firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC); + __num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); + pr_info("Num. cores per package: %3u\n", __num_cores_per_package); + __num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN); + pr_info("Num. threads per package: %3u\n", __num_threads_per_package); + pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); -- cgit v1.2.3 From 89b0f15f408f7c4ee98c1ec4c3224852fcbc3274 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:16 +0100 Subject: x86/cpu/topology: Get rid of cpuinfo::x86_max_cores Now that __num_cores_per_package and __num_threads_per_package are available, cpuinfo::x86_max_cores and the related math all over the place can be replaced with the ready to consume data. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.176147806@linutronix.de --- Documentation/arch/x86/topology.rst | 24 +++++++++--------------- arch/x86/events/intel/uncore_nhmex.c | 4 ++-- arch/x86/events/intel/uncore_snb.c | 8 ++++---- arch/x86/events/intel/uncore_snbep.c | 16 ++++++++-------- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/cpu/debugfs.c | 3 ++- arch/x86/kernel/cpu/mce/inject.c | 3 +-- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/topology_common.c | 3 --- arch/x86/kernel/smpboot.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +- drivers/hwmon/fam15h_power.c | 2 +- 14 files changed, 31 insertions(+), 43 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 08ebf9edbfc1..7352ab89a55a 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -47,17 +47,21 @@ AMD nomenclature for package is 'Node'. Package-related topology information in the kernel: - - cpuinfo_x86.x86_max_cores: + - topology_num_threads_per_package() - The number of cores in a package. This information is retrieved via CPUID. + The number of threads in a package. - - cpuinfo_x86.x86_max_dies: + - topology_num_cores_per_package() - The number of dies in a package. This information is retrieved via CPUID. + The number of cores in a package. + + - topology_max_dies_per_package() + + The maximum number of dies in a package. - cpuinfo_x86.topo.die_id: - The physical ID of the die. This information is retrieved via CPUID. + The physical ID of the die. - cpuinfo_x86.topo.pkg_id: @@ -96,16 +100,6 @@ are SMT- or CMT-type threads. 
AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses "core". -Core-related topology information in the kernel: - - - smp_num_siblings: - - The number of threads in a core. The number of threads in a package can be - calculated by:: - - threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings - - Threads ======= A thread is a single scheduling unit. It's the equivalent to a logical Linux diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 56eea2c66cfb..92da8aaa5966 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1221,8 +1221,8 @@ void nhmex_uncore_cpu_init(void) uncore_nhmex = true; else nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; - if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (nhmex_uncore_cbox.num_boxes > topology_num_cores_per_package()) + nhmex_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = nhmex_msr_uncores; } /* end of Nehalem-EX uncore support */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 7fd4334e12a1..9462fd9f3b7a 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -364,8 +364,8 @@ static struct intel_uncore_type *snb_msr_uncores[] = { void snb_uncore_cpu_init(void) { uncore_msr_uncores = snb_msr_uncores; - if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snb_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snb_uncore_cbox.num_boxes = topology_num_cores_per_package(); } static void skl_uncore_msr_init_box(struct intel_uncore_box *box) @@ -428,8 +428,8 @@ static struct intel_uncore_type *skl_msr_uncores[] = { void skl_uncore_cpu_init(void) { uncore_msr_uncores = skl_msr_uncores; - if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (skl_uncore_cbox.num_boxes > topology_num_cores_per_package()) + skl_uncore_cbox.num_boxes = topology_num_cores_per_package(); snb_uncore_arb.ops = &skl_uncore_msr_ops; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 3f6bd3e4a763..2eaf0f339849 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1172,8 +1172,8 @@ static struct intel_uncore_type *snbep_msr_uncores[] = { void snbep_uncore_cpu_init(void) { - if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = snbep_msr_uncores; } @@ -1845,8 +1845,8 @@ static struct intel_uncore_type *ivbep_msr_uncores[] = { void ivbep_uncore_cpu_init(void) { - if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (ivbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + ivbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = ivbep_msr_uncores; } @@ -2917,8 +2917,8 @@ static bool hswep_has_limit_sbox(unsigned int device) void hswep_uncore_cpu_init(void) { - if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if 
(hswep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + hswep_uncore_cbox.num_boxes = topology_num_cores_per_package(); /* Detect 6-8 core systems with only two SBOXes */ if (hswep_has_limit_sbox(HSWEP_PCU_DID)) @@ -3280,8 +3280,8 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (bdx_uncore_cbox.num_boxes > topology_num_cores_per_package()) + bdx_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = bdx_msr_uncores; /* Detect systems with no SBOXes */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index de1648ee2b9e..326581df4846 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -149,8 +149,6 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; /* protected processor identification number */ u64 ppin; - /* cpuid returned max cores value: */ - u16 x86_max_cores; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e1d118e6926e..f2241e7e96fd 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -301,7 +301,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; + eax->split.num_cores_on_die = topology_num_cores_per_package(); if (assoc == 0xffff) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c9a1014386a9..05e0b31f75e9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1738,7 +1738,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_stepping = 0; /* So far unknown... 
*/ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index f40f3eecebc6..3baf3e435834 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -28,7 +28,8 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); - seq_printf(m, "max_cores: %u\n", c->x86_max_cores); + seq_printf(m, "num_threads: %u\n", __num_threads_per_package); + seq_printf(m, "num_cores: %u\n", __num_cores_per_package); seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core); return 0; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1e327881073f..94953d749475 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -430,10 +430,9 @@ static void trigger_thr_int(void *info) static u32 get_nbc_for_node(int node_id) { - struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg(); + cores_per_node = topology_num_threads_per_package() / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 857e608af641..5f0414452b67 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -641,7 +641,7 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) { u64 llc_size = c->x86_cache_size * 1024ULL; - do_div(llc_size, c->x86_max_cores); + do_div(llc_size, topology_num_cores_per_package()); llc_size_per_core = (unsigned int)llc_size; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index a2c3f8f5886d..a50ae8d63d1c 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -155,9 +155,6 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; - /* Maximum number of cores on this package */ - c->x86_max_cores = topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); - c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 35c272cb45fb..9c1e1219c28f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -564,7 +564,7 @@ static void __init build_sched_topology(void) void set_cpu_sibling_map(int cpu) { bool has_smt = __max_threads_per_core > 1; - bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; + bool has_mp = has_smt || topology_num_cores_per_package() > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; int i, threads; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 2ff6deedef95..da1f43999d09 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -451,7 +451,7 @@ static int vangogh_init_smc_tables(struct smu_context *smu) #ifdef CONFIG_X86 /* AMD x86 APU only */ - smu->cpu_core_num = 
boot_cpu_data.x86_max_cores; + smu->cpu_core_num = topology_num_cores_per_package(); #else smu->cpu_core_num = 4; #endif diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index 6307112c2c0c..9ed2c4b6734e 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -209,7 +209,7 @@ static ssize_t power1_average_show(struct device *dev, * With the new x86 topology modelling, x86_max_cores is the * compute unit number. */ - cu_num = boot_cpu_data.x86_max_cores; + cu_num = topology_num_cores_per_package(); ret = read_registers(data); if (ret) -- cgit v1.2.3 From 9a458198eba98b7207669a166e64d04b04cb651b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Feb 2024 00:09:01 +0100 Subject: x86/cpu: Allow reducing x86_phys_bits during early_identify_cpu() In commit fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach"), the initialization of c->x86_phys_bits was moved after this_cpu->c_early_init(c). This is incorrect because early_init_amd() expected to be able to reduce the value according to the contents of CPUID leaf 0x8000001f. Fortunately, the bug was negated by init_amd()'s call to early_init_amd(), which does reduce x86_phys_bits in the end. However, this is very late in the boot process and, most notably, the wrong value is used for x86_phys_bits when setting up MTRRs. To fix this, call get_cpu_address_sizes() as soon as X86_FEATURE_CPUID is set/cleared, and c->extended_cpuid_level is retrieved. Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach") Signed-off-by: Paolo Bonzini Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240131230902.1867092-2-pbonzini%40redhat.com --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0b97bcde70c6..fbc4e60d027c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_vendor(c); get_cpu_cap(c); setup_force_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); cpu_parse_early_param(); if (this_cpu->c_early_init) @@ -1601,10 +1602,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_bsp_init(c); } else { setup_clear_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); } - get_cpu_address_sizes(c); - setup_force_cpu_cap(X86_FEATURE_ALWAYS); cpu_set_bug_bits(c); -- cgit v1.2.3 From 71eb4893cfaf37f8884515c8f71717044b97bf44 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Mar 2024 11:12:23 +0100 Subject: x86/percpu: Cure per CPU madness on UP On UP builds Sparse complains rightfully about accesses to cpu_info with per CPU accessors: cacheinfo.c:282:30: sparse: warning: incorrect type in initializer (different address spaces) cacheinfo.c:282:30: sparse: expected void const [noderef] __percpu *__vpp_verify cacheinfo.c:282:30: sparse: got unsigned int * The reason is that on UP builds cpu_info which is a per CPU variable on SMP is mapped to boot_cpu_info which is a regular variable. There is a hideous accessor cpu_data() which tries to hide this, but it's not sufficient as some places require raw accessors and generates worse code than the regular per CPU accessors. 
Waste sizeof(struct cpuinfo_x86) memory on UP and provide the per CPU cpu_info unconditionally. This requires updating the CPU info on the boot CPU as SMP does. (Ab)use the weakly defined smp_prepare_boot_cpu() function and implement exactly that. This allows using regular per CPU accessors unconditionally and paves the way to remove the cpu_data() hackery. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240304005104.622511517@linutronix.de --- arch/x86/include/asm/processor.h | 5 ----- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/setup.c | 10 ++++++++++ arch/x86/kernel/smpboot.c | 4 ---- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a61f76990b6a..e2262aca6561 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -185,13 +185,8 @@ extern struct cpuinfo_x86 new_cpu_data; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; -#ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) -#else -#define cpu_info boot_cpu_data -#define cpu_data(cpu) boot_cpu_data -#endif extern const struct seq_operations cpuinfo_op; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fbc4e60d027c..6057a9ecac93 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -70,6 +70,9 @@ #include "cpu.h" +DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); + u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 84201071dfac..8f669d3be445 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1211,6 +1211,16 @@ void __init i386_reserve_resources(void) #endif /* CONFIG_X86_32 */ +#ifndef CONFIG_SMP +void __init smp_prepare_boot_cpu(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + *c = boot_cpu_data; + c->initialized = true; +} +#endif + static struct notifier_block kernel_offset_notifier = { .notifier_call = dump_kernel_offset }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 980782bd2c97..37ea8c872e4a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -101,10 +101,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); EXPORT_PER_CPU_SYMBOL(cpu_die_map); -/* Per CPU bogomips and other parameters */ -DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); -EXPORT_PER_CPU_SYMBOL(cpu_info); - /* CPUs which are the primary SMT threads */ struct cpumask __cpu_primary_thread_mask __read_mostly; -- cgit v1.2.3 From 35ce64922c8263448e58a2b9e8d15a64e11e9b2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Feb 2024 23:20:32 +0100 Subject: x86/idle: Select idle routine only once The idle routine selection is done on every CPU bringup operation and has a guard in place which is effective after the first invocation, which is a pointless exercise. Invoke it once on the boot CPU and mark the related functions __init. The guard check has to stay as xen_set_default_idle() runs early.
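Condensed, the resulting pattern looks roughly like the sketch below. idle_routine and pick_idle() are stand-in names for illustration only; the real code uses the x86_idle static call together with prefer_mwait_c1_over_halt(), as the hunks below show:

    /* Select the idle routine exactly once at boot, honoring early overrides. */
    static void (*idle_routine)(void);

    void __init pick_idle(void)
    {
            /* Guard: xen_set_default_idle() may already have installed one */
            if (idle_routine)
                    return;

            idle_routine = prefer_mwait_c1_over_halt() ? mwait_idle : default_idle;
    }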
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/87edcu6vaq.ffs@tglx --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/process.c | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1188e8bf76a2..523c466c2fc9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -558,7 +558,7 @@ static inline void load_sp0(unsigned long sp0) unsigned long __get_wchan(struct task_struct *p); -extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void select_idle_routine(void); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8f367d376520..5c72af16dd06 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1938,8 +1938,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); - select_idle_routine(c); - #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif @@ -2344,6 +2342,8 @@ void __init arch_cpu_finalize_init(void) { identify_boot_cpu(); + select_idle_routine(); + /* * identify_boot_cpu() initialized SMT support information, let the * core code know. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ccaacc7f9681..f0166b31a803 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -853,8 +853,9 @@ void __noreturn stop_this_cpu(void *dummy) * Do not prefer MWAIT if MONITOR instruction has a bug or idle=nomwait * is passed to kernel commandline parameter. */ -static bool prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) +static __init bool prefer_mwait_c1_over_halt(void) { + const struct cpuinfo_x86 *c = &boot_cpu_data; u32 eax, ebx, ecx, edx; /* If override is enforced on the command line, fall back to HALT. */ @@ -908,7 +909,7 @@ static __cpuidle void mwait_idle(void) __current_clr_polling(); } -void select_idle_routine(const struct cpuinfo_x86 *c) +void __init select_idle_routine(void) { if (boot_option_idle_override == IDLE_POLL) { if (IS_ENABLED(CONFIG_SMP) && smp_num_siblings > 1) @@ -916,10 +917,11 @@ void select_idle_routine(const struct cpuinfo_x86 *c) return; } + /* Required to guard against xen_set_default_idle() */ if (x86_idle_set()) return; - if (prefer_mwait_c1_over_halt(c)) { + if (prefer_mwait_c1_over_halt()) { pr_info("using mwait in idle threads\n"); static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { -- cgit v1.2.3 From 8076fcde016c9c0e0660543e67bff86cb48a7c9c Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: x86/rfds: Mitigate Register File Data Sampling (RFDS) RFDS is a CPU vulnerability that may allow userspace to infer kernel stale data previously used in floating point registers, vector registers and integer registers. RFDS only affects certain Intel Atom processors. Intel released a microcode update that uses VERW instruction to clear the affected CPU buffers. Unlike MDS, none of the affected cores support SMT. Add RFDS bug infrastructure and enable the VERW based mitigation by default, that clears the affected buffers just before exiting to userspace. Also add sysfs reporting and cmdline parameter "reg_file_data_sampling" to control the mitigation. 
For details see: Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Josh Poimboeuf --- Documentation/ABI/testing/sysfs-devices-system-cpu | 1 + Documentation/admin-guide/kernel-parameters.txt | 21 ++++++ arch/x86/Kconfig | 11 +++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 8 +++ arch/x86/kernel/cpu/bugs.c | 78 +++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 38 ++++++++++- drivers/base/cpu.c | 3 + include/linux/cpu.h | 2 + 9 files changed, 157 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index a1db6db47505..710d47be11e0 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -516,6 +516,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/mds /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling /sys/devices/system/cpu/vulnerabilities/retbleed /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/spectre_v1 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 31b3a25680d0..73062d47a462 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1150,6 +1150,26 @@ The filter can be disabled or changed to another driver later using sysfs. + reg_file_data_sampling= + [X86] Controls mitigation for Register File Data + Sampling (RFDS) vulnerability. RFDS is a CPU + vulnerability which may allow userspace to infer + kernel data values previously stored in floating point + registers, vector registers, or integer registers. + RFDS only affects Intel Atom processors. + + on: Turns ON the mitigation. + off: Turns OFF the mitigation. + + This parameter overrides the compile time default set + by CONFIG_MITIGATION_RFDS. Mitigation cannot be + disabled when other VERW based mitigations (like MDS) + are enabled. In order to disable RFDS mitigation all + VERW based mitigations need to be disabled. + + For details see: + Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst + driver_async_probe= [KNL] List of driver names to be probed asynchronously. * matches with all driver names. If * is specified, the @@ -3398,6 +3418,7 @@ nospectre_bhb [ARM64] nospectre_v1 [X86,PPC] nospectre_v2 [X86,PPC,S390,ARM64] + reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] spectre_v2_user=off [X86] diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5edec175b9bf..637e337c332e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2614,6 +2614,17 @@ config GDS_FORCE_MITIGATION If in doubt, say N. +config MITIGATION_RFDS + bool "RFDS Mitigation" + depends on CPU_SUP_INTEL + default y + help + Enable mitigation for Register File Data Sampling (RFDS) by default. + RFDS is a hardware vulnerability which affects Intel Atom CPUs. It + allows unprivileged speculative access to stale data previously + stored in floating point, vector and integer registers. 
+ See also Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2b62cdd8dd12..8511aad59581 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -503,4 +503,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index f1bd7b91b3c6..d1b5edaf6c34 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -165,6 +165,14 @@ * CPU is not vulnerable to Gather * Data Sampling (GDS). */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cd6ac89c1a0d..01ac18f56147 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -480,6 +480,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str) } early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "Register File Data Sampling: " fmt + +enum rfds_mitigations { + RFDS_MITIGATION_OFF, + RFDS_MITIGATION_VERW, + RFDS_MITIGATION_UCODE_NEEDED, +}; + +/* Default mitigation for Register File Data Sampling */ +static enum rfds_mitigations rfds_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF; + +static const char * const rfds_strings[] = { + [RFDS_MITIGATION_OFF] = "Vulnerable", + [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File", + [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", +}; + +static void __init rfds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { + rfds_mitigation = RFDS_MITIGATION_OFF; + return; + } + if (rfds_mitigation == RFDS_MITIGATION_OFF) + return; + + if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; +} + +static __init int rfds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + return 0; + + if (!strcmp(str, "off")) + rfds_mitigation = RFDS_MITIGATION_OFF; + else if (!strcmp(str, "on")) + rfds_mitigation = RFDS_MITIGATION_VERW; + + return 0; +} +early_param("reg_file_data_sampling", rfds_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "" fmt @@ -513,6 +564,11 @@ static void __init md_clear_update_mitigation(void) mmio_mitigation = MMIO_MITIGATION_VERW; mmio_select_mitigation(); } + if (rfds_mitigation == RFDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_RFDS)) { + rfds_mitigation = RFDS_MITIGATION_VERW; + rfds_select_mitigation(); + } out: if (boot_cpu_has_bug(X86_BUG_MDS)) pr_info("MDS: %s\n", mds_strings[mds_mitigation]); @@ -522,6 +578,8 @@ out: pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + if (boot_cpu_has_bug(X86_BUG_RFDS)) + pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]); } static void __init 
md_clear_select_mitigation(void) @@ -529,11 +587,12 @@ static void __init md_clear_select_mitigation(void) mds_select_mitigation(); taa_select_mitigation(); mmio_select_mitigation(); + rfds_select_mitigation(); /* - * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update - * and print their mitigation after MDS, TAA and MMIO Stale Data - * mitigation selection is done. + * As these mitigations are inter-related and rely on VERW instruction + * to clear the microarchitural buffers, update and print their status + * after mitigation selection is done for each of these vulnerabilities. */ md_clear_update_mitigation(); } @@ -2622,6 +2681,11 @@ static ssize_t mmio_stale_data_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t rfds_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && @@ -2781,6 +2845,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_GDS: return gds_show_state(buf); + case X86_BUG_RFDS: + return rfds_show_state(buf); + default: break; } @@ -2855,4 +2922,9 @@ ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_GDS); } + +ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fbc4e60d027c..40d8c110bb32 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1267,6 +1267,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define SRSO BIT(5) /* CPU is affected by GDS */ #define GDS BIT(6) +/* CPU is affected by Register File Data Sampling */ +#define RFDS BIT(7) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1294,9 +1296,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), @@ -1330,6 +1341,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap) ia32_cap 
& ARCH_CAP_SBDR_SSDP_NO); } +static bool __init vulnerable_to_rfds(u64 ia32_cap) +{ + /* The "immunity" bit trumps everything else: */ + if (ia32_cap & ARCH_CAP_RFDS_NO) + return false; + + /* + * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to + * indicate that mitigation is needed because guest is running on a + * vulnerable hardware or may migrate to such hardware: + */ + if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + return true; + + /* Only consult the blacklist when there is no enumeration: */ + return cpu_matches(cpu_vuln_blacklist, RFDS); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1441,6 +1470,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); + if (vulnerable_to_rfds(ia32_cap)) + setup_force_cpu_bug(X86_BUG_RFDS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 47de0f140ba6..0b33e81f9c9b 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -588,6 +588,7 @@ CPU_SHOW_VULN_FALLBACK(mmio_stale_data); CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); +CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -602,6 +603,7 @@ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); +static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -617,6 +619,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_retbleed.attr, &dev_attr_spec_rstack_overflow.attr, &dev_attr_gather_data_sampling.attr, + &dev_attr_reg_file_data_sampling.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index dcb89c987164..8654714421a0 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -75,6 +75,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From c90399fbd74a0713d5972a6d931e4a9918621e88 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Mar 2024 19:56:35 +0100 Subject: x86/cpu: Ensure that CPU info updates are propagated on UP The boot sequence evaluates CPUID information twice: 1) During early boot 2) When finalizing the early setup right before mitigations are selected and alternatives are patched. In both cases the evaluation is stored in boot_cpu_data, but on UP the copying of boot_cpu_data to the per CPU info of the boot CPU happens between #1 and #2. So any update which happens in #2 is never propagated to the per CPU info instance. Consolidate the whole logic and copy boot_cpu_data right before applying alternatives as that's the point where boot_cpu_data is in its final state and not supposed to change anymore. 
This also removes the voodoo mb() from smp_prepare_cpus_common() which had absolutely no purpose. Fixes: 71eb4893cfaf ("x86/percpu: Cure per CPU madness on UP") Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20240322185305.127642785@linutronix.de --- arch/x86/kernel/cpu/common.c | 9 +++++++++ arch/x86/kernel/setup.c | 10 ---------- arch/x86/kernel/smpboot.c | 32 +++++--------------------------- 3 files changed, 14 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ba8cf5e9ce56..5c1e6d6be267 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2307,6 +2307,8 @@ void arch_smt_update(void) void __init arch_cpu_finalize_init(void) { + struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info); + identify_boot_cpu(); select_idle_routine(); @@ -2345,6 +2347,13 @@ void __init arch_cpu_finalize_init(void) fpu__init_system(); fpu__init_cpu(); + /* + * Ensure that access to the per CPU representation has the initial + * boot CPU configuration. + */ + *c = boot_cpu_data; + c->initialized = true; + alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3e1e96efadfe..ef206500ed6f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1206,16 +1206,6 @@ void __init i386_reserve_resources(void) #endif /* CONFIG_X86_32 */ -#ifndef CONFIG_SMP -void __init smp_prepare_boot_cpu(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - *c = boot_cpu_data; - c->initialized = true; -} -#endif - static struct notifier_block kernel_offset_notifier = { .notifier_call = dump_kernel_offset }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe355c89f6c1..76bb65045c64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -313,14 +313,6 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -static void __init smp_store_boot_cpu_info(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - *c = boot_cpu_data; - c->initialized = true; -} - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -1039,29 +1031,15 @@ static __init void disable_smp(void) cpumask_set_cpu(0, topology_die_cpumask(0)); } -static void __init smp_cpu_index_default(void) -{ - int i; - struct cpuinfo_x86 *c; - - for_each_possible_cpu(i) { - c = &cpu_data(i); - /* mark all to hotplug */ - c->cpu_index = nr_cpu_ids; - } -} - void __init smp_prepare_cpus_common(void) { unsigned int i; - smp_cpu_index_default(); - - /* - * Setup boot CPU information - */ - smp_store_boot_cpu_info(); /* Final full version of the data */ - mb(); + /* Mark all except the boot CPU as hotpluggable */ + for_each_possible_cpu(i) { + if (i) + per_cpu(cpu_info.cpu_index, i) = nr_cpu_ids; + } for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); -- cgit v1.2.3
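Taken together, the fix pins the per CPU snapshot to the point where boot_cpu_data can no longer change. A condensed view of arch_cpu_finalize_init() after this patch, elided to the lines relevant here:

    void __init arch_cpu_finalize_init(void)
    {
            struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);

            identify_boot_cpu();            /* second CPUID evaluation (#2) */
            select_idle_routine();
            /* ... mitigation selection, FPU init ... */

            /* Snapshot the final boot CPU configuration into the per CPU data: */
            *c = boot_cpu_data;
            c->initialized = true;

            alternative_instructions();
    }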