From ed2f752e0e0a21d941ca0ee539ef3d4cd576bc5e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 20 Oct 2023 18:19:20 +0200 Subject: x86/percpu: Introduce const-qualified const_pcpu_hot to micro-optimize code generation Some variables in pcpu_hot, currently current_task and top_of_stack are actually per-thread variables implemented as per-CPU variables and thus stable for the duration of the respective task. There is already an attempt to eliminate redundant reads from these variables using this_cpu_read_stable() asm macro, which hides the dependency on the read memory address. However, the compiler has limited ability to eliminate asm common subexpressions, so this approach results in a limited success. The solution is to allow more aggressive elimination by aliasing pcpu_hot into a const-qualified const_pcpu_hot, and to read stable per-CPU variables from this constant copy. The current per-CPU infrastructure does not support reads from const-qualified variables. However, when the compiler supports segment qualifiers, it is possible to declare the const-aliased variable in the relevant named address space. The compiler considers access to the variable, declared in this way, as a read from a constant location, and will optimize reads from the variable accordingly. By implementing constant-qualified const_pcpu_hot, the compiler can eliminate redundant reads from the constant variables, reducing the number of loads from current_task from 3766 to 3217 on a test build, a -14.6% reduction. The reduction of loads translates to the following code savings: text data bss dec hex filename 25,477,353 4389456 808452 30675261 1d4113d vmlinux-old.o 25,476,074 4389440 808452 30673966 1d40c2e vmlinux-new.o representing a code size reduction of -1279 bytes. [ mingo: Updated the changelog, EXPORT(const_pcpu_hot). ] Co-developed-by: Nadav Amit Signed-off-by: Nadav Amit Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231020162004.135244-1-ubizjak@gmail.com --- arch/x86/include/asm/processor.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0086920cda06..b47a9975233f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -518,6 +518,9 @@ static __always_inline unsigned long current_top_of_stack(void) * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline. */ + if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) + return pcpu_hot.top_of_stack; + return this_cpu_read_stable(pcpu_hot.top_of_stack); } -- cgit v1.2.3 From 0548eb067ed664b93043e033295ca71e3e706245 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 24 Oct 2023 16:28:14 +0200 Subject: x86/percpu: Return correct variable from current_top_of_stack() current_top_of_stack() should return variable from _seg_gs qualified named address space when CONFIG_USE_X86_SEG_SUPPORT=y is enbled. Fixes: ed2f752e0e0a ("x86/percpu: Introduce const-qualified const_pcpu_hot to micro-optimize code generation") Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231024142830.3226-1-ubizjak@gmail.com Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Sean Christopherson --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b47a9975233f..f20e87632669 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -519,7 +519,7 @@ static __always_inline unsigned long current_top_of_stack(void) * entry trampoline. */ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) - return pcpu_hot.top_of_stack; + return const_pcpu_hot.top_of_stack; return this_cpu_read_stable(pcpu_hot.top_of_stack); } -- cgit v1.2.3 From 0e3703630bd3fead041a6bfb3a3f2a9891af6c34 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 4 Dec 2023 22:02:29 +0100 Subject: x86/percpu: Fix "const_pcpu_hot" version generation failure Version generation for "const_pcpu_hot" symbol failed because genksyms doesn't know the __seg_gs keyword. Effectively revert commit 4604c052b84d ("x86/percpu: Declare const_pcpu_hot as extern const variable") and use this_cpu_read_const() instead to avoid "sparse: dereference of noderef expression" warning when reading const_pcpu_hot. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231204210320.114429-1-ubizjak@gmail.com --- arch/x86/include/asm/current.h | 5 +++-- arch/x86/include/asm/percpu.h | 7 +++++++ arch/x86/include/asm/processor.h | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 9fbd7cb2dc86..c8c5674d69f6 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -37,12 +37,13 @@ static_assert(sizeof(struct pcpu_hot) == 64); DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); /* const-qualified alias to pcpu_hot, aliased by linker. */ -extern const struct pcpu_hot __percpu_seg_override const_pcpu_hot; +DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override, + const_pcpu_hot); static __always_inline struct task_struct *get_current(void) { if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) - return const_pcpu_hot.current_task; + return this_cpu_read_const(const_pcpu_hot.current_task); return this_cpu_read_stable(pcpu_hot.current_task); } diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0f12b2004b94..3859abad19ec 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -28,6 +28,7 @@ #else /* ...!ASSEMBLY */ +#include #include #include @@ -462,6 +463,7 @@ do { \ #define this_cpu_write_8(pcp, val) __raw_cpu_write(volatile, pcp, val) #endif +#define this_cpu_read_const(pcp) __raw_cpu_read(, pcp) #else /* CONFIG_USE_X86_SEG_SUPPORT */ #define raw_cpu_read_1(pcp) percpu_from_op(1, , "mov", pcp) @@ -486,6 +488,11 @@ do { \ #define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val) #endif +/* + * The generic per-cpu infrastrucutre is not suitable for + * reading const-qualified variables. + */ +#define this_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; }) #endif /* CONFIG_USE_X86_SEG_SUPPORT */ #define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f20e87632669..a94a857152c4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -519,7 +519,7 @@ static __always_inline unsigned long current_top_of_stack(void) * entry trampoline. */ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) - return const_pcpu_hot.top_of_stack; + return this_cpu_read_const(const_pcpu_hot.top_of_stack); return this_cpu_read_stable(pcpu_hot.top_of_stack); } -- cgit v1.2.3 From 7e3ec6286753b404666af9a58d283690302c9321 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:10 +0100 Subject: x86/cpu/amd: Provide a separate accessor for Node ID AMD (ab)uses topology_die_id() to store the Node ID information and topology_max_dies_per_pkg to store the number of nodes per package. This collides with the proper processor die level enumeration which is coming on AMD with CPUID 8000_0026, unless there is a correlation between the two. There is zero documentation about that. So provide new storage and new accessors which for now still access die_id and topology_max_die_per_pkg(). Will be mopped up after AMD and HYGON are converted over. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.956116738@linutronix.de --- arch/x86/events/amd/core.c | 2 +- arch/x86/include/asm/processor.h | 3 +++ arch/x86/include/asm/topology.h | 8 ++++++++ arch/x86/kernel/amd_nb.c | 4 ++-- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/mce/amd.c | 4 ++-- arch/x86/kernel/cpu/mce/inject.c | 4 ++-- drivers/edac/amd64_edac.c | 4 ++-- drivers/edac/mce_amd.c | 4 ++-- 9 files changed, 23 insertions(+), 12 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 81f6d8275b6b..69a3b02e50bb 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -579,7 +579,7 @@ static void amd_pmu_cpu_starting(int cpu) if (!x86_pmu.amd_nb_constraints) return; - nb_id = topology_die_id(cpu); + nb_id = topology_amd_node_id(cpu); WARN_ON_ONCE(nb_id == BAD_APICID); for_each_online_cpu(i) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 26620d7642a9..26a6001ddafd 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -100,6 +100,9 @@ struct cpuinfo_topology { u32 logical_pkg_id; u32 logical_die_id; + // AMD Node ID and Nodes per Package info + u32 amd_node_id; + // Cache level topology IDs u32 llc_id; u32 l2c_id; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index fa5d803ed7e2..1fd12e98a283 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -131,6 +131,8 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) #define topology_ppin(cpu) (cpu_data(cpu).ppin) +#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.die_id) + extern unsigned int __max_die_per_package; #ifdef CONFIG_SMP @@ -161,6 +163,11 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); +static inline unsigned int topology_amd_nodes_per_pkg(void) +{ + return __max_die_per_package; +} + extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -182,6 +189,7 @@ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } +static inline unsigned int topology_amd_nodes_per_pkg(void) { return 0; }; #endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 053f6dcc6b2c..5bf5f9fc5753 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -386,7 +386,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res) int amd_get_subcaches(int cpu) { - struct pci_dev *link = node_to_amd_nb(topology_die_id(cpu))->link; + struct pci_dev *link = node_to_amd_nb(topology_amd_node_id(cpu))->link; unsigned int mask; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) @@ -400,7 +400,7 @@ int amd_get_subcaches(int cpu) int amd_set_subcaches(int cpu, unsigned long mask) { static unsigned int reset, ban; - struct amd_northbridge *nb = node_to_amd_nb(topology_die_id(cpu)); + struct amd_northbridge *nb = node_to_amd_nb(topology_amd_node_id(cpu)); unsigned int reg; int cuid; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index c131c412db89..4a33218cb103 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -595,7 +595,7 @@ static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) if (index < 3) return; - node = topology_die_id(smp_processor_id()); + node = topology_amd_node_id(smp_processor_id()); this_leaf->nb = node_to_amd_nb(node); if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) amd_calc_l3_indices(this_leaf->nb); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 2b46eb0fdf3a..9a0133ef7e20 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1231,7 +1231,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, return -ENODEV; if (is_shared_bank(bank)) { - nb = node_to_amd_nb(topology_die_id(cpu)); + nb = node_to_amd_nb(topology_amd_node_id(cpu)); /* threshold descriptor already initialized on this node? */ if (nb && nb->bank4) { @@ -1335,7 +1335,7 @@ static void threshold_remove_bank(struct threshold_bank *bank) * The last CPU on this node using the shared bank is going * away, remove that bank now. */ - nb = node_to_amd_nb(topology_die_id(smp_processor_id())); + nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id())); nb->bank4 = NULL; } diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 72f0695c3dc1..308c5b5e0bbe 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -543,8 +543,8 @@ static void do_inject(void) if (boot_cpu_has(X86_FEATURE_AMD_DCM) && b == 4 && boot_cpu_data.x86 < 0x17) { - toggle_nb_mca_mst_cpu(topology_die_id(cpu)); - cpu = get_nbc_for_node(topology_die_id(cpu)); + toggle_nb_mca_mst_cpu(topology_amd_node_id(cpu)); + cpu = get_nbc_for_node(topology_amd_node_id(cpu)); } cpus_read_lock(); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 537b9987a431..2b8c20bb926a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1915,7 +1915,7 @@ ddr3: /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) { - u16 mce_nid = topology_die_id(m->extcpu); + u16 mce_nid = topology_amd_node_id(m->extcpu); struct mem_ctl_info *mci; u8 start_bit = 1; u8 end_bit = 47; @@ -3446,7 +3446,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) int cpu; for_each_online_cpu(cpu) - if (topology_die_id(cpu) == nid) + if (topology_amd_node_id(cpu) == nid) cpumask_set_cpu(cpu, mask); } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index ec8b6c9fedfd..8130c3dc64da 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -584,7 +584,7 @@ static void decode_mc3_mce(struct mce *m) static void decode_mc4_mce(struct mce *m) { unsigned int fam = x86_family(m->cpuid); - int node_id = topology_die_id(m->extcpu); + int node_id = topology_amd_node_id(m->extcpu); u16 ec = EC(m->status); u8 xec = XEC(m->status, 0x1f); u8 offset = 0; @@ -746,7 +746,7 @@ static void decode_smca_error(struct mce *m) if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0 && decode_dram_ecc) - decode_dram_ecc(topology_die_id(m->extcpu), m); + decode_dram_ecc(topology_amd_node_id(m->extcpu), m); } static inline void amd_decode_err_code(u16 ec) -- cgit v1.2.3 From c749ce393b8fe9db5ed894411f06eafa88f0e13a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:14 +0100 Subject: x86/cpu: Use common topology code for AMD Switch it over to the new topology evaluation mechanism and remove the random bits and pieces which are sprinkled all over the place. No functional change intended. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.145745053@linutronix.de --- arch/x86/include/asm/processor.h | 2 - arch/x86/kernel/cpu/amd.c | 146 ---------------------------------- arch/x86/kernel/cpu/mce/inject.c | 3 +- arch/x86/kernel/cpu/topology_common.c | 5 +- 4 files changed, 5 insertions(+), 151 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 26a6001ddafd..4fbeffd678e2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -707,12 +707,10 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu) } #ifdef CONFIG_CPU_SUP_AMD -extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_highest_perf(void); extern void amd_clear_divider(void); extern void amd_check_microcode(void); #else -static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline u32 amd_get_highest_perf(void) { return 0; } static inline void amd_clear_divider(void) { } static inline void amd_check_microcode(void) { } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a6b58200f963..c82069e5f341 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -27,13 +27,6 @@ #include "cpu.h" -/* - * nodes_per_socket: Stores the number of nodes per socket. - * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX - * Node Identifiers[10:8] - */ -static u32 nodes_per_socket = 1; - static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { u32 gprs[8] = { 0 }; @@ -300,97 +293,6 @@ static int nearby_node(int apicid) } #endif -/* - * Fix up topo::core_id for pre-F17h systems to be in the - * [0 .. cores_per_node - 1] range. Not really needed but - * kept so as not to break existing setups. - */ -static void legacy_fixup_core_id(struct cpuinfo_x86 *c) -{ - u32 cus_per_node; - - if (c->x86 >= 0x17) - return; - - cus_per_node = c->x86_max_cores / nodes_per_socket; - c->topo.core_id %= cus_per_node; -} - -/* - * Fixup core topology information for - * (1) AMD multi-node processors - * Assumption: Number of cores in each internal node is the same. - * (2) AMD processors supporting compute units - */ -static void amd_get_topology(struct cpuinfo_x86 *c) -{ - /* get information required for multi-node processors */ - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - int err; - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - - c->topo.die_id = ecx & 0xff; - - if (c->x86 == 0x15) - c->topo.cu_id = ebx & 0xff; - - if (c->x86 >= 0x17) { - c->topo.core_id = ebx & 0xff; - - if (smp_num_siblings > 1) - c->x86_max_cores /= smp_num_siblings; - } - - /* - * In case leaf B is available, use it to derive - * topology information. - */ - err = detect_extended_topology(c); - if (!err) - c->x86_coreid_bits = get_count_order(c->x86_max_cores); - - cacheinfo_amd_init_llc_id(c, c->topo.die_id); - - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - c->topo.die_id = value & 7; - c->topo.llc_id = c->topo.die_id; - } else - return; - - if (nodes_per_socket > 1) { - set_cpu_cap(c, X86_FEATURE_AMD_DCM); - legacy_fixup_core_id(c); - } -} - -/* - * On a AMD dual core setup the lower bits of the APIC id distinguish the cores. - * Assumes number of cores is a power of two. - */ -static void amd_detect_cmp(struct cpuinfo_x86 *c) -{ - unsigned bits; - - bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ - c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->topo.pkg_id = c->topo.initial_apicid >> bits; - /* use socket ID also for last level cache */ - c->topo.llc_id = c->topo.die_id = c->topo.pkg_id; -} - -u32 amd_get_nodes_per_socket(void) -{ - return nodes_per_socket; -} -EXPORT_SYMBOL_GPL(amd_get_nodes_per_socket); - static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA @@ -442,32 +344,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -500,18 +376,6 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_MWAITX)) use_mwaitx_delay(); - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 ecx; - - ecx = cpuid_ecx(0x8000001e); - __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; - } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; - } - if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && c->x86 >= 0x15 && c->x86 <= 0x17) { @@ -649,8 +513,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) u64 value; u32 dummy; - early_init_amd_mc(c); - if (c->x86 >= 0xf) set_cpu_cap(c, X86_FEATURE_K8); @@ -730,9 +592,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) } } - if (cpu_has(c, X86_FEATURE_TOPOEXT)) - smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); @@ -1076,9 +935,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_FSRM)) set_cpu_cap(c, X86_FEATURE_FSRS); - /* get apicid instead of initial apic id from cpuid */ - c->topo.apicid = read_apic_id(); - /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); @@ -1114,8 +970,6 @@ static void init_amd(struct cpuinfo_x86 *c) cpu_detect_cache_sizes(c); - amd_detect_cmp(c); - amd_get_topology(c); srat_detect_node(c); init_amd_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 308c5b5e0bbe..2b290452e194 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -433,8 +433,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket(); - + cores_per_node = (c->x86_max_cores * smp_num_siblings) / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index e1d1a7b7c8a9..3c69229ba154 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -72,7 +72,6 @@ bool topo_is_converted(struct cpuinfo_x86 *c) { /* Temporary until everything is converted over. */ switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: case X86_VENDOR_HYGON: return false; default: @@ -133,6 +132,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) tscan->ebx1_nproc_shift = get_count_order(ebx.nproc); switch (c->x86_vendor) { + case X86_VENDOR_AMD: + if (IS_ENABLED(CONFIG_CPU_SUP_AMD)) + cpu_parse_topology_amd(tscan); + break; case X86_VENDOR_CENTAUR: case X86_VENDOR_ZHAOXIN: parse_legacy(tscan); -- cgit v1.2.3 From fab75e790f00dca592d9a934d9f1237b81093b99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:21 +0100 Subject: x86/cpu: Remove x86_coreid_bits No more users. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.455839743@linutronix.de --- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/common.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4fbeffd678e2..de1648ee2b9e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -122,8 +122,6 @@ struct cpuinfo_x86 { #endif __u8 x86_virt_bits; __u8 x86_phys_bits; - /* CPUID returned core id bits: */ - __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d72787d1b8cb..33064e661a2d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1753,7 +1753,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; - c->x86_coreid_bits = 0; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; -- cgit v1.2.3 From 89b0f15f408f7c4ee98c1ec4c3224852fcbc3274 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:16 +0100 Subject: x86/cpu/topology: Get rid of cpuinfo::x86_max_cores Now that __num_cores_per_package and __num_threads_per_package are available, cpuinfo::x86_max_cores and the related math all over the place can be replaced with the ready to consume data. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.176147806@linutronix.de --- Documentation/arch/x86/topology.rst | 24 +++++++++--------------- arch/x86/events/intel/uncore_nhmex.c | 4 ++-- arch/x86/events/intel/uncore_snb.c | 8 ++++---- arch/x86/events/intel/uncore_snbep.c | 16 ++++++++-------- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/cpu/debugfs.c | 3 ++- arch/x86/kernel/cpu/mce/inject.c | 3 +-- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/topology_common.c | 3 --- arch/x86/kernel/smpboot.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +- drivers/hwmon/fam15h_power.c | 2 +- 14 files changed, 31 insertions(+), 43 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 08ebf9edbfc1..7352ab89a55a 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -47,17 +47,21 @@ AMD nomenclature for package is 'Node'. Package-related topology information in the kernel: - - cpuinfo_x86.x86_max_cores: + - topology_num_threads_per_package() - The number of cores in a package. This information is retrieved via CPUID. + The number of threads in a package. - - cpuinfo_x86.x86_max_dies: + - topology_num_cores_per_package() - The number of dies in a package. This information is retrieved via CPUID. + The number of cores in a package. + + - topology_max_dies_per_package() + + The maximum number of dies in a package. - cpuinfo_x86.topo.die_id: - The physical ID of the die. This information is retrieved via CPUID. + The physical ID of the die. - cpuinfo_x86.topo.pkg_id: @@ -96,16 +100,6 @@ are SMT- or CMT-type threads. AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses "core". -Core-related topology information in the kernel: - - - smp_num_siblings: - - The number of threads in a core. The number of threads in a package can be - calculated by:: - - threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings - - Threads ======= A thread is a single scheduling unit. It's the equivalent to a logical Linux diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 56eea2c66cfb..92da8aaa5966 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1221,8 +1221,8 @@ void nhmex_uncore_cpu_init(void) uncore_nhmex = true; else nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; - if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (nhmex_uncore_cbox.num_boxes > topology_num_cores_per_package()) + nhmex_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = nhmex_msr_uncores; } /* end of Nehalem-EX uncore support */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 7fd4334e12a1..9462fd9f3b7a 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -364,8 +364,8 @@ static struct intel_uncore_type *snb_msr_uncores[] = { void snb_uncore_cpu_init(void) { uncore_msr_uncores = snb_msr_uncores; - if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snb_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snb_uncore_cbox.num_boxes = topology_num_cores_per_package(); } static void skl_uncore_msr_init_box(struct intel_uncore_box *box) @@ -428,8 +428,8 @@ static struct intel_uncore_type *skl_msr_uncores[] = { void skl_uncore_cpu_init(void) { uncore_msr_uncores = skl_msr_uncores; - if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (skl_uncore_cbox.num_boxes > topology_num_cores_per_package()) + skl_uncore_cbox.num_boxes = topology_num_cores_per_package(); snb_uncore_arb.ops = &skl_uncore_msr_ops; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 3f6bd3e4a763..2eaf0f339849 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1172,8 +1172,8 @@ static struct intel_uncore_type *snbep_msr_uncores[] = { void snbep_uncore_cpu_init(void) { - if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = snbep_msr_uncores; } @@ -1845,8 +1845,8 @@ static struct intel_uncore_type *ivbep_msr_uncores[] = { void ivbep_uncore_cpu_init(void) { - if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (ivbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + ivbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = ivbep_msr_uncores; } @@ -2917,8 +2917,8 @@ static bool hswep_has_limit_sbox(unsigned int device) void hswep_uncore_cpu_init(void) { - if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (hswep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + hswep_uncore_cbox.num_boxes = topology_num_cores_per_package(); /* Detect 6-8 core systems with only two SBOXes */ if (hswep_has_limit_sbox(HSWEP_PCU_DID)) @@ -3280,8 +3280,8 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (bdx_uncore_cbox.num_boxes > topology_num_cores_per_package()) + bdx_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = bdx_msr_uncores; /* Detect systems with no SBOXes */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index de1648ee2b9e..326581df4846 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -149,8 +149,6 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; /* protected processor identification number */ u64 ppin; - /* cpuid returned max cores value: */ - u16 x86_max_cores; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e1d118e6926e..f2241e7e96fd 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -301,7 +301,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; + eax->split.num_cores_on_die = topology_num_cores_per_package(); if (assoc == 0xffff) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c9a1014386a9..05e0b31f75e9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1738,7 +1738,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index f40f3eecebc6..3baf3e435834 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -28,7 +28,8 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); - seq_printf(m, "max_cores: %u\n", c->x86_max_cores); + seq_printf(m, "num_threads: %u\n", __num_threads_per_package); + seq_printf(m, "num_cores: %u\n", __num_cores_per_package); seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core); return 0; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1e327881073f..94953d749475 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -430,10 +430,9 @@ static void trigger_thr_int(void *info) static u32 get_nbc_for_node(int node_id) { - struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg(); + cores_per_node = topology_num_threads_per_package() / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 857e608af641..5f0414452b67 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -641,7 +641,7 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) { u64 llc_size = c->x86_cache_size * 1024ULL; - do_div(llc_size, c->x86_max_cores); + do_div(llc_size, topology_num_cores_per_package()); llc_size_per_core = (unsigned int)llc_size; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index a2c3f8f5886d..a50ae8d63d1c 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -155,9 +155,6 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; - /* Maximum number of cores on this package */ - c->x86_max_cores = topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); - c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 35c272cb45fb..9c1e1219c28f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -564,7 +564,7 @@ static void __init build_sched_topology(void) void set_cpu_sibling_map(int cpu) { bool has_smt = __max_threads_per_core > 1; - bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; + bool has_mp = has_smt || topology_num_cores_per_package() > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; int i, threads; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 2ff6deedef95..da1f43999d09 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -451,7 +451,7 @@ static int vangogh_init_smc_tables(struct smu_context *smu) #ifdef CONFIG_X86 /* AMD x86 APU only */ - smu->cpu_core_num = boot_cpu_data.x86_max_cores; + smu->cpu_core_num = topology_num_cores_per_package(); #else smu->cpu_core_num = 4; #endif diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index 6307112c2c0c..9ed2c4b6734e 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -209,7 +209,7 @@ static ssize_t power1_average_show(struct device *dev, * With the new x86 topology modelling, x86_max_cores is the * compute unit number. */ - cu_num = boot_cpu_data.x86_max_cores; + cu_num = topology_num_cores_per_package(); ret = read_registers(data); if (ret) -- cgit v1.2.3 From 154fcf3a788868cb87d8c2e50c0b5b3a2fe89853 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Mar 2024 11:12:19 +0100 Subject: x86/msr: Prepare for including into To clean up the per CPU insanity of UP which causes sparse to be rightfully unhappy and prevents the usage of the generic per CPU accessors on cpu_info it is necessary to include into . Including into is impossible because it ends up in header dependency hell. The problem is that includes . The inclusion of results in a compile fail where the compiler cannot longer handle an include in which references boot_cpu_data which is defined in . The only reason why is included in are the set/get_debugctlmsr() inlines. They are defined there because is such a nice dump ground for everything. In fact they belong obviously into . Move them to and fix up the resulting damage which is just exposing the reliance on random include chains. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240304005104.454678686@linutronix.de --- arch/x86/events/intel/core.c | 1 + arch/x86/events/intel/ds.c | 1 + arch/x86/include/asm/debugreg.h | 24 ++++++++++++++++++++++++ arch/x86/include/asm/fsgsbase.h | 2 +- arch/x86/include/asm/processor.h | 22 ---------------------- arch/x86/include/asm/special_insns.h | 4 ++-- arch/x86/kernel/cpu/intel_pconfig.c | 2 ++ arch/x86/kernel/cpu/rdrand.c | 1 + arch/x86/kernel/fpu/bugs.c | 2 ++ arch/x86/kernel/step.c | 2 ++ 10 files changed, 36 insertions(+), 25 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3804f21ab049..768d1414897f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index d49d661ec0a7..2641ba620f12 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 0cec92c430cc..fdbbbfec745a 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -5,7 +5,9 @@ #include #include #include + #include +#include DECLARE_PER_CPU(unsigned long, cpu_dr7); @@ -159,4 +161,26 @@ static inline unsigned long amd_get_dr_addr_mask(unsigned int dr) } #endif +static inline unsigned long get_debugctlmsr(void) +{ + unsigned long debugctlmsr = 0; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; +#endif + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); + + return debugctlmsr; +} + +static inline void update_debugctlmsr(unsigned long debugctlmsr) +{ +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return; +#endif + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); +} + #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h index 35cff5f2becf..9e7e8ca8e299 100644 --- a/arch/x86/include/asm/fsgsbase.h +++ b/arch/x86/include/asm/fsgsbase.h @@ -6,7 +6,7 @@ #ifdef CONFIG_X86_64 -#include +#include /* * Read/write a task's FSBASE or GSBASE. This returns the value that diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 26620d7642a9..d2ef4f5e03f8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -576,28 +576,6 @@ extern void cpu_init(void); extern void cpu_init_exception_handling(void); extern void cr4_init(void); -static inline unsigned long get_debugctlmsr(void) -{ - unsigned long debugctlmsr = 0; - -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return 0; -#endif - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); - - return debugctlmsr; -} - -static inline void update_debugctlmsr(unsigned long debugctlmsr) -{ -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return; -#endif - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); -} - extern void set_task_blockstep(struct task_struct *task, bool on); /* Boot loader type from the setup header: */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 48f8dd47cf68..f13df37c0060 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -2,11 +2,11 @@ #ifndef _ASM_X86_SPECIAL_INSNS_H #define _ASM_X86_SPECIAL_INSNS_H - #ifdef __KERNEL__ - #include #include + +#include #include #include diff --git a/arch/x86/kernel/cpu/intel_pconfig.c b/arch/x86/kernel/cpu/intel_pconfig.c index 0771a905b286..5be2b1790282 100644 --- a/arch/x86/kernel/cpu/intel_pconfig.c +++ b/arch/x86/kernel/cpu/intel_pconfig.c @@ -7,6 +7,8 @@ * Author: * Kirill A. Shutemov */ +#include +#include #include #include diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 26a427fa84ea..eeac00d20926 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -6,6 +6,7 @@ * Authors: Fenghua Yu , * H. Peter Anvin */ +#include #include #include diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index a06b876bbf2d..edbafc5940e3 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c @@ -2,6 +2,8 @@ /* * x86 FPU bug checks: */ +#include + #include #include diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 8e2b2552b5ee..3e2952679b88 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -6,7 +6,9 @@ #include #include #include + #include +#include #include unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) -- cgit v1.2.3 From 5323922f50ecdf9d10cdd2fabd06507e5b4f3feb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Mar 2024 11:12:20 +0100 Subject: x86/msr: Add missing __percpu annotations Sparse rightfully complains about using a plain pointer for per CPU accessors: msr-smp.c:15:23: sparse: warning: incorrect type in initializer (different address spaces) msr-smp.c:15:23: sparse: expected void const [noderef] __percpu *__vpp_verify msr-smp.c:15:23: sparse: got struct msr * Add __percpu annotations to the related datastructure and function arguments to cure this. This also cures the related sparse warnings at the callsites in drivers/edac/amd64_edac.c. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240304005104.513181735@linutronix.de --- arch/x86/include/asm/msr.h | 26 ++++++++++++++------------ arch/x86/include/asm/processor.h | 1 - arch/x86/include/asm/tsc.h | 3 ++- arch/x86/lib/msr-smp.c | 12 +++++------- arch/x86/lib/msr.c | 6 +++--- 5 files changed, 24 insertions(+), 24 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 65ec1965cd28..4621e083af7b 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -12,11 +12,13 @@ #include #include +#include + struct msr_info { - u32 msr_no; - struct msr reg; - struct msr *msrs; - int err; + u32 msr_no; + struct msr reg; + struct msr __percpu *msrs; + int err; }; struct msr_regs_info { @@ -305,8 +307,8 @@ static inline int wrmsrl_safe(u32 msr, u64 val) return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); } -struct msr *msrs_alloc(void); -void msrs_free(struct msr *msrs); +struct msr __percpu *msrs_alloc(void); +void msrs_free(struct msr __percpu *msrs); int msr_set_bit(u32 msr, u8 bit); int msr_clear_bit(u32 msr, u8 bit); @@ -315,8 +317,8 @@ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q); -void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); @@ -345,14 +347,14 @@ static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) return 0; } static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, - struct msr *msrs) + struct msr __percpu *msrs) { - rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); + rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h)); } static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, - struct msr *msrs) + struct msr __percpu *msrs) { - wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); + wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h)); } static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d2ef4f5e03f8..a61f76990b6a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -20,7 +20,6 @@ struct vm86; #include #include #include -#include #include #include #include diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 594fce0ca744..405efb3e4996 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -5,8 +5,9 @@ #ifndef _ASM_X86_TSC_H #define _ASM_X86_TSC_H -#include #include +#include +#include /* * Standard way to access the cycle counter. diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c index 40bbe56bde32..acd463d887e1 100644 --- a/arch/x86/lib/msr-smp.c +++ b/arch/x86/lib/msr-smp.c @@ -9,10 +9,9 @@ static void __rdmsr_on_cpu(void *info) { struct msr_info *rv = info; struct msr *reg; - int this_cpu = raw_smp_processor_id(); if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); + reg = this_cpu_ptr(rv->msrs); else reg = &rv->reg; @@ -23,10 +22,9 @@ static void __wrmsr_on_cpu(void *info) { struct msr_info *rv = info; struct msr *reg; - int this_cpu = raw_smp_processor_id(); if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); + reg = this_cpu_ptr(rv->msrs); else reg = &rv->reg; @@ -97,7 +95,7 @@ int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) EXPORT_SYMBOL(wrmsrl_on_cpu); static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, - struct msr *msrs, + struct msr __percpu *msrs, void (*msr_func) (void *info)) { struct msr_info rv; @@ -124,7 +122,7 @@ static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, * @msrs: array of MSR values * */ -void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs) { __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); } @@ -138,7 +136,7 @@ EXPORT_SYMBOL(rdmsr_on_cpus); * @msrs: array of MSR values * */ -void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs) { __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 47fd9bd6b91d..4bf4fad5b148 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -6,9 +6,9 @@ #define CREATE_TRACE_POINTS #include -struct msr *msrs_alloc(void) +struct msr __percpu *msrs_alloc(void) { - struct msr *msrs = NULL; + struct msr __percpu *msrs = NULL; msrs = alloc_percpu(struct msr); if (!msrs) { @@ -20,7 +20,7 @@ struct msr *msrs_alloc(void) } EXPORT_SYMBOL(msrs_alloc); -void msrs_free(struct msr *msrs) +void msrs_free(struct msr __percpu *msrs) { free_percpu(msrs); } -- cgit v1.2.3 From 71eb4893cfaf37f8884515c8f71717044b97bf44 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Mar 2024 11:12:23 +0100 Subject: x86/percpu: Cure per CPU madness on UP On UP builds Sparse complains rightfully about accesses to cpu_info with per CPU accessors: cacheinfo.c:282:30: sparse: warning: incorrect type in initializer (different address spaces) cacheinfo.c:282:30: sparse: expected void const [noderef] __percpu *__vpp_verify cacheinfo.c:282:30: sparse: got unsigned int * The reason is that on UP builds cpu_info which is a per CPU variable on SMP is mapped to boot_cpu_info which is a regular variable. There is a hideous accessor cpu_data() which tries to hide this, but it's not sufficient as some places require raw accessors and generates worse code than the regular per CPU accessors. Waste sizeof(struct x86_cpuinfo) memory on UP and provide the per CPU cpu_info unconditionally. This requires to update the CPU info on the boot CPU as SMP does. (Ab)use the weakly defined smp_prepare_boot_cpu() function and implement exactly that. This allows to use regular per CPU accessors uncoditionally and paves the way to remove the cpu_data() hackery. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240304005104.622511517@linutronix.de --- arch/x86/include/asm/processor.h | 5 ----- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/setup.c | 10 ++++++++++ arch/x86/kernel/smpboot.c | 4 ---- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a61f76990b6a..e2262aca6561 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -185,13 +185,8 @@ extern struct cpuinfo_x86 new_cpu_data; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; -#ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) -#else -#define cpu_info boot_cpu_data -#define cpu_data(cpu) boot_cpu_data -#endif extern const struct seq_operations cpuinfo_op; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fbc4e60d027c..6057a9ecac93 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -70,6 +70,9 @@ #include "cpu.h" +DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); + u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 84201071dfac..8f669d3be445 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1211,6 +1211,16 @@ void __init i386_reserve_resources(void) #endif /* CONFIG_X86_32 */ +#ifndef CONFIG_SMP +void __init smp_prepare_boot_cpu(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + *c = boot_cpu_data; + c->initialized = true; +} +#endif + static struct notifier_block kernel_offset_notifier = { .notifier_call = dump_kernel_offset }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 980782bd2c97..37ea8c872e4a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -101,10 +101,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); EXPORT_PER_CPU_SYMBOL(cpu_die_map); -/* Per CPU bogomips and other parameters */ -DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); -EXPORT_PER_CPU_SYMBOL(cpu_info); - /* CPUs which are the primary SMT threads */ struct cpumask __cpu_primary_thread_mask __read_mostly; -- cgit v1.2.3 From 35ce64922c8263448e58a2b9e8d15a64e11e9b2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Feb 2024 23:20:32 +0100 Subject: x86/idle: Select idle routine only once The idle routine selection is done on every CPU bringup operation and has a guard in place which is effective after the first invocation, which is a pointless exercise. Invoke it once on the boot CPU and mark the related functions __init. The guard check has to stay as xen_set_default_idle() runs early. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/87edcu6vaq.ffs@tglx --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/process.c | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1188e8bf76a2..523c466c2fc9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -558,7 +558,7 @@ static inline void load_sp0(unsigned long sp0) unsigned long __get_wchan(struct task_struct *p); -extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void select_idle_routine(void); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8f367d376520..5c72af16dd06 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1938,8 +1938,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); - select_idle_routine(c); - #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif @@ -2344,6 +2342,8 @@ void __init arch_cpu_finalize_init(void) { identify_boot_cpu(); + select_idle_routine(); + /* * identify_boot_cpu() initialized SMT support information, let the * core code know. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ccaacc7f9681..f0166b31a803 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -853,8 +853,9 @@ void __noreturn stop_this_cpu(void *dummy) * Do not prefer MWAIT if MONITOR instruction has a bug or idle=nomwait * is passed to kernel commandline parameter. */ -static bool prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) +static __init bool prefer_mwait_c1_over_halt(void) { + const struct cpuinfo_x86 *c = &boot_cpu_data; u32 eax, ebx, ecx, edx; /* If override is enforced on the command line, fall back to HALT. */ @@ -908,7 +909,7 @@ static __cpuidle void mwait_idle(void) __current_clr_polling(); } -void select_idle_routine(const struct cpuinfo_x86 *c) +void __init select_idle_routine(void) { if (boot_option_idle_override == IDLE_POLL) { if (IS_ENABLED(CONFIG_SMP) && smp_num_siblings > 1) @@ -916,10 +917,11 @@ void select_idle_routine(const struct cpuinfo_x86 *c) return; } + /* Required to guard against xen_set_default_idle() */ if (x86_idle_set()) return; - if (prefer_mwait_c1_over_halt(c)) { + if (prefer_mwait_c1_over_halt()) { pr_info("using mwait in idle threads\n"); static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { -- cgit v1.2.3 From c416b5bac6ad6ffe21e36225553b82ff2ec1558c Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Mon, 4 Mar 2024 00:33:33 -0800 Subject: x86/fred: Fix init_task thread stack pointer initialization As TOP_OF_KERNEL_STACK_PADDING was defined as 0 on x86_64, it went unnoticed that the initialization of the .sp field in INIT_THREAD and some calculations in the low level startup code do not take the padding into account. FRED enabled kernels require a 16 byte padding, which means that the init task initialization and the low level startup code use the wrong stack offset. Subtract TOP_OF_KERNEL_STACK_PADDING in all affected places to adjust for this. Fixes: 65c9cc9e2c14 ("x86/fred: Reserve space for the FRED stack frame") Fixes: 3adee777ad0d ("x86/smpboot: Remove initial_stack on 64-bit") Reported-by: kernel test robot Signed-off-by: Xin Li (Intel) Signed-off-by: Thomas Gleixner Closes: https://lore.kernel.org/oe-lkp/202402262159.183c2a37-lkp@intel.com Link: https://lore.kernel.org/r/20240304083333.449322-1-xin@zytor.com --- arch/x86/include/asm/processor.h | 6 ++++-- arch/x86/kernel/head_64.S | 3 ++- arch/x86/xen/xen-head.S | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 26620d7642a9..17fe81998ce4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -664,8 +664,10 @@ static __always_inline void prefetchw(const void *x) #else extern unsigned long __end_init_task[]; -#define INIT_THREAD { \ - .sp = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \ +#define INIT_THREAD { \ + .sp = (unsigned long)&__end_init_task - \ + TOP_OF_KERNEL_STACK_PADDING - \ + sizeof(struct pt_regs), \ } extern unsigned long KSTK_ESP(struct task_struct *task); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d4918d03efb4..c38e43589046 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -26,6 +26,7 @@ #include #include #include +#include /* * We are not able to switch in one step to the final KERNEL ADDRESS SPACE @@ -66,7 +67,7 @@ SYM_CODE_START_NOALIGN(startup_64) mov %rsi, %r15 /* Set up the stack for verify_cpu() */ - leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp + leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp leaq _text(%rip), %rdi diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index a0ea285878db..04101b984f24 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen) ANNOTATE_NOENDBR cld - leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp + leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp /* Set up %gs. * -- cgit v1.2.3