From 60f6e65d7887c257392313755f95540ef5e7ea89 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 17 Jan 2011 10:52:02 +0800 Subject: x86: Cleanup vector usage Cleanup the vector usage and make them continuous if possible. Signed-off-by: Shaohua Li Cc: Andi Kleen Cc: Eric Dumazet LKML-Reference: <1295232722.1949.707.camel@sli10-conroe> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 40 ++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'arch/x86/include/asm/irq_vectors.h') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6af0894dafb4..42f0d4a30f1b 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_IRQ_VECTORS_H #define _ASM_X86_IRQ_VECTORS_H +#include /* * Linux IRQ vector layout. * @@ -16,8 +17,8 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... 237 : device interrupts - * Vectors 238 ... 255 : special interrupts + * Vectors 129 ... 229 : device interrupts + * Vectors 230 ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. * @@ -96,37 +97,38 @@ #define THRESHOLD_APIC_VECTOR 0xf9 #define REBOOT_VECTOR 0xf8 -/* f0-f7 used for spreading out TLB flushes: */ -#define INVALIDATE_TLB_VECTOR_END 0xf7 -#define INVALIDATE_TLB_VECTOR_START 0xf0 -#define NUM_INVALIDATE_TLB_VECTORS 8 - -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - /* * Generic system vector for platform specific use */ -#define X86_PLATFORM_IPI_VECTOR 0xed +#define X86_PLATFORM_IPI_VECTOR 0xf7 /* * IRQ work vector: */ -#define IRQ_WORK_VECTOR 0xec +#define IRQ_WORK_VECTOR 0xf6 -#define UV_BAU_MESSAGE 0xea +#define UV_BAU_MESSAGE 0xf5 /* * Self IPI vector for machine checks */ -#define MCE_SELF_VECTOR 0xeb +#define MCE_SELF_VECTOR 0xf4 /* Xen vector callback to receive events in a HVM domain */ -#define XEN_HVM_EVTCHN_CALLBACK 0xe9 +#define XEN_HVM_EVTCHN_CALLBACK 0xf3 + +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +#define LOCAL_TIMER_VECTOR 0xef + +/* f0-f7 used for spreading out TLB flushes: */ +#define NUM_INVALIDATE_TLB_VECTORS 8 +#define INVALIDATE_TLB_VECTOR_END 0xee +#define INVALIDATE_TLB_VECTOR_START \ + (INVALIDATE_TLB_VECTOR_END - NUM_INVALIDATE_TLB_VECTORS + 1) #define NR_VECTORS 256 -- cgit v1.2.3 From 70e4a369733a21e3d16b059a6ccdad22a344bf57 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 17 Jan 2011 10:52:07 +0800 Subject: x86: Scale up the number of TLB invalidate vectors with NR_CPUs, up to 32 Make the maxium TLB invalidate vectors depend on NR_CPUS linearly, with a maximum of 32 vectors. We currently only have 8 vectors for TLB invalidation and that is clearly inadequate. If we have a lot of CPUs, the CPUs need share the 8 vectors and tlbstate_lock is used to protect them. flush_tlb_page() is heavily used in page reclaim, which will cause a lot of lock contention for tlbstate_lock. Andi Kleen suggested increasing the vectors number to 32, which should be good for current typical systems to reduce the tlbstate_lock contention. My test system has 4 sockets and 64G memory, and 64 CPUs. My workload creates 64 processes. Each process mmap reads a big empty sparse file. The total size of the files are 2*total_mem, so this will cause a lot of page reclaim. Below is the result I get from perf call-graph profiling: without the patch: ------------------ 24.25% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock | |--42.15%-- native_flush_tlb_others with the patch: ------------------ 14.96% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock |--13.89%-- native_flush_tlb_others So this heavily reduces the tlbstate_lock contention. Suggested-by: Andi Kleen Signed-off-by: Shaohua Li Cc: Eric Dumazet Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra LKML-Reference: <1295232727.1949.709.camel@sli10-conroe> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm/irq_vectors.h') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 42f0d4a30f1b..4980f48bbbb7 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -17,8 +17,8 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... 229 : device interrupts - * Vectors 230 ... 255 : special interrupts + * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts + * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. * @@ -124,8 +124,13 @@ */ #define LOCAL_TIMER_VECTOR 0xef -/* f0-f7 used for spreading out TLB flushes: */ -#define NUM_INVALIDATE_TLB_VECTORS 8 +/* up to 32 vectors used for spreading out TLB flushes: */ +#if NR_CPUS <= 32 +# define NUM_INVALIDATE_TLB_VECTORS NR_CPUS +#else +# define NUM_INVALIDATE_TLB_VECTORS 32 +#endif + #define INVALIDATE_TLB_VECTOR_END 0xee #define INVALIDATE_TLB_VECTOR_START \ (INVALIDATE_TLB_VECTOR_END - NUM_INVALIDATE_TLB_VECTORS + 1) -- cgit v1.2.3 From d04c579f971bf7d995db1ef7a7161c0143068859 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 3 Mar 2011 10:55:29 +0000 Subject: x86: Work around old gas bug Add extra parentheses around a couple of definitions introduced by "x86: Cleanup vector usage" and used in assembly macro arguments, and remove spaces. Without that old (2.16.1) gas would see more macro arguments than were actually specified. Reported-and-tested-by: Andrew Morton Signed-off-by: Jan Beulich Cc: Shaohua Li LKML-Reference: <4D6F81B10200007800034B0B@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm/irq_vectors.h') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4980f48bbbb7..6e976ee3b3ef 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -126,14 +126,14 @@ /* up to 32 vectors used for spreading out TLB flushes: */ #if NR_CPUS <= 32 -# define NUM_INVALIDATE_TLB_VECTORS NR_CPUS +# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS) #else -# define NUM_INVALIDATE_TLB_VECTORS 32 +# define NUM_INVALIDATE_TLB_VECTORS (32) #endif -#define INVALIDATE_TLB_VECTOR_END 0xee +#define INVALIDATE_TLB_VECTOR_END (0xee) #define INVALIDATE_TLB_VECTOR_START \ - (INVALIDATE_TLB_VECTOR_END - NUM_INVALIDATE_TLB_VECTORS + 1) + (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1) #define NR_VECTORS 256 -- cgit v1.2.3