From 0bdf525f04afd3a32c14e5a8778771f9c9e0f074 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 16 Nov 2012 13:53:51 -0800 Subject: x86: Improve __phys_addr performance by making use of carry flags and inlining This patch is meant to improve overall system performance when making use of the __phys_addr call. To do this I have implemented several changes. First if CONFIG_DEBUG_VIRTUAL is not defined __phys_addr is made an inline, similar to how this is currently handled in 32 bit. However in order to do this it is required to export phys_base so that it is available if __phys_addr is used in kernel modules. The second change was to streamline the code by making use of the carry flag on an add operation instead of performing a compare on a 64 bit value. The advantage to this is that it allows us to significantly reduce the overall size of the call. On my Xeon E5 system the entire __phys_addr inline call consumes a little less than 32 bytes and 5 instructions. I also applied similar logic to the debug version of the function. My testing shows that the debug version of the function with this patch applied is slightly faster than the non-debug version without the patch. Finally I also applied the same logic changes to __virt_addr_valid since it used the same general code flow as __phys_addr and could achieve similar gains though these changes. Signed-off-by: Alexander Duyck Link: http://lkml.kernel.org/r/20121116215315.8521.46270.stgit@ahduyck-cp1.jf.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/physaddr.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'arch/x86/mm/physaddr.c') diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index d2e2735327b4..fd40d75109ef 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -8,33 +8,43 @@ #ifdef CONFIG_X86_64 +#ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); - x += phys_base; + unsigned long y = x - __START_KERNEL_map; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + if (unlikely(x > y)) { + x = y + phys_base; + + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); } else { - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(x)); + x = y + (__START_KERNEL_map - PAGE_OFFSET); + + /* carry flag will be set if starting x was >= PAGE_OFFSET */ + VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); } + return x; } EXPORT_SYMBOL(__phys_addr); +#endif bool __virt_addr_valid(unsigned long x) { - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - if (x >= KERNEL_IMAGE_SIZE) + unsigned long y = x - __START_KERNEL_map; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + if (unlikely(x > y)) { + x = y + phys_base; + + if (y >= KERNEL_IMAGE_SIZE) return false; - x += phys_base; } else { - if (x < PAGE_OFFSET) - return false; - x -= PAGE_OFFSET; - if (!phys_addr_valid(x)) + x = y + (__START_KERNEL_map - PAGE_OFFSET); + + /* carry flag will be set if starting x was >= PAGE_OFFSET */ + if ((x > y) || !phys_addr_valid(x)) return false; } -- cgit v1.2.3 From 7d74275d39def4d3ccc8cf4725388bf79ef13861 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 16 Nov 2012 13:55:46 -0800 Subject: x86: Make it so that __pa_symbol can only process kernel symbols on x86_64 I submitted an earlier patch that make __phys_addr an inline. This obviously results in an increase in the code size. One step I can take to reduce that is to make it so that the __pa_symbol call does a direct translation for kernel addresses instead of covering all of virtual memory. On my system this reduced the size for __pa_symbol from 5 instructions totalling 30 bytes to 3 instructions totalling 16 bytes. Signed-off-by: Alexander Duyck Link: http://lkml.kernel.org/r/20121116215356.8521.92472.stgit@ahduyck-cp1.jf.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/page.h | 3 ++- arch/x86/include/asm/page_32.h | 1 + arch/x86/include/asm/page_64.h | 3 +++ arch/x86/mm/physaddr.c | 11 +++++++++++ 4 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/physaddr.c') diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 8ca82839288a..3698a6a0a940 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -44,7 +44,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, * case properly. Once all supported versions of gcc understand it, we can * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) */ -#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) +#define __pa_symbol(x) \ + __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x))) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index da4e762406f7..4d550d04b609 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) #endif +#define __phys_addr_symbol(x) __phys_addr(x) #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) #ifdef CONFIG_FLATMEM diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 5138174c2101..0f1ddee6a0ce 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -21,8 +21,11 @@ static inline unsigned long __phys_addr_nodebug(unsigned long x) #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); +extern unsigned long __phys_addr_symbol(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) +#define __phys_addr_symbol(x) \ + ((unsigned long)(x) - __START_KERNEL_map + phys_base) #endif #define __phys_reloc_hide(x) (x) diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index fd40d75109ef..c73feddd518d 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -28,6 +28,17 @@ unsigned long __phys_addr(unsigned long x) return x; } EXPORT_SYMBOL(__phys_addr); + +unsigned long __phys_addr_symbol(unsigned long x) +{ + unsigned long y = x - __START_KERNEL_map; + + /* only check upper bounds since lower bounds will trigger carry */ + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); + + return y + phys_base; +} +EXPORT_SYMBOL(__phys_addr_symbol); #endif bool __virt_addr_valid(unsigned long x) -- cgit v1.2.3 From a25b9316841c5afa226f8f70a457861b35276a92 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Jan 2013 13:24:30 -0800 Subject: x86, mm: Make DEBUG_VIRTUAL work earlier in boot The KVM code has some repeated bugs in it around use of __pa() on per-cpu data. Those data are not in an area on which using __pa() is valid. However, they are also called early enough in boot that __vmalloc_start_set is not set, and thus the CONFIG_DEBUG_VIRTUAL debugging does not catch them. This adds a check to also verify __pa() calls against max_low_pfn, which we can use earler in boot than is_vmalloc_addr(). However, if we are super-early in boot, max_low_pfn=0 and this will trip on every call, so also make sure that max_low_pfn is set before we try to use it. With this patch applied, CONFIG_DEBUG_VIRTUAL will actually catch the bug I was chasing (and fix later in this series). I'd love to find a generic way so that any __pa() call on percpu areas could do a BUG_ON(), but there don't appear to be any nice and easy ways to check if an address is a percpu one. Anybody have ideas on a way to do this? Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130122212430.F46F8159@kernel.stglabs.ibm.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/numa.c | 2 +- arch/x86/mm/pat.c | 4 ++-- arch/x86/mm/physaddr.c | 9 ++++++++- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/physaddr.c') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 2d125be1bae9..76604eb9e4b0 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -219,7 +219,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end) */ nd = alloc_remap(nid, nd_size); if (nd) { - nd_pa = __pa(nd); + nd_pa = __phys_addr_nodebug(nd); remapped = true; } else { nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 0eb572eda406..2610bd93c896 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -560,10 +560,10 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) { unsigned long id_sz; - if (base >= __pa(high_memory)) + if (base > __pa(high_memory-1)) return 0; - id_sz = (__pa(high_memory) < base + size) ? + id_sz = (__pa(high_memory-1) <= base + size) ? __pa(high_memory) - base : size; diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index c73feddd518d..e666cbbb9261 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -68,10 +69,16 @@ EXPORT_SYMBOL(__virt_addr_valid); #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { + unsigned long phys_addr = x - PAGE_OFFSET; /* VMALLOC_* aren't constants */ VIRTUAL_BUG_ON(x < PAGE_OFFSET); VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); - return x - PAGE_OFFSET; + /* max_low_pfn is set early, but not _that_ early */ + if (max_low_pfn) { + VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn); + BUG_ON(slow_virt_to_phys((void *)x) != phys_addr); + } + return phys_addr; } EXPORT_SYMBOL(__phys_addr); #endif -- cgit v1.2.3