From 0167d7d8b0beb4cf12076b47e4dc73897ae5acb0 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 12 Nov 2013 15:08:02 -0800 Subject: x86/mm: factor out of top-down direct mapping setup Create a new function memory_map_top_down to factor out of the top-down direct memory mapping pagetable setup. This is also a preparation for the following patch, which will introduce the bottom-up memory mapping. That said, we will put the two ways of pagetable setup into separate functions, and choose to use which way in init_mem_mapping, which makes the code more clear. Signed-off-by: Tang Chen Signed-off-by: Zhang Yanfei Acked-by: Tejun Heo Acked-by: Toshi Kani Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Wanpeng Li Cc: Thomas Renninger Cc: Yinghai Lu Cc: Jiang Liu Cc: Wen Congyang Cc: Lai Jiangshan Cc: Yasuaki Ishimatsu Cc: Taku Izumi Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Rik van Riel Cc: Johannes Weiner Cc: Kamezawa Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init.c | 59 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 20 deletions(-) (limited to 'arch/x86/mm/init.c') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ce32017c5e38..742d6d4ad9eb 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -418,27 +418,27 @@ static unsigned long __init get_new_step_size(unsigned long step_size) return step_size << 5; } -void __init init_mem_mapping(void) +/** + * memory_map_top_down - Map [map_start, map_end) top down + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in top-down. That said, the page tables + * will be allocated at the end of the memory, and we map the + * memory in top-down. + */ +static void __init memory_map_top_down(unsigned long map_start, + unsigned long map_end) { - unsigned long end, real_end, start, last_start; + unsigned long real_end, start, last_start; unsigned long step_size; unsigned long addr; unsigned long mapped_ram_size = 0; unsigned long new_mapped_ram_size; - probe_page_size_mask(); - -#ifdef CONFIG_X86_64 - end = max_pfn << PAGE_SHIFT; -#else - end = max_low_pfn << PAGE_SHIFT; -#endif - - /* the ISA range is always mapped regardless of memory holes */ - init_memory_mapping(0, ISA_END_ADDRESS); - /* xen has big range in reserved near end of ram, skip it at first.*/ - addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE); + addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ @@ -453,13 +453,13 @@ void __init init_mem_mapping(void) * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages * for page table. */ - while (last_start > ISA_END_ADDRESS) { + while (last_start > map_start) { if (last_start > step_size) { start = round_down(last_start - 1, step_size); - if (start < ISA_END_ADDRESS) - start = ISA_END_ADDRESS; + if (start < map_start) + start = map_start; } else - start = ISA_END_ADDRESS; + start = map_start; new_mapped_ram_size = init_range_memory_mapping(start, last_start); last_start = start; @@ -470,8 +470,27 @@ void __init init_mem_mapping(void) mapped_ram_size += new_mapped_ram_size; } - if (real_end < end) - init_range_memory_mapping(real_end, end); + if (real_end < map_end) + init_range_memory_mapping(real_end, map_end); +} + +void __init init_mem_mapping(void) +{ + unsigned long end; + + probe_page_size_mask(); + +#ifdef CONFIG_X86_64 + end = max_pfn << PAGE_SHIFT; +#else + end = max_low_pfn << PAGE_SHIFT; +#endif + + /* the ISA range is always mapped regardless of memory holes */ + init_memory_mapping(0, ISA_END_ADDRESS); + + /* setup direct mapping for range [ISA_END_ADDRESS, end) in top-down*/ + memory_map_top_down(ISA_END_ADDRESS, end); #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { -- cgit v1.2.3 From b959ed6c73845aebf51afb8f76bb74b9388344d2 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 12 Nov 2013 15:08:05 -0800 Subject: x86/mem-hotplug: support initialize page tables in bottom-up The Linux kernel cannot migrate pages used by the kernel. As a result, kernel pages cannot be hot-removed. So we cannot allocate hotpluggable memory for the kernel. In a memory hotplug system, any numa node the kernel resides in should be unhotpluggable. And for a modern server, each node could have at least 16GB memory. So memory around the kernel image is highly likely unhotpluggable. ACPI SRAT (System Resource Affinity Table) contains the memory hotplug info. But before SRAT is parsed, memblock has already started to allocate memory for the kernel. So we need to prevent memblock from doing this. So direct memory mapping page tables setup is the case. init_mem_mapping() is called before SRAT is parsed. To prevent page tables being allocated within hotpluggable memory, we will use bottom-up direction to allocate page tables from the end of kernel image to the higher memory. Note: As for allocating page tables in lower memory, TJ said: : This is an optional behavior which is triggered by a very specific kernel : boot param, which I suspect is gonna need to stick around to support : memory hotplug in the current setup unless we add another layer of address : translation to support memory hotplug. As for page tables may occupy too much lower memory if using 4K mapping (CONFIG_DEBUG_PAGEALLOC and CONFIG_KMEMCHECK both disable using >4k pages), TJ said: : But as I said in the same paragraph, parsing SRAT earlier doesn't solve : the problem in itself either. Ignoring the option if 4k mapping is : required and memory consumption would be prohibitive should work, no? : Something like that would be necessary if we're gonna worry about cases : like this no matter how we implement it, but, frankly, I'm not sure this : is something worth worrying about. Signed-off-by: Tang Chen Signed-off-by: Zhang Yanfei Acked-by: Tejun Heo Acked-by: Toshi Kani Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Wanpeng Li Cc: Thomas Renninger Cc: Yinghai Lu Cc: Jiang Liu Cc: Wen Congyang Cc: Lai Jiangshan Cc: Yasuaki Ishimatsu Cc: Taku Izumi Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Rik van Riel Cc: Johannes Weiner Cc: Kamezawa Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/init.c') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 742d6d4ad9eb..91b522072a4d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -474,6 +474,51 @@ static void __init memory_map_top_down(unsigned long map_start, init_range_memory_mapping(real_end, map_end); } +/** + * memory_map_bottom_up - Map [map_start, map_end) bottom up + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in bottom-up. Since we have limited the + * bottom-up allocation above the kernel, the page tables will + * be allocated just above the kernel and we map the memory + * in [map_start, map_end) in bottom-up. + */ +static void __init memory_map_bottom_up(unsigned long map_start, + unsigned long map_end) +{ + unsigned long next, new_mapped_ram_size, start; + unsigned long mapped_ram_size = 0; + /* step_size need to be small so pgt_buf from BRK could cover it */ + unsigned long step_size = PMD_SIZE; + + start = map_start; + min_pfn_mapped = start >> PAGE_SHIFT; + + /* + * We start from the bottom (@map_start) and go to the top (@map_end). + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. + */ + while (start < map_end) { + if (map_end - start > step_size) { + next = round_up(start + 1, step_size); + if (next > map_end) + next = map_end; + } else + next = map_end; + + new_mapped_ram_size = init_range_memory_mapping(start, next); + start = next; + + if (new_mapped_ram_size > mapped_ram_size) + step_size = get_new_step_size(step_size); + mapped_ram_size += new_mapped_ram_size; + } +} + void __init init_mem_mapping(void) { unsigned long end; @@ -489,8 +534,25 @@ void __init init_mem_mapping(void) /* the ISA range is always mapped regardless of memory holes */ init_memory_mapping(0, ISA_END_ADDRESS); - /* setup direct mapping for range [ISA_END_ADDRESS, end) in top-down*/ - memory_map_top_down(ISA_END_ADDRESS, end); + /* + * If the allocation is in bottom-up direction, we setup direct mapping + * in bottom-up, otherwise we setup direct mapping in top-down. + */ + if (memblock_bottom_up()) { + unsigned long kernel_end = __pa_symbol(_end); + + /* + * we need two separate calls here. This is because we want to + * allocate page tables above the kernel. So we first map + * [kernel_end, end) to make memory above the kernel be mapped + * as soon as possible. And then use page tables allocated above + * the kernel to map [ISA_END_ADDRESS, kernel_end). + */ + memory_map_bottom_up(kernel_end, end); + memory_map_bottom_up(ISA_END_ADDRESS, kernel_end); + } else { + memory_map_top_down(ISA_END_ADDRESS, end); + } #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { -- cgit v1.2.3 From d4dd100f2ebb2bc9aca5378ad3cb333b6117069c Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Tue, 12 Nov 2013 15:08:28 -0800 Subject: arch/x86/mm/init.c: fix incorrect function name in alloc_low_pages() Signed-off-by: Zhi Yong Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/init.c') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 91b522072a4d..f97130618113 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -53,12 +53,12 @@ __ref void *alloc_low_pages(unsigned int num) if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { unsigned long ret; if (min_pfn_mapped >= max_pfn_mapped) - panic("alloc_low_page: ran out of memory"); + panic("alloc_low_pages: ran out of memory"); ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, max_pfn_mapped << PAGE_SHIFT, PAGE_SIZE * num , PAGE_SIZE); if (!ret) - panic("alloc_low_page: can not alloc memory"); + panic("alloc_low_pages: can not alloc memory"); memblock_reserve(ret, PAGE_SIZE * num); pfn = ret >> PAGE_SHIFT; } else { -- cgit v1.2.3