From 6677e3eaf4d78abd7b09133414c05dc3ec353e7f Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Fri, 22 Feb 2013 16:32:52 -0800 Subject: memory-hotplug: check whether all memory blocks are offlined or not when removing memory We remove the memory like this: 1. lock memory hotplug 2. offline a memory block 3. unlock memory hotplug 4. repeat 1-3 to offline all memory blocks 5. lock memory hotplug 6. remove memory(TODO) 7. unlock memory hotplug All memory blocks must be offlined before removing memory. But we don't hold the lock in the whole operation. So we should check whether all memory blocks are offlined before step6. Otherwise, kernel maybe panicked. Offlining a memory block and removing a memory device can be two different operations. Users can just offline some memory blocks without removing the memory device. For this purpose, the kernel has held lock_memory_hotplug() in __offline_pages(). To reuse the code for memory hot-remove, we repeat step 1-3 to offline all the memory blocks, repeatedly lock and unlock memory hotplug, but not hold the memory hotplug lock in the whole operation. Signed-off-by: Wen Congyang Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen Acked-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/memory_hotplug.h') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4a45c4e50025..8dd0950a6a7a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -247,6 +247,7 @@ extern int add_memory(int nid, u64 start, u64 size); extern int arch_add_memory(int nid, u64 start, u64 size); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern int offline_memory_block(struct memory_block *mem); +extern bool is_memblock_offlined(struct memory_block *mem); extern int remove_memory(u64 start, u64 size); extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages); -- cgit v1.2.3 From 24d335ca3606b610ec69c66a1e42760c96d89470 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:32:58 -0800 Subject: memory-hotplug: introduce new arch_remove_memory() for removing page table For removing memory, we need to remove page tables. But it depends on architecture. So the patch introduce arch_remove_memory() for removing page table. Now it only calls __remove_pages(). Note: __remove_pages() for some archtecuture is not implemented (I don't know how to implement it for s390). Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Acked-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 18 ++++++++++++++++++ arch/powerpc/mm/mem.c | 12 ++++++++++++ arch/s390/mm/init.c | 12 ++++++++++++ arch/sh/mm/init.c | 17 +++++++++++++++++ arch/tile/mm/init.c | 8 ++++++++ arch/x86/mm/init_32.c | 12 ++++++++++++ arch/x86/mm/init_64.c | 15 +++++++++++++++ include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 2 ++ 9 files changed, 97 insertions(+) (limited to 'include/linux/memory_hotplug.h') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b755ea92aea7..20bc967c7209 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -688,6 +688,24 @@ int arch_add_memory(int nid, u64 start, u64 size) return ret; } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + int ret; + + zone = page_zone(pfn_to_page(start_pfn)); + ret = __remove_pages(zone, start_pfn, nr_pages); + if (ret) + pr_warn("%s: Problem encountered in __remove_pages() as" + " ret=%d\n", __func__, ret); + + return ret; +} +#endif #endif /* diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0dba5066c22a..09c64518b4d0 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -133,6 +133,18 @@ int arch_add_memory(int nid, u64 start, u64 size) return __add_pages(nid, zone, start_pfn, nr_pages); } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + zone = page_zone(pfn_to_page(start_pfn)); + return __remove_pages(zone, start_pfn, nr_pages); +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ /* diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ae672f41c464..49ce6bb2c641 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -228,4 +228,16 @@ int arch_add_memory(int nid, u64 start, u64 size) vmem_remove_mapping(start, size); return rc; } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + /* + * There is no hardware or firmware interface which could trigger a + * hot memory remove on s390. So there is nothing that needs to be + * implemented. + */ + return -EBUSY; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 82cc576fab15..105794037143 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -558,4 +558,21 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + int ret; + + zone = page_zone(pfn_to_page(start_pfn)); + ret = __remove_pages(zone, start_pfn, nr_pages); + if (unlikely(ret)) + pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, + ret); + + return ret; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index ef29d6c5e10e..2749515a0547 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -935,6 +935,14 @@ int remove_memory(u64 start, u64 size) { return -EINVAL; } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + /* TODO */ + return -EBUSY; +} +#endif #endif struct kmem_cache *pgd_cache; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index b299724f6e34..2d19001151d5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -862,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size) return __add_pages(nid, zone, start_pfn, nr_pages); } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + zone = page_zone(pfn_to_page(start_pfn)); + return __remove_pages(zone, start_pfn, nr_pages); +} +#endif #endif /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3eba7f429880..b6dd1c480b30 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -707,6 +707,21 @@ int arch_add_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(arch_add_memory); +#ifdef CONFIG_MEMORY_HOTREMOVE +int __ref arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + int ret; + + zone = page_zone(pfn_to_page(start_pfn)); + ret = __remove_pages(zone, start_pfn, nr_pages); + WARN_ON_ONCE(ret); + + return ret; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_vsyscall; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8dd0950a6a7a..31a563bbd936 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -96,6 +96,7 @@ extern void __online_page_free(struct page *page); #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); +extern int arch_remove_memory(u64 start, u64 size); #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages in a zone */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a776dbf3fa00..942b43f6d736 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1513,6 +1513,8 @@ repeat: /* remove memmap entry */ firmware_map_remove(start, start + size, "System RAM"); + arch_remove_memory(start, size); + unlock_memory_hotplug(); return 0; -- cgit v1.2.3 From 46723bfa540f0a1e494476a1734d03626a0bd1e0 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Fri, 22 Feb 2013 16:33:00 -0800 Subject: memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap For removing memmap region of sparse-vmemmap which is allocated bootmem, memmap region of sparse-vmemmap needs to be registered by get_page_bootmem(). So the patch searches pages of virtual mapping and registers the pages by get_page_bootmem(). NOTE: register_page_bootmem_memmap() is not implemented for ia64, ppc, s390, and sparc. So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node() when platform doesn't support it. It's implemented by adding a new Kconfig option named CONFIG_HAVE_BOOTMEM_INFO_NODE, which will be automatically selected by memory-hotplug feature fully supported archs(currently only on x86_64). Since we have 2 config options called MEMORY_HOTPLUG and MEMORY_HOTREMOVE used for memory hot-add and hot-remove separately, and codes in function register_page_bootmem_info_node() are only used for collecting infomation for hot-remove, so reside it under MEMORY_HOTREMOVE. Besides page_isolation.c selected by MEMORY_ISOLATION under MEMORY_HOTPLUG is also such case, move it too. [mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE] [linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()] [mhocko@suse.cz: remove the arch specific functions without any implementation] [linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed] [rientjes@google.com: fix defined but not used warning] Signed-off-by: Wen Congyang Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen Reviewed-by: Wu Jianguo Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Michal Hocko Signed-off-by: Lin Feng Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/discontig.c | 1 + arch/powerpc/mm/init_64.c | 1 + arch/sparc/mm/init_64.c | 1 + arch/x86/mm/init_64.c | 60 ++++++++++++++++++++++++++++++++++++++++++ include/linux/memory_hotplug.h | 13 +++++---- include/linux/mm.h | 3 ++- mm/Kconfig | 10 ++++++- mm/memory_hotplug.c | 35 +++++++++++++++++++++--- 8 files changed, 111 insertions(+), 13 deletions(-) (limited to 'include/linux/memory_hotplug.h') diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c641333cd997..731bf84094b6 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -822,4 +822,5 @@ int __meminit vmemmap_populate(struct page *start_page, { return vmemmap_populate_basepages(start_page, size, node); } + #endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 95a45293e5ac..42bf082f0124 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -297,5 +297,6 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 5c2c6e61facb..59c6fcfdc782 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2235,6 +2235,7 @@ void __meminit vmemmap_populate_print_last(void) node_start = 0; } } + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b6dd1c480b30..f17aa76dc1ae 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1034,6 +1034,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) return 0; } +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) +void register_page_bootmem_memmap(unsigned long section_nr, + struct page *start_page, unsigned long size) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned int nr_pages; + struct page *page; + + for (; addr < end; addr = next) { + pte_t *pte = NULL; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + continue; + } + get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO); + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + continue; + } + get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); + + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + get_page_bootmem(section_nr, pmd_page(*pmd), + MIX_SECTION_INFO); + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + continue; + get_page_bootmem(section_nr, pte_page(*pte), + SECTION_INFO); + } else { + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + + nr_pages = 1 << (get_order(PMD_SIZE)); + page = pmd_page(*pmd); + while (nr_pages--) + get_page_bootmem(section_nr, page++, + SECTION_INFO); + } + } +} +#endif + void __meminit vmemmap_populate_print_last(void) { if (p_start) { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 31a563bbd936..4d523fe75ba1 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -174,17 +174,16 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) #endif /* CONFIG_NUMA */ #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ -#ifdef CONFIG_SPARSEMEM_VMEMMAP +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE +extern void register_page_bootmem_info_node(struct pglist_data *pgdat); +#else static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) { } -static inline void put_page_bootmem(struct page *page) -{ -} -#else -extern void register_page_bootmem_info_node(struct pglist_data *pgdat); -extern void put_page_bootmem(struct page *page); #endif +extern void put_page_bootmem(struct page *page); +extern void get_page_bootmem(unsigned long ingo, struct page *page, + unsigned long type); /* * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug diff --git a/include/linux/mm.h b/include/linux/mm.h index 95db68e34b18..060557b9764f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1718,7 +1718,8 @@ int vmemmap_populate_basepages(struct page *start_page, unsigned long pages, int node); int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); - +void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, + unsigned long size); enum mf_flags { MF_COUNT_INCREASED = 1 << 0, diff --git a/mm/Kconfig b/mm/Kconfig index 0b23db9a8791..2c7aea7106f9 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -162,10 +162,16 @@ config MOVABLE_NODE Say Y here if you want to hotplug a whole node. Say N here if you want kernel to use memory on all nodes evenly. +# +# Only be set on architectures that have completely implemented memory hotplug +# feature. If you are not sure, don't touch it. +# +config HAVE_BOOTMEM_INFO_NODE + def_bool n + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" - select MEMORY_ISOLATION depends on SPARSEMEM || X86_64_ACPI_NUMA depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) @@ -176,6 +182,8 @@ config MEMORY_HOTPLUG_SPARSE config MEMORY_HOTREMOVE bool "Allow for memory hot remove" + select MEMORY_ISOLATION + select HAVE_BOOTMEM_INFO_NODE if X86_64 depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE depends on MIGRATION diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 942b43f6d736..6c90d222ec0a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -91,9 +91,8 @@ static void release_memory_resource(struct resource *res) } #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE -#ifndef CONFIG_SPARSEMEM_VMEMMAP -static void get_page_bootmem(unsigned long info, struct page *page, - unsigned long type) +void get_page_bootmem(unsigned long info, struct page *page, + unsigned long type) { page->lru.next = (struct list_head *) type; SetPagePrivate(page); @@ -128,6 +127,8 @@ void __ref put_page_bootmem(struct page *page) } +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE +#ifndef CONFIG_SPARSEMEM_VMEMMAP static void register_page_bootmem_info_section(unsigned long start_pfn) { unsigned long *usemap, mapsize, section_nr, i; @@ -161,6 +162,32 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) get_page_bootmem(section_nr, page, MIX_SECTION_INFO); } +#else /* CONFIG_SPARSEMEM_VMEMMAP */ +static void register_page_bootmem_info_section(unsigned long start_pfn) +{ + unsigned long *usemap, mapsize, section_nr, i; + struct mem_section *ms; + struct page *page, *memmap; + + if (!pfn_valid(start_pfn)) + return; + + section_nr = pfn_to_section_nr(start_pfn); + ms = __nr_to_section(section_nr); + + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); + + register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); + + usemap = __nr_to_section(section_nr)->pageblock_flags; + page = virt_to_page(usemap); + + mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; + + for (i = 0; i < mapsize; i++, page++) + get_page_bootmem(section_nr, page, MIX_SECTION_INFO); +} +#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ void register_page_bootmem_info_node(struct pglist_data *pgdat) { @@ -203,7 +230,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat) register_page_bootmem_info_section(pfn); } } -#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ +#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ static void grow_zone_span(struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) -- cgit v1.2.3 From 60a5a19e7419ba0bc22ed01b3285e8940b42944c Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:14 -0800 Subject: memory-hotplug: remove sysfs file of node Introduce a new function try_offline_node() to remove sysfs file of node when all memory sections of this node are removed. If some memory sections of this node are not removed, this function does nothing. Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 8 ++++-- include/linux/memory_hotplug.h | 2 +- mm/memory_hotplug.c | 58 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 5 deletions(-) (limited to 'include/linux/memory_hotplug.h') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 034d3e72aa92..da1f82b445e0 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -280,9 +280,11 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) { - int result = 0; + int result = 0, nid; struct acpi_memory_info *info, *n; + nid = acpi_get_node(mem_device->device->handle); + list_for_each_entry_safe(info, n, &mem_device->res_list, list) { if (info->failed) /* The kernel does not use this memory block */ @@ -295,7 +297,9 @@ static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) */ return -EBUSY; - result = remove_memory(info->start_addr, info->length); + if (nid < 0) + nid = memory_add_physaddr_to_nid(info->start_addr); + result = remove_memory(nid, info->start_addr, info->length); if (result) return result; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4d523fe75ba1..69903ccf549e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -248,7 +248,7 @@ extern int arch_add_memory(int nid, u64 start, u64 size); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern int offline_memory_block(struct memory_block *mem); extern bool is_memblock_offlined(struct memory_block *mem); -extern int remove_memory(u64 start, u64 size); +extern int remove_memory(int nid, u64 start, u64 size); extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3f792375f326..aea6374f435a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -1679,7 +1680,58 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg) return ret; } -int __ref remove_memory(u64 start, u64 size) +static int check_cpu_on_node(void *data) +{ + struct pglist_data *pgdat = data; + int cpu; + + for_each_present_cpu(cpu) { + if (cpu_to_node(cpu) == pgdat->node_id) + /* + * the cpu on this node isn't removed, and we can't + * offline this node. + */ + return -EBUSY; + } + + return 0; +} + +/* offline the node if all memory sections of this node are removed */ +static void try_offline_node(int nid) +{ + unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; + unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; + unsigned long pfn; + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + unsigned long section_nr = pfn_to_section_nr(pfn); + + if (!present_section_nr(section_nr)) + continue; + + if (pfn_to_nid(pfn) != nid) + continue; + + /* + * some memory sections of this node are not removed, and we + * can't offline node now. + */ + return; + } + + if (stop_machine(check_cpu_on_node, NODE_DATA(nid), NULL)) + return; + + /* + * all memory/cpu of this node are removed, we can offline this + * node now. + */ + node_set_offline(nid); + unregister_one_node(nid); +} + +int __ref remove_memory(int nid, u64 start, u64 size) { unsigned long start_pfn, end_pfn; int ret = 0; @@ -1734,6 +1786,8 @@ repeat: arch_remove_memory(start, size); + try_offline_node(nid); + unlock_memory_hotplug(); return 0; @@ -1743,7 +1797,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) { return -EINVAL; } -int remove_memory(u64 start, u64 size) +int remove_memory(int nid, u64 start, u64 size) { return -EINVAL; } -- cgit v1.2.3 From 90b30cdc1d87450e2ae89c8f8a29102dc2c1992e Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:33:27 -0800 Subject: memory-hotplug: export the function try_offline_node() try_offline_node() will be needed in the tristate drivers/acpi/processor_driver.c. The node will be offlined when all memory/cpu on the node have been hotremoved. So we need the function try_offline_node() in cpu-hotplug path. If the memory-hotplug is disabled, and cpu-hotplug is enabled 1. no memory no the node we don't online the node, and cpu's node is the nearest node. 2. the node contains some memory the node has been onlined, and cpu's node is still needed to migrate the sleep task on the cpu to the same node. So we do nothing in try_offline_node() in this case. [rientjes@google.com: export the function try_offline_node() fix] Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: Yasuaki Ishimatsu Cc: David Rientjes Cc: Jiang Liu Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Len Brown Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 +++ mm/memory_hotplug.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux/memory_hotplug.h') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 69903ccf549e..b6a3be7d47bf 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -233,6 +233,7 @@ static inline void unlock_memory_hotplug(void) {} #ifdef CONFIG_MEMORY_HOTREMOVE extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); +extern void try_offline_node(int nid); #else static inline int is_mem_section_removable(unsigned long pfn, @@ -240,6 +241,8 @@ static inline int is_mem_section_removable(unsigned long pfn, { return 0; } + +static inline void try_offline_node(int nid) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ extern int mem_online_node(int nid); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e189b1f4a9db..17e1447077ab 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1706,7 +1706,7 @@ static int check_cpu_on_node(void *data) } /* offline the node if all memory sections of this node are removed */ -static void try_offline_node(int nid) +void try_offline_node(int nid) { pg_data_t *pgdat = NODE_DATA(nid); unsigned long start_pfn = pgdat->node_start_pfn; @@ -1762,6 +1762,7 @@ static void try_offline_node(int nid) */ memset(pgdat, 0, sizeof(*pgdat)); } +EXPORT_SYMBOL(try_offline_node); int __ref remove_memory(int nid, u64 start, u64 size) { -- cgit v1.2.3