From 7cb93181629c613ee2b8f4ffe3446f8003074842 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 30 Jul 2008 02:18:26 +0900 Subject: mm/hugetlb.c must #include This patch fixes the following build error on sh caused by commit aa888a74977a8f2120ae9332376e179c39a6b07d (hugetlb: support larger than MAX_ORDER): <-- snip --> ... CC mm/hugetlb.o /home/bunk/linux/kernel-2.6/git/linux-2.6/mm/hugetlb.c: In function 'alloc_bootmem_huge_page': /home/bunk/linux/kernel-2.6/git/linux-2.6/mm/hugetlb.c:958: error: implicit declaration of function 'virt_to_phys' make[2]: *** [mm/hugetlb.o] Error 1 <-- snip --> Reported-by: Adrian Bunk Signed-off-by: Adrian Bunk Signed-off-by: Paul Mundt --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3be79dc18c5c..b3c78640b629 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -16,7 +16,7 @@ #include #include #include - +#include #include #include -- cgit v1.2.3 From 0ef89d25d3e390dfa7c46772907951744a4067dc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 31 Jul 2008 00:07:30 -0700 Subject: mm/hugetlb: don't crash when HPAGE_SHIFT is 0 Some platform decide whether they support huge pages at boot time. On these, such as powerpc, HPAGE_SHIFT is a variable, not a constant, and is set to 0 when there is no such support. The patches to introduce multiple huge pages support broke that causing the kernel to crash at boot time on machines such as POWER3 which lack support for multiple page sizes. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d237a02eb228..28a2980ee435 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1283,7 +1283,12 @@ module_exit(hugetlb_exit); static int __init hugetlb_init(void) { - BUILD_BUG_ON(HPAGE_SHIFT == 0); + /* Some platform decide whether they support huge pages at boot + * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when + * there is no such support + */ + if (HPAGE_SHIFT == 0) + return 0; if (!size_to_hstate(default_hstate_size)) { default_hstate_size = HPAGE_SIZE; -- cgit v1.2.3 From d6606683a5e3dac35cb979c7195f54ed827567bd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 6 Aug 2008 12:04:54 -0700 Subject: Revert duplicate "mm/hugetlb.c must #include " This reverts commit 7cb93181629c613ee2b8f4ffe3446f8003074842, since we did that patch twice, and the problem was already fixed earlier by 78a34ae29bf1c9df62a5bd0f0798b6c62a54d520. Reported-by: Andi Kleen Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 28a2980ee435..757ca983fd99 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -17,7 +17,7 @@ #include #include #include -#include + #include #include #include -- cgit v1.2.3 From caff3a2c333e11a794308bd9a875a09b94fee24a Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 12 Aug 2008 15:08:38 -0700 Subject: hugetlb: call arch_prepare_hugepage() for surplus pages The s390 software large page emulation implements shared page tables by using page->index of the first tail page from a compound large page to store page table information. This is set up in arch_prepare_hugepage(), which is called from alloc_fresh_huge_page_node(). A similar call to arch_prepare_hugepage() is missing for surplus large pages that are allocated in alloc_buddy_huge_page(), which breaks the software emulation mode for (surplus) large pages on s390. This patch adds the missing call to arch_prepare_hugepage(). It will have no effect on other architectures where arch_prepare_hugepage() is a nop. Also, use the correct order in the error path in alloc_fresh_huge_page_node(). Acked-by: Martin Schwidefsky Signed-off-by: Gerald Schaefer Acked-by: Nick Piggin Acked-by: Adam Litke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 757ca983fd99..92155db888b9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -565,7 +565,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) huge_page_order(h)); if (page) { if (arch_prepare_hugepage(page)) { - __free_pages(page, HUGETLB_PAGE_ORDER); + __free_pages(page, huge_page_order(h)); return NULL; } prep_new_huge_page(h, page, nid); @@ -665,6 +665,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); + if (page && arch_prepare_hugepage(page)) { + __free_pages(page, huge_page_order(h)); + return NULL; + } + spin_lock(&hugetlb_lock); if (page) { /* -- cgit v1.2.3 From 57303d80175e10056bf51206f9961d586f02f967 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Tue, 12 Aug 2008 15:08:47 -0700 Subject: hugetlbfs: allocate structures for reservation tracking outside of spinlocks In the normal case, hugetlbfs reserves hugepages at map time so that the pages exist for future faults. A struct file_region is used to track when reservations have been consumed and where. These file_regions are allocated as necessary with kmalloc() which can sleep with the mm->page_table_lock held. This is wrong and triggers may-sleep warning when PREEMPT is enabled. Updates to the underlying file_region are done in two phases. The first phase prepares the region for the change, allocating any necessary memory, without actually making the change. The second phase actually commits the change. This patch makes use of this by checking the reservations before the page_table_lock is taken; triggering any necessary allocations. This may then be safely repeated within the locks without any allocations being required. Credit to Mel Gorman for diagnosing this failure and initial versions of the patch. Signed-off-by: Andy Whitcroft Tested-by: Gerald Schaefer Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 92155db888b9..4c97c174e2e1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1942,6 +1942,15 @@ retry: lock_page(page); } + /* + * If we are going to COW a private mapping later, we examine the + * pending reservations for this page now. This will ensure that + * any allocations necessary to record that reservation occur outside + * the spinlock. + */ + if (write_access && !(vma->vm_flags & VM_SHARED)) + vma_needs_reservation(h, vma, address); + spin_lock(&mm->page_table_lock); size = i_size_read(mapping->host) >> huge_page_shift(h); if (idx >= size) @@ -1978,6 +1987,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *ptep; pte_t entry; int ret; + struct page *pagecache_page = NULL; static DEFINE_MUTEX(hugetlb_instantiation_mutex); struct hstate *h = hstate_vma(vma); @@ -2000,19 +2010,35 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, ret = 0; + /* + * If we are going to COW the mapping later, we examine the pending + * reservations for this page now. This will ensure that any + * allocations necessary to record that reservation occur outside the + * spinlock. For private mappings, we also lookup the pagecache + * page now as it is used to determine if a reservation has been + * consumed. + */ + if (write_access && !pte_write(entry)) { + vma_needs_reservation(h, vma, address); + + if (!(vma->vm_flags & VM_SHARED)) + pagecache_page = hugetlbfs_pagecache_page(h, + vma, address); + } + spin_lock(&mm->page_table_lock); /* Check for a racing update before calling hugetlb_cow */ if (likely(pte_same(entry, huge_ptep_get(ptep)))) - if (write_access && !pte_write(entry)) { - struct page *page; - page = hugetlbfs_pagecache_page(h, vma, address); - ret = hugetlb_cow(mm, vma, address, ptep, entry, page); - if (page) { - unlock_page(page); - put_page(page); - } - } + if (write_access && !pte_write(entry)) + ret = hugetlb_cow(mm, vma, address, ptep, entry, + pagecache_page); spin_unlock(&mm->page_table_lock); + + if (pagecache_page) { + unlock_page(pagecache_page); + put_page(pagecache_page); + } + mutex_unlock(&hugetlb_instantiation_mutex); return ret; -- cgit v1.2.3 From 2b26736c88db85c038e04c2306d0745553e69602 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Tue, 12 Aug 2008 15:08:49 -0700 Subject: allocate structures for reservation tracking in hugetlbfs outside of spinlocks v2 [Andrew this should replace the previous version which did not check the returns from the region prepare for errors. This has been tested by us and Gerald and it looks good. Bah, while reviewing the locking based on your previous email I spotted that we need to check the return from the vma_needs_reservation call for allocation errors. Here is an updated patch to correct this. This passes testing here.] Signed-off-by: Andy Whitcroft Tested-by: Gerald Schaefer Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4c97c174e2e1..67a71191136e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1949,7 +1949,10 @@ retry: * the spinlock. */ if (write_access && !(vma->vm_flags & VM_SHARED)) - vma_needs_reservation(h, vma, address); + if (vma_needs_reservation(h, vma, address) < 0) { + ret = VM_FAULT_OOM; + goto backout_unlocked; + } spin_lock(&mm->page_table_lock); size = i_size_read(mapping->host) >> huge_page_shift(h); @@ -1976,6 +1979,7 @@ out: backout: spin_unlock(&mm->page_table_lock); +backout_unlocked: unlock_page(page); put_page(page); goto out; @@ -2004,8 +2008,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, entry = huge_ptep_get(ptep); if (huge_pte_none(entry)) { ret = hugetlb_no_page(mm, vma, address, ptep, write_access); - mutex_unlock(&hugetlb_instantiation_mutex); - return ret; + goto out_unlock; } ret = 0; @@ -2019,7 +2022,10 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * consumed. */ if (write_access && !pte_write(entry)) { - vma_needs_reservation(h, vma, address); + if (vma_needs_reservation(h, vma, address) < 0) { + ret = VM_FAULT_OOM; + goto out_unlock; + } if (!(vma->vm_flags & VM_SHARED)) pagecache_page = hugetlbfs_pagecache_page(h, @@ -2039,6 +2045,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, put_page(pagecache_page); } +out_unlock: mutex_unlock(&hugetlb_instantiation_mutex); return ret; -- cgit v1.2.3