author     Andrea Arcangeli <aarcange@redhat.com>    2010-11-02 21:46:57 +0100
committer  Andrea Arcangeli <aarcange@redhat.com>    2010-11-02 20:47:42 +0000
commit     8e8b82243f899168ed4b3530cc9309286d81b689 (patch)
tree       822dddfd581a6c3159607d266747b8397be72242
parent     a6b25ae6edbd880f4b14f5b07bf4e433d2bfa03d (diff)
avoid breaking huge pmd invariants in case of vma_adjust failures
A huge pmd can only be mapped if the corresponding 2M virtual range is fully contained in the vma. At times the VM calls split_vma twice: if the first split_vma succeeds and the second fails, the first split_vma remains in effect and is not rolled back.

For split_vma or vma_adjust to fail, an allocation failure is needed, so this is a very unlikely event (the out of memory killer would normally fire before any allocation failure becomes visible to the kernel and userland, and if an out of memory condition did happen it would be unlikely to happen exactly here). Nevertheless it is safer to ensure that no huge pmd can be left around if the vma is adjusted in a way that can no longer fit hugepages at the new vm_start/vm_end addresses.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
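As a rough illustration of the invariant described above (not part of the patch: the helper name and the 2M constant are assumptions, matching x86-64), the condition the new __vma_adjust_trans_huge() enforces can be sketched in plain C as follows.

#include <stdbool.h>

/* Illustrative constants: a huge pmd maps 2M on x86-64. */
#define HPAGE_PMD_SIZE  (2UL * 1024 * 1024)
#define HPAGE_PMD_MASK  (~(HPAGE_PMD_SIZE - 1))

/*
 * Hypothetical helper mirroring the checks added by this patch:
 * the hugepage containing 'addr' may only remain mapped if its
 * whole 2M-aligned range lies inside [vm_start, vm_end).
 */
static bool hugepage_fits(unsigned long vm_start, unsigned long vm_end,
                          unsigned long addr)
{
        unsigned long haddr = addr & HPAGE_PMD_MASK;

        return haddr >= vm_start && haddr + HPAGE_PMD_SIZE <= vm_end;
}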
-rw-r--r--  include/linux/huge_mm.h   19
-rw-r--r--  mm/huge_memory.c          80
-rw-r--r--  mm/mmap.c                  2
3 files changed, 99 insertions(+), 2 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b4fca2d0d63e..8358986dc881 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -106,6 +106,19 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
extern unsigned long vma_address(struct page *page, struct vm_area_struct *vma);
extern int hugepage_madvise(unsigned long *vm_flags);
+extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+                                    unsigned long start,
+                                    unsigned long end,
+                                    long adjust_next);
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+                                         unsigned long start,
+                                         unsigned long end,
+                                         long adjust_next)
+{
+       if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+               return;
+       __vma_adjust_trans_huge(vma, start, end, adjust_next);
+}
static inline int PageTransHuge(struct page *page)
{
        VM_BUG_ON(PageTail(page));
@@ -138,6 +151,12 @@ static inline int hugepage_madvise(unsigned long *vm_flags)
        BUG_ON(0);
        return 0;
}
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+                                         unsigned long start,
+                                         unsigned long end,
+                                         long adjust_next)
+{
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 428cc69bd81d..253fa2cdd24f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1071,8 +1071,16 @@ pmd_t *page_check_address_pmd(struct page *page,
                goto out;
        if (pmd_page(*pmd) != page)
                goto out;
-       VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
-                 pmd_trans_splitting(*pmd));
+       /*
+        * split_vma() may create temporary aliased mappings. There is
+        * no risk as long as all huge pmd are found and have their
+        * splitting bit set before __split_huge_page_refcount
+        * runs. Finding the same huge pmd more than once during the
+        * same rmap walk is not a problem.
+        */
+       if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
+           pmd_trans_splitting(*pmd))
+               goto out;
        if (pmd_trans_huge(*pmd)) {
                VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
                          !pmd_trans_splitting(*pmd));
@@ -2174,3 +2182,71 @@ void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
        put_page(page);
        BUG_ON(pmd_trans_huge(*pmd));
}
+
+static void split_huge_page_address(struct mm_struct *mm,
+                                    unsigned long address)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       VM_BUG_ON(!(address & ~HPAGE_PMD_MASK));
+
+       pgd = pgd_offset(mm, address);
+       if (!pgd_present(*pgd))
+               return;
+
+       pud = pud_offset(pgd, address);
+       if (!pud_present(*pud))
+               return;
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd_present(*pmd))
+               return;
+       /*
+        * Caller holds the mmap_sem write mode, so a huge pmd cannot
+        * materialize from under us.
+        */
+       split_huge_page_pmd(mm, pmd);
+}
+
+void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+                             unsigned long start,
+                             unsigned long end,
+                             long adjust_next)
+{
+       /*
+        * If the new start address isn't hpage aligned and it could
+        * previously contain an hugepage: check if we need to split
+        * an huge pmd.
+        */
+       if (start & ~HPAGE_PMD_MASK &&
+           (start & HPAGE_PMD_MASK) >= vma->vm_start &&
+           (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
+               split_huge_page_address(vma->vm_mm, start);
+
+       /*
+        * If the new end address isn't hpage aligned and it could
+        * previously contain an hugepage: check if we need to split
+        * an huge pmd.
+        */
+       if (end & ~HPAGE_PMD_MASK &&
+           (end & HPAGE_PMD_MASK) >= vma->vm_start &&
+           (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
+               split_huge_page_address(vma->vm_mm, end);
+
+       /*
+        * If we're also updating the vma->vm_next->vm_start, if the new
+        * vm_next->vm_start isn't page aligned and it could previously
+        * contain an hugepage: check if we need to split an huge pmd.
+        */
+       if (adjust_next > 0) {
+               struct vm_area_struct *next = vma->vm_next;
+               unsigned long nstart = next->vm_start;
+               nstart += adjust_next << PAGE_SHIFT;
+               if (nstart & ~HPAGE_PMD_MASK &&
+                   (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
+                   (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
+                       split_huge_page_address(next->vm_mm, nstart);
+       }
+}
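To make the boundary case concrete, the following standalone sketch (hypothetical addresses and user-space stand-ins for the kernel macros, not part of the patch) evaluates the same test __vma_adjust_trans_huge() applies when a vma is shrunk to an end address that is not 2M aligned: the huge pmd whose 2M range still fit the old vma must be split before the adjustment takes effect.

#include <stdio.h>

#define HPAGE_PMD_SIZE  (2UL * 1024 * 1024)
#define HPAGE_PMD_MASK  (~(HPAGE_PMD_SIZE - 1))

int main(void)
{
        /* Hypothetical vma being shrunk to an unaligned end address. */
        unsigned long vm_start = 0x400000, vm_end = 0x800000;
        unsigned long new_end = 0x6ff000;               /* not 2M aligned */
        unsigned long haddr = new_end & HPAGE_PMD_MASK; /* 0x600000 */

        if ((new_end & ~HPAGE_PMD_MASK) &&
            haddr >= vm_start && haddr + HPAGE_PMD_SIZE <= vm_end)
                printf("huge pmd at %#lx must be split before the adjust\n",
                       haddr);
        return 0;
}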
diff --git a/mm/mmap.c b/mm/mmap.c
index 4115e7441df6..7d1535283a1a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -589,6 +589,8 @@ again: remove_next = 1 + (end > next->vm_end);
                }
        }
+       vma_adjust_trans_huge(vma, start, end, adjust_next);
+
        /*
         * When changing only vma->vm_end, we don't really need anon_vma
         * lock. This is a fairly rare case by itself, but the anon_vma