From 6afb5157b9eba4092e2f0f54d24a3806409bdde5 Mon Sep 17 00:00:00 2001
From: Haicheng Li
Date: Wed, 19 May 2010 17:42:14 +0800
Subject: x86, mm: Separate x86_64 vmalloc_sync_all() into separate functions

No behavior change.

Move some of vmalloc_sync_all() code into a new function
sync_global_pgds() that will be useful for memory hotplug.

Signed-off-by: Haicheng Li
LKML-Reference: <4C6E4ECD.1090607@linux.intel.com>
Reviewed-by: Wu Fengguang
Reviewed-by: Andi Kleen
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/fault.c | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

(limited to 'arch/x86/mm/fault.c')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c4508e8a204..51f7ee71d6c7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -326,29 +326,7 @@ out:
 
 void vmalloc_sync_all(void)
 {
-	unsigned long address;
-
-	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
-	     address += PGDIR_SIZE) {
-
-		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
-		struct page *page;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock_irqsave(&pgd_lock, flags);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}
 
 /*
--
cgit v1.2.3
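For reference, the new sync_global_pgds() helper lands in arch/x86/mm/init_64.c, which this fault.c-limited view does not show. Reconstructed from the lines removed above, with the VMALLOC bounds turned into start/end parameters, it would look roughly like this (a sketch, not the verbatim commit):

void sync_global_pgds(unsigned long start, unsigned long end)
{
	unsigned long address;

	for (address = start; address <= end; address += PGDIR_SIZE) {
		const pgd_t *pgd_ref = pgd_offset_k(address);
		unsigned long flags;
		struct page *page;

		/* Nothing mapped in init_mm at this pgd slot: nothing to sync. */
		if (pgd_none(*pgd_ref))
			continue;

		/* Copy the kernel entry into every pgd on the pgd_list. */
		spin_lock_irqsave(&pgd_lock, flags);
		list_for_each_entry(page, &pgd_list, lru) {
			pgd_t *pgd;
			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			if (pgd_none(*pgd))
				set_pgd(pgd, *pgd_ref);
			else
				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
		}
		spin_unlock_irqrestore(&pgd_lock, flags);
	}
}

The "Hold mm->page_table_lock while doing vmalloc_sync" patch below modifies exactly this loop, so its per-pgd locking can be read against this baseline.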
From 660a293ea9be709b893d371fbc0328fcca33c33a Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Tue, 27 Jul 2010 16:06:28 +0800
Subject: x86, mm: Make spurious_fault check explicitly check the PRESENT bit

pte_present() returns true even when the present bit is not set, as long
as the _PAGE_PROTNONE bit (which occupies the global bit's position) is
set. With CONFIG_DEBUG_PAGEALLOC, free pages have the global bit set but
the present bit clear. Checking the present bit explicitly therefore lets
us catch accesses to free pages when CONFIG_DEBUG_PAGEALLOC is enabled.

[ hpa: added a comment in the code as a warning to janitors ]

Signed-off-by: Shaohua Li
LKML-Reference: <1280217988.32400.75.camel@sli10-desk.sh.intel.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/fault.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/x86/mm/fault.c')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 51f7ee71d6c7..caec22906d7c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -872,8 +872,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	if (pmd_large(*pmd))
 		return spurious_fault_check(error_code, (pte_t *) pmd);
 
+	/*
+	 * Note: don't use pte_present() here, since it returns true
+	 * if the _PAGE_PROTNONE bit is set.  However, this aliases the
+	 * _PAGE_GLOBAL bit, which for kernel pages give false positives
+	 * when CONFIG_DEBUG_PAGEALLOC is used.
+	 */
 	pte = pte_offset_kernel(pmd, address);
-	if (!pte_present(*pte))
+	if (!(pte_flags(*pte) & _PAGE_PRESENT))
		return 0;
 
 	ret = spurious_fault_check(error_code, pte);
--
cgit v1.2.3
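To see why the explicit flag test is needed, recall how pte_present() was defined in this era. A sketch along the lines of arch/x86/include/asm/pgtable.h at the time (shown for illustration, reconstructed from memory of the 2010 tree rather than quoted from it):

/*
 * A pte tests "present" if either the hardware present bit or
 * _PAGE_PROTNONE is set.  _PAGE_PROTNONE reuses the bit position of
 * _PAGE_GLOBAL, and CONFIG_DEBUG_PAGEALLOC leaves the global bit set
 * on freed kernel pages after clearing _PAGE_PRESENT, so this test
 * reports such free pages as present.
 */
static inline int pte_present(pte_t a)
{
	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}

Testing pte_flags(*pte) & _PAGE_PRESENT directly sidesteps the PROTNONE/GLOBAL alias, so a fault on a freed DEBUG_PAGEALLOC page is no longer dismissed as spurious.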
From 617d34d9e5d8326ec8f188c616aa06ac59d083fe Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Tue, 21 Sep 2010 12:01:51 -0700
Subject: x86, mm: Hold mm->page_table_lock while doing vmalloc_sync

Take mm->page_table_lock while syncing the vmalloc region. This prevents
a race with the Xen pagetable pin/unpin code, which expects that the
page_table_lock is already held. If this race occurs, then Xen can see
an inconsistent page type (a page can either be read/write or a
pagetable page, and pin/unpin converts it between them), which will
cause either the pin or the set_p[gm]d to fail; either will crash the
kernel.

vmalloc_sync_all() should be called rarely, so this extra use of
page_table_lock should not interfere with its normal users.

The mm pointer is stashed in the pgd page's index field, as that field
is not otherwise used for pgds.

Reported-by: Ian Campbell
Originally-by: Jan Beulich
LKML-Reference: <4CB88A4C.1080305@goop.org>
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/pgtable.h |  2 ++
 arch/x86/mm/fault.c            | 11 ++++++++++-
 arch/x86/mm/init_64.c          |  7 +++++++
 arch/x86/mm/pgtable.c          | 20 +++++++++++++++++---
 4 files changed, 36 insertions(+), 4 deletions(-)

(limited to 'arch/x86/mm/fault.c')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2d0a33bd2971..ada823a13c7c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
 extern spinlock_t pgd_lock;
 extern struct list_head pgd_list;
 
+extern struct mm_struct *pgd_page_get_mm(struct page *page);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index caec22906d7c..6c27c39f8a37 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
 
 		spin_lock_irqsave(&pgd_lock, flags);
 		list_for_each_entry(page, &pgd_list, lru) {
-			if (!vmalloc_sync_one(page_address(page), address))
+			spinlock_t *pgt_lock;
+			int ret;
+
+			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+
+			spin_lock(pgt_lock);
+			ret = vmalloc_sync_one(page_address(page), address);
+			spin_unlock(pgt_lock);
+
+			if (!ret)
 				break;
 		}
 		spin_unlock_irqrestore(&pgd_lock, flags);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ad7c0ff5d2b..4d323fb770c2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -116,12 +116,19 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 	spin_lock_irqsave(&pgd_lock, flags);
 	list_for_each_entry(page, &pgd_list, lru) {
 		pgd_t *pgd;
+		spinlock_t *pgt_lock;
+
 		pgd = (pgd_t *)page_address(page) + pgd_index(address);
+		pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+		spin_lock(pgt_lock);
+
 		if (pgd_none(*pgd))
 			set_pgd(pgd, *pgd_ref);
 		else
 			BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+		spin_unlock(pgt_lock);
 	}
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e..c70e57dbb491 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
-static void pgd_ctor(pgd_t *pgd)
+
+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
+	virt_to_page(pgd)->index = (pgoff_t)mm;
+}
+
+struct mm_struct *pgd_page_get_mm(struct page *page)
+{
+	return (struct mm_struct *)page->index;
+}
+
+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 {
 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -105,8 +117,10 @@ static void pgd_ctor(pgd_t *pgd)
 	}
 
 	/* list required to sync kernel mapping updates */
-	if (!SHARED_KERNEL_PMD)
+	if (!SHARED_KERNEL_PMD) {
+		pgd_set_mm(pgd, mm);
 		pgd_list_add(pgd);
+	}
 }
 
 static void pgd_dtor(pgd_t *pgd)
@@ -272,7 +286,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 */
 	spin_lock_irqsave(&pgd_lock, flags);
 
-	pgd_ctor(pgd);
+	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
 	spin_unlock_irqrestore(&pgd_lock, flags);
--
cgit v1.2.3

From f01f7c56a1425b9749a99af821e1de334fb64d7e Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Tue, 19 Oct 2010 22:17:37 +0000
Subject: x86, mm: Fix incorrect data type in vmalloc_sync_all()

arch/x86/mm/fault.c: In function 'vmalloc_sync_all':
arch/x86/mm/fault.c:238: warning: assignment makes integer from pointer without a cast

introduced by 617d34d9e5d8326ec8f188c616aa06ac59d083fe.

Signed-off-by: Borislav Petkov
LKML-Reference: <20101020103642.GA3135@kryptos.osrc.amd.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/fault.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/mm/fault.c')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6c27c39f8a37..0cdb8d493f61 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -230,7 +230,7 @@ void vmalloc_sync_all(void)
 	spin_lock_irqsave(&pgd_lock, flags);
 	list_for_each_entry(page, &pgd_list, lru) {
 		spinlock_t *pgt_lock;
-		int ret;
+		pmd_t *ret;
 
 		pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 
--
cgit v1.2.3
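For context on why ret is a pmd_t *: on 32-bit, vmalloc_sync_one() returns the synced kernel pmd entry, or NULL once the reference pagetable has no mapping left to propagate. A sketch of the helper as it looked around this time (an approximation of the era's fault.c, not the verbatim source):

static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	/* Walk this pagetable and the kernel reference (init_mm) in step. */
	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;

	/* Copy the kernel's pmd entry if this pagetable lacks it. */
	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
	else
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));

	return pmd_k;
}

A NULL return makes the caller's "if (!ret) break;" leave the pgd_list walk early, which is why ret must be a pointer rather than an int.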