From 91d1aa43d30505b0b825db8898ffc80a8eca96c7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 27 Nov 2012 19:33:25 +0100 Subject: context_tracking: New context tracking subsystem Create a new subsystem that probes on kernel boundaries to keep track of the transitions between level contexts with two basic initial contexts: user or kernel. This is an abstraction of some RCU code that uses such tracking to implement its userspace extended quiescent state. We need to pull this up from RCU into this new level of indirection because this tracking is also going to be used to implement an "on demand" generic virtual cputime accounting. A necessary step to shut down the tick while still accounting the cputime. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Li Zhong Cc: Gilad Ben-Yossef Reviewed-by: Steven Rostedt [ paulmck: fix whitespace error and email address. ] Signed-off-by: Paul E. McKenney --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8e13ecb41bee..7a529cbab7ad 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,7 +18,7 @@ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_START */ -#include <asm/rcu.h> /* exception_enter(), ... */ +#include <asm/context_tracking.h> /* exception_enter(), ... */ /* * Page fault error code bits: -- cgit v1.2.3 From c2d23f919bafcbc2259f5257d9a7d729802f0e3a Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 12 Dec 2012 13:52:10 -0800 Subject: mm, oom: remove statically defined arch functions of same name out_of_memory() is a globally defined function to call the oom killer. x86, sh, and powerpc all use a function of the same name within file scope in their respective fault.c unnecessarily. Inline the functions into the pagefault handlers to clean the code up. 
Signed-off-by: David Rientjes Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Mundt Reviewed-by: Michal Hocko Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/fault.c | 27 ++++++++++++--------------- arch/sh/mm/fault.c | 19 +++++++------------ arch/x86/mm/fault.c | 23 ++++++++--------------- 3 files changed, 27 insertions(+), 42 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0a6b28336eb0..3a8489a354e9 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -113,19 +113,6 @@ static int store_updates_sp(struct pt_regs *regs) #define MM_FAULT_CONTINUE -1 #define MM_FAULT_ERR(sig) (sig) -static int out_of_memory(struct pt_regs *regs) -{ - /* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ - up_read(&current->mm->mmap_sem); - if (!user_mode(regs)) - return MM_FAULT_ERR(SIGKILL); - pagefault_out_of_memory(); - return MM_FAULT_RETURN; -} - static int do_sigbus(struct pt_regs *regs, unsigned long address) { siginfo_t info; @@ -169,8 +156,18 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) return MM_FAULT_CONTINUE; /* Out of memory */ - if (fault & VM_FAULT_OOM) - return out_of_memory(regs); + if (fault & VM_FAULT_OOM) { + up_read(&current->mm->mmap_sem); + + /* + * We ran out of memory, or some other thing happened to us that + * made us unable to handle the page fault gracefully. + */ + if (!user_mode(regs)) + return MM_FAULT_ERR(SIGKILL); + pagefault_out_of_memory(); + return MM_FAULT_RETURN; + } /* Bus error. 
x86 handles HWPOISON here, we'll add this if/when * we support the feature in HW diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index cbbdcad8fcb3..1f49c28affa9 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -301,17 +301,6 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, __bad_area(regs, error_code, address, SEGV_ACCERR); } -static void out_of_memory(void) -{ - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed): - */ - up_read(&current->mm->mmap_sem); - - pagefault_out_of_memory(); -} - static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -353,8 +342,14 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, no_context(regs, error_code, address); return 1; } + up_read(&current->mm->mmap_sem); - out_of_memory(); + /* + * We ran out of memory, call the OOM killer, and return the + * userspace (which will retry the fault, or kill us if we got + * oom-killed): + */ + pagefault_out_of_memory(); } else { if (fault & VM_FAULT_SIGBUS) do_sigbus(regs, error_code, address); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7a529cbab7ad..027088f2f7dd 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -803,20 +803,6 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, __bad_area(regs, error_code, address, SEGV_ACCERR); } -/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ -static void -out_of_memory(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed): - */ - up_read(&current->mm->mmap_sem); - - pagefault_out_of_memory(); -} - static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) @@ -879,7 +865,14 @@ mm_fault_error(struct pt_regs 
*regs, unsigned long error_code, return 1; } - out_of_memory(regs, error_code, address); + up_read(&current->mm->mmap_sem); + + /* + * We ran out of memory, call the OOM killer, and return the + * userspace (which will retry the fault, or kill us if we got + * oom-killed): + */ + pagefault_out_of_memory(); } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) -- cgit v1.2.3 From e575a86fdc50d013bf3ad3aa81d9100e8e6cc60d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 7 Feb 2013 09:44:13 -0800 Subject: x86: Do not leak kernel page mapping locations Without this patch, it is trivial to determine kernel page mappings by examining the error code reported to dmesg[1]. Instead, declare the entire kernel memory space as a violation of a present page. Additionally, since show_unhandled_signals is enabled by default, switch branch hinting to the more realistic expectation, and unobfuscate the setting of the PF_PROT bit to improve readability. [1] http://vulnfactory.org/blog/2013/02/06/a-linux-memory-trick/ Reported-by: Dan Rosenberg Suggested-by: Brad Spengler Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Acked-by: H. Peter Anvin Cc: Paul E. McKenney Cc: Frederic Weisbecker Cc: Eric W. 
Biederman Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20130207174413.GA12485@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 027088f2f7dd..fb674fd3fc22 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -748,13 +748,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, return; } #endif + /* Kernel addresses are always protection faults: */ + if (address >= TASK_SIZE) + error_code |= PF_PROT; - if (unlikely(show_unhandled_signals)) + if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); - /* Kernel addresses are always protection faults: */ tsk->thread.cr2 = address; - tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); -- cgit v1.2.3 From 954f857187033ee3d3704a8206715cf354c38898 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 22 Feb 2013 15:11:49 -0800 Subject: Revert "x86, mm: Make spurious_fault check explicitly check the PRESENT bit" I got a report for a minor regression introduced by commit 027ef6c87853b ("mm: thp: fix pmd_present for split_huge_page and PROT_NONE with THP"). So the problem is, pageattr creates kernel pagetables (pte and pmds) that break pte_present/pmd_present and the patch above exposed this invariant breakage for pmd_present. The same problem already existed for the pte and pte_present and it was fixed by commit 660a293ea9be709 ("x86, mm: Make spurious_fault check explicitly check the PRESENT bit") (if it wasn't for that commit, it wouldn't even be a regression). That fix avoids the pagefault to use pte_present. I could follow through by stopping using pmd_present/pmd_huge too. 
However I think it's more robust to fix pageattr and to clear the PSE/GLOBAL bitflags too in addition to the present bitflag. So the kernel page fault can keep using the regular pte_present/pmd_present/pmd_huge. The confusion arises because _PAGE_GLOBAL and _PAGE_PROTNONE are sharing the same bit, and in the pmd case we pretend _PAGE_PSE to be set only in present pmds (to facilitate split_huge_page final tlb flush). Signed-off-by: Andrea Arcangeli Cc: Andi Kleen Cc: Shaohua Li Cc: "H. Peter Anvin" Cc: Mel Gorman Cc: Hugh Dickins Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fb674fd3fc22..2b97525246d4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -939,14 +939,8 @@ spurious_fault(unsigned long error_code, unsigned long address) if (pmd_large(*pmd)) return spurious_fault_check(error_code, (pte_t *) pmd); - /* - * Note: don't use pte_present() here, since it returns true - * if the _PAGE_PROTNONE bit is set. However, this aliases the - * _PAGE_GLOBAL bit, which for kernel pages give false positives - * when CONFIG_DEBUG_PAGEALLOC is used. - */ pte = pte_offset_kernel(pmd, address); - if (!(pte_flags(*pte) & _PAGE_PRESENT)) + if (!pte_present(*pte)) return 0; ret = spurious_fault_check(error_code, pte); -- cgit v1.2.3