diff options
Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- | mm/mmap.c | 71 |
1 files changed, 56 insertions, 15 deletions
diff --git a/mm/mmap.c b/mm/mmap.c index f19efcf75418..924839fac0e6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -44,6 +44,7 @@ #include <linux/userfaultfd_k.h> #include <linux/moduleparam.h> #include <linux/pkeys.h> +#include <linux/oom.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> @@ -684,7 +685,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next = vma->vm_next, *orig_vma = vma; struct address_space *mapping = NULL; - struct rb_root *root = NULL; + struct rb_root_cached *root = NULL; struct anon_vma *anon_vma = NULL; struct file *file = vma->vm_file; bool start_changed = false, end_changed = false; @@ -1386,9 +1387,24 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (file) { struct inode *inode = file_inode(file); + unsigned long flags_mask; + + flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; switch (flags & MAP_TYPE) { case MAP_SHARED: + /* + * Force use of MAP_SHARED_VALIDATE with non-legacy + * flags. E.g. MAP_SYNC is dangerous to use with + * MAP_SHARED as you don't know which consistency model + * you will get. We silently ignore unsupported flags + * with MAP_SHARED to preserve backward compatibility. + */ + flags &= LEGACY_MAP_MASK; + /* fall through */ + case MAP_SHARED_VALIDATE: + if (flags & ~flags_mask) + return -EOPNOTSUPP; if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) return -EACCES; @@ -2639,13 +2655,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, if (vma->vm_start >= end) return 0; - if (uf) { - int error = userfaultfd_unmap_prep(vma, start, end, uf); - - if (error) - return error; - } - /* * If we need to split any vma, do it now to save pain later. * @@ -2679,6 +2688,21 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, } vma = prev ? prev->vm_next : mm->mmap; + if (unlikely(uf)) { + /* + * If userfaultfd_unmap_prep returns an error the vmas + * will remain splitted, but userland will get a + * highly unexpected error anyway. This is no + * different than the case where the first of the two + * __split_vma fails, but we don't undo the first + * split, despite we could. This is unlikely enough + * failure that it's not worth optimizing it for. + */ + int error = userfaultfd_unmap_prep(vma, start, end, uf); + if (error) + return error; + } + /* * unlock any mlock()ed ranges before detaching vmas */ @@ -2993,6 +3017,23 @@ void exit_mmap(struct mm_struct *mm) /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); + set_bit(MMF_OOM_SKIP, &mm->flags); + if (unlikely(tsk_is_oom_victim(current))) { + /* + * Wait for oom_reap_task() to stop working on this + * mm. Because MMF_OOM_SKIP is already set before + * calling down_read(), oom_reap_task() will not run + * on this "mm" post up_write(). + * + * tsk_is_oom_victim() cannot be set from under us + * either because current->mm is already set to NULL + * under task_lock before calling mmput and oom_mm is + * set not NULL by the OOM killer only if current->mm + * is found not NULL while holding the task_lock. + */ + down_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); + } free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, 0, -1); @@ -3314,7 +3355,7 @@ static DEFINE_MUTEX(mm_all_locks_mutex); static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) { - if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) { + if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. @@ -3330,7 +3371,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * anon_vma->root->rwsem. */ if (__test_and_set_bit(0, (unsigned long *) - &anon_vma->root->rb_root.rb_node)) + &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); } } @@ -3432,7 +3473,7 @@ out_unlock: static void vm_unlock_anon_vma(struct anon_vma *anon_vma) { - if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) { + if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change to 0 from under * us because we hold the mm_all_locks_mutex. @@ -3446,7 +3487,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) * anon_vma->root->rwsem. */ if (!__test_and_clear_bit(0, (unsigned long *) - &anon_vma->root->rb_root.rb_node)) + &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); anon_vma_unlock_write(anon_vma); } @@ -3514,7 +3555,7 @@ static int init_user_reserve(void) { unsigned long free_kbytes; - free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); return 0; @@ -3535,7 +3576,7 @@ static int init_admin_reserve(void) { unsigned long free_kbytes; - free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); return 0; @@ -3579,7 +3620,7 @@ static int reserve_mem_notifier(struct notifier_block *nb, break; case MEM_OFFLINE: - free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); if (sysctl_user_reserve_kbytes > free_kbytes) { init_user_reserve(); |