From 82f71ae4a2b829a25971bdf54b4d0d3d69d3c8b7 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 6 Aug 2014 16:06:36 -0700 Subject: mm: catch memory commitment underflow Print a warning (if CONFIG_DEBUG_VM=y) when memory commitment becomes too negative. This shouldn't happen any more - the previous two patches fixed the committed_as underflow issues. [akpm@linux-foundation.org: use VM_WARN_ONCE, per Dave] Signed-off-by: Konstantin Khlebnikov Cc: Hugh Dickins Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 129b847d30cc..64c9d736155c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -134,6 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { unsigned long free, allowed, reserve; + VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) < + -(s64)vm_committed_as_batch * num_online_cpus(), + "memory commitment underflow"); + vm_acct_memory(pages); /* -- cgit v1.2.3 From 4bb5f5d9395bc112d93a134d8f5b05611eddc9c0 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:25 -0700 Subject: mm: allow drivers to prevent new writable mappings This patch (of 6): The i_mmap_writable field counts existing writable mappings of an address_space. To allow drivers to prevent new writable mappings, make this counter signed and prevent new writable mappings if it is negative. This is modelled after i_writecount and DENYWRITE. This will be required by the shmem-sealing infrastructure to prevent any new writable mappings after the WRITE seal has been set. In case there exists a writable mapping, this operation will fail with EBUSY. Note that we rely on the fact that iff you already own a writable mapping, you can increase the counter without using the helpers. This is the same that we do for i_writecount. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 1 + include/linux/fs.h | 29 +++++++++++++++++++++++++++-- kernel/fork.c | 2 +- mm/mmap.c | 30 ++++++++++++++++++++++++------ mm/swap_state.c | 1 + 5 files changed, 54 insertions(+), 9 deletions(-) (limited to 'mm/mmap.c') diff --git a/fs/inode.c b/fs/inode.c index 5938f3928944..26753ba7b6d6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; + atomic_set(&mapping->i_mmap_writable, 0); mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; mapping->backing_dev_info = &default_backing_dev_info; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ab6c6913040..f0890e4a7c25 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -387,7 +387,7 @@ struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ - unsigned int i_mmap_writable;/* count VM_SHARED mappings */ + atomic_t i_mmap_writable;/* count VM_SHARED mappings */ struct rb_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ @@ -470,10 +470,35 @@ static inline int mapping_mapped(struct address_space *mapping) * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. + * + * If i_mmap_writable is negative, no new writable mappings are allowed. You + * can only deny writable mappings, if none exists right now. */ static inline int mapping_writably_mapped(struct address_space *mapping) { - return mapping->i_mmap_writable != 0; + return atomic_read(&mapping->i_mmap_writable) > 0; +} + +static inline int mapping_map_writable(struct address_space *mapping) +{ + return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? + 0 : -EPERM; +} + +static inline void mapping_unmap_writable(struct address_space *mapping) +{ + atomic_dec(&mapping->i_mmap_writable); +} + +static inline int mapping_deny_writable(struct address_space *mapping) +{ + return atomic_dec_unless_positive(&mapping->i_mmap_writable) ? + 0 : -EBUSY; +} + +static inline void mapping_allow_writable(struct address_space *mapping) +{ + atomic_inc(&mapping->i_mmap_writable); } /* diff --git a/kernel/fork.c b/kernel/fork.c index fa9124322cd4..1380d8ace334 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -429,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) atomic_dec(&inode->i_writecount); mutex_lock(&mapping->i_mmap_mutex); if (tmp->vm_flags & VM_SHARED) - mapping->i_mmap_writable++; + atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ if (unlikely(tmp->vm_flags & VM_NONLINEAR)) diff --git a/mm/mmap.c b/mm/mmap.c index 64c9d736155c..c1f2ea4a0b99 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -221,7 +221,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, if (vma->vm_flags & VM_DENYWRITE) atomic_inc(&file_inode(file)->i_writecount); if (vma->vm_flags & VM_SHARED) - mapping->i_mmap_writable--; + mapping_unmap_writable(mapping); flush_dcache_mmap_lock(mapping); if (unlikely(vma->vm_flags & VM_NONLINEAR)) @@ -622,7 +622,7 @@ static void __vma_link_file(struct vm_area_struct *vma) if (vma->vm_flags & VM_DENYWRITE) atomic_dec(&file_inode(file)->i_writecount); if (vma->vm_flags & VM_SHARED) - mapping->i_mmap_writable++; + atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); if (unlikely(vma->vm_flags & VM_NONLINEAR)) @@ -1577,6 +1577,17 @@ munmap_back: if (error) goto free_vma; } + if (vm_flags & VM_SHARED) { + error = mapping_map_writable(file->f_mapping); + if (error) + goto allow_write_and_free_vma; + } + + /* ->mmap() can change vma->vm_file, but must guarantee that + * vma_link() below can deny write-access if VM_DENYWRITE is set + * and map writably if VM_SHARED is set. This usually means the + * new file must not have been exposed to user-space, yet. + */ vma->vm_file = get_file(file); error = file->f_op->mmap(file, vma); if (error) @@ -1616,8 +1627,12 @@ munmap_back: vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ - if (vm_flags & VM_DENYWRITE) - allow_write_access(file); + if (file) { + if (vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); + } file = vma->vm_file; out: perf_event_mmap(vma); @@ -1646,14 +1661,17 @@ out: return addr; unmap_and_free_vma: - if (vm_flags & VM_DENYWRITE) - allow_write_access(file); vma->vm_file = NULL; fput(file); /* Undo any partial mapping done by a device driver. */ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); charged = 0; + if (vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); +allow_write_and_free_vma: + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); free_vma: kmem_cache_free(vm_area_cachep, vma); unacct_error: diff --git a/mm/swap_state.c b/mm/swap_state.c index e160151da6b8..3e0ec83d000c 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -39,6 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = { struct address_space swapper_spaces[MAX_SWAPFILES] = { [0 ... MAX_SWAPFILES - 1] = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), + .i_mmap_writable = ATOMIC_INIT(0), .a_ops = &swap_aops, .backing_dev_info = &swap_backing_dev_info, } -- cgit v1.2.3 From 8542bdfc6632b55aa1cf4fa255283c878b662499 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 9 Sep 2014 14:50:59 -0700 Subject: mm/mmap.c: use pr_emerg when printing BUG related information Make sure we actually see the output of validate_mm() and browse_rb() before triggering a BUG(). pr_info isn't shown by default so the reason for the BUG() isn't obvious. Signed-off-by: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index c1f2ea4a0b99..c0a3637cdb64 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -369,20 +369,20 @@ static int browse_rb(struct rb_root *root) struct vm_area_struct *vma; vma = rb_entry(nd, struct vm_area_struct, vm_rb); if (vma->vm_start < prev) { - pr_info("vm_start %lx prev %lx\n", vma->vm_start, prev); + pr_emerg("vm_start %lx prev %lx\n", vma->vm_start, prev); bug = 1; } if (vma->vm_start < pend) { - pr_info("vm_start %lx pend %lx\n", vma->vm_start, pend); + pr_emerg("vm_start %lx pend %lx\n", vma->vm_start, pend); bug = 1; } if (vma->vm_start > vma->vm_end) { - pr_info("vm_end %lx < vm_start %lx\n", + pr_emerg("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start); bug = 1; } if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) { - pr_info("free gap %lx, correct %lx\n", + pr_emerg("free gap %lx, correct %lx\n", vma->rb_subtree_gap, vma_compute_subtree_gap(vma)); bug = 1; @@ -396,7 +396,7 @@ static int browse_rb(struct rb_root *root) for (nd = pn; nd; nd = rb_prev(nd)) j++; if (i != j) { - pr_info("backwards %d, forwards %d\n", j, i); + pr_emerg("backwards %d, forwards %d\n", j, i); bug = 1; } return bug ? -1 : i; @@ -431,17 +431,17 @@ static void validate_mm(struct mm_struct *mm) i++; } if (i != mm->map_count) { - pr_info("map_count %d vm_next %d\n", mm->map_count, i); + pr_emerg("map_count %d vm_next %d\n", mm->map_count, i); bug = 1; } if (highest_address != mm->highest_vm_end) { - pr_info("mm->highest_vm_end %lx, found %lx\n", + pr_emerg("mm->highest_vm_end %lx, found %lx\n", mm->highest_vm_end, highest_address); bug = 1; } i = browse_rb(&mm->mm_rb); if (i != mm->map_count) { - pr_info("map_count %d rb %d\n", mm->map_count, i); + pr_emerg("map_count %d rb %d\n", mm->map_count, i); bug = 1; } BUG_ON(bug); -- cgit v1.2.3