Diffstat (limited to 'mm/mmap.c')
-rw-r--r--  mm/mmap.c  |  413
1 file changed, 219 insertions, 194 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index 13678edaa22c..9a93b054148a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -76,10 +76,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
-static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
+static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
struct vm_area_struct *vma, struct vm_area_struct *prev,
struct vm_area_struct *next, unsigned long start,
- unsigned long end, bool mm_wr_locked);
+ unsigned long end, unsigned long tree_end, bool mm_wr_locked);
static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
@@ -154,18 +154,6 @@ static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi,
return mas_prev(&vmi->mas, min);
}
-static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
- unsigned long start, unsigned long end, gfp_t gfp)
-{
- vmi->mas.index = start;
- vmi->mas.last = end - 1;
- mas_store_gfp(&vmi->mas, NULL, gfp);
- if (unlikely(mas_is_err(&vmi->mas)))
- return -ENOMEM;
-
- return 0;
-}
-
/*
* check_brk_limits() - Use platform specific check of range & verify mlock
* limits.
@@ -182,7 +170,8 @@ static int check_brk_limits(unsigned long addr, unsigned long len)
if (IS_ERR_VALUE(mapped_addr))
return mapped_addr;
- return mlock_future_check(current->mm, current->mm->def_flags, len);
+ return mlock_future_ok(current->mm, current->mm->def_flags, len)
+ ? 0 : -EAGAIN;
}
static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
unsigned long addr, unsigned long request, unsigned long flags);
@@ -300,61 +289,40 @@ out:
}
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
-extern void mt_validate(struct maple_tree *mt);
-extern void mt_dump(const struct maple_tree *mt);
-
-/* Validate the maple tree */
-static void validate_mm_mt(struct mm_struct *mm)
-{
- struct maple_tree *mt = &mm->mm_mt;
- struct vm_area_struct *vma_mt;
-
- MA_STATE(mas, mt, 0, 0);
-
- mt_validate(&mm->mm_mt);
- mas_for_each(&mas, vma_mt, ULONG_MAX) {
- if ((vma_mt->vm_start != mas.index) ||
- (vma_mt->vm_end - 1 != mas.last)) {
- pr_emerg("issue in %s\n", current->comm);
- dump_stack();
- dump_vma(vma_mt);
- pr_emerg("mt piv: %p %lu - %lu\n", vma_mt,
- mas.index, mas.last);
- pr_emerg("mt vma: %p %lu - %lu\n", vma_mt,
- vma_mt->vm_start, vma_mt->vm_end);
-
- mt_dump(mas.tree);
- if (vma_mt->vm_end != mas.last + 1) {
- pr_err("vma: %p vma_mt %lu-%lu\tmt %lu-%lu\n",
- mm, vma_mt->vm_start, vma_mt->vm_end,
- mas.index, mas.last);
- mt_dump(mas.tree);
- }
- VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm);
- if (vma_mt->vm_start != mas.index) {
- pr_err("vma: %p vma_mt %p %lu - %lu doesn't match\n",
- mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end);
- mt_dump(mas.tree);
- }
- VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
- }
- }
-}
-
static void validate_mm(struct mm_struct *mm)
{
int bug = 0;
int i = 0;
struct vm_area_struct *vma;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
-
- validate_mm_mt(mm);
+ VMA_ITERATOR(vmi, mm, 0);
- mas_for_each(&mas, vma, ULONG_MAX) {
+ mt_validate(&mm->mm_mt);
+ for_each_vma(vmi, vma) {
#ifdef CONFIG_DEBUG_VM_RB
struct anon_vma *anon_vma = vma->anon_vma;
struct anon_vma_chain *avc;
+#endif
+ unsigned long vmi_start, vmi_end;
+ bool warn = 0;
+
+ vmi_start = vma_iter_addr(&vmi);
+ vmi_end = vma_iter_end(&vmi);
+ if (VM_WARN_ON_ONCE_MM(vma->vm_end != vmi_end, mm))
+ warn = 1;
+
+ if (VM_WARN_ON_ONCE_MM(vma->vm_start != vmi_start, mm))
+ warn = 1;
+ if (warn) {
+ pr_emerg("issue in %s\n", current->comm);
+ dump_stack();
+ dump_vma(vma);
+ pr_emerg("tree range: %px start %lx end %lx\n", vma,
+ vmi_start, vmi_end - 1);
+ vma_iter_dump_tree(&vmi);
+ }
+
+#ifdef CONFIG_DEBUG_VM_RB
if (anon_vma) {
anon_vma_lock_read(anon_vma);
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
@@ -365,14 +333,13 @@ static void validate_mm(struct mm_struct *mm)
i++;
}
if (i != mm->map_count) {
- pr_emerg("map_count %d mas_for_each %d\n", mm->map_count, i);
+ pr_emerg("map_count %d vma iterator %d\n", mm->map_count, i);
bug = 1;
}
VM_BUG_ON_MM(bug, mm);
}
#else /* !CONFIG_DEBUG_VM_MAPLE_TREE */
-#define validate_mm_mt(root) do { } while (0)
#define validate_mm(mm) do { } while (0)
#endif /* CONFIG_DEBUG_VM_MAPLE_TREE */
@@ -441,7 +408,8 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
VMA_ITERATOR(vmi, mm, 0);
struct address_space *mapping = NULL;
- if (vma_iter_prealloc(&vmi))
+ vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, vma))
return -ENOMEM;
if (vma->vm_file) {
@@ -694,19 +662,16 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
/* Only handles expanding */
VM_WARN_ON(vma->vm_start < start || vma->vm_end > end);
- if (vma_iter_prealloc(vmi))
+ /* Note: vma iterator must be pointing to 'start' */
+ vma_iter_config(vmi, start, end);
+ if (vma_iter_prealloc(vmi, vma))
goto nomem;
vma_prepare(&vp);
vma_adjust_trans_huge(vma, start, end, 0);
- /* VMA iterator points to previous, so set to start if necessary */
- if (vma_iter_addr(vmi) != start)
- vma_iter_set(vmi, start);
-
vma->vm_start = start;
vma->vm_end = end;
vma->vm_pgoff = pgoff;
- /* Note: mas must be pointing to the expanding VMA */
vma_iter_store(vmi, vma);
vma_complete(&vp, vmi, vma->vm_mm);
@@ -733,19 +698,19 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
WARN_ON((vma->vm_start != start) && (vma->vm_end != end));
- if (vma_iter_prealloc(vmi))
+ if (vma->vm_start < start)
+ vma_iter_config(vmi, vma->vm_start, start);
+ else
+ vma_iter_config(vmi, end, vma->vm_end);
+
+ if (vma_iter_prealloc(vmi, NULL))
return -ENOMEM;
init_vma_prep(&vp, vma);
vma_prepare(&vp);
vma_adjust_trans_huge(vma, start, end, 0);
- if (vma->vm_start < start)
- vma_iter_clear(vmi, vma->vm_start, start);
-
- if (vma->vm_end > end)
- vma_iter_clear(vmi, end, vma->vm_end);
-
+ vma_iter_clear(vmi);
vma->vm_start = start;
vma->vm_end = end;
vma->vm_pgoff = pgoff;
@@ -1021,7 +986,17 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
if (err)
return NULL;
- if (vma_iter_prealloc(vmi))
+ if (vma_start < vma->vm_start || vma_end > vma->vm_end)
+ vma_expanded = true;
+
+ if (vma_expanded) {
+ vma_iter_config(vmi, vma_start, vma_end);
+ } else {
+ vma_iter_config(vmi, adjust->vm_start + adj_start,
+ adjust->vm_end);
+ }
+
+ if (vma_iter_prealloc(vmi, vma))
return NULL;
init_multi_vma_prep(&vp, vma, adjust, remove, remove2);
@@ -1030,8 +1005,6 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_prepare(&vp);
vma_adjust_trans_huge(vma, vma_start, vma_end, adj_start);
- if (vma_start < vma->vm_start || vma_end > vma->vm_end)
- vma_expanded = true;
vma->vm_start = vma_start;
vma->vm_end = vma_end;
@@ -1167,21 +1140,21 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
return hint;
}
-int mlock_future_check(struct mm_struct *mm, unsigned long flags,
- unsigned long len)
+bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
+ unsigned long bytes)
{
- unsigned long locked, lock_limit;
+ unsigned long locked_pages, limit_pages;
- /* mlock MCL_FUTURE? */
- if (flags & VM_LOCKED) {
- locked = len >> PAGE_SHIFT;
- locked += mm->locked_vm;
- lock_limit = rlimit(RLIMIT_MEMLOCK);
- lock_limit >>= PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
- return -EAGAIN;
- }
- return 0;
+ if (!(flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
+ return true;
+
+ locked_pages = bytes >> PAGE_SHIFT;
+ locked_pages += mm->locked_vm;
+
+ limit_pages = rlimit(RLIMIT_MEMLOCK);
+ limit_pages >>= PAGE_SHIFT;
+
+ return locked_pages <= limit_pages;
}
static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
@@ -1293,7 +1266,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (!can_do_mlock())
return -EPERM;
- if (mlock_future_check(mm, vm_flags, len))
+ if (!mlock_future_ok(mm, vm_flags, len))
return -EAGAIN;
if (file) {
@@ -1475,6 +1448,48 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
}
#endif /* __ARCH_WANT_SYS_OLD_MMAP */
+static bool vm_ops_needs_writenotify(const struct vm_operations_struct *vm_ops)
+{
+ return vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite);
+}
+
+static bool vma_is_shared_writable(struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & (VM_WRITE | VM_SHARED)) ==
+ (VM_WRITE | VM_SHARED);
+}
+
+static bool vma_fs_can_writeback(struct vm_area_struct *vma)
+{
+ /* No managed pages to writeback. */
+ if (vma->vm_flags & VM_PFNMAP)
+ return false;
+
+ return vma->vm_file && vma->vm_file->f_mapping &&
+ mapping_can_writeback(vma->vm_file->f_mapping);
+}
+
+/*
+ * Does this VMA require the underlying folios to have their dirty state
+ * tracked?
+ */
+bool vma_needs_dirty_tracking(struct vm_area_struct *vma)
+{
+ /* Only shared, writable VMAs require dirty tracking. */
+ if (!vma_is_shared_writable(vma))
+ return false;
+
+ /* Does the filesystem need to be notified? */
+ if (vm_ops_needs_writenotify(vma->vm_ops))
+ return true;
+
+ /*
+ * Even if the filesystem doesn't indicate a need for writenotify, if it
+ * can writeback, dirty tracking is still required.
+ */
+ return vma_fs_can_writeback(vma);
+}
+
/*
* Some shared mappings will want the pages marked read-only
* to track write events. If so, we'll downgrade vm_page_prot
@@ -1483,21 +1498,18 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
*/
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{
- vm_flags_t vm_flags = vma->vm_flags;
- const struct vm_operations_struct *vm_ops = vma->vm_ops;
-
/* If it was private or non-writable, the write bit is already clear */
- if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+ if (!vma_is_shared_writable(vma))
return 0;
/* The backer wishes to know when pages are first written to? */
- if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
+ if (vm_ops_needs_writenotify(vma->vm_ops))
return 1;
/* The open routine did something to the protections that pgprot_modify
* won't preserve? */
if (pgprot_val(vm_page_prot) !=
- pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
+ pgprot_val(vm_pgprot_modify(vm_page_prot, vma->vm_flags)))
return 0;
/*
@@ -1511,13 +1523,8 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
if (userfaultfd_wp(vma))
return 1;
- /* Specialty mapping? */
- if (vm_flags & VM_PFNMAP)
- return 0;
-
/* Can the mapping track the dirty pages? */
- return vma->vm_file && vma->vm_file->f_mapping &&
- mapping_can_writeback(vma->vm_file->f_mapping);
+ return vma_fs_can_writeback(vma);
}
/*
@@ -1911,7 +1918,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
return -ENOMEM;
/* mlock limit tests */
- if (mlock_future_check(mm, vma->vm_flags, grow << PAGE_SHIFT))
+ if (!mlock_future_ok(mm, vma->vm_flags, grow << PAGE_SHIFT))
return -ENOMEM;
/* Check to ensure the stack will not grow into a hugetlb-only region */
@@ -1941,7 +1948,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
struct vm_area_struct *next;
unsigned long gap_addr;
int error = 0;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ MA_STATE(mas, &mm->mm_mt, vma->vm_start, address);
if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
@@ -1966,7 +1973,11 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/* Check that both stack segments have the same anon_vma? */
}
- if (mas_preallocate(&mas, GFP_KERNEL))
+ if (next)
+ mas_prev_range(&mas, address);
+
+ __mas_set_range(&mas, vma->vm_start, address - 1);
+ if (mas_preallocate(&mas, vma, GFP_KERNEL))
return -ENOMEM;
/* We must make sure the anon_vma is allocated. */
@@ -2009,7 +2020,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_end = address;
/* Overwrite old entry in mtree. */
- mas_set_range(&mas, vma->vm_start, address - 1);
mas_store_prealloc(&mas, vma);
anon_vma_interval_tree_post_update_vma(vma);
spin_unlock(&mm->page_table_lock);
@@ -2048,7 +2058,11 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
return -ENOMEM;
}
- if (mas_preallocate(&mas, GFP_KERNEL))
+ if (prev)
+ mas_next_range(&mas, vma->vm_start);
+
+ __mas_set_range(&mas, address, vma->vm_end - 1);
+ if (mas_preallocate(&mas, vma, GFP_KERNEL))
return -ENOMEM;
/* We must make sure the anon_vma is allocated. */
@@ -2092,7 +2106,6 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
vma->vm_start = address;
vma->vm_pgoff -= grow;
/* Overwrite old entry in mtree. */
- mas_set_range(&mas, address, vma->vm_end - 1);
mas_store_prealloc(&mas, vma);
anon_vma_interval_tree_post_update_vma(vma);
spin_unlock(&mm->page_table_lock);
@@ -2205,18 +2218,20 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
*
* Called with the mm semaphore held.
*/
-static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
+static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
struct vm_area_struct *vma, struct vm_area_struct *prev,
- struct vm_area_struct *next,
- unsigned long start, unsigned long end, bool mm_wr_locked)
+ struct vm_area_struct *next, unsigned long start,
+ unsigned long end, unsigned long tree_end, bool mm_wr_locked)
{
struct mmu_gather tlb;
+ unsigned long mt_start = mas->index;
lru_add_drain();
tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm);
- unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked);
- free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+ unmap_vmas(&tlb, mas, vma, start, end, tree_end, mm_wr_locked);
+ mas_set(mas, mt_start);
+ free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
next ? next->vm_start : USER_PGTABLES_CEILING,
mm_wr_locked);
tlb_finish_mmu(&tlb);
@@ -2234,7 +2249,7 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct vm_area_struct *new;
int err;
- validate_mm_mt(vma->vm_mm);
+ validate_mm(vma->vm_mm);
WARN_ON(vma->vm_start >= addr);
WARN_ON(vma->vm_end <= addr);
@@ -2249,10 +2264,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (!new)
return -ENOMEM;
- err = -ENOMEM;
- if (vma_iter_prealloc(vmi))
- goto out_free_vma;
-
if (new_below) {
new->vm_end = addr;
} else {
@@ -2260,6 +2271,11 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
}
+ err = -ENOMEM;
+ vma_iter_config(vmi, new->vm_start, new->vm_end);
+ if (vma_iter_prealloc(vmi, new))
+ goto out_free_vma;
+
err = vma_dup_policy(vma, new);
if (err)
goto out_free_vmi;
@@ -2292,7 +2308,7 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
/* Success. */
if (new_below)
vma_next(vmi);
- validate_mm_mt(vma->vm_mm);
+ validate_mm(vma->vm_mm);
return 0;
out_free_mpol:
@@ -2301,7 +2317,7 @@ out_free_vmi:
vma_iter_free(vmi);
out_free_vma:
vm_area_free(new);
- validate_mm_mt(vma->vm_mm);
+ validate_mm(vma->vm_mm);
return err;
}
@@ -2318,21 +2334,6 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
return __split_vma(vmi, vma, addr, new_below);
}
-static inline int munmap_sidetree(struct vm_area_struct *vma,
- struct ma_state *mas_detach)
-{
- vma_start_write(vma);
- mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1);
- if (mas_store_gfp(mas_detach, vma, GFP_KERNEL))
- return -ENOMEM;
-
- vma_mark_detached(vma, true);
- if (vma->vm_flags & VM_LOCKED)
- vma->vm_mm->locked_vm -= vma_pages(vma);
-
- return 0;
-}
-
/*
* do_vmi_align_munmap() - munmap the aligned region from @start to @end.
* @vmi: The vma iterator
@@ -2354,6 +2355,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct maple_tree mt_detach;
int count = 0;
int error = -ENOMEM;
+ unsigned long locked_vm = 0;
MA_STATE(mas_detach, &mt_detach, 0, 0);
mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_set_external_lock(&mt_detach, &mm->mmap_lock);
@@ -2377,81 +2379,85 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
goto map_count_exceeded;
- error = __split_vma(vmi, vma, start, 0);
+ error = __split_vma(vmi, vma, start, 1);
if (error)
goto start_split_failed;
-
- vma = vma_iter_load(vmi);
}
- prev = vma_prev(vmi);
- if (unlikely((!prev)))
- vma_iter_set(vmi, start);
-
/*
* Detach a range of VMAs from the mm. Using next as a temp variable as
* it is always overwritten.
*/
- for_each_vma_range(*vmi, next, end) {
+ next = vma;
+ do {
/* Does it split the end? */
if (next->vm_end > end) {
error = __split_vma(vmi, next, end, 0);
if (error)
goto end_split_failed;
}
- error = munmap_sidetree(next, &mas_detach);
- if (error)
- goto munmap_sidetree_failed;
+ vma_start_write(next);
+ if (mas_store_gfp(&mas_detach, next, GFP_KERNEL))
+ goto munmap_gather_failed;
+ vma_mark_detached(next, true);
+ if (next->vm_flags & VM_LOCKED)
+ locked_vm += vma_pages(next);
count++;
+ if (unlikely(uf)) {
+ /*
+ * If userfaultfd_unmap_prep returns an error the vmas
+ * will remain split, but userland will get a
+ * highly unexpected error anyway. This is no
+ * different than the case where the first of the two
+ * __split_vma fails, but we don't undo the first
+ * split, despite we could. This is unlikely enough
+ * failure that it's not worth optimizing it for.
+ */
+ error = userfaultfd_unmap_prep(next, start, end, uf);
+
+ if (error)
+ goto userfaultfd_error;
+ }
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
BUG_ON(next->vm_start < start);
BUG_ON(next->vm_start > end);
#endif
- }
-
- next = vma_next(vmi);
- if (unlikely(uf)) {
- /*
- * If userfaultfd_unmap_prep returns an error the vmas
- * will remain split, but userland will get a
- * highly unexpected error anyway. This is no
- * different than the case where the first of the two
- * __split_vma fails, but we don't undo the first
- * split, despite we could. This is unlikely enough
- * failure that it's not worth optimizing it for.
- */
- error = userfaultfd_unmap_prep(mm, start, end, uf);
-
- if (error)
- goto userfaultfd_error;
- }
+ } for_each_vma_range(*vmi, next, end);
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
/* Make sure no VMAs are about to be lost. */
{
- MA_STATE(test, &mt_detach, start, end - 1);
+ MA_STATE(test, &mt_detach, 0, 0);
struct vm_area_struct *vma_mas, *vma_test;
int test_count = 0;
vma_iter_set(vmi, start);
rcu_read_lock();
- vma_test = mas_find(&test, end - 1);
+ vma_test = mas_find(&test, count - 1);
for_each_vma_range(*vmi, vma_mas, end) {
BUG_ON(vma_mas != vma_test);
test_count++;
- vma_test = mas_next(&test, end - 1);
+ vma_test = mas_next(&test, count - 1);
}
rcu_read_unlock();
BUG_ON(count != test_count);
}
#endif
- /* Point of no return */
- vma_iter_set(vmi, start);
+ error = -ENOMEM;
+ while (vma_iter_addr(vmi) > start)
+ vma_iter_prev_range(vmi);
+
if (vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL))
- return -ENOMEM;
+ goto clear_tree_failed;
+ mm->locked_vm -= locked_vm;
mm->map_count -= count;
+ prev = vma_iter_prev_range(vmi);
+ next = vma_next(vmi);
+ if (next)
+ vma_iter_prev_range(vmi);
+
/*
* Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
* VM_GROWSUP VMA. Such VMAs can change their size under
@@ -2470,9 +2476,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
* We can free page tables without write-locking mmap_lock because VMAs
* were isolated before we downgraded mmap_lock.
*/
- unmap_region(mm, &mt_detach, vma, prev, next, start, end, !downgrade);
+ mas_set(&mas_detach, 1);
+ unmap_region(mm, &mas_detach, vma, prev, next, start, end, count,
+ !downgrade);
/* Statistics and freeing VMAs */
- mas_set(&mas_detach, start);
+ mas_set(&mas_detach, 0);
remove_mt(mm, &mas_detach);
__mt_destroy(&mt_detach);
@@ -2480,9 +2488,14 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
validate_mm(mm);
return downgrade ? 1 : 0;
+clear_tree_failed:
userfaultfd_error:
-munmap_sidetree_failed:
+munmap_gather_failed:
end_split_failed:
+ mas_set(&mas_detach, 0);
+ mas_for_each(&mas_detach, next, end)
+ vma_mark_detached(next, false);
+
__mt_destroy(&mt_detach);
start_split_failed:
map_count_exceeded:
@@ -2590,8 +2603,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
next = vma_next(&vmi);
prev = vma_prev(&vmi);
- if (vm_flags & VM_SPECIAL)
+ if (vm_flags & VM_SPECIAL) {
+ if (prev)
+ vma_iter_next_range(&vmi);
+
goto cannot_expand;
+ }
/* Attempt to expand an old mapping */
/* Check next */
@@ -2601,6 +2618,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
merge_end = next->vm_end;
vma = next;
vm_pgoff = next->vm_pgoff - pglen;
+
}
/* Check prev */
@@ -2612,9 +2630,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
merge_start = prev->vm_start;
vma = prev;
vm_pgoff = prev->vm_pgoff;
+ } else if (prev) {
+ vma_iter_next_range(&vmi);
}
-
/* Actually expand, if possible */
if (vma &&
!vma_expand(&vmi, vma, merge_start, merge_end, vm_pgoff, next)) {
@@ -2634,7 +2653,7 @@ cannot_expand:
goto unacct_error;
}
- vma_iter_set(&vmi, addr);
+ vma_iter_config(&vmi, addr, end);
vma->vm_start = addr;
vma->vm_end = end;
vm_flags_init(vma, vm_flags);
@@ -2661,7 +2680,7 @@ cannot_expand:
if (WARN_ON((addr != vma->vm_start)))
goto close_and_free_vma;
- vma_iter_set(&vmi, addr);
+ vma_iter_config(&vmi, addr, end);
/*
* If vm_flags changed after call_mmap(), we should try merge
* vma again as we may succeed this time.
@@ -2708,7 +2727,7 @@ cannot_expand:
goto close_and_free_vma;
error = -ENOMEM;
- if (vma_iter_prealloc(&vmi))
+ if (vma_iter_prealloc(&vmi, vma))
goto close_and_free_vma;
if (vma->vm_file)
@@ -2777,9 +2796,10 @@ unmap_and_free_vma:
fput(vma->vm_file);
vma->vm_file = NULL;
+ vma_iter_set(&vmi, vma->vm_end);
/* Undo any partial mapping done by a device driver. */
- unmap_region(mm, &mm->mm_mt, vma, prev, next, vma->vm_start,
- vma->vm_end, true);
+ unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start,
+ vma->vm_end, vma->vm_end, true);
}
if (file && (vm_flags & VM_SHARED))
mapping_unmap_writable(file->f_mapping);
@@ -2936,7 +2956,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
arch_unmap(mm, start, end);
ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
- validate_mm_mt(mm);
+ validate_mm(mm);
return ret;
}
@@ -2958,7 +2978,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct mm_struct *mm = current->mm;
struct vma_prepare vp;
- validate_mm_mt(mm);
+ validate_mm(mm);
/*
* Check against address space limits by the changed size
* Note: This happens *after* clearing old mappings in some code paths.
@@ -2980,7 +3000,8 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (vma && vma->vm_end == addr && !vma_policy(vma) &&
can_vma_merge_after(vma, flags, NULL, NULL,
addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
- if (vma_iter_prealloc(vmi))
+ vma_iter_config(vmi, vma->vm_start, addr + len);
+ if (vma_iter_prealloc(vmi, vma))
goto unacct_fail;
init_vma_prep(&vp, vma);
@@ -2995,6 +3016,8 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
goto out;
}
+ if (vma)
+ vma_iter_next_range(vmi);
/* create a vma struct for an anonymous mapping */
vma = vm_area_alloc(mm);
if (!vma)
@@ -3108,7 +3131,7 @@ void exit_mmap(struct mm_struct *mm)
tlb_gather_mmu_fullmm(&tlb, mm);
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
- unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX, false);
+ unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
mmap_read_unlock(mm);
/*
@@ -3118,7 +3141,8 @@ void exit_mmap(struct mm_struct *mm)
set_bit(MMF_OOM_SKIP, &mm->flags);
mmap_write_lock(mm);
mt_clear_in_rcu(&mm->mm_mt);
- free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
+ mas_set(&mas, vma->vm_end);
+ free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS,
USER_PGTABLES_CEILING, true);
tlb_finish_mmu(&tlb);
@@ -3127,6 +3151,7 @@ void exit_mmap(struct mm_struct *mm)
* enabled, without holding any MM locks besides the unreachable
* mmap_write_lock.
*/
+ mas_set(&mas, vma->vm_end);
do {
if (vma->vm_flags & VM_ACCOUNT)
nr_accounted += vma_pages(vma);
@@ -3199,7 +3224,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
bool faulted_in_anon_vma = true;
VMA_ITERATOR(vmi, mm, addr);
- validate_mm_mt(mm);
+ validate_mm(mm);
/*
* If anonymous vma has not yet been faulted, update new pgoff
* to match new location, to increase its chance of merging.
@@ -3258,7 +3283,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
goto out_vma_link;
*need_rmap_locks = false;
}
- validate_mm_mt(mm);
+ validate_mm(mm);
return new_vma;
out_vma_link:
@@ -3274,7 +3299,7 @@ out_free_mempol:
out_free_vma:
vm_area_free(new_vma);
out:
- validate_mm_mt(mm);
+ validate_mm(mm);
return NULL;
}
@@ -3411,7 +3436,7 @@ static struct vm_area_struct *__install_special_mapping(
int ret;
struct vm_area_struct *vma;
- validate_mm_mt(mm);
+ validate_mm(mm);
vma = vm_area_alloc(mm);
if (unlikely(vma == NULL))
return ERR_PTR(-ENOMEM);
@@ -3434,12 +3459,12 @@ static struct vm_area_struct *__install_special_mapping(
perf_event_mmap(vma);
- validate_mm_mt(mm);
+ validate_mm(mm);
return vma;
out:
vm_area_free(vma);
- validate_mm_mt(mm);
+ validate_mm(mm);
return ERR_PTR(ret);
}