From d72dc8a25afc71ce90ee92bdd77550e9beb85d4d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 16:21:18 -0700 Subject: mm: make pagevec_lookup() update index Make pagevec_lookup() (and underlying find_get_pages()) update index to the next page where iteration should continue. Most callers want this and also pagevec_lookup_tag() already does this. Link: http://lkml.kernel.org/r/20170726114704.7626-3-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 28d2753be094..b9678ce91e25 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -401,7 +401,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, const pgoff_t end = lend >> huge_page_shift(h); struct vm_area_struct pseudo_vma; struct pagevec pvec; - pgoff_t next; + pgoff_t next, index; int i, freed = 0; long lookup_nr = PAGEVEC_SIZE; bool truncate_op = (lend == LLONG_MAX); @@ -420,7 +420,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, /* * When no more pages are found, we are done. */ - if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) + if (!pagevec_lookup(&pvec, mapping, &next, lookup_nr)) break; for (i = 0; i < pagevec_count(&pvec); ++i) { @@ -432,13 +432,13 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, * only possible in the punch hole case as end is * max page offset in the truncate case. */ - next = page->index; - if (next >= end) + index = page->index; + if (index >= end) break; hash = hugetlb_fault_mutex_hash(h, current->mm, &pseudo_vma, - mapping, next, 0); + mapping, index, 0); mutex_lock(&hugetlb_fault_mutex_table[hash]); /* @@ -455,8 +455,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, i_mmap_lock_write(mapping); hugetlb_vmdelete_list(&mapping->i_mmap, - next * pages_per_huge_page(h), - (next + 1) * pages_per_huge_page(h)); + index * pages_per_huge_page(h), + (index + 1) * pages_per_huge_page(h)); i_mmap_unlock_write(mapping); } @@ -475,14 +475,13 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, freed++; if (!truncate_op) { if (unlikely(hugetlb_unreserve_pages(inode, - next, next + 1, 1))) + index, index + 1, 1))) hugetlb_fix_reserve_counts(inode); } unlock_page(page); mutex_unlock(&hugetlb_fault_mutex_table[hash]); } - ++next; huge_pagevec_release(&pvec); cond_resched(); } -- cgit v1.2.3 From 48f2301c07b299f5bcd64563d90215a8ba73e154 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 16:21:33 -0700 Subject: hugetlbfs: use pagevec_lookup_range() in remove_inode_hugepages() We want only pages from given range in remove_inode_hugepages(). Use pagevec_lookup_range() instead of pagevec_lookup(). Link: http://lkml.kernel.org/r/20170726114704.7626-8-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Mike Kravetz Cc: Nadia Yvette Chambers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b9678ce91e25..8931236f3ef4 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -403,7 +403,6 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, struct pagevec pvec; pgoff_t next, index; int i, freed = 0; - long lookup_nr = PAGEVEC_SIZE; bool truncate_op = (lend == LLONG_MAX); memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); @@ -411,31 +410,18 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, pagevec_init(&pvec, 0); next = start; while (next < end) { - /* - * Don't grab more pages than the number left in the range. - */ - if (end - next < lookup_nr) - lookup_nr = end - next; - /* * When no more pages are found, we are done. */ - if (!pagevec_lookup(&pvec, mapping, &next, lookup_nr)) + if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1, + PAGEVEC_SIZE)) break; for (i = 0; i < pagevec_count(&pvec); ++i) { struct page *page = pvec.pages[i]; u32 hash; - /* - * The page (index) could be beyond end. This is - * only possible in the punch hole case as end is - * max page offset in the truncate case. - */ index = page->index; - if (index >= end) - break; - hash = hugetlb_fault_mutex_hash(h, current->mm, &pseudo_vma, mapping, index, 0); -- cgit v1.2.3 From 397162ffa2ed1cadffe05c324c6ddc53647f9c62 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 16:21:43 -0700 Subject: mm: remove nr_pages argument from pagevec_lookup{,_range}() All users of pagevec_lookup() and pagevec_lookup_range() now pass PAGEVEC_SIZE as a desired number of pages. Just drop the argument. Link: http://lkml.kernel.org/r/20170726114704.7626-11-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 5 ++--- fs/ext4/file.c | 2 +- fs/ext4/inode.c | 5 ++--- fs/fscache/page.c | 2 +- fs/hugetlbfs/inode.c | 3 +-- fs/nilfs2/page.c | 2 +- include/linux/pagevec.h | 7 +++---- mm/swap.c | 5 ++--- 8 files changed, 13 insertions(+), 18 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/fs/buffer.c b/fs/buffer.c index 8770b58ca569..50da0e102ca0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1633,8 +1633,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits); pagevec_init(&pvec, 0); - while (pagevec_lookup_range(&pvec, bd_mapping, &index, end, - PAGEVEC_SIZE)) { + while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) { count = pagevec_count(&pvec); for (i = 0; i < count; i++) { struct page *page = pvec.pages[i]; @@ -3552,7 +3551,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length, unsigned nr_pages, i; nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index, - end - 1, PAGEVEC_SIZE); + end - 1); if (nr_pages == 0) break; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 726344809721..484c1326dd6a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -468,7 +468,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unsigned long nr_pages; nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, - &index, end, PAGEVEC_SIZE); + &index, end); if (nr_pages == 0) break; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6103ce0430df..ce68a3483666 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1720,8 +1720,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, pagevec_init(&pvec, 0); while (index <= end) { - nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end, - PAGEVEC_SIZE); + nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end); if (nr_pages == 0) break; for (i = 0; i < nr_pages; i++) { @@ -2348,7 +2347,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) pagevec_init(&pvec, 0); while (start <= end) { nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, - &start, end, PAGEVEC_SIZE); + &start, end); if (nr_pages == 0) break; for (i = 0; i < nr_pages; i++) { diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 83018861dcd2..0ad3fd3ad0b4 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -1178,7 +1178,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, pagevec_init(&pvec, 0); next = 0; do { - if (!pagevec_lookup(&pvec, mapping, &next, PAGEVEC_SIZE)) + if (!pagevec_lookup(&pvec, mapping, &next)) break; for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8931236f3ef4..7c02b3f738e1 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -413,8 +413,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, /* * When no more pages are found, we are done. */ - if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1, - PAGEVEC_SIZE)) + if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1)) break; for (i = 0; i < pagevec_count(&pvec); ++i) { diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 382a36c72d72..8616c46d33da 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -312,7 +312,7 @@ void nilfs_copy_back_pages(struct address_space *dmap, pagevec_init(&pvec, 0); repeat: - n = pagevec_lookup(&pvec, smap, &index, PAGEVEC_SIZE); + n = pagevec_lookup(&pvec, smap, &index); if (!n) return; diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 7df056910437..4dcd5506f1ed 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -29,13 +29,12 @@ unsigned pagevec_lookup_entries(struct pagevec *pvec, void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, - pgoff_t *start, pgoff_t end, unsigned nr_pages); + pgoff_t *start, pgoff_t end); static inline unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, - pgoff_t *start, unsigned nr_pages) + pgoff_t *start) { - return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1, - nr_pages); + return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); } unsigned pagevec_lookup_tag(struct pagevec *pvec, diff --git a/mm/swap.c b/mm/swap.c index e06e9aa2478e..62d96b8e5eb3 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -967,10 +967,9 @@ void pagevec_remove_exceptionals(struct pagevec *pvec) * reached. */ unsigned pagevec_lookup_range(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *start, pgoff_t end, - unsigned nr_pages) + struct address_space *mapping, pgoff_t *start, pgoff_t end) { - pvec->nr = find_get_pages_range(mapping, start, end, nr_pages, + pvec->nr = find_get_pages_range(mapping, start, end, PAGEVEC_SIZE, pvec->pages); return pagevec_count(pvec); } -- cgit v1.2.3 From 2916ecc0f9d435d849c98f4da50e453124c87531 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:06 -0700 Subject: mm/migrate: new migrate mode MIGRATE_SYNC_NO_COPY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new migration mode that allow to offload the copy to a device DMA engine. This changes the workflow of migration and not all address_space migratepage callback can support this. This is intended to be use by migrate_vma() which itself is use for thing like HMM (see include/linux/hmm.h). No additional per-filesystem migratepage testing is needed. I disables MIGRATE_SYNC_NO_COPY in all problematic migratepage() callback and i added comment in those to explain why (part of this patch). The commit message is unclear it should say that any callback that wish to support this new mode need to be aware of the difference in the migration flow from other mode. Some of these callbacks do extra locking while copying (aio, zsmalloc, balloon, ...) and for DMA to be effective you want to copy multiple pages in one DMA operations. But in the problematic case you can not easily hold the extra lock accross multiple call to this callback. Usual flow is: For each page { 1 - lock page 2 - call migratepage() callback 3 - (extra locking in some migratepage() callback) 4 - migrate page state (freeze refcount, update page cache, buffer head, ...) 5 - copy page 6 - (unlock any extra lock of migratepage() callback) 7 - return from migratepage() callback 8 - unlock page } The new mode MIGRATE_SYNC_NO_COPY: 1 - lock multiple pages For each page { 2 - call migratepage() callback 3 - abort in all problematic migratepage() callback 4 - migrate page state (freeze refcount, update page cache, buffer head, ...) } // finished all calls to migratepage() callback 5 - DMA copy multiple pages 6 - unlock all the pages To support MIGRATE_SYNC_NO_COPY in the problematic case we would need a new callback migratepages() (for instance) that deals with multiple pages in one transaction. Because the problematic cases are not important for current usage I did not wanted to complexify this patchset even more for no good reason. Link: http://lkml.kernel.org/r/20170817000548.32038-14-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Mark Hairgrove Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 8 +++++++ fs/f2fs/data.c | 5 ++++- fs/hugetlbfs/inode.c | 5 ++++- fs/ubifs/file.c | 5 ++++- include/linux/migrate.h | 5 +++++ include/linux/migrate_mode.h | 5 +++++ mm/balloon_compaction.c | 8 +++++++ mm/migrate.c | 52 ++++++++++++++++++++++++++++++++++---------- mm/zsmalloc.c | 8 +++++++ 9 files changed, 86 insertions(+), 15 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/fs/aio.c b/fs/aio.c index 8f0127526299..b5d69f28d8b1 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -373,6 +373,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; int rc; + /* + * We cannot support the _NO_COPY case here, because copy needs to + * happen under the ctx->completion_lock. That does not work with the + * migration workflow of MIGRATE_SYNC_NO_COPY. + */ + if (mode == MIGRATE_SYNC_NO_COPY) + return -EINVAL; + rc = 0; /* mapping->private_lock here protects against the kioctx teardown. */ diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a791aac4c5af..fb96bb71da00 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2253,7 +2253,10 @@ int f2fs_migrate_page(struct address_space *mapping, SetPagePrivate(newpage); set_page_private(newpage, page_private(page)); - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7c02b3f738e1..8c6f4b8f910f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -830,7 +830,10 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, rc = migrate_huge_page_move_mapping(mapping, newpage, page); if (rc != MIGRATEPAGE_SUCCESS) return rc; - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index f90a466ea5db..a02aa59d1e24 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1490,7 +1490,10 @@ static int ubifs_migrate_page(struct address_space *mapping, SetPagePrivate(newpage); } - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } #endif diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ce15989521a1..7db4c812a2a6 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -72,6 +72,7 @@ extern void putback_movable_page(struct page *page); extern int migrate_prep(void); extern int migrate_prep_local(void); +extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); @@ -92,6 +93,10 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } +static inline void migrate_page_states(struct page *newpage, struct page *page) +{ +} + static inline void migrate_page_copy(struct page *newpage, struct page *page) {} diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index ebf3d89a3919..bdf66af9b937 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -6,11 +6,16 @@ * on most operations but not ->writepage as the potential stall time * is too significant * MIGRATE_SYNC will block when migrating pages + * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages + * with the CPU. Instead, page copy happens outside the migratepage() + * callback and is likely using a DMA engine. See migrate_vma() and HMM + * (mm/hmm.c) for users of this mode. */ enum migrate_mode { MIGRATE_ASYNC, MIGRATE_SYNC_LIGHT, MIGRATE_SYNC, + MIGRATE_SYNC_NO_COPY, }; #endif /* MIGRATE_MODE_H_INCLUDED */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index b06d9fe23a28..68d28924ba79 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -139,6 +139,14 @@ int balloon_page_migrate(struct address_space *mapping, { struct balloon_dev_info *balloon = balloon_page_device(page); + /* + * We can not easily support the no copy case here so ignore it as it + * is unlikely to be use with ballon pages. See include/linux/hmm.h for + * user of the MIGRATE_SYNC_NO_COPY mode. + */ + if (mode == MIGRATE_SYNC_NO_COPY) + return -EINVAL; + VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); diff --git a/mm/migrate.c b/mm/migrate.c index 1088cef6ef8b..71de36cfb673 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -634,15 +634,10 @@ static void copy_huge_page(struct page *dst, struct page *src) /* * Copy the page to its new location */ -void migrate_page_copy(struct page *newpage, struct page *page) +void migrate_page_states(struct page *newpage, struct page *page) { int cpupid; - if (PageHuge(page) || PageTransHuge(page)) - copy_huge_page(newpage, page); - else - copy_highpage(newpage, page); - if (PageError(page)) SetPageError(newpage); if (PageReferenced(page)) @@ -696,6 +691,17 @@ void migrate_page_copy(struct page *newpage, struct page *page) mem_cgroup_migrate(page, newpage); } +EXPORT_SYMBOL(migrate_page_states); + +void migrate_page_copy(struct page *newpage, struct page *page) +{ + if (PageHuge(page) || PageTransHuge(page)) + copy_huge_page(newpage, page); + else + copy_highpage(newpage, page); + + migrate_page_states(newpage, page); +} EXPORT_SYMBOL(migrate_page_copy); /************************************************************ @@ -721,7 +727,10 @@ int migrate_page(struct address_space *mapping, if (rc != MIGRATEPAGE_SUCCESS) return rc; - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } EXPORT_SYMBOL(migrate_page); @@ -771,12 +780,15 @@ int buffer_migrate_page(struct address_space *mapping, SetPagePrivate(newpage); - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); bh = head; do { unlock_buffer(bh); - put_bh(bh); + put_bh(bh); bh = bh->b_this_page; } while (bh != head); @@ -835,8 +847,13 @@ static int fallback_migrate_page(struct address_space *mapping, { if (PageDirty(page)) { /* Only writeback pages in full synchronous migration */ - if (mode != MIGRATE_SYNC) + switch (mode) { + case MIGRATE_SYNC: + case MIGRATE_SYNC_NO_COPY: + break; + default: return -EBUSY; + } return writeout(mapping, page); } @@ -973,7 +990,11 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * the retry loop is too short and in the sync-light case, * the overhead of stalling is too much */ - if (mode != MIGRATE_SYNC) { + switch (mode) { + case MIGRATE_SYNC: + case MIGRATE_SYNC_NO_COPY: + break; + default: rc = -EBUSY; goto out_unlock; } @@ -1243,8 +1264,15 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, return -ENOMEM; if (!trylock_page(hpage)) { - if (!force || mode != MIGRATE_SYNC) + if (!force) goto out; + switch (mode) { + case MIGRATE_SYNC: + case MIGRATE_SYNC_NO_COPY: + break; + default: + goto out; + } lock_page(hpage); } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 62457eb82330..5ad75ec4151c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1969,6 +1969,14 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage, unsigned int obj_idx; int ret = -EAGAIN; + /* + * We cannot support the _NO_COPY case here, because copy needs to + * happen under the zs lock, which does not work with + * MIGRATE_SYNC_NO_COPY workflow. + */ + if (mode == MIGRATE_SYNC_NO_COPY) + return -EINVAL; + VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); -- cgit v1.2.3 From f808c13fd3738948e10196496959871130612b61 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 8 Sep 2017 16:15:08 -0700 Subject: lib/interval_tree: fast overlap detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow interval trees to quickly check for overlaps to avoid unnecesary tree lookups in interval_tree_iter_first(). As of this patch, all interval tree flavors will require using a 'rb_root_cached' such that we can have the leftmost node easily available. While most users will make use of this feature, those with special functions (in addition to the generic insert, delete, search calls) will avoid using the cached option as they can do funky things with insertions -- for example, vma_interval_tree_insert_after(). [jglisse@redhat.com: fix deadlock from typo vm_lock_anon_vma()] Link: http://lkml.kernel.org/r/20170808225719.20723-1-jglisse@redhat.com Link: http://lkml.kernel.org/r/20170719014603.19029-12-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Signed-off-by: Jérôme Glisse Acked-by: Christian König Acked-by: Peter Zijlstra (Intel) Acked-by: Doug Ledford Acked-by: Michael S. Tsirkin Cc: David Airlie Cc: Jason Wang Cc: Christian Benvenuti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 8 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/drm_mm.c | 19 +++++---- drivers/gpu/drm/drm_vma_manager.c | 2 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_mn.c | 8 ++-- drivers/gpu/drm/radeon/radeon_vm.c | 7 ++-- drivers/infiniband/core/umem_rbtree.c | 4 +- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/hw/hfi1/mmu_rb.c | 10 ++--- drivers/infiniband/hw/usnic/usnic_uiom.c | 6 +-- drivers/infiniband/hw/usnic/usnic_uiom.h | 2 +- .../infiniband/hw/usnic/usnic_uiom_interval_tree.c | 15 +++---- .../infiniband/hw/usnic/usnic_uiom_interval_tree.h | 12 +++--- drivers/vhost/vhost.c | 2 +- drivers/vhost/vhost.h | 2 +- fs/hugetlbfs/inode.c | 6 +-- fs/inode.c | 2 +- include/drm/drm_mm.h | 2 +- include/linux/fs.h | 4 +- include/linux/interval_tree.h | 8 ++-- include/linux/interval_tree_generic.h | 46 +++++++++++++++++----- include/linux/mm.h | 17 ++++---- include/linux/rmap.h | 4 +- include/rdma/ib_umem_odp.h | 11 ++++-- include/rdma/ib_verbs.h | 2 +- lib/interval_tree_test.c | 4 +- mm/interval_tree.c | 10 ++--- mm/memory.c | 4 +- mm/mmap.c | 10 ++--- mm/rmap.c | 4 +- 33 files changed, 145 insertions(+), 105 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index e1cde6b80027..3b0f2ec6eec7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -51,7 +51,7 @@ struct amdgpu_mn { /* objects protected by lock */ struct mutex lock; - struct rb_root objects; + struct rb_root_cached objects; }; struct amdgpu_mn_node { @@ -76,8 +76,8 @@ static void amdgpu_mn_destroy(struct work_struct *work) mutex_lock(&adev->mn_lock); mutex_lock(&rmn->lock); hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, - it.rb) { + rbtree_postorder_for_each_entry_safe(node, next_node, + &rmn->objects.rb_root, it.rb) { list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { bo->mn = NULL; list_del_init(&bo->mn_list); @@ -221,7 +221,7 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) rmn->mm = mm; rmn->mn.ops = &amdgpu_mn_ops; mutex_init(&rmn->lock); - rmn->objects = RB_ROOT; + rmn->objects = RB_ROOT_CACHED; r = __mmu_notifier_register(&rmn->mn, mm); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6b1343e5541d..b9a5a77eedaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2475,7 +2475,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u64 flags; uint64_t init_pde_value = 0; - vm->va = RB_ROOT; + vm->va = RB_ROOT_CACHED; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; @@ -2596,10 +2596,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amd_sched_entity_fini(vm->entity.sched, &vm->entity); - if (!RB_EMPTY_ROOT(&vm->va)) { + if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { + rbtree_postorder_for_each_entry_safe(mapping, tmp, + &vm->va.rb_root, rb) { list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ba6691b58ee7..6716355403ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -118,7 +118,7 @@ struct amdgpu_vm_pt { struct amdgpu_vm { /* tree of virtual addresses mapped */ - struct rb_root va; + struct rb_root_cached va; /* protecting invalidated */ spinlock_t status_lock; diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index f794089d30ac..61a1c8ea74bc 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -169,7 +169,7 @@ INTERVAL_TREE_DEFINE(struct drm_mm_node, rb, struct drm_mm_node * __drm_mm_interval_first(const struct drm_mm *mm, u64 start, u64 last) { - return drm_mm_interval_tree_iter_first((struct rb_root *)&mm->interval_tree, + return drm_mm_interval_tree_iter_first((struct rb_root_cached *)&mm->interval_tree, start, last) ?: (struct drm_mm_node *)&mm->head_node; } EXPORT_SYMBOL(__drm_mm_interval_first); @@ -180,6 +180,7 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node, struct drm_mm *mm = hole_node->mm; struct rb_node **link, *rb; struct drm_mm_node *parent; + bool leftmost = true; node->__subtree_last = LAST(node); @@ -196,9 +197,10 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node, rb = &hole_node->rb; link = &hole_node->rb.rb_right; + leftmost = false; } else { rb = NULL; - link = &mm->interval_tree.rb_node; + link = &mm->interval_tree.rb_root.rb_node; } while (*link) { @@ -208,14 +210,15 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node, parent->__subtree_last = node->__subtree_last; if (node->start < parent->start) link = &parent->rb.rb_left; - else + else { link = &parent->rb.rb_right; + leftmost = true; + } } rb_link_node(&node->rb, rb, link); - rb_insert_augmented(&node->rb, - &mm->interval_tree, - &drm_mm_interval_tree_augment); + rb_insert_augmented_cached(&node->rb, &mm->interval_tree, leftmost, + &drm_mm_interval_tree_augment); } #define RB_INSERT(root, member, expr) do { \ @@ -577,7 +580,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) *new = *old; list_replace(&old->node_list, &new->node_list); - rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree); + rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree.rb_root); if (drm_mm_hole_follows(old)) { list_replace(&old->hole_stack, &new->hole_stack); @@ -863,7 +866,7 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size) mm->color_adjust = NULL; INIT_LIST_HEAD(&mm->hole_stack); - mm->interval_tree = RB_ROOT; + mm->interval_tree = RB_ROOT_CACHED; mm->holes_size = RB_ROOT; mm->holes_addr = RB_ROOT; diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c index d9100b565198..28f1226576f8 100644 --- a/drivers/gpu/drm/drm_vma_manager.c +++ b/drivers/gpu/drm/drm_vma_manager.c @@ -147,7 +147,7 @@ struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_m struct rb_node *iter; unsigned long offset; - iter = mgr->vm_addr_space_mm.interval_tree.rb_node; + iter = mgr->vm_addr_space_mm.interval_tree.rb_root.rb_node; best = NULL; while (likely(iter)) { diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index f152a38d7079..23fd18bd1b56 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -49,7 +49,7 @@ struct i915_mmu_notifier { spinlock_t lock; struct hlist_node node; struct mmu_notifier mn; - struct rb_root objects; + struct rb_root_cached objects; struct workqueue_struct *wq; }; @@ -123,7 +123,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct interval_tree_node *it; LIST_HEAD(cancelled); - if (RB_EMPTY_ROOT(&mn->objects)) + if (RB_EMPTY_ROOT(&mn->objects.rb_root)) return; /* interval ranges are inclusive, but invalidate range is exclusive */ @@ -172,7 +172,7 @@ i915_mmu_notifier_create(struct mm_struct *mm) spin_lock_init(&mn->lock); mn->mn.ops = &i915_gem_userptr_notifier; - mn->objects = RB_ROOT; + mn->objects = RB_ROOT_CACHED; mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0); if (mn->wq == NULL) { kfree(mn); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ec63bc5e9de7..8cbaeec090c9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -924,7 +924,7 @@ struct radeon_vm_id { struct radeon_vm { struct mutex mutex; - struct rb_root va; + struct rb_root_cached va; /* protecting invalidated and freed */ spinlock_t status_lock; diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 896f2cf51e4e..1d62288b7ee3 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -50,7 +50,7 @@ struct radeon_mn { /* objects protected by lock */ struct mutex lock; - struct rb_root objects; + struct rb_root_cached objects; }; struct radeon_mn_node { @@ -75,8 +75,8 @@ static void radeon_mn_destroy(struct work_struct *work) mutex_lock(&rdev->mn_lock); mutex_lock(&rmn->lock); hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, - it.rb) { + rbtree_postorder_for_each_entry_safe(node, next_node, + &rmn->objects.rb_root, it.rb) { interval_tree_remove(&node->it, &rmn->objects); list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { @@ -205,7 +205,7 @@ static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) rmn->mm = mm; rmn->mn.ops = &radeon_mn_ops; mutex_init(&rmn->lock); - rmn->objects = RB_ROOT; + rmn->objects = RB_ROOT_CACHED; r = __mmu_notifier_register(&rmn->mn, mm); if (r) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 5e82b408d522..e5c0e635e371 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -1185,7 +1185,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) vm->ids[i].last_id_use = NULL; } mutex_init(&vm->mutex); - vm->va = RB_ROOT; + vm->va = RB_ROOT_CACHED; spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->freed); @@ -1232,10 +1232,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) struct radeon_bo_va *bo_va, *tmp; int i, r; - if (!RB_EMPTY_ROOT(&vm->va)) { + if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(rdev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) { + rbtree_postorder_for_each_entry_safe(bo_va, tmp, + &vm->va.rb_root, it.rb) { interval_tree_remove(&bo_va->it, &vm->va); r = radeon_bo_reserve(bo_va->bo, false); if (!r) { diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c index d176597b4d78..fc801920e341 100644 --- a/drivers/infiniband/core/umem_rbtree.c +++ b/drivers/infiniband/core/umem_rbtree.c @@ -72,7 +72,7 @@ INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, /* @last is not a part of the interval. See comment for function * node_last. */ -int rbt_ib_umem_for_each_in_range(struct rb_root *root, +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, u64 start, u64 last, umem_call_back cb, void *cookie) @@ -95,7 +95,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root, } EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length) { struct umem_odp_node *node; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e0cb99860934..4ab30d832ac5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -118,7 +118,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->closing = 0; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - ucontext->umem_tree = RB_ROOT; + ucontext->umem_tree = RB_ROOT_CACHED; init_rwsem(&ucontext->umem_rwsem); ucontext->odp_mrs_count = 0; INIT_LIST_HEAD(&ucontext->no_private_counters); diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 2f0d285dc278..175002c046ed 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -54,7 +54,7 @@ struct mmu_rb_handler { struct mmu_notifier mn; - struct rb_root root; + struct rb_root_cached root; void *ops_arg; spinlock_t lock; /* protect the RB tree */ struct mmu_rb_ops *ops; @@ -108,7 +108,7 @@ int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, if (!handlr) return -ENOMEM; - handlr->root = RB_ROOT; + handlr->root = RB_ROOT_CACHED; handlr->ops = ops; handlr->ops_arg = ops_arg; INIT_HLIST_NODE(&handlr->mn.hlist); @@ -149,9 +149,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) INIT_LIST_HEAD(&del_list); spin_lock_irqsave(&handler->lock, flags); - while ((node = rb_first(&handler->root))) { + while ((node = rb_first_cached(&handler->root))) { rbnode = rb_entry(node, struct mmu_rb_node, node); - rb_erase(node, &handler->root); + rb_erase_cached(node, &handler->root); /* move from LRU list to delete list */ list_move(&rbnode->list, &del_list); } @@ -300,7 +300,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); - struct rb_root *root = &handler->root; + struct rb_root_cached *root = &handler->root; struct mmu_rb_node *node, *ptr = NULL; unsigned long flags; bool added = false; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index c49db7c33979..4381c0a9a873 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -227,7 +227,7 @@ static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd, vpn_last = vpn_start + npages - 1; spin_lock(&pd->lock); - usnic_uiom_remove_interval(&pd->rb_root, vpn_start, + usnic_uiom_remove_interval(&pd->root, vpn_start, vpn_last, &rm_intervals); usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd); @@ -379,7 +379,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last, (writable) ? IOMMU_WRITE : 0, IOMMU_WRITE, - &pd->rb_root, + &pd->root, &sorted_diff_intervals); if (err) { usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n", @@ -395,7 +395,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, } - err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last, + err = usnic_uiom_insert_interval(&pd->root, vpn_start, vpn_last, (writable) ? IOMMU_WRITE : 0); if (err) { usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n", diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 45ca7c1613a7..431efe4143f4 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -55,7 +55,7 @@ struct usnic_uiom_dev { struct usnic_uiom_pd { struct iommu_domain *domain; spinlock_t lock; - struct rb_root rb_root; + struct rb_root_cached root; struct list_head devs; int dev_cnt; }; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c index 42b4b4c4e452..d399523206c7 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c @@ -100,9 +100,9 @@ static int interval_cmp(void *priv, struct list_head *a, struct list_head *b) } static void -find_intervals_intersection_sorted(struct rb_root *root, unsigned long start, - unsigned long last, - struct list_head *list) +find_intervals_intersection_sorted(struct rb_root_cached *root, + unsigned long start, unsigned long last, + struct list_head *list) { struct usnic_uiom_interval_node *node; @@ -118,7 +118,7 @@ find_intervals_intersection_sorted(struct rb_root *root, unsigned long start, int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last, int flags, int flag_mask, - struct rb_root *root, + struct rb_root_cached *root, struct list_head *diff_set) { struct usnic_uiom_interval_node *interval, *tmp; @@ -175,7 +175,7 @@ void usnic_uiom_put_interval_set(struct list_head *intervals) kfree(interval); } -int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start, +int usnic_uiom_insert_interval(struct rb_root_cached *root, unsigned long start, unsigned long last, int flags) { struct usnic_uiom_interval_node *interval, *tmp; @@ -246,8 +246,9 @@ err_out: return err; } -void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start, - unsigned long last, struct list_head *removed) +void usnic_uiom_remove_interval(struct rb_root_cached *root, + unsigned long start, unsigned long last, + struct list_head *removed) { struct usnic_uiom_interval_node *interval; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h index c0b0b876ab90..1d7fc3226bca 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h @@ -48,12 +48,12 @@ struct usnic_uiom_interval_node { extern void usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node, - struct rb_root *root); + struct rb_root_cached *root); extern void usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node, - struct rb_root *root); + struct rb_root_cached *root); extern struct usnic_uiom_interval_node * -usnic_uiom_interval_tree_iter_first(struct rb_root *root, +usnic_uiom_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); extern struct usnic_uiom_interval_node * @@ -63,7 +63,7 @@ usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node, * Inserts {start...last} into {root}. If there are overlaps, * nodes will be broken up and merged */ -int usnic_uiom_insert_interval(struct rb_root *root, +int usnic_uiom_insert_interval(struct rb_root_cached *root, unsigned long start, unsigned long last, int flags); /* @@ -71,7 +71,7 @@ int usnic_uiom_insert_interval(struct rb_root *root, * 'removed.' The caller is responsibile for freeing memory of nodes in * 'removed.' */ -void usnic_uiom_remove_interval(struct rb_root *root, +void usnic_uiom_remove_interval(struct rb_root_cached *root, unsigned long start, unsigned long last, struct list_head *removed); /* @@ -81,7 +81,7 @@ void usnic_uiom_remove_interval(struct rb_root *root, int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last, int flags, int flag_mask, - struct rb_root *root, + struct rb_root_cached *root, struct list_head *diff_set); /* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */ void usnic_uiom_put_interval_set(struct list_head *intervals); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9cb3f722dce1..d6dbb28245e6 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1271,7 +1271,7 @@ static struct vhost_umem *vhost_umem_alloc(void) if (!umem) return NULL; - umem->umem_tree = RB_ROOT; + umem->umem_tree = RB_ROOT_CACHED; umem->numem = 0; INIT_LIST_HEAD(&umem->umem_list); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index bb7c29b8b9fc..d59a9cc65f9d 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -71,7 +71,7 @@ struct vhost_umem_node { }; struct vhost_umem { - struct rb_root umem_tree; + struct rb_root_cached umem_tree; struct list_head umem_list; int numem; }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8c6f4b8f910f..59073e9f01a4 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -334,7 +334,7 @@ static void remove_huge_page(struct page *page) } static void -hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end) +hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end) { struct vm_area_struct *vma; @@ -498,7 +498,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) i_size_write(inode, offset); i_mmap_lock_write(mapping); - if (!RB_EMPTY_ROOT(&mapping->i_mmap)) + if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); i_mmap_unlock_write(mapping); remove_inode_hugepages(inode, offset, LLONG_MAX); @@ -523,7 +523,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) inode_lock(inode); i_mmap_lock_write(mapping); - if (!RB_EMPTY_ROOT(&mapping->i_mmap)) + if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, hole_start >> PAGE_SHIFT, hole_end >> PAGE_SHIFT); diff --git a/fs/inode.c b/fs/inode.c index 6a1626e0edaf..210054157a49 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -353,7 +353,7 @@ void address_space_init_once(struct address_space *mapping) init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); - mapping->i_mmap = RB_ROOT; + mapping->i_mmap = RB_ROOT_CACHED; } EXPORT_SYMBOL(address_space_init_once); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 49b292e98fec..8d10fc97801c 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -172,7 +172,7 @@ struct drm_mm { * according to the (increasing) start address of the memory node. */ struct drm_mm_node head_node; /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */ - struct rb_root interval_tree; + struct rb_root_cached interval_tree; struct rb_root holes_size; struct rb_root holes_addr; diff --git a/include/linux/fs.h b/include/linux/fs.h index 509434aaf5a4..6111976848ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -392,7 +392,7 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ atomic_t i_mmap_writable;/* count VM_SHARED mappings */ - struct rb_root i_mmap; /* tree of private and shared mappings */ + struct rb_root_cached i_mmap; /* tree of private and shared mappings */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ @@ -487,7 +487,7 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) */ static inline int mapping_mapped(struct address_space *mapping) { - return !RB_EMPTY_ROOT(&mapping->i_mmap); + return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } /* diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h index 724556aa3c95..202ee1283f4b 100644 --- a/include/linux/interval_tree.h +++ b/include/linux/interval_tree.h @@ -11,13 +11,15 @@ struct interval_tree_node { }; extern void -interval_tree_insert(struct interval_tree_node *node, struct rb_root *root); +interval_tree_insert(struct interval_tree_node *node, + struct rb_root_cached *root); extern void -interval_tree_remove(struct interval_tree_node *node, struct rb_root *root); +interval_tree_remove(struct interval_tree_node *node, + struct rb_root_cached *root); extern struct interval_tree_node * -interval_tree_iter_first(struct rb_root *root, +interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); extern struct interval_tree_node * diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index 58370e1862ad..f096423c8cbd 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -65,11 +65,13 @@ RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \ \ /* Insert / remove interval nodes from the tree */ \ \ -ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \ + struct rb_root_cached *root) \ { \ - struct rb_node **link = &root->rb_node, *rb_parent = NULL; \ + struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \ ITTYPE start = ITSTART(node), last = ITLAST(node); \ ITSTRUCT *parent; \ + bool leftmost = true; \ \ while (*link) { \ rb_parent = *link; \ @@ -78,18 +80,22 @@ ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ parent->ITSUBTREE = last; \ if (start < ITSTART(parent)) \ link = &parent->ITRB.rb_left; \ - else \ + else { \ link = &parent->ITRB.rb_right; \ + leftmost = false; \ + } \ } \ \ node->ITSUBTREE = last; \ rb_link_node(&node->ITRB, rb_parent, link); \ - rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ + rb_insert_augmented_cached(&node->ITRB, root, \ + leftmost, &ITPREFIX ## _augment); \ } \ \ -ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ + struct rb_root_cached *root) \ { \ - rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ + rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \ } \ \ /* \ @@ -140,15 +146,35 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ } \ \ ITSTATIC ITSTRUCT * \ -ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \ +ITPREFIX ## _iter_first(struct rb_root_cached *root, \ + ITTYPE start, ITTYPE last) \ { \ - ITSTRUCT *node; \ + ITSTRUCT *node, *leftmost; \ \ - if (!root->rb_node) \ + if (!root->rb_root.rb_node) \ return NULL; \ - node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \ + \ + /* \ + * Fastpath range intersection/overlap between A: [a0, a1] and \ + * B: [b0, b1] is given by: \ + * \ + * a0 <= b1 && b0 <= a1 \ + * \ + * ... where A holds the lock range and B holds the smallest \ + * 'start' and largest 'last' in the tree. For the later, we \ + * rely on the root node, which by augmented interval tree \ + * property, holds the largest value in its last-in-subtree. \ + * This allows mitigating some of the tree walk overhead for \ + * for non-intersecting ranges, maintained and consulted in O(1). \ + */ \ + node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \ if (node->ITSUBTREE < start) \ return NULL; \ + \ + leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \ + if (ITSTART(leftmost) > last) \ + return NULL; \ + \ return ITPREFIX ## _subtree_search(node, start, last); \ } \ \ diff --git a/include/linux/mm.h b/include/linux/mm.h index 5195e272fc4a..f8c10d336e42 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2034,13 +2034,13 @@ extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* interval_tree.c */ void vma_interval_tree_insert(struct vm_area_struct *node, - struct rb_root *root); + struct rb_root_cached *root); void vma_interval_tree_insert_after(struct vm_area_struct *node, struct vm_area_struct *prev, - struct rb_root *root); + struct rb_root_cached *root); void vma_interval_tree_remove(struct vm_area_struct *node, - struct rb_root *root); -struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, + struct rb_root_cached *root); +struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, unsigned long start, unsigned long last); @@ -2050,11 +2050,12 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, vma; vma = vma_interval_tree_iter_next(vma, start, last)) void anon_vma_interval_tree_insert(struct anon_vma_chain *node, - struct rb_root *root); + struct rb_root_cached *root); void anon_vma_interval_tree_remove(struct anon_vma_chain *node, - struct rb_root *root); -struct anon_vma_chain *anon_vma_interval_tree_iter_first( - struct rb_root *root, unsigned long start, unsigned long last); + struct rb_root_cached *root); +struct anon_vma_chain * +anon_vma_interval_tree_iter_first(struct rb_root_cached *root, + unsigned long start, unsigned long last); struct anon_vma_chain *anon_vma_interval_tree_iter_next( struct anon_vma_chain *node, unsigned long start, unsigned long last); #ifdef CONFIG_DEBUG_VM_RB diff --git a/include/linux/rmap.h b/include/linux/rmap.h index f8ca2e74b819..733d3d8181e2 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -55,7 +55,9 @@ struct anon_vma { * is serialized by a system wide lock only visible to * mm_take_all_locks() (mm_all_locks_mutex). */ - struct rb_root rb_root; /* Interval tree of private "related" vmas */ + + /* Interval tree of private "related" vmas */ + struct rb_root_cached rb_root; }; /* diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index fb67554aabd6..5eb7f5bc8248 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -111,22 +111,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bound); -void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root); -void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root); +void rbt_ib_umem_insert(struct umem_odp_node *node, + struct rb_root_cached *root); +void rbt_ib_umem_remove(struct umem_odp_node *node, + struct rb_root_cached *root); typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, void *cookie); /* * Call the callback on each ib_umem in the range. Returns the logical or of * the return values of the functions called. */ -int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end, +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, + u64 start, u64 end, umem_call_back cb, void *cookie); /* * Find first region intersecting with address range. * Return NULL if not found */ -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length); static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6df68048517..bdb1279a415b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1457,7 +1457,7 @@ struct ib_ucontext { struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct rb_root umem_tree; + struct rb_root_cached umem_tree; /* * Protects .umem_rbroot and tree, as well as odp_mrs_count and * mmu notifiers registration. diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index df495fe81421..0e343fd29570 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -19,14 +19,14 @@ __param(bool, search_all, false, "Searches will iterate all nodes in the tree"); __param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint"); -static struct rb_root root = RB_ROOT; +static struct rb_root_cached root = RB_ROOT_CACHED; static struct interval_tree_node *nodes = NULL; static u32 *queries = NULL; static struct rnd_state rnd; static inline unsigned long -search(struct rb_root *root, unsigned long start, unsigned long last) +search(struct rb_root_cached *root, unsigned long start, unsigned long last) { struct interval_tree_node *node; unsigned long results = 0; diff --git a/mm/interval_tree.c b/mm/interval_tree.c index f2c2492681bf..b47664358796 100644 --- a/mm/interval_tree.c +++ b/mm/interval_tree.c @@ -28,7 +28,7 @@ INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.rb, /* Insert node immediately after prev in the interval tree */ void vma_interval_tree_insert_after(struct vm_area_struct *node, struct vm_area_struct *prev, - struct rb_root *root) + struct rb_root_cached *root) { struct rb_node **link; struct vm_area_struct *parent; @@ -55,7 +55,7 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node, node->shared.rb_subtree_last = last; rb_link_node(&node->shared.rb, &parent->shared.rb, link); - rb_insert_augmented(&node->shared.rb, root, + rb_insert_augmented(&node->shared.rb, &root->rb_root, &vma_interval_tree_augment); } @@ -74,7 +74,7 @@ INTERVAL_TREE_DEFINE(struct anon_vma_chain, rb, unsigned long, rb_subtree_last, static inline, __anon_vma_interval_tree) void anon_vma_interval_tree_insert(struct anon_vma_chain *node, - struct rb_root *root) + struct rb_root_cached *root) { #ifdef CONFIG_DEBUG_VM_RB node->cached_vma_start = avc_start_pgoff(node); @@ -84,13 +84,13 @@ void anon_vma_interval_tree_insert(struct anon_vma_chain *node, } void anon_vma_interval_tree_remove(struct anon_vma_chain *node, - struct rb_root *root) + struct rb_root_cached *root) { __anon_vma_interval_tree_remove(node, root); } struct anon_vma_chain * -anon_vma_interval_tree_iter_first(struct rb_root *root, +anon_vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long first, unsigned long last) { return __anon_vma_interval_tree_iter_first(root, first, last); diff --git a/mm/memory.c b/mm/memory.c index 0bbc1d612a63..ec4e15494901 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2761,7 +2761,7 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma, zap_page_range_single(vma, start_addr, end_addr - start_addr, details); } -static inline void unmap_mapping_range_tree(struct rb_root *root, +static inline void unmap_mapping_range_tree(struct rb_root_cached *root, struct zap_details *details) { struct vm_area_struct *vma; @@ -2825,7 +2825,7 @@ void unmap_mapping_range(struct address_space *mapping, details.last_index = ULONG_MAX; i_mmap_lock_write(mapping); - if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } diff --git a/mm/mmap.c b/mm/mmap.c index 4c5981651407..680506faceae 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -685,7 +685,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next = vma->vm_next, *orig_vma = vma; struct address_space *mapping = NULL; - struct rb_root *root = NULL; + struct rb_root_cached *root = NULL; struct anon_vma *anon_vma = NULL; struct file *file = vma->vm_file; bool start_changed = false, end_changed = false; @@ -3340,7 +3340,7 @@ static DEFINE_MUTEX(mm_all_locks_mutex); static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) { - if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) { + if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. @@ -3356,7 +3356,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * anon_vma->root->rwsem. */ if (__test_and_set_bit(0, (unsigned long *) - &anon_vma->root->rb_root.rb_node)) + &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); } } @@ -3458,7 +3458,7 @@ out_unlock: static void vm_unlock_anon_vma(struct anon_vma *anon_vma) { - if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) { + if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change to 0 from under * us because we hold the mm_all_locks_mutex. @@ -3472,7 +3472,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) * anon_vma->root->rwsem. */ if (!__test_and_clear_bit(0, (unsigned long *) - &anon_vma->root->rb_root.rb_node)) + &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); anon_vma_unlock_write(anon_vma); } diff --git a/mm/rmap.c b/mm/rmap.c index 0618cd85b862..b874c4761e84 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -391,7 +391,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) * Leave empty anon_vmas on the list - we'll need * to free them outside the lock. */ - if (RB_EMPTY_ROOT(&anon_vma->rb_root)) { + if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) { anon_vma->parent->degree--; continue; } @@ -425,7 +425,7 @@ static void anon_vma_ctor(void *data) init_rwsem(&anon_vma->rwsem); atomic_set(&anon_vma->refcount, 0); - anon_vma->rb_root = RB_ROOT; + anon_vma->rb_root = RB_ROOT_CACHED; } void __init anon_vma_init(void) -- cgit v1.2.3 From ab615a5b879292e83653be60aa82113f7c6f462d Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 2 Nov 2017 15:59:41 -0700 Subject: fs/hugetlbfs/inode.c: fix hwpoison reserve accounting Calling madvise(MADV_HWPOISON) on a hugetlbfs page will result in bad (negative) reserved huge page counts. This may not happen immediately, but may happen later when the underlying file is removed or filesystem unmounted. For example: AnonHugePages: 0 kB ShmemHugePages: 0 kB HugePages_Total: 1 HugePages_Free: 0 HugePages_Rsvd: 18446744073709551615 HugePages_Surp: 0 Hugepagesize: 2048 kB In routine hugetlbfs_error_remove_page(), hugetlb_fix_reserve_counts is called after remove_huge_page. hugetlb_fix_reserve_counts is designed to only be called/used only if a failure is returned from hugetlb_unreserve_pages. Therefore, call hugetlb_unreserve_pages as required and only call hugetlb_fix_reserve_counts in the unlikely event that hugetlb_unreserve_pages returns an error. Link: http://lkml.kernel.org/r/20171019230007.17043-2-mike.kravetz@oracle.com Fixes: 78bb920344b8 ("mm: hwpoison: dissolve in-use hugepage in unrecoverable memory error") Signed-off-by: Mike Kravetz Acked-by: Naoya Horiguchi Cc: Michal Hocko Cc: Aneesh Kumar Cc: Anshuman Khandual Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 59073e9f01a4..ed113ea17aff 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -842,9 +842,12 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping, struct page *page) { struct inode *inode = mapping->host; + pgoff_t index = page->index; remove_huge_page(page); - hugetlb_fix_reserve_counts(inode); + if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) + hugetlb_fix_reserve_counts(inode); + return 0; } -- cgit v1.2.3 From 007ab7b49acb93af650f3acec0e51d2c6b843934 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 15 Nov 2017 17:33:56 -0800 Subject: fs/hugetlbfs/inode.c: remove redundant -ENIVAL return from hugetlbfs_setattr() There is no need to have a local return code set with -EINVAL when both the conditions following it return error codes appropriately. Just remove the redundant one. Link: http://lkml.kernel.org/r/20170929145444.17611-1-khandual@linux.vnet.ibm.com Signed-off-by: Anshuman Khandual Reviewed-by: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ed113ea17aff..6adb17bb1a38 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -668,7 +668,6 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) return error; if (ia_valid & ATTR_SIZE) { - error = -EINVAL; if (attr->ia_size & ~huge_page_mask(h)) return -EINVAL; error = hugetlb_vmtruncate(inode, attr->ia_size); -- cgit v1.2.3 From 8667982014d6048e0b5e286b6247ff24f48d4cc6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:52 -0800 Subject: mm, pagevec: remove cold parameter for pagevecs Every pagevec_init user claims the pages being released are hot even in cases where it is unlikely the pages are hot. As no one cares about the hotness of pages being released to the allocator, just ditch the parameter. No performance impact is expected as the overhead is marginal. The parameter is removed simply because it is a bit stupid to have a useless parameter copied everywhere. Link: http://lkml.kernel.org/r/20171018075952.10627-6-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- fs/afs/write.c | 4 ++-- fs/btrfs/extent_io.c | 4 ++-- fs/buffer.c | 4 ++-- fs/cachefiles/rdwr.c | 4 ++-- fs/ceph/addr.c | 4 ++-- fs/dax.c | 2 +- fs/ext4/inode.c | 6 +++--- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 8 ++++---- fs/fscache/page.c | 2 +- fs/gfs2/aops.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/nilfs2/btree.c | 2 +- fs/nilfs2/page.c | 8 ++++---- fs/nilfs2/segment.c | 4 ++-- include/linux/pagevec.h | 4 +--- mm/filemap.c | 2 +- mm/mlock.c | 4 ++-- mm/page-writeback.c | 2 +- mm/shmem.c | 6 +++--- mm/swap.c | 4 ++-- mm/truncate.c | 8 ++++---- 24 files changed, 45 insertions(+), 47 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad524cb0f6fc..7982ad817c11 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1859,7 +1859,7 @@ static void i915_address_space_init(struct i915_address_space *vm, INIT_LIST_HEAD(&vm->unbound_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); - pagevec_init(&vm->free_pages, false); + pagevec_init(&vm->free_pages); } static void i915_address_space_fini(struct i915_address_space *vm) diff --git a/fs/afs/write.c b/fs/afs/write.c index d62a6b54152d..11dd0526b96b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -308,7 +308,7 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, _enter("{%x:%u},%lx-%lx", vnode->fid.vid, vnode->fid.vnode, first, last); - pagevec_init(&pv, 0); + pagevec_init(&pv); do { _debug("kill %lx-%lx", first, last); @@ -602,7 +602,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) ASSERT(wb != NULL); - pagevec_init(&pv, 0); + pagevec_init(&pv); do { _debug("done %lx-%lx", first, last); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 93fdaac81212..16045ea86fc1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3797,7 +3797,7 @@ int btree_write_cache_pages(struct address_space *mapping, int scanned = 0; int tag; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; @@ -3936,7 +3936,7 @@ static int extent_write_cache_pages(struct address_space *mapping, if (!igrab(inode)) return 0; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; diff --git a/fs/buffer.c b/fs/buffer.c index 1c18a22a6013..0736a6a2e2f0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1592,7 +1592,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) struct buffer_head *head; end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits); - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) { count = pagevec_count(&pvec); for (i = 0; i < count; i++) { @@ -3514,7 +3514,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length, if (length <= 0) return -ENOENT; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); do { unsigned nr_pages, i; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 18d7aa61ef0f..23097cca2674 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -710,7 +710,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, /* calculate the shift required to use bmap */ shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - pagevec_init(&pagevec, 0); + pagevec_init(&pagevec); op->op.flags &= FSCACHE_OP_KEEP_FLAGS; op->op.flags |= FSCACHE_OP_ASYNC; @@ -844,7 +844,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op, ret = cachefiles_has_space(cache, 0, *nr_pages); if (ret == 0) { - pagevec_init(&pagevec, 0); + pagevec_init(&pagevec); list_for_each_entry(page, pages, lru) { if (pagevec_add(&pagevec, page) == 0) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 26c682db94ee..dbf07051aacd 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -680,7 +680,7 @@ static void ceph_release_pages(struct page **pages, int num) struct pagevec pvec; int i; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); for (i = 0; i < num; i++) { if (pagevec_add(&pvec, pages[i]) == 0) pagevec_release(&pvec); @@ -811,7 +811,7 @@ static int ceph_writepages_start(struct address_space *mapping, if (fsc->mount_options->wsize < wsize) wsize = fsc->mount_options->wsize; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); start_index = wbc->range_cyclic ? mapping->writeback_index : 0; index = start_index; diff --git a/fs/dax.c b/fs/dax.c index 165fdfb6e508..3652b26a0048 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -794,7 +794,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, tag_pages_for_writeback(mapping, start_index, end_index); - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (!done) { pvec.nr = find_get_entries_tag(mapping, start_index, PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b239a41dbeeb..8d2b582fb141 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1719,7 +1719,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, ext4_es_remove_extent(inode, start, last - start + 1); } - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (index <= end) { nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end); if (nr_pages == 0) @@ -2345,7 +2345,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) lblk = start << bpp_bits; pblock = mpd->map.m_pblk; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (start <= end) { nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &start, end); @@ -2616,7 +2616,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) else tag = PAGECACHE_TAG_DIRTY; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); mpd->map.m_len = 0; mpd->next_page = index; while (index <= end) { diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6124f8710dc3..0bb8e2c022d3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -314,7 +314,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, }; struct blk_plug plug; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); blk_start_plug(&plug); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 687703755824..7b3ad5d8e2e9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1635,7 +1635,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int range_whole = 0; int tag; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); if (get_dirty_pages(mapping->host) <= SM_I(F2FS_M_SB(mapping))->min_hot_blocks) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d6e4df0bb622..b33dac9592ca 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1282,7 +1282,7 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) struct page *last_page = NULL; int nr_pages; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); index = 0; while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, @@ -1436,7 +1436,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, return PTR_ERR_OR_ZERO(last_page); } retry: - pagevec_init(&pvec, 0); + pagevec_init(&pvec); index = 0; while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, @@ -1547,7 +1547,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, int ret = 0; int nr_pages; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); next_step: index = 0; @@ -1648,7 +1648,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) int ret2, ret = 0; int nr_pages; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, PAGECACHE_TAG_WRITEBACK))) { diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 0ad3fd3ad0b4..961029e04027 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -1175,7 +1175,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, return; } - pagevec_init(&pvec, 0); + pagevec_init(&pvec); next = 0; do { if (!pagevec_lookup(&pvec, mapping, &next)) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 3fea3d7780b0..1daf15a1f00c 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -371,7 +371,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, int range_whole = 0; int tag; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6adb17bb1a38..1e76730aac0d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -407,7 +407,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); - pagevec_init(&pvec, 0); + pagevec_init(&pvec); next = start; while (next < end) { /* diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 35989c7bb065..16a7a67a11c9 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2156,7 +2156,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, level++) INIT_LIST_HEAD(&lists[level]); - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY)) { diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 1c16726915c1..68241512d7c1 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -255,7 +255,7 @@ int nilfs_copy_dirty_pages(struct address_space *dmap, pgoff_t index = 0; int err = 0; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); repeat: if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY)) return 0; @@ -309,7 +309,7 @@ void nilfs_copy_back_pages(struct address_space *dmap, pgoff_t index = 0; int err; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); repeat: n = pagevec_lookup(&pvec, smap, &index); if (!n) @@ -373,7 +373,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) unsigned int i; pgoff_t index = 0; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY)) { @@ -518,7 +518,7 @@ unsigned long nilfs_find_uncommitted_extent(struct inode *inode, index = start_blk >> (PAGE_SHIFT - inode->i_blkbits); nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits); - pagevec_init(&pvec, 0); + pagevec_init(&pvec); repeat: pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 19366ab20bea..f65392fecb5c 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -708,7 +708,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, index = start >> PAGE_SHIFT; last = end >> PAGE_SHIFT; } - pagevec_init(&pvec, 0); + pagevec_init(&pvec); repeat: if (unlikely(index > last) || !pagevec_lookup_range_tag(&pvec, mapping, &index, last, @@ -753,7 +753,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, unsigned int i; pgoff_t index = 0; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY)) { diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 95c75c858d1f..eebefd209424 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -17,7 +17,6 @@ struct address_space; struct pagevec { unsigned long nr; - bool cold; bool drained; struct page *pages[PAGEVEC_SIZE]; }; @@ -51,10 +50,9 @@ static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); } -static inline void pagevec_init(struct pagevec *pvec, int cold) +static inline void pagevec_init(struct pagevec *pvec) { pvec->nr = 0; - pvec->cold = cold; pvec->drained = false; } diff --git a/mm/filemap.c b/mm/filemap.c index 155370fc87f2..90a9f261f85f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -519,7 +519,7 @@ static void __filemap_fdatawait_range(struct address_space *mapping, if (end_byte < start_byte) return; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (index <= end) { unsigned i; diff --git a/mm/mlock.c b/mm/mlock.c index 46af369c13e5..ed37cb208d19 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -289,7 +289,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) struct pagevec pvec_putback; int pgrescued = 0; - pagevec_init(&pvec_putback, 0); + pagevec_init(&pvec_putback); /* Phase 1: page isolation */ spin_lock_irq(zone_lru_lock(zone)); @@ -448,7 +448,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, struct pagevec pvec; struct zone *zone; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); /* * Although FOLL_DUMP is intended for get_dump_page(), * it just so happens that its special treatment of the diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 436714917e03..05313f402ba8 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2168,7 +2168,7 @@ int write_cache_pages(struct address_space *mapping, int range_whole = 0; int tag; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; diff --git a/mm/shmem.c b/mm/shmem.c index a72f68aee6a4..7ea8b276ba8b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -747,7 +747,7 @@ void shmem_unlock_mapping(struct address_space *mapping) pgoff_t indices[PAGEVEC_SIZE]; pgoff_t index = 0; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); /* * Minor point, but we might as well stop if someone else SHM_LOCKs it. */ @@ -790,7 +790,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (lend == -1) end = -1; /* unsigned, so actually very big */ - pagevec_init(&pvec, 0); + pagevec_init(&pvec); index = start; while (index < end) { pvec.nr = find_get_entries(mapping, index, @@ -2528,7 +2528,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, bool done = false; int i; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); pvec.nr = 1; /* start small: we may be there already */ while (!done) { pvec.nr = find_get_entries(mapping, index, diff --git a/mm/swap.c b/mm/swap.c index 3e564a95ee73..88a19b6cdf7c 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -210,7 +210,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, } if (pgdat) spin_unlock_irqrestore(&pgdat->lru_lock, flags); - release_pages(pvec->pages, pvec->nr, pvec->cold); + release_pages(pvec->pages, pvec->nr, 0); pagevec_reinit(pvec); } @@ -837,7 +837,7 @@ void __pagevec_release(struct pagevec *pvec) lru_add_drain(); pvec->drained = true; } - release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); + release_pages(pvec->pages, pagevec_count(pvec), 0); pagevec_reinit(pvec); } EXPORT_SYMBOL(__pagevec_release); diff --git a/mm/truncate.c b/mm/truncate.c index c30e8fa3d063..e4b4cf0f4070 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -330,7 +330,7 @@ void truncate_inode_pages_range(struct address_space *mapping, else end = (lend + 1) >> PAGE_SHIFT; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); index = start; while (index < end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), @@ -342,7 +342,7 @@ void truncate_inode_pages_range(struct address_space *mapping, */ struct pagevec locked_pvec; - pagevec_init(&locked_pvec, 0); + pagevec_init(&locked_pvec); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -553,7 +553,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, unsigned long count = 0; int i; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, indices)) { @@ -683,7 +683,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, if (mapping->nrpages == 0 && mapping->nrexceptional == 0) goto out; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); index = start; while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, -- cgit v1.2.3