summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-06 14:08:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-06 14:08:17 -0700
commit512b7931ad0561ffe14265f9ff554a3c081b476b (patch)
treea94450d08468e094d2d92a495de4650faab09c1f /fs
parentfe91c4725aeed35023ba4f7a1e1adfebb6878c23 (diff)
parent658f9ae761b5965893727dd4edcdad56e5a439bb (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "257 patches. Subsystems affected by this patch series: scripts, ocfs2, vfs, and mm (slab-generic, slab, slub, kconfig, dax, kasan, debug, pagecache, gup, swap, memcg, pagemap, mprotect, mremap, iomap, tracing, vmalloc, pagealloc, memory-failure, hugetlb, userfaultfd, vmscan, tools, memblock, oom-kill, hugetlbfs, migration, thp, readahead, nommu, ksm, vmstat, madvise, memory-hotplug, rmap, zsmalloc, highmem, zram, cleanups, kfence, and damon)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (257 commits) mm/damon: remove return value from before_terminate callback mm/damon: fix a few spelling mistakes in comments and a pr_debug message mm/damon: simplify stop mechanism Docs/admin-guide/mm/pagemap: wordsmith page flags descriptions Docs/admin-guide/mm/damon/start: simplify the content Docs/admin-guide/mm/damon/start: fix a wrong link Docs/admin-guide/mm/damon/start: fix wrong example commands mm/damon/dbgfs: add adaptive_targets list check before enable monitor_on mm/damon: remove unnecessary variable initialization Documentation/admin-guide/mm/damon: add a document for DAMON_RECLAIM mm/damon: introduce DAMON-based Reclamation (DAMON_RECLAIM) selftests/damon: support watermarks mm/damon/dbgfs: support watermarks mm/damon/schemes: activate schemes based on a watermarks mechanism tools/selftests/damon: update for regions prioritization of schemes mm/damon/dbgfs: support prioritization weights mm/damon/vaddr,paddr: support pageout prioritization mm/damon/schemes: prioritize regions within the quotas mm/damon/selftests: support schemes quotas mm/damon/dbgfs: support quotas of schemes ...
Diffstat (limited to 'fs')
-rw-r--r--fs/d_path.c8
-rw-r--r--fs/ocfs2/alloc.c21
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c1
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/journal.c26
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/super.c40
-rw-r--r--fs/open.c16
-rw-r--r--fs/posix_acl.c3
-rw-r--r--fs/proc/task_mmu.c28
-rw-r--r--fs/super.c3
12 files changed, 84 insertions, 77 deletions
diff --git a/fs/d_path.c b/fs/d_path.c
index cd60c7535181..e4e0ebad1f15 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -77,9 +77,8 @@ static bool prepend(struct prepend_buffer *p, const char *str, int namelen)
/**
* prepend_name - prepend a pathname in front of current buffer pointer
- * @buffer: buffer pointer
- * @buflen: allocated length of the buffer
- * @name: name string and length qstr structure
+ * @p: prepend buffer which contains buffer pointer and allocated length
+ * @name: name string and length qstr structure
*
* With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
* make sure that either the old or the new name pointer and length are
@@ -141,8 +140,7 @@ static int __prepend_path(const struct dentry *dentry, const struct mount *mnt,
* prepend_path - Prepend path string to a buffer
* @path: the dentry/vfsmount to report
* @root: root vfsmnt/dentry
- * @buffer: pointer to the end of the buffer
- * @buflen: pointer to buffer length
+ * @p: prepend buffer which contains buffer pointer and allocated length
*
* The function will first try to write out the pathname without taking any
* lock other than the RCU read lock to make sure that dentries won't go away.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 5d9ae17bd443..bb247bc349e4 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5940,6 +5940,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
+ ocfs2_commit_trans(osb, handle);
mlog_errno(status);
goto bail;
}
@@ -5964,6 +5965,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
data_alloc_bh, start_blk,
num_clusters);
if (status < 0) {
+ ocfs2_commit_trans(osb, handle);
mlog_errno(status);
goto bail;
}
@@ -6921,13 +6923,12 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
}
/*
- * Zero the area past i_size but still within an allocated
- * cluster. This avoids exposing nonzero data on subsequent file
- * extends.
+ * Zero partial cluster for a hole punch or truncate. This avoids exposing
+ * nonzero data on subsequent file extends.
*
* We need to call this before i_size is updated on the inode because
* otherwise block_write_full_page() will skip writeout of pages past
- * i_size. The new_i_size parameter is passed for this reason.
+ * i_size.
*/
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
u64 range_start, u64 range_end)
@@ -6945,6 +6946,15 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
if (!ocfs2_sparse_alloc(OCFS2_SB(sb)))
return 0;
+ /*
+ * Avoid zeroing pages fully beyond current i_size. It is pointless as
+ * underlying blocks of those pages should be already zeroed out and
+ * page writeback will skip them anyway.
+ */
+ range_end = min_t(u64, range_end, i_size_read(inode));
+ if (range_start >= range_end)
+ return 0;
+
pages = kcalloc(ocfs2_pages_per_cluster(sb),
sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
@@ -6953,9 +6963,6 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
goto out;
}
- if (range_start == range_end)
- goto out;
-
ret = ocfs2_extent_map_get_blocks(inode,
range_start >> sb->s_blocksize_bits,
&phys, NULL, &ext_flags);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 0e7aad1b11cc..5cd5f7511dac 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2698,7 +2698,6 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
continue;
}
retry:
- ret = -EINVAL;
mlog(0, "attempting to send begin reco msg to %d\n",
nodenum);
ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 54d7843c0211..fc5f780fa235 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -476,10 +476,11 @@ int ocfs2_truncate_file(struct inode *inode,
* greater than page size, so we have to truncate them
* anyway.
*/
- unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(inode->i_mapping, new_i_size);
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ unmap_mapping_range(inode->i_mapping,
+ new_i_size + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(inode->i_mapping, new_i_size);
status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
i_size_read(inode), 1);
if (status)
@@ -498,6 +499,9 @@ int ocfs2_truncate_file(struct inode *inode,
goto bail_unlock_sem;
}
+ unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(inode->i_mapping, new_i_size);
+
status = ocfs2_commit_truncate(osb, inode, di_bh);
if (status < 0) {
mlog_errno(status);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index bc8f32fab964..6c2411c2afcf 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -125,7 +125,6 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
struct ocfs2_find_inode_args args;
- journal_t *journal = OCFS2_SB(sb)->journal->j_journal;
trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
sysfile_type);
@@ -172,10 +171,11 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
* part of the transaction - the inode could have been reclaimed and
* now it is reread from disk.
*/
- if (journal) {
+ if (osb->journal) {
transaction_t *transaction;
tid_t tid;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ journal_t *journal = osb->journal->j_journal;
read_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4f15750aac5d..b9c339335a53 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -810,19 +810,34 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
write_unlock(&journal->j_state_lock);
}
-int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
+int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
{
int status = -1;
struct inode *inode = NULL; /* the journal inode */
journal_t *j_journal = NULL;
+ struct ocfs2_journal *journal = NULL;
struct ocfs2_dinode *di = NULL;
struct buffer_head *bh = NULL;
- struct ocfs2_super *osb;
int inode_lock = 0;
- BUG_ON(!journal);
+ /* initialize our journal structure */
+ journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
+ if (!journal) {
+ mlog(ML_ERROR, "unable to alloc journal\n");
+ status = -ENOMEM;
+ goto done;
+ }
+ osb->journal = journal;
+ journal->j_osb = osb;
- osb = journal->j_osb;
+ atomic_set(&journal->j_num_trans, 0);
+ init_rwsem(&journal->j_trans_barrier);
+ init_waitqueue_head(&journal->j_checkpointed);
+ spin_lock_init(&journal->j_lock);
+ journal->j_trans_id = 1UL;
+ INIT_LIST_HEAD(&journal->j_la_cleanups);
+ INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
+ journal->j_state = OCFS2_JOURNAL_FREE;
/* already have the inode for our journal */
inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
@@ -1028,9 +1043,10 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
journal->j_state = OCFS2_JOURNAL_FREE;
-// up_write(&journal->j_trans_barrier);
done:
iput(inode);
+ kfree(journal);
+ osb->journal = NULL;
}
static void ocfs2_clear_journal_error(struct super_block *sb,
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d158acb8b38a..8dcb2f2cadbc 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -167,8 +167,7 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
* ocfs2_start_checkpoint - Kick the commit thread to do a checkpoint.
*/
void ocfs2_set_journal_params(struct ocfs2_super *osb);
-int ocfs2_journal_init(struct ocfs2_journal *journal,
- int *dirty);
+int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty);
void ocfs2_journal_shutdown(struct ocfs2_super *osb);
int ocfs2_journal_wipe(struct ocfs2_journal *journal,
int full);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 5c914ce9b3ac..1286b88b6fa1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1894,8 +1894,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
/* This will disable recovery and flush any recovery work. */
ocfs2_recovery_exit(osb);
- ocfs2_journal_shutdown(osb);
-
ocfs2_sync_blockdev(sb);
ocfs2_purge_refcount_trees(osb);
@@ -1918,6 +1916,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
ocfs2_release_system_inodes(osb);
+ ocfs2_journal_shutdown(osb);
+
/*
* If we're dismounting due to mount error, mount.ocfs2 will clean
* up heartbeat. If we're a local mount, there is no heartbeat.
@@ -2016,7 +2016,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
int i, cbits, bbits;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
struct inode *inode = NULL;
- struct ocfs2_journal *journal;
struct ocfs2_super *osb;
u64 total_blocks;
@@ -2197,33 +2196,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
get_random_bytes(&osb->s_next_generation, sizeof(u32));
- /* FIXME
- * This should be done in ocfs2_journal_init(), but unknown
- * ordering issues will cause the filesystem to crash.
- * If anyone wants to figure out what part of the code
- * refers to osb->journal before ocfs2_journal_init() is run,
- * be my guest.
- */
- /* initialize our journal structure */
-
- journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
- if (!journal) {
- mlog(ML_ERROR, "unable to alloc journal\n");
- status = -ENOMEM;
- goto bail;
- }
- osb->journal = journal;
- journal->j_osb = osb;
-
- atomic_set(&journal->j_num_trans, 0);
- init_rwsem(&journal->j_trans_barrier);
- init_waitqueue_head(&journal->j_checkpointed);
- spin_lock_init(&journal->j_lock);
- journal->j_trans_id = (unsigned long) 1;
- INIT_LIST_HEAD(&journal->j_la_cleanups);
- INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
- journal->j_state = OCFS2_JOURNAL_FREE;
-
INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
init_llist_head(&osb->dquot_drop_list);
@@ -2404,7 +2376,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
* ourselves. */
/* Init our journal object. */
- status = ocfs2_journal_init(osb->journal, &dirty);
+ status = ocfs2_journal_init(osb, &dirty);
if (status < 0) {
mlog(ML_ERROR, "Could not initialize journal!\n");
goto finally;
@@ -2513,12 +2485,6 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
kfree(osb->osb_orphan_wipes);
kfree(osb->slot_recovery_generations);
- /* FIXME
- * This belongs in journal shutdown, but because we have to
- * allocate osb->journal at the start of ocfs2_initialize_osb(),
- * we free it here.
- */
- kfree(osb->journal);
kfree(osb->local_alloc_copy);
kfree(osb->uuid_str);
kfree(osb->vol_label);
diff --git a/fs/open.c b/fs/open.c
index a7f6cab81267..f732fb94600c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -856,8 +856,20 @@ static int do_dentry_open(struct file *f,
* of THPs into the page cache will fail.
*/
smp_mb();
- if (filemap_nr_thps(inode->i_mapping))
- truncate_pagecache(inode, 0);
+ if (filemap_nr_thps(inode->i_mapping)) {
+ struct address_space *mapping = inode->i_mapping;
+
+ filemap_invalidate_lock(inode->i_mapping);
+ /*
+ * unmap_mapping_range just need to be called once
+ * here, because the private pages is not need to be
+ * unmapped mapping (e.g. data segment of dynamic
+ * shared libraries here).
+ */
+ unmap_mapping_range(mapping, 0, 0, 0);
+ truncate_inode_pages(mapping, 0);
+ filemap_invalidate_unlock(inode->i_mapping);
+ }
}
return 0;
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index f5c25f580dd9..9323a854a60a 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -134,8 +134,7 @@ struct posix_acl *get_acl(struct inode *inode, int type)
* to just call ->get_acl to fetch the ACL ourself. (This is going to
* be an unlikely race.)
*/
- if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED)
- /* fall through */ ;
+ cmpxchg(p, ACL_NOT_CACHED, sentinel);
/*
* Normally, the ACL returned by ->get_acl will be cached.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cf25be3e0321..ad667dbc96f5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -397,7 +397,6 @@ struct mem_size_stats {
u64 pss_shmem;
u64 pss_locked;
u64 swap_pss;
- bool check_shmem_swap;
};
static void smaps_page_accumulate(struct mem_size_stats *mss,
@@ -478,9 +477,11 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end,
__always_unused int depth, struct mm_walk *walk)
{
struct mem_size_stats *mss = walk->private;
+ struct vm_area_struct *vma = walk->vma;
- mss->swap += shmem_partial_swap_usage(
- walk->vma->vm_file->f_mapping, addr, end);
+ mss->swap += shmem_partial_swap_usage(walk->vma->vm_file->f_mapping,
+ linear_page_index(vma, addr),
+ linear_page_index(vma, end));
return 0;
}
@@ -488,6 +489,16 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end,
#define smaps_pte_hole NULL
#endif /* CONFIG_SHMEM */
+static void smaps_pte_hole_lookup(unsigned long addr, struct mm_walk *walk)
+{
+#ifdef CONFIG_SHMEM
+ if (walk->ops->pte_hole) {
+ /* depth is not used */
+ smaps_pte_hole(addr, addr + PAGE_SIZE, 0, walk);
+ }
+#endif
+}
+
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct mm_walk *walk)
{
@@ -516,12 +527,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
}
} else if (is_pfn_swap_entry(swpent))
page = pfn_swap_entry_to_page(swpent);
- } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
- && pte_none(*pte))) {
- page = xa_load(&vma->vm_file->f_mapping->i_pages,
- linear_page_index(vma, addr));
- if (xa_is_value(page))
- mss->swap += PAGE_SIZE;
+ } else {
+ smaps_pte_hole_lookup(addr, walk);
return;
}
@@ -735,8 +742,6 @@ static void smap_gather_stats(struct vm_area_struct *vma,
return;
#ifdef CONFIG_SHMEM
- /* In case of smaps_rollup, reset the value from previous vma */
- mss->check_shmem_swap = false;
if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
/*
* For shared or readonly shmem mappings we know that all
@@ -754,7 +759,6 @@ static void smap_gather_stats(struct vm_area_struct *vma,
!(vma->vm_flags & VM_WRITE))) {
mss->swap += shmem_swapped;
} else {
- mss->check_shmem_swap = true;
ops = &smaps_shmem_walk_ops;
}
}
diff --git a/fs/super.c b/fs/super.c
index bcef3a6f4c4b..3bfc0f8fbd5b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -476,6 +476,8 @@ void generic_shutdown_super(struct super_block *sb)
spin_unlock(&sb_lock);
up_write(&sb->s_umount);
if (sb->s_bdi != &noop_backing_dev_info) {
+ if (sb->s_iflags & SB_I_PERSB_BDI)
+ bdi_unregister(sb->s_bdi);
bdi_put(sb->s_bdi);
sb->s_bdi = &noop_backing_dev_info;
}
@@ -1562,6 +1564,7 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
}
WARN_ON(sb->s_bdi != &noop_backing_dev_info);
sb->s_bdi = bdi;
+ sb->s_iflags |= SB_I_PERSB_BDI;
return 0;
}