diff options
Diffstat (limited to 'fs')
358 files changed, 3942 insertions, 2656 deletions
diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 0c51889a60b3..29281b7c3887 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -46,8 +46,8 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry) * NOTE: these are set after open so only reflect 9p client not * underlying file system on server. */ -static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags, - int s_cache, unsigned int f_flags) +static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags, + unsigned int s_cache, unsigned int f_flags) { if (fid->qid.type != P9_QTFILE) return; @@ -57,7 +57,7 @@ static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags, (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) { fid->mode |= P9L_DIRECT; /* no read or write cache */ } else if ((!(s_cache & CACHE_WRITEBACK)) || - (f_flags & O_DSYNC) | (s_flags & V9FS_SYNC)) { + (f_flags & O_DSYNC) || (s_flags & V9FS_SYNC)) { fid->mode |= P9L_NOWRITECACHE; } } diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index c7f774fe398f..d525957594b6 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -545,8 +545,6 @@ void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses) p9_client_begin_disconnect(v9ses->clnt); } -extern int v9fs_error_init(void); - static struct kobject *v9fs_kobj; #ifdef CONFIG_9P_FSCACHE diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 06a2514f0d88..698c43dd5dc8 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -108,7 +108,7 @@ enum p9_cache_bits { struct v9fs_session_info { /* options */ - unsigned char flags; + unsigned int flags; unsigned char nodev; unsigned short debug; unsigned int afid; diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 45b684b7d8d7..4102759a5cb5 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -208,7 +208,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) struct p9_fid *fid; __le32 version; loff_t i_size; - int retval = 0; + int retval = 0, put_err; fid = filp->private_data; p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n", @@ -221,7 +221,8 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) spin_lock(&inode->i_lock); hlist_del(&fid->ilist); spin_unlock(&inode->i_lock); - retval = p9_fid_put(fid); + put_err = p9_fid_put(fid); + retval = retval < 0 ? retval : put_err; } if ((filp->f_mode & FMODE_WRITE)) { diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 2996fb00387f..11cd8d23f6f2 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -505,10 +505,7 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma) p9_debug(P9_DEBUG_MMAP, "filp :%p\n", filp); if (!(v9ses->cache & CACHE_WRITEBACK)) { - p9_debug(P9_DEBUG_CACHE, "(no mmap mode)"); - if (vma->vm_flags & VM_MAYSHARE) - return -ENODEV; - invalidate_inode_pages2(filp->f_mapping); + p9_debug(P9_DEBUG_CACHE, "(read-only mmap mode)"); return generic_file_readonly_mmap(filp, vma); } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 36b466e35887..0d28ecf668d0 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -163,7 +163,6 @@ int v9fs_uflags2omode(int uflags, int extended) { int ret; - ret = 0; switch (uflags&3) { default: case O_RDONLY: @@ -261,7 +260,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); inode->i_blocks = 0; inode->i_rdev = rdev; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_mapping->a_ops = &v9fs_addr_operations; inode->i_private = NULL; @@ -603,7 +602,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); - err = 0; name = dentry->d_name.name; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { @@ -815,8 +813,6 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, if (!(flags & O_CREAT) || d_really_is_positive(dentry)) return finish_no_open(file, res); - err = 0; - v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode); p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses)); @@ -912,7 +908,6 @@ v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, return -EINVAL; p9_debug(P9_DEBUG_VFS, "\n"); - retval = 0; old_inode = d_inode(old_dentry); new_inode = d_inode(new_dentry); v9ses = v9fs_inode2v9ses(old_inode); @@ -1016,7 +1011,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path, p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } else if (v9ses->cache & CACHE_WRITEBACK) { if (S_ISREG(inode->i_mode)) { @@ -1037,7 +1032,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path, return PTR_ERR(st); v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0); - generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat); p9stat_free(st); kfree(st); @@ -1066,7 +1061,6 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap, if (retval) return retval; - retval = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); if (iattr->ia_valid & ATTR_FILE) { fid = iattr->ia_file->private_data; @@ -1158,7 +1152,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, inode->i_atime.tv_sec = stat->atime; inode->i_mtime.tv_sec = stat->mtime; - inode->i_ctime.tv_sec = stat->mtime; + inode_set_ctime(inode, stat->mtime, 0); inode->i_uid = v9ses->dfltuid; inode->i_gid = v9ses->dfltgid; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5361cd2d7996..1312f68965ac 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -366,7 +366,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); - err = 0; v9ses = v9fs_inode2v9ses(dir); omode |= S_IFDIR; @@ -451,7 +450,7 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap, p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } else if (v9ses->cache) { if (S_ISREG(inode->i_mode)) { @@ -476,7 +475,7 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap, return PTR_ERR(st); v9fs_stat2inode_dotl(st, d_inode(dentry), 0); - generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat); /* Change block size to what the server returned */ stat->blksize = st->st_blksize; @@ -646,8 +645,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, inode->i_atime.tv_nsec = stat->st_atime_nsec; inode->i_mtime.tv_sec = stat->st_mtime_sec; inode->i_mtime.tv_nsec = stat->st_mtime_nsec; - inode->i_ctime.tv_sec = stat->st_ctime_sec; - inode->i_ctime.tv_nsec = stat->st_ctime_nsec; + inode_set_ctime(inode, stat->st_ctime_sec, + stat->st_ctime_nsec); inode->i_uid = stat->st_uid; inode->i_gid = stat->st_gid; set_nlink(inode, stat->st_nlink); @@ -669,8 +668,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, inode->i_mtime.tv_nsec = stat->st_mtime_nsec; } if (stat->st_result_mask & P9_STATS_CTIME) { - inode->i_ctime.tv_sec = stat->st_ctime_sec; - inode->i_ctime.tv_nsec = stat->st_ctime_nsec; + inode_set_ctime(inode, stat->st_ctime_sec, + stat->st_ctime_nsec); } if (stat->st_result_mask & P9_STATS_UID) inode->i_uid = stat->st_uid; diff --git a/fs/Kconfig b/fs/Kconfig index 18d034ec7953..7da21f563192 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -205,8 +205,8 @@ config TMPFS_XATTR Extended attributes are name:value pairs associated with inodes by the kernel or by users (see the attr(5) manual page for details). - Currently this enables support for the trusted.* and - security.* namespaces. + This enables support for the trusted.*, security.* and user.* + namespaces. You need this for POSIX ACL support on tmpfs. @@ -233,6 +233,18 @@ config TMPFS_INODE64 If unsure, say N. +config TMPFS_QUOTA + bool "Tmpfs quota support" + depends on TMPFS + select QUOTA + help + Quota support allows to set per user and group limits for tmpfs + usage. Say Y to enable quota support. Once enabled you can control + user and group quota enforcement with quota, usrquota and grpquota + mount options. + + If unsure, say N. + config ARCH_SUPPORTS_HUGETLBFS def_bool n diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index c3ac613d0975..20963002578a 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -270,7 +270,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj) inode->i_mode = adfs_atts2mode(sb, inode); adfs_adfs2unix_time(&inode->i_mtime, inode); inode->i_atime = inode->i_mtime; - inode->i_ctime = inode->i_mtime; + inode_set_ctime_to_ts(inode, inode->i_mtime); if (S_ISDIR(inode->i_mode)) { inode->i_op = &adfs_dir_inode_operations; @@ -331,7 +331,7 @@ adfs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry, if (ia_valid & ATTR_ATIME) inode->i_atime = attr->ia_atime; if (ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; + inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { ADFS_I(inode)->attr = adfs_mode2atts(sb, inode, attr->ia_mode); inode->i_mode = adfs_atts2mode(sb, inode); diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 29f11e10a7c7..7ba93efc1143 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -60,7 +60,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh) mark_buffer_dirty_inode(dir_bh, dir); affs_brelse(dir_bh); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); inode_inc_iversion(dir); mark_inode_dirty(dir); @@ -114,7 +114,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) affs_brelse(bh); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); inode_inc_iversion(dir); mark_inode_dirty(dir); @@ -315,7 +315,7 @@ affs_remove_header(struct dentry *dentry) else clear_nlink(inode); affs_unlock_link(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); done: diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 27f77a52c5c8..060746c63151 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -149,13 +149,13 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) break; } - inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec - = (be32_to_cpu(tail->change.days) * 86400LL + - be32_to_cpu(tail->change.mins) * 60 + - be32_to_cpu(tail->change.ticks) / 50 + - AFFS_EPOCH_DELTA) + - sys_tz.tz_minuteswest * 60; - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_sec = inode->i_atime.tv_sec = + inode_set_ctime(inode, + (be32_to_cpu(tail->change.days) * 86400LL + + be32_to_cpu(tail->change.mins) * 60 + + be32_to_cpu(tail->change.ticks) / 50 + AFFS_EPOCH_DELTA) + + sys_tz.tz_minuteswest * 60, 0).tv_sec; + inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0; affs_brelse(bh); unlock_new_inode(inode); return inode; @@ -314,7 +314,7 @@ affs_new_inode(struct inode *dir) inode->i_gid = current_fsgid(); inode->i_ino = block; set_nlink(inode, 1); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); atomic_set(&AFFS_I(inode)->i_opencnt, 0); AFFS_I(inode)->i_blkcnt = 0; AFFS_I(inode)->i_lc = NULL; diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index d7d9402ff718..95bcbd7654d1 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -88,7 +88,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) set_nlink(inode, 2); inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; - inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_blocks = 0; inode->i_generation = 0; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 866bab860a88..1c794a1896aa 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -90,7 +90,7 @@ static int afs_inode_init_from_status(struct afs_operation *op, vnode->status = *status; t = status->mtime_client; - inode->i_ctime = t; + inode_set_ctime_to_ts(inode, t); inode->i_mtime = t; inode->i_atime = t; inode->i_flags |= S_NOATIME; @@ -206,7 +206,7 @@ static void afs_apply_status(struct afs_operation *op, t = status->mtime_client; inode->i_mtime = t; if (vp->update_ctime) - inode->i_ctime = op->ctime; + inode_set_ctime_to_ts(inode, op->ctime); if (vnode->status.data_version != status->data_version) data_changed = true; @@ -252,7 +252,7 @@ static void afs_apply_status(struct afs_operation *op, vnode->netfs.remote_i_size = status->size; if (change_size) { afs_set_i_size(vnode, status->size); - inode->i_ctime = t; + inode_set_ctime_to_ts(inode, t); inode->i_atime = t; } } @@ -773,7 +773,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path, do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) && stat->nlink > 0) stat->nlink -= 1; @@ -1447,13 +1447,8 @@ static void aio_complete_rw(struct kiocb *kiocb, long res) if (kiocb->ki_flags & IOCB_WRITE) { struct inode *inode = file_inode(kiocb->ki_filp); - /* - * Tell lockdep we inherited freeze protection from submission - * thread. - */ if (S_ISREG(inode->i_mode)) - __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); - file_end_write(kiocb->ki_filp); + kiocb_end_write(kiocb); } iocb->ki_res.res = res; @@ -1581,17 +1576,8 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, return ret; ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) { - /* - * Open-code file_start_write here to grab freeze protection, - * which will be released by another thread in - * aio_complete_rw(). Fool lockdep by telling it the lock got - * released so that it doesn't complain about the held lock when - * we return to userspace. - */ - if (S_ISREG(file_inode(file)->i_mode)) { - sb_start_write(file_inode(file)->i_sb); - __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); - } + if (S_ISREG(file_inode(file)->i_mode)) + kiocb_start_write(req); req->ki_flags |= IOCB_WRITE; aio_rw_done(req, call_write_iter(file, req, &iter)); } diff --git a/fs/attr.c b/fs/attr.c index d60dc1edb526..a8ae5f6d9b16 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -312,7 +312,7 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode, if (ia_valid & ATTR_MTIME) inode->i_mtime = attr->ia_mtime; if (ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; + inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; if (!in_group_or_capable(idmap, inode, @@ -394,9 +394,25 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, return error; if ((ia_valid & ATTR_MODE)) { - umode_t amode = attr->ia_mode; + /* + * Don't allow changing the mode of symlinks: + * + * (1) The vfs doesn't take the mode of symlinks into account + * during permission checking. + * (2) This has never worked correctly. Most major filesystems + * did return EOPNOTSUPP due to interactions with POSIX ACLs + * but did still updated the mode of the symlink. + * This inconsistency led system call wrapper providers such + * as libc to block changing the mode of symlinks with + * EOPNOTSUPP already. + * (3) To even do this in the first place one would have to use + * specific file descriptors and quite some effort. + */ + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + /* Flag setting protected by i_mutex */ - if (is_sxid(amode)) + if (is_sxid(attr->ia_mode)) inode->i_flags &= ~S_NOSEC; } diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig index 3b3a6b1423c6..54c12d9484cb 100644 --- a/fs/autofs/Kconfig +++ b/fs/autofs/Kconfig @@ -1,18 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -config AUTOFS4_FS - tristate "Old Kconfig name for Kernel automounter support" - select AUTOFS_FS - help - This name exists for people to just automatically pick up the - new name of the autofs Kconfig option. All it does is select - the new option name. - - It will go away in a release or two as people have - transitioned to just plain AUTOFS_FS. - config AUTOFS_FS tristate "Kernel automounter support (supports v3, v4 and v5)" - default n help The automounter is a tool to automatically mount remote file systems on demand. This implementation is partially kernel-based to reduce diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index affa70360b1f..2b49662ed237 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -370,7 +370,7 @@ struct inode *autofs_get_inode(struct super_block *sb, umode_t mode) inode->i_uid = d_inode(sb->s_root)->i_uid; inode->i_gid = d_inode(sb->s_root)->i_gid; } - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_ino = get_next_ino(); if (S_ISDIR(mode)) { diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 93046c9dc461..512b9a26c63d 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap, p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); return 0; } @@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) d_inode(dentry)->i_size = 0; clear_nlink(d_inode(dentry)); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); spin_lock(&sbi->lookup_lock); __autofs_add_expiring(dentry); @@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap, p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; inc_nlink(dir); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); return 0; } diff --git a/fs/bad_inode.c b/fs/bad_inode.c index db649487d58c..83f9566c973b 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -133,8 +133,7 @@ static int bad_inode_fiemap(struct inode *inode, return -EIO; } -static int bad_inode_update_time(struct inode *inode, struct timespec64 *time, - int flags) +static int bad_inode_update_time(struct inode *inode, int flags) { return -EIO; } @@ -209,8 +208,7 @@ void make_bad_inode(struct inode *inode) remove_inode_hash(inode); inode->i_mode = S_IFREG; - inode->i_atime = inode->i_mtime = inode->i_ctime = - current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_op = &bad_inode_ops; inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &bad_file_ops; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index eee9237386e2..9a16a51fbb88 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -363,7 +363,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) inode->i_mtime.tv_sec = fs64_to_cpu(sb, raw_inode->last_modified_time) >> 16; inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */ - inode->i_ctime = inode->i_mtime; + inode_set_ctime_to_ts(inode, inode->i_mtime); inode->i_atime = inode->i_mtime; befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num); diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 040d5140e426..12b8af04dcb3 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -97,7 +97,7 @@ static int bfs_create(struct mnt_idmap *idmap, struct inode *dir, set_bit(ino, info->si_imap); info->si_freei--; inode_init_owner(&nop_mnt_idmap, inode, dir, mode); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_blocks = 0; inode->i_op = &bfs_file_inops; inode->i_fop = &bfs_file_operations; @@ -158,7 +158,7 @@ static int bfs_link(struct dentry *old, struct inode *dir, return err; } inc_nlink(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); ihold(inode); d_instantiate(new, inode); @@ -187,9 +187,9 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry) } de->ino = 0; mark_buffer_dirty_inode(bh, dir); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); error = 0; @@ -240,10 +240,10 @@ static int bfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, goto end_rename; } old_de->ino = 0; - old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); + old_dir->i_mtime = inode_set_ctime_current(old_dir); mark_inode_dirty(old_dir); if (new_inode) { - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); inode_dec_link_count(new_inode); } mark_buffer_dirty_inode(old_bh, old_dir); @@ -292,9 +292,9 @@ static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino) pos = (block - sblock) * BFS_BSIZE + off; if (pos >= dir->i_size) { dir->i_size += BFS_DIRENT_SIZE; - dir->i_ctime = current_time(dir); + inode_set_ctime_current(dir); } - dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); de->ino = cpu_to_le16((u16)ino); for (i = 0; i < BFS_NAMELEN; i++) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 1926bec2c850..e6a76ae9eb44 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -82,10 +82,9 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino) inode->i_blocks = BFS_FILEBLOCKS(di); inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); - inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); + inode_set_ctime(inode, le32_to_cpu(di->i_ctime), 0); inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; brelse(bh); unlock_new_inode(inode); @@ -143,7 +142,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) di->i_nlink = cpu_to_le32(inode->i_nlink); di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); - di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); + di->i_ctime = cpu_to_le32(inode_get_ctime(inode).tv_sec); i_sblock = BFS_I(inode)->i_sblock; di->i_sblock = cpu_to_le32(i_sblock); di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index bb202ad369d5..e0108d17b085 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -547,8 +547,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) if (inode) { inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = - current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); } return inode; } diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 48ae509f2ac2..82324c327a50 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -441,13 +441,23 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, u64 num_bytes) { struct btrfs_caching_control *caching_ctl; + int progress; caching_ctl = btrfs_get_caching_control(cache); if (!caching_ctl) return; + /* + * We've already failed to allocate from this block group, so even if + * there's enough space in the block group it isn't contiguous enough to + * allow for an allocation, so wait for at least the next wakeup tick, + * or for the thing to be done. + */ + progress = atomic_read(&caching_ctl->progress); + wait_event(caching_ctl->wait, btrfs_block_group_done(cache) || - (cache->free_space_ctl->free_space >= num_bytes)); + (progress != atomic_read(&caching_ctl->progress) && + (cache->free_space_ctl->free_space >= num_bytes))); btrfs_put_caching_control(caching_ctl); } @@ -499,12 +509,16 @@ static void fragment_free_space(struct btrfs_block_group *block_group) * used yet since their free space will be released as soon as the transaction * commits. */ -u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end) +int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end, + u64 *total_added_ret) { struct btrfs_fs_info *info = block_group->fs_info; - u64 extent_start, extent_end, size, total_added = 0; + u64 extent_start, extent_end, size; int ret; + if (total_added_ret) + *total_added_ret = 0; + while (start < end) { ret = find_first_extent_bit(&info->excluded_extents, start, &extent_start, &extent_end, @@ -517,10 +531,12 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end start = extent_end + 1; } else if (extent_start > start && extent_start < end) { size = extent_start - start; - total_added += size; ret = btrfs_add_free_space_async_trimmed(block_group, start, size); - BUG_ON(ret); /* -ENOMEM or logic error */ + if (ret) + return ret; + if (total_added_ret) + *total_added_ret += size; start = extent_end + 1; } else { break; @@ -529,13 +545,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end if (start < end) { size = end - start; - total_added += size; ret = btrfs_add_free_space_async_trimmed(block_group, start, size); - BUG_ON(ret); /* -ENOMEM or logic error */ + if (ret) + return ret; + if (total_added_ret) + *total_added_ret += size; } - return total_added; + return 0; } /* @@ -779,8 +797,13 @@ next: if (key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY) { - total_found += add_new_free_space(block_group, last, - key.objectid); + u64 space_added; + + ret = add_new_free_space(block_group, last, key.objectid, + &space_added); + if (ret) + goto out; + total_found += space_added; if (key.type == BTRFS_METADATA_ITEM_KEY) last = key.objectid + fs_info->nodesize; @@ -789,17 +812,18 @@ next: if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; - if (wakeup) + if (wakeup) { + atomic_inc(&caching_ctl->progress); wake_up(&caching_ctl->wait); + } } } path->slots[0]++; } - ret = 0; - - total_found += add_new_free_space(block_group, last, - block_group->start + block_group->length); + ret = add_new_free_space(block_group, last, + block_group->start + block_group->length, + NULL); out: btrfs_free_path(path); return ret; @@ -898,6 +922,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) init_waitqueue_head(&caching_ctl->wait); caching_ctl->block_group = cache; refcount_set(&caching_ctl->count, 2); + atomic_set(&caching_ctl->progress, 0); btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); spin_lock(&cache->lock); @@ -1640,13 +1665,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; - trace_btrfs_add_unused_block_group(bg); spin_lock(&fs_info->unused_bgs_lock); if (list_empty(&bg->bg_list)) { btrfs_get_block_group(bg); + trace_btrfs_add_unused_block_group(bg); list_add_tail(&bg->bg_list, &fs_info->unused_bgs); - } else { + } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) { /* Pull out the block group from the reclaim_bgs list. */ + trace_btrfs_add_unused_block_group(bg); list_move_tail(&bg->bg_list, &fs_info->unused_bgs); } spin_unlock(&fs_info->unused_bgs_lock); @@ -2087,6 +2113,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) /* Shouldn't have super stripes in sequential zones */ if (zoned && nr) { + kfree(logical); btrfs_err(fs_info, "zoned: block group %llu must not contain super block", cache->start); @@ -2292,9 +2319,11 @@ static int read_one_block_group(struct btrfs_fs_info *info, btrfs_free_excluded_extents(cache); } else if (cache->used == 0) { cache->cached = BTRFS_CACHE_FINISHED; - add_new_free_space(cache, cache->start, - cache->start + cache->length); + ret = add_new_free_space(cache, cache->start, + cache->start + cache->length, NULL); btrfs_free_excluded_extents(cache); + if (ret) + goto error; } ret = btrfs_add_block_group_cache(info, cache); @@ -2668,6 +2697,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) next: btrfs_delayed_refs_rsv_release(fs_info, 1); list_del_init(&block_group->bg_list); + clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags); } btrfs_trans_release_chunk_metadata(trans); } @@ -2707,6 +2737,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran if (!cache) return ERR_PTR(-ENOMEM); + /* + * Mark it as new before adding it to the rbtree of block groups or any + * list, so that no other task finds it and calls btrfs_mark_bg_unused() + * before the new flag is set. + */ + set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags); + cache->length = size; set_free_space_tree_thresholds(cache); cache->flags = type; @@ -2730,9 +2767,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran return ERR_PTR(ret); } - add_new_free_space(cache, chunk_offset, chunk_offset + size); - + ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL); btrfs_free_excluded_extents(cache); + if (ret) { + btrfs_put_block_group(cache); + return ERR_PTR(ret); + } /* * Ensure the corresponding space_info object is created and diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index f204addc3fe8..74b61e663028 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -70,6 +70,11 @@ enum btrfs_block_group_flags { BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, /* Indicate that the block group is placed on a sequential zone */ BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, + /* + * Indicate that block group is in the list of new block groups of a + * transaction. + */ + BLOCK_GROUP_FLAG_NEW, }; enum btrfs_caching_type { @@ -85,6 +90,8 @@ struct btrfs_caching_control { wait_queue_head_t wait; struct btrfs_work work; struct btrfs_block_group *block_group; + /* Track progress of caching during allocation. */ + atomic_t progress; refcount_t count; }; @@ -284,8 +291,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); void btrfs_put_caching_control(struct btrfs_caching_control *ctl); struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache); -u64 add_new_free_space(struct btrfs_block_group *block_group, - u64 start, u64 end); +int add_new_free_space(struct btrfs_block_group *block_group, + u64 start, u64 end, u64 *total_added_ret); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 6279d200cf83..77684c5e0c8b 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -349,6 +349,11 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info) } read_unlock(&fs_info->global_root_lock); + if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) { + num_bytes += btrfs_root_used(&fs_info->block_group_root->root_item); + min_items++; + } + /* * But we also want to reserve enough space so we can do the fallback * global reserve for an unlink, which is an additional diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f2d2b313bde5..9419f4e37a58 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -443,6 +443,7 @@ struct btrfs_drop_extents_args { struct btrfs_file_private { void *filldir_buf; + u64 last_index; struct extent_state *llseek_cached_state; }; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 6b457b010cbc..d37c066f2df7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1632,6 +1632,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode) } bool btrfs_readdir_get_delayed_items(struct inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list) { @@ -1651,14 +1652,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, ins_list); item = __btrfs_next_delayed_item(item); } item = __btrfs_first_delayed_deletion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, del_list); item = __btrfs_next_delayed_item(item); @@ -1808,9 +1809,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, inode->i_mtime.tv_nsec); btrfs_set_stack_timespec_sec(&inode_item->ctime, - inode->i_ctime.tv_sec); + inode_get_ctime(inode).tv_sec); btrfs_set_stack_timespec_nsec(&inode_item->ctime, - inode->i_ctime.tv_nsec); + inode_get_ctime(inode).tv_nsec); btrfs_set_stack_timespec_sec(&inode_item->otime, BTRFS_I(inode)->i_otime.tv_sec); @@ -1861,8 +1862,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime); inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime); + inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime), + btrfs_stack_timespec_nsec(&inode_item->ctime)); BTRFS_I(inode)->i_otime.tv_sec = btrfs_stack_timespec_sec(&inode_item->otime); diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 4f21daa3dbc7..dc1085b2a397 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info); /* Used for readdir() */ bool btrfs_readdir_get_delayed_items(struct inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list); void btrfs_readdir_put_delayed_items(struct inode *inode, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7513388b0567..a9a2c5446c18 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1103,7 +1103,8 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) btrfs_drew_lock_init(&root->snapshot_lock); if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && - !btrfs_is_data_reloc_root(root)) { + !btrfs_is_data_reloc_root(root) && + is_fstree(root->root_key.objectid)) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } @@ -1300,6 +1301,16 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, root = btrfs_get_global_root(fs_info, objectid); if (root) return root; + + /* + * If we're called for non-subvolume trees, and above function didn't + * find one, do not try to read it from disk. + * + * This is namely for free-space-tree and quota tree, which can change + * at runtime and should only be grabbed from fs_info. + */ + if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) + return ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { @@ -3438,11 +3449,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device * For devices supporting discard turn on discard=async automatically, * unless it's already set or disabled. This could be turned off by * nodiscard for the same mount. + * + * The zoned mode piggy backs on the discard functionality for + * resetting a zone. There is no reason to delay the zone reset as it is + * fast enough. So, do not enable async discard for zoned mode. */ if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) || btrfs_test_opt(fs_info, DISCARD_ASYNC) || btrfs_test_opt(fs_info, NODISCARD)) && - fs_info->fs_devices->discardable) { + fs_info->fs_devices->discardable && + !btrfs_is_zoned(fs_info)) { btrfs_set_and_info(fs_info, DISCARD_ASYNC, "auto enabling async discard"); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 911908ea5f6f..f396a9afa403 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4310,8 +4310,11 @@ have_block_group: ret = 0; } - if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) { + if (!cache_block_group_error) + cache_block_group_error = -EIO; goto loop; + } if (!find_free_extent_check_size_class(ffe_ctl, block_group)) goto loop; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a91d5ad27984..90ad3006ef3a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -902,7 +902,30 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl, size -= len; pg_offset += len; disk_bytenr += len; - bio_ctrl->len_to_oe_boundary -= len; + + /* + * len_to_oe_boundary defaults to U32_MAX, which isn't page or + * sector aligned. alloc_new_bio() then sets it to the end of + * our ordered extent for writes into zoned devices. + * + * When len_to_oe_boundary is tracking an ordered extent, we + * trust the ordered extent code to align things properly, and + * the check above to cap our write to the ordered extent + * boundary is correct. + * + * When len_to_oe_boundary is U32_MAX, the cap above would + * result in a 4095 byte IO for the last page right before + * we hit the bio limit of UINT_MAX. bio_add_page() has all + * the checks required to make sure we don't overflow the bio, + * and we should just ignore len_to_oe_boundary completely + * unless we're using it to track an ordered extent. + * + * It's pretty hard to make a bio sized U32_MAX, but it can + * happen when the page cache is able to feed us contiguous + * pages for large extents. + */ + if (bio_ctrl->len_to_oe_boundary != U32_MAX) + bio_ctrl->len_to_oe_boundary -= len; /* Ordered extent boundary: move on to a new bio. */ if (bio_ctrl->len_to_oe_boundary == 0) @@ -2145,6 +2168,12 @@ retry: continue; } + if (!folio_test_dirty(folio)) { + /* Someone wrote it for us. */ + folio_unlock(folio); + continue; + } + if (wbc->sync_mode != WB_SYNC_NONE) { if (folio_test_writeback(folio)) submit_write_bio(bio_ctrl, 0); @@ -2164,11 +2193,12 @@ retry: } /* - * the filesystem may choose to bump up nr_to_write. + * The filesystem may choose to bump up nr_to_write. * We have to make sure to honor the new nr_to_write - * at any time + * at any time. */ - nr_to_write_done = wbc->nr_to_write <= 0; + nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE && + wbc->nr_to_write <= 0); } folio_batch_release(&fbatch); cond_resched(); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0cdb3e86f29b..a6d8368ed0ed 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -760,8 +760,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { start = em_end; - if (end != (u64)-1) - len = start + len - em_end; goto next; } @@ -829,8 +827,8 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, if (!split) goto remove_em; } - split->start = start + len; - split->len = em_end - (start + len); + split->start = end; + split->len = em_end - end; split->block_start = em->block_start; split->flags = flags; split->compress_type = em->compress_type; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fd03e689a6be..b9e75c9f95ac 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1106,24 +1106,6 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode) btrfs_drew_write_unlock(&inode->root->snapshot_lock); } -static void update_time_for_write(struct inode *inode) -{ - struct timespec64 now; - - if (IS_NOCMTIME(inode)) - return; - - now = current_time(inode); - if (!timespec64_equal(&inode->i_mtime, &now)) - inode->i_mtime = now; - - if (!timespec64_equal(&inode->i_ctime, &now)) - inode->i_ctime = now; - - if (IS_I_VERSION(inode)) - inode_inc_iversion(inode); -} - static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count) { @@ -1155,7 +1137,10 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, * need to start yet another transaction to update the inode as we will * update the inode when we finish writing whatever data we write. */ - update_time_for_write(inode); + if (!IS_NOCMTIME(inode)) { + inode->i_mtime = inode_set_ctime_current(inode); + inode_inc_iversion(inode); + } start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); @@ -2459,10 +2444,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, */ inode_inc_iversion(&inode->vfs_inode); - if (!extent_info || extent_info->update_times) { - inode->vfs_inode.i_mtime = current_time(&inode->vfs_inode); - inode->vfs_inode.i_ctime = inode->vfs_inode.i_mtime; - } + if (!extent_info || extent_info->update_times) + inode->vfs_inode.i_mtime = inode_set_ctime_current(&inode->vfs_inode); ret = btrfs_update_inode(trans, root, inode); if (ret) @@ -2703,8 +2686,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) ASSERT(trans != NULL); inode_inc_iversion(inode); - inode->i_mtime = current_time(inode); - inode->i_ctime = inode->i_mtime; + inode->i_mtime = inode_set_ctime_current(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); updated_inode = true; btrfs_end_transaction(trans); @@ -2721,11 +2703,10 @@ out_only_mutex: * for detecting, at fsync time, if the inode isn't yet in the * log tree or it's there but not up to date. */ - struct timespec64 now = current_time(inode); + struct timespec64 now = inode_set_ctime_current(inode); inode_inc_iversion(inode); inode->i_mtime = now; - inode->i_ctime = now; trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -2796,7 +2777,7 @@ static int btrfs_fallocate_update_isize(struct inode *inode, if (IS_ERR(trans)) return PTR_ERR(trans); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); i_size_write(inode, end); btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 045ddce32eca..f169378e2ca6 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1515,9 +1515,13 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, if (prev_bit == 0 && bit == 1) { extent_start = offset; } else if (prev_bit == 1 && bit == 0) { - total_found += add_new_free_space(block_group, - extent_start, - offset); + u64 space_added; + + ret = add_new_free_space(block_group, extent_start, + offset, &space_added); + if (ret) + goto out; + total_found += space_added; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; wake_up(&caching_ctl->wait); @@ -1529,8 +1533,9 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, } } if (prev_bit == 1) { - total_found += add_new_free_space(block_group, extent_start, - end); + ret = add_new_free_space(block_group, extent_start, end, NULL); + if (ret) + goto out; extent_count++; } @@ -1569,6 +1574,8 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, end = block_group->start + block_group->length; while (1) { + u64 space_added; + ret = btrfs_next_item(root, path); if (ret < 0) goto out; @@ -1583,8 +1590,11 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); ASSERT(key.objectid < end && key.objectid + key.offset <= end); - total_found += add_new_free_space(block_group, key.objectid, - key.objectid + key.offset); + ret = add_new_free_space(block_group, key.objectid, + key.objectid + key.offset, &space_added); + if (ret) + goto out; + total_found += space_added; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; wake_up(&caching_ctl->wait); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dbbb67293e34..4e987fe6d648 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1654,8 +1654,6 @@ out_unlock: clear_bits, page_ops); start += cur_alloc_size; - if (start >= end) - return ret; } /* @@ -1664,9 +1662,11 @@ out_unlock: * space_info's bytes_may_use counter, reserved in * btrfs_check_data_free_space(). */ - extent_clear_unlock_delalloc(inode, start, end, locked_page, - clear_bits | EXTENT_CLEAR_DATA_RESV, - page_ops); + if (start < end) { + clear_bits |= EXTENT_CLEAR_DATA_RESV; + extent_clear_unlock_delalloc(inode, start, end, locked_page, + clear_bits, page_ops); + } return ret; } @@ -3482,15 +3482,21 @@ zeroit: void btrfs_add_delayed_iput(struct btrfs_inode *inode) { struct btrfs_fs_info *fs_info = inode->root->fs_info; + unsigned long flags; if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1)) return; atomic_inc(&fs_info->nr_delayed_iputs); - spin_lock(&fs_info->delayed_iput_lock); + /* + * Need to be irq safe here because we can be called from either an irq + * context (see bio.c and btrfs_put_ordered_extent()) or a non-irq + * context. + */ + spin_lock_irqsave(&fs_info->delayed_iput_lock, flags); ASSERT(list_empty(&inode->delayed_iput)); list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags); if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) wake_up_process(fs_info->cleaner_kthread); } @@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) { list_del_init(&inode->delayed_iput); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); iput(&inode->vfs_inode); if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) wake_up(&fs_info->delayed_iputs_wait); - spin_lock(&fs_info->delayed_iput_lock); + spin_lock_irq(&fs_info->delayed_iput_lock); } static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) { if (!list_empty(&inode->delayed_iput)) { - spin_lock(&fs_info->delayed_iput_lock); + spin_lock_irq(&fs_info->delayed_iput_lock); if (!list_empty(&inode->delayed_iput)) run_delayed_iput_locked(fs_info, inode); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); } } void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) { - - spin_lock(&fs_info->delayed_iput_lock); + /* + * btrfs_put_ordered_extent() can run in irq context (see bio.c), which + * calls btrfs_add_delayed_iput() and that needs to lock + * fs_info->delayed_iput_lock. So we need to disable irqs here to + * prevent a deadlock. + */ + spin_lock_irq(&fs_info->delayed_iput_lock); while (!list_empty(&fs_info->delayed_iputs)) { struct btrfs_inode *inode; inode = list_first_entry(&fs_info->delayed_iputs, struct btrfs_inode, delayed_iput); run_delayed_iput_locked(fs_info, inode); - cond_resched_lock(&fs_info->delayed_iput_lock); + if (need_resched()) { + spin_unlock_irq(&fs_info->delayed_iput_lock); + cond_resched(); + spin_lock_irq(&fs_info->delayed_iput_lock); + } } - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); } /* @@ -3659,11 +3674,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(fs_info->sb, last_objectid, root); - ret = PTR_ERR_OR_ZERO(inode); - if (ret && ret != -ENOENT) - goto out; + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; + if (ret != -ENOENT) + goto out; + } - if (ret == -ENOENT && root == fs_info->tree_root) { + if (!inode && root == fs_info->tree_root) { struct btrfs_root *dead_root; int is_dead_root = 0; @@ -3724,17 +3742,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * deleted but wasn't. The inode number may have been reused, * but either way, we can delete the orphan item. */ - if (ret == -ENOENT || inode->i_nlink) { - if (!ret) { + if (!inode || inode->i_nlink) { + if (inode) { ret = btrfs_drop_verity_items(BTRFS_I(inode)); iput(inode); + inode = NULL; if (ret) goto out; } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - iput(inode); goto out; } btrfs_debug(fs_info, "auto deleting %Lu", @@ -3742,10 +3760,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = btrfs_del_orphan_item(trans, root, found_key.objectid); btrfs_end_transaction(trans); - if (ret) { - iput(inode); + if (ret) goto out; - } continue; } @@ -3901,8 +3917,8 @@ static int btrfs_read_locked_inode(struct inode *inode, inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime); inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime); + inode_set_ctime(inode, btrfs_timespec_sec(leaf, &inode_item->ctime), + btrfs_timespec_nsec(leaf, &inode_item->ctime)); BTRFS_I(inode)->i_otime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->otime); @@ -4073,9 +4089,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, inode->i_mtime.tv_nsec); btrfs_set_token_timespec_sec(&token, &item->ctime, - inode->i_ctime.tv_sec); + inode_get_ctime(inode).tv_sec); btrfs_set_token_timespec_nsec(&token, &item->ctime, - inode->i_ctime.tv_nsec); + inode_get_ctime(inode).tv_nsec); btrfs_set_token_timespec_sec(&token, &item->otime, BTRFS_I(inode)->i_otime.tv_sec); @@ -4273,9 +4289,8 @@ err: btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); - inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode); - dir->vfs_inode.i_mtime = inode->vfs_inode.i_ctime; - dir->vfs_inode.i_ctime = inode->vfs_inode.i_ctime; + inode_set_ctime_current(&inode->vfs_inode); + dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode); ret = btrfs_update_inode(trans, root, dir); out: return ret; @@ -4448,8 +4463,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_i_size_write(dir, dir->vfs_inode.i_size - fname.disk_name.len * 2); inode_inc_iversion(&dir->vfs_inode); - dir->vfs_inode.i_mtime = current_time(&dir->vfs_inode); - dir->vfs_inode.i_ctime = dir->vfs_inode.i_mtime; + dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode); ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) btrfs_abort_transaction(trans, ret); @@ -4847,9 +4861,6 @@ again: ret = -ENOMEM; goto out; } - ret = set_page_extent_mapped(page); - if (ret < 0) - goto out_unlock; if (!PageUptodate(page)) { ret = btrfs_read_folio(NULL, page_folio(page)); @@ -4864,6 +4875,17 @@ again: goto out_unlock; } } + + /* + * We unlock the page after the io is completed and then re-lock it + * above. release_folio() could have come in between that and cleared + * PagePrivate(), but left the page in the mapping. Set the page mapped + * here to make sure it's properly set for the subpage stuff. + */ + ret = set_page_extent_mapped(page); + if (ret < 0) + goto out_unlock; + wait_on_page_writeback(page); lock_extent(io_tree, block_start, block_end, &cached_state); @@ -5091,8 +5113,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) if (newsize != oldsize) { inode_inc_iversion(inode); if (!(mask & (ATTR_CTIME | ATTR_MTIME))) { - inode->i_mtime = current_time(inode); - inode->i_ctime = inode->i_mtime; + inode->i_mtime = inode_set_ctime_current(inode); } } @@ -5736,9 +5757,8 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; - inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); inode->i_atime = inode->i_mtime; - inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; return inode; @@ -5849,6 +5869,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, } /* + * Find the highest existing sequence number in a directory and then set the + * in-memory index_cnt variable to the first free sequence number. + */ +static int btrfs_set_inode_index_count(struct btrfs_inode *inode) +{ + struct btrfs_root *root = inode->root; + struct btrfs_key key, found_key; + struct btrfs_path *path; + struct extent_buffer *leaf; + int ret; + + key.objectid = btrfs_ino(inode); + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + /* FIXME: we should be able to handle this */ + if (ret == 0) + goto out; + ret = 0; + + if (path->slots[0] == 0) { + inode->index_cnt = BTRFS_DIR_START_INDEX; + goto out; + } + + path->slots[0]--; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != btrfs_ino(inode) || + found_key.type != BTRFS_DIR_INDEX_KEY) { + inode->index_cnt = BTRFS_DIR_START_INDEX; + goto out; + } + + inode->index_cnt = found_key.offset + 1; +out: + btrfs_free_path(path); + return ret; +} + +static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) +{ + if (dir->index_cnt == (u64)-1) { + int ret; + + ret = btrfs_inode_delayed_dir_index_count(dir); + if (ret) { + ret = btrfs_set_inode_index_count(dir); + if (ret) + return ret; + } + } + + *index = dir->index_cnt; + + return 0; +} + +/* * All this infrastructure exists because dir_emit can fault, and we are holding * the tree lock when doing readdir. For now just allocate a buffer and copy * our information into that, and then dir_emit from the buffer. This is @@ -5860,10 +5948,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, static int btrfs_opendir(struct inode *inode, struct file *file) { struct btrfs_file_private *private; + u64 last_index; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index); + if (ret) + return ret; private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL); if (!private) return -ENOMEM; + private->last_index = last_index; private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!private->filldir_buf) { kfree(private); @@ -5930,7 +6025,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&del_list); - put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); + put = btrfs_readdir_get_delayed_items(inode, private->last_index, + &ins_list, &del_list); again: key.type = BTRFS_DIR_INDEX_KEY; @@ -5948,6 +6044,8 @@ again: break; if (found_key.offset < ctx->pos) continue; + if (found_key.offset > private->last_index) + break; if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) continue; di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); @@ -6063,8 +6161,7 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode) * This is a copy of file_update_time. We need this so we can return error on * ENOSPC for updating the inode in the case of file write and mmap writes. */ -static int btrfs_update_time(struct inode *inode, struct timespec64 *now, - int flags) +static int btrfs_update_time(struct inode *inode, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; bool dirty = flags & ~S_VERSION; @@ -6072,69 +6169,11 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now, if (btrfs_root_readonly(root)) return -EROFS; - if (flags & S_VERSION) - dirty |= inode_maybe_inc_iversion(inode, dirty); - if (flags & S_CTIME) - inode->i_ctime = *now; - if (flags & S_MTIME) - inode->i_mtime = *now; - if (flags & S_ATIME) - inode->i_atime = *now; + dirty = inode_update_timestamps(inode, flags); return dirty ? btrfs_dirty_inode(BTRFS_I(inode)) : 0; } /* - * find the highest existing sequence number in a directory - * and then set the in-memory index_cnt variable to reflect - * free sequence numbers - */ -static int btrfs_set_inode_index_count(struct btrfs_inode *inode) -{ - struct btrfs_root *root = inode->root; - struct btrfs_key key, found_key; - struct btrfs_path *path; - struct extent_buffer *leaf; - int ret; - - key.objectid = btrfs_ino(inode); - key.type = BTRFS_DIR_INDEX_KEY; - key.offset = (u64)-1; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; - /* FIXME: we should be able to handle this */ - if (ret == 0) - goto out; - ret = 0; - - if (path->slots[0] == 0) { - inode->index_cnt = BTRFS_DIR_START_INDEX; - goto out; - } - - path->slots[0]--; - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != btrfs_ino(inode) || - found_key.type != BTRFS_DIR_INDEX_KEY) { - inode->index_cnt = BTRFS_DIR_START_INDEX; - goto out; - } - - inode->index_cnt = found_key.offset + 1; -out: - btrfs_free_path(path); - return ret; -} - -/* * helper to find a free sequence number in a given directory. This current * code is very simple, later versions will do smarter things in the btree */ @@ -6378,9 +6417,8 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, goto discard; } - inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); inode->i_atime = inode->i_mtime; - inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; /* @@ -6545,12 +6583,10 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, * log replay procedure is responsible for setting them to their correct * values (the ones it had when the fsync was done). */ - if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) { - struct timespec64 now = current_time(&parent_inode->vfs_inode); + if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) + parent_inode->vfs_inode.i_mtime = + inode_set_ctime_current(&parent_inode->vfs_inode); - parent_inode->vfs_inode.i_mtime = now; - parent_inode->vfs_inode.i_ctime = now; - } ret = btrfs_update_inode(trans, root, parent_inode); if (ret) btrfs_abort_transaction(trans, ret); @@ -6690,7 +6726,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inc_nlink(inode); inode_inc_iversion(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); @@ -7849,8 +7885,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio, ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered); if (ret) { - bbio->bio.bi_status = errno_to_blk_status(ret); - btrfs_dio_end_io(bbio); + btrfs_finish_ordered_extent(dio_data->ordered, NULL, + file_offset, dip->bytes, + !ret); + bio->bi_status = errno_to_blk_status(ret); + iomap_dio_bio_end_io(bio); return; } } @@ -8753,7 +8792,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap, STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); - generic_fillattr(idmap, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); stat->dev = BTRFS_I(inode)->root->anon_dev; spin_lock(&BTRFS_I(inode)->lock); @@ -8777,7 +8816,6 @@ static int btrfs_rename_exchange(struct inode *old_dir, struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; - struct timespec64 ctime = current_time(old_inode); struct btrfs_rename_ctx old_rename_ctx; struct btrfs_rename_ctx new_rename_ctx; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); @@ -8908,12 +8946,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, inode_inc_iversion(new_dir); inode_inc_iversion(old_inode); inode_inc_iversion(new_inode); - old_dir->i_mtime = ctime; - old_dir->i_ctime = ctime; - new_dir->i_mtime = ctime; - new_dir->i_ctime = ctime; - old_inode->i_ctime = ctime; - new_inode->i_ctime = ctime; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); if (old_dentry->d_parent != new_dentry->d_parent) { btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), @@ -9177,11 +9210,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, inode_inc_iversion(old_dir); inode_inc_iversion(new_dir); inode_inc_iversion(old_inode); - old_dir->i_mtime = current_time(old_dir); - old_dir->i_ctime = old_dir->i_mtime; - new_dir->i_mtime = old_dir->i_mtime; - new_dir->i_ctime = old_dir->i_mtime; - old_inode->i_ctime = old_dir->i_mtime; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), @@ -9203,7 +9232,6 @@ static int btrfs_rename(struct mnt_idmap *idmap, if (new_inode) { inode_inc_iversion(new_inode); - new_inode->i_ctime = current_time(new_inode); if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry); @@ -9743,7 +9771,7 @@ next: *alloc_hint = ins.objectid + ins.offset; inode_inc_iversion(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && (actual_len > inode->i_size) && diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a895d105464b..a18ee7b5a166 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -384,7 +384,7 @@ update_flags: binode->flags = binode_flags; btrfs_sync_inode_flags_to_i_flags(inode); inode_inc_iversion(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); out_end_trans: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index da1f84a0eb29..2637d6b157ff 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4445,4 +4445,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) ulist_free(entry->old_roots); kfree(entry); } + *root = RB_ROOT; } diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f37b925d587f..0249ea52bb80 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work); static void index_rbio_pages(struct btrfs_raid_bio *rbio); static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); -static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check); +static int finish_parity_scrub(struct btrfs_raid_bio *rbio); static void scrub_rbio_work_locked(struct work_struct *work); static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio) @@ -2404,7 +2404,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) return 0; } -static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) +static int finish_parity_scrub(struct btrfs_raid_bio *rbio) { struct btrfs_io_context *bioc = rbio->bioc; const u32 sectorsize = bioc->fs_info->sectorsize; @@ -2445,9 +2445,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) */ clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); - if (!need_check) - goto writeback; - p_sector.page = alloc_page(GFP_NOFS); if (!p_sector.page) return -ENOMEM; @@ -2516,7 +2513,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) q_sector.page = NULL; } -writeback: /* * time to start writing. Make bios for everything from the * higher layers (the bio_list in our rbio) and our p/q. Ignore @@ -2699,7 +2695,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio) static void scrub_rbio(struct btrfs_raid_bio *rbio) { - bool need_check = false; int sector_nr; int ret; @@ -2722,7 +2717,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio) * We have every sector properly prepared. Can finish the scrub * and writeback the good content. */ - ret = finish_parity_scrub(rbio, need_check); + ret = finish_parity_scrub(rbio); wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { int found_errors; diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index 0474bbe39da7..65d2bd6910f2 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -30,8 +30,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, inode_inc_iversion(inode); if (!no_time_update) { - inode->i_mtime = current_time(inode); - inode->i_ctime = inode->i_mtime; + inode->i_mtime = inode_set_ctime_current(inode); } /* * We round up to the block size at eof when determining which diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 25a3361caedc..46c3c1d57266 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1916,7 +1916,39 @@ again: err = PTR_ERR(root); break; } - ASSERT(root->reloc_root == reloc_root); + + if (unlikely(root->reloc_root != reloc_root)) { + if (root->reloc_root) { + btrfs_err(fs_info, +"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu", + root->root_key.objectid, + root->reloc_root->root_key.objectid, + root->reloc_root->root_key.type, + root->reloc_root->root_key.offset, + btrfs_root_generation( + &root->reloc_root->root_item), + reloc_root->root_key.objectid, + reloc_root->root_key.type, + reloc_root->root_key.offset, + btrfs_root_generation( + &reloc_root->root_item)); + } else { + btrfs_err(fs_info, +"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu", + root->root_key.objectid, + reloc_root->root_key.objectid, + reloc_root->root_key.type, + reloc_root->root_key.offset, + btrfs_root_generation( + &reloc_root->root_item)); + } + list_add(&reloc_root->root_list, &reloc_roots); + btrfs_put_root(root); + btrfs_abort_transaction(trans, -EUCLEAN); + if (!err) + err = -EUCLEAN; + break; + } /* * set reference count to 1, so btrfs_recover_relocation @@ -1989,7 +2021,7 @@ again: root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false); if (btrfs_root_refs(&reloc_root->root_item) > 0) { - if (IS_ERR(root)) { + if (WARN_ON(IS_ERR(root))) { /* * For recovery we read the fs roots on mount, * and if we didn't find the root then we marked @@ -1998,17 +2030,14 @@ again: * memory. However there's no reason we can't * handle the error properly here just in case. */ - ASSERT(0); ret = PTR_ERR(root); goto out; } - if (root->reloc_root != reloc_root) { + if (WARN_ON(root->reloc_root != reloc_root)) { /* - * This is actually impossible without something - * going really wrong (like weird race condition - * or cosmic rays). + * This can happen if on-disk metadata has some + * corruption, e.g. bad reloc tree key offset. */ - ASSERT(0); ret = -EINVAL; goto out; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4cae41bd6de0..7289f5bff397 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -605,7 +605,8 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr btrfs_stack_header_bytenr(header), logical); return; } - if (memcmp(header->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE) != 0) { + if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid, + BTRFS_FSID_SIZE) != 0) { bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f1dd172d8d5b..8eda51b095c9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2144,7 +2144,7 @@ static struct file_system_type btrfs_fs_type = { .name = "btrfs", .mount = btrfs_mount, .kill_sb = btrfs_kill_super, - .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_MGTIME, }; static struct file_system_type btrfs_root_fs_type = { @@ -2152,7 +2152,8 @@ static struct file_system_type btrfs_root_fs_type = { .name = "btrfs", .mount = btrfs_mount_root, .kill_sb = btrfs_kill_super, - .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | + FS_ALLOW_IDMAP | FS_MGTIME, }; MODULE_ALIAS_FS("btrfs"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cf306351b148..a10e38c2609f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -826,8 +826,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) trans = start_transaction(root, 0, TRANS_ATTACH, BTRFS_RESERVE_NO_FLUSH, true); - if (trans == ERR_PTR(-ENOENT)) - btrfs_wait_for_commit(root->fs_info, 0); + if (trans == ERR_PTR(-ENOENT)) { + int ret; + + ret = btrfs_wait_for_commit(root->fs_info, 0); + if (ret) + return ERR_PTR(ret); + } return trans; } @@ -931,6 +936,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid) } wait_for_commit(cur_trans, TRANS_STATE_COMPLETED); + ret = cur_trans->aborted; btrfs_put_transaction(cur_trans); out: return ret; @@ -1831,8 +1837,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + fname.disk_name.len * 2); - parent_inode->i_mtime = current_time(parent_inode); - parent_inode->i_ctime = parent_inode->i_mtime; + parent_inode->i_mtime = inode_set_ctime_current(parent_inode); ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode)); if (ret) { btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 038dfa8f1788..ab08a0b01311 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -446,6 +446,20 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key, btrfs_item_key_to_cpu(leaf, &item_key, slot); is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY); + /* + * Bad rootid for reloc trees. + * + * Reloc trees are only for subvolume trees, other trees only need + * to be COWed to be relocated. + */ + if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID && + !is_fstree(key->offset))) { + generic_err(leaf, slot, + "invalid reloc tree for root %lld, root id is not a subvolume tree", + key->offset); + return -EUCLEAN; + } + /* No such tree id */ if (unlikely(key->objectid == 0)) { if (is_root_item) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 365a1cc0a3c3..ffcff7188170 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4148,9 +4148,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, inode->i_mtime.tv_nsec); btrfs_set_token_timespec_sec(&token, &item->ctime, - inode->i_ctime.tv_sec); + inode_get_ctime(inode).tv_sec); btrfs_set_token_timespec_nsec(&token, &item->ctime, - inode->i_ctime.tv_nsec); + inode_get_ctime(inode).tv_nsec); /* * We do not need to set the nbytes field, in fact during a fast fsync diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 73f9ea7672db..f6718999d183 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1917,15 +1917,13 @@ out: static void update_dev_time(const char *device_path) { struct path path; - struct timespec64 now; int ret; ret = kern_path(device_path, LOOKUP_FOLLOW, &path); if (ret) return; - now = current_time(d_inode(path.dentry)); - inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME | S_VERSION); + inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION); path_put(&path); } @@ -4078,14 +4076,6 @@ static int alloc_profile_is_valid(u64 flags, int extended) return has_single_bit_set(flags); } -static inline int balance_need_close(struct btrfs_fs_info *fs_info) -{ - /* cancel requested || normal exit path */ - return atomic_read(&fs_info->balance_cancel_req) || - (atomic_read(&fs_info->balance_pause_req) == 0 && - atomic_read(&fs_info->balance_cancel_req) == 0); -} - /* * Validate target profile against allowed profiles and return true if it's OK. * Otherwise print the error message and return false. @@ -4275,6 +4265,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, u64 num_devices; unsigned seq; bool reducing_redundancy; + bool paused = false; int i; if (btrfs_fs_closing(fs_info) || @@ -4405,6 +4396,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) { btrfs_info(fs_info, "balance: paused"); btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED); + paused = true; } /* * Balance can be canceled by: @@ -4433,8 +4425,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, btrfs_update_ioctl_balance_args(fs_info, bargs); } - if ((ret && ret != -ECANCELED && ret != -ENOSPC) || - balance_need_close(fs_info)) { + /* We didn't pause, we can clean everything up. */ + if (!paused) { reset_balance_state(fs_info); btrfs_exclop_finish(fs_info); } @@ -4644,8 +4636,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) } } - BUG_ON(fs_info->balance_ctl || - test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)); + ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)); atomic_dec(&fs_info->balance_cancel_req); mutex_unlock(&fs_info->balance_mutex); return 0; @@ -6404,7 +6395,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, (op == BTRFS_MAP_READ || !dev_replace_is_ongoing || !dev_replace->tgtdev)) { set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr); - *mirror_num_ret = mirror_num; + if (mirror_num_ret) + *mirror_num_ret = mirror_num; *bioc_ret = NULL; ret = 0; goto out; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index fc4b20c2688a..96828a13dd43 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -264,7 +264,7 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, goto out; inode_inc_iversion(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); if (ret) btrfs_abort_transaction(trans, ret); @@ -407,7 +407,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, ret = btrfs_set_prop(trans, inode, name, value, size, flags); if (!ret) { inode_inc_iversion(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); if (ret) btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 85b8b332add9..72b90bc19a19 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -805,6 +805,9 @@ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info) return -EINVAL; } + btrfs_clear_and_info(info, DISCARD_ASYNC, + "zoned: async discard ignored and disabled for zoned mode"); + return 0; } diff --git a/fs/buffer.c b/fs/buffer.c index bd091329026c..084a6ade108a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -49,6 +49,7 @@ #include <trace/events/block.h> #include <linux/fscrypt.h> #include <linux/fsverity.h> +#include <linux/sched/isolation.h> #include "internal.h" @@ -1225,19 +1226,14 @@ EXPORT_SYMBOL(mark_buffer_dirty); void mark_buffer_write_io_error(struct buffer_head *bh) { - struct super_block *sb; - set_buffer_write_io_error(bh); /* FIXME: do we need to set this in both places? */ if (bh->b_folio && bh->b_folio->mapping) mapping_set_error(bh->b_folio->mapping, -EIO); - if (bh->b_assoc_map) + if (bh->b_assoc_map) { mapping_set_error(bh->b_assoc_map, -EIO); - rcu_read_lock(); - sb = READ_ONCE(bh->b_bdev->bd_super); - if (sb) - errseq_set(&sb->s_wb_err, -EIO); - rcu_read_unlock(); + errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO); + } } EXPORT_SYMBOL(mark_buffer_write_io_error); @@ -1352,7 +1348,7 @@ static void bh_lru_install(struct buffer_head *bh) * failing page migration. * Skip putting upcoming bh into bh_lru until migration is done. */ - if (lru_cache_disabled()) { + if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) { bh_lru_unlock(); return; } @@ -1382,6 +1378,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) check_irqs_on(); bh_lru_lock(); + if (cpu_is_isolated(smp_processor_id())) { + bh_lru_unlock(); + return NULL; + } for (i = 0; i < BH_LRU_SIZE; i++) { struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]); diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 175a25fcade8..009d23cd435b 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -259,9 +259,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret) _enter("%ld", ret); - /* Tell lockdep we inherited freeze protection from submission thread */ - __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); - __sb_end_write(inode->i_sb, SB_FREEZE_WRITE); + kiocb_end_write(iocb); if (ret < 0) trace_cachefiles_io_error(object, inode, ret, @@ -286,7 +284,6 @@ int __cachefiles_write(struct cachefiles_object *object, { struct cachefiles_cache *cache; struct cachefiles_kiocb *ki; - struct inode *inode; unsigned int old_nofs; ssize_t ret; size_t len = iov_iter_count(iter); @@ -322,19 +319,12 @@ int __cachefiles_write(struct cachefiles_object *object, ki->iocb.ki_complete = cachefiles_write_complete; atomic_long_add(ki->b_writing, &cache->b_writing); - /* Open-code file_start_write here to grab freeze protection, which - * will be released by another thread in aio_complete_rw(). Fool - * lockdep by telling it the lock got released so that it doesn't - * complain about the held lock when we return to userspace. - */ - inode = file_inode(file); - __sb_start_write(inode->i_sb, SB_FREEZE_WRITE); - __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); + kiocb_start_write(&ki->iocb); get_file(ki->iocb.ki_filp); cachefiles_grab_object(object, cachefiles_obj_get_ioreq); - trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len); + trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len); old_nofs = memalloc_nofs_save(); ret = cachefiles_inject_write_error(); if (ret == 0) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 6945a938d396..c91b293267d7 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -93,7 +93,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, char *value = NULL; struct iattr newattrs; struct inode *inode = d_inode(dentry); - struct timespec64 old_ctime = inode->i_ctime; + struct timespec64 old_ctime = inode_get_ctime(inode); umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; if (ceph_snap(inode) != CEPH_NOSNAP) { diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e2bb0d0072da..09cd6d334604 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1400,7 +1400,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, arg->mtime = inode->i_mtime; arg->atime = inode->i_atime; - arg->ctime = inode->i_ctime; + arg->ctime = inode_get_ctime(inode); arg->btime = ci->i_btime; arg->change_attr = inode_peek_iversion_raw(inode); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4a2b39d9a61a..bdcffb04513f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -2019,9 +2019,10 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) } } +WRAP_DIR_ITER(ceph_readdir) // FIXME! const struct file_operations ceph_dir_fops = { .read = ceph_read_dir, - .iterate = ceph_readdir, + .iterate_shared = shared_ceph_readdir, .llseek = ceph_dir_llseek, .open = ceph_open, .release = ceph_release, @@ -2033,7 +2034,7 @@ const struct file_operations ceph_dir_fops = { }; const struct file_operations ceph_snapdir_fops = { - .iterate = ceph_readdir, + .iterate_shared = shared_ceph_readdir, .llseek = ceph_dir_llseek, .open = ceph_open, .release = ceph_release, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8e5f41d45283..fd05d68e2990 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -100,7 +100,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) inode->i_uid = parent->i_uid; inode->i_gid = parent->i_gid; inode->i_mtime = parent->i_mtime; - inode->i_ctime = parent->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(parent)); inode->i_atime = parent->i_atime; ci->i_rbytes = 0; ci->i_btime = ceph_inode(parent)->i_btime; @@ -688,6 +688,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, struct timespec64 *mtime, struct timespec64 *atime) { struct ceph_inode_info *ci = ceph_inode(inode); + struct timespec64 ictime = inode_get_ctime(inode); int warn = 0; if (issued & (CEPH_CAP_FILE_EXCL| @@ -696,11 +697,11 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { if (ci->i_version == 0 || - timespec64_compare(ctime, &inode->i_ctime) > 0) { + timespec64_compare(ctime, &ictime) > 0) { dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", - inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + ictime.tv_sec, ictime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); - inode->i_ctime = *ctime; + inode_set_ctime_to_ts(inode, *ctime); } if (ci->i_version == 0 || ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { @@ -738,7 +739,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, } else { /* we have no write|excl caps; whatever the MDS says is true */ if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { - inode->i_ctime = *ctime; + inode_set_ctime_to_ts(inode, *ctime); inode->i_mtime = *mtime; inode->i_atime = *atime; ci->i_time_warp_seq = time_warp_seq; @@ -2166,7 +2167,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME| ATTR_MODE|ATTR_UID|ATTR_GID)) == 0; dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode, - inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + inode_get_ctime(inode).tv_sec, + inode_get_ctime(inode).tv_nsec, attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, only ? "ctime only" : "ignored"); if (only) { @@ -2191,7 +2193,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (dirtied) { inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, &prealloc_cf); - inode->i_ctime = attr->ia_ctime; + inode_set_ctime_to_ts(inode, attr->ia_ctime); inode_inc_iversion_raw(inode); } @@ -2465,7 +2467,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, return err; } - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->ino = ceph_present_inode(inode); /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 66048a86c480..5fb367b1d4b0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work) dout("mdsc delayed_work\n"); - if (mdsc->stopping) + if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) return; mutex_lock(&mdsc->mutex); @@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s) void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) { dout("pre_umount\n"); - mdsc->stopping = 1; + mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 724307ff89cd..86d2965e68a1 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -380,6 +380,11 @@ struct cap_wait { int want; }; +enum { + CEPH_MDSC_STOPPING_BEGIN = 1, + CEPH_MDSC_STOPPING_FLUSHED = 2, +}; + /* * mds client state */ diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index cce78d769f55..6d3584f16f9a 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -216,7 +216,7 @@ static void metric_delayed_work(struct work_struct *work) struct ceph_mds_client *mdsc = container_of(m, struct ceph_mds_client, metric); - if (mdsc->stopping) + if (mdsc->stopping || disable_send_metrics) return; if (!m->session || !check_session_state(m->session)) { diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 343d738448dc..c9920ade15f5 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -660,7 +660,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->size = i_size_read(inode); capsnap->mtime = inode->i_mtime; capsnap->atime = inode->i_atime; - capsnap->ctime = inode->i_ctime; + capsnap->ctime = inode_get_ctime(inode); capsnap->btime = ci->i_btime; capsnap->change_attr = inode_peek_iversion_raw(inode); capsnap->time_warp_seq = ci->i_time_warp_seq; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 3fc48b43cab0..a5f52013314d 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s) ceph_mdsc_pre_umount(fsc->mdsc); flush_fs_workqueues(fsc); + /* + * Though the kill_anon_super() will finally trigger the + * sync_filesystem() anyway, we still need to do it here + * and then bump the stage of shutdown to stop the work + * queue as earlier as possible. + */ + sync_filesystem(s); + + fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; + kill_anon_super(s); fsc->client->extra_mon_dispatch = NULL; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 806183959c47..1cbd84cc82a8 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1238,7 +1238,7 @@ retry: dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, &prealloc_cf); ci->i_xattrs.dirty = true; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); } spin_unlock(&ci->i_ceph_lock); diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 903ca8fa4b9b..ae023853a98f 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -127,7 +127,8 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) if (attr->va_mtime.tv_sec != -1) inode->i_mtime = coda_to_timespec64(attr->va_mtime); if (attr->va_ctime.tv_sec != -1) - inode->i_ctime = coda_to_timespec64(attr->va_ctime); + inode_set_ctime_to_ts(inode, + coda_to_timespec64(attr->va_ctime)); } diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 8450b1bd354b..cb512b10473b 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -111,7 +111,7 @@ static inline void coda_dir_update_mtime(struct inode *dir) /* optimistically we can also act as if our nose bleeds. The * granularity of the mtime is coarse anyways so we might actually be * right most of the time. Note: we only do this for directories. */ - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); #endif } @@ -429,21 +429,14 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) cfi = coda_ftoc(coda_file); host_file = cfi->cfi_container; - if (host_file->f_op->iterate || host_file->f_op->iterate_shared) { + if (host_file->f_op->iterate_shared) { struct inode *host_inode = file_inode(host_file); ret = -ENOENT; if (!IS_DEADDIR(host_inode)) { - if (host_file->f_op->iterate_shared) { - inode_lock_shared(host_inode); - ret = host_file->f_op->iterate_shared(host_file, ctx); - file_accessed(host_file); - inode_unlock_shared(host_inode); - } else { - inode_lock(host_inode); - ret = host_file->f_op->iterate(host_file, ctx); - file_accessed(host_file); - inode_unlock(host_inode); - } + inode_lock_shared(host_inode); + ret = host_file->f_op->iterate_shared(host_file, ctx); + file_accessed(host_file); + inode_unlock_shared(host_inode); } return ret; } @@ -585,10 +578,11 @@ const struct inode_operations coda_dir_inode_operations = { .setattr = coda_setattr, }; +WRAP_DIR_ITER(coda_readdir) // FIXME! const struct file_operations coda_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = coda_readdir, + .iterate_shared = shared_coda_readdir, .open = coda_open, .release = coda_release, .fsync = coda_fsync, diff --git a/fs/coda/file.c b/fs/coda/file.c index 12b26bd13564..42346618b4ed 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -84,7 +84,7 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0); coda_inode->i_size = file_inode(host_file)->i_size; coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; - coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode); + coda_inode->i_mtime = inode_set_ctime_current(coda_inode); inode_unlock(coda_inode); file_end_write(host_file); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index d661e6cf17ac..0c7c2528791e 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -256,7 +256,8 @@ int coda_getattr(struct mnt_idmap *idmap, const struct path *path, { int err = coda_revalidate_inode(d_inode(path->dentry)); if (!err) - generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, request_mask, + d_inode(path->dentry), stat); return err; } @@ -269,7 +270,7 @@ int coda_setattr(struct mnt_idmap *idmap, struct dentry *de, memset(&vattr, 0, sizeof(vattr)); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); coda_iattr_to_vattr(iattr, &vattr); vattr.va_type = C_VNON; /* cannot set type */ diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 1c15edbe70ff..fbdcb3582926 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -88,8 +88,7 @@ int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, static inline void set_default_inode_attr(struct inode * inode, umode_t mode) { inode->i_mode = mode; - inode->i_atime = inode->i_mtime = - inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); } static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) @@ -99,7 +98,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) inode->i_gid = iattr->ia_gid; inode->i_atime = iattr->ia_atime; inode->i_mtime = iattr->ia_mtime; - inode->i_ctime = iattr->ia_ctime; + inode_set_ctime_to_ts(inode, iattr->ia_ctime); } struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd, @@ -172,7 +171,7 @@ struct inode *configfs_create(struct dentry *dentry, umode_t mode) return ERR_PTR(-ENOMEM); p_inode = d_inode(dentry->d_parent); - p_inode->i_mtime = p_inode->i_ctime = current_time(p_inode); + p_inode->i_mtime = inode_set_ctime_current(p_inode); configfs_set_inode_lock_class(sd, inode); return inode; } diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 27c6597aa1be..5ee7d7bbb361 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -133,7 +133,8 @@ static struct inode *get_cramfs_inode(struct super_block *sb, } /* Struct copy intentional */ - inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; + inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode, + zerotime); /* inode->i_nlink is left 1 - arguably wrong for directories, but it's the best we can do without reading the directory contents. 1 yields the right result in GNU find, even @@ -485,12 +486,16 @@ static void cramfs_kill_sb(struct super_block *sb) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); + generic_shutdown_super(sb); + if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) { if (sbi && sbi->mtd_point_size) mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size); - kill_mtd_super(sb); + put_mtd_device(sb->s_mtd); + sb->s_mtd = NULL; } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { - kill_block_super(sb); + sync_blockdev(sb->s_bdev); + blkdev_put(sb->s_bdev, sb); } kfree(sbi); } diff --git a/fs/dcache.c b/fs/dcache.c index 52e6d5fdab6b..25ac74d30bff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1664,7 +1664,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) if (dentry == _data && dentry->d_lockref.count == 1) return D_WALK_CONTINUE; - printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} " + WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} " " still in use (%d) [unmount of %s %s]\n", dentry, dentry->d_inode ? @@ -1673,7 +1673,6 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); - WARN_ON(1); return D_WALK_CONTINUE; } @@ -3247,8 +3246,6 @@ void d_genocide(struct dentry *parent) d_walk(parent, parent, d_genocide_kill); } -EXPORT_SYMBOL(d_genocide); - void d_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 3f81f73c241a..83e57e9f9fa0 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -72,8 +72,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb) struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); - inode->i_atime = inode->i_mtime = - inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); } return inode; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index fe3db0eda8e4..299c295a27a0 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -338,7 +338,7 @@ static int mknod_ptmx(struct super_block *sb) } inode->i_ino = 2; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); mode = S_IFCHR|opts->ptmxmode; init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); @@ -451,7 +451,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent) if (!inode) goto fail; inode->i_ino = 1; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; @@ -534,12 +534,12 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx) /** * devpts_pty_new -- create a new inode in /dev/pts/ - * @ptmx_inode: inode of the master - * @device: major+minor of the node to be created + * @fsi: Filesystem info for this instance. * @index: used as a name of the node * @priv: what's given back by devpts_get_priv * - * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill. + * The dentry for the created inode is returned. + * Remove it from /dev/pts/ with devpts_pty_kill(). */ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) { @@ -560,7 +560,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) inode->i_ino = index + 3; inode->i_uid = opts->setuid ? opts->uid : current_fsuid(); inode->i_gid = opts->setgid ? opts->gid : current_fsgid(); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); init_special_inode(inode, S_IFCHR|opts->mode, MKDEV(UNIX98_PTY_SLAVE_MAJOR, index)); sprintf(s, "%d", index); @@ -580,7 +580,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) /** * devpts_get_priv -- get private data for a slave - * @pts_inode: inode of the slave + * @dentry: dentry of the slave * * Returns whatever was passed as priv in devpts_pty_new for a given inode. */ @@ -593,7 +593,7 @@ void *devpts_get_priv(struct dentry *dentry) /** * devpts_pty_kill -- remove inode form /dev/pts/ - * @inode: inode of the slave to be removed + * @dentry: dentry of the slave to be removed * * This is an inverse operation of devpts_pty_new. */ diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index c16f0d660cb7..03bd55069d86 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -441,10 +441,10 @@ int ecryptfs_encrypt_page(struct page *page) } lower_offset = lower_offset_for_page(crypt_stat, page); - enc_extent_virt = kmap(enc_extent_page); + enc_extent_virt = kmap_local_page(enc_extent_page); rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset, PAGE_SIZE); - kunmap(enc_extent_page); + kunmap_local(enc_extent_virt); if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting to write lower page; rc = [%d]\n", @@ -490,10 +490,10 @@ int ecryptfs_decrypt_page(struct page *page) BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); lower_offset = lower_offset_for_page(crypt_stat, page); - page_virt = kmap(page); + page_virt = kmap_local_page(page); rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE, ecryptfs_inode); - kunmap(page); + kunmap_local(page_virt); if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting to read lower page; rc = [%d]\n", diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 83274915ba6d..992d9c7e64ae 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -148,7 +148,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, } fsstack_copy_attr_times(dir, lower_dir); set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink); - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); out_unlock: dput(lower_dentry); inode_unlock(lower_dir); @@ -982,7 +982,7 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap, mount_crypt_stat = &ecryptfs_superblock_to_private( dentry->d_sb)->mount_crypt_stat; - generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat); if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) { char *target; size_t targetsiz; @@ -1011,7 +1011,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap, if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); - generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, request_mask, + d_inode(dentry), stat); stat->blocks = lower_stat.blocks; } return rc; diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 373c3e5747e6..e2483acc4366 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -125,7 +125,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, /* This is a header extent */ char *page_virt; - page_virt = kmap_atomic(page); + page_virt = kmap_local_page(page); memset(page_virt, 0, PAGE_SIZE); /* TODO: Support more than one header extent */ if (view_extent_num == 0) { @@ -138,7 +138,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, crypt_stat, &written); } - kunmap_atomic(page_virt); + kunmap_local(page_virt); flush_dcache_page(page); if (rc) { printk(KERN_ERR "%s: Error reading xattr " @@ -255,7 +255,6 @@ out: * @mapping: The eCryptfs object * @pos: The file offset at which to start writing * @len: Length of the write - * @flags: Various flags * @pagep: Pointer to return the page * @fsdata: Pointer to return fs data (unused) * diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 60bdcaddcbe5..3458f153a588 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -64,11 +64,11 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT) + offset_in_page); - virt = kmap(page_for_lower); + virt = kmap_local_page(page_for_lower); rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); if (rc > 0) rc = 0; - kunmap(page_for_lower); + kunmap_local(virt); return rc; } @@ -140,7 +140,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, ecryptfs_page_idx, rc); goto out; } - ecryptfs_page_virt = kmap_atomic(ecryptfs_page); + ecryptfs_page_virt = kmap_local_page(ecryptfs_page); /* * pos: where we're now writing, offset: where the request was @@ -163,7 +163,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, (data + data_offset), num_bytes); data_offset += num_bytes; } - kunmap_atomic(ecryptfs_page_virt); + kunmap_local(ecryptfs_page_virt); flush_dcache_page(ecryptfs_page); SetPageUptodate(ecryptfs_page); unlock_page(ecryptfs_page); @@ -253,11 +253,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, int rc; offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page); - virt = kmap(page_for_ecryptfs); + virt = kmap_local_page(page_for_ecryptfs); rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode); if (rc > 0) rc = 0; - kunmap(page_for_ecryptfs); + kunmap_local(virt); flush_dcache_page(page_for_ecryptfs); return rc; } diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index d57ee15874f9..59b52718a3a2 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -51,7 +51,7 @@ static ssize_t efivarfs_file_write(struct file *file, } else { inode_lock(inode); i_size_write(inode, datasize + sizeof(attributes)); - inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); inode_unlock(inode); } diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index b973a2c03dde..db9231f0e77b 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -25,7 +25,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb, if (inode) { inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_flags = is_removable ? 0 : S_IMMUTABLE; switch (mode & S_IFMT) { case S_IFREG: diff --git a/fs/efs/inode.c b/fs/efs/inode.c index 3ba94bb005a6..3789d22ba501 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -105,8 +105,8 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino) inode->i_size = be32_to_cpu(efs_inode->di_size); inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime); inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime); - inode->i_ctime.tv_sec = be32_to_cpu(efs_inode->di_ctime); - inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; + inode_set_ctime(inode, be32_to_cpu(efs_inode->di_ctime), 0); + inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0; /* this is the number of blocks in the file */ if (inode->i_size == 0) { diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 2a29943fa5cc..cfad1eac7fd9 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -148,7 +148,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, *maptype = 0; return inpage; } - kunmap_atomic(inpage); + kunmap_local(inpage); might_sleep(); src = erofs_vm_map_ram(rq->in, ctx->inpages); if (!src) @@ -162,7 +162,7 @@ docopy: src = erofs_get_pcpubuf(ctx->inpages); if (!src) { DBG_BUGON(1); - kunmap_atomic(inpage); + kunmap_local(inpage); return ERR_PTR(-EFAULT); } @@ -173,9 +173,9 @@ docopy: min_t(unsigned int, total, PAGE_SIZE - *inputmargin); if (!inpage) - inpage = kmap_atomic(*in); + inpage = kmap_local_page(*in); memcpy(tmp, inpage + *inputmargin, page_copycnt); - kunmap_atomic(inpage); + kunmap_local(inpage); inpage = NULL; tmp += page_copycnt; total -= page_copycnt; @@ -214,7 +214,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, int ret, maptype; DBG_BUGON(*rq->in == NULL); - headpage = kmap_atomic(*rq->in); + headpage = kmap_local_page(*rq->in); /* LZ4 decompression inplace is only safe if zero_padding is enabled */ if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) { @@ -223,7 +223,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, min_t(unsigned int, rq->inputsize, rq->sb->s_blocksize - rq->pageofs_in)); if (ret) { - kunmap_atomic(headpage); + kunmap_local(headpage); return ret; } may_inplace = !((rq->pageofs_in + rq->inputsize) & @@ -261,7 +261,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } if (maptype == 0) { - kunmap_atomic(headpage); + kunmap_local(headpage); } else if (maptype == 1) { vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { @@ -289,7 +289,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, /* one optimized fast path only for non bigpcluster cases yet */ if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) { DBG_BUGON(!*rq->out); - dst = kmap_atomic(*rq->out); + dst = kmap_local_page(*rq->out); dst_maptype = 0; goto dstmap_out; } @@ -311,7 +311,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, dstmap_out: ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out); if (!dst_maptype) - kunmap_atomic(dst); + kunmap_local(dst); else if (dst_maptype == 2) vm_unmap_ram(dst, ctx.outpages); return ret; @@ -328,7 +328,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, const unsigned int lefthalf = rq->outputsize - righthalf; const unsigned int interlaced_offset = rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out; - unsigned char *src, *dst; + u8 *src; if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) { DBG_BUGON(1); @@ -341,22 +341,19 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, } src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in; - if (rq->out[0]) { - dst = kmap_local_page(rq->out[0]); - memcpy(dst + rq->pageofs_out, src + interlaced_offset, - righthalf); - kunmap_local(dst); - } + if (rq->out[0]) + memcpy_to_page(rq->out[0], rq->pageofs_out, + src + interlaced_offset, righthalf); if (outpages > inpages) { DBG_BUGON(!rq->out[outpages - 1]); if (rq->out[outpages - 1] != rq->in[inpages - 1]) { - dst = kmap_local_page(rq->out[outpages - 1]); - memcpy(dst, interlaced_offset ? src : - (src + righthalf), lefthalf); - kunmap_local(dst); + memcpy_to_page(rq->out[outpages - 1], 0, src + + (interlaced_offset ? 0 : righthalf), + lefthalf); } else if (!interlaced_offset) { memmove(src, src + righthalf, lefthalf); + flush_dcache_page(rq->in[inpages - 1]); } } kunmap_local(src); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index d70b12b81507..edc8ec7581b8 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -105,8 +105,8 @@ static void *erofs_read_inode(struct erofs_buf *buf, set_nlink(inode, le32_to_cpu(die->i_nlink)); /* extended inode has its own timestamp */ - inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime); - inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec); + inode_set_ctime(inode, le64_to_cpu(die->i_mtime), + le32_to_cpu(die->i_mtime_nsec)); inode->i_size = le64_to_cpu(die->i_size); @@ -148,8 +148,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, set_nlink(inode, le16_to_cpu(dic->i_nlink)); /* use build time for compact inodes */ - inode->i_ctime.tv_sec = sbi->build_time; - inode->i_ctime.tv_nsec = sbi->build_time_nsec; + inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec); inode->i_size = le32_to_cpu(dic->i_size); if (erofs_inode_is_data_compressed(vi->datalayout)) @@ -176,14 +175,12 @@ static void *erofs_read_inode(struct erofs_buf *buf, vi->chunkbits = sb->s_blocksize_bits + (vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); } - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec; - inode->i_atime.tv_sec = inode->i_ctime.tv_sec; - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec; - inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec; + inode->i_mtime = inode->i_atime = inode_get_ctime(inode); inode->i_flags &= ~S_DAX; if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) && - vi->datalayout == EROFS_INODE_FLAT_PLAIN) + (vi->datalayout == EROFS_INODE_FLAT_PLAIN || + vi->datalayout == EROFS_INODE_CHUNK_BASED)) inode->i_flags |= S_DAX; if (!nblks) @@ -372,7 +369,7 @@ int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_IMMUTABLE); - generic_fillattr(idmap, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); return 0; } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 9d6a3c6158bd..566f68ddfa36 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -889,8 +889,6 @@ static void erofs_kill_sb(struct super_block *sb) { struct erofs_sb_info *sbi; - WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); - /* pseudo mount for anon inodes */ if (sb->s_flags & SB_KERNMOUNT) { kill_anon_super(sb); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 5f1890e309c6..de4f12152b62 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1035,7 +1035,7 @@ hitted: */ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); - cur = end - min_t(unsigned int, offset + end - map->m_la, end); + cur = end - min_t(erofs_off_t, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, cur, end); goto next_part; @@ -1144,10 +1144,11 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, struct z_erofs_bvec *bvec) { struct z_erofs_bvec_item *item; + unsigned int pgnr; - if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) { - unsigned int pgnr; - + if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && + (bvec->end == PAGE_SIZE || + bvec->offset + bvec->end == be->pcl->length)) { pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; DBG_BUGON(pgnr >= be->nr_pages); if (!be->decompressed_pages[pgnr]) { @@ -1841,7 +1842,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, } cur = map->m_la + map->m_llen - 1; - while (cur >= end) { + while ((cur >= end) && (cur < i_size_read(inode))) { pgoff_t index = cur >> PAGE_SHIFT; struct page *page; diff --git a/fs/eventfd.c b/fs/eventfd.c index 8aa36cd37351..33a918f9566c 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -189,7 +189,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) { lockdep_assert_held(&ctx->wqh.lock); - *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; + *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count; ctx->count -= *cnt; } EXPORT_SYMBOL_GPL(eventfd_ctx_do_read); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4b1b3362f697..1d9a71a0c4c1 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -975,15 +975,11 @@ again: static int ep_alloc(struct eventpoll **pep) { - int error; - struct user_struct *user; struct eventpoll *ep; - user = get_current_user(); - error = -ENOMEM; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (unlikely(!ep)) - goto free_uid; + return -ENOMEM; mutex_init(&ep->mtx); rwlock_init(&ep->lock); @@ -992,16 +988,12 @@ static int ep_alloc(struct eventpoll **pep) INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT_CACHED; ep->ovflist = EP_UNACTIVE_PTR; - ep->user = user; + ep->user = get_current_user(); refcount_set(&ep->refcount, 1); *pep = ep; return 0; - -free_uid: - free_uid(user); - return error; } /* diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 9f42f25fab92..e918decb3735 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -69,7 +69,7 @@ static int exfat_allocate_bitmap(struct super_block *sb, } sbi->map_sectors = ((need_map_size - 1) >> (sb->s_blocksize_bits)) + 1; - sbi->vol_amap = kmalloc_array(sbi->map_sectors, + sbi->vol_amap = kvmalloc_array(sbi->map_sectors, sizeof(struct buffer_head *), GFP_KERNEL); if (!sbi->vol_amap) return -ENOMEM; @@ -84,7 +84,7 @@ static int exfat_allocate_bitmap(struct super_block *sb, while (j < i) brelse(sbi->vol_amap[j++]); - kfree(sbi->vol_amap); + kvfree(sbi->vol_amap); sbi->vol_amap = NULL; return -EIO; } @@ -138,7 +138,7 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi) for (i = 0; i < sbi->map_sectors; i++) __brelse(sbi->vol_amap[i]); - kfree(sbi->vol_amap); + kvfree(sbi->vol_amap); } int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 957574180a5e..e1586bba6d86 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb, { int i, err; struct exfat_entry_set_cache es; + unsigned int uni_len = 0, len; err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES); if (err) @@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb, if (exfat_get_entry_type(ep) != TYPE_EXTEND) break; - exfat_extract_uni_name(ep, uniname); + len = exfat_extract_uni_name(ep, uniname); + uni_len += len; + if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH) + break; uniname += EXFAT_FILE_NAME_LEN; } @@ -214,7 +218,10 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb) exfat_init_namebuf(nb); } -/* skip iterating emit_dots when dir is empty */ +/* + * Before calling dir_emit*(), sbi->s_lock should be released + * because page fault can occur in dir_emit*(). + */ #define ITER_POS_FILLED_DOTS (2) static int exfat_iterate(struct file *file, struct dir_context *ctx) { @@ -229,11 +236,10 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx) int err = 0, fake_offset = 0; exfat_init_namebuf(nb); - mutex_lock(&EXFAT_SB(sb)->s_lock); cpos = ctx->pos; if (!dir_emit_dots(file, ctx)) - goto unlock; + goto out; if (ctx->pos == ITER_POS_FILLED_DOTS) { cpos = 0; @@ -245,16 +251,18 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx) /* name buffer should be allocated before use */ err = exfat_alloc_namebuf(nb); if (err) - goto unlock; + goto out; get_new: + mutex_lock(&EXFAT_SB(sb)->s_lock); + if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode)) goto end_of_dir; err = exfat_readdir(inode, &cpos, &de); if (err) { /* - * At least we tried to read a sector. Move cpos to next sector - * position (should be aligned). + * At least we tried to read a sector. + * Move cpos to next sector position (should be aligned). */ if (err == -EIO) { cpos += 1 << (sb->s_blocksize_bits); @@ -277,16 +285,10 @@ get_new: inum = iunique(sb, EXFAT_ROOT_INO); } - /* - * Before calling dir_emit(), sb_lock should be released. - * Because page fault can occur in dir_emit() when the size - * of buffer given from user is larger than one page size. - */ mutex_unlock(&EXFAT_SB(sb)->s_lock); if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum, (de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG)) - goto out_unlocked; - mutex_lock(&EXFAT_SB(sb)->s_lock); + goto out; ctx->pos = cpos; goto get_new; @@ -294,9 +296,8 @@ end_of_dir: if (!cpos && fake_offset) cpos = ITER_POS_FILLED_DOTS; ctx->pos = cpos; -unlock: mutex_unlock(&EXFAT_SB(sb)->s_lock); -out_unlocked: +out: /* * To improve performance, free namebuf after unlock sb_lock. * If namebuf is not allocated, this function do nothing @@ -305,10 +306,11 @@ out_unlocked: return err; } +WRAP_DIR_ITER(exfat_iterate) // FIXME! const struct file_operations exfat_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = exfat_iterate, + .iterate_shared = shared_exfat_iterate, .unlocked_ioctl = exfat_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = exfat_compat_ioctl, @@ -1079,7 +1081,8 @@ rewind: if (entry_type == TYPE_EXTEND) { unsigned short entry_uniname[16], unichar; - if (step != DIRENT_STEP_NAME) { + if (step != DIRENT_STEP_NAME || + name_len >= MAX_NAME_LENGTH) { step = DIRENT_STEP_FILE; continue; } diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 729ada9e26e8..f55498e5c23d 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -273,8 +273,6 @@ struct exfat_sb_info { spinlock_t inode_hash_lock; struct hlist_head inode_hashtable[EXFAT_HASH_SIZE]; - - struct rcu_head rcu; }; #define EXFAT_CACHE_VALID 0 diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 3cbd270e0cba..32395ef686a2 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -22,7 +22,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) if (err) return err; - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); if (!IS_SYNC(inode)) @@ -232,7 +232,7 @@ int exfat_getattr(struct mnt_idmap *idmap, const struct path *path, struct inode *inode = d_backing_inode(path->dentry); struct exfat_inode_info *ei = EXFAT_I(inode); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); exfat_truncate_atime(&stat->atime); stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = ei->i_crtime.tv_sec; @@ -290,7 +290,7 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, } if (attr->ia_valid & ATTR_SIZE) - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); setattr_copy(&nop_mnt_idmap, inode, attr); exfat_truncate_atime(&inode->i_atime); diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 481dd338f2b8..13329baeafbc 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -355,7 +355,7 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to) if (to > i_size_read(inode)) { truncate_pagecache(inode, i_size_read(inode)); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); exfat_truncate(inode); } } @@ -398,7 +398,7 @@ static int exfat_write_end(struct file *file, struct address_space *mapping, exfat_write_failed(mapping, pos+len); if (!(err < 0) && !(ei->attr & ATTR_ARCHIVE)) { - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); ei->attr |= ATTR_ARCHIVE; mark_inode_dirty(inode); } @@ -577,7 +577,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; inode->i_mtime = info->mtime; - inode->i_ctime = info->mtime; + inode_set_ctime_to_ts(inode, info->mtime); ei->i_crtime = info->crtime; inode->i_atime = info->atime; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index e0ff9d156f6f..1b9f587f6cca 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -569,7 +569,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir, goto unlock; inode_inc_iversion(dir); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); if (IS_DIRSYNC(dir)) exfat_sync_inode(dir); else @@ -582,8 +582,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir, goto unlock; inode_inc_iversion(inode); - inode->i_mtime = inode->i_atime = inode->i_ctime = - EXFAT_I(inode)->i_crtime = current_time(inode); + inode->i_mtime = inode->i_atime = EXFAT_I(inode)->i_crtime = inode_set_ctime_current(inode); exfat_truncate_atime(&inode->i_atime); /* timestamp is already written, so mark_inode_dirty() is unneeded. */ @@ -817,7 +816,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) ei->dir.dir = DIR_DELETED; inode_inc_iversion(dir); - dir->i_mtime = dir->i_atime = current_time(dir); + dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir); exfat_truncate_atime(&dir->i_atime); if (IS_DIRSYNC(dir)) exfat_sync_inode(dir); @@ -825,7 +824,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) mark_inode_dirty(dir); clear_nlink(inode); - inode->i_mtime = inode->i_atime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); exfat_truncate_atime(&inode->i_atime); exfat_unhash_inode(inode); exfat_d_version_set(dentry, inode_query_iversion(dir)); @@ -852,7 +851,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir, goto unlock; inode_inc_iversion(dir); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); if (IS_DIRSYNC(dir)) exfat_sync_inode(dir); else @@ -866,8 +865,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir, goto unlock; inode_inc_iversion(inode); - inode->i_mtime = inode->i_atime = inode->i_ctime = - EXFAT_I(inode)->i_crtime = current_time(inode); + inode->i_mtime = inode->i_atime = EXFAT_I(inode)->i_crtime = inode_set_ctime_current(inode); exfat_truncate_atime(&inode->i_atime); /* timestamp is already written, so mark_inode_dirty() is unneeded. */ @@ -979,7 +977,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) ei->dir.dir = DIR_DELETED; inode_inc_iversion(dir); - dir->i_mtime = dir->i_atime = current_time(dir); + dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir); exfat_truncate_atime(&dir->i_atime); if (IS_DIRSYNC(dir)) exfat_sync_inode(dir); @@ -988,7 +986,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) drop_nlink(dir); clear_nlink(inode); - inode->i_mtime = inode->i_atime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); exfat_truncate_atime(&inode->i_atime); exfat_unhash_inode(inode); exfat_d_version_set(dentry, inode_query_iversion(dir)); @@ -1312,8 +1310,8 @@ static int exfat_rename(struct mnt_idmap *idmap, goto unlock; inode_inc_iversion(new_dir); - new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = - EXFAT_I(new_dir)->i_crtime = current_time(new_dir); + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + EXFAT_I(new_dir)->i_crtime = current_time(new_dir); exfat_truncate_atime(&new_dir->i_atime); if (IS_DIRSYNC(new_dir)) exfat_sync_inode(new_dir); @@ -1336,7 +1334,6 @@ static int exfat_rename(struct mnt_idmap *idmap, } inode_inc_iversion(old_dir); - old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); if (IS_DIRSYNC(old_dir)) exfat_sync_inode(old_dir); else @@ -1354,8 +1351,7 @@ static int exfat_rename(struct mnt_idmap *idmap, exfat_warn(sb, "abnormal access to an inode dropped"); WARN_ON(new_inode->i_nlink == 0); } - new_inode->i_ctime = EXFAT_I(new_inode)->i_crtime = - current_time(new_inode); + EXFAT_I(new_inode)->i_crtime = current_time(new_inode); } unlock: diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 8c32460e031e..2778bd9b631e 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -31,16 +31,6 @@ static void exfat_free_iocharset(struct exfat_sb_info *sbi) kfree(sbi->options.iocharset); } -static void exfat_delayed_free(struct rcu_head *p) -{ - struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu); - - unload_nls(sbi->nls_io); - exfat_free_iocharset(sbi); - exfat_free_upcase_table(sbi); - kfree(sbi); -} - static void exfat_put_super(struct super_block *sb) { struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -50,7 +40,8 @@ static void exfat_put_super(struct super_block *sb) brelse(sbi->boot_bh); mutex_unlock(&sbi->s_lock); - call_rcu(&sbi->rcu, exfat_delayed_free); + unload_nls(sbi->nls_io); + exfat_free_upcase_table(sbi); } static int exfat_sync_fs(struct super_block *sb, int wait) @@ -379,8 +370,7 @@ static int exfat_read_root(struct inode *inode) ei->i_size_ondisk = i_size_read(inode); exfat_save_attr(inode, ATTR_SUBDIR); - inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = - current_time(inode); + inode->i_mtime = inode->i_atime = ei->i_crtime = inode_set_ctime_current(inode); exfat_truncate_atime(&inode->i_atime); return 0; } @@ -710,9 +700,6 @@ free_table: check_nls_io: unload_nls(sbi->nls_io); - exfat_free_iocharset(sbi); - sb->s_fs_info = NULL; - kfree(sbi); return err; } @@ -721,14 +708,18 @@ static int exfat_get_tree(struct fs_context *fc) return get_tree_bdev(fc, exfat_fill_super); } +static void exfat_free_sbi(struct exfat_sb_info *sbi) +{ + exfat_free_iocharset(sbi); + kfree(sbi); +} + static void exfat_free(struct fs_context *fc) { struct exfat_sb_info *sbi = fc->s_fs_info; - if (sbi) { - exfat_free_iocharset(sbi); - kfree(sbi); - } + if (sbi) + exfat_free_sbi(sbi); } static int exfat_reconfigure(struct fs_context *fc) @@ -773,12 +764,21 @@ static int exfat_init_fs_context(struct fs_context *fc) return 0; } +static void exfat_kill_sb(struct super_block *sb) +{ + struct exfat_sb_info *sbi = sb->s_fs_info; + + kill_block_super(sb); + if (sbi) + exfat_free_sbi(sbi); +} + static struct file_system_type exfat_fs_type = { .owner = THIS_MODULE, .name = "exfat", .init_fs_context = exfat_init_fs_context, .parameters = exfat_parameters, - .kill_sb = kill_block_super, + .kill_sb = exfat_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 40e624cf7e92..d1dbe47c7975 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -315,7 +315,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child) goto out; error = -EINVAL; - if (!file->f_op->iterate && !file->f_op->iterate_shared) + if (!file->f_op->iterate_shared) goto out_close; buffer.sequence = 0; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 82b17d7fc93f..7e54c31589c7 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -237,7 +237,7 @@ ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, error = __ext2_set_acl(inode, acl, type); if (!error && update_mode) { inode->i_mode = mode; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); } return error; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 42db804794bd..b335f17f682f 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -468,7 +468,7 @@ int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, ext2_set_de_type(de, inode); ext2_commit_chunk(page, pos, len); if (update_times) - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); return ext2_handle_dirsync(dir); @@ -555,7 +555,7 @@ got_it: de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type (de, inode); ext2_commit_chunk(page, pos, rec_len); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); err = ext2_handle_dirsync(dir); @@ -606,7 +606,7 @@ int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page) pde->rec_len = ext2_rec_len_to_disk(to - from); dir->inode = 0; ext2_commit_chunk(page, pos, to - from); - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(inode); return ext2_handle_dirsync(inode); diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index a4e1d7a9c544..124df89689e1 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -549,7 +549,7 @@ got: inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_flags = ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 75983215c7a1..acbab27fe957 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -595,7 +595,7 @@ static void ext2_splice_branch(struct inode *inode, if (where->bh) mark_buffer_dirty_inode(where->bh, inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); } @@ -1287,7 +1287,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) __ext2_truncate_blocks(inode, newsize); filemap_invalidate_unlock(inode->i_mapping); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); if (inode_needs_sync(inode)) { sync_mapping_buffers(inode->i_mapping); sync_inode_metadata(inode, 1); @@ -1409,9 +1409,9 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); + inode_set_ctime(inode, (signed)le32_to_cpu(raw_inode->i_ctime), 0); inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; + inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0; ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. * This is needed because nfsd might try to access dead inodes @@ -1541,7 +1541,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(inode->i_size); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); + raw_inode->i_ctime = cpu_to_le32(inode_get_ctime(inode).tv_sec); raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); @@ -1628,7 +1628,7 @@ int ext2_getattr(struct mnt_idmap *idmap, const struct path *path, STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index cc87d413eb43..44e04484e570 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -44,7 +44,7 @@ int ext2_fileattr_set(struct mnt_idmap *idmap, (fa->flags & EXT2_FL_USER_MODIFIABLE); ext2_set_inode_flags(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); return 0; @@ -77,7 +77,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } inode_lock(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode->i_generation = generation; inode_unlock(inode); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 937dd8f60f96..059517068adc 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -211,7 +211,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, if (err) return err; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); @@ -291,7 +291,7 @@ static int ext2_unlink(struct inode *dir, struct dentry *dentry) if (err) goto out; - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); err = 0; out: @@ -367,7 +367,7 @@ static int ext2_rename (struct mnt_idmap * idmap, ext2_put_page(new_page, new_de); if (err) goto out_dir; - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); if (dir_de) drop_nlink(new_inode); inode_dec_link_count(new_inode); @@ -383,7 +383,7 @@ static int ext2_rename (struct mnt_idmap * idmap, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = current_time(old_inode); + inode_set_ctime_current(old_inode); mark_inode_dirty(old_inode); err = ext2_delete_entry(old_de, old_page); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 2959afc7541c..aaf3e3e88cb2 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1572,7 +1572,7 @@ out: if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); inode_inc_iversion(inode); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); return len - towrite; } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 8906ba479aaf..1c9187188d68 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -773,7 +773,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, /* Update the inode. */ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (IS_SYNC(inode)) { error = sync_inode_metadata(inode, 1); /* In case sync failed due to ENOSPC the inode was actually diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 27fcbddfb148..3bffe862f954 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -259,7 +259,7 @@ retry: error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */); if (!error && update_mode) { inode->i_mode = mode; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); error = ext4_mark_inode_dirty(handle, inode); } out_stop: diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0a2d55faa095..1e2259d9967d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -868,64 +868,70 @@ struct ext4_inode { * affected filesystem before 2242. */ -static inline __le32 ext4_encode_extra_time(struct timespec64 *time) +static inline __le32 ext4_encode_extra_time(struct timespec64 ts) { - u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK; - return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); + u32 extra = ((ts.tv_sec - (s32)ts.tv_sec) >> 32) & EXT4_EPOCH_MASK; + return cpu_to_le32(extra | (ts.tv_nsec << EXT4_EPOCH_BITS)); } -static inline void ext4_decode_extra_time(struct timespec64 *time, - __le32 extra) +static inline struct timespec64 ext4_decode_extra_time(__le32 base, + __le32 extra) { + struct timespec64 ts = { .tv_sec = (signed)le32_to_cpu(base) }; + if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) - time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; - time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; + ts.tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; + ts.tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; + return ts; } -#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +#define EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, ts) \ do { \ - if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\ - (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ - (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&(inode)->xtime); \ - } \ - else \ - (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (inode)->xtime.tv_sec, S32_MIN, S32_MAX)); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \ + (raw_inode)->xtime = cpu_to_le32((ts).tv_sec); \ + (raw_inode)->xtime ## _extra = ext4_encode_extra_time(ts); \ + } else \ + (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (ts).tv_sec, S32_MIN, S32_MAX)); \ } while (0) -#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ -do { \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ - (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ - (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&(einode)->xtime); \ -} while (0) +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ + EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, (inode)->xtime) + +#define EXT4_INODE_SET_CTIME(inode, raw_inode) \ + EXT4_INODE_SET_XTIME_VAL(i_ctime, inode, raw_inode, inode_get_ctime(inode)) + +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + EXT4_INODE_SET_XTIME_VAL(xtime, &((einode)->vfs_inode), \ + raw_inode, (einode)->xtime) + +#define EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode) \ + (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra) ? \ + ext4_decode_extra_time((raw_inode)->xtime, \ + (raw_inode)->xtime ## _extra) : \ + (struct timespec64) { \ + .tv_sec = (signed)le32_to_cpu((raw_inode)->xtime) \ + }) #define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ do { \ - (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ - if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \ - ext4_decode_extra_time(&(inode)->xtime, \ - raw_inode->xtime ## _extra); \ - } \ - else \ - (inode)->xtime.tv_nsec = 0; \ + (inode)->xtime = EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode); \ } while (0) +#define EXT4_INODE_GET_CTIME(inode, raw_inode) \ +do { \ + inode_set_ctime_to_ts(inode, \ + EXT4_INODE_GET_XTIME_VAL(i_ctime, inode, raw_inode)); \ +} while (0) -#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ -do { \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ - (einode)->xtime.tv_sec = \ - (signed)le32_to_cpu((raw_inode)->xtime); \ - else \ - (einode)->xtime.tv_sec = 0; \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ - ext4_decode_extra_time(&(einode)->xtime, \ - raw_inode->xtime ## _extra); \ - else \ - (einode)->xtime.tv_nsec = 0; \ +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (einode)->xtime = \ + EXT4_INODE_GET_XTIME_VAL(xtime, &(einode->vfs_inode), \ + raw_inode); \ + else \ + (einode)->xtime = (struct timespec64){0, 0}; \ } while (0) #define i_disk_version osd1.linux1.l_i_version diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 77f318ec8abb..b38d59581411 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -234,8 +234,7 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, might_sleep(); - if (bh->b_bdev->bd_super) - ext4_check_bdev_write_error(bh->b_bdev->bd_super); + ext4_check_bdev_write_error(sb); if (ext4_handle_valid(handle)) { err = jbd2_journal_get_write_access(handle, bh); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e4115d338f10..202c76996b62 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4476,12 +4476,12 @@ retry: map.m_lblk += ret; map.m_len = len = len - ret; epos = (loff_t)map.m_lblk << inode->i_blkbits; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (new_size) { if (epos > new_size) epos = new_size; if (ext4_update_inode_size(inode, epos) & 0x1) - inode->i_mtime = inode->i_ctime; + inode->i_mtime = inode_get_ctime(inode); } ret2 = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -4617,7 +4617,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); @@ -4642,7 +4642,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, goto out_mutex; } - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); if (new_size) ext4_update_inode_size(inode, new_size); ret = ext4_mark_inode_dirty(handle, inode); @@ -5378,7 +5378,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); ret = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -5488,7 +5488,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; EXT4_I(inode)->i_disksize += len; - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); ret = ext4_mark_inode_dirty(handle, inode); if (ret) goto out_stop; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 754f961cd9fd..48abef5f23e7 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1250,7 +1250,7 @@ got: inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); ei->i_crtime = inode->i_mtime; memset(ei->i_data, 0, sizeof(ei->i_data)); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index a4b7e4bc32d4..003861037374 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1037,7 +1037,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, * happen is that the times are slightly out of date * and/or different from the directory change time. */ - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); ext4_update_dx_flag(dir); inode_inc_iversion(dir); return 1; @@ -1991,7 +1991,7 @@ out: ext4_orphan_del(handle, inode); if (err == 0) { - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); err = ext4_mark_inode_dirty(handle, inode); if (IS_SYNC(inode)) ext4_handle_sync(handle); diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c index 7935ea6cf92c..f0c0fd507fbc 100644 --- a/fs/ext4/inode-test.c +++ b/fs/ext4/inode-test.c @@ -245,9 +245,9 @@ static void inode_test_xtimestamp_decoding(struct kunit *test) struct timestamp_expectation *test_param = (struct timestamp_expectation *)(test->param_value); - timestamp.tv_sec = get_32bit_time(test_param); - ext4_decode_extra_time(×tamp, - cpu_to_le32(test_param->extra_bits)); + timestamp = ext4_decode_extra_time( + cpu_to_le32(get_32bit_time(test_param)), + cpu_to_le32(test_param->extra_bits)); KUNIT_EXPECT_EQ_MSG(test, test_param->expected.tv_sec, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 43775a6ca505..6683076ecb2f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3986,7 +3986,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) if (IS_SYNC(inode)) ext4_handle_sync(handle); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); ret2 = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret2)) ret = ret2; @@ -4146,7 +4146,7 @@ out_stop: if (inode->i_nlink) ext4_orphan_del(handle, inode); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); err2 = ext4_mark_inode_dirty(handle, inode); if (unlikely(err2 && !err)) err = err2; @@ -4249,7 +4249,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_SET_CTIME(inode, raw_inode); EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); @@ -4858,7 +4858,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, } } - EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_GET_CTIME(inode, raw_inode); EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); @@ -4981,7 +4981,7 @@ static void __ext4_update_other_inode_time(struct super_block *sb, spin_unlock(&inode->i_lock); spin_lock(&ei->i_raw_lock); - EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_SET_CTIME(inode, raw_inode); EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); ext4_inode_csum_set(inode, raw_inode, ei); @@ -5376,10 +5376,8 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, * Update c/mtime on truncate up, ext4_truncate() will * update c/mtime in shrink case below */ - if (!shrink) { - inode->i_mtime = current_time(inode); - inode->i_ctime = inode->i_mtime; - } + if (!shrink) + inode->i_mtime = inode_set_ctime_current(inode); if (shrink) ext4_fc_track_range(handle, inode, @@ -5537,7 +5535,7 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path, STATX_ATTR_NODUMP | STATX_ATTR_VERITY); - generic_fillattr(idmap, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); return 0; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 331859511f80..b0349f451863 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -449,7 +449,8 @@ static long swap_inode_boot_loader(struct super_block *sb, diff = size - size_bl; swap_inode_data(inode, inode_bl); - inode->i_ctime = inode_bl->i_ctime = current_time(inode); + inode_set_ctime_current(inode); + inode_set_ctime_current(inode_bl); inode_inc_iversion(inode); inode->i_generation = get_random_u32(); @@ -663,7 +664,7 @@ static int ext4_ioctl_setflags(struct inode *inode, ext4_set_inode_flags(inode, false); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_iversion(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); @@ -774,7 +775,7 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) } EXT4_I(inode)->i_projid = kprojid; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_iversion(inode); out_dirty: rc = ext4_mark_iloc_dirty(handle, inode, &iloc); @@ -1266,7 +1267,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_iversion(inode); inode->i_generation = generation; err = ext4_mark_iloc_dirty(handle, inode, &iloc); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a2475b8c9fb5..21b903fe546e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1006,14 +1006,11 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context * fls() instead since we need to know the actual length while modifying * goal length. */ - order = fls(ac->ac_g_ex.fe_len); + order = fls(ac->ac_g_ex.fe_len) - 1; min_order = order - sbi->s_mb_best_avail_max_trim_order; if (min_order < 0) min_order = 0; - if (1 << min_order < ac->ac_o_ex.fe_len) - min_order = fls(ac->ac_o_ex.fe_len) + 1; - if (sbi->s_stripe > 0) { /* * We are assuming that stripe size is always a multiple of @@ -1021,9 +1018,16 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context */ num_stripe_clusters = EXT4_NUM_B2C(sbi, sbi->s_stripe); if (1 << min_order < num_stripe_clusters) - min_order = fls(num_stripe_clusters); + /* + * We consider 1 order less because later we round + * up the goal len to num_stripe_clusters + */ + min_order = fls(num_stripe_clusters) - 1; } + if (1 << min_order < ac->ac_o_ex.fe_len) + min_order = fls(ac->ac_o_ex.fe_len); + for (i = order; i >= min_order; i--) { int frag_order; /* @@ -4761,8 +4765,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) int order, i; struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_locality_group *lg; - struct ext4_prealloc_space *tmp_pa, *cpa = NULL; - ext4_lblk_t tmp_pa_start, tmp_pa_end; + struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL; + loff_t tmp_pa_end; struct rb_node *iter; ext4_fsblk_t goal_block; @@ -4770,47 +4774,151 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) return false; - /* first, try per-file preallocation */ + /* + * first, try per-file preallocation by searching the inode pa rbtree. + * + * Here, we can't do a direct traversal of the tree because + * ext4_mb_discard_group_preallocation() can paralelly mark the pa + * deleted and that can cause direct traversal to skip some entries. + */ read_lock(&ei->i_prealloc_lock); + + if (RB_EMPTY_ROOT(&ei->i_prealloc_node)) { + goto try_group_pa; + } + + /* + * Step 1: Find a pa with logical start immediately adjacent to the + * original logical start. This could be on the left or right. + * + * (tmp_pa->pa_lstart never changes so we can skip locking for it). + */ for (iter = ei->i_prealloc_node.rb_node; iter; iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical, - tmp_pa_start, iter)) { + tmp_pa->pa_lstart, iter)) { tmp_pa = rb_entry(iter, struct ext4_prealloc_space, pa_node.inode_node); + } - /* all fields in this condition don't change, - * so we can skip locking for them */ - tmp_pa_start = tmp_pa->pa_lstart; - tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len); - - /* original request start doesn't lie in this PA */ - if (ac->ac_o_ex.fe_logical < tmp_pa_start || - ac->ac_o_ex.fe_logical >= tmp_pa_end) - continue; + /* + * Step 2: The adjacent pa might be to the right of logical start, find + * the left adjacent pa. After this step we'd have a valid tmp_pa whose + * logical start is towards the left of original request's logical start + */ + if (tmp_pa->pa_lstart > ac->ac_o_ex.fe_logical) { + struct rb_node *tmp; + tmp = rb_prev(&tmp_pa->pa_node.inode_node); - /* non-extent files can't have physical blocks past 2^32 */ - if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && - (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) > - EXT4_MAX_BLOCK_FILE_PHYS)) { + if (tmp) { + tmp_pa = rb_entry(tmp, struct ext4_prealloc_space, + pa_node.inode_node); + } else { /* - * Since PAs don't overlap, we won't find any - * other PA to satisfy this. + * If there is no adjacent pa to the left then finding + * an overlapping pa is not possible hence stop searching + * inode pa tree */ - break; + goto try_group_pa; } + } + + BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical)); - /* found preallocated blocks, use them */ + /* + * Step 3: If the left adjacent pa is deleted, keep moving left to find + * the first non deleted adjacent pa. After this step we should have a + * valid tmp_pa which is guaranteed to be non deleted. + */ + for (iter = &tmp_pa->pa_node.inode_node;; iter = rb_prev(iter)) { + if (!iter) { + /* + * no non deleted left adjacent pa, so stop searching + * inode pa tree + */ + goto try_group_pa; + } + tmp_pa = rb_entry(iter, struct ext4_prealloc_space, + pa_node.inode_node); spin_lock(&tmp_pa->pa_lock); - if (tmp_pa->pa_deleted == 0 && tmp_pa->pa_free && - likely(ext4_mb_pa_goal_check(ac, tmp_pa))) { - atomic_inc(&tmp_pa->pa_count); - ext4_mb_use_inode_pa(ac, tmp_pa); + if (tmp_pa->pa_deleted == 0) { + /* + * We will keep holding the pa_lock from + * this point on because we don't want group discard + * to delete this pa underneath us. Since group + * discard is anyways an ENOSPC operation it + * should be okay for it to wait a few more cycles. + */ + break; + } else { spin_unlock(&tmp_pa->pa_lock); - read_unlock(&ei->i_prealloc_lock); - return true; } + } + + BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical)); + BUG_ON(tmp_pa->pa_deleted == 1); + + /* + * Step 4: We now have the non deleted left adjacent pa. Only this + * pa can possibly satisfy the request hence check if it overlaps + * original logical start and stop searching if it doesn't. + */ + tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len); + + if (ac->ac_o_ex.fe_logical >= tmp_pa_end) { spin_unlock(&tmp_pa->pa_lock); + goto try_group_pa; + } + + /* non-extent files can't have physical blocks past 2^32 */ + if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && + (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) > + EXT4_MAX_BLOCK_FILE_PHYS)) { + /* + * Since PAs don't overlap, we won't find any other PA to + * satisfy this. + */ + spin_unlock(&tmp_pa->pa_lock); + goto try_group_pa; + } + + if (tmp_pa->pa_free && likely(ext4_mb_pa_goal_check(ac, tmp_pa))) { + atomic_inc(&tmp_pa->pa_count); + ext4_mb_use_inode_pa(ac, tmp_pa); + spin_unlock(&tmp_pa->pa_lock); + read_unlock(&ei->i_prealloc_lock); + return true; + } else { + /* + * We found a valid overlapping pa but couldn't use it because + * it had no free blocks. This should ideally never happen + * because: + * + * 1. When a new inode pa is added to rbtree it must have + * pa_free > 0 since otherwise we won't actually need + * preallocation. + * + * 2. An inode pa that is in the rbtree can only have it's + * pa_free become zero when another thread calls: + * ext4_mb_new_blocks + * ext4_mb_use_preallocated + * ext4_mb_use_inode_pa + * + * 3. Further, after the above calls make pa_free == 0, we will + * immediately remove it from the rbtree in: + * ext4_mb_new_blocks + * ext4_mb_release_context + * ext4_mb_put_pa + * + * 4. Since the pa_free becoming 0 and pa_free getting removed + * from tree both happen in ext4_mb_new_blocks, which is always + * called with i_data_sem held for data allocations, we can be + * sure that another process will never see a pa in rbtree with + * pa_free == 0. + */ + WARN_ON_ONCE(tmp_pa->pa_free == 0); } + spin_unlock(&tmp_pa->pa_lock); +try_group_pa: read_unlock(&ei->i_prealloc_lock); /* can we use group allocation? */ diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0caf6c730ce3..933ad03f4f58 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2203,7 +2203,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, * happen is that the times are slightly out of date * and/or different from the directory change time. */ - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); ext4_update_dx_flag(dir); inode_inc_iversion(dir); err2 = ext4_mark_inode_dirty(handle, dir); @@ -3197,7 +3197,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) * recovery. */ inode->i_size = 0; ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); + dir->i_mtime = inode_set_ctime_current(dir); + inode_set_ctime_current(inode); retval = ext4_mark_inode_dirty(handle, inode); if (retval) goto end_rmdir; @@ -3271,7 +3272,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name, retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto out_handle; - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); ext4_update_dx_flag(dir); retval = ext4_mark_inode_dirty(handle, dir); if (retval) @@ -3286,7 +3287,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name, drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); retval = ext4_mark_inode_dirty(handle, inode); if (dentry && !retval) ext4_fc_track_unlink(handle, dentry); @@ -3463,7 +3464,7 @@ retry: if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); ext4_inc_count(inode); ihold(inode); @@ -3641,8 +3642,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; inode_inc_iversion(ent->dir); - ent->dir->i_ctime = ent->dir->i_mtime = - current_time(ent->dir); + ent->dir->i_mtime = inode_set_ctime_current(ent->dir); retval = ext4_mark_inode_dirty(handle, ent->dir); BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); if (!ent->inlined) { @@ -3941,7 +3941,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old.inode->i_ctime = current_time(old.inode); + inode_set_ctime_current(old.inode); retval = ext4_mark_inode_dirty(handle, old.inode); if (unlikely(retval)) goto end_rename; @@ -3955,9 +3955,9 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (new.inode) { ext4_dec_count(new.inode); - new.inode->i_ctime = current_time(new.inode); + inode_set_ctime_current(new.inode); } - old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir); + old.dir->i_mtime = inode_set_ctime_current(old.dir); ext4_update_dx_flag(old.dir); if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); @@ -4053,7 +4053,6 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, }; u8 new_file_type; int retval; - struct timespec64 ctime; if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) && !projid_eq(EXT4_I(new_dir)->i_projid, @@ -4147,9 +4146,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - ctime = current_time(old.inode); - old.inode->i_ctime = ctime; - new.inode->i_ctime = ctime; + inode_set_ctime_current(old.inode); + inode_set_ctime_current(new.inode); retval = ext4_mark_inode_dirty(handle, old.inode); if (unlikely(retval)) goto end_rename; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c94ebf704616..73547d2334fd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -93,6 +93,7 @@ static int ext4_get_tree(struct fs_context *fc); static int ext4_reconfigure(struct fs_context *fc); static void ext4_fc_free(struct fs_context *fc); static int ext4_init_fs_context(struct fs_context *fc); +static void ext4_kill_sb(struct super_block *sb); static const struct fs_parameter_spec ext4_param_specs[]; /* @@ -135,12 +136,12 @@ static struct file_system_type ext2_fs_type = { .name = "ext2", .init_fs_context = ext4_init_fs_context, .parameters = ext4_param_specs, - .kill_sb = kill_block_super, + .kill_sb = ext4_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("ext2"); MODULE_ALIAS("ext2"); -#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) +#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type) #else #define IS_EXT2_SB(sb) (0) #endif @@ -151,12 +152,12 @@ static struct file_system_type ext3_fs_type = { .name = "ext3", .init_fs_context = ext4_init_fs_context, .parameters = ext4_param_specs, - .kill_sb = kill_block_super, + .kill_sb = ext4_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("ext3"); MODULE_ALIAS("ext3"); -#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) +#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type) static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, @@ -1096,15 +1097,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) */ } -static void ext4_bdev_mark_dead(struct block_device *bdev) -{ - ext4_force_shutdown(bdev->bd_holder, EXT4_GOING_FLAGS_NOLOGFLUSH); -} - -static const struct blk_holder_ops ext4_holder_ops = { - .mark_dead = ext4_bdev_mark_dead, -}; - /* * Open the external journal device */ @@ -1113,7 +1105,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb, - &ext4_holder_ops); + &fs_holder_ops); if (IS_ERR(bdev)) goto fail; return bdev; @@ -1125,25 +1117,6 @@ fail: return NULL; } -/* - * Release the journal device - */ -static void ext4_blkdev_remove(struct ext4_sb_info *sbi) -{ - struct block_device *bdev; - bdev = sbi->s_journal_bdev; - if (bdev) { - /* - * Invalidate the journal device's buffers. We don't want them - * floating about in memory - the physical journal device may - * hotswapped, and it breaks the `ro-after' testing code. - */ - invalidate_bdev(bdev); - blkdev_put(bdev, sbi->s_sb); - sbi->s_journal_bdev = NULL; - } -} - static inline struct inode *orphan_list_entry(struct list_head *l) { return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; @@ -1339,8 +1312,13 @@ static void ext4_put_super(struct super_block *sb) sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); if (sbi->s_journal_bdev) { + /* + * Invalidate the journal device's buffers. We don't want them + * floating about in memory - the physical journal device may + * hotswapped, and it breaks the `ro-after' testing code. + */ sync_blockdev(sbi->s_journal_bdev); - ext4_blkdev_remove(sbi); + invalidate_bdev(sbi->s_journal_bdev); } ext4_xattr_destroy_cache(sbi->s_ea_inode_cache); @@ -5572,7 +5550,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) spin_lock_init(&sbi->s_bdev_wb_lock); errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err, &sbi->s_bdev_wb_err); - sb->s_bdev->bd_super = sb; EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; @@ -5664,9 +5641,11 @@ failed_mount: kfree(get_qf_name(sb, sbi, i)); #endif fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); - /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */ brelse(sbi->s_sbh); - ext4_blkdev_remove(sbi); + if (sbi->s_journal_bdev) { + invalidate_bdev(sbi->s_journal_bdev); + blkdev_put(sbi->s_journal_bdev, sb); + } out_fail: invalidate_bdev(sb->s_bdev); sb->s_fs_info = NULL; @@ -5854,7 +5833,10 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) return NULL; + /* see get_tree_bdev why this is needed and safe */ + up_write(&sb->s_umount); bdev = ext4_blkdev_get(j_dev, sb); + down_write(&sb->s_umount); if (bdev == NULL) return NULL; @@ -7103,7 +7085,7 @@ static int ext4_quota_off(struct super_block *sb, int type) } EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL); inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); err = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); out_unlock: @@ -7273,13 +7255,24 @@ static inline int ext3_feature_set_ok(struct super_block *sb) return 1; } +static void ext4_kill_sb(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct block_device *journal_bdev = sbi ? sbi->s_journal_bdev : NULL; + + kill_block_super(sb); + + if (journal_bdev) + blkdev_put(journal_bdev, sb); +} + static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, .name = "ext4", .init_fs_context = ext4_init_fs_context, .parameters = ext4_param_specs, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, + .kill_sb = ext4_kill_sb, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME, }; MODULE_ALIAS_FS("ext4"); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 321e3a888c20..281e1bfbbe3e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -356,13 +356,13 @@ ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size) static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode) { - return ((u64)ea_inode->i_ctime.tv_sec << 32) | + return ((u64) inode_get_ctime(ea_inode).tv_sec << 32) | (u32) inode_peek_iversion_raw(ea_inode); } static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count) { - ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32); + inode_set_ctime(ea_inode, (u32)(ref_count >> 32), 0); inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff); } @@ -1782,6 +1782,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, memmove(here, (void *)here + size, (void *)last - (void *)here + sizeof(__u32)); memset(last, 0, size); + + /* + * Update i_inline_off - moved ibody region might contain + * system.data attribute. Handling a failure here won't + * cause other complications for setting an xattr. + */ + if (!is_block && ext4_has_inline_data(inode)) { + ret = ext4_find_inline_data_nolock(inode); + if (ret) { + ext4_warning_inode(inode, + "unable to update i_inline_off"); + goto out; + } + } } else if (s->not_found) { /* Insert new name. */ size_t size = EXT4_XATTR_LEN(name_len); @@ -2459,7 +2473,7 @@ retry_inode: } if (!error) { ext4_xattr_update_super_block(handle, inode->i_sb); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_iversion(inode); if (!value) no_expand = 0; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index d635c58cf5a3..8aa29fe2e87b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -455,7 +455,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, de->file_type = fs_umode_to_ftype(inode->i_mode); set_page_dirty(page); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); f2fs_mark_inode_dirty_sync(dir, false); f2fs_put_page(page, 1); } @@ -609,7 +609,7 @@ void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, f2fs_i_links_write(dir, true); clear_inode_flag(inode, FI_NEW_INODE); } - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); f2fs_mark_inode_dirty_sync(dir, false); if (F2FS_I(dir)->i_current_depth != current_depth) @@ -858,7 +858,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, false); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); f2fs_i_links_write(inode, false); if (S_ISDIR(inode->i_mode)) { @@ -919,7 +919,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, } f2fs_put_page(page, 1); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); f2fs_mark_inode_dirty_sync(dir, false); if (inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c7cb2177b252..e18272ae3119 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3303,9 +3303,11 @@ static inline void clear_file(struct inode *inode, int type) static inline bool f2fs_is_time_consistent(struct inode *inode) { + struct timespec64 ctime = inode_get_ctime(inode); + if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ctime)) return false; if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) return false; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 093039dee992..35886a52edfb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -794,7 +794,7 @@ int f2fs_truncate(struct inode *inode) if (err) return err; - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, false); return 0; } @@ -882,7 +882,7 @@ int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, STATX_ATTR_NODUMP | STATX_ATTR_VERITY); - generic_fillattr(idmap, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); /* we need to show initial sectors used for inline_data/dentries */ if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || @@ -905,7 +905,7 @@ static void __setattr_copy(struct mnt_idmap *idmap, if (ia_valid & ATTR_MTIME) inode->i_mtime = attr->ia_mtime; if (ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; + inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); @@ -1008,7 +1008,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; spin_lock(&F2FS_I(inode)->i_size_lock); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); F2FS_I(inode)->last_disk_size = i_size_read(inode); spin_unlock(&F2FS_I(inode)->i_size_lock); } @@ -1835,7 +1835,7 @@ static long f2fs_fallocate(struct file *file, int mode, } if (!ret) { - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, false); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1937,7 +1937,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) else clear_inode_flag(inode, FI_PROJ_INHERIT); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); f2fs_set_inode_flags(inode); f2fs_mark_inode_dirty_sync(inode, true); return 0; @@ -2874,10 +2874,10 @@ out_src: if (ret) goto out_unlock; - src->i_mtime = src->i_ctime = current_time(src); + src->i_mtime = inode_set_ctime_current(src); f2fs_mark_inode_dirty_sync(src, false); if (src != dst) { - dst->i_mtime = dst->i_ctime = current_time(dst); + dst->i_mtime = inode_set_ctime_current(dst); f2fs_mark_inode_dirty_sync(dst, false); } f2fs_update_time(sbi, REQ_TIME); @@ -3073,7 +3073,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) goto out_unlock; fi->i_projid = kprojid; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); out_unlock: f2fs_unlock_op(sbi); @@ -3511,7 +3511,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) } set_inode_flag(inode, FI_COMPRESS_RELEASED); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -3710,7 +3710,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (ret >= 0) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); } unlock_inode: diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 01effd3fcb6c..a1ca394bc327 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2181,12 +2181,14 @@ out_drop_write: if (err) return err; - err = freeze_super(sbi->sb); + err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE); if (err) return err; if (f2fs_readonly(sbi->sb)) { - thaw_super(sbi->sb); + err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE); + if (err) + return err; return -EROFS; } @@ -2240,6 +2242,6 @@ recover_out: out_err: f2fs_up_write(&sbi->cp_global_sem); f2fs_up_write(&sbi->gc_lock); - thaw_super(sbi->sb); + thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE); return err; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4638fee16a91..88fc9208ffa7 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -698,7 +698,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); f2fs_put_page(page, 1); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); f2fs_mark_inode_dirty_sync(dir, false); if (inode) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 09e986b050c6..c1c2ba9f28e5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -403,7 +403,7 @@ static void init_idisk_time(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); fi->i_disk_time[0] = inode->i_atime; - fi->i_disk_time[1] = inode->i_ctime; + fi->i_disk_time[1] = inode_get_ctime(inode); fi->i_disk_time[2] = inode->i_mtime; } @@ -434,10 +434,10 @@ static int do_read_inode(struct inode *inode) inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1); inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); - inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); + inode_set_ctime(inode, le64_to_cpu(ri->i_ctime), + le32_to_cpu(ri->i_ctime_nsec)); inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); inode->i_generation = le32_to_cpu(ri->i_generation); if (S_ISDIR(inode->i_mode)) @@ -714,10 +714,10 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) set_raw_inline(inode, ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); - ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + ri->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); - ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); if (S_ISDIR(inode->i_mode)) ri->i_current_depth = diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index bee0568888da..193b22a2d6bf 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -243,7 +243,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); F2FS_I(inode)->i_crtime = inode->i_mtime; inode->i_generation = get_random_u32(); @@ -420,7 +420,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_balance_fs(sbi, true); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); ihold(inode); set_inode_flag(inode, FI_INC_LINK); @@ -1052,7 +1052,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, f2fs_set_link(new_dir, new_entry, new_page, old_inode); new_page = NULL; - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (old_dir_entry) f2fs_i_links_write(new_inode, false); @@ -1086,7 +1086,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, f2fs_i_pino_write(old_inode, new_dir->i_ino); f2fs_up_write(&F2FS_I(old_inode)->i_sem); - old_inode->i_ctime = current_time(old_inode); + inode_set_ctime_current(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); @@ -1251,7 +1251,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_pino_write(old_inode, new_dir->i_ino); f2fs_up_write(&F2FS_I(old_inode)->i_sem); - old_dir->i_ctime = current_time(old_dir); + inode_set_ctime_current(old_dir); if (old_nlink) { f2fs_down_write(&F2FS_I(old_dir)->i_sem); f2fs_i_links_write(old_dir, old_nlink > 0); @@ -1270,7 +1270,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_pino_write(new_inode, old_dir->i_ino); f2fs_up_write(&F2FS_I(new_inode)->i_sem); - new_dir->i_ctime = current_time(new_dir); + inode_set_ctime_current(new_dir); if (new_nlink) { f2fs_down_write(&F2FS_I(new_dir)->i_sem); f2fs_i_links_write(new_dir, new_nlink > 0); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4e7d4ceeb084..b8637e88d94f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -321,10 +321,10 @@ static int recover_inode(struct inode *inode, struct page *page) f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); - inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); + inode_set_ctime(inode, le64_to_cpu(raw->i_ctime), + le32_to_cpu(raw->i_ctime_nsec)); inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); F2FS_I(inode)->i_advise = raw->i_advise; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ca31163da00a..aa1f9a3a8037 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1561,7 +1561,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) int i; for (i = 0; i < sbi->s_ndevs; i++) { - blkdev_put(FDEV(i).bdev, sbi->sb->s_type); + blkdev_put(FDEV(i).bdev, sbi->sb); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif @@ -2703,7 +2703,7 @@ retry: if (len == towrite) return err; - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, false); return len - towrite; } @@ -4198,7 +4198,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) /* Single zoned block device mount */ FDEV(0).bdev = blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, mode, - sbi->sb->s_type, NULL); + sbi->sb, NULL); } else { /* Multi-device mount */ memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); @@ -4217,8 +4217,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) sbi->log_blocks_per_seg) - 1; } FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, mode, - sbi->sb->s_type, - NULL); + sbi->sb, NULL); } if (IS_ERR(FDEV(i).bdev)) return PTR_ERR(FDEV(i).bdev); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 476b186b90a6..4ae93e1df421 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -764,7 +764,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, same: if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); clear_inode_flag(inode, FI_ACL_MODE); } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e3b690b48e3e..66cf4778cf3b 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -460,8 +460,7 @@ extern struct timespec64 fat_truncate_mtime(const struct msdos_sb_info *sbi, const struct timespec64 *ts); extern int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags); -extern int fat_update_time(struct inode *inode, struct timespec64 *now, - int flags); +extern int fat_update_time(struct inode *inode, int flags); extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); diff --git a/fs/fat/file.c b/fs/fat/file.c index 456477946dd9..e887e9ab7472 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -401,7 +401,7 @@ int fat_getattr(struct mnt_idmap *idmap, const struct path *path, struct inode *inode = d_inode(path->dentry); struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); - generic_fillattr(idmap, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); stat->blksize = sbi->cluster_size; if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index d99b8549ec8f..cdd39b6020f3 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -562,7 +562,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) & ~((loff_t)sbi->cluster_size - 1)) >> 9; fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0); - inode->i_ctime = inode->i_mtime; + inode_set_ctime_to_ts(inode, inode->i_mtime); if (sbi->options.isvfat) { fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0); fat_time_fat2unix(sbi, &MSDOS_I(inode)->i_crtime, de->ctime, @@ -1407,8 +1407,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->mmu_private = inode->i_size; fat_save_attrs(inode, ATTR_DIR); - inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; - inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; + inode->i_mtime = inode->i_atime = inode_set_ctime(inode, 0, 0); set_nlink(inode, fat_subdirs(inode)+2); return 0; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 7e5d6ae305f2..f2304a1054aa 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -332,13 +332,14 @@ int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags) * but ctime updates are ignored. */ if (flags & S_MTIME) - inode->i_mtime = inode->i_ctime = fat_truncate_mtime(sbi, now); + inode->i_mtime = inode_set_ctime_to_ts(inode, + fat_truncate_mtime(sbi, now)); return 0; } EXPORT_SYMBOL_GPL(fat_truncate_time); -int fat_update_time(struct inode *inode, struct timespec64 *now, int flags) +int fat_update_time(struct inode *inode, int flags) { int dirty_flags = 0; @@ -346,16 +347,13 @@ int fat_update_time(struct inode *inode, struct timespec64 *now, int flags) return 0; if (flags & (S_ATIME | S_CTIME | S_MTIME)) { - fat_truncate_time(inode, now, flags); + fat_truncate_time(inode, NULL, flags); if (inode->i_sb->s_flags & SB_LAZYTIME) dirty_flags |= I_DIRTY_TIME; else dirty_flags |= I_DIRTY_SYNC; } - if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)) - dirty_flags |= I_DIRTY_SYNC; - __mark_inode_dirty(inode, dirty_flags); return 0; } diff --git a/fs/fcntl.c b/fs/fcntl.c index b622be119706..e871009f6c88 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -34,7 +34,7 @@ #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) -static int setfl(int fd, struct file * filp, unsigned long arg) +static int setfl(int fd, struct file * filp, unsigned int arg) { struct inode * inode = file_inode(filp); int error = 0; @@ -112,11 +112,11 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, } EXPORT_SYMBOL(__f_setown); -int f_setown(struct file *filp, unsigned long arg, int force) +int f_setown(struct file *filp, int who, int force) { enum pid_type type; struct pid *pid = NULL; - int who = arg, ret = 0; + int ret = 0; type = PIDTYPE_TGID; if (who < 0) { @@ -317,28 +317,29 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { void __user *argp = (void __user *)arg; + int argi = (int)arg; struct flock flock; long err = -EINVAL; switch (cmd) { case F_DUPFD: - err = f_dupfd(arg, filp, 0); + err = f_dupfd(argi, filp, 0); break; case F_DUPFD_CLOEXEC: - err = f_dupfd(arg, filp, O_CLOEXEC); + err = f_dupfd(argi, filp, O_CLOEXEC); break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; break; case F_SETFD: err = 0; - set_close_on_exec(fd, arg & FD_CLOEXEC); + set_close_on_exec(fd, argi & FD_CLOEXEC); break; case F_GETFL: err = filp->f_flags; break; case F_SETFL: - err = setfl(fd, filp, arg); + err = setfl(fd, filp, argi); break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ @@ -375,7 +376,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, force_successful_syscall_return(); break; case F_SETOWN: - err = f_setown(filp, arg, 1); + err = f_setown(filp, argi, 1); break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); @@ -391,28 +392,28 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, break; case F_SETSIG: /* arg == 0 restores default behaviour. */ - if (!valid_signal(arg)) { + if (!valid_signal(argi)) { break; } err = 0; - filp->f_owner.signum = arg; + filp->f_owner.signum = argi; break; case F_GETLEASE: err = fcntl_getlease(filp); break; case F_SETLEASE: - err = fcntl_setlease(fd, filp, arg); + err = fcntl_setlease(fd, filp, argi); break; case F_NOTIFY: - err = fcntl_dirnotify(fd, filp, arg); + err = fcntl_dirnotify(fd, filp, argi); break; case F_SETPIPE_SZ: case F_GETPIPE_SZ: - err = pipe_fcntl(filp, cmd, arg); + err = pipe_fcntl(filp, cmd, argi); break; case F_ADD_SEALS: case F_GET_SEALS: - err = memfd_fcntl(filp, cmd, arg); + err = memfd_fcntl(filp, cmd, argi); break; case F_GET_RW_HINT: case F_SET_RW_HINT: diff --git a/fs/file.c b/fs/file.c index 7893ea161d77..3e4a4dfa38fc 100644 --- a/fs/file.c +++ b/fs/file.c @@ -668,7 +668,7 @@ EXPORT_SYMBOL(close_fd); /* for ksys_close() */ /** * last_fd - return last valid index into fd table - * @cur_fds: files struct + * @fdt: File descriptor table. * * Context: Either rcu read lock or files_lock must be held. * @@ -693,29 +693,30 @@ static inline void __range_cloexec(struct files_struct *cur_fds, spin_unlock(&cur_fds->file_lock); } -static inline void __range_close(struct files_struct *cur_fds, unsigned int fd, +static inline void __range_close(struct files_struct *files, unsigned int fd, unsigned int max_fd) { + struct file *file; unsigned n; - rcu_read_lock(); - n = last_fd(files_fdtable(cur_fds)); - rcu_read_unlock(); + spin_lock(&files->file_lock); + n = last_fd(files_fdtable(files)); max_fd = min(max_fd, n); - while (fd <= max_fd) { - struct file *file; - - spin_lock(&cur_fds->file_lock); - file = pick_file(cur_fds, fd++); - spin_unlock(&cur_fds->file_lock); - + for (; fd <= max_fd; fd++) { + file = pick_file(files, fd); if (file) { - /* found a valid file to close */ - filp_close(file, cur_fds); + spin_unlock(&files->file_lock); + filp_close(file, files); cond_resched(); + spin_lock(&files->file_lock); + } else if (need_resched()) { + spin_unlock(&files->file_lock); + cond_resched(); + spin_lock(&files->file_lock); } } + spin_unlock(&files->file_lock); } /** @@ -723,6 +724,7 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd, * * @fd: starting file descriptor to close * @max_fd: last file descriptor to close + * @flags: CLOSE_RANGE flags. * * This closes a range of file descriptors. All file descriptors * from @fd up to and including @max_fd are closed. @@ -1036,16 +1038,30 @@ unsigned long __fdget_raw(unsigned int fd) return __fget_light(fd, 0); } +/* + * Try to avoid f_pos locking. We only need it if the + * file is marked for FMODE_ATOMIC_POS, and it can be + * accessed multiple ways. + * + * Always do it for directories, because pidfd_getfd() + * can make a file accessible even if it otherwise would + * not be, and for directories this is a correctness + * issue, not a "POSIX requirement". + */ +static inline bool file_needs_f_pos_lock(struct file *file) +{ + return (file->f_mode & FMODE_ATOMIC_POS) && + (file_count(file) > 1 || file->f_op->iterate_shared); +} + unsigned long __fdget_pos(unsigned int fd) { unsigned long v = __fdget(fd); struct file *file = (struct file *)(v & ~3); - if (file && (file->f_mode & FMODE_ATOMIC_POS)) { - if (file_count(file) > 1) { - v |= FDPUT_POS_UNLOCK; - mutex_lock(&file->f_pos_lock); - } + if (file && file_needs_f_pos_lock(file)) { + v |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); } return v; } diff --git a/fs/file_table.c b/fs/file_table.c index fc7d677ff5ad..ee21b3da9d08 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -461,11 +461,8 @@ void fput(struct file *file) */ void __fput_sync(struct file *file) { - if (atomic_long_dec_and_test(&file->f_count)) { - struct task_struct *task = current; - BUG_ON(!(task->flags & PF_KTHREAD)); + if (atomic_long_dec_and_test(&file->f_count)) __fput(file); - } } EXPORT_SYMBOL(fput); diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ceb6a12649ba..ac5d43b164b5 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -110,10 +110,9 @@ static inline void dip2vip_cpy(struct vxfs_sb_info *sbi, inode->i_size = vip->vii_size; inode->i_atime.tv_sec = vip->vii_atime; - inode->i_ctime.tv_sec = vip->vii_ctime; + inode_set_ctime(inode, vip->vii_ctime, 0); inode->i_mtime.tv_sec = vip->vii_mtime; inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_blocks = vip->vii_blocks; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index aca4b4811394..969ce991b0b0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1953,9 +1953,9 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb, struct inode *inode = wb_inode(wb->b_io.prev); struct super_block *sb = inode->i_sb; - if (!trylock_super(sb)) { + if (!super_trylock_shared(sb)) { /* - * trylock_super() may fail consistently due to + * super_trylock_shared() may fail consistently due to * s_umount being grabbed by someone else. Don't use * requeue_io() to avoid busy retrying the inode/sb. */ diff --git a/fs/fs_context.c b/fs/fs_context.c index 851214d1d013..a0ad7a0c4680 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -162,6 +162,10 @@ EXPORT_SYMBOL(vfs_parse_fs_param); /** * vfs_parse_fs_string - Convenience function to just parse a string. + * @fc: Filesystem context. + * @key: Parameter name. + * @value: Default value. + * @v_size: Maximum number of bytes in the value. */ int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size) @@ -189,7 +193,7 @@ EXPORT_SYMBOL(vfs_parse_fs_string); /** * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data - * @ctx: The superblock configuration to fill in. + * @fc: The superblock configuration to fill in. * @data: The data to parse * * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be @@ -315,10 +319,31 @@ struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, } EXPORT_SYMBOL(fs_context_for_reconfigure); +/** + * fs_context_for_submount: allocate a new fs_context for a submount + * @type: file_system_type of the new context + * @reference: reference dentry from which to copy relevant info + * + * Allocate a new fs_context suitable for a submount. This also ensures that + * the fc->security object is inherited from @reference (if needed). + */ struct fs_context *fs_context_for_submount(struct file_system_type *type, struct dentry *reference) { - return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT); + struct fs_context *fc; + int ret; + + fc = alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT); + if (IS_ERR(fc)) + return fc; + + ret = security_fs_context_submount(fc, reference->d_sb); + if (ret) { + put_fs_context(fc); + return ERR_PTR(ret); + } + + return fc; } EXPORT_SYMBOL(fs_context_for_submount); @@ -333,7 +358,7 @@ void fc_drop_locked(struct fs_context *fc) static void legacy_fs_context_free(struct fs_context *fc); /** - * vfs_dup_fc_config: Duplicate a filesystem context. + * vfs_dup_fs_context - Duplicate a filesystem context. * @src_fc: The context to copy. */ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc) @@ -379,7 +404,9 @@ EXPORT_SYMBOL(vfs_dup_fs_context); /** * logfc - Log a message to a filesystem context - * @fc: The filesystem context to log to. + * @log: The filesystem context to log to, or NULL to use printk. + * @prefix: A string to prefix the output with, or NULL. + * @level: 'w' for a warning, 'e' for an error. Anything else is a notice. * @fmt: The format of the buffer. */ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...) @@ -692,6 +719,7 @@ void vfs_clean_context(struct fs_context *fc) security_free_mnt_opts(&fc->security); kfree(fc->source); fc->source = NULL; + fc->exclusive = false; fc->purpose = FS_CONTEXT_FOR_RECONFIGURE; fc->phase = FS_CONTEXT_AWAITING_RECONF; diff --git a/fs/fsopen.c b/fs/fsopen.c index fc9d2d9fd234..ce03f6521c88 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -209,6 +209,72 @@ err: return ret; } +static int vfs_cmd_create(struct fs_context *fc, bool exclusive) +{ + struct super_block *sb; + int ret; + + if (fc->phase != FS_CONTEXT_CREATE_PARAMS) + return -EBUSY; + + if (!mount_capable(fc)) + return -EPERM; + + /* require the new mount api */ + if (exclusive && fc->ops == &legacy_fs_context_ops) + return -EOPNOTSUPP; + + fc->phase = FS_CONTEXT_CREATING; + fc->exclusive = exclusive; + + ret = vfs_get_tree(fc); + if (ret) { + fc->phase = FS_CONTEXT_FAILED; + return ret; + } + + sb = fc->root->d_sb; + ret = security_sb_kern_mount(sb); + if (unlikely(ret)) { + fc_drop_locked(fc); + fc->phase = FS_CONTEXT_FAILED; + return ret; + } + + /* vfs_get_tree() callchains will have grabbed @s_umount */ + up_write(&sb->s_umount); + fc->phase = FS_CONTEXT_AWAITING_MOUNT; + return 0; +} + +static int vfs_cmd_reconfigure(struct fs_context *fc) +{ + struct super_block *sb; + int ret; + + if (fc->phase != FS_CONTEXT_RECONF_PARAMS) + return -EBUSY; + + fc->phase = FS_CONTEXT_RECONFIGURING; + + sb = fc->root->d_sb; + if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { + fc->phase = FS_CONTEXT_FAILED; + return -EPERM; + } + + down_write(&sb->s_umount); + ret = reconfigure_super(fc); + up_write(&sb->s_umount); + if (ret) { + fc->phase = FS_CONTEXT_FAILED; + return ret; + } + + vfs_clean_context(fc); + return 0; +} + /* * Check the state and apply the configuration. Note that this function is * allowed to 'steal' the value by setting param->xxx to NULL before returning. @@ -216,7 +282,6 @@ err: static int vfs_fsconfig_locked(struct fs_context *fc, int cmd, struct fs_parameter *param) { - struct super_block *sb; int ret; ret = finish_clean_context(fc); @@ -224,39 +289,11 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd, return ret; switch (cmd) { case FSCONFIG_CMD_CREATE: - if (fc->phase != FS_CONTEXT_CREATE_PARAMS) - return -EBUSY; - if (!mount_capable(fc)) - return -EPERM; - fc->phase = FS_CONTEXT_CREATING; - ret = vfs_get_tree(fc); - if (ret) - break; - sb = fc->root->d_sb; - ret = security_sb_kern_mount(sb); - if (unlikely(ret)) { - fc_drop_locked(fc); - break; - } - up_write(&sb->s_umount); - fc->phase = FS_CONTEXT_AWAITING_MOUNT; - return 0; + return vfs_cmd_create(fc, false); + case FSCONFIG_CMD_CREATE_EXCL: + return vfs_cmd_create(fc, true); case FSCONFIG_CMD_RECONFIGURE: - if (fc->phase != FS_CONTEXT_RECONF_PARAMS) - return -EBUSY; - fc->phase = FS_CONTEXT_RECONFIGURING; - sb = fc->root->d_sb; - if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { - ret = -EPERM; - break; - } - down_write(&sb->s_umount); - ret = reconfigure_super(fc); - up_write(&sb->s_umount); - if (ret) - break; - vfs_clean_context(fc); - return 0; + return vfs_cmd_reconfigure(fc); default: if (fc->phase != FS_CONTEXT_CREATE_PARAMS && fc->phase != FS_CONTEXT_RECONF_PARAMS) @@ -264,8 +301,6 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd, return vfs_parse_fs_param(fc, param); } - fc->phase = FS_CONTEXT_FAILED; - return ret; } /** @@ -353,6 +388,7 @@ SYSCALL_DEFINE5(fsconfig, return -EINVAL; break; case FSCONFIG_CMD_CREATE: + case FSCONFIG_CMD_CREATE_EXCL: case FSCONFIG_CMD_RECONFIGURE: if (_key || _value || aux) return -EINVAL; diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 247ef4f76761..ab62e4624256 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -235,7 +235,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, inode->i_mode = mode; inode->i_uid = fc->user_id; inode->i_gid = fc->group_id; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); /* setting ->i_op to NULL is not allowed */ if (iop) inode->i_op = iop; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 35bc174f9ba2..881524b9a55a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -258,7 +258,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) spin_unlock(&fi->lock); } kfree(forget); - if (ret == -ENOMEM) + if (ret == -ENOMEM || ret == -EINTR) goto out; if (ret || fuse_invalid_attr(&outarg.attr) || fuse_stale_inode(inode, outarg.generation, &outarg.attr)) @@ -395,8 +395,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name goto out_put_forget; err = -EIO; - if (!outarg->nodeid) - goto out_put_forget; if (fuse_invalid_attr(&outarg->attr)) goto out_put_forget; @@ -935,7 +933,7 @@ void fuse_flush_time_update(struct inode *inode) static void fuse_update_ctime_in_cache(struct inode *inode) { if (!IS_NOCMTIME(inode)) { - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty_sync(inode); fuse_flush_time_update(inode); } @@ -1224,7 +1222,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, forget_all_cached_acls(inode); err = fuse_do_getattr(inode, stat, file); } else if (stat) { - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->mode = fi->orig_i_mode; stat->ino = fi->orig_ino; } @@ -1717,8 +1715,8 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff) inarg.mtimensec = inode->i_mtime.tv_nsec; if (fm->fc->minor >= 23) { inarg.valid |= FATTR_CTIME; - inarg.ctime = inode->i_ctime.tv_sec; - inarg.ctimensec = inode->i_ctime.tv_nsec; + inarg.ctime = inode_get_ctime(inode).tv_sec; + inarg.ctimensec = inode_get_ctime(inode).tv_nsec; } if (ff) { inarg.valid |= FATTR_FH; @@ -1859,7 +1857,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (attr->ia_valid & ATTR_MTIME) inode->i_mtime = attr->ia_mtime; if (attr->ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; + inode_set_ctime_to_ts(inode, attr->ia_ctime); /* FIXME: clear I_DIRTY_SYNC? */ } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d66070af145d..549358ffea8b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -194,8 +194,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_mtime.tv_nsec = attr->mtimensec; } if (!(cache_mask & STATX_CTIME)) { - inode->i_ctime.tv_sec = attr->ctime; - inode->i_ctime.tv_nsec = attr->ctimensec; + inode_set_ctime(inode, attr->ctime, attr->ctimensec); } if (attr->blksize != 0) @@ -259,8 +258,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, attr->mtimensec = inode->i_mtime.tv_nsec; } if (cache_mask & STATX_CTIME) { - attr->ctime = inode->i_ctime.tv_sec; - attr->ctimensec = inode->i_ctime.tv_nsec; + attr->ctime = inode_get_ctime(inode).tv_sec; + attr->ctimensec = inode_get_ctime(inode).tv_nsec; } if ((attr_version != 0 && fi->attr_version > attr_version) || @@ -318,8 +317,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, inode->i_size = attr->size; inode->i_mtime.tv_sec = attr->mtime; inode->i_mtime.tv_nsec = attr->mtimensec; - inode->i_ctime.tv_sec = attr->ctime; - inode->i_ctime.tv_nsec = attr->ctimensec; + inode_set_ctime(inode, attr->ctime, attr->ctimensec); if (S_ISREG(inode->i_mode)) { fuse_init_common(inode); fuse_init_file_inode(inode, attr->flags); @@ -1134,7 +1132,10 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, process_init_limits(fc, arg); if (arg->minor >= 6) { - u64 flags = arg->flags | (u64) arg->flags2 << 32; + u64 flags = arg->flags; + + if (flags & FUSE_INIT_EXT) + flags |= (u64) arg->flags2 << 32; ra_pages = arg->max_readahead / PAGE_SIZE; if (flags & FUSE_ASYNC_READ) @@ -1254,7 +1255,8 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | - FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP; + FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | + FUSE_HAS_EXPIRE_ONLY; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; @@ -1397,16 +1399,18 @@ EXPORT_SYMBOL_GPL(fuse_dev_free); static void fuse_fill_attr_from_inode(struct fuse_attr *attr, const struct fuse_inode *fi) { + struct timespec64 ctime = inode_get_ctime(&fi->inode); + *attr = (struct fuse_attr){ .ino = fi->inode.i_ino, .size = fi->inode.i_size, .blocks = fi->inode.i_blocks, .atime = fi->inode.i_atime.tv_sec, .mtime = fi->inode.i_mtime.tv_sec, - .ctime = fi->inode.i_ctime.tv_sec, + .ctime = ctime.tv_sec, .atimensec = fi->inode.i_atime.tv_nsec, .mtimensec = fi->inode.i_mtime.tv_nsec, - .ctimensec = fi->inode.i_ctime.tv_nsec, + .ctimensec = ctime.tv_nsec, .mode = fi->inode.i_mode, .nlink = fi->inode.i_nlink, .uid = fi->inode.i_uid.val, diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 8e01bfdfc430..726640fa439e 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -9,14 +9,23 @@ #include <linux/compat.h> #include <linux/fileattr.h> -static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args) +static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args, + struct fuse_ioctl_out *outarg) { - ssize_t ret = fuse_simple_request(fm, args); + ssize_t ret; + + args->out_args[0].size = sizeof(*outarg); + args->out_args[0].value = outarg; + + ret = fuse_simple_request(fm, args); /* Translate ENOSYS, which shouldn't be returned from fs */ if (ret == -ENOSYS) ret = -ENOTTY; + if (ret >= 0 && outarg->result == -ENOSYS) + outarg->result = -ENOTTY; + return ret; } @@ -264,13 +273,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, } ap.args.out_numargs = 2; - ap.args.out_args[0].size = sizeof(outarg); - ap.args.out_args[0].value = &outarg; ap.args.out_args[1].size = out_size; ap.args.out_pages = true; ap.args.out_argvar = true; - transferred = fuse_send_ioctl(fm, &ap.args); + transferred = fuse_send_ioctl(fm, &ap.args, &outarg); err = transferred; if (transferred < 0) goto out; @@ -399,12 +406,10 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff, args.in_args[1].size = inarg.in_size; args.in_args[1].value = ptr; args.out_numargs = 2; - args.out_args[0].size = sizeof(outarg); - args.out_args[0].value = &outarg; args.out_args[1].size = inarg.out_size; args.out_args[1].value = ptr; - err = fuse_send_ioctl(fm, &args); + err = fuse_send_ioctl(fm, &args, &outarg); if (!err) { if (outarg.result < 0) err = outarg.result; diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index a392aa0f041d..443640e6fb9c 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -142,7 +142,7 @@ int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, ret = __gfs2_set_acl(inode, acl, type); if (!ret && mode != inode->i_mode) { - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode->i_mode = mode; mark_inode_dirty(inode); } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8d611fbcf0bd..45ea63f7167d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1386,7 +1386,7 @@ static int trunc_start(struct inode *inode, u64 newsize) ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; i_size_write(inode, newsize); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode); gfs2_dinode_out(ip, dibh->b_data); if (journaled) @@ -1583,8 +1583,7 @@ out_unlock: /* Every transaction boundary, we rewrite the dinode to keep its di_blocks current in case of failure. */ - ip->i_inode.i_mtime = ip->i_inode.i_ctime = - current_time(&ip->i_inode); + ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1950,7 +1949,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) gfs2_statfs_change(sdp, 0, +btotal, 0); gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, ip->i_inode.i_gid); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); up_write(&ip->i_rw_mutex); @@ -1993,7 +1992,7 @@ static int trunc_end(struct gfs2_inode *ip) gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); gfs2_ordered_del_inode(ip); } - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode); ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_meta(ip->i_gl, dibh); @@ -2094,7 +2093,7 @@ static int do_grow(struct inode *inode, u64 size) goto do_end_trans; truncate_setsize(inode, size); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 54a6d17b8c25..1a2afa88f8be 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_inode.i_size < offset + size) i_size_write(&ip->i_inode, offset + size); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -227,7 +227,7 @@ out: if (ip->i_inode.i_size < offset + copied) i_size_write(&ip->i_inode, offset + copied); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); @@ -1814,7 +1814,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip)); - tv = current_time(&ip->i_inode); + tv = inode_set_ctime_current(&ip->i_inode); if (ip->i_diskflags & GFS2_DIF_EXHASH) { leaf = (struct gfs2_leaf *)bh->b_data; be16_add_cpu(&leaf->lf_entries, 1); @@ -1825,7 +1825,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, da->bh = NULL; brelse(bh); ip->i_entries++; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv; + ip->i_inode.i_mtime = tv; if (S_ISDIR(nip->i_inode.i_mode)) inc_nlink(&ip->i_inode); mark_inode_dirty(inode); @@ -1876,7 +1876,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) const struct qstr *name = &dentry->d_name; struct gfs2_dirent *dent, *prev = NULL; struct buffer_head *bh; - struct timespec64 tv = current_time(&dip->i_inode); + struct timespec64 tv; /* Returns _either_ the entry (if its first in block) or the previous entry otherwise */ @@ -1896,6 +1896,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) } dirent_del(dip, bh, prev, dent); + tv = inode_set_ctime_current(&dip->i_inode); if (dip->i_diskflags & GFS2_DIF_EXHASH) { struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; u16 entries = be16_to_cpu(leaf->lf_entries); @@ -1910,7 +1911,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) if (!dip->i_entries) gfs2_consist_inode(dip); dip->i_entries--; - dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv; + dip->i_inode.i_mtime = tv; if (d_is_dir(dentry)) drop_nlink(&dip->i_inode); mark_inode_dirty(&dip->i_inode); @@ -1951,7 +1952,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, dent->de_type = cpu_to_be16(new_type); brelse(bh); - dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode); + dip->i_inode.i_mtime = inode_set_ctime_current(&dip->i_inode); mark_inode_dirty_sync(&dip->i_inode); return 0; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 1bf3c4453516..766186c80682 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -260,7 +260,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) error = gfs2_meta_inode_buffer(ip, &bh); if (error) goto out_trans_end; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); gfs2_trans_add_meta(ip->i_gl, bh); ip->i_diskflags = new_flags; gfs2_dinode_out(ip, bh->b_data); @@ -1578,7 +1578,7 @@ const struct file_operations gfs2_file_fops = { .fsync = gfs2_fsync, .lock = gfs2_lock, .flock = gfs2_flock, - .splice_read = filemap_splice_read, + .splice_read = copy_splice_read, .splice_write = gfs2_file_splice_write, .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, @@ -1609,7 +1609,7 @@ const struct file_operations gfs2_file_fops_nolock = { .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, - .splice_read = filemap_splice_read, + .splice_read = copy_splice_read, .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 54319328b16b..aecdac3cfbe1 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -437,8 +437,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) inode->i_atime = atime; inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime); inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); - inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime); - inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); + inode_set_ctime(inode, be64_to_cpu(str->di_ctime), + be32_to_cpu(str->di_ctime_nsec)); ip->i_goal = be64_to_cpu(str->di_goal_meta); ip->i_generation = be64_to_cpu(str->di_generation); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 17c994a0c0d0..a21ac41d6669 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -690,7 +690,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, set_nlink(inode, S_ISDIR(mode) ? 2 : 1); inode->i_rdev = dev; inode->i_size = size; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); munge_mode_uid_gid(dip, inode); check_and_update_goal(dip); ip->i_goal = dip->i_goal; @@ -1029,7 +1029,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, gfs2_trans_add_meta(ip->i_gl, dibh); inc_nlink(&ip->i_inode); - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); ihold(inode); d_instantiate(dentry, inode); mark_inode_dirty(inode); @@ -1114,7 +1114,7 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip, return error; ip->i_entries = 0; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (S_ISDIR(inode->i_mode)) clear_nlink(inode); else @@ -1371,7 +1371,7 @@ static int update_moved_ino(struct gfs2_inode *ip, struct gfs2_inode *ndip, if (dir_rename) return gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); mark_inode_dirty_sync(&ip->i_inode); return 0; } @@ -2071,7 +2071,7 @@ static int gfs2_getattr(struct mnt_idmap *idmap, STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); @@ -2139,8 +2139,7 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset) return vfs_setpos(file, ret, inode->i_sb->s_maxbytes); } -static int gfs2_update_time(struct inode *inode, struct timespec64 *time, - int flags) +static int gfs2_update_time(struct inode *inode, int flags) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_glock *gl = ip->i_gl; @@ -2155,7 +2154,8 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time, if (error) return error; } - return generic_update_time(inode, time, flags); + generic_update_time(inode, flags); + return 0; } static const struct inode_operations gfs2_file_iops = { diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 704192b73605..aa5fd06d47bc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -871,7 +871,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, size = loc + sizeof(struct gfs2_quota); if (size > inode->i_size) i_size_write(inode, size); - inode->i_mtime = inode->i_atime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); set_bit(QDF_REFRESH, &qd->qd_flags); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9f4d5d6549ee..2f701335e8ee 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -412,7 +412,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode)); str->di_atime = cpu_to_be64(inode->i_atime.tv_sec); str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); - str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); + str->di_ctime = cpu_to_be64(inode_get_ctime(inode).tv_sec); str->di_goal_meta = cpu_to_be64(ip->i_goal); str->di_goal_data = cpu_to_be64(ip->i_goal); @@ -429,7 +429,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_eattr = cpu_to_be64(ip->i_eattr); str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec); str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); - str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); + str->di_ctime_nsec = cpu_to_be32(inode_get_ctime(inode).tv_nsec); } /** @@ -689,7 +689,7 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp) struct super_block *sb = sdp->sd_vfs; int error; - error = freeze_super(sb); + error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); if (error) return error; @@ -697,7 +697,9 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp) gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | GFS2_LFC_FREEZE_GO_SYNC); if (gfs2_withdrawn(sdp)) { - thaw_super(sb); + error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); + if (error) + return error; return -EIO; } } @@ -712,7 +714,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp) error = gfs2_freeze_lock_shared(sdp); if (error) goto fail; - error = thaw_super(sb); + error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); if (!error) return 0; @@ -761,7 +763,7 @@ out: * */ -static int gfs2_freeze_super(struct super_block *sb) +static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; @@ -816,7 +818,7 @@ out: * */ -static int gfs2_thaw_super(struct super_block *sb) +static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 2dfbe2f188dd..c60bc7f628e1 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -168,10 +168,10 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) switch (n) { case 0: - error = thaw_super(sdp->sd_vfs); + error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE); break; case 1: - error = freeze_super(sdp->sd_vfs); + error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE); break; default: return -EINVAL; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ec1631257978..7e835be7032d 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -230,9 +230,11 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct super_block *sb = sdp->sd_vfs; struct gfs2_bufdata *bd; struct gfs2_meta_header *mh; struct gfs2_trans *tr = current->journal_info; + bool withdraw = false; lock_buffer(bh); if (buffer_pinned(bh)) { @@ -266,13 +268,15 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) (unsigned long long)bd->bd_bh->b_blocknr); BUG(); } - if (unlikely(test_bit(SDF_FROZEN, &sdp->sd_flags))) { - fs_info(sdp, "GFS2:adding buf while frozen\n"); - gfs2_assert_withdraw(sdp, 0); - } if (unlikely(gfs2_withdrawn(sdp))) { fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n", (unsigned long long)bd->bd_bh->b_blocknr); + goto out_unlock; + } + if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) { + fs_info(sdp, "GFS2:adding buf while frozen\n"); + withdraw = true; + goto out_unlock; } gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); @@ -281,6 +285,8 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) tr->tr_num_buf_new++; out_unlock: gfs2_log_unlock(sdp); + if (withdraw) + gfs2_assert_withdraw(sdp, 0); out: unlock_buffer(bh); } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 93b36d026bb4..4fea70c0fe3d 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -311,7 +311,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, ea->ea_num_ptrs = 0; } - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(sdp); @@ -763,7 +763,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) goto out_end_trans; - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); out_end_trans: @@ -888,7 +888,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, if (es->es_el) ea_set_remove_stuffed(ip, es->es_el); - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(GFS2_SB(&ip->i_inode)); @@ -1106,7 +1106,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) ea->ea_type = GFS2_EATYPE_UNUSED; } - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(GFS2_SB(&ip->i_inode)); diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index d365bf0b8c77..632c226a3972 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -133,7 +133,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, const struct qstr *str, struct i goto err1; dir->i_size++; - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); hfs_find_exit(&fd); return 0; @@ -269,7 +269,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, const struct qstr *str) } dir->i_size--; - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); res = 0; out: @@ -337,7 +337,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name, if (err) goto out; dst_dir->i_size++; - dst_dir->i_mtime = dst_dir->i_ctime = current_time(dst_dir); + dst_dir->i_mtime = inode_set_ctime_current(dst_dir); mark_inode_dirty(dst_dir); /* finally remove the old entry */ @@ -349,7 +349,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name, if (err) goto out; src_dir->i_size--; - src_dir->i_mtime = src_dir->i_ctime = current_time(src_dir); + src_dir->i_mtime = inode_set_ctime_current(src_dir); mark_inode_dirty(src_dir); type = entry.type; diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 3e1e3dcf0b48..b75c26045df4 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -263,7 +263,7 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) if (res) return res; clear_nlink(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); hfs_delete_inode(inode); mark_inode_dirty(inode); return 0; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 441d7fc952e3..ee349b72cfb3 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -200,7 +200,7 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); set_nlink(inode, 1); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; HFS_I(inode)->fs_blocks = 0; @@ -355,8 +355,8 @@ static int hfs_read_inode(struct inode *inode, void *data) inode->i_mode |= S_IWUGO; inode->i_mode &= ~hsb->s_file_umask; inode->i_mode |= S_IFREG; - inode->i_ctime = inode->i_atime = inode->i_mtime = - hfs_m_to_utime(rec->file.MdDat); + inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, + hfs_m_to_utime(rec->file.MdDat)); inode->i_op = &hfs_file_inode_operations; inode->i_fop = &hfs_file_operations; inode->i_mapping->a_ops = &hfs_aops; @@ -366,8 +366,8 @@ static int hfs_read_inode(struct inode *inode, void *data) inode->i_size = be16_to_cpu(rec->dir.Val) + 2; HFS_I(inode)->fs_blocks = 0; inode->i_mode = S_IFDIR | (S_IRWXUGO & ~hsb->s_dir_umask); - inode->i_ctime = inode->i_atime = inode->i_mtime = - hfs_m_to_utime(rec->dir.MdDat); + inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, + hfs_m_to_utime(rec->dir.MdDat)); inode->i_op = &hfs_dir_inode_operations; inode->i_fop = &hfs_dir_operations; break; @@ -654,8 +654,7 @@ int hfs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, truncate_setsize(inode, attr->ia_size); hfs_file_truncate(inode); - inode->i_atime = inode->i_mtime = inode->i_ctime = - current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); } setattr_copy(&nop_mnt_idmap, inode, attr); diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c index 2875961fdc10..dc27d418fbcd 100644 --- a/fs/hfs/sysdep.c +++ b/fs/hfs/sysdep.c @@ -28,7 +28,9 @@ static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags) /* fix up inode on a timezone change */ diff = sys_tz.tz_minuteswest * 60 - HFS_I(inode)->tz_secondswest; if (diff) { - inode->i_ctime.tv_sec += diff; + struct timespec64 ctime = inode_get_ctime(inode); + + inode_set_ctime(inode, ctime.tv_sec + diff, ctime.tv_nsec); inode->i_atime.tv_sec += diff; inode->i_mtime.tv_sec += diff; HFS_I(inode)->tz_secondswest += diff; diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 35472cba750e..e71ae2537eaa 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -312,7 +312,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, dir->i_size++; if (S_ISDIR(inode->i_mode)) hfsplus_subfolders_inc(dir); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); hfs_find_exit(&fd); @@ -417,7 +417,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, const struct qstr *str) dir->i_size--; if (type == HFSPLUS_FOLDER) hfsplus_subfolders_dec(dir); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) { @@ -494,7 +494,7 @@ int hfsplus_rename_cat(u32 cnid, dst_dir->i_size++; if (type == HFSPLUS_FOLDER) hfsplus_subfolders_inc(dst_dir); - dst_dir->i_mtime = dst_dir->i_ctime = current_time(dst_dir); + dst_dir->i_mtime = inode_set_ctime_current(dst_dir); /* finally remove the old entry */ err = hfsplus_cat_build_key(sb, src_fd.search_key, @@ -511,7 +511,7 @@ int hfsplus_rename_cat(u32 cnid, src_dir->i_size--; if (type == HFSPLUS_FOLDER) hfsplus_subfolders_dec(src_dir); - src_dir->i_mtime = src_dir->i_ctime = current_time(src_dir); + src_dir->i_mtime = inode_set_ctime_current(src_dir); /* remove old thread entry */ hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 56fb5f1312e7..f5c4b3e31a1c 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -346,7 +346,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, inc_nlink(inode); hfsplus_instantiate(dst_dentry, inode, cnid); ihold(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); sbi->file_count++; hfsplus_mark_mdb_dirty(dst_dir->i_sb); @@ -405,7 +405,7 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) hfsplus_delete_inode(inode); } else sbi->file_count--; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); out: mutex_unlock(&sbi->vh_mutex); @@ -426,7 +426,7 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) if (res) goto out; clear_nlink(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); hfsplus_delete_inode(inode); mark_inode_dirty(inode); out: diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 7d1a675e037d..c65c8c4b03dd 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -267,7 +267,7 @@ static int hfsplus_setattr(struct mnt_idmap *idmap, } truncate_setsize(inode, attr->ia_size); hfsplus_file_truncate(inode); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); } setattr_copy(&nop_mnt_idmap, inode, attr); @@ -298,7 +298,7 @@ int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path, stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP; - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } @@ -392,7 +392,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir, inode->i_ino = sbi->next_cnid++; inode_init_owner(&nop_mnt_idmap, inode, dir, mode); set_nlink(inode, 1); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); hip = HFSPLUS_I(inode); INIT_LIST_HEAD(&hip->open_dir_list); @@ -523,7 +523,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_size = 2 + be32_to_cpu(folder->valence); inode->i_atime = hfsp_mt2ut(folder->access_date); inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); - inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); + inode_set_ctime_to_ts(inode, + hfsp_mt2ut(folder->attribute_mod_date)); HFSPLUS_I(inode)->create_date = folder->create_date; HFSPLUS_I(inode)->fs_blocks = 0; if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { @@ -564,7 +565,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) } inode->i_atime = hfsp_mt2ut(file->access_date); inode->i_mtime = hfsp_mt2ut(file->content_mod_date); - inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); + inode_set_ctime_to_ts(inode, + hfsp_mt2ut(file->attribute_mod_date)); HFSPLUS_I(inode)->create_date = file->create_date; } else { pr_err("bad catalog entry used to create inode\n"); @@ -609,7 +611,7 @@ int hfsplus_cat_write_inode(struct inode *inode) hfsplus_cat_set_perms(inode, &folder->permissions); folder->access_date = hfsp_ut2mt(inode->i_atime); folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); - folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); + folder->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode)); folder->valence = cpu_to_be32(inode->i_size - 2); if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { folder->subfolders = @@ -644,7 +646,7 @@ int hfsplus_cat_write_inode(struct inode *inode) file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED); file->access_date = hfsp_ut2mt(inode->i_atime); file->content_mod_date = hfsp_ut2mt(inode->i_mtime); - file->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); + file->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode)); hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); } @@ -700,7 +702,7 @@ int hfsplus_fileattr_set(struct mnt_idmap *idmap, else hip->userflags &= ~HFSPLUS_FLG_NODUMP; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); return 0; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 46387090eb76..dc5a5cea5fae 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -517,8 +517,7 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st) (struct timespec64){ st->atime.tv_sec, st->atime.tv_nsec }; ino->i_mtime = (struct timespec64){ st->mtime.tv_sec, st->mtime.tv_nsec }; - ino->i_ctime = - (struct timespec64){ st->ctime.tv_sec, st->ctime.tv_nsec }; + inode_set_ctime(ino, st->ctime.tv_sec, st->ctime.tv_nsec); ino->i_size = st->size; ino->i_blocks = st->blocks; return 0; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index f32f15669996..f36566d61215 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -277,10 +277,10 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in * inode. */ - if (!result->i_ctime.tv_sec) { - if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date)))) - result->i_ctime.tv_sec = 1; - result->i_ctime.tv_nsec = 0; + if (!inode_get_ctime(result).tv_sec) { + time64_t csec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date)); + + inode_set_ctime(result, csec ? csec : 1, 0); result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date)); result->i_mtime.tv_nsec = 0; result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date)); diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index e50e92a42432..479166378bae 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -36,7 +36,7 @@ void hpfs_init_inode(struct inode *i) hpfs_inode->i_rddir_off = NULL; hpfs_inode->i_dirty = 0; - i->i_ctime.tv_sec = i->i_ctime.tv_nsec = 0; + inode_set_ctime(i, 0, 0); i->i_mtime.tv_sec = i->i_mtime.tv_nsec = 0; i->i_atime.tv_sec = i->i_atime.tv_nsec = 0; } @@ -232,7 +232,7 @@ void hpfs_write_inode_nolock(struct inode *i) if (de) { de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec)); de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec)); - de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec)); + de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, inode_get_ctime(i).tv_sec)); de->read_only = !(i->i_mode & 0222); de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size); hpfs_mark_4buffers_dirty(&qbh); @@ -242,7 +242,7 @@ void hpfs_write_inode_nolock(struct inode *i) if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) { de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec)); de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec)); - de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec)); + de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, inode_get_ctime(i).tv_sec)); de->read_only = !(i->i_mode & 0222); de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0); de->file_size = cpu_to_le32(0); diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 69fb40b2c99a..f4eb8d6f5989 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -13,10 +13,9 @@ static void hpfs_update_directory_times(struct inode *dir) { time64_t t = local_to_gmt(dir->i_sb, local_get_seconds(dir->i_sb)); if (t == dir->i_mtime.tv_sec && - t == dir->i_ctime.tv_sec) + t == inode_get_ctime(dir).tv_sec) return; - dir->i_mtime.tv_sec = dir->i_ctime.tv_sec = t; - dir->i_mtime.tv_nsec = dir->i_ctime.tv_nsec = 0; + dir->i_mtime = inode_set_ctime(dir, t, 0); hpfs_write_inode_nolock(dir); } @@ -59,10 +58,8 @@ static int hpfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, result->i_ino = fno; hpfs_i(result)->i_parent_dir = dir->i_ino; hpfs_i(result)->i_dno = dno; - result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); - result->i_ctime.tv_nsec = 0; - result->i_mtime.tv_nsec = 0; - result->i_atime.tv_nsec = 0; + result->i_mtime = result->i_atime = + inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0); hpfs_i(result)->i_ea_size = 0; result->i_mode |= S_IFDIR; result->i_op = &hpfs_dir_iops; @@ -167,10 +164,8 @@ static int hpfs_create(struct mnt_idmap *idmap, struct inode *dir, result->i_fop = &hpfs_file_ops; set_nlink(result, 1); hpfs_i(result)->i_parent_dir = dir->i_ino; - result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); - result->i_ctime.tv_nsec = 0; - result->i_mtime.tv_nsec = 0; - result->i_atime.tv_nsec = 0; + result->i_mtime = result->i_atime = + inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0); hpfs_i(result)->i_ea_size = 0; if (dee.read_only) result->i_mode &= ~0222; @@ -250,10 +245,8 @@ static int hpfs_mknod(struct mnt_idmap *idmap, struct inode *dir, hpfs_init_inode(result); result->i_ino = fno; hpfs_i(result)->i_parent_dir = dir->i_ino; - result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); - result->i_ctime.tv_nsec = 0; - result->i_mtime.tv_nsec = 0; - result->i_atime.tv_nsec = 0; + result->i_mtime = result->i_atime = + inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0); hpfs_i(result)->i_ea_size = 0; result->i_uid = current_fsuid(); result->i_gid = current_fsgid(); @@ -326,10 +319,8 @@ static int hpfs_symlink(struct mnt_idmap *idmap, struct inode *dir, result->i_ino = fno; hpfs_init_inode(result); hpfs_i(result)->i_parent_dir = dir->i_ino; - result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); - result->i_ctime.tv_nsec = 0; - result->i_mtime.tv_nsec = 0; - result->i_atime.tv_nsec = 0; + result->i_mtime = result->i_atime = + inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0); hpfs_i(result)->i_ea_size = 0; result->i_mode = S_IFLNK | 0777; result->i_uid = current_fsuid(); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 1cb89595b875..758a51564124 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -729,8 +729,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) root->i_atime.tv_nsec = 0; root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date)); root->i_mtime.tv_nsec = 0; - root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date)); - root->i_ctime.tv_nsec = 0; + inode_set_ctime(root, + local_to_gmt(s, le32_to_cpu(de->creation_date)), + 0); hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size); hpfs_i(root)->i_parent_dir = root->i_ino; if (root->i_size == -1) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7b17ccfa039d..93d3bcfd4fc8 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -887,7 +887,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) i_size_write(inode, offset + len); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); out: inode_unlock(inode); return error; @@ -935,7 +935,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, inode->i_mode = S_IFDIR | ctx->mode; inode->i_uid = ctx->uid; inode->i_gid = ctx->gid; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ @@ -979,7 +979,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, &hugetlbfs_i_mmap_rwsem_key); inode->i_mapping->a_ops = &hugetlbfs_aops; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_mapping->private_data = resv_map; info->seals = F_SEAL_SEAL; switch (mode & S_IFMT) { @@ -1022,7 +1022,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir, inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev); if (!inode) return -ENOSPC; - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); d_instantiate(dentry, inode); dget(dentry);/* Extra count - pin the dentry in core */ return 0; @@ -1054,7 +1054,7 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap, inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0); if (!inode) return -ENOSPC; - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); d_tmpfile(file, inode); return finish_open_simple(file, 0); } @@ -1076,7 +1076,7 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap, } else iput(inode); } - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); return error; } diff --git a/fs/inode.c b/fs/inode.c index 8fefb69e1f84..35fd688168c5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -16,7 +16,6 @@ #include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/posix_acl.h> -#include <linux/prefetch.h> #include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/ratelimit.h> #include <linux/list_lru.h> @@ -752,16 +751,11 @@ EXPORT_SYMBOL_GPL(evict_inodes); /** * invalidate_inodes - attempt to free all inodes on a superblock * @sb: superblock to operate on - * @kill_dirty: flag to guide handling of dirty inodes * - * Attempts to free all inodes for a given superblock. If there were any - * busy inodes return a non-zero value, else zero. - * If @kill_dirty is set, discard dirty inodes too, otherwise treat - * them as busy. + * Attempts to free all inodes (including dirty inodes) for a given superblock. */ -int invalidate_inodes(struct super_block *sb, bool kill_dirty) +void invalidate_inodes(struct super_block *sb) { - int busy = 0; struct inode *inode, *next; LIST_HEAD(dispose); @@ -773,14 +767,8 @@ again: spin_unlock(&inode->i_lock); continue; } - if (inode->i_state & I_DIRTY_ALL && !kill_dirty) { - spin_unlock(&inode->i_lock); - busy = 1; - continue; - } if (atomic_read(&inode->i_count)) { spin_unlock(&inode->i_lock); - busy = 1; continue; } @@ -798,8 +786,6 @@ again: spin_unlock(&sb->s_inode_list_lock); dispose_list(&dispose); - - return busy; } /* @@ -1041,8 +1027,6 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - spin_lock_prefetch(&sb->s_inode_list_lock); - inode = new_inode_pseudo(sb); if (inode) inode_sb_list_add(inode); @@ -1853,6 +1837,7 @@ EXPORT_SYMBOL(bmap); static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, struct timespec64 now) { + struct timespec64 ctime; if (!(mnt->mnt_flags & MNT_RELATIME)) return 1; @@ -1864,7 +1849,8 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, /* * Is ctime younger than or equal to atime? If yes, update atime: */ - if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0) + ctime = inode_get_ctime(inode); + if (timespec64_compare(&ctime, &inode->i_atime) >= 0) return 1; /* @@ -1879,29 +1865,76 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } -int generic_update_time(struct inode *inode, struct timespec64 *time, int flags) +/** + * inode_update_timestamps - update the timestamps on the inode + * @inode: inode to be updated + * @flags: S_* flags that needed to be updated + * + * The update_time function is called when an inode's timestamps need to be + * updated for a read or write operation. This function handles updating the + * actual timestamps. It's up to the caller to ensure that the inode is marked + * dirty appropriately. + * + * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated, + * attempt to update all three of them. S_ATIME updates can be handled + * independently of the rest. + * + * Returns a set of S_* flags indicating which values changed. + */ +int inode_update_timestamps(struct inode *inode, int flags) { - int dirty_flags = 0; + int updated = 0; + struct timespec64 now; - if (flags & (S_ATIME | S_CTIME | S_MTIME)) { - if (flags & S_ATIME) - inode->i_atime = *time; - if (flags & S_CTIME) - inode->i_ctime = *time; - if (flags & S_MTIME) - inode->i_mtime = *time; - - if (inode->i_sb->s_flags & SB_LAZYTIME) - dirty_flags |= I_DIRTY_TIME; - else - dirty_flags |= I_DIRTY_SYNC; + if (flags & (S_MTIME|S_CTIME|S_VERSION)) { + struct timespec64 ctime = inode_get_ctime(inode); + + now = inode_set_ctime_current(inode); + if (!timespec64_equal(&now, &ctime)) + updated |= S_CTIME; + if (!timespec64_equal(&now, &inode->i_mtime)) { + inode->i_mtime = now; + updated |= S_MTIME; + } + if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated)) + updated |= S_VERSION; + } else { + now = current_time(inode); } - if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)) - dirty_flags |= I_DIRTY_SYNC; + if (flags & S_ATIME) { + if (!timespec64_equal(&now, &inode->i_atime)) { + inode->i_atime = now; + updated |= S_ATIME; + } + } + return updated; +} +EXPORT_SYMBOL(inode_update_timestamps); +/** + * generic_update_time - update the timestamps on the inode + * @inode: inode to be updated + * @flags: S_* flags that needed to be updated + * + * The update_time function is called when an inode's timestamps need to be + * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME, + * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME + * updates can be handled done independently of the rest. + * + * Returns a S_* mask indicating which fields were updated. + */ +int generic_update_time(struct inode *inode, int flags) +{ + int updated = inode_update_timestamps(inode, flags); + int dirty_flags = 0; + + if (updated & (S_ATIME|S_MTIME|S_CTIME)) + dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC; + if (updated & S_VERSION) + dirty_flags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, dirty_flags); - return 0; + return updated; } EXPORT_SYMBOL(generic_update_time); @@ -1909,11 +1942,12 @@ EXPORT_SYMBOL(generic_update_time); * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ -int inode_update_time(struct inode *inode, struct timespec64 *time, int flags) +int inode_update_time(struct inode *inode, int flags) { if (inode->i_op->update_time) - return inode->i_op->update_time(inode, time, flags); - return generic_update_time(inode, time, flags); + return inode->i_op->update_time(inode, flags); + generic_update_time(inode, flags); + return 0; } EXPORT_SYMBOL(inode_update_time); @@ -1965,7 +1999,6 @@ void touch_atime(const struct path *path) { struct vfsmount *mnt = path->mnt; struct inode *inode = d_inode(path->dentry); - struct timespec64 now; if (!atime_needs_update(path, inode)) return; @@ -1984,8 +2017,7 @@ void touch_atime(const struct path *path) * We may also fail on filesystems that have the ability to make parts * of the fs read only, e.g. subvolumes in Btrfs. */ - now = current_time(inode); - inode_update_time(inode, &now, S_ATIME); + inode_update_time(inode, S_ATIME); __mnt_drop_write(mnt); skip_update: sb_end_write(inode->i_sb); @@ -2070,18 +2102,63 @@ int file_remove_privs(struct file *file) } EXPORT_SYMBOL(file_remove_privs); -static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) +/** + * current_mgtime - Return FS time (possibly fine-grained) + * @inode: inode. + * + * Return the current time truncated to the time granularity supported by + * the fs, as suitable for a ctime/mtime change. If the ctime is flagged + * as having been QUERIED, get a fine-grained timestamp. + */ +struct timespec64 current_mgtime(struct inode *inode) +{ + struct timespec64 now, ctime; + atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; + long nsec = atomic_long_read(pnsec); + + if (nsec & I_CTIME_QUERIED) { + ktime_get_real_ts64(&now); + return timestamp_truncate(now, inode); + } + + ktime_get_coarse_real_ts64(&now); + now = timestamp_truncate(now, inode); + + /* + * If we've recently fetched a fine-grained timestamp + * then the coarse-grained one may still be earlier than the + * existing ctime. Just keep the existing value if so. + */ + ctime = inode_get_ctime(inode); + if (timespec64_compare(&ctime, &now) > 0) + now = ctime; + + return now; +} +EXPORT_SYMBOL(current_mgtime); + +static struct timespec64 current_ctime(struct inode *inode) +{ + if (is_mgtime(inode)) + return current_mgtime(inode); + return current_time(inode); +} + +static int inode_needs_update_time(struct inode *inode) { int sync_it = 0; + struct timespec64 now = current_ctime(inode); + struct timespec64 ctime; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return 0; - if (!timespec64_equal(&inode->i_mtime, now)) + if (!timespec64_equal(&inode->i_mtime, &now)) sync_it = S_MTIME; - if (!timespec64_equal(&inode->i_ctime, now)) + ctime = inode_get_ctime(inode); + if (!timespec64_equal(&ctime, &now)) sync_it |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) @@ -2090,15 +2167,14 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) return sync_it; } -static int __file_update_time(struct file *file, struct timespec64 *now, - int sync_mode) +static int __file_update_time(struct file *file, int sync_mode) { int ret = 0; struct inode *inode = file_inode(file); /* try to update time settings */ if (!__mnt_want_write_file(file)) { - ret = inode_update_time(inode, now, sync_mode); + ret = inode_update_time(inode, sync_mode); __mnt_drop_write_file(file); } @@ -2123,13 +2199,12 @@ int file_update_time(struct file *file) { int ret; struct inode *inode = file_inode(file); - struct timespec64 now = current_time(inode); - ret = inode_needs_update_time(inode, &now); + ret = inode_needs_update_time(inode); if (ret <= 0) return ret; - return __file_update_time(file, &now, ret); + return __file_update_time(file, ret); } EXPORT_SYMBOL(file_update_time); @@ -2152,7 +2227,6 @@ static int file_modified_flags(struct file *file, int flags) { int ret; struct inode *inode = file_inode(file); - struct timespec64 now = current_time(inode); /* * Clear the security bits if the process is not being run by root. @@ -2165,13 +2239,13 @@ static int file_modified_flags(struct file *file, int flags) if (unlikely(file->f_mode & FMODE_NOCMTIME)) return 0; - ret = inode_needs_update_time(inode, &now); + ret = inode_needs_update_time(inode); if (ret <= 0) return ret; if (flags & IOCB_NOWAIT) return -EAGAIN; - return __file_update_time(file, &now, ret); + return __file_update_time(file, ret); } /** @@ -2491,15 +2565,59 @@ struct timespec64 current_time(struct inode *inode) struct timespec64 now; ktime_get_coarse_real_ts64(&now); + return timestamp_truncate(now, inode); +} +EXPORT_SYMBOL(current_time); - if (unlikely(!inode->i_sb)) { - WARN(1, "current_time() called with uninitialized super_block in the inode"); +/** + * inode_set_ctime_current - set the ctime to current_time + * @inode: inode + * + * Set the inode->i_ctime to the current value for the inode. Returns + * the current value that was assigned to i_ctime. + */ +struct timespec64 inode_set_ctime_current(struct inode *inode) +{ + struct timespec64 now; + struct timespec64 ctime; + + ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec); + if (!(ctime.tv_nsec & I_CTIME_QUERIED)) { + now = current_time(inode); + + /* Just copy it into place if it's not multigrain */ + if (!is_mgtime(inode)) { + inode_set_ctime_to_ts(inode, now); + return now; + } + + /* + * If we've recently updated with a fine-grained timestamp, + * then the coarse-grained one may still be earlier than the + * existing ctime. Just keep the existing value if so. + */ + ctime.tv_sec = inode->__i_ctime.tv_sec; + if (timespec64_compare(&ctime, &now) > 0) + return ctime; + + /* + * Ctime updates are usually protected by the inode_lock, but + * we can still race with someone setting the QUERIED flag. + * Try to swap the new nsec value into place. If it's changed + * in the interim, then just go with a fine-grained timestamp. + */ + if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec, + now.tv_nsec) != ctime.tv_nsec) + goto fine_grained; + inode->__i_ctime.tv_sec = now.tv_sec; return now; } - - return timestamp_truncate(now, inode); +fine_grained: + ktime_get_real_ts64(&now); + inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode)); + return now; } -EXPORT_SYMBOL(current_time); +EXPORT_SYMBOL(inode_set_ctime_current); /** * in_group_or_capable - check whether caller is CAP_FSETID privileged diff --git a/fs/internal.h b/fs/internal.h index f7a3dc111026..74d3b161dd2c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -115,7 +115,7 @@ static inline void put_file_access(struct file *file) * super.c */ extern int reconfigure_super(struct fs_context *); -extern bool trylock_super(struct super_block *sb); +extern bool super_trylock_shared(struct super_block *sb); struct super_block *user_get_super(dev_t, bool excl); void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); @@ -201,7 +201,7 @@ void lock_two_inodes(struct inode *inode1, struct inode *inode2, * fs-writeback.c */ extern long get_nr_dirty_inodes(void); -extern int invalidate_inodes(struct super_block *, bool); +void invalidate_inodes(struct super_block *sb); /* * dcache.c diff --git a/fs/ioctl.c b/fs/ioctl.c index 5b2481cd4750..f5fd99d6b0d4 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -109,9 +109,6 @@ static int ioctl_fibmap(struct file *filp, int __user *p) * Returns 0 on success, -errno on error, 1 if this was the last * extent that will fit in user array. */ -#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) -#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) -#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, u64 phys, u64 len, u32 flags) { @@ -127,6 +124,10 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) return 1; +#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) +#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) +#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) + if (flags & SET_UNKNOWN_FLAGS) flags |= FIEMAP_EXTENT_UNKNOWN; if (flags & SET_NO_UNMOUNTED_IO_FLAGS) @@ -396,8 +397,8 @@ static int ioctl_fsfreeze(struct file *filp) /* Freeze */ if (sb->s_op->freeze_super) - return sb->s_op->freeze_super(sb); - return freeze_super(sb); + return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE); + return freeze_super(sb, FREEZE_HOLDER_USERSPACE); } static int ioctl_fsthaw(struct file *filp) @@ -409,8 +410,8 @@ static int ioctl_fsthaw(struct file *filp) /* Thaw */ if (sb->s_op->thaw_super) - return sb->s_op->thaw_super(sb); - return thaw_super(sb); + return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE); + return thaw_super(sb, FREEZE_HOLDER_USERSPACE); } static int ioctl_file_dedupe_range(struct file *file, @@ -877,6 +878,9 @@ out: #ifdef CONFIG_COMPAT /** * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation + * @file: The file to operate on. + * @cmd: The ioctl command number. + * @arg: The argument to the ioctl. * * This is not normally called as a function, but instead set in struct * file_operations as diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index adb92cdb24b0..aa8967cca1a3 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -872,10 +872,10 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, while ((ret = iomap_iter(&iter, ops)) > 0) iter.processed = iomap_write_iter(&iter, i); - if (unlikely(ret < 0)) + if (unlikely(iter.pos == iocb->ki_pos)) return ret; ret = iter.pos - iocb->ki_pos; - iocb->ki_pos += ret; + iocb->ki_pos = iter.pos; return ret; } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index df9d70588b60..2ee21286ac8f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1422,13 +1422,8 @@ static int isofs_read_inode(struct inode *inode, int relocated) inode->i_ino, de->flags[-high_sierra]); } #endif - - inode->i_mtime.tv_sec = - inode->i_atime.tv_sec = - inode->i_ctime.tv_sec = iso_date(de->date, high_sierra); - inode->i_mtime.tv_nsec = - inode->i_atime.tv_nsec = - inode->i_ctime.tv_nsec = 0; + inode->i_mtime = inode->i_atime = + inode_set_ctime(inode, iso_date(de->date, high_sierra), 0); ei->i_first_extent = (isonum_733(de->extent) + isonum_711(de->ext_attr_length)); diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 48f58c6c9e69..348783a70f57 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -421,10 +421,9 @@ repeat: /* Rock ridge never appears on a High Sierra disk */ cnt = 0; if (rr->u.TF.flags & TF_CREATE) { - inode->i_ctime.tv_sec = - iso_date(rr->u.TF.times[cnt++].time, - 0); - inode->i_ctime.tv_nsec = 0; + inode_set_ctime(inode, + iso_date(rr->u.TF.times[cnt++].time, 0), + 0); } if (rr->u.TF.flags & TF_MODIFY) { inode->i_mtime.tv_sec = @@ -439,10 +438,9 @@ repeat: inode->i_atime.tv_nsec = 0; } if (rr->u.TF.flags & TF_ATTRIBUTES) { - inode->i_ctime.tv_sec = - iso_date(rr->u.TF.times[cnt++].time, - 0); - inode->i_ctime.tv_nsec = 0; + inode_set_ctime(inode, + iso_date(rr->u.TF.times[cnt++].time, 0), + 0); } break; case SIG('S', 'L'): @@ -534,7 +532,7 @@ repeat: inode->i_size = reloc->i_size; inode->i_blocks = reloc->i_blocks; inode->i_atime = reloc->i_atime; - inode->i_ctime = reloc->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(reloc)); inode->i_mtime = reloc->i_mtime; iput(reloc); break; diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 51bd38da21cd..9ec91017a7f3 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -27,7 +27,7 @@ * * Called with j_list_lock held. */ -static inline void __buffer_unlink_first(struct journal_head *jh) +static inline void __buffer_unlink(struct journal_head *jh) { transaction_t *transaction = jh->b_cp_transaction; @@ -41,45 +41,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh) } /* - * Unlink a buffer from a transaction checkpoint(io) list. - * - * Called with j_list_lock held. - */ -static inline void __buffer_unlink(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - if (transaction->t_checkpoint_io_list == jh) { - transaction->t_checkpoint_io_list = jh->b_cpnext; - if (transaction->t_checkpoint_io_list == jh) - transaction->t_checkpoint_io_list = NULL; - } -} - -/* - * Move a buffer from the checkpoint list to the checkpoint io list - * - * Called with j_list_lock held - */ -static inline void __buffer_relink_io(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - - if (!transaction->t_checkpoint_io_list) { - jh->b_cpnext = jh->b_cpprev = jh; - } else { - jh->b_cpnext = transaction->t_checkpoint_io_list; - jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; - jh->b_cpprev->b_cpnext = jh; - jh->b_cpnext->b_cpprev = jh; - } - transaction->t_checkpoint_io_list = jh; -} - -/* * Check a checkpoint buffer could be release or not. * * Requires j_list_lock @@ -183,6 +144,7 @@ __flush_batch(journal_t *journal, int *batch_count) struct buffer_head *bh = journal->j_chkpt_bhs[i]; BUFFER_TRACE(bh, "brelse"); __brelse(bh); + journal->j_chkpt_bhs[i] = NULL; } *batch_count = 0; } @@ -242,15 +204,6 @@ restart: jh = transaction->t_checkpoint_list; bh = jh2bh(jh); - if (buffer_locked(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - goto retry; - } if (jh->b_transaction != NULL) { transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; @@ -285,30 +238,50 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } - if (!buffer_dirty(bh)) { + if (!trylock_buffer(bh)) { + /* + * The buffer is locked, it may be writing back, or + * flushing out in the last couple of cycles, or + * re-adding into a new transaction, need to check + * it again until it's unlocked. + */ + get_bh(bh); + spin_unlock(&journal->j_list_lock); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + goto retry; + } else if (!buffer_dirty(bh)) { + unlock_buffer(bh); BUFFER_TRACE(bh, "remove from checkpoint"); - if (__jbd2_journal_remove_checkpoint(jh)) - /* The transaction was released; we're done */ + /* + * If the transaction was released or the checkpoint + * list was empty, we're done. + */ + if (__jbd2_journal_remove_checkpoint(jh) || + !transaction->t_checkpoint_list) goto out; - continue; + } else { + unlock_buffer(bh); + /* + * We are about to write the buffer, it could be + * raced by some other transaction shrink or buffer + * re-log logic once we release the j_list_lock, + * leave it on the checkpoint list and check status + * again to make sure it's clean. + */ + BUFFER_TRACE(bh, "queue"); + get_bh(bh); + J_ASSERT_BH(bh, !buffer_jwrite(bh)); + journal->j_chkpt_bhs[batch_count++] = bh; + transaction->t_chp_stats.cs_written++; + transaction->t_checkpoint_list = jh->b_cpnext; } - /* - * Important: we are about to write the buffer, and - * possibly block, while still holding the journal - * lock. We cannot afford to let the transaction - * logic start messing around with this buffer before - * we write it to disk, as that would break - * recoverability. - */ - BUFFER_TRACE(bh, "queue"); - get_bh(bh); - J_ASSERT_BH(bh, !buffer_jwrite(bh)); - journal->j_chkpt_bhs[batch_count++] = bh; - __buffer_relink_io(jh); - transaction->t_chp_stats.cs_written++; + if ((batch_count == JBD2_NR_BATCH) || - need_resched() || - spin_needbreak(&journal->j_list_lock)) + need_resched() || spin_needbreak(&journal->j_list_lock) || + jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0]) goto unlock_and_flush; } @@ -322,38 +295,6 @@ restart: goto restart; } - /* - * Now we issued all of the transaction's buffers, let's deal - * with the buffers that are out for I/O. - */ -restart2: - /* Did somebody clean up the transaction in the meanwhile? */ - if (journal->j_checkpoint_transactions != transaction || - transaction->t_tid != this_tid) - goto out; - - while (transaction->t_checkpoint_io_list) { - jh = transaction->t_checkpoint_io_list; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - spin_lock(&journal->j_list_lock); - goto restart2; - } - - /* - * Now in whatever state the buffer currently is, we - * know that it has been written out and so we can - * drop it from the list - */ - if (__jbd2_journal_remove_checkpoint(jh)) - break; - } out: spin_unlock(&journal->j_list_lock); result = jbd2_cleanup_journal_tail(journal); @@ -409,49 +350,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ /* - * journal_clean_one_cp_list - * - * Find all the written-back checkpoint buffers in the given list and - * release them. If 'destroy' is set, clean all buffers unconditionally. - * - * Called with j_list_lock held. - * Returns 1 if we freed the transaction, 0 otherwise. - */ -static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) -{ - struct journal_head *last_jh; - struct journal_head *next_jh = jh; - - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - do { - jh = next_jh; - next_jh = jh->b_cpnext; - - if (!destroy && __cp_buffer_busy(jh)) - return 0; - - if (__jbd2_journal_remove_checkpoint(jh)) - return 1; - /* - * This function only frees up some memory - * if possible so we dont have an obligation - * to finish processing. Bail out if preemption - * requested: - */ - if (need_resched()) - return 0; - } while (jh != last_jh); - - return 0; -} - -/* * journal_shrink_one_cp_list * - * Find 'nr_to_scan' written-back checkpoint buffers in the given list + * Find all the written-back checkpoint buffers in the given list * and try to release them. If the whole transaction is released, set * the 'released' parameter. Return the number of released checkpointed * buffers. @@ -459,15 +360,15 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) * Called with j_list_lock held. */ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, - unsigned long *nr_to_scan, - bool *released) + bool destroy, bool *released) { struct journal_head *last_jh; struct journal_head *next_jh = jh; unsigned long nr_freed = 0; int ret; - if (!jh || *nr_to_scan == 0) + *released = false; + if (!jh) return 0; last_jh = jh->b_cpprev; @@ -475,12 +376,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, jh = next_jh; next_jh = jh->b_cpnext; - (*nr_to_scan)--; - if (__cp_buffer_busy(jh)) - continue; + if (destroy) { + ret = __jbd2_journal_remove_checkpoint(jh); + } else { + ret = jbd2_journal_try_remove_checkpoint(jh); + if (ret < 0) + continue; + } nr_freed++; - ret = __jbd2_journal_remove_checkpoint(jh); if (ret) { *released = true; break; @@ -488,7 +392,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, if (need_resched()) break; - } while (jh != last_jh && *nr_to_scan); + } while (jh != last_jh); return nr_freed; } @@ -506,11 +410,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan) { transaction_t *transaction, *last_transaction, *next_transaction; - bool released; + bool __maybe_unused released; tid_t first_tid = 0, last_tid = 0, next_tid = 0; tid_t tid = 0; unsigned long nr_freed = 0; - unsigned long nr_scanned = *nr_to_scan; + unsigned long freed; again: spin_lock(&journal->j_list_lock); @@ -539,19 +443,11 @@ again: transaction = next_transaction; next_transaction = transaction->t_cpnext; tid = transaction->t_tid; - released = false; - - nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list, - nr_to_scan, &released); - if (*nr_to_scan == 0) - break; - if (need_resched() || spin_needbreak(&journal->j_list_lock)) - break; - if (released) - continue; - nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list, - nr_to_scan, &released); + freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list, + false, &released); + nr_freed += freed; + (*nr_to_scan) -= min(*nr_to_scan, freed); if (*nr_to_scan == 0) break; if (need_resched() || spin_needbreak(&journal->j_list_lock)) @@ -572,9 +468,8 @@ again: if (*nr_to_scan && next_tid) goto again; out: - nr_scanned -= *nr_to_scan; trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid, - nr_freed, nr_scanned, next_tid); + nr_freed, next_tid); return nr_freed; } @@ -590,7 +485,7 @@ out: void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret; + bool released; transaction = journal->j_checkpoint_transactions; if (!transaction) @@ -601,8 +496,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) do { transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret = journal_clean_one_cp_list(transaction->t_checkpoint_list, - destroy); + journal_shrink_one_cp_list(transaction->t_checkpoint_list, + destroy, &released); /* * This function only frees up some memory if possible so we * dont have an obligation to finish processing. Bail out if @@ -610,23 +505,12 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) */ if (need_resched()) return; - if (ret) - continue; - /* - * It is essential that we are as careful as in the case of - * t_checkpoint_list with removing the buffer from the list as - * we can possibly see not yet submitted buffers on io_list - */ - ret = journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list, destroy); - if (need_resched()) - return; /* * Stop scanning if we couldn't free the transaction. This * avoids pointless scanning of transactions which still * weren't checkpointed. */ - if (!ret) + if (!released) return; } while (transaction != last_transaction); } @@ -705,7 +589,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) jbd2_journal_put_journal_head(jh); /* Is this transaction empty? */ - if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list) + if (transaction->t_checkpoint_list) return 0; /* @@ -737,6 +621,34 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) } /* + * Check the checkpoint buffer and try to remove it from the checkpoint + * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if + * it frees the transaction, 0 otherwise. + * + * This function is called with j_list_lock held. + */ +int jbd2_journal_try_remove_checkpoint(struct journal_head *jh) +{ + struct buffer_head *bh = jh2bh(jh); + + if (!trylock_buffer(bh)) + return -EBUSY; + if (buffer_dirty(bh)) { + unlock_buffer(bh); + return -EBUSY; + } + unlock_buffer(bh); + + /* + * Buffer is clean and the IO has finished (we held the buffer + * lock) so the checkpoint is done. We can safely remove the + * buffer from this transaction. + */ + JBUFFER_TRACE(jh, "remove from checkpoint list"); + return __jbd2_journal_remove_checkpoint(jh); +} + +/* * journal_insert_checkpoint: put a committed buffer onto a checkpoint * list so that we know when it is safe to clean the transaction out of * the log. @@ -797,7 +709,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(transaction->t_forget == NULL); J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); - J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(atomic_read(&transaction->t_updates) == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index b33155dd7001..1073259902a6 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1141,8 +1141,7 @@ restart_loop: spin_lock(&journal->j_list_lock); commit_transaction->t_state = T_FINISHED; /* Check if the transaction can be dropped now that we are finished */ - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { + if (commit_transaction->t_checkpoint_list == NULL) { __jbd2_journal_drop_transaction(journal, commit_transaction); jbd2_journal_free_transaction(commit_transaction); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 18611241f451..4d1fda1f7143 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1784,8 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh) * Otherwise, if the buffer has been written to disk, * it is safe to remove the checkpoint and drop it. */ - if (!buffer_dirty(bh)) { - __jbd2_journal_remove_checkpoint(jh); + if (jbd2_journal_try_remove_checkpoint(jh) >= 0) { spin_unlock(&journal->j_list_lock); goto drop; } @@ -2100,35 +2099,6 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) __brelse(bh); } -/* - * Called from jbd2_journal_try_to_free_buffers(). - * - * Called under jh->b_state_lock - */ -static void -__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) -{ - struct journal_head *jh; - - jh = bh2jh(bh); - - if (buffer_locked(bh) || buffer_dirty(bh)) - goto out; - - if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) - goto out; - - spin_lock(&journal->j_list_lock); - if (jh->b_cp_transaction != NULL) { - /* written-back checkpointed metadata buffer */ - JBUFFER_TRACE(jh, "remove from checkpoint list"); - __jbd2_journal_remove_checkpoint(jh); - } - spin_unlock(&journal->j_list_lock); -out: - return; -} - /** * jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation @@ -2186,7 +2156,13 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio) continue; spin_lock(&jh->b_state_lock); - __journal_try_to_free_buffer(journal, bh); + if (!jh->b_transaction && !jh->b_next_transaction) { + spin_lock(&journal->j_list_lock); + /* Remove written-back checkpointed metadata buffer */ + if (jh->b_cp_transaction != NULL) + jbd2_journal_try_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); + } spin_unlock(&jh->b_state_lock); jbd2_journal_put_journal_head(jh); if (buffer_jbd(bh)) diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 5075a0a6d594..091ab0eaabbe 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -204,7 +204,8 @@ static int jffs2_create(struct mnt_idmap *idmap, struct inode *dir_i, if (ret) goto fail; - dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime)); + dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, + ITIME(je32_to_cpu(ri->ctime))); jffs2_free_raw_inode(ri); @@ -237,7 +238,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry) if (dead_f->inocache) set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink); if (!ret) - dir_i->i_mtime = dir_i->i_ctime = ITIME(now); + dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now)); return ret; } /***********************************************************************/ @@ -271,7 +272,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink); mutex_unlock(&f->sem); d_instantiate(dentry, d_inode(old_dentry)); - dir_i->i_mtime = dir_i->i_ctime = ITIME(now); + dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now)); ihold(d_inode(old_dentry)); } return ret; @@ -422,7 +423,8 @@ static int jffs2_symlink (struct mnt_idmap *idmap, struct inode *dir_i, goto fail; } - dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); + dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, + ITIME(je32_to_cpu(rd->mctime))); jffs2_free_raw_dirent(rd); @@ -566,7 +568,8 @@ static int jffs2_mkdir (struct mnt_idmap *idmap, struct inode *dir_i, goto fail; } - dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); + dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, + ITIME(je32_to_cpu(rd->mctime))); inc_nlink(dir_i); jffs2_free_raw_dirent(rd); @@ -607,7 +610,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, dentry->d_name.len, f, now); if (!ret) { - dir_i->i_mtime = dir_i->i_ctime = ITIME(now); + dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now)); clear_nlink(d_inode(dentry)); drop_nlink(dir_i); } @@ -743,7 +746,8 @@ static int jffs2_mknod (struct mnt_idmap *idmap, struct inode *dir_i, goto fail; } - dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); + dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, + ITIME(je32_to_cpu(rd->mctime))); jffs2_free_raw_dirent(rd); @@ -864,14 +868,16 @@ static int jffs2_rename (struct mnt_idmap *idmap, * caller won't do it on its own since we are returning an error. */ d_invalidate(new_dentry); - new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); + new_dir_i->i_mtime = inode_set_ctime_to_ts(new_dir_i, + ITIME(now)); return ret; } if (d_is_dir(old_dentry)) drop_nlink(old_dir_i); - new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now); + old_dir_i->i_mtime = inode_set_ctime_to_ts(old_dir_i, ITIME(now)); + new_dir_i->i_mtime = inode_set_ctime_to_ts(new_dir_i, ITIME(now)); return 0; } diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 2345ca3f09ee..11c66793960e 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -317,7 +317,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, inode->i_size = pos + writtenlen; inode->i_blocks = (inode->i_size + 511) >> 9; - inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime)); + inode->i_mtime = inode_set_ctime_to_ts(inode, + ITIME(je32_to_cpu(ri->ctime))); } } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 038516bee1ab..0403efab4089 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -115,7 +115,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) ri->isize = cpu_to_je32((ivalid & ATTR_SIZE)?iattr->ia_size:inode->i_size); ri->atime = cpu_to_je32(I_SEC((ivalid & ATTR_ATIME)?iattr->ia_atime:inode->i_atime)); ri->mtime = cpu_to_je32(I_SEC((ivalid & ATTR_MTIME)?iattr->ia_mtime:inode->i_mtime)); - ri->ctime = cpu_to_je32(I_SEC((ivalid & ATTR_CTIME)?iattr->ia_ctime:inode->i_ctime)); + ri->ctime = cpu_to_je32(I_SEC((ivalid & ATTR_CTIME)?iattr->ia_ctime:inode_get_ctime(inode))); ri->offset = cpu_to_je32(0); ri->csize = ri->dsize = cpu_to_je32(mdatalen); @@ -148,7 +148,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) } /* It worked. Update the inode */ inode->i_atime = ITIME(je32_to_cpu(ri->atime)); - inode->i_ctime = ITIME(je32_to_cpu(ri->ctime)); + inode_set_ctime_to_ts(inode, ITIME(je32_to_cpu(ri->ctime))); inode->i_mtime = ITIME(je32_to_cpu(ri->mtime)); inode->i_mode = jemode_to_cpu(ri->mode); i_uid_write(inode, je16_to_cpu(ri->uid)); @@ -284,7 +284,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) inode->i_size = je32_to_cpu(latest_node.isize); inode->i_atime = ITIME(je32_to_cpu(latest_node.atime)); inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); - inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); + inode_set_ctime_to_ts(inode, ITIME(je32_to_cpu(latest_node.ctime))); set_nlink(inode, f->inocache->pino_nlink); @@ -388,7 +388,7 @@ void jffs2_dirty_inode(struct inode *inode, int flags) iattr.ia_gid = inode->i_gid; iattr.ia_atime = inode->i_atime; iattr.ia_mtime = inode->i_mtime; - iattr.ia_ctime = inode->i_ctime; + iattr.ia_ctime = inode_get_ctime(inode); jffs2_do_setattr(inode, &iattr); } @@ -475,7 +475,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r inode->i_mode = jemode_to_cpu(ri->mode); i_gid_write(inode, je16_to_cpu(ri->gid)); i_uid_write(inode, je16_to_cpu(ri->uid)); - inode->i_atime = inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime)); inode->i_blocks = 0; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 8da19766c101..50727a1ff931 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -35,7 +35,7 @@ struct kvec; #define ITIME(sec) ((struct timespec64){sec, 0}) #define JFFS2_NOW() JFFS2_CLAMP_TIME(ktime_get_real_seconds()) #define I_SEC(tv) JFFS2_CLAMP_TIME((tv).tv_sec) -#define JFFS2_F_I_CTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_ctime) +#define JFFS2_F_I_CTIME(f) I_SEC(inode_get_ctime(OFNI_EDONI_2SFFJ(f))) #define JFFS2_F_I_MTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_mtime) #define JFFS2_F_I_ATIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_atime) #define sleep_on_spinunlock(wq, s) \ diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index fb96f872d207..1de3602c98de 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -116,7 +116,7 @@ int jfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, if (!rc) { if (update_mode) { inode->i_mode = mode; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); } rc = txCommit(tid, 1, &inode, 0); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 8ac10e396050..920d58a1566b 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -393,7 +393,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) break; } - ip->i_mtime = ip->i_ctime = current_time(ip); + ip->i_mtime = inode_set_ctime_current(ip); mark_inode_dirty(ip); txCommit(tid, 1, &ip, 0); diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index ed7989bc2db1..f7bd7e8f5be4 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -96,7 +96,7 @@ int jfs_fileattr_set(struct mnt_idmap *idmap, jfs_inode->mode2 = flags; jfs_set_inode_flags(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); return 0; diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 390cbfce391f..a40383aa6c84 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3064,8 +3064,8 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); - ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); - ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); + inode_set_ctime(ip, le32_to_cpu(dip->di_ctime.tv_sec), + le32_to_cpu(dip->di_ctime.tv_nsec)); ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); ip->i_generation = le32_to_cpu(dip->di_gen); @@ -3139,8 +3139,8 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); - dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); - dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); + dip->di_ctime.tv_sec = cpu_to_le32(inode_get_ctime(ip).tv_sec); + dip->di_ctime.tv_nsec = cpu_to_le32(inode_get_ctime(ip).tv_nsec); dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 9e1f02767201..87594efa7f7c 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -97,8 +97,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_inode->mode2 |= inode->i_mode; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - jfs_inode->otime = inode->i_ctime.tv_sec; + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); + jfs_inode->otime = inode_get_ctime(inode).tv_sec; inode->i_generation = JFS_SBI(sb)->gengen++; jfs_inode->cflag = 0; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 9b030297aa64..029d47065600 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -149,7 +149,7 @@ static int jfs_create(struct mnt_idmap *idmap, struct inode *dip, mark_inode_dirty(ip); - dip->i_ctime = dip->i_mtime = current_time(dip); + dip->i_mtime = inode_set_ctime_current(dip); mark_inode_dirty(dip); @@ -284,7 +284,7 @@ static int jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip, /* update parent directory inode */ inc_nlink(dip); /* for '..' from child directory */ - dip->i_ctime = dip->i_mtime = current_time(dip); + dip->i_mtime = inode_set_ctime_current(dip); mark_inode_dirty(dip); rc = txCommit(tid, 2, &iplist[0], 0); @@ -390,7 +390,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) /* update parent directory's link count corresponding * to ".." entry of the target directory deleted */ - dip->i_ctime = dip->i_mtime = current_time(dip); + dip->i_mtime = inode_set_ctime_current(dip); inode_dec_link_count(dip); /* @@ -512,7 +512,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) ASSERT(ip->i_nlink); - ip->i_ctime = dip->i_ctime = dip->i_mtime = current_time(ip); + dip->i_mtime = inode_set_ctime_to_ts(dip, inode_set_ctime_current(ip)); mark_inode_dirty(dip); /* update target's inode */ @@ -827,8 +827,8 @@ static int jfs_link(struct dentry *old_dentry, /* update object inode */ inc_nlink(ip); /* for new link */ - ip->i_ctime = current_time(ip); - dir->i_ctime = dir->i_mtime = current_time(dir); + inode_set_ctime_current(ip); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); ihold(ip); @@ -1028,7 +1028,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, mark_inode_dirty(ip); - dip->i_ctime = dip->i_mtime = current_time(dip); + dip->i_mtime = inode_set_ctime_current(dip); mark_inode_dirty(dip); /* * commit update of parent directory and link object @@ -1205,7 +1205,7 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, tblk->xflag |= COMMIT_DELETE; tblk->u.ip = new_ip; } else { - new_ip->i_ctime = current_time(new_ip); + inode_set_ctime_current(new_ip); mark_inode_dirty(new_ip); } } else { @@ -1268,10 +1268,10 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, /* * Update ctime on changed/moved inodes & mark dirty */ - old_ip->i_ctime = current_time(old_ip); + inode_set_ctime_current(old_ip); mark_inode_dirty(old_ip); - new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir); + new_dir->i_mtime = inode_set_ctime_current(new_dir); mark_inode_dirty(new_dir); /* Build list of inodes modified by this transaction */ @@ -1283,7 +1283,7 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (old_dir != new_dir) { iplist[ipcount++] = new_dir; - old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); + old_dir->i_mtime = inode_set_ctime_current(old_dir); mark_inode_dirty(old_dir); } @@ -1416,7 +1416,7 @@ static int jfs_mknod(struct mnt_idmap *idmap, struct inode *dir, mark_inode_dirty(ip); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); @@ -1535,9 +1535,10 @@ const struct inode_operations jfs_dir_inode_operations = { #endif }; +WRAP_DIR_ITER(jfs_readdir) // FIXME! const struct file_operations jfs_dir_operations = { .read = generic_read_dir, - .iterate = jfs_readdir, + .iterate_shared = shared_jfs_readdir, .fsync = jfs_fsync, .unlocked_ioctl = jfs_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index d2f82cb7db1b..2e2f7f6d36a0 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -818,7 +818,7 @@ out: } if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); inode_unlock(inode); return len - towrite; diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 931e50018f88..8577ad494e05 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -647,7 +647,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf, if (old_blocks) dquot_free_block(inode, old_blocks); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); return 0; } diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c index 5d826274570c..c429c42a6867 100644 --- a/fs/kernel_read_file.c +++ b/fs/kernel_read_file.c @@ -8,16 +8,16 @@ /** * kernel_read_file() - read file contents into a kernel buffer * - * @file file to read from - * @offset where to start reading from (see below). - * @buf pointer to a "void *" buffer for reading into (if + * @file: file to read from + * @offset: where to start reading from (see below). + * @buf: pointer to a "void *" buffer for reading into (if * *@buf is NULL, a buffer will be allocated, and * @buf_size will be ignored) - * @buf_size size of buf, if already allocated. If @buf not + * @buf_size: size of buf, if already allocated. If @buf not * allocated, this is the largest size to allocate. - * @file_size if non-NULL, the full size of @file will be + * @file_size: if non-NULL, the full size of @file will be * written here. - * @id the kernel_read_file_id identifying the type of + * @id: the kernel_read_file_id identifying the type of * file contents being read (for LSMs to examine) * * @offset must be 0 unless both @buf and @file_size are non-NULL diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 5a1a4af9d3d2..660995856a04 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -556,7 +556,7 @@ void kernfs_put(struct kernfs_node *kn) kfree_const(kn->name); if (kn->iattr) { - simple_xattrs_free(&kn->iattr->xattrs); + simple_xattrs_free(&kn->iattr->xattrs, NULL); kmem_cache_free(kernfs_iattrs_cache, kn->iattr); } spin_lock(&kernfs_idr_lock); diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index b22b74d1a115..922719a343a7 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -151,8 +151,7 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size) static inline void set_default_inode_attr(struct inode *inode, umode_t mode) { inode->i_mode = mode; - inode->i_atime = inode->i_mtime = - inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); } static inline void set_inode_attr(struct inode *inode, @@ -162,7 +161,7 @@ static inline void set_inode_attr(struct inode *inode, inode->i_gid = attrs->ia_gid; inode->i_atime = attrs->ia_atime; inode->i_mtime = attrs->ia_mtime; - inode->i_ctime = attrs->ia_ctime; + inode_set_ctime_to_ts(inode, attrs->ia_ctime); } static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode) @@ -191,7 +190,7 @@ int kernfs_iop_getattr(struct mnt_idmap *idmap, down_read(&root->kernfs_iattr_rwsem); kernfs_refresh_inode(kn, inode); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); up_read(&root->kernfs_iattr_rwsem); return 0; @@ -306,11 +305,17 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name, int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { + struct simple_xattr *old_xattr; struct kernfs_iattrs *attrs = kernfs_iattrs(kn); if (!attrs) return -ENOMEM; - return simple_xattr_set(&attrs->xattrs, name, value, size, flags, NULL); + old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags); + if (IS_ERR(old_xattr)) + return PTR_ERR(old_xattr); + + simple_xattr_free(old_xattr); + return 0; } static int kernfs_vfs_xattr_get(const struct xattr_handler *handler, @@ -342,7 +347,7 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn, { atomic_t *sz = &kn->iattr->user_xattr_size; atomic_t *nr = &kn->iattr->nr_user_xattrs; - ssize_t removed_size; + struct simple_xattr *old_xattr; int ret; if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) { @@ -355,13 +360,18 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn, goto dec_size_out; } - ret = simple_xattr_set(xattrs, full_name, value, size, flags, - &removed_size); - - if (!ret && removed_size >= 0) - size = removed_size; - else if (!ret) + old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags); + if (!old_xattr) return 0; + + if (IS_ERR(old_xattr)) { + ret = PTR_ERR(old_xattr); + goto dec_size_out; + } + + ret = 0; + size = old_xattr->size; + simple_xattr_free(old_xattr); dec_size_out: atomic_sub(size, sz); dec_count_out: @@ -376,18 +386,19 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn, { atomic_t *sz = &kn->iattr->user_xattr_size; atomic_t *nr = &kn->iattr->nr_user_xattrs; - ssize_t removed_size; - int ret; + struct simple_xattr *old_xattr; - ret = simple_xattr_set(xattrs, full_name, value, size, flags, - &removed_size); + old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags); + if (!old_xattr) + return 0; - if (removed_size >= 0) { - atomic_sub(removed_size, sz); - atomic_dec(nr); - } + if (IS_ERR(old_xattr)) + return PTR_ERR(old_xattr); - return ret; + atomic_sub(old_xattr->size, sz); + atomic_dec(nr); + simple_xattr_free(old_xattr); + return 0; } static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, diff --git a/fs/libfs.c b/fs/libfs.c index 5b851315eeed..da78eb64831e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -33,7 +33,7 @@ int simple_getattr(struct mnt_idmap *idmap, const struct path *path, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); return 0; } @@ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = { }; EXPORT_SYMBOL(simple_dir_inode_operations); +static void offset_set(struct dentry *dentry, u32 offset) +{ + dentry->d_fsdata = (void *)((uintptr_t)(offset)); +} + +static u32 dentry2offset(struct dentry *dentry) +{ + return (u32)((uintptr_t)(dentry->d_fsdata)); +} + +static struct lock_class_key simple_offset_xa_lock; + +/** + * simple_offset_init - initialize an offset_ctx + * @octx: directory offset map to be initialized + * + */ +void simple_offset_init(struct offset_ctx *octx) +{ + xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1); + lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock); + + /* 0 is '.', 1 is '..', so always start with offset 2 */ + octx->next_offset = 2; +} + +/** + * simple_offset_add - Add an entry to a directory's offset map + * @octx: directory offset ctx to be updated + * @dentry: new dentry being added + * + * Returns zero on success. @so_ctx and the dentry offset are updated. + * Otherwise, a negative errno value is returned. + */ +int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) +{ + static const struct xa_limit limit = XA_LIMIT(2, U32_MAX); + u32 offset; + int ret; + + if (dentry2offset(dentry) != 0) + return -EBUSY; + + ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit, + &octx->next_offset, GFP_KERNEL); + if (ret < 0) + return ret; + + offset_set(dentry, offset); + return 0; +} + +/** + * simple_offset_remove - Remove an entry to a directory's offset map + * @octx: directory offset ctx to be updated + * @dentry: dentry being removed + * + */ +void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) +{ + u32 offset; + + offset = dentry2offset(dentry); + if (offset == 0) + return; + + xa_erase(&octx->xa, offset); + offset_set(dentry, 0); +} + +/** + * simple_offset_rename_exchange - exchange rename with directory offsets + * @old_dir: parent of dentry being moved + * @old_dentry: dentry being moved + * @new_dir: destination parent + * @new_dentry: destination dentry + * + * Returns zero on success. Otherwise a negative errno is returned and the + * rename is rolled back. + */ +int simple_offset_rename_exchange(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry) +{ + struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); + struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); + u32 old_index = dentry2offset(old_dentry); + u32 new_index = dentry2offset(new_dentry); + int ret; + + simple_offset_remove(old_ctx, old_dentry); + simple_offset_remove(new_ctx, new_dentry); + + ret = simple_offset_add(new_ctx, old_dentry); + if (ret) + goto out_restore; + + ret = simple_offset_add(old_ctx, new_dentry); + if (ret) { + simple_offset_remove(new_ctx, old_dentry); + goto out_restore; + } + + ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); + if (ret) { + simple_offset_remove(new_ctx, old_dentry); + simple_offset_remove(old_ctx, new_dentry); + goto out_restore; + } + return 0; + +out_restore: + offset_set(old_dentry, old_index); + xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL); + offset_set(new_dentry, new_index); + xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL); + return ret; +} + +/** + * simple_offset_destroy - Release offset map + * @octx: directory offset ctx that is about to be destroyed + * + * During fs teardown (eg. umount), a directory's offset map might still + * contain entries. xa_destroy() cleans out anything that remains. + */ +void simple_offset_destroy(struct offset_ctx *octx) +{ + xa_destroy(&octx->xa); +} + +/** + * offset_dir_llseek - Advance the read position of a directory descriptor + * @file: an open directory whose position is to be updated + * @offset: a byte offset + * @whence: enumerator describing the starting position for this update + * + * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories. + * + * Returns the updated read position if successful; otherwise a + * negative errno is returned and the read position remains unchanged. + */ +static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) +{ + switch (whence) { + case SEEK_CUR: + offset += file->f_pos; + fallthrough; + case SEEK_SET: + if (offset >= 0) + break; + fallthrough; + default: + return -EINVAL; + } + + return vfs_setpos(file, offset, U32_MAX); +} + +static struct dentry *offset_find_next(struct xa_state *xas) +{ + struct dentry *child, *found = NULL; + + rcu_read_lock(); + child = xas_next_entry(xas, U32_MAX); + if (!child) + goto out; + spin_lock(&child->d_lock); + if (simple_positive(child)) + found = dget_dlock(child); + spin_unlock(&child->d_lock); +out: + rcu_read_unlock(); + return found; +} + +static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) +{ + u32 offset = dentry2offset(dentry); + struct inode *inode = d_inode(dentry); + + return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset, + inode->i_ino, fs_umode_to_dtype(inode->i_mode)); +} + +static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +{ + struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode); + XA_STATE(xas, &so_ctx->xa, ctx->pos); + struct dentry *dentry; + + while (true) { + dentry = offset_find_next(&xas); + if (!dentry) + break; + + if (!offset_dir_emit(ctx, dentry)) { + dput(dentry); + break; + } + + dput(dentry); + ctx->pos = xas.xa_index + 1; + } +} + +/** + * offset_readdir - Emit entries starting at offset @ctx->pos + * @file: an open directory to iterate over + * @ctx: directory iteration context + * + * Caller must hold @file's i_rwsem to prevent insertion or removal of + * entries during this call. + * + * On entry, @ctx->pos contains an offset that represents the first entry + * to be read from the directory. + * + * The operation continues until there are no more entries to read, or + * until the ctx->actor indicates there is no more space in the caller's + * output buffer. + * + * On return, @ctx->pos contains an offset that will read the next entry + * in this directory when offset_readdir() is called again with @ctx. + * + * Return values: + * %0 - Complete + */ +static int offset_readdir(struct file *file, struct dir_context *ctx) +{ + struct dentry *dir = file->f_path.dentry; + + lockdep_assert_held(&d_inode(dir)->i_rwsem); + + if (!dir_emit_dots(file, ctx)) + return 0; + + offset_iterate_dir(d_inode(dir), ctx); + return 0; +} + +const struct file_operations simple_offset_dir_operations = { + .llseek = offset_dir_llseek, + .iterate_shared = offset_readdir, + .read = generic_read_dir, + .fsync = noop_fsync, +}; + static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev) { struct dentry *child = NULL; @@ -275,7 +523,7 @@ void simple_recursive_removal(struct dentry *dentry, while ((child = find_next_child(this, victim)) == NULL) { // kill and ascend // update metadata while it's still locked - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); clear_nlink(inode); inode_unlock(inode); victim = this; @@ -293,8 +541,7 @@ void simple_recursive_removal(struct dentry *dentry, dput(victim); // unpin it } if (victim == dentry) { - inode->i_ctime = inode->i_mtime = - current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); if (d_is_dir(dentry)) drop_nlink(inode); inode_unlock(inode); @@ -335,7 +582,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) */ root->i_ino = 1; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; - root->i_atime = root->i_mtime = root->i_ctime = current_time(root); + root->i_atime = root->i_mtime = inode_set_ctime_current(root); s->s_root = d_make_root(root); if (!s->s_root) return -ENOMEM; @@ -391,7 +638,8 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den { struct inode *inode = d_inode(old_dentry); - inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); + dir->i_mtime = inode_set_ctime_to_ts(dir, + inode_set_ctime_current(inode)); inc_nlink(inode); ihold(inode); dget(dentry); @@ -425,7 +673,8 @@ int simple_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); - inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); + dir->i_mtime = inode_set_ctime_to_ts(dir, + inode_set_ctime_current(inode)); drop_nlink(inode); dput(dentry); return 0; @@ -444,6 +693,31 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL(simple_rmdir); +/** + * simple_rename_timestamp - update the various inode timestamps for rename + * @old_dir: old parent directory + * @old_dentry: dentry that is being renamed + * @new_dir: new parent directory + * @new_dentry: target for rename + * + * POSIX mandates that the old and new parent directories have their ctime and + * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have + * their ctime updated. + */ +void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *newino = d_inode(new_dentry); + + old_dir->i_mtime = inode_set_ctime_current(old_dir); + if (new_dir != old_dir) + new_dir->i_mtime = inode_set_ctime_current(new_dir); + inode_set_ctime_current(d_inode(old_dentry)); + if (newino) + inode_set_ctime_current(newino); +} +EXPORT_SYMBOL_GPL(simple_rename_timestamp); + int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { @@ -459,11 +733,7 @@ int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, inc_nlink(old_dir); } } - old_dir->i_ctime = old_dir->i_mtime = - new_dir->i_ctime = new_dir->i_mtime = - d_inode(old_dentry)->i_ctime = - d_inode(new_dentry)->i_ctime = current_time(old_dir); - + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); return 0; } EXPORT_SYMBOL_GPL(simple_rename_exchange); @@ -472,7 +742,6 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - struct inode *inode = d_inode(old_dentry); int they_are_dirs = d_is_dir(old_dentry); if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) @@ -495,9 +764,7 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir, inc_nlink(new_dir); } - old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = - new_dir->i_mtime = inode->i_ctime = current_time(old_dir); - + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); return 0; } EXPORT_SYMBOL(simple_rename); @@ -548,21 +815,20 @@ int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata) { - struct page *page; - pgoff_t index; - - index = pos >> PAGE_SHIFT; + struct folio *folio; - page = grab_cache_page_write_begin(mapping, index); - if (!page) - return -ENOMEM; + folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) + return PTR_ERR(folio); - *pagep = page; + *pagep = &folio->page; - if (!PageUptodate(page) && (len != PAGE_SIZE)) { - unsigned from = pos & (PAGE_SIZE - 1); + if (!folio_test_uptodate(folio) && (len != folio_size(folio))) { + size_t from = offset_in_folio(folio, pos); - zero_user_segments(page, 0, from, from + len, PAGE_SIZE); + folio_zero_segments(folio, 0, from, + from + len, folio_size(folio)); } return 0; } @@ -594,17 +860,18 @@ static int simple_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(page); + struct inode *inode = folio->mapping->host; loff_t last_pos = pos + copied; - /* zero the stale part of the page if we did a short copy */ - if (!PageUptodate(page)) { + /* zero the stale part of the folio if we did a short copy */ + if (!folio_test_uptodate(folio)) { if (copied < len) { - unsigned from = pos & (PAGE_SIZE - 1); + size_t from = offset_in_folio(folio, pos); - zero_user(page, from + copied, len - copied); + folio_zero_range(folio, from + copied, len - copied); } - SetPageUptodate(page); + folio_mark_uptodate(folio); } /* * No need to use i_size_read() here, the i_size @@ -613,9 +880,9 @@ static int simple_write_end(struct file *file, struct address_space *mapping, if (last_pos > inode->i_size) i_size_write(inode, last_pos); - set_page_dirty(page); - unlock_page(page); - put_page(page); + folio_mark_dirty(folio); + folio_unlock(folio); + folio_put(folio); return copied; } @@ -659,7 +926,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic, */ inode->i_ino = 1; inode->i_mode = S_IFDIR | 0755; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; set_nlink(inode, 2); @@ -685,7 +952,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic, goto out; } inode->i_mode = S_IFREG | files->mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inode->i_fop = files->ops; inode->i_ino = i; d_add(dentry, inode); @@ -1253,7 +1520,7 @@ struct inode *alloc_anon_inode(struct super_block *s) inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); return inode; } EXPORT_SYMBOL(alloc_anon_inode); @@ -1269,7 +1536,7 @@ EXPORT_SYMBOL(alloc_anon_inode); * All arguments are ignored and it just returns -EINVAL. */ int -simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, +simple_nosetlease(struct file *filp, int arg, struct file_lock **flp, void **priv) { return -EINVAL; @@ -1315,7 +1582,7 @@ static int empty_dir_getattr(struct mnt_idmap *idmap, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } diff --git a/fs/locks.c b/fs/locks.c index df8b26a42524..265b5190db3e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -438,7 +438,7 @@ static void flock_make_lock(struct file *filp, struct file_lock *fl, int type) fl->fl_end = OFFSET_MAX; } -static int assign_type(struct file_lock *fl, long type) +static int assign_type(struct file_lock *fl, int type) { switch (type) { case F_RDLCK: @@ -549,7 +549,7 @@ static const struct lock_manager_operations lease_manager_ops = { /* * Initialize a lease, use the default lock manager operations */ -static int lease_init(struct file *filp, long type, struct file_lock *fl) +static int lease_init(struct file *filp, int type, struct file_lock *fl) { if (assign_type(fl, type) != 0) return -EINVAL; @@ -567,7 +567,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static struct file_lock *lease_alloc(struct file *filp, long type) +static struct file_lock *lease_alloc(struct file *filp, int type) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; @@ -1666,7 +1666,7 @@ int fcntl_getlease(struct file *filp) * conflict with the lease we're trying to set. */ static int -check_conflicting_open(struct file *filp, const long arg, int flags) +check_conflicting_open(struct file *filp, const int arg, int flags) { struct inode *inode = file_inode(filp); int self_wcount = 0, self_rcount = 0; @@ -1701,7 +1701,7 @@ check_conflicting_open(struct file *filp, const long arg, int flags) } static int -generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) +generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **priv) { struct file_lock *fl, *my_fl = NULL, *lease; struct inode *inode = file_inode(filp); @@ -1859,7 +1859,7 @@ static int generic_delete_lease(struct file *filp, void *owner) * The (input) flp->fl_lmops->lm_break function is required * by break_lease(). */ -int generic_setlease(struct file *filp, long arg, struct file_lock **flp, +int generic_setlease(struct file *filp, int arg, struct file_lock **flp, void **priv) { struct inode *inode = file_inode(filp); @@ -1906,7 +1906,7 @@ lease_notifier_chain_init(void) } static inline void -setlease_notifier(long arg, struct file_lock *lease) +setlease_notifier(int arg, struct file_lock *lease) { if (arg != F_UNLCK) srcu_notifier_call_chain(&lease_notifier_chain, arg, lease); @@ -1942,7 +1942,7 @@ EXPORT_SYMBOL_GPL(lease_unregister_notifier); * may be NULL if the lm_setup operation doesn't require it. */ int -vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) +vfs_setlease(struct file *filp, int arg, struct file_lock **lease, void **priv) { if (lease) setlease_notifier(arg, *lease); @@ -1953,7 +1953,7 @@ vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) } EXPORT_SYMBOL_GPL(vfs_setlease); -static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) +static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg) { struct file_lock *fl; struct fasync_struct *new; @@ -1988,7 +1988,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) * Note that you also need to call %F_SETSIG to * receive a signal when the lease is broken. */ -int fcntl_setlease(unsigned int fd, struct file *filp, long arg) +int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { if (arg == F_UNLCK) return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 870207ba23f1..25c08fbfcb9d 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -251,7 +251,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode) } inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = j; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_blocks = 0; memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u)); insert_inode_hash(inode); diff --git a/fs/minix/dir.c b/fs/minix/dir.c index bf9858f76b6a..20f23e6e58ad 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -281,7 +281,7 @@ got_it: de->inode = inode->i_ino; } dir_commit_chunk(page, pos, sbi->s_dirsize); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); err = minix_handle_dirsync(dir); out_put: @@ -313,7 +313,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page) else de->inode = 0; dir_commit_chunk(page, pos, len); - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); return minix_handle_dirsync(inode); } @@ -436,7 +436,7 @@ int minix_set_link(struct minix_dir_entry *de, struct page *page, else de->inode = inode->i_ino; dir_commit_chunk(page, pos, sbi->s_dirsize); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); return minix_handle_dirsync(dir); } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index e9fbb5303a22..df575473c1cc 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -501,10 +501,7 @@ static struct inode *V1_minix_iget(struct inode *inode) i_gid_write(inode, raw_inode->i_gid); set_nlink(inode, raw_inode->i_nlinks); inode->i_size = raw_inode->i_size; - inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time; - inode->i_mtime.tv_nsec = 0; - inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; + inode->i_mtime = inode->i_atime = inode_set_ctime(inode, raw_inode->i_time, 0); inode->i_blocks = 0; for (i = 0; i < 9; i++) minix_inode->u.i1_data[i] = raw_inode->i_zone[i]; @@ -543,10 +540,9 @@ static struct inode *V2_minix_iget(struct inode *inode) inode->i_size = raw_inode->i_size; inode->i_mtime.tv_sec = raw_inode->i_mtime; inode->i_atime.tv_sec = raw_inode->i_atime; - inode->i_ctime.tv_sec = raw_inode->i_ctime; + inode_set_ctime(inode, raw_inode->i_ctime, 0); inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; inode->i_blocks = 0; for (i = 0; i < 10; i++) minix_inode->u.i2_data[i] = raw_inode->i_zone[i]; @@ -622,7 +618,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) raw_inode->i_size = inode->i_size; raw_inode->i_mtime = inode->i_mtime.tv_sec; raw_inode->i_atime = inode->i_atime.tv_sec; - raw_inode->i_ctime = inode->i_ctime.tv_sec; + raw_inode->i_ctime = inode_get_ctime(inode).tv_sec; if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) raw_inode->i_zone[0] = old_encode_dev(inode->i_rdev); else for (i = 0; i < 10; i++) @@ -660,7 +656,7 @@ int minix_getattr(struct mnt_idmap *idmap, const struct path *path, struct super_block *sb = path->dentry->d_sb; struct inode *inode = d_inode(path->dentry); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (INODE_VERSION(inode) == MINIX_V1) stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb); else diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c index 446148792f41..ce18ae37c29d 100644 --- a/fs/minix/itree_common.c +++ b/fs/minix/itree_common.c @@ -131,7 +131,7 @@ static inline int splice_branch(struct inode *inode, /* We are done with atomic stuff, now do the rest of housekeeping */ - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); /* had we spliced it onto indirect block? */ if (where->bh) @@ -350,7 +350,7 @@ do_indirects: } first_whole++; } - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 956d5183828d..114084d5636a 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -98,7 +98,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, { struct inode *inode = d_inode(old_dentry); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); return add_nondir(dentry, inode); @@ -154,7 +154,7 @@ static int minix_unlink(struct inode * dir, struct dentry *dentry) if (err) return err; - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); return 0; } @@ -218,7 +218,7 @@ static int minix_rename(struct mnt_idmap *idmap, put_page(new_page); if (err) goto out_dir; - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); if (dir_de) drop_nlink(new_inode); inode_dec_link_count(new_inode); diff --git a/fs/namei.c b/fs/namei.c index e56ff39a79bc..567ee547492b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -643,6 +643,8 @@ static bool nd_alloc_stack(struct nameidata *nd) /** * path_connected - Verify that a dentry is below mnt.mnt_root + * @mnt: The mountpoint to check. + * @dentry: The dentry to check. * * Rename can sometimes move a file or directory outside of a bind * mount, path_connected allows those cases to be detected. @@ -1083,6 +1085,7 @@ fs_initcall(init_fs_namei_sysctls); /** * may_follow_link - Check symlink following for unsafe situations * @nd: nameidata pathwalk data + * @inode: Used for idmapping. * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is @@ -2890,7 +2893,7 @@ int path_pts(struct path *path) dput(path->dentry); path->dentry = parent; child = d_hash_and_lookup(parent, &this); - if (!child) + if (IS_ERR_OR_NULL(child)) return -ENOENT; path->dentry = child; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c1eda73254e1..6bed1394d748 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -59,7 +59,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, res->change_attr = delegation->change_attr; if (nfs_have_writebacks(inode)) res->change_attr++; - res->ctime = inode->i_ctime; + res->ctime = inode_get_ctime(inode); res->mtime = inode->i_mtime; res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & args->bitmap[0]; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9a18c5a69ace..aaffaaa336cc 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -472,20 +472,26 @@ out: return result; } -static void -nfs_direct_join_group(struct list_head *list, struct inode *inode) +static void nfs_direct_join_group(struct list_head *list, struct inode *inode) { - struct nfs_page *req, *next; + struct nfs_page *req, *subreq; list_for_each_entry(req, list, wb_list) { - if (req->wb_head != req || req->wb_this_page == req) + if (req->wb_head != req) continue; - for (next = req->wb_this_page; - next != req->wb_head; - next = next->wb_this_page) { - nfs_list_remove_request(next); - nfs_release_request(next); - } + subreq = req->wb_this_page; + if (subreq == req) + continue; + do { + /* + * Remove subrequests from this list before freeing + * them in the call to nfs_join_page_group(). + */ + if (!list_empty(&subreq->wb_list)) { + nfs_list_remove_request(subreq); + nfs_release_request(subreq); + } + } while ((subreq = subreq->wb_this_page) != req); nfs_join_page_group(req, inode); } } diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index e1706e736c64..2dc64454492b 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -116,8 +116,8 @@ static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata * memset(auxdata, 0, sizeof(*auxdata)); auxdata->mtime_sec = inode->i_mtime.tv_sec; auxdata->mtime_nsec = inode->i_mtime.tv_nsec; - auxdata->ctime_sec = inode->i_ctime.tv_sec; - auxdata->ctime_nsec = inode->i_ctime.tv_nsec; + auxdata->ctime_sec = inode_get_ctime(inode).tv_sec; + auxdata->ctime_nsec = inode_get_ctime(inode).tv_nsec; if (NFS_SERVER(inode)->nfs_client->rpc_ops->version == 4) auxdata->change_attr = inode_peek_iversion_raw(inode); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8172dd4135a1..e21c073158e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -514,7 +514,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) memset(&inode->i_atime, 0, sizeof(inode->i_atime)); memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); - memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); + inode_set_ctime(inode, 0, 0); inode_set_iversion_raw(inode, 0); inode->i_size = 0; clear_nlink(inode); @@ -535,7 +535,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -731,7 +731,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -749,7 +749,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -765,7 +765,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -912,7 +912,7 @@ out_no_revalidate: /* Only return attributes that were revalidated. */ stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask; - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); stat->change_cookie = inode_peek_iversion_raw(inode); stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; @@ -1444,11 +1444,11 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); } /* If we have atomic WCC data, we may update some attributes */ - ts = inode->i_ctime; + ts = inode_get_ctime(inode); if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) && timespec64_equal(&ts, &fattr->pre_ctime)) { - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); } ts = inode->i_mtime; @@ -1510,7 +1510,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime)) invalid |= NFS_INO_INVALID_MTIME; - ts = inode->i_ctime; + ts = inode_get_ctime(inode); if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime)) invalid |= NFS_INO_INVALID_CTIME; @@ -1997,7 +1997,7 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { - fattr->pre_ctime = inode->i_ctime; + fattr->pre_ctime = inode_get_ctime(inode); fattr->valid |= NFS_ATTR_FATTR_PRECTIME; } if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && @@ -2190,7 +2190,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) save_cache_validity & NFS_INO_INVALID_MTIME; if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CTIME; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 19d51ebf842c..e7494cdd957e 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -215,7 +215,8 @@ nfs_namespace_getattr(struct mnt_idmap *idmap, if (NFS_FH(d_inode(path->dentry))->size != 0) return nfs_getattr(idmap, path, stat, request_mask, query_flags); - generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry), + stat); return 0; } diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 63802d195556..49f78e23b34c 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1377,7 +1377,6 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, for (i = 0; i < np; i++) { pages[i] = alloc_page(GFP_KERNEL); if (!pages[i]) { - np = i + 1; err = -ENOMEM; goto out; } @@ -1401,8 +1400,8 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, } while (exception.retry); out: - while (--np >= 0) - __free_page(pages[np]); + while (--i >= 0) + __free_page(pages[i]); kfree(pages); return err; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4c9f8bd866ab..47c5c1f86d66 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -328,7 +328,7 @@ extern int update_open_stateid(struct nfs4_state *state, const nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode); -extern int nfs4_proc_setlease(struct file *file, long arg, +extern int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease, void **priv); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 4aeadd6e1a6d..02788c3c85e5 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -438,7 +438,7 @@ void nfs42_ssc_unregister_ops(void) } #endif /* CONFIG_NFS_V4_2 */ -static int nfs4_setlease(struct file *file, long arg, struct file_lock **lease, +static int nfs4_setlease(struct file *file, int arg, struct file_lock **lease, void **priv) { return nfs4_proc_setlease(file, arg, lease, priv); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e1a886b58354..d57aaf0cc577 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6004,9 +6004,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, out_ok: ret = res.acl_len; out_free: - for (i = 0; i < npages; i++) - if (pages[i]) - __free_page(pages[i]); + while (--i >= 0) + __free_page(pages[i]); if (res.acl_scratch) __free_page(res.acl_scratch); kfree(pages); @@ -7181,8 +7180,15 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) goto out_restart; break; - case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: + if (data->arg.new_lock_owner != 0 && + nfs4_refresh_open_old_stateid(&data->arg.open_stateid, + lsp->ls_state)) + goto out_restart; + if (nfs4_refresh_lock_old_stateid(&data->arg.lock_stateid, lsp)) + goto out_restart; + fallthrough; + case -NFS4ERR_BAD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { @@ -7573,7 +7579,7 @@ static int nfs4_delete_lease(struct file *file, void **priv) return generic_setlease(file, F_UNLCK, NULL, priv); } -static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease, +static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease, void **priv) { struct inode *inode = file_inode(file); @@ -7591,7 +7597,7 @@ static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease, return -EAGAIN; } -int nfs4_proc_setlease(struct file *file, long arg, struct file_lock **lease, +int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease, void **priv) { switch (arg) { diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index acda8f033d30..bf378ecd5d9f 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -345,8 +345,10 @@ void nfs_sysfs_move_sb_to_server(struct nfs_server *server) int ret = -ENOMEM; s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id); - if (s) + if (s) { ret = kobject_rename(&server->kobj, s); + kfree(s); + } if (ret < 0) pr_warn("NFS: rename sysfs %s failed (%d)\n", server->kobj.name, ret); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6e61fa3acaf1..daf305daa751 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1354,9 +1354,9 @@ static void revoke_delegation(struct nfs4_delegation *dp) trace_nfsd_stid_revoke(&dp->dl_stid); if (clp->cl_minorversion) { + spin_lock(&clp->cl_lock); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; refcount_inc(&dp->dl_stid.sc_count); - spin_lock(&clp->cl_lock); list_add(&dp->dl_recall_lru, &clp->cl_revoked); spin_unlock(&clp->cl_lock); } @@ -6341,8 +6341,6 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return status; - if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) - return status; spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1b8b1aab9a15..3709830f90a6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1105,6 +1105,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) if (!nn->nfsd_serv) return -EBUSY; trace_nfsd_end_grace(netns(file)); + nfsd4_end_grace(nn); break; default: return -EINVAL; @@ -1131,7 +1132,7 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) /* Following advice from simple_fill_super documentation: */ inode->i_ino = iunique(sb, NFSD_MaxReserved); inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8a2321d19194..9b7acba382fe 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -520,7 +520,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_sanitize_attrs(inode, iap); - if (check_guard && guardtime != inode->i_ctime.tv_sec) + if (check_guard && guardtime != inode_get_ctime(inode).tv_sec) return nfserr_notsync; /* @@ -956,10 +956,13 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, last_page = page + (offset + sd->len - 1) / PAGE_SIZE; for (page += offset / PAGE_SIZE; page <= last_page; page++) { /* - * Skip page replacement when extending the contents - * of the current page. + * Skip page replacement when extending the contents of the + * current page. But note that we may get two zero_pages in a + * row from shmem. */ - if (page == *(rqstp->rq_next_page - 1)) + if (page == *(rqstp->rq_next_page - 1) && + offset_in_page(rqstp->rq_res.page_base + + rqstp->rq_res.page_len)) continue; if (unlikely(!svc_rqst_replace_page(rqstp, page))) return -EIO; diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index decd6471300b..bce734b68f08 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -429,7 +429,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, nilfs_set_de_type(de, inode); nilfs_commit_chunk(page, mapping, from, to); nilfs_put_page(page); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); } /* @@ -519,7 +519,7 @@ got_it: de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); nilfs_commit_chunk(page, page->mapping, from, to); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); nilfs_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: @@ -567,7 +567,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) pde->rec_len = nilfs_rec_len_to_disk(to - from); dir->inode = 0; nilfs_commit_chunk(page, mapping, from, to); - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); out: nilfs_put_page(page); return err; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index a8ce522ac747..d588c719d743 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -366,7 +366,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) atomic64_inc(&root->inodes_count); inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = ino; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { err = nilfs_bmap_read(ii->i_bmap, NULL); @@ -450,10 +450,10 @@ int nilfs_read_inode_common(struct inode *inode, set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); + inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime), + le32_to_cpu(raw_inode->i_ctime_nsec)); inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) return -EIO; /* this inode is for metadata and corrupted */ @@ -768,9 +768,9 @@ void nilfs_write_inode_common(struct inode *inode, raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le64(inode->i_size); - raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + raw_inode->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); - raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); @@ -875,7 +875,7 @@ void nilfs_truncate(struct inode *inode) nilfs_truncate_bmap(ii, blkoff); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); @@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) int __nilfs_mark_inode_dirty(struct inode *inode, int flags) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct buffer_head *ibh; int err; + /* + * Do not dirty inodes after the log writer has been detached + * and its nilfs_root struct has been freed. + */ + if (unlikely(nilfs_purging(nilfs))) + return 0; + err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { nilfs_warn(inode->i_sb, diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 1dfbc0c34513..40ffade49f38 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -149,7 +149,7 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap, NILFS_I(inode)->i_flags = oldflags | (flags & FS_FL_USER_MODIFIABLE); nilfs_set_inode_flags(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index c7024da8f1e2..2a4e7f4a8102 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -185,7 +185,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, if (err) return err; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); @@ -283,7 +283,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) if (err) goto out; - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); drop_nlink(inode); err = 0; out: @@ -387,7 +387,7 @@ static int nilfs_rename(struct mnt_idmap *idmap, goto out_dir; nilfs_set_link(new_dir, new_de, new_page, old_inode); nilfs_mark_inode_dirty(new_dir); - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); if (dir_de) drop_nlink(new_inode); drop_nlink(new_inode); @@ -406,7 +406,7 @@ static int nilfs_rename(struct mnt_idmap *idmap, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = current_time(old_inode); + inode_set_ctime_current(old_inode); nilfs_delete_entry(old_de, old_page); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index c2553024bd25..7ec16879756e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, struct folio *folio = fbatch.folios[i]; folio_lock(folio); + if (unlikely(folio->mapping != mapping)) { + /* Exclude folios removed from the address space */ + folio_unlock(folio); + continue; + } head = folio_buffers(folio); if (!head) { create_empty_buffers(&folio->page, i_blocksize(inode), 0); @@ -2845,6 +2850,7 @@ void nilfs_detach_log_writer(struct super_block *sb) nilfs_segctor_destroy(nilfs->ns_writer); nilfs->ns_writer = NULL; } + set_nilfs_purging(nilfs); /* Force to free the list of dirty files */ spin_lock(&nilfs->ns_inode_lock); @@ -2857,4 +2863,5 @@ void nilfs_detach_log_writer(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); nilfs_dispose_list(nilfs, &garbage_list, 1); + clear_nilfs_purging(nilfs); } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 0ef8c71bde8e..a5d1fa4e7552 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -35,6 +35,7 @@ #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/fs_context.h> #include "nilfs.h" #include "export.h" #include "mdt.h" @@ -1216,7 +1217,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) } struct nilfs_super_data { - struct block_device *bdev; __u64 cno; int flags; }; @@ -1283,64 +1283,49 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) static int nilfs_set_bdev_super(struct super_block *s, void *data) { - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; + s->s_dev = *(dev_t *)data; return 0; } static int nilfs_test_bdev_super(struct super_block *s, void *data) { - return (void *)s->s_bdev == data; + return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data; } static struct dentry * nilfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct nilfs_super_data sd; + struct nilfs_super_data sd = { .flags = flags }; struct super_block *s; - struct dentry *root_dentry; - int err, s_new = false; + dev_t dev; + int err; - sd.bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type, - NULL); - if (IS_ERR(sd.bdev)) - return ERR_CAST(sd.bdev); + if (nilfs_identify(data, &sd)) + return ERR_PTR(-EINVAL); - sd.cno = 0; - sd.flags = flags; - if (nilfs_identify((char *)data, &sd)) { - err = -EINVAL; - goto failed; - } + err = lookup_bdev(dev_name, &dev); + if (err) + return ERR_PTR(err); - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - mutex_lock(&sd.bdev->bd_fsfreeze_mutex); - if (sd.bdev->bd_fsfreeze_count > 0) { - mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); - err = -EBUSY; - goto failed; - } s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, - sd.bdev); - mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); - if (IS_ERR(s)) { - err = PTR_ERR(s); - goto failed; - } + &dev); + if (IS_ERR(s)) + return ERR_CAST(s); if (!s->s_root) { - s_new = true; - - /* New superblock instance created */ - snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev); - sb_set_blocksize(s, block_size(sd.bdev)); - - err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0); + /* + * We drop s_umount here because we need to open the bdev and + * bdev->open_mutex ranks above s_umount (blkdev_put() -> + * __invalidate_device()). It is safe because we have active sb + * reference and SB_BORN is not set yet. + */ + up_write(&s->s_umount); + err = setup_bdev_super(s, flags, NULL); + down_write(&s->s_umount); + if (!err) + err = nilfs_fill_super(s, data, + flags & SB_SILENT ? 1 : 0); if (err) goto failed_super; @@ -1366,24 +1351,18 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (sd.cno) { + struct dentry *root_dentry; + err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); if (err) goto failed_super; - } else { - root_dentry = dget(s->s_root); + return root_dentry; } - if (!s_new) - blkdev_put(sd.bdev, fs_type); - - return root_dentry; + return dget(s->s_root); failed_super: deactivate_locked_super(s); - - failed: - if (!s_new) - blkdev_put(sd.bdev, fs_type); return ERR_PTR(err); } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 47c7dfbb7ea5..cd4ae1b8ae16 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -29,6 +29,7 @@ enum { THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ THE_NILFS_GC_RUNNING, /* gc process is running */ THE_NILFS_SB_DIRTY, /* super block is dirty */ + THE_NILFS_PURGING, /* disposing dirty files for cleanup */ }; /** @@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) +THE_NILFS_FNS(PURGING, purging) /* * Mount option operations diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 52ccd34b1e79..a026dbd3593f 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -272,7 +272,7 @@ int unregister_nls(struct nls_table * nls) return -EINVAL; } -static struct nls_table *find_nls(char *charset) +static struct nls_table *find_nls(const char *charset) { struct nls_table *nls; spin_lock(&nls_lock); @@ -288,7 +288,7 @@ static struct nls_table *find_nls(char *charset) return nls; } -struct nls_table *load_nls(char *charset) +struct nls_table *load_nls(const char *charset) { return try_then_request_module(find_nls(charset), "nls_%s", charset); } diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 190aa717fa32..ebdcc25df0f7 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -199,7 +199,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) } /* this conversion is done only at watch creation */ -static __u32 convert_arg(unsigned long arg) +static __u32 convert_arg(unsigned int arg) { __u32 new_mask = FS_EVENT_ON_CHILD; @@ -258,7 +258,7 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark, * up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be * attached to the fsnotify_mark. */ -int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) +int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) { struct dnotify_mark *new_dn_mark, *dn_mark; struct fsnotify_mark *new_fsn_mark, *fsn_mark; diff --git a/fs/nsfs.c b/fs/nsfs.c index f602a96a1afe..647a22433bd8 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -84,7 +84,7 @@ slow: return -ENOMEM; } inode->i_ino = ns->inum; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_flags |= S_IMMUTABLE; inode->i_mode = S_IFREG | S_IRUGO; inode->i_fop = &ns_file_operations; diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 518c3a21a556..4596c90e7b7c 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1525,10 +1525,11 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end, #endif /* NTFS_RW */ +WRAP_DIR_ITER(ntfs_readdir) // FIXME! const struct file_operations ntfs_dir_ops = { .llseek = generic_file_llseek, /* Seek inside directory. */ .read = generic_read_dir, /* Return -EISDIR. */ - .iterate = ntfs_readdir, /* Read directory contents. */ + .iterate_shared = shared_ntfs_readdir, /* Read directory contents. */ #ifdef NTFS_RW .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ #endif /* NTFS_RW */ diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 6c3f38d66579..99ac6ea277c4 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -654,7 +654,7 @@ static int ntfs_read_locked_inode(struct inode *vi) * always changes, when mtime is changed. ctime can be changed on its * own, mtime is then not changed, e.g. when a file is renamed. */ - vi->i_ctime = ntfs2utc(si->last_mft_change_time); + inode_set_ctime_to_ts(vi, ntfs2utc(si->last_mft_change_time)); /* * Last access to the data within the file. Not changed during a rename * for example but changed whenever the file is written to. @@ -1218,7 +1218,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) vi->i_gid = base_vi->i_gid; set_nlink(vi, base_vi->i_nlink); vi->i_mtime = base_vi->i_mtime; - vi->i_ctime = base_vi->i_ctime; + inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi)); vi->i_atime = base_vi->i_atime; vi->i_generation = ni->seq_no = base_ni->seq_no; @@ -1484,7 +1484,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) vi->i_gid = base_vi->i_gid; set_nlink(vi, base_vi->i_nlink); vi->i_mtime = base_vi->i_mtime; - vi->i_ctime = base_vi->i_ctime; + inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi)); vi->i_atime = base_vi->i_atime; vi->i_generation = ni->seq_no = base_ni->seq_no; /* Set inode type to zero but preserve permissions. */ @@ -2804,13 +2804,14 @@ done: */ if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) { struct timespec64 now = current_time(VFS_I(base_ni)); + struct timespec64 ctime = inode_get_ctime(VFS_I(base_ni)); int sync_it = 0; if (!timespec64_equal(&VFS_I(base_ni)->i_mtime, &now) || - !timespec64_equal(&VFS_I(base_ni)->i_ctime, &now)) + !timespec64_equal(&ctime, &now)) sync_it = 1; + inode_set_ctime_to_ts(VFS_I(base_ni), now); VFS_I(base_ni)->i_mtime = now; - VFS_I(base_ni)->i_ctime = now; if (sync_it) mark_inode_dirty_sync(VFS_I(base_ni)); @@ -2928,7 +2929,7 @@ int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (ia_valid & ATTR_MTIME) vi->i_mtime = attr->ia_mtime; if (ia_valid & ATTR_CTIME) - vi->i_ctime = attr->ia_ctime; + inode_set_ctime_to_ts(vi, attr->ia_ctime); mark_inode_dirty(vi); out: return err; @@ -3004,7 +3005,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si->last_data_change_time = nt; modified = true; } - nt = utc2ntfs(vi->i_ctime); + nt = utc2ntfs(inode_get_ctime(vi)); if (si->last_mft_change_time != nt) { ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, (long long) diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 0155f106ec34..ad1a8f72da22 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2682,8 +2682,7 @@ mft_rec_already_initialized: vi->i_mode &= ~S_IWUGO; /* Set the inode times to the current time. */ - vi->i_atime = vi->i_mtime = vi->i_ctime = - current_time(vi); + vi->i_atime = vi->i_mtime = inode_set_ctime_current(vi); /* * Set the file size to 0, the ntfs inode sizes are set to 0 by * the call to ntfs_init_big_inode() below. diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 1d6c824246c4..962f12ce6c0a 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -85,7 +85,7 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED; - generic_fillattr(idmap, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); stat->result_mask |= STATX_BTIME; stat->btime = ni->i_crtime; @@ -342,7 +342,7 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, err = 0; } - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); if (IS_SYNC(inode)) { @@ -400,7 +400,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) ni_unlock(ni); ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); if (!IS_DIRSYNC(inode)) { dirty = 1; } else { @@ -642,7 +642,7 @@ out: filemap_invalidate_unlock(mapping); if (!err) { - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); } diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 16bd9faa2d28..2b85cb10f0be 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3265,6 +3265,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) if (is_rec_inuse(ni->mi.mrec) && !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) { bool modified = false; + struct timespec64 ctime = inode_get_ctime(inode); /* Update times in standard attribute. */ std = ni_std(ni); @@ -3280,7 +3281,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) modified = true; } - dup.c_time = kernel2nt(&inode->i_ctime); + dup.c_time = kernel2nt(&ctime); if (std->c_time != dup.c_time) { std->c_time = dup.c_time; modified = true; diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index dc7e7ab701c6..4123e126c4d0 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -44,6 +44,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, u64 t64; struct MFT_REC *rec; struct runs_tree *run; + struct timespec64 ctime; inode->i_op = NULL; /* Setup 'uid' and 'gid' */ @@ -169,7 +170,8 @@ next_attr: nt2kernel(std5->cr_time, &ni->i_crtime); #endif nt2kernel(std5->a_time, &inode->i_atime); - nt2kernel(std5->c_time, &inode->i_ctime); + ctime = inode_get_ctime(inode); + nt2kernel(std5->c_time, &ctime); nt2kernel(std5->m_time, &inode->i_mtime); ni->std_fa = std5->fa; @@ -958,7 +960,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, if (err >= 0) { if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) { - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; dirty = true; } @@ -1658,8 +1660,8 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, d_instantiate(dentry, inode); /* Set original time. inode times (i_ctime) may be changed in ntfs_init_acl. */ - inode->i_atime = inode->i_mtime = inode->i_ctime = dir->i_mtime = - dir->i_ctime = ni->i_crtime; + inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, ni->i_crtime); + dir->i_mtime = inode_set_ctime_to_ts(dir, ni->i_crtime); mark_inode_dirty(dir); mark_inode_dirty(inode); @@ -1765,9 +1767,9 @@ int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry) if (!err) { drop_nlink(inode); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); if (inode->i_nlink) mark_inode_dirty(inode); } else if (!ni_remove_name_undo(dir_ni, ni, de, de2, undo_remove)) { diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index 70f8c859e0ad..ad430d50bd79 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -156,8 +156,8 @@ static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de) err = ntfs_link_inode(inode, de); if (!err) { - dir->i_ctime = dir->i_mtime = inode->i_ctime = - current_time(dir); + dir->i_mtime = inode_set_ctime_to_ts(inode, + inode_set_ctime_current(dir)); mark_inode_dirty(inode); mark_inode_dirty(dir); d_instantiate(de, inode); @@ -324,14 +324,11 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir, /* Restore after failed rename failed too. */ _ntfs_bad_inode(inode); } else if (!err) { - inode->i_ctime = dir->i_ctime = dir->i_mtime = - current_time(dir); + simple_rename_timestamp(dir, dentry, new_dir, new_dentry); mark_inode_dirty(inode); mark_inode_dirty(dir); - if (dir != new_dir) { - new_dir->i_mtime = new_dir->i_ctime = dir->i_ctime; + if (dir != new_dir) mark_inode_dirty(new_dir); - } if (IS_DIRSYNC(dir)) ntfs_sync_inode(dir); diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 1a02072b6b0e..5fffddea554f 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -569,9 +569,9 @@ static void init_once(void *foo) } /* - * put_ntfs - Noinline to reduce binary size. + * Noinline to reduce binary size. */ -static noinline void put_ntfs(struct ntfs_sb_info *sbi) +static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi) { kfree(sbi->new_rec); kvfree(ntfs_put_shared(sbi->upcase)); @@ -625,12 +625,6 @@ static void ntfs_put_super(struct super_block *sb) /* Mark rw ntfs as clear, if possible. */ ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); - - put_mount_options(sbi->options); - put_ntfs(sbi); - sb->s_fs_info = NULL; - - sync_blockdev(sb->s_bdev); } static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -1564,15 +1558,7 @@ load_root: put_inode_out: iput(inode); out: - /* - * Free resources here. - * ntfs_fs_free will be called with fc->s_fs_info = NULL - */ - put_mount_options(sbi->options); - put_ntfs(sbi); - sb->s_fs_info = NULL; kfree(boot2); - return err; } @@ -1659,7 +1645,7 @@ static void ntfs_fs_free(struct fs_context *fc) struct ntfs_sb_info *sbi = fc->s_fs_info; if (sbi) - put_ntfs(sbi); + ntfs3_free_sbi(sbi); if (opts) put_mount_options(opts); @@ -1728,13 +1714,24 @@ free_opts: return -ENOMEM; } +static void ntfs3_kill_sb(struct super_block *sb) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + + kill_block_super(sb); + + if (sbi->options) + put_mount_options(sbi->options); + ntfs3_free_sbi(sbi); +} + // clang-format off static struct file_system_type ntfs_fs_type = { .owner = THIS_MODULE, .name = "ntfs3", .init_fs_context = ntfs_init_fs_context, .parameters = ntfs_fs_parameters, - .kill_sb = kill_block_super, + .kill_sb = ntfs3_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; // clang-format on diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 023f314e8950..29fd391899e5 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -637,7 +637,7 @@ static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap, if (!err) { set_cached_acl(inode, type, acl); inode->i_mode = mode; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); } @@ -924,7 +924,7 @@ set_new_fa: NULL); out: - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); return err; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 9fd03eaf15f8..e75137a8e7cb 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -191,10 +191,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, } inode->i_mode = new_mode; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); di->i_mode = cpu_to_le16(inode->i_mode); - di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, di_bh); diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 51c93929a146..aef58f1395c8 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7436,10 +7436,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, } inode->i_blocks = ocfs2_inode_sector_count(inode); - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); - di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec); + di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ocfs2_update_inode_fsync_trans(handle, inode, 1); ocfs2_journal_dirty(handle, di_bh); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 8dfc284e85f0..0fdba30740ab 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2048,7 +2048,7 @@ out_write_size: } inode->i_blocks = ocfs2_inode_sector_count(inode); di->i_size = cpu_to_le64((u64)i_size_read(inode)); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); if (handle) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 694471fc46b8..8b123d543e6e 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1658,7 +1658,7 @@ int __ocfs2_add_entry(handle_t *handle, offset, ocfs2_dir_trailer_blk_off(dir->i_sb)); if (ocfs2_dirent_would_fit(de, rec_len)) { - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); if (retval < 0) { mlog_errno(retval); @@ -2962,11 +2962,11 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ocfs2_dinode_new_extent_list(dir, di); i_size_write(dir, sb->s_blocksize); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); di->i_size = cpu_to_le64(sb->s_blocksize); - di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); - di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); + di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(dir).tv_sec); + di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(dir).tv_nsec); ocfs2_update_inode_fsync_trans(handle, dir, 1); /* diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index ba26c5567cff..81265123ce6c 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -337,7 +337,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) if (inode) { inode->i_ino = get_next_ino(); inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); inc_nlink(inode); inode->i_fop = &simple_dir_operations; @@ -360,7 +360,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, inode->i_ino = get_next_ino(); inode_init_owner(&nop_mnt_idmap, inode, parent, mode); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); ip = DLMFS_I(inode); ip->ip_conn = DLMFS_I(parent)->ip_conn; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index c28bc983a7b1..c3e2961ee5db 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2162,6 +2162,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_meta_lvb *lvb; + struct timespec64 ctime = inode_get_ctime(inode); lvb = ocfs2_dlm_lvb(&lockres->l_lksb); @@ -2185,7 +2186,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) lvb->lvb_iatime_packed = cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); lvb->lvb_ictime_packed = - cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); + cpu_to_be64(ocfs2_pack_timespec(&ctime)); lvb->lvb_imtime_packed = cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); @@ -2208,6 +2209,7 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode) struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_meta_lvb *lvb; + struct timespec64 ctime; mlog_meta_lvb(0, lockres); @@ -2238,8 +2240,9 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode) be64_to_cpu(lvb->lvb_iatime_packed)); ocfs2_unpack_timespec(&inode->i_mtime, be64_to_cpu(lvb->lvb_imtime_packed)); - ocfs2_unpack_timespec(&inode->i_ctime, + ocfs2_unpack_timespec(&ctime, be64_to_cpu(lvb->lvb_ictime_packed)); + inode_set_ctime_to_ts(inode, ctime); spin_unlock(&oi->ip_lock); return 0; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 91a194596552..3b91b4cc7c6a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -232,8 +232,10 @@ int ocfs2_should_update_atime(struct inode *inode, return 0; if (vfsmnt->mnt_flags & MNT_RELATIME) { + struct timespec64 ctime = inode_get_ctime(inode); + if ((timespec64_compare(&inode->i_atime, &inode->i_mtime) <= 0) || - (timespec64_compare(&inode->i_atime, &inode->i_ctime) <= 0)) + (timespec64_compare(&inode->i_atime, &ctime) <= 0)) return 1; return 0; @@ -294,7 +296,7 @@ int ocfs2_set_inode_size(handle_t *handle, i_size_write(inode, new_i_size); inode->i_blocks = ocfs2_inode_sector_count(inode); - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); if (status < 0) { @@ -415,12 +417,12 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, } i_size_write(inode, new_i_size); - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); di = (struct ocfs2_dinode *) fe_bh->b_data; di->i_size = cpu_to_le64(new_i_size); - di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec); + di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, fe_bh); @@ -824,7 +826,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, i_size_write(inode, abs_to); inode->i_blocks = ocfs2_inode_sector_count(inode); di->i_size = cpu_to_le64((u64)i_size_read(inode)); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); di->i_mtime_nsec = di->i_ctime_nsec; @@ -1317,7 +1319,7 @@ int ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path, goto bail; } - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); /* * If there is inline data in the inode, the inode will normally not * have data blocks allocated (it may have an external xattr block). @@ -2043,7 +2045,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, goto out_inode_unlock; } - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); if (ret < 0) mlog_errno(ret); @@ -2793,10 +2795,11 @@ const struct file_operations ocfs2_fops = { .remap_file_range = ocfs2_remap_file_range, }; +WRAP_DIR_ITER(ocfs2_readdir) // FIXME! const struct file_operations ocfs2_dops = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = ocfs2_readdir, + .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, .release = ocfs2_dir_release, .open = ocfs2_dir_open, @@ -2842,7 +2845,7 @@ const struct file_operations ocfs2_fops_no_plocks = { const struct file_operations ocfs2_dops_no_plocks = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = ocfs2_readdir, + .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, .release = ocfs2_dir_release, .open = ocfs2_dir_open, diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index bb116c39b581..e8771600b930 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -306,8 +306,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec); - inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime); - inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec); + inode_set_ctime(inode, le64_to_cpu(fe->i_ctime), + le32_to_cpu(fe->i_ctime_nsec)); if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno)) mlog(ML_ERROR, @@ -1314,8 +1314,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle, fe->i_mode = cpu_to_le16(inode->i_mode); fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec); fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); - fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); + fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); @@ -1352,8 +1352,8 @@ void ocfs2_refresh_inode(struct inode *inode, inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec); - inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime); - inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec); + inode_set_ctime(inode, le64_to_cpu(fe->i_ctime), + le32_to_cpu(fe->i_ctime_nsec)); spin_unlock(&OCFS2_I(inode)->ip_lock); } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 25d8072ccfce..c19c730c26e2 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -557,7 +557,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, (unsigned long)bh, (unsigned long long)bh->b_blocknr); - ocfs2_error(bh->b_bdev->bd_super, + ocfs2_error(bh->b_assoc_map->host->i_sb, "JBD2 has aborted our journal, ocfs2 cannot continue\n"); } @@ -780,14 +780,14 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh) mlog_errno(status); if (!is_handle_aborted(handle)) { journal_t *journal = handle->h_transaction->t_journal; - struct super_block *sb = bh->b_bdev->bd_super; mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. " "Aborting transaction and journal.\n"); handle->h_err = status; jbd2_journal_abort_handle(handle); jbd2_journal_abort(journal, status); - ocfs2_abort(sb, "Journal already aborted.\n"); + ocfs2_abort(bh->b_assoc_map->host->i_sb, + "Journal already aborted.\n"); } } } diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index b1e32ec4a9d4..05d67968a3a9 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -950,9 +950,9 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) } di = (struct ocfs2_dinode *)di_bh->b_data; - inode->i_ctime = current_time(inode); - di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + inode_set_ctime_current(inode); + di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, di_bh); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 17c52225b87d..e4a684d45308 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -793,10 +793,10 @@ static int ocfs2_link(struct dentry *old_dentry, } inc_nlink(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); ocfs2_set_links_count(fe, inode->i_nlink); - fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); + fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ocfs2_journal_dirty(handle, fe_bh); err = ocfs2_add_entry(handle, dentry, inode, @@ -995,7 +995,7 @@ static int ocfs2_unlink(struct inode *dir, ocfs2_set_links_count(fe, inode->i_nlink); ocfs2_journal_dirty(handle, fe_bh); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); if (S_ISDIR(inode->i_mode)) drop_nlink(dir); @@ -1537,7 +1537,7 @@ static int ocfs2_rename(struct mnt_idmap *idmap, new_dir_bh, &target_insert); } - old_inode->i_ctime = current_time(old_inode); + inode_set_ctime_current(old_inode); mark_inode_dirty(old_inode); status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode), @@ -1546,8 +1546,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap, if (status >= 0) { old_di = (struct ocfs2_dinode *) old_inode_bh->b_data; - old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec); - old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec); + old_di->i_ctime = cpu_to_le64(inode_get_ctime(old_inode).tv_sec); + old_di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(old_inode).tv_nsec); ocfs2_journal_dirty(handle, old_inode_bh); } else mlog_errno(status); @@ -1586,9 +1586,9 @@ static int ocfs2_rename(struct mnt_idmap *idmap, if (new_inode) { drop_nlink(new_inode); - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); } - old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); + old_dir->i_mtime = inode_set_ctime_current(old_dir); if (update_dot_dot) { status = ocfs2_update_entry(old_inode, handle, @@ -1610,7 +1610,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap, if (old_dir != new_dir) { /* Keep the same times on both directories.*/ - new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime; + new_dir->i_mtime = inode_set_ctime_to_ts(new_dir, + inode_get_ctime(old_dir)); /* * This will also pick up the i_nlink change from the diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 564ab48d03ef..25c8ec3c8c3a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -3750,9 +3750,9 @@ static int ocfs2_change_ctime(struct inode *inode, goto out_commit; } - inode->i_ctime = current_time(inode); - di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + inode_set_ctime_current(inode); + di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ocfs2_journal_dirty(handle, di_bh); @@ -4073,10 +4073,10 @@ static int ocfs2_complete_reflink(struct inode *s_inode, * we want mtime to appear identical to the source and * update ctime. */ - t_inode->i_ctime = current_time(t_inode); + inode_set_ctime_current(t_inode); - di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec); + di->i_ctime = cpu_to_le64(inode_get_ctime(t_inode).tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(t_inode).tv_nsec); t_inode->i_mtime = s_inode->i_mtime; di->i_mtime = s_di->i_mtime; @@ -4456,7 +4456,7 @@ int ocfs2_reflink_update_dest(struct inode *dest, if (newlen > i_size_read(dest)) i_size_write(dest, newlen); spin_unlock(&OCFS2_I(dest)->ip_lock); - dest->i_ctime = dest->i_mtime = current_time(dest); + dest->i_mtime = inode_set_ctime_current(dest); ret = ocfs2_mark_inode_dirty(handle, dest, d_bh); if (ret) { diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 4ac77ff6e676..6510ad783c91 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3421,9 +3421,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, goto out; } - inode->i_ctime = current_time(inode); - di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + inode_set_ctime_current(inode); + di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); } out: diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index 82cf7e9a665f..6bda275826d6 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -143,7 +143,7 @@ static int omfs_add_link(struct dentry *dentry, struct inode *inode) mark_buffer_dirty(bh); brelse(bh); - dir->i_ctime = current_time(dir); + inode_set_ctime_current(dir); /* mark affected inodes dirty to rebuild checksums */ mark_inode_dirty(dir); @@ -399,7 +399,7 @@ static int omfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (err) goto out; - old_inode->i_ctime = current_time(old_inode); + inode_set_ctime_current(old_inode); mark_inode_dirty(old_inode); out: return err; diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index c4c79e07efc7..2f8c1882f45c 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -51,7 +51,7 @@ struct inode *omfs_new_inode(struct inode *dir, umode_t mode) inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); inode->i_mapping->a_ops = &omfs_aops; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_op = &omfs_dir_inops; @@ -134,8 +134,8 @@ static int __omfs_write_inode(struct inode *inode, int wait) oi->i_head.h_magic = OMFS_IMAGIC; oi->i_size = cpu_to_be64(inode->i_size); - ctime = inode->i_ctime.tv_sec * 1000LL + - ((inode->i_ctime.tv_nsec + 999)/1000); + ctime = inode_get_ctime(inode).tv_sec * 1000LL + + ((inode_get_ctime(inode).tv_nsec + 999)/1000); oi->i_ctime = cpu_to_be64(ctime); omfs_update_checksums(oi); @@ -232,10 +232,9 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino) inode->i_atime.tv_sec = ctime; inode->i_mtime.tv_sec = ctime; - inode->i_ctime.tv_sec = ctime; + inode_set_ctime(inode, ctime, nsecs); inode->i_atime.tv_nsec = nsecs; inode->i_mtime.tv_nsec = nsecs; - inode->i_ctime.tv_nsec = nsecs; inode->i_mapping->a_ops = &omfs_aops; diff --git a/fs/open.c b/fs/open.c index b8883ec286f5..98f6601fbac6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1165,7 +1165,7 @@ EXPORT_SYMBOL_GPL(kernel_file_open); * backing_file_open - open a backing file for kernel internal use * @path: path of the file to open * @flags: open flags - * @path: path of the backing file + * @real_path: path of the backing file * @cred: credentials for open * * Open a backing file for a stackable filesystem (e.g., overlayfs). @@ -1337,7 +1337,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) lookup_flags |= LOOKUP_IN_ROOT; if (how->resolve & RESOLVE_CACHED) { /* Don't bother even trying for create/truncate/tmpfile open */ - if (flags & (O_TRUNC | O_CREAT | O_TMPFILE)) + if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) return -EAGAIN; lookup_flags |= LOOKUP_CACHED; } @@ -1518,7 +1518,7 @@ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) * "id" is the POSIX thread ID. We use the * files pointer for this.. */ -int filp_close(struct file *filp, fl_owner_t id) +static int filp_flush(struct file *filp, fl_owner_t id) { int retval = 0; @@ -1535,10 +1535,18 @@ int filp_close(struct file *filp, fl_owner_t id) dnotify_flush(filp, id); locks_remove_posix(filp, id); } - fput(filp); return retval; } +int filp_close(struct file *filp, fl_owner_t id) +{ + int retval; + + retval = filp_flush(filp, id); + fput(filp); + + return retval; +} EXPORT_SYMBOL(filp_close); /* @@ -1548,7 +1556,20 @@ EXPORT_SYMBOL(filp_close); */ SYSCALL_DEFINE1(close, unsigned int, fd) { - int retval = close_fd(fd); + int retval; + struct file *file; + + file = close_fd_get_file(fd); + if (!file) + return -EBADF; + + retval = filp_flush(file, current->files); + + /* + * We're returning to user space. Don't bother + * with any delayed fput() cases. + */ + __fput_sync(file); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -1561,7 +1582,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd) } /** - * close_range() - Close all file descriptors in a given range. + * sys_close_range() - Close all file descriptors in a given range. * * @fd: starting file descriptor to close * @max_fd: last file descriptor to close diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index f0b7f4d51a17..b2457cb97fa0 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -237,7 +237,7 @@ found: if (IS_ERR(inode)) return ERR_CAST(inode); if (inode->i_state & I_NEW) { - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); ent_oi = OP_I(inode); ent_oi->type = ent_type; ent_oi->u = ent_data; @@ -387,8 +387,7 @@ static int openprom_fill_super(struct super_block *s, struct fs_context *fc) goto out_no_root; } - root_inode->i_mtime = root_inode->i_atime = - root_inode->i_ctime = current_time(root_inode); + root_inode->i_mtime = root_inode->i_atime = inode_set_ctime_current(root_inode); root_inode->i_op = &openprom_inode_operations; root_inode->i_fop = &openprom_operations; root_inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 9014bbcc8031..085912268442 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -871,7 +871,7 @@ int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path, ret = orangefs_inode_getattr(inode, request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0); if (ret == 0) { - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); /* override block size reported to stat */ if (!(request_mask & STATX_SIZE)) @@ -900,12 +900,13 @@ int orangefs_permission(struct mnt_idmap *idmap, return generic_permission(&nop_mnt_idmap, inode, mask); } -int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags) +int orangefs_update_time(struct inode *inode, int flags) { struct iattr iattr; + gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n", get_khandle_from_ino(inode)); - generic_update_time(inode, time, flags); + flags = generic_update_time(inode, flags); memset(&iattr, 0, sizeof iattr); if (flags & S_ATIME) iattr.ia_valid |= ATTR_ATIME; diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 77518e248cf7..c9dfd5c6a097 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -421,7 +421,7 @@ static int orangefs_rename(struct mnt_idmap *idmap, ret); if (new_dentry->d_inode) - new_dentry->d_inode->i_ctime = current_time(new_dentry->d_inode); + inode_set_ctime_current(d_inode(new_dentry)); op_release(new_op); return ret; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index ce20d3443869..b711654ca18a 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -370,7 +370,7 @@ int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path, int orangefs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); -int orangefs_update_time(struct inode *, struct timespec64 *, int); +int orangefs_update_time(struct inode *, int); /* * defined in xattr.c diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 46b7dcff18ac..0a9fcfdf552f 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -361,11 +361,11 @@ again2: downcall.resp.getattr.attributes.atime; inode->i_mtime.tv_sec = (time64_t)new_op-> downcall.resp.getattr.attributes.mtime; - inode->i_ctime.tv_sec = (time64_t)new_op-> - downcall.resp.getattr.attributes.ctime; + inode_set_ctime(inode, + (time64_t)new_op->downcall.resp.getattr.attributes.ctime, + 0); inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; /* special case: mark the root inode as sticky */ inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) | diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 21245b00722a..eaa1e6b3e04a 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -239,6 +239,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) static void ovl_file_accessed(struct file *file) { struct inode *inode, *upperinode; + struct timespec64 ctime, uctime; if (file->f_flags & O_NOATIME) return; @@ -249,10 +250,12 @@ static void ovl_file_accessed(struct file *file) if (!upperinode) return; + ctime = inode_get_ctime(inode); + uctime = inode_get_ctime(upperinode); if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) || - !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) { + !timespec64_equal(&ctime, &uctime))) { inode->i_mtime = upperinode->i_mtime; - inode->i_ctime = upperinode->i_ctime; + inode_set_ctime_to_ts(inode, uctime); } touch_atime(&file->f_path); @@ -290,10 +293,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) if (iocb->ki_flags & IOCB_WRITE) { struct inode *inode = file_inode(orig_iocb->ki_filp); - /* Actually acquired in ovl_write_iter() */ - __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb, - SB_FREEZE_WRITE); - file_end_write(iocb->ki_filp); + kiocb_end_write(iocb); ovl_copyattr(inode); } @@ -409,10 +409,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (!aio_req) goto out; - file_start_write(real.file); - /* Pacify lockdep, same trick as done in aio_write() */ - __sb_writers_release(file_inode(real.file)->i_sb, - SB_FREEZE_WRITE); aio_req->fd = real; real.flags = 0; aio_req->orig_iocb = iocb; @@ -420,6 +416,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) aio_req->iocb.ki_flags = ifl; aio_req->iocb.ki_complete = ovl_aio_rw_complete; refcount_set(&aio_req->ref, 2); + kiocb_start_write(&aio_req->iocb); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index a63e57447be9..f22e27b78025 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -693,7 +693,7 @@ int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, } #endif -int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) +int ovl_update_time(struct inode *inode, int flags) { if (flags & S_ATIME) { struct ovl_fs *ofs = inode->i_sb->s_fs_info; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 9402591f12aa..8bbe6173bef4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -665,7 +665,7 @@ static inline struct posix_acl *ovl_get_acl_path(const struct path *path, } #endif -int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); +int ovl_update_time(struct inode *inode, int flags); bool ovl_is_private_xattr(struct super_block *sb, const char *name); struct ovl_inode_params { diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index ee5c4736480f..de39e067ae65 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -954,10 +954,11 @@ static int ovl_dir_open(struct inode *inode, struct file *file) return 0; } +WRAP_DIR_ITER(ovl_iterate) // FIXME! const struct file_operations ovl_dir_operations = { .read = generic_read_dir, .open = ovl_dir_open, - .iterate = ovl_iterate, + .iterate_shared = shared_ovl_iterate, .llseek = ovl_dir_llseek, .fsync = ovl_dir_fsync, .release = ovl_dir_release, diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 5b069f1a1e44..cc8977498c48 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1460,7 +1460,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) ovl_trusted_xattr_handlers; sb->s_fs_info = ofs; sb->s_flags |= SB_POSIXACL; - sb->s_iflags |= SB_I_SKIP_SYNC; + sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE; err = -ENOMEM; root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 7ef9e13c404a..c210b5d496a8 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1202,6 +1202,6 @@ void ovl_copyattr(struct inode *inode) inode->i_mode = realinode->i_mode; inode->i_atime = realinode->i_atime; inode->i_mtime = realinode->i_mtime; - inode->i_ctime = realinode->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(realinode)); i_size_write(inode, i_size_read(realinode)); } diff --git a/fs/pipe.c b/fs/pipe.c index 2d88f73f585a..6c1a9b1db907 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -489,7 +489,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) head = pipe->head; if (!pipe_full(head, pipe->tail, pipe->max_usage)) { unsigned int mask = pipe->ring_size - 1; - struct pipe_buffer *buf = &pipe->bufs[head & mask]; + struct pipe_buffer *buf; struct page *page = pipe->tmp_page; int copied; @@ -899,7 +899,7 @@ static struct inode * get_pipe_inode(void) inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); return inode; @@ -1236,7 +1236,7 @@ const struct file_operations pipefifo_fops = { * Currently we rely on the pipe array holding a power-of-2 number * of pages. Returns 0 on error. */ -unsigned int round_pipe_size(unsigned long size) +unsigned int round_pipe_size(unsigned int size) { if (size > (1U << 31)) return 0; @@ -1319,7 +1319,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) * Allocate a new array of pipe buffers and copy the info over. Returns the * pipe size if successful, or return -ERROR on error. */ -static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) +static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg) { unsigned long user_bufs; unsigned int nr_slots, size; @@ -1387,7 +1387,7 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) return pipe; } -long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) +long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) { struct pipe_inode_info *pipe; long ret; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 7fa1b738bbab..a05fe94970ce 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -1027,7 +1027,7 @@ int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, return error; } - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (IS_I_VERSION(inode)) inode_inc_iversion(inode); set_cached_acl(inode, type, acl); diff --git a/fs/proc/base.c b/fs/proc/base.c index 05452c3b9872..2c009d5d9282 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1902,7 +1902,7 @@ struct inode *proc_pid_make_inode(struct super_block *sb, ei = PROC_I(inode); inode->i_mode = mode; inode->i_ino = get_next_ino(); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_op = &proc_def_inode_operations; /* @@ -1966,7 +1966,7 @@ int pid_getattr(struct mnt_idmap *idmap, const struct path *path, struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->uid = GLOBAL_ROOT_UID; stat->gid = GLOBAL_ROOT_GID; @@ -2817,7 +2817,7 @@ static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ \ static const struct file_operations proc_##LSM##_attr_dir_ops = { \ .read = generic_read_dir, \ - .iterate = proc_##LSM##_attr_dir_iterate, \ + .iterate_shared = proc_##LSM##_attr_dir_iterate, \ .llseek = default_llseek, \ }; \ \ @@ -3899,7 +3899,7 @@ static int proc_task_getattr(struct mnt_idmap *idmap, { struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (p) { stat->nlink += get_nr_threads(p); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index b3140deebbbf..6276b3938842 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -352,7 +352,7 @@ static int proc_fd_getattr(struct mnt_idmap *idmap, struct inode *inode = d_inode(path->dentry); int rv = 0; - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); /* If it's a directory, put the number of open fds there */ if (S_ISDIR(inode->i_mode)) { diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 42ae38ff6e7e..775ce0bcf08c 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -146,7 +146,7 @@ static int proc_getattr(struct mnt_idmap *idmap, } } - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 67b09a1d9433..532dc9d240f7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -660,7 +660,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_private = de->data; inode->i_ino = de->low_ino; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); PROC_I(inode)->pde = de; if (is_empty_pde(de)) { make_empty_dir_inode(inode); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 9cb32e1a78a0..23fc24d16b31 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name, static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) { + struct file *file = iocb->ki_filp; + char *buf = file->private_data; loff_t *fpos = &iocb->ki_pos; size_t phdrs_offset, notes_offset, data_offset; size_t page_offline_frozen = 1; @@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) case KCORE_VMEMMAP: case KCORE_TEXT: /* - * We use _copy_to_iter() to bypass usermode hardening - * which would otherwise prevent this operation. + * Sadly we must use a bounce buffer here to be able to + * make use of copy_from_kernel_nofault(), as these + * memory regions might not always be mapped on all + * architectures. */ - if (_copy_to_iter((char *)start, tsz, iter) != tsz) { + if (copy_from_kernel_nofault(buf, (void *)start, tsz)) { + if (iov_iter_zero(tsz, iter) != tsz) { + ret = -EFAULT; + goto out; + } + /* + * We know the bounce buffer is safe to copy from, so + * use _copy_to_iter() directly. + */ + } else if (_copy_to_iter(buf, tsz, iter) != tsz) { ret = -EFAULT; goto out; } @@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp) if (ret) return ret; + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + if (kcore_need_update) kcore_update_ram(); if (i_size_read(inode) != proc_root_kcore->size) { @@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp) return 0; } +static int release_kcore(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + static const struct proc_ops kcore_proc_ops = { .proc_read_iter = read_kcore_iter, .proc_open = open_kcore, + .proc_release = release_kcore, .proc_lseek = default_llseek, }; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index a0c0419872e3..75f35f128e63 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -308,7 +308,7 @@ static int proc_tgid_net_getattr(struct mnt_idmap *idmap, net = get_proc_task_net(inode); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (net != NULL) { stat->nlink = net->proc_net->nlink; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5ea42653126e..bf06344a42cc 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -463,7 +463,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, head->count++; spin_unlock(&sysctl_lock); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_mode = table->mode; if (!S_ISDIR(table->mode)) { inode->i_mode |= S_IFREG; @@ -849,7 +849,7 @@ static int proc_sys_getattr(struct mnt_idmap *idmap, if (IS_ERR(head)) return PTR_ERR(head); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (table) stat->mode = (stat->mode & S_IFMT) | table->mode; diff --git a/fs/proc/root.c b/fs/proc/root.c index a86e65a608da..9191248f2dac 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -314,7 +314,8 @@ static int proc_root_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry), + stat); stat->nlink = proc_root.nlink + nr_processes(); return 0; } diff --git a/fs/proc/self.c b/fs/proc/self.c index 72cd69bcaf4a..ecc4da8d265e 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -46,7 +46,7 @@ int proc_setup_self(struct super_block *s) struct inode *inode = new_inode(s); if (inode) { inode->i_ino = self_inum; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 507cd4e59d07..fafff1bd34cd 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -587,8 +587,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, bool migration = false; if (pmd_present(*pmd)) { - /* FOLL_DUMP will return -EFAULT on huge zero page */ - page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + page = vm_normal_page_pmd(vma, addr, *pmd); } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); @@ -758,12 +757,14 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, static const struct mm_walk_ops smaps_walk_ops = { .pmd_entry = smaps_pte_range, .hugetlb_entry = smaps_hugetlb_range, + .walk_lock = PGWALK_RDLOCK, }; static const struct mm_walk_ops smaps_shmem_walk_ops = { .pmd_entry = smaps_pte_range, .hugetlb_entry = smaps_hugetlb_range, .pte_hole = smaps_pte_hole, + .walk_lock = PGWALK_RDLOCK, }; /* @@ -1245,6 +1246,7 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end, static const struct mm_walk_ops clear_refs_walk_ops = { .pmd_entry = clear_refs_pte_range, .test_walk = clear_refs_test_walk, + .walk_lock = PGWALK_WRLOCK, }; static ssize_t clear_refs_write(struct file *file, const char __user *buf, @@ -1622,6 +1624,7 @@ static const struct mm_walk_ops pagemap_ops = { .pmd_entry = pagemap_pmd_range, .pte_hole = pagemap_pte_hole, .hugetlb_entry = pagemap_hugetlb_range, + .walk_lock = PGWALK_RDLOCK, }; /* @@ -1935,6 +1938,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, static const struct mm_walk_ops show_numa_ops = { .hugetlb_entry = gather_hugetlb_stats, .pmd_entry = gather_pte_stats, + .walk_lock = PGWALK_RDLOCK, }; /* diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index a553273fbd41..63ac1f93289f 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -46,7 +46,7 @@ int proc_setup_thread_self(struct super_block *s) struct inode *inode = new_inode(s); if (inode) { inode->i_ino = thread_self_inum; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index cb80a7703d58..1fb213f379a5 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -132,7 +132,7 @@ ssize_t read_from_oldmem(struct iov_iter *iter, size_t count, u64 *ppos, bool encrypted) { unsigned long pfn, offset; - size_t nr_bytes; + ssize_t nr_bytes; ssize_t read = 0, tmp; int idx; diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index ffbadb8b3032..a756af980d0c 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -223,7 +223,7 @@ static struct inode *pstore_get_inode(struct super_block *sb) struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); } return inode; } @@ -390,7 +390,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) inode->i_private = private; if (record->time.tv_sec) - inode->i_mtime = inode->i_ctime = record->time; + inode->i_mtime = inode_set_ctime_to_ts(inode, record->time); d_add(dentry, inode); diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 391ea402920d..a7171f5532a1 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -305,8 +305,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_sec = le32_to_cpu(raw_inode->di_atime); inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->di_ctime); - inode->i_ctime.tv_nsec = 0; + inode_set_ctime(inode, le32_to_cpu(raw_inode->di_ctime), 0); inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size); memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 85b2fa3b211c..21f90d519f1a 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -562,8 +562,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino) inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_atime); inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_ctime); - inode->i_ctime.tv_nsec = 0; + inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->di_ctime), 0); /* calc blocks based on 512 byte blocksize */ inode->i_blocks = (inode->i_size + 511) >> 9; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index e3e4f4047657..4d826c369da2 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2367,7 +2367,7 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, if (!fmt) return -ESRCH; - if (!sb->s_op->quota_write || !sb->s_op->quota_read || + if (!sb->dq_op || !sb->s_qcop || (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; goto out_fmt; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index fef477c78107..18e8387cab41 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -65,7 +65,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, inode->i_mapping->a_ops = &ram_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); mapping_set_unevictable(inode->i_mapping); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); @@ -105,7 +105,7 @@ ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir, d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ error = 0; - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); } return error; } @@ -138,7 +138,7 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir, if (!error) { d_instantiate(dentry, inode); dget(dentry); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); } else iput(inode); } diff --git a/fs/read_write.c b/fs/read_write.c index b07de77ef126..4771701c896b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL(vfs_setpos); * @file: file structure to seek on * @offset: file offset to seek to * @whence: type of seek - * @size: max size of this file in file system + * @maxsize: max size of this file in file system * @eof: offset used for SEEK_END position * * This is a variant of generic_file_llseek that allows passing in a custom diff --git a/fs/readdir.c b/fs/readdir.c index b264ce60114d..c8c46e294431 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -25,6 +25,53 @@ #include <asm/unaligned.h> /* + * Some filesystems were never converted to '->iterate_shared()' + * and their directory iterators want the inode lock held for + * writing. This wrapper allows for converting from the shared + * semantics to the exclusive inode use. + */ +int wrap_directory_iterator(struct file *file, + struct dir_context *ctx, + int (*iter)(struct file *, struct dir_context *)) +{ + struct inode *inode = file_inode(file); + int ret; + + /* + * We'd love to have an 'inode_upgrade_trylock()' operation, + * see the comment in mmap_upgrade_trylock() in mm/memory.c. + * + * But considering this is for "filesystems that never got + * converted", it really doesn't matter. + * + * Also note that since we have to return with the lock held + * for reading, we can't use the "killable()" locking here, + * since we do need to get the lock even if we're dying. + * + * We could do the write part killably and then get the read + * lock unconditionally if it mattered, but see above on why + * this does the very simplistic conversion. + */ + up_read(&inode->i_rwsem); + down_write(&inode->i_rwsem); + + /* + * Since we dropped the inode lock, we should do the + * DEADDIR test again. See 'iterate_dir()' below. + * + * Note that we don't need to re-do the f_pos games, + * since the file must be locked wrt f_pos anyway. + */ + ret = -ENOENT; + if (!IS_DEADDIR(inode)) + ret = iter(file, ctx); + + downgrade_write(&inode->i_rwsem); + return ret; +} +EXPORT_SYMBOL(wrap_directory_iterator); + +/* * Note the "unsafe_put_user() semantics: we goto a * label for errors. */ @@ -40,39 +87,28 @@ int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); - bool shared = false; int res = -ENOTDIR; - if (file->f_op->iterate_shared) - shared = true; - else if (!file->f_op->iterate) + + if (!file->f_op->iterate_shared) goto out; res = security_file_permission(file, MAY_READ); if (res) goto out; - if (shared) - res = down_read_killable(&inode->i_rwsem); - else - res = down_write_killable(&inode->i_rwsem); + res = down_read_killable(&inode->i_rwsem); if (res) goto out; res = -ENOENT; if (!IS_DEADDIR(inode)) { ctx->pos = file->f_pos; - if (shared) - res = file->f_op->iterate_shared(file, ctx); - else - res = file->f_op->iterate(file, ctx); + res = file->f_op->iterate_shared(file, ctx); file->f_pos = ctx->pos; fsnotify_access(file); file_accessed(file); } - if (shared) - inode_unlock_shared(inode); - else - inode_unlock(inode); + inode_unlock_shared(inode); out: return res; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 77bd3b27059f..86e55d4bb10d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1259,9 +1259,8 @@ static void init_inode(struct inode *inode, struct treepath *path) inode->i_size = sd_v1_size(sd); inode->i_atime.tv_sec = sd_v1_atime(sd); inode->i_mtime.tv_sec = sd_v1_mtime(sd); - inode->i_ctime.tv_sec = sd_v1_ctime(sd); + inode_set_ctime(inode, sd_v1_ctime(sd), 0); inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_blocks = sd_v1_blocks(sd); @@ -1314,8 +1313,7 @@ static void init_inode(struct inode *inode, struct treepath *path) i_gid_write(inode, sd_v2_gid(sd)); inode->i_mtime.tv_sec = sd_v2_mtime(sd); inode->i_atime.tv_sec = sd_v2_atime(sd); - inode->i_ctime.tv_sec = sd_v2_ctime(sd); - inode->i_ctime.tv_nsec = 0; + inode_set_ctime(inode, sd_v2_ctime(sd), 0); inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_blocks = sd_v2_blocks(sd); @@ -1374,7 +1372,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) set_sd_v2_gid(sd_v2, i_gid_read(inode)); set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); - set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); + set_sd_v2_ctime(sd_v2, inode_get_ctime(inode).tv_sec); set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); @@ -1394,7 +1392,7 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) set_sd_v1_nlink(sd_v1, inode->i_nlink); set_sd_v1_size(sd_v1, size); set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); - set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec); + set_sd_v1_ctime(sd_v1, inode_get_ctime(inode).tv_sec); set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) @@ -1986,7 +1984,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, /* uid and gid must already be set by the caller for quota init */ - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_size = i_size; inode->i_blocks = 0; inode->i_bytes = 0; diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 6bf9b54e58ca..dd33f8cc6eda 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -55,7 +55,7 @@ int reiserfs_fileattr_set(struct mnt_idmap *idmap, } sd_attrs_to_i_attrs(flags, inode); REISERFS_I(inode)->i_attrs = flags; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); err = 0; unlock: @@ -107,7 +107,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err = -EFAULT; goto setversion_out; } - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); setversion_out: mnt_drop_write_file(filp); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 479aa4a57602..015bfe4e4524 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2326,7 +2326,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, int i, j; bh = __getblk(dev, block, bufsize); - if (buffer_uptodate(bh)) + if (!bh || buffer_uptodate(bh)) return (bh); if (block + BUFNR > max_block) { @@ -2336,6 +2336,8 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, j = 1; for (i = 1; i < blocks; i++) { bh = __getblk(dev, block + i, bufsize); + if (!bh) + break; if (buffer_uptodate(bh)) { brelse(bh); break; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 52240cc891cf..9c5704be2435 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -572,7 +572,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, } dir->i_size += paste_size; - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); if (!S_ISDIR(inode->i_mode) && visible) /* reiserfs_mkdir or reiserfs_rename will do that by itself */ reiserfs_update_sd(th, dir); @@ -966,7 +966,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) inode->i_nlink); clear_nlink(inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_to_ts(dir, + inode_set_ctime_current(inode)); reiserfs_update_sd(&th, inode); DEC_DIR_INODE_NLINK(dir) @@ -1070,11 +1071,11 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) inc_nlink(inode); goto end_unlink; } - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); reiserfs_update_sd(&th, inode); dir->i_size -= (de.de_entrylen + DEH_SIZE); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); reiserfs_update_sd(&th, dir); if (!savelink) @@ -1250,7 +1251,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, return err ? err : retval; } - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); reiserfs_update_sd(&th, inode); ihold(inode); @@ -1325,7 +1326,6 @@ static int reiserfs_rename(struct mnt_idmap *idmap, int jbegin_count; umode_t old_inode_mode; unsigned long savelink = 1; - struct timespec64 ctime; if (flags & ~RENAME_NOREPLACE) return -EINVAL; @@ -1576,14 +1576,11 @@ static int reiserfs_rename(struct mnt_idmap *idmap, mark_de_hidden(old_de.de_deh + old_de.de_entry_num); journal_mark_dirty(&th, old_de.de_bh); - ctime = current_time(old_dir); - old_dir->i_ctime = old_dir->i_mtime = ctime; - new_dir->i_ctime = new_dir->i_mtime = ctime; /* * thanks to Alex Adriaanse <alex_a@caltech.edu> for patch * which adds ctime update of renamed object */ - old_inode->i_ctime = ctime; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); if (new_dentry_inode) { /* adjust link number of the victim */ @@ -1592,7 +1589,6 @@ static int reiserfs_rename(struct mnt_idmap *idmap, } else { drop_nlink(new_dentry_inode); } - new_dentry_inode->i_ctime = ctime; savelink = new_dentry_inode->i_nlink; } diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index ce5003986789..3676e02a0232 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -2004,7 +2004,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, if (update_timestamps) { inode->i_mtime = current_time(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); } reiserfs_update_sd(th, inode); @@ -2029,7 +2029,7 @@ update_and_out: if (update_timestamps) { /* this is truncate, not file closing */ inode->i_mtime = current_time(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); } reiserfs_update_sd(th, inode); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 929acce6e731..7eaf36b3de12 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2587,7 +2587,7 @@ out: return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); return len - towrite; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 651027967159..6000964c2b80 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -466,12 +466,13 @@ int reiserfs_commit_write(struct file *f, struct page *page, static void update_ctime(struct inode *inode) { struct timespec64 now = current_time(inode); + struct timespec64 ctime = inode_get_ctime(inode); if (inode_unhashed(inode) || !inode->i_nlink || - timespec64_equal(&inode->i_ctime, &now)) + timespec64_equal(&ctime, &now)) return; - inode->i_ctime = current_time(inode); + inode_set_ctime_to_ts(inode, now); mark_inode_dirty(inode); } diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 138060452678..064264992b49 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -285,7 +285,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, if (error == -ENODATA) { error = 0; if (type == ACL_TYPE_ACCESS) { - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); } } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index c59b230d55b4..5c35f6c76037 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -322,8 +322,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos) set_nlink(i, 1); /* Hard to decide.. */ i->i_size = be32_to_cpu(ri.size); - i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; - i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; + i->i_mtime = i->i_atime = inode_set_ctime(i, 0, 0); /* set up mode and ops */ mode = romfs_modemap[nextfh & ROMFH_TYPE]; @@ -583,16 +582,18 @@ static int romfs_init_fs_context(struct fs_context *fc) */ static void romfs_kill_sb(struct super_block *sb) { + generic_shutdown_super(sb); + #ifdef CONFIG_ROMFS_ON_MTD if (sb->s_mtd) { - kill_mtd_super(sb); - return; + put_mtd_device(sb->s_mtd); + sb->s_mtd = NULL; } #endif #ifdef CONFIG_ROMFS_ON_BLOCK if (sb->s_bdev) { - kill_block_super(sb); - return; + sync_blockdev(sb->s_bdev); + blkdev_put(sb->s_bdev, sb); } #endif } diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index fb4162a52844..aec6e9137474 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -153,6 +153,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) in_flight(server), atomic_read(&server->in_send), atomic_read(&server->num_waiters)); +#ifdef CONFIG_NET_NS + if (server->net) + seq_printf(m, " Net namespace: %u ", server->net->ns.inum); +#endif /* NET_NS */ + } static inline const char *smb_speed_to_str(size_t bps) @@ -430,10 +435,15 @@ skip_rdma: server->reconnect_instance, server->srv_count, server->sec_mode, in_flight(server)); +#ifdef CONFIG_NET_NS + if (server->net) + seq_printf(m, " Net namespace: %u ", server->net->ns.inum); +#endif /* NET_NS */ seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d", atomic_read(&server->in_send), atomic_read(&server->num_waiters)); + if (server->leaf_fullpath) { seq_printf(m, "\nDFS leaf full path: %s", server->leaf_fullpath); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index a4d8b0ea1c8c..6fc8f43b1c9d 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1077,7 +1077,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) } static int -cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv) +cifs_setlease(struct file *file, int arg, struct file_lock **lease, void **priv) { /* * Note that this is called by vfs setlease with i_lock held to diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index d7274eefc666..15c8cc4b6680 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -159,6 +159,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 43 -#define CIFS_VERSION "2.43" +#define SMB3_PRODUCT_BUILD 44 +#define CIFS_VERSION "2.44" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index b5808fe3469a..657dee4b2c8c 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -532,7 +532,7 @@ struct smb_version_operations { /* Check for STATUS_IO_TIMEOUT */ bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ - void (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); + bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); }; struct smb_version_values { @@ -1062,6 +1062,7 @@ struct cifs_ses { unsigned long chans_need_reconnect; /* ========= end: protected by chan_lock ======== */ struct cifs_ses *dfs_root_ses; + struct nls_table *local_nls; }; static inline bool diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 19f7385abeec..25503f1a4fd2 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -129,7 +129,7 @@ again: } spin_unlock(&server->srv_lock); - nls_codepage = load_nls_default(); + nls_codepage = ses->local_nls; /* * need to prevent multiple threads trying to simultaneously @@ -200,7 +200,6 @@ out: rc = -EAGAIN; } - unload_nls(nls_codepage); return rc; } @@ -3184,7 +3183,7 @@ setAclRetry: param_offset = offsetof(struct smb_com_transaction2_spi_req, InformationLevel) - 4; offset = param_offset + params; - parm_data = ((char *) &pSMB->hdr.Protocol) + offset; + parm_data = ((char *)pSMB) + sizeof(pSMB->hdr.smb_buf_length) + offset; pSMB->ParameterOffset = cpu_to_le16(param_offset); /* convert to on the wire format for POSIX ACL */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 85dd1b373974..238538dde4e3 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -60,7 +60,7 @@ extern bool disable_legacy_dialects; #define TLINK_IDLE_EXPIRE (600 * HZ) /* Drop the connection to not overload the server */ -#define NUM_STATUS_IO_TIMEOUT 5 +#define MAX_STATUS_IO_TIMEOUT 5 static int ip_connect(struct TCP_Server_Info *server); static int generic_ip_connect(struct TCP_Server_Info *server); @@ -1117,6 +1117,7 @@ cifs_demultiplex_thread(void *p) struct mid_q_entry *mids[MAX_COMPOUND]; char *bufs[MAX_COMPOUND]; unsigned int noreclaim_flag, num_io_timeout = 0; + bool pending_reconnect = false; noreclaim_flag = memalloc_noreclaim_save(); cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current)); @@ -1156,6 +1157,8 @@ cifs_demultiplex_thread(void *p) cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length); if (!is_smb_response(server, buf[0])) continue; + + pending_reconnect = false; next_pdu: server->pdu_size = pdu_length; @@ -1213,10 +1216,13 @@ next_pdu: if (server->ops->is_status_io_timeout && server->ops->is_status_io_timeout(buf)) { num_io_timeout++; - if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) { - cifs_reconnect(server, false); + if (num_io_timeout > MAX_STATUS_IO_TIMEOUT) { + cifs_server_dbg(VFS, + "Number of request timeouts exceeded %d. Reconnecting", + MAX_STATUS_IO_TIMEOUT); + + pending_reconnect = true; num_io_timeout = 0; - continue; } } @@ -1226,9 +1232,14 @@ next_pdu: if (mids[i] != NULL) { mids[i]->resp_buf_size = server->pdu_size; - if (bufs[i] && server->ops->is_network_name_deleted) - server->ops->is_network_name_deleted(bufs[i], - server); + if (bufs[i] != NULL) { + if (server->ops->is_network_name_deleted && + server->ops->is_network_name_deleted(bufs[i], + server)) { + cifs_server_dbg(FYI, + "Share deleted. Reconnect needed"); + } + } if (!mids[i]->multiRsp || mids[i]->multiEnd) mids[i]->callback(mids[i]); @@ -1263,6 +1274,11 @@ next_pdu: buf = server->smallbuf; goto next_pdu; } + + /* do this reconnect at the very end after processing all MIDs */ + if (pending_reconnect) + cifs_reconnect(server, true); + } /* end while !EXITING */ /* buffer usually freed in free_mid - need to free it here on exit */ @@ -1826,6 +1842,10 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) CIFS_MAX_PASSWORD_LEN)) return 0; } + + if (strcmp(ctx->local_nls->charset, ses->local_nls->charset)) + return 0; + return 1; } @@ -2270,6 +2290,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) ses->sectype = ctx->sectype; ses->sign = ctx->sign; + ses->local_nls = load_nls(ctx->local_nls->charset); /* add server as first channel */ spin_lock(&ses->chan_lock); diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 1403a2d1ab17..ee772c3d9f00 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -66,6 +66,12 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) return rc; } +/* + * Track individual DFS referral servers used by new DFS mount. + * + * On success, their lifetime will be shared by final tcon (dfs_ses_list). + * Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount(). + */ static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx) { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; @@ -80,11 +86,12 @@ static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx) INIT_LIST_HEAD(&root_ses->list); spin_lock(&cifs_tcp_ses_lock); - ses->ses_count++; + cifs_smb_ses_inc_refcount(ses); spin_unlock(&cifs_tcp_ses_lock); root_ses->ses = ses; list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list); } + /* Select new DFS referral server so that new referrals go through it */ ctx->dfs_root_ses = ses; return 0; } @@ -170,8 +177,12 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); rc = dfs_get_referral(mnt_ctx, ref_path + 1, NULL, &tl); - if (rc) + if (rc) { + rc = cifs_mount_get_tcon(mnt_ctx); + if (!rc) + rc = cifs_is_path_remote(mnt_ctx); break; + } tit = dfs_cache_get_tgt_iterator(&tl); if (!tit) { @@ -242,7 +253,6 @@ out: int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - struct cifs_ses *ses; bool nodfs = ctx->nodfs; int rc; @@ -276,20 +286,8 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) } *isdfs = true; - /* - * Prevent DFS root session of being put in the first call to - * cifs_mount_put_conns(). If another DFS root server was not found - * while chasing the referrals (@ctx->dfs_root_ses == @ses), then we - * can safely put extra refcount of @ses. - */ - ses = mnt_ctx->ses; - mnt_ctx->ses = NULL; - mnt_ctx->server = NULL; - rc = __dfs_mount_share(mnt_ctx); - if (ses == ctx->dfs_root_ses) - cifs_put_smb_ses(ses); - - return rc; + add_root_smb_session(mnt_ctx); + return __dfs_mount_share(mnt_ctx); } /* Update dfs referral path of superblock */ diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 879bc8e6555c..2108b3b40ce9 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -1080,12 +1080,12 @@ int cifs_close(struct inode *inode, struct file *file) cfile = file->private_data; file->private_data = NULL; dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); - if ((cinode->oplock == CIFS_CACHE_RHW_FLG) && - cinode->lease_granted && + if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG) + && cinode->lease_granted && !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && dclose) { if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); } spin_lock(&cinode->deferred_lock); cifs_add_deferred_close(cfile, dclose); @@ -2596,7 +2596,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) write_data, to - from, &offset); cifsFileInfo_put(open_file); /* Does mm or vfs already set times? */ - inode->i_atime = inode->i_mtime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); if ((bytes_written > 0) && (offset)) rc = 0; else if (bytes_written < 0) @@ -4681,9 +4681,9 @@ static int cifs_readpage_worker(struct file *file, struct page *page, io_error: kunmap(page); - unlock_page(page); read_complete: + unlock_page(page); return rc; } @@ -4878,9 +4878,11 @@ void cifs_oplock_break(struct work_struct *work) struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, oplock_break); struct inode *inode = d_inode(cfile->dentry); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsInodeInfo *cinode = CIFS_I(inode); - struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + struct tcon_link *tlink; int rc = 0; bool purge_cache = false, oplock_break_cancelled; __u64 persistent_fid, volatile_fid; @@ -4889,6 +4891,12 @@ void cifs_oplock_break(struct work_struct *work) wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, TASK_UNINTERRUPTIBLE); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + goto out; + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, cfile->oplock_epoch, &purge_cache); @@ -4938,18 +4946,19 @@ oplock_break_ack: /* * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require * an acknowledgment to be sent when the file has already been closed. - * check for server null, since can race with kill_sb calling tree disconnect. */ spin_lock(&cinode->open_file_lock); - if (tcon->ses && tcon->ses->server && !oplock_break_cancelled && - !list_empty(&cinode->openFileList)) { + /* check list empty since can race with kill_sb calling tree disconnect */ + if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) { spin_unlock(&cinode->open_file_lock); - rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid, - volatile_fid, net_fid, cinode); + rc = server->ops->oplock_response(tcon, persistent_fid, + volatile_fid, net_fid, cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } else spin_unlock(&cinode->open_file_lock); + cifs_put_tlink(tlink); +out: cifs_done_oplock_break(cinode); } diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 4946a0c59600..67e16c2ac90e 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -231,6 +231,8 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c break; case Opt_sec_none: ctx->nullauth = 1; + kfree(ctx->username); + ctx->username = NULL; break; default: cifs_errorf(fc, "bad security option: %s\n", value); @@ -1201,6 +1203,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, case Opt_user: kfree(ctx->username); ctx->username = NULL; + if (ctx->nullauth) + break; if (strlen(param->string) == 0) { /* null user, ie. anonymous authentication */ ctx->nullauth = 1; diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h index 173999610997..84f3b09367d2 100644 --- a/fs/smb/client/fscache.h +++ b/fs/smb/client/fscache.h @@ -50,12 +50,13 @@ void cifs_fscache_fill_coherency(struct inode *inode, struct cifs_fscache_inode_coherency_data *cd) { struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct timespec64 ctime = inode_get_ctime(inode); memset(cd, 0, sizeof(*cd)); cd->last_write_time_sec = cpu_to_le64(cifsi->netfs.inode.i_mtime.tv_sec); cd->last_write_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_mtime.tv_nsec); - cd->last_change_time_sec = cpu_to_le64(cifsi->netfs.inode.i_ctime.tv_sec); - cd->last_change_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_ctime.tv_nsec); + cd->last_change_time_sec = cpu_to_le64(ctime.tv_sec); + cd->last_change_time_nsec = cpu_to_le32(ctime.tv_nsec); } diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index c3eeae07e139..93fe43789d7a 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -172,7 +172,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) else inode->i_atime = fattr->cf_atime; inode->i_mtime = fattr->cf_mtime; - inode->i_ctime = fattr->cf_ctime; + inode_set_ctime_to_ts(inode, fattr->cf_ctime); inode->i_rdev = fattr->cf_rdev; cifs_nlink_fattr_to_inode(inode, fattr); inode->i_uid = fattr->cf_uid; @@ -1744,9 +1744,9 @@ out_reval: cifs_inode = CIFS_I(inode); cifs_inode->time = 0; /* will force revalidate to get info when needed */ - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); } - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); cifs_inode = CIFS_I(dir); CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ unlink_out: @@ -2060,8 +2060,8 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) */ cifsInode->time = 0; - d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime = - current_time(inode); + inode_set_ctime_current(d_inode(direntry)); + inode->i_mtime = inode_set_ctime_current(inode); rmdir_exit: free_dentry_path(page); @@ -2267,8 +2267,8 @@ unlink_target: /* force revalidate to go get info when needed */ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; - source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = - target_dir->i_mtime = current_time(source_dir); + source_dir->i_mtime = target_dir->i_mtime = inode_set_ctime_to_ts(source_dir, + inode_set_ctime_current(target_dir)); cifs_rename_exit: kfree(info_buf_source); @@ -2540,7 +2540,7 @@ int cifs_getattr(struct mnt_idmap *idmap, const struct path *path, return rc; } - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->blksize = cifs_sb->ctx->bsize; stat->ino = CIFS_I(inode)->uniqueid; diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index fff092bbc7a3..f7160003e0ed 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -433,16 +433,21 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) * Dump encryption keys. This is an old ioctl that only * handles AES-128-{CCM,GCM}. */ - if (pSMBFile == NULL) - break; if (!capable(CAP_SYS_ADMIN)) { rc = -EACCES; break; } - tcon = tlink_tcon(pSMBFile->tlink); + cifs_sb = CIFS_SB(inode->i_sb); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + break; + } + tcon = tlink_tcon(tlink); if (!smb3_encryption_required(tcon)) { rc = -EOPNOTSUPP; + cifs_put_tlink(tlink); break; } pkey_inf.cipher_type = @@ -459,6 +464,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = -EFAULT; else rc = 0; + cifs_put_tlink(tlink); break; case CIFS_DUMP_FULL_KEY: /* @@ -470,8 +476,16 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = -EACCES; break; } - tcon = tlink_tcon(pSMBFile->tlink); + cifs_sb = CIFS_SB(inode->i_sb); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + break; + } + + tcon = tlink_tcon(tlink); rc = cifs_dump_full_key(tcon, (void __user *)arg); + cifs_put_tlink(tlink); break; case CIFS_IOC_NOTIFY: if (!S_ISDIR(inode->i_mode)) { diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 70dbfe6584f9..d7e85d9a2655 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -95,6 +95,7 @@ sesInfoFree(struct cifs_ses *buf_to_free) return; } + unload_nls(buf_to_free->local_nls); atomic_dec(&sesInfoAllocCount); kfree(buf_to_free->serverOS); kfree(buf_to_free->serverDomain); diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 335c078c42fb..c57ca2050b73 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -1013,6 +1013,7 @@ setup_ntlm_smb3_neg_ret: } +/* See MS-NLMP 2.2.1.3 */ int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, @@ -1047,7 +1048,8 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer, flags = ses->ntlmssp->server_flags | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED; - + /* we only send version information in ntlmssp negotiate, so do not set this flag */ + flags = flags & ~NTLMSSP_NEGOTIATE_VERSION; tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE); sec_blob->NegotiateFlags = cpu_to_le32(flags); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 87abce010974..182e2e879ecf 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1396,7 +1396,8 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, if (file_inf.LastWriteTime) inode->i_mtime = cifs_NTtimeToUnix(file_inf.LastWriteTime); if (file_inf.ChangeTime) - inode->i_ctime = cifs_NTtimeToUnix(file_inf.ChangeTime); + inode_set_ctime_to_ts(inode, + cifs_NTtimeToUnix(file_inf.ChangeTime)); if (file_inf.LastAccessTime) inode->i_atime = cifs_NTtimeToUnix(file_inf.LastAccessTime); @@ -2395,7 +2396,7 @@ smb2_is_status_io_timeout(char *buf) return false; } -static void +static bool smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) { struct smb2_hdr *shdr = (struct smb2_hdr *)buf; @@ -2404,7 +2405,7 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) struct cifs_tcon *tcon; if (shdr->Status != STATUS_NETWORK_NAME_DELETED) - return; + return false; /* If server is a channel, select the primary channel */ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; @@ -2419,11 +2420,13 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) spin_unlock(&cifs_tcp_ses_lock); pr_warn_once("Server share %s deleted.\n", tcon->tree_name); - return; + return true; } } } spin_unlock(&cifs_tcp_ses_lock); + + return false; } static int diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index e04766fe6f80..a457f07f820d 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -242,7 +242,7 @@ again: } spin_unlock(&server->srv_lock); - nls_codepage = load_nls_default(); + nls_codepage = ses->local_nls; /* * need to prevent multiple threads trying to simultaneously @@ -324,7 +324,6 @@ out: rc = -EAGAIN; } failed: - unload_nls(nls_codepage); return rc; } diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index c6db898dab7c..7676091b3e77 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -160,7 +160,7 @@ smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) spin_unlock(&ses->ses_lock); continue; } - ++ses->ses_count; + cifs_smb_ses_inc_refcount(ses); spin_unlock(&ses->ses_lock); return ses; } diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index fb8b2d566efb..b7521e41402e 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -352,7 +352,8 @@ enum KSMBD_TREE_CONN_STATUS { #define KSMBD_SHARE_FLAG_STREAMS BIT(11) #define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) #define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) -#define KSMBD_SHARE_FLAG_UPDATE BIT(14) +#define KSMBD_SHARE_FLAG_UPDATE BIT(14) +#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) /* * Tree connect request flags. diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index ced7a9e916f0..9df121bdf349 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -286,6 +286,7 @@ static void handle_ksmbd_work(struct work_struct *wk) static int queue_ksmbd_work(struct ksmbd_conn *conn) { struct ksmbd_work *work; + int err; work = ksmbd_alloc_work_struct(); if (!work) { @@ -297,7 +298,11 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn) work->request_buf = conn->request_buf; conn->request_buf = NULL; - ksmbd_init_smb_server(work); + err = ksmbd_init_smb_server(work); + if (err) { + ksmbd_free_work_struct(work); + return 0; + } ksmbd_conn_enqueue_request(work); atomic_inc(&conn->r_count); diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c index 33b7e6c4ceff..e881df1d10cb 100644 --- a/fs/smb/server/smb2misc.c +++ b/fs/smb/server/smb2misc.c @@ -380,13 +380,13 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } if (smb2_req_struct_sizes[command] != pdu->StructureSize2) { - if (command == SMB2_OPLOCK_BREAK_HE && - le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 && - le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) { + if (!(command == SMB2_OPLOCK_BREAK_HE && + (le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_20 || + le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_21))) { /* special case for SMB2.1 lease break message */ ksmbd_debug(SMB, - "Illegal request size %d for oplock break\n", - le16_to_cpu(pdu->StructureSize2)); + "Illegal request size %u for command %d\n", + le16_to_cpu(pdu->StructureSize2), command); return 1; } } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index cf8822103f50..a947c18915c2 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -87,9 +87,9 @@ struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn */ int smb2_get_ksmbd_tcon(struct ksmbd_work *work) { - struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf); + struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work); unsigned int cmd = le16_to_cpu(req_hdr->Command); - int tree_id; + unsigned int tree_id; if (cmd == SMB2_TREE_CONNECT_HE || cmd == SMB2_CANCEL_HE || @@ -114,7 +114,7 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work) pr_err("The first operation in the compound does not have tcon\n"); return -EINVAL; } - if (work->tcon->id != tree_id) { + if (tree_id != UINT_MAX && work->tcon->id != tree_id) { pr_err("tree id(%u) is different with id(%u) in first operation\n", tree_id, work->tcon->id); return -EINVAL; @@ -559,9 +559,9 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) */ int smb2_check_user_session(struct ksmbd_work *work) { - struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf); + struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work); struct ksmbd_conn *conn = work->conn; - unsigned int cmd = conn->ops->get_cmd_val(work); + unsigned int cmd = le16_to_cpu(req_hdr->Command); unsigned long long sess_id; /* @@ -587,7 +587,7 @@ int smb2_check_user_session(struct ksmbd_work *work) pr_err("The first operation in the compound does not have sess\n"); return -EINVAL; } - if (work->sess->id != sess_id) { + if (sess_id != ULLONG_MAX && work->sess->id != sess_id) { pr_err("session id(%llu) is different with the first operation(%lld)\n", sess_id, work->sess->id); return -EINVAL; @@ -2324,9 +2324,16 @@ next: break; buf_len -= next; eabuf = (struct smb2_ea_info *)((char *)eabuf + next); - if (next < (u32)eabuf->EaNameLength + le16_to_cpu(eabuf->EaValueLength)) + if (buf_len < sizeof(struct smb2_ea_info)) { + rc = -EINVAL; break; + } + if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength + + le16_to_cpu(eabuf->EaValueLength)) { + rc = -EINVAL; + break; + } } while (next != 0); kfree(attr_name); @@ -2467,8 +2474,9 @@ static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon, } } -static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name, - int open_flags, umode_t posix_mode, bool is_dir) +static int smb2_creat(struct ksmbd_work *work, struct path *parent_path, + struct path *path, char *name, int open_flags, + umode_t posix_mode, bool is_dir) { struct ksmbd_tree_connect *tcon = work->tcon; struct ksmbd_share_config *share = tcon->share_conf; @@ -2495,7 +2503,7 @@ static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name, return rc; } - rc = ksmbd_vfs_kern_path_locked(work, name, 0, path, 0); + rc = ksmbd_vfs_kern_path_locked(work, name, 0, parent_path, path, 0); if (rc) { pr_err("cannot get linux path (%s), err = %d\n", name, rc); @@ -2565,7 +2573,7 @@ int smb2_open(struct ksmbd_work *work) struct ksmbd_tree_connect *tcon = work->tcon; struct smb2_create_req *req; struct smb2_create_rsp *rsp; - struct path path; + struct path path, parent_path; struct ksmbd_share_config *share = tcon->share_conf; struct ksmbd_file *fp = NULL; struct file *filp = NULL; @@ -2786,7 +2794,8 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } - rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS, &path, 1); + rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS, + &parent_path, &path, 1); if (!rc) { file_present = true; @@ -2906,7 +2915,8 @@ int smb2_open(struct ksmbd_work *work) /*create file if not present */ if (!file_present) { - rc = smb2_creat(work, &path, name, open_flags, posix_mode, + rc = smb2_creat(work, &parent_path, &path, name, open_flags, + posix_mode, req->CreateOptions & FILE_DIRECTORY_FILE_LE); if (rc) { if (rc == -ENOENT) { @@ -3321,8 +3331,9 @@ int smb2_open(struct ksmbd_work *work) err_out: if (file_present || created) { - inode_unlock(d_inode(path.dentry->d_parent)); - dput(path.dentry); + inode_unlock(d_inode(parent_path.dentry)); + path_put(&path); + path_put(&parent_path); } ksmbd_revert_fsids(work); err_out1: @@ -4391,8 +4402,8 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp, } basic_info = (struct smb2_file_basic_info *)rsp->Buffer; - generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp), - &stat); + generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, + file_inode(fp->filp), &stat); basic_info->CreationTime = cpu_to_le64(fp->create_time); time = ksmbd_UnixTimeToNT(stat.atime); basic_info->LastAccessTime = cpu_to_le64(time); @@ -4417,7 +4428,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp, struct kstat stat; inode = file_inode(fp->filp); - generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat); + generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat); sinfo = (struct smb2_file_standard_info *)rsp->Buffer; delete_pending = ksmbd_inode_pending_delete(fp); @@ -4471,7 +4482,7 @@ static int get_file_all_info(struct ksmbd_work *work, return PTR_ERR(filename); inode = file_inode(fp->filp); - generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat); + generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat); ksmbd_debug(SMB, "filename = %s\n", filename); delete_pending = ksmbd_inode_pending_delete(fp); @@ -4548,8 +4559,8 @@ static void get_file_stream_info(struct ksmbd_work *work, int buf_free_len; struct smb2_query_info_req *req = ksmbd_req_buf_next(work); - generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp), - &stat); + generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, + file_inode(fp->filp), &stat); file_info = (struct smb2_file_stream_info *)rsp->Buffer; buf_free_len = @@ -4639,8 +4650,8 @@ static void get_file_internal_info(struct smb2_query_info_rsp *rsp, struct smb2_file_internal_info *file_info; struct kstat stat; - generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp), - &stat); + generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, + file_inode(fp->filp), &stat); file_info = (struct smb2_file_internal_info *)rsp->Buffer; file_info->IndexNumber = cpu_to_le64(stat.ino); rsp->OutputBufferLength = @@ -4665,7 +4676,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer; inode = file_inode(fp->filp); - generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat); + generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat); file_info->CreationTime = cpu_to_le64(fp->create_time); time = ksmbd_UnixTimeToNT(stat.atime); @@ -4726,8 +4737,8 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp, struct smb2_file_comp_info *file_info; struct kstat stat; - generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp), - &stat); + generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, + file_inode(fp->filp), &stat); file_info = (struct smb2_file_comp_info *)rsp->Buffer; file_info->CompressedFileSize = cpu_to_le64(stat.blocks << 9); @@ -4779,7 +4790,7 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp, file_info->LastAccessTime = cpu_to_le64(time); time = ksmbd_UnixTimeToNT(inode->i_mtime); file_info->LastWriteTime = cpu_to_le64(time); - time = ksmbd_UnixTimeToNT(inode->i_ctime); + time = ksmbd_UnixTimeToNT(inode_get_ctime(inode)); file_info->ChangeTime = cpu_to_le64(time); file_info->DosAttributes = fp->f_ci->m_fattr; file_info->Inode = cpu_to_le64(inode->i_ino); @@ -5422,7 +5433,7 @@ int smb2_close(struct ksmbd_work *work) rsp->LastAccessTime = cpu_to_le64(time); time = ksmbd_UnixTimeToNT(inode->i_mtime); rsp->LastWriteTime = cpu_to_le64(time); - time = ksmbd_UnixTimeToNT(inode->i_ctime); + time = ksmbd_UnixTimeToNT(inode_get_ctime(inode)); rsp->ChangeTime = cpu_to_le64(time); ksmbd_fd_put(work, fp); } else { @@ -5545,7 +5556,7 @@ static int smb2_create_link(struct ksmbd_work *work, struct nls_table *local_nls) { char *link_name = NULL, *target_name = NULL, *pathname = NULL; - struct path path; + struct path path, parent_path; bool file_present = false; int rc; @@ -5575,7 +5586,7 @@ static int smb2_create_link(struct ksmbd_work *work, ksmbd_debug(SMB, "target name is %s\n", target_name); rc = ksmbd_vfs_kern_path_locked(work, link_name, LOOKUP_NO_SYMLINKS, - &path, 0); + &parent_path, &path, 0); if (rc) { if (rc != -ENOENT) goto out; @@ -5605,8 +5616,9 @@ static int smb2_create_link(struct ksmbd_work *work, rc = -EINVAL; out: if (file_present) { - inode_unlock(d_inode(path.dentry->d_parent)); + inode_unlock(d_inode(parent_path.dentry)); path_put(&path); + path_put(&parent_path); } if (!IS_ERR(link_name)) kfree(link_name); @@ -5644,7 +5656,7 @@ static int set_file_basic_info(struct ksmbd_file *fp, if (file_info->ChangeTime) attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime); else - attrs.ia_ctime = inode->i_ctime; + attrs.ia_ctime = inode_get_ctime(inode); if (file_info->LastWriteTime) { attrs.ia_mtime = ksmbd_NTtimeToUnix(file_info->LastWriteTime); @@ -5689,7 +5701,7 @@ static int set_file_basic_info(struct ksmbd_file *fp, return -EACCES; inode_lock(inode); - inode->i_ctime = attrs.ia_ctime; + inode_set_ctime_to_ts(inode, attrs.ia_ctime); attrs.ia_valid &= ~ATTR_CTIME; rc = notify_change(idmap, dentry, &attrs, NULL); inode_unlock(inode); @@ -6209,6 +6221,11 @@ int smb2_read(struct ksmbd_work *work) unsigned int max_read_size = conn->vals->max_read_size; WORK_BUFFERS(work, req, rsp); + if (work->next_smb2_rcv_hdr_off) { + work->send_no_response = 1; + err = -EOPNOTSUPP; + goto out; + } if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_PIPE)) { @@ -8609,7 +8626,8 @@ int smb3_decrypt_req(struct ksmbd_work *work) struct smb2_transform_hdr *tr_hdr = smb2_get_msg(buf); int rc = 0; - if (buf_data_size < sizeof(struct smb2_hdr)) { + if (pdu_length < sizeof(struct smb2_transform_hdr) || + buf_data_size < sizeof(struct smb2_hdr)) { pr_err("Transform message is too small (%u)\n", pdu_length); return -ECONNABORTED; diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index ef20f63e55e6..c2b75d898852 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -388,26 +388,29 @@ static struct smb_version_cmds smb1_server_cmds[1] = { [SMB_COM_NEGOTIATE_EX] = { .proc = smb1_negotiate, }, }; -static void init_smb1_server(struct ksmbd_conn *conn) +static int init_smb1_server(struct ksmbd_conn *conn) { conn->ops = &smb1_server_ops; conn->cmds = smb1_server_cmds; conn->max_cmds = ARRAY_SIZE(smb1_server_cmds); + return 0; } -void ksmbd_init_smb_server(struct ksmbd_work *work) +int ksmbd_init_smb_server(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; __le32 proto; - if (conn->need_neg == false) - return; - proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol; + if (conn->need_neg == false) { + if (proto == SMB1_PROTO_NUMBER) + return -EINVAL; + return 0; + } + if (proto == SMB1_PROTO_NUMBER) - init_smb1_server(conn); - else - init_smb3_11_server(conn); + return init_smb1_server(conn); + return init_smb3_11_server(conn); } int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index aeca0f46068f..f1092519c0c2 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -427,7 +427,7 @@ bool ksmbd_smb_request(struct ksmbd_conn *conn); int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count); -void ksmbd_init_smb_server(struct ksmbd_work *work); +int ksmbd_init_smb_server(struct ksmbd_work *work); struct ksmbd_kstat; int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index e35914457350..d48756a339a5 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -63,13 +63,13 @@ int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child) static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, char *pathname, unsigned int flags, + struct path *parent_path, struct path *path) { struct qstr last; struct filename *filename; struct path *root_share_path = &share_conf->vfs_path; int err, type; - struct path parent_path; struct dentry *d; if (pathname[0] == '\0') { @@ -84,7 +84,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, return PTR_ERR(filename); err = vfs_path_parent_lookup(filename, flags, - &parent_path, &last, &type, + parent_path, &last, &type, root_share_path); if (err) { putname(filename); @@ -92,13 +92,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, } if (unlikely(type != LAST_NORM)) { - path_put(&parent_path); + path_put(parent_path); putname(filename); return -ENOENT; } - inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); - d = lookup_one_qstr_excl(&last, parent_path.dentry, 0); + inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl(&last, parent_path->dentry, 0); if (IS_ERR(d)) goto err_out; @@ -108,15 +108,22 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, } path->dentry = d; - path->mnt = share_conf->vfs_path.mnt; - path_put(&parent_path); - putname(filename); + path->mnt = mntget(parent_path->mnt); + if (test_share_config_flag(share_conf, KSMBD_SHARE_FLAG_CROSSMNT)) { + err = follow_down(path, 0); + if (err < 0) { + path_put(path); + goto err_out; + } + } + + putname(filename); return 0; err_out: - inode_unlock(parent_path.dentry->d_inode); - path_put(&parent_path); + inode_unlock(d_inode(parent_path->dentry)); + path_put(parent_path); putname(filename); return -ENOENT; } @@ -412,7 +419,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, { char *stream_buf = NULL, *wbuf; struct mnt_idmap *idmap = file_mnt_idmap(fp->filp); - size_t size, v_len; + size_t size; + ssize_t v_len; int err = 0; ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n", @@ -429,9 +437,9 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, fp->stream.name, fp->stream.size, &stream_buf); - if ((int)v_len < 0) { + if (v_len < 0) { pr_err("not found stream in xattr : %zd\n", v_len); - err = (int)v_len; + err = v_len; goto out; } @@ -1194,14 +1202,14 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, * Return: 0 on success, otherwise error */ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, - unsigned int flags, struct path *path, - bool caseless) + unsigned int flags, struct path *parent_path, + struct path *path, bool caseless) { struct ksmbd_share_config *share_conf = work->tcon->share_conf; int err; - struct path parent_path; - err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, path); + err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, parent_path, + path); if (!err) return 0; @@ -1216,10 +1224,10 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, path_len = strlen(filepath); remain_len = path_len; - parent_path = share_conf->vfs_path; - path_get(&parent_path); + *parent_path = share_conf->vfs_path; + path_get(parent_path); - while (d_can_lookup(parent_path.dentry)) { + while (d_can_lookup(parent_path->dentry)) { char *filename = filepath + path_len - remain_len; char *next = strchrnul(filename, '/'); size_t filename_len = next - filename; @@ -1228,7 +1236,7 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, if (filename_len == 0) break; - err = ksmbd_vfs_lookup_in_dir(&parent_path, filename, + err = ksmbd_vfs_lookup_in_dir(parent_path, filename, filename_len, work->conn->um); if (err) @@ -1245,8 +1253,8 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, goto out2; else if (is_last) goto out1; - path_put(&parent_path); - parent_path = *path; + path_put(parent_path); + *parent_path = *path; next[0] = '/'; remain_len -= filename_len + 1; @@ -1254,16 +1262,17 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, err = -EINVAL; out2: - path_put(&parent_path); + path_put(parent_path); out1: kfree(filepath); } if (!err) { - err = ksmbd_vfs_lock_parent(parent_path.dentry, path->dentry); - if (err) - dput(path->dentry); - path_put(&parent_path); + err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry); + if (err) { + path_put(path); + path_put(parent_path); + } } return err; } @@ -1650,7 +1659,8 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work, u64 time; int rc; - generic_fillattr(idmap, d_inode(dentry), ksmbd_kstat->kstat); + generic_fillattr(idmap, STATX_BASIC_STATS, d_inode(dentry), + ksmbd_kstat->kstat); time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime); ksmbd_kstat->create_time = time; diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index 80039312c255..72f9fb4b48d1 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -115,8 +115,8 @@ int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, const struct path *path, char *attr_name); int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, - unsigned int flags, struct path *path, - bool caseless); + unsigned int flags, struct path *parent_path, + struct path *path, bool caseless); struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, const char *name, unsigned int flags, diff --git a/fs/splice.c b/fs/splice.c index 004eb1c4ce31..02631013b09f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -120,17 +120,17 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - struct page *page = buf->page; + struct folio *folio = page_folio(buf->page); int err; - if (!PageUptodate(page)) { - lock_page(page); + if (!folio_test_uptodate(folio)) { + folio_lock(folio); /* - * Page got truncated/unhashed. This will cause a 0-byte + * Folio got truncated/unhashed. This will cause a 0-byte * splice, if this is the first page. */ - if (!page->mapping) { + if (!folio->mapping) { err = -ENODATA; goto error; } @@ -138,20 +138,18 @@ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, /* * Uh oh, read-error from disk. */ - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { err = -EIO; goto error; } - /* - * Page is ok afterall, we are done. - */ - unlock_page(page); + /* Folio is ok after all, we are done */ + folio_unlock(folio); } return 0; error: - unlock_page(page); + folio_unlock(folio); return err; } @@ -876,6 +874,8 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, msg.msg_flags |= MSG_MORE; if (remain && pipe_occupancy(pipe->head, tail) > 0) msg.msg_flags |= MSG_MORE; + if (out->f_flags & O_NONBLOCK) + msg.msg_flags |= MSG_DONTWAIT; iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc, len - remain); @@ -1267,10 +1267,8 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out, if ((in->f_flags | out->f_flags) & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; - return splice_pipe_to_pipe(ipipe, opipe, len, flags); - } - - if (ipipe) { + ret = splice_pipe_to_pipe(ipipe, opipe, len, flags); + } else if (ipipe) { if (off_in) return -ESPIPE; if (off_out) { @@ -1295,18 +1293,11 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out, ret = do_splice_from(ipipe, out, &offset, len, flags); file_end_write(out); - if (ret > 0) - fsnotify_modify(out); - if (!off_out) out->f_pos = offset; else *off_out = offset; - - return ret; - } - - if (opipe) { + } else if (opipe) { if (off_out) return -ESPIPE; if (off_in) { @@ -1322,18 +1313,25 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out, ret = splice_file_to_pipe(in, opipe, &offset, len, flags); - if (ret > 0) - fsnotify_access(in); - if (!off_in) in->f_pos = offset; else *off_in = offset; + } else { + ret = -EINVAL; + } - return ret; + if (ret > 0) { + /* + * Generate modify out before access in: + * do_splice_from() may've already sent modify out, + * and this ensures the events get merged. + */ + fsnotify_modify(out); + fsnotify_access(in); } - return -EINVAL; + return ret; } static long __do_splice(struct file *in, loff_t __user *off_in, @@ -1462,6 +1460,9 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter, pipe_unlock(pipe); } + if (ret > 0) + fsnotify_access(file); + return ret; } @@ -1491,8 +1492,10 @@ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, if (!ret) ret = iter_to_pipe(iter, pipe, buf_flag); pipe_unlock(pipe); - if (ret > 0) + if (ret > 0) { wakeup_pipe_readers(pipe); + fsnotify_modify(file); + } return ret; } @@ -1926,6 +1929,11 @@ long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) } } + if (ret > 0) { + fsnotify_access(in); + fsnotify_modify(out); + } + return ret; } diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 24463145b351..c6e626b00546 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -61,7 +61,7 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode, inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime); inode->i_atime.tv_sec = inode->i_mtime.tv_sec; - inode->i_ctime.tv_sec = inode->i_mtime.tv_sec; + inode_set_ctime(inode, inode->i_mtime.tv_sec, 0); inode->i_mode = le16_to_cpu(sqsh_ino->mode); inode->i_size = 0; diff --git a/fs/stack.c b/fs/stack.c index c9830924eb12..b5e01bdb5f5f 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -68,7 +68,7 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src) dest->i_rdev = src->i_rdev; dest->i_atime = src->i_atime; dest->i_mtime = src->i_mtime; - dest->i_ctime = src->i_ctime; + inode_set_ctime_to_ts(dest, inode_get_ctime(src)); dest->i_blkbits = src->i_blkbits; dest->i_flags = src->i_flags; set_nlink(dest, src->i_nlink); diff --git a/fs/stat.c b/fs/stat.c index 7c238da22ef0..136711ae72fb 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -27,10 +27,42 @@ #include "mount.h" /** + * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED + * @stat: where to store the resulting values + * @request_mask: STATX_* values requested + * @inode: inode from which to grab the c/mtime + * + * Given @inode, grab the ctime and mtime out if it and store the result + * in @stat. When fetching the value, flag it as queried so the next write + * will use a fine-grained timestamp. + */ +void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) +{ + atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; + + /* If neither time was requested, then don't report them */ + if (!(request_mask & (STATX_CTIME|STATX_MTIME))) { + stat->result_mask &= ~(STATX_CTIME|STATX_MTIME); + return; + } + + stat->mtime = inode->i_mtime; + stat->ctime.tv_sec = inode->__i_ctime.tv_sec; + /* + * Atomically set the QUERIED flag and fetch the new value with + * the flag masked off. + */ + stat->ctime.tv_nsec = atomic_long_fetch_or(I_CTIME_QUERIED, pnsec) & + ~I_CTIME_QUERIED; +} +EXPORT_SYMBOL(fill_mg_cmtime); + +/** * generic_fillattr - Fill in the basic attributes from the inode struct - * @idmap: idmap of the mount the inode was found from - * @inode: Inode to use as the source - * @stat: Where to fill in the attributes + * @idmap: idmap of the mount the inode was found from + * @request_mask: statx request_mask + * @inode: Inode to use as the source + * @stat: Where to fill in the attributes * * Fill in the basic attributes in the kstat structure from data that's to be * found on the VFS inode structure. This is the default if no getattr inode @@ -42,8 +74,8 @@ * uid and gid filds. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply passs @nop_mnt_idmap. */ -void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode, - struct kstat *stat) +void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, + struct inode *inode, struct kstat *stat) { vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); @@ -57,10 +89,22 @@ void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode, stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; + + if (is_mgtime(inode)) { + fill_mg_cmtime(stat, request_mask, inode); + } else { + stat->mtime = inode->i_mtime; + stat->ctime = inode_get_ctime(inode); + } + stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; + + if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { + stat->result_mask |= STATX_CHANGE_COOKIE; + stat->change_cookie = inode_query_iversion(inode); + } + } EXPORT_SYMBOL(generic_fillattr); @@ -123,17 +167,12 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | STATX_ATTR_DAX); - if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { - stat->result_mask |= STATX_CHANGE_COOKIE; - stat->change_cookie = inode_query_iversion(inode); - } - idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) return inode->i_op->getattr(idmap, path, stat, request_mask, query_flags); - generic_fillattr(idmap, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); return 0; } EXPORT_SYMBOL(vfs_getattr_nosec); diff --git a/fs/super.c b/fs/super.c index e781226e2880..692654c2af36 100644 --- a/fs/super.c +++ b/fs/super.c @@ -39,7 +39,7 @@ #include <uapi/linux/mount.h> #include "internal.h" -static int thaw_super_locked(struct super_block *sb); +static int thaw_super_locked(struct super_block *sb, enum freeze_holder who); static LIST_HEAD(super_blocks); static DEFINE_SPINLOCK(sb_lock); @@ -50,6 +50,130 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { "sb_internal", }; +static inline void __super_lock(struct super_block *sb, bool excl) +{ + if (excl) + down_write(&sb->s_umount); + else + down_read(&sb->s_umount); +} + +static inline void super_unlock(struct super_block *sb, bool excl) +{ + if (excl) + up_write(&sb->s_umount); + else + up_read(&sb->s_umount); +} + +static inline void __super_lock_excl(struct super_block *sb) +{ + __super_lock(sb, true); +} + +static inline void super_unlock_excl(struct super_block *sb) +{ + super_unlock(sb, true); +} + +static inline void super_unlock_shared(struct super_block *sb) +{ + super_unlock(sb, false); +} + +static inline bool wait_born(struct super_block *sb) +{ + unsigned int flags; + + /* + * Pairs with smp_store_release() in super_wake() and ensures + * that we see SB_BORN or SB_DYING after we're woken. + */ + flags = smp_load_acquire(&sb->s_flags); + return flags & (SB_BORN | SB_DYING); +} + +/** + * super_lock - wait for superblock to become ready and lock it + * @sb: superblock to wait for + * @excl: whether exclusive access is required + * + * If the superblock has neither passed through vfs_get_tree() or + * generic_shutdown_super() yet wait for it to happen. Either superblock + * creation will succeed and SB_BORN is set by vfs_get_tree() or we're + * woken and we'll see SB_DYING. + * + * The caller must have acquired a temporary reference on @sb->s_count. + * + * Return: This returns true if SB_BORN was set, false if SB_DYING was + * set. The function acquires s_umount and returns with it held. + */ +static __must_check bool super_lock(struct super_block *sb, bool excl) +{ + + lockdep_assert_not_held(&sb->s_umount); + +relock: + __super_lock(sb, excl); + + /* + * Has gone through generic_shutdown_super() in the meantime. + * @sb->s_root is NULL and @sb->s_active is 0. No one needs to + * grab a reference to this. Tell them so. + */ + if (sb->s_flags & SB_DYING) + return false; + + /* Has called ->get_tree() successfully. */ + if (sb->s_flags & SB_BORN) + return true; + + super_unlock(sb, excl); + + /* wait until the superblock is ready or dying */ + wait_var_event(&sb->s_flags, wait_born(sb)); + + /* + * Neither SB_BORN nor SB_DYING are ever unset so we never loop. + * Just reacquire @sb->s_umount for the caller. + */ + goto relock; +} + +/* wait and acquire read-side of @sb->s_umount */ +static inline bool super_lock_shared(struct super_block *sb) +{ + return super_lock(sb, false); +} + +/* wait and acquire write-side of @sb->s_umount */ +static inline bool super_lock_excl(struct super_block *sb) +{ + return super_lock(sb, true); +} + +/* wake waiters */ +#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD) +static void super_wake(struct super_block *sb, unsigned int flag) +{ + WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS)); + WARN_ON_ONCE(hweight32(flag & SUPER_WAKE_FLAGS) > 1); + + /* + * Pairs with smp_load_acquire() in super_lock() to make sure + * all initializations in the superblock are seen by the user + * seeing SB_BORN sent. + */ + smp_store_release(&sb->s_flags, sb->s_flags | flag); + /* + * Pairs with the barrier in prepare_to_wait_event() to make sure + * ___wait_var_event() either sees SB_BORN set or + * waitqueue_active() check in wake_up_var() sees the waiter. + */ + smp_mb(); + wake_up_var(&sb->s_flags); +} + /* * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. @@ -76,7 +200,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink, if (!(sc->gfp_mask & __GFP_FS)) return SHRINK_STOP; - if (!trylock_super(sb)) + if (!super_trylock_shared(sb)) return SHRINK_STOP; if (sb->s_op->nr_cached_objects) @@ -110,7 +234,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink, freed += sb->s_op->free_cached_objects(sb, sc); } - up_read(&sb->s_umount); + super_unlock_shared(sb); return freed; } @@ -123,17 +247,17 @@ static unsigned long super_cache_count(struct shrinker *shrink, sb = container_of(shrink, struct super_block, s_shrink); /* - * We don't call trylock_super() here as it is a scalability bottleneck, - * so we're exposed to partial setup state. The shrinker rwsem does not - * protect filesystem operations backing list_lru_shrink_count() or - * s_op->nr_cached_objects(). Counts can change between - * super_cache_count and super_cache_scan, so we really don't need locks - * here. + * We don't call super_trylock_shared() here as it is a scalability + * bottleneck, so we're exposed to partial setup state. The shrinker + * rwsem does not protect filesystem operations backing + * list_lru_shrink_count() or s_op->nr_cached_objects(). Counts can + * change between super_cache_count and super_cache_scan, so we really + * don't need locks here. * * However, if we are currently mounting the superblock, the underlying * filesystem might be in a state of partial construction and hence it - * is dangerous to access it. trylock_super() uses a SB_BORN check to - * avoid this situation, so do the same here. The memory barrier is + * is dangerous to access it. super_trylock_shared() uses a SB_BORN check + * to avoid this situation, so do the same here. The memory barrier is * matched with the one in mount_fs() as we don't hold locks here. */ if (!(sb->s_flags & SB_BORN)) @@ -176,7 +300,7 @@ static void destroy_unused_super(struct super_block *s) { if (!s) return; - up_write(&s->s_umount); + super_unlock_excl(s); list_lru_destroy(&s->s_dentry_lru); list_lru_destroy(&s->s_inode_lru); security_sb_free(s); @@ -337,10 +461,29 @@ void deactivate_locked_super(struct super_block *s) list_lru_destroy(&s->s_dentry_lru); list_lru_destroy(&s->s_inode_lru); + /* + * Remove it from @fs_supers so it isn't found by new + * sget{_fc}() walkers anymore. Any concurrent mounter still + * managing to grab a temporary reference is guaranteed to + * already see SB_DYING and will wait until we notify them about + * SB_DEAD. + */ + spin_lock(&sb_lock); + hlist_del_init(&s->s_instances); + spin_unlock(&sb_lock); + + /* + * Let concurrent mounts know that this thing is really dead. + * We don't need @sb->s_umount here as every concurrent caller + * will see SB_DYING and either discard the superblock or wait + * for SB_DEAD. + */ + super_wake(s, SB_DEAD); + put_filesystem(fs); put_super(s); } else { - up_write(&s->s_umount); + super_unlock_excl(s); } } @@ -357,7 +500,7 @@ EXPORT_SYMBOL(deactivate_locked_super); void deactivate_super(struct super_block *s) { if (!atomic_add_unless(&s->s_active, -1, 1)) { - down_write(&s->s_umount); + __super_lock_excl(s); deactivate_locked_super(s); } } @@ -379,20 +522,61 @@ EXPORT_SYMBOL(deactivate_super); */ static int grab_super(struct super_block *s) __releases(sb_lock) { + bool born; + s->s_count++; spin_unlock(&sb_lock); - down_write(&s->s_umount); - if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) { + born = super_lock_excl(s); + if (born && atomic_inc_not_zero(&s->s_active)) { put_super(s); return 1; } - up_write(&s->s_umount); + super_unlock_excl(s); put_super(s); return 0; } +static inline bool wait_dead(struct super_block *sb) +{ + unsigned int flags; + + /* + * Pairs with memory barrier in super_wake() and ensures + * that we see SB_DEAD after we're woken. + */ + flags = smp_load_acquire(&sb->s_flags); + return flags & SB_DEAD; +} + +/** + * grab_super_dead - acquire an active reference to a superblock + * @sb: superblock to acquire + * + * Acquire a temporary reference on a superblock and try to trade it for + * an active reference. This is used in sget{_fc}() to wait for a + * superblock to either become SB_BORN or for it to pass through + * sb->kill() and be marked as SB_DEAD. + * + * Return: This returns true if an active reference could be acquired, + * false if not. + */ +static bool grab_super_dead(struct super_block *sb) +{ + + sb->s_count++; + if (grab_super(sb)) { + put_super(sb); + lockdep_assert_held(&sb->s_umount); + return true; + } + wait_var_event(&sb->s_flags, wait_dead(sb)); + put_super(sb); + lockdep_assert_not_held(&sb->s_umount); + return false; +} + /* - * trylock_super - try to grab ->s_umount shared + * super_trylock_shared - try to grab ->s_umount shared * @sb: reference we are trying to grab * * Try to prevent fs shutdown. This is used in places where we @@ -408,13 +592,13 @@ static int grab_super(struct super_block *s) __releases(sb_lock) * of down_read(). There's a couple of places that are OK with that, but * it's very much not a general-purpose interface. */ -bool trylock_super(struct super_block *sb) +bool super_trylock_shared(struct super_block *sb) { if (down_read_trylock(&sb->s_umount)) { - if (!hlist_unhashed(&sb->s_instances) && - sb->s_root && (sb->s_flags & SB_BORN)) + if (!(sb->s_flags & SB_DYING) && sb->s_root && + (sb->s_flags & SB_BORN)) return true; - up_read(&sb->s_umount); + super_unlock_shared(sb); } return false; @@ -439,13 +623,13 @@ bool trylock_super(struct super_block *sb) void retire_super(struct super_block *sb) { WARN_ON(!sb->s_bdev); - down_write(&sb->s_umount); + __super_lock_excl(sb); if (sb->s_iflags & SB_I_PERSB_BDI) { bdi_unregister(sb->s_bdi); sb->s_iflags &= ~SB_I_PERSB_BDI; } sb->s_iflags |= SB_I_RETIRED; - up_write(&sb->s_umount); + super_unlock_excl(sb); } EXPORT_SYMBOL(retire_super); @@ -517,11 +701,17 @@ void generic_shutdown_super(struct super_block *sb) spin_unlock(&sb->s_inode_list_lock); } } - spin_lock(&sb_lock); - /* should be initialized for __put_super_and_need_restart() */ - hlist_del_init(&sb->s_instances); - spin_unlock(&sb_lock); - up_write(&sb->s_umount); + /* + * Broadcast to everyone that grabbed a temporary reference to this + * superblock before we removed it from @fs_supers that the superblock + * is dying. Every walker of @fs_supers outside of sget{_fc}() will now + * discard this superblock and treat it as dead. + * + * We leave the superblock on @fs_supers so it can be found by + * sget{_fc}() until we passed sb->kill_sb(). + */ + super_wake(sb, SB_DYING); + super_unlock_excl(sb); if (sb->s_bdi != &noop_backing_dev_info) { if (sb->s_iflags & SB_I_PERSB_BDI) bdi_unregister(sb->s_bdi); @@ -546,17 +736,31 @@ bool mount_capable(struct fs_context *fc) * @test: Comparison callback * @set: Setup callback * - * Find or create a superblock using the parameters stored in the filesystem - * context and the two callback functions. + * Create a new superblock or find an existing one. + * + * The @test callback is used to find a matching existing superblock. + * Whether or not the requested parameters in @fc are taken into account + * is specific to the @test callback that is used. They may even be + * completely ignored. + * + * If an extant superblock is matched, it will be returned unless: * - * If an extant superblock is matched, then that will be returned with an - * elevated reference count that the caller must transfer or discard. + * (1) the namespace the filesystem context @fc and the extant + * superblock's namespace differ + * + * (2) the filesystem context @fc has requested that reusing an extant + * superblock is not allowed + * + * In both cases EBUSY will be returned. * * If no match is made, a new superblock will be allocated and basic - * initialisation will be performed (s_type, s_fs_info and s_id will be set and - * the set() callback will be invoked), the superblock will be published and it - * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE - * as yet unset. + * initialisation will be performed (s_type, s_fs_info and s_id will be + * set and the @set callback will be invoked), the superblock will be + * published and it will be returned in a partially constructed state + * with SB_BORN and SB_ACTIVE as yet unset. + * + * Return: On success, an extant or newly created superblock is + * returned. On failure an error pointer is returned. */ struct super_block *sget_fc(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), @@ -595,6 +799,11 @@ retry: s->s_type = fc->fs_type; s->s_iflags |= fc->s_iflags; strscpy(s->s_id, s->s_type->name, sizeof(s->s_id)); + /* + * Make the superblock visible on @super_blocks and @fs_supers. + * It's in a nascent state and users should wait on SB_BORN or + * SB_DYING to be set. + */ list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &s->s_type->fs_supers); spin_unlock(&sb_lock); @@ -603,12 +812,16 @@ retry: return s; share_extant_sb: - if (user_ns != old->s_user_ns) { + if (user_ns != old->s_user_ns || fc->exclusive) { spin_unlock(&sb_lock); destroy_unused_super(s); + if (fc->exclusive) + warnfc(fc, "reusing existing filesystem not allowed"); + else + warnfc(fc, "reusing existing filesystem in another namespace not allowed"); return ERR_PTR(-EBUSY); } - if (!grab_super(old)) + if (!grab_super_dead(old)) goto retry; destroy_unused_super(s); return old; @@ -652,7 +865,7 @@ retry: destroy_unused_super(s); return ERR_PTR(-EBUSY); } - if (!grab_super(old)) + if (!grab_super_dead(old)) goto retry; destroy_unused_super(s); return old; @@ -685,7 +898,7 @@ EXPORT_SYMBOL(sget); void drop_super(struct super_block *sb) { - up_read(&sb->s_umount); + super_unlock_shared(sb); put_super(sb); } @@ -693,7 +906,7 @@ EXPORT_SYMBOL(drop_super); void drop_super_exclusive(struct super_block *sb) { - up_write(&sb->s_umount); + super_unlock_excl(sb); put_super(sb); } EXPORT_SYMBOL(drop_super_exclusive); @@ -704,7 +917,8 @@ static void __iterate_supers(void (*f)(struct super_block *)) spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) + /* Pairs with memory marrier in super_wake(). */ + if (smp_load_acquire(&sb->s_flags) & SB_DYING) continue; sb->s_count++; spin_unlock(&sb_lock); @@ -734,15 +948,15 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) - continue; + bool born; + sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root && (sb->s_flags & SB_BORN)) + born = super_lock_shared(sb); + if (born && sb->s_root) f(sb, arg); - up_read(&sb->s_umount); + super_unlock_shared(sb); spin_lock(&sb_lock); if (p) @@ -770,13 +984,15 @@ void iterate_supers_type(struct file_system_type *type, spin_lock(&sb_lock); hlist_for_each_entry(sb, &type->fs_supers, s_instances) { + bool born; + sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root && (sb->s_flags & SB_BORN)) + born = super_lock_shared(sb); + if (born && sb->s_root) f(sb, arg); - up_read(&sb->s_umount); + super_unlock_shared(sb); spin_lock(&sb_lock); if (p) @@ -791,43 +1007,6 @@ void iterate_supers_type(struct file_system_type *type, EXPORT_SYMBOL(iterate_supers_type); /** - * get_super - get the superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device given. %NULL is returned if no match is found. - */ -struct super_block *get_super(struct block_device *bdev) -{ - struct super_block *sb; - - if (!bdev) - return NULL; - - spin_lock(&sb_lock); -rescan: - list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) - continue; - if (sb->s_bdev == bdev) { - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - /* still alive? */ - if (sb->s_root && (sb->s_flags & SB_BORN)) - return sb; - up_read(&sb->s_umount); - /* nope, got unmounted */ - spin_lock(&sb_lock); - __put_super(sb); - goto rescan; - } - } - spin_unlock(&sb_lock); - return NULL; -} - -/** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for * @@ -842,15 +1021,12 @@ struct super_block *get_active_super(struct block_device *bdev) if (!bdev) return NULL; -restart: spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) - continue; if (sb->s_bdev == bdev) { if (!grab_super(sb)) - goto restart; - up_write(&sb->s_umount); + return NULL; + super_unlock_excl(sb); return sb; } } @@ -863,28 +1039,21 @@ struct super_block *user_get_super(dev_t dev, bool excl) struct super_block *sb; spin_lock(&sb_lock); -rescan: list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) - continue; if (sb->s_dev == dev) { + bool born; + sb->s_count++; spin_unlock(&sb_lock); - if (excl) - down_write(&sb->s_umount); - else - down_read(&sb->s_umount); /* still alive? */ - if (sb->s_root && (sb->s_flags & SB_BORN)) + born = super_lock(sb, excl); + if (born && sb->s_root) return sb; - if (excl) - up_write(&sb->s_umount); - else - up_read(&sb->s_umount); + super_unlock(sb, excl); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); - goto rescan; + break; } } spin_unlock(&sb_lock); @@ -926,9 +1095,9 @@ int reconfigure_super(struct fs_context *fc) if (remount_ro) { if (!hlist_empty(&sb->s_pins)) { - up_write(&sb->s_umount); + super_unlock_excl(sb); group_pin_kill(&sb->s_pins); - down_write(&sb->s_umount); + __super_lock_excl(sb); if (!sb->s_root) return 0; if (sb->s_writers.frozen != SB_UNFROZEN) @@ -991,9 +1160,9 @@ cancel_readonly: static void do_emergency_remount_callback(struct super_block *sb) { - down_write(&sb->s_umount); - if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && - !sb_rdonly(sb)) { + bool born = super_lock_excl(sb); + + if (born && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) { struct fs_context *fc; fc = fs_context_for_reconfigure(sb->s_root, @@ -1004,7 +1173,7 @@ static void do_emergency_remount_callback(struct super_block *sb) put_fs_context(fc); } } - up_write(&sb->s_umount); + super_unlock_excl(sb); } static void do_emergency_remount(struct work_struct *work) @@ -1027,12 +1196,13 @@ void emergency_remount(void) static void do_thaw_all_callback(struct super_block *sb) { - down_write(&sb->s_umount); - if (sb->s_root && sb->s_flags & SB_BORN) { + bool born = super_lock_excl(sb); + + if (born && sb->s_root) { emergency_thaw_bdev(sb); - thaw_super_locked(sb); + thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE); } else { - up_write(&sb->s_umount); + super_unlock_excl(sb); } } @@ -1136,7 +1306,7 @@ static int test_single_super(struct super_block *s, struct fs_context *fc) return 1; } -static int vfs_get_super(struct fs_context *fc, bool reconf, +static int vfs_get_super(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*fill_super)(struct super_block *sb, struct fs_context *fc)) @@ -1154,19 +1324,9 @@ static int vfs_get_super(struct fs_context *fc, bool reconf, goto error; sb->s_flags |= SB_ACTIVE; - fc->root = dget(sb->s_root); - } else { - fc->root = dget(sb->s_root); - if (reconf) { - err = reconfigure_super(fc); - if (err < 0) { - dput(fc->root); - fc->root = NULL; - goto error; - } - } } + fc->root = dget(sb->s_root); return 0; error: @@ -1178,7 +1338,7 @@ int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)) { - return vfs_get_super(fc, false, NULL, fill_super); + return vfs_get_super(fc, NULL, fill_super); } EXPORT_SYMBOL(get_tree_nodev); @@ -1186,54 +1346,81 @@ int get_tree_single(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)) { - return vfs_get_super(fc, false, test_single_super, fill_super); + return vfs_get_super(fc, test_single_super, fill_super); } EXPORT_SYMBOL(get_tree_single); -int get_tree_single_reconf(struct fs_context *fc, - int (*fill_super)(struct super_block *sb, - struct fs_context *fc)) -{ - return vfs_get_super(fc, true, test_single_super, fill_super); -} -EXPORT_SYMBOL(get_tree_single_reconf); - int get_tree_keyed(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc), void *key) { fc->s_fs_info = key; - return vfs_get_super(fc, false, test_keyed_super, fill_super); + return vfs_get_super(fc, test_keyed_super, fill_super); } EXPORT_SYMBOL(get_tree_keyed); #ifdef CONFIG_BLOCK -static void fs_mark_dead(struct block_device *bdev) +/* + * Lock a super block that the callers holds a reference to. + * + * The caller needs to ensure that the super_block isn't being freed while + * calling this function, e.g. by holding a lock over the call to this function + * and the place that clears the pointer to the superblock used by this function + * before freeing the superblock. + */ +static bool super_lock_shared_active(struct super_block *sb) { - struct super_block *sb; + bool born = super_lock_shared(sb); + + if (!born || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) { + super_unlock_shared(sb); + return false; + } + return true; +} + +static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise) +{ + struct super_block *sb = bdev->bd_holder; + + /* bd_holder_lock ensures that the sb isn't freed */ + lockdep_assert_held(&bdev->bd_holder_lock); - sb = get_super(bdev); - if (!sb) + if (!super_lock_shared_active(sb)) return; + if (!surprise) + sync_filesystem(sb); + shrink_dcache_sb(sb); + invalidate_inodes(sb); if (sb->s_op->shutdown) sb->s_op->shutdown(sb); - drop_super(sb); + + super_unlock_shared(sb); +} + +static void fs_bdev_sync(struct block_device *bdev) +{ + struct super_block *sb = bdev->bd_holder; + + lockdep_assert_held(&bdev->bd_holder_lock); + + if (!super_lock_shared_active(sb)) + return; + sync_filesystem(sb); + super_unlock_shared(sb); } -static const struct blk_holder_ops fs_holder_ops = { - .mark_dead = fs_mark_dead, +const struct blk_holder_ops fs_holder_ops = { + .mark_dead = fs_bdev_mark_dead, + .sync = fs_bdev_sync, }; +EXPORT_SYMBOL_GPL(fs_holder_ops); static int set_bdev_super(struct super_block *s, void *data) { - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; - s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi); - - if (bdev_stable_writes(s->s_bdev)) - s->s_iflags |= SB_I_STABLE_WRITES; + s->s_dev = *(dev_t *)data; return 0; } @@ -1244,8 +1431,63 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc) static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc) { - return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key; + return !(s->s_iflags & SB_I_RETIRED) && + s->s_dev == *(dev_t *)fc->sget_key; +} + +int setup_bdev_super(struct super_block *sb, int sb_flags, + struct fs_context *fc) +{ + blk_mode_t mode = sb_open_mode(sb_flags); + struct block_device *bdev; + + bdev = blkdev_get_by_dev(sb->s_dev, mode, sb, &fs_holder_ops); + if (IS_ERR(bdev)) { + if (fc) + errorf(fc, "%s: Can't open blockdev", fc->source); + return PTR_ERR(bdev); + } + + /* + * This really should be in blkdev_get_by_dev, but right now can't due + * to legacy issues that require us to allow opening a block device node + * writable from userspace even for a read-only block device. + */ + if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { + blkdev_put(bdev, sb); + return -EACCES; + } + + /* + * Until SB_BORN flag is set, there can be no active superblock + * references and thus no filesystem freezing. get_active_super() will + * just loop waiting for SB_BORN so even freeze_bdev() cannot proceed. + * + * It is enough to check bdev was not frozen before we set s_bdev. + */ + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + if (fc) + warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); + blkdev_put(bdev, sb); + return -EBUSY; + } + spin_lock(&sb_lock); + sb->s_bdev = bdev; + sb->s_bdi = bdi_get(bdev->bd_disk->bdi); + if (bdev_stable_writes(bdev)) + sb->s_iflags |= SB_I_STABLE_WRITES; + spin_unlock(&sb_lock); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + + snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); + shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name, + sb->s_id); + sb_set_blocksize(sb, block_size(bdev)); + return 0; } +EXPORT_SYMBOL_GPL(setup_bdev_super); /** * get_tree_bdev - Get a superblock based on a single block device @@ -1256,73 +1498,49 @@ int get_tree_bdev(struct fs_context *fc, int (*fill_super)(struct super_block *, struct fs_context *)) { - struct block_device *bdev; struct super_block *s; int error = 0; + dev_t dev; if (!fc->source) return invalf(fc, "No source specified"); - bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags), - fc->fs_type, &fs_holder_ops); - if (IS_ERR(bdev)) { - errorf(fc, "%s: Can't open blockdev", fc->source); - return PTR_ERR(bdev); - } - - /* Once the superblock is inserted into the list by sget_fc(), s_umount - * will protect the lockfs code from trying to start a snapshot while - * we are mounting - */ - mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); - warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - blkdev_put(bdev, fc->fs_type); - return -EBUSY; + error = lookup_bdev(fc->source, &dev); + if (error) { + errorf(fc, "%s: Can't lookup blockdev", fc->source); + return error; } fc->sb_flags |= SB_NOSEC; - fc->sget_key = bdev; + fc->sget_key = &dev; s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - if (IS_ERR(s)) { - blkdev_put(bdev, fc->fs_type); + if (IS_ERR(s)) return PTR_ERR(s); - } if (s->s_root) { /* Don't summarily change the RO/RW state. */ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { - warnf(fc, "%pg: Can't mount, would change RO state", bdev); + warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev); deactivate_locked_super(s); - blkdev_put(bdev, fc->fs_type); return -EBUSY; } - + } else { /* - * s_umount nests inside open_mutex during - * __invalidate_device(). blkdev_put() acquires - * open_mutex and can't be called under s_umount. Drop - * s_umount temporarily. This is safe as we're - * holding an active reference. + * We drop s_umount here because we need to open the bdev and + * bdev->open_mutex ranks above s_umount (blkdev_put() -> + * bdev_mark_dead()). It is safe because we have active sb + * reference and SB_BORN is not set yet. */ - up_write(&s->s_umount); - blkdev_put(bdev, fc->fs_type); - down_write(&s->s_umount); - } else { - snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); - shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", - fc->fs_type->name, s->s_id); - sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, fc); + super_unlock_excl(s); + error = setup_bdev_super(s, fc->sb_flags, fc); + __super_lock_excl(s); + if (!error) + error = fill_super(s, fc); if (error) { deactivate_locked_super(s); return error; } - s->s_flags |= SB_ACTIVE; - bdev->bd_super = s; } BUG_ON(fc->root); @@ -1333,79 +1551,52 @@ EXPORT_SYMBOL(get_tree_bdev); static int test_bdev_super(struct super_block *s, void *data) { - return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data; + return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data; } struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)) { - struct block_device *bdev; struct super_block *s; - int error = 0; + int error; + dev_t dev; - bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type, - &fs_holder_ops); - if (IS_ERR(bdev)) - return ERR_CAST(bdev); + error = lookup_bdev(dev_name, &dev); + if (error) + return ERR_PTR(error); - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); - error = -EBUSY; - goto error_bdev; - } - s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC, - bdev); - mutex_unlock(&bdev->bd_fsfreeze_mutex); + flags |= SB_NOSEC; + s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev); if (IS_ERR(s)) - goto error_s; + return ERR_CAST(s); if (s->s_root) { if ((flags ^ s->s_flags) & SB_RDONLY) { deactivate_locked_super(s); - error = -EBUSY; - goto error_bdev; + return ERR_PTR(-EBUSY); } - + } else { /* - * s_umount nests inside open_mutex during - * __invalidate_device(). blkdev_put() acquires - * open_mutex and can't be called under s_umount. Drop - * s_umount temporarily. This is safe as we're - * holding an active reference. + * We drop s_umount here because we need to open the bdev and + * bdev->open_mutex ranks above s_umount (blkdev_put() -> + * bdev_mark_dead()). It is safe because we have active sb + * reference and SB_BORN is not set yet. */ - up_write(&s->s_umount); - blkdev_put(bdev, fs_type); - down_write(&s->s_umount); - } else { - snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); - shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", - fs_type->name, s->s_id); - sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); + super_unlock_excl(s); + error = setup_bdev_super(s, flags, NULL); + __super_lock_excl(s); + if (!error) + error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); - goto error; + return ERR_PTR(error); } s->s_flags |= SB_ACTIVE; - bdev->bd_super = s; } return dget(s->s_root); - -error_s: - error = PTR_ERR(s); -error_bdev: - blkdev_put(bdev, fs_type); -error: - return ERR_PTR(error); } EXPORT_SYMBOL(mount_bdev); @@ -1413,10 +1604,11 @@ void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; - bdev->bd_super = NULL; generic_shutdown_super(sb); - sync_blockdev(bdev); - blkdev_put(bdev, sb->s_type); + if (bdev) { + sync_blockdev(bdev); + blkdev_put(bdev, sb); + } } EXPORT_SYMBOL(kill_block_super); @@ -1533,13 +1725,13 @@ int vfs_get_tree(struct fs_context *fc) WARN_ON(!sb->s_bdi); /* - * Write barrier is for super_cache_count(). We place it before setting - * SB_BORN as the data dependency between the two functions is the - * superblock structure contents that we just set up, not the SB_BORN - * flag. + * super_wake() contains a memory barrier which also care of + * ordering for super_cache_count(). We place it before setting + * SB_BORN as the data dependency between the two functions is + * the superblock structure contents that we just set up, not + * the SB_BORN flag. */ - smp_wmb(); - sb->s_flags |= SB_BORN; + super_wake(sb, SB_BORN); error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL); if (unlikely(error)) { @@ -1644,14 +1836,43 @@ static void sb_freeze_unlock(struct super_block *sb, int level) percpu_up_write(sb->s_writers.rw_sem + level); } +static int wait_for_partially_frozen(struct super_block *sb) +{ + int ret = 0; + + do { + unsigned short old = sb->s_writers.frozen; + + up_write(&sb->s_umount); + ret = wait_var_event_killable(&sb->s_writers.frozen, + sb->s_writers.frozen != old); + down_write(&sb->s_umount); + } while (ret == 0 && + sb->s_writers.frozen != SB_UNFROZEN && + sb->s_writers.frozen != SB_FREEZE_COMPLETE); + + return ret; +} + /** * freeze_super - lock the filesystem and force it into a consistent state * @sb: the super to lock + * @who: context that wants to freeze * * Syncs the super to make sure the filesystem is consistent and calls the fs's - * freeze_fs. Subsequent calls to this without first thawing the fs will return + * freeze_fs. Subsequent calls to this without first thawing the fs may return * -EBUSY. * + * @who should be: + * * %FREEZE_HOLDER_USERSPACE if userspace wants to freeze the fs; + * * %FREEZE_HOLDER_KERNEL if the kernel wants to freeze the fs. + * + * The @who argument distinguishes between the kernel and userspace trying to + * freeze the filesystem. Although there cannot be multiple kernel freezes or + * multiple userspace freezes in effect at any given time, the kernel and + * userspace can both hold a filesystem frozen. The filesystem remains frozen + * until there are no kernel or userspace freezes in effect. + * * During this function, sb->s_writers.frozen goes through these values: * * SB_UNFROZEN: File system is normal, all writes progress as usual. @@ -1677,34 +1898,62 @@ static void sb_freeze_unlock(struct super_block *sb, int level) * * sb->s_writers.frozen is protected by sb->s_umount. */ -int freeze_super(struct super_block *sb) +int freeze_super(struct super_block *sb, enum freeze_holder who) { int ret; atomic_inc(&sb->s_active); - down_write(&sb->s_umount); + if (!super_lock_excl(sb)) + WARN(1, "Dying superblock while freezing!"); + +retry: + if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) { + if (sb->s_writers.freeze_holders & who) { + deactivate_locked_super(sb); + return -EBUSY; + } + + WARN_ON(sb->s_writers.freeze_holders == 0); + + /* + * Someone else already holds this type of freeze; share the + * freeze and assign the active ref to the freeze. + */ + sb->s_writers.freeze_holders |= who; + super_unlock_excl(sb); + return 0; + } + if (sb->s_writers.frozen != SB_UNFROZEN) { - deactivate_locked_super(sb); - return -EBUSY; + ret = wait_for_partially_frozen(sb); + if (ret) { + deactivate_locked_super(sb); + return ret; + } + + goto retry; } if (!(sb->s_flags & SB_BORN)) { - up_write(&sb->s_umount); + super_unlock_excl(sb); return 0; /* sic - it's "nothing to do" */ } if (sb_rdonly(sb)) { /* Nothing to do really... */ + sb->s_writers.freeze_holders |= who; sb->s_writers.frozen = SB_FREEZE_COMPLETE; - up_write(&sb->s_umount); + wake_up_var(&sb->s_writers.frozen); + super_unlock_excl(sb); return 0; } sb->s_writers.frozen = SB_FREEZE_WRITE; /* Release s_umount to preserve sb_start_write -> s_umount ordering */ - up_write(&sb->s_umount); + super_unlock_excl(sb); sb_wait_write(sb, SB_FREEZE_WRITE); - down_write(&sb->s_umount); + if (!super_lock_excl(sb)) + WARN(1, "Dying superblock while freezing!"); /* Now we go and block page faults... */ sb->s_writers.frozen = SB_FREEZE_PAGEFAULT; @@ -1715,6 +1964,7 @@ int freeze_super(struct super_block *sb) if (ret) { sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); + wake_up_var(&sb->s_writers.frozen); deactivate_locked_super(sb); return ret; } @@ -1730,6 +1980,7 @@ int freeze_super(struct super_block *sb) "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_FS); + wake_up_var(&sb->s_writers.frozen); deactivate_locked_super(sb); return ret; } @@ -1738,24 +1989,50 @@ int freeze_super(struct super_block *sb) * For debugging purposes so that fs can warn if it sees write activity * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super(). */ + sb->s_writers.freeze_holders |= who; sb->s_writers.frozen = SB_FREEZE_COMPLETE; + wake_up_var(&sb->s_writers.frozen); lockdep_sb_freeze_release(sb); - up_write(&sb->s_umount); + super_unlock_excl(sb); return 0; } EXPORT_SYMBOL(freeze_super); -static int thaw_super_locked(struct super_block *sb) +/* + * Undoes the effect of a freeze_super_locked call. If the filesystem is + * frozen both by userspace and the kernel, a thaw call from either source + * removes that state without releasing the other state or unlocking the + * filesystem. + */ +static int thaw_super_locked(struct super_block *sb, enum freeze_holder who) { int error; - if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) { - up_write(&sb->s_umount); + if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) { + if (!(sb->s_writers.freeze_holders & who)) { + super_unlock_excl(sb); + return -EINVAL; + } + + /* + * Freeze is shared with someone else. Release our hold and + * drop the active ref that freeze_super assigned to the + * freezer. + */ + if (sb->s_writers.freeze_holders & ~who) { + sb->s_writers.freeze_holders &= ~who; + deactivate_locked_super(sb); + return 0; + } + } else { + super_unlock_excl(sb); return -EINVAL; } if (sb_rdonly(sb)) { + sb->s_writers.freeze_holders &= ~who; sb->s_writers.frozen = SB_UNFROZEN; + wake_up_var(&sb->s_writers.frozen); goto out; } @@ -1764,15 +2041,16 @@ static int thaw_super_locked(struct super_block *sb) if (sb->s_op->unfreeze_fs) { error = sb->s_op->unfreeze_fs(sb); if (error) { - printk(KERN_ERR - "VFS:Filesystem thaw failed\n"); + printk(KERN_ERR "VFS:Filesystem thaw failed\n"); lockdep_sb_freeze_release(sb); - up_write(&sb->s_umount); + super_unlock_excl(sb); return error; } } + sb->s_writers.freeze_holders &= ~who; sb->s_writers.frozen = SB_UNFROZEN; + wake_up_var(&sb->s_writers.frozen); sb_freeze_unlock(sb, SB_FREEZE_FS); out: deactivate_locked_super(sb); @@ -1782,13 +2060,20 @@ out: /** * thaw_super -- unlock filesystem * @sb: the super to thaw + * @who: context that wants to freeze + * + * Unlocks the filesystem and marks it writeable again after freeze_super() + * if there are no remaining freezes on the filesystem. * - * Unlocks the filesystem and marks it writeable again after freeze_super(). + * @who should be: + * * %FREEZE_HOLDER_USERSPACE if userspace wants to thaw the fs; + * * %FREEZE_HOLDER_KERNEL if the kernel wants to thaw the fs. */ -int thaw_super(struct super_block *sb) +int thaw_super(struct super_block *sb, enum freeze_holder who) { - down_write(&sb->s_umount); - return thaw_super_locked(sb); + if (!super_lock_excl(sb)) + WARN(1, "Dying superblock while thawing!"); + return thaw_super_locked(sb, who); } EXPORT_SYMBOL(thaw_super); diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 0140010aa0c3..2f5ead88d00b 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -224,7 +224,7 @@ got_it: memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2); de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); dir_commit_chunk(page, pos, SYSV_DIRSIZE); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); err = sysv_handle_dirsync(dir); out_page: @@ -249,7 +249,7 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) } de->inode = 0; dir_commit_chunk(page, pos, SYSV_DIRSIZE); - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); return sysv_handle_dirsync(inode); } @@ -346,7 +346,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page, } de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); dir_commit_chunk(page, pos, SYSV_DIRSIZE); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); return sysv_handle_dirsync(inode); } diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index e732879036ab..6719da5889d9 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -165,7 +165,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode) dirty_sb(sb); inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = fs16_to_cpu(sbi, ino); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_blocks = 0; memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data)); SYSV_I(inode)->i_dir_start_lookup = 0; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 9e8d4a6fb2f3..0aa3827d8178 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -202,8 +202,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino) inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime); inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime); - inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_ctime); - inode->i_ctime.tv_nsec = 0; + inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->i_ctime), 0); inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_blocks = 0; @@ -256,7 +255,7 @@ static int __sysv_write_inode(struct inode *inode, int wait) raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size); raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec); raw_inode->i_mtime = cpu_to_fs32(sbi, inode->i_mtime.tv_sec); - raw_inode->i_ctime = cpu_to_fs32(sbi, inode->i_ctime.tv_sec); + raw_inode->i_ctime = cpu_to_fs32(sbi, inode_get_ctime(inode).tv_sec); si = SYSV_I(inode); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 58d7f43a1371..edb94e55de8e 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -183,7 +183,7 @@ static inline int splice_branch(struct inode *inode, *where->p = where->key; write_unlock(&pointers_lock); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); /* had we spliced it onto indirect block? */ if (where->bh) @@ -423,7 +423,7 @@ do_indirects: } n++; } - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); if (IS_SYNC(inode)) sysv_sync_inode (inode); else @@ -449,7 +449,8 @@ int sysv_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct super_block *s = path->dentry->d_sb; - generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry), + stat); stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); stat->blksize = s->s_blocksize; return 0; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index fcf163fea3ad..d6b73798071b 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -103,7 +103,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, { struct inode *inode = d_inode(old_dentry); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); @@ -161,7 +161,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry) err = sysv_delete_entry(de, page); if (!err) { - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); } unmap_and_put_page(page, de); @@ -230,7 +230,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir, unmap_and_put_page(new_page, new_de); if (err) goto out_dir; - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); if (dir_de) drop_nlink(new_inode); inode_dec_link_count(new_inode); diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 57ac8aa4a724..2feb6c58648c 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -132,7 +132,7 @@ static struct inode *tracefs_get_inode(struct super_block *sb) struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); } return inode; } diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 9c9d3f0e36a4..eef9e527d9ff 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -243,8 +243,8 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) (unsigned int)inode->i_mtime.tv_sec, (unsigned int)inode->i_mtime.tv_nsec); pr_err("\tctime %u.%u\n", - (unsigned int)inode->i_ctime.tv_sec, - (unsigned int)inode->i_ctime.tv_nsec); + (unsigned int) inode_get_ctime(inode).tv_sec, + (unsigned int) inode_get_ctime(inode).tv_nsec); pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum); pr_err("\txattr_size %u\n", ui->xattr_size); pr_err("\txattr_cnt %u\n", ui->xattr_cnt); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ef0499edc248..2f48c58d47cd 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -96,8 +96,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, inode->i_flags |= S_NOCMTIME; inode_init_owner(&nop_mnt_idmap, inode, dir, mode); - inode->i_mtime = inode->i_atime = inode->i_ctime = - current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_mapping->nrpages = 0; if (!is_xattr) { @@ -325,7 +324,7 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir, mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; + dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode)); err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); if (err) goto out_cancel; @@ -765,10 +764,10 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); ihold(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; + dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode)); err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); if (err) goto out_cancel; @@ -838,11 +837,11 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) } lock_2_inodes(dir, inode); - inode->i_ctime = current_time(dir); + inode_set_ctime_current(inode); drop_nlink(inode); dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; + dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode)); err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); if (err) goto out_cancel; @@ -940,12 +939,12 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) } lock_2_inodes(dir, inode); - inode->i_ctime = current_time(dir); + inode_set_ctime_current(inode); clear_nlink(inode); drop_nlink(dir); dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; + dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode)); err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); if (err) goto out_cancel; @@ -1019,7 +1018,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, inc_nlink(dir); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; + dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode)); err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); if (err) { ubifs_err(c, "cannot create directory, error %d", err); @@ -1110,7 +1109,7 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir, mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; + dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode)); err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); if (err) goto out_cancel; @@ -1210,7 +1209,7 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; + dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode)); err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); if (err) goto out_cancel; @@ -1298,7 +1297,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, struct ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct ubifs_budget_req wht_req; - struct timespec64 time; unsigned int saved_nlink; struct fscrypt_name old_nm, new_nm; @@ -1414,8 +1412,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the @i_ctime for inodes on a * rename. */ - time = current_time(old_dir); - old_inode->i_ctime = time; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); /* We must adjust parent link count when renaming directories */ if (is_dir) { @@ -1444,13 +1441,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir->i_size -= old_sz; ubifs_inode(old_dir)->ui_size = old_dir->i_size; - old_dir->i_mtime = old_dir->i_ctime = time; - new_dir->i_mtime = new_dir->i_ctime = time; /* * And finally, if we unlinked a direntry which happened to have the * same name as the moved direntry, we have to decrement @i_nlink of - * the unlinked inode and change its ctime. + * the unlinked inode. */ if (unlink) { /* @@ -1462,7 +1457,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, clear_nlink(new_inode); else drop_nlink(new_inode); - new_inode->i_ctime = time; } else { new_dir->i_size += new_sz; ubifs_inode(new_dir)->ui_size = new_dir->i_size; @@ -1557,7 +1551,6 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, int sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); struct inode *fst_inode = d_inode(old_dentry); struct inode *snd_inode = d_inode(new_dentry); - struct timespec64 time; int err; struct fscrypt_name fst_nm, snd_nm; @@ -1588,11 +1581,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, lock_4_inodes(old_dir, new_dir, NULL, NULL); - time = current_time(old_dir); - fst_inode->i_ctime = time; - snd_inode->i_ctime = time; - old_dir->i_mtime = old_dir->i_ctime = time; - new_dir->i_mtime = new_dir->i_ctime = time; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); if (old_dir != new_dir) { if (S_ISDIR(fst_inode->i_mode) && !S_ISDIR(snd_inode->i_mode)) { @@ -1665,7 +1654,7 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path, STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE); - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->blksize = UBIFS_BLOCK_SIZE; stat->size = ui->ui_size; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 6738fe43040b..e5382f0b2587 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1092,7 +1092,7 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr) if (attr->ia_valid & ATTR_MTIME) inode->i_mtime = attr->ia_mtime; if (attr->ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; + inode_set_ctime_to_ts(inode, attr->ia_ctime); if (attr->ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; @@ -1192,7 +1192,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); ui->ui_size = inode->i_size; /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); /* Other attributes may be changed at the same time as well */ do_attr_changes(inode, attr); err = ubifs_jnl_truncate(c, inode, old_size, new_size); @@ -1239,7 +1239,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); /* 'truncate_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -1364,8 +1364,10 @@ out: static inline int mctime_update_needed(const struct inode *inode, const struct timespec64 *now) { + struct timespec64 ctime = inode_get_ctime(inode); + if (!timespec64_equal(&inode->i_mtime, now) || - !timespec64_equal(&inode->i_ctime, now)) + !timespec64_equal(&ctime, now)) return 1; return 0; } @@ -1376,8 +1378,7 @@ static inline int mctime_update_needed(const struct inode *inode, * * This function updates time of the inode. */ -int ubifs_update_time(struct inode *inode, struct timespec64 *time, - int flags) +int ubifs_update_time(struct inode *inode, int flags) { struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -1385,21 +1386,17 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time, .dirtied_ino_d = ALIGN(ui->data_len, 8) }; int err, release; - if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) - return generic_update_time(inode, time, flags); + if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) { + generic_update_time(inode, flags); + return 0; + } err = ubifs_budget_space(c, &req); if (err) return err; mutex_lock(&ui->ui_mutex); - if (flags & S_ATIME) - inode->i_atime = *time; - if (flags & S_CTIME) - inode->i_ctime = *time; - if (flags & S_MTIME) - inode->i_mtime = *time; - + inode_update_timestamps(inode, flags); release = ui->dirty; __mark_inode_dirty(inode, I_DIRTY_SYNC); mutex_unlock(&ui->ui_mutex); @@ -1432,7 +1429,7 @@ static int update_mctime(struct inode *inode) return err; mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); @@ -1570,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 67c5108abd89..d79cabe193c3 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -118,7 +118,7 @@ static int setflags(struct inode *inode, int flags) ui->flags &= ~ioctl2ubifs(UBIFS_SETTABLE_IOCTL_FLAGS); ui->flags |= ioctl2ubifs(flags); ubifs_set_inode_flags(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index dc52ac0f4a34..ffc9beee7be6 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -454,8 +454,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum); ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec); ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); - ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec); - ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ino->ctime_sec = cpu_to_le64(inode_get_ctime(inode).tv_sec); + ino->ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); ino->uid = cpu_to_le32(i_uid_read(inode)); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 32cb14759796..b08fb28d16b5 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -146,8 +146,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); - inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec); - inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); + inode_set_ctime(inode, (int64_t)le64_to_cpu(ino->ctime_sec), + le32_to_cpu(ino->ctime_nsec)); inode->i_mode = le32_to_cpu(ino->mode); inode->i_size = le64_to_cpu(ino->size); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 4c36044140e7..ebb3ad6b5e7e 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -2027,7 +2027,7 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc); int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); -int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags); +int ubifs_update_time(struct inode *inode, int flags); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 349228dd1191..406c82eab513 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -134,7 +134,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = current_time(host); + inode_set_ctime_current(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -215,7 +215,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = current_time(host); + inode_set_ctime_current(host); host_ui->xattr_size -= CALC_XATTR_BYTES(old_size); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -474,7 +474,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, return err; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = current_time(host); + inode_set_ctime_current(host); host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 5f7ac8c84798..6b558cbbeb6b 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -100,7 +100,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; else iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); iinfo->i_crtime = inode->i_mtime; if (unlikely(insert_inode_locked(inode) < 0)) { make_bad_inode(inode); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 28cdfc57d946..d089795074e8 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -910,7 +910,7 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) map->oflags = UDF_BLK_NEW | UDF_BLK_MAPPED; iinfo->i_next_alloc_block = map->lblk + 1; iinfo->i_next_alloc_goal = newblocknum + 1; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (IS_SYNC(inode)) udf_sync_inode(inode); @@ -1298,7 +1298,7 @@ set_size: goto out_unlock; } update_time: - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); if (IS_SYNC(inode)) udf_sync_inode(inode); else @@ -1329,6 +1329,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode) int bs = inode->i_sb->s_blocksize; int ret = -EIO; uint32_t uid, gid; + struct timespec64 ctime; reread: if (iloc->partitionReferenceNum >= sbi->s_partitions) { @@ -1507,7 +1508,8 @@ reread: udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime); udf_disk_stamp_to_time(&inode->i_mtime, fe->modificationTime); - udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime); + udf_disk_stamp_to_time(&ctime, fe->attrTime); + inode_set_ctime_to_ts(inode, ctime); iinfo->i_unique = le64_to_cpu(fe->uniqueID); iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); @@ -1522,7 +1524,8 @@ reread: udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime); udf_disk_stamp_to_time(&inode->i_mtime, efe->modificationTime); udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime); - udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime); + udf_disk_stamp_to_time(&ctime, efe->attrTime); + inode_set_ctime_to_ts(inode, ctime); iinfo->i_unique = le64_to_cpu(efe->uniqueID); iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); @@ -1799,7 +1802,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); - udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime); + udf_time_to_disk_stamp(&fe->attrTime, inode_get_ctime(inode)); memset(&(fe->impIdent), 0, sizeof(struct regid)); strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; @@ -1830,12 +1833,12 @@ static int udf_update_inode(struct inode *inode, int do_sync) udf_adjust_time(iinfo, inode->i_atime); udf_adjust_time(iinfo, inode->i_mtime); - udf_adjust_time(iinfo, inode->i_ctime); + udf_adjust_time(iinfo, inode_get_ctime(inode)); udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime); udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime); udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime); - udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime); + udf_time_to_disk_stamp(&efe->attrTime, inode_get_ctime(inode)); memset(&(efe->impIdent), 0, sizeof(efe->impIdent)); strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index a95579b043ab..ae55ab8859b6 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -365,7 +365,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) *(__le32 *)((struct allocDescImpUse *)iter.fi.icb.impUse)->impUse = cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); udf_fiiter_write_fi(&iter, NULL); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); udf_fiiter_release(&iter); udf_add_fid_counter(dir->i_sb, false, 1); @@ -471,7 +471,7 @@ static int udf_mkdir(struct mnt_idmap *idmap, struct inode *dir, udf_fiiter_release(&iter); udf_add_fid_counter(dir->i_sb, true, 1); inc_nlink(dir); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); d_instantiate_new(dentry, inode); @@ -523,8 +523,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) inode->i_size = 0; inode_dec_link_count(dir); udf_add_fid_counter(dir->i_sb, true, -1); - inode->i_ctime = dir->i_ctime = dir->i_mtime = - current_time(inode); + dir->i_mtime = inode_set_ctime_to_ts(dir, + inode_set_ctime_current(inode)); mark_inode_dirty(dir); ret = 0; end_rmdir: @@ -555,11 +555,11 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) set_nlink(inode, 1); } udf_fiiter_delete_entry(&iter); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); inode_dec_link_count(inode); udf_add_fid_counter(dir->i_sb, false, -1); - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); ret = 0; end_unlink: udf_fiiter_release(&iter); @@ -746,9 +746,9 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); udf_add_fid_counter(dir->i_sb, false, 1); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); - dir->i_ctime = dir->i_mtime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); ihold(inode); d_instantiate(dentry, inode); @@ -833,7 +833,7 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = current_time(old_inode); + inode_set_ctime_current(old_inode); mark_inode_dirty(old_inode); /* @@ -861,13 +861,13 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, } if (new_inode) { - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); inode_dec_link_count(new_inode); udf_add_fid_counter(old_dir->i_sb, S_ISDIR(new_inode->i_mode), -1); } - old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); - new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir); + old_dir->i_mtime = inode_set_ctime_current(old_dir); + new_dir->i_mtime = inode_set_ctime_current(new_dir); mark_inode_dirty(old_dir); mark_inode_dirty(new_dir); diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 779b5c2c75f6..f7eaf7b14594 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -149,7 +149,7 @@ static int udf_symlink_getattr(struct mnt_idmap *idmap, struct inode *inode = d_backing_inode(dentry); struct page *page; - generic_fillattr(&nop_mnt_idmap, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); page = read_mapping_page(inode->i_mapping, 0, NULL); if (IS_ERR(page)) return PTR_ERR(page); diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 379d75796a5c..fd57f03b6c93 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -107,7 +107,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, ufs_commit_chunk(page, pos, len); ufs_put_page(page); if (update_times) - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); ufs_handle_dirsync(dir); } @@ -397,7 +397,7 @@ got_it: ufs_set_de_type(sb, de, inode->i_mode); ufs_commit_chunk(page, pos, rec_len); - dir->i_mtime = dir->i_ctime = current_time(dir); + dir->i_mtime = inode_set_ctime_current(dir); mark_inode_dirty(dir); err = ufs_handle_dirsync(dir); @@ -539,7 +539,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir, pde->d_reclen = cpu_to_fs16(sb, to - from); dir->d_ino = 0; ufs_commit_chunk(page, pos, to - from); - inode->i_ctime = inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); err = ufs_handle_dirsync(inode); out: diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 06bd84d555bd..a1e7bd9d1f98 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -292,7 +292,7 @@ cg_found: inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_blocks = 0; inode->i_generation = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); ufsi->i_flags = UFS_I(dir)->i_flags; ufsi->i_lastfrag = 0; ufsi->i_shadow = 0; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index a4246c83a8cd..21a4779a2de5 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -296,7 +296,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index, if (new) *new = 1; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (IS_SYNC(inode)) ufs_sync_inode (inode); mark_inode_dirty(inode); @@ -378,7 +378,7 @@ ufs_inode_getblock(struct inode *inode, u64 ind_block, mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); mark_inode_dirty(inode); out: brelse (bh); @@ -580,11 +580,12 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size); inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); - inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec); + inode_set_ctime(inode, + (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec), + 0); inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec); inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks); inode->i_generation = fs32_to_cpu(sb, ufs_inode->ui_gen); ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags); @@ -626,10 +627,10 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size); inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime); - inode->i_ctime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_ctime); + inode_set_ctime(inode, fs64_to_cpu(sb, ufs2_inode->ui_ctime), + fs32_to_cpu(sb, ufs2_inode->ui_ctimensec)); inode->i_mtime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_mtime); inode->i_atime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_atimensec); - inode->i_ctime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_ctimensec); inode->i_mtime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_mtimensec); inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks); inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen); @@ -726,7 +727,8 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode) ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); ufs_inode->ui_atime.tv_usec = 0; - ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec); + ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, + inode_get_ctime(inode).tv_sec); ufs_inode->ui_ctime.tv_usec = 0; ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec); ufs_inode->ui_mtime.tv_usec = 0; @@ -770,8 +772,9 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode) ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec); ufs_inode->ui_atimensec = cpu_to_fs32(sb, inode->i_atime.tv_nsec); - ufs_inode->ui_ctime = cpu_to_fs64(sb, inode->i_ctime.tv_sec); - ufs_inode->ui_ctimensec = cpu_to_fs32(sb, inode->i_ctime.tv_nsec); + ufs_inode->ui_ctime = cpu_to_fs64(sb, inode_get_ctime(inode).tv_sec); + ufs_inode->ui_ctimensec = cpu_to_fs32(sb, + inode_get_ctime(inode).tv_nsec); ufs_inode->ui_mtime = cpu_to_fs64(sb, inode->i_mtime.tv_sec); ufs_inode->ui_mtimensec = cpu_to_fs32(sb, inode->i_mtime.tv_nsec); @@ -1205,7 +1208,7 @@ static int ufs_truncate(struct inode *inode, loff_t size) truncate_setsize(inode, size); ufs_truncate_blocks(inode); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); mark_inode_dirty(inode); out: UFSD("EXIT: err %d\n", err); diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 36154b5aca6d..9cad29463791 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -153,7 +153,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, struct inode *inode = d_inode(old_dentry); int error; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); @@ -220,7 +220,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry) if (err) goto out; - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); err = 0; out: @@ -282,7 +282,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (!new_de) goto out_dir; ufs_set_link(new_dir, new_de, new_page, old_inode, 1); - new_inode->i_ctime = current_time(new_inode); + inode_set_ctime_current(new_inode); if (dir_de) drop_nlink(new_inode); inode_dec_link_count(new_inode); @@ -298,7 +298,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = current_time(old_inode); + inode_set_ctime_current(old_inode); ufs_delete_entry(old_dir, old_de, old_page); mark_inode_dirty(old_inode); diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c index 075f15c43c78..5f1a14d5b927 100644 --- a/fs/vboxsf/dir.c +++ b/fs/vboxsf/dir.c @@ -179,9 +179,10 @@ static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx) return 0; } +WRAP_DIR_ITER(vboxsf_dir_iterate) // FIXME! const struct file_operations vboxsf_dir_fops = { .open = vboxsf_dir_open, - .iterate = vboxsf_dir_iterate, + .iterate_shared = shared_vboxsf_dir_iterate, .release = vboxsf_dir_release, .read = generic_read_dir, .llseek = generic_file_llseek, diff --git a/fs/vboxsf/shfl_hostintf.h b/fs/vboxsf/shfl_hostintf.h index aca829062c12..069a019c9247 100644 --- a/fs/vboxsf/shfl_hostintf.h +++ b/fs/vboxsf/shfl_hostintf.h @@ -68,9 +68,9 @@ struct shfl_string { /** UTF-8 or UTF-16 string. Nul terminated. */ union { - u8 utf8[2]; - u16 utf16[1]; - u16 ucs2[1]; /* misnomer, use utf16. */ + u8 legacy_padding[2]; + DECLARE_FLEX_ARRAY(u8, utf8); + DECLARE_FLEX_ARRAY(u16, utf16); } string; }; VMMDEV_ASSERT_SIZE(shfl_string, 6); diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c index dd0ae1188e87..83f20dd15522 100644 --- a/fs/vboxsf/utils.c +++ b/fs/vboxsf/utils.c @@ -128,8 +128,8 @@ int vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, inode->i_atime = ns_to_timespec64( info->access_time.ns_relative_to_unix_epoch); - inode->i_ctime = ns_to_timespec64( - info->change_time.ns_relative_to_unix_epoch); + inode_set_ctime_to_ts(inode, + ns_to_timespec64(info->change_time.ns_relative_to_unix_epoch)); inode->i_mtime = ns_to_timespec64( info->modification_time.ns_relative_to_unix_epoch); return 0; @@ -252,7 +252,7 @@ int vboxsf_getattr(struct mnt_idmap *idmap, const struct path *path, if (err) return err; - generic_fillattr(&nop_mnt_idmap, d_inode(dentry), kstat); + generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), kstat); return 0; } diff --git a/fs/xattr.c b/fs/xattr.c index e7bbb7f57557..efd4736bc94b 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1040,12 +1040,32 @@ const char *xattr_full_name(const struct xattr_handler *handler, EXPORT_SYMBOL(xattr_full_name); /** - * free_simple_xattr - free an xattr object + * simple_xattr_space - estimate the memory used by a simple xattr + * @name: the full name of the xattr + * @size: the size of its value + * + * This takes no account of how much larger the two slab objects actually are: + * that would depend on the slab implementation, when what is required is a + * deterministic number, which grows with name length and size and quantity. + * + * Return: The approximate number of bytes of memory used by such an xattr. + */ +size_t simple_xattr_space(const char *name, size_t size) +{ + /* + * Use "40" instead of sizeof(struct simple_xattr), to return the + * same result on 32-bit and 64-bit, and even if simple_xattr grows. + */ + return 40 + size + strlen(name); +} + +/** + * simple_xattr_free - free an xattr object * @xattr: the xattr object * * Free the xattr object. Can handle @xattr being NULL. */ -static inline void free_simple_xattr(struct simple_xattr *xattr) +void simple_xattr_free(struct simple_xattr *xattr) { if (xattr) kfree(xattr->name); @@ -1073,7 +1093,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) if (len < sizeof(*new_xattr)) return NULL; - new_xattr = kvmalloc(len, GFP_KERNEL); + new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT); if (!new_xattr) return NULL; @@ -1164,7 +1184,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, * @value: the value to store along the xattr * @size: the size of @value * @flags: the flags determining how to set the xattr - * @removed_size: the size of the removed xattr * * Set a new xattr object. * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE @@ -1181,29 +1200,27 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For * XATTR_REPLACE we fail as mentioned above. * - * Return: On success zero and on error a negative error code is returned. + * Return: On success, the removed or replaced xattr is returned, to be freed + * by the caller; or NULL if none. On failure a negative error code is returned. */ -int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, - const void *value, size_t size, int flags, - ssize_t *removed_size) +struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, + const char *name, const void *value, + size_t size, int flags) { - struct simple_xattr *xattr = NULL, *new_xattr = NULL; + struct simple_xattr *old_xattr = NULL, *new_xattr = NULL; struct rb_node *parent = NULL, **rbp; int err = 0, ret; - if (removed_size) - *removed_size = -1; - /* value == NULL means remove */ if (value) { new_xattr = simple_xattr_alloc(value, size); if (!new_xattr) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - new_xattr->name = kstrdup(name, GFP_KERNEL); + new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT); if (!new_xattr->name) { - free_simple_xattr(new_xattr); - return -ENOMEM; + simple_xattr_free(new_xattr); + return ERR_PTR(-ENOMEM); } } @@ -1217,12 +1234,12 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, else if (ret > 0) rbp = &(*rbp)->rb_right; else - xattr = rb_entry(*rbp, struct simple_xattr, rb_node); - if (xattr) + old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node); + if (old_xattr) break; } - if (xattr) { + if (old_xattr) { /* Fail if XATTR_CREATE is requested and the xattr exists. */ if (flags & XATTR_CREATE) { err = -EEXIST; @@ -1230,12 +1247,10 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, } if (new_xattr) - rb_replace_node(&xattr->rb_node, &new_xattr->rb_node, - &xattrs->rb_root); + rb_replace_node(&old_xattr->rb_node, + &new_xattr->rb_node, &xattrs->rb_root); else - rb_erase(&xattr->rb_node, &xattrs->rb_root); - if (!err && removed_size) - *removed_size = xattr->size; + rb_erase(&old_xattr->rb_node, &xattrs->rb_root); } else { /* Fail if XATTR_REPLACE is requested but no xattr is found. */ if (flags & XATTR_REPLACE) { @@ -1260,12 +1275,10 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, out_unlock: write_unlock(&xattrs->lock); - if (err) - free_simple_xattr(new_xattr); - else - free_simple_xattr(xattr); - return err; - + if (!err) + return old_xattr; + simple_xattr_free(new_xattr); + return ERR_PTR(err); } static bool xattr_is_trusted(const char *name) @@ -1370,14 +1383,17 @@ void simple_xattrs_init(struct simple_xattrs *xattrs) /** * simple_xattrs_free - free xattrs * @xattrs: xattr header whose xattrs to destroy + * @freed_space: approximate number of bytes of memory freed from @xattrs * * Destroy all xattrs in @xattr. When this is called no one can hold a * reference to any of the xattrs anymore. */ -void simple_xattrs_free(struct simple_xattrs *xattrs) +void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space) { struct rb_node *rbp; + if (freed_space) + *freed_space = 0; rbp = rb_first(&xattrs->rb_root); while (rbp) { struct simple_xattr *xattr; @@ -1386,7 +1402,10 @@ void simple_xattrs_free(struct simple_xattrs *xattrs) rbp_next = rb_next(rbp); xattr = rb_entry(rbp, struct simple_xattr, rb_node); rb_erase(&xattr->rb_node, &xattrs->rb_root); - free_simple_xattr(xattr); + if (freed_space) + *freed_space += simple_xattr_space(xattr->name, + xattr->size); + simple_xattr_free(xattr); rbp = rbp_next; } } diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 25e2841084e1..f9015f88eca7 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -591,7 +591,7 @@ struct xfs_attr_shortform { uint8_t valuelen; /* actual length of value (no NULL) */ uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ uint8_t nameval[]; /* name & value bytes concatenated */ - } list[1]; /* variable sized array */ + } list[]; /* variable sized array */ }; typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ @@ -620,19 +620,29 @@ typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ typedef struct xfs_attr_leaf_name_local { __be16 valuelen; /* number of bytes in value */ __u8 namelen; /* length of name bytes */ - __u8 nameval[1]; /* name/value bytes */ + /* + * In Linux 6.5 this flex array was converted from nameval[1] to + * nameval[]. Be very careful here about extra padding at the end; + * see xfs_attr_leaf_entsize_local() for details. + */ + __u8 nameval[]; /* name/value bytes */ } xfs_attr_leaf_name_local_t; typedef struct xfs_attr_leaf_name_remote { __be32 valueblk; /* block number of value bytes */ __be32 valuelen; /* number of bytes in value */ __u8 namelen; /* length of name bytes */ - __u8 name[1]; /* name bytes */ + /* + * In Linux 6.5 this flex array was converted from name[1] to name[]. + * Be very careful here about extra padding at the end; see + * xfs_attr_leaf_entsize_remote() for details. + */ + __u8 name[]; /* name bytes */ } xfs_attr_leaf_name_remote_t; typedef struct xfs_attr_leafblock { xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ - xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ + xfs_attr_leaf_entry_t entries[]; /* sorted on key, not name */ /* * The rest of the block contains the following structures after the * leaf entries, growing from the bottom up. The variables are never @@ -664,7 +674,7 @@ struct xfs_attr3_leaf_hdr { struct xfs_attr3_leafblock { struct xfs_attr3_leaf_hdr hdr; - struct xfs_attr_leaf_entry entries[1]; + struct xfs_attr_leaf_entry entries[]; /* * The rest of the block contains the following structures after the @@ -747,14 +757,61 @@ xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) */ static inline int xfs_attr_leaf_entsize_remote(int nlen) { - return round_up(sizeof(struct xfs_attr_leaf_name_remote) - 1 + - nlen, XFS_ATTR_LEAF_NAME_ALIGN); + /* + * Prior to Linux 6.5, struct xfs_attr_leaf_name_remote ended with + * name[1], which was used as a flexarray. The layout of this struct + * is 9 bytes of fixed-length fields followed by a __u8 flex array at + * offset 9. + * + * On most architectures, struct xfs_attr_leaf_name_remote had two + * bytes of implicit padding at the end of the struct to make the + * struct length 12. After converting name[1] to name[], there are + * three implicit padding bytes and the struct size remains 12. + * However, there are compiler configurations that do not add implicit + * padding at all (m68k) and have been broken for years. + * + * This entsize computation historically added (the xattr name length) + * to (the padded struct length - 1) and rounded that sum up to the + * nearest multiple of 4 (NAME_ALIGN). IOWs, round_up(11 + nlen, 4). + * This is encoded in the ondisk format, so we cannot change this. + * + * Compute the entsize from offsetof of the flexarray and manually + * adding bytes for the implicit padding. + */ + const size_t remotesize = + offsetof(struct xfs_attr_leaf_name_remote, name) + 2; + + return round_up(remotesize + nlen, XFS_ATTR_LEAF_NAME_ALIGN); } static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) { - return round_up(sizeof(struct xfs_attr_leaf_name_local) - 1 + - nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN); + /* + * Prior to Linux 6.5, struct xfs_attr_leaf_name_local ended with + * nameval[1], which was used as a flexarray. The layout of this + * struct is 3 bytes of fixed-length fields followed by a __u8 flex + * array at offset 3. + * + * struct xfs_attr_leaf_name_local had zero bytes of implicit padding + * at the end of the struct to make the struct length 4. On most + * architectures, after converting nameval[1] to nameval[], there is + * one implicit padding byte and the struct size remains 4. However, + * there are compiler configurations that do not add implicit padding + * at all (m68k) and would break. + * + * This entsize computation historically added (the xattr name and + * value length) to (the padded struct length - 1) and rounded that sum + * up to the nearest multiple of 4 (NAME_ALIGN). IOWs, the formula is + * round_up(3 + nlen + vlen, 4). This is encoded in the ondisk format, + * so we cannot change this. + * + * Compute the entsize from offsetof of the flexarray and manually + * adding bytes for the implicit padding. + */ + const size_t localsize = + offsetof(struct xfs_attr_leaf_name_local, nameval); + + return round_up(localsize + nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN); } static inline int xfs_attr_leaf_entsize_local_max(int bsize) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 9c60ebb328b4..2cbf9ea39b8c 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -592,12 +592,12 @@ typedef struct xfs_attrlist_cursor { struct xfs_attrlist { __s32 al_count; /* number of entries in attrlist */ __s32 al_more; /* T/F: more attrs (do call again) */ - __s32 al_offset[1]; /* byte offsets of attrs [var-sized] */ + __s32 al_offset[]; /* byte offsets of attrs [var-sized] */ }; struct xfs_attrlist_ent { /* data from attr_list() */ __u32 a_valuelen; /* number bytes in value of attr */ - char a_name[1]; /* attr name (NULL terminated) */ + char a_name[]; /* attr name (NULL terminated) */ }; typedef struct xfs_fsop_attrlist_handlereq { diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 758aacd8166b..a35781577cad 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -222,7 +222,8 @@ xfs_inode_from_disk( */ inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime); inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime); - inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime); + inode_set_ctime_to_ts(inode, + xfs_inode_from_disk_ts(from, from->di_ctime)); ip->i_disk_size = be64_to_cpu(from->di_size); ip->i_nblocks = be64_to_cpu(from->di_nblocks); @@ -316,7 +317,7 @@ xfs_inode_to_disk( to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime); to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime); - to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime); + to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode)); to->di_nlink = cpu_to_be32(inode->i_nlink); to->di_gen = cpu_to_be32(inode->i_generation); to->di_mode = cpu_to_be16(inode->i_mode); diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index cb4796b6e693..ad22656376d3 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -62,12 +62,12 @@ xfs_trans_ichgtime( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - tv = current_time(inode); + /* If the mtime changes, then ctime must also change */ + ASSERT(flags & XFS_ICHGTIME_CHG); + tv = inode_set_ctime_current(inode); if (flags & XFS_ICHGTIME_MOD) inode->i_mtime = tv; - if (flags & XFS_ICHGTIME_CHG) - inode->i_ctime = tv; if (flags & XFS_ICHGTIME_CREATE) ip->i_crtime = tv; } diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index e382a35e98d8..05be757668bb 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2019-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> @@ -8,6 +8,8 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" #include "xfs_mount.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" @@ -16,6 +18,7 @@ #include "xfs_ag.h" #include "xfs_rtalloc.h" #include "xfs_inode.h" +#include "xfs_icache.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -53,6 +56,7 @@ struct xchk_fscounters { uint64_t frextents; unsigned long long icount_min; unsigned long long icount_max; + bool frozen; }; /* @@ -123,6 +127,82 @@ xchk_fscount_warmup( return error; } +static inline int +xchk_fsfreeze( + struct xfs_scrub *sc) +{ + int error; + + error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL); + trace_xchk_fsfreeze(sc, error); + return error; +} + +static inline int +xchk_fsthaw( + struct xfs_scrub *sc) +{ + int error; + + /* This should always succeed, we have a kernel freeze */ + error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL); + trace_xchk_fsthaw(sc, error); + return error; +} + +/* + * We couldn't stabilize the filesystem long enough to sample all the variables + * that comprise the summary counters and compare them to the percpu counters. + * We need to disable all writer threads, which means taking the first two + * freeze levels to put userspace to sleep, and the third freeze level to + * prevent background threads from starting new transactions. Take one level + * more to prevent other callers from unfreezing the filesystem while we run. + */ +STATIC int +xchk_fscounters_freeze( + struct xfs_scrub *sc) +{ + struct xchk_fscounters *fsc = sc->buf; + int error = 0; + + if (sc->flags & XCHK_HAVE_FREEZE_PROT) { + sc->flags &= ~XCHK_HAVE_FREEZE_PROT; + mnt_drop_write_file(sc->file); + } + + /* Try to grab a kernel freeze. */ + while ((error = xchk_fsfreeze(sc)) == -EBUSY) { + if (xchk_should_terminate(sc, &error)) + return error; + + delay(HZ / 10); + } + if (error) + return error; + + fsc->frozen = true; + return 0; +} + +/* Thaw the filesystem after checking or repairing fscounters. */ +STATIC void +xchk_fscounters_cleanup( + void *buf) +{ + struct xchk_fscounters *fsc = buf; + struct xfs_scrub *sc = fsc->sc; + int error; + + if (!fsc->frozen) + return; + + error = xchk_fsthaw(sc); + if (error) + xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error); + else + fsc->frozen = false; +} + int xchk_setup_fscounters( struct xfs_scrub *sc) @@ -140,6 +220,7 @@ xchk_setup_fscounters( sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS); if (!sc->buf) return -ENOMEM; + sc->buf_cleanup = xchk_fscounters_cleanup; fsc = sc->buf; fsc->sc = sc; @@ -150,7 +231,18 @@ xchk_setup_fscounters( if (error) return error; - return xchk_trans_alloc(sc, 0); + /* + * Pause all writer activity in the filesystem while we're scrubbing to + * reduce the likelihood of background perturbations to the counters + * throwing off our calculations. + */ + if (sc->flags & XCHK_TRY_HARDER) { + error = xchk_fscounters_freeze(sc); + if (error) + return error; + } + + return xfs_trans_alloc_empty(sc->mp, &sc->tp); } /* @@ -290,8 +382,7 @@ retry: if (fsc->ifree > fsc->icount) { if (tries--) goto retry; - xchk_set_incomplete(sc); - return 0; + return -EDEADLOCK; } return 0; @@ -367,6 +458,8 @@ xchk_fscount_count_frextents( * Otherwise, we /might/ have a problem. If the change in the summations is * more than we want to tolerate, the filesystem is probably busy and we should * just send back INCOMPLETE and see if userspace will try again. + * + * If we're repairing then we require an exact match. */ static inline bool xchk_fscount_within_range( @@ -396,21 +489,7 @@ xchk_fscount_within_range( if (expected >= min_value && expected <= max_value) return true; - /* - * If the difference between the two summations is too large, the fs - * might just be busy and so we'll mark the scrub incomplete. Return - * true here so that we don't mark the counter corrupt. - * - * XXX: In the future when userspace can grant scrub permission to - * quiesce the filesystem to solve the outsized variance problem, this - * check should be moved up and the return code changed to signal to - * userspace that we need quiesce permission. - */ - if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) { - xchk_set_incomplete(sc); - return true; - } - + /* Everything else is bad. */ return false; } @@ -422,6 +501,7 @@ xchk_fscounters( struct xfs_mount *mp = sc->mp; struct xchk_fscounters *fsc = sc->buf; int64_t icount, ifree, fdblocks, frextents; + bool try_again = false; int error; /* Snapshot the percpu counters. */ @@ -431,9 +511,26 @@ xchk_fscounters( frextents = percpu_counter_sum(&mp->m_frextents); /* No negative values, please! */ - if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0) + if (icount < 0 || ifree < 0) xchk_set_corrupt(sc); + /* + * If the filesystem is not frozen, the counter summation calls above + * can race with xfs_mod_freecounter, which subtracts a requested space + * reservation from the counter and undoes the subtraction if that made + * the counter go negative. Therefore, it's possible to see negative + * values here, and we should only flag that as a corruption if we + * froze the fs. This is much more likely to happen with frextents + * since there are no reserved pools. + */ + if (fdblocks < 0 || frextents < 0) { + if (!fsc->frozen) + return -EDEADLOCK; + + xchk_set_corrupt(sc); + return 0; + } + /* See if icount is obviously wrong. */ if (icount < fsc->icount_min || icount > fsc->icount_max) xchk_set_corrupt(sc); @@ -447,12 +544,6 @@ xchk_fscounters( xchk_set_corrupt(sc); /* - * XXX: We can't quiesce percpu counter updates, so exit early. - * This can be re-enabled when we gain exclusive freeze functionality. - */ - return 0; - - /* * If ifree exceeds icount by more than the minimum variance then * something's probably wrong with the counters. */ @@ -463,8 +554,6 @@ xchk_fscounters( error = xchk_fscount_aggregate_agcounts(sc, fsc); if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error)) return error; - if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) - return 0; /* Count the free extents counter for rt volumes. */ error = xchk_fscount_count_frextents(sc, fsc); @@ -473,20 +562,45 @@ xchk_fscounters( if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) return 0; - /* Compare the in-core counters with whatever we counted. */ - if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount)) - xchk_set_corrupt(sc); + /* + * Compare the in-core counters with whatever we counted. If the fs is + * frozen, we treat the discrepancy as a corruption because the freeze + * should have stabilized the counter values. Otherwise, we need + * userspace to call us back having granted us freeze permission. + */ + if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, + fsc->icount)) { + if (fsc->frozen) + xchk_set_corrupt(sc); + else + try_again = true; + } - if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) - xchk_set_corrupt(sc); + if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) { + if (fsc->frozen) + xchk_set_corrupt(sc); + else + try_again = true; + } if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks, - fsc->fdblocks)) - xchk_set_corrupt(sc); + fsc->fdblocks)) { + if (fsc->frozen) + xchk_set_corrupt(sc); + else + try_again = true; + } if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents, - fsc->frextents)) - xchk_set_corrupt(sc); + fsc->frextents)) { + if (fsc->frozen) + xchk_set_corrupt(sc); + else + try_again = true; + } + + if (try_again) + return -EDEADLOCK; return 0; } diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 3d98f604765e..a0fffbcd022b 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -184,8 +184,10 @@ xchk_teardown( xchk_irele(sc, sc->ip); sc->ip = NULL; } - if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) + if (sc->flags & XCHK_HAVE_FREEZE_PROT) { + sc->flags &= ~XCHK_HAVE_FREEZE_PROT; mnt_drop_write_file(sc->file); + } if (sc->buf) { if (sc->buf_cleanup) sc->buf_cleanup(sc->buf); @@ -505,6 +507,8 @@ retry_op: error = mnt_want_write_file(sc->file); if (error) goto out_sc; + + sc->flags |= XCHK_HAVE_FREEZE_PROT; } /* Set up for the operation. */ diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index e113f2f5c254..f8ba00e51ca9 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -106,6 +106,7 @@ struct xfs_scrub { /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */ #define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */ +#define XCHK_HAVE_FREEZE_PROT (1U << 1) /* do we have freeze protection? */ #define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */ #define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */ #define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index b3894daeb86a..0b54f1a1cf0c 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -98,6 +98,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); #define XFS_SCRUB_STATE_STRINGS \ { XCHK_TRY_HARDER, "try_harder" }, \ + { XCHK_HAVE_FREEZE_PROT, "nofreeze" }, \ { XCHK_FSGATES_DRAIN, "fsgates_drain" }, \ { XCHK_NEED_DRAIN, "need_drain" }, \ { XREP_ALREADY_FIXED, "already_fixed" } @@ -693,6 +694,31 @@ TRACE_EVENT(xchk_fscounters_within_range, __entry->old_value) ) +DECLARE_EVENT_CLASS(xchk_fsfreeze_class, + TP_PROTO(struct xfs_scrub *sc, int error), + TP_ARGS(sc, error), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(int, error) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->error = error; + ), + TP_printk("dev %d:%d type %s error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), + __entry->error) +); +#define DEFINE_XCHK_FSFREEZE_EVENT(name) \ +DEFINE_EVENT(xchk_fsfreeze_class, name, \ + TP_PROTO(struct xfs_scrub *sc, int error), \ + TP_ARGS(sc, error)) +DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsfreeze); +DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsthaw); + TRACE_EVENT(xchk_refcount_incorrect, TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *irec, xfs_nlink_t seen), diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 791db7d9c849..6b840301817a 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -233,7 +233,7 @@ xfs_acl_set_mode( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); inode->i_mode = mode; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (xfs_has_wsync(mp)) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index fbb675563208..fcefab687285 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1644,6 +1644,7 @@ xfs_swap_extents( uint64_t f; int resblks = 0; unsigned int flags = 0; + struct timespec64 ctime; /* * Lock the inodes against other IO, page faults and truncate to @@ -1756,8 +1757,9 @@ xfs_swap_extents( * process that the file was not changed out from * under it. */ - if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || - (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || + ctime = inode_get_ctime(VFS_I(ip)); + if ((sbp->bs_ctime.tv_sec != ctime.tv_sec) || + (sbp->bs_ctime.tv_nsec != ctime.tv_nsec) || (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { error = -EBUSY; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 15d1e5a7c2d3..3b903f6bce98 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1938,14 +1938,17 @@ void xfs_free_buftarg( struct xfs_buftarg *btp) { + struct block_device *bdev = btp->bt_bdev; + unregister_shrinker(&btp->bt_shrinker); ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); percpu_counter_destroy(&btp->bt_io_count); list_lru_destroy(&btp->bt_lru); - blkdev_issue_flush(btp->bt_bdev); - invalidate_bdev(btp->bt_bdev); fs_put_dax(btp->bt_daxdev, btp->bt_mount); + /* the main block device is closed by kill_block_super */ + if (bdev != btp->bt_mount->m_super->s_bdev) + blkdev_put(bdev, btp->bt_mount->m_super); kmem_free(btp); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 9e62cc500140..360fe83a334f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -843,10 +843,9 @@ xfs_init_new_inode( ip->i_df.if_nextents = 0; ASSERT(ip->i_nblocks == 0); - tv = current_time(inode); + tv = inode_set_ctime_current(inode); inode->i_mtime = tv; inode->i_atime = tv; - inode->i_ctime = tv; ip->i_extsize = 0; ip->i_diflags = 0; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 91c847a84e10..127b2410eb20 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -528,7 +528,7 @@ xfs_inode_to_log_dinode( memset(to->di_pad3, 0, sizeof(to->di_pad3)); to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime); to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime); - to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime); + to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode)); to->di_nlink = inode->i_nlink; to->di_gen = inode->i_generation; to->di_mode = inode->i_mode; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 24718adb3c16..2ededd3f6b8c 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -573,10 +573,10 @@ xfs_vn_getattr( stat->gid = vfsgid_into_kgid(vfsgid); stat->ino = ip->i_ino; stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); + fill_mg_cmtime(stat, request_mask, inode); + if (xfs_has_v3inodes(mp)) { if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; @@ -917,7 +917,7 @@ xfs_setattr_size( if (newsize != oldsize && !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = - current_time(inode); + current_mgtime(inode); iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } @@ -1029,7 +1029,6 @@ xfs_vn_setattr( STATIC int xfs_vn_update_time( struct inode *inode, - struct timespec64 *now, int flags) { struct xfs_inode *ip = XFS_I(inode); @@ -1037,13 +1036,16 @@ xfs_vn_update_time( int log_flags = XFS_ILOG_TIMESTAMP; struct xfs_trans *tp; int error; + struct timespec64 now; trace_xfs_update_time(ip); if (inode->i_sb->s_flags & SB_LAZYTIME) { if (!((flags & S_VERSION) && - inode_maybe_inc_iversion(inode, false))) - return generic_update_time(inode, now, flags); + inode_maybe_inc_iversion(inode, false))) { + generic_update_time(inode, flags); + return 0; + } /* Capture the iversion update that just occurred */ log_flags |= XFS_ILOG_CORE; @@ -1054,12 +1056,15 @@ xfs_vn_update_time( return error; xfs_ilock(ip, XFS_ILOCK_EXCL); - if (flags & S_CTIME) - inode->i_ctime = *now; + if (flags & (S_CTIME|S_MTIME)) + now = inode_set_ctime_current(inode); + else + now = current_time(inode); + if (flags & S_MTIME) - inode->i_mtime = *now; + inode->i_mtime = now; if (flags & S_ATIME) - inode->i_atime = *now; + inode->i_atime = now; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, log_flags); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f225413a993c..c2093cb56092 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -100,8 +100,8 @@ xfs_bulkstat_one_int( buf->bs_atime_nsec = inode->i_atime.tv_nsec; buf->bs_mtime = inode->i_mtime.tv_sec; buf->bs_mtime_nsec = inode->i_mtime.tv_nsec; - buf->bs_ctime = inode->i_ctime.tv_sec; - buf->bs_ctime_nsec = inode->i_ctime.tv_nsec; + buf->bs_ctime = inode_get_ctime(inode).tv_sec; + buf->bs_ctime_nsec = inode_get_ctime(inode).tv_nsec; buf->bs_gen = inode->i_generation; buf->bs_mode = inode->i_mode; diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 9737b5a9f405..c4cc99b70dd3 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -56,7 +56,7 @@ xfs_check_ondisk_structs(void) /* dir/attr trees */ XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80); - XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock, 88); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock, 80); XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmt_hdr, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_da3_blkinfo, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_da3_intnode, 64); @@ -88,7 +88,8 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valuelen, 4); XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8); XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9); - XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 40); + XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_shortform, 4); XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.totsize, 0); XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.count, 2); XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].namelen, 4); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 818510243130..c79eac048456 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -377,17 +377,6 @@ disable_dax: return 0; } -static void -xfs_bdev_mark_dead( - struct block_device *bdev) -{ - xfs_force_shutdown(bdev->bd_holder, SHUTDOWN_DEVICE_REMOVED); -} - -static const struct blk_holder_ops xfs_holder_ops = { - .mark_dead = xfs_bdev_mark_dead, -}; - STATIC int xfs_blkdev_get( xfs_mount_t *mp, @@ -396,8 +385,8 @@ xfs_blkdev_get( { int error = 0; - *bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE, mp, - &xfs_holder_ops); + *bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE, + mp->m_super, &fs_holder_ops); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); xfs_warn(mp, "Invalid device [%s], error=%d", name, error); @@ -407,31 +396,45 @@ xfs_blkdev_get( } STATIC void -xfs_blkdev_put( - struct xfs_mount *mp, - struct block_device *bdev) -{ - if (bdev) - blkdev_put(bdev, mp); -} - -STATIC void -xfs_close_devices( +xfs_shutdown_devices( struct xfs_mount *mp) { + /* + * Udev is triggered whenever anyone closes a block device or unmounts + * a file systemm on a block device. + * The default udev rules invoke blkid to read the fs super and create + * symlinks to the bdev under /dev/disk. For this, it uses buffered + * reads through the page cache. + * + * xfs_db also uses buffered reads to examine metadata. There is no + * coordination between xfs_db and udev, which means that they can run + * concurrently. Note there is no coordination between the kernel and + * blkid either. + * + * On a system with 64k pages, the page cache can cache the superblock + * and the root inode (and hence the root directory) with the same 64k + * page. If udev spawns blkid after the mkfs and the system is busy + * enough that it is still running when xfs_db starts up, they'll both + * read from the same page in the pagecache. + * + * The unmount writes updated inode metadata to disk directly. The XFS + * buffer cache does not use the bdev pagecache, so it needs to + * invalidate that pagecache on unmount. If the above scenario occurs, + * the pagecache no longer reflects what's on disk, xfs_db reads the + * stale metadata, and fails to find /a. Most of the time this succeeds + * because closing a bdev invalidates the page cache, but when processes + * race, everyone loses. + */ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { - struct block_device *logdev = mp->m_logdev_targp->bt_bdev; - - xfs_free_buftarg(mp->m_logdev_targp); - xfs_blkdev_put(mp, logdev); + blkdev_issue_flush(mp->m_logdev_targp->bt_bdev); + invalidate_bdev(mp->m_logdev_targp->bt_bdev); } if (mp->m_rtdev_targp) { - struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; - - xfs_free_buftarg(mp->m_rtdev_targp); - xfs_blkdev_put(mp, rtdev); + blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); + invalidate_bdev(mp->m_rtdev_targp->bt_bdev); } - xfs_free_buftarg(mp->m_ddev_targp); + blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); + invalidate_bdev(mp->m_ddev_targp->bt_bdev); } /* @@ -448,17 +451,24 @@ STATIC int xfs_open_devices( struct xfs_mount *mp) { - struct block_device *ddev = mp->m_super->s_bdev; + struct super_block *sb = mp->m_super; + struct block_device *ddev = sb->s_bdev; struct block_device *logdev = NULL, *rtdev = NULL; int error; /* + * blkdev_put() can't be called under s_umount, see the comment + * in get_tree_bdev() for more details + */ + up_write(&sb->s_umount); + + /* * Open real time and log devices - order is important. */ if (mp->m_logname) { error = xfs_blkdev_get(mp, mp->m_logname, &logdev); if (error) - return error; + goto out_relock; } if (mp->m_rtname) { @@ -496,7 +506,10 @@ xfs_open_devices( mp->m_logdev_targp = mp->m_ddev_targp; } - return 0; + error = 0; +out_relock: + down_write(&sb->s_umount); + return error; out_free_rtdev_targ: if (mp->m_rtdev_targp) @@ -504,11 +517,12 @@ xfs_open_devices( out_free_ddev_targ: xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: - xfs_blkdev_put(mp, rtdev); + if (rtdev) + blkdev_put(rtdev, sb); out_close_logdev: if (logdev && logdev != ddev) - xfs_blkdev_put(mp, logdev); - return error; + blkdev_put(logdev, sb); + goto out_relock; } /* @@ -758,6 +772,17 @@ static void xfs_mount_free( struct xfs_mount *mp) { + /* + * Free the buftargs here because blkdev_put needs to be called outside + * of sb->s_umount, which is held around the call to ->put_super. + */ + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) + xfs_free_buftarg(mp->m_logdev_targp); + if (mp->m_rtdev_targp) + xfs_free_buftarg(mp->m_rtdev_targp); + if (mp->m_ddev_targp) + xfs_free_buftarg(mp->m_ddev_targp); + kfree(mp->m_rtname); kfree(mp->m_logname); kmem_free(mp); @@ -1133,10 +1158,6 @@ xfs_fs_put_super( { struct xfs_mount *mp = XFS_M(sb); - /* if ->fill_super failed, we have no mount to tear down */ - if (!sb->s_fs_info) - return; - xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid); xfs_filestream_unmount(mp); xfs_unmountfs(mp); @@ -1147,10 +1168,7 @@ xfs_fs_put_super( xfs_inodegc_free_percpu(mp); xfs_destroy_percpu_counters(mp); xfs_destroy_mount_workqueues(mp); - xfs_close_devices(mp); - - sb->s_fs_info = NULL; - xfs_mount_free(mp); + xfs_shutdown_devices(mp); } static long @@ -1492,7 +1510,7 @@ xfs_fs_fill_super( error = xfs_fs_validate_params(mp); if (error) - goto out_free_names; + return error; sb_min_blocksize(sb, BBSIZE); sb->s_xattr = xfs_xattr_handlers; @@ -1519,11 +1537,11 @@ xfs_fs_fill_super( error = xfs_open_devices(mp); if (error) - goto out_free_names; + return error; error = xfs_init_mount_workqueues(mp); if (error) - goto out_close_devices; + goto out_shutdown_devices; error = xfs_init_percpu_counters(mp); if (error) @@ -1737,11 +1755,8 @@ xfs_fs_fill_super( xfs_destroy_percpu_counters(mp); out_destroy_workqueues: xfs_destroy_mount_workqueues(mp); - out_close_devices: - xfs_close_devices(mp); - out_free_names: - sb->s_fs_info = NULL; - xfs_mount_free(mp); + out_shutdown_devices: + xfs_shutdown_devices(mp); return error; out_unmount: @@ -1934,7 +1949,8 @@ xfs_fs_reconfigure( return 0; } -static void xfs_fs_free( +static void +xfs_fs_free( struct fs_context *fc) { struct xfs_mount *mp = fc->s_fs_info; @@ -2003,13 +2019,21 @@ static int xfs_init_fs_context( return 0; } +static void +xfs_kill_sb( + struct super_block *sb) +{ + kill_block_super(sb); + xfs_mount_free(XFS_M(sb)); +} + static struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", .init_fs_context = xfs_init_fs_context, .parameters = xfs_fs_parameters, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, + .kill_sb = xfs_kill_sb, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME, }; MODULE_ALIAS_FS("xfs"); diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 92c9aaae3663..789cfb74c146 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -341,77 +341,6 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) return generic_file_llseek_size(file, offset, whence, isize, isize); } -struct zonefs_zone_append_bio { - /* The target inode of the BIO */ - struct inode *inode; - - /* For sync writes, the target append write offset */ - u64 append_offset; - - /* - * This member must come last, bio_alloc_bioset will allocate enough - * bytes for entire zonefs_bio but relies on bio being last. - */ - struct bio bio; -}; - -static inline struct zonefs_zone_append_bio * -zonefs_zone_append_bio(struct bio *bio) -{ - return container_of(bio, struct zonefs_zone_append_bio, bio); -} - -static void zonefs_file_zone_append_dio_bio_end_io(struct bio *bio) -{ - struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio); - struct zonefs_zone *z = zonefs_inode_zone(za_bio->inode); - sector_t za_sector; - - if (bio->bi_status != BLK_STS_OK) - goto bio_end; - - /* - * If the file zone was written underneath the file system, the zone - * append operation can still succedd (if the zone is not full) but - * the write append location will not be where we expect it to be. - * Check that we wrote where we intended to, that is, at z->z_wpoffset. - */ - za_sector = z->z_sector + (za_bio->append_offset >> SECTOR_SHIFT); - if (bio->bi_iter.bi_sector != za_sector) { - zonefs_warn(za_bio->inode->i_sb, - "Invalid write sector %llu for zone at %llu\n", - bio->bi_iter.bi_sector, z->z_sector); - bio->bi_status = BLK_STS_IOERR; - } - -bio_end: - iomap_dio_bio_end_io(bio); -} - -static void zonefs_file_zone_append_dio_submit_io(const struct iomap_iter *iter, - struct bio *bio, - loff_t file_offset) -{ - struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio); - struct inode *inode = iter->inode; - struct zonefs_zone *z = zonefs_inode_zone(inode); - - /* - * Issue a zone append BIO to process sync dio writes. The append - * file offset is saved to check the zone append write location - * on completion of the BIO. - */ - za_bio->inode = inode; - za_bio->append_offset = file_offset; - - bio->bi_opf &= ~REQ_OP_WRITE; - bio->bi_opf |= REQ_OP_ZONE_APPEND; - bio->bi_iter.bi_sector = z->z_sector; - bio->bi_end_io = zonefs_file_zone_append_dio_bio_end_io; - - submit_bio(bio); -} - static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, int error, unsigned int flags) { @@ -442,14 +371,6 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, return 0; } -static struct bio_set zonefs_zone_append_bio_set; - -static const struct iomap_dio_ops zonefs_zone_append_dio_ops = { - .submit_io = zonefs_file_zone_append_dio_submit_io, - .end_io = zonefs_file_write_dio_end_io, - .bio_set = &zonefs_zone_append_bio_set, -}; - static const struct iomap_dio_ops zonefs_write_dio_ops = { .end_io = zonefs_file_write_dio_end_io, }; @@ -533,9 +454,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) struct zonefs_inode_info *zi = ZONEFS_I(inode); struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; - const struct iomap_dio_ops *dio_ops; - bool sync = is_sync_kiocb(iocb); - bool append = false; ssize_t ret, count; /* @@ -543,7 +461,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) * as this can cause write reordering (e.g. the first aio gets EAGAIN * on the inode lock but the second goes through but is now unaligned). */ - if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT)) + if (zonefs_zone_is_seq(z) && !is_sync_kiocb(iocb) && + (iocb->ki_flags & IOCB_NOWAIT)) return -EOPNOTSUPP; if (iocb->ki_flags & IOCB_NOWAIT) { @@ -573,18 +492,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) goto inode_unlock; } mutex_unlock(&zi->i_truncate_mutex); - append = sync; - } - - if (append) { - unsigned int max = bdev_max_zone_append_sectors(sb->s_bdev); - - max = ALIGN_DOWN(max << SECTOR_SHIFT, sb->s_blocksize); - iov_iter_truncate(from, max); - - dio_ops = &zonefs_zone_append_dio_ops; - } else { - dio_ops = &zonefs_write_dio_ops; } /* @@ -593,7 +500,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) * the user can make sense of the error. */ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, - dio_ops, 0, NULL, 0); + &zonefs_write_dio_ops, 0, NULL, 0); if (ret == -ENOTBLK) ret = -EBUSY; @@ -938,15 +845,3 @@ const struct file_operations zonefs_file_operations = { .splice_write = iter_file_splice_write, .iopoll = iocb_bio_iopoll, }; - -int zonefs_file_bioset_init(void) -{ - return bioset_init(&zonefs_zone_append_bio_set, BIO_POOL_SIZE, - offsetof(struct zonefs_zone_append_bio, bio), - BIOSET_NEED_BVECS); -} - -void zonefs_file_bioset_exit(void) -{ - bioset_exit(&zonefs_zone_append_bio_set); -} diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index bbe44a26a8e5..9d1a9808fbbb 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -658,7 +658,8 @@ static struct inode *zonefs_get_file_inode(struct inode *dir, inode->i_ino = ino; inode->i_mode = z->z_mode; - inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; + inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode, + inode_get_ctime(dir)); inode->i_uid = z->z_uid; inode->i_gid = z->z_gid; inode->i_size = z->z_wpoffset; @@ -694,7 +695,8 @@ static struct inode *zonefs_get_zgroup_inode(struct super_block *sb, inode->i_ino = ino; inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555); inode->i_size = sbi->s_zgroup[ztype].g_nr_zones; - inode->i_ctime = inode->i_mtime = inode->i_atime = root->i_ctime; + inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode, + inode_get_ctime(root)); inode->i_private = &sbi->s_zgroup[ztype]; set_nlink(inode, 2); @@ -1317,7 +1319,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) inode->i_ino = bdev_nr_zones(sb->s_bdev); inode->i_mode = S_IFDIR | 0555; - inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode); + inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); inode->i_op = &zonefs_dir_inode_operations; inode->i_fop = &zonefs_dir_operations; inode->i_size = 2; @@ -1412,13 +1414,9 @@ static int __init zonefs_init(void) BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); - ret = zonefs_file_bioset_init(); - if (ret) - return ret; - ret = zonefs_init_inodecache(); if (ret) - goto destroy_bioset; + return ret; ret = zonefs_sysfs_init(); if (ret) @@ -1434,8 +1432,6 @@ sysfs_exit: zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); -destroy_bioset: - zonefs_file_bioset_exit(); return ret; } @@ -1445,7 +1441,6 @@ static void __exit zonefs_exit(void) unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); - zonefs_file_bioset_exit(); } MODULE_AUTHOR("Damien Le Moal"); diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index f663b8ebc2cb..8175652241b5 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -279,8 +279,6 @@ extern const struct file_operations zonefs_dir_operations; extern const struct address_space_operations zonefs_file_aops; extern const struct file_operations zonefs_file_operations; int zonefs_file_truncate(struct inode *inode, loff_t isize); -int zonefs_file_bioset_init(void); -void zonefs_file_bioset_exit(void); /* In sysfs.c */ int zonefs_sysfs_register(struct super_block *sb); |